Python DB.append Examples

Programming Language: Python

Namespace/Package Name: kyotocabinet

Class/Type: DB

Method/Function: append

Examples at hotexamples.com: 2

Python DB.append - 2 examples found. These are the top rated real world Python examples of kyotocabinet.DB.append extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

DB(30)

open(30)

close(21)

cursor(14)

error(11)

get(9)

set(5)

check(2)

set_bulk(2)

count(2)

increment(2)

remove(2)

size(1)

remove_bulk(1)

get_bulk(1)

process(1)

match_prefix(1)

append(1)

end_transaction(1)

commit(1)

begin_transaction(1)

synchronize(1)

Example #1

Show file

File: mdl_bknn.py Project: rbergmair-utopiarefraktor/recruitment_challenge

class BKNNModel( Model ):


  def __init__( self, fn, mode, catfe, binfe, contfe, fdisc, fsel, kval ):
    """Create an unopened model; the databases are opened by ``__enter__``.

    ``kval`` is kept as ``self._kval`` (the number of nearest positive
    neighbours retained by ``_apply``).  All other arguments are handed
    unchanged to ``Model.__init__``.
    """

    Model.__init__( self, fn, mode, catfe, binfe, contfe, fdisc, fsel );

    self._kval = kval;

    # Companion files share the main database name with the '.kch' suffix
    # swapped out.  (self._fn is presumably set by Model.__init__.)
    base = self._fn;
    self._fn_cdata = base;
    self._fn_ddata = base.replace( '.kch', '-discrete.kch' );
    self._fn_meta = base.replace( '.kch', '-meta.pickle' );
    self._fn_icov = base.replace( '.kch', '-icov.pickle' );

    # Database handles, created in __enter__.
    self._cdata = self._ddata = None;

    # Feature vector lengths, learnt from the first training row or loaded
    # from the meta pickle.
    self._len_c = self._len_b = self._len_x = None;

    # Row totals, filled in by train() / _init().
    self._rowcount = self._total_pos = self._total_neg = None;

    # Inverse covariance matrix and decision cutoff, set by _finalize() or
    # loaded in __enter__.
    self._icov = self._co = None;

    # In-memory sample of training rows; each list must be its own object.
    ( self._sample_y,
      self._sample_c,
      self._sample_b,
      self._sample_x,
      self._sample_x_ ) = ( [], [], [], [], [] );

    self._needs_finalization = False;
    self._needs_initialization = True;

    # Per-feature counts and scores for the discrete features.
    ( self._dmarginals, self._dscores ) = ( {}, {} );

    self._sparse_points = 0;

    self._bias = None;


  def __enter__( self ):
    """Open both Kyoto Cabinet files and, in read mode, load the pickles.

    In mode "r" the continuous and the discrete database are opened
    read-only, and the feature lengths, the cutoff and the inverse
    covariance matrix are loaded from the meta / icov pickle files.  In
    mode "w" existing database files are deleted and fresh ones created.

    Returns:
      self, for use as the ``with`` target.

    Raises:
      AssertionError: if the mode is neither "r" nor "w", or if a database
        cannot be opened.  The exception type is kept for existing callers,
        but the checks are explicit so they still run under ``python -O``
        (a plain ``assert db.open(...)`` would not even open the file).
    """

    if self._mode not in ( "r", "w" ):
      raise AssertionError( "unsupported mode: {!r}".format( self._mode ) );

    self._cdata = DB();
    self._ddata = DB();

    opened = [];

    try:

      for ( db, fn ) in ( ( self._cdata, self._fn_cdata ),
                          ( self._ddata, self._fn_ddata ) ):

        if self._mode == "r":
          flags = DB.OREADER;
        else:
          # Write mode always starts from an empty database file.
          if isfile( fn ):
            remove( fn );
          flags = DB.OWRITER | DB.OCREATE;

        if not db.open( fn, flags ):
          print( str( db.error() ) );
          raise AssertionError( "cannot open database: {}".format( fn ) );

        opened.append( db );

      if self._mode == "r":

        # NOTE(review): unpickling is only safe if these files are trusted.
        with open( self._fn_meta, 'rb' ) as f:
          r = pickle_load( f );
          self._len_c = r[ "c" ];
          self._len_b = r[ "b" ];
          self._len_x = r[ "x" ];
          self._co = r[ "co" ];

        with open( self._fn_icov, 'rb' ) as f:
          self._icov = pickle_load( f );

    except BaseException:

      # __exit__ is not invoked when __enter__ raises, so whatever was
      # opened so far has to be released here.
      for db in opened:
        db.close();
      self._cdata = None;
      self._ddata = None;
      raise;

    return self;


  def __exit__( self, exc_type, exc_value, traceback ):
    """Finish writing (in write mode), close both databases and clean up.

    On a clean exit in mode "w" the model is finalised if training rows
    were added, and the meta and icov pickles are written.  Both databases
    are then closed unconditionally.  If the ``with`` body failed, or
    finalising / writing the pickles failed, all four output files are
    removed in write mode so that no half-written model is left behind.

    Args:
      exc_type, exc_value, traceback: the usual context manager triple.

    Returns:
      False, so an exception from the ``with`` body is never suppressed.

    Raises:
      AssertionError: if a database could not be closed.  The type is kept
        for existing callers; the check is explicit because
        ``assert db.close()`` would skip the close under ``python -O``.
    """

    ex_w_exc = (
        ( exc_type is not None )
        or ( exc_value is not None )
        or ( traceback is not None )
      );

    try:

      if ( not ex_w_exc ) and ( self._mode == "w" ):

        if self._needs_finalization:
          self._finalize();

        with open( self._fn_meta, 'wb' ) as f:

          r = { "c": self._len_c,
                "b": self._len_b,
                "x": self._len_x,
                "co": self._co };

          pickle_dump( r, f );

        with open( self._fn_icov, 'wb' ) as f:

          pickle_dump( self._icov, f );

    except BaseException:

      # A failure while finalising counts as a failed write: the partial
      # output is removed below, after the databases have been closed.
      ex_w_exc = True;
      raise;

    finally:

      # Close both handles even if the first close fails.
      close_failed = False;
      for attr in ( "_cdata", "_ddata" ):
        db = getattr( self, attr );
        if db is None:
          continue;
        setattr( self, attr, None );
        if not db.close():
          print( str( db.error() ) );
          close_failed = True;

      if ex_w_exc and ( self._mode == "w" ):
        for fn in ( self._fn_cdata,
                    self._fn_ddata,
                    self._fn_meta,
                    self._fn_icov ):
          if isfile( fn ):
            remove( fn );

      if close_failed:
        raise AssertionError( "failed to close database" );

    return False;


  def train( self, row ):
    """Add one labelled training row to both databases and to the sample.

    ``row`` is ``( y, c, b, x )``.  ``c`` and ``b`` go through their feature
    extractors and the feature selector; ``x`` is extracted, discretised,
    and both versions are passed through the feature selector.  The count
    stored under the packed ``( y, c..., b... )`` key of the discrete
    database is incremented, and a packed ``( y, x... )`` record is
    appended in the continuous database under the packed discretised ``x``.
    The first 50000 rows are also kept in memory.

    Always returns False.
    """

    self._needs_finalization = True;

    ( label, cat, binary, cont ) = row;

    cat = self._fsel.apply_c( self._catfe( cat ) );
    binary = self._fsel.apply_b( self._binfe( binary ) );

    cont = self._contfe( cont );
    disc = self._fdisc( cont );

    cont = self._fsel.apply_x( cont );
    disc = self._fsel.apply_x( disc );

    # The first row fixes the expected vector lengths; every row is then
    # checked against them.
    for ( attr, vec ) in ( ( "_len_c", cat ),
                           ( "_len_b", binary ),
                           ( "_len_x", cont ) ):
      if getattr( self, attr ) is None:
        setattr( self, attr, len( vec ) );
      assert getattr( self, attr ) == len( vec );

    self._rowcount = ( 0 if self._rowcount is None else self._rowcount ) + 1;

    n_discrete = 1 + self._len_c + self._len_b;
    dkey = pack( '>' + ( 'I' * n_discrete ), label, *( cat + binary ) );
    self._ddata.increment( dkey, 1, 0 );

    n_cont = len( cont );
    ckey = pack( '>' + ( 'I' * n_cont ), *disc );
    cval = pack( '>I' + ( 'f' * n_cont ), label, *cont );
    self._cdata.append( ckey, cval );

    n_sampled = len( self._sample_x );

    if n_sampled < 50000:

      # All sample lists must grow in lockstep.
      for other in ( self._sample_y,
                     self._sample_c,
                     self._sample_b,
                     self._sample_x_ ):
        assert n_sampled == len( other );

      self._sample_y.append( label );
      self._sample_c.append( cat );
      self._sample_b.append( binary );
      self._sample_x.append( cont );
      self._sample_x_.append( disc );

    return False;


  def _init( self ):
    """Derive the discrete-feature scores and the bias from the discrete DB.

    Scans every record of the discrete database.  Keys are unpacked as
    big-endian unsigned ints ``( y, feature_0, feature_1, ... )`` and values
    as one big-endian 64-bit count.  From these it builds

      * ``self._dmarginals[ i ][ ( y, value ) ]``: count per feature i,
      * ``self._rowcount`` / ``self._total_pos`` / ``self._total_neg``,
      * ``self._dscores[ i ][ value ]``: smoothed log2 ratio of the value's
        frequency among positive rows versus negative rows,
      * ``self._bias``: log2 ratio of positive to negative row frequency.

    The totals are accumulated directly from the records, so they are also
    available when the model has no discrete features at all.

    Raises:
      AssertionError: if the totals disagree with values already known, or
        if a label other than 0 or 1 was counted.
      ValueError: presumably raised by ``log`` when there are no positive
        or no negative rows (log of zero) -- TODO confirm which ``log`` the
        file imports.
    """

    self._needs_initialization = False;

    keyfmt = '>' + ( 'I' * ( 1 + self._len_c + self._len_b ) );
    valfmt = '>Q';

    total = 0;
    total_neg = 0;
    total_pos = 0;

    cursor = self._ddata.cursor();

    try:

      cursor.jump();

      while True:

        r = cursor.get( True );
        if not r:
          break;

        dbkey = unpack( keyfmt, r[0] );
        count = unpack( valfmt, r[1] )[ 0 ];

        y = dbkey[ 0 ];

        total += count;
        if y == 0:
          total_neg += count;
        elif y == 1:
          total_pos += count;

        for ( i, value_of_variable_i ) in enumerate( dbkey[ 1: ] ):
          marginal = self._dmarginals.setdefault( i, {} );
          marginal[ (y,value_of_variable_i) ] \
            = marginal.get( (y,value_of_variable_i), 0 ) + count;

    finally:

      # Kyoto Cabinet cursors should be disabled explicitly once they are
      # no longer in use.
      cursor.disable();


    if self._rowcount is None:
      self._rowcount = total;
    assert self._rowcount == total, ( self._rowcount, total );

    if self._total_neg is None:
      self._total_neg = total_neg;
    assert self._total_neg == total_neg, ( self._total_neg, total_neg );

    if self._total_pos is None:
      self._total_pos = total_pos;
    assert self._total_pos == total_pos, ( self._total_pos, total_pos );

    # Fails if any row carried a label other than 0 or 1.
    assert ( self._total_pos + self._total_neg ) == self._rowcount;


    for ( i, count_by_val ) in self._dmarginals.items():

      values = { val for ( y, val ) in count_by_val };

      scores = self._dscores.setdefault( i, {} );

      # The smoothed denominators are the same for every value of feature i.
      log_den_pos \
        = log( float(self._total_pos) + float( len(values) ) * SMOOTHING, 2.0 );
      log_den_neg \
        = log( float(self._total_neg) + float( len(values) ) * SMOOTHING, 2.0 );

      for val in values:

        pos_cnt = count_by_val.get( (1,val), 0 );
        neg_cnt = count_by_val.get( (0,val), 0 );

        p_pos = log( float(pos_cnt) + SMOOTHING, 2.0 ) - log_den_pos;
        p_neg = log( float(neg_cnt) + SMOOTHING, 2.0 ) - log_den_neg;

        scores[ val ] = p_pos - p_neg;


    p_pos \
      =   log( float(self._total_pos), 2.0 ) \
        - log( float(self._rowcount), 2.0 );

    p_neg \
      =   log( float(self._total_neg), 2.0 ) \
        - log( float(self._rowcount), 2.0 );

    self._bias = p_pos - p_neg;


  def _apply( self, row ):
    """Score one preprocessed row against the stored training points.

    ``row`` is ``( c, b, x, x_ )``: the selected categorical and binary
    features, the continuous features and their discretised counterparts.

    Stored points are fetched from the continuous database for every
    combination of bins within +/-2 of each discretised value.  For each
    point the distance ``sqrt( diff^T * icov * diff )`` to ``x`` is
    computed.  The ``self._kval`` smallest distances to positive points are
    kept, and the negative points no farther away than the last of those
    are counted.  The score is ``self._bias`` plus a smoothed log2 ratio of
    those two counts plus the per-feature discrete scores.

    Returns the raw score while ``self._co`` is None, otherwise 1 if the
    score reaches the cutoff ``self._co`` and 0 if it does not.
    """

    # The discrete scores and the bias are computed lazily on first use.
    if self._needs_initialization:
      self._init();

    ( c, b, x, x_ ) = row;

    # Same key / record layout that train() writes.
    ckeyfmt = '>' + ( 'I' * len(x_) );
    cvalfmt = '>I' + ( 'f' * len(x) );
    cvalsz = calcsize( cvalfmt );

    # For every discretised feature: the bins within +/-2 of its value,
    # restricted to 0..31.
    rng = [];
    for xval in x_:
      rng.append(
          [ xv \
              for xv \
               in [ xval-2, xval-1, xval, xval+1, xval+2 ] \
               if 0 <= xv <= 31 ]
        );

    # Query point as a column vector.
    x_vec = np.array( x ).reshape( 1, self._len_x ).T;

    nearest_positive = [];
    all_negative = [];
    found_ident = 0;

    # Visit every combination of neighbouring bins.
    for xvals in product( *rng ):

      try:
        ckey = pack( ckeyfmt, *xvals );
      except:
        print( ckeyfmt, xvals );
        raise;
      val = self._cdata.get( ckey );

      # A value is a concatenation of fixed-size records; consume them one
      # at a time from the front.
      while val:

        # The remainder must be a whole number of records.
        if len(val) <= cvalsz:
          assert len(val) == cvalsz;

        val_ = val[:cvalsz];
        val = val[cvalsz:];

        pt = unpack( cvalfmt, val_ );
        pt_y = pt[0];
        pt_x = pt[1:];

        pt_x_vec = np.array( pt_x ).reshape( 1, self._len_x ).T;
        diff = pt_x_vec - x_vec;
        # NOTE: dist is a 1x1 array rather than a plain float.
        dist = np.sqrt( np.dot( np.dot( diff.T, self._icov ), diff ) );

        # Points (almost) identical to the query are counted but otherwise
        # ignored.
        if dist <= 0.0001:
          found_ident += 1;
          continue;

        if pt_y == 0:
          all_negative.append( dist );
          continue;

        assert pt_y == 1;

        # Keep only the self._kval smallest positive distances.
        nearest_positive.append( dist );
        nearest_positive.sort();
        nearest_positive = nearest_positive[:self._kval];

    # assert found_ident == 1;
    # assert len( nearest_positive ) == self._kval;
    # Record queries whose neighbourhood held fewer than k positive points.
    if len( nearest_positive ) < self._kval:
      self._sparse_points += 1;

    score = self._bias;

    # if len( nearest_positive ) > 0:
    if True:

      # Largest retained positive distance; None means no positive point
      # was found, in which case every negative point is counted.
      if len( nearest_positive ) == 0:
        threshold = None;
      else:
        threshold = nearest_positive[-1];

      neg_cnt = 0;
      for dist in all_negative:
        if ( threshold is None ) or ( dist <= threshold ):
          neg_cnt += 1;

      p_pos \
        =   log( float( len(nearest_positive) ) + SMOOTHING, 2.0 ) \
          - log( float(self._total_pos) + 2.0 * SMOOTHING, 2.0 );

      p_neg \
        =   log( float(neg_cnt) + SMOOTHING, 2.0 ) \
          - log( float(self._total_neg) + 2.0 * SMOOTHING, 2.0 );

      score += p_pos - p_neg;

    # Discrete features: values with no stored score contribute 0.0.
    for ( i, dval ) in enumerate( c+b ):
      score += self._dscores[ i ].get( dval, 0.0 );

    # Without a cutoff the raw score is returned; _finalize() relies on
    # this while it is still choosing the cutoff.
    if self._co is None:
      return score;
    else:
      if score >= self._co:
        return 1;
      else:
        return 0;


  def _finalize( self ):
    """Fit the inverse covariance matrix and choose the decision cutoff.

    The inverse covariance of the sampled continuous features is stored in
    ``self._icov``.  Every sampled row is then scored with ``_apply`` (which
    returns raw scores while ``self._co`` is still None), 1000 candidate
    cutoffs are taken at evenly spaced positions of the sorted scores, and
    the candidate with the highest F-score on the sample becomes
    ``self._co``.

    Prints the number of sparse points, the chosen cutoff and its F-score.

    Raises:
      AssertionError: if a sampled label is neither 0 nor 1, or if no
        candidate cutoff yields a defined F-score.
    """

    self._needs_finalization = False;

    covsample = np.array( self._sample_x );
    # np.cov collapses a single-feature sample to a 0-d array, which LA.inv
    # rejects; atleast_2d turns it back into a 1x1 matrix and leaves the
    # multi-feature case untouched.
    cov = np.atleast_2d( np.cov( covsample.T ) );
    self._icov = LA.inv( cov );

    sample \
      = zip(
            self._sample_c,
            self._sample_b,
            self._sample_x,
            self._sample_x_
          );

    scores = [ self._apply( [ c, b, x, x_ ] ) for ( c, b, x, x_ ) in sample ];

    sorted_scores = sorted( scores );

    # Candidate cutoffs: the scores found at 0.0%, 0.1%, ..., 99.9% of the
    # sorted sample.
    cutoffs = [];
    for idx in range(0,1000):
      ratio = float(idx) / 1000.0;
      cutoffs.append(
          sorted_scores[ int( float( len(sorted_scores) ) * ratio ) ]
        );

    # One confusion matrix per candidate cutoff.
    stats_by_co = [ { "tp": 0, "fp": 0, "tn": 0, "fn": 0 } for co in cutoffs ];

    for ( y, score ) in zip( self._sample_y, scores ):
      for ( coidx, co ) in enumerate( cutoffs ):
        if score >= co:
          if y == 1:
            stats_by_co[ coidx ][ "tp" ] += 1;
          else:
            assert y == 0;
            stats_by_co[ coidx ][ "fp" ] += 1;
        else:
          if y == 0:
            stats_by_co[ coidx ][ "tn" ] += 1;
          else:
            assert y == 1;
            stats_by_co[ coidx ][ "fn" ] += 1;

    max_fscore = None;
    max_fscore_coidx = None;

    for ( coidx, stats ) in enumerate( stats_by_co ):

      tp = stats[ "tp" ];
      fp = stats[ "fp" ];
      fn = stats[ "fn" ];

      # Skip cutoffs for which precision or recall is undefined.
      if (tp+fp) <= 0:
        continue;

      if (tp+fn) <= 0:
        continue;

      precision = float(tp) / float(tp+fp);
      recall = float(tp) / float(tp+fn);

      if (precision+recall) <= 0.0:
        continue;

      fscore = 2.0 * ( ( precision * recall ) / ( precision + recall ) );

      # Strict comparison: on ties the lowest cutoff index wins.
      if ( max_fscore is None ) or ( fscore > max_fscore ):

        max_fscore = fscore;
        max_fscore_coidx = coidx;

    assert max_fscore_coidx is not None;
    self._co = cutoffs[ max_fscore_coidx ];

    print( self._sparse_points );
    print( self._co );
    print( max_fscore );


  def __call__( self, row ):
    """Classify or score one unlabelled row ``( c, b, x )``.

    The row is preprocessed exactly as in ``train`` (feature extraction,
    discretisation of ``x``, feature selection), its vector lengths are
    checked against the lengths the model knows, and the result of
    ``_apply`` is returned.
    """

    ( cat, binary, cont ) = row;

    cat = self._fsel.apply_c( self._catfe( cat ) );
    binary = self._fsel.apply_b( self._binfe( binary ) );

    cont = self._contfe( cont );
    disc = self._fdisc( cont );

    cont = self._fsel.apply_x( cont );
    disc = self._fsel.apply_x( disc );

    expectations = (
        ( self._len_c, cat ),
        ( self._len_b, binary ),
        ( self._len_x, cont ),
        ( self._len_x, disc ),
      );

    try:
      for ( expected, vec ) in expectations:
        assert expected == len( vec );
    except:
      # Show the expected lengths before letting the failure propagate.
      print( self._len_c, self._len_b, self._len_x );
      raise;

    return self._apply( ( cat, binary, cont, disc ) );

Example #2

Show file

File: kyotocab.py Project: fanjinfei/grapheekdb

class KyotoCabinetGraph(BaseGraph):
    """Graph storage backend that keeps its records in a Kyoto Cabinet file.

    Plain values are serialised with msgpack.  Integer lists bypass msgpack
    and are stored as '|1|2|3' text (the empty list is '') so that an item
    can be added with a single ``DB.append`` call.

    NOTE(review): the ``encoding`` keyword passed to msgpack was removed in
    msgpack 1.0 -- confirm which msgpack version this project pins.
    """

    def __init__(self, path):
        """Open (creating it if needed) the database file at ``path``.

        Raises GrapheekDataKyotoCabinetInitFailureException when the file
        cannot be opened.
        """
        # create the database object
        self._path = path
        self._db = DB()
        # open the database
        if not self._db.open(path, DB.OREADER | DB.OWRITER | DB.OCREATE):
            raise GrapheekDataKyotoCabinetInitFailureException(
                str(self._db.error()))
        super(KyotoCabinetGraph, self).__init__()
        self._ensure_prepared()
        self._closed = False

    # Private helpers :

    @staticmethod
    def _check_saved(ok):
        """Raise the backend's save error unless ``ok`` is truthy."""
        if not ok:  # pragma : no cover
            raise GrapheekDataKyotoCabinetException(
                'KyotoCabinet : error while saving')
        return ok

    @staticmethod
    def _lst_tokens(raw):
        """Split a stored list value into its items (as strings)."""
        if not PYTHON2:  # pragma : no cover
            raw = str(raw, encoding='utf8')
        # The value starts with the separator, so the first piece is always
        # empty and is dropped.
        return raw.split('|')[1:]

    @staticmethod
    def _lst_value(items):
        """Serialise items as '|a|b|c'; an empty sequence gives ''."""
        return ''.join('|' + str(item) for item in items)

    # Start method overriding :

    def _db_close(self):
        """Close the database; calling it again is a no-op."""
        if not self._closed:
            self._db.close()
            self._closed = True

    def _transaction_begin(self):
        self._db.begin_transaction()
        return True

    def _transaction_commit(self, txn):
        self._db.end_transaction(True)

    def _transaction_rollback(self, txn):
        self._db.end_transaction(False)

    def _has_key(self, key):
        return self._db.check(key) >= 0

    def _get(self, txn, key):
        raw_data = self._db.get(key)
        if raw_data is None:
            return UNDEFINED  # Not returning None, as None is a valid value
        return msgpack.loads(raw_data, encoding='utf8')

    def _bulk_get(self, txn, keys):
        result = {}
        for key, raw_data in self._db.get_bulk(keys).items():
            if not PYTHON2:  # pragma : no cover
                key = str(key, encoding='utf8')
            result[key] = msgpack.loads(raw_data, encoding='utf8')
        return result

    def _set(self, txn, key, value):
        res = self._db.set(key, msgpack.dumps(value, encoding='utf8'))
        return self._check_saved(res)

    def _bulk_set(self, txn, updates):
        dic = {
            key: msgpack.dumps(value, encoding='utf8')
            for key, value in updates.items()
        }
        res = self._db.set_bulk(dic)
        self._check_saved(res != -1)
        return res

    def _remove(self, txn, key):
        # Contrary to the LocalMemoryGraph implementation, there is no need
        # to wrap key removal in try.. except because KyotoCabinet only
        # returns "False" when the key does not exist.
        # Thus the _remove method is idempotent (cf LocalMemoryGraph _remove
        # method comment)
        self._db.remove(key)

    def _bulk_remove(self, txn, keys):
        res = self._db.remove_bulk(list(keys))
        self._check_saved(res != -1)
        return res

    def _remove_prefix(self, txn, prefix):
        keys = self._db.match_prefix(prefix)
        self._db.remove_bulk(keys)

    # overriding list methods
    # looks like a bucket of hacks, and yes indeed it is :)
    # btw, it REALLY improves performance compared to the default
    # implementation which, in the case of KyotoCabinet, would involve a
    # msgpack deserialization followed by a serialization

    def _init_lst(self, txn, key):
        return self._check_saved(self._db.set(key, ''))

    def _get_lst(self, txn, key):
        value = self._db.get(key)
        if value is None:
            return UNDEFINED
        return [int(token) for token in self._lst_tokens(value)]

    def _set_lst(self, txn, key, values):
        # An empty list must be stored as '' (not '|'), otherwise reading
        # it back would try to parse an empty item.
        return self._check_saved(self._db.set(key, self._lst_value(values)))

    def _bulk_get_lst(self, txn, keys):
        dic_values = self._db.get_bulk(keys)
        results = []
        for key in keys:
            if PYTHON2:  # pragma : no cover
                values = dic_values.get(key, UNDEFINED)
            else:  # pragma : no cover
                values = dic_values.get(bytes(key, encoding='utf8'), UNDEFINED)
            if values is UNDEFINED:
                results.append([])
            else:
                results.append(
                    [int(token) for token in self._lst_tokens(values)])
        return results

    def _append_to_lst(self, txn, key, value):
        self._db.append(key, '|' + str(value))

    def _bulk_append_to_lst(self, txn, key, values):
        newval = self._lst_value(values)
        # Appending nothing must not leave a stray separator behind.
        if newval:
            self._db.append(key, newval)

    def _remove_from_lst(self, txn, key, value):
        """Remove ONE occurrence of ``value`` from the list stored at ``key``.

        Raises ValueError when the value is not in the list (a missing key
        is treated as an empty list).
        """
        raw = self._db.get(key)
        items = [] if raw is None else self._lst_tokens(raw)
        token = str(value)
        # Whole items are compared, so removing 1 can never eat the start
        # of 12.
        if token not in items:
            raise ValueError("list.remove(x): x not in list")
        # Caution : we are only removing ONE occurrence
        # This is voluntary
        # For instance, if the list contains neighbour nodes, we need to
        # remove only one occurrence because the current entity and the
        # neighbour node can be linked multiple times
        items.remove(token)
        return self._check_saved(self._db.set(key, self._lst_value(items)))

    def _bulk_remove_from_lst(self, txn, key, values):
        """Remove one occurrence of each of ``values`` from the list at ``key``.

        Values that are not in the list are skipped; ValueError is raised
        only when nothing at all was removed.
        """
        assert (len(values))
        raw = self._db.get(key)
        items = [] if raw is None else self._lst_tokens(raw)
        removed = False
        for value in values:
            token = str(value)
            if token in items:
                items.remove(token)
                removed = True
        if not removed:  # pragma : no cover
            raise ValueError("list.remove(x): x not in list")
        return self._check_saved(self._db.set(key, self._lst_value(items)))