コード例 #1
0
  def __init__( self, fn, mode ):
    """Initialise the base frontend and reset this instance's bookkeeping.

    Args:
      fn: Forwarded unchanged to ``Frontend.__init__``.
      mode: Forwarded unchanged to ``Frontend.__init__``.
    """

    Frontend.__init__( self, fn, mode )

    # Database handles and their file / directory names start out unset.
    self._kdbfn = self._kdb = None
    self._ldbdn = self._ldb = None

    # Lengths of the c / b / x parts of a row start out unknown.
    self._len_c = self._len_b = self._len_x = None

    self._ic = self._icbp = None

    self._needs_initialization = True

    # Every attribute below receives its own, independent empty set.
    ( self._core_dims,
      self._satellite_dims,
      self._removed_dims ) = ( set(), set(), set() )

    ( self._remove_c,
      self._remove_b,
      self._remove_x ) = ( set(), set(), set() )

    self.bypass_c = self.bypass_b = self.bypass_x = False
  def __init__( self, fn, mode ):
    """Initialise the base frontend and the per-dimension database registry.

    Args:
      fn: Forwarded unchanged to ``Frontend.__init__``.
      mode: Forwarded unchanged to ``Frontend.__init__``.
    """

    Frontend.__init__( self, fn, mode )

    # Assigned after the base initialiser has run.
    self._max_rows = 50000

    # Dimension index -> open database, plus the directories backing them.
    self._dbs, self._dbdirs = {}, []
  def _finalize( self ):
    """Derive per-dimension n-tile boundary values and store them as state.

    For every dimension's database the (value, count) records are read in
    iterator order while the counts are accumulated into a running row id.
    Each time that running row id crosses one of the 31 boundaries located
    at i/32 of ``self._rowcount`` (i = 1..31), the decoded value of the
    current record is recorded for that dimension.

    The resulting dict, mapping dimension index to its list of boundary
    values, is stored in ``self._state``.
    """

    # Call the base implementation outside of ``assert``: an expression
    # inside ``assert`` is not evaluated at all under ``python -O``.
    base_result = Frontend._finalize( self )
    assert base_result is None

    rowcount = float( self._rowcount )
    rowids_for_ntile_bounds = [
        int( rowcount * float( i ) / 32.0 ) for i in range( 1, 32 )
      ]

    ntile_bound_by_dim = { dim: [] for dim in self._dbs }

    for ( dim, db ) in self._dbs.items():

      bounds = ntile_bound_by_dim[ dim ]

      with db.iterator() as it:

        rowid = 0

        for ( xval, xcnt ) in it:

          # Keys are stored offset by 2**31; undo the offset.
          xval = unpack( '>I', xval )[ 0 ] - (1<<31)
          xcnt = unpack( '>I', xcnt )[ 0 ]

          rowid_old = rowid
          rowid = rowid_old + xcnt

          # One record may span several boundaries; its value is recorded
          # once per boundary.  A boundary at row id 0 is never crossed.
          for bound in rowids_for_ntile_bounds:
            if rowid_old < bound <= rowid:
              bounds.append( xval )

    self._state = ntile_bound_by_dim
  def train( self, row ):
    """Count one row's values in the per-dimension databases.

    Each value is multiplied by 1000 and truncated to an integer, offset
    by 2**31 and packed as a big-endian unsigned 32-bit key.  The value
    stored under that key is the number of times it has been seen for the
    dimension, packed the same way.

    Args:
      row: Iterable of values accepted by ``float()``; the value at
        position ``i`` is counted in the database for dimension ``i``.

    Returns:
      True if ``Frontend.train`` returned a truthy value (the row is then
      not counted here), False otherwise.
    """

    if Frontend.train( self, row ):
      return True

    for ( i, xval ) in enumerate( row ):

      if i not in self._dbs:
        # mkdtemp() creates the directory and leaves it in place.  Taking
        # the name of a TemporaryDirectory after its ``with`` block has
        # ended hands out the path of an already deleted directory, which
        # leaves a window for something else to claim that path.
        from tempfile import mkdtemp
        dbdn = mkdtemp()
        self._dbdirs.append( dbdn )
        self._dbs[ i ] = plyvel.DB( dbdn, create_if_missing=True )

      db = self._dbs[ i ]

      xval = int( float( xval ) * 1000.0 )

      assert xval < (1<<30)

      # NOTE(review): the 2**31 offset presumably keeps negative values
      # packable as unsigned and ordered by key - confirm.
      key = pack( ">I", (1<<31) + xval )

      xcnt = unpack( ">I", db.get( key, default=pack( ">I", 0 ) ) )[ 0 ]
      xcnt += 1

      db.put( key, pack( ">I", xcnt ) )

    return False
  def train( self, row ):
    """Store one row for later processing.

    Args:
      row: The row to keep; it is appended to ``self._data`` as is.

    Returns:
      True if ``Frontend.train`` returned a truthy value (the row is then
      not stored), False otherwise.
    """

    base_says_stop = Frontend.train( self, row )

    if not base_says_stop:
      self._data.append( row )

    return bool( base_says_stop )
  def __exit__( self, exc_type, exc_value, traceback ):
    """Run the base-class exit, then close all databases and delete their directories.

    Args:
      exc_type: Forwarded unchanged to ``Frontend.__exit__``.
      exc_value: Forwarded unchanged to ``Frontend.__exit__``.
      traceback: Forwarded unchanged to ``Frontend.__exit__``.

    Returns:
      False, so an exception raised inside the ``with`` body propagates.
    """

    # Call the base implementation outside of ``assert``: an expression
    # inside ``assert`` is not evaluated at all under ``python -O``.
    base_result = Frontend.__exit__( self, exc_type, exc_value, traceback )
    assert base_result == False

    # NOTE(review): the reason for this fixed pause is not evident from
    # this method; it is kept as it was.
    sleep( 3.0 )

    for db in self._dbs.values():
      db.close()

    for dn in self._dbdirs:
      rmtree( dn )

    return False
コード例 #7
0
  def train( self, row ):
    """Count how often each on/off pattern of a row occurs.

    The row is encoded as an integer whose bit ``i`` is set exactly when
    ``row[i] == 1``; the counter for that integer in ``self._stats`` is
    then increased by one.

    Args:
      row: Sequence of values that are compared against 1.

    Returns:
      True if ``Frontend.train`` returned a truthy value (nothing is
      counted then), False otherwise.
    """

    if Frontend.train( self, row ):
      return True

    pattern = 0
    for ( position, flag ) in enumerate( row ):
      if flag == 1:
        pattern |= 1 << position

    seen_so_far = self._stats.get( pattern, 0 )
    self._stats[ pattern ] = seen_so_far + 1

    return False
  def _finalize( self ):
    """Merge dimensions into clusters until stable and store the result.

    Starts with one singleton cluster per column of the collected rows,
    each identified by its 1-based column number, and calls
    ``self._merge`` repeatedly until a pass leaves the number of clusters
    unchanged.  The final cluster list is stored in ``self._state``.
    """

    # Call the base implementation outside of ``assert``: an expression
    # inside ``assert`` is not evaluated at all under ``python -O``.
    base_result = Frontend._finalize( self )
    assert base_result is None

    # Transposed, so data[k] holds every collected value of column k.
    data = np.array( self._data ).T

    # Dimensions are numbered from 1 here; the diagnostics below take
    # abs() of an id before indexing, so ids may be signed after merging.
    rest = [ {i} for i in range( 1, len(data)+1 ) ]

    while True:
      len_rest_before = len( rest )
      rest = self._merge( data, rest )
      if len( rest ) == len_rest_before:
        break

    self._state = rest

    # The blocks below are switched-off diagnostics; change ``False`` to
    # ``True`` to print them.

    if False:

      print( "-- CLUSTERS --" )

      print( len(rest), rest )

    if False:

      print( "-- INSIDE CORRELATIONS --" )

      for dc in rest:
        print( dc )
        for di in dc:
          for dj in dc:
            if abs(di) >= abs(dj):
              continue
            corr = np.corrcoef( data[abs(di)-1], data[abs(dj)-1] )[ 0 ][ 1 ]
            print( di, dj, corr )

    if False:

      print( "-- OUTSIDE CORRELATIONS --" )

      for ( i, dci ) in enumerate( rest ):
        ivals = self._cluster_val( data, dci )
        for ( j, dcj ) in enumerate( rest ):
          if i >= j:
            continue
          jvals = self._cluster_val( data, dcj )
          corr = np.corrcoef( ivals, jvals )[ 0 ][ 1 ]
          print( i, j, corr )
コード例 #9
0
  def __call__( self, row ):
    """Project a row onto the bit mask of every cluster.

    The row is encoded as an integer whose bit ``i`` is set exactly when
    ``row[i] == 1``.  For each cluster in ``self._state`` a mask with one
    bit per dimension of the cluster is built, and the encoded row ANDed
    with that mask is emitted.

    Args:
      row: Sequence of values that are compared against 1.

    Returns:
      A list of ints, one per cluster, in the order of ``self._state``.
    """

    # Call the base implementation outside of ``assert``: an expression
    # inside ``assert`` is not evaluated at all under ``python -O``.
    base_result = Frontend.__call__( self, row )
    assert base_result is row

    val = 0
    for ( i, row_i ) in enumerate( row ):
      if row_i == 1:
        val |= (1<<i)

    row_ = []

    for cluster in self._state:
      mask = 0
      for dim in cluster:
        mask |= (1<<dim)
      row_.append( val & mask )

    return row_
コード例 #10
0
  def __exit__( self, exc_type, exc_value, traceback ):
    """Run the base-class exit, then close both databases and delete their files.

    Args:
      exc_type: Forwarded unchanged to ``Frontend.__exit__``.
      exc_value: Forwarded unchanged to ``Frontend.__exit__``.
      traceback: Forwarded unchanged to ``Frontend.__exit__``.

    Returns:
      False, so an exception raised inside the ``with`` body propagates.

    Raises:
      AssertionError: If ``self._kdb.close()`` returns a falsy value.  The
        database's own error is printed before the exception propagates.
    """

    # Calls with side effects are kept outside of ``assert``: an expression
    # inside ``assert`` is not evaluated at all under ``python -O``.
    base_result = Frontend.__exit__( self, exc_type, exc_value, traceback )
    assert base_result == False

    if self._ldb is not None:
      # NOTE(review): the reason for this fixed pause is not evident from
      # this method; it is kept as it was.
      sleep( 3.0 )
      self._ldb.close()

    if self._ldbdn is not None:
      rmtree( self._ldbdn )

    if self._kdb is not None:
      try:
        closed = self._kdb.close()
        if not closed:
          raise AssertionError( "closing self._kdb failed" )
      except Exception:
        print( str( self._kdb.error() ) )
        raise

    if self._kdbfn is not None:
      remove( self._kdbfn )

    return False
  def __call__( self, row ):
    """Reduce a row to one signed average per cluster.

    Every cluster in ``self._state`` is a collection of non-zero, signed,
    1-based dimension ids: ``+d`` adds the value of column ``d - 1`` and
    ``-d`` subtracts it.  The sum is divided by the size of the cluster.

    Indexing uses ``abs(id) - 1`` because ``_finalize`` numbers dimensions
    from 1 and indexes its data the same way.  Using the id itself as the
    index would be off by one for positive ids and would count from the
    end of the row for negative ids.

    Args:
      row: Sequence of numeric values, indexable by column.

    Returns:
      A list of floats, one per cluster, in the order of ``self._state``.
    """

    # Call the base implementation outside of ``assert``: an expression
    # inside ``assert`` is not evaluated at all under ``python -O``.
    base_result = Frontend.__call__( self, row )
    assert base_result is row

    row_ = []

    for cluster in self._state:

      val = 0.0

      for dim in cluster:
        if dim > 0:
          val += row[ dim - 1 ]
        else:
          assert dim < 0
          val -= row[ -dim - 1 ]

      val /= len( cluster )

      row_.append( val )

    return row_
コード例 #12
0
  def train( self, row ):
    """Count co-occurrences of value pairs, keyed by label, in ``self._kdb``.

    The ``c``, ``b`` and ``x`` parts of the row are concatenated with
    ``+``.  For every position pair ``(i, j)`` with ``i < j``, and for
    every pair whose ``i`` is the last position, the counter stored under
    the packed key ``(i, j, y, row[i], row[j])`` is incremented by one.

    Args:
      row: A 4-tuple ``( y, c, b, x )``.  The lengths of ``c``, ``b`` and
        ``x`` must stay the same across calls.  All packed fields must fit
        an unsigned 32-bit integer.

    Returns:
      True if ``Frontend.train`` returned a truthy value (nothing is
      counted then), False otherwise.

    Raises:
      AssertionError: If a part changes length between calls, or if
        ``self._kdb.increment`` returns a falsy value.  In the latter case
        the database's own error is printed first.
    """

    ( y, c, b, x ) = row

    if self._len_c is None:
      self._len_c = len(c)
    assert self._len_c == len(c)

    if self._len_b is None:
      self._len_b = len(b)
    assert self._len_b == len(b)

    if self._len_x is None:
      self._len_x = len(x)
    assert self._len_x == len(x)

    row = c + b + x

    if Frontend.train( self, row ):
      return True

    keyfmt = '>IIIII'
    last = self._lenrow - 1

    for i in range( 0, self._lenrow ):
      for j in range( 0, self._lenrow ):

        # Only i < j is counted, except for the last position, which is
        # paired with every position (itself included).
        if ( i >= j ) and ( i != last ):
          continue

        key = pack( keyfmt, i, j, y, row[i], row[j] )

        try:
          # Kept outside of ``assert`` so that the increment still happens
          # under ``python -O``.
          incremented = self._kdb.increment( key, 1, 0 )
          if not incremented:
            raise AssertionError( "incrementing a counter in self._kdb failed" )
        except Exception:
          print( str(self._kdb.error()) )
          raise

    return False
コード例 #13
0
  def _finalize( self ):
    """Partition all dimensions into clusters and store them as state.

    Starting from the set of all dimension indices ``0 .. _lenrow - 1``,
    ``self._split`` is called repeatedly while at least two dimensions
    remain.  Each call yields two clusters, which are appended to
    ``self._state``, plus the dimensions still to be processed.  A single
    leftover dimension becomes a cluster of its own.
    """

    # Call the base implementation outside of ``assert``: an expression
    # inside ``assert`` is not evaluated at all under ``python -O``.
    base_result = Frontend._finalize( self )
    assert base_result is None

    self._state = []
    rest = set( range( 0, self._lenrow ) )

    while rest:
      if len( rest ) >= 2:
        ( left, right, rest ) = self._split( rest )
        self._state.append( left )
        self._state.append( right )
      else:
        self._state.append( rest )
        rest = set()

    # Switched-off diagnostics; change ``False`` to ``True`` to print them.
    if False:

      for ( cluster_id, cluster ) in enumerate( self._state ):
        print( "-->", cluster_id, cluster )
        for i in cluster:
          a = { i }
          b = cluster - a
          print( "    {:d} {:1.4f}".format( i, self._i_corr( a, b ) ) )
コード例 #14
0
  def _finalize( self ):
    """Copy all counters into ``self._ldb`` and compute information content.

    Every record of ``self._kdb`` is written unchanged to ``self._ldb``.
    Afterwards the information content of each dimension and of each
    dimension pair ``(i, j)`` with ``i < j`` is computed via
    ``self._get_info_content_by_dimension`` and
    ``self._get_info_content_by_pair``.  The results, together with the
    lengths of the c / b / x parts, are stored in ``self._state``.
    """

    # Call the base implementation outside of ``assert``: an expression
    # inside ``assert`` is not evaluated at all under ``python -O``.
    base_result = Frontend._finalize( self )
    assert base_result is None

    if False:
      print( "unique combinations = ", self._kdb.count() )

    keyfmt = '>IIIII'
    valfmt = '>Q'

    c = self._kdb.cursor()
    c.jump()

    # Number of counters above each threshold; only reported by the
    # switched-off print further down.
    gt2 = 0
    gt4 = 0
    gt8 = 0
    gt16 = 0
    gt32 = 0

    while True:

      r = c.get( True )
      if not r:
        break

      self._ldb.put( r[0], r[1] )

      # The decoded key is not used afterwards; unpack() does raise if the
      # key does not have the size implied by ``keyfmt``.
      key = unpack( keyfmt, r[0] )
      val = unpack( valfmt, r[1] )[ 0 ]

      if val > 2:
        gt2 += 1
      if val > 4:
        gt4 += 1
      if val > 8:
        gt8 += 1
      if val > 16:
        gt16 += 1
      if val > 32:
        gt32 += 1

    if False:
      print( gt2, gt4, gt8, gt16, gt32 )

    self._ic = {}
    for i in range( 0, self._lenrow ):
      self._ic[ i ] = self._get_info_content_by_dimension( i )

    self._icbp = {}
    for i in range( 0, self._lenrow ):
      for j in range( i + 1, self._lenrow ):
        self._icbp[ (i,j) ] = self._get_info_content_by_pair( i, j )

    self._state \
      = { "ic": self._ic,
          "icbp": self._icbp,
          "c": self._len_c,
          "b": self._len_b,
          "x": self._len_x }
コード例 #15
0
  def __init__( self, fn, mode ):
    """Initialise the base frontend and an empty pattern counter.

    Args:
      fn: Forwarded unchanged to ``Frontend.__init__``.
      mode: Forwarded unchanged to ``Frontend.__init__``.
    """

    Frontend.__init__( self, fn, mode )

    # Assigned after the base initialiser has run.
    self._max_rows = 100000

    # Filled by train(): key -> number of occurrences.
    self._stats = dict()
  def __init__( self, fn, mode ):
    """Initialise the base frontend and an empty row buffer.

    Args:
      fn: Forwarded unchanged to ``Frontend.__init__``.
      mode: Forwarded unchanged to ``Frontend.__init__``.
    """

    Frontend.__init__( self, fn, mode )

    # Assigned after the base initialiser has run.
    self._max_rows = 100000

    # Rows are appended here by train().
    self._data = list()