Example #1
    def write_batch(self, batch):
        """
        Write a batch to the database

        Arguments:
        batch -- an array of Datums
        """
        keys = self.get_keys(len(batch))
        if self.backend == 'lmdb':
            lmdb_txn = self.db.begin(write=True)
            for i, datum in enumerate(batch):
                lmdb_txn.put('%08d_%d' % (keys[i], datum.label),
                             datum.SerializeToString())
            lmdb_txn.commit()
        elif self.backend == 'leveldb':
            leveldb_batch = leveldb.WriteBatch()
            for i, datum in enumerate(batch):
                leveldb_batch.Put('%08d_%d' % (keys[i], datum.label),
                                  datum.SerializeToString())
            self.db.Write(leveldb_batch)
        else:
            logger.error('unsupported backend')
            return False
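A minimal usage sketch for write_batch() above, assuming a hypothetical DbWriter wrapper that exposes the method and Caffe's Datum protobuf; the wrapper name and its constructor argument are illustrative, not part of the original code.

# Hypothetical usage; DbWriter is an assumption, Datum comes from Caffe's protobuf.
from caffe.proto import caffe_pb2

writer = DbWriter(backend='lmdb')        # assumed wrapper holding an open database handle
batch = []
for label in range(10):
    datum = caffe_pb2.Datum()
    datum.label = label                  # real code would also fill datum.data
    batch.append(datum)
writer.write_batch(batch)                # keys are generated inside write_batch()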
Example #2
    def __init__(self, db_name):
        self.idf_hd = idf()
        with open("idf_dumps.txt", "r") as fd:
            s = fd.read()

        self.idf_hd.loads(s)
        self.hot_word_dic = {}
        self.short_url_hd = fast_search.load("short_url.txt")
        self.dbhd = leveldb.LevelDB(db_name)
        self.url_re = re.compile(
            r'(http:\/\/)*[\w\d]+\.[\w\d\.]+\/[\w\d_!@#$%^&\*-_=\+]+')
        # Internally use the batch as a cache: add_doc does not write to the db file immediately.
        # Data is written to the file only when results are requested or the threshold (batch_limit) is reached.
        self.batch = leveldb.WriteBatch()
        self.batch_counter = 0
        self.batch_limit = 100000
        self.fid = 0
        #self.get_file_word_flag = "percent"
        self.get_file_word_flag = "num"
        self.word_list_n = 5
        self.get_file_word_cbk = {}
        self.get_file_word_cbk["num"] = self.get_file_word_list_by_num
        self.get_file_word_cbk["percent"] = self.get_file_word_list_by_persent
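The translated comments above describe a write-behind cache: documents are staged in self.batch and flushed only when results are requested or batch_counter reaches batch_limit. A minimal sketch of what such an add_doc/flush pair might look like on the same class; the method bodies are an assumption, not the original implementation.

    def add_doc(self, doc_id, doc_text):
        # Stage the write in the in-memory batch instead of touching the db file.
        self.batch.Put(str(doc_id).encode("utf-8"), doc_text.encode("utf-8"))
        self.batch_counter += 1
        if self.batch_counter >= self.batch_limit:
            self.flush_batch()

    def flush_batch(self):
        # Write everything staged so far as one LevelDB batch, then reset the cache.
        self.dbhd.Write(self.batch)
        self.batch = leveldb.WriteBatch()
        self.batch_counter = 0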
Example #3
def add(db, key, value):
    print('## ADDING {} {} ##'.format(key.rstrip(b'\x00'),
                                      value.rstrip(b'\x00')))
    assert len(key) == 32
    batch = leveldb.WriteBatch()
    for i, byte in enumerate(key):
        path = key[:i]
        node_at_path = deserialize(db_get(db, path))
        if node_at_path is None:
            new_leaf = LeafNode(key, value)
            db_put(batch, path, new_leaf.serialize())
            propagate_along_path(db, batch, path, hash_node(new_leaf))
            return
        if isinstance(node_at_path, LeafNode):
            new_leaf = LeafNode(key, value)
            if node_at_path.key == key:
                db_put(batch, path, new_leaf.serialize())
                propagate_along_path(db, batch, path, hash_node(new_leaf))
            else:
                propagation_path = get_common_prefix(key, node_at_path.key)
                common_prefix_length = len(propagation_path)
                db_put(batch, key[:common_prefix_length + 1],
                       new_leaf.serialize())
                db_put(batch, node_at_path.key[:common_prefix_length + 1],
                       node_at_path.serialize())
                new_branch_node = BranchNode({
                    key[common_prefix_length]:
                    hash_node(new_leaf),
                    node_at_path.key[common_prefix_length]:
                    hash_node(node_at_path)
                })
                db_put(batch, key[:common_prefix_length],
                       new_branch_node.serialize())
                propagate_along_path(db, batch, propagation_path,
                                     hash_node(new_branch_node))
            return
    raise Exception("How did we get here?")
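The insertion above relies on helpers that are not shown here (db_get, db_put, hash_node, propagate_along_path, get_common_prefix). As one illustrative assumption, get_common_prefix presumably returns the longest shared prefix of two byte keys:

def get_common_prefix(key_a, key_b):
    # Longest shared byte prefix of two keys; a sketch, not the original helper.
    prefix = []
    for a, b in zip(key_a, key_b):
        if a != b:
            break
        prefix.append(a)
    return bytes(prefix)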
Example #4
def clean_removed(config, database):
    """
    Check if any file from source dir was removed.
    """

    logger = logging.getLogger('mylog')
    dropbox_folder = config.get('DropBox', 'drop_box_dir')
    batch = leveldb.WriteBatch()

    for key, value in database.RangeIter():

        #unencrypted_hash, encrypted_hash, dropbox_file
        (_, encrypted_hash, dropbox_file) = \
            value.split(FileProcessor.HASH_SEPARATOR)

        dropbox_file = os.path.join(dropbox_folder,'') + dropbox_file

        if not os.path.isfile(key):
            batch.Delete(key)
            if not os.path.isfile(dropbox_file):
                logger.warning('File ' + dropbox_file + ' originally ' + key + \
                    ' does not exist in dropbox dir. ' +
                    'Consistency check will catch it.')
            else:
                #Only for informational purposes
                hsh = compute_hash(dropbox_file)
                if hsh != encrypted_hash:
                    logger.warning('File ' + dropbox_file + ' originally ' +
                    key + ' has hash that does not match db entry.' +
                    'Consistency check will catch it.')
                else:
                    logger.info('File ' + dropbox_file + ' orginally ' + key + \
                        ' was removed. Removing backed up file...')
                    batch.Delete(key)
                    os.remove(dropbox_file)

    database.Write(batch, sync=True)
Example #5
 def run(self):
     while True:
         ngram_list=self.queue.get() # fetch new batch
         if not ngram_list: # end signal
             print >> sys.stderr, "no new data in "+self.dataset+", creating final text file"
             sys.stderr.flush()
             try:
                 c=self.create_final_files() # create .gz text file
             except:
                 print >> sys.stderr, "error while creating final text file: "+self.dataset+" ,returning"
                 sys.stderr.flush()
                 return
             print >> sys.stderr, c,self.dataset,"written, returning"
             sys.stderr.flush()
             return
         try:
             batch=leveldb.WriteBatch() # write new batch
             for ngram in ngram_list:
                 batch.Put(ngram.encode(u"utf-8"),u"1".encode(u"utf-8"))
             self.DB.Write(batch)
         except:
             print >> sys.stderr, "error in database writer, batch rejected: "+self.dataset
             traceback.print_exc()
             sys.stderr.flush()      
Example #6
def generate(dataset, dirs, threshold, size, unit_size, sample_rate, gpu, model, force, tt, net = '', js = None):

    if dataset not in support:

        raise ValueError('The Dataset {} is not supported right now '.format(dataset))

    dictionary = dict()

    dictionary['train'] = dict()

    dictionary['train']['foreground'] = []

    dictionary['train']['background'] = []

    dictionary['train']['annotations'] = dict()
    
    dictionary['train']['response'] = dict()

    dictionary['train']['iou'] = dict()

    db = dict()

    db['train'] = leveldb.LevelDB(dataset + '_threshold_{}_train'.format(threshold))
    
    if tt > 1:

        db['test'] = leveldb.LevelDB(dataset + '_threshold_{}_test'.format(threshold))
        
        dictionary['test'] = dict()

        dictionary['test']['foreground'] = []

        dictionary['test']['background'] = []

        dictionary['test']['annotations'] = dict()
        
        dictionary['test']['response'] = dict()

        dictionary['test']['iou'] = dict()

    b = leveldb.WriteBatch()

    miss = []

    fg_counter = 0

    bg_counter = 0

    if dataset == 'ActivityNet-1.3':

        j = json.load(open('ActivityNet/activity_net.v1-3.min.json'))

        for v in j['database'].keys(): # iterate over name of videos

            vpath = os.path.join(dirs['video'], v + '.mp4') # check if video is downloaded

            if not os.path.isfile(vpath):

                miss.append(v)

                continue

            fpath = os.path.join(dirs['feature'], '{}_US[{}]_SR[{}].h5'.format(v, unit_size, sample_rate)) # check if extracted feature is exist
            
            exist = os.path.isfile(fpath)

            if not exist and not force:

                # sampling unit level feature

                print('[!] Unit Level Feature [ {} ] is not exist'.format(fpath))

                print('[*] Extracting ... ')

                net = unit.sampling(v + '.mp4', size, unit_size, sample_rate, net, gpu, model, dirs['video'], dirs['feature'], reuse = True)

                #raise ValueError('[!] Unit Level Feature [ {} ] is not exist'.format(fpath))

            elif exist:

                print('Feature Path {} is exist'.format(fpath))
            
            elif force:

                print('[!] Unit Level Feature [ {} ] is not exist, Ignoring...'.format(fpath))

            if not force or (force and exist) :
            
                with h5py.File(fpath) as ff:#, cv2.VideoCapture(vpath) as capture:

                    #capture = cv2.VideoCapture(vpath)

                    #length = capture.get(cv2.CAP_PROP_FRAME_COUNT)
                    
                    #fps = capture.get(cv2.CAP_PROP_FPS)

                    #capture.release()

                    print('[*] Current Video => [ {} ]'.format(fpath))

                    fps = np.asarray(ff['fps'])
                    
                    units = list(ff.keys()) # name of units-level feature

                    fduration = []
                    
                    #annotations['annotations'][v] = []


                    for fs in range(len(j['database'][v]['annotations'])): # each video contains more than one foreground segment
                        
                        fduration.append((np.asarray(j['database'][v]['annotations'][fs]['segment']) * fps).astype(int)) # frame duration [start frame, end frame]
                        #annotations['annotations'][v].append(fduration[fs])

                        print(fduration[fs])

                    for u in units:

                        #print('units : {}'.format(u))

                        if u == 'feature_size' or u == 'unit_size' or u == 'sample_rate' or u == 'nframes' or u == 'fps':

                            continue

                        fg = False

                        #print(len(j['database'][v]['annotations']))

                        for fs in range(len(j['database'][v]['annotations'])): # each video contains more than one foreground segment

                            #print('fs {}'.format(fs))

                            #print(np.asarray(j['database'][v]['annotations'][fs]['segment']) * 8)

                            # convert start time and end time to start frame and end frame respectively

                            """

                            foreground duration : time * fps => index of frame

                            proposal duration : index of frame

                            """
                            
                            #fduration = (np.asarray(j['database'][v]['annotations'][fs]['segment']) * fps).astype(np.int) # frame duration [start frame, end frame]
                            pduration = np.asarray(u.split('_'), dtype = int)

                            iou = unit.iou(pduration, fduration[fs], 'clip')

                            if not iou:

                                break
                            
                            """
                            if iou < 1 and iou > 0:
                                print('Duration Time : {}'.format(j['database'][v]['annotations'][fs]['segment']))
                                print('Duration Time : {}'.format(np.asarray(j['database'][v]['annotations'][fs]['segment']) * fps))
                                print('fduration : {}'.format(fduration[fs]))
                                print('pduration : {}'.format(pduration))
                                print('iou : {}'.format(iou))

                            """
                            if iou > threshold:

                                vu = '{}_{}'.format(v, u)

                                if fg_counter >= tt:

                                    dictionary['test']['foreground'].append(vu)

                                    dictionary['test']['response'][vu] = '{}_{}'.format(fduration[fs][0], fduration[fs][1])

                                    dictionary['test']['iou'][vu] = iou

                                    fg_counter = 0
                                    
                                else:

                                    dictionary['train']['foreground'].append(vu)

                                    dictionary['train']['response'][vu] = '{}_{}'.format(fduration[fs][0], fduration[fs][1])

                                    dictionary['train']['iou'][vu] = iou

                                    fg_counter += 1

                                fg = True

                                break

                        if not fg:

                            if bg_counter >= tt:

                                dictionary['test']['background'].append('{}_{}'.format(v, u))

                                bg_counter = 0

                            else:

                                dictionary['train']['background'].append('{}_{}'.format(v, u))

                                bg_counter += 1

        #print(ujson.dumps(annotations))

        db['train'].Put('annotations'.encode(), ujson.dumps(dictionary['train']).encode())

        if tt > 1:

            db['test'].Put('annotations'.encode(), ujson.dumps(dictionary['test']).encode())

    db['train'].Write(b, sync = True)
    
    print('[*] Cache is save to [ {} ]'.format(dataset + '_threshold_{}_train'.format(threshold)))
    
    if tt > 1:
    
        db['test'].Write(b, sync = True)
        
        print('[*] Cache is save to [ {} ]'.format(dataset + '_threshold_{}_test'.format(threshold)))

    print('[!] Missing {} Video Files'.format(len(miss)))
Example #7
 def flush(self, sync_mode=False):
     self._db.Write(self._batch, sync=sync_mode)
     self.x = 0
     self._batch = leveldb.WriteBatch()
     self.lflush = clock()
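flush() above belongs to a buffered writer that tracks pending operations in self.x and the last flush time in self.lflush. A minimal sketch of a matching put() that could feed it; the threshold and method name are guesses, not the original code.

    def put(self, key, value, flush_every=1000):
        # Stage the write, then flush once enough operations have accumulated.
        self._batch.Put(key, value)
        self.x += 1
        if self.x >= flush_every:
            self.flush()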
Example #8
    def connect_block(self, ser_hash, block, blkmeta):
        # verify against checkpoint list
        try:
            chk_hash = self.netmagic.checkpoints[blkmeta.height]
            if chk_hash != block.sha256:
                self.logger.info(
                    "Block %064x does not match checkpoint hash %064x, height %d"
                    % (block.sha256, chk_hash, blkmeta.height))
                return False
        except KeyError:
            pass

        # check TX connectivity
        outpts = self.spent_outpts(block)
        if outpts is None:
            self.logger.info("Unconnectable block %064x" % (block.sha256, ))
            return False

        # verify script signatures
        if ('nosig' not in self.settings
                and ('forcesig' in self.settings
                     or blkmeta.height > self.netmagic.checkpoint_max)):
            for tx in block.vtx:
                tx.calc_sha256()

                if tx.is_coinbase():
                    continue

                if not self.tx_signed(tx, block, False):
                    self.logger.info("Invalid signature in block %064x" %
                                     (block.sha256, ))
                    return False

        # update database pointers for best chain
        batch = leveldb.WriteBatch()
        batch.Put('misc:total_work', hex(blkmeta.work))
        batch.Put('misc:height', str(blkmeta.height))
        batch.Put('misc:tophash', ser_hash)

        self.logger.info("ChainDb: height %d, block %064x" %
                         (blkmeta.height, block.sha256))

        # all TX's in block are connectable; index
        neverseen = 0
        for tx in block.vtx:
            tx.calc_sha256()
            if not self.mempool.remove(tx.sha256):
                neverseen += 1
            txidx = TxIdx(block.sha256)
            if not self.puttxidx(tx.sha256, txidx, batch):
                self.logger.info("TxIndex failed %064x" % (tx.sha256, ))
                return False

        self.logger.info("MemPool: blk.vtx.sz %d, neverseen %d, poolsz %d" %
                         (len(block.vtx), neverseen, self.mempool.size()))

        # mark deps as spent
        for outpt in outpts:
            self.spend_txout(outpt[0], outpt[1], batch)

        self.db.Write(batch)
        return True
Example #9
 def insert(self, **kwargs):
     assert set(kwargs.keys()) == {'s', 'p', 'o'}
     batch = leveldb.WriteBatch()
     _insert_permutations(kwargs, batch)
     self.db.Write(batch, sync=True)
Example #10
 def createDB(self,dataset):
     db=leveldb.LevelDB(self.outdir+u"/"+dataset+u".leveldb",create_if_missing=True)
     batch=leveldb.WriteBatch()
     return db,batch
Example #11
    def __encrypt_copy(self, file_path, current_hash_of_unenc_file, name = None):
        """
        Encrypt file and copy to dropbox folder
        """

        self.logger.debug('Calling __encrypt_copy, file_path: ' + file_path +
            ' current_hash_of_unenc_file: ' + current_hash_of_unenc_file +
            ' name: ' + str(name))

        file_name = ntpath.basename(file_path)
        temp_dir = tempfile.gettempdir()
        temp_dir += '/'
        destination_path = temp_dir+file_name
        self.logger.debug('Temp destination: ' + destination_path)

        try:
            self.logger.debug('Copying ' + file_path + ' to '+
                destination_path)
            shutil.copy2(file_path, destination_path)
        except IOError:
            self.logger.error('Error copying ' + file_path + ' to '+
                destination_path)
            sys.exit()

        self.logger.debug('Encrypting ' + destination_path)
        #Encryptor returns the path with file name of encrypted file
        old_path = destination_path
        destination_path = self.encryptor(self.password, destination_path)
        self.logger.debug('Done...')
        #Remove original file (before encryption)
        os.remove(old_path)

        random_file_name = ''
        if name is None:
            random_file_name = random_name()
        else:
            random_file_name = name

        random_file_name_with_path = temp_dir + random_file_name

        try:
            self.logger.debug('Renaming ' + destination_path + ' to '
            +random_file_name_with_path)
            os.rename(destination_path, random_file_name_with_path)
        except OSError:
            self.logger.error('Error renaming ' + destination_path
                + ' to ' + random_file_name_with_path)
            sys.exit()

        current_hash_of_enc_file = compute_hash(random_file_name_with_path)
        combined_hash = current_hash_of_unenc_file
        combined_hash += self.HASH_SEPARATOR
        combined_hash += current_hash_of_enc_file
        combined_hash += self.HASH_SEPARATOR
        combined_hash += random_file_name
        batch = leveldb.WriteBatch()
        file_path_aug = file_path + self.FILE_PATH_MARK

        self.logger.debug('Insert in db: ' + file_path_aug + ' -> '+
            combined_hash)
        batch.Put(file_path_aug, combined_hash)

        self.logger.debug('Moving ' + random_file_name_with_path + ' to '
            + self.dropbox_folder)

        #Remove existing file
        if  os.path.isfile(self.dropbox_folder + random_file_name):
            self.logger.info('Removing file ' + self.dropbox_folder +
                random_file_name + ' from dropbox dir')
            try:
                os.remove(self.dropbox_folder + random_file_name)
            except OSError:
                self.logger.error('Error removing ' + self.dropbox_folder +
                    random_file_name)
                sys.exit()

        try:
            shutil.move(random_file_name_with_path, self.dropbox_folder +
                random_file_name)
        except IOError:
            self.logger.error('Error moving ' + random_file_name_with_path +
                ' to ' + self.dropbox_folder + random_file_name)
            sys.exit()
        self.database.Write(batch, sync=True)
Example #12
 def writebatch(self, values):
     batch = leveldb.WriteBatch()
     for k, v in values.items():
         batch.Put(k, v)
     self.__db.Write(batch)
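A short usage sketch for writebatch(), assuming the enclosing object is a thin LevelDB wrapper (the KVStore name and constructor are assumptions) and that keys and values are already byte strings:

store = KVStore('/tmp/example_db')   # hypothetical wrapper exposing writebatch()
store.writebatch({
    b'user:1': b'alice',
    b'user:2': b'bob',
})  # both pairs are applied in a single atomic LevelDB write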
Example #13
def make_proposal_db(input_proposal, output_db, box_order=None):
    if os.path.isfile(input_proposal) == False:
        print 'File not found %s' % input_proposal
        return

    print 'reading input data file : %s' % input_proposal
    if '.pkl' in input_proposal:
        try:
            with open(input_proposal, 'rb') as f:
                file_list = cPickle.load(f)
                box_list = cPickle.load(f)
        except:
            with open(input_proposal, 'rb') as f:
                data = cPickle.load(f)
                file_list = data['images'][0]
                box_list = data['boxes']

        if len(file_list) == 1:
            file_list = file_list[0]

        if box_order != None:
            new_list = []
            for one_box_list in box_list:
                new_one_box_list = one_box_list[:, box_order]
                new_list.append(new_one_box_list)
            box_list = new_list

        print 'finished reading the pickle file.'
    elif '.mat' in input_proposal:
        matlab_data = sio.loadmat(input_proposal)
        raw_file_data = matlab_data['images'].ravel()
        raw_box_data = matlab_data['boxes'].ravel()
        file_list = []
        for i in xrange(raw_file_data.shape[0]):
            file = raw_file_data[i]
            if isinstance(file, list) == True:
                file = file[0]
            elif isinstance(file, ndarray) == True:
                file = file[0]
            file_list.append(file.encode('ascii', 'ignore'))
        box_list = []
        for i in xrange(raw_box_data.shape[0]):
            if len(raw_box_data[i]) > 0:
                box_list.append(raw_box_data[i][:, box_order] - 1)
            else:
                box_list.append(raw_box_data[i])
        print 'finished reading the mat file.'
    else:
        print 'unsupported file format.'
        print '.pkl and .mat files are supported.'
        return

    remove_folder(output_db)

    db = leveldb.LevelDB(output_db)
    batch = leveldb.WriteBatch()

    i = 0
    for file, box in zip(file_list, box_list):
        if isinstance(file, list) == True:
            file = file[0]
        if isinstance(file, list) == True:
            file = file[0]
        batch.Put(file, cPickle.dumps(box))
        i += 1
        if i % 5000 == 0:
            print 'inserted %s data into DB' % i
            db.Write(batch, sync=True)
            del batch
            batch = leveldb.WriteBatch()

    if i % 5000 > 0:
        db.Write(batch, sync=True)

    print 'inserted total %s proposal data into DB' % i
    print 'finished writing proposal DB : %s' % output_db
Example #14
def setheight(chaindb, height, hash):
    batch = leveldb.WriteBatch()
    batch.Put('height:' + str(height), hash)
    chaindb.db.Write(batch)
Example #15
def test_leveldb_multi_write(n):
    batch = leveldb.WriteBatch()
    for i in xrange(n):
        # stage each key in the batch; the single Write below applies them all
        batch.Put(str(i), str(i))

    db.Write(batch, sync=True)
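A sketch of how the helper above might be timed, assuming db is a module-level leveldb.LevelDB handle as the snippet implies; timeit comes from the standard library.

import timeit

# Time one batched write of 10,000 keys; 'db' must already be open.
elapsed = timeit.timeit(lambda: test_leveldb_multi_write(10000), number=1)
print 'batched write of 10000 keys took %.3fs' % elapsed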
Example #16
def init_batch():
    return leveldb.WriteBatch()
Example #17
 def clear_db(self):
     # db = leveldb.LevelDB('./data')
     b = leveldb.WriteBatch()
     for k in self.db.RangeIter(include_value=False, reverse=True):
         b.Delete(k)
     self.db.Write(b)
Example #18
    def convert_data_to_db(self, train_data_folder, valid_data_folder, test_data_folder, 
                           min_pixel, 
                           train_db_name, valid_db_name, test_db_name,
                           train_list_file, valid_list_file, test_list_file,
                           channel_no, preserve_ar):
        
        #self.remove_folder(train_db_name)
        #self.remove_folder(valid_db_name)
        self.remove_folder(test_db_name)
                
        #self.train_db = leveldb.LevelDB(train_db_name)
        #self.valid_db = leveldb.LevelDB(valid_db_name)
        self.test_db = leveldb.LevelDB(test_db_name)
    
        self.datum = caffe.proto.caffe_pb2.Datum()
        self.datum.channels = channel_no
        self.datum.width = min_pixel
        self.datum.height = min_pixel
    
        print "convert_train_data"
        print "train_db_name : %s" % train_db_name
        print "valid_db_name : %s" % valid_db_name
        print "test_db_name : %s" % test_db_name
        print "channel_no : %s" % channel_no
    
        #modes = ['train', 'valid']
        #modes = ['train']
        #modes = ['valid']
        modes = ['test']
        
        start_time = time.time()
        
        for mode in modes:
            if mode == 'train':
                image_list_file = open(train_list_file, 'rb')
                data_folder = train_data_folder
            elif mode == 'valid':
                image_list_file = open(valid_list_file, 'rb')
                data_folder = valid_data_folder
            elif mode == 'test':
                image_list_file = open(test_list_file, 'rb')
                data_folder = test_data_folder
            else:
                print 'not supported mode : %s' % mode
                return
                
            lines = image_list_file.readlines()
            image_list_file.close()
            
            total_data_no = len(lines)
        
            print ''
            print 'processing %s' % mode
            
            for i, line in enumerate(lines):
                if mode == 'test':
                    parsed = line.split(' ')
                    file_path = parsed[0] + '.JPEG'
                    label = None
                else:
                    parsed = line.split('\t')
                    label = parsed[1]
                    file_path = parsed[0]
                    file_path = file_path.replace('\r', '')
                    file_path = file_path.replace('\n', '')

                org_image = cv2.imread(data_folder + '/' + file_path)                
                org_size = org_image.shape
                
                if preserve_ar == 'preserve':
                    if org_size[1] > org_size[0]:
                        im_scale = (min_pixel/float(org_size[0]))
                        wsize = int((float(org_size[1])*float(im_scale)))
                        image_width = wsize
                        image_height = min_pixel
                    else:
                        im_scale = (min_pixel/float(org_size[1]))
                        hsize = int((float(org_size[0])*float(im_scale)))
                        image_width = min_pixel
                        image_height = hsize
                elif preserve_ar == 'ignore':
                    image_width = min_pixel
                    image_height = min_pixel

                #if org_image.mode != 'RGB':                    
                #    org_image = org_image.convert('RGB')

                image = cv2.resize(org_image, (image_width, image_height),
                    interpolation=cv2.INTER_LINEAR)

                self.datum.width = image_width
                self.datum.height = image_height
                    
                self.insert_db(mode, image, label, None, channel_no, False)
                
                if mode == 'train' and self.train_no > 0 and self.train_no % 1000 == 0:
                    self.train_db.Write(self.train_batch, sync = True)
                    del self.train_batch
                    self.train_batch = leveldb.WriteBatch()
                    print "%.1f %% done." % (i * 100.0 / total_data_no)
                    print 'Processed %i total train images. %d sec' % (self.train_no, (time.time() - start_time))
                    start_time = time.time()
    
                if mode == 'valid' and self.valid_no > 0 and self.valid_no % 1000 == 0:
                    self.valid_db.Write(self.valid_batch, sync = True)
                    del self.valid_batch
                    self.valid_batch = leveldb.WriteBatch()
                    print 'Processed %i valid images.' % self.valid_no
    
                if mode == 'test' and self.test_no > 0 and self.test_no % 1000 == 0:
                    self.test_db.Write(self.test_batch, sync = True)
                    del self.test_batch
                    self.test_batch = leveldb.WriteBatch()
                    print 'Processed %i test images.' % self.test_no

        # Write last batch of images
        if self.train_no % 1000 != 0:
            self.train_db.Write(self.train_batch, sync = True)
        if self.valid_no % 1000 != 0:
            self.valid_db.Write(self.valid_batch, sync = True)
        if self.test_no % 1000 != 0:
            self.test_db.Write(self.test_batch, sync = True)
    
        print 'Processed %d train, %d valid, %d test' % (self.train_no, self.valid_no, self.test_no)
Example #19
    def convert_test_data(self, data_set_folder, min_pixel, test_db_name, test_output_pickle_path, 
                          inverse, channel_no = 1):
        self.remove_folder(test_db_name)
            
        test_db = leveldb.LevelDB(test_db_name)
        
        pickleTestX = test_output_pickle_path + "/testX_size_" + str(min_pixel) + ".pickle"
        pickleFileNames = test_output_pickle_path + "/fileNames.pickle"
        
        if not os.path.exists(test_output_pickle_path):
            os.makedirs(test_output_pickle_path)

        numberofImages = 0    
    
        datum = caffe.proto.caffe_pb2.Datum()
        datum.channels = channel_no
        datum.width = min_pixel
        datum.height = min_pixel
        
        test_batch = leveldb.WriteBatch()
    
        print "Load test dataset from image files"
    
        for fileNameDir in os.walk(data_set_folder):   
            for index, fileName in enumerate(fileNameDir[2]):
                if fileName[-5:] != ".JPEG":
                  continue
                numberofImages += 1
        
        imageSize = min_pixel * min_pixel
        num_rows = numberofImages # one row for each image in the test dataset

        batch_size = 10000    
        data_size = min(batch_size, numberofImages)
        testX = numpy.zeros((data_size, channel_no, imageSize), dtype=numpy.uint8)
        
        files = []
        db_index = 0
        pickle_index = 0
        batch_no = 1
        
        print "Reading images"
        for fileNameDir in os.walk(data_set_folder):   
            for index, fileName in enumerate(fileNameDir[2]):
                if fileName[-5:] != ".JPEG":
                  continue
                
                nameFileImage = "{0}{1}{2}".format(fileNameDir[0], os.sep, fileName)            
                org_image = Image.open(nameFileImage)
                files.append(fileName)
                
                image = org_image.resize((min_pixel, min_pixel), Image.ANTIALIAS)
    
                """
                print fileName
                
                plt.figure(1, figsize=(1, 1), dpi=100)
                plt.gray();                
                plt.subplot(1, 1, 1)
                plt.imshow(image)
                plt.show()
                """
    
                if inverse:
                    image_ubyte = 255 - img_as_ubyte(image)
                else:
                    image_ubyte = img_as_ubyte(image)
                
                if channel_no > 1:
                    selem = disk(6)
                    w_tophat = white_tophat(image_ubyte, selem)
                    b_tophat = black_tophat(image_ubyte, selem)
                    datum.data = image_ubyte.tostring() + w_tophat.tostring() + b_tophat.tostring()
                    image_output = numpy.concatenate((image_ubyte, w_tophat, b_tophat), axis=1)
                else:
                    datum.data = image_ubyte.tostring()
                    image_output = image_ubyte
                
                    
                test_batch.Put("%08d" % db_index, datum.SerializeToString())
    
                testX[pickle_index] = numpy.reshape(image_output, (channel_no, imageSize))
    
                db_index += 1
                pickle_index += 1
                
                if db_index % 1000 == 0:
                    test_db.Write(test_batch, sync = True)
                    del test_batch
                    test_batch = leveldb.WriteBatch()
                    print 'Processed %i test images.' % db_index
    
                if pickle_index % batch_size == 0:
                    pickle_file_name = pickleTestX + "_" + str(batch_no)
                    with open(pickle_file_name,'wb') as fp:
                        cPickle.dump(testX, fp)
                        print "pickled %s" % pickle_file_name
                        data_size = min(batch_size, numberofImages - batch_size * batch_no)
                        testX = numpy.zeros((data_size, channel_no, imageSize), dtype=numpy.uint8)
                        batch_no += 1
                        pickle_index = 0
                
                report = [int((j+1)*num_rows/20.) for j in range(20)]
                if db_index in report: print numpy.ceil(db_index *100.0 / num_rows), "% done"
    
    
        # Write last batch of images
        if db_index % 1000 != 0:
            test_db.Write(test_batch, sync = True)
    
        if pickle_index % batch_size > 0:
            pickle_file_name = pickleTestX + "_" + str(batch_no)
            with open(pickle_file_name,'wb') as fp:
                cPickle.dump(testX, fp)
                print "pickled %s" % pickle_file_name
                        
        with open(pickleFileNames,'wb') as fp:
            cPickle.dump(files, fp)
    
        print 'Processed a total of %i images.' % db_index
Example #20
def make_db(input_proposal, output_db, box_order=None):
    if os.path.isfile(input_proposal) == False:
        print 'File not found %s' % input_proposal
        return

    print 'reading input data file : %s' % input_proposal
    if '.pkl' in input_proposal:
        try:
            with open(input_proposal, 'rb') as f:
                file_list = cPickle.load(f)
                box_list = cPickle.load(f)
        except:
            with open(input_proposal, 'rb') as f:
                data = cPickle.load(f)
                file_list = data['images'][0]
                box_list = data['boxes']

        if len(file_list) == 1:
            file_list = file_list[0]

        if box_order != None:
            new_list = []
            for one_box_list in box_list:
                new_one_box_list = one_box_list[:, box_order]
                new_list.append(new_one_box_list)
            box_list = new_list

        print 'finished reading the pickle file.'
    else:
        print 'unsupported file format.'
        print '.pkl and .mat files are supported.'
        return

    remove_folder(output_db)

    db = leveldb.LevelDB(output_db)
    batch = leveldb.WriteBatch()

    i = 0
    for file, box in zip(file_list, box_list):
        if isinstance(file, list) == True:
            file = file[0]
        if isinstance(file, list) == True:
            file = file[0]
        batch.Put(file, cPickle.dumps(box))
        i += 1
        if i % 5000 == 0:
            print 'inserted %s data into DB' % i
            db.Write(batch, sync=True)
            del batch
            batch = leveldb.WriteBatch()

    if i % 5000 > 0:
        db.Write(batch, sync=True)

    print 'inserted total %s proposal data into DB' % i
    print 'finished writing proposal DB : %s' % output_db


#if __name__ == '__main__':
#    make_db()
Example #21
    def loop(self):
        logging.info("Starting loop for %s" % self.calendar_url)

        try:
            logging.debug("Opening %s" % self.up_to_path)
            with open(self.up_to_path, 'r') as up_to_fd:
                last_known = int(up_to_fd.read().strip())
        except FileNotFoundError as exp:
            last_known = -1
        logging.info("Checking calendar " + str(self.calendar_url) +
                     ", last_known commitment:" + str(last_known))

        if self.btc_net == 'testnet':
            bitcoin.SelectParams('testnet')
        elif self.btc_net == 'regtest':
            bitcoin.SelectParams('regtest')

        while True:
            start_time = time.time()
            backup_url = urljoin(self.calendar_url,
                                 "/experimental/backup/%d" % (last_known + 1))
            logging.debug("Asking " + str(backup_url))
            try:
                r = requests.get(backup_url)
            except Exception as err:
                logging.error(
                    "Exception asking %s error message %s, sleeping for %d seconds"
                    % (str(backup_url), str(err), SLEEP_SECS))
                time.sleep(SLEEP_SECS)
                continue

            if r.status_code != 200:
                logging.info("%s not found, sleeping for %d seconds" %
                             (backup_url, SLEEP_SECS))
                time.sleep(SLEEP_SECS)
                continue

            kv_map = Backup.bytes_to_kv_map(r.content)
            attestations = {}
            ops = {}
            for key, value in kv_map.items():
                # print("--- key=" + b2x(key) + " value=" + b2x(value))
                ctx = BytesDeserializationContext(value)

                for _a in range(ctx.read_varuint()):
                    attestation = TimeAttestation.deserialize(ctx)
                    attestations[key] = attestation

                for _b in range(ctx.read_varuint()):
                    op = Op.deserialize(ctx)
                    ops[key] = op

            proxy = bitcoin.rpc.Proxy()

            # Verify all bitcoin attestation are valid
            logging.debug("Total attestations: " + str(len(attestations)))
            for key, attestation in attestations.items():
                if attestation.__class__ == BitcoinBlockHeaderAttestation:
                    blockhash = proxy.getblockhash(attestation.height)
                    block_header = proxy.getblockheader(blockhash)
                    # the following raise an exception and block computation if the attestation does not verify
                    attested_time = attestation.verify_against_blockheader(
                        key, block_header)
                    logging.debug("Verifying " + b2x(key) + " result " +
                                  str(attested_time))

            # verify all ops connects to an attestation
            logging.debug("Total ops: " + str(len(ops)))
            for key, op in ops.items():
                current_key = key
                current_op = op
                while True:
                    next_key = current_op(current_key)
                    if next_key in ops:
                        current_key = next_key
                        current_op = ops[next_key]
                    else:
                        break
                assert next_key in attestations

            batch = leveldb.WriteBatch()
            for key, value in kv_map.items():
                batch.Put(key, value)
            self.db.db.Write(batch, sync=True)

            last_known = last_known + 1
            try:
                with open(self.up_to_path, 'w') as up_to_fd:
                    up_to_fd.write('%d\n' % last_known)
            except FileNotFoundError as exp:
                logging.error(str(exp))
                break

            elapsed_time = time.time() - start_time
            logging.info("Took %ds for %s" % (elapsed_time, str(backup_url)))
Example #22
 def get_batch(self):
     return leveldb.WriteBatch()
Example #23
                        help='old json format of 2012 crawl')
    parser.add_argument('--batchsize', help='size of levelDB write batches',
                        default=100000, type=int)
    parser.add_argument('--prefix', help='prefix for filename',
                        default='')
    parser.add_argument('crawl', help='crawl id, e.g. 2013_11')
    parser.add_argument('folder', help='subfolder, e.g. 1368696381249')
    args = parser.parse_args(sys.argv[1:])

    db = None
    if args.db:
        import leveldb
        db = leveldb.LevelDB(args.db)

        batch_size = 0
        batch = leveldb.WriteBatch()

    count = 0
    kv_generator = read_cdx(args) if args.cdx else read_json(args)
    if args.old:
        kv_generator = read_old_json(args)

    for key, valuedict in kv_generator:
        if key is None or valuedict is None:
            continue
        count += 1
        if db is not None:
            if args.batchsize > 1:
                if batch_size >= args.batchsize:
                    db.Write(batch)
                    sys.stderr.write('.')
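The snippet above is truncated before the batch is populated or reset. A minimal sketch of the batching loop it appears to implement, assuming the values are serialized with json before storage (the serialization choice and the final flush are guesses):

import json

for key, valuedict in kv_generator:
    if key is None or valuedict is None:
        continue
    count += 1
    if db is not None:
        value = json.dumps(valuedict).encode('utf-8')  # assumed serialization
        if args.batchsize > 1:
            batch.Put(key, value)
            batch_size += 1
            if batch_size >= args.batchsize:
                db.Write(batch)                 # flush the accumulated writes
                batch = leveldb.WriteBatch()
                batch_size = 0
                sys.stderr.write('.')
        else:
            db.Put(key, value)

# flush whatever remains in the final, partially filled batch
if db is not None and args.batchsize > 1 and batch_size > 0:
    db.Write(batch)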
Example #24
 def __init__(self, db):
     self.db = db
     self.batch = leveldb.WriteBatch()
Example #25
    def import_block(self, block, block_hash, block_height, sync, revert=False):

        self.batch_list = {}  # address -> history
        self.batch_txio = {}  # transaction i/o -> address

        block_inputs = []
        block_outputs = []
        addr_to_read = []

        # deserialize transactions
        t0 = time.time()
        tx_hashes, txdict = self.deserialize_block(block)

        t00 = time.time()

        # undo info
        if revert:
            undo_info = self.get_undo_info(block_height)
        else:
            undo_info = {}


        if not revert:
            # read addresses of tx inputs
            for tx in txdict.values():
                for x in tx.get('inputs'):
                    txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
                    block_inputs.append(txi)

            block_inputs.sort()
            for txi in block_inputs:
                try:
                    addr = self.db.Get(txi)
                except KeyError:
                    # the input could come from the same block
                    continue
                except:
                    traceback.print_exc(file=sys.stdout)
                    self.shared.stop()
                    raise

                self.batch_txio[txi] = addr
                addr_to_read.append(addr)

        else:
            for txid, tx in txdict.items():
                for x in tx.get('outputs'):
                    txo = (txid + int_to_hex(x.get('index'), 4)).decode('hex')
                    block_outputs.append(txo)
                    addr_to_read.append( x.get('address') )

                undo = undo_info.get(txid)
                for i, x in enumerate(tx.get('inputs')):
                    addr = undo['prev_addr'][i]
                    addr_to_read.append(addr)





        # read histories of addresses
        for txid, tx in txdict.items():
            for x in tx.get('outputs'):
                addr_to_read.append(x.get('address'))

        addr_to_read.sort()
        for addr in addr_to_read:
            try:
                self.batch_list[addr] = self.db.Get(addr)
            except KeyError:
                self.batch_list[addr] = ''
            except:
                traceback.print_exc(file=sys.stdout)
                self.shared.stop()
                raise


        # process
        t1 = time.time()

        if revert:
            tx_hashes = tx_hashes[::-1]


        for txid in tx_hashes:  # must be ordered
            tx = txdict[txid]
            if not revert:

                undo = { 'prev_addr':[] } # contains the list of pruned items for each address in the tx; also, 'prev_addr' is a list of prev addresses
                
                prev_addr = []
                for i, x in enumerate(tx.get('inputs')):
                    txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
                    addr = self.batch_txio[txi]

                    # add redeem item to the history.
                    # add it right next to the input txi? this will break history sorting, but it's ok if I neglect tx inputs during search
                    self.set_spent_bit(addr, txi, True, txid, i, block_height)

                    # when I prune, prune a pair
                    self.prune_history(addr, undo)
                    prev_addr.append(addr)

                undo['prev_addr'] = prev_addr 

                # here I add only the outputs to history; maybe I want to add inputs too (that's in the other loop)
                for x in tx.get('outputs'):
                    addr = x.get('address')
                    self.add_to_history(addr, txid, x.get('index'), block_height)
                    self.prune_history(addr, undo)  # prune here because we increased the length of the history

                undo_info[txid] = undo

            else:

                undo = undo_info.pop(txid)

                for x in tx.get('outputs'):
                    addr = x.get('address')
                    self.revert_prune_history(addr, undo)
                    self.revert_add_to_history(addr, txid, x.get('index'), block_height)

                prev_addr = undo.pop('prev_addr')
                for i, x in enumerate(tx.get('inputs')):
                    addr = prev_addr[i]
                    self.revert_prune_history(addr, undo)
                    txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
                    self.unset_spent_bit(addr, txi)

                assert undo == {}

        if revert: 
            assert undo_info == {}


        # write
        max_len = 0
        max_addr = ''
        t2 = time.time()

        batch = leveldb.WriteBatch()
        for addr, serialized_hist in self.batch_list.items():
            batch.Put(addr, serialized_hist)
            l = len(serialized_hist)/80
            if l > max_len:
                max_len = l
                max_addr = addr

        if not revert:
            # add new created outputs
            for txio, addr in self.batch_txio.items():
                batch.Put(txio, addr)
            # delete spent inputs
            for txi in block_inputs:
                batch.Delete(txi)
            # add undo info
            self.write_undo_info(batch, block_height, undo_info)
        else:
            # restore spent inputs
            for txio, addr in self.batch_txio.items():
                # print "restoring spent input", repr(txio)
                batch.Put(txio, addr)
            # delete spent outputs
            for txo in block_outputs:
                batch.Delete(txo)

        # add the max
        batch.Put('height', self.serialize([(block_hash, block_height, self.db_version)]))

        # actual write
        self.db.Write(batch, sync=sync)

        t3 = time.time()
        if t3 - t0 > 10 and not sync:
            print_log("block", block_height,
                      "parse:%0.2f " % (t00 - t0),
                      "read:%0.2f " % (t1 - t00),
                      "proc:%.2f " % (t2-t1),
                      "write:%.2f " % (t3-t2),
                      "max:", max_len, max_addr)

        for addr in self.batch_list.keys():
            self.invalidate_cache(addr)
Example #26
 def commit(self):
     batch = leveldb.WriteBatch()
     for k in self.uncommitted:
         batch.Put(k, self.uncommitted[k])
     self.db.Write(batch, sync=True)
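commit() above flushes a dict of pending writes in a single synchronous batch. A minimal sketch of the rest of such a write-through buffer; the constructor and set() shown here are assumptions about how self.uncommitted gets filled, not the original class.

    def __init__(self, db):
        self.db = db             # an open leveldb.LevelDB handle
        self.uncommitted = {}    # key -> value staged for the next commit

    def set(self, key, value):
        # Record the pending write; nothing reaches LevelDB until commit() runs.
        self.uncommitted[key] = value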
Example #27
        '-l',
        '--limit',
        type=int,
        help="Max number of recommendations to generate per-paper",
        default=10)
    args = parser.parse_args()

    db = leveldb.LevelDB(
        args.db_path,
        write_buffer_size=100 << 20,  # 100MB
        block_cache_size=400 << 20)  # 400MB
    b = Benchmark(args.benchmark_freq)
    tf = TreeFile(args.infile)

    if args.batch_size:
        writer = leveldb.WriteBatch()
    else:
        writer = db

    for recs in make_expert_rec(tf, args.limit):
        recd = [r.pid for r in recs]
        key = recs[0].target_pid + "|expert"
        writer.Put(key.encode(), msgpack.packb(recd))
        b.increment()
        if args.batch_size and b.count % args.batch_size == 0:
            db.Write(writer)

    args.infile.seek(0)
    tf = TreeFile(args.infile)
    for recs in make_classic_recs(tf, args.limit):
        recd = [r.pid for r in recs]
Example #28
    def add_block(self, block: Block):
        """인증된 블럭만 추가합니다.

        :param block: 인증완료된 추가하고자 하는 블럭
        :return:
        """
        # util.logger.spam(f"blockchain:add_block --start--")
        if block.block_status is not BlockStatus.confirmed:
            raise BlockInValidError("unconfirmed block")
        elif self.__last_block is not None and self.__last_block.height > 0:
            if self.__last_block.block_hash != block.prev_block_hash:
                # If the last block's hash differs from the new block's prev_block_hash, do not add it and raise an exception.
                logging.debug("self.last_block.block_hash: " +
                              self.__last_block.block_hash)
                logging.debug("block.prev_block_hash: " +
                              block.prev_block_hash)
                raise BlockError("hash does not match the last block")

        # util.logger.spam(f"blockchain:add_block --1-- {block.prev_block_hash}, {block.height}")
        if block.height == 0 or ObjectManager().peer_service is None:
            # all results to success
            success_result = {'code': int(message_code.Response.success)}
            invoke_results = self.__create_invoke_result_specific_case(
                block.confirmed_transaction_list, success_result)
        else:
            try:
                invoke_results = ObjectManager().peer_service.score_invoke(
                    block, self.__channel_name)

            except Exception as e:
                # When Grpc Connection Raise Exception
                # save all result{'code': ScoreResponse.SCORE_CONTAINER_EXCEPTION, 'message': str(e)}
                logging.error(f'Error While Invoke Score fail add block : {e}')
                score_container_exception_result = {
                    'code': ScoreResponse.SCORE_CONTAINER_EXCEPTION,
                    'message': str(e)
                }
                invoke_results = self.__create_invoke_result_specific_case(
                    block.confirmed_transaction_list,
                    score_container_exception_result)

        # util.logger.spam(f"blockchain:add_block --2--")
        self.__add_tx_to_block_db(block, invoke_results)

        block_hash_encoded = block.block_hash.encode(encoding='UTF-8')

        batch = leveldb.WriteBatch()
        batch.Put(block_hash_encoded, block.serialize_block())
        batch.Put(BlockChain.LAST_BLOCK_KEY, block_hash_encoded)
        batch.Put(
            BlockChain.BLOCK_HEIGHT_KEY + block.height.to_bytes(
                conf.BLOCK_HEIGHT_BYTES_LEN, byteorder='big'),
            block_hash_encoded)
        self.__confirmed_block_db.Write(batch)

        self.__last_block = block
        self.__block_height = self.__last_block.height

        # logging.debug("ADD BLOCK Height : %i", block.height)
        # logging.debug("ADD BLOCK Hash : %s", block.block_hash)
        # logging.debug("ADD BLOCK MERKLE TREE Hash : %s", block.merkle_tree_root_hash)
        # logging.debug("ADD BLOCK Prev Hash : %s ", block.prev_block_hash)
        logging.info("ADD BLOCK HEIGHT : %i , HASH : %s", block.height,
                     block.block_hash)
        # Store the block's transaction data.
        # The Peer executes the chaincode by passing the Score as a parameter.

        # util.logger.spam(f"blockchain:add_block --end--")

        util.apm_event(
            self.__peer_id, {
                'event_type': 'AddBlock',
                'peer_id': self.__peer_id,
                'data': {
                    'block_height': self.__block_height,
                    'block_type': block.block_type.name
                }
            })

        return True
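add_block() above stores the serialized block under its hash and records that hash under BlockChain.LAST_BLOCK_KEY (plus a height index). A minimal sketch of a helper reading that pointer back from the same database; the helper name is illustrative, not part of the original class.

    def __get_last_block_hash(self):
        # Follow the LAST_BLOCK_KEY pointer written by add_block().
        block_hash_encoded = self.__confirmed_block_db.Get(BlockChain.LAST_BLOCK_KEY)
        return block_hash_encoded.decode(encoding='UTF-8')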
Example #29
    def putoneblock(self, block):
        block.calc_sha256()

        if not block.is_valid():
            self.logger.info("Invalid block %064x" % (block.sha256, ))
            return False

        if not self.have_prevblock(block):
            self.orphans[block.sha256] = True
            self.orphan_deps[block.hashPrevBlock] = block
            self.logger.info("Orphan block %064x (%d orphans)" %
                             (block.sha256, len(self.orphan_deps)))
            return False

        top_height = self.getheight()
        top_work = long(self.db.Get('misc:total_work'), 16)

        # read metadata for previous block
        prevmeta = BlkMeta()
        if top_height >= 0:
            ser_prevhash = ser_uint256(block.hashPrevBlock)
            prevmeta.deserialize(self.db.Get('blkmeta:' + ser_prevhash))
        else:
            ser_prevhash = ''

        batch = leveldb.WriteBatch()

        # build network "block" msg, as canonical disk storage form
        msg = msg_block()
        msg.block = block
        msg_data = message_to_str(self.netmagic, msg)

        # write "block" msg to storage
        fpos = self.blk_write.tell()
        self.blk_write.write(msg_data)
        self.blk_write.flush()

        # add index entry
        ser_hash = ser_uint256(block.sha256)
        batch.Put('blocks:' + ser_hash, str(fpos))

        # store metadata related to this block
        blkmeta = BlkMeta()
        blkmeta.height = prevmeta.height + 1
        blkmeta.work = (prevmeta.work + uint256_from_compact(block.nBits))
        batch.Put('blkmeta:' + ser_hash, blkmeta.serialize())

        # store list of blocks at this height
        heightidx = HeightIdx()
        heightstr = str(blkmeta.height)
        try:
            heightidx.deserialize(self.db.Get('height:' + heightstr))
        except KeyError:
            pass
        heightidx.blocks.append(block.sha256)

        batch.Put('height:' + heightstr, heightidx.serialize())
        self.db.Write(batch)

        # if chain is not best chain, proceed no further
        if (blkmeta.work <= top_work):
            self.logger.info("ChainDb: height %d (weak), block %064x" %
                             (blkmeta.height, block.sha256))
            return True

        # update global chain pointers
        if not self.set_best_chain(ser_prevhash, ser_hash, block, blkmeta):
            return False

        return True
Example #30
 def __init__(self, dbfile):
     self._nbop = 0
     self._db = leveldb.LevelDB(dbfile)
     self._batch = leveldb.WriteBatch()
     return