def write_batch(self, batch):
    """
    Write a batch to the database

    Arguments:
    batch -- an array of Datums
    """
    keys = self.get_keys(len(batch))
    if self.backend == 'lmdb':
        lmdb_txn = self.db.begin(write=True)
        for i, datum in enumerate(batch):
            lmdb_txn.put('%08d_%d' % (keys[i], datum.label), datum.SerializeToString())
        lmdb_txn.commit()
    elif self.backend == 'leveldb':
        leveldb_batch = leveldb.WriteBatch()
        for i, datum in enumerate(batch):
            leveldb_batch.Put('%08d_%d' % (keys[i], datum.label), datum.SerializeToString())
        self.db.Write(leveldb_batch)
    else:
        logger.error('unsupported backend')
        return False
def __init__(self, db_name):
    self.idf_hd = idf()
    with open("idf_dumps.txt", "r") as fd:
        s = fd.read()
    self.idf_hd.loads(s)
    self.hot_word_dic = {}
    self.short_url_hd = fast_search.load("short_url.txt")
    self.dbhd = leveldb.LevelDB(db_name)
    self.url_re = re.compile(
        r'(http:\/\/)*[\w\d]+\.[\w\d\.]+\/[\w\d_!@#$%^&\*-_=\+]+')
    # The batch is used internally as a cache: add_doc does not write to the db file immediately.
    # Entries are flushed to the file when results are requested or when the
    # threshold (batch_limit) is reached.
    self.batch = leveldb.WriteBatch()
    self.batch_counter = 0
    self.batch_limit = 100000
    self.fid = 0
    #self.get_file_word_flag = "percent"
    self.get_file_word_flag = "num"
    self.word_list_n = 5
    self.get_file_word_cbk = {}
    self.get_file_word_cbk["num"] = self.get_file_word_list_by_num
    self.get_file_word_cbk["percent"] = self.get_file_word_list_by_persent
def add(db, key, value):
    print('## ADDING {} {} ##'.format(key.rstrip(b'\x00'), value.rstrip(b'\x00')))
    assert len(key) == 32
    batch = leveldb.WriteBatch()
    for i, byte in enumerate(key):
        path = key[:i]
        node_at_path = deserialize(db_get(db, path))
        if node_at_path is None:
            new_leaf = LeafNode(key, value)
            db_put(batch, path, new_leaf.serialize())
            propagate_along_path(db, batch, path, hash_node(new_leaf))
            return
        if isinstance(node_at_path, LeafNode):
            new_leaf = LeafNode(key, value)
            if node_at_path.key == key:
                db_put(batch, path, new_leaf.serialize())
                propagate_along_path(db, batch, path, hash_node(new_leaf))
            else:
                propagation_path = get_common_prefix(key, node_at_path.key)
                common_prefix_length = len(propagation_path)
                db_put(batch, key[:common_prefix_length + 1], new_leaf.serialize())
                db_put(batch, node_at_path.key[:common_prefix_length + 1], node_at_path.serialize())
                new_branch_node = BranchNode({
                    key[common_prefix_length]: hash_node(new_leaf),
                    node_at_path.key[common_prefix_length]: hash_node(node_at_path)
                })
                db_put(batch, key[:common_prefix_length], new_branch_node.serialize())
                propagate_along_path(db, batch, propagation_path, hash_node(new_branch_node))
            return
    raise Exception("How did we get here?")
def clean_removed(config, database):
    """ Check if any file from source dir was removed. """
    logger = logging.getLogger('mylog')
    dropbox_folder = config.get('DropBox', 'drop_box_dir')
    batch = leveldb.WriteBatch()
    for key, value in database.RangeIter():
        # unencrypted_hash, encrypted_hash, dropbox_file
        (_, encrypted_hash, dropbox_file) = \
            value.split(FileProcessor.HASH_SEPARATOR)
        dropbox_file = os.path.join(dropbox_folder, '') + dropbox_file
        if not os.path.isfile(key):
            batch.Delete(key)
            if not os.path.isfile(dropbox_file):
                logger.warning('File ' + dropbox_file + ' originally ' + key +
                               ' does not exist in dropbox dir. ' +
                               'Consistency check will catch it.')
            else:
                # Only for informational purposes
                hsh = compute_hash(dropbox_file)
                if hsh != encrypted_hash:
                    logger.warning('File ' + dropbox_file + ' originally ' + key +
                                   ' has hash that does not match db entry. ' +
                                   'Consistency check will catch it.')
                else:
                    logger.info('File ' + dropbox_file + ' originally ' + key +
                                ' was removed. Removing backed up file...')
                batch.Delete(key)
                os.remove(dropbox_file)
    database.Write(batch, sync=True)
def run(self):
    while True:
        ngram_list = self.queue.get()  # fetch new batch
        if not ngram_list:  # end signal
            print >> sys.stderr, "no new data in " + self.dataset + ", creating final text file"
            sys.stderr.flush()
            try:
                c = self.create_final_files()  # create .gz text file
            except:
                print >> sys.stderr, "error while creating final text file: " + self.dataset + ", returning"
                sys.stderr.flush()
                return
            print >> sys.stderr, c, self.dataset, "written, returning"
            sys.stderr.flush()
            return
        try:
            batch = leveldb.WriteBatch()  # write new batch
            for ngram in ngram_list:
                batch.Put(ngram.encode(u"utf-8"), u"1".encode(u"utf-8"))
            self.DB.Write(batch)
        except:
            print >> sys.stderr, "error in database writer, batch rejected: " + self.dataset
            traceback.print_exc()
            sys.stderr.flush()
def generate(dataset, dirs, threshold, size, unit_size, sample_rate, gpu, model, force, tt, net='', js=None):
    if dataset not in support:
        raise ValueError('The Dataset {} is not supported right now'.format(dataset))

    dictionary = dict()
    dictionary['train'] = dict()
    dictionary['train']['foreground'] = []
    dictionary['train']['background'] = []
    dictionary['train']['annotations'] = dict()
    dictionary['train']['response'] = dict()
    dictionary['train']['iou'] = dict()

    db = dict()
    db['train'] = leveldb.LevelDB(dataset + '_threshold_{}_train'.format(threshold))
    if tt > 1:
        db['test'] = leveldb.LevelDB(dataset + '_threshold_{}_test'.format(threshold))
        dictionary['test'] = dict()
        dictionary['test']['foreground'] = []
        dictionary['test']['background'] = []
        dictionary['test']['annotations'] = dict()
        dictionary['test']['response'] = dict()
        dictionary['test']['iou'] = dict()

    b = leveldb.WriteBatch()
    miss = []
    fg_counter = 0
    bg_counter = 0

    if dataset == 'ActivityNet-1.3':
        j = json.load(open('ActivityNet/activity_net.v1-3.min.json'))
        for v in j['database'].keys():  # iterate over names of videos
            vpath = os.path.join(dirs['video'], v + '.mp4')
            # check if the video is downloaded
            if not os.path.isfile(vpath):
                miss.append(v)
                continue
            fpath = os.path.join(dirs['feature'], '{}_US[{}]_SR[{}].h5'.format(v, unit_size, sample_rate))
            # check if the extracted feature exists
            exist = os.path.isfile(fpath)
            if not exist and not force:
                # sampling unit-level feature
                print('[!] Unit Level Feature [ {} ] does not exist'.format(fpath))
                print('[*] Extracting ... ')
                net = unit.sampling(v + '.mp4', size, unit_size, sample_rate, net, gpu, model,
                                    dirs['video'], dirs['feature'], reuse=True)
                #raise ValueError('[!] Unit Level Feature [ {} ] is not exist'.format(fpath))
            elif exist:
                print('Feature Path {} exists'.format(fpath))
            elif force:
                print('[!] Unit Level Feature [ {} ] does not exist, Ignoring...'.format(fpath))

            if not force or (force and exist):
                with h5py.File(fpath) as ff:  #, cv2.VideoCapture(vpath) as capture:
                    #capture = cv2.VideoCapture(vpath)
                    #length = capture.get(cv2.CAP_PROP_FRAME_COUNT)
                    #fps = capture.get(cv2.CAP_PROP_FPS)
                    #capture.release()
                    print('[*] Current Video => [ {} ]'.format(fpath))
                    fps = np.asarray(ff['fps'])
                    units = list(ff.keys())  # names of unit-level features
                    fduration = []
                    #annotations['annotations'][v] = []
                    for fs in range(len(j['database'][v]['annotations'])):
                        # each video contains more than one foreground segment
                        # frame duration [start frame, end frame]
                        fduration.append((np.asarray(j['database'][v]['annotations'][fs]['segment']) * fps).astype(int))
                        #annotations['annotations'][v].append(fduration[fs])
                        print(fduration[fs])

                    for u in units:
                        #print('units : {}'.format(u))
                        if u == 'feature_size' or u == 'unit_size' or u == 'sample_rate' or u == 'nframes' or u == 'fps':
                            continue
                        fg = False
                        #print(len(j['database'][v]['annotations']))
                        for fs in range(len(j['database'][v]['annotations'])):
                            # each video contains more than one foreground segment
                            # convert start time and end time to start frame and end frame respectively
                            """
                            foreground duration : time * fps => index of frame
                            proposal duration : index of frame
                            """
                            #fduration = (np.asarray(j['database'][v]['annotations'][fs]['segment']) * fps).astype(np.int)
                            pduration = np.asarray(u.split('_'), dtype=int)
                            iou = unit.iou(pduration, fduration[fs], 'clip')
                            if not iou:
                                break
                            """
                            if iou < 1 and iou > 0:
                                print('Duration Time : {}'.format(j['database'][v]['annotations'][fs]['segment']))
                                print('Duration Time : {}'.format(np.asarray(j['database'][v]['annotations'][fs]['segment']) * fps))
                                print('fduration : {}'.format(fduration[fs]))
                                print('pduration : {}'.format(pduration))
                                print('iou : {}'.format(iou))
                            """
                            if iou > threshold:
                                vu = '{}_{}'.format(v, u)
                                if fg_counter >= tt:
                                    dictionary['test']['foreground'].append(vu)
                                    dictionary['test']['response'][vu] = '{}_{}'.format(fduration[fs][0], fduration[fs][1])
                                    dictionary['test']['iou'][vu] = iou
                                    fg_counter = 0
                                else:
                                    dictionary['train']['foreground'].append(vu)
                                    dictionary['train']['response'][vu] = '{}_{}'.format(fduration[fs][0], fduration[fs][1])
                                    dictionary['train']['iou'][vu] = iou
                                    fg_counter += 1
                                fg = True
                                break
                        if not fg:
                            if bg_counter >= tt:
                                dictionary['test']['background'].append('{}_{}'.format(v, u))
                                bg_counter = 0
                            else:
                                dictionary['train']['background'].append('{}_{}'.format(v, u))
                                bg_counter += 1

    #print(ujson.dumps(annotations))
    db['train'].Put('annotations'.encode(), ujson.dumps(dictionary['train']).encode())
    if tt > 1:
        db['test'].Put('annotations'.encode(), ujson.dumps(dictionary['test']).encode())
    db['train'].Write(b, sync=True)
    print('[*] Cache is saved to [ {} ]'.format(dataset + '_threshold_{}_train'.format(threshold)))
    if tt > 1:
        db['test'].Write(b, sync=True)
        print('[*] Cache is saved to [ {} ]'.format(dataset + '_threshold_{}_test'.format(threshold)))
    print('[!] Missing {} Video Files'.format(len(miss)))
def flush(self, sync_mode=False):
    self._db.Write(self._batch, sync=sync_mode)
    self.x = 0
    self._batch = leveldb.WriteBatch()
    self.lflush = clock()
def connect_block(self, ser_hash, block, blkmeta):
    # verify against checkpoint list
    try:
        chk_hash = self.netmagic.checkpoints[blkmeta.height]
        if chk_hash != block.sha256:
            self.logger.info("Block %064x does not match checkpoint hash %064x, height %d" %
                             (block.sha256, chk_hash, blkmeta.height))
            return False
    except KeyError:
        pass

    # check TX connectivity
    outpts = self.spent_outpts(block)
    if outpts is None:
        self.logger.info("Unconnectable block %064x" % (block.sha256, ))
        return False

    # verify script signatures
    if ('nosig' not in self.settings and
        ('forcesig' in self.settings or
         blkmeta.height > self.netmagic.checkpoint_max)):
        for tx in block.vtx:
            tx.calc_sha256()
            if tx.is_coinbase():
                continue
            if not self.tx_signed(tx, block, False):
                self.logger.info("Invalid signature in block %064x" % (block.sha256, ))
                return False

    # update database pointers for best chain
    batch = leveldb.WriteBatch()
    batch.Put('misc:total_work', hex(blkmeta.work))
    batch.Put('misc:height', str(blkmeta.height))
    batch.Put('misc:tophash', ser_hash)

    self.logger.info("ChainDb: height %d, block %064x" % (blkmeta.height, block.sha256))

    # all TX's in block are connectable; index
    neverseen = 0
    for tx in block.vtx:
        tx.calc_sha256()
        if not self.mempool.remove(tx.sha256):
            neverseen += 1
        txidx = TxIdx(block.sha256)
        if not self.puttxidx(tx.sha256, txidx, batch):
            self.logger.info("TxIndex failed %064x" % (tx.sha256, ))
            return False

    self.logger.info("MemPool: blk.vtx.sz %d, neverseen %d, poolsz %d" %
                     (len(block.vtx), neverseen, self.mempool.size()))

    # mark deps as spent
    for outpt in outpts:
        self.spend_txout(outpt[0], outpt[1], batch)

    self.db.Write(batch)
    return True
def insert(self, **kwargs):
    assert set(kwargs.keys()) == {'s', 'p', 'o'}
    batch = leveldb.WriteBatch()
    _insert_permutations(kwargs, batch)
    self.db.Write(batch, sync=True)
def createDB(self, dataset):
    db = leveldb.LevelDB(self.outdir + u"/" + dataset + u".leveldb", create_if_missing=True)
    batch = leveldb.WriteBatch()
    return db, batch
def __encrypt_copy(self, file_path, current_hash_of_unenc_file, name=None):
    """ Encrypt file and copy to dropbox folder """
    self.logger.debug('Calling __encrypt_copy, file_path: ' + file_path +
                      ' current_hash_of_unenc_file: ' + current_hash_of_unenc_file +
                      ' name: ' + str(name))
    file_name = ntpath.basename(file_path)
    temp_dir = tempfile.gettempdir()
    temp_dir += '/'
    destination_path = temp_dir + file_name
    self.logger.debug('Temp destination: ' + destination_path)
    try:
        self.logger.debug('Copying ' + file_path + ' to ' + destination_path)
        shutil.copy2(file_path, destination_path)
    except IOError:
        self.logger.error('Error copying ' + file_path + ' to ' + destination_path)
        sys.exit()

    self.logger.debug('Encrypting ' + destination_path)
    # Encryptor returns the path with file name of encrypted file
    old_path = destination_path
    destination_path = self.encryptor(self.password, destination_path)
    self.logger.debug('Done...')
    # Remove original file (before encryption)
    os.remove(old_path)

    random_file_name = ''
    if name is None:
        random_file_name = random_name()
    else:
        random_file_name = name
    random_file_name_with_path = temp_dir + random_file_name
    try:
        self.logger.debug('Renaming ' + destination_path + ' to ' + random_file_name_with_path)
        os.rename(destination_path, random_file_name_with_path)
    except OSError:
        self.logger.error('Error renaming ' + destination_path + ' to ' + random_file_name_with_path)
        sys.exit()

    current_hash_of_enc_file = compute_hash(random_file_name_with_path)
    combined_hash = current_hash_of_unenc_file
    combined_hash += self.HASH_SEPARATOR
    combined_hash += current_hash_of_enc_file
    combined_hash += self.HASH_SEPARATOR
    combined_hash += random_file_name

    batch = leveldb.WriteBatch()
    file_path_aug = file_path + self.FILE_PATH_MARK
    self.logger.debug('Insert in db: ' + file_path_aug + ' -> ' + combined_hash)
    batch.Put(file_path_aug, combined_hash)

    self.logger.debug('Moving ' + random_file_name_with_path + ' to ' + self.dropbox_folder)
    # Remove existing file
    if os.path.isfile(self.dropbox_folder + random_file_name):
        self.logger.info('Removing file ' + self.dropbox_folder + random_file_name + ' from dropbox dir')
        try:
            os.remove(self.dropbox_folder + random_file_name)
        except OSError:
            self.logger.error('Error removing ' + self.dropbox_folder + random_file_name)
            sys.exit()
    try:
        shutil.move(random_file_name_with_path, self.dropbox_folder + random_file_name)
    except IOError:
        self.logger.error('Error moving ' + random_file_name_with_path + ' to ' +
                          self.dropbox_folder + random_file_name)
        sys.exit()

    self.database.Write(batch, sync=True)
def writebatch(self, values):
    batch = leveldb.WriteBatch()
    for k, v in values.items():
        batch.Put(k, v)
    self.__db.Write(batch)
def make_proposal_db(input_proposal, output_db, box_order=None):
    if os.path.isfile(input_proposal) == False:
        print 'File not found %s' % input_proposal
        return

    print 'reading input data file : %s' % input_proposal

    if '.pkl' in input_proposal:
        try:
            with open(input_proposal, 'rb') as f:
                file_list = cPickle.load(f)
                box_list = cPickle.load(f)
        except:
            with open(input_proposal, 'rb') as f:
                data = cPickle.load(f)
                file_list = data['images'][0]
                box_list = data['boxes']

        if len(file_list) == 1:
            file_list = file_list[0]

        if box_order != None:
            new_list = []
            for one_box_list in box_list:
                new_one_box_list = one_box_list[:, box_order]
                new_list.append(new_one_box_list)
            box_list = new_list

        print 'finished reading the pickle file.'
    elif '.mat' in input_proposal:
        matlab_data = sio.loadmat(input_proposal)
        raw_file_data = matlab_data['images'].ravel()
        raw_box_data = matlab_data['boxes'].ravel()

        file_list = []
        for i in xrange(raw_file_data.shape[0]):
            file = raw_file_data[i]
            if isinstance(file, list) == True:
                file = file[0]
            elif isinstance(file, ndarray) == True:
                file = file[0]
            file_list.append(file.encode('ascii', 'ignore'))

        box_list = []
        for i in xrange(raw_box_data.shape[0]):
            if len(raw_box_data[i]) > 0:
                box_list.append(raw_box_data[i][:, box_order] - 1)
            else:
                box_list.append(raw_box_data[i])

        print 'finished reading the mat file.'
    else:
        print 'unsupported file format.'
        print '.pkl and .mat files are supported.'
        return

    remove_folder(output_db)
    db = leveldb.LevelDB(output_db)
    batch = leveldb.WriteBatch()

    i = 0
    for file, box in zip(file_list, box_list):
        if isinstance(file, list) == True:
            file = file[0]
        if isinstance(file, list) == True:
            file = file[0]
        batch.Put(file, cPickle.dumps(box))
        i += 1
        if i % 5000 == 0:
            print 'inserted %s data into DB' % i
            db.Write(batch, sync=True)
            del batch
            batch = leveldb.WriteBatch()

    if i % 5000 > 0:
        db.Write(batch, sync=True)

    print 'inserted total %s proposal data into DB' % i
    print 'finished writing proposal DB : %s' % output_db
def setheight(chaindb, height, hash):
    batch = leveldb.WriteBatch()
    batch.Put('height:' + str(height), hash)
    chaindb.db.Write(batch)
def test_leveldb_multi_write(n):
    batch = leveldb.WriteBatch()
    for i in xrange(n):
        # accumulate the puts in the batch so the final Write commits them all at once
        batch.Put(str(i), str(i))
    db.Write(batch, sync=True)
def init_batch():
    return leveldb.WriteBatch()
def clear_db(self):
    # db = leveldb.LevelDB('./data')
    b = leveldb.WriteBatch()
    for k in self.db.RangeIter(include_value=False, reverse=True):
        b.Delete(k)
    self.db.Write(b)
def convert_data_to_db(self, train_data_folder, valid_data_folder, test_data_folder, min_pixel,
                       train_db_name, valid_db_name, test_db_name,
                       train_list_file, valid_list_file, test_list_file,
                       channel_no, preserve_ar):
    #self.remove_folder(train_db_name)
    #self.remove_folder(valid_db_name)
    self.remove_folder(test_db_name)

    #self.train_db = leveldb.LevelDB(train_db_name)
    #self.valid_db = leveldb.LevelDB(valid_db_name)
    self.test_db = leveldb.LevelDB(test_db_name)

    self.datum = caffe.proto.caffe_pb2.Datum()
    self.datum.channels = channel_no
    self.datum.width = min_pixel
    self.datum.height = min_pixel

    print "convert_train_data"
    print "train_db_name : %s" % train_db_name
    print "valid_db_name : %s" % valid_db_name
    print "test_db_name : %s" % test_db_name
    print "channel_no : %s" % channel_no

    #modes = ['train', 'valid']
    #modes = ['train']
    #modes = ['valid']
    modes = ['test']

    start_time = time.time()

    for mode in modes:
        if mode == 'train':
            image_list_file = open(train_list_file, 'rb')
            data_folder = train_data_folder
        elif mode == 'valid':
            image_list_file = open(valid_list_file, 'rb')
            data_folder = valid_data_folder
        elif mode == 'test':
            image_list_file = open(test_list_file, 'rb')
            data_folder = test_data_folder
        else:
            print 'not supported mode : %s' % mode
            return

        lines = image_list_file.readlines()
        image_list_file.close()
        total_data_no = len(lines)

        print ''
        print 'processing %s' % mode

        for i, line in enumerate(lines):
            if mode == 'test':
                parsed = line.split(' ')
                file_path = parsed[0] + '.JPEG'
                label = None
            else:
                parsed = line.split('\t')
                label = parsed[1]
                file_path = parsed[0]

            file_path = file_path.replace('\r', '')
            file_path = file_path.replace('\n', '')

            org_image = cv2.imread(data_folder + '/' + file_path)
            org_size = org_image.shape

            if preserve_ar == 'preserve':
                if org_size[1] > org_size[0]:
                    im_scale = (min_pixel / float(org_size[0]))
                    wsize = int((float(org_size[1]) * float(im_scale)))
                    image_width = wsize
                    image_height = min_pixel
                else:
                    im_scale = (min_pixel / float(org_size[1]))
                    hsize = int((float(org_size[0]) * float(im_scale)))
                    image_width = min_pixel
                    image_height = hsize
            elif preserve_ar == 'ignore':
                image_width = min_pixel
                image_height = min_pixel

            #if org_image.mode != 'RGB':
            #    org_image = org_image.convert('RGB')

            image = cv2.resize(org_image, (image_width, image_height), interpolation=cv2.INTER_LINEAR)

            self.datum.width = image_width
            self.datum.height = image_height

            self.insert_db(mode, image, label, None, channel_no, False)

            if mode == 'train' and self.train_no > 0 and self.train_no % 1000 == 0:
                self.train_db.Write(self.train_batch, sync=True)
                del self.train_batch
                self.train_batch = leveldb.WriteBatch()
                print "%.1f %% done." % (i * 100.0 / total_data_no)
                print 'Processed %i total train images. %d sec' % (self.train_no, (time.time() - start_time))
                start_time = time.time()

            if mode == 'valid' and self.valid_no > 0 and self.valid_no % 1000 == 0:
                self.valid_db.Write(self.valid_batch, sync=True)
                del self.valid_batch
                self.valid_batch = leveldb.WriteBatch()
                print 'Processed %i valid images.' % self.valid_no

            if mode == 'test' and self.test_no > 0 and self.test_no % 1000 == 0:
                self.test_db.Write(self.test_batch, sync=True)
                del self.test_batch
                self.test_batch = leveldb.WriteBatch()
                print 'Processed %i test images.' % self.test_no

    # Write last batch of images
    if self.train_no % 1000 != 0:
        self.train_db.Write(self.train_batch, sync=True)
    if self.valid_no % 1000 != 0:
        self.valid_db.Write(self.valid_batch, sync=True)
    if self.test_no % 1000 != 0:
        self.test_db.Write(self.test_batch, sync=True)

    print 'Processed %d train, %d valid, %d test' % (self.train_no, self.valid_no, self.test_no)
def convert_test_data(self, data_set_folder, min_pixel, test_db_name, test_output_pickle_path, inverse, channel_no=1):
    self.remove_folder(test_db_name)
    test_db = leveldb.LevelDB(test_db_name)

    pickleTestX = test_output_pickle_path + "/testX_size_" + str(min_pixel) + ".pickle"
    pickleFileNames = test_output_pickle_path + "/fileNames.pickle"

    if not os.path.exists(test_output_pickle_path):
        os.makedirs(test_output_pickle_path)

    numberofImages = 0

    datum = caffe.proto.caffe_pb2.Datum()
    datum.channels = channel_no
    datum.width = min_pixel
    datum.height = min_pixel

    test_batch = leveldb.WriteBatch()

    print "Load test dataset from image files"

    for fileNameDir in os.walk(data_set_folder):
        for index, fileName in enumerate(fileNameDir[2]):
            if fileName[-5:] != ".JPEG":
                continue
            numberofImages += 1

    imageSize = min_pixel * min_pixel
    num_rows = numberofImages  # one row for each image in the test dataset
    batch_size = 10000
    data_size = min(batch_size, numberofImages)
    testX = numpy.zeros((data_size, channel_no, imageSize), dtype=numpy.uint8)
    files = []
    db_index = 0
    pickle_index = 0
    batch_no = 1

    print "Reading images"

    for fileNameDir in os.walk(data_set_folder):
        for index, fileName in enumerate(fileNameDir[2]):
            if fileName[-5:] != ".JPEG":
                continue

            nameFileImage = "{0}{1}{2}".format(fileNameDir[0], os.sep, fileName)
            org_image = Image.open(nameFileImage)
            files.append(fileName)

            image = org_image.resize((min_pixel, min_pixel), Image.ANTIALIAS)

            """
            print fileName
            plt.figure(1, figsize=(1, 1), dpi=100)
            plt.gray();
            plt.subplot(1, 1, 1)
            plt.imshow(image)
            plt.show()
            """

            if inverse:
                image_ubyte = 255 - img_as_ubyte(image)
            else:
                image_ubyte = img_as_ubyte(image)

            if channel_no > 1:
                selem = disk(6)
                w_tophat = white_tophat(image_ubyte, selem)
                b_tophat = black_tophat(image_ubyte, selem)
                datum.data = image_ubyte.tostring() + w_tophat.tostring() + b_tophat.tostring()
                image_output = numpy.concatenate((image_ubyte, w_tophat, b_tophat), axis=1)
            else:
                datum.data = image_ubyte.tostring()
                image_output = image_ubyte

            test_batch.Put("%08d" % db_index, datum.SerializeToString())
            testX[pickle_index] = numpy.reshape(image_output, (channel_no, imageSize))

            db_index += 1
            pickle_index += 1

            if db_index % 1000 == 0:
                test_db.Write(test_batch, sync=True)
                del test_batch
                test_batch = leveldb.WriteBatch()
                print 'Processed %i test images.' % db_index

            if pickle_index % batch_size == 0:
                pickle_file_name = pickleTestX + "_" + str(batch_no)
                with open(pickle_file_name, 'wb') as fp:
                    cPickle.dump(testX, fp)
                    print "pickled %s" % pickle_file_name
                data_size = min(batch_size, numberofImages - batch_size * batch_no)
                testX = numpy.zeros((data_size, channel_no, imageSize), dtype=numpy.uint8)
                batch_no += 1
                pickle_index = 0

            report = [int((j + 1) * num_rows / 20.) for j in range(20)]
            if db_index in report:
                print numpy.ceil(db_index * 100.0 / num_rows), "% done"

    # Write last batch of images
    if db_index % 1000 != 0:
        test_db.Write(test_batch, sync=True)

    if pickle_index % batch_size > 0:
        pickle_file_name = pickleTestX + "_" + str(batch_no)
        with open(pickle_file_name, 'wb') as fp:
            cPickle.dump(testX, fp)
            print "pickled %s" % pickle_file_name

    with open(pickleFileNames, 'wb') as fp:
        cPickle.dump(files, fp)

    print 'Processed a total of %i images.' % db_index
def make_db(input_proposal, output_db, box_order=None):
    if os.path.isfile(input_proposal) == False:
        print 'File not found %s' % input_proposal
        return

    print 'reading input data file : %s' % input_proposal

    if '.pkl' in input_proposal:
        try:
            with open(input_proposal, 'rb') as f:
                file_list = cPickle.load(f)
                box_list = cPickle.load(f)
        except:
            with open(input_proposal, 'rb') as f:
                data = cPickle.load(f)
                file_list = data['images'][0]
                box_list = data['boxes']

        if len(file_list) == 1:
            file_list = file_list[0]

        if box_order != None:
            new_list = []
            for one_box_list in box_list:
                new_one_box_list = one_box_list[:, box_order]
                new_list.append(new_one_box_list)
            box_list = new_list

        print 'finished reading the pickle file.'
    else:
        print 'unsupported file format.'
        print '.pkl and .mat files are supported.'
        return

    remove_folder(output_db)
    db = leveldb.LevelDB(output_db)
    batch = leveldb.WriteBatch()

    i = 0
    for file, box in zip(file_list, box_list):
        if isinstance(file, list) == True:
            file = file[0]
        if isinstance(file, list) == True:
            file = file[0]
        batch.Put(file, cPickle.dumps(box))
        i += 1
        if i % 5000 == 0:
            print 'inserted %s data into DB' % i
            db.Write(batch, sync=True)
            del batch
            batch = leveldb.WriteBatch()

    if i % 5000 > 0:
        db.Write(batch, sync=True)

    print 'inserted total %s proposal data into DB' % i
    print 'finished writing proposal DB : %s' % output_db

#if __name__ == '__main__':
#    make_db()
def loop(self):
    logging.info("Starting loop for %s" % self.calendar_url)

    try:
        logging.debug("Opening %s" % self.up_to_path)
        with open(self.up_to_path, 'r') as up_to_fd:
            last_known = int(up_to_fd.read().strip())
    except FileNotFoundError as exp:
        last_known = -1
    logging.info("Checking calendar " + str(self.calendar_url) + ", last_known commitment:" + str(last_known))

    if self.btc_net == 'testnet':
        bitcoin.SelectParams('testnet')
    elif self.btc_net == 'regtest':
        bitcoin.SelectParams('regtest')

    while True:
        start_time = time.time()
        backup_url = urljoin(self.calendar_url, "/experimental/backup/%d" % (last_known + 1))
        logging.debug("Asking " + str(backup_url))
        try:
            r = requests.get(backup_url)
        except Exception as err:
            logging.error("Exception asking %s error message %s, sleeping for %d seconds"
                          % (str(backup_url), str(err), SLEEP_SECS))
            time.sleep(SLEEP_SECS)
            continue

        if r.status_code != 200:
            logging.info("%s not found, sleeping for %d seconds" % (backup_url, SLEEP_SECS))
            time.sleep(SLEEP_SECS)
            continue

        kv_map = Backup.bytes_to_kv_map(r.content)
        attestations = {}
        ops = {}
        for key, value in kv_map.items():
            # print("--- key=" + b2x(key) + " value=" + b2x(value))
            ctx = BytesDeserializationContext(value)

            for _a in range(ctx.read_varuint()):
                attestation = TimeAttestation.deserialize(ctx)
                attestations[key] = attestation

            for _b in range(ctx.read_varuint()):
                op = Op.deserialize(ctx)
                ops[key] = op

        proxy = bitcoin.rpc.Proxy()

        # Verify all bitcoin attestations are valid
        logging.debug("Total attestations: " + str(len(attestations)))
        for key, attestation in attestations.items():
            if attestation.__class__ == BitcoinBlockHeaderAttestation:
                blockhash = proxy.getblockhash(attestation.height)
                block_header = proxy.getblockheader(blockhash)
                # the following raises an exception and blocks computation if the attestation does not verify
                attested_time = attestation.verify_against_blockheader(key, block_header)
                logging.debug("Verifying " + b2x(key) + " result " + str(attested_time))

        # verify all ops connect to an attestation
        logging.debug("Total ops: " + str(len(ops)))
        for key, op in ops.items():
            current_key = key
            current_op = op
            while True:
                next_key = current_op(current_key)
                if next_key in ops:
                    current_key = next_key
                    current_op = ops[next_key]
                else:
                    break
            assert next_key in attestations

        batch = leveldb.WriteBatch()
        for key, value in kv_map.items():
            batch.Put(key, value)
        self.db.db.Write(batch, sync=True)

        last_known = last_known + 1
        try:
            with open(self.up_to_path, 'w') as up_to_fd:
                up_to_fd.write('%d\n' % last_known)
        except FileNotFoundError as exp:
            logging.error(str(exp))
            break

        elapsed_time = time.time() - start_time
        logging.info("Took %ds for %s" % (elapsed_time, str(backup_url)))
def get_batch(self):
    return leveldb.WriteBatch()
                    help='old json format of 2012 crawl')
parser.add_argument('--batchsize', help='size of levelDB write batches',
                    default=100000, type=int)
parser.add_argument('--prefix', help='prefix for filename', default='')
parser.add_argument('crawl', help='crawl id, e.g. 2013_11')
parser.add_argument('folder', help='subfolder, e.g. 1368696381249')

args = parser.parse_args(sys.argv[1:])

db = None
if args.db:
    import leveldb
    db = leveldb.LevelDB(args.db)

batch_size = 0
batch = leveldb.WriteBatch()
count = 0

kv_generator = read_cdx(args) if args.cdx else read_json(args)
if args.old:
    kv_generator = read_old_json(args)

for key, valuedict in kv_generator:
    if key is None or valuedict is None:
        continue
    count += 1
    if db is not None:
        if args.batchsize > 1:
            if batch_size >= args.batchsize:
                db.Write(batch)
                sys.stderr.write('.')
def __init__(self, db):
    self.db = db
    self.batch = leveldb.WriteBatch()
def import_block(self, block, block_hash, block_height, sync, revert=False):

    self.batch_list = {}  # address -> history
    self.batch_txio = {}  # transaction i/o -> address

    block_inputs = []
    block_outputs = []
    addr_to_read = []

    # deserialize transactions
    t0 = time.time()
    tx_hashes, txdict = self.deserialize_block(block)

    t00 = time.time()

    # undo info
    if revert:
        undo_info = self.get_undo_info(block_height)
    else:
        undo_info = {}

    if not revert:
        # read addresses of tx inputs
        for tx in txdict.values():
            for x in tx.get('inputs'):
                txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
                block_inputs.append(txi)

        block_inputs.sort()
        for txi in block_inputs:
            try:
                addr = self.db.Get(txi)
            except KeyError:
                # the input could come from the same block
                continue
            except:
                traceback.print_exc(file=sys.stdout)
                self.shared.stop()
                raise

            self.batch_txio[txi] = addr
            addr_to_read.append(addr)

    else:
        for txid, tx in txdict.items():
            for x in tx.get('outputs'):
                txo = (txid + int_to_hex(x.get('index'), 4)).decode('hex')
                block_outputs.append(txo)
                addr_to_read.append(x.get('address'))

            undo = undo_info.get(txid)
            for i, x in enumerate(tx.get('inputs')):
                addr = undo['prev_addr'][i]
                addr_to_read.append(addr)

    # read histories of addresses
    for txid, tx in txdict.items():
        for x in tx.get('outputs'):
            addr_to_read.append(x.get('address'))

    addr_to_read.sort()
    for addr in addr_to_read:
        try:
            self.batch_list[addr] = self.db.Get(addr)
        except KeyError:
            self.batch_list[addr] = ''
        except:
            traceback.print_exc(file=sys.stdout)
            self.shared.stop()
            raise

    # process
    t1 = time.time()

    if revert:
        tx_hashes = tx_hashes[::-1]
    for txid in tx_hashes:  # must be ordered
        tx = txdict[txid]
        if not revert:

            # 'undo' contains the list of pruned items for each address in the tx;
            # also, 'prev_addr' is a list of prev addresses
            undo = {'prev_addr': []}

            prev_addr = []
            for i, x in enumerate(tx.get('inputs')):
                txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
                addr = self.batch_txio[txi]

                # add redeem item to the history.
                # add it right next to the input txi? this will break history sorting,
                # but it's ok if I neglect tx inputs during search
                self.set_spent_bit(addr, txi, True, txid, i, block_height)

                # when I prune, prune a pair
                self.prune_history(addr, undo)
                prev_addr.append(addr)

            undo['prev_addr'] = prev_addr

            # here I add only the outputs to history; maybe I want to add inputs too (that's in the other loop)
            for x in tx.get('outputs'):
                addr = x.get('address')
                self.add_to_history(addr, txid, x.get('index'), block_height)
                self.prune_history(addr, undo)  # prune here because we increased the length of the history

            undo_info[txid] = undo

        else:

            undo = undo_info.pop(txid)

            for x in tx.get('outputs'):
                addr = x.get('address')
                self.revert_prune_history(addr, undo)
                self.revert_add_to_history(addr, txid, x.get('index'), block_height)

            prev_addr = undo.pop('prev_addr')
            for i, x in enumerate(tx.get('inputs')):
                addr = prev_addr[i]
                self.revert_prune_history(addr, undo)
                txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
                self.unset_spent_bit(addr, txi)

            assert undo == {}

    if revert:
        assert undo_info == {}

    # write
    max_len = 0
    max_addr = ''
    t2 = time.time()

    batch = leveldb.WriteBatch()
    for addr, serialized_hist in self.batch_list.items():
        batch.Put(addr, serialized_hist)
        l = len(serialized_hist) / 80
        if l > max_len:
            max_len = l
            max_addr = addr

    if not revert:
        # add new created outputs
        for txio, addr in self.batch_txio.items():
            batch.Put(txio, addr)
        # delete spent inputs
        for txi in block_inputs:
            batch.Delete(txi)
        # add undo info
        self.write_undo_info(batch, block_height, undo_info)
    else:
        # restore spent inputs
        for txio, addr in self.batch_txio.items():
            # print "restoring spent input", repr(txio)
            batch.Put(txio, addr)
        # delete spent outputs
        for txo in block_outputs:
            batch.Delete(txo)

    # add the max
    batch.Put('height', self.serialize([(block_hash, block_height, self.db_version)]))

    # actual write
    self.db.Write(batch, sync=sync)

    t3 = time.time()
    if t3 - t0 > 10 and not sync:
        print_log("block", block_height,
                  "parse:%0.2f " % (t00 - t0),
                  "read:%0.2f " % (t1 - t00),
                  "proc:%.2f " % (t2 - t1),
                  "write:%.2f " % (t3 - t2),
                  "max:", max_len, max_addr)

    for addr in self.batch_list.keys():
        self.invalidate_cache(addr)
def commit(self):
    batch = leveldb.WriteBatch()
    for k in self.uncommitted:
        batch.Put(k, self.uncommitted[k])
    self.db.Write(batch, sync=True)
    '-l', '--limit', type=int,
    help="Max number of recommendations to generate per-paper", default=10)
args = parser.parse_args()

db = leveldb.LevelDB(
    args.db_path,
    write_buffer_size=100 << 20,  # 100MB
    block_cache_size=400 << 20)   # 400MB

b = Benchmark(args.benchmark_freq)
tf = TreeFile(args.infile)

if args.batch_size:
    writer = leveldb.WriteBatch()
else:
    writer = db

for recs in make_expert_rec(tf, args.limit):
    recd = [r.pid for r in recs]
    key = recs[0].target_pid + "|expert"
    writer.Put(key.encode(), msgpack.packb(recd))
    b.increment()
    if args.batch_size and b.count % args.batch_size == 0:
        db.Write(writer)

args.infile.seek(0)
tf = TreeFile(args.infile)
for recs in make_classic_recs(tf, args.limit):
    recd = [r.pid for r in recs]
def add_block(self, block: Block):
    """Add a block that has already been verified.

    :param block: the verified block to add
    :return:
    """
    # util.logger.spam(f"blockchain:add_block --start--")
    if block.block_status is not BlockStatus.confirmed:
        raise BlockInValidError("unverified block")
    elif self.__last_block is not None and self.__last_block.height > 0:
        if self.__last_block.block_hash != block.prev_block_hash:
            # If the hash of the last block differs from the prev_hash of the incoming block,
            # do not add it and raise an exception.
            logging.debug("self.last_block.block_hash: " + self.__last_block.block_hash)
            logging.debug("block.prev_block_hash: " + block.prev_block_hash)
            raise BlockError("hash does not match the last block")

    # util.logger.spam(f"blockchain:add_block --1-- {block.prev_block_hash}, {block.height}")
    if block.height == 0 or ObjectManager().peer_service is None:
        # all results to success
        success_result = {'code': int(message_code.Response.success)}
        invoke_results = self.__create_invoke_result_specific_case(
            block.confirmed_transaction_list, success_result)
    else:
        try:
            invoke_results = ObjectManager().peer_service.score_invoke(block, self.__channel_name)
        except Exception as e:
            # When Grpc Connection Raise Exception
            # save all result {'code': ScoreResponse.SCORE_CONTAINER_EXCEPTION, 'message': str(e)}
            logging.error(f'Error While Invoke Score fail add block : {e}')
            score_container_exception_result = {
                'code': ScoreResponse.SCORE_CONTAINER_EXCEPTION,
                'message': str(e)
            }
            invoke_results = self.__create_invoke_result_specific_case(
                block.confirmed_transaction_list, score_container_exception_result)

    # util.logger.spam(f"blockchain:add_block --2--")
    self.__add_tx_to_block_db(block, invoke_results)

    block_hash_encoded = block.block_hash.encode(encoding='UTF-8')

    batch = leveldb.WriteBatch()
    batch.Put(block_hash_encoded, block.serialize_block())
    batch.Put(BlockChain.LAST_BLOCK_KEY, block_hash_encoded)
    batch.Put(
        BlockChain.BLOCK_HEIGHT_KEY +
        block.height.to_bytes(conf.BLOCK_HEIGHT_BYTES_LEN, byteorder='big'),
        block_hash_encoded)
    self.__confirmed_block_db.Write(batch)

    self.__last_block = block
    self.__block_height = self.__last_block.height

    # logging.debug("ADD BLOCK Height : %i", block.height)
    # logging.debug("ADD BLOCK Hash : %s", block.block_hash)
    # logging.debug("ADD BLOCK MERKLE TREE Hash : %s", block.merkle_tree_root_hash)
    # logging.debug("ADD BLOCK Prev Hash : %s ", block.prev_block_hash)
    logging.info("ADD BLOCK HEIGHT : %i , HASH : %s", block.height, block.block_hash)

    # Store the transaction data of the block.
    # The peer runs the chaincode by passing the Score as a parameter.
    # util.logger.spam(f"blockchain:add_block --end--")
    util.apm_event(self.__peer_id, {
        'event_type': 'AddBlock',
        'peer_id': self.__peer_id,
        'data': {
            'block_height': self.__block_height,
            'block_type': block.block_type.name}})

    return True
def putoneblock(self, block):
    block.calc_sha256()

    if not block.is_valid():
        self.logger.info("Invalid block %064x" % (block.sha256, ))
        return False

    if not self.have_prevblock(block):
        self.orphans[block.sha256] = True
        self.orphan_deps[block.hashPrevBlock] = block
        self.logger.info("Orphan block %064x (%d orphans)" % (block.sha256, len(self.orphan_deps)))
        return False

    top_height = self.getheight()
    top_work = long(self.db.Get('misc:total_work'), 16)

    # read metadata for previous block
    prevmeta = BlkMeta()
    if top_height >= 0:
        ser_prevhash = ser_uint256(block.hashPrevBlock)
        prevmeta.deserialize(self.db.Get('blkmeta:' + ser_prevhash))
    else:
        ser_prevhash = ''

    batch = leveldb.WriteBatch()

    # build network "block" msg, as canonical disk storage form
    msg = msg_block()
    msg.block = block
    msg_data = message_to_str(self.netmagic, msg)

    # write "block" msg to storage
    fpos = self.blk_write.tell()
    self.blk_write.write(msg_data)
    self.blk_write.flush()

    # add index entry
    ser_hash = ser_uint256(block.sha256)
    batch.Put('blocks:' + ser_hash, str(fpos))

    # store metadata related to this block
    blkmeta = BlkMeta()
    blkmeta.height = prevmeta.height + 1
    blkmeta.work = (prevmeta.work + uint256_from_compact(block.nBits))
    batch.Put('blkmeta:' + ser_hash, blkmeta.serialize())

    # store list of blocks at this height
    heightidx = HeightIdx()
    heightstr = str(blkmeta.height)
    try:
        heightidx.deserialize(self.db.Get('height:' + heightstr))
    except KeyError:
        pass
    heightidx.blocks.append(block.sha256)

    batch.Put('height:' + heightstr, heightidx.serialize())
    self.db.Write(batch)

    # if chain is not best chain, proceed no further
    if (blkmeta.work <= top_work):
        self.logger.info("ChainDb: height %d (weak), block %064x" % (blkmeta.height, block.sha256))
        return True

    # update global chain pointers
    if not self.set_best_chain(ser_prevhash, ser_hash, block, blkmeta):
        return False

    return True
def __init__(self, dbfile):
    self._nbop = 0
    self._db = leveldb.LevelDB(dbfile)
    self._batch = leveldb.WriteBatch()
    return
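# A minimal, hedged sketch (not taken from any of the projects above) of the buffered-batch
# pattern that several of these snippets implement: accumulate Puts in a leveldb.WriteBatch
# and flush them to the database in one atomic Write once a threshold is reached.
# The class and parameter names (BufferedWriter, flush_every) are hypothetical; only the
# py-leveldb calls (LevelDB, WriteBatch, Put, Write) are the ones used in the examples above.
import leveldb


class BufferedWriter(object):
    def __init__(self, db_path, flush_every=1000):
        self._db = leveldb.LevelDB(db_path)
        self._batch = leveldb.WriteBatch()
        self._pending = 0
        self._flush_every = flush_every

    def put(self, key, value):
        # Writes go into the in-memory batch, not straight to the database.
        self._batch.Put(key, value)
        self._pending += 1
        if self._pending >= self._flush_every:
            self.flush()

    def flush(self, sync=False):
        # One atomic write for everything accumulated so far, then start a fresh batch.
        self._db.Write(self._batch, sync=sync)
        self._batch = leveldb.WriteBatch()
        self._pending = 0


# Example usage:
# writer = BufferedWriter('/tmp/example_db', flush_every=100)
# for i in range(1000):
#     writer.put(('key_%08d' % i).encode(), b'value')
# writer.flush(sync=True)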