def testRestore(self):
    """Verify an HTree reopens unchanged after bookkeeping records are dropped.

    Runs testSplit() to populate self.htree, remembers its length and hash,
    and closes it.  Then, for each of the "__pool__" and "__sync__" keys in
    turn, the key is removed directly through pytc (a failure to remove it
    is ignored) and the file is reopened with HTree(path, 0); the reopened
    tree must report the same length and hash as before.
    """
    self.testSplit()
    path = self.htree.path
    expected_len = len(self.htree)
    expected_hash = hash(self.htree)
    self.htree.close()

    import pytc

    # Note: the "__sync__" pass used to carry a disabled check,
    #   assert db["__sync__"] == "1"
    for marker in ("__pool__", "__sync__"):
        raw = pytc.HDB()
        raw.open(path, pytc.HDBOREADER | pytc.HDBOWRITER)
        try:
            raw.out(marker)
        except:
            # best effort: whatever goes wrong while removing the key
            # is deliberately ignored
            pass
        raw.close()

        tree = HTree(path, 0)
        self.assertEqual(len(tree), expected_len)
        self.assertEqual(hash(tree), expected_hash)
        tree.close()
def __init__(self, db_path, block_path):
    """Open the two hash databases kept under db_path.

    Args:
        db_path: directory holding "blockref.hdb" and "blockstorage.hdb".
        block_path: stored unchanged on the instance as self.block_path.
    """
    logging.debug("BlockStorage.__init__(%s, %s)", db_path, block_path)
    self.block_path = block_path

    # block reference, to check if block is used;
    # holds mapping digest to number of references
    ref_file = os.path.join(db_path, "blockref.hdb")
    self.db = pytc.HDB(ref_file, pytc.HDBOWRITER | pytc.HDBOCREAT)

    # hold block data
    data_file = os.path.join(db_path, "blockstorage.hdb")
    self.blockdb = pytc.HDB(data_file, pytc.HDBOWRITER | pytc.HDBOCREAT)
def main(argv): arg = argparser().parse_args(argv[1:]) dbfn = arg.database db = pytc.HDB() db.open(dbfn, pytc.HDBOREADER) while True: if not arg.no_prompt: print ">>> ", l = sys.stdin.readline() if not l: break l = l.rstrip() try: print db.get(l) except KeyError: if l == '': print "(Use Ctrl-D to exit)" else: print "(no record found for '%s')" % l return 0
def __init__(self, db_path, block_path):
    """Set up the per-class logger and open "blockstorage.hdb" under db_path.

    Args:
        db_path: directory containing (or to contain) "blockstorage.hdb".
        block_path: stored unchanged on the instance as self.block_path.
    """
    log = logging.getLogger(self.__class__.__name__)
    self.logger = log
    log.setLevel(logging.ERROR)
    log.debug("BlockStorage.__init__(%s, %s)", db_path, block_path)

    self.block_path = block_path

    # block reference, to check if block is used;
    # holds mapping digest to number of references
    storage_file = os.path.join(db_path, "blockstorage.hdb")
    self.db = pytc.HDB(storage_file, pytc.HDBOWRITER | pytc.HDBOCREAT)
def main(argv): arg = argparser().parse_args(argv[1:]) kvfn = arg.file if arg.database is None: # default database file name bn = splitext(basename(kvfn))[0] dbfn = join(default_db_dir(), bn + '.' + DB_FILENAME_EXTENSION) else: dbfn = arg.database if arg.verbose: print >> sys.stderr, "Storing DB as %s" % dbfn print >> sys.stderr, "Importing", start_time = datetime.now() import_count, duplicate_count, error_count = 0, 0, 0 with open(kvfn, 'rU') as kvf: db = pytc.HDB() db.open(dbfn, pytc.HDBOWRITER | pytc.HDBOREADER | pytc.HDBOCREAT) for i, l in enumerate(kvf): l = l.rstrip('\n') # parse line into key and value try: key, value = l.split('\t') except ValueError: if error_count < MAX_ERROR_LINES: print >> sys.stderr, "Error: skipping line %d: expected tab-separated KEY:VALUE pair, got '%s'" % ( i + 1, l) elif error_count == MAX_ERROR_LINES: print >> sys.stderr, "(Too many errors; suppressing further error messages)" error_count += 1 continue # enter key and value into DB try: db.putkeep(key, value) import_count += 1 except pytc.Error, e: if e[0] == pytc.TCEKEEP: # existing key, count dup but ignore duplicate_count += 1 else: # unexpected error, abort raise if arg.verbose and (i + 1) % 10000 == 0: print >> sys.stderr, '.',
def count(filename, dbname): """ flattenで出力したテキストのから単語の頻度を数え上げる TokyoCabinetが必要です """ try: db = pytc.HDB(dbname, pytc.HDBOWRITER | pytc.HDBOCREAT) f = open(filename, "r") for line in f: [db.addint(w, 1) for w in split(line.rstrip("\n")) if w != ""] except Exception, (errno, strerror): print strerror
def view(dbname): """TokyoCabinetにためたデータから頻度を出力するだけの能力""" db = pytc.HDB(dbname, pytc.HDBOWRITER | pytc.HDBOCREAT) try: l = db.items() d = {} for item in l: d[item[0]] = db.addint(item[0], 0) for k, v in sorted(d.items(), key=lambda x: x[1]): print k, v except: pass finally: db.close()
from __future__ import division import sys, time sys.path.append('pytc-0.7/build/lib.linux-x86_64-2.5') import pytc db = pytc.HDB() db.open("db", pytc.HDBOWRITER | pytc.HDBOCREAT) numRecords = 100000 t1 = time.time() for i in range(numRecords): s = "%08d" % i db.put(s, s) db.close() t2 = time.time() print "%s puts at %s tps" % (numRecords, numRecords / (t2 - t1))
def __init__(self, dbname):
    """Open the hash database at dbname and keep the handle as self.db.

    The database is opened with the HDBOWRITER | HDBOCREAT flags.
    """
    open_mode = pytc.HDBOWRITER | pytc.HDBOCREAT
    self.db = pytc.HDB(dbname, open_mode)
def testAll(self):
    """Exercise the pytc.HDB API end to end.

    A tuned database is created at DBNAME2, copied to DBNAME and removed;
    everything else runs against the copy at DBNAME: the put/get family,
    iteration, the dict-like interface, vanish, addint/adddouble and the
    KeyError raised for a missing record.
    """
    # new
    db = pytc.HDB()
    # tune
    db.tune(100, 32, 64, pytc.HDBTTCBS)
    # open
    db.open(DBNAME2, pytc.HDBOWRITER | pytc.HDBOCREAT)
    # copy
    db.copy(DBNAME)
    # close
    db.close()
    os.remove(DBNAME2)

    # open
    db = pytc.HDB(DBNAME, pytc.HDBOWRITER)

    # put
    db.put('hamu', 'ju')
    db.put('moru', 'pui')
    db.put('kiki', 'nya-')
    # get
    self.assertEqual(db.get('hamu'), 'ju')
    # vsiz
    self.assertEqual(db.vsiz('hamu'), len('ju'))

    # putkeep
    self.assertRaises(pytc.Error, db.putkeep, 'moru', 'puipui')
    db.putkeep('moruta', 'puipui')
    self.assertEqual(db.get('moruta'), 'puipui')

    # putcat
    db.putcat('kiki', 'nya-nya-')
    self.assertEqual(db.get('kiki'), 'nya-nya-nya-')

    # putasync
    db.putasync('gunya', 'darari')
    # sync -- actually call it; a bare ``db.sync`` only looks the method up
    db.sync()
    self.assertEqual(db.get('gunya'), 'darari')

    # out
    db.out('gunya')
    self.assertRaises(KeyError, db.get, 'gunya')
    # optimize
    db.optimize(100, 32, 64, pytc.HDBTTCBS)
    # path
    self.assertEqual(db.path(), DBNAME)
    # rnum
    self.assertEqual(db.rnum(), 4)
    # fsiz
    self.assertNotEqual(db.fsiz(), 0)

    # iterinit
    db.iterinit()
    # iternext
    self.assertEqual(db.iternext(), 'hamu')
    db.iterinit()
    self.assertEqual(db.iternext(), 'hamu')

    # dict like interfaces
    expected_keys = ['hamu', 'kiki', 'moru', 'moruta']
    expected_values = ['ju', 'nya-nya-nya-', 'pui', 'puipui']
    expected_items = [('hamu', 'ju'), ('kiki', 'nya-nya-nya-'),
                      ('moru', 'pui'), ('moruta', 'puipui')]

    result = []
    for key in db:
        result.append(key)
    self.assertEqual(sorted(result), expected_keys)

    self.assertEqual(sorted(db.keys()), expected_keys)
    self.assertEqual(sorted(db.values()), expected_values)
    self.assertEqual(sorted(db.items()), expected_items)

    result = []
    for key in db.iterkeys():
        result.append(key)
    self.assertEqual(sorted(result), expected_keys)

    result = []
    for value in db.itervalues():
        result.append(value)
    self.assertEqual(sorted(result), expected_values)

    result = []
    for (key, value) in db.iteritems():
        result.append((key, value))
    self.assertEqual(sorted(result), expected_items)

    # this bug is reported by id:a2c
    self.assertRaises(TypeError, eval, 'db[:]', globals(), locals())

    db['gunya'] = 'tekito'
    self.assertEqual(db['gunya'], 'tekito')
    del db['gunya']
    self.assertRaises(KeyError, db.get, 'gunya')

    self.assertTrue('hamu' in db)
    self.assertTrue('python' not in db)

    # vanish
    db.vanish()
    self.assertEqual(db.rnum(), 0)

    # addint
    db['int'] = struct.pack('i', 0)
    db.addint('int', 1)
    self.assertEqual(struct.unpack('i', db['int'])[0], 1)

    # adddouble
    db['double'] = struct.pack('d', 0.0)
    db.adddouble('double', 1.0)
    self.assertEqual(struct.unpack('d', db['double'])[0], 1.0)

    # Error handling with no record. Thanks to Hatem Nassrat.
    # assertRaises also fails the test when nothing is raised at all,
    # which a try/except with an assertion in the handler would not.
    self.assertRaises(KeyError, lambda: db['absence'])

    db.close()
import pytc db = pytc.HDB('casket.hdb', pytc.HDBOWRITER | pytc.HDBOCREAT) db.put('potato', 'potatis') db.put('carrot', 'morot') db.put('banana', 'banan') assert db.get('carrot') == 'morot' db.put("digest", "1") print db.get("digest") db.addint("digest", 2) print db.get("digest") #assert db.get('digest') == 1
def main(argv): arg = argparser().parse_args(argv[1:]) kvfn = arg.file if arg.database is None: # default database file name bn = splitext(basename(kvfn))[0] fwdbfn = join(default_db_dir(), bn + FW_DB_AFFIX + '.' + DB_FILENAME_EXTENSION) bwdbfn = join(default_db_dir(), bn + BW_DB_AFFIX + '.' + DB_FILENAME_EXTENSION) else: fwdbfn = arg.database + FW_DB_AFFIX bwdbfn = arg.database + BW_DB_AFFIX if arg.verbose: print >> sys.stderr, "Storing DBs as %s and %s" % (fwdbfn, bwdbfn) print >> sys.stderr, "Importing", start_time = datetime.now() import_count, duplicate_count, error_count = 0, 0, 0 with open(kvfn, 'rU') as kvf: fwdb = pytc.HDB() bwdb = pytc.HDB() fwdb.open(fwdbfn, pytc.HDBOWRITER | pytc.HDBOREADER | pytc.HDBOCREAT) bwdb.open(bwdbfn, pytc.HDBOWRITER | pytc.HDBOREADER | pytc.HDBOCREAT) # store special values in the "forward" DB identifying version # and settings. The keys for these start with the separator, # which should guarantee they will never clash with any other # entry. fwdb.put(DB_KEY_SEPARATOR + NORM_DB_STRING, NORM_DB_VERSION) fwdb.put(DB_KEY_SEPARATOR + NORM_DB_LOWERCASE, str(arg.lowercase)) for i, l in enumerate(kvf): l = l.rstrip('\n') # parse line into ID and LABEL:STRING pairs try: _id, rest = l.split('\t', 1) except ValueError: if error_count < MAX_ERROR_LINES: print >> sys.stderr, "Error: skipping line %d: expected tab-separated fields, got '%s'" % ( i + 1, l) elif error_count == MAX_ERROR_LINES: print >> sys.stderr, "(Too many errors; suppressing further error messages)" error_count += 1 continue # parse LABEL:STRING pairs try: pairs = [] for i, pair in enumerate(rest.split('\t')): # exception: first field to be given without label # and use default (backward compatibility) if ':' not in pair and i == 0: label, string = 'Term', pair # patch "rest" too (sorry) rest = 'Term:' + rest else: label, string = pair.split(':', 1) pairs.append((label, string)) except ValueError: if error_count < MAX_ERROR_LINES: print >> sys.stderr, "Error: skipping line 
%d: expected tab-separated LABEL:STRING pairs, got '%s'" % ( i + 1, l) elif error_count == MAX_ERROR_LINES: print >> sys.stderr, "(Too many errors; suppressing further error messages)" error_count += 1 continue if arg.lowercase: pairs = [(l, s.lower()) for l, s in pairs] # enter ID->rest mapping into DB try: fwdb.putkeep(_id, rest) import_count += 1 except pytc.Error, e: if e[0] == pytc.TCEKEEP: # existing key, count dup but ignore duplicate_count += 1 else: # unexpected error, abort raise # enter mapping from indexed string(s) to ID into DB # split into "indexed" pairs available for lookup and # "meta-information" pairs used only to differentiate # between entries indexed, meta = pairs[:1], pairs[1:] # TODO: protect against indexing the same thing # multiple times for label, value in indexed: bwdb.putcat(value, _id + DB_KEY_SEPARATOR) if arg.verbose and (i + 1) % 10000 == 0: print >> sys.stderr, '.',