コード例 #1
0
ファイル: test.py プロジェクト: mvmap/mvptest
    def testRestore(self):
        """Check that an HTree reopens intact after bookkeeping records are removed.

        Runs ``testSplit`` first, remembers the length and hash of
        ``self.htree`` and closes it.  Then, for the ``"__pool__"`` record and
        afterwards the ``"__sync__"`` record, the record is deleted directly
        through pytc and a freshly opened ``HTree`` on the same path must
        report the remembered length and hash.
        """
        self.testSplit()
        path = self.htree.path
        expected_len = len(self.htree)
        expected_hash = hash(self.htree)
        self.htree.close()

        import pytc

        # The removals are cumulative: "__pool__" stays removed while
        # "__sync__" is being tested.
        for record_key in ("__pool__", "__sync__"):
            raw = pytc.HDB()
            raw.open(path, pytc.HDBOREADER | pytc.HDBOWRITER)
            try:
                raw.out(record_key)
            except:
                # Best effort: any failure to delete the record is ignored.
                pass
            raw.close()

            reopened = HTree(path, 0)
            self.assertEqual(len(reopened), expected_len)
            self.assertEqual(hash(reopened), expected_hash)
            reopened.close()
コード例 #2
0
 def __init__(self, db_path, block_path):
     """Open the two hash databases kept under ``db_path``.

     db_path    -- directory holding ``blockref.hdb`` and ``blockstorage.hdb``
     block_path -- stored unchanged on ``self.block_path``
     """
     logging.debug("BlockStorage.__init__(%s, %s)", db_path, block_path)
     self.block_path = block_path

     # Both databases are opened for writing and created when missing.
     open_mode = pytc.HDBOWRITER | pytc.HDBOCREAT

     # Reference counts: maps a block digest to its number of references,
     # used to check whether a block is still in use.
     ref_file = os.path.join(db_path, "blockref.hdb")
     self.db = pytc.HDB(ref_file, open_mode)

     # The block data itself.
     data_file = os.path.join(db_path, "blockstorage.hdb")
     self.blockdb = pytc.HDB(data_file, open_mode)
コード例 #3
0
def main(argv):
    arg = argparser().parse_args(argv[1:])

    dbfn = arg.database

    db = pytc.HDB()
    db.open(dbfn, pytc.HDBOREADER)

    while True:
        if not arg.no_prompt:
            print ">>> ",
        l = sys.stdin.readline()
        if not l:
            break

        l = l.rstrip()

        try:
            print db.get(l)
        except KeyError:
            if l == '':
                print "(Use Ctrl-D to exit)"
            else:
                print "(no record found for '%s')" % l
    return 0
コード例 #4
0
 def __init__(self, db_path, block_path):
     """Set up a per-class logger and open ``blockstorage.hdb`` under ``db_path``.

     db_path    -- directory holding ``blockstorage.hdb``
     block_path -- stored unchanged on ``self.block_path``
     """
     logger_name = self.__class__.__name__
     self.logger = logging.getLogger(logger_name)
     # Level is ERROR, so the debug call below is filtered by this logger.
     self.logger.setLevel(logging.ERROR)
     self.logger.debug("BlockStorage.__init__(%s, %s)", db_path, block_path)
     self.block_path = block_path

     # NOTE(review): the original comment describes this DB as the
     # digest -> reference-count map, yet the file is named
     # "blockstorage.hdb" -- confirm which role is intended.
     storage_file = os.path.join(db_path, "blockstorage.hdb")
     open_mode = pytc.HDBOWRITER | pytc.HDBOCREAT
     self.db = pytc.HDB(storage_file, open_mode)
コード例 #5
0
def main(argv):
    """Import tab-separated KEY/VALUE lines from a file into a pytc hash DB.

    ``argv`` is the full argument vector; ``argv[1:]`` is parsed with
    ``argparser()``.  The input file is ``arg.file``.  The database path is
    ``arg.database`` or, when that is None, a name derived from the input
    file's base name inside ``default_db_dir()``.

    Lines that do not split into exactly two tab-separated fields are
    skipped and counted as errors.  Keys already in the DB are counted as
    duplicates and left untouched.

    NOTE(review): the visible body appears truncated -- ``start_time`` and
    the three counters are never consumed and the DB is never closed here.
    """
    arg = argparser().parse_args(argv[1:])

    kvfn = arg.file

    if arg.database is None:
        # default database file name
        bn = splitext(basename(kvfn))[0]
        dbfn = join(default_db_dir(), bn + '.' + DB_FILENAME_EXTENSION)
    else:
        dbfn = arg.database

    if arg.verbose:
        print >> sys.stderr, "Storing DB as %s" % dbfn
        # trailing comma: the progress dots below continue on this line
        print >> sys.stderr, "Importing",
    start_time = datetime.now()

    import_count, duplicate_count, error_count = 0, 0, 0

    with open(kvfn, 'rU') as kvf:
        db = pytc.HDB()
        # open read/write, creating the DB file if it does not exist
        db.open(dbfn, pytc.HDBOWRITER | pytc.HDBOREADER | pytc.HDBOCREAT)

        for i, l in enumerate(kvf):
            l = l.rstrip('\n')

            # parse line into key and value
            try:
                # unpacking raises ValueError unless there are exactly two fields
                key, value = l.split('\t')
            except ValueError:
                # report the first MAX_ERROR_LINES bad lines, then print one
                # suppression notice; error_count keeps counting regardless
                if error_count < MAX_ERROR_LINES:
                    print >> sys.stderr, "Error: skipping line %d: expected tab-separated KEY:VALUE pair, got '%s'" % (
                        i + 1, l)
                elif error_count == MAX_ERROR_LINES:
                    print >> sys.stderr, "(Too many errors; suppressing further error messages)"
                error_count += 1
                continue

            # enter key and value into DB
            try:
                db.putkeep(key, value)
                import_count += 1
            except pytc.Error, e:
                # e[0] is compared against pytc's TCEKEEP error code
                if e[0] == pytc.TCEKEEP:
                    # existing key, count dup but ignore
                    duplicate_count += 1
                else:
                    # unexpected error, abort
                    raise

            # progress indicator: one dot per 10000 input lines
            if arg.verbose and (i + 1) % 10000 == 0:
                print >> sys.stderr, '.',
コード例 #6
0
def count(filename, dbname):
    """Count word frequencies in text produced by ``flatten``.

    Requires Tokyo Cabinet (via ``pytc``).

    filename -- text file to read; each line is tokenized with ``split``
    dbname   -- hash DB to open (created if missing); every non-empty token
                has its integer counter incremented by 1 via ``addint``

    Errors are reported on stdout rather than raised, as before.  For an
    exception whose ``args`` hold two items (e.g. ``IOError``'s
    ``(errno, strerror)``) the second item is printed; any other exception
    is printed as-is.  The old handler crashed while unpacking those.
    The input file and the DB are now always closed.
    """
    db = None
    try:
        db = pytc.HDB(dbname, pytc.HDBOWRITER | pytc.HDBOCREAT)
        with open(filename, "r") as f:
            for line in f:
                for w in split(line.rstrip("\n")):
                    if w != "":
                        db.addint(w, 1)
    except Exception as e:
        args = getattr(e, "args", ())
        # Two-item args mirror the original "(errno, strerror)" unpacking.
        print(args[1] if len(args) == 2 else e)
    finally:
        # db stays None when opening the DB itself failed.
        if db is not None:
            db.close()
0
def view(dbname):
    """TokyoCabinetにためたデータから頻度を出力するだけの能力"""
    db = pytc.HDB(dbname, pytc.HDBOWRITER | pytc.HDBOCREAT)
    try:
        l = db.items()
        d = {}

        for item in l:
            d[item[0]] = db.addint(item[0], 0)

        for k, v in sorted(d.items(), key=lambda x: x[1]):
            print k, v
    except:
        pass
    finally:
        db.close()
コード例 #8
0
from __future__ import division
import sys, time
sys.path.append('pytc-0.7/build/lib.linux-x86_64-2.5')
import pytc

# Micro-benchmark: time how long it takes to put numRecords small records
# into a hash DB, with the close() included in the measured interval.
db = pytc.HDB()
db.open("db", pytc.HDBOWRITER | pytc.HDBOCREAT)

numRecords = 100000
started = time.time()
for n in range(numRecords):
    # key and value are the same zero-padded 8-digit string
    record = "%08d" % n
    db.put(record, record)

db.close()
finished = time.time()
elapsed = finished - started
print("%s puts at %s tps" % (numRecords, numRecords / elapsed))
コード例 #9
0
 def __init__(self, dbname):
     """Open the hash DB ``dbname`` for writing, creating it if missing."""
     open_mode = pytc.HDBOWRITER | pytc.HDBOCREAT
     self.db = pytc.HDB(dbname, open_mode)
コード例 #10
0
    def testAll(self):
        """Exercise the pytc.HDB API end to end against ``DBNAME``.

        Covers tune/open/copy/close, the put variants, get/vsiz, sync, out,
        optimize, the metadata accessors, the iteration and dict-like
        interfaces, vanish, addint/adddouble, and KeyError on a missing key.
        """
        # new
        db = pytc.HDB()
        # tune
        db.tune(100, 32, 64, pytc.HDBTTCBS)
        # open
        db.open(DBNAME2, pytc.HDBOWRITER | pytc.HDBOCREAT)
        # copy
        db.copy(DBNAME)
        # close
        db.close()
        os.remove(DBNAME2)

        # open
        db = pytc.HDB(DBNAME, pytc.HDBOWRITER)

        # put
        db.put('hamu', 'ju')
        db.put('moru', 'pui')
        db.put('kiki', 'nya-')

        # get
        self.assertEqual(db.get('hamu'), 'ju')
        # vsiz
        self.assertEqual(db.vsiz('hamu'), len('ju'))

        # putkeep
        self.assertRaises(pytc.Error, db.putkeep, 'moru', 'puipui')
        db.putkeep('moruta', 'puipui')
        self.assertEqual(db.get('moruta'), 'puipui')

        # putcat
        db.putcat('kiki', 'nya-nya-')
        self.assertEqual(db.get('kiki'), 'nya-nya-nya-')

        # putasync
        db.putasync('gunya', 'darari')
        # sync -- actually call it; the bare attribute reference that was
        # here before never ran the method
        db.sync()
        self.assertEqual(db.get('gunya'), 'darari')

        # out
        db.out('gunya')
        self.assertRaises(KeyError, db.get, 'gunya')
        # optimize
        db.optimize(100, 32, 64, pytc.HDBTTCBS)

        # path
        self.assertEqual(db.path(), DBNAME)
        # rnum
        self.assertEqual(db.rnum(), 4)
        # fsiz
        self.assertNotEqual(db.fsiz(), 0)

        # iterinit
        db.iterinit()
        # iternext
        self.assertEqual(db.iternext(), 'hamu')
        # re-initialising must restart iteration from the first key
        db.iterinit()
        self.assertEqual(db.iternext(), 'hamu')

        # dict like interfaces
        result = []
        for key in db:
            result.append(key)
        self.assertEqual(sorted(result), ['hamu', 'kiki', 'moru', 'moruta'])

        self.assertEqual(sorted(db.keys()), ['hamu', 'kiki', 'moru', 'moruta'])
        self.assertEqual(sorted(db.values()),
                         ['ju', 'nya-nya-nya-', 'pui', 'puipui'])
        self.assertEqual(sorted(db.items()), [('hamu', 'ju'),
                                              ('kiki', 'nya-nya-nya-'),
                                              ('moru', 'pui'),
                                              ('moruta', 'puipui')])

        result = []
        for key in db.iterkeys():
            result.append(key)
        self.assertEqual(sorted(result), ['hamu', 'kiki', 'moru', 'moruta'])

        result = []
        for value in db.itervalues():
            result.append(value)
        self.assertEqual(sorted(result),
                         ['ju', 'nya-nya-nya-', 'pui', 'puipui'])

        result = []
        for (key, value) in db.iteritems():
            result.append((key, value))
        self.assertEqual(sorted(result), [('hamu', 'ju'),
                                          ('kiki', 'nya-nya-nya-'),
                                          ('moru', 'pui'),
                                          ('moruta', 'puipui')])

        # this bug is reported by id:a2c
        # (slicing must raise TypeError; eval is given a fixed literal
        # expression, not external input)
        self.assertRaises(TypeError, eval, 'db[:]', globals(), locals())

        db['gunya'] = 'tekito'
        self.assertEqual(db['gunya'], 'tekito')
        del db['gunya']
        self.assertRaises(KeyError, db.get, 'gunya')

        self.assertTrue('hamu' in db)
        self.assertTrue('python' not in db)

        # vanish
        db.vanish()
        self.assertEqual(db.rnum(), 0)

        # addint
        db['int'] = struct.pack('i', 0)
        db.addint('int', 1)
        self.assertEqual(struct.unpack('i', db['int'])[0], 1)

        # adddouble
        db['double'] = struct.pack('d', 0.0)
        db.adddouble('double', 1.0)
        self.assertEqual(struct.unpack('d', db['double'])[0], 1.0)

        # Error handling with no record. Thanks to Hatem Nassrat.
        try:
            db['absence']
        except Exception as e:
            # must be exactly KeyError, not some other error type
            self.assertEqual(type(e), KeyError)
        else:
            # previously a lookup that raised nothing passed silently
            self.fail("db['absence'] did not raise KeyError")
コード例 #11
0
import pytc
# Small smoke test of put/get/addint against casket.hdb.
db = pytc.HDB('casket.hdb', pytc.HDBOWRITER | pytc.HDBOCREAT)
for word, translation in (('potato', 'potatis'),
                          ('carrot', 'morot'),
                          ('banana', 'banan')):
    db.put(word, translation)
assert db.get('carrot') == 'morot'

# Store a string under "digest", then apply addint to the same record and
# print the record before and after. (The original carried a disabled
# check that db.get('digest') == 1.)
db.put("digest", "1")
print(db.get("digest"))
db.addint("digest", 2)
print(db.get("digest"))
コード例 #12
0
ファイル: norm_db_init.py プロジェクト: edycop/brat
def main(argv):
    """Import a tab-separated normalization file into forward/backward DBs.

    ``argv`` is the full argument vector; ``argv[1:]`` is parsed with
    ``argparser()``.  Each input line has the form
    ``ID<TAB>LABEL:STRING[<TAB>LABEL:STRING...]``.  For backward
    compatibility the first pair may omit its label, in which case the
    label ``Term`` is used.

    Two pytc hash DBs are written:

    * forward DB: ID -> the rest of the line (first stored value wins;
      repeated IDs are counted as duplicates).  It also receives two
      special records, keyed with a leading ``DB_KEY_SEPARATOR``, holding
      the DB version and the lowercase setting.
    * backward DB: indexed string -> concatenation of
      ``ID + DB_KEY_SEPARATOR`` entries.  Only the first pair of each line
      is indexed.

    Malformed lines are skipped and counted; at most ``MAX_ERROR_LINES``
    of them are reported on stderr.

    NOTE(review): the visible body appears truncated -- ``start_time``,
    the counters and ``meta`` are never consumed and neither DB is closed
    here.
    """
    arg = argparser().parse_args(argv[1:])

    kvfn = arg.file

    if arg.database is None:
        # default database file name
        bn = splitext(basename(kvfn))[0]
        fwdbfn = join(default_db_dir(),
                      bn + FW_DB_AFFIX + '.' + DB_FILENAME_EXTENSION)
        bwdbfn = join(default_db_dir(),
                      bn + BW_DB_AFFIX + '.' + DB_FILENAME_EXTENSION)
    else:
        fwdbfn = arg.database + FW_DB_AFFIX
        bwdbfn = arg.database + BW_DB_AFFIX

    if arg.verbose:
        print >> sys.stderr, "Storing DBs as %s and %s" % (fwdbfn, bwdbfn)
        # trailing comma: the progress dots below continue on this line
        print >> sys.stderr, "Importing",
    start_time = datetime.now()

    import_count, duplicate_count, error_count = 0, 0, 0

    with open(kvfn, 'rU') as kvf:
        fwdb = pytc.HDB()
        bwdb = pytc.HDB()
        fwdb.open(fwdbfn, pytc.HDBOWRITER | pytc.HDBOREADER | pytc.HDBOCREAT)
        bwdb.open(bwdbfn, pytc.HDBOWRITER | pytc.HDBOREADER | pytc.HDBOCREAT)

        # store special values in the "forward" DB identifying version
        # and settings. The keys for these start with the separator,
        # which should guarantee they will never clash with any other
        # entry.
        fwdb.put(DB_KEY_SEPARATOR + NORM_DB_STRING, NORM_DB_VERSION)
        fwdb.put(DB_KEY_SEPARATOR + NORM_DB_LOWERCASE, str(arg.lowercase))

        for i, l in enumerate(kvf):
            l = l.rstrip('\n')

            # parse line into ID and LABEL:STRING pairs
            try:
                _id, rest = l.split('\t', 1)
            except ValueError:
                if error_count < MAX_ERROR_LINES:
                    print >> sys.stderr, "Error: skipping line %d: expected tab-separated fields, got '%s'" % (
                        i + 1, l)
                elif error_count == MAX_ERROR_LINES:
                    print >> sys.stderr, "(Too many errors; suppressing further error messages)"
                error_count += 1
                continue

            # parse LABEL:STRING pairs
            try:
                pairs = []
                # The pair index gets its own name (j): reusing i here
                # clobbered the line index, so the error message and the
                # progress check below worked from the wrong number.
                for j, pair in enumerate(rest.split('\t')):
                    # exception: first field to be given without label
                    # and use default (backward compatibility)
                    if ':' not in pair and j == 0:
                        label, string = 'Term', pair
                        # patch "rest" too (sorry)
                        rest = 'Term:' + rest
                    else:
                        label, string = pair.split(':', 1)
                    pairs.append((label, string))
            except ValueError:
                if error_count < MAX_ERROR_LINES:
                    print >> sys.stderr, "Error: skipping line %d: expected tab-separated LABEL:STRING pairs, got '%s'" % (
                        i + 1, l)
                elif error_count == MAX_ERROR_LINES:
                    print >> sys.stderr, "(Too many errors; suppressing further error messages)"
                error_count += 1
                continue

            if arg.lowercase:
                # Python 2 list comprehensions leak their loop variables,
                # so avoid reusing the line variable "l" here.
                pairs = [(label, string.lower()) for label, string in pairs]

            # enter ID->rest mapping into DB
            try:
                fwdb.putkeep(_id, rest)
                import_count += 1
            except pytc.Error, e:
                # e[0] is compared against pytc's TCEKEEP error code
                if e[0] == pytc.TCEKEEP:
                    # existing key, count dup but ignore
                    duplicate_count += 1
                else:
                    # unexpected error, abort
                    raise

            # enter mapping from indexed string(s) to ID into DB

            # split into "indexed" pairs available for lookup and
            # "meta-information" pairs used only to differentiate
            # between entries
            indexed, meta = pairs[:1], pairs[1:]

            # TODO: protect against indexing the same thing
            # multiple times
            for label, value in indexed:
                bwdb.putcat(value, _id + DB_KEY_SEPARATOR)

            # progress indicator: one dot per 10000 input lines
            if arg.verbose and (i + 1) % 10000 == 0:
                print >> sys.stderr, '.',