import random

from codernitydb3.database import Database

# WithXIndex is the tree-based index on the 'x' field defined earlier
# in the tutorial; it is assumed to be in scope here.


def main():
    db = Database('/tmp/tut5_2')
    db.create()
    x_ind = WithXIndex(db.path, 'x')
    db.add_index(x_ind)
    for x in range(100):
        db.insert(dict(x=x, t=random.random()))
    # run the index-side 'avg' function over the given key range
    print(db.run('x', 'avg', start=10, end=30))
def test_to_many_shards(self, tmpdir):
    db = Database(str(tmpdir) + '/db')
    db.create(with_id_index=False)
    # it's OK to use the sharded index directly here;
    # shard counts above the supported maximum must be rejected
    with pytest.raises(IndexPreconditionsException):
        db.add_index(ShardedUniqueHashIndex(db.path, 'id', sh_nums=300))
    with pytest.raises(IndexPreconditionsException):
        db.add_index(ShardedUniqueHashIndex(db.path, 'id', sh_nums=256))
import random

from codernitydb3.database import Database


def main():
    db = Database('/tmp/tut5_1')
    db.create()
    x_ind = WithXIndex(db.path, 'x')
    db.add_index(x_ind)
    for x in range(100):
        db.insert(dict(x=x, t=random.random()))
    values = []
    for curr in db.get_many('x', start=10, end=30, limit=-1, with_doc=True):
        values.append(curr['doc']['t'])
    # client-side average of 't' over the matched key range
    print(sum(values) / len(values))
def test_insert_get(self, tmpdir, sh_nums):
    db = Database(str(tmpdir) + '/db')
    db.create(with_id_index=False)
    n = globals()['ShardedUniqueHashIndex%d' % sh_nums]
    db.add_index(n(db.path, 'id'))
    inserted = []
    for x in range(10000):
        inserted.append(db.insert(dict(x=x))['_id'])
    for curr in inserted:
        assert db.get('id', curr)['_id'] == curr
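# The parametrized sharding tests above and below pull classes such as
# ShardedUniqueHashIndex5 out of globals(). A minimal sketch of how those
# subclasses might be generated; the helper name and the shard counts are
# assumptions, not necessarily the repo's exact code.
from codernitydb3.sharded_hash import ShardedUniqueHashIndex


def make_sharded_class(sh_nums):
    """Create a ShardedUniqueHashIndex subclass with a fixed shard count."""
    def __init__(self, *args, **kwargs):
        kwargs['sh_nums'] = sh_nums
        ShardedUniqueHashIndex.__init__(self, *args, **kwargs)
    name = 'ShardedUniqueHashIndex%d' % sh_nums
    return type(name, (ShardedUniqueHashIndex,), {'__init__': __init__})


for nums in (5, 10, 50, 100):
    cls = make_sharded_class(nums)
    globals()[cls.__name__] = cls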
from codernitydb3.database import Database


def main():
    db = Database('/tmp/tut1')
    db.create()
    for x in range(100):
        print(db.insert(dict(x=x)))
    for curr in db.all('id'):
        print(curr)
import os

from codernitydb3.database import Database

# WithAIndex (a hash index keyed on the 'a' field) is assumed to be
# defined alongside this class in the qalsadi cache module.


class Cache(object):
    """ Cache for word morphological analysis """

    DB_PATH = os.path.join(os.path.expanduser('~'), '.qalsadiCache')

    def __init__(self, dp_path=False):
        """ Create Analex cache """
        self.cache = {
            'checkedWords': {},
            'FreqWords': {
                'noun': {},
                'verb': {},
                'stopword': {}
            },
        }
        if not dp_path:
            dp_path = self.DB_PATH
        else:
            dp_path = os.path.join(os.path.dirname(dp_path), '.qalsadiCache')
        try:
            self.db = Database(str(dp_path))
            if not self.db.exists():
                self.db.create()
                x_ind = WithAIndex(self.db.path, 'a')
                self.db.add_index(x_ind)
            else:
                self.db.open()
        except Exception:
            print("Can't open the cache database")
            self.db = None

    def __del__(self):
        """ Delete the instance and close the cache database """
        self.cache = None
        if self.db:
            self.db.close()

    def is_already_checked(self, word):
        """ Return whether ``word`` is already cached """
        try:
            return bool(self.db.get('a', word))
        except Exception:
            return False

    def get_checked(self, word):
        """ Return the cached analyses of ``word`` """
        if self.db:
            record = self.db.get('a', word, with_doc=True)
            doc = record.get('doc', False)
            if doc:
                return doc.get('d', [])
        return []

    def add_checked(self, word, data):
        """ Add the analyses ``data`` of checked ``word`` to the cache """
        idata = {'a': word, 'd': data}
        if self.db:
            self.db.insert(idata)

    def exists_cache_freq(self, word, wordtype):
        """ Return whether ``word`` exists in the frequency cache """
        # look up the per-wordtype table, not the top-level dict
        return word in self.cache['FreqWords'][wordtype]

    def get_freq(self, originalword, wordtype):
        """ Return the frequency of ``originalword`` from the cache """
        return self.cache['FreqWords'][wordtype].get(originalword, 0)

    def add_freq(self, original, wordtype, freq):
        """ Add frequency ``freq`` of ``original`` to the cache """
        self.cache['FreqWords'][wordtype][original] = freq
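# A minimal usage sketch of the Cache above; the word and the analysis
# payload are made-up placeholders, not real qalsadi analyzer output.
cache = Cache()  # uses ~/.qalsadiCache by default
word = u'kataba'
if cache.is_already_checked(word):
    analyses = cache.get_checked(word)
else:
    analyses = [{'root': 'ktb'}]  # stand-in for a real analysis result
    cache.add_checked(word, analyses)
cache.add_freq(word, 'verb', 42)
print(cache.get_freq(word, 'verb'))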
from codernitydb3.database import Database


def main():
    db = Database('/tmp/tut2')
    db.create()
    x_ind = WithXIndex(db.path, 'x')
    db.add_index(x_ind)
    for x in range(100):
        db.insert(dict(x=x))
    for y in range(100):
        db.insert(dict(y=y))
    # records without an 'x' key are simply absent from the 'x' index
    print(db.get('x', 10, with_doc=True))
def test_num_shards(self, tmpdir, sh_nums):
    db = Database(str(tmpdir) + '/db')
    db.create(with_id_index=False)
    n = globals()['ShardedUniqueHashIndex%d' % sh_nums]
    db.add_index(n(db.path, 'id'))
    assert db.id_ind.sh_nums == sh_nums
@pytest.fixture
def db(request):
    # pytest fixture providing a fresh database per test
    db = Database(os.path.join(str(request.getfixturevalue('tmpdir')), 'db'))
    db.create()
    return db
def test_compact_shards(self, tmpdir):
    db = Database(str(tmpdir) + '/db')
    db.create(with_id_index=False)
    db.add_index(ShardedUniqueHashIndex5(db.path, 'id'))
    for x in range(100):
        db.insert({'x': x})
    db.compact()
    # all 100 records must survive compaction
    assert db.count(db.all, 'id') == 100
def test_create(self, tmpdir):
    db = Database(str(tmpdir) + '/db')
    db.create(with_id_index=False)
    db.add_index(ShardedUniqueHashIndex(db.path, 'id', sh_nums=3))
import os
import shutil

from codernitydb3.database import Database


def migrate(source, destination):
    """ Migrate a database to a fresh one (very basic for now) """
    dbs = Database(source)
    dbt = Database(destination)
    dbs.open()
    dbt.create()
    dbt.close()
    # copy every custom index definition except the default id index
    for curr in os.listdir(os.path.join(dbs.path, '_indexes')):
        if curr != '00id.py':
            shutil.copyfile(os.path.join(dbs.path, '_indexes', curr),
                            os.path.join(dbt.path, '_indexes', curr))
    dbt.open()
    # re-insert every record; drop '_rev' so the target assigns fresh revisions
    for c in dbs.all('id'):
        del c['_rev']
        dbt.insert(c)
    return True
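# A quick usage sketch of migrate(); the paths below are placeholders,
# not part of the original code.
migrate('/tmp/db_old', '/tmp/db_new')

db = Database('/tmp/db_new')
db.open()
print(db.count(db.all, 'id'))  # should match the record count of the source
db.close()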
import random

from codernitydb3.database import Database
from codernitydb3.sharded_hash import ShardedHashIndex

# CustomIdSharded (a sharded id index) is assumed to be defined earlier
# in the sharding example; see the sketch after this block.


class MySharded(ShardedHashIndex):
    custom_header = 'from codernitydb3.sharded_hash import ShardedHashIndex'

    def __init__(self, *args, **kwargs):
        kwargs['sh_nums'] = 10
        kwargs['key_format'] = 'I'
        kwargs['use_make_keys'] = True
        super(MySharded, self).__init__(*args, **kwargs)

    def make_key_value(self, data):
        return data['x'], None

    def calculate_shard(self, key):
        # route each key to one of the sh_nums shards
        return key % self.sh_nums


y = 1500 * 'y'
db = Database('/tmp/shard')
db.create(with_id_index=False)
db.add_index(CustomIdSharded(db.path, 'id'))
db.add_index(MySharded(db.path, 'x'))

# it makes no sense to use sharding with such a small number of records,
# but it shows the mechanics
for x in range(10 ** 4):
    db.insert({'x': x, 'y': y})

# pick an existing key: inserted keys run from 0 to 10**4 - 1
print(db.get('x', random.randint(0, 10 ** 4 - 1))['_id'])
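# The sharding example above uses CustomIdSharded without defining it.
# A plausible definition, following the pattern of the CodernityDB sharding
# docs (an assumption, not necessarily the original class):
from codernitydb3.sharded_hash import ShardedUniqueHashIndex


class CustomIdSharded(ShardedUniqueHashIndex):
    custom_header = 'from codernitydb3.sharded_hash import ShardedUniqueHashIndex'

    def __init__(self, *args, **kwargs):
        # fix the shard count for the id index
        kwargs['sh_nums'] = 10
        super(CustomIdSharded, self).__init__(*args, **kwargs)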
from codernitydb3.database import Database


def main():
    db = Database('/tmp/tut_update')
    db.create()
    x_ind = WithXIndex(db.path, 'x')
    db.add_index(x_ind)
    # full example, so we first add the data (same code as in the previous step)
    for x in range(100):
        db.insert(dict(x=x))
    for y in range(100):
        db.insert(dict(y=y))
    # end of the insert part
    print(db.count(db.all, 'x'))
    for curr in db.all('x', with_doc=True):
        doc = curr['doc']
        if curr['key'] % 7 == 0:
            db.delete(doc)
        elif curr['key'] % 5 == 0:
            doc['updated'] = True
            db.update(doc)
    print(db.count(db.all, 'x'))
    for curr in db.all('x', with_doc=True):
        print(curr)
from codernitydb3.database import Database

# EncUniqueHashIndex is the encrypted unique hash index defined earlier
# in this demo; it is assumed to be in scope here.


def main():
    db = Database('/tmp/demo_secure')
    key = 'abcdefgh'
    id_ind = EncUniqueHashIndex(db.path, 'id')
    db.set_indexes([id_ind])
    db.create()
    db.id_ind.enc_key = key
    print(db.id_ind.storage)
    for x in range(100):
        db.insert(dict(x=x, data='testing'))
    db.close()
    # reopen and read back with the same key
    dbr = Database('/tmp/demo_secure')
    dbr.open()
    dbr.id_ind.enc_key = key
    for curr in dbr.all('id', limit=5):
        print(curr)
from codernitydb3.database import Database


def main():
    db = Database('/tmp/tut4')
    db.create()
    x_ind = WithXIndex(db.path, 'x')
    db.add_index(x_ind)
    for x in range(100):
        db.insert(dict(x=x))
    for y in range(100):
        db.insert(dict(y=y))
    print(db.get('x', 10, with_doc=True))
    for curr in db.get_many('x', start=15, end=25, limit=-1, with_doc=True):
        print(curr)