from CodernityDB.database import Database, RecordNotFound


class CodernityStore(DataStore):

    def __init__(self, redis_server_ip):
        # redis_server_ip is kept for interface compatibility; it is unused here
        self.db = Database('/tmp/db_a')
        self.db.create()
        x_ind = WithXIndex(self.db.path, 'x')
        self.db.add_index(x_ind)

    def put(self, key, value):
        self.db.insert(dict(x=key, chunk=value))

    def get(self, key):
        return self.db.get('x', key, with_doc=True)['doc']['chunk']

    def exists(self, key):
        # bug fix: the original checked a Redis handle (self.r) that is never
        # created in this class; check the CodernityDB index instead
        try:
            self.db.get('x', key)
            return True
        except RecordNotFound:
            return False

    def persist(self):
        pass

    def close(self):
        pass

    def used_memory(self):
        return 0

    def dump(self):
        return "dbsize: %d \n info: %r" % (0, 0)

    def reset(self):
        pass
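# The snippets in this section repeatedly assume a WithXIndex over the 'x'
# field without defining it. A minimal sketch, following the hash-index
# pattern from the CodernityDB tutorial (key_format 'I' assumes unsigned
# integer keys; adjust for other key types):
from CodernityDB.hash_index import HashIndex


class WithXIndex(HashIndex):

    def __init__(self, *args, **kwargs):
        kwargs['key_format'] = 'I'
        super(WithXIndex, self).__init__(*args, **kwargs)

    def make_key_value(self, data):
        a_val = data.get('x')
        if a_val is not None:
            return a_val, None
        return None

    def make_key(self, key):
        return key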
from CodernityDB.database import Database


def main():
    db = Database('/tmp/tut_update')
    db.create()
    x_ind = WithXIndex(db.path, 'x')
    db.add_index(x_ind)

    # full example, so we first have to insert the data
    # (the same insert code as in the previous step)
    for x in xrange(100):
        db.insert(dict(x=x))
    for y in xrange(100):
        db.insert(dict(y=y))
    # end of the insert part

    print db.count(db.all, 'x')

    for curr in db.all('x', with_doc=True):
        doc = curr['doc']
        if curr['key'] % 7 == 0:
            db.delete(doc)
        elif curr['key'] % 5 == 0:
            doc['updated'] = True
            db.update(doc)

    print db.count(db.all, 'x')

    for curr in db.all('x', with_doc=True):
        print curr
class BenchCodernityDB(BenchBase):

    ID_FIELD = "_id"

    def __init__(self, *args, **kwargs):
        super(BenchCodernityDB, self).__init__(*args, **kwargs)

    def create_database(self):
        self.db = Database(self.db_name)
        self.db.create()
        self.db.add_index(WithSmallNumberIndex(self.db.path, "small_number"))

    def delete_database(self):
        self.db.close()
        shutil.rmtree(self.db_name)

    def create(self, record):
        self.db.insert(record)

    def get(self, key):
        return self.db.get("id", key, with_doc=True)

    def query(self, **kwargs):
        key, val = kwargs.items()[0]
        return list(self.db.get_many(key, val, limit=-1, with_doc=True))
def test_to_many_shards(self, tmpdir):
    db = Database(str(tmpdir) + '/db')
    db.create(with_id_index=False)
    # it's OK to use ShardedUniqueHashIndex directly here
    with pytest.raises(IndexPreconditionsException):
        db.add_index(ShardedUniqueHashIndex(db.path, 'id', sh_nums=300))
    with pytest.raises(IndexPreconditionsException):
        db.add_index(ShardedUniqueHashIndex(db.path, 'id', sh_nums=256))
class cache: """ cache for word morphological analysis """ def __init__(self, ): """ Create Analex Cache """ self.cache = { 'checkedWords': {}, 'FreqWords': { 'noun': {}, 'verb': {}, 'stopword': {} }, } self.db = Database('/tmp/qalsadiCache') if not self.db.exists(): self.db.create() x_ind = WithAIndex(self.db.path, 'a') self.db.add_index(x_ind) else: self.db.open() def __del__(self): """ Delete instance and clear cache """ self.cache = None self.db.close() def isAlreadyChecked(self, word): try: return bool(self.db.get('a', word)) except: return False #~ except: return False; def getChecked(self, word): x = self.db.get('a', word, with_doc=True) y = x.get('doc', False) if y: return y.get('d', []) else: return [] def addChecked(self, word, data): idata = {"a": word, 'd': data} self.db.insert(idata) def existsCacheFreq(self, word, wordtype): return word in self.cache['FreqWords'] def getFreq(self, originalword, wordtype): return self.cache['FreqWords'][wordtype].get(originalword, 0) def addFreq(self, original, wordtype, freq): self.cache['FreqWords'][wordtype][original] = freq
def __init__(self):
    db = Database('db')
    if db.exists():
        db.open()
    else:
        db.create()
        index = UrlIndex(db.path, 'urlidx')
        db.add_index(index)
    self._db = db
def test_compact_shards(self, tmpdir):
    db = Database(str(tmpdir) + '/db')
    db.create(with_id_index=False)
    db.add_index(ShardedUniqueHashIndex5(db.path, 'id'))
    for x in xrange(100):
        db.insert({'x': x})
    db.compact()
    assert db.count(db.all, 'id') == 100
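# The sharding tests above and below rely on classes such as
# ShardedUniqueHashIndex5 (and fetch others via
# globals()['ShardedUniqueHashIndex%d' % sh_nums]). A sketch of how such a
# subclass can be defined -- assumed, mirroring the pattern of pinning
# sh_nums per class:
from CodernityDB.sharded_hash import ShardedUniqueHashIndex


class ShardedUniqueHashIndex5(ShardedUniqueHashIndex):

    custom_header = 'from CodernityDB.sharded_hash import ShardedUniqueHashIndex'

    def __init__(self, *args, **kwargs):
        kwargs['sh_nums'] = 5  # five shards, as the class name implies
        super(ShardedUniqueHashIndex5, self).__init__(*args, **kwargs)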
import random

from CodernityDB.database import Database


def main():
    db = Database('/tmp/tut5_2')
    db.create()
    x_ind = WithXIndex(db.path, 'x')
    db.add_index(x_ind)
    for x in xrange(100):
        db.insert(dict(x=x, t=random.random()))
    print db.run('x', 'avg', start=10, end=30)
def main(): db = Database("/tmp/tut5_2") db.create() x_ind = WithXIndex(db.path, "x") db.add_index(x_ind) for x in xrange(100): db.insert(dict(x=x, t=random.random())) print db.run("x", "avg", start=10, end=30)
def test_insert_get(self, tmpdir, sh_nums):
    db = Database(str(tmpdir) + '/db')
    db.create(with_id_index=False)
    n = globals()['ShardedUniqueHashIndex%d' % sh_nums]
    db.add_index(n(db.path, 'id'))
    inserted_ids = []
    for x in xrange(10000):
        inserted_ids.append(db.insert(dict(x=x))['_id'])
    for curr in inserted_ids:
        assert db.get('id', curr)['_id'] == curr
class cache : """ cache for word morphological analysis """ def __init__(self,): """ Create Analex Cache """ self.cache={'checkedWords':{}, 'FreqWords':{'noun':{}, 'verb':{},'stopword':{}}, }; self.db = Database('/tmp/qalsadiCache') if not self.db.exists(): self.db.create(); x_ind = WithAIndex(self.db.path, 'a') self.db.add_index(x_ind) else: self.db.open(); def __del__(self): """ Delete instance and clear cache """ self.cache=None; self.db.close(); def isAlreadyChecked(self, word): try: return bool(self.db.get('a', word)) except: return False #~ except: return False; def getChecked(self, word): x = self.db.get('a', word, with_doc=True) y= x.get('doc',False); if y: return y.get('d',[]) else: return [] def addChecked(self, word, data): idata = {"a":word,'d':data} self.db.insert(idata) def existsCacheFreq(self, word, wordtype): return word in self.cache['FreqWords']; def getFreq(self, originalword, wordtype): return self.cache['FreqWords'][wordtype].get(originalword,0); def addFreq(self, original, wordtype, freq): self.cache['FreqWords'][wordtype][original]=freq;
import random

from CodernityDB.database import Database


def main():
    db = Database('/tmp/tut5_1')
    db.create()
    # note: start/end range queries require the tree-based variant of
    # WithXIndex from the tutorial, not a hash index
    x_ind = WithXIndex(db.path, 'x')
    db.add_index(x_ind)
    for x in xrange(100):
        db.insert(dict(x=x, t=random.random()))
    values = []
    for curr in db.get_many('x', start=10, end=30, limit=-1, with_doc=True):
        values.append(curr['doc']['t'])
    print sum(values) / len(values)
from CodernityDB.database import Database


def main():
    db = Database('/tmp/tut2')
    db.create()
    x_ind = WithXIndex(db.path, 'x')
    db.add_index(x_ind)
    for x in xrange(100):
        db.insert(dict(x=x))
    for y in xrange(100):
        db.insert(dict(y=y))
    print db.get('x', 10, with_doc=True)
def read_samples(db_filename, test_name):
    db = Database(db_filename)
    db.open()
    test_name_ind = WithTestNameIndex(db.path, 'test_name')
    try:
        db.edit_index(test_name_ind)
    except (IndexConflict, PreconditionsException):
        db.add_index(test_name_ind)
    for data in db.get_many('test_name', test_name, limit=-1):
        yield data
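# WithTestNameIndex is not shown in the source. Any hash index keyed on the
# 'test_name' field would satisfy read_samples(); a hypothetical sketch with
# 32-byte right-padded keys (the padding scheme is an assumption):
from CodernityDB.hash_index import HashIndex


class WithTestNameIndex(HashIndex):

    def __init__(self, *args, **kwargs):
        kwargs['key_format'] = '32s'
        super(WithTestNameIndex, self).__init__(*args, **kwargs)

    def make_key_value(self, data):
        name = data.get('test_name')
        if name is not None:
            return name.rjust(32, '_')[:32], None
        return None

    def make_key(self, key):
        return key.rjust(32, '_')[:32]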
def create_test_db(db_name='codernity_test_db_0',
                   with_x_hash_index=False,
                   with_x_tree_index=False):
    t = time.time()
    db_path = '/tmp/%s' % db_name
    os.system('rm -rf %s' % db_path)
    db = Database(db_path)
    db.create()
    if with_x_hash_index:
        x_ind = WithXHashIndex(db.path, 'x')
        db.add_index(x_ind)
    if with_x_tree_index:
        x_ind = WithXTreeIndex(db.path, 'x')
        db.add_index(x_ind)
    print('\ncreate_test_db finished in %f sec' % (time.time() - t))
    return db
from CodernityDB.database import Database


def main():
    db = Database('db/tut2')
    if db.exists():
        db.open()
    else:
        db.create()
        # bug fix: register the index under the name 'x' -- it is queried
        # as db.get('x', ...) below (the original passed 'y' here)
        x_ind = WithXIndex(db.path, 'x')
        db.add_index(x_ind)
    for x in xrange(100):
        db.insert(dict(x=x))
    for y in xrange(100):
        db.insert(dict(y=y))
    print db.get('x', 10, with_doc=True)
import time

from CodernityDB.database import Database

start_time = time.time()  # assumed: the elapsed-time print below needs this


def main():
    # Create database
    db = Database('/tmp/trafficDB')
    db.create()
    x_ind = WithXIndex(db.path, 'x')
    db.add_index(x_ind)

    # Import data from CSV files into the database
    parser = ParserCSV()
    parser.get_csv_files()
    parser.db_import(db)

    # Display the total number of records in the database
    print db.count(db.all, 'id')
    print("--- %s seconds ---" % (time.time() - start_time))
from CodernityDB.database import Database


def main():
    db = Database('/tmp/tut4')
    db.create()
    # note: the start/end range query below requires the tree-based variant
    # of WithXIndex from the tutorial
    x_ind = WithXIndex(db.path, 'x')
    db.add_index(x_ind)
    for x in xrange(100):
        db.insert(dict(x=x))
    for y in xrange(100):
        db.insert(dict(y=y))
    print db.get('x', 10, with_doc=True)
    for curr in db.get_many('x', start=15, end=25, limit=-1, with_doc=True):
        print curr
class CodernityDB(BaseService):

    """A service providing a codernity db interface."""

    name = 'db'
    default_config = dict(db=dict(path=''), app=dict(dir=''))

    def __init__(self, app):
        super(CodernityDB, self).__init__(app)
        self.dbfile = os.path.join(self.app.config['app']['dir'],
                                   self.app.config['db']['path'])
        self.db = None
        self.uncommitted = dict()
        self.stop_event = Event()
        self.db = Database(self.dbfile)
        try:
            log.info('opening db', path=self.dbfile)
            self.db.open()
        except DatabasePathException:
            log.info('db does not exist, creating it', path=self.dbfile)
            self.db.create()
            self.db.add_index(MD5Index(self.dbfile, 'key'))

    def _run(self):
        self.stop_event.wait()

    def stop(self):
        # commit?
        log.info('closing db')
        if self.started:
            self.db.close()
        self.stop_event.set()

    def get(self, key):
        log.debug('getting entry', key=key)
        if key in self.uncommitted:
            if self.uncommitted[key] is None:
                raise KeyError("key not in db")
            return self.uncommitted[key]
        try:
            value = self.db.get('key', key, with_doc=True)['doc']['value']
        except RecordNotFound:
            raise KeyError("key not in db")
        return compress.decompress(value)

    def put(self, key, value):
        log.debug('putting entry', key=key, value=value)
        self.uncommitted[key] = value

    def commit(self):
        log.debug('committing', db=self)
        for k, v in self.uncommitted.items():
            if v is None:
                doc = self.db.get('key', k, with_doc=True)['doc']
                self.db.delete(doc)
            else:
                self.db.insert({'key': k, 'value': compress.compress(v)})
        self.uncommitted.clear()

    def delete(self, key):
        log.debug('deleting entry', key=key)
        self.uncommitted[key] = None

    def __contains__(self, key):
        try:
            self.get(key)
        except KeyError:
            return False
        return True

    def __eq__(self, other):
        return isinstance(other, self.__class__) and self.db == other.db

    def __repr__(self):
        return '<DB at %d uncommitted=%d>' % (id(self.db), len(self.uncommitted))

    def inc_refcount(self, key, value):
        self.put(key, value)

    def dec_refcount(self, key):
        pass

    def revert_refcount_changes(self, epoch):
        pass

    def commit_refcount_changes(self, epoch):
        pass

    def cleanup(self, epoch):
        pass

    def put_temporarily(self, key, value):
        self.inc_refcount(key, value)
        self.dec_refcount(key)
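# MD5Index is referenced above but not defined here. A sketch consistent
# with how the service queries it (db.get('key', key)) -- an assumption, not
# necessarily the original definition:
from hashlib import md5

from CodernityDB.hash_index import HashIndex


class MD5Index(HashIndex):

    def __init__(self, *args, **kwargs):
        kwargs['key_format'] = '16s'
        super(MD5Index, self).__init__(*args, **kwargs)

    def make_key_value(self, data):
        return md5(data['key']).digest(), None

    def make_key(self, key):
        return md5(key).digest()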
class Store():

    def __init__(self, pathname):
        self.store_path = os.path.join(pathname, "store")
        self.objects_counter = {}
        self.init_store_db()
        if not os.path.exists(self.store_path):
            os.mkdir(self.store_path)

    def init_store_dir(self):
        if not os.path.exists(self.store_path):
            os.mkdir(self.store_path)
        objects_path = os.path.join(self.store_path, "objects")
        if not os.path.exists(objects_path):
            os.mkdir(objects_path)
        backups_path = os.path.join(self.store_path, "backups")
        if not os.path.exists(backups_path):
            os.mkdir(backups_path)
        journal_path = os.path.join(self.store_path, "journal")
        if not os.path.exists(journal_path):
            os.mkdir(journal_path)
        journal_objects_path = os.path.join(self.store_path, "journal/objects")
        if not os.path.exists(journal_objects_path):
            os.mkdir(journal_objects_path)
        journal_backups_path = os.path.join(self.store_path, "journal/backups")
        if not os.path.exists(journal_backups_path):
            os.mkdir(journal_backups_path)

    def init_store_db(self):
        self.db = Database(os.path.join(self.store_path, "store.db"))
        if not self.db.exists():
            self.db.create()
            self.db.add_index(WithHashIndex(self.db.path, "hash"))
            self.db.add_index(WithPointerIndex(self.db.path, "pointer"))
        else:
            self.db.open()

    def get_path(self):
        return self.store_path

    # called e.g. as BackupObject.new...(..., target.get_path())
    def get_backup_path(self, backup_name):
        backup_path = os.path.join(self.store_path, "backups")
        return os.path.join(backup_path, backup_name)

    def get_journal_backup_path(self, backup_name):
        backup_path = os.path.join(self.get_journal_path(), "backups")
        return os.path.join(backup_path, backup_name)

    def get_objet_dir_path(self, hash):
        return os.path.join(self.store_path, "objects", hash[:2])

    def get_object_path(self, hash):
        object_path = os.path.join(self.store_path, "objects", hash[:2])
        return os.path.join(object_path, hash + ".data")

    def get_journal_object_path(self, hash):
        object_path = os.path.join(self.get_journal_path(), "objects", hash[:2])
        if not os.path.exists(object_path):
            os.mkdir(object_path)
        return os.path.join(object_path, hash + ".data")

    def get_journal_tmp_object_path(self, hash):
        object_path = os.path.join(self.get_journal_path(), "objects")
        return os.path.join(object_path, hash + ".data")

    def get_object_header_path(self, hash):
        object_header_path = os.path.join(self.store_path, "objects", hash[:2])
        return os.path.join(object_header_path, hash + ".meta")

    def get_journal_object_header_path(self, hash):
        object_header_path = os.path.join(self.get_journal_path(), "objects", hash[:2])
        if not os.path.exists(object_header_path):
            os.mkdir(object_header_path)
        return os.path.join(object_header_path, hash + ".meta")

    def get_journal_tmp_object_header_path(self, hash):
        object_header_path = os.path.join(self.get_journal_path(), "objects")
        return os.path.join(object_header_path, hash + ".meta")

    def get_backups_path(self):
        return os.path.join(self.store_path, "backups")

    def get_latest_path(self):
        latest_tmp_path = os.path.join(self.store_path, "backups")
        return os.path.join(latest_tmp_path, "latest")

    def get_journal_latest_path(self):
        latest_tmp_path = os.path.join(self.get_journal_path(), "backups")
        return os.path.join(latest_tmp_path, "latest")

    def get_journal_path(self):
        return os.path.join(self.store_path, "journal")

    def get_all_backups(self):
        backups_path = os.path.join(self.store_path, "backups")
        backups = os.listdir(backups_path)
        if "latest" in backups:
            backups.remove("latest")
        return backups

    def is_journal_complete(self):
        journal_path = self.get_journal_path()
        if os.path.exists(journal_path):
            if os.path.isfile(os.path.join(journal_path, "journal_complete")):
                return True
            elif os.path.isfile(os.path.join(journal_path, "journal_incomplete")):
                print("Clearing Journal")
                self.remove_incomplete_journal()
                os.remove(os.path.join(journal_path, "journal_incomplete"))
                self.rebuildDB()
                return False
        return False

    def remove_incomplete_journal(self):
        journal_path = self.get_journal_path()
        for file_object in os.listdir(os.path.join(journal_path, "objects")):
            os.remove(os.path.join(journal_path, "objects", file_object))
        for file_object in os.listdir(os.path.join(journal_path, "backups")):
            os.remove(os.path.join(journal_path, "backups", file_object))

    def write_to_journal(self, command):
        journal_path = self.get_journal_path()
        with open(os.path.join(journal_path, "journal_incomplete"), "a") as TF:
            TF.write(command + "\n")

    def finish_journal(self):
        for key, value in self.objects_counter.iteritems():
            if value["operation"] == "update" and value["value"] == 0:
                self.removeObject(key)
            else:
                self.write_to_journal(value["operation"] + " " + key + " " + str(value["value"]))
        if os.path.exists(os.path.join(self.get_journal_path(), "journal_incomplete")):
            journal_file = open(os.path.join(self.get_journal_path(), "journal_incomplete"), "r+")
            uniqlines = set(journal_file.readlines())
            journal_file.close()
            journal_file = open(os.path.join(self.get_journal_path(), "journal_incomplete"), "w")
            journal_file.writelines(uniqlines)
            journal_file.close()
            self.file_rename(os.path.join(self.get_journal_path(), "journal_incomplete"),
                             "journal_complete")

    def commit(self):
        print("Committing Journal")
        journal_path = self.get_journal_path()
        if os.path.exists(self.get_latest_path()):
            os.remove(self.get_latest_path())
        if self.is_journal_complete():
            with open(os.path.join(journal_path, "journal_complete"), "rb") as TF:
                for command in TF:
                    words = command.split()
                    if words[0] == "move":
                        file_path, file_name = os.path.split(words[2])
                        if not os.path.exists(file_path):
                            os.mkdir(file_path)
                        shutil.move(words[1], words[2])
                        # os.rename(words[1], words[2])
                    elif words[0] == "remove":
                        os.remove(words[1])
                    elif words[0] == "rmdir":
                        shutil.rmtree(words[1])
                    elif words[0] == "insert":
                        self.db.insert({'hash': words[1], 'pointer': int(words[2])})
                    elif words[0] == "update":
                        element = self.db.get('hash', words[1], with_doc=True)
                        element = element['doc']
                        element['pointer'] = int(words[2])
                        self.db.update(element)
                    elif words[0] == "delete":
                        element = self.db.get('hash', words[1], with_doc=True)
                        element = element['doc']
                        self.db.delete(element)
            os.remove(os.path.join(journal_path, "journal_complete"))
            journal_objects_path = os.path.join(journal_path, "objects")
            shutil.rmtree(journal_objects_path)
            os.mkdir(journal_objects_path)

    @staticmethod
    def file_rename(old_name, new_name):
        new_file_name = os.path.join(os.path.dirname(old_name), new_name)
        os.rename(old_name, new_file_name)

    def file_move(self, old_name, new_name):
        tmp = os.path.join(self.get_journal_path(), "objects", new_name[:2])
        if not os.path.exists(tmp):
            os.mkdir(tmp)
        os.rename(old_name, os.path.join(tmp, new_name))

    def save_file(self, source_path, name, previous_hash=None,
                  block_size=constants.CONST_BLOCK_SIZE):
        file_hash = hashlib.sha1()
        store_file = self.get_journal_tmp_object_path(name)
        store_file_header = self.get_journal_tmp_object_header_path(name)
        if previous_hash is not None:
            previous_type = self.get_object_type(previous_hash)
            if previous_type == "gz\n" or previous_type == "delta\n":
                previous_file = self.get_object_file_header(previous_hash, "rb")
                previous_file.readline()
                previous_file.readline()
                sig_size = previous_file.readline()
                sig_data = previous_file.read(int(sig_size))
                deltaProcess = subprocess.Popen(['rdiff', 'delta', '-', source_path],
                                                stdout=subprocess.PIPE,
                                                stdin=subprocess.PIPE)
                deltaProcess.stdin.write(sig_data)
                deltaProcess.stdin.close()
                with gzip.open(store_file, "wb") as TF:  # was gzip
                    while True:
                        deltaData = deltaProcess.stdout.read(16)
                        if deltaData:
                            file_hash.update(deltaData)
                            TF.write(deltaData)
                        else:
                            with open(store_file_header, "wb") as THF:
                                THF.write("delta\n")
                                THF.write("signature\n")
                                sigProcess = subprocess.Popen(['rdiff', 'signature', source_path],
                                                              stdout=subprocess.PIPE)
                                signature, signatureErr = sigProcess.communicate()
                                if signatureErr is None:
                                    THF.write(str(len(signature)))
                                    THF.write("\n")
                                    THF.write(signature)
                                else:
                                    THF.write(str(0))
                                    THF.write("\n")
                                THF.write("previous\n")
                                THF.write(previous_hash)
                            self.file_move(store_file, file_hash.hexdigest() + ".data")
                            self.file_move(store_file_header, file_hash.hexdigest() + ".meta")
                            break
                self.write_to_journal("move " + self.get_journal_object_path(file_hash.hexdigest()) + " " + os.path.join(self.store_path, "objects", file_hash.hexdigest()[:2], file_hash.hexdigest() + ".data"))
                self.write_to_journal("move " + self.get_journal_object_header_path(file_hash.hexdigest()) + " " + os.path.join(self.store_path, "objects", file_hash.hexdigest()[:2], file_hash.hexdigest() + ".meta"))
                return file_hash.hexdigest()
        else:
            with open(source_path, "rb") as SF:
                with gzip.open(store_file, "wb") as TF:  # was gzip
                    while True:
                        block = SF.read(block_size)
                        file_hash.update(block)
                        TF.write(block)
                        if not block:
                            self.file_move(store_file, file_hash.hexdigest() + ".data")
                            with open(store_file_header, "wb") as THF:
                                THF.write("gz\n")
                                THF.write("signature\n")
                                sigProcess = subprocess.Popen(['rdiff', 'signature', source_path],
                                                              stdout=subprocess.PIPE)
                                signature, signatureErr = sigProcess.communicate()
                                if signatureErr is None:
                                    THF.write(str(len(signature)))
                                    THF.write("\n")
                                    THF.write(signature)
                                else:
                                    THF.write(str(0))
                            self.file_move(store_file_header, file_hash.hexdigest() + ".meta")
                            break
            self.write_to_journal("move " + self.get_journal_object_path(file_hash.hexdigest()) + " " + os.path.join(self.store_path, "objects", file_hash.hexdigest()[:2], file_hash.hexdigest() + ".data"))
            self.write_to_journal("move " + self.get_journal_object_header_path(file_hash.hexdigest()) + " " + os.path.join(self.store_path, "objects", file_hash.hexdigest()[:2], file_hash.hexdigest() + ".meta"))
            return file_hash.hexdigest()

    def save_directory(self, pi, hash_name):
        with self.get_journal_object_file(hash_name, "wb") as DF:
            with self.get_journal_object_file_header(hash_name, "wb") as DHF:
                DHF.write("directory\n")
                DF.write(pi)
        self.write_to_journal("move " + DF.name + " " + os.path.join(self.store_path, "objects", hash_name[:2], hash_name + ".data"))
        self.write_to_journal("move " + DHF.name + " " + os.path.join(self.store_path, "objects", hash_name[:2], hash_name + ".meta"))

    def save_link(self, link, hash_name):
        with self.get_journal_object_file(hash_name.hexdigest(), "wb") as DF:
            with self.get_journal_object_file_header(hash_name.hexdigest(), "wb") as DHF:
                DHF.write("link\n")
                DHF.write("signature\n")
                DHF.write(str(0))
                DHF.write("\n")
                DF.write(link)
        self.write_to_journal("move " + DF.name + " " + os.path.join(self.store_path, "objects", hash_name.hexdigest()[:2], hash_name.hexdigest() + ".data"))
        self.write_to_journal("move " + DHF.name + " " + os.path.join(self.store_path, "objects", hash_name.hexdigest()[:2], hash_name.hexdigest() + ".meta"))

    def save_data(self, file_name, data):
        with open(file_name, "wb") as BF:
            BF.write(data)
        self.write_to_journal("move " + BF.name + " " + os.path.join(self.store_path, "backups"))

    def get_object_file(self, hash, mode):
        type = self.get_object_type(hash)
        if type == "gz\n" or type == "delta\n":
            return gzip.open(self.get_object_path(hash), mode)
        return open(self.get_object_path(hash), mode)

    def get_journal_object_file(self, hash, mode):
        return open(self.get_journal_object_path(hash), mode)

    def get_object_file_header(self, hash, mode):
        return open(self.get_object_header_path(hash), mode)

    def get_journal_object_file_header(self, hash, mode):
        return open(self.get_journal_object_header_path(hash), mode)

    def get_object_type(self, hash):
        with self.get_object_file_header(hash, "rb") as HF:
            object_type = HF.readline()
        return object_type

    def get_object(self, source_path, hash, side_dict):
        return StoreObject.create(source_path, self, side_dict)

    def get_unzipped_tempFile(self, hash, tempFile):
        gzipFile = gzip.open(self.get_object_path(hash))
        temp = open(tempFile.name, "w+")
        while True:
            block = gzipFile.read()
            temp.write(block)
            if not block:
                break
        temp.seek(0)
        gzipFile.close()
        return temp

    def get_hash(self, src_file, block_size=constants.CONST_BLOCK_SIZE):
        file_hash = hashlib.sha1()
        with open(src_file, "rb") as SF:
            while True:
                block = SF.read(block_size)
                file_hash.update(block)
                if not block:
                    break
        return file_hash.hexdigest()

    def incIndex(self, hash):
        if hash in self.objects_counter:
            self.objects_counter[hash]["value"] = self.objects_counter[hash]["value"] + 1
            return self.objects_counter[hash]["value"]
        else:
            try:
                element = self.db.get('hash', hash, with_doc=True)
                element = element['doc']
                self.objects_counter[hash] = {"value": element['pointer'] + 1,
                                              "operation": "update"}
                return element['pointer'] + 1
            except RecordNotFound:
                self.objects_counter[hash] = {"value": 1, "operation": "insert"}
                return 1

    def decIndex(self, hash):
        if hash in self.objects_counter:
            self.objects_counter[hash]["value"] = self.objects_counter[hash]["value"] - 1
            return self.objects_counter[hash]["value"]
        else:
            try:
                element = self.db.get('hash', hash, with_doc=True)
                element = element['doc']
                self.objects_counter[hash] = {"value": element['pointer'] - 1,
                                              "operation": "update"}
                return element['pointer'] - 1
            except RecordNotFound:
                return

    def getIndex(self, hash):
        if hash in self.objects_counter:
            return self.objects_counter[hash]["value"]
        else:
            try:
                element = self.db.get('hash', hash, with_doc=True)
                element = element['doc']
                return element['pointer']
            except RecordNotFound:
                return 0

    def rebuildDB(self):
        self.db.destroy()
        self.init_store_db()
        backups = self.get_all_backups()
        for backup in backups:
            tmp = ExistingBackup('', self, backup)
            tmp.recovery_backup(True)

    def removeObject(self, hash):
        if len(os.listdir(self.get_objet_dir_path(hash))) == 2:
            self.write_to_journal("rmdir " + self.get_objet_dir_path(hash))
        else:
            self.write_to_journal("remove " + self.get_object_path(hash))
            self.write_to_journal("remove " + self.get_object_header_path(hash))
        self.write_to_journal("delete " + hash)

    def removeBackup(self, time):
        backup = ExistingBackup("", self, time).get_root_object()
        self.is_journal_complete()
        backup.remove()
        os.remove(self.get_backup_path(time))
        newest = self.getNewestBackupTime()
        if newest != None:
            self.save_data(self.get_journal_latest_path(), newest)
        self.finish_journal()
        self.commit()

    def getNewestBackupTime(self):
        backups_path = self.get_backups_path()
        backups = sorted(os.listdir(backups_path))
        backups.remove("latest")
        if len(backups) > 0:
            return backups[len(backups) - 1]
        return None
# Fragment: the enclosing class header is truncated in the source; the base
# class is assumed and the delete() signature is reconstructed from its own
# call site below.
class MultiIndex(HashIndex):  # assumed base; original header truncated

    def delete(self, doc_id, key, start, size):
        # remove every key this document was indexed under
        delete = super(MultiIndex, self).delete
        for curr_key in key:
            delete(doc_id, curr_key, start, size)

    def get(self, key):
        return super(MultiIndex, self).get(key)

    def make_key_value(self, data):
        return data['l'], None


if __name__ == '__main__':
    from CodernityDB.database import Database

    db = Database('/tmp/db_test')
    db.create()
    db.add_index(MultiIndex(db.path, 'multi'))
    for x in xrange(2):
        d = dict(l=range(10 * x, 10 * (x + 1)))
        db.insert(d)
    for curr in db.all('multi'):
        print curr
    for curr in db.all('id'):
        nl = map(lambda x: x * 10, curr['l'])
        curr['l'] = nl
        db.update(curr)
    for curr in db.all('multi'):
        print curr
    for curr in db.all('id'):
        print curr  # assumed body; the source is truncated mid-loop here
custom_header = """from CodernityDB.tree_index import MultiTreeBasedIndex from itertools import izip""" def __init__(self, *args, **kwargs): kwargs['key_format'] = '16s' super(TreeMultiTest, self).__init__(*args, **kwargs) self.__l = kwargs.get('w_len', 2) def make_key_value(self, data): name = data['w'] l = self.__l max_l = len(name) out = set() for x in xrange(l - 1, max_l): m = (name, ) for y in xrange(0, x): m += (name[y + 1:],) out.update(set(''.join(x).rjust(16, '_').lower() for x in izip(*m))) #ignore import error return out, dict(w=name) def make_key(self, key): return key.rjust(16, '_').lower() db = Database('./tmp/multi') db.create() db.add_index(TreeMultiTest(db.path, "words")) db.insert(dict(w='Codernity')) print db.get('words', 'dern')['w'] # "Codernity" print db.get('words', 'cod')['w'] # "Codernity"
# Fragment: the LogIndex class header is truncated in the source; a
# TreeBasedIndex over the float 'date' field is assumed here.
class LogIndex(TreeBasedIndex):

    def __init__(self, *args, **kwargs):
        kwargs['key_format'] = 'd'  # assumed: timestamps stored as doubles
        super(LogIndex, self).__init__(*args, **kwargs)

    def make_key(self, key):
        return key

    def make_key_value(self, data):
        a_val = data.get('date')
        if a_val is not None:
            return a_val, None


db = Database('log')
if db.exists():
    db.open()
    db.reindex()
else:
    db.create()
    index = LogIndex(db.path, 'logidx')
    db.add_index(index)


class Log():

    def __init__(self):
        self._db = db

    def get(self, count):
        cnt = self._db.count(self._db.all, 'logidx')
        records = self._db.all("logidx",
                               offset=cnt - int(count) if cnt - int(count) > 0 else 0,
                               with_doc=True)
        result = [dict(date=r["doc"]["date"],
                       localDate=time.strftime("%Y-%m-%d %H:%M:%S",
                                               time.localtime(r["doc"]["date"])),
                       client=r["doc"]["client"],
                       message=r["doc"]["message"])
                  for r in records]
        return reversed(result)

    def add(self, date, client, message):
        self._db.insert(dict(date=date, client=client, message=message))
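# A possible round-trip through the Log class above (hypothetical values;
# assumes the module-level db and time import shown in the snippet):
log = Log()
log.add(time.time(), '127.0.0.1', 'client connected')
for entry in log.get(10):
    print entry['localDate'], entry['client'], entry['message']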
class cache: """ cache for word morphological analysis """ def __init__(self, ): """ Create Analex Cache """ # use this dictionary as a local cache, # The global db will be updated on destructing object self.cache = {} self.db = Database('~/tmp/thaalibCache') if not self.db.exists(): self.db.create() x_ind = WithAIndex(self.db.path, 'a') self.db.add_index(x_ind) else: self.db.open() def __del__(self): """ Delete instance and clear cache """ self.cache = None self.db.close() def update(self): """update data base """ for word in self.cache: self.add_checked(word, self.cache[word]) def is_already_checked(self, word): try: return bool(self.db.get('a', word)) except: return False #~ except: return False; def get_checked(self, word): try: x = self.db.get('a', word, with_doc=True) y = x.get('doc', False) if y: return y.get('d', []) else: return [] except: return [] def add_checked(self, word, data): idata = {"a": word, 'd': data} try: saved = self.db.get('a', word, with_doc=True) except: saved = False if saved: saved['doc']['d'] = data doc = saved['doc'] doc['update'] = True self.db.update(doc) else: self.db.insert(idata) def exists_cache_word(self, word): """ test if word exists in cache""" #if exists in cache dictionary if word in self.cache: return True else: # test in database if self.is_already_checked(word): stored_data = self.get_checked(word) self.cache[word] = stored_data return bool(self.cache[word]) else: # add null dict to the word index to avoid multiple database check self.cache[word] = {} return {} def get_relation_freq(self, word_prev, word_cur, relation): self.exists_cache_word(word_prev) return self.cache.get(word_prev, {}).get(word_cur, {}).get(relation, 0) def is_related(self, word_prev, word_cur): """ test if two words are related""" #serach in cache self.exists_cache_word(word_prev) # if exists in cache or database return self.cache.get(word_prev, {}).get(word_cur, {}) def add_relation(self, word_prev, word_cur, relation): #~ relation ='r'+str(relation) if word_prev not in self.cache: # test first that is in db cache if self.is_already_checked(word_prev): stored_data = self.get_checked(word_prev) self.cache[word_prev] = stored_data else: # create an new entry self.cache[word_prev] = { word_cur: { relation: 1, }, } # word_prev exists # add word_cur to previous dict elif word_cur not in self.cache[word_prev]: self.cache[word_prev][word_cur] = { relation: 1, } elif relation not in self.cache[word_prev][word_cur]: self.cache[word_prev][word_cur][relation] = 1 else: self.cache[word_prev][word_cur][relation] += 1 def display_all(self): """ display all contents of data base """ print "aranasyn.cache: dislay all records in Thaalib Database " "" for curr in self.db.all('a', with_doc=True): print curr['doc']['a'], arepr(curr['doc']['d'])
# Fragment: begins inside a handler method whose header is truncated in the
# source (it broadcasts a message to every connected websocket client).
        data = json.dumps(data)
        for c in cl:
            c.write_message(data)

    @web.asynchronous
    def post(self):
        pass


app = web.Application([
    (r'/', IndexHandler),
    (r'/ws', SocketHandler),
    (r'/api', ApiHandler),
    (r'/(favicon.ico)', web.StaticFileHandler, {'path': '../'}),
])

db = Database(config["storage"]["directory"])

if __name__ == '__main__':
    app.listen(config['port'])
    try:
        db.open()
        logging.info("Open database in: %s", config["storage"]["directory"])
    except DatabasePathException:
        db.create()
        logging.info("Database does not exist in %s (creating new)",
                     config["storage"]["directory"])
        db.add_index(ChanelIndex(db.path, 'channel'))
    try:
        logging.info("Server starts on port: %s", config['port'])
        ioloop.IOLoop.instance().start()
    except KeyboardInterrupt:
        logging.info("Server closing..")
        ioloop.IOLoop.instance().stop()
class CodernityDataStore(object):

    PATH_TYPE = 'path'

    def __init__(self, db_path):
        self.db = Database(db_path)
        if self.db.exists():
            self.db.open()
        else:
            self.db.create()
            path_index = PathIndex(self.db.path, 'path')
            self.db.add_index(path_index)
            path_added_index = PathAddedIndex(self.db.path, 'path_added')
            self.db.add_index(path_added_index)

    @classmethod
    def dt_str(cls, datetime):
        return datetime.isoformat()[0:19]

    def add_video(self, path, video, added=None):
        logger.debug("add_video(%s, %s, %s)", path, video, added)
        added = added or datetime.utcnow()
        existing = list(self.db.get_many('path', path, with_doc=True))
        video_data, video_type = Serializer.serialize_video(video)
        data = dict(_t=self.PATH_TYPE, path=path, video_data=video_data,
                    video_type=video_type, downloads=dict(),
                    added=self.dt_str(added))
        self.db.insert(data)
        for existing_path in existing:
            self.db.delete(existing_path['doc'])

    def add_download(self, path, provider, sub_id, language, score):
        logger.debug("add_download(%s, %s, %s, %s, %d)",
                     path, provider, sub_id, language, score)
        data = self.db.get('path', path, with_doc=True)
        path = data['doc']
        download = dict(provider=provider, sub_id=sub_id,
                        lang=str(language), score=score)
        if str(language) in path['downloads']:
            path['downloads'][str(language)].append(download)
        else:
            path['downloads'][str(language)] = [download]
        self.db.update(path)

    def get_downloads_for_video(self, path):
        logger.debug("get_downloads_for_video(%s)", path)
        data = self.db.get('path', path, with_doc=True)
        return data['doc']['downloads']

    @staticmethod
    def exceeds_desired_score(video, score, desired_movie_score, desired_episode_score):
        if isinstance(video, Episode):
            return score >= desired_episode_score
        elif isinstance(video, Movie):
            return score >= desired_movie_score

    def get_incomplete_videos(self, languages, desired_movie_score,
                              desired_episode_score, ignore_older_than):
        logger.debug("get_incomplete_videos(%s, %d, %d, %s)", languages,
                     desired_movie_score, desired_episode_score, ignore_older_than)
        within_date = self.db.get_many('path_added',
                                       start=self.dt_str(ignore_older_than),
                                       with_doc=True)
        results = []
        for path in (data['doc'] for data in within_date):
            video = Serializer.deserialize_video(path['video_type'], path['video_data'])
            needs = []
            for lang in languages:
                if str(lang) in path['downloads']:
                    current_score = max(download['score']
                                        for download in path['downloads'][str(lang)])
                    if not self.exceeds_desired_score(video, current_score,
                                                      desired_movie_score,
                                                      desired_episode_score):
                        needs.append(dict(lang=lang, current_score=current_score))
                else:
                    needs.append(dict(lang=lang, current_score=0))
            if needs:
                results.append(dict(path=path['path'], video=video, needs=needs))
        logger.debug("found %d incomplete videos: %s", len(results), results)
        return results

    def close(self):
        self.db.close()
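# PathIndex and PathAddedIndex are assumed by the store above but not shown.
# A hypothetical sketch of PathIndex (md5 of the path as a fixed-width hash
# key); PathAddedIndex would be a TreeBasedIndex over the 'added' string,
# since get_incomplete_videos queries it with a start= range:
from hashlib import md5

from CodernityDB.hash_index import HashIndex


class PathIndex(HashIndex):

    def __init__(self, *args, **kwargs):
        kwargs['key_format'] = '16s'
        super(PathIndex, self).__init__(*args, **kwargs)

    def make_key_value(self, data):
        if data.get('_t') == 'path':
            return md5(data['path'].encode('utf-8')).digest(), None
        return None

    def make_key(self, key):
        return md5(key.encode('utf-8')).digest()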
def test_create(self, tmpdir):
    db = Database(str(tmpdir) + '/db')
    db.create(with_id_index=False)
    db.add_index(ShardedUniqueHashIndex(db.path, 'id', sh_nums=3))
def test_num_shards(self, tmpdir, sh_nums):
    db = Database(str(tmpdir) + '/db')
    db.create(with_id_index=False)
    n = globals()['ShardedUniqueHashIndex%d' % sh_nums]
    db.add_index(n(db.path, 'id'))
    assert db.id_ind.sh_nums == sh_nums
class cache:
    """ cache for word morphological analysis """

    DB_PATH = os.path.join(os.path.expanduser('~'), '.thaalabCache')

    def __init__(self, cache_path=False):
        """ Create Analex Cache """
        # use this dictionary as a local cache,
        # the global db will be updated on destroying the object

        # get the database path
        if hasattr(sys, 'frozen'):
            # only when running under py2exe does this attribute exist
            base = sys.prefix
        else:
            # otherwise this is a regular python script
            base = os.path.dirname(os.path.realpath(__file__))
        if not cache_path:
            file_path = self.DB_PATH
        else:
            file_path = os.path.join(os.path.dirname(cache_path), '.thaalabCache')
        self.cache = {}
        self.db = Database(file_path)
        if not self.db.exists():
            self.db.create()
            x_ind = WithAIndex(self.db.path, 'a')
            self.db.add_index(x_ind)
        else:
            self.db.open()

    def __del__(self):
        """ Delete instance and clear cache """
        self.cache = None
        self.db.close()

    def update(self):
        """update the database"""
        for word in self.cache:
            self.add_checked(word, self.cache[word])

    def is_already_checked(self, word):
        try:
            return bool(self.db.get('a', word))
        except:
            return False

    def get_checked(self, word):
        try:
            x = self.db.get('a', word, with_doc=True)
            y = x.get('doc', False)
            if y:
                return y.get('d', [])
            else:
                return []
        except:
            return []

    def add_checked(self, word, data):
        idata = {"a": word, 'd': data}
        try:
            saved = self.db.get('a', word, with_doc=True)
        except:
            saved = False
        if saved:
            saved['doc']['d'] = data
            doc = saved['doc']
            doc['update'] = True
            self.db.update(doc)
        else:
            self.db.insert(idata)

    def exists_cache_word(self, word):
        """ test if word exists in cache"""
        # if it exists in the cache dictionary
        if word in self.cache:
            return True
        else:
            # test in the database
            if self.is_already_checked(word):
                stored_data = self.get_checked(word)
                self.cache[word] = stored_data
                return bool(self.cache[word])
            else:
                # add an empty dict for the word to avoid repeated database checks
                self.cache[word] = {}
                return {}

    def get_relation_freq(self, word_prev, word_cur, relation):
        self.exists_cache_word(word_prev)
        return self.cache.get(word_prev, {}).get(word_cur, {}).get(relation, 0)

    def is_related(self, word_prev, word_cur):
        """ test if two words are related"""
        # search in cache
        self.exists_cache_word(word_prev)
        # if it exists in cache or database
        return self.cache.get(word_prev, {}).get(word_cur, {})

    def add_relation(self, word_prev, word_cur, relation):
        if word_prev not in self.cache:
            # first test whether it is in the db cache
            if self.is_already_checked(word_prev):
                stored_data = self.get_checked(word_prev)
                self.cache[word_prev] = stored_data
            else:
                # create a new entry
                self.cache[word_prev] = {word_cur: {relation: 1}}
        # word_prev exists: add word_cur to its dict
        elif word_cur not in self.cache[word_prev]:
            self.cache[word_prev][word_cur] = {relation: 1}
        elif relation not in self.cache[word_prev][word_cur]:
            self.cache[word_prev][word_cur][relation] = 1
        else:
            self.cache[word_prev][word_cur][relation] += 1

    def display_all(self):
        """ display all contents of the database """
        print "aranasyn.cache: display all records in Thaalib Database"
        for curr in self.db.all('a', with_doc=True):
            print curr['doc']['a'], arepr(curr['doc']['d'])
class MySharded(ShardedHashIndex):

    custom_header = 'from CodernityDB.sharded_hash import ShardedHashIndex'

    def __init__(self, *args, **kwargs):
        kwargs['sh_nums'] = 10
        kwargs['key_format'] = 'I'
        kwargs['use_make_keys'] = True
        super(MySharded, self).__init__(*args, **kwargs)

    def make_key_value(self, data):
        return data['x'], None

    def calculate_shard(self, key):
        return key % self.sh_nums


y = 1500 * 'y'

db = Database('/tmp/shard')
db.create(with_id_index=False)
db.add_index(CustomIdSharded(db.path, 'id'))
db.add_index(MySharded(db.path, 'x'))

# sharding makes no sense for this small a record count; demo only
for x in xrange(10 ** 4):
    db.insert({'x': x, 'y': y})

print db.get('x', random.randint(0, 10 ** 4))['_id']
class MavlinkListener(dric.Plugin):

    def __init__(self):
        self.timeref = 0
        self.messages_count = {}
        self.messages_stats = {}
        self.messages = {}
        self.db = None
        # {message: {parameter: datasource}}
        self.message_datasources = {}
        # {message: datasource}
        self.full_message_datasource = {}
        self.esids = []
        self.esid_aq = dric.aq.AQ(self.aq_esid_list)

    def setup(self, eventbus):
        dbpath = join(dric.datadir, 'data', 'mavlink', 'database')
        if exists(dbpath):
            rmtree(dbpath)
        self.db = Database(dbpath)
        self.db.create()
        key_ind = MavlinkIndex(self.db.path, 'key')
        self.db.add_index(key_ind)
        self.bus = eventbus
        self.timeref = time()

    @dric.on('MAVLINK')
    def mavlink_message_received(self, esid, mav_message):
        name = mav_message.get_type()
        message = mav_message.to_dict()

        # add esid
        if esid not in self.esids:
            self.esids.append(esid)
            self.messages_count[esid] = 0
            self.messages_stats[esid] = {}
            self.esid_aq.update_all()

            datasource = MavlinkMessageStatsDatasource(self.messages_stats[esid])
            source_name = "mavlink/messages_stats${}".format(esid)
            dric.add_datasource(source_name, datasource)

            datasource = MavlinkMessageCountDatasource(self.messages_count, esid)
            source_name = "mavlink/messages_count${}".format(esid)
            dric.add_datasource(source_name, datasource)

            datasource = MavlinkMessageCountPerSecondDatasource(self.messages_count, esid)
            source_name = "mavlink/messages_count_per_second${}".format(esid)
            dric.add_datasource(source_name, datasource)

        # statistics
        self.messages_count[esid] += 1
        if name not in self.messages_stats[esid]:
            self.messages_stats[esid][name] = 0
        self.messages_stats[esid][name] += 1

        # filter message
        i = self.messages_stats[esid][name]
        filterable_message = mavlink_filtering.filter(FilterableMessage(name, i, message))
        name = filterable_message.name
        i = filterable_message.i
        message = filterable_message.message

        # add message to database
        # self.db.insert(dict(name=name, n=i, esid=esid, message=dumps(message),
        #                     timestamp=(time() - self.timeref)))

        namesid = name + '$' + esid

        # parameter datasource
        if namesid not in self.message_datasources:
            self.message_datasources[namesid] = {}
        message_datasource_dict = self.message_datasources[namesid]
        for parameter in message:
            if parameter not in message_datasource_dict:
                # check if parameter is plottable (float)
                noplot = False
                try:
                    float(message[parameter])
                except:
                    noplot = True
                datasource = MavlinkMessageDatasource(0.5, noplot)
                message_datasource_dict[parameter] = datasource
                source_name = "mavlink-{}/{}${}".format(name, parameter, esid)
                dric.add_datasource(source_name, datasource)
            message_datasource_dict[parameter].push(message[parameter])

        # full message datasource
        if namesid not in self.full_message_datasource:
            self.full_message_datasource[namesid] = MavlinkFullMessageDatasource(0.5, True)
            dric.add_datasource('mavlink-{}'.format(namesid),
                                self.full_message_datasource[namesid])
        self.full_message_datasource[namesid].push(message)

        # dispatch event
        event_name = 'MAVLINK/{}'.format(name.upper())
        self.bus.publish(event_name.upper(), MavlinkEvent(esid, mav_message, message))

    @dric.route('mavlink_message', '/mavlink/message/<esid>/<name>/<int:n>')
    def get_mavlink_message(self, esid, name, n, request=None):
        try:
            return dric.Response(self.db.get('key', dict(name=name, n=n, esid=esid))['message'])
        except KeyError:
            raise NotFound()

    @dric.route('mavlink_download_range', '/mavlink/download/<esid>/<name>/<key>/<query>')
    def download_mavlink_log_range(self, esid, name, key, query, request=None):
        output = StringIO.StringIO()
        for i in RangeQuery(query):
            try:
                record = self.db.get('key', dict(name=name, n=i, esid=esid))
                message_as_dict = loads(record['message'])
                output.write(str(record['timestamp']))
                output.write(" ")
                output.write(str(message_as_dict[key]))
                output.write('\n')
            except RecordNotFound:
                pass
        # bug fix: the original declared the invalid MIME type "plain/txt"
        return dric.Response(output.getvalue(), content_type="text/plain")

    @dric.route('mavlink_download', '/mavlink/download/<esid>/<name>/<key>')
    def download_mavlink_log(self, esid, name, key, request=None):
        output = StringIO.StringIO()
        try:
            i = 0
            while True:
                i = i + 1
                record = self.db.get('key', dict(name=name, esid=esid, n=i))
                message_as_dict = loads(record['message'])
                output.write(str(record['timestamp']))
                output.write(" ")
                output.write(str(message_as_dict[key]))
                output.write('\n')
        except RecordNotFound:
            pass
        return dric.Response(output.getvalue(), content_type="text/plain")

    @dric.route('mavlink_esid', '/mavlink/esid')
    def get_esid_list(self, request):
        if dric.support.accept.xml_over_json(request):
            root = ET.Element('systems')
            root.set('version', '1')
            for esid in self.esids:
                esid_el = ET.SubElement(root, 'systemid')
                esid_el.text = esid
            return dric.XMLResponse(root)
        elif dric.support.accept.json_over_xml(request):
            return dric.JSONResponse(self.esids)
        else:
            raise dric.exceptions.NotAcceptable('xml or json')

    @dric.websocket('mavlink_esid_ws', '/mavlink/esid/ws', ['AQ'])
    def ws_esid_list(self, ws, request):
        self.esid_aq.incoming(ws)

    def aq_esid_list(self):
        return unicode(dumps(self.esids))
class Cache(object):
    """ cache for word morphological analysis """

    DB_PATH = os.path.join(os.path.expanduser('~'), '.qalsadiCache')

    def __init__(self, dp_path=False):
        """ Create Analex Cache """
        self.cache = {
            'checkedWords': {},
            'FreqWords': {
                'noun': {},
                'verb': {},
                'stopword': {}
            },
        }
        if not dp_path:
            dp_path = self.DB_PATH
        else:
            dp_path = os.path.join(os.path.dirname(dp_path), '.qalsadiCache')
        self.db = Database(dp_path)
        if not self.db.exists():
            self.db.create()
            x_ind = WithAIndex(self.db.path, 'a')
            self.db.add_index(x_ind)
        else:
            self.db.open()

    def __del__(self):
        """ Delete instance and clear cache """
        self.cache = None
        self.db.close()

    def is_already_checked(self, word):
        """ return whether ``word`` is already cached"""
        try:
            return bool(self.db.get('a', word))
        except:
            return False

    def get_checked(self, word):
        """ return the checked ``word`` from cache"""
        xxx = self.db.get('a', word, with_doc=True)
        yyy = xxx.get('doc', False)
        if yyy:
            return yyy.get('d', [])
        else:
            return []

    def add_checked(self, word, data):
        """ add the checked ``word`` to cache"""
        idata = {"a": word, 'd': data}
        self.db.insert(idata)

    def exists_cache_freq(self, word, wordtype):
        """ return whether ``word`` exists in the freq cache"""
        # bug fix: look the word up in the per-type dictionary,
        # not among the type names themselves
        return word in self.cache['FreqWords'][wordtype]

    def get_freq(self, originalword, wordtype):
        """ return ``originalword`` frequency from cache"""
        return self.cache['FreqWords'][wordtype].get(originalword, 0)

    def add_freq(self, original, wordtype, freq):
        """ add frequency ``freq`` for ``original`` to cache"""
        self.cache['FreqWords'][wordtype][original] = freq
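# A short round-trip through the Cache class above (hypothetical word and
# analysis data):
cache = Cache()
word = u'kitab'
if not cache.is_already_checked(word):
    cache.add_checked(word, [{'lemma': word, 'pos': 'noun'}])
print cache.get_checked(word)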
# The two overlapping copies of this snippet were merged; the header of
# TaskWithIntiator is truncated in the source and is reconstructed here from
# its super() call (the HashIndex base and key_format are assumptions).
from hashlib import md5

from CodernityDB.database import Database
from CodernityDB.hash_index import HashIndex


class TaskWithIntiator(HashIndex):

    def __init__(self, *args, **kwargs):
        kwargs['key_format'] = '16s'  # assumed: md5 digests are 16 bytes
        super(TaskWithIntiator, self).__init__(*args, **kwargs)

    def make_key_value(self, data):
        if data['t'] == constants.TASK_TYPE_CODE and data['tag'] == 'TRAVEL':
            return md5(data['extra_params']['username']).digest(), None

    def make_key(self, key):
        return md5(key.username).digest()


class PermitTravelIndex(HashIndex):

    def __init__(self, *args, **kwargs):
        kwargs['key_format'] = 'I'
        super(PermitTravelIndex, self).__init__(*args, **kwargs)

    def make_key_value(self, data):
        if data['t'] == constants.TASK_TYPE_CODE and data['tag'] == 'PERMIT_TRAVEL':
            return 0, None

    def make_key(self, key):
        return 0


codernity_db = Database('db')
codernity_db.create()
codernity_db.add_index(TaskWithIntiator(codernity_db.path, 'task_with_initiator'))
codernity_db.add_index(PermitTravelIndex(codernity_db.path, 'permit_travel'))

from lite_task_flow.indexes import add_index
add_index(codernity_db)