def fetch_city_name_id(city_id='', city_name='', db_name='imd_city_db'):
    '''
    Fetch city names, IDs and corresponding links from the local levelDB.

    If both city_id and city_name are passed, city_id is chosen over
    city_name for the lookup.  Passing only city_name returns possible
    matches; passing no arguments returns all available records.

    Returns a dict mapping city id -> list of ';'-separated fields on
    success, or {'status': <message>} on failure / no match.
    '''
    resp = {}
    db_handle = None
    try:
        db_handle = DB(db_name, create_if_missing=True)
        if city_id:
            if not __validate_city_id__(city_id):
                raise Exception('city id not validated')
            tmp = db_handle.get(city_id.encode('utf-8'), b'')
            if tmp:
                resp.update({city_id: tmp.decode('utf-8').split(';')})
            else:
                resp = {'status': 'record not found'}
        elif city_name:
            resp.update(__match_city_name__(city_name, db_handle.iterator()))
        else:
            # Full scan: keys are city ids, values are ';'-joined fields.
            itr = db_handle.iterator()
            try:
                for key, value in itr:
                    resp[key.decode('utf-8')] = value.decode('utf-8').split(';')
            finally:
                itr.close()
    except plError as e:
        resp = {'status': str(e)}
    except Exception as e:
        resp = {'status': str(e)}
    finally:
        # Close the DB even when a lookup fails, so the LevelDB lock is
        # released (the original leaked the handle on any exception).
        if db_handle is not None:
            db_handle.close()
    return resp
def _get_db(self, volume_id): try: db = self.dbs[volume_id] except KeyError: db_path = self._get_db_path(volume_id) self.dbs[volume_id] = DB(db_path, create_if_missing=False) db = self.dbs[volume_id] return db
def _get_db(self, volume_id): try: db = self.dbs[volume_id] except KeyError: path = "%s/%s" % (self.db_path, volume_id) self.dbs[volume_id] = DB(path, create_if_missing=True) db = self.dbs[volume_id] return db
def main():
    """Import per-category page/subcat/file counts from CSV into LevelDB.

    Keys are the raw category names (utf-8 encoded); values are
    ujson-encoded dicts with num_pages / num_subcats / num_files counts.
    """
    db = DB("/home/xinyang/Datasets/Wikipedia/enwiki_leveldb/")
    try:
        # latin1 matches the dump's encoding; `with` closes the file
        # (the original leaked both the file object and the DB handle).
        with open(DATA_PATH, encoding="latin1") as fobj:
            reader = csv.reader(fobj)
            for pid, name, num_pages, num_subcats, num_files in tqdm(reader):
                record = dict(
                    num_pages=int(num_pages),
                    num_subcats=int(num_subcats),
                    num_files=int(num_files),
                )
                db.put(PREFIX + name.encode(), ujson.dumps(record).encode())
    finally:
        # Release the LevelDB lock even if a row fails to parse.
        db.close()
def store_city_name_id(data, db_name='imd_city_db'):
    '''
    Store city names, IDs and corresponding links into a local levelDB.

    The city ID is used as the key value.  Returns {'status': 'success'}
    on success or {'status': <error message>} on failure.
    '''
    db_handle = None
    try:
        db_handle = DB(db_name, create_if_missing=True)
        for key, value in __format_db_entry__(data).items():
            db_handle.put(key, value)
        resp = {'status': 'success'}
    except plError as e:
        resp = {'status': str(e)}
    except Exception as e:
        resp = {'status': str(e)}
    finally:
        # Always release the LevelDB handle/lock, even when a put fails
        # (the original only closed on the success path).
        if db_handle is not None:
            db_handle.close()
    return resp
def __init__(self, dbpath, *args, debug=False, refresh=None, **kwargs):
    """
    :param dbpath: path of the levelDB database (created if missing)
    :param debug: when true, raise the module handler's level to DEBUG
    :param refresh: ignore data in db and refresh using new value
    """
    super().__init__(*args, **kwargs)
    try:
        self.db = DB(dbpath, create_if_missing=True)
    except Exception:
        # Leave a well-defined attribute behind, then propagate with the
        # original traceback (bare `raise` preserves it; `raise e` resets it).
        self.db = None
        raise
    self.old_key = None  # last key processed; maintained by callers
    self.upgrade = False
    if debug:
        handler.level = logging.DEBUG
    # Normalize the refresh flag to a plain bool, as before.
    self.refresh = bool(refresh)
# Pre-shuffled pool of candidate block numbers (0 .. 8K-1); entries are
# popped as blocks are allocated so each block number is used at most once.
blocks_sample = sample(range(0, 8 * 1024), 8 * 1024)


class INode(object):
    # Minimal in-memory inode: file size, direct block pointers, and a
    # free-block counter.
    def __init__(self):
        self.f_size = int(0)
        self.f_blocks = [None] * 8  # Free blocks - Can be ignored
        self.f_frblocks = int(0)


total_blocks = 1024 * 10
block_size = 1024
ev_inodes, ev_blocks = (0, 0)  # event counters: inodes / blocks written
# LevelDB table block size is taken from the command line.
db = DB('/home/cujo/nfs/db/db2', create_if_missing=True, block_size=int(sys.argv[1]))
_, current_blocks = update_vfs(block_size, total_blocks)
for i in range(1024):
    bytes_written = 0
    inode = INode()
    # NOTE(review): only the first len-1 of the 8 f_blocks slots are
    # filled — presumably the last slot is reserved; confirm intent.
    for k in range(len(inode.f_blocks) - 1):
        block_number = blocks_sample.pop()
        inode.f_blocks[k] = block_number
        bytes_written += populate_block(block_number)
        ev_blocks += 1
        _, current_blocks = update_vfs(block_size, current_blocks)
    inode.f_size = bytes_written
    # NOTE(review): under Python 3, bytes(i) yields i zero bytes rather
    # than the decimal string — this key scheme looks written for
    # Python 2 (where bytes(i) == str(i)); confirm target version.
    db.put(b'i_' + bytes(i), dumps(inode.__dict__))
    ev_inodes += 1
import stat
import errno
import fuse
import sys
from json import loads
from fuse import Fuse
from plyvel import DB

# fuse-py gained fuse.__version__ alongside the modern API; refuse to run
# against an older binding.  (Python 2 raise syntax — this module targets
# Python 2.)
if not hasattr(fuse, '__version__'):
    raise RuntimeError, \
        "your fuse-py doesn't know of fuse.__version__, probably it's too old."

fuse.fuse_python_api = (0, 2)

# Backing LevelDB store; its table block size comes from the command line.
db = DB('/home/cujo/nfs/db/db1', block_size=int(sys.argv[1]))


class LWStat(fuse.Stat):
    # fuse.Stat with every field zeroed so FS handlers only set the
    # fields they actually care about.
    def __init__(self):
        fuse.Stat.__init__(self)
        self.st_mode = 0
        self.st_ino = 0
        self.st_dev = 0
        self.st_nlink = 0
        self.st_uid = 0
        self.st_gid = 0
        self.st_size = 0
        self.st_atime = 0
        self.st_mtime = 0
        self.st_ctime = 0
#!/usr/bin/env python
"""Build a category-name -> page-ids index in LevelDB from categorylinks CSV."""
import csv
from collections import defaultdict

import ujson
from plyvel import DB

CATEGORYLINKS_PATH = "/home/xinyang/Datasets/Wikipedia/csv/enwiki-20180101-categorylinks-clean.csv"
PREFIX = b"category_pageids_"

if __name__ == "__main__":
    # Group page ids by category name, keeping only 'page'-type links.
    cate_pageids = defaultdict(list)
    with open(CATEGORYLINKS_PATH) as fobj:
        for row in csv.reader(fobj):
            if row[2] != 'page':
                continue
            page_id, cat_name = row[0], row[1]
            cate_pageids[cat_name].append(page_id)

    db = None
    try:
        db = DB("/home/xinyang/Datasets/Wikipedia/enwiki_leveldb")
        for cate_name, page_ids in cate_pageids.items():
            db.put(PREFIX + cate_name.encode(), ujson.dumps(page_ids).encode())
    finally:
        # Guard the close: if DB() itself raised, db is still None and an
        # unconditional db.close() would mask the real error with an
        # AttributeError (the original had exactly that bug).
        if db is not None:
            db.close()
from plyvel import DB
from gensim.models.word2vec import Word2Vec
import numpy as np

# Export every word2vec vocabulary vector into LevelDB as raw float16
# bytes keyed by the utf-8 word.  (Python 2 module — `print i` statement;
# load_word2vec_format on the Word2Vec class is the pre-gensim-1.0 API.)
db_name = '../w2v_vectors'
w2v_path = '/home/legin/kudablyat/data/all.norm-sz100-w10-cb0-it1-min100.w2v'
w2v = Word2Vec.load_word2vec_format(w2v_path, binary=True, unicode_errors='ignore')
db = DB(db_name, create_if_missing=True)
i = 0
for key in w2v.vocab:
    i += 1
    # Progress marker every 1000 words.
    if i % 1000 == 0:
        print i
    vector = w2v[key]
    # Stored as half precision to halve on-disk size; tostring() yields
    # the raw little-endian byte buffer.
    db.put(key.encode('utf-8'), np.array(vector).astype(np.float16).tostring())
import os
from plyvel import DB
from bitcoin_requests import BitcoinRPC

# Bitcoin Core RPC endpoint: credentials come from the environment, with
# a localhost default for the address only.
BITCOIN_RPC_ADDRESS = os.getenv(
    "BITCOIN_RPC_ADDRESS") or "http://127.0.0.1:8443"
BITCOIN_RPC_USER = os.getenv("BITCOIN_RPC_USER")
BITCOIN_RPC_PASSWORD = os.getenv("BITCOIN_RPC_PASSWORD")
bitcoin = BitcoinRPC(BITCOIN_RPC_ADDRESS, BITCOIN_RPC_USER, BITCOIN_RPC_PASSWORD)
# Chain tip height at startup — presumably the next block to process;
# NOTE(review): this performs a network RPC at import time.
next_block = bitcoin.getblockchaininfo()["blocks"]
# Local LevelDB state store, created alongside the working directory.
db = DB("db", create_if_missing=True)
def set_ext_key(database: Path, extension: str, key: str, value: object):
    """Sets the value to the key of the given extension.

    Opens the LevelDB at *database*, writes the single record, and closes
    the handle again so the database lock is not left held (the original
    dropped the handle without closing it).
    """
    db = DB(str(database))
    try:
        db.put(get_key(extension, key), get_value(value))
    finally:
        db.close()
def create(self, volume_id):
    """Create the on-disk database for *volume_id* if it does not exist."""
    db_path = self._get_db_path(volume_id)
    # Open with create_if_missing so the DB comes into existence, then
    # close immediately — the original discarded the handle without
    # closing, leaving the LevelDB lock held until garbage collection.
    DB(db_path, create_if_missing=True).close()
def load_w2v():
    # Open the word2vec LevelDB (path from config) into the module-level
    # w2v_model handle.  (Python 2 — print statements.)
    global w2v_model
    print 'loading w2v'
    w2v_model = DB(config.w2v_path)
    print 'loaded'