Esempio n. 1
0
def fetch_city_name_id(city_id='', city_name='', db_name='imd_city_db'):
    '''
        Fetch city names, IDs and corresponding links from a local levelDB.

        Lookup rules:
          * city_id given (with or without city_name): the ID is validated
            and the record stored under it is looked up.
          * only city_name given: whatever __match_city_name__ yields for
            that name is merged into the response.
          * no arguments: every record in the database is returned.

        Returns a dict. Records are keyed by city ID, each value being the
        stored string split on ';'. A missing ID gives
        {'status': 'record not found'}; any error gives
        {'status': <error message>}.
    '''
    resp = {}
    try:
        db_handle = DB(db_name, create_if_missing=True)
        try:
            if city_id:
                if not __validate_city_id__(city_id):
                    raise Exception('city id not validated')
                tmp = db_handle.get(city_id.encode('utf-8'), b'')
                if tmp:
                    resp.update({city_id: tmp.decode('utf-8').split(';')})
                else:
                    resp = {'status': 'record not found'}
            elif city_name:
                # The iterator is released as soon as matching is done,
                # even if the matcher raises.
                with db_handle.iterator() as itr:
                    resp.update(__match_city_name__(city_name, itr))
            else:
                with db_handle.iterator() as itr:
                    for key, value in itr:
                        resp[key.decode('utf-8')] = \
                            value.decode('utf-8').split(';')
        finally:
            # Always release the database, whichever branch failed.
            db_handle.close()
    except Exception as e:
        # Every failure (plyvel errors included) is reported the same way.
        resp = {'status': str(e)}
    return resp
Esempio n. 2
0
 def _get_db(self, volume_id):
     try:
         db = self.dbs[volume_id]
     except KeyError:
         db_path = self._get_db_path(volume_id)
         self.dbs[volume_id] = DB(db_path, create_if_missing=False)
         db = self.dbs[volume_id]
     return db
Esempio n. 3
0
 def _get_db(self, volume_id):
     try:
         db = self.dbs[volume_id]
     except KeyError:
         path = "%s/%s" % (self.db_path, volume_id)
         self.dbs[volume_id] = DB(path, create_if_missing=True)
         db = self.dbs[volume_id]
     return db
Esempio n. 4
0
def main():
    """Copy per-category counters from the CSV at DATA_PATH into LevelDB.

    Every CSV row must have exactly five columns: an id, a name, and the
    page, sub-category and file counts. The three counts are stored as a
    JSON object under the key PREFIX + name.
    """
    db = DB("/home/xinyang/Datasets/Wikipedia/enwiki_leveldb/")
    try:
        # newline="" is what the csv module requires so that newlines
        # embedded in quoted fields are parsed correctly.
        with open(DATA_PATH, encoding="latin1", newline="") as fobj:
            reader = csv.reader(fobj)
            for _pid, name, num_pages, num_subcats, num_files in tqdm(reader):
                dobj = dict(num_pages=int(num_pages),
                            num_subcats=int(num_subcats),
                            num_files=int(num_files))
                db.put(PREFIX + name.encode(), ujson.dumps(dobj).encode())
    finally:
        # Release the database even when a row fails to parse.
        db.close()
Esempio n. 5
0
def store_city_name_id(data, db_name='imd_city_db'):
    '''
        Store city names, IDs and corresponding links into a local levelDB.

        data is passed through __format_db_entry__ and every resulting
        key/value pair is written to the database named db_name (created
        if missing).

        Returns {'status': 'success'} when everything was written, or
        {'status': <error message>} when anything failed.
    '''
    resp = {}
    try:
        db_handle = DB(db_name, create_if_missing=True)
        try:
            for key, value in __format_db_entry__(data).items():
                db_handle.put(key, value)
        finally:
            # Release the database even if formatting or a write failed.
            db_handle.close()
        resp = {'status': 'success'}
    except Exception as e:
        # Every failure (plyvel errors included) is reported the same way.
        resp = {'status': str(e)}
    return resp
Esempio n. 6
0
    def __init__(self, dbpath, *args, debug=False, refresh=None, **kwargs):
        """
        Open the database at dbpath (creating it if missing) and reset state.

        :param dbpath: location handed to DB
        :param debug: when truthy, switch the module-level handler to DEBUG
        :param refresh: ignore data in db and refresh using new value;
            stored on the instance as a plain boolean
        """
        super().__init__(*args, **kwargs)
        try:
            self.db = DB(dbpath, create_if_missing=True)
        except Exception as exc:
            # Leave the attribute defined before propagating the failure.
            self.db = None
            raise exc

        self.old_key, self.upgrade = None, False

        if debug:
            handler.level = logging.DEBUG

        self.refresh = bool(refresh)
Esempio n. 7
0
# Block numbers 0 .. 8*1024-1, drawn by sample() with the sample size equal
# to the population size.
blocks_sample = sample(range(8 * 1024), 8 * 1024)


class INode(object):
    """Plain record for one inode: a size, eight block slots, a free count."""

    def __init__(self):
        # Attribute order is kept because the instance __dict__ is
        # serialised as-is by the code that stores inodes.
        self.f_size = 0
        self.f_blocks = [None for _ in range(8)]
        # Free blocks - Can be ignored
        self.f_frblocks = 0


# Create 1024 inodes, give each one freshly populated blocks and store the
# inode records in LevelDB. The LevelDB block size comes from argv[1].
total_blocks = 1024 * 10
block_size = 1024
ev_inodes, ev_blocks = (0, 0)
db = DB('/home/cujo/nfs/db/db2',
        create_if_missing=True,
        block_size=int(sys.argv[1]))
_, current_blocks = update_vfs(block_size, total_blocks)

for i in range(1024):
    bytes_written = 0
    inode = INode()
    # Every slot except the last one is filled.
    for k in range(len(inode.f_blocks) - 1):
        block_number = blocks_sample.pop()
        inode.f_blocks[k] = block_number
        bytes_written += populate_block(block_number)
        ev_blocks += 1
        _, current_blocks = update_vfs(block_size, current_blocks)
    inode.f_size = bytes_written
    # Key is b'i_' followed by the decimal text of i. bytes(i) only gives
    # that text on Python 2; on Python 3 it produces i NUL bytes, so the
    # number is formatted explicitly.
    db.put(b'i_' + str(i).encode('ascii'), dumps(inode.__dict__))
    ev_inodes += 1
Esempio n. 8
0
import stat
import errno
import fuse
import sys
from json import loads
from fuse import Fuse
from plyvel import DB


# Refuse to run against a fuse-py build that does not expose __version__.
# The exception is raised with call syntax, which is valid on both
# Python 2 and Python 3 (the "raise Cls, msg" form is Python-2-only).
if not hasattr(fuse, '__version__'):
    raise RuntimeError(
        "your fuse-py doesn't know of fuse.__version__, probably it's too old.")

fuse.fuse_python_api = (0, 2)

# LevelDB handle shared by the module; the block size comes from argv[1].
db = DB('/home/cujo/nfs/db/db1', block_size=int(sys.argv[1]))


class LWStat(fuse.Stat):
    """fuse.Stat subclass whose stat fields all start out as zero."""

    def __init__(self):
        fuse.Stat.__init__(self)
        # Same fields, same order, same initial value as spelling them out.
        for field in ('st_mode', 'st_ino', 'st_dev', 'st_nlink', 'st_uid',
                      'st_gid', 'st_size', 'st_atime', 'st_mtime',
                      'st_ctime'):
            setattr(self, field, 0)
#!/usr/bin/env python
import csv
from collections import defaultdict

import ujson
from plyvel import DB

CATEGORYLINKS_PATH = "/home/xinyang/Datasets/Wikipedia/csv/enwiki-20180101-categorylinks-clean.csv"
PREFIX = b"category_pageids_"

if __name__ == "__main__":
    # Group page ids by category name, keeping only rows whose third
    # column is 'page'.
    cate_pageids = defaultdict(list)
    # newline="" is what the csv module requires so that newlines
    # embedded in quoted fields are parsed correctly.
    with open(CATEGORYLINKS_PATH, newline="") as fobj:
        for row in csv.reader(fobj):
            if row[2] != 'page':
                continue

            page_id, cate_name = row[0], row[1]
            cate_pageids[cate_name].append(page_id)

    # Open the database outside the try block: if opening fails there is
    # nothing to close, and the original error must not be masked by an
    # AttributeError from calling close() on None.
    db = DB("/home/xinyang/Datasets/Wikipedia/enwiki_leveldb")
    try:
        # One record per category: PREFIX + name -> JSON list of page ids.
        for cate_name, page_ids in cate_pageids.items():
            db.put(PREFIX + cate_name.encode(), ujson.dumps(page_ids).encode())
    finally:
        db.close()
Esempio n. 10
0
from plyvel import DB
from gensim.models.word2vec import Word2Vec
import numpy as np

# Export every vector of a binary word2vec model into LevelDB, keyed by the
# UTF-8 encoded word and stored as raw float16 bytes.
db_name = '../w2v_vectors'
w2v_path = '/home/legin/kudablyat/data/all.norm-sz100-w10-cb0-it1-min100.w2v'
w2v = Word2Vec.load_word2vec_format(w2v_path,
                                    binary=True,
                                    unicode_errors='ignore')
db = DB(db_name, create_if_missing=True)
i = 0
try:
    for key in w2v.vocab:
        i += 1
        if i % 1000 == 0:
            # Progress report; call syntax works on Python 2 and 3 alike.
            print(i)
        vector = w2v[key]
        # tobytes() is the supported spelling of the deprecated tostring().
        db.put(key.encode('utf-8'),
               np.array(vector).astype(np.float16).tobytes())
finally:
    # Release the database even if the export stops half-way.
    db.close()
Esempio n. 11
0
import os
from plyvel import DB
from bitcoin_requests import BitcoinRPC

# Connection settings come from the environment. An unset or empty address
# falls back to the local default.
BITCOIN_RPC_ADDRESS = (
    os.environ.get("BITCOIN_RPC_ADDRESS") or "http://127.0.0.1:8443"
)
BITCOIN_RPC_USER = os.environ.get("BITCOIN_RPC_USER")
BITCOIN_RPC_PASSWORD = os.environ.get("BITCOIN_RPC_PASSWORD")

bitcoin = BitcoinRPC(
    BITCOIN_RPC_ADDRESS,
    BITCOIN_RPC_USER,
    BITCOIN_RPC_PASSWORD,
)
# Value of the "blocks" field reported by getblockchaininfo at start-up.
next_block = bitcoin.getblockchaininfo()["blocks"]

db = DB("db", create_if_missing=True)
Esempio n. 12
0
def set_ext_key(database: Path, extension: str, key: str, value: object):
    """Sets the value to the key of the given extension.

    The database at ``database`` is opened for this single write and closed
    again afterwards, even if the write fails, so the handle is not left
    open for the garbage collector to clean up.
    """

    db = DB(str(database))
    try:
        db.put(get_key(extension, key), get_value(value))
    finally:
        db.close()
Esempio n. 13
0
 def create(self, volume_id):
     """Create the database for volume_id if it does not exist yet."""
     db_path = self._get_db_path(volume_id)
     handle = DB(db_path, create_if_missing=True)
     # Only creation is wanted here; close explicitly instead of relying
     # on garbage collection to release the handle.
     handle.close()
Esempio n. 14
0
def load_w2v():
    """Open the DB at config.w2v_path and bind it to the global w2v_model.

    Progress is printed before and after opening. print is called as a
    function so the code parses on Python 3 as well as Python 2.
    """
    global w2v_model
    print('loading w2v')
    w2v_model = DB(config.w2v_path)
    print('loaded')