Example #1
0
    def load(cls, file):
        """Return a deserialized instance of this class read from `file`.

        `file` is either a path string, which is opened here, or an already
        open file object. The first line of the file is a header of the form
        ``version:metalen``. One DiscoDB is loaded at the offset just past
        the header and becomes the metadata db; a second DiscoDB is loaded
        ``metalen`` bytes further on and becomes the data db. The result is
        ``cls(datadb, metadb)``.

        Raises ValueError if the header does not split into exactly two
        ':'-separated fields, or if ``metalen`` is not an integer.
        """
        if isinstance(file, basestring):
            # Buffering argument 0 asks for an unbuffered file object.
            file = open(file, 'r', 0)

        header = file.readline()
        offset = len(header)
        # The two-way unpacking doubles as a header format check; the
        # version field itself is not consulted.
        _version, metalen = header.strip().split(':')
        metadb = DiscoDB.load(file, offset)
        offset += int(metalen)
        datadb = DiscoDB.load(file, offset)
        return cls(datadb, metadb)
Example #2
0
 def test_dump_load(self):
     """Dump the fixture db to a temporary file and check that loading it
     back yields the same serialized form."""
     from tempfile import NamedTemporaryFile
     # The context manager closes the temporary file even when the
     # assertion fails, instead of leaving that to garbage collection.
     with NamedTemporaryFile() as handle:
         self.discodb.dump(handle)
         handle.seek(0)
         discodb = DiscoDB.load(handle)
         # assertEqual replaces the deprecated assertEquals alias.
         self.assertEqual(discodb.dumps(), self.discodb.dumps())
Example #3
0
def maybe_method(datadir, rest, method, xargs=None):
    """Call `method` on the DiscoDB named by a ``<file>/<method>/<arg>`` path.

    `rest` is split on the first ``/<method>/``. The part before it is
    joined onto `datadir` and must be an existing file; that file is loaded
    as a DiscoDB and `method` is looked up on it. When `xargs` is given, the
    method is called with ``xargs(arg)``; otherwise it is called with no
    arguments.

    Raises NotMethod when `rest` does not contain ``/<method>/`` after its
    first character, or when the resulting path is not a file.
    """
    separator = '/%s/' % method
    if rest.find(separator) <= 0:
        raise NotMethod(method)

    relative, arg = rest.split(separator, 1)
    path = os.path.join(datadir, relative)
    if not os.path.isfile(path):
        raise NotMethod(method)

    bound_method = getattr(DiscoDB.load(open(path)), method)
    if xargs:
        return bound_method(xargs(arg))
    return bound_method()
 def __init__(self, **tables):
   """Build the name -> DiscoTable mapping from keyword arguments.

   Each keyword names a table. Its value is either a DiscoDB instance,
   which is used as is, or anything else, which is passed to open() and
   loaded with DiscoDB.load().
   """
   self._tables = {}
   for table_name in tables:
     source = tables[table_name]
     if not isinstance(source, DiscoDB):
       source = DiscoDB.load(open(source))
     self._tables[table_name] = DiscoTable(self, table_name, source)
Example #5
0
def load(name):
    """Load every partition of the db called `name` into ``dbs[name]``.

    The partition count is read from the 'partitions' file inside
    ``dirname(name)``. The resulting list has one slot per partition: the
    loaded DiscoDB when ``filename(name, partition)`` exists on disk, and
    otherwise a single shared empty DiscoDB instance.
    """
    base = dirname(name)
    with open(os.path.join(base, 'partitions')) as counter:
        partitions = int(counter.read())

    # One empty instance stands in for every missing partition.
    placeholder = DiscoDB()
    shards = []
    for index in xrange(partitions):
        shard_path = filename(name, index)
        if os.path.exists(shard_path):
            shards.append(DiscoDB.load(open(shard_path)))
        else:
            shards.append(placeholder)
    dbs[name] = shards
Example #6
0
def test_leak():
    """Exercise DiscoDB operations in an endless loop; never returns.

    The function measures nothing itself. Presumably it is meant to be run
    while the process's memory use is watched from outside (TODO confirm).
    """
    while True:
        # Fresh db on every pass, mapping each entry of `letters` to 'abc'.
        d = DiscoDB(zip(letters, ['abc'] * 1000))
        # Every result is bound to `t` and released by the next rebinding.
        t = len(d.query('a'))
        t = len(d['b'])
        t = 'd' in d
        # dumps() / loads() round trip.
        t = d.dumps()
        t = DiscoDB.loads(t)
        # dump() / load() round trip through /tmp/discodb; neither file
        # object is closed explicitly.
        t = d.dump(open('/tmp/discodb', 'w'))
        t = DiscoDB.load(open('/tmp/discodb'))
        # Compare every key against every value.
        for k in d.keys():
            for v in d.values():
                t = k == v
Example #7
0
File: perf.py Project: rch/discodb
def test_leak():
    """Run a fixed sequence of DiscoDB calls forever; never returns.

    Nothing is asserted or measured here. Presumably the caller observes
    the process's memory use while this runs (TODO confirm).
    """
    while True:
        # A new db per iteration: every entry of `letters` maps to "abc".
        d = DiscoDB(zip(letters, ["abc"] * 1000))
        # Results all go through `t`, so each is dropped on the next line.
        t = len(d.query("a"))
        t = len(d["b"])
        t = "d" in d
        # Round trip via dumps() and loads().
        t = d.dumps()
        t = DiscoDB.loads(t)
        # Round trip via dump() and load() using /tmp/discodb; the file
        # objects are never closed explicitly.
        t = d.dump(open("/tmp/discodb", "w"))
        t = DiscoDB.load(open("/tmp/discodb"))
        # Pairwise comparison of all keys with all values.
        for k in d.keys():
            for v in d.values():
                t = k == v
Example #8
0
def test_leak():
    """Loop forever over a set of DiscoDB operations; never returns.

    No result is checked. Presumably intended for watching memory growth
    of the running process from outside (TODO confirm).
    """
    while True:
        # Rebuild the db each time round from `letters`, all mapped to 'abc'.
        d = DiscoDB(zip(letters, ['abc'] * 1000))
        # `t` is a scratch name: each value lives only until the next line.
        t = len(d.query('a'))
        t = len(d['b'])
        t = 'd' in d
        # dumps() followed by loads() of the result.
        t = d.dumps()
        t = DiscoDB.loads(t)
        # dump() to /tmp/discodb, then load() from it; no explicit close().
        t = d.dump(open('/tmp/discodb', 'w'))
        t = DiscoDB.load(open('/tmp/discodb'))
        # Touch every (key, value) combination.
        for k in d.keys():
            for v in d.values():
                t = k == v
Example #9
0
def scan_database_dir(state):
    """Load any not-yet-known db files from $DATA_DB_PATH into `state`.

    Every regular file in the directory whose full path is not already a
    key of ``state["dbs"]`` is loaded with DiscoDB.load() and stored under
    that path. A file that raises DiscoDBError is deleted and the failure
    is logged. When ``state["dbs"]`` ends up non-empty,
    ``state["cache_time"]`` is set to the current time. Returns `state`.
    """
    db_path = os.environ["DATA_DB_PATH"]
    for entry in os.listdir(db_path):
        fname = os.path.join(db_path, entry)
        if fname in state["dbs"] or not os.path.isfile(fname):
            continue
        try:
            state["dbs"][fname] = DiscoDB.load(open(fname))
        except DiscoDBError:
            # Possibly a corrupt discodb: remove it so that the sync
            # process can fetch a fresh copy later on.
            os.remove(fname)
            logger.exception("Unable to open %s", fname)

    if state["dbs"]:
        state["cache_time"] = time.time()
    return state
Example #10
0
def scan_database_dir(state):
  """Pick up new db files found under $DATA_DB_PATH.

  Regular files whose joined path is not yet a key of ``state['dbs']`` are
  loaded with DiscoDB.load() and recorded under that path. If loading
  raises DiscoDBError, the file is removed and the error is logged.
  ``state['cache_time']`` is refreshed whenever ``state['dbs']`` is
  non-empty afterwards. The same `state` object is returned.
  """
  db_path = os.environ['DATA_DB_PATH']
  candidates = [os.path.join(db_path, f) for f in os.listdir(db_path)]
  for fname in candidates:
    is_new = fname not in state['dbs']
    if not (is_new and os.path.isfile(fname)):
      continue
    try:
      state['dbs'][fname] = DiscoDB.load(open(fname))
    except DiscoDBError:
      # Possibly a corrupt discodb: delete it and let the sync process
      # fetch a new one later on.
      os.remove(fname)
      logger.exception('Unable to open %s', fname)

  if state['dbs']:
    state['cache_time'] = time.time()
  return state
Example #11
0
def input_stream(fd, size, url, params):
    """Open a local discodb URL and return ``(result, size, url)``.

    The URL is split with util.urlsplit(). When the first element of its
    netloc equals the first element of Task.netloc, the part of the path
    before an optional '!' is opened relative to Task.root, loaded as a
    DiscoDB and stored on ``Task.discodb``. The part after '!' has the
    form ``method`` or ``method/arg``:

    * ``query`` runs ``params.discodb_query`` when `params` has that
      attribute, otherwise ``Q.urlscan(arg)``;
    * any other method is called on the db without arguments;
    * with nothing after '!', the db itself is the result.

    Raises core.DiscoError when the netloc does not match Task.netloc.
    """
    scheme, netloc, rest = util.urlsplit(url)

    if netloc[0] == Task.netloc[0]:
        path, rest   = rest.split('!', 1) if '!' in rest else (rest, '')
        Task.discodb = DiscoDB.load(open(os.path.join(Task.root, path)))

        if rest:
            # partition() tolerates a bare method name such as 'keys';
            # unpacking rest.split('/', 1) raised ValueError for it.
            method, _, arg = rest.partition('/')
            if method == 'query':
                if hasattr(params, 'discodb_query'):
                    return Task.discodb.query(params.discodb_query), size, url
                return Task.discodb.query(Q.urlscan(arg)), size, url
            return getattr(Task.discodb, method)(), size, url
        return Task.discodb, size, url
    raise core.DiscoError("Scheme 'discodb' can only be used with force_local=True")
Example #12
0
def input_stream(fd, size, url, params):
    """Open a discodb URL and return ``(result, size, url)``.

    The path part before an optional '!' names the db. It is loaded from
    under Task.root when the first element of the URL's netloc equals that
    of Task.netloc, and otherwise downloaded via a ``disco://`` URL. The
    part after '!' has the form ``method`` or ``method/arg``:

    * ``metaquery`` and ``query`` are called with ``Q.urlscan(arg)``;
    * any other method is called with the truthy items of `arg` unpacked
      as positional arguments, or with no arguments when `arg` is missing
      or empty;
    * with nothing after '!', the db itself is the result.
    """
    import os
    from disco import util
    from disco.comm import download
    from discodb import DiscoDB, Q
    scheme, netloc, rest = util.urlsplit(url)
    path, rest   = rest.split('!', 1) if '!' in rest else (rest, '')

    if netloc[0] == Task.netloc[0]:
        discodb = DiscoDB.load(open(os.path.join(Task.root, path)))
    else:
        discodb = DiscoDB.loads(download('disco://%s/%s' % (netloc, path)))

    if rest:
        method_name, arg = rest.split('/', 1) if '/' in rest else (rest, None)
        method = getattr(discodb, method_name)
        if method_name in ('metaquery', 'query'):
            return method(Q.urlscan(arg)), size, url
        # A bare method name leaves arg as None, and filter(None, None)
        # raises TypeError, so call without arguments in that case.
        # NOTE(review): a non-empty string arg is still unpacked character
        # by character, exactly as before -- confirm that is intended.
        args = filter(None, arg) if arg else ()
        return method(*args), size, url
    return discodb, size, url
Example #13
0
def input_stream(fd, size, url, params):
    """Resolve a discodb URL to ``(result, size, url)``.

    The db is named by the path before an optional '!'. When the first
    element of the URL's netloc matches that of Task.netloc, it is opened
    from under Task.root; otherwise its serialized form is fetched through
    download() with a ``disco://`` URL. Anything after '!' is read as
    ``method`` or ``method/arg``:

    * for ``metaquery`` and ``query`` the method receives
      ``Q.urlscan(arg)``;
    * for other methods the truthy items of `arg` are unpacked as
      positional arguments; a missing or empty `arg` means no arguments;
    * without a method part, the db object itself is the result.
    """
    import os
    from disco import util
    from disco.comm import download
    from discodb import DiscoDB, Q
    scheme, netloc, rest = util.urlsplit(url)
    path, rest = rest.split('!', 1) if '!' in rest else (rest, '')

    if netloc[0] == Task.netloc[0]:
        discodb = DiscoDB.load(open(os.path.join(Task.root, path)))
    else:
        discodb = DiscoDB.loads(download('disco://%s/%s' % (netloc, path)))

    if rest:
        method_name, arg = rest.split('/', 1) if '/' in rest else (rest, None)
        method = getattr(discodb, method_name)
        if method_name in ('metaquery', 'query'):
            return method(Q.urlscan(arg)), size, url
        # With no '/' in the method part arg is None; filter(None, None)
        # would raise TypeError, so fall back to an empty argument list.
        # NOTE(review): a non-empty string arg is still spread one
        # character per positional argument, as before -- confirm intent.
        positional = filter(None, arg) if arg else ()
        return method(*positional), size, url
    return discodb, size, url
Example #14
0
def Open(url, task=None):
    """Open the DiscoDB a URL points at, optionally calling a method on it.

    The data roots come from `task` (``task.disco_data`` and
    ``task.ddfs_data``) when one is given, and from DiscoSettings
    otherwise. The URL path before an optional '!' is mapped to a local
    file with util.localize() and loaded as a DiscoDB. The part after '!'
    has the form ``method`` or ``method/arg``:

    * ``metaquery`` and ``query`` are called with ``Q.urlscan(arg)``;
    * any other method is called with the truthy items of `arg` unpacked
      as positional arguments, or with no arguments when `arg` is missing
      or empty;
    * with nothing after '!', the db itself is returned.
    """
    if task:
        disco_data = task.disco_data
        ddfs_data = task.ddfs_data
    else:
        from disco.settings import DiscoSettings
        settings = DiscoSettings()
        disco_data = settings['DISCO_DATA']
        ddfs_data = settings['DDFS_DATA']
    scheme, netloc, rest = util.urlsplit(url)
    path, rest = rest.split('!', 1) if '!' in rest else (rest, '')
    discodb = DiscoDB.load(open(util.localize(path, disco_data=disco_data,
                                ddfs_data=ddfs_data)))

    if rest:
        method_name, arg = rest.split('/', 1) if '/' in rest else (rest, None)
        method = getattr(discodb, method_name)
        if method_name in ('metaquery', 'query'):
            return method(Q.urlscan(arg))
        # A bare method name leaves arg as None, and filter(None, None)
        # raises TypeError, so call without arguments in that case.
        # NOTE(review): a non-empty string arg is still unpacked character
        # by character, exactly as before -- confirm that is intended.
        args = filter(None, arg) if arg else ()
        return method(*args)
    return discodb
Example #15
0
def Open(url, task=None):
    """Return the DiscoDB named by `url`, or the result of a method on it.

    ``disco_data`` and ``ddfs_data`` are read from `task` when it is
    given, and from a fresh DiscoSettings() otherwise. The path before an
    optional '!' is localized with util.localize(), opened and loaded as a
    DiscoDB. Anything after '!' is read as ``method`` or ``method/arg``:

    * for ``metaquery`` and ``query`` the method receives
      ``Q.urlscan(arg)``;
    * for other methods the truthy items of `arg` are unpacked as
      positional arguments; a missing or empty `arg` means no arguments;
    * without a method part, the db object itself is returned.
    """
    if task:
        disco_data = task.disco_data
        ddfs_data = task.ddfs_data
    else:
        from disco.settings import DiscoSettings
        settings = DiscoSettings()
        disco_data = settings['DISCO_DATA']
        ddfs_data = settings['DDFS_DATA']
    scheme, netloc, rest = util.urlsplit(url)
    path, rest = rest.split('!', 1) if '!' in rest else (rest, '')
    discodb = DiscoDB.load(
        open(util.localize(path, disco_data=disco_data, ddfs_data=ddfs_data)))

    if rest:
        method_name, arg = rest.split('/', 1) if '/' in rest else (rest, None)
        method = getattr(discodb, method_name)
        if method_name in ('metaquery', 'query'):
            return method(Q.urlscan(arg))
        # With no '/' in the method part arg is None; filter(None, None)
        # would raise TypeError, so fall back to an empty argument list.
        # NOTE(review): a non-empty string arg is still spread one
        # character per positional argument, as before -- confirm intent.
        positional = filter(None, arg) if arg else ()
        return method(*positional)
    return discodb
Example #16
0
def open_db():
  """Load a DiscoDB into ``app.db``.

  The db file is the first command-line argument when one is given.
  Otherwise the first item listed under 'data/' in an S3 bucket is used:
  it is stored under $DATA_DB_PATH unless a file of that name already
  exists there.
  """
  # Fixed: the original called len() on the comparison (sys.argv > 1) and
  # misspelled sys.argv as sys.arv.
  if len(sys.argv) > 1:
    fname = sys.argv[1]
  else:
    # fetch some data from s3
    # NOTE(review): `boto.s3_connectection` looks like a misspelling --
    # verify the connection call against the boto API.
    conn = boto.s3_connectection(
      os.environ['AWS_KEY'],
      os.environ['AWS_SECRET']
    )
    bucket = conn.get_bucket('com.mozillalabs.blink')

    # NOTE(review): this rebinding discards the bucket fetched above;
    # confirm which bucket is intended and that `conn.bucket` exists.
    bucket = conn.bucket('blink')
    item = bucket.list('data/').next()
    fname = os.path.join(
      os.environ['DATA_DB_PATH'],
      str(item.key)
    )
    if not os.path.exists(fname):
      # NOTE(review): verify `get_contents_to_path` against the boto API.
      item.get_contents_to_path(fname)

  app.db = DiscoDB.load(open(fname))
Example #17
0
        file, arg = rest.split('/%s/' % method, 1)
        path = os.path.join(datadir, file)
        if os.path.isfile(path):
            bound_method = getattr(DiscoDB.load(open(path)), method)
            return bound_method(xargs(arg)) if xargs else bound_method()
    raise NotMethod(method)

def input_stream(fd, size, url, params):
    """Open a local discodb URL and return ``(result, size, params)``.

    The URL is split into scheme, host and the remaining path. When the
    host is Task.host, or the task has the "resultfs" flag, the remaining
    path is tried against maybe_method() for 'query', 'keys' and 'values'
    in that order, and the first one that does not raise NotMethod
    supplies the result. For 'query' the argument is
    ``params.discodb_query`` when `params` has that attribute, and
    otherwise the URL argument passed through Q.urlscan. If none of the
    methods applies, the path is loaded as a DiscoDB from Task.root's
    "data" directory.

    Raises core.DiscoError when neither the host nor the flag matches.
    """
    scheme, rest = url.split('://', 1)
    host, rest = rest.split('/', 1)

    query = (lambda x: params.discodb_query) \
        if hasattr(params, "discodb_query") else Q.urlscan

    if not (host == Task.host or Task.has_flag("resultfs")):
        raise core.DiscoError("Scheme 'discodb' can only be used with force_local=True")

    datadir = os.path.join(Task.root, "data")
    # Each method is paired with the argument translator it needs.
    attempts = (('query', query), ('keys', None), ('values', None))
    for method, translate in attempts:
        try:
            return maybe_method(datadir, rest, method, xargs=translate), size, params
        except NotMethod:
            continue
    return DiscoDB.load(open(os.path.join(datadir, rest))), size, params

Example #18
0
#!/usr/bin/python

import sys
from discodb import DiscoDB

# Load the db named by the first command-line argument. open() replaces the
# Python 2-only file() builtin.
db = DiscoDB.load(open(sys.argv[1], 'r'))

# For each key read from stdin (trailing whitespace stripped), print the
# first value stored under it; keys with no values print nothing.
for key in map(str.rstrip, sys.stdin):
    inq = db.get(key)
    if inq and len(inq) > 0:
        # next() and print() with one argument behave the same on
        # Python 2.6+ and Python 3.
        print(next(iter(inq)))
Example #19
0
    lens_ttls.append(g_lens)
    print(g1)
t2 = time.time()
# 500: 48 sec: 5.2k/sec
# 1k: 182: 5.5k/sec
# 2k: 722: 5.5k/sec


# ** writing/loading

# Write the db to disk. The original two lines after open() were indented
# under a plain assignment, which is an IndentationError; a `with` block
# keeps the dump and the close together.
# NOTE(review): mode 'a' appends to an existing file -- confirm that
# appending, rather than overwriting, is intended.
with open('/home/johannes/Dropbox/gsss/thesis/anls/try1/add_data/db.disco', 'a') as fo:
    db.dump(fo)

# Read the db back from the same file.
with open('/home/johannes/Dropbox/gsss/thesis/anls/try1/add_data/db.disco', 'r') as fi:
    dbsx = DiscoDB.load(fi)


# ** multiprocessing theory
from multiprocessing import Process

def f(name):
    """Print a greeting for `name`, then print 0 through 4, pausing one
    second after each number."""
    print('hello', name)
    counter = 0
    while counter < 5:
        print(counter)
        time.sleep(1)
        counter += 1

if __name__ == '__main__':
    # Run f('bob') in a child process and block until it has finished.
    p = Process(target=f, args=('bob',))
    p.start()
    p.join()