Beispiel #1
0
    def output_dicts_to_db_for_shelf(self, mapping_files, wgs_accessions,
                                     accession2taxid_db,
                                     taxid2wgs_accession_db, output_gz,
                                     output_wgs_gz):
        """Build the accession->taxid and taxid->accession shelves and gzip dumps.

        Args:
            mapping_files: iterable of iterables of paths to tab-separated
                text files; column 0 of each line becomes the key and
                column 2 the stored value.
            wgs_accessions: path of a text file with one entry per line. The
                accession is the text after the first character and before
                the first ".".
            accession2taxid_db: ndbm path of the accession->taxid shelf (every
                ".db" substring is stripped before opening).
            taxid2wgs_accession_db: ndbm path of the taxid->accessions shelf
                (same ".db" stripping).
            output_gz: gzip text file receiving every mapping line stored.
            output_wgs_gz: gzip text file receiving every WGS line whose
                accession has a taxid.
        """
        # Both shelves are context-managed so they are closed even when a
        # partition is malformed or an I/O error interrupts the run.
        with shelve.Shelf(
                dbm.ndbm.open(accession2taxid_db.replace(".db", ""),
                              'c')) as accession_dict:
            # generate the accession2taxid db and file
            with gzip.open(output_gz, "wt") as gzf:
                for partition_list in mapping_files:
                    for partition in partition_list:
                        with open(partition, 'r', encoding="utf-8") as pf:
                            for line in pf:
                                # A blank line ends the current partition.
                                if len(line) <= 1:
                                    break
                                fields = line.rstrip().split("\t")
                                accession_dict[fields[0]] = fields[2]
                                gzf.write(line)

            # generate taxid2 accession
            with shelve.Shelf(
                    dbm.ndbm.open(taxid2wgs_accession_db.replace(".db", ""),
                                  'c')) as taxid2accession_dict:
                with gzip.open(output_wgs_gz, "wt") as gzf:
                    with open(wgs_accessions, 'r', encoding="utf-8") as wgsf:
                        for line in wgsf:
                            accession = line[1:].split(".")[0]
                            taxid = accession_dict.get(accession)
                            if taxid:
                                # NOTE(review): the stored value always starts
                                # with "," (",acc1,acc2"); kept as-is because
                                # readers may rely on that format - confirm.
                                current_match = taxid2accession_dict.get(
                                    taxid, "")
                                taxid2accession_dict[
                                    taxid] = f"{current_match},{accession}"
                                gzf.write(line)
 def test_keyencoding(self):
     d = {}
     key = 'Pöp'
     shelve.Shelf(d)[key] = [1]
     self.assertIn(key.encode('utf-8'), d)
     shelve.Shelf(d, keyencoding='latin-1')[key] = [1]
     self.assertIn(key.encode('latin-1'), d)
     s = shelve.Shelf(d, keyencoding='ascii')
     self.assertRaises(UnicodeEncodeError, s.__setitem__, key, [1])
Beispiel #3
0
def test_shelf() -> None:
    """Round-trip a tuple through a shelf backed by an ``sdbm`` database."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Write phase: store one value and close everything.
        with sdbm.open(Path(tmp_dir) / 'test.db') as handle, \
                shelve.Shelf(handle) as shelf:
            shelf['a'] = ('b', )

        # Read phase: reopen the same file and expect the value back.
        with sdbm.open(Path(tmp_dir) / 'test.db') as handle, \
                shelve.Shelf(handle) as shelf:
            assert shelf['a'] == ('b', )
Beispiel #4
0
 def test_keyencoding(self):
     d = {}
     key = 'Pöp'
     # the default keyencoding is utf-8
     shelve.Shelf(d)[key] = [1]
     self.assertIn(key.encode('utf-8'), d)
     # but a different one can be given
     shelve.Shelf(d, keyencoding='latin-1')[key] = [1]
     self.assertIn(key.encode('latin-1'), d)
     # with all consequences
     s = shelve.Shelf(d, keyencoding='ascii')
     self.assertRaises(UnicodeEncodeError, s.__setitem__, key, [1])
Beispiel #5
0
    def test_in_memory_shelf(self):
        """Store the same value under pickle protocols 0 and 1 and compare."""
        expected = (1, 2, 3, 4)
        backings = []
        for proto in (0, 1):
            backing = byteskeydict()
            with shelve.Shelf(backing, protocol=proto) as shelf:
                shelf['key1'] = (1, 2, 3, 4)
                self.assertEqual(shelf['key1'], expected)
            backings.append(backing)
        d1, d2 = backings

        # Each backing holds exactly one entry, but the serialized bytes
        # differ because the protocols differ.
        self.assertEqual(len(d1), 1)
        self.assertEqual(len(d2), 1)
        self.assertNotEqual(d1.items(), d2.items())
 def test_in_memory_shelf(self):
     """Round-trip a tuple with protocols 0 and 1; the stored data must differ."""

     def roundtrip(protocol):
         # Write, read back, close, and hand the raw backing dict to the caller.
         backing = byteskeydict()
         shelf = shelve.Shelf(backing, protocol=protocol)
         shelf['key1'] = (1, 2, 3, 4)
         self.assertEqual(shelf['key1'], (1, 2, 3, 4))
         shelf.close()
         return backing

     d1 = roundtrip(0)
     d2 = roundtrip(1)
     self.assertEqual(len(d1), 1)
     self.assertEqual(len(d2), 1)
     self.assertNotEqual(d1.items(), d2.items())
    def test_in_memory_shelf(self):
        d1 = {}
        s = shelve.Shelf(d1, protocol=0)
        s['key1'] = (1, 2, 3, 4)
        self.assertEqual(s['key1'], (1, 2, 3, 4))
        s.close()
        d2 = {}
        s = shelve.Shelf(d2, protocol=1)
        s['key1'] = (1, 2, 3, 4)
        self.assertEqual(s['key1'], (1, 2, 3, 4))
        s.close()

        self.assertEqual(len(d1), 1)
        self.assertNotEqual(d1, d2)
Beispiel #8
0
    def test_in_memory_shelf(self):
        d1 = {}
        s = shelve.Shelf(d1, binary=False)
        s['key1'] = (1, 2, 3, 4)
        self.assertEqual(s['key1'], (1, 2, 3, 4))
        s.close()
        d2 = {}
        s = shelve.Shelf(d2, binary=True)
        s['key1'] = (1, 2, 3, 4)
        self.assertEqual(s['key1'], (1, 2, 3, 4))
        s.close()

        self.assertEqual(len(d1), 1)
        self.assertNotEqual(d1, d2)
Beispiel #9
0
    def generate_loc_db_for_shelf(self, db_file, loc_db_file, info_db_file):
        """Index a FASTA-style file into a location shelf and an info shelf.

        Logic copied from generate_loc_db_work, slightly changed for writing
        to shelve format.

        Args:
            db_file: path of the text file to index; lines starting with ">"
                are headers, all other lines are sequence data.
            loc_db_file: ndbm path of the location shelf (every ".db"
                substring is stripped). Values are
                ``[seq_offset, header_len, seq_len]``.
            info_db_file: ndbm path of the info shelf (same stripping).
                Values are ``[accession_name, seq_bp_len]``.
        """
        # Both shelves are context-managed so they are closed even if
        # reading or parsing fails part-way through.
        with shelve.Shelf(
                dbm.ndbm.open(loc_db_file.replace(".db", ""),
                              'c')) as loc_dict, \
             shelve.Shelf(
                dbm.ndbm.open(info_db_file.replace(".db", ""),
                              'c')) as info_dict, \
             open(db_file) as dbf:
            seq_offset = 0
            seq_len = 0
            seq_bp_len = 0
            header_len = 0
            lines = 0
            accession_id = ""
            accession_name = ""

            def flush_record():
                # Persist the record accumulated so far (if any).
                if seq_len > 0 and len(accession_id) > 0:
                    loc_dict[accession_id] = [seq_offset, header_len, seq_len]
                if seq_bp_len > 0 and len(accession_name) > 0:
                    info_dict[accession_id] = [accession_name, seq_bp_len]

            for line in dbf:
                lines += 1
                if lines % 100000 == 0:
                    log.write(f"{lines/1000000.0}M lines")
                if line[0] == '>':  # header line
                    flush_record()

                    seq_offset = seq_offset + header_len + seq_len
                    header_len = len(line)
                    seq_len = 0
                    seq_bp_len = 0
                    accession_name = ""
                    # Sometimes multiple accessions will be mapped to a single sequence.
                    # In this case, they will be separated by the \x01 char.
                    # To get the accession name, just match until the first \x01.
                    # NOTE(review): when the header does not match,
                    # accession_id keeps the previous record's id, so the
                    # location entry of that id is overwritten - confirm
                    # whether this is intended.
                    s = re.match('^>([^ ]*) ([^\x01]*).*', line)
                    if s:
                        accession_id = s.group(1)
                        accession_name = s.group(2)
                else:
                    # seq_len counts raw characters (including the newline);
                    # seq_bp_len counts the stripped line only.
                    seq_len += len(line)
                    seq_bp_len += len(line.strip())
            # The last record has no following header to trigger a flush.
            flush_record()
def load_data_file(path):
    """Open the shelf associated with ``path``.

    Returns an empty in-memory ``shelve.Shelf`` when ``get_data_file_path``
    yields nothing, otherwise a ``contextlib.closing`` wrapper around a
    writeback shelf. On Windows the shelf's files are marked with file
    attribute 2 (presumably "hidden", given the helper's name) right away
    and again after every close.
    """
    assert path.exists(), "Path %s does not exist" % path
    data_path = get_data_file_path(path)
    if not data_path:
        # Nobody should ask for the data of a data file itself; if it
        # happens anyway, returning a dict-backed shelf is the least
        # disruptive answer.
        return shelve.Shelf({})

    shelf = shelve.open(str(data_path), writeback=True)
    if not is_windows:
        return contextlib.closing(shelf)

    original_close = shelf.close

    def hide():
        # The shelf may live in the bare file or in .dat/.bak/.dir siblings.
        candidates = [data_path]
        for suffix in [".dat", ".bak", ".dir"]:
            candidates.append(data_path.with_name(data_path.name + suffix))
        for candidate in candidates:
            if candidate.exists():
                ctypes.windll.kernel32.SetFileAttributesW(str(candidate), 2)

    def close_then_hide():
        original_close()
        hide()

    hide()
    shelf.close = close_then_hide
    return contextlib.closing(shelf)
Beispiel #11
0
 def update(self, args):
     """Store every key/value pair of ``args`` in the shelf at ``self.filename``.

     Does nothing when ``args`` is empty or None. The shelf is opened in
     create mode ('c') and is always closed, even if storing a value fails.
     """
     if not args:
         return
     storeobj = shelve.Shelf(mod.open(self.filename, 'c'))
     try:
         for key, item in args.items():
             storeobj[key] = item
     finally:
         storeobj.close()
Beispiel #12
0
 def readall(self):
     """Return all ``(key, value)`` pairs of the shelf as a list.

     Returns an empty list when ``self.filename`` does not exist. The shelf
     is always closed, even if reading an entry fails.
     """
     if not os.path.exists(self.filename):
         return []
     storeobj = shelve.Shelf(mod.open(self.filename, 'c'))
     try:
         return list(storeobj.items())
     finally:
         storeobj.close()
def open(filename, flag='c', protocol=None, writeback=False, block=True):
    """Open the shelve file, creating a lockfile at '.filename.lck'.

    The lock file lives next to ``filename``; for a bare file name that is
    the current directory. A shared lock is taken for flag 'r', an
    exclusive lock otherwise. If block is False then an IOError will be
    raised if the lock cannot be acquired.

    Returns the result of ``SafeShelf.convertFromShelf(shelf, lckfile)``.
    """
    # os.path.join keeps the lock beside the shelf even when dirname is ''
    # (plain concatenation with os.sep would point at the filesystem root).
    lckfilename = os.path.join(os.path.dirname(filename),
                               '.' + os.path.basename(filename) + '.lck')

    # Clear the umask while creating the lock file, and always restore it.
    old_umask = os.umask(0)
    try:
        lckfile = __builtin__.open(lckfilename, 'w')
    finally:
        os.umask(old_umask)

    try:
        # Acquire the lock
        lockflags = LOCK_SH if flag == 'r' else LOCK_EX
        if not block:
            lockflags |= LOCK_NB
        fcntl.flock(lckfile.fileno(), lockflags)

        # Open the shelf
        shelf = shelve.Shelf(bsddb3.hashopen(filename, flag), protocol,
                             writeback)
    except Exception:
        # Closing the descriptor also drops any lock already taken on it.
        lckfile.close()
        raise

    # And return a SafeShelf version of it
    return SafeShelf.convertFromShelf(shelf, lckfile)
 def __init__(self,filename,  size=None, cached=True):
     """Open (or create) the shelf at ``filename`` and set the size limit.

     ``cached`` is forwarded as the shelf's ``writeback`` flag. A falsy
     ``size`` (None or 0) means the limit is ``MAXINT``.
     """
     backing = db.open(filename, "c")
     self.db = shelve.Shelf(backing, writeback=cached)
     self._init_indexes()
     self.limit = size if size else MAXINT
    def create_shelf_multi(self, uris, key_f):
        """Build a shelf mapping each key to the list of JSON objects sharing it.

        Every line of every URI is parsed as JSON; ``key_f(obj)`` is passed
        through ``self.str_hook`` to obtain the key, and objects whose key
        is None are skipped. Raises ``json.JSONDecodeError`` (after logging)
        on an unparsable line and ``AssertionError`` on missing/empty ``uris``.
        """
        # sanity check inputs
        assert uris is not None
        assert len(uris) > 0

        # shelve needs a real dbm database behind it, so a temporary file
        # name is reserved and a fresh database is created under it.
        # Note: the database created here is never deleted.
        filename = tempfile.NamedTemporaryFile(delete=True).name
        shelf = shelve.Shelf(dict=dbm.open(filename, 'n'))

        for uri in uris:
            with URLZSource(uri).open() as f_obj:
                # Python 2 streams need an explicit utf-8 decode.
                if sys.version_info < (3, 0):
                    f_obj = codecs.getreader("utf-8")(f_obj)
                for line_no, line in enumerate(f_obj):
                    try:
                        obj = json.loads(line)
                    except json.JSONDecodeError as exc:
                        self.logger.error("Unable to read line %d %s", line_no,
                                          uri)
                        raise exc

                    key = self.str_hook(key_f(obj))
                    if key is None:
                        continue
                    # Read-modify-write: a non-writeback shelf does not
                    # track in-place mutation of stored lists.
                    shelf[key] = shelf.get(key, []) + [obj]
        return shelf
Beispiel #16
0
    def open_pool(self, dbname=None, dbtype=db.DB_HASH, flags=db.DB_CREATE, protocol=pickle.HIGHEST_PROTOCOL, overwrite=False):
        r"""Open the database that the CellPool uses to store cells.

        Parameters
        ----------
        dbname : string, optional
            Forwarded unchanged as the ``dbname`` argument of the bsddb3
            ``DB.open`` call.
        dbtype : int, optional
            Specifies the type of database to open. Use enumerations provided by
            `bsddb3 <https://www.jcea.es/programacion/pybsddb_doc/db.html#open>`_.
        flags : int, optional
            Specifies the configuration of the database to open. Use enumerations provided by
            `bsddb3 <https://www.jcea.es/programacion/pybsddb_doc/db.html#open>`_.
        protocol : int, optional
            Specifies the data stream format used by
            `pickle <https://docs.python.org/3/library/pickle.html#data-stream-format>`_.
        overwrite : bool, optional
            When true, ``self.delete_pool()`` is called before opening.

        Returns
        -------
        cell_pool_shelf : `shelve.Shelf <https://docs.python.org/3/library/shelve.html#shelve.Shelf>`_
            A `shelve.Shelf` wrapping a bsddb3 database.
        """
        # The database cannot be kept as an attribute because of pickling
        # errors, so this helper opens it on demand, loads the latest
        # metadata through ``self.load`` and returns the shelf.
        if overwrite:
            self.delete_pool()
        raw_db = db.DB()
        raw_db.open(self.pool_filename,
                    dbname=dbname,
                    dbtype=dbtype,
                    flags=flags)
        pool_shelf = shelve.Shelf(raw_db, protocol=protocol)
        self.load(cell_pool_shelf=pool_shelf)
        return pool_shelf
    def create_shelf_csv(self, uris, key_col, dialect):
        """Build a shelf from CSV sources, keyed by the ``key_col`` column.

        Each row is stored as a dict without the key column. The key is
        ``self.str_hook(row[key_col])``; rows whose key is None are skipped.
        Raises ``ValueError`` on a duplicate key and ``AssertionError`` on
        missing/empty ``uris``.
        """
        # sanity check inputs
        assert uris is not None
        assert len(uris) > 0

        # shelve needs a real dbm database behind it, so a temporary file
        # name is reserved and a fresh database is created under it.
        # Note: the database created here is never deleted.
        filename = tempfile.NamedTemporaryFile(delete=True).name
        shelf = shelve.Shelf(dict=dbm.open(filename, 'n'))

        for uri in uris:
            with URLZSource(uri).open() as f_obj:
                text_stream = codecs.getreader("utf-8")(f_obj)
                for row in csv.DictReader(text_stream, dialect=dialect):
                    key = self.str_hook(row[key_col])
                    if key is None:
                        continue
                    if key in shelf:
                        raise ValueError("Duplicate key %s in uri %s" %
                                         (key, uri))
                    # Store every column except the key column itself.
                    shelf[key] = {
                        column: value
                        for column, value in row.items()
                        if column != key_col
                    }
        return shelf
Beispiel #18
0
def __access_buckets(filename, clear, new_key=None, new_value=None):
    """
    Access data in forkbomb cache, potentially clearing or
    modifying it as required.

    The cache is a shelf whose single "data" entry holds a dict. An
    exclusive ``flock`` on ``filename`` is held for the duration of the call.

    Returns ``{}`` when ``clear`` is true, otherwise a copy of the stored
    dict (after inserting ``new_key``/``new_value`` when ``new_key`` is
    not None).
    """
    handle = open(filename, "w")
    try:
        fcntl.flock(handle.fileno(), fcntl.LOCK_EX)
        try:
            # 0o644 is the same mode as the old 0644 literal and parses on
            # Python 2.6+ as well as Python 3.
            storage = shelve.Shelf(dbm.open(filename, 'c', 0o644))
            try:
                if clear:
                    storage.clear()
                    return {}

                if "data" not in storage:
                    storage["data"] = {}

                if new_key is not None:
                    # bsdb is a bit weird about this: copy, modify and
                    # reassign instead of mutating the stored dict in place.
                    newish = storage["data"].copy()
                    newish[new_key] = new_value
                    storage["data"] = newish

                return storage["data"].copy()
            finally:
                storage.close()
        finally:
            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
    finally:
        # The lock file handle used to be left open on every call.
        handle.close()
Beispiel #19
0
    def create_shelf(self, uris, key_f):
        """Build a shelf of JSON objects keyed by ``str(key_f(obj))``.

        Every line of every URI is parsed as JSON. Objects whose key is None
        are skipped; a repeated key raises ``ValueError``. An unparsable
        line is logged and the ``json.JSONDecodeError`` re-raised.
        """
        # shelve needs a real dbm database behind it, so a temporary file
        # is created and a fresh database is opened under its name.
        # Note: this file is never deleted.
        filename = tempfile.NamedTemporaryFile(delete=False).name
        shelf = shelve.Shelf(dict=dbm.open(filename, 'n'))

        for uri in uris:
            with URLZSource(uri).open() as f_obj:
                text_stream = codecs.getreader("utf-8")(f_obj)
                for line_no, line in enumerate(text_stream):
                    try:
                        obj = json.loads(line)
                    except json.JSONDecodeError as exc:
                        self.logger.error("Unable to read line %d %s %s",
                                          line_no, uri, exc)
                        raise exc

                    key = key_f(obj)
                    if key is None:
                        continue
                    if str(key) in shelf:
                        raise ValueError("Duplicate key %s in uri %s" %
                                         (key, uri))
                    shelf[str(key)] = obj
        return shelf
Beispiel #20
0
def write():
	"""Write two sample ranges into a dumbdbm-backed shelf at 'test_dumb.dat'."""
	shelf = shelve.Shelf(dumbdbm.open('test_dumb.dat'))
	shelf['a'] = range(1000)
	shelf['b'] = range(2000)
	shelf.close()
Beispiel #21
0
def cli(ctx, db):
    '''
        Tool that allows low-level exploration of an Exaile music database
    '''
    # Simpler version of trackdb.py: try a bsddb hash file first and fall
    # back to whatever format shelve.open detects.
    try:
        contents = shelve.Shelf(bsddb.hashopen(db, 'r'),
                                protocol=exaile_pickle_protocol)
    except Exception:
        try:
            contents = shelve.open(db,
                                   flag='r',
                                   protocol=exaile_pickle_protocol)
        except Exception:
            # A missing file gets a friendly message; any other failure
            # is re-raised unchanged.
            if not os.path.exists(db):
                raise click.ClickException("%s does not exist" % db)
            raise

    ctx.obj = contents

    # Looked up at close time, so whatever ctx.obj holds then is closed.
    ctx.call_on_close(lambda: ctx.obj.close())
Beispiel #22
0
def wrap(redis, lock_class=Lock):
    """Return a ``shelve.Shelf`` over ``redis`` with an added ``lock(key)`` factory.

    ``shelf.lock(key)`` builds ``lock_class(redis, key + '.lock')``.
    """
    shelf = shelve.Shelf(redis)

    def lock(key):
        lock_name = key + '.lock'
        return lock_class(redis, lock_name)

    shelf.lock = lock
    return shelf
Beispiel #23
0
def update_shelves(filename):

    execfile(filename)

    name = os.path.basename(filename)
    name = name.replace('.py','')
    parts = name.split('_')
    if len(parts) == 4:
        del parts[2]
    elif len(parts) == 5:
        del parts[2]
        del parts[2]
    name = '_'.join(parts)
    mydict = locals()[name]

    outfile = filename.replace('.py', '.pickle')
    f = open(outfile, 'wb')
    pickle.dump(mydict, f, protocol=2)
    f.close()

    outfile = filename.replace('.py', '.shelf')
    shelf = shelve.Shelf(GDBM_MODULE.open(outfile, 'n'))

    for (key, value) in mydict.iteritems():
        shelf[key] = value

    shelf.close()
Beispiel #24
0
class IndexDb(object):
    """
    A simple wrapper around the index database, which is a shelve
    (pickle-based) store on top of a dbm file.
    """
    WRITE_MODE = "w"
    READ_MODE = "r"

    def __init__(self, dir=None):
        """
        Initialise an unloaded instance; the storage is opened later by
        the private index loader.

        NOTE(review): ``dir`` is accepted but not used; the directory
        always comes from ``utils.getCacheDir()`` - confirm intent.
        """
        self.__storage = None
        self.__handle = None
        self.__dir = utils.getCacheDir()

    def __load_index(self):
        """
        Open and lock the status file, then open the shelf for this
        instance.

        Returns True on success and False when the status file cannot be
        opened. Reads ``self.__mode``, which is not assigned anywhere in
        the visible part of the class.
        """
        import os
        filename = os.path.join(self.__dir, INTERNAL_DB_FILE)
        try:
            self.__handle = open(filename, self.__mode)
        except IOError:
            # Parenthesised single-string print behaves the same on
            # Python 2 and Python 3.
            print('Cannot create status file. Ensure you have permission to write')
            return False

        # Exclusive lock on the status file for as long as the handle is open.
        fcntl.flock(self.__handle.fileno(), fcntl.LOCK_EX)
        # 0o600 is the same mode as the old 0600 literal (Python 2.6+ and 3).
        internal_db = dbm.open(filename, 'c', 0o600)
        self.__storage = shelve.Shelf(internal_db)
        return True
Beispiel #25
0
def cvtdb(ctx, data, dbtype):
    '''
        Only used for testing purposes

        Copies every item of ``data`` into a new shelf at ``<db>.new``,
        where ``<db>`` is the parent command's ``db`` parameter and the
        backend is selected by ``dbtype``.
    '''

    target_path = ctx.parent.params['db'] + '.new'

    # Each backend is imported lazily so that only the requested one has
    # to be installed.
    if dbtype == 'gdbm':
        import dbm.gnu
        backend = dbm.gnu.open(target_path, 'n')
    elif dbtype == 'dbm':
        import dbm.ndbm
        backend = dbm.ndbm.open(target_path, 'n')
    elif dbtype == 'dbhash':
        import dbm.bsd
        backend = dbm.bsd.open(target_path, 'n')
    elif dbtype == 'bsddb':
        backend = bsddb.hashopen(target_path, 'n')
    elif dbtype == 'dumbdbm':
        import dbm.dumb
        backend = dbm.dumb.open(target_path, 'n')
    else:
        raise click.ClickException("Invalid type %s" % dbtype)

    converted = shelve.Shelf(backend, protocol=exaile_pickle_protocol)
    for key, value in data.items():
        converted[key] = value

    converted.sync()
    converted.close()
Beispiel #26
0
def main():
    """Merge records from CSV files into the shelf database.

    ``sys.argv[1]`` is the database file; every further argument is a CSV
    file with at least the columns ``id`` and ``file``. Rows with a blank
    ``file`` are ignored; rows whose ``id`` is not yet in the database are
    added.
    """

    db_file = sys.argv[1]

    done_db = shelve.Shelf(db.open(db_file, "c"))
    try:
        log.info('db has %d records', len(done_db))

        added = 0
        for csv_file in sys.argv[2:]:
            log.info("processing file %s", csv_file)
            loaded_files = {}
            # Context-managed so the CSV handle is closed after each file
            # instead of being leaked.
            with open(csv_file) as csv_handle:
                for row in csv.DictReader(csv_handle):
                    file_name = row['file'].strip()
                    if file_name:
                        # Later rows with the same id replace earlier ones.
                        loaded_files[row['id']] = row
            log.info('csv file has %d valid records ', len(loaded_files))

            for idx in loaded_files:
                if idx not in done_db:
                    done_db[idx] = loaded_files[idx]
                    added += 1

        log.info('Done -  %d added - db now has %d records', added,
                 len(done_db))

    finally:
        done_db.close()
Beispiel #27
0
def _get_cache(cachepath):
    """Return the (memoised) shelf used as cache for ``cachepath``.

    If the shelf cannot be opened because of a dbm error, the file is
    removed and the open retried once. If no on-disk shelf can be obtained,
    an in-memory shelf is used instead, so a bad cache never causes a
    failure. The result is remembered in ``_cache_shelves``.
    """
    if cachepath in _cache_shelves:
        return _cache_shelves[cachepath]

    cache = None
    try:
        cache = shelve.open(cachepath, protocol=2)
    except dbm.error as open_error:
        # dbm error on open - delete and retry
        print('Error (%s) opening %s - will attempt to delete and re-open.' %
              (open_error, cachepath))
        try:
            os.remove(cachepath)
            cache = shelve.open(cachepath, protocol=2)
        except Exception as retry_error:
            print('Error on re-open: %s' % retry_error)
    except Exception:
        # unknown error
        print('Could not open cache file %s, maybe name collision. '
              'Error: %s' % (cachepath, traceback.format_exc()))

    # Don't fail on bad caches
    if cache is None:
        print('Using in-memory shelf for cache file %s' % cachepath)
        cache = shelve.Shelf(dict())

    _cache_shelves[cachepath] = cache
    return cache
 def _empty_mapping(self):
     """Create, register and return a fresh empty shelf.

     In in-memory mode the shelf wraps a new ``byteskeydict``; otherwise
     the counter is bumped and a new shelf file named ``self.fn`` plus the
     counter is opened. ``self._args`` is forwarded either way.
     """
     if not self._in_mem:
         self.counter += 1
         shelf_path = self.fn + str(self.counter)
         mapping = shelve.open(shelf_path, **self._args)
     else:
         mapping = shelve.Shelf(byteskeydict(), **self._args)
     # Remembered in self._db alongside every other shelf created here.
     self._db.append(mapping)
     return mapping
Beispiel #29
0
    def test_mutable_entry(self):
        """In-place mutation of a stored list is only visible with writeback."""

        def exercise(writeback, expected_after_append):
            # Store a list, mutate the retrieved copy, then read it again.
            backing = byteskeydict()
            with shelve.Shelf(backing, protocol=2,
                              writeback=writeback) as shelf:
                shelf['key1'] = [1, 2, 3, 4]
                self.assertEqual(shelf['key1'], [1, 2, 3, 4])
                shelf['key1'].append(5)
                self.assertEqual(shelf['key1'], expected_after_append)
            return backing

        # Without writeback the append is lost; with writeback it is seen.
        d1 = exercise(False, [1, 2, 3, 4])
        d2 = exercise(True, [1, 2, 3, 4, 5])

        self.assertEqual(len(d1), 1)
        self.assertEqual(len(d2), 1)
Beispiel #30
0
 def __init__(self, save):
     """Open the LMDB-backed experience shelf.

     ``save`` doubles as the shelf's ``writeback`` flag. When it is falsy,
     an additional in-memory ``mem_experiences`` dict is created.
     """
     self.save = save
     backing = LMDBDict("data/commit_experiences.lmdb")
     self.db_experiences = shelve.Shelf(backing,
                                        protocol=pickle.DEFAULT_PROTOCOL,
                                        writeback=save)
     if not save:
         self.mem_experiences = {}