def output_dicts_to_db_for_shelf(self, mapping_files, wgs_accessions, accession2taxid_db, taxid2wgs_accession_db, output_gz, output_wgs_gz):
    """Build two shelve-backed ndbm databases plus gzip'd text copies.

    1. accession -> taxid, from the TSV partitions in *mapping_files*
       (also streamed verbatim into *output_gz*).
    2. taxid -> comma-joined WGS accessions, from *wgs_accessions*
       (matching lines also streamed into *output_wgs_gz*).
    """
    # generate the accession2taxid db and file
    # shelve.Shelf wraps the raw ndbm handle; ".db" is stripped because
    # dbm.ndbm appends its own suffix to the path it is given.
    accession_dict = shelve.Shelf(
        dbm.ndbm.open(accession2taxid_db.replace(".db", ""), 'c'))
    with gzip.open(output_gz, "wt") as gzf:
        for partition_list in mapping_files:
            for partition in partition_list:
                with open(partition, 'r', encoding="utf-8") as pf:
                    for line in pf:
                        # NOTE(review): a blank/1-char line stops reading the
                        # rest of THIS partition entirely — presumably the
                        # partitions are terminated by an empty line; confirm.
                        if len(line) <= 1:
                            break
                        fields = line.rstrip().split("\t")
                        # column 0 = accession, column 2 = taxid
                        accession_dict[fields[0]] = fields[2]
                        gzf.write(line)
    # generate taxid2 accession
    with shelve.Shelf(
            dbm.ndbm.open(taxid2wgs_accession_db.replace(".db", ""),
                          'c')) as taxid2accession_dict:
        with gzip.open(output_wgs_gz, "wt") as gzf:
            with open(wgs_accessions, 'r', encoding="utf-8") as wgsf:
                for line in wgsf:
                    # lines look like FASTA headers: skip the leading '>' and
                    # drop the accession version suffix after the first '.'
                    accession = line[1:].split(".")[0]
                    taxid = accession_dict.get(accession)
                    if taxid:
                        current_match = taxid2accession_dict.get(taxid, "")
                        # NOTE(review): the first accession for a taxid is
                        # stored with a leading "," (current_match == "") —
                        # downstream consumers presumably tolerate/strip it.
                        taxid2accession_dict[
                            taxid] = f"{current_match},{accession}"
                        gzf.write(line)
    accession_dict.close()
def test_keyencoding(self): d = {} key = 'Pöp' shelve.Shelf(d)[key] = [1] self.assertIn(key.encode('utf-8'), d) shelve.Shelf(d, keyencoding='latin-1')[key] = [1] self.assertIn(key.encode('latin-1'), d) s = shelve.Shelf(d, keyencoding='ascii') self.assertRaises(UnicodeEncodeError, s.__setitem__, key, [1])
def test_shelf() -> None:
    """A value written through a Shelf survives close/reopen of the backing db."""
    with tempfile.TemporaryDirectory() as tmp:
        db_path = Path(tmp) / 'test.db'
        # Write a tuple, letting both Shelf and the backing db close cleanly.
        with sdbm.open(db_path) as backing:
            with shelve.Shelf(backing) as shelf:
                shelf['a'] = ('b', )
        # Reopen and verify the round trip.
        with sdbm.open(db_path) as backing:
            with shelve.Shelf(backing) as shelf:
                assert shelf['a'] == ('b', )
def test_keyencoding(self): d = {} key = 'Pöp' # the default keyencoding is utf-8 shelve.Shelf(d)[key] = [1] self.assertIn(key.encode('utf-8'), d) # but a different one can be given shelve.Shelf(d, keyencoding='latin-1')[key] = [1] self.assertIn(key.encode('latin-1'), d) # with all consequences s = shelve.Shelf(d, keyencoding='ascii') self.assertRaises(UnicodeEncodeError, s.__setitem__, key, [1])
def test_in_memory_shelf(self):
    """Round-trip a tuple through in-memory shelves at protocols 0 and 1."""
    d1 = byteskeydict()
    d2 = byteskeydict()
    # Same value stored under both pickle protocols.
    for backing, proto in ((d1, 0), (d2, 1)):
        with shelve.Shelf(backing, protocol=proto) as shelf:
            shelf['key1'] = (1, 2, 3, 4)
            self.assertEqual(shelf['key1'], (1, 2, 3, 4))
    self.assertEqual(len(d1), 1)
    self.assertEqual(len(d2), 1)
    # Different protocols serialize the same value differently.
    self.assertNotEqual(d1.items(), d2.items())
def test_in_memory_shelf(self):
    """Round-trip a tuple through in-memory shelves at protocols 0 and 1.

    Uses ``with shelve.Shelf(...)`` instead of manual ``close()`` so the
    shelf is closed even when an assertion fails mid-test, matching the
    style of the sibling context-manager based tests.
    """
    d1 = byteskeydict()
    with shelve.Shelf(d1, protocol=0) as s:
        s['key1'] = 1, 2, 3, 4
        self.assertEqual(s['key1'], (1, 2, 3, 4))
    d2 = byteskeydict()
    with shelve.Shelf(d2, protocol=1) as s:
        s['key1'] = 1, 2, 3, 4
        self.assertEqual(s['key1'], (1, 2, 3, 4))
    self.assertEqual(len(d1), 1)
    self.assertEqual(len(d2), 1)
    # Protocols 0 and 1 produce different serialized bytes for the same value.
    self.assertNotEqual(d1.items(), d2.items())
def test_in_memory_shelf(self): d1 = {} s = shelve.Shelf(d1, protocol=0) s['key1'] = (1, 2, 3, 4) self.assertEqual(s['key1'], (1, 2, 3, 4)) s.close() d2 = {} s = shelve.Shelf(d2, protocol=1) s['key1'] = (1, 2, 3, 4) self.assertEqual(s['key1'], (1, 2, 3, 4)) s.close() self.assertEqual(len(d1), 1) self.assertNotEqual(d1, d2)
def test_in_memory_shelf(self):
    """Round-trip a tuple through Shelf in both binary modes.

    NOTE(review): ``binary=`` is the legacy pre-``protocol`` shelve API
    (removed from modern Python's shelve) — this test presumably targets
    an old interpreter or a compat shim; confirm before restyling.
    """
    d1 = {}
    s = shelve.Shelf(d1, binary=False)
    s['key1'] = (1, 2, 3, 4)
    self.assertEqual(s['key1'], (1, 2, 3, 4))
    s.close()
    d2 = {}
    s = shelve.Shelf(d2, binary=True)
    s['key1'] = (1, 2, 3, 4)
    self.assertEqual(s['key1'], (1, 2, 3, 4))
    s.close()
    self.assertEqual(len(d1), 1)
    # text vs binary pickling yields different stored bytes
    self.assertNotEqual(d1, d2)
def generate_loc_db_for_shelf(self, db_file, loc_db_file, info_db_file):
    """Index a FASTA-style *db_file* into two shelve/ndbm databases.

    loc_dict:  accession_id -> [byte offset of record, header length,
               sequence length (bytes incl. newlines)]
    info_dict: accession_id -> [accession display name, sequence length
               in bases (newlines stripped)]
    """
    # Logic copied from generate_loc_db_work
    # slightly changed for writing to shelve format
    loc_dict = shelve.Shelf(
        dbm.ndbm.open(loc_db_file.replace(".db", ""), 'c'))
    info_dict = shelve.Shelf(
        dbm.ndbm.open(info_db_file.replace(".db", ""), 'c'))
    with open(db_file) as dbf:
        # Running state for the record currently being scanned.
        seq_offset = 0
        seq_len = 0
        seq_bp_len = 0
        header_len = 0
        lines = 0
        accession_id = ""
        accession_name = ""
        for line in dbf:
            lines += 1
            if lines % 100000 == 0:
                # progress logging in millions of lines
                log.write(f"{lines/1000000.0}M lines")
            if line[0] == '>':  # header line
                # Flush the previous record (if any) before starting a new one.
                if seq_len > 0 and len(accession_id) > 0:
                    loc_dict[accession_id] = [
                        seq_offset, header_len, seq_len
                    ]
                if seq_bp_len > 0 and len(accession_name) > 0:
                    info_dict[accession_id] = [accession_name, seq_bp_len]
                seq_offset = seq_offset + header_len + seq_len
                header_len = len(line)
                seq_len = 0
                seq_bp_len = 0
                accession_name = ""
                # Sometimes multiple accessions will be mapped to a single sequence.
                # In this case, they will be separated by the \x01 char.
                # To get the accession name, just match until the first \x01.
                s = re.match('^>([^ ]*) ([^\x01]*).*', line)
                if s:
                    accession_id = s.group(1)
                    accession_name = s.group(2)
            else:
                # Sequence line: track raw length (for offsets) and base count.
                seq_len += len(line)
                seq_bp_len += len(line.strip())
        # Flush the final record, which has no following header to trigger it.
        if seq_len > 0 and len(accession_id) > 0:
            loc_dict[accession_id] = [seq_offset, header_len, seq_len]
        if seq_bp_len > 0 and len(accession_name) > 0:
            info_dict[accession_id] = [accession_name, seq_bp_len]
    loc_dict.close()
    info_dict.close()
def load_data_file(path):
    """Open the data shelf associated with *path*, as a closing context manager.

    Returns an empty in-memory Shelf when *path* has no data file, and a
    writeback shelve otherwise. On Windows the shelf's files are kept
    hidden (attribute 2) both immediately and again after close.
    """
    assert path.exists(), "Path %s does not exist" % path
    p = get_data_file_path(path)
    if not p:
        # Nobody should ask for info about info files themselves, but if it
        # happens, returning an empty dict-backed shelf is the least
        # aggressive response.
        return shelve.Shelf({})
    s = shelve.open(str(p), writeback=True)
    if is_windows:
        old_close = s.close

        def hide():
            # Mark the shelf file and the possible dbm side files as hidden
            # (FILE_ATTRIBUTE_HIDDEN == 2).
            for n in [p] + [
                    p.with_name(p.name + suff)
                    for suff in [".dat", ".bak", ".dir"]
            ]:
                if n.exists():
                    ctypes.windll.kernel32.SetFileAttributesW(str(n), 2)

        def new_close():
            # Re-hide after close: dbm may recreate the files on close.
            old_close()
            hide()

        hide()
        s.close = new_close
    return contextlib.closing(s)
def update(self, args):
    """Store every key/value pair from *args* into the shelf file.

    No-op when *args* is empty or None. The shelf is closed in a
    ``finally`` block so the underlying dbm handle is not leaked if a
    write raises (the original closed it only on full success).
    """
    if args:
        storeobj = shelve.Shelf(mod.open(self.filename, 'c'))
        try:
            for key, item in args.items():
                storeobj[key] = item
        finally:
            storeobj.close()
    return
def readall(self):
    """Return all (key, value) pairs from the shelf file as a list.

    Returns ``[]`` when the file does not exist. The shelf is closed in
    a ``finally`` block so the dbm handle is not leaked if reading
    raises (the original closed it only on success).
    """
    if os.path.exists(self.filename):
        storeobj = shelve.Shelf(mod.open(self.filename, 'c'))
        try:
            data = list(storeobj.items())
        finally:
            storeobj.close()
        return data
    return []
def open(filename, flag='c', protocol=None, writeback=False, block=True):
    """Open the shelve file, creating a lockfile at '.filename.lck'.

    The lock is shared for read-only opens ('r') and exclusive otherwise.
    If block is False then a IOError will be raised if the lock cannot
    be acquired.
    """
    # Lockfile lives next to the shelf: <dir>/.<basename>.lck
    lckfilename = os.path.dirname(filename) + os.sep + '.' + os.path.basename(
        filename) + '.lck'
    # umask 0 so the lockfile is world-writable and other users can lock too
    old_umask = os.umask(000)
    lckfile = __builtin__.open(lckfilename, 'w')
    os.umask(old_umask)
    # Acquire the lock: shared for readers, exclusive for writers.
    if flag == 'r':
        lockflags = LOCK_SH
    else:
        lockflags = LOCK_EX
    if not block:
        lockflags |= LOCK_NB
    fcntl.flock(lckfile.fileno(), lockflags)
    # Open the shelf over a bsddb3 hash database (not plain shelve.open).
    shelf = shelve.Shelf(bsddb3.hashopen(filename, flag), protocol, writeback)
    # And return a SafeShelf version of it, which owns the lockfile.
    return SafeShelf.convertFromShelf(shelf, lckfile)
def __init__(self, filename, size=None, cached=True):
    """Open the backing shelf for *filename* and set the size limit.

    A falsy *size* means "no limit" (MAXINT); *cached* enables shelve
    writeback caching.
    """
    self.db = shelve.Shelf(db.open(filename, "c"), writeback=cached)
    self._init_indexes()
    self.limit = size if size else MAXINT
def create_shelf_multi(self, uris, key_f):
    """Build a shelf mapping key -> list of JSON objects read from *uris*."""
    # sanity check inputs
    assert uris is not None
    assert len(uris) > 0
    # Shelve creates a file with specific database. Using a temp file requires a workaround to open it.
    # dumbdbm creates an empty database file. In this way shelve can open it properly.
    # note: this file is never deleted!
    tmp_name = tempfile.NamedTemporaryFile(delete=True).name
    shelf = shelve.Shelf(dict=dbm.open(tmp_name, 'n'))
    for uri in uris:
        with URLZSource(uri).open() as f_obj:
            # for python2 we need to decode utf-8
            if sys.version_info < (3, 0):
                f_obj = codecs.getreader("utf-8")(f_obj)
            for line_no, line in enumerate(f_obj):
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError as e:
                    self.logger.error("Unable to read line %d %s", line_no, uri)
                    raise e
                key = self.str_hook(key_f(obj))
                if key is None:
                    continue
                # Accumulate all objects sharing the same key.
                bucket = shelf.get(key, [])
                bucket.append(obj)
                shelf[key] = bucket
    return shelf
def open_pool(self, dbname=None, dbtype=db.DB_HASH, flags=db.DB_CREATE, protocol=pickle.HIGHEST_PROTOCOL, overwrite=False): r"""Open the database that the CellPool uses to store cells. Parameters ---------- dbname : string dbtype : int, optional Specifies the type of database to open. Use enumerations provided by `bsddb3 <https://www.jcea.es/programacion/pybsddb_doc/db.html#open>`_. flags : int, optional Specifies the configuration of the database to open. Use enumerations provided by `bsddb3 <https://www.jcea.es/programacion/pybsddb_doc/db.html#open>`_. protocol : int, optional Specifies the data stream format used by `pickle <https://docs.python.org/3/library/pickle.html#data-stream-format>`_. overwrite : bool, optional Indicates if an existing database should be overwritten if found. Returns ------- cell_pool_shelf : `shelve.Shelf <https://docs.python.org/3/library/shelve.html#shelve.Shelf>`_ A `shelve.Shelf` wrapping a bsddb3 database. """ # We can't save our database as a class attribute due to pickling errors. # To prevent errors from code repeat, this convenience function opens the database and # loads the latest meta data, the returns the database. if overwrite: self.delete_pool() cell_pool_db = db.DB() cell_pool_db.open(self.pool_filename, dbname=dbname, dbtype=dbtype, flags=flags) cell_pool_shelf = shelve.Shelf(cell_pool_db, protocol=protocol) self.load(cell_pool_shelf=cell_pool_shelf) return cell_pool_shelf
def create_shelf_csv(self, uris, key_col, dialect):
    """Build a shelf keyed by column *key_col* from CSV rows in *uris*."""
    # sanity check inputs
    assert uris is not None
    assert len(uris) > 0
    # Shelve creates a file with specific database. Using a temp file requires a workaround to open it.
    # dumbdbm creates an empty database file. In this way shelve can open it properly.
    # note: this file is never deleted!
    tmp_name = tempfile.NamedTemporaryFile(delete=True).name
    shelf = shelve.Shelf(dict=dbm.open(tmp_name, 'n'))
    for uri in uris:
        with URLZSource(uri).open() as raw:
            reader = csv.DictReader(codecs.getreader("utf-8")(raw),
                                    dialect=dialect)
            for row in reader:
                key = self.str_hook(row[key_col])
                if key is None:
                    continue
                if key in shelf:
                    raise ValueError("Duplicate key %s in uri %s" %
                                     (key, uri))
                # Store the row without its key column.
                row_dict = dict(row)
                del row_dict[key_col]
                shelf[key] = row_dict
    return shelf
def __access_buckets(filename, clear, new_key=None, new_value=None):
    """
    Access data in forkbomb cache, potentially clearing or modifying
    it as required.
    """
    # NOTE(review): opening with "w" truncates *filename* before the dbm is
    # opened over the same path — presumably intentional as a lock anchor,
    # but confirm it does not clobber cache data.
    handle = open(filename, "w")
    fcntl.flock(handle.fileno(), fcntl.LOCK_EX)
    # Python 2 code: octal literal 0644 and dict.has_key below.
    internal_db = dbm.open(filename, 'c', 0644)
    storage = shelve.Shelf(internal_db)
    if clear:
        storage.clear()
        storage.close()
        fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
        return {}
    # Ensure the "data" bucket exists before any read/modify below.
    if not storage.has_key("data"):
        storage["data"] = {}
    else:
        pass
    if new_key is not None:
        # bsdb is a bit weird about this: mutate a copy and reassign so the
        # shelf actually persists the change.
        newish = storage["data"].copy()
        newish[new_key] = new_value
        storage["data"] = newish
    # Return a copy so callers cannot mutate the closed shelf's data.
    rc = storage["data"].copy()
    storage.close()
    fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
    return rc
def create_shelf(self, uris, key_f):
    """Build a shelf mapping str(key_f(obj)) -> obj for JSON lines in *uris*."""
    # Shelve creates a file with specific database. Using a temp file requires a workaround to open it.
    # dumbdbm creates an empty database file. In this way shelve can open it properly.
    # note: this file is never deleted!
    tmp_name = tempfile.NamedTemporaryFile(delete=False).name
    shelf = shelve.Shelf(dict=dbm.open(tmp_name, 'n'))
    for uri in uris:
        with URLZSource(uri).open() as raw:
            reader = codecs.getreader("utf-8")(raw)
            for line_no, line in enumerate(reader):
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError as e:
                    self.logger.error("Unable to read line %d %s %s",
                                      line_no, uri, e)
                    raise e
                key = key_f(obj)
                if key is None:
                    continue
                if str(key) in shelf:
                    raise ValueError("Duplicate key %s in uri %s" %
                                     (key, uri))
                shelf[str(key)] = obj
    return shelf
def write():
    """Demo: write two ranges into a dumbdbm-backed shelf."""
    backing = dumbdbm.open('test_dumb.dat')
    shelf = shelve.Shelf(backing)
    shelf['a'] = range(1000)
    shelf['b'] = range(2000)
    shelf.close()
def cli(ctx, db): ''' Tool that allows low-level exploration of an Exaile music database ''' # simpler version of trackdb.py try: d = bsddb.hashopen(db, 'r') contents = shelve.Shelf(d, protocol=exaile_pickle_protocol) except Exception: try: contents = shelve.open(db, flag='r', protocol=exaile_pickle_protocol) except Exception: if os.path.exists(db): raise else: raise click.ClickException("%s does not exist" % db) ctx.obj = contents def _on_close(): ctx.obj.close() ctx.call_on_close(_on_close)
def wrap(redis, lock_class=Lock):
    """Wrap *redis* in a shelve.Shelf and attach a ``lock(key)`` factory.

    The factory builds a lock named ``<key>.lock`` on the same backend.
    """
    def lock(key):
        return lock_class(redis, key + '.lock')

    shelf = shelve.Shelf(redis)
    shelf.lock = lock
    return shelf
def update_shelves(filename):
    """Convert a data-bearing .py file into .pickle and .shelf siblings.

    Python 2 code (execfile, iteritems). The dict variable inside the
    executed file is located via its name, derived from the filename.
    """
    # NOTE(review): relies on Python 2 execfile populating locals() so the
    # locals()[name] lookup below finds the executed file's dict — confirm.
    execfile(filename)
    name = os.path.basename(filename)
    name = name.replace('.py','')
    # Drop the middle component(s) of the filename to recover the variable
    # name, e.g. a_b_X_c -> a_b_c and a_b_X_Y_c -> a_b_c.
    parts = name.split('_')
    if len(parts) == 4:
        del parts[2]
    elif len(parts) == 5:
        del parts[2]
        del parts[2]
    name = '_'.join(parts)
    mydict = locals()[name]
    # Dump as a plain pickle...
    outfile = filename.replace('.py', '.pickle')
    f = open(outfile, 'wb')
    pickle.dump(mydict, f, protocol=2)
    f.close()
    # ...and as a gdbm-backed shelf.
    outfile = filename.replace('.py', '.shelf')
    shelf = shelve.Shelf(GDBM_MODULE.open(outfile, 'n'))
    for (key, value) in mydict.iteritems():
        shelf[key] = value
    shelf.close()
class IndexDb(object):
    """
    A simple wrapper for index Db, which is a kind of pickle
    (a shelve over a dbm file, opened under an exclusive flock).
    """
    # File-open modes used by callers of this class.
    WRITE_MODE = "w"
    READ_MODE = "r"

    def __init__(self, dir=None):
        """ Load the db when have an instance """
        # NOTE(review): the *dir* parameter is ignored; the cache dir always
        # comes from utils.getCacheDir() — confirm that is intended.
        self.__storage = None
        self.__handle = None
        self.__dir = utils.getCacheDir()

    def __load_index(self):
        """ Gets the store object for that instance """
        import os
        filename = os.path.join(self.__dir, INTERNAL_DB_FILE)
        # NOTE(review): self.__mode is never assigned in the code visible
        # here — presumably set elsewhere to WRITE_MODE/READ_MODE; verify.
        try:
            self.__handle = open(filename, self.__mode)
        except IOError, e:
            # Python 2 except syntax; failure leaves storage unset.
            print 'Cannot create status file. Ensure you have permission to write'
            return False
        # Exclusive lock on the handle guards the dbm open below.
        fcntl.flock(self.__handle.fileno(), fcntl.LOCK_EX)
        internal_db = dbm.open(filename, 'c', 0600)
        self.__storage = shelve.Shelf(internal_db)
        return True
def cvtdb(ctx, data, dbtype):
    '''
    Only used for testing purposes

    Copies every entry of *data* into a new database of the requested
    backend type, written next to the original as "<db>.new".
    '''
    db = ctx.parent.params['db']
    newdb = db + '.new'
    # Pick the backend; imports are local so unused backends need not exist.
    if dbtype == 'gdbm':
        import dbm.gnu
        new_d = dbm.gnu.open(newdb, 'n')
    elif dbtype == 'dbm':
        import dbm.ndbm
        new_d = dbm.ndbm.open(newdb, 'n')
    elif dbtype == 'dbhash':
        # NOTE(review): dbm.bsd is not a Python 3 stdlib module — this branch
        # will raise ImportError unless a third-party dbm.bsd is installed.
        import dbm.bsd
        new_d = dbm.bsd.open(newdb, 'n')
    elif dbtype == 'bsddb':
        new_d = bsddb.hashopen(newdb, 'n')
    elif dbtype == 'dumbdbm':
        import dbm.dumb
        new_d = dbm.dumb.open(newdb, 'n')
    else:
        raise click.ClickException("Invalid type %s" % dbtype)
    new_data = shelve.Shelf(new_d, protocol=exaile_pickle_protocol)
    for k, v in data.items():
        new_data[k] = v
    new_data.sync()
    new_data.close()
def main():
    """Merge rows from the CSV files in argv[2:] into the shelf at argv[1].

    Only rows with a non-blank 'file' column are considered; rows whose
    'id' is already present in the shelf are skipped.
    """
    db_file = sys.argv[1]
    done_db = shelve.Shelf(db.open(db_file, "c"))
    log.info('db has %d records', len(done_db))
    added = 0
    try:
        for csv_file in sys.argv[2:]:
            log.info("processing file %s", csv_file)
            # Keep only rows with a non-empty 'file' column, keyed by 'id'
            # (later duplicates of an id replace earlier ones).
            valid_rows = {
                row['id']: row
                for row in csv.DictReader(open(csv_file))
                if row['file'].strip()
            }
            log.info('csv file has %d valid records ', len(valid_rows))
            for idx, row in valid_rows.items():
                if idx not in done_db:
                    done_db[idx] = row
                    added += 1
        log.info('Done - %d added - db now has %d records', added,
                 len(done_db))
    finally:
        done_db.close()
def _get_cache(cachepath):
    """Return a memoized shelf for *cachepath*, falling back to memory.

    A corrupt dbm file is deleted and reopened once; any other failure
    yields an in-memory Shelf so callers never see an open error.
    """
    if cachepath in _cache_shelves:
        return _cache_shelves[cachepath]

    cache = None
    try:
        cache = shelve.open(cachepath, protocol=2)
    except dbm.error:
        # dbm error on open - delete and retry
        print('Error (%s) opening %s - will attempt to delete and re-open.' %
              (sys.exc_info()[1], cachepath))
        try:
            os.remove(cachepath)
            cache = shelve.open(cachepath, protocol=2)
        except Exception:
            print('Error on re-open: %s' % sys.exc_info()[1])
    except Exception:
        # unknown error
        print('Could not open cache file %s, maybe name collision. '
              'Error: %s' % (cachepath, traceback.format_exc()))

    # Don't fail on bad caches
    if cache is None:
        print('Using in-memory shelf for cache file %s' % cachepath)
        cache = shelve.Shelf(dict())

    _cache_shelves[cachepath] = cache
    return cache
def _empty_mapping(self):
    """Create, register and return a fresh shelf (in-memory or on disk)."""
    if self._in_mem:
        mapping = shelve.Shelf(byteskeydict(), **self._args)
    else:
        # Each file-backed shelf gets a unique numbered filename.
        self.counter += 1
        path = self.fn + str(self.counter)
        mapping = shelve.open(path, **self._args)
    self._db.append(mapping)
    return mapping
def test_mutable_entry(self):
    """In-place mutation of a stored list persists only with writeback=True."""
    d1 = byteskeydict()
    d2 = byteskeydict()
    # (backing, writeback flag, value expected after the in-place append)
    cases = (
        (d1, False, [1, 2, 3, 4]),
        (d2, True, [1, 2, 3, 4, 5]),
    )
    for backing, wb, expected in cases:
        with shelve.Shelf(backing, protocol=2, writeback=wb) as shelf:
            shelf['key1'] = [1, 2, 3, 4]
            self.assertEqual(shelf['key1'], [1, 2, 3, 4])
            shelf['key1'].append(5)
            self.assertEqual(shelf['key1'], expected)
    self.assertEqual(len(d1), 1)
    self.assertEqual(len(d2), 1)
def __init__(self, save):
    """Open the commit-experiences store backed by an LMDB-backed dict.

    When *save* is False, writes are kept out of the database and an
    in-memory overlay dict is prepared instead.
    """
    self.save = save
    store = LMDBDict("data/commit_experiences.lmdb")
    self.db_experiences = shelve.Shelf(
        store,
        writeback=save,
        protocol=pickle.DEFAULT_PROTOCOL,
    )
    if not save:
        self.mem_experiences = {}