def test_error_conditions(self):
    # Try to open a non-existent database.
    unlink(filename)
    self.assertRaises(gdbm.error, gdbm.open, filename, 'r')
    # Try to access a closed database.
    self.g = gdbm.open(filename, 'c')
    self.g.close()
    self.assertRaises(gdbm.error, lambda: self.g['a'])
    # Try to pass an invalid open flag.
    self.assertRaises(gdbm.error,
                      lambda: gdbm.open(filename, 'rx').close())
    def test_flags(self):
        # Test the flag parameter of open() by trying all supported flag modes.
        all = set(gdbm.open_flags)
        # Test standard flags (presumably "crwn").
        modes = all - set('fsu')
        for mode in modes:
            self.g = gdbm.open(filename, mode)
            self.g.close()

        # Test additional flags (presumably "fsu").
        flags = all - set('crwn')
        for mode in modes:
            for flag in flags:
                self.g = gdbm.open(filename, mode + flag)
                self.g.close()
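For context, a minimal sketch of what the test above iterates over: `dbm.gnu.open_flags` is a string listing every flag character the local gdbm build accepts, where the first character of a mode string is one of 'r', 'w', 'c', 'n' and the optional modifiers are 'f' (fast), 's' (synchronized) and 'u' (unlocked).

import dbm.gnu as gdbm

# Typically something like 'rwcnfsu', depending on how gdbm was built.
print(gdbm.open_flags)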
Example #5
    def _setup(self):
        if os.path.exists(self._path):
            try:
                with open('%s/%s' % (self._path, _METADATA_FILE), 'rb') as f:
                    self._metadata = pickle.load(f)
                print('loaded metadata: %s' % repr(self._metadata))
                logging.debug('loaded metadata %s' % repr(self._metadata))
            except IOError:
                print("IO error loading metadata?")
                self._setup_metadata()

            dbses = _load_paths(self._path)
            for db in dbses:
                try:
                    self._data.append(gdbm.open(db, 'c'))
                except Exception as err:
                    print('error appending dbfile: %s' % db, err)

            print('loaded %i dbm files' % len(self._data))
        else:
            print('path not found, creating')
            os.makedirs(self._path)
            os.makedirs('%s/archive' % self._path)
            self._setup_metadata()

        if not len(self._data):
            self._add_db()
Example #6
def find_dups(directory='.', files='*.jpg', callbacks=[]):
    '''Given a ``directory``, goes through all files that pass the
    filter ``files``, and for each one that is a duplicate, calls a number
    of ``callbacks``. Returns a dictionary containing the duplicates found.

    Example usage::

        d = find_dups('some/directory',
                      callbacks=[print_dups, KeepLarger()])

    The signature for writing callbacks is ``(existing, dup, m)``, where
    ``existing`` and ``dup`` are paths and ``m`` is the
    FileExistenceManager instance.
    '''
    from pathlib import Path
    store = GdbmStorageStrategy()
    m = FileExistenceManager(store)
    dups = {}
    for p in Path(directory).glob(files):
        with open(str(p), 'rb') as stream:
            existing = m.try_add_file(stream, str(p))
        if existing:
            existing = existing.decode('utf-8')
            dups[str(p)] = existing
            for function in callbacks:
                function(Path(existing), p, m)
    m.close()
    return dups
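For illustration, a minimal callback matching the ``(existing, dup, m)`` signature described in the docstring; ``log_dup`` is a hypothetical name, not part of the library:

def log_dup(existing, dup, m):
    # `existing` is the path already recorded, `dup` the newly found
    # duplicate; `m` (the FileExistenceManager) is unused here.
    print('duplicate: {} already stored as {}'.format(dup, existing))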
Example #7
 def create_db(self):
     self.lock_db()
     try:
         with gdbm.open(self.filename, 'c'):
             os.chmod(self.filename, 0o600)
     finally:
         self.unlock_db()
Example #8
def main():

    db_file = sys.argv[1]

    done_db = shelve.Shelf(db.open(db_file, "c"))

    log.info('db has %d records', len(done_db))

    added = 0
    try:
        for csv_file in sys.argv[2:]:
            log.info("processing file %s", csv_file)
            reader = csv.DictReader(open(csv_file))
            loaded_files = {}
            for row in reader:
                file_name = row['file'].strip()
                if file_name:
                    loaded_files[row['id']] = row
            log.info('csv file has %d valid records ', len(loaded_files))

            for idx in loaded_files:
                if idx not in done_db:
                    done_db[idx] = loaded_files[idx]
                    added += 1

        log.info('Done -  %d added - db now has %d records', added,
                 len(done_db))

    finally:
        done_db.close()
Example #9
    def __init__(self, dbm_file='./warcprox-playback-index.db'):
        if os.path.exists(dbm_file):
            self.logger.info('opening existing playback index database {}'.format(dbm_file))
        else:
            self.logger.info('creating new playback index database {}'.format(dbm_file))

        self.db = dbm_gnu.open(dbm_file, 'c')
 def __init__(self, filename, size=None, cached=True):
     self.db = shelve.Shelf(db.open(filename, "c"), writeback=cached)
     self._init_indexes()
     if not size:
         self.limit = MAXINT
     else:
         self.limit = size
 def __init__(self, filename):
     self.db = None
     if filename.endswith(".db"):
         try:
             self.db = gdbm.open(filename, "r")
         except gdbm.error as err:
             print("Unable to open binary database %s: %s" % (filename, err), file=sys.stderr)
Example #14
    def Create(self, infile, outfile):
        """Build the database from file."""
        db = gdbm.open(outfile, "n")
        with open(infile) as fid:

            db["datafile"] = os.path.abspath(infile)

            while True:
                line = fid.readline()
                if not line:
                    break

                if line[:3] == "ID ":
                    id = line.split()[1]
                    start = fid.tell() - len(line)

                elif line[:3] == "AC ":
                    acc = line.split()[1]
                    if acc[-1] == ";":
                        acc = acc[:-1]

                elif line[:2] == "//":
                    stop = fid.tell()
                    try:
                        value = "%d %d" % (start, stop)
                        db[id] = value
                        db[acc] = value
                        id, acc, start, stop = None, None, None, None
                    except Exception:
                        print("AARRGGGG %d %d %s %s" %
                              (start, stop, type(start), type(stop)))
                        print("%s %s" % (id, acc))

            db.close()
Example #15
    def __init__(self, dbm_file='./warcprox-dedup.db'):
        if os.path.exists(dbm_file):
            self.logger.info('opening existing deduplication database {}'.format(dbm_file))
        else:
            self.logger.info('creating new deduplication database {}'.format(dbm_file))

        self.db = dbm_gnu.open(dbm_file, 'c')
def find_dups(path='./file_hashes.gdbm', directory='.',
              callbacks=[print_dup], filter=lambda path: True):
    """Like ``check_dups()``, but also updates the database as it goes.

    Given a ``directory``, goes through all files that pass through the
    predicate ``filter``, and for each one that is a duplicate, calls each
    of the ``callbacks``. Returns a dictionary containing the duplicates found.

    Example usage::

        d = find_dups(directory='some/directory',
                      callbacks=[print_dup, KeepLarger()])

    The signature for writing callbacks is ``(original, dup, m)``, where
    ``original`` and ``dup`` are Path instances and ``m`` is the
    FileExistenceManager instance.
    """
    store = GdbmStorageStrategy(path=path)
    m = FileExistenceManager(store)
    dups = {}
    for p in Path(directory).walk():
        if not p.is_file():
            continue
        with open(str(p), 'rb') as stream:
            original = m.try_add_file(stream, str(p))
        if original:
            original = original.decode('utf-8')
            dups[str(p)] = original
            for function in callbacks:
                function(Path(original), p, m)
    m.close()
    return dups
Example #17
    def __init__(self, dbname, hashfact):
        super().__init__()
        self.basepath = dbname  # directory
        self.hashfact = hashfact  # hash factory

        # Ensure path exists.
        path = pathlib.Path(self.basepath)
        try:
            path.mkdir(parents=True)  # exist_ok=True is only for Python 3.5+
        except FileExistsError:
            pass

        # Are there any databases yet?  If not, create one.
        if len(list(path.iterdir())) == 0:
            e = self._create_db(0, hashfact.min(), hashfact.max())
            e["db"].close()  # will reopen later

        # Open all databases in order starting from the highest level.
        self.dbs = []
        for d in sorted(path.iterdir(), reverse=True):
            print(d)
            db = dbm.open(str(d), "cuf")
            e = {}
            e["db"] = db
            e["filename"] = db["filename"]
            e["level"] = db["level"]
            e["minhash"] = db["minhash"]
            e["maxhash"] = db["maxhash"]
            self.dbs.append(e)
Example #18
    def Open(self, indexfile=None):
        """Open the indexed database file."""
        if not indexfile:
            indexfile = os.path.join(os.environ["PYPHY"], "nr.dat.indexed")

        self.db = gdbm.open(indexfile)
        self.datafile = self.db["datafile"]
        self.fid = open(self.datafile)
Example #21
 def _add_db(self):
     filename = 'mdbm%s.db' % time.strftime("%b%d%H%M%Y")
     # filename = 'mdbm%s.db' % str(time.time())
     path = self._path + '/%s' % filename
     db = gdbm.open(path, 'c')
     db[_PATHKEY] = filename
     self._data.append(db)
     self._metadata['cursize'] = 0
     logging.debug('mdbm added new dbm file: %s' % filename)
Example #22
def rand_dbm_iter(dbmfile, seed=None):
    random.seed(seed)
    with gdbm.open(dbmfile,'w') as db:
        print('Loaded dbmfile ({}) with {} keys'.format(dbmfile, len(db)))
        while len(db) > 0:
            key = random.choice(db.keys())
            val = db[key].decode()
            del db[key]
            yield val
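A usage sketch for the generator above; the file name and seed are illustrative only. Note that each yielded key is deleted, so iterating to exhaustion empties the dbm file:

# Hypothetical consumer: drain the database in random order.
for val in rand_dbm_iter('items.dbm', seed=42):
    print(val)  # placeholder for real per-item processing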
Example #23
 def keys(self):
     self.lock_db()
     try:
         with gdbm.open(self.filename, 'r') as db:
             key = db.firstkey()
             while key is not None:
                 yield key
                 key = db.nextkey(key)
     finally:
         self.unlock_db()
        def __init__(self, name):
            super(CoverArtExtDB._impl, self).__init__()
            self.cachedir = RB.user_cache_dir() + "/" + name
            if not os.path.exists(self.cachedir):
                os.makedirs(self.cachedir)

            filename = self.cachedir + "/store.db"
            self.db = gdbm.open(filename, 'c')
            self.queue = Queue()
            self._store_request_in_progress = False
Example #26
 def __call__(self, existing, dup, m):
     if self.dups_dir is None:
         self.dups_dir = dup.parent / 'dups'
     if dup.stat().st_size > existing.stat().st_size:
         # Keep *dup* since it is the larger file
         existing.rename(self.dups_dir / dup.name)  # Move the old file
         with open(dup, 'rb') as stream:  # Update the database
             m.add_or_replace_file(stream, str(dup))
     else:
         # Move *dup* aside since it is the smaller file
         dup.rename(self.dups_dir / dup.name)
Example #27
def getArticles(verbose):
    with gdbm.open("/var/cache/man/index.db", "r") as IndexDb:
        keys = IndexDb.keys()
        if verbose:
            print("contents:", keys)
        Articles = []
        for Article in keys:
            # Keys are bytes; decode and strip the trailing terminator byte.
            Articles.append(Article.decode()[:-1])
        if verbose:
            print(Articles)
        return Articles
Example #28
def rand_fits_iter(topdir, dbmfile='kwhistos.dbm', seed=None):
    save_dblist(topdir, dbmfile)
    random.seed(seed)
    with gdbm.open(dbmfile,'w') as db:
        print('Loaded dbmfile ({}) with {} filenames'.format(dbmfile, len(db)))
        while len(db) > 0:
            idx = random.choice(db.keys())
            fname = db[idx].decode()
            #!print('DBG-1: db[{}]={}'.format(idx, fname))
            del db[idx]
            yield fname
 def _init(self, maxsize):
     # TODO: Change - this will not work with dummy dbm
     if not self.resume:
         for ext in ('', '.bak', '.dat', '.dir'):
             try:
                 os.remove(self.filename + ext)
             except OSError:
                 pass

     self.queue = PersistentFIFO(self.filename)
     self.unfinished = shelve.Shelf(db.open(self.filename + '.unfinished', "c"))
     self.stop = False
Example #30
    def __init__(self, name="",dir=None, loglevel=logging.ERROR):
        self.name = name
        self.name_write = name + "__write"
        self.name_read = name + "__read"
        self.namefile = self.name + "_gunQueue.db"
        self.log = logging
        self.log.basicConfig(level=loglevel,
                             format=self.__class__.__name__ + " %(asctime)s - %(levelname)s - - %(message)s",
                             datefmt="%Y-%m/%d %H:%M:%S %p")
        if dir is not None:
            self.dir = dir
            if not os.path.exists(self.dir):
                os.mkdir(self.dir)
            self.index = gnu.open(self.dir + "/" + name + "_index.db", "c")
            self.queue = gnu.open(self.dir + "/" + self.namefile, "c")
        else:
            os.mkdir("queue_data")
            self.index = gnu.open("queue_data" + "/" + name + "_index.db", "c")
            self.queue = gnu.open("queue_data" + "/" + self.namefile, "c")

        self.write_id = self.index.get(self.name_write, b"1").decode()
        self.read_id = self.index.get(self.name_read, b"1").decode()
Example #31
    def delete_transferrable_key(self, key):
        if self.read_only:
            raise TypeError

        self.lock_db()
        try:
            with gdbm.open(self.filename, 'w') as db:
                if key.fingerprint not in db:
                    raise KeyError(key.fingerprint)
                del db[key.fingerprint]
                db.reorganize()
        finally:
            self.unlock_db()
Example #33
def _shell_lookup(args):
    """This function is called when the script is used from command line:

    [jakni@nissen scripts]$ python unifetch.py -a A6XGL2 -ncis
    Name: A6XGL2_HUMAN
    Data class: Unreviewed
    TaxID: 9606
    Sequence: MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRRE [ ... ]
    """

    with _gnu.open(args.database) as database:
        data = database.get(args.accession, None)

    # If no accession is found, return "Not found."
    if data is None:
        return 'Not found.'

    fields = {'Name': [args.name],
              'Date': [args.date],
              'Data class': [args.dataclass],
              'Organism': [args.organism],
              'Taxonomy': [args.taxonomy],
              'TaxID': [args.taxid],
              'Sequence': [args.sequence]}

    # If nothing particular is specified, return the entire accession
    if not any(arr[0] for arr in fields.values()):
        text = _gzip.decompress(data).decode()
        return text

    else:
        # If output specified, return the relevant parts.
        fileobject = _io.BytesIO(_gzip.decompress(data))
        record = _SwissProt.read(fileobject)

        fields['Name'].append(record.entry_name)
        fields['Date'].append(record.created[0])
        fields['Data class'].append(record.data_class)
        fields['Organism'].append(record.organism)
        species = get_species(record)
        fields['Taxonomy'].append(
            ';'.join(record.organism_classification + ([species] if species else [])))
        fields['TaxID'].append(';'.join(record.taxonomy_id))
        fields['Sequence'].append(record.sequence)

        output = list()
        for title, (state, information) in fields.items():
            if state:
                output.append('{}: {}'.format(title, information))
        return '\n'.join(output)
Example #34
    def add_transferrable_key(self, key):
        if self.read_only:
            raise TypeError

        self.lock_db()
        try:
            with gdbm.open(self.filename, 'w') as db:
                if key.fingerprint in db:
                    raise KeyError(key.fingerprint)
                db[key.fingerprint] = \
                    b''.join(map(bytes, key.to_packets(
                        self._preferred_header_format)))
        finally:
            self.unlock_db()
Example #35
def gdbm_test_db(request):
    print("creating test gdbm file")
    temp_file = tempfile.NamedTemporaryFile()
    test_db = gdbm.open(temp_file.name, "n")
    test_db[key1] = val1
    test_db[key2] = val2
    test_db.close()

    def delete_gdbm_test_db():
        print("deleting test gdbm file")
        temp_file.close()

    request.addfinalizer(delete_gdbm_test_db)
    return temp_file.name
Example #37
    def __init__(self, dbm_file='./warcprox-playback-index.db'):
        try:
            import dbm.gnu as dbm_gnu
        except ImportError:
            try:
                import gdbm as dbm_gnu
            except ImportError:
                import anydbm as dbm_gnu

        if os.path.exists(dbm_file):
            self.logger.info('opening existing playback index database {}'.format(dbm_file))
        else:
            self.logger.info('creating new playback index database {}'.format(dbm_file))

        self.db = dbm_gnu.open(dbm_file, 'c')
    def test_reorganize(self):
        self.g = gdbm.open(filename, 'c')
        size0 = os.path.getsize(filename)

        self.g['x'] = 'x' * 10000
        size1 = os.path.getsize(filename)
        self.assert_(size0 < size1)

        del self.g['x']
        # 'size' is supposed to be the same even after deleting an entry.
        self.assertEqual(os.path.getsize(filename), size1)

        self.g.reorganize()
        size2 = os.path.getsize(filename)
        self.assert_(size1 > size2 >= size0)
def populate_db(path='./file_hashes.gdbm', directory=".",
                callbacks=[print_dup], filter=lambda path: True):
    """Create/update database at ``path`` by hashing files in ``directory``."""
    store = GdbmStorageStrategy(path=path)
    m = FileExistenceManager(store)
    for p in Path(directory).walk():
        if not p.is_file():
            continue
        with open(str(p), 'rb') as stream:
            original = m.try_add_file(stream, str(p))
        if original:
            original = original.decode('utf-8')
            for function in callbacks:
                function(Path(original), p, m)
    m.close()
Example #41
 def open_dbm(self):
     if self.read_only:
         open_mode = "r"
     else:
         if os.path.exists(self.dbm_path):
             open_mode = "w"
         else:
             open_mode = "c"
         if os.name != "nt":
             open_mode += "s"
     self.logger.info("open dbm file {} with mode: {}".format(
         self.dbm_path, open_mode))
     self.saved_file_params = gdbm.open(self.dbm_path, open_mode)
     if open_mode[0] == "w" or open_mode[0] == "r":
         self.stats = json.loads(
             self.saved_file_params.get(TFileStorage.stats_key))
Example #42
    def __init__(self, dbm_file='./warcprox-stats.db', options=warcprox.Options()):
        try:
            import dbm.gnu as dbm_gnu
        except ImportError:
            try:
                import gdbm as dbm_gnu
            except ImportError:
                import anydbm as dbm_gnu

        if os.path.exists(dbm_file):
            self.logger.info('opening existing stats database {}'.format(dbm_file))
        else:
            self.logger.info('creating new stats database {}'.format(dbm_file))

        self.db = dbm_gnu.open(dbm_file, 'c')
        self.options = options
Example #43
def gdbm_test_db(request):
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    print("creating test gdbm file {}".format(temp_file.name))
    test_db = gdbm.open(temp_file.name, "n")
    test_db[key1] = val1
    test_db[key2] = val2
    test_db.close()

    def delete_gdbm_test_db():
        temp_file.close()
        for f in glob.glob("{}*".format(temp_file.name)):
            print("deleting test gdbm file {}".format(f))
            os.remove(f)

    request.addfinalizer(delete_gdbm_test_db)
    return temp_file.name
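Assuming the function above is registered as a pytest fixture (the @pytest.fixture decorator is not shown in the snippet), a test would receive the path by parameter name; key1 is the module-level constant the fixture stores:

def test_fixture_roundtrip(gdbm_test_db):
    db = gdbm.open(gdbm_test_db, "r")
    try:
        # Membership works with str or bytes keys in dbm.gnu.
        assert key1 in db
    finally:
        db.close()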
 def test_key_methods(self):
     self.g = gdbm.open(filename, 'c')
     self.assertEqual(self.g.keys(), [])
     self.g['a'] = 'b'
     self.g['12345678910'] = '019237410982340912840198242'
     self.g[b'bytes'] = b'data'
     key_set = set(self.g.keys())
     self.assertEqual(key_set, set([b'a', b'bytes', b'12345678910']))
     self.assert_(b'a' in self.g)
     self.assertEqual(self.g[b'bytes'], b'data')
     key = self.g.firstkey()
     while key:
         self.assert_(key in key_set)
         key_set.remove(key)
         key = self.g.nextkey(key)
     self.assertRaises(KeyError, lambda: self.g['xxx'])
Example #47
def save_dblist(topdir, dbmfile,
                progcnt=1E4,
                #progcnt=10,
                expectedcnt = 84E5):
    idx = 0
    tic()
    with gdbm.open(dbmfile,'nf') as db:
        for fname in fits_iter(topdir):
            db[str(idx)] = fname
            idx += 1
            if progcnt is not None and (idx % progcnt) == 0:
                secs = toc()
                remhrs = ((secs * expectedcnt / idx) - secs) / 60 / 60
                print('# Saved {:,} to dbm in {:,.0f} secs. Remain hrs: {}'
                      .format(idx, secs, remhrs))
    save_dblist.count = idx
    return idx
def getReportXML(encounter):
    url = "http://appsrv.alleghenycounty.us/reports/rwservlet?food_rep&report=FoodINSP/insp_summary_COVID.jsp&desformat=XML&P_ENCOUNTER=%s" % encounter
    dbm = gdbm.open("reportDB.gdbm", 'c')

    try:
        try:
            xmlString = dbm[encounter]
            print("found %s" % encounter)
        except KeyError:
            print("getting %s" % encounter)
            try:
                xmlString = wget(url)
            except Exception:
                return None
            dbm[encounter] = xmlString
    finally:
        # Close the database even on the early-return path above.
        dbm.close()

    return xmlString
Example #49
 def _create_db(self, level, minhash, maxhash):
     #print("create_db: level =", level, "minhash =", minhash, " maxhash =", maxhash)
     level = str(level)
     filename = self.basepath + "/" + level + "-" + self.hashfact.hexify(
         minhash)[0:4] + "-" + self.hashfact.hexify(maxhash)[0:4]
     print("filename =", filename)
     e = {}
     db = dbm.open(filename, "cuf")
     db["filename"] = filename
     db["level"] = level
     db["minhash"] = minhash
     db["maxhash"] = maxhash
     e["db"] = db
     e["filename"] = db["filename"]
     e["level"] = db["level"]
     e["minhash"] = db["minhash"]
     e["maxhash"] = db["maxhash"]
     return e
Example #50
def createdb(outfilepath, infilepath):
    """Creates a new database from a SwissProt/UniProt text file, gzipped or not.
    For speed, the database is built in memory, then moved to disk. Takes ~11 hrs."""

    import shutil as _shutil

    if _os.path.exists(outfilepath):
        raise FileExistsError('Database already exists.')

    # Check whether the input file is gzipped by looking for the two-byte
    # gzip signature 1F 8B, and use gzip.open if it is.
    with open(infilepath, 'rb') as infile:
        signature = infile.read(2)

    if signature == b'\x1f\x8b':
        opener = _gzip.open
    else:
        opener = open

    # Read the content of the text file. At an accession identifier line,
    # extract the accession; at the end of a record, save the record under
    # the extracted accession ID. The database is first built in memory.
    accession = None
    buffer = list()
    tempfilename = '/dev/shm/temp.gdbm'
    with opener(infilepath, 'rt') as infile, _gnu.open(tempfilename, 'cf') as db:
        for line in infile:
            buffer.append(line)

            if line.startswith('//'):
                assert accession is not None
                db[accession] = _gzip.compress(bytes(''.join(buffer), 'ASCII'))
                buffer.clear()
                accession = None

            elif line.startswith('AC') and accession is None:
                accession = line.split()[1][:-1]

        # Because the database was opened in fast mode, sync before closing.
        db.sync()

    # Move file from memory to actual file location
    _shutil.move(tempfilename, outfilepath)
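A hypothetical invocation; both file names are illustrative only, and gzipped input is detected automatically via the two-byte signature check above:

createdb('uniprot.gdbm', 'uniprot_sprot.dat.gz')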
Example #51
	def __init__(self, *args, **config):
		super(database,self).__init__(*args, **config)

		default_db = config.get("dbtype","anydbm")
		if not default_db.startswith("."):
			default_db = '.' + default_db

		self._db_path = os.path.join(self.location, fs_template.gen_label(self.location, self.label)+default_db)
		self.__db = None
		mode = "w"
		if whichdb(self._db_path) in ("dbm.gnu", "gdbm"):
			# Allow multiple concurrent writers (see bug #53607).
			mode += "u"
		try:
			# dbm.open() will not work with bytes in python-3.1:
			#   TypeError: can't concat bytes to str
			self.__db = anydbm_module.open(self._db_path,
				mode, self._perms)
		except anydbm_module.error:
			# XXX handle this at some point
			try:
				self._ensure_dirs()
				self._ensure_dirs(self._db_path)
			except (OSError, IOError) as e:
				raise cache_errors.InitializationError(self.__class__, e)

			# try again if failed
			try:
				if self.__db == None:
					# dbm.open() will not work with bytes in python-3.1:
					#   TypeError: can't concat bytes to str
					if gdbm is None:
						self.__db = anydbm_module.open(self._db_path,
							"c", self._perms)
					else:
						# Prefer gdbm type if available, since it allows
						# multiple concurrent writers (see bug #53607).
						self.__db = gdbm.open(self._db_path,
							"cu", self._perms)
			except anydbm_module.error as e:
				raise cache_errors.InitializationError(self.__class__, e)
		self._ensure_access(self._db_path)
Example #52
def verify_database(dbpath):
    db_files = _load_paths(dbpath)
    print("verifying %i mdbm chunks" % len(db_files))
    for db in db_files:
        dbchunk = gdbm.open(db, 'w')
        # print("reorganizing %s" % db)
        # try:
        #     dbchunk.reorganize()
        # except Exception as err:
        #     print("couldn't reorganize: error %s" % err)
        print("checking %s" % db)
        try:
            # check_integrity_for_chunk(dbchunk)
            k = dbchunk.firstkey()
            print("first key: %s" % k)
            check_integrity_for_chunk(dbchunk)
        except Exception as err:
            print("integrity check failed: error %s" % err)
        finally:
            dbchunk.close()
def check_dups(path='./file_hashes.gdbm', directory=".",
               callbacks=[print_dup], filter=lambda path: True):
    """Check files in ``directory`` against the database ``path``.

    Example usage::

        check_dups(directory='some/directory',
                   callbacks=[print_dup, trash_dup])
    """
    store = GdbmStorageStrategy(path=path)
    m = FileExistenceManager(store)
    for p in Path(directory).walk():
        if not p.is_file():
            continue
        with open(str(p), 'rb') as stream:
            original = m.file_exists(stream)
        if original:
            original = original.decode('utf-8')
            for function in callbacks:
                function(Path(original), p, m)
    m.close()
Example #54
    def get_transferrable_key(self, fingerprint):
        if len(fingerprint) != 40:
            # Actually a key ID - find the fingerprint first.
            fingerprint = ([k for k in self.keys()
                            if k.endswith(fingerprint)] + [None])[0]

        if fingerprint is None:
            return None

        self.lock_db()
        try:
            with gdbm.open(self.filename, 'r') as db:
                packet_data = db[fingerprint]
            packets = list(parse_binary_packet_data(packet_data))
            if packets:
                if packets[0].type == constants.PUBLIC_KEY_PACKET_TYPE:
                    return TransferablePublicKey.from_packets(packets)
                elif packets[0].type == constants.SECRET_KEY_PACKET_TYPE:
                    return TransferableSecretKey.from_packets(packets)
        finally:
            self.unlock_db()
Example #55
 def __init__(self, path='./file_hashes.gdbm', mode='c', sync='s'):
     from dbm.gnu import open
     self.d = open(path, mode + sync)
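A minimal usage sketch, assuming a writable path (the path here is hypothetical): 'c' creates the file if it does not exist, and 's' puts gdbm in synchronized mode so each update is flushed to disk immediately. The underlying dbm.gnu object is exposed as the ``d`` attribute:

store = GdbmStorageStrategy(path='/tmp/hashes.gdbm', mode='c', sync='s')
store.d[b'hash'] = b'/path/to/file'  # hypothetical key/value pair
store.d.close()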
Example #56
        print ("%d : %s :%s"% (i,k, q.get(k)[0] if hasattr(q.get(k), '__len__') else q.get(k)))   

def print_items2(q): 
    for i, k in enumerate(q):
        print ("%d : %s :%s"% (i,k, k[0]))  
    
if __name__ == '__main__':
    if  len(sys.argv)<3:
        print("must provide data dir and queue size")
        sys.exit(1)
    base_dir=sys.argv[1]
    size= int(sys.argv[2])
    
    filename=os.path.join(base_dir, 'pool_items')
    queue=PersistentFIFO(filename, size)
    unfinished=shelve.Shelf(db.open(filename+'.unfinished', "c"))
    
    print("Queue size %d" %len(queue))
    print("Unfinished size %d" %len(unfinished))
    
    if len(queue):
        print("Queue items:")
        print_items(queue.db)   
        
        
        
    if len(unfinished):
        print("Unfinished items:")
        print_items(unfinished)   

    queue.close()