def reader_to_cdbmake_md5(self, filename):
    """Return the MD5 hex digest of a cdb file rendered as cdbmake records.

    Each key/value pair yielded by ``cdblib.Reader(...).iteritems()`` is fed
    to the hash as ``+<key length>,<value length>:<key>-><value>`` followed
    by a newline; one extra newline is hashed after the last record.

    :param filename: path of the cdb file to read.
    :returns: the hexadecimal MD5 digest string.
    """
    # Read through a context manager so the handle is closed deterministically
    # instead of being left for the garbage collector to reclaim.
    with open(filename, 'rb') as fp:
        data = fp.read()

    md5 = hashlib.md5()
    for key, value in cdblib.Reader(data).iteritems():
        md5.update('+%d,%d:%s->%s\n' % (len(key), len(value), key, value))
    md5.update('\n')
    return md5.hexdigest()
def __init__(self, root):
    """Open the cdb file at ``root`` through a read-only shared memory map.

    Sets ``self.root``, ``self.reader`` (a ``cdblib.Reader`` built over the
    mapping) and ``self.len`` (the result of ``reader.getint(b'len')``).

    :param root: path of the cdb file backing this dataset.
    """
    super(CDBDataset, self).__init__()
    self.root = root
    # A mapping remains valid after the descriptor it was created from is
    # closed, so the file object only has to live for the mmap() call itself.
    with open(root, 'rb') as fp:
        mem = mmap.mmap(
            fp.fileno(),
            os.path.getsize(fp.name),
            mmap.MAP_SHARED,
            mmap.PROT_READ,
        )
    self.reader = cdblib.Reader(mem)
    self.len = self.reader.getint(b'len')
async def _schedule_reload(self):
    """Re-read the cdb file in an endless loop, sleeping between reads.

    Each pass reads ``self._filename`` in full, replaces ``self.cdb`` with a
    new ``cdblib.Reader`` over those bytes, then sleeps for
    ``self._reload_interval``.

    Raises:
        RuntimeError: if ``self._reload_interval`` is ``None``.
    """
    interval_missing = self._reload_interval is None
    if interval_missing:
        raise RuntimeError("'reload_interval' must be defined")

    while True:
        async with aiofiles.open(self._filename, "rb") as handle:
            raw = await handle.read()
        # Swap in a reader over the freshly read bytes before pausing.
        self.cdb = cdblib.Reader(raw)
        await asyncio.sleep(self._reload_interval)
def setUp(self):
    """Write a small cdb into an in-memory buffer and open a reader on it.

    The buffer is kept as ``self.sio`` and the reader as ``self.reader``;
    both writer and reader use ``self.HASHFN``.
    """
    buf = StringIO()
    self.sio = buf

    out = cdblib.Writer(buf, hashfn=self.HASHFN)
    # 'dave' is given the decimal strings '0' through '9'.
    out.puts('dave', (str(n) for n in xrange(10)))
    out.put('dave_no_dups', '1')
    out.put('dave_hex', '0x1a')
    out.putstrings('art', self.ARTS)
    out.finalize()

    buf.seek(0)
    self.reader = cdblib.Reader(buf.getvalue(), hashfn=self.HASHFN)
def test3_save_cdb(self):
    """Save the blocklist as a cdb file and check a known domain is a key."""
    fn = "./outputs/blocklist.cdb"
    blocklist_aggregator.save_cdb(filename=fn)

    with open(fn, 'rb') as cdb_file:
        raw = cdb_file.read()

    # Only the keys matter here; values are ignored.
    domains = [key for key, _ in cdblib.Reader(raw).iteritems()]
    self.assertIn(b"doubleclick.net", domains)
async def read(
    cls,
    filename: str,
    reload_interval: int = 0,
) -> "Config":
    """Build an instance from a cdb file, optionally reloading it periodically.

    The file named by the new instance's ``_filename`` is read once and
    wrapped in a ``cdblib.Reader``. A background ``_schedule_reload`` task is
    created only when ``reload_interval`` is truthy; its completion is
    reported to ``_reload_done_callback``.
    """
    instance = cls(filename=filename, reload_interval=reload_interval)

    with open(instance._filename, "rb") as handle:
        instance.cdb = cdblib.Reader(handle.read())

    if not reload_interval:
        return instance

    instance._reload_task = asyncio.create_task(instance._schedule_reload())
    instance._reload_task.add_done_callback(instance._reload_done_callback)
    return instance
def get(self):
    """Run the benchmark selected by the request and write out its result.

    Request parameters read:

    * ``test``   -- suffix of the ``test_*`` method on this object to call.
    * ``db``     -- base name of the ``.cdb`` file under ``testdata/``.
    * ``hashfn`` -- ``'djb'`` selects ``cdblib.djb_hash``; any other value
      selects the builtin ``hash``.

    The value returned by the selected method is written to the response
    as a string.
    """
    # NOTE(review): 'test' and 'db' come straight from the request and are
    # used to pick a method and to build a file path -- confirm this handler
    # is never reachable by untrusted clients.
    test = self.request.get('test')
    data = self.request.get('db')
    hashfn = self.request.get('hashfn')

    self.hashfn = cdblib.djb_hash if hashfn == 'djb' else hash

    # cdb files are binary, so open in 'rb' mode. The handle is stored on
    # self rather than closed here; presumably the test_* methods reuse it.
    self.cdb_fp = open('testdata/%s.cdb' % data, 'rb')
    self.reader = cdblib.Reader(self.cdb_fp)
    self.new_writer()

    per_run_us = getattr(self, 'test_' + test)()
    self.response.out.write(str(per_run_us))
def test_cdbmake_weird(self):
    """Check cdbmake input whose records contain delimiters, NULs and newlines."""
    cdb_path = os.path.join(self.temp_dir, 'out.cdb')
    tmp_path = os.path.join(self.temp_dir, 'tmp.cdb')

    # Records can have a newline
    stdin = io.BytesIO(
        b'+2,2:,:->:,\n'
        b'+2,2:->-><-\n'
        b'+7,7:newline->123\n'
        b'567\n'
        b'+2,2:\0+->+\0\n'
        b'\n'
    )
    python_pure_cdbmake([cdb_path, tmp_path], stdin=stdin)

    with open(cdb_path, 'rb') as infile:
        reader = cdblib.Reader(infile.read())

    # Checked in the same order as the records were supplied.
    expected_pairs = (
        (b',:', b':,'),
        (b'->', b'<-'),
        (b'newline', b'123\n567'),
        (b'\0+', b'+\0'),
    )
    for key, value in expected_pairs:
        self.assertEqual(reader[key], value)
def setUp(self):
    """Load ``testdata/top250pws.cdb`` into ``self.reader``."""
    # Use a context manager so the fixture file is closed as soon as its
    # bytes have been read, rather than whenever the handle is collected.
    with open('testdata/top250pws.cdb', 'rb') as fp:
        self.reader = cdblib.Reader(fp.read())
def get_iteritems(self, filename):
    """Return ``iteritems()`` of a reader over the cdb file at ``filename``.

    The reader is constructed with ``hashfn=self.HASHFN``.

    :param filename: path of the cdb file to read.
    """
    # Read the whole file up front and close it deterministically; the
    # reader is built from the bytes, not from the open handle.
    with open(filename, 'rb') as fp:
        data = fp.read()
    reader = cdblib.Reader(data, hashfn=self.HASHFN)
    return reader.iteritems()
def get_reader(self):
    """Finalize ``self.writer`` and return a reader over ``self.sio``'s contents.

    The reader is constructed with ``hashfn=self.HASHFN``.
    """
    self.writer.finalize()
    written = self.sio.getvalue()
    return cdblib.Reader(written, hashfn=self.HASHFN)
def openfull():
    """Rewind ``self.cdb_fp`` and construct a reader over it.

    ``self`` is taken from the enclosing scope. The reader is discarded;
    nothing is returned.
    """
    fp = self.cdb_fp
    fp.seek(0)
    cdblib.Reader(fp)
def open100():
    """Rewind ``self.sio`` and construct a reader over it with ``self.hashfn``.

    ``self`` is taken from the enclosing scope. The reader is discarded;
    nothing is returned.
    """
    buf = self.sio
    buf.seek(0)
    cdblib.Reader(buf, self.hashfn)
def copy(self):
    """Return a new reader holding every pair yielded by ``self.reader``.

    The pairs are written through a ``cdblib.Writer`` backed by a fresh
    ``StringIO``; both the writer and the returned reader use
    ``self.hashfn``.
    """
    writer = cdblib.Writer(StringIO(), self.hashfn)
    put = writer.put
    for k, v in self.reader.iteritems():
        put(k, v)
    writer.finalize()
    # The new reader is built on the writer's own file object.
    return cdblib.Reader(writer.fp, self.hashfn)
def __init__(self, data_file=None, index_file=None):
    """Record both paths, open them, and build a reader from the index.

    ``_openfile`` is applied to ``data_file`` and then to ``index_file``.
    The index handle is read in full and passed to ``cdblib.Reader`` with
    the builtin ``hash`` as its second argument.

    :param data_file: passed to ``_openfile``; stored as ``self.data_file``.
    :param index_file: passed to ``_openfile``; stored as ``self.index_file``.
    """
    self.data_file, self.index_file = data_file, index_file
    self.data_fh = _openfile(data_file)
    self.index_fh = _openfile(index_file)
    index_bytes = self.index_fh.read()
    self._reader = cdblib.Reader(index_bytes, hash)