Ejemplo n.º 1
0
    def reader_to_cdbmake_md5(self, filename):
        """Return the MD5 hex digest of a CDB file dumped in cdbmake style.

        Each record is rendered as ``+klen,dlen:key->data`` followed by a
        newline, and a final empty line is appended before hashing.

        :param filename: path of the CDB file to read.
        :returns: hexadecimal MD5 digest of the rendered dump.
        """
        # Read through a context manager so the handle is closed right away;
        # the ``file`` builtin used previously does not exist on Python 3.
        with open(filename, 'rb') as infile:
            data = infile.read()

        md5 = hashlib.md5()
        for key, value in cdblib.Reader(data).iteritems():
            # Bytes literals are plain ``str`` on Python 2 and are what
            # hashlib requires on Python 3 (bytes %-formatting needs 3.5+).
            md5.update(b'+%d,%d:%s->%s\n' % (len(key), len(value), key, value))
        md5.update(b'\n')

        return md5.hexdigest()
Ejemplo n.º 2
0
 def __init__(self, root):
     """Open the CDB file at ``root`` through a read-only shared memory map.

     :param root: path of the CDB file backing this dataset.
     """
     super(CDBDataset, self).__init__()
     self.root = root
     # The mapping stays valid after the descriptor is closed, so the file
     # object only needs to live long enough to create the map.
     with open(root, 'rb') as fp:
         mem = mmap.mmap(fp.fileno(), os.path.getsize(fp.name),
                         mmap.MAP_SHARED, mmap.PROT_READ)
     self.reader = cdblib.Reader(mem)
     # The value stored under the b'len' key is kept as ``self.len``.
     self.len = self.reader.getint(b'len')
Ejemplo n.º 3
0
    async def _schedule_reload(self):
        """Re-read the cdb file in an endless loop, sleeping between reads.

        Raises:
            RuntimeError: if ``self._reload_interval`` is ``None``.
        """
        if self._reload_interval is None:
            raise RuntimeError("'reload_interval' must be defined")

        while True:
            # Load the whole file, then swap in a reader built from it.
            async with aiofiles.open(self._filename, "rb") as handle:
                raw = await handle.read()
            self.cdb = cdblib.Reader(raw)
            await asyncio.sleep(self._reload_interval)
Ejemplo n.º 4
0
    def setUp(self):
        """Write a small fixture database into memory and open a reader on it."""
        buf = StringIO()
        self.sio = buf

        builder = cdblib.Writer(buf, hashfn=self.HASHFN)
        # One key carrying ten values, followed by two single-value keys.
        builder.puts('dave', (str(n) for n in xrange(10)))
        for name, val in (('dave_no_dups', '1'), ('dave_hex', '0x1a')):
            builder.put(name, val)
        builder.putstrings('art', self.ARTS)
        builder.finalize()

        buf.seek(0)
        self.reader = cdblib.Reader(buf.getvalue(), hashfn=self.HASHFN)
Ejemplo n.º 5
0
    def test3_save_cdb(self):
        """Check that the saved cdb file contains a known domain key."""
        cdb_path = "./outputs/blocklist.cdb"
        blocklist_aggregator.save_cdb(filename=cdb_path)

        with open(cdb_path, 'rb') as handle:
            raw = handle.read()

        # Collect only the keys; the stored values are not inspected here.
        domains = [key for key, _ in cdblib.Reader(raw).iteritems()]
        self.assertIn(b"doubleclick.net", domains)
Ejemplo n.º 6
0
    async def read(
        cls,
        filename: str,
        reload_interval: int = 0,
    ) -> "Config":
        """Build an instance from a cdb file, optionally starting a reload task.

        Args:
            filename: path handed to the constructor; the file is then read
                from the instance's ``_filename`` attribute.
            reload_interval: when truthy, a background task running
                ``_schedule_reload`` is created.

        Returns:
            The newly created instance with its ``cdb`` reader set.
        """
        instance = cls(filename=filename, reload_interval=reload_interval)

        with open(instance._filename, "rb") as handle:
            raw = handle.read()
            instance.cdb = cdblib.Reader(raw)

        # A falsy interval means no background reload is scheduled.
        if not reload_interval:
            return instance

        instance._reload_task = asyncio.create_task(instance._schedule_reload())
        instance._reload_task.add_done_callback(instance._reload_done_callback)
        return instance
Ejemplo n.º 7
0
    def get(self):
        """Run the benchmark selected by the request and write its result.

        Request parameters read here:
          test   -- suffix of the ``test_*`` method to invoke on this object.
          db     -- base name of the file opened under ``testdata/``.
          hashfn -- ``'djb'`` selects ``cdblib.djb_hash``; any other value
                    selects the builtin ``hash``.
        """
        test = self.request.get('test')
        data = self.request.get('db')
        hashfn = self.request.get('hashfn')

        # Reuse the value fetched above instead of querying the request twice.
        self.hashfn = cdblib.djb_hash if hashfn == 'djb' else hash

        # NOTE(review): 'db' and 'test' come straight from the request and are
        # used to build a file path and an attribute name without validation;
        # confirm this handler is never exposed to untrusted callers.
        # CDB files are binary, so open in 'rb'. The handle is kept on the
        # instance rather than closed here.
        self.cdb_fp = open('testdata/%s.cdb' % data, 'rb')
        self.reader = cdblib.Reader(self.cdb_fp)
        self.new_writer()

        per_run_us = getattr(self, 'test_' + test)()
        self.response.out.write(str(per_run_us))
Ejemplo n.º 8
0
    def test_cdbmake_weird(self):
        """Feed cdbmake records with awkward bytes and read them back."""
        # Records can have a newline
        records = (
            b'+2,2:,:->:,\n'
            b'+2,2:->-><-\n'
            b'+7,7:newline->123\n'
            b'567\n'
            b'+2,2:\0+->+\0\n'
            b'\n'
        )
        stdin = io.BytesIO(records)
        cdb_path = os.path.join(self.temp_dir, 'out.cdb')
        tmp_path = os.path.join(self.temp_dir, 'tmp.cdb')

        python_pure_cdbmake([cdb_path, tmp_path], stdin=stdin)

        with open(cdb_path, 'rb') as infile:
            reader = cdblib.Reader(infile.read())

        # Checked in the same order as the records were written.
        expected = (
            (b',:', b':,'),
            (b'->', b'<-'),
            (b'newline', b'123\n567'),
            (b'\0+', b'+\0'),
        )
        for key, value in expected:
            self.assertEqual(reader[key], value)
Ejemplo n.º 9
0
 def setUp(self):
     """Load ``testdata/top250pws.cdb`` into ``self.reader``."""
     # ``open`` in a with-block replaces the Python 2-only ``file`` builtin
     # and closes the handle as soon as the contents have been read.
     with open('testdata/top250pws.cdb', 'rb') as fp:
         self.reader = cdblib.Reader(fp.read())
Ejemplo n.º 10
0
 def get_iteritems(self, filename):
     """Return ``iteritems()`` of a reader built from the file's contents.

     :param filename: path of the CDB file to read.
     """
     # Read inside a with-block so the handle is closed deterministically;
     # the ``file`` builtin used previously is not available on Python 3.
     with open(filename, 'rb') as fp:
         data = fp.read()
     reader = cdblib.Reader(data, hashfn=self.HASHFN)
     return reader.iteritems()
Ejemplo n.º 11
0
 def get_reader(self):
     """Finalize ``self.writer`` and return a reader over ``self.sio``'s bytes."""
     self.writer.finalize()
     written = self.sio.getvalue()
     return cdblib.Reader(written, hashfn=self.HASHFN)
Ejemplo n.º 12
0
 def openfull():
     """Rewind ``self.cdb_fp`` and construct a reader on it (result discarded)."""
     handle = self.cdb_fp
     handle.seek(0)
     cdblib.Reader(handle)
Ejemplo n.º 13
0
 def open100():
     """Rewind ``self.sio`` and construct a reader on it (result discarded)."""
     buf = self.sio
     buf.seek(0)
     cdblib.Reader(buf, self.hashfn)
Ejemplo n.º 14
0
 def copy(self):
     """Re-write every record of ``self.reader`` and return a reader on the result.

     The records go into a writer backed by a fresh ``StringIO``; the
     returned reader is built from that writer's ``fp``.
     """
     out = cdblib.Writer(StringIO(), self.hashfn)
     for record in self.reader.iteritems():
         # Each record is a (key, value) pair.
         out.put(*record)
     out.finalize()
     return cdblib.Reader(out.fp, self.hashfn)
Ejemplo n.º 15
0
 def __init__(self, data_file=None, index_file=None):
     """Open the data and index files and build a reader from the index.

     :param data_file: passed to ``_openfile`` to obtain ``self.data_fh``.
     :param index_file: passed to ``_openfile`` to obtain ``self.index_fh``;
         its full contents are handed to ``cdblib.Reader``.
     """
     self.data_file, self.index_file = data_file, index_file
     # The data handle is opened before the index handle.
     self.data_fh = _openfile(data_file)
     index_fh = _openfile(index_file)
     self.index_fh = index_fh
     # The builtin ``hash`` is supplied as the reader's hash function.
     self._reader = cdblib.Reader(index_fh.read(), hash)