Example #1
0
    def compile_hs(self, hs_db_file=None):
        """Populate ``self.hs_db``, either from a file or by compiling.

        When ``hs_db_file`` names an existing file, the serialized database
        is read from it and nothing is compiled. Otherwise a new database is
        compiled from ``self.regexes``, the elapsed wall-clock time is stored
        in ``self.stats.hs_compilation_time`` and, if ``hs_db_file`` was
        given, the compiled database is serialized to that path.

        Args:
            hs_db_file: Optional path of the on-disk database file.
        """
        cache_hit = hs_db_file and os.path.isfile(hs_db_file)
        if cache_hit:
            log.info(f'Loading Hyperscan DB from disk: {hs_db_file}')
            with open(hs_db_file, 'rb') as db_stream:
                raw_db = bytearray(db_stream.read())
                self.hs_db = hyperscan.loads(raw_db)
            return

        self.hs_db = hyperscan.Database()

        num_patterns = len(self.regexes)

        # Regex records are indexed by the position of each field in
        # ``self.Regex._fields``.
        pattern_pos = self.Regex._fields.index('pattern')
        expressions = [
            str.encode(regex[pattern_pos], encoding='utf-8')
            for regex in self.regexes
        ]
        # Pattern ids are simply the positions within ``self.regexes``.
        ids = list(range(num_patterns))
        flags_pos = self.Regex._fields.index('hs_flags')
        flags = [regex[flags_pos] for regex in self.regexes]

        compile_args = {
            'expressions': expressions,
            'ids': ids,
            'elements': num_patterns,
            'flags': flags,
        }
        started_at = time.time()
        self.hs_db.compile(**compile_args)
        self.stats.hs_compilation_time = time.time() - started_at

        if not hs_db_file:
            return

        log.info(f'Saving Hyperscan DB to disk: {hs_db_file}')
        with open(hs_db_file, 'wb') as db_stream:
            db_stream.write(hyperscan.dumps(self.hs_db))
Example #2
0
    def _save_hs_db(path: str, db_to_save: hs.Database):
        """
        Serialize a Hyperscan database and write it to a file.

        The database is serialized with ``hs.dumps`` and the resulting bytes
        are written to ``path`` in binary mode. Afterwards a
        "hyperscan database saved" event is emitted through ``Logger`` with
        INFO severity.

        Args:
            path: Destination file path; opened in "wb" mode.
            db_to_save: The database to serialize. Must not be None.

        Raises:
            AssertionError: If ``db_to_save`` is None.
        """
        # Raised explicitly instead of using an ``assert`` statement so the
        # check is still enforced when Python runs with -O, which strips
        # asserts. The exception type and message match the former assert.
        if db_to_save is None:
            raise AssertionError("Hyperscan database must not be none in order to save to file")

        serialized_db = hs.dumps(db_to_save)
        with io.open(path, "wb") as bin_file:
            bin_file.write(serialized_db)

        Logger().event(category="hyperscan",
                       action="hyperscan database saved",
                       dataset=path).out(severity=Severity.INFO)
Example #3
0
def test_database_deserialize(database_stream):
    """Round-trip a database through dumps/loads and expect a distinct object."""
    blob = bytearray(hyperscan.dumps(database_stream))
    restored = hyperscan.loads(blob)
    # Deserializing must produce a new object, not hand back the input.
    assert restored is not database_stream
Example #4
0
def test_database_serialize(database_stream):
    """Check that the serialized form of the fixture has the expected length."""
    # NOTE(review): the expected size is presumably tied to the fixture's
    # patterns and the Hyperscan build in use; confirm before changing it.
    expected_size = 6200
    blob = hyperscan.dumps(database_stream)
    assert len(blob) == expected_size