Example #1
0
    def generate_mlbf(cls, stats, blocked, not_blocked):
        """Build, populate and verify a filter cascade for the given keys.

        Originally based on:
        https://github.com/mozilla/crlite/blob/master/create_filter_cascade/certs_to_crlite.py
        (nowadays only the error-rate calculation still follows it).

        `stats` is updated in place with the input sizes, the error rates
        and the version, layer count and bit count of the new cascade.
        Returns the cascade after it has been initialized with `blocked` as
        the include set and `not_blocked` as the exclude set, and verified
        against both.
        """
        blocked_count = len(blocked)
        stats['mlbf_blocked_count'] = blocked_count
        not_blocked_count = len(not_blocked)
        stats['mlbf_notblocked_count'] = not_blocked_count

        # Error rates handed to the cascade: a ratio-derived value and 0.5.
        first_rate = blocked_count / (math.sqrt(2) * not_blocked_count)
        fprs = [first_rate, 0.5]

        log.info("Generating filter")
        cascade = FilterCascade(
            error_rates=fprs,
            defaultHashAlg=HashAlgorithm.SHA256,
            # A fresh random 16-byte salt is drawn for every call.
            salt=secrets.token_bytes(16),
        )
        cascade.initialize(include=blocked, exclude=not_blocked)

        stats['mlbf_fprs'] = fprs
        stats['mlbf_version'] = cascade.version
        layer_count = cascade.layerCount()
        stats['mlbf_layers'] = layer_count
        bit_count = cascade.bitCount()
        stats['mlbf_bits'] = bit_count

        summary = "Filter cascade layers: {layers}, bit: {bits}".format(
            layers=layer_count, bits=bit_count)
        log.debug(summary)

        cascade.verify(include=blocked, exclude=not_blocked)
        return cascade
Example #2
0
def generate_mlbf(stats, blocked, not_blocked):
    """Create, fill and verify a SHA256 filter cascade with a random salt.

    The smaller of the two input sizes is passed to
    `set_crlite_error_rates` as `include_len` and the larger one as
    `exclude_len`, regardless of which collection is which.

    `stats` is updated in place with the input sizes and the cascade's
    version, layer count and bit count. Returns the verified cascade.
    """
    log.info('Starting to generating bloomfilter')

    cascade = FilterCascade(
        defaultHashAlg=HashAlgorithm.SHA256,
        salt=secrets.token_bytes(16),
    )

    # Order the two sizes so include_len never exceeds exclude_len.
    smaller_len, larger_len = sorted((len(blocked), len(not_blocked)))
    cascade.set_crlite_error_rates(
        include_len=smaller_len,
        exclude_len=larger_len,
    )

    stats['mlbf_blocked_count'] = len(blocked)
    stats['mlbf_notblocked_count'] = len(not_blocked)

    cascade.initialize(include=blocked, exclude=not_blocked)

    stats['mlbf_version'] = cascade.version
    layer_count = cascade.layerCount()
    stats['mlbf_layers'] = layer_count
    bit_count = cascade.bitCount()
    stats['mlbf_bits'] = bit_count

    log.info(f'Filter cascade layers: {layer_count}, bit: {bit_count}')

    cascade.verify(include=blocked, exclude=not_blocked)
    return cascade
Example #3
0
def generateMLBF(stats, *, blocked, not_blocked, capacity, diffMetaFile=None):
    """Create and initialize a filter cascade for the given keys.

    Based on:
    https://github.com/mozilla/crlite/blob/master/create_filter_cascade/certs_to_crlite.py

    Args:
        stats: mapping updated in place with the error rates and the
            version, layer count and bit count of the resulting cascade.
        blocked: sized collection used as the cascade's include set.
        not_blocked: sized collection used as the cascade's exclude set.
        capacity: multiplier applied to ``len(blocked)`` to size a fresh
            cascade; only used when ``diffMetaFile`` is None.
        diffMetaFile: optional path of an mlbf base file; when given, the
            cascade is loaded from it through ``FilterCascade.loadDiffMeta``
            instead of being created from scratch.

    Returns:
        The initialized cascade, with its version set to 1.

    Raises:
        ZeroDivisionError: if ``not_blocked`` is empty.
    """
    fprs = [len(blocked) / (math.sqrt(2) * len(not_blocked)), 0.5]

    if diffMetaFile is not None:
        log.info(
            "Generating filter with characteristics from mlbf base file {}".
            format(diffMetaFile))
        # Use a context manager so the base file is closed even when loading
        # fails; the handle was previously left open.
        # NOTE(review): assumes loadDiffMeta finishes reading before it
        # returns and keeps no reference to the file object - confirm.
        with open(diffMetaFile, 'rb') as mlbf_meta_file:
            cascade = FilterCascade.loadDiffMeta(mlbf_meta_file)
        cascade.error_rates = fprs
    else:
        log.info("Generating filter")
        cascade = FilterCascade.cascade_with_characteristics(
            int(len(blocked) * capacity), fprs)

    cascade.version = 1
    cascade.initialize(include=blocked, exclude=not_blocked)

    stats['mlbf_fprs'] = fprs
    stats['mlbf_version'] = cascade.version
    stats['mlbf_layers'] = cascade.layerCount()
    stats['mlbf_bits'] = cascade.bitCount()

    log.debug("Filter cascade layers: {layers}, bit: {bits}".format(
        layers=cascade.layerCount(), bits=cascade.bitCount()))
    return cascade
Example #4
0
    def test_generate_and_write_filter(self):
        """Write a filter from the DB and check its membership and size.

        Every blocked (guid, version) key must be in the written filter and
        every other fetched key must not be. The file size and bit count are
        then compared against the values expected for the layer count.
        """
        self.setup_data()
        mlbf = MLBF.generate_from_db(123456)
        mlbf.generate_and_write_filter()

        with open(mlbf.filter_path, 'rb') as filter_file:
            bfilter = FilterCascade.from_buf(filter_file.read())

        blocked_versions = fetch_blocked_from_db()
        blocked_guids = blocked_versions.values()
        for guid, version_str in blocked_guids:
            blocked_key = MLBF.KEY_FORMAT.format(
                guid=guid, version=version_str)
            assert blocked_key in bfilter

        all_addons = fetch_all_versions_from_db(blocked_versions.keys())
        for guid, version_str in all_addons:
            # edge case where a version_str exists in both: only pairs that
            # are not blocked are expected to be missing from the filter
            if (guid, version_str) not in blocked_guids:
                other_key = MLBF.KEY_FORMAT.format(
                    guid=guid, version=version_str)
                assert other_key not in bfilter

        # Occasionally a combination of salt generated with secrets.token_bytes
        # and the version str generated in version_factory results in a
        # collision in layer 1 of the bloomfilter, leading to a second layer
        # being generated.  When this happens the bitCount and size is larger.
        if bfilter.layerCount() == 1:
            expected_size, expected_bit_count = 203, 1384
        else:
            expected_size, expected_bit_count = 393, 2824

        failure_context = (blocked_guids, all_addons)
        assert os.stat(mlbf.filter_path).st_size == expected_size, (
            failure_context
        )
        assert bfilter.bitCount() == expected_bit_count, failure_context
Example #5
0
    def test_generate_and_write_mlbf(self):
        """Write an mlbf file and check its membership, size and bit count.

        Every blocked (guid, version) key must be in the written filter and
        every other fetched key must not be. The file is expected to be 203
        bytes with a bit count of 1384.
        """
        self.setup_data()
        mlbf = MLBF(123456)
        mlbf.generate_and_write_mlbf()

        with open(mlbf.filter_path, 'rb') as filter_file:
            bfilter = FilterCascade.from_buf(filter_file.read())

        blocked_versions = mlbf.fetch_blocked_from_db()
        blocked_guids = blocked_versions.values()
        for guid, version_str in blocked_guids:
            blocked_key = mlbf.KEY_FORMAT.format(
                guid=guid, version=version_str)
            assert blocked_key in bfilter

        all_addons = mlbf.fetch_all_versions_from_db(blocked_versions.keys())
        for guid, version_str in all_addons:
            # edge case where a version_str exists in both: only pairs that
            # are not blocked are expected to be missing from the filter
            if (guid, version_str) not in blocked_guids:
                other_key = mlbf.KEY_FORMAT.format(
                    guid=guid, version=version_str)
                assert other_key not in bfilter

        failure_context = (blocked_guids, all_addons)
        assert os.stat(mlbf.filter_path).st_size == 203, failure_context
        assert bfilter.bitCount() == 1384, failure_context
Example #6
0
def generate_mlbf(stats, key_format, *, blocked=None, not_blocked=None):
    """Build and check a filter cascade from blocked / not-blocked inputs.

    Based on:
    https://github.com/mozilla/crlite/blob/master/create_filter_cascade/certs_to_crlite.py

    A falsy `blocked` or `not_blocked` (None or empty) is replaced with the
    result of `get_blocked_guids()` / `get_all_guids()` respectively. Both
    inputs go through `hash_filter_inputs` with `key_format`, and any entry
    that is also blocked is dropped from the not-blocked side.

    `stats` is updated in place with the input sizes, the error rates and
    the cascade's version, layer count and bit count. Returns the cascade
    after `check` has been run on it.
    """
    if not blocked:
        blocked = get_blocked_guids()
    blocked = hash_filter_inputs(blocked, key_format)

    if not not_blocked:
        not_blocked = get_all_guids()
    not_blocked = hash_filter_inputs(not_blocked, key_format)

    # An entry must never be on both sides; blocked wins.
    remaining = set(not_blocked) - set(blocked)
    not_blocked = list(remaining)

    blocked_count = len(blocked)
    stats['mlbf_blocked_count'] = blocked_count
    stats['mlbf_unblocked_count'] = len(not_blocked)

    fprs = [blocked_count / (math.sqrt(2) * len(not_blocked)), 0.5]

    log.info("Generating filter")
    # The cascade is sized for 1.1 times the number of blocked entries.
    cascade = FilterCascade.cascade_with_characteristics(
        int(blocked_count * 1.1), fprs)

    cascade.version = 1
    cascade.initialize(include=blocked, exclude=not_blocked)

    stats['mlbf_fprs'] = fprs
    stats['mlbf_version'] = cascade.version
    layer_count = cascade.layerCount()
    stats['mlbf_layers'] = layer_count
    bit_count = cascade.bitCount()
    stats['mlbf_bits'] = bit_count

    summary = "Filter cascade layers: {layers}, bit: {bits}".format(
        layers=layer_count, bits=bit_count)
    log.debug(summary)

    cascade.check(entries=blocked, exclusions=not_blocked)
    return cascade
Example #7
0
 def load_filter(self, *, filter_path, coverage_path):
     """Load the filter cascade and the coverage data into this instance.

     `filter_path` must provide `read_bytes()`; its content is parsed with
     `FilterCascade.from_buf`. `coverage_path` is opened as a JSON text
     file holding entries with "logID" (base64), "minTimestamp" and
     "maxTimestamp" keys. The revocation map and the stash file list are
     reset to empty.
     """
     self.filter_file = filter_path
     self.coverage_file = coverage_path

     filter_bytes = self.filter_file.read_bytes()
     self.filtercascade = FilterCascade.from_buf(filter_bytes)

     self.issuer_to_revocations = collections.defaultdict(list)
     self.stash_files = []
     self.coverage = {}

     with open(coverage_path, "r") as coverage_fp:
         coverage_entries = json.load(coverage_fp)

     # Index each (min, max) timestamp pair by the decoded log id.
     for entry in coverage_entries:
         decoded_id = base64.b64decode(entry["logID"])
         timestamps = (entry["minTimestamp"], entry["maxTimestamp"])
         self.coverage[decoded_id] = timestamps
Example #8
0
    def test_generate_and_write_mlbf(self):
        """Write an mlbf file and check its bit count, membership and size.

        The filter read back from disk must have a bit count of 3008, must
        contain every blocked (guid, version) key and none of the keys
        returned by `get_all_guids`, and the file must be 406 bytes.
        """
        mlbf = MLBF(123456)
        mlbf.generate_and_write_mlbf()

        with open(mlbf.filter_path, 'rb') as filter_file:
            bfilter = FilterCascade.from_buf(filter_file.read())

        assert bfilter.bitCount() == 3008

        blocked_versions = mlbf.get_blocked_versions()
        for guid, version_str in blocked_versions.values():
            blocked_key = mlbf.KEY_FORMAT.format(
                guid=guid, version=version_str)
            assert blocked_key in bfilter

        other_guids = mlbf.get_all_guids(blocked_versions.keys())
        for guid, version_str in other_guids:
            other_key = mlbf.KEY_FORMAT.format(
                guid=guid, version=version_str)
            assert other_key not in bfilter

        written_size = os.stat(mlbf.filter_path).st_size
        assert written_size == 406
Example #9
0
def generateMLBF(args, stats, *, revoked_certs, nonrevoked_certs,
                 nonrevoked_certs_len):
    """Build a version 1 MURMUR3 filter cascade for the given certificates.

    The error rates are set through `set_crlite_error_rates` from
    `len(revoked_certs)` and the caller-supplied `nonrevoked_certs_len`.
    `revoked_certs` becomes the include set and `nonrevoked_certs` the
    exclude set. `args` is accepted but not used here.

    `stats` is updated in place with the error rates, version, layer count
    and bit count; the last two are also reported as metrics gauges.
    Returns the initialized cascade.
    """
    revoked_count = len(revoked_certs)

    log.info("Generating filter")
    cascade = FilterCascade(
        [],
        version=1,
        defaultHashAlg=fileformats.HashAlgorithm.MURMUR3,
    )
    cascade.set_crlite_error_rates(
        include_len=revoked_count,
        exclude_len=nonrevoked_certs_len,
    )
    cascade.initialize(include=revoked_certs, exclude=nonrevoked_certs)

    stats["mlbf_fprs"] = cascade.error_rates
    stats["mlbf_version"] = cascade.version
    layer_count = cascade.layerCount()
    stats["mlbf_layers"] = layer_count
    bit_count = cascade.bitCount()
    stats["mlbf_bits"] = bit_count

    summary = "Filter cascade layers: {layers}, bit: {bits}".format(
        layers=layer_count, bits=bit_count)
    log.debug(summary)

    metrics.gauge("GenerateMLBF.BitCount", bit_count)
    metrics.gauge("GenerateMLBF.LayerCount", layer_count)
    return cascade