Exemple #1
0
def test_no_false_negatives():
    """Check that every key added to a small filter is reported as present."""
    keys = range(0, 1000, 20)
    bloom = BloomFilter(size=100, hash_funcs=2)
    for key in keys:
        bloom.add(key)

    # Membership is checked only after all insertions have finished.
    for key in keys:
        assert key in bloom
Exemple #2
0
def test_bloom_pickle():
    """Check that a pickle round trip keeps every added key in the filter."""
    keys = range(0, 1000, 20)
    original = BloomFilter(size=100, hash_funcs=2)
    for key in keys:
        original.add(key)

    # Serialize and immediately restore into a second object.
    restored = pickle.loads(pickle.dumps(original))
    for key in keys:
        assert key in restored
Exemple #3
0
def test_contains():
    """Check membership before and after adding keys to a default filter."""
    bloom = BloomFilter()
    first_key = 23
    assert first_key not in bloom
    bloom.add(first_key)
    assert first_key in bloom

    # Add the remaining keys first, then verify each one.
    later_keys = (5, 42, 1002)
    for key in later_keys:
        bloom.add(key)
    for key in later_keys:
        assert key in bloom
Exemple #4
0
def test_to_from_bytes():
    """Check that to_bytes/from_bytes carries keys over to another filter."""
    keys = range(0, 1000, 20)
    source = BloomFilter(size=100, hash_funcs=2)
    for key in keys:
        source.add(key)
    payload = source.to_bytes()

    # A freshly constructed filter must not report any of the keys yet.
    target = BloomFilter()
    for key in keys:
        assert key not in target

    # After loading, all keys are present and re-serializing gives the
    # same bytes back.
    target.from_bytes(payload)
    for key in keys:
        assert key in target
    assert target.to_bytes() == payload
Exemple #5
0
def test_from_error():
    """Check that a filter built via from_error_rate keeps every added key."""
    keys = range(0, 1000, 20)
    bloom = BloomFilter.from_error_rate(1000)
    for key in keys:
        bloom.add(key)

    for key in keys:
        assert key in bloom
Exemple #6
0
    def from_bytes(self, bytes_data: bytes) -> "Table":
        """Restore this table in place from a msgpack bytestring.

        bytes_data (bytes): The serialized table. The decoded payload must
            provide "name" and "bloom"; "dict" is optional and defaults to
            an empty mapping.
        RETURNS (Table): This table, after loading.

        DOCS: https://spacy.io/api/lookups#table.from_bytes
        """
        payload = srsly.msgpack_loads(bytes_data)
        entries = payload.get("dict", {})
        self.name = payload["name"]
        # The Bloom filter is replaced before the entries are reloaded.
        restored_bloom = BloomFilter()
        self.bloom = restored_bloom.from_bytes(payload["bloom"])
        # Drop whatever the table held before and load the stored entries.
        self.clear()
        self.update(entries)
        return self
Exemple #7
0
    def __init__(self, name: Optional[str] = None, data: Optional[dict] = None) -> None:
        """Create a table, optionally pre-filled with data.

        name (str): Optional table name for reference.
        data (dict): Optional initial entries; their count is used to size
            the Bloom filter.

        DOCS: https://spacy.io/api/lookups#table.init
        """
        OrderedDict.__init__(self)
        self.name = name
        # Fallback size hint when no data is supplied: 1M items
        self.default_size = 1e6
        if data is None:
            expected_items = self.default_size
        else:
            # An empty mapping still yields a size hint of at least one.
            expected_items = max(len(data), 1)
        self.bloom = BloomFilter.from_error_rate(expected_items)
        if data:
            self.update(data)
Exemple #8
0
    def __init__(self, name=None, data=None):
        """Create a table, optionally pre-filled with data.

        name (unicode): Optional table name for reference.
        data (dict): Optional initial entries; their count is used to size
            the Bloom filter.
        RETURNS (Table): The newly created object.

        DOCS: https://spacy.io/api/lookups#table.init
        """
        OrderedDict.__init__(self)
        self.name = name
        # Fallback size hint when no usable data is supplied: 1M items
        self.default_size = 1e6
        if data and len(data) > 0:
            expected_items = len(data)
        else:
            # Missing or empty data falls back to the default size.
            expected_items = self.default_size
        self.bloom = BloomFilter.from_error_rate(expected_items)
        if data:
            self.update(data)