def test_no_false_negatives():
    """Check that every key added to the filter is reported as present."""
    keys = range(0, 1000, 20)
    bloom = BloomFilter(size=100, hash_funcs=2)
    for key in keys:
        bloom.add(key)
    # An added key must never be reported as absent.
    assert all(key in bloom for key in keys)
def test_bloom_pickle():
    """Check that a pickled and unpickled filter still contains its keys."""
    keys = range(0, 1000, 20)
    original = BloomFilter(size=100, hash_funcs=2)
    for key in keys:
        original.add(key)
    # Round-trip through pickle and query the restored copy only.
    restored = pickle.loads(pickle.dumps(original))
    assert all(key in restored for key in keys)
def test_contains():
    """Check membership before and after adding keys to a default filter."""
    bloom = BloomFilter()
    # The key is not reported before it has been added.
    assert 23 not in bloom
    bloom.add(23)
    assert 23 in bloom

    later_keys = (5, 42, 1002)
    for key in later_keys:
        bloom.add(key)
    for key in later_keys:
        assert key in bloom
def test_to_from_bytes():
    """Check that to_bytes/from_bytes transfers a filter's contents."""
    keys = range(0, 1000, 20)
    source = BloomFilter(size=100, hash_funcs=2)
    for key in keys:
        source.add(key)
    payload = source.to_bytes()

    target = BloomFilter()
    # Before loading, the fresh filter must not report any of the keys.
    assert not any(key in target for key in keys)
    target.from_bytes(payload)
    assert all(key in target for key in keys)
    # Serializing the loaded filter must reproduce the same bytes.
    assert target.to_bytes() == payload
def test_from_error():
    """Check that a filter built via from_error_rate contains added keys."""
    keys = range(0, 1000, 20)
    bloom = BloomFilter.from_error_rate(1000)
    for key in keys:
        bloom.add(key)
    assert all(key in bloom for key in keys)
def from_bytes(self, bytes_data: bytes) -> "Table":
    """Restore this table in place from a serialized bytestring.

    bytes_data (bytes): The msgpack-encoded data to load.
    RETURNS (Table): This table, after loading.

    DOCS: https://spacy.io/api/lookups#table.from_bytes
    """
    payload = srsly.msgpack_loads(bytes_data)
    # A payload without a "dict" entry yields an empty table.
    entries = payload.get("dict", {})
    self.name = payload["name"]
    fresh_filter = BloomFilter()
    self.bloom = fresh_filter.from_bytes(payload["bloom"])
    # Drop the current entries before inserting the deserialized ones.
    self.clear()
    self.update(entries)
    return self
def __init__(self, name: Optional[str] = None, data: Optional[dict] = None) -> None:
    """Create a new table, optionally pre-filled with data.

    name (str): Optional table name for reference.
    data (dict): Initial data; its length is used to size the Bloom filter.

    DOCS: https://spacy.io/api/lookups#table.init
    """
    OrderedDict.__init__(self)
    self.name = name
    # Assume a default size of 1M items
    self.default_size = 1e6
    if data is None:
        expected_items = self.default_size
    else:
        # Empty initial data still sizes the filter for one item.
        expected_items = max(len(data), 1)
    self.bloom = BloomFilter.from_error_rate(expected_items)
    if data:
        self.update(data)
def __init__(self, name=None, data=None):
    """Create a new table, optionally pre-filled with data.

    name (unicode): Optional table name for reference.
    data (dict): Initial data; its length is used to size the Bloom filter.
    RETURNS (Table): The newly created object.

    DOCS: https://spacy.io/api/lookups#table.init
    """
    OrderedDict.__init__(self)
    self.name = name
    # Assume a default size of 1M items
    self.default_size = 1e6
    if data and len(data) > 0:
        expected_items = len(data)
    else:
        # Missing or empty data falls back to the default size.
        expected_items = self.default_size
    self.bloom = BloomFilter.from_error_rate(expected_items)
    if data:
        self.update(data)