Example #1
0
    def test_remove_id_map_binary(self):
        """Check id removal on IndexBinaryIDMap2, before and after an I/O round trip.

        Adds ten 40-bit codes under ids 1000..1009, removes id 1003, and
        verifies that a remaining id still reconstructs while the removed id
        raises. The same checks are repeated after writing the index to a
        temporary file and reading it back.
        """
        sub_index = faiss.IndexBinaryFlat(40)
        # 10 codes of 5 bytes each (40 bits); byte 0 is 100..109 so every
        # code can be recognised from its first byte.
        xb = np.zeros((10, 5), dtype='uint8')
        xb[:, 0] = np.arange(10) + 100
        index = faiss.IndexBinaryIDMap2(sub_index)
        index.add_with_ids(xb, np.arange(10) + 1000)
        assert index.reconstruct(1004)[0] == 104
        index.remove_ids(np.array([1003]))
        assert index.reconstruct(1004)[0] == 104
        try:
            index.reconstruct(1003)
        except Exception:
            # Expected: the id was removed. Only Exception is caught so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            pass
        else:
            assert False, 'should have raised an exception'

        # while we are there, let's test I/O as well...
        fd, tmpnam = tempfile.mkstemp()
        # mkstemp hands back an open OS-level descriptor; only the path is
        # needed here, so close it right away instead of leaking it.
        os.close(fd)
        try:
            faiss.write_index_binary(index, tmpnam)
            index = faiss.read_index_binary(tmpnam)
        finally:
            os.remove(tmpnam)

        assert index.reconstruct(1004)[0] == 104
        try:
            index.reconstruct(1003)
        except Exception:
            pass
        else:
            assert False, 'should have raised an exception'
    def create(hashes: t.Iterable[PDQ_HASH_TYPE],
               custom_ids: t.Optional[t.Iterable[int]] = None) -> "PDQFlatHashIndex":
        """
        Creates a PDQFlatHashIndex for use searching against the provided hashes.

        Parameters
        ----------
        hashes: sequence of PDQ Hashes
            The PDQ hashes to create the index with. Each hash is decoded with
            binascii.unhexlify, so it must be a hex-encoded string.
        custom_ids: sequence of custom ids for the PDQ Hashes (optional)
            Optional sequence of custom id values to use for the PDQ hashes for any
            method relating to indexes (e.g., hash_at). If provided, the nth item in
            custom_ids will be used as the id for the nth hash in hashes. If not provided
            then the ids for the hashes will be assumed to be their respective index
            in hashes (i.e., the nth hash would have id n, starting from 0).

        Returns
        -------
        a PDQFlatHashIndex of these hashes
        """
        # One uint8 row per hash: hex string -> raw bytes -> byte vector.
        vectors = [
            numpy.frombuffer(binascii.unhexlify(pdq_hash), dtype=numpy.uint8)
            for pdq_hash in hashes
        ]
        index = faiss.index_binary_factory(BITS_IN_PDQ, "BFlat")
        # Identity check, not `!= None`: an equality comparison against a numpy
        # array is evaluated elementwise and cannot be used as a condition.
        if custom_ids is not None:
            index = faiss.IndexBinaryIDMap2(index)
            # Ids are passed through uint64_to_int64 before being handed to faiss.
            i64_ids = [uint64_to_int64(custom_id) for custom_id in custom_ids]
            index.add_with_ids(numpy.array(vectors), numpy.array(i64_ids))
        else:
            index.add(numpy.array(vectors))
        return PDQFlatHashIndex(index)
 def __init__(self, nhash: int = 16):
     """Build an ID-mapped multi-hash binary index and hand it to the base class.

     nhash is the number of hash maps given to faiss.IndexBinaryMultiHash;
     each one is assigned BITS_IN_PDQ // nhash bits.
     """
     hashmap_bits = BITS_IN_PDQ // nhash
     multi_hash = faiss.IndexBinaryMultiHash(BITS_IN_PDQ, nhash, hashmap_bits)
     super().__init__(faiss.IndexBinaryIDMap2(multi_hash))
     # NOTE(review): presumably builds a reverse id lookup for the index;
     # the helper is defined outside this view -- confirm.
     self.__construct_index_rev_map()
 def __init__(self):
     """Build a "BFlat" binary index over BITS_IN_PDQ-bit codes, wrapped in an
     IndexBinaryIDMap2, and hand it to the base class."""
     flat_index = faiss.index_binary_factory(BITS_IN_PDQ, "BFlat")
     id_mapped_index = faiss.IndexBinaryIDMap2(flat_index)
     super().__init__(id_mapped_index)