Example #1
def _faiss_knn(keys: torch.Tensor, queries: torch.Tensor, num_neighbors: int,
               distance: str) -> Tuple[torch.Tensor, torch.Tensor]:
    # https://github.com/facebookresearch/XLM/blob/master/src/model/memory/utils.py
    if not is_faiss_available():
        raise RuntimeError("faiss_knn requires faiss-gpu")
    import faiss

    assert distance in ['dot_product', 'l2']
    assert keys.size(1) == queries.size(1)

    metric = faiss.METRIC_INNER_PRODUCT if distance == 'dot_product' else faiss.METRIC_L2

    k_ptr = _tensor_to_ptr(keys)
    q_ptr = _tensor_to_ptr(queries)

    scores = keys.new_zeros((queries.size(0), num_neighbors),
                            dtype=torch.float32)
    indices = keys.new_zeros((queries.size(0), num_neighbors),
                             dtype=torch.int64)

    s_ptr = _tensor_to_ptr(scores)
    i_ptr = _tensor_to_ptr(indices)

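    # Note: this positional call follows the older brute-force kNN interface;
    # recent faiss releases expect bfKnn(res, GpuDistanceParams), as shown in
    # the next example.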
    faiss.bfKnn(FAISS_RES, metric, k_ptr, True, keys.size(0), q_ptr, True,
                queries.size(0), queries.size(1), num_neighbors, s_ptr, i_ptr)
    return scores, indices
Example #2
def _faiss_knn(keys: torch.Tensor, queries: torch.Tensor, num_neighbors: int,
               distance: str) -> Tuple[torch.Tensor, torch.Tensor]:
    # https://github.com/facebookresearch/XLM/blob/master/src/model/memory/utils.py
    if not is_faiss_available():
        raise RuntimeError("faiss_knn requires faiss-gpu")
    import faiss

    metric = faiss.METRIC_INNER_PRODUCT if distance == 'dot_product' else faiss.METRIC_L2

    k_ptr = _tensor_to_ptr(keys)
    q_ptr = _tensor_to_ptr(queries)

    scores = keys.new_zeros((queries.size(0), num_neighbors),
                            dtype=torch.float32)
    indices = keys.new_zeros((queries.size(0), num_neighbors),
                             dtype=torch.int64)

    s_ptr = _tensor_to_ptr(scores)
    i_ptr = _tensor_to_ptr(indices)

    args = faiss.GpuDistanceParams()
    args.metric = metric
    args.k = num_neighbors
    args.dims = queries.size(1)
    args.vectors = k_ptr
    args.vectorsRowMajor = True
    args.numVectors = keys.size(0)
    args.queries = q_ptr
    args.queriesRowMajor = True
    args.numQueries = queries.size(0)
    args.outDistances = s_ptr
    args.outIndices = i_ptr
    faiss.bfKnn(FAISS_RES, args)
    return scores, indices
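Both versions above depend on a module-level FAISS_RES and a _tensor_to_ptr helper that are defined elsewhere in the source project. A minimal sketch of what those helpers might look like, assuming contiguous CUDA tensors and faiss's cast_integer_to_* pointer helpers:

import faiss
import torch

FAISS_RES = faiss.StandardGpuResources()

def _tensor_to_ptr(tensor: torch.Tensor):
    # bfKnn consumes raw SWIG pointers, so cast the tensor's device address.
    assert tensor.is_contiguous()
    if tensor.dtype == torch.float32:
        return faiss.cast_integer_to_float_ptr(tensor.data_ptr())
    if tensor.dtype == torch.int64:
        # named cast_integer_to_long_ptr in older faiss releases
        return faiss.cast_integer_to_idx_t_ptr(tensor.data_ptr())
    raise TypeError("expected a float32 or int64 tensor")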
Example #3
def search_raw_array_pytorch(res, xb, xq, k, D=None, I=None,
                             metric=faiss.METRIC_L2):
    """search xq in xb, without building an index"""
    assert xb.device == xq.device

    nq, d = xq.size()
    if xq.is_contiguous():
        xq_row_major = True
    elif xq.t().is_contiguous():
        xq = xq.t()    # I initially wrote xq:t(), Lua is still haunting me :-)
        xq_row_major = False
    else:
        raise TypeError('matrix should be row or column-major')

    xq_ptr = swig_ptr_from_FloatTensor(xq)

    nb, d2 = xb.size()
    assert d2 == d
    if xb.is_contiguous():
        xb_row_major = True
    elif xb.t().is_contiguous():
        xb = xb.t()
        xb_row_major = False
    else:
        raise TypeError('matrix should be row or column-major')
    xb_ptr = swig_ptr_from_FloatTensor(xb)

    if D is None:
        D = torch.empty(nq, k, device=xb.device, dtype=torch.float32)
    else:
        assert D.shape == (nq, k)
        assert D.device == xb.device

    if I is None:
        I = torch.empty(nq, k, device=xb.device, dtype=torch.int64)
    else:
        assert I.shape == (nq, k)
        assert I.device == xb.device

    D_ptr = swig_ptr_from_FloatTensor(D)
    I_ptr = swig_ptr_from_LongTensor(I)

    args = faiss.GpuDistanceParams()
    args.metric = metric
    args.k = k
    args.dims = d
    args.vectors = xb_ptr
    args.vectorsRowMajor = xb_row_major
    args.numVectors = nb
    args.queries = xq_ptr
    args.queriesRowMajor = xq_row_major
    args.numQueries = nq
    args.outDistances = D_ptr
    args.outIndices = I_ptr

    with using_stream(res):
        faiss.bfKnn(res, args)

    return D, I
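An illustrative call of search_raw_array_pytorch, assuming a GPU build of faiss and the swig_ptr_from_FloatTensor / swig_ptr_from_LongTensor helpers from faiss's pytorch utilities:

import torch
import faiss

res = faiss.StandardGpuResources()
xb = torch.rand(10000, 64, device='cuda')  # database vectors
xq = torch.rand(100, 64, device='cuda')    # query vectors
D, I = search_raw_array_pytorch(res, xb, xq, k=10)
# D: (100, 10) float32 L2 distances; I: (100, 10) int64 indices into xb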
Example #4
def faiss_knn(keys: torch.Tensor, queries: torch.Tensor, num_neighbors: int,
              distance: str) -> Tuple[torch.Tensor, torch.Tensor]:
    """ k nearest neighbor using faiss. Users are recommended to use `k_nearest_neighbor` instead.

    :param keys: tensor of (num_keys, dim)
    :param queries: tensor of (num_queries, dim)
    :param num_neighbors: `k`
    :param distance: user can use str or faiss.METRIC_*.
    :return: scores, indices in tensor
    """

    if not is_faiss_available():
        raise RuntimeError("faiss_knn requires faiss-gpu")
    import faiss

    metric_map = {
        "inner_product": faiss.METRIC_INNER_PRODUCT,
        "l2": faiss.METRIC_L2,
        "l1": faiss.METRIC_L1,
        "linf": faiss.METRIC_Linf,
        "jansen_shannon": faiss.METRIC_JensenShannon
    }

    k_ptr = _tensor_to_ptr(keys)
    q_ptr = _tensor_to_ptr(queries)

    scores = keys.new_empty((queries.size(0), num_neighbors),
                            dtype=torch.float32)
    indices = keys.new_empty((queries.size(0), num_neighbors),
                             dtype=torch.int64)

    s_ptr = _tensor_to_ptr(scores)
    i_ptr = _tensor_to_ptr(indices)

    args = faiss.GpuDistanceParams()
    args.metric = metric_map[distance] if isinstance(distance,
                                                     str) else distance
    args.k = num_neighbors
    args.dims = queries.size(1)
    args.vectors = k_ptr
    args.vectorsRowMajor = True
    args.numVectors = keys.size(0)
    args.queries = q_ptr
    args.queriesRowMajor = True
    args.numQueries = queries.size(0)
    args.outDistances = s_ptr
    args.outIndices = i_ptr
    faiss.bfKnn(FAISS_RES, args)
    return scores, indices
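A hypothetical call of faiss_knn, assuming CUDA float32 tensors and the FAISS_RES / _tensor_to_ptr helpers sketched earlier; the metric can be passed either as a string key or as a faiss.METRIC_* constant:

import torch
import faiss

keys = torch.rand(50000, 128, device='cuda')
queries = torch.rand(256, 128, device='cuda')
scores, indices = faiss_knn(keys, queries, num_neighbors=8, distance="l2")
scores_ip, indices_ip = faiss_knn(keys, queries, 8, faiss.METRIC_INNER_PRODUCT)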
Example #5
    def test_dist(self):
        metrics = [
            faiss.METRIC_L2, faiss.METRIC_INNER_PRODUCT, faiss.METRIC_L1,
            faiss.METRIC_Linf, faiss.METRIC_Canberra, faiss.METRIC_BrayCurtis,
            faiss.METRIC_JensenShannon
        ]

        for metric in metrics:
            print(metric)
            d = 33
            k = 500

            # all pairwise distance should be the same as nb = k
            nb = k
            nq = 20

            xs = make_t(nb, d)
            qs = make_t(nq, d)

            res = faiss.StandardGpuResources()

            # Get ground truth using IndexFlat
            index = faiss.IndexFlat(d, metric)
            index.add(xs)
            ref_d, _ = index.search(qs, k)

            out_d = np.empty((nq, k), dtype=np.float32)

            # Try f32 data/queries
            params = faiss.GpuDistanceParams()
            params.metric = metric
            params.k = -1  # all pairwise
            params.dims = d
            params.vectors = faiss.swig_ptr(xs)
            params.numVectors = nb
            params.queries = faiss.swig_ptr(qs)
            params.numQueries = nq
            params.outDistances = faiss.swig_ptr(out_d)

            faiss.bfKnn(res, params)

            # IndexFlat will sort the results, so we need to
            # do the same on our end
            out_d = np.sort(out_d, axis=1)

            # INNER_PRODUCT is in descending order, make sure it is the same
            # order
            if metric == faiss.METRIC_INNER_PRODUCT:
                ref_d = np.sort(ref_d, axis=1)

            print('f32', np.abs(ref_d - out_d).max())

            self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))

            # Try float16 data/queries
            xs_f16 = xs.astype(np.float16)
            qs_f16 = qs.astype(np.float16)
            xs_f16_f32 = xs_f16.astype(np.float32)
            qs_f16_f32 = qs_f16.astype(np.float32)
            index.reset()
            index.add(xs_f16_f32)
            ref_d_f16, _ = index.search(qs_f16_f32, k)

            params.vectors = faiss.swig_ptr(xs_f16)
            params.vectorType = faiss.DistanceDataType_F16
            params.queries = faiss.swig_ptr(qs_f16)
            params.queryType = faiss.DistanceDataType_F16

            out_d_f16 = np.empty((nq, k), dtype=np.float32)
            params.outDistances = faiss.swig_ptr(out_d_f16)

            faiss.bfKnn(res, params)

            # IndexFlat will sort the results, so we need to
            # do the same on our end
            out_d_f16 = np.sort(out_d_f16, axis=1)

            # INNER_PRODUCT is in descending order, make sure it is the same
            # order
            if metric == faiss.METRIC_INNER_PRODUCT:
                ref_d_f16 = np.sort(ref_d_f16, axis=1)

            print('f16', np.abs(ref_d_f16 - out_d_f16).max())

            self.assertTrue(np.allclose(ref_d_f16, out_d_f16, atol=4e-3))
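Both this test and the next assume a make_t helper from the surrounding test module; a minimal stand-in that produces random float32 data could look like:

import numpy as np

def make_t(num, d, seed=123):
    rs = np.random.RandomState(seed)
    return rs.rand(num, d).astype(np.float32)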
Example #6
    def test_input_types(self):
        d = 33
        k = 5
        nb = 1000
        nq = 10

        xs = make_t(nb, d)
        qs = make_t(nq, d)

        res = faiss.StandardGpuResources()

        # Get ground truth using IndexFlat
        index = faiss.IndexFlatL2(d)
        index.add(xs)
        ref_d, ref_i = index.search(qs, k)

        out_d = np.empty((nq, k), dtype=np.float32)
        out_i = np.empty((nq, k), dtype=np.int64)

        # Try f32 data/queries, i64 out indices
        params = faiss.GpuDistanceParams()
        params.k = k
        params.dims = d
        params.vectors = faiss.swig_ptr(xs)
        params.numVectors = nb
        params.queries = faiss.swig_ptr(qs)
        params.numQueries = nq
        params.outDistances = faiss.swig_ptr(out_d)
        params.outIndices = faiss.swig_ptr(out_i)

        faiss.bfKnn(res, params)

        self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))
        self.assertGreaterEqual((out_i == ref_i).sum(), ref_i.size)

        # Try int32 out indices
        out_i32 = np.empty((nq, k), dtype=np.int32)
        params.outIndices = faiss.swig_ptr(out_i32)
        params.outIndicesType = faiss.IndicesDataType_I32

        faiss.bfKnn(res, params)
        self.assertEqual((out_i32 == ref_i).sum(), ref_i.size)

        # Try float16 data/queries, i64 out indices
        xs_f16 = xs.astype(np.float16)
        qs_f16 = qs.astype(np.float16)
        xs_f16_f32 = xs_f16.astype(np.float32)
        qs_f16_f32 = qs_f16.astype(np.float32)
        index.reset()
        index.add(xs_f16_f32)
        ref_d_f16, ref_i_f16 = index.search(qs_f16_f32, k)

        params.vectors = faiss.swig_ptr(xs_f16)
        params.vectorType = faiss.DistanceDataType_F16
        params.queries = faiss.swig_ptr(qs_f16)
        params.queryType = faiss.DistanceDataType_F16

        out_d_f16 = np.empty((nq, k), dtype=np.float32)
        out_i_f16 = np.empty((nq, k), dtype=np.int64)

        params.outDistances = faiss.swig_ptr(out_d_f16)
        params.outIndices = faiss.swig_ptr(out_i_f16)
        params.outIndicesType = faiss.IndicesDataType_I64

        faiss.bfKnn(res, params)

        self.assertGreaterEqual((out_i_f16 == ref_i_f16).sum(),
                                ref_i_f16.size - 5)
        self.assertTrue(np.allclose(ref_d_f16, out_d_f16, atol=2e-3))
Example #7
def torch_replacement_knn_gpu(res,
                              xq,
                              xb,
                              k,
                              D=None,
                              I=None,
                              metric=faiss.METRIC_L2):
    if type(xb) is np.ndarray:
        # Forward to faiss __init__.py base method
        return faiss.knn_gpu_numpy(res, xq, xb, k, D, I, metric)

    nb, d = xb.size()
    if xb.is_contiguous():
        xb_row_major = True
    elif xb.t().is_contiguous():
        xb = xb.t()
        xb_row_major = False
    else:
        raise TypeError('matrix should be row or column-major')

    if xb.dtype == torch.float32:
        xb_type = faiss.DistanceDataType_F32
        xb_ptr = swig_ptr_from_FloatTensor(xb)
    elif xb.dtype == torch.float16:
        xb_type = faiss.DistanceDataType_F16
        xb_ptr = swig_ptr_from_HalfTensor(xb)
    else:
        raise TypeError('xb must be f32 or f16')

    nq, d2 = xq.size()
    assert d2 == d
    if xq.is_contiguous():
        xq_row_major = True
    elif xq.t().is_contiguous():
        xq = xq.t()
        xq_row_major = False
    else:
        raise TypeError('matrix should be row or column-major')

    if xq.dtype == torch.float32:
        xq_type = faiss.DistanceDataType_F32
        xq_ptr = swig_ptr_from_FloatTensor(xq)
    elif xq.dtype == torch.float16:
        xq_type = faiss.DistanceDataType_F16
        xq_ptr = swig_ptr_from_HalfTensor(xq)
    else:
        raise TypeError('xq must be f32 or f16')

    if D is None:
        D = torch.empty(nq, k, device=xb.device, dtype=torch.float32)
    else:
        assert D.shape == (nq, k)
        # interface takes void*, we need to check this
        assert (D.dtype == torch.float32)

    if I is None:
        I = torch.empty(nq, k, device=xb.device, dtype=torch.int64)
    else:
        assert I.shape == (nq, k)

    if I.dtype == torch.int64:
        I_type = faiss.IndicesDataType_I64
        I_ptr = swig_ptr_from_IndicesTensor(I)
    elif I.dtype == torch.int32:
        I_type = faiss.IndicesDataType_I32
        I_ptr = swig_ptr_from_IntTensor(I)
    else:
        raise TypeError('I must be i64 or i32')

    D_ptr = swig_ptr_from_FloatTensor(D)

    args = faiss.GpuDistanceParams()
    args.metric = metric
    args.k = k
    args.dims = d
    args.vectors = xb_ptr
    args.vectorsRowMajor = xb_row_major
    args.vectorType = xb_type
    args.numVectors = nb
    args.queries = xq_ptr
    args.queriesRowMajor = xq_row_major
    args.queryType = xq_type
    args.numQueries = nq
    args.outDistances = D_ptr
    args.outIndices = I_ptr
    args.outIndicesType = I_type

    with using_stream(res):
        faiss.bfKnn(res, args)

    return D, I
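A hypothetical call of torch_replacement_knn_gpu with float16 CUDA tensors and caller-provided int32 output indices:

import torch
import faiss

res = faiss.StandardGpuResources()
xb = torch.rand(5000, 32, device='cuda', dtype=torch.float16)
xq = torch.rand(16, 32, device='cuda', dtype=torch.float16)
I32 = torch.empty(16, 4, device='cuda', dtype=torch.int32)
D, I32 = torch_replacement_knn_gpu(res, xq, xb, k=4, I=I32)
# D is allocated as float32 distances; I32 holds int32 neighbor ids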
Example #8
def knn_gpu(res, xb, xq, k, D=None, I=None, metric=faiss.METRIC_L2):
    """Brute-force k-nearest neighbor on the GPU using CPU-resident numpy arrays
    Supports float16 arrays and Fortran-order arrays.
    """
    if xb.ndim != 2 or xq.ndim != 2:
        raise TypeError('xb and xq must be matrices')

    nb, d = xb.shape
    nq, d2 = xq.shape
    if d != d2:
        raise TypeError('xq not the same dimension as xb')

    if xb.flags.c_contiguous:
        xb_row_major = True
    elif xb.flags.f_contiguous:
        xb = xb.T
        xb_row_major = False
    else:
        raise TypeError('xb must be either C or Fortran contiguous')

    if xq.flags.c_contiguous:
        xq_row_major = True
    elif xq.flags.f_contiguous:
        xq = xq.T
        xq_row_major = False
    else:
        raise TypeError('xq must be either C or Fortran contiguous')

    if xb.dtype == np.float32 and xq.dtype == np.float32:
        xb_xq_type = faiss.DistanceDataType_F32
    elif xb.dtype == np.float16 and xq.dtype == np.float16:
        xb_xq_type = faiss.DistanceDataType_F16
    else:
        raise TypeError('xb and xq must both be np.float32 or np.float16')

    if D is None:
        D = np.empty((nq, k), dtype=np.float32)
    else:
        assert D.shape == (nq, k)
        assert D.dtype == np.float32

    if I is None:
        I = np.empty((nq, k), dtype=np.int64)
        indices_type = faiss.IndicesDataType_I64
    else:
        assert I.shape == (nq, k)
        if I.dtype == np.int64:
            indices_type = faiss.IndicesDataType_I64
        elif I.dtype == np.int32:
            indices_type = faiss.IndicesDataType_I32
        else:
            raise TypeError('I must be either np.int64 or np.int32')

    args = faiss.GpuDistanceParams()
    args.metric = metric
    args.k = k
    args.dims = d
    args.vectors = faiss.swig_ptr(xb)
    args.vectorType = xb_xq_type
    args.vectorsRowMajor = xb_row_major
    args.numVectors = nb
    args.queries = faiss.swig_ptr(xq)
    args.queryType = xb_xq_type
    args.queriesRowMajor = xq_row_major
    args.numQueries = nq
    args.outDistances = faiss.swig_ptr(D)
    args.outIndices = faiss.swig_ptr(I)
    args.outIndicesType = indices_type
    faiss.bfKnn(res, args)

    return D, I
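An illustrative call of the numpy-based knn_gpu above, which also accepts float16 and Fortran-ordered inputs, assuming a GPU-enabled faiss build:

import numpy as np
import faiss

res = faiss.StandardGpuResources()
xb = np.random.rand(20000, 96).astype(np.float32)
xq = np.asfortranarray(np.random.rand(50, 96).astype(np.float32))
D, I = knn_gpu(res, xb, xq, k=5)
# D: (50, 5) float32 L2 distances; I: (50, 5) int64 indices into xb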
Example #9
def search_raw_array_pytorch(res,
                             xb,
                             xq,
                             k,
                             D=None,
                             I=None,
                             metric=faiss.METRIC_L2):
    assert xb.device == xq.device

    nq, d = xq.size()
    if xq.is_contiguous():
        xq_row_major = True
    elif xq.t().is_contiguous():
        xq = xq.t()  # I initially wrote xq:t(), Lua is still haunting me :-)
        xq_row_major = False
    else:
        raise TypeError('matrix should be row or column-major')

    xq_ptr = swig_ptr_from_FloatTensor(xq)

    nb, d2 = xb.size()
    assert d2 == d
    if xb.is_contiguous():
        xb_row_major = True
    elif xb.t().is_contiguous():
        xb = xb.t()
        xb_row_major = False
    else:
        raise TypeError('matrix should be row or column-major')
    xb_ptr = swig_ptr_from_FloatTensor(xb)

    if D is None:
        D = torch.empty(nq, k, device=xb.device, dtype=torch.float32)
    else:
        assert D.shape == (nq, k)
        assert D.device == xb.device

    if I is None:
        I = torch.empty(nq, k, device=xb.device, dtype=torch.int64)
    else:
        assert I.shape == (nq, k)
        assert I.device == xb.device

    D_ptr = swig_ptr_from_FloatTensor(D)
    I_ptr = swig_ptr_from_LongTensor(I)

    gpu_config = faiss.GpuDistanceParams()
    gpu_config.metric = metric
    gpu_config.k = k
    gpu_config.dims = d
    gpu_config.vectors = xb_ptr
    gpu_config.vectorsRowMajor = xb_row_major
    gpu_config.vectorType = faiss.DistanceDataType_F32
    gpu_config.numVectors = nb
    gpu_config.queries = xq_ptr
    gpu_config.queriesRowMajor = xq_row_major
    gpu_config.queryType = faiss.DistanceDataType_F32
    gpu_config.numQueries = nq
    gpu_config.outDistances = D_ptr
    gpu_config.outIndices = I_ptr
    gpu_config.outIndicesType = faiss.IndicesDataType_I64
    faiss.bfKnn(res, gpu_config)

    return D, I
Example #10
def search_mod_raw_array_pytorch(res,
                                 noise_level,
                                 xb,
                                 xq,
                                 k,
                                 D=None,
                                 I=None,
                                 metric=faiss_mod.METRIC_L2):
    assert xb.device == xq.device

    nq, d = xq.size()
    if xq.is_contiguous():
        xq_row_major = True
    elif xq.t().is_contiguous():
        xq = xq.t()  # I initially wrote xq:t(), Lua is still haunting me :-)
        xq_row_major = False
    else:
        raise TypeError('matrix should be row or column-major')

    xq_ptr = swig_ptr_from_FloatTensor(xq)

    nb, d2 = xb.size()
    assert d2 == d
    if xb.is_contiguous():
        xb_row_major = True
    elif xb.t().is_contiguous():
        xb = xb.t()
        xb_row_major = False
    else:
        raise TypeError('matrix should be row or column-major')
    xb_ptr = swig_ptr_from_FloatTensor(xb)

    if D is None:
        D = torch.empty(nq, k, device=xb.device, dtype=torch.float32)
    else:
        assert D.shape == (nq, k)
        assert D.device == xb.device

    if I is None:
        I = torch.empty(nq, k, device=xb.device, dtype=torch.int64)
    else:
        assert I.shape == (nq, k)
        assert I.device == xb.device

    D_ptr = swig_ptr_from_FloatTensor(D)
    I_ptr = swig_ptr_from_LongTensor(I)
    # print("xb.means()",xb.mean(1).shape,xb.mean(1))
    # print("xq.means()",xq.mean(1).shape,xq.mean(1))
    # print("xb.stds()",xb.std(1).shape,xb.std(1)**2)
    # print("xq.stds()",xq.std(1).shape,xq.std(1)**2)
    # print("xb.norms().shape",xb.norm(dim=1,p=2))
    # print("xq.norms().shape",xq.norm(dim=1,p=2))
    # dist,ind = wasserstein_search(xb,xq,noise_level,k)
    # print("Test W Search")
    # print(dist,ind)

    gpu_config = faiss_mod.GpuDistanceParams()
    gpu_config.metric = metric
    gpu_config.k = k
    gpu_config.dims = d
    gpu_config.vectors = xb_ptr
    gpu_config.vectorsRowMajor = xb_row_major
    gpu_config.vectorType = faiss_mod.DistanceDataType_F32
    gpu_config.numVectors = nb
    gpu_config.queries = xq_ptr
    gpu_config.queriesRowMajor = xq_row_major
    gpu_config.queryType = faiss_mod.DistanceDataType_F32
    gpu_config.numQueries = nq
    gpu_config.outDistances = D_ptr
    gpu_config.outIndices = I_ptr
    gpu_config.outIndicesType = faiss_mod.IndicesDataType_I64
    gpu_config.ignoreOutDistances = False
    gpu_config.noise_level = 2 * noise_level**2 / xb.shape[1]
    gpu_config.useWasserstein = True
    faiss_mod.bfKnn(res, gpu_config)

    return D, I
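A hypothetical invocation, assuming faiss_mod is the custom faiss fork this snippet targets (stock faiss has no noise_level or useWasserstein fields on GpuDistanceParams):

import torch

res = faiss_mod.StandardGpuResources()
xb = torch.rand(4096, 128, device='cuda')
xq = torch.rand(32, 128, device='cuda')
D, I = search_mod_raw_array_pytorch(res, noise_level=0.1, xb=xb, xq=xq, k=16)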