Beispiel #1
0
    def fit(x_train):
        """Prepare a batched brute-force K-NN query against ``x_train``.

        The only pre-computation is the squared norm of every training point.
        ``K`` and ``metric`` are read from the enclosing scope.

        Args:
            x_train: Training points. Converted with ``tensor`` and unpacked
                as a 2-D array of shape ``(Ntrain, D)``.

        Returns:
            A pair ``(f, elapsed)`` where ``f`` is the query function and
            ``elapsed`` is the time spent computing the squared norms, as
            measured by ``timer``.
        """
        # Setup the K-NN estimator:
        x_train = tensor(x_train)
        Ntrain, D = x_train.shape
        start = timer()
        # The "training" time here should be negligible:
        x_train_norm = (x_train ** 2).sum(-1)
        elapsed = timer() - start

        def f(x_test):
            """Run the K-NN query on ``x_test``, one memory-bounded batch at a time.

            Args:
                x_test: Query points, converted with ``tensor``; the first
                    axis indexes the queries.

            Returns:
                A pair ``(indices, elapsed)``: a NumPy array with one row of
                ``K`` entries per query point, and the time spent in the
                batched query loop. An empty ``x_test`` yields an array with
                zero rows.
            """
            x_test = tensor(x_test)

            # Estimate the largest reasonable batch size:
            Ntest = x_test.shape[0]
            av_mem = int(5e8)  # 500 Mb of GPU memory per batch
            # Remember that a vector of D float32 number takes up 4*D bytes.
            # The lower bound of 1 is applied *after* capping at Ntest, so
            # that an empty query set gives zero batches instead of a
            # division by zero below.
            Ntest_loop = max(1, min(av_mem // (4 * D * Ntrain), Ntest))
            Nloop = (Ntest - 1) // Ntest_loop + 1
            out = int_tensor(Ntest, K)

            start = timer()
            # Actual K-NN query:
            for k in range(Nloop):
                lo, hi = Ntest_loop * k, Ntest_loop * (k + 1)
                out[lo:hi, :] = KNN_torch_fun(
                    x_train, x_train_norm, x_test[lo:hi, :], K, metric
                )

            elapsed = timer() - start
            indices = out.cpu().numpy()
            return indices, elapsed

        return f, elapsed
Beispiel #2
0
        def f(x_test):
            """Time a K-NN query for ``x_test`` on the enclosing ``KNN`` object.

            Only the ``KNN.kneighbors`` call is timed; converting the input
            with ``tensor`` and the output to NumPy are both excluded.

            Returns:
                A pair ``(indices, elapsed)``: the query result as a NumPy
                array, and the duration of the query as measured by ``timer``.
            """
            queries = tensor(x_test)

            t0 = timer()
            neighbors = KNN.kneighbors(queries)
            query_time = timer() - t0

            # Bring the result back to the host only once the clock has stopped.
            return neighbors.cpu().numpy(), query_time
Beispiel #3
0
        def f(x_test):
            """Time a single, unbatched K-NN query for ``x_test``.

            ``x_train``, ``x_train_norm``, ``K`` and ``metric`` come from the
            enclosing scope. Only the ``KNN_torch_fun`` call is timed.

            Returns:
                A pair ``(indices, elapsed)``: the query result as a NumPy
                array, and the duration of the query as measured by ``timer``.
            """
            queries = tensor(x_test)

            t0 = timer()
            # Actual K-NN query:
            neighbors = KNN_torch_fun(x_train, x_train_norm, queries, K, metric)
            query_time = timer() - t0

            return neighbors.cpu().numpy(), query_time
Beispiel #4
0
        def f(x_test):
            """Time a K-NN query for ``x_test`` using the enclosing ``KNN_fun``.

            ``KNN_fun`` is called with the query points first and the
            enclosing ``x_train`` second. Only that call is timed.

            Returns:
                A pair ``(indices, elapsed)``: the query result as a NumPy
                array, and the duration of the query as measured by ``timer``.
            """
            queries = tensor(x_test)

            t0 = timer()
            # Actual K-NN query:
            neighbors = KNN_fun(queries, x_train)
            query_time = timer() - t0

            return neighbors.cpu().numpy(), query_time
Beispiel #5
0
def ground_truth(x_train, x_test, K, metric):
    """Compute reference K-NN indices with a KeOps ``argKmin`` reduction.

    Args:
        x_train: Training points; converted with ``tensor`` and indexed as a
            2-D array (one point per row).
        x_test: Query points; converted with ``tensor`` and indexed as a
            2-D array (one point per row).
        K: Number of neighbors requested from ``argKmin``.
        metric: One of ``"euclidean"``, ``"manhattan"``, ``"angular"`` or
            ``"hyperbolic"``.

    Returns:
        The ``argKmin`` result moved to the CPU and converted to a NumPy
        array (presumably one row of ``K`` training indices per query point).

    Raises:
        NotImplementedError: If ``metric`` is not one of the supported names.
    """
    # Fail fast, before any data is converted: an unknown metric used to fall
    # through the branches below and crash on the unbound ``D_ij``.
    supported_metrics = ("euclidean", "manhattan", "angular", "hyperbolic")
    if metric not in supported_metrics:
        raise NotImplementedError(f"The '{metric}' distance is not supported.")

    # Setup the K-NN estimator:
    x_train = tensor(x_train)
    x_test = tensor(x_test)

    # Encoding as KeOps LazyTensors:
    X_i = LazyTensor(x_test[:, None, :])
    X_j = LazyTensor(x_train[None, :, :])

    # Symbolic distance matrix:
    if metric == "euclidean":
        # Squared Euclidean distance.
        D_ij = ((X_i - X_j) ** 2).sum(-1)
    elif metric == "manhattan":
        # Sum of absolute coordinate differences.
        D_ij = (X_i - X_j).abs().sum(-1)
    elif metric == "angular":
        # Negated ``X_i | X_j`` product (KeOps' dot-product operator).
        D_ij = -(X_i | X_j)
    else:  # metric == "hyperbolic"
        # Squared Euclidean distance divided by the product of the first
        # coordinates of the two points.
        D_ij = ((X_i - X_j) ** 2).sum(-1) / (X_i[0] * X_j[0])

    # K-NN query:
    indices = D_ij.argKmin(K, dim=1)
    return indices.cpu().numpy()
Beispiel #6
0
    def fit(x_train):
        """Fit the enclosing ``KNN`` object on ``x_train`` and build a query function.

        ``clusters`` and ``a`` are read from the enclosing scope and forwarded
        to ``KNN.fit`` as keyword arguments. Only the ``KNN.fit`` call is timed.

        Returns:
            A pair ``(f, elapsed)``: the query function, and the duration of
            the fit as measured by ``timer``.
        """
        train = tensor(x_train)

        t0 = timer()
        KNN.fit(train, clusters=clusters, a=a)
        fit_time = timer() - t0

        def f(x_test):
            """Time ``KNN.kneighbors`` on ``x_test``.

            Returns:
                A pair ``(indices, elapsed)``: the query result as a NumPy
                array, and the duration of the query as measured by ``timer``.
            """
            queries = tensor(x_test)

            query_t0 = timer()
            neighbors = KNN.kneighbors(queries)
            query_time = timer() - query_t0

            return neighbors.cpu().numpy(), query_time

        return f, fit_time
Beispiel #7
0
    def fit(x_train):
        """Build a symbolic KeOps K-NN operator and a query function using it.

        ``K`` and ``metric`` are read from the enclosing scope. The timed
        section covers building the symbolic formula only.

        Returns:
            A pair ``(f, elapsed)``: the query function, and the duration of
            the setup as measured by ``timer``.

        Raises:
            NotImplementedError: If ``metric`` is not ``"euclidean"``,
                ``"manhattan"``, ``"angular"`` or ``"hyperbolic"``.
        """
        # Setup the K-NN estimator:
        train = tensor(x_train)
        t0 = timer()

        # Purely symbolic "i" and "j" variables, without any data array;
        # the data is supplied when the operator is called.
        dim = train.shape[1]
        q_i = Vi(0, dim)
        p_j = Vj(1, dim)

        # Symbolic distance matrix:
        if metric == "euclidean":
            dist_ij = ((q_i - p_j) ** 2).sum(-1)
        elif metric == "manhattan":
            dist_ij = (q_i - p_j).abs().sum(-1)
        elif metric == "angular":
            dist_ij = -(q_i | p_j)
        elif metric == "hyperbolic":
            dist_ij = ((q_i - p_j) ** 2).sum(-1) / (q_i[0] * p_j[0])
        else:
            raise NotImplementedError(f"The '{metric}' distance is not supported.")

        # K-NN query operator:
        knn_query = dist_ij.argKmin(K, dim=1)

        # N.B.: The "training" time here should be negligible.
        setup_time = timer() - t0

        def f(x_test):
            """Time the K-NN operator on ``x_test`` against the training points.

            Returns:
                A pair ``(indices, elapsed)``: the operator output as a NumPy
                array, and the duration of the query as measured by ``timer``.
            """
            queries = tensor(x_test)

            query_t0 = timer()
            # Actual K-NN query:
            neighbors = knn_query(queries, train)
            query_time = timer() - query_t0

            return neighbors.cpu().numpy(), query_time

        return f, setup_time
Beispiel #8
0
    def fit(x_train):
        """Prepare an unbatched brute-force K-NN query against ``x_train``.

        The only pre-computation is the squared norm of every training point.
        ``K`` and ``metric`` are read from the enclosing scope.

        Returns:
            A pair ``(f, elapsed)``: the query function, and the time spent
            computing the squared norms as measured by ``timer``.
        """
        # Setup the K-NN estimator:
        train = tensor(x_train)

        t0 = timer()
        # The "training" time here should be negligible:
        train_sq_norm = (train ** 2).sum(-1)
        setup_time = timer() - t0

        def f(x_test):
            """Time one ``KNN_torch_fun`` call covering all of ``x_test``.

            Returns:
                A pair ``(indices, elapsed)``: the query result as a NumPy
                array, and the duration of the query as measured by ``timer``.
            """
            queries = tensor(x_test)

            query_t0 = timer()
            # Actual K-NN query:
            neighbors = KNN_torch_fun(train, train_sq_norm, queries, K, metric)
            query_time = timer() - query_t0

            return neighbors.cpu().numpy(), query_time

        return f, setup_time