コード例 #1
0
def test_transformer_equivalence():
    """Check that PyNNDescentTransformer reproduces direct NNDescent queries.

    An NNDescent index is built on the first 400 rows of ``nn_data`` and
    queried with the first 200 rows. The same data is pushed through
    PyNNDescentTransformer, and the neighbor indices and distances of both
    results are compared after sorting each row by neighbor index.
    """
    N_NEIGHBORS = 15
    EPSILON = 0.15
    train = nn_data[:400]
    test = nn_data[:200]

    # The raw index is built with one extra neighbor so that it conforms to
    # sklearn's KNeighborsTransformer definition.
    raw_index = NNDescent(
        data=train,
        n_neighbors=N_NEIGHBORS + 1,
        random_state=42,
        compressed=False,
    )
    nn_indices, nn_dists = raw_index.query(test, k=N_NEIGHBORS, epsilon=EPSILON)

    # Reorder every row by ascending neighbor index so the layout matches
    # the index-sorted sparse matrix produced below.
    order = np.argsort(nn_indices, axis=1)
    indices_sorted = np.take_along_axis(nn_indices, order, axis=1)
    dists_sorted = np.take_along_axis(nn_dists, order, axis=1)

    # The transformer itself receives the unshifted neighbor count.
    fitted = PyNNDescentTransformer(
        n_neighbors=N_NEIGHBORS,
        search_epsilon=EPSILON,
        random_state=42,
    ).fit(train, compress_index=False)
    Xt = fitted.transform(test).sorted_indices()

    assert np.all(Xt.indices == indices_sorted.flatten())
    assert np.allclose(Xt.data, dists_sorted.flat)
コード例 #2
0
def to_graph(X,sigma,e,n_neighbors,similarity_matrix,knn_aprox,eps=1e-7):
    '''
    Compute a similarity matrix for the samples in X.

    X: samples; a torch.Tensor is detached, moved to the CPU and converted
       to a numpy array first.
    sigma: kernel width. 'max' uses each row's largest neighbor distance,
       'mean' uses each row's mean stored distance, anything else is used
       directly as the width.
    e: radius passed to radius_neighbors_graph (only used for 'e-NG').
    n_neighbors: number of neighbors for the k-nearest-neighbor graphs.
    similarity_matrix: one of
       'e-NG'        -> connectivity graph of the radius-e neighborhood,
       'full'        -> not implemented, returns None,
       'precomputed' -> X is returned unchanged (assumes X already holds
                        the similarity matrix -- confirm against callers),
       'k-hNNG'      -> (A + A.T) / 2,
       'k-NNG'       -> element-wise maximum of A and A.T,
       'k-mNNG'      -> element-wise minimum of A and A.T,
       where A is the kernel-weighted kNN graph.
    knn_aprox: if truthy, build the kNN graph with PyNNDescentTransformer
       instead of kneighbors_graph.
    eps: small constant added to 2*sigma^2 before dividing by it.

    return: similarity matrix (None for 'full')
    raises: ValueError if similarity_matrix is not one of the names above.
    '''
    known_modes = ('e-NG', 'full', 'precomputed', 'k-hNNG', 'k-NNG', 'k-mNNG')
    if similarity_matrix not in known_modes:
        # Fail before building a graph that would only be thrown away.
        raise ValueError(
            "Unknown similarity_matrix %r; expected one of %s"
            % (similarity_matrix, ', '.join(known_modes))
        )

    # isinstance also covers Tensor subclasses, which an exact type
    # comparison would let through unconverted.
    if isinstance(X, torch.Tensor):
        X = X.detach().to("cpu").numpy()

    if similarity_matrix == 'e-NG':
        return radius_neighbors_graph(X, e, mode='connectivity',include_self=False, n_jobs=-1)

    if similarity_matrix == 'full':
        # Dense similarity is not implemented; callers receive None.
        return None

    if similarity_matrix == 'precomputed':
        # Nothing to compute: hand the input back as the similarity matrix.
        return X

    # Sparse matrix of neighbor distances.
    if knn_aprox:
        A = PyNNDescentTransformer(n_neighbors=n_neighbors,metric="euclidean",n_jobs=-1).fit_transform(X)
    else:
        A = kneighbors_graph(X, n_neighbors, mode='distance',include_self=False, n_jobs=-1)

    # Kernel width: per row for 'max' / 'mean', otherwise the given value.
    if sigma == 'max':
        width = A.max(axis=1).toarray()
    elif sigma == 'mean':
        width = A.sum(axis=1) / A.getnnz(axis=1).reshape(-1,1)
    else:
        width = sigma
    sigma_2 = 2*np.power(width,2) + eps

    # Gaussian kernel exp(-d^2 / (2*sigma^2 + eps)) on the stored entries.
    A = ( ( A.power(2,dtype=np.float32) ).multiply(1/sigma_2) ).tocsr()
    np.exp(-A.data,out=A.data)

    if knn_aprox:
        # Presumably removes the self-neighbor entries (distance 0, so
        # kernel value 1) that the approximate search reports -- confirm.
        A = A - sparse.identity(A.shape[0])

    if similarity_matrix == 'k-hNNG':
        return (A + A.T)/2

    if similarity_matrix == 'k-NNG':
        return A.maximum(A.T)

    # Only 'k-mNNG' remains after the validation above.
    return A.minimum(A.T)
コード例 #3
0
def test_transformer_pickle_unpickle():
    """Check that a fitted transformer survives a pickle round trip on disk.

    A PyNNDescentTransformer is fitted on one random sample and used to
    transform a second one. It is then pickled to ``test_tmp.pkl``, loaded
    back, and the reloaded object must produce identical indices and data
    for the same query sample.
    """
    seed = np.random.RandomState(42)

    x1 = seed.normal(0, 100, (1000, 50))
    x2 = seed.normal(0, 100, (1000, 50))

    index1 = PyNNDescentTransformer(n_neighbors=10).fit(x1)
    result1 = index1.transform(x2)

    # Context managers close both handles deterministically, and the
    # finally clause removes the scratch file even if dump/load raises.
    try:
        with open("test_tmp.pkl", "wb") as pickle_file:
            pickle.dump(index1, pickle_file)
        with open("test_tmp.pkl", "rb") as pickle_file:
            index2 = pickle.load(pickle_file)
    finally:
        if os.path.exists("test_tmp.pkl"):
            os.remove("test_tmp.pkl")

    result2 = index2.transform(x2)

    np.testing.assert_equal(result1.indices, result2.indices)
    np.testing.assert_equal(result1.data, result2.data)
コード例 #4
0
def test_transformer_pickle_unpickle():
    """Check that a fitted transformer survives an in-memory pickle round trip.

    The transformer is fitted on one random sample, pickled into a BytesIO
    buffer and loaded back. Both the original and the restored object must
    yield the same indices and data when transforming a second sample.
    """
    rng = np.random.RandomState(42)

    fit_points = rng.normal(0, 100, (1000, 50))
    query_points = rng.normal(0, 100, (1000, 50))

    original = PyNNDescentTransformer(n_neighbors=10).fit(fit_points)
    expected = original.transform(query_points)

    with io.BytesIO() as buffer:
        pickle.dump(original, buffer)
        # Rewind so loading starts at the beginning of the pickle stream.
        buffer.seek(0)
        restored = pickle.load(buffer)

    actual = restored.transform(query_points)

    np.testing.assert_equal(expected.indices, actual.indices)
    np.testing.assert_equal(expected.data, actual.data)
コード例 #5
0
def test_transformer_equivalence():
    """Check that PyNNDescentTransformer reproduces direct NNDescent queries.

    Both the raw NNDescent index and the transformer are built on the first
    400 rows of ``nn_data`` with the same neighbor count, queue size and
    random state, then queried with the first 200 rows. Results are compared
    after sorting each row by neighbor index.
    """
    N_NEIGHBORS = 15
    QUEUE_SIZE = 5.0
    train = nn_data[:400]
    test = nn_data[:200]

    raw_index = NNDescent(data=train, n_neighbors=N_NEIGHBORS, random_state=42)
    nn_indices, nn_dists = raw_index.query(
        test, k=N_NEIGHBORS, queue_size=QUEUE_SIZE
    )

    # Gather each row in ascending neighbor-index order so the layout matches
    # the index-sorted sparse matrix produced by the transformer.
    order = np.argsort(nn_indices, axis=1)
    row_ids = np.arange(order.shape[0])[:, None]
    indices_sorted = nn_indices[row_ids, order]
    dists_sorted = nn_dists[row_ids, order]

    fitted = PyNNDescentTransformer(
        n_neighbors=N_NEIGHBORS,
        search_queue_size=QUEUE_SIZE,
        random_state=42,
    ).fit(train)
    Xt = fitted.transform(test).sorted_indices()

    assert np.all(Xt.indices == indices_sorted.flat)
    assert np.allclose(Xt.data, dists_sorted.flat)
コード例 #6
0
def test_transformer_output_when_verbose_is_false():
    """Check that fitting with ``verbose=False`` prints nothing to stdout.

    Stdout is captured while the transformer is built and run on
    ``spatial_data``; apart from surrounding whitespace the capture must be
    empty.
    """
    settings = dict(
        n_neighbors=4,
        metric="euclidean",
        metric_kwds={},
        random_state=np.random,
        n_trees=5,
        n_iters=2,
        verbose=False,
    )

    captured = io.StringIO()
    with redirect_stdout(captured):
        PyNNDescentTransformer(**settings).fit_transform(spatial_data)

    printed = captured.getvalue().strip()
    assert_equal(len(printed), 0)
コード例 #7
0
def test_transformer_output_when_verbose_is_false(spatial_data, seed):
    """Check that fitting with ``verbose=False`` prints nothing to stdout.

    Uses the ``standardised_euclidean`` metric with a unit ``sigma`` per
    column of ``spatial_data`` and a RandomState built from ``seed``. Stdout
    is captured during construction and ``fit_transform``; apart from
    surrounding whitespace the capture must be empty.
    """
    settings = dict(
        n_neighbors=4,
        metric="standardised_euclidean",
        metric_kwds={"sigma": np.ones(spatial_data.shape[1])},
        random_state=np.random.RandomState(seed),
        n_trees=5,
        n_iters=2,
        verbose=False,
    )

    captured = io.StringIO()
    with redirect_stdout(captured):
        PyNNDescentTransformer(**settings).fit_transform(spatial_data)

    printed = captured.getvalue().strip()
    assert printed == ""
コード例 #8
0
def test_transformer_output_when_verbose_is_true():
    """Check that fitting with ``verbose=True`` reports trees and iterations.

    Stdout is captured while the transformer is built and run on
    ``spatial_data`` with ``n_trees=5`` and ``n_iters=2``; the captured text
    must contain "5 trees" and "2 iterations".
    """
    settings = dict(
        n_neighbors=4,
        metric="euclidean",
        metric_kwds={},
        random_state=np.random,
        n_trees=5,
        n_iters=2,
        verbose=True,
    )

    captured = io.StringIO()
    with redirect_stdout(captured):
        PyNNDescentTransformer(**settings).fit_transform(spatial_data)

    printed = captured.getvalue()
    # DOTALL lets ".*" span the newlines that precede each expected phrase.
    for pattern in ("^.*5 trees", "^.*2 iterations"):
        assert_true(re.match(pattern, printed, re.DOTALL))