def test_transformer_equivalence():
    """Check that PyNNDescentTransformer agrees with a direct NNDescent query.

    An NNDescent index is built on the training slice and queried with the
    test slice; the same data is then pushed through PyNNDescentTransformer.
    After ordering both results by neighbor index within each row, the
    neighbor indices must match exactly and the distances must be close.
    """
    n_neighbors = 15
    epsilon = 0.15
    train_points = nn_data[:400]
    query_points = nn_data[:200]

    # The raw index is built with one extra neighbor: n_neighbors is shifted
    # to conform to sklearn's KNeighborsTransformer definition.
    index = NNDescent(
        data=train_points,
        n_neighbors=n_neighbors + 1,
        random_state=42,
        compressed=False,
    )
    knn_indices, knn_dists = index.query(
        query_points, k=n_neighbors, epsilon=epsilon
    )

    # Reorder every row by ascending neighbor index so it can be compared
    # against the transformer output after ``sorted_indices()``.
    order = np.argsort(knn_indices, axis=1)
    expected_indices = np.take_along_axis(knn_indices, order, axis=1)
    expected_dists = np.take_along_axis(knn_dists, order, axis=1)

    transformer = PyNNDescentTransformer(
        n_neighbors=n_neighbors,
        search_epsilon=epsilon,
        random_state=42,
    ).fit(train_points, compress_index=False)
    transformed = transformer.transform(query_points).sorted_indices()

    assert np.all(transformed.indices == expected_indices.ravel())
    assert np.allclose(transformed.data, expected_dists.ravel())
def test_transformer_pickle_unpickle():
    """Check that a fitted PyNNDescentTransformer survives a pickle round trip.

    The transformer is fitted on one random sample, pickled to a temporary
    file on disk, loaded back, and both the original and the restored
    transformer are applied to a second random sample. Their outputs
    (``indices`` and ``data``) must be identical.
    """
    tmp_path = "test_tmp.pkl"

    seed = np.random.RandomState(42)
    x1 = seed.normal(0, 100, (1000, 50))
    x2 = seed.normal(0, 100, (1000, 50))

    index1 = PyNNDescentTransformer(n_neighbors=10).fit(x1)
    result1 = index1.transform(x2)

    try:
        # Context managers guarantee both handles are closed before the file
        # is removed (removing a still-open file fails on some platforms).
        with open(tmp_path, "wb") as out_file:
            pickle.dump(index1, out_file)
        with open(tmp_path, "rb") as in_file:
            index2 = pickle.load(in_file)
    finally:
        # Clean up even when dumping or loading raises, so a failing run
        # does not leave a stray file in the working directory.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    result2 = index2.transform(x2)

    np.testing.assert_equal(result1.indices, result2.indices)
    np.testing.assert_equal(result1.data, result2.data)
def test_transformer_pickle_unpickle():
    """Check that a fitted PyNNDescentTransformer survives an in-memory pickle.

    The transformer is fitted on one random sample, serialized and restored
    entirely in memory, and both copies are applied to a second random
    sample. Their outputs (``indices`` and ``data``) must be identical.
    """
    rng = np.random.RandomState(42)
    # Draw order matters for reproducibility: fit data first, then queries.
    fit_data = rng.normal(0, 100, (1000, 50))
    query_data = rng.normal(0, 100, (1000, 50))

    original = PyNNDescentTransformer(n_neighbors=10).fit(fit_data)
    expected = original.transform(query_data)

    # Round trip through pickle without touching the filesystem.
    restored = pickle.loads(pickle.dumps(original))
    actual = restored.transform(query_data)

    np.testing.assert_equal(expected.indices, actual.indices)
    np.testing.assert_equal(expected.data, actual.data)
def test_transformer_equivalence():
    """Check that PyNNDescentTransformer agrees with a direct NNDescent query.

    An NNDescent index is built on the training slice and queried with the
    test slice using a fixed queue size; the same data is then pushed through
    PyNNDescentTransformer configured with the same queue size. After
    ordering both results by neighbor index within each row, the neighbor
    indices must match exactly and the distances must be close.
    """
    n_neighbors = 15
    queue_size = 5.0
    train_points = nn_data[:400]
    query_points = nn_data[:200]

    index = NNDescent(data=train_points, n_neighbors=n_neighbors, random_state=42)
    knn_indices, knn_dists = index.query(
        query_points, k=n_neighbors, queue_size=queue_size
    )

    # Reorder every row by ascending neighbor index so it can be compared
    # against the transformer output after ``sorted_indices()``.
    order = np.argsort(knn_indices, axis=1)
    expected_indices = np.take_along_axis(knn_indices, order, axis=1)
    expected_dists = np.take_along_axis(knn_dists, order, axis=1)

    transformer = PyNNDescentTransformer(
        n_neighbors=n_neighbors,
        search_queue_size=queue_size,
        random_state=42,
    ).fit(train_points)
    transformed = transformer.transform(query_points).sorted_indices()

    assert np.all(transformed.indices == expected_indices.ravel())
    assert np.allclose(transformed.data, expected_dists.ravel())