def test_vmap_transform_embedding_stability():
    """Test that transforming data does not alter the learned embeddings.

    Issue #217 describes how using ``transform`` to embed new data with a
    trained VMAP transformer causes the fitted embedding matrix to change
    in cases when the new data has the same number of rows as the original
    training data.
    """
    # Use a local, seeded generator so the random inputs are identical on
    # every run and a failure can be reproduced.
    rng = np.random.RandomState(42)

    data = iris.data[iris_selection]
    fitter = VMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit(data)
    original_embedding = fitter.embedding_.copy()

    # The important point is that the new data has the same number of rows
    # as the original fit data.
    new_data = rng.random_sample(data.shape)
    # Only the effect on ``fitter.embedding_`` matters; the returned
    # embedding of the new data is deliberately discarded.
    fitter.transform(new_data)

    assert_array_equal(
        original_embedding,
        fitter.embedding_,
        "Transforming new data changed the original embeddings",
    )

    # Example from issue #217: fit on 1000 rows, then transform a different
    # array that also has 1000 rows and the same number of columns.
    a = rng.random_sample((1000, 10))
    b = rng.random_sample((1000, 5))

    vmap = VMAP()
    u1 = vmap.fit_transform(a[:, :5])
    u1_orig = u1.copy()
    assert_array_equal(u1_orig, vmap.embedding_)

    # Again, only the effect on the stored embedding is checked.
    vmap.transform(b)
    assert_array_equal(u1_orig, vmap.embedding_)
def test_vmap_transform_on_iris():
    """Test that transforming unseen iris rows yields a trustworthy embedding.

    A VMAP model is fitted on the rows picked by ``iris_selection``; the
    remaining rows (``~iris_selection``) are then embedded with
    ``transform`` and the trustworthiness of that embedding, computed with
    10 neighbors, must be at least 0.89.
    """
    data = iris.data[iris_selection]
    fitter = VMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit(data)

    # Rows that were not part of the fit.
    new_data = iris.data[~iris_selection]
    embedding = fitter.transform(new_data)

    # NOTE(review): assumes ``trustworthiness`` is scikit-learn's; there
    # ``n_neighbors`` is keyword-only in recent releases, and passing it by
    # name works on older releases as well.
    trust = trustworthiness(new_data, embedding, n_neighbors=10)
    assert_greater_equal(
        trust,
        0.89,
        # Trailing space keeps the two concatenated literals from running
        # together ("for" + "iris").
        "Insufficiently trustworthy transform for "
        "iris dataset: {}".format(trust),
    )