def test_random_projection_embedding_quality():
    """Check that auto-sized projections keep distance ratios within eps.

    For every class in ``all_RandomProjection``, the squared pairwise
    distances after projection divided by the squared pairwise distances
    before projection must lie strictly between ``1 - eps`` and ``1 + eps``
    (the distance-preservation contract of the Johnson-Lindenstrauss lemma).
    """
    eps = 0.2
    data, _ = make_sparse_random_data(8, 5000, 15000)

    # Flattened squared distances in the input space. Zero entries are
    # masked out because they are used as divisors below.
    reference = euclidean_distances(data, squared=True).ravel()
    keep = reference != 0.0
    reference = reference[keep]

    for RandomProjection in all_RandomProjection:
        embedded = RandomProjection(
            n_components='auto', eps=eps, random_state=0).fit_transform(data)

        # Same flattening and the same mask as for the reference distances,
        # so both arrays stay aligned pair by pair.
        embedded_distances = euclidean_distances(
            embedded, squared=True).ravel()[keep]

        distances_ratio = embedded_distances / reference

        # The automatically chosen number of components must honour eps.
        assert distances_ratio.max() < 1 + eps
        assert 1 - eps < distances_ratio.min()
def test_affinity_propagation_equal_mutual_similarities():
    """Check affinity_propagation on two samples with equal similarities.

    With a scalar preference a "mutually equal" UserWarning is expected and
    the outcome depends on whether the preference is above or below the
    mutual similarity; with per-sample preferences no warning is expected.
    """
    points = np.array([[-1, 1], [1, -1]])
    S = -euclidean_distances(points, squared=True)

    # Preference above the mutual similarity: each sample is an exemplar.
    exemplars, assignment = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S,
        preference=0)
    assert_array_equal([0, 1], exemplars)
    assert_array_equal([0, 1], assignment)

    # Preference below the mutual similarity: a single cluster whose
    # exemplar is the first sample.
    exemplars, assignment = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S,
        preference=-10)
    assert_array_equal([0], exemplars)
    assert_array_equal([0, 0], assignment)

    # Distinct preferences: no warning, and the sample with the highest
    # preference becomes the only exemplar.
    exemplars, assignment = assert_no_warnings(
        affinity_propagation, S, preference=[-20, -10])
    assert_array_equal([1], exemplars)
    assert_array_equal([0, 0], assignment)
def test_equal_similarities_and_preferences():
    """Check _equal_similarities_and_preferences on small hand-made inputs.

    The helper is expected to return a truthy value only when the
    similarities are all equal AND the preferences are all equal.
    """
    # Three points with unequal pairwise distances: the result must be
    # falsy whatever the preference looks like.
    unequal_points = np.array([[0, 0], [1, 1], [-2, -2]])
    S = -euclidean_distances(unequal_points, squared=True)
    for preference in (np.array(0), np.array([0, 0]), np.array([0, 1])):
        assert not _equal_similarities_and_preferences(S, preference)

    # Two points, hence a single mutual distance.
    equal_points = np.array([[0, 0], [1, 1]])
    S = -euclidean_distances(equal_points, squared=True)

    # Differing preferences still give a falsy result ...
    assert not _equal_similarities_and_preferences(S, np.array([0, 1]))

    # ... while identical preferences (array or scalar) give a truthy one.
    for preference in (np.array([0, 0]), np.array(0)):
        assert _equal_similarities_and_preferences(S, preference)
def test_affinity_propagation():
    """Exercise affinity_propagation and the AffinityPropagation estimator.

    Uses the module-level ``X`` and ``n_clusters``. Checks the number of
    clusters found, agreement between the precomputed and default affinity,
    the ``copy=False`` code path, and input validation errors.
    """
    # Similarities are negated squared distances; the shared preference is
    # ten times their median.
    S = -euclidean_distances(X, squared=True)
    preference = np.median(S) * 10

    # Function interface.
    center_idx, _ = affinity_propagation(S, preference=preference)
    assert n_clusters == len(center_idx)

    # Estimator interface: precomputed similarities versus raw samples.
    precomputed_model = AffinityPropagation(
        preference=preference, affinity="precomputed")
    labels_precomputed = precomputed_model.fit(S).labels_

    model = AffinityPropagation(preference=preference, verbose=True)
    labels = model.fit(X).labels_
    assert_array_equal(labels, labels_precomputed)

    found = len(model.cluster_centers_indices_)
    assert np.unique(labels).size == found
    assert n_clusters == found

    # Same result expected when the similarity matrix is not copied.
    # Kept after the checks above since copy=False may modify S in place.
    _, labels_no_copy = affinity_propagation(
        S, preference=preference, copy=False)
    assert_array_equal(labels, labels_no_copy)

    # Input validation: non-square similarities, bad damping, unknown
    # affinity, and a sparse precomputed matrix.
    with pytest.raises(ValueError):
        affinity_propagation(S[:, :-1])
    with pytest.raises(ValueError):
        affinity_propagation(S, damping=0)

    unknown_affinity = AffinityPropagation(affinity="unknown")
    with pytest.raises(ValueError):
        unknown_affinity.fit(X)

    sparse_precomputed = AffinityPropagation(affinity='precomputed')
    with pytest.raises(TypeError):
        sparse_precomputed.fit(csr_matrix((3, 3)))
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection

from mrex import manifold
from mrex.metrics import euclidean_distances
from mrex.decomposition import PCA

EPSILON = np.finfo(np.float32).eps
n_samples = 20

# Seeded generator: used for the ground-truth points and handed to MDS.
seed = np.random.RandomState(seed=3)

# Ground truth: n_samples 2-D points with integer coordinates in [0, 20).
# np.float64 replaces the removed ``np.float`` alias (same dtype).
X_true = seed.randint(0, 20, 2 * n_samples).astype(np.float64)
X_true = X_true.reshape((n_samples, 2))

# Center the data by subtracting the scalar mean over all coordinates.
X_true -= X_true.mean()

# Pairwise Euclidean distances, used below as precomputed dissimilarities.
similarities = euclidean_distances(X_true)

# Add symmetric noise with a zero diagonal to the similarities.
# NOTE(review): the noise is drawn from the global NumPy RNG, not from
# ``seed``, so it differs between runs unless that RNG is seeded elsewhere.
noise = np.random.rand(n_samples, n_samples)
noise = noise + noise.T
noise[np.arange(noise.shape[0]), np.arange(noise.shape[0])] = 0
similarities += noise

# Fit MDS on the noisy dissimilarities and keep the 2-D embedding.
mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9,
                   random_state=seed, dissimilarity="precomputed", n_jobs=1)
pos = mds.fit(similarities).embedding_