def test_iris_manhattan():
    """Check the mean of the transformed iris data with the Manhattan metric.

    The transformer is seeded, so the mean of the ``fit_transform`` output
    is compared against a recorded value to three decimals.
    """
    transformer = SubsampledNeighborsTransformer(
        0.5, eps=10.0, metric='manhattan', random_state=42)
    graph = transformer.fit_transform(iris.data)
    assert_almost_equal(np.mean(graph), 1.613, decimal=3)
def test_iris_cosine():
    """Check the mean of the transformed iris data with the cosine metric.

    Fitting and transforming are done as two separate calls on the same
    data; the mean is compared to a recorded value to two decimals.
    """
    transformer = SubsampledNeighborsTransformer(
        0.6, eps=0.1, metric='cosine', random_state=42)
    fitted = transformer.fit(iris.data)
    graph = fitted.transform(iris.data)
    assert_almost_equal(np.mean(graph), 0.009, decimal=2)
def test_iris_small_eps():
    """Check the exact graph produced on iris when ``eps`` is small (0.1)."""
    transformer = SubsampledNeighborsTransformer(
        0.5, eps=0.1, random_state=42)

    # Recorded entries of the expected graph, in COO form.
    data = [0.1, 0.1, 0.1]
    rows = [50, 127, 133]
    cols = [133, 41, 50]
    expected = csr_matrix((data, (rows, cols)), shape=(n_iris, n_iris))

    actual = transformer.fit_transform(iris.data)
    assert_array_almost_equal(actual.toarray(), expected.toarray())
def test_sample_toy_different():
    """Fit on one toy matrix and transform a different one.

    The expected graph has shape (2, 3), i.e. it is not square because the
    fitted and transformed matrices differ.
    """
    transformer = SubsampledNeighborsTransformer(0.5, random_state=5)

    data = [5.055689, 8.833459]
    rows = [0, 1]
    cols = [2, 1]
    expected = csr_matrix((data, (rows, cols)), shape=(2, 3))

    actual = transformer.fit(X_fit).transform(X_transform)
    assert_array_almost_equal(actual.toarray(), expected.toarray())
def test_iris_callable():
    """Check that a user-supplied callable is accepted as ``metric``."""

    def mean_of_maxima(a, b):
        # Custom "distance": mean of the element-wise maximum of a and b.
        return np.mean(np.maximum(a, b))

    transformer = SubsampledNeighborsTransformer(
        0.5, eps=5.0, metric=mean_of_maxima, random_state=42)
    graph = transformer.fit_transform(iris.data)
    assert_almost_equal(np.mean(graph), 1.504, decimal=3)
def test_sample_toy_fit_nonsparse_transform_sparse():
    """Fit on a non-sparse toy matrix, then transform its sparse version.

    The same transformer instance is used for both toy datasets; the first
    comparison is approximate, the second one exact.
    """
    transformer = SubsampledNeighborsTransformer(0.3, random_state=3)
    rows = [0, 1, 2]
    cols = [2, 0, 1]

    # First toy dataset: compared approximately.
    expected = csr_matrix(([3.464102, 1.732051, 1.732051], (rows, cols)),
                          shape=(4, 4))
    actual = transformer.fit(X).transform(X_csr)
    assert_array_almost_equal(actual.toarray(), expected.toarray())

    # Second toy dataset: compared exactly.
    expected = csr_matrix(([2.0, 1.0, 1.0], (rows, cols)), shape=(4, 4))
    actual = transformer.fit(X2).transform(X2_csr)
    assert_array_equal(actual.toarray(), expected.toarray())
def test_sample_toy_fit_sparse_transform_sparse():
    """Fit and transform the sparse toy matrices with ``fit_transform``.

    The same transformer instance is used for both toy datasets; the first
    comparison is approximate, the second one exact.
    """
    transformer = SubsampledNeighborsTransformer(s=0.3, random_state=1)
    rows = [0, 1, 2, 3]
    cols = [1, 3, 0, 0]

    # First toy dataset: compared approximately.
    expected = csr_matrix(
        ([1.732051, 6.928203, 3.464102, 8.660254], (rows, cols)),
        shape=(4, 4))
    actual = transformer.fit_transform(X_csr)
    assert_array_almost_equal(actual.toarray(), expected.toarray())

    # Second toy dataset: compared exactly.
    expected = csr_matrix(([1.0, 2.0, 2.0, 3.0], (rows, cols)),
                          shape=(4, 4))
    actual = transformer.fit_transform(X2_csr)
    assert_array_equal(actual.toarray(), expected.toarray())
def test_sample_toy_fit_nonsparse_transform_nonsparse():
    """Fit and transform the non-sparse toy matrices with ``fit_transform``.

    The same transformer instance (``s=1.``, ``eps=5.``) is used for both
    toy datasets; the first comparison is approximate, the second exact.
    """
    transformer = SubsampledNeighborsTransformer(
        s=1., eps=5., random_state=0)

    # First toy dataset: compared approximately.
    expected = csr_matrix(([1.732051, 1.732051], ([0, 2], [1, 1])),
                          shape=(4, 4))
    actual = transformer.fit_transform(X)
    assert_array_almost_equal(actual.toarray(), expected.toarray())

    # Second toy dataset: compared exactly.
    data = [1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 3.0]
    rows = [0, 0, 1, 2, 2, 3, 3]
    cols = [1, 3, 3, 1, 3, 2, 0]
    expected = csr_matrix((data, (rows, cols)), shape=(4, 4))
    actual = transformer.fit_transform(X2)
    assert_array_equal(actual.toarray(), expected.toarray())
def test_sample_toy_fit_sparse_transform_nonsparse():
    """Fit on a sparse toy matrix, then transform its non-sparse version.

    The same transformer instance is used for both toy datasets; the first
    comparison is approximate, the second one exact.
    """
    transformer = SubsampledNeighborsTransformer(0.9, random_state=2)

    # First toy dataset: compared approximately.
    data = [1.732051, 8.660254, 1.732051, 1.732051, 6.928203,
            3.464102, 5.196152, 5.196152, 6.928203]
    rows = [0, 0, 1, 1, 1, 2, 2, 3, 3]
    cols = [1, 3, 0, 2, 3, 0, 3, 2, 1]
    expected = csr_matrix((data, (rows, cols)), shape=(4, 4))
    actual = transformer.fit(X_csr).transform(X)
    assert_array_almost_equal(actual.toarray(), expected.toarray())

    # Second toy dataset: compared exactly.
    data = [1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
    rows = [0, 0, 1, 1, 1, 2, 2, 3, 3]
    cols = [1, 3, 0, 2, 3, 3, 0, 2, 1]
    expected = csr_matrix((data, (rows, cols)), shape=(4, 4))
    actual = transformer.fit(X2_csr).transform(X2)
    assert_array_equal(actual.toarray(), expected.toarray())
def test_sample_toy_noncsr():
    """Fit and transform with sparse matrices in non-CSR formats.

    The first dataset is fitted as COO and transformed as LIL; the second
    is fitted as DOK and transformed as CSC. The same transformer instance
    is used for both.
    """
    transformer = SubsampledNeighborsTransformer(0.8, random_state=4)

    # First toy dataset (COO -> LIL): compared approximately.
    data = [3.464102, 8.660254, 1.732051, 3.464102,
            5.196152, 5.196152, 6.928203, 8.660254]
    rows = [0, 0, 1, 2, 2, 3, 3, 3]
    cols = [2, 3, 0, 0, 3, 2, 1, 0]
    expected = csr_matrix((data, (rows, cols)), shape=(4, 4))
    actual = transformer.fit(X_csr.tocoo()).transform(X_csr.tolil())
    assert_array_almost_equal(actual.toarray(), expected.toarray())

    # Second toy dataset (DOK -> CSC): compared exactly.
    data = [2.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0]
    rows = [0, 0, 1, 2, 2, 3, 3, 3]
    cols = [2, 3, 0, 3, 0, 2, 1, 0]
    expected = csr_matrix((data, (rows, cols)), shape=(4, 4))
    actual = transformer.fit(X2_csr.todok()).transform(X2_csr.tocsc())
    assert_array_equal(actual.toarray(), expected.toarray())
def test_iris_large_s():
    """Check the mean of the transformed iris data for a large ``s`` (2.0).

    Two cases are run with the same seed: one with the default ``eps`` and
    one with ``eps=2.5``.
    """
    cases = (
        ({}, 2.18),            # large s, default eps
        ({'eps': 2.5}, 0.52),  # large s, explicit eps
    )
    for extra_params, expected_mean in cases:
        transformer = SubsampledNeighborsTransformer(
            2.0, random_state=42, **extra_params)
        graph = transformer.fit_transform(iris.data)
        assert_almost_equal(np.mean(graph), expected_mean, decimal=3)
def test_sample_toy_no_edges():
    """Check that the toy data yields an empty graph in two settings.

    The settings are a very low sampling rate and a very small ``eps``.
    Each setting is checked on the non-sparse ``X`` (approximate
    comparison) and on the sparse ``X2_csr`` (exact comparison).
    """
    # Dense form of a 4x4 sparse matrix with no stored entries.
    no_edges = csr_matrix(([], ([], [])), shape=(4, 4)).toarray()

    # Sampling rate too low
    transformer = SubsampledNeighborsTransformer(0.01, random_state=6)
    assert_array_almost_equal(
        transformer.fit_transform(X).toarray(), no_edges)
    assert_array_equal(
        transformer.fit_transform(X2_csr).toarray(), no_edges)

    # Epsilon too small
    transformer = SubsampledNeighborsTransformer(
        0.9, eps=0.01, random_state=6)
    assert_array_almost_equal(
        transformer.fit_transform(X).toarray(), no_edges)
    assert_array_equal(
        transformer.fit_transform(X2_csr).toarray(), no_edges)
def test_iris_no_edges():
    """Check that iris yields an all-zero graph in two degenerate settings.

    The settings are a very low sampling rate and a very small ``eps``.
    Both transformers are seeded (like the other iris tests in this file)
    so that any failure can be reproduced from run to run.
    """
    # Dense form of an (n_iris, n_iris) sparse matrix with no stored entries.
    no_edges = csr_matrix(
        ([], ([], [])), shape=(n_iris, n_iris)).toarray()

    # Sampling rate too low
    n = SubsampledNeighborsTransformer(0.00001, random_state=42)
    assert_array_almost_equal(
        n.fit_transform(iris.data).toarray(), no_edges)

    # Epsilon too small
    n = SubsampledNeighborsTransformer(0.5, eps=0.00001, random_state=42)
    assert_array_almost_equal(
        n.fit_transform(iris.data).toarray(), no_edges)
def test_iris_euclidean():
    """Check the mean of the transformed iris data with the Euclidean metric.

    Two cases are run with the same seed: one with the default ``eps`` and
    one with ``eps=2.0``. Fitting and transforming are separate calls.
    """
    cases = (
        ({}, 0.836),            # default eps
        ({'eps': 2.0}, 0.147),  # explicit eps
    )
    for extra_params, expected_mean in cases:
        transformer = SubsampledNeighborsTransformer(
            0.4, metric='euclidean', random_state=42, **extra_params)
        graph = transformer.fit(iris.data).transform(iris.data)
        assert_almost_equal(np.mean(graph), expected_mean, decimal=2)
def test_iris_small_s():
    """Check the graphs produced on iris for small values of ``s``.

    With ``s=0.001`` the expected graph has no entries; with ``s=0.01`` and
    ``eps=0.5`` it has five recorded entries. Both comparisons use two
    decimals.
    """
    # s = 0.001: expect a graph without any stored entries.
    transformer = SubsampledNeighborsTransformer(0.001, random_state=42)
    expected = csr_matrix((n_iris, n_iris))
    actual = transformer.fit_transform(iris.data)
    assert_array_almost_equal(
        actual.toarray(), expected.toarray(), decimal=2)

    # s = 0.01 with eps = 0.5: expect the recorded entries below.
    transformer = SubsampledNeighborsTransformer(
        0.01, eps=0.5, random_state=42)
    data = [0.3, 0.3, 0.412311, 0.424264, 0.424264]
    rows = [18, 23, 27, 50, 128]
    cols = [37, 88, 50, 80, 133]
    expected = csr_matrix((data, (rows, cols)), shape=(n_iris, n_iris))
    actual = transformer.fit_transform(iris.data)
    assert_array_almost_equal(
        actual.toarray(), expected.toarray(), decimal=2)
# #############################################################################
# Compute clustering with DBSCAN
# The timed region covers the fit and the label lookup.
dbscan = DBSCAN(eps=eps, min_samples=min_samples, algorithm='auto')

t0 = time.time()
labels_dbscan = dbscan.fit(X).labels_
t_dbscan = time.time() - t0

# Agreement between the predicted labels and the reference ``labels``.
rand_dbscan = adjusted_rand_score(labels_dbscan, labels)
mi_dbscan = adjusted_mutual_info_score(labels_dbscan, labels)

# #############################################################################
# Compute clustering with DBSCAN and subsampled neighbors
# DBSCAN is run on the precomputed output of the transformer; the timed
# region covers the transform, the fit and the label lookup.
dbscan_sub = DBSCAN(
    eps=eps, min_samples=min_samples_sub, metric='precomputed')
snt = SubsampledNeighborsTransformer(s=s, eps=eps)

t0 = time.time()
X_sub = snt.fit_transform(X)
labels_sub = dbscan_sub.fit(X_sub).labels_
t_sub = time.time() - t0

# Agreement between the predicted labels and the reference ``labels``.
rand_sub = adjusted_rand_score(labels_sub, labels)
mi_sub = adjusted_mutual_info_score(labels_sub, labels)

# #############################################################################
# Plot result
fig = plt.figure(figsize=(8, 4))
fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ['black', 'lightblue', 'red', 'orange']
def test_iris_large_eps():
    """Check the mean of the transformed iris data for a large ``eps``."""
    transformer = SubsampledNeighborsTransformer(
        0.8, eps=100., random_state=42)
    graph = transformer.fit_transform(iris.data)
    assert_almost_equal(np.mean(graph), 1.399, decimal=3)