def test_iris_manhattan():
    # Regression check on iris using the 'manhattan' metric: the mean of the
    # fit_transform output must match the recorded value to 3 decimals.
    transformer = SubsampledNeighborsTransformer(
        s=0.5, eps=10.0, metric='manhattan', random_state=42)
    graph = transformer.fit_transform(iris.data)
    assert_almost_equal(np.mean(graph), 1.613, decimal=3)
def test_iris_cosine():
    # Regression check on iris using the 'cosine' metric, with fit and
    # transform called as separate steps on the same data.  The mean of the
    # output is only compared to 2 decimals.
    transformer = SubsampledNeighborsTransformer(
        s=0.6, eps=0.1, metric='cosine', random_state=42)
    fitted = transformer.fit(iris.data)
    graph = fitted.transform(iris.data)
    assert_almost_equal(np.mean(graph), 0.009, decimal=2)
def test_iris_small_eps():
    # With a small eps (0.1) the iris output is expected to hold exactly
    # three non-zero entries, each equal to 0.1.
    values = [0.1, 0.1, 0.1]
    rows = [50, 127, 133]
    cols = [133, 41, 50]
    expected = csr_matrix((values, (rows, cols)), shape=(n_iris, n_iris))

    transformer = SubsampledNeighborsTransformer(
        s=0.5, eps=0.1, random_state=42)
    actual = transformer.fit_transform(iris.data)

    assert_array_almost_equal(actual.toarray(), expected.toarray())
def test_sample_toy_different():
    # Fit on one matrix (X_fit) and transform a different one (X_transform);
    # the expected output is a 2 x 3 matrix with two non-zero entries.
    values = [5.055689, 8.833459]
    rows = [0, 1]
    cols = [2, 1]
    expected = csr_matrix((values, (rows, cols)), shape=(2, 3))

    transformer = SubsampledNeighborsTransformer(s=0.5, random_state=5)
    actual = transformer.fit(X_fit).transform(X_transform)

    assert_array_almost_equal(actual.toarray(), expected.toarray())
def test_iris_callable():
    # The metric may be given as a Python callable instead of a string name.
    def mean_of_elementwise_max(a, b):
        # Mean over the element-wise maximum of the two inputs.
        larger = np.maximum(a, b)
        return np.mean(larger)

    transformer = SubsampledNeighborsTransformer(
        s=0.5, eps=5.0, metric=mean_of_elementwise_max, random_state=42)
    graph = transformer.fit_transform(iris.data)
    assert_almost_equal(np.mean(graph), 1.504, decimal=3)
def test_sample_toy_fit_nonsparse_transform_sparse():
    # Fit on the non-sparse toy inputs (X, X2) and transform their sparse
    # counterparts (X_csr, X2_csr), reusing one transformer for both pairs.
    transformer = SubsampledNeighborsTransformer(s=0.3, random_state=3)

    # Both expected matrices share the same sparsity pattern.
    rows = [0, 1, 2]
    cols = [2, 0, 1]

    expected = csr_matrix(
        ([3.464102, 1.732051, 1.732051], (rows, cols)), shape=(4, 4))
    actual = transformer.fit(X).transform(X_csr)
    assert_array_almost_equal(actual.toarray(), expected.toarray())

    # The X2 pair is compared exactly rather than approximately.
    expected = csr_matrix(([2.0, 1.0, 1.0], (rows, cols)), shape=(4, 4))
    actual = transformer.fit(X2).transform(X2_csr)
    assert_array_equal(actual.toarray(), expected.toarray())
def test_sample_toy_fit_sparse_transform_sparse():
    # fit_transform applied directly to the sparse toy inputs (X_csr, X2_csr),
    # reusing one transformer for both.
    transformer = SubsampledNeighborsTransformer(s=0.3, random_state=1)

    # Both expected matrices share the same sparsity pattern.
    rows = [0, 1, 2, 3]
    cols = [1, 3, 0, 0]

    expected = csr_matrix(
        ([1.732051, 6.928203, 3.464102, 8.660254], (rows, cols)),
        shape=(4, 4))
    actual = transformer.fit_transform(X_csr)
    assert_array_almost_equal(actual.toarray(), expected.toarray())

    # The X2_csr result is compared exactly rather than approximately.
    expected = csr_matrix(([1.0, 2.0, 2.0, 3.0], (rows, cols)), shape=(4, 4))
    actual = transformer.fit_transform(X2_csr)
    assert_array_equal(actual.toarray(), expected.toarray())
def test_sample_toy_fit_nonsparse_transform_nonsparse():
    # fit_transform applied to the non-sparse toy inputs (X, X2), reusing one
    # transformer for both.
    transformer = SubsampledNeighborsTransformer(
        s=1., eps=5., random_state=0)

    values = [1.732051, 1.732051]
    rows = [0, 2]
    cols = [1, 1]
    expected = csr_matrix((values, (rows, cols)), shape=(4, 4))
    actual = transformer.fit_transform(X)
    assert_array_almost_equal(actual.toarray(), expected.toarray())

    # The X2 result is compared exactly rather than approximately.
    values = [1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 3.0]
    rows = [0, 0, 1, 2, 2, 3, 3]
    cols = [1, 3, 3, 1, 3, 2, 0]
    expected = csr_matrix((values, (rows, cols)), shape=(4, 4))
    actual = transformer.fit_transform(X2)
    assert_array_equal(actual.toarray(), expected.toarray())
def test_sample_toy_fit_sparse_transform_nonsparse():
    # Fit on the sparse toy inputs (X_csr, X2_csr) and transform their
    # non-sparse counterparts (X, X2), reusing one transformer for both pairs.
    transformer = SubsampledNeighborsTransformer(s=0.9, random_state=2)

    values = [
        1.732051, 8.660254, 1.732051, 1.732051, 6.928203, 3.464102,
        5.196152, 5.196152, 6.928203,
    ]
    rows = [0, 0, 1, 1, 1, 2, 2, 3, 3]
    cols = [1, 3, 0, 2, 3, 0, 3, 2, 1]
    expected = csr_matrix((values, (rows, cols)), shape=(4, 4))
    actual = transformer.fit(X_csr).transform(X)
    assert_array_almost_equal(actual.toarray(), expected.toarray())

    # The X2 pair is compared exactly rather than approximately.
    values = [1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
    rows = [0, 0, 1, 1, 1, 2, 2, 3, 3]
    cols = [1, 3, 0, 2, 3, 3, 0, 2, 1]
    expected = csr_matrix((values, (rows, cols)), shape=(4, 4))
    actual = transformer.fit(X2_csr).transform(X2)
    assert_array_equal(actual.toarray(), expected.toarray())
def test_sample_toy_noncsr():
    # Fit and transform with sparse inputs converted to non-CSR formats
    # (COO / LIL for the first pair, DOK / CSC for the second).
    transformer = SubsampledNeighborsTransformer(s=0.8, random_state=4)

    values = [
        3.464102, 8.660254, 1.732051, 3.464102, 5.196152, 5.196152,
        6.928203, 8.660254,
    ]
    rows = [0, 0, 1, 2, 2, 3, 3, 3]
    cols = [2, 3, 0, 0, 3, 2, 1, 0]
    expected = csr_matrix((values, (rows, cols)), shape=(4, 4))
    actual = transformer.fit(X_csr.tocoo()).transform(X_csr.tolil())
    assert_array_almost_equal(actual.toarray(), expected.toarray())

    # The X2_csr pair is compared exactly rather than approximately.
    values = [2.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0]
    rows = [0, 0, 1, 2, 2, 3, 3, 3]
    cols = [2, 3, 0, 3, 0, 2, 1, 0]
    expected = csr_matrix((values, (rows, cols)), shape=(4, 4))
    actual = transformer.fit(X2_csr.todok()).transform(X2_csr.tocsc())
    assert_array_equal(actual.toarray(), expected.toarray())
def test_iris_large_s():
    # Large s (2.0) with eps left at its default: check the mean of the
    # iris output against the recorded value.
    transformer = SubsampledNeighborsTransformer(s=2.0, random_state=42)
    graph = transformer.fit_transform(iris.data)
    assert_almost_equal(np.mean(graph), 2.18, decimal=3)

    # Same large s, this time with an explicit eps of 2.5.
    transformer = SubsampledNeighborsTransformer(
        s=2.0, eps=2.5, random_state=42)
    graph = transformer.fit_transform(iris.data)
    assert_almost_equal(np.mean(graph), 0.52, decimal=3)
def test_sample_toy_no_edges():
    # Every case below is expected to yield a 4 x 4 output without any
    # stored values, so a single dense all-zero reference is shared.
    no_edges = csr_matrix(([], ([], [])), shape=(4, 4)).toarray()

    # Sampling rate too low
    transformer = SubsampledNeighborsTransformer(s=0.01, random_state=6)
    assert_array_almost_equal(
        transformer.fit_transform(X).toarray(), no_edges)
    assert_array_equal(
        transformer.fit_transform(X2_csr).toarray(), no_edges)

    # Epsilon too small
    transformer = SubsampledNeighborsTransformer(
        s=0.9, eps=0.01, random_state=6)
    assert_array_almost_equal(
        transformer.fit_transform(X).toarray(), no_edges)
    assert_array_equal(
        transformer.fit_transform(X2_csr).toarray(), no_edges)
def test_iris_no_edges():
    # Both configurations below are expected to give an iris output with no
    # non-zero entries.  The transformers are seeded, as in the neighbouring
    # tests, so that any failure is reproducible from run to run.
    no_edges = csr_matrix(([], ([], [])), shape=(n_iris, n_iris)).toarray()

    # Sampling rate too low
    n = SubsampledNeighborsTransformer(0.00001, random_state=42)
    assert_array_almost_equal(n.fit_transform(iris.data).toarray(), no_edges)

    # Epsilon too small
    n = SubsampledNeighborsTransformer(0.5, eps=0.00001, random_state=42)
    assert_array_almost_equal(n.fit_transform(iris.data).toarray(), no_edges)
def test_iris_euclidean():
    # Regression checks on iris using the 'euclidean' metric, with fit and
    # transform called separately; means are compared to 2 decimals.

    # eps left at its default.
    transformer = SubsampledNeighborsTransformer(
        s=0.4, metric='euclidean', random_state=42)
    graph = transformer.fit(iris.data).transform(iris.data)
    assert_almost_equal(np.mean(graph), 0.836, decimal=2)

    # Explicit eps of 2.0.
    transformer = SubsampledNeighborsTransformer(
        s=0.4, eps=2.0, metric='euclidean', random_state=42)
    graph = transformer.fit(iris.data).transform(iris.data)
    assert_almost_equal(np.mean(graph), 0.147, decimal=2)
def test_iris_small_s():
    # Small s (0.001) with default eps: the iris output is expected to match
    # an all-zero matrix to 2 decimals.
    transformer = SubsampledNeighborsTransformer(s=0.001, random_state=42)
    expected = csr_matrix((n_iris, n_iris))
    actual = transformer.fit_transform(iris.data)
    assert_array_almost_equal(
        actual.toarray(), expected.toarray(), decimal=2)

    # Slightly larger s (0.01) with eps=0.5: five non-zero entries expected.
    transformer = SubsampledNeighborsTransformer(
        s=0.01, eps=0.5, random_state=42)
    values = [0.3, 0.3, 0.412311, 0.424264, 0.424264]
    rows = [18, 23, 27, 50, 128]
    cols = [37, 88, 50, 80, 133]
    expected = csr_matrix((values, (rows, cols)), shape=(n_iris, n_iris))
    actual = transformer.fit_transform(iris.data)
    assert_array_almost_equal(
        actual.toarray(), expected.toarray(), decimal=2)
Example #16
0
# #############################################################################
# Compute clustering with DBSCAN

dbscan = DBSCAN(eps=eps, min_samples=min_samples, algorithm='auto')
# Elapsed time is measured with perf_counter(), a monotonic clock intended
# for timing short durations; only the difference of two readings is used.
t0 = time.perf_counter()
dbscan.fit(X)
labels_dbscan = dbscan.labels_
t_dbscan = time.perf_counter() - t0
# Agreement between the DBSCAN labels and `labels` (presumably the reference
# labelling defined earlier in the script -- confirm against its definition).
rand_dbscan = adjusted_rand_score(labels_dbscan, labels)
mi_dbscan = adjusted_mutual_info_score(labels_dbscan, labels)

# #############################################################################
# Compute clustering with DBSCAN and subsampled neighbors

dbscan_sub = DBSCAN(eps=eps, min_samples=min_samples_sub, metric='precomputed')
snt = SubsampledNeighborsTransformer(s=s, eps=eps)
# The timed region covers both the subsampling transform and the DBSCAN fit
# on its output.
t0 = time.perf_counter()
X_sub = snt.fit_transform(X)
dbscan_sub.fit(X_sub)
labels_sub = dbscan_sub.labels_
t_sub = time.perf_counter() - t0
rand_sub = adjusted_rand_score(labels_sub, labels)
mi_sub = adjusted_mutual_info_score(labels_sub, labels)

# #############################################################################
# Plot result

fig = plt.figure(figsize=(8, 4))
fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ['black', 'lightblue', 'red', 'orange']
def test_iris_large_eps():
    # Large eps (100.0) on iris: check the mean of the fit_transform output
    # against the recorded value to 3 decimals.
    transformer = SubsampledNeighborsTransformer(
        s=0.8, eps=100., random_state=42)
    graph = transformer.fit_transform(iris.data)
    assert_almost_equal(np.mean(graph), 1.399, decimal=3)