Esempio n. 1
0
def test_mnn_graph_matrix_gamma():
    X, sample_idx = generate_swiss_roll()
    bs = 0.8
    gamma = np.array([
        [1, bs],  # 0
        [bs, 1]
    ])  # 3
    k = 10
    a = 20
    metric = 'euclidean'
    beta = 0
    samples = np.unique(sample_idx)

    K = np.zeros((len(X), len(X)))
    K[:] = np.nan
    K = pd.DataFrame(K)

    for si in samples:
        X_i = X[sample_idx == si]  # get observations in sample i
        for sj in samples:
            X_j = X[sample_idx == sj]  # get observation in sample j
            pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
            kdx_ij = np.sort(pdx_ij, axis=1)  # get kNN
            e_ij = kdx_ij[:, k]  # dist to kNN
            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize
            k_ij = np.exp(-1 * (pdxe_ij**a))  # apply alpha-decaying kernel
            if si == sj:
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij * \
                    (1 - beta)  # fill out values in K for NN on diagonal
            else:
                # fill out values in K for NN on diagonal
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij

    K = np.array(K)

    matrix_gamma = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx))))
    for ix, si in enumerate(set(sample_idx)):
        for jx, sj in enumerate(set(sample_idx)):
            matrix_gamma.iloc[sample_idx == si, sample_idx == sj] = gamma[ix,
                                                                          jx]

    W = np.array((matrix_gamma * np.minimum(K, K.T)) +
                 ((1 - matrix_gamma) * np.maximum(K, K.T)))
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = graphtools.Graph(X,
                          knn=k + 1,
                          decay=a,
                          beta=1 - beta,
                          kernel_symm='gamma',
                          gamma=gamma,
                          distance=metric,
                          sample_idx=sample_idx,
                          thresh=0,
                          use_pygsp=True)
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
Esempio n. 2
0
def test_mnn_with_vector_gamma():
    n_sample = len(np.unique(digits['target']))
    # vector gamma
    build_graph(data,
                thresh=0,
                n_pca=20,
                decay=10,
                knn=5,
                random_state=42,
                sample_idx=digits['target'],
                kernel_symm='gamma',
                gamma=np.linspace(0, 1, n_sample - 1))
Esempio n. 3
0
def test_mnn_with_square_gamma_wrong_length():
    n_sample = len(np.unique(digits['target']))
    # square matrix gamma of the wrong size
    build_graph(data,
                thresh=0,
                n_pca=20,
                decay=10,
                knn=5,
                random_state=42,
                sample_idx=digits['target'],
                kernel_symm='gamma',
                gamma=np.tile(np.linspace(0, 1, n_sample - 1),
                              n_sample).reshape(n_sample - 1, n_sample))
Esempio n. 4
0
def test_mnn_with_vector_theta():
    n_sample = len(np.unique(digits["target"]))
    # vector theta
    build_graph(
        data,
        thresh=0,
        n_pca=20,
        decay=10,
        knn=5,
        random_state=42,
        sample_idx=digits["target"],
        kernel_symm="mnn",
        theta=np.linspace(0, 1, n_sample - 1),
    )
Esempio n. 5
0
def test_mnn_with_matrix_theta():
    n_sample = len(np.unique(digits["target"]))
    # square matrix theta of the wrong size
    build_graph(
        data,
        thresh=0,
        n_pca=20,
        decay=10,
        knn=5,
        random_state=42,
        sample_idx=digits["target"],
        kernel_symm="mnn",
        theta=np.tile(np.linspace(0, 1, n_sample), n_sample).reshape(
            n_sample, n_sample
        ),
    )
Esempio n. 6
0
def test_mnn_with_vector_theta():
    with assert_raises_message(
            TypeError,
            "Expected `theta` as a float. Got <class 'numpy.ndarray'>."):
        n_sample = len(np.unique(digits["target"]))
        # vector theta
        build_graph(
            data,
            thresh=0,
            n_pca=20,
            decay=10,
            knn=5,
            random_state=42,
            sample_idx=digits["target"],
            kernel_symm="mnn",
            theta=np.linspace(0, 1, n_sample - 1),
        )
Esempio n. 7
0
def test_mnn_with_matrix_theta():
    with assert_raises_message(
            TypeError,
            "Expected `theta` as a float. Got <class 'numpy.ndarray'>."):
        n_sample = len(np.unique(digits["target"]))
        # square matrix theta of the wrong size
        build_graph(
            data,
            thresh=0,
            n_pca=20,
            decay=10,
            knn=5,
            random_state=42,
            sample_idx=digits["target"],
            kernel_symm="mnn",
            theta=np.tile(np.linspace(0, 1, n_sample),
                          n_sample).reshape(n_sample, n_sample),
        )
Esempio n. 8
0
def test_landmark_exact_graph():
    n_landmark = 100
    # exact graph
    G = build_graph(
        data,
        n_landmark=n_landmark,
        thresh=0,
        n_pca=20,
        decay=10,
        knn=5 - 1,
        random_state=42,
    )
    assert G.landmark_op.shape == (n_landmark, n_landmark)
    assert isinstance(G, graphtools.graphs.TraditionalGraph)
    assert isinstance(G, graphtools.graphs.LandmarkGraph)
    assert G.transitions.shape == (data.shape[0], n_landmark)
    assert G.clusters.shape == (data.shape[0],)
    assert len(np.unique(G.clusters)) <= n_landmark
    signal = np.random.normal(0, 1, [n_landmark, 10])
    interpolated_signal = G.interpolate(signal)
    assert interpolated_signal.shape == (data.shape[0], signal.shape[1])
    G._reset_landmarks()
    # no error on double delete
    G._reset_landmarks()
Esempio n. 9
0
def test_mnn_graph_decay():
    X, sample_idx = generate_swiss_roll()
    theta = 0.9
    k = 10
    a = 20
    metric = "euclidean"
    beta = 0.2
    samples = np.unique(sample_idx)

    K = np.zeros((len(X), len(X)))
    K[:] = np.nan
    K = pd.DataFrame(K)

    for si in samples:
        X_i = X[sample_idx == si]  # get observations in sample i
        for sj in samples:
            batch_k = k if si == sj else k - 1
            X_j = X[sample_idx == sj]  # get observation in sample j
            pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
            kdx_ij = np.sort(pdx_ij, axis=1)  # get kNN
            e_ij = kdx_ij[:, batch_k]  # dist to kNN
            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize
            k_ij = np.exp(-1 * (pdxe_ij ** a))  # apply alpha-decaying kernel
            if si == sj:
                K.iloc[sample_idx == si, sample_idx == sj] = (k_ij + k_ij.T) / 2
            else:
                # fill out values in K for NN on diagonal
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij

    Kn = K.copy()
    for i in samples:
        curr_K = K.iloc[sample_idx == i, sample_idx == i]
        i_norm = norm(curr_K, 1, axis=1)
        for j in samples:
            if i == j:
                continue
            else:
                curr_K = K.iloc[sample_idx == i, sample_idx == j]
                curr_norm = norm(curr_K, 1, axis=1)
                scale = np.minimum(1, i_norm / curr_norm) * beta
                Kn.iloc[sample_idx == i, sample_idx == j] = (
                    curr_K.values * scale[:, None]
                )

    K = Kn
    W = np.array((theta * np.minimum(K, K.T)) + ((1 - theta) * np.maximum(K, K.T)))
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = graphtools.Graph(
        X,
        knn=k,
        decay=a,
        beta=beta,
        kernel_symm="mnn",
        theta=theta,
        distance=metric,
        sample_idx=sample_idx,
        thresh=0,
        use_pygsp=True,
    )
    assert G.N == G2.N
    np.testing.assert_array_equal(G.dw, G2.dw)
    np.testing.assert_array_equal((G.W - G2.W).data, 0)
    assert isinstance(G2, graphtools.graphs.MNNGraph)