예제 #1
0
def test_mnn_graph_matrix_gamma():
    """Compare graphtools' MNN kernel with a hand-built one for matrix gamma.

    A dense reference kernel is assembled block by block (one block per
    ordered pair of samples) with an alpha-decaying kernel, then symmetrized
    as ``gamma * min(K, K.T) + (1 - gamma) * max(K, K.T)``, where ``gamma`` is
    expanded from a per-sample-pair matrix to a per-observation matrix.  The
    resulting weights must equal those of
    ``graphtools.Graph(..., kernel_symm='gamma', gamma=gamma)`` exactly.
    """
    X, sample_idx = generate_swiss_roll()
    bs = 0.8  # gamma used between different samples
    # One row/column per sample: 1 within a sample, ``bs`` across samples.
    # NOTE(review): the 2x2 shape assumes generate_swiss_roll() yields exactly
    # two samples -- confirm against the helper.
    gamma = np.array([
        [1, bs],
        [bs, 1]
    ])
    k = 10
    a = 20
    metric = 'euclidean'
    beta = 0
    samples = np.unique(sample_idx)  # sorted unique sample labels

    # Start from an all-NaN frame so that any block left unfilled is obvious.
    K = np.zeros((len(X), len(X)))
    K[:] = np.nan
    K = pd.DataFrame(K)

    for si in samples:
        X_i = X[sample_idx == si]  # get observations in sample i
        for sj in samples:
            X_j = X[sample_idx == sj]  # get observation in sample j
            pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
            kdx_ij = np.sort(pdx_ij, axis=1)  # sort each row ascending
            e_ij = kdx_ij[:, k]  # distance at sorted position k (0-based)
            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize
            k_ij = np.exp(-1 * (pdxe_ij**a))  # apply alpha-decaying kernel
            if si == sj:
                # within-sample (diagonal) block, scaled by (1 - beta)
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij * \
                    (1 - beta)
            else:
                # between-sample (off-diagonal) block, stored unscaled
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij

    K = np.array(K)

    # Expand the per-sample gamma to one entry per pair of observations.
    # Iterate over the sorted ``samples`` rather than ``set(sample_idx)``:
    # set iteration order is not guaranteed, and gamma is indexed by position.
    matrix_gamma = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx))))
    for ix, si in enumerate(samples):
        for jx, sj in enumerate(samples):
            matrix_gamma.iloc[sample_idx == si, sample_idx == sj] = gamma[ix,
                                                                          jx]

    W = np.array((matrix_gamma * np.minimum(K, K.T)) +
                 ((1 - matrix_gamma) * np.maximum(K, K.T)))
    np.fill_diagonal(W, 0)  # no self-loops in the reference graph
    G = pygsp.graphs.Graph(W)
    # knn is passed as k + 1 to line up with the 0-based index ``k`` used for
    # ``e_ij`` above; beta is passed as ``1 - beta`` to mirror the diagonal
    # scaling applied in the reference kernel.
    G2 = graphtools.Graph(X,
                          knn=k + 1,
                          decay=a,
                          beta=1 - beta,
                          kernel_symm='gamma',
                          gamma=gamma,
                          distance=metric,
                          sample_idx=sample_idx,
                          thresh=0,
                          use_pygsp=True)
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
예제 #2
0
def test_verbose():
    """Smoke-test building an MNN graph with ``verbose=True``.

    Prints a blank line and a header first so the verbose output is easy to
    find in the test log; there are no assertions beyond "does not raise".
    """
    data, batch_labels = generate_swiss_roll()
    print()
    print("Verbose test: MNN")
    graph_options = {
        "sample_idx": batch_labels,
        "kernel_symm": "mnn",
        "theta": 0.5,
        "n_pca": None,
        "verbose": True,
    }
    build_graph(data, **graph_options)
예제 #3
0
def test_set_params():
    """Exercise ``get_params`` / ``set_params`` on an MNN graph (theta API).

    Checks the reported parameters, that mutable settings propagate to every
    subgraph, that kernel-defining settings are rejected with a descriptive
    ``ValueError``, and that re-setting those settings to their current
    values is accepted.
    """
    data, batch_labels = generate_swiss_roll()
    G = build_graph(
        data,
        sample_idx=batch_labels,
        kernel_symm="mnn",
        theta=0.5,
        n_pca=None,
        thresh=1e-4,
    )

    expected_params = {
        "n_pca": None,
        "random_state": 42,
        "kernel_symm": "mnn",
        "theta": 0.5,
        "anisotropy": 0,
        "beta": 1,
        "knn": 3,
        "decay": 10,
        "bandwidth": None,
        "distance": "euclidean",
        "thresh": 1e-4,
        "n_jobs": 1,
    }
    assert G.get_params() == expected_params

    # n_jobs must also reach each subgraph's kNN tree.
    G.set_params(n_jobs=4)
    assert G.n_jobs == 4
    for sub in G.subgraphs:
        assert sub.n_jobs == 4
        assert sub.knn_tree.n_jobs == 4

    # Settings that simply propagate to the graph and all of its subgraphs.
    for attr, value in (("random_state", 13), ("verbose", 2)):
        G.set_params(**{attr: value})
        assert getattr(G, attr) == value
        for sub in G.subgraphs:
            assert getattr(sub, attr) == value
    G.set_params(verbose=0)

    # Settings that define the kernel cannot be changed after construction.
    rejected_updates = (
        ({"knn": 15}, "Cannot update knn. Please create a new graph"),
        ({"decay": 15}, "Cannot update decay. Please create a new graph"),
        ({"distance": "manhattan"},
         "Cannot update distance. Please create a new graph"),
        ({"thresh": 1e-3}, "Cannot update thresh. Please create a new graph"),
        ({"beta": 0.2}, "Cannot update beta. Please create a new graph"),
    )
    for update, message in rejected_updates:
        with assert_raises_message(ValueError, message):
            G.set_params(**update)

    # Passing the current values back in must not raise.
    current_values = {
        "knn": G.knn,
        "decay": G.decay,
        "thresh": G.thresh,
        "distance": G.distance,
        "beta": G.beta,
    }
    G.set_params(**current_values)
예제 #4
0
def test_landmark_mnn_graph():
    """Build a landmark MNN graph and check its operator shape and types."""
    landmark_count = 150
    data, batch_labels = generate_swiss_roll()
    # Passing both n_landmark and sample_idx should yield a graph that is
    # simultaneously an MNNGraph and a LandmarkGraph.
    G = build_graph(
        data,
        n_landmark=landmark_count,
        thresh=1e-5,
        n_pca=None,
        decay=10,
        knn=5,
        random_state=42,
        sample_idx=batch_labels,
    )
    assert G.landmark_op.shape == (landmark_count, landmark_count)
    expected_types = (
        graphtools.graphs.MNNGraph,
        graphtools.graphs.LandmarkGraph,
    )
    for graph_type in expected_types:
        assert isinstance(G, graph_type)
예제 #5
0
def test_set_params():
    """Exercise ``get_params`` / ``set_params`` on an MNN graph (gamma API).

    Checks the reported parameters, that mutable settings propagate to every
    subgraph, that kernel-defining settings raise ``ValueError``, and that
    re-setting those settings to their current values is accepted.
    """
    data, batch_labels = generate_swiss_roll()
    G = build_graph(
        data,
        sample_idx=batch_labels,
        kernel_symm='gamma',
        gamma=0.5,
        n_pca=None,
        thresh=1e-4,
    )

    expected_params = {
        'n_pca': None,
        'random_state': 42,
        'kernel_symm': 'gamma',
        'gamma': 0.5,
        'beta': 1,
        'adaptive_k': 'sqrt',
        'knn': 3,
        'decay': 10,
        'distance': 'euclidean',
        'thresh': 1e-4,
        'n_jobs': 1
    }
    assert G.get_params() == expected_params

    # n_jobs must also reach each subgraph's kNN tree.
    G.set_params(n_jobs=4)
    assert G.n_jobs == 4
    for sub in G.subgraphs:
        assert sub.n_jobs == 4
        assert sub.knn_tree.n_jobs == 4

    # Settings that simply propagate to the graph and all of its subgraphs.
    for attr, value in (('random_state', 13), ('verbose', 2)):
        G.set_params(**{attr: value})
        assert getattr(G, attr) == value
        for sub in G.subgraphs:
            assert getattr(sub, attr) == value
    G.set_params(verbose=0)

    # Settings that define the kernel cannot be changed after construction.
    rejected_updates = (
        {'knn': 15},
        {'decay': 15},
        {'distance': 'manhattan'},
        {'thresh': 1e-3},
        {'beta': 0.2},
        {'adaptive_k': 'min'},
    )
    for update in rejected_updates:
        assert_raises(ValueError, G.set_params, **update)

    # Passing the current values back in must not raise.
    current_values = {
        'knn': G.knn,
        'decay': G.decay,
        'thresh': G.thresh,
        'distance': G.distance,
        'beta': G.beta,
        'adaptive_k': G.adaptive_k,
    }
    G.set_params(**current_values)
예제 #6
0
def test_landmark_mnn_pygsp_graph():
    """Build a landmark MNN graph with ``use_pygsp=True`` and check its types.

    The result must expose a square landmark operator of size ``n_landmark``
    and be an instance of the MNN, landmark and pygsp graph classes at once.
    """
    landmark_count = 150
    neighbors = 3 - 1
    data, batch_labels = generate_swiss_roll()
    G = build_graph(
        data,
        n_landmark=landmark_count,
        thresh=1e-3,
        n_pca=None,
        decay=10,
        knn=neighbors,
        random_state=42,
        sample_idx=batch_labels,
        use_pygsp=True,
    )
    assert G.landmark_op.shape == (landmark_count, landmark_count)
    expected_types = (
        graphtools.graphs.MNNGraph,
        graphtools.graphs.LandmarkGraph,
        pygsp.graphs.Graph,
    )
    for graph_type in expected_types:
        assert isinstance(G, graph_type)
예제 #7
0
def test_mnn_with_non_zero_indexed_sample_idx():
    """Check that shifting every sample label by one leaves the graph unchanged.

    Two graphs are built from the same data, one with the original sample
    labels and one with each label incremented by 1; their size, degrees and
    weights must be identical.
    """
    X, sample_idx = generate_swiss_roll()
    G = build_graph(X,
                    sample_idx=sample_idx,
                    kernel_symm='gamma',
                    gamma=0.5,
                    n_pca=None,
                    use_pygsp=True)
    # Build a new array instead of ``sample_idx += 1``: an in-place update
    # would also change the labels seen by ``G`` if it kept a reference to the
    # array it was built from, which would make the comparison below vacuous.
    shifted_idx = sample_idx + 1
    G2 = build_graph(X,
                     sample_idx=shifted_idx,
                     kernel_symm='gamma',
                     gamma=0.5,
                     n_pca=None,
                     use_pygsp=True)
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
예제 #8
0
def test_mnn_with_string_sample_idx():
    """Check that string sample labels give the same graph as integer labels.

    The labels are remapped (0 -> "a", everything else -> "b") and the graph
    rebuilt with otherwise identical settings; size, degrees and weights must
    match the graph built from the original labels.
    """
    data, int_labels = generate_swiss_roll()
    shared_options = {
        "kernel_symm": "mnn",
        "theta": 0.5,
        "n_pca": None,
        "use_pygsp": True,
    }
    G = build_graph(data, sample_idx=int_labels, **shared_options)

    str_labels = np.where(int_labels == 0, "a", "b")
    G2 = build_graph(data, sample_idx=str_labels, **shared_options)

    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
예제 #9
0
def test_mnn_graph_decay():
    """Compare graphtools' MNN kernel (scalar theta, decay) to a reference.

    The reference is built in three steps: (1) an alpha-decaying kernel for
    every ordered pair of samples, with the within-sample blocks symmetrized
    by averaging with their transpose; (2) each between-sample block rescaled
    row-wise by ``beta * min(1, within_norm / between_norm)``; (3) the whole
    matrix symmetrized as ``theta * min(K, K.T) + (1 - theta) * max(K, K.T)``
    with the diagonal zeroed.  The weights must equal those of
    ``graphtools.Graph(..., kernel_symm="mnn", theta=theta)`` exactly.
    """
    X, sample_idx = generate_swiss_roll()
    theta = 0.9
    k = 10
    a = 20
    metric = "euclidean"
    beta = 0.2
    samples = np.unique(sample_idx)

    # Start from an all-NaN frame so that any block left unfilled is obvious.
    K = np.zeros((len(X), len(X)))
    K[:] = np.nan
    K = pd.DataFrame(K)

    for si in samples:
        X_i = X[sample_idx == si]  # get observations in sample i
        for sj in samples:
            # Within a sample, sorted position 0 is the point itself
            # (distance 0), so index k is used; across samples there is no
            # self-match, so index k - 1 is used instead.
            batch_k = k if si == sj else k - 1
            X_j = X[sample_idx == sj]  # get observation in sample j
            pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
            kdx_ij = np.sort(pdx_ij, axis=1)  # sort each row ascending
            e_ij = kdx_ij[:, batch_k]  # distance at sorted position batch_k
            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize
            k_ij = np.exp(-1 * (pdxe_ij ** a))  # apply alpha-decaying kernel
            if si == sj:
                # within-sample (diagonal) block: average with its transpose
                K.iloc[sample_idx == si, sample_idx == sj] = (k_ij + k_ij.T) / 2
            else:
                # between-sample (off-diagonal) block, stored as computed
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij

    # Rescale each between-sample block row by row; within-sample blocks are
    # left untouched.  Results are written to a copy so that every norm is
    # computed from the unscaled kernel.
    Kn = K.copy()
    for i in samples:
        curr_K = K.iloc[sample_idx == i, sample_idx == i]
        # NOTE(review): ``norm`` is imported outside this view; called as
        # (x, 1, axis=1) it is presumably a row-wise L1 norm -- confirm.
        i_norm = norm(curr_K, 1, axis=1)
        for j in samples:
            if i == j:
                continue
            else:
                curr_K = K.iloc[sample_idx == i, sample_idx == j]
                curr_norm = norm(curr_K, 1, axis=1)
                # Cap the ratio at 1, then apply beta.
                scale = np.minimum(1, i_norm / curr_norm) * beta
                Kn.iloc[sample_idx == i, sample_idx == j] = (
                    curr_K.values * scale[:, None]
                )

    K = Kn
    W = np.array((theta * np.minimum(K, K.T)) + ((1 - theta) * np.maximum(K, K.T)))
    np.fill_diagonal(W, 0)  # no self-loops in the reference graph
    G = pygsp.graphs.Graph(W)
    G2 = graphtools.Graph(
        X,
        knn=k,
        decay=a,
        beta=beta,
        kernel_symm="mnn",
        theta=theta,
        distance=metric,
        sample_idx=sample_idx,
        thresh=0,
        use_pygsp=True,
    )
    assert G.N == G2.N
    np.testing.assert_array_equal(G.dw, G2.dw)
    np.testing.assert_array_equal((G.W - G2.W).data, 0)
    assert isinstance(G2, graphtools.graphs.MNNGraph)