def test_shortest_path_data():
    """Compare data-space shortest paths against ``graph_shortest_path``.

    A kNN graph without decay is built on a random quarter of ``data``
    (defined outside this function). Each kernel edge is weighted by the
    pairwise distance between its endpoints in ``G.data_nu``, and the
    resulting reference distances must match ``shortest_path`` both with
    ``distance="data"`` and with the default arguments.
    """
    n_total = len(data)
    subset = np.random.choice(n_total, n_total // 4, replace=False)
    graph = build_graph(data[subset], knn=5, decay=None)

    # Keep a pairwise distance only where the kernel has a positive entry.
    pairwise = squareform(pdist(graph.data_nu))
    has_edge = np.where(graph.K.toarray() > 0, 1, 0)
    edge_lengths = pairwise * has_edge

    expected = graph_shortest_path(edge_lengths)
    # sklearn returns 0 if no path exists
    expected[expected == 0] = np.inf
    # diagonal should actually be zero
    np.fill_diagonal(expected, 0)

    np.testing.assert_allclose(expected, graph.shortest_path(distance="data"))
    np.testing.assert_allclose(expected, graph.shortest_path())
def test_shortest_path_affinity():
    """Compare affinity-based shortest paths against ``graph_shortest_path``.

    A kNN graph with ``decay=15`` is built on a random quarter of ``data``
    (defined outside this function). Edge lengths are the negative log of
    the non-zero kernel entries, and the resulting reference distances must
    match ``shortest_path`` both with ``distance="affinity"`` and with the
    default arguments.
    """
    n_total = len(data)
    subset = np.random.choice(n_total, n_total // 4, replace=False)
    graph = build_graph(data[subset], knn=5, decay=15)

    kernel = graph.K
    nonzero = kernel != 0
    # Zero entries are swapped for NaN before the log so that log(0) is
    # never evaluated; the outer where then maps those positions back to 0.
    log_affinity = np.log(np.where(nonzero, kernel, np.nan))
    edge_lengths = -1 * np.where(nonzero, log_affinity, 0)

    expected = graph_shortest_path(edge_lengths)
    # sklearn returns 0 if no path exists
    expected[expected == 0] = np.inf
    # diagonal should actually be zero
    np.fill_diagonal(expected, 0)

    np.testing.assert_allclose(
        expected, graph.shortest_path(distance="affinity")
    )
    np.testing.assert_allclose(expected, graph.shortest_path())
def test_shortest_path_constant():
    """Compare constant-length shortest paths against ``graph_shortest_path``.

    A kNN graph without decay is built on a random quarter of ``data``
    (defined outside this function). The kernel matrix itself is passed to
    ``graph_shortest_path`` and the result must equal
    ``shortest_path(distance="constant")`` exactly.
    """
    n_total = len(data)
    subset = np.random.choice(n_total, n_total // 4, replace=False)
    graph = build_graph(data[subset], knn=5, decay=None)

    expected = graph_shortest_path(graph.K)
    # sklearn returns 0 if no path exists
    expected[expected == 0] = np.inf
    # diagonal should actually be zero
    np.fill_diagonal(expected, 0)

    np.testing.assert_equal(expected, graph.shortest_path(distance="constant"))
def test_knnmax():
    """Check the ``knn_max`` option against a hand-built kernel.

    Two things are verified on swiss-roll data:

    1. With ``kernel_symm=None``, every row of the kernel has exactly
       ``k_max`` positive entries.
    2. With the default symmetrization and ``use_pygsp=True``, the graph
       matches a pygsp graph built from a manually computed weight matrix
       (same size, same weighted degrees, identical weights).
    """
    data = datasets.make_swiss_roll()[0]
    k = 5
    k_max = 10
    a = 0.45
    thresh = 0

    with warnings.catch_warnings():
        # The unsymmetrized kernel triggers this warning; it is expected here.
        warnings.filterwarnings("ignore", "K should be symmetric", RuntimeWarning)
        G_asymm = build_graph(
            data,
            n_pca=None,
            decay=a,
            knn=k - 1,
            knn_max=k_max - 1,
            thresh=thresh,
            random_state=42,
            kernel_symm=None,
        )
        assert np.all((G_asymm.K > 0).sum(axis=1) == k_max)

    # Each row of pdx contains the zero self-distance, so the k (or k_max)
    # smallest entries are the point itself plus k - 1 (or k_max - 1)
    # neighbours, which is why the graphs are built with knn=k - 1 and
    # knn_max=k_max - 1.
    pdx = squareform(pdist(data, metric="euclidean"))
    knn_dist = np.partition(pdx, k, axis=1)[:, :k]
    knn_max_dist = np.max(np.partition(pdx, k_max, axis=1)[:, :k_max], axis=1)
    epsilon = np.max(knn_dist, axis=1)  # per-row bandwidth
    pdx_scale = (pdx.T / epsilon).T
    # Decaying kernel, truncated to the k_max nearest points of each row.
    K = np.where(pdx <= knn_max_dist[:, None], np.exp(-1 * pdx_scale**a), 0)
    # Symmetrize by averaging with the transpose.
    K = K + K.T
    W = np.divide(K, 2)
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    G2 = build_graph(
        data,
        n_pca=None,
        decay=a,
        knn=k - 1,
        knn_max=k_max - 1,
        thresh=thresh,
        random_state=42,
        use_pygsp=True,
    )
    assert isinstance(G2, graphtools.graphs.kNNGraph)
    assert G.N == G2.N
    assert np.all(G.dw == G2.dw)
    assert (G.W - G2.W).nnz == 0
def test_mnn_with_string_sample_idx():
    """Check that string sample labels give the same graph as the originals.

    The graph is built once with the ``sample_idx`` returned by
    ``generate_swiss_roll`` and once with those labels mapped to ``'a'``
    (where the label equals 0) or ``'b'`` (otherwise). Both graphs must
    have the same size, degrees and weights, and the second must be an
    ``MNNGraph``.
    """
    X, sample_idx = generate_swiss_roll()
    graph_kwargs = dict(
        kernel_symm="gamma",
        gamma=0.5,
        n_pca=None,
        use_pygsp=True,
    )

    G = build_graph(X, sample_idx=sample_idx, **graph_kwargs)

    string_idx = np.where(sample_idx == 0, "a", "b")
    G2 = build_graph(X, sample_idx=string_idx, **graph_kwargs)

    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
def test_mnn_graph_no_decay():
    """Check the MNN graph without decay against a hand-built kernel.

    The expected kernel is assembled block by block from binary kNN kernels
    between every pair of samples, the cross-sample blocks are rescaled,
    and the result is symmetrized with ``theta``. The pygsp graph built
    from it must match ``graphtools.Graph(..., kernel_symm="mnn")`` in
    size, weighted degree and weights.
    """
    X, sample_idx = generate_swiss_roll()
    theta = 0.9
    k = 10
    a = None
    metric = "euclidean"
    beta = 0.2
    samples = np.unique(sample_idx)

    n_obs = len(X)
    K = pd.DataFrame(np.full((n_obs, n_obs), np.nan))
    for si in samples:
        rows = sample_idx == si
        X_i = X[rows]  # observations in sample i
        for sj in samples:
            cols = sample_idx == sj
            same_sample = si == sj
            # Within a sample, one extra neighbour is requested.
            batch_k = k + 1 if same_sample else k
            pdx_ij = cdist(X_i, X[cols], metric=metric)  # pairwise distances
            # Distance from each row to its batch_k-th closest column.
            e_ij = np.sort(pdx_ij, axis=1)[:, batch_k - 1]
            k_ij = np.where(pdx_ij <= e_ij[:, None], 1, 0)  # binary kNN kernel
            if same_sample:
                # Within-sample blocks are symmetrized; cross-sample blocks
                # are stored as computed.
                k_ij = (k_ij + k_ij.T) / 2
            K.iloc[rows, cols] = k_ij

    # Rescale each cross-sample block row-wise: beta times the ratio of the
    # within-sample row norm to the cross-sample row norm, capped at 1.
    Kn = K.copy()
    for i in samples:
        rows = sample_idx == i
        i_norm = norm(K.iloc[rows, rows], 1, axis=1)
        for j in samples:
            if i == j:
                continue
            cols = sample_idx == j
            curr_K = K.iloc[rows, cols]
            curr_norm = norm(curr_K, 1, axis=1)
            scale = np.minimum(1, i_norm / curr_norm) * beta
            Kn.iloc[rows, cols] = curr_K.values * scale[:, None]
    K = Kn

    # theta weights the element-wise minimum of K and its transpose;
    # (1 - theta) weights the element-wise maximum.
    lower = np.minimum(K, K.T)
    upper = np.maximum(K, K.T)
    W = np.array((theta * lower) + ((1 - theta) * upper))
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    G2 = graphtools.Graph(
        X,
        knn=k,
        decay=a,
        beta=beta,
        kernel_symm="mnn",
        theta=theta,
        distance=metric,
        sample_idx=sample_idx,
        thresh=0,
        use_pygsp=True,
    )

    assert G.N == G2.N
    np.testing.assert_array_equal(G.dw, G2.dw)
    np.testing.assert_array_equal((G.W - G2.W).data, 0)
    assert isinstance(G2, graphtools.graphs.MNNGraph)