def test_mnn_graph_matrix_gamma():
    """Compare a hand-built MNN affinity matrix with ``graphtools.Graph``.

    A reference kernel is assembled block by block (one block per ordered
    pair of samples), symmetrized with a per-sample-pair ``gamma`` matrix,
    and checked for exact equality against the graph produced by
    ``graphtools.Graph(..., kernel_symm='gamma', gamma=gamma)``.
    """
    X, sample_idx = generate_swiss_roll()
    bs = 0.8
    # Row/column position corresponds to the position of the sample label
    # in ``samples`` (sorted unique labels) below.
    gamma = np.array([[1, bs],
                      [bs, 1]])
    k = 10
    a = 20
    metric = 'euclidean'
    beta = 0
    samples = np.unique(sample_idx)

    # Start from NaN so that any block left unfilled would be noticed.
    K = np.zeros((len(X), len(X)))
    K[:] = np.nan
    K = pd.DataFrame(K)

    for si in samples:
        X_i = X[sample_idx == si]  # observations in sample i
        for sj in samples:
            X_j = X[sample_idx == sj]  # observations in sample j
            pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
            kdx_ij = np.sort(pdx_ij, axis=1)  # per-row sorted distances
            e_ij = kdx_ij[:, k]  # bandwidth: sorted distance at index k
            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize by bandwidth
            k_ij = np.exp(-1 * (pdxe_ij**a))  # alpha-decaying kernel
            if si == sj:
                # within-sample (diagonal) block, scaled by (1 - beta)
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij * (1 - beta)
            else:
                # between-sample (off-diagonal) block
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij
    K = np.array(K)

    # Expand the per-sample-pair gamma into a per-observation-pair matrix.
    # Iterate over the sorted ``samples`` rather than ``set(sample_idx)`` so
    # the positional lookup ``gamma[ix, jx]`` does not depend on set order.
    matrix_gamma = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx))))
    for ix, si in enumerate(samples):
        for jx, sj in enumerate(samples):
            matrix_gamma.iloc[sample_idx == si, sample_idx == sj] = gamma[ix, jx]

    # gamma-weighted blend of the elementwise min and max of K and K.T
    W = np.array(
        (matrix_gamma * np.minimum(K, K.T))
        + ((1 - matrix_gamma) * np.maximum(K, K.T))
    )
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = graphtools.Graph(
        X,
        knn=k + 1,
        decay=a,
        beta=1 - beta,
        kernel_symm='gamma',
        gamma=gamma,
        distance=metric,
        sample_idx=sample_idx,
        thresh=0,
        use_pygsp=True,
    )
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
def test_knn_interpolate():
    """Check ``interpolate`` on a graph built with ``decay=None``.

    Calling ``interpolate`` with neither ``transitions`` nor ``Y`` must raise
    a ``ValueError`` with the expected message, and interpolating through
    ``Y`` must give the same result as interpolating through the transitions
    returned by ``extend_to_data``.
    """
    G = build_graph(data, decay=None)
    # Pin the message as well as the type, as test_exact_interpolate does.
    with assert_raises_message(
        ValueError, "Either `transitions` or `Y` must be provided."
    ):
        G.interpolate(data)
    pca_data = PCA(2).fit_transform(data)
    transitions = G.extend_to_data(data)
    assert np.all(
        G.interpolate(pca_data, Y=data)
        == G.interpolate(pca_data, transitions=transitions)
    )
def test_transform_sparse_adaptive_pca():
    """Check ``transform`` on sparse graphs built with ``n_pca=True``.

    Covers the round trip on the graph's own data, the error messages for
    inputs with the wrong number of columns, and agreement between graphs
    rebuilt from the first graph's ``rank_threshold`` and ``n_pca``.
    """
    G = build_graph(data, sparse=True, n_pca=True, random_state=42)
    assert np.all(G.data_nu == G.transform(G.data))

    n_rows, n_cols = G.data.shape[0], G.data.shape[1]

    # A single column is rejected.
    one_column_message = "data of shape ({0}, 1) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        n_rows, n_cols
    )
    with assert_raises_message(ValueError, one_column_message):
        G.transform(sp.csr_matrix(G.data)[:, 0])

    # A 15-column slice is rejected.
    fifteen_column_message = "data of shape ({0}, 15) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        n_rows, n_cols
    )
    with assert_raises_message(ValueError, fifteen_column_message):
        G.transform(sp.csr_matrix(G.data)[:, :15])

    # Rebuild with the rank threshold found by the first graph.
    G2 = build_graph(
        data,
        sparse=True,
        n_pca=True,
        rank_threshold=G.rank_threshold,
        random_state=42,
    )
    assert np.allclose(G2.data_nu, G2.transform(G2.data))
    assert np.allclose(G2.data_nu, G.transform(G.data))

    # Rebuild with the explicit number of components from the second graph.
    G3 = build_graph(data, sparse=True, n_pca=G2.n_pca, random_state=42)
    assert np.allclose(G3.data_nu, G3.transform(G3.data))
    assert np.allclose(G3.data_nu, G2.transform(G2.data))
def test_knn_graph():
    """Compare a hand-built binary kNN graph with ``build_graph(decay=None)``.

    The reference adjacency marks every pair whose PCA-space distance is
    within the row's kNN radius, then averages the matrix with its transpose
    and zeroes the diagonal.
    """
    k = 3
    n_pca = 20
    reducer = PCA(n_pca, svd_solver='randomized', random_state=42).fit(data)
    reduced = reducer.transform(data)
    pdx = squareform(pdist(reduced, metric='euclidean'))

    # Per-row radius: the largest of the k smallest distances in that row.
    radius = np.partition(pdx, k, axis=1)[:, :k].max(axis=1)

    # 1 where the distance is within the row's radius, 0 elsewhere.
    K = np.where(pdx <= radius[:, np.newaxis], 1.0, 0.0)

    W = (K + K.T) / 2
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = build_graph(
        data,
        n_pca=n_pca,
        decay=None,
        knn=k,
        random_state=42,
        use_pygsp=True,
    )
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.kNNGraph)
def test_inverse_transform_dense_no_pca():
    """Check ``inverse_transform`` on a dense graph built with ``n_pca=None``.

    Covers column selection, the full round trip, and the error messages for
    1-D, 3-D and wrong-width inputs.
    """
    G = build_graph(data, n_pca=None)

    selected = G.inverse_transform(G.data_nu, columns=[5, 6])
    np.testing.assert_allclose(data[:, 5:7], selected, atol=1e-12)
    assert np.all(G.data == G.inverse_transform(G.data_nu))

    n_obs = data.shape[0]

    # 1-D input
    flat_message = "data of shape ({0},) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        n_obs, G.data.shape[1]
    )
    with assert_raises_message(ValueError, flat_message):
        G.inverse_transform(G.data[:, 0])

    # 3-D input
    cube_message = "data of shape ({0}, 1, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        n_obs, data.shape[1]
    )
    with assert_raises_message(ValueError, cube_message):
        G.inverse_transform(G.data[:, None, :15])

    # 2-D input with the wrong number of columns
    narrow_message = "data of shape ({0}, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        n_obs, data.shape[1]
    )
    with assert_raises_message(ValueError, narrow_message):
        G.inverse_transform(G.data[:, :15])
def test_knnmax():
    """Check the ``knn_max`` cap on a decaying-kernel graph.

    First verifies that, without symmetrization, every kernel row has
    exactly ``k_max`` positive entries. Then compares the symmetrized graph
    with a hand-built reference whose kernel is zeroed beyond each row's
    ``k_max`` radius.
    """
    data = datasets.make_swiss_roll()[0]
    k = 5
    k_max = 10
    a = 0.45
    thresh = 0
    # Arguments shared by both graph constructions below.
    shared = dict(
        n_pca=None,
        decay=a,
        knn=k - 1,
        knn_max=k_max - 1,
        thresh=thresh,
        random_state=42,
    )

    with warnings.catch_warnings():
        # The unsymmetrized kernel may trigger this warning; silence it.
        warnings.filterwarnings("ignore", "K should be symmetric", RuntimeWarning)
        G = build_graph(data, kernel_symm=None, **shared)
        assert np.all((G.K > 0).sum(axis=1) == k_max)

    pdx = squareform(pdist(data, metric="euclidean"))
    # Bandwidth: largest of the k smallest distances per row.
    epsilon = np.partition(pdx, k, axis=1)[:, :k].max(axis=1)
    # Cut-off: largest of the k_max smallest distances per row.
    knn_max_dist = np.partition(pdx, k_max, axis=1)[:, :k_max].max(axis=1)

    pdx_scale = pdx / epsilon[:, None]
    K = np.where(pdx <= knn_max_dist[:, None], np.exp(-1 * pdx_scale**a), 0)
    W = (K + K.T) / 2
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    G2 = build_graph(data, use_pygsp=True, **shared)
    assert isinstance(G2, graphtools.graphs.kNNGraph)
    assert G.N == G2.N
    assert np.all(G.dw == G2.dw)
    assert (G.W - G2.W).nnz == 0
def test_inverse_transform_dense_no_pca():
    """Check ``inverse_transform`` on a dense graph built with ``n_pca=None``.

    Covers column selection, the full round trip, and the error raised for
    1-D, 3-D and wrong-width inputs. The error message is checked as well as
    the exception type, so an unrelated ``ValueError`` cannot satisfy the
    test.
    """
    G = build_graph(data, n_pca=None)
    np.testing.assert_allclose(
        data[:, 5:7], G.inverse_transform(G.data_nu, columns=[5, 6]), atol=1e-12
    )
    assert np.all(G.data == G.inverse_transform(G.data_nu))

    # 1-D input
    with assert_raises_message(
        ValueError,
        "data of shape ({0},) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
            G.data.shape[0], G.data.shape[1]
        ),
    ):
        G.inverse_transform(G.data[:, 0])

    # 3-D input
    with assert_raises_message(
        ValueError,
        "data of shape ({0}, 1, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
            G.data.shape[0], G.data.shape[1]
        ),
    ):
        G.inverse_transform(G.data[:, None, :15])

    # 2-D input with the wrong number of columns
    with assert_raises_message(
        ValueError,
        "data of shape ({0}, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
            G.data.shape[0], G.data.shape[1]
        ),
    ):
        G.inverse_transform(G.data[:, :15])
def test_truncated_exact_graph_no_pca():
    """Compare a hand-built thresholded kernel graph with ``graphtype='exact'``.

    The reference is built on a random tenth of ``data`` without PCA; the
    library graph is built once from the dense subset and once from its CSR
    form, and both must match the reference exactly.
    """
    k = 3
    a = 13
    n_pca = None
    thresh = 1e-4
    subset = np.random.choice(len(data), len(data) // 10, replace=False)
    data_small = data[subset]

    pdx = squareform(pdist(data_small, metric='euclidean'))
    # Bandwidth: largest of the k smallest distances per row.
    epsilon = np.partition(pdx, k, axis=1)[:, :k].max(axis=1)
    K = np.exp(-1 * (pdx / epsilon[:, np.newaxis]) ** a)
    K[K < thresh] = 0  # truncate small affinities
    W = (K + K.T) / 2
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    # Same expectations for dense input and for its CSR representation.
    for as_input in (lambda arr: arr, sp.csr_matrix):
        G2 = build_graph(
            as_input(data_small),
            thresh=thresh,
            graphtype='exact',
            n_pca=n_pca,
            decay=a,
            knn=k,
            random_state=42,
            use_pygsp=True,
        )
        assert G.N == G2.N
        assert np.all(G.d == G2.d)
        assert (G.W != G2.W).nnz == 0
        assert (G2.W != G.W).sum() == 0
        assert isinstance(G2, graphtools.graphs.TraditionalGraph)
def test_exact_interpolate():
    """Check ``interpolate`` on a graph built with ``decay=10, thresh=0``.

    Calling it with neither ``transitions`` nor ``Y`` must raise the expected
    ``ValueError``; interpolating via ``Y`` and via the transitions from
    ``extend_to_data`` must give identical results.
    """
    G = build_graph(data, decay=10, thresh=0)

    expected_message = "Either `transitions` or `Y` must be provided."
    with assert_raises_message(ValueError, expected_message):
        G.interpolate(data)

    pca_data = PCA(2).fit_transform(data)
    transitions = G.extend_to_data(data)
    via_y = G.interpolate(pca_data, Y=data)
    via_transitions = G.interpolate(pca_data, transitions=transitions)
    assert np.all(via_y == via_transitions)
def test_transform_sparse_pca():
    """Check ``transform`` on a sparse graph built with ``n_pca=20``.

    Covers the round trip on the graph's own data and the error messages for
    sparse inputs with the wrong number of columns.
    """
    G = build_graph(data, sparse=True, n_pca=20)
    assert np.all(G.data_nu == G.transform(G.data))

    n_rows, n_cols = G.data.shape[0], G.data.shape[1]

    # A single column is rejected.
    one_column_message = "data of shape ({0}, 1) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        n_rows, n_cols
    )
    with assert_raises_message(ValueError, one_column_message):
        G.transform(sp.csr_matrix(G.data)[:, 0])

    # A 15-column slice is rejected.
    fifteen_column_message = "data of shape ({0}, 15) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        n_rows, n_cols
    )
    with assert_raises_message(ValueError, fifteen_column_message):
        G.transform(sp.csr_matrix(G.data)[:, :15])
def test_transform_sparse_adaptive_pca():
    """Check ``transform`` on sparse graphs built with ``n_pca=True``.

    Covers the round trip on the graph's own data, the error raised for
    inputs with the wrong number of columns (message checked as well as
    type), and agreement between graphs rebuilt from the first graph's
    ``rank_threshold`` and ``n_pca``.
    """
    G = build_graph(data, sparse=True, n_pca=True, random_state=42)
    assert np.all(G.data_nu == G.transform(G.data))

    # A single column is rejected.
    with assert_raises_message(
        ValueError,
        "data of shape ({0}, 1) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
            G.data.shape[0], G.data.shape[1]
        ),
    ):
        G.transform(sp.csr_matrix(G.data)[:, 0])

    # A 15-column slice is rejected.
    with assert_raises_message(
        ValueError,
        "data of shape ({0}, 15) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
            G.data.shape[0], G.data.shape[1]
        ),
    ):
        G.transform(sp.csr_matrix(G.data)[:, :15])

    # Rebuild with the rank threshold found by the first graph.
    G2 = build_graph(
        data,
        sparse=True,
        n_pca=True,
        rank_threshold=G.rank_threshold,
        random_state=42,
    )
    assert np.allclose(G2.data_nu, G2.transform(G2.data))
    assert np.allclose(G2.data_nu, G.transform(G.data))

    # Rebuild with the explicit number of components from the second graph.
    G3 = build_graph(data, sparse=True, n_pca=G2.n_pca, random_state=42)
    assert np.allclose(G3.data_nu, G3.transform(G3.data))
    assert np.allclose(G3.data_nu, G2.transform(G2.data))
def test_mnn_with_non_zero_indexed_sample_idx():
    """Check that shifting every sample label by one leaves the MNN graph unchanged.

    Two graphs are built from the same data, one with the original labels
    and one with every label incremented by 1; they must be identical.
    """
    X, sample_idx = generate_swiss_roll()
    G = build_graph(
        X,
        sample_idx=sample_idx,
        kernel_symm='gamma',
        gamma=0.5,
        n_pca=None,
        use_pygsp=True,
    )
    # Build a new array instead of ``sample_idx += 1``: the in-place form
    # would also modify the array object already passed to ``G`` above.
    shifted_idx = sample_idx + 1
    G2 = build_graph(
        X,
        sample_idx=shifted_idx,
        kernel_symm='gamma',
        gamma=0.5,
        n_pca=None,
        use_pygsp=True,
    )
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
def test_transform_dense_no_pca():
    """Check ``transform`` on a dense graph built with ``n_pca=None``.

    Covers the round trip on the graph's own data and the error messages for
    1-D, 3-D and wrong-width inputs.
    """
    G = build_graph(data, n_pca=None)
    assert np.all(G.data_nu == G.transform(G.data))

    n_obs, n_dim = data.shape[0], data.shape[1]

    # 1-D input
    flat_message = "data of shape ({0},) cannot be transformed to graph built on data of shape ({0}, {1})".format(
        n_obs, n_dim
    )
    with assert_raises_message(ValueError, flat_message):
        G.transform(G.data[:, 0])

    # 3-D input
    cube_message = "data of shape ({0}, 1, 15) cannot be transformed to graph built on data of shape ({0}, {1})".format(
        n_obs, n_dim
    )
    with assert_raises_message(ValueError, cube_message):
        G.transform(G.data[:, None, :15])

    # 2-D input with the wrong number of columns
    narrow_message = "data of shape ({0}, 15) cannot be transformed to graph built on data of shape ({0}, {1})".format(
        n_obs, n_dim
    )
    with assert_raises_message(ValueError, narrow_message):
        G.transform(G.data[:, :15])
def test_mnn_with_string_sample_idx():
    """Check that string sample labels give the same MNN graph as integer ones.

    Label 0 is mapped to ``"a"`` and every other label to ``"b"``; the graph
    built from the string labels must equal the one built from the original
    labels.
    """
    X, sample_idx = generate_swiss_roll()
    # Arguments shared by both graph constructions.
    mnn_kwargs = dict(kernel_symm="mnn", theta=0.5, n_pca=None, use_pygsp=True)

    G = build_graph(X, sample_idx=sample_idx, **mnn_kwargs)
    string_idx = np.where(sample_idx == 0, "a", "b")
    G2 = build_graph(X, sample_idx=string_idx, **mnn_kwargs)

    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
def test_truncated_exact_graph_sparse():
    """Compare a hand-built thresholded kernel graph with sparse-input graphs.

    The reference is built on a random half of ``data`` after TruncatedSVD.
    The library graph is built from COO data (compared with ``allclose``) and
    from sparse precomputed distance, affinity and adjacency matrices
    (compared exactly).
    """
    k = 3
    a = 13
    n_pca = 20
    thresh = 1e-4
    subset = np.random.choice(len(data), len(data) // 2, replace=False)
    data_small = data[subset]
    svd = TruncatedSVD(n_pca, random_state=42).fit(data_small)
    data_small_nu = svd.transform(data_small)

    pdx = squareform(pdist(data_small_nu, metric='euclidean'))
    # Bandwidth: largest of the k smallest distances per row.
    epsilon = np.partition(pdx, k, axis=1)[:, :k].max(axis=1)
    K = np.exp(-1 * (pdx / epsilon[:, np.newaxis]) ** a)
    K[K < thresh] = 0  # truncate small affinities
    W = (K + K.T) / 2
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    def assert_matches_reference(candidate):
        # Exact structural and weight equality with the reference graph.
        assert G.N == candidate.N
        assert np.all(G.d == candidate.d)
        assert (G.W != candidate.W).nnz == 0
        assert (candidate.W != G.W).sum() == 0
        assert isinstance(candidate, graphtools.graphs.TraditionalGraph)

    # Raw sparse data: weights are compared with a tolerance only.
    G2 = build_graph(
        sp.coo_matrix(data_small),
        thresh=thresh,
        graphtype='exact',
        n_pca=n_pca,
        decay=a,
        knn=k,
        random_state=42,
        use_pygsp=True,
    )
    assert G.N == G2.N
    np.testing.assert_allclose(G2.W.toarray(), G.W.toarray())
    assert isinstance(G2, graphtools.graphs.TraditionalGraph)

    # Precomputed sparse distance matrix.
    G2 = build_graph(
        sp.bsr_matrix(pdx),
        n_pca=None,
        precomputed='distance',
        thresh=thresh,
        decay=a,
        knn=k,
        random_state=42,
        use_pygsp=True,
    )
    assert_matches_reference(G2)

    # Precomputed sparse affinity matrix.
    G2 = build_graph(
        sp.lil_matrix(K),
        n_pca=None,
        precomputed='affinity',
        thresh=thresh,
        random_state=42,
        use_pygsp=True,
    )
    assert_matches_reference(G2)

    # Precomputed sparse adjacency matrix.
    G2 = build_graph(
        sp.dok_matrix(W),
        n_pca=None,
        precomputed='adjacency',
        random_state=42,
        use_pygsp=True,
    )
    assert_matches_reference(G2)
def test_transform_dense_no_pca():
    """Check ``transform`` on a dense graph built with ``n_pca=None``.

    Covers the round trip on the graph's own data and the error raised for
    1-D, 3-D and wrong-width inputs. The error message is checked as well as
    the exception type, so an unrelated ``ValueError`` cannot satisfy the
    test.
    """
    G = build_graph(data, n_pca=None)
    assert np.all(G.data_nu == G.transform(G.data))

    # 1-D input
    with assert_raises_message(
        ValueError,
        "data of shape ({0},) cannot be transformed to graph built on data of shape ({0}, {1})".format(
            data.shape[0], data.shape[1]
        ),
    ):
        G.transform(G.data[:, 0])

    # 3-D input
    with assert_raises_message(
        ValueError,
        "data of shape ({0}, 1, 15) cannot be transformed to graph built on data of shape ({0}, {1})".format(
            data.shape[0], data.shape[1]
        ),
    ):
        G.transform(G.data[:, None, :15])

    # 2-D input with the wrong number of columns
    with assert_raises_message(
        ValueError,
        "data of shape ({0}, 15) cannot be transformed to graph built on data of shape ({0}, {1})".format(
            data.shape[0], data.shape[1]
        ),
    ):
        G.transform(G.data[:, :15])
def test_exact_graph():
    """Compare a hand-built dense kernel graph with ``build_graph`` variants.

    The reference is built on a random half of ``data`` after PCA. The
    library graph is built from the raw data and from precomputed distance,
    affinity (sparse and dense) and adjacency matrices; each must match the
    reference exactly.
    """
    k = 3
    a = 13
    n_pca = 20
    subset = np.random.choice(len(data), len(data) // 2, replace=False)
    data_small = data[subset]
    reducer = PCA(n_pca, svd_solver='randomized', random_state=42).fit(data_small)
    data_small_nu = reducer.transform(data_small)

    pdx = squareform(pdist(data_small_nu, metric='euclidean'))
    # Bandwidth: largest of the k smallest distances per row.
    epsilon = np.partition(pdx, k, axis=1)[:, :k].max(axis=1)
    K = np.exp(-1 * (pdx / epsilon[:, np.newaxis]) ** a)
    W = (K + K.T) / 2
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    def assert_matches_reference(candidate):
        # Exact structural and weight equality with the reference graph.
        assert G.N == candidate.N
        assert np.all(G.d == candidate.d)
        assert (G.W != candidate.W).nnz == 0
        assert (candidate.W != G.W).sum() == 0
        assert isinstance(candidate, graphtools.graphs.TraditionalGraph)

    # From raw data, with no thresholding.
    G2 = build_graph(
        data_small,
        thresh=0,
        n_pca=n_pca,
        decay=a,
        knn=k,
        random_state=42,
        use_pygsp=True,
    )
    assert_matches_reference(G2)

    # From a precomputed distance matrix.
    G2 = build_graph(
        pdx,
        n_pca=None,
        precomputed='distance',
        decay=a,
        knn=k,
        random_state=42,
        use_pygsp=True,
    )
    assert_matches_reference(G2)

    # From a precomputed affinity matrix in sparse COO form.
    G2 = build_graph(
        sp.coo_matrix(K),
        n_pca=None,
        precomputed='affinity',
        random_state=42,
        use_pygsp=True,
    )
    assert_matches_reference(G2)

    # From a precomputed dense affinity matrix.
    G2 = build_graph(
        K,
        n_pca=None,
        precomputed='affinity',
        random_state=42,
        use_pygsp=True,
    )
    assert_matches_reference(G2)

    # From a precomputed adjacency matrix.
    G2 = build_graph(
        W,
        n_pca=None,
        precomputed='adjacency',
        random_state=42,
        use_pygsp=True,
    )
    assert_matches_reference(G2)
def test_transform_sparse_pca():
    """Check ``transform`` on a sparse graph built with ``n_pca=20``.

    Covers the round trip on the graph's own data and the error raised for
    sparse inputs with the wrong number of columns. The error message is
    checked as well as the exception type, so an unrelated ``ValueError``
    cannot satisfy the test.
    """
    G = build_graph(data, sparse=True, n_pca=20)
    assert np.all(G.data_nu == G.transform(G.data))

    # A single column is rejected.
    with assert_raises_message(
        ValueError,
        "data of shape ({0}, 1) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
            G.data.shape[0], G.data.shape[1]
        ),
    ):
        G.transform(sp.csr_matrix(G.data)[:, 0])

    # A 15-column slice is rejected.
    with assert_raises_message(
        ValueError,
        "data of shape ({0}, 15) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
            G.data.shape[0], G.data.shape[1]
        ),
    ):
        G.transform(sp.csr_matrix(G.data)[:, :15])