def test_transform_sparse_adaptive_pca():
    """Exercise ``transform`` on a sparse graph built with ``n_pca=True``.

    Covers the round trip of the graph's own data, the ``ValueError`` raised
    for inputs with the wrong number of columns, and agreement with graphs
    rebuilt from the first graph's ``rank_threshold`` and from the resulting
    ``n_pca``.
    """
    base = build_graph(data, sparse=True, n_pca=True, random_state=42)
    assert np.all(base.data_nu == base.transform(base.data))

    one_column_msg = "data of shape ({0}, 1) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        base.data.shape[0], base.data.shape[1]
    )
    with assert_raises_message(ValueError, one_column_msg):
        base.transform(sp.csr_matrix(base.data)[:, 0])

    fifteen_column_msg = "data of shape ({0}, 15) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        base.data.shape[0], base.data.shape[1]
    )
    with assert_raises_message(ValueError, fifteen_column_msg):
        base.transform(sp.csr_matrix(base.data)[:, :15])

    # Rebuild while reusing the rank_threshold attribute of the first graph.
    same_threshold = build_graph(
        data,
        sparse=True,
        n_pca=True,
        rank_threshold=base.rank_threshold,
        random_state=42,
    )
    assert np.allclose(
        same_threshold.data_nu, same_threshold.transform(same_threshold.data)
    )
    assert np.allclose(same_threshold.data_nu, base.transform(base.data))

    # Rebuild with the explicit n_pca reported by the second graph.
    fixed_rank = build_graph(
        data, sparse=True, n_pca=same_threshold.n_pca, random_state=42
    )
    assert np.allclose(fixed_rank.data_nu, fixed_rank.transform(fixed_rank.data))
    assert np.allclose(
        fixed_rank.data_nu, same_threshold.transform(same_threshold.data)
    )
def test_check_between():
    """Check ``graphtools.utils.check_between`` for valid and invalid input."""
    check_between = graphtools.utils.check_between

    # A value between the bounds must not raise.
    check_between(-5, -3, foo=-4)

    # A value below the lower bound is reported by keyword name.
    with assert_raises_message(ValueError, "Expected foo between -5 and -3, got -6"):
        check_between(-5, -3, foo=-6)

    # Bounds supplied in the wrong order are rejected.
    with assert_raises_message(ValueError, "Expected v_max > -3, got -5"):
        check_between(-3, -5, foo=-6)
def test_inverse_transform_dense_no_pca():
    """Exercise ``inverse_transform`` on a dense graph built with ``n_pca=None``.

    Verifies column selection, the full round trip, and the ``ValueError``
    raised for 1D, 3D and wrong-width inputs.
    """
    graph = build_graph(data, n_pca=None)

    selected = graph.inverse_transform(graph.data_nu, columns=[5, 6])
    np.testing.assert_allclose(data[:, 5:7], selected, atol=1e-12)
    assert np.all(graph.data == graph.inverse_transform(graph.data_nu))

    msg_1d = "data of shape ({0},) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        data.shape[0], graph.data.shape[1]
    )
    with assert_raises_message(ValueError, msg_1d):
        graph.inverse_transform(graph.data[:, 0])

    msg_3d = "data of shape ({0}, 1, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        data.shape[0], data.shape[1]
    )
    with assert_raises_message(ValueError, msg_3d):
        graph.inverse_transform(graph.data[:, None, :15])

    msg_narrow = "data of shape ({0}, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        data.shape[0], data.shape[1]
    )
    with assert_raises_message(ValueError, msg_narrow):
        graph.inverse_transform(graph.data[:, :15])
def test_inverse_transform_dense_pca():
    """Exercise ``inverse_transform`` on a dense graph with ``n_pca`` one below the feature count.

    Verifies the full round trip, single-column and multi-column selection,
    the ``IndexError`` for an out-of-range column, and the ``ValueError``
    raised for 1D, 3D and wrong-width inputs.
    """
    graph = build_graph(data, n_pca=data.shape[1] - 1)

    np.testing.assert_allclose(
        graph.data, graph.inverse_transform(graph.data_nu), atol=1e-12
    )
    last_column = graph.inverse_transform(graph.data_nu, columns=-1)
    np.testing.assert_allclose(graph.data[:, -1, None], last_column, atol=1e-12)
    two_columns = graph.inverse_transform(graph.data_nu, columns=[5, 6])
    np.testing.assert_allclose(graph.data[:, 5:7], two_columns, atol=1e-12)

    out_of_bounds_msg = "index {0} is out of bounds for axis 1 with size {0}".format(
        graph.data.shape[1]
    )
    with assert_raises_message(IndexError, out_of_bounds_msg):
        graph.inverse_transform(graph.data_nu, columns=data.shape[1])

    msg_1d = "data of shape ({0},) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        graph.data.shape[0], graph.n_pca
    )
    with assert_raises_message(ValueError, msg_1d):
        graph.inverse_transform(graph.data[:, 0])

    msg_3d = "data of shape ({0}, 1, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        graph.data.shape[0], graph.n_pca
    )
    with assert_raises_message(ValueError, msg_3d):
        graph.inverse_transform(graph.data[:, None, :15])

    msg_narrow = "data of shape ({0}, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        graph.data.shape[0], graph.n_pca
    )
    with assert_raises_message(ValueError, msg_narrow):
        graph.inverse_transform(graph.data[:, :15])
def test_set_params():
    """Check ``get_params`` / ``set_params`` on a graph built with ``sample_idx``.

    Mutable settings (``n_jobs``, ``random_state``, ``verbose``) must be
    propagated to every entry of ``subgraphs``; changing ``knn``, ``decay``,
    ``distance``, ``thresh`` or ``beta`` must raise ``ValueError``, while
    re-submitting their current values must be accepted.
    """
    # NOTE(review): another `test_set_params` is defined further down in this
    # file; if both really live in one module the later definition replaces
    # this one and this test is never collected -- confirm.
    X, sample_idx = generate_swiss_roll()
    batch_graph = build_graph(
        X,
        sample_idx=sample_idx,
        kernel_symm="mnn",
        theta=0.5,
        n_pca=None,
        thresh=1e-4,
    )

    expected_params = {
        "n_pca": None,
        "random_state": 42,
        "kernel_symm": "mnn",
        "theta": 0.5,
        "anisotropy": 0,
        "beta": 1,
        "knn": 3,
        "decay": 10,
        "bandwidth": None,
        "distance": "euclidean",
        "thresh": 1e-4,
        "n_jobs": 1,
    }
    assert batch_graph.get_params() == expected_params

    batch_graph.set_params(n_jobs=4)
    assert batch_graph.n_jobs == 4
    for sub in batch_graph.subgraphs:
        assert sub.n_jobs == 4
        assert sub.knn_tree.n_jobs == 4

    batch_graph.set_params(random_state=13)
    assert batch_graph.random_state == 13
    for sub in batch_graph.subgraphs:
        assert sub.random_state == 13

    batch_graph.set_params(verbose=2)
    assert batch_graph.verbose == 2
    for sub in batch_graph.subgraphs:
        assert sub.verbose == 2
    batch_graph.set_params(verbose=0)

    # Each of these updates must be refused with its own message.
    rejected_updates = (
        ("Cannot update knn. Please create a new graph", {"knn": 15}),
        ("Cannot update decay. Please create a new graph", {"decay": 15}),
        (
            "Cannot update distance. Please create a new graph",
            {"distance": "manhattan"},
        ),
        ("Cannot update thresh. Please create a new graph", {"thresh": 1e-3}),
        ("Cannot update beta. Please create a new graph", {"beta": 0.2}),
    )
    for message, update in rejected_updates:
        with assert_raises_message(ValueError, message):
            batch_graph.set_params(**update)

    # Passing the values the graph already holds must not raise.
    unchanged = {
        name: getattr(batch_graph, name)
        for name in ("knn", "decay", "thresh", "distance", "beta")
    }
    batch_graph.set_params(**unchanged)
def test_1d_data():
    """Building a graph from a 1D array must raise a descriptive ``ValueError``.

    The graph is built twice because two separate fragments of the error
    message are checked: the shape report and the reshape hint.
    """
    with assert_raises_message(
        ValueError,
        "Expected 2D array, got 1D array instead (shape: ({},).)".format(data.shape[0]),
    ):
        build_graph(data[:, 0])

    # The hint contains no replacement fields, so the former
    # `.format(data.shape[0])` call was a no-op and has been dropped.
    with assert_raises_message(
        ValueError,
        "Reshape your data either using array.reshape(-1, 1) "
        "if your data has a single feature or array.reshape(1, -1) if "
        "it contains a single sample.",
    ):
        build_graph(data[:, 0])
def test_sample_idx_wrong_length():
    """A ``sample_idx`` of length 10 that does not match the data must be rejected."""
    expected = "sample_idx (10) must be the same length as data ({})".format(
        data.shape[0]
    )
    with assert_raises_message(ValueError, expected):
        build_graph(data, graphtype="mnn", sample_idx=np.arange(10))
def test_precomputed_interpolate():
    """``build_kernel_to_data`` must raise on a graph built from a precomputed distance matrix."""
    # Build the graph outside the context manager so that only the call under
    # test can satisfy the expected error; a failure during construction now
    # surfaces as a plain test error instead of being matched by accident.
    G = build_graph(squareform(pdist(data)), n_pca=None, precomputed="distance")
    with assert_raises_message(ValueError, "Cannot extend kernel on precomputed graph"):
        G.build_kernel_to_data(data)
def test_precomputed_invalid():
    """An unrecognised ``precomputed`` value must raise ``ValueError``."""
    expected = "Precomputed value invalid not recognized. Choose from ['distance', 'affinity', 'adjacency']"
    with assert_raises_message(ValueError, expected):
        build_graph(
            np.random.uniform(0, 1, [200, 200]),
            n_pca=None,
            precomputed="invalid",
        )
def test_transform_sparse_pca():
    """Exercise ``transform`` on a sparse graph built with ``n_pca=20``.

    Checks the round trip of the graph's own data and the ``ValueError``
    raised when the input has 1 or 15 columns.
    """
    graph = build_graph(data, sparse=True, n_pca=20)
    assert np.all(graph.data_nu == graph.transform(graph.data))

    one_column_msg = "data of shape ({0}, 1) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        graph.data.shape[0], graph.data.shape[1]
    )
    with assert_raises_message(ValueError, one_column_msg):
        graph.transform(sp.csr_matrix(graph.data)[:, 0])

    fifteen_column_msg = "data of shape ({0}, 15) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        graph.data.shape[0], graph.data.shape[1]
    )
    with assert_raises_message(ValueError, fifteen_column_msg):
        graph.transform(sp.csr_matrix(graph.data)[:, :15])
def test_build_landmark_with_too_many_landmarks():
    """Requesting as many landmarks as there are samples must raise ``ValueError``."""
    expected = "n_landmark ({0}) >= n_samples ({0}). Use kNNGraph instead".format(
        data.shape[0]
    )
    with assert_raises_message(ValueError, expected):
        build_graph(data, n_landmark=len(data))
def test_shortest_path_invalid():
    """``shortest_path`` must reject an unknown ``distance`` option."""
    # Setup lives outside the context manager so that only `shortest_path`
    # itself can satisfy the expected error.
    # A random quarter of the rows is used for the graph.
    data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)]
    G = build_graph(data_small, knn=5, decay=None)
    with assert_raises_message(
        ValueError,
        "Expected `distance` in ['constant', 'data', 'affinity']. Got invalid",
    ):
        G.shortest_path(distance="invalid")
def test_shortest_path_no_decay_affinity():
    """``shortest_path(distance="affinity")`` must raise on a graph built with ``decay=None``."""
    # Setup lives outside the context manager so that only `shortest_path`
    # itself can satisfy the expected error.
    # A random quarter of the rows is used for the graph.
    data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)]
    G = build_graph(data_small, knn=5, decay=None)
    with assert_raises_message(
        ValueError,
        "Graph shortest path with affinity distance only valid for weighted graphs. For unweighted graphs, use `distance='constant'` or `distance='data'`.",
    ):
        G.shortest_path(distance="affinity")
def test_3d_data():
    """Building a graph from a 3D array must raise a ``ValueError`` reporting its shape."""
    # The input is `data` with a trailing singleton axis, so the reported
    # shape is (n_samples, n_features, 1). The feature count is read from
    # `data` rather than hard-coded as 64 so the test follows the fixture.
    expected = "Expected 2D array, got 3D array instead (shape: ({0}, {1}, 1).)".format(
        data.shape[0], data.shape[1]
    )
    with assert_raises_message(ValueError, expected):
        build_graph(data[:, :, None])
def test_inverse_transform_sparse_no_pca():
    """Exercise ``inverse_transform`` on a sparse graph built with ``n_pca=None``.

    Checks that the round trip reproduces the graph's data exactly and that
    inputs with 1 or 15 columns raise ``ValueError``.
    """
    graph = build_graph(data, sparse=True, n_pca=None)
    mismatches = np.sum(graph.data != graph.inverse_transform(graph.data_nu))
    assert mismatches == 0

    one_column_msg = "data of shape ({0}, 1) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        graph.data.shape[0], graph.data.shape[1]
    )
    with assert_raises_message(ValueError, one_column_msg):
        graph.inverse_transform(sp.csr_matrix(graph.data)[:, 0])

    fifteen_column_msg = "data of shape ({0}, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        graph.data.shape[0], graph.data.shape[1]
    )
    with assert_raises_message(ValueError, fifteen_column_msg):
        graph.inverse_transform(sp.csr_matrix(graph.data)[:, :15])
def test_knn_no_knn_no_bandwidth():
    """A knn graph requested with neither ``knn`` nor ``bandwidth`` must raise ``ValueError``."""
    expected = "Either `knn` or `bandwidth` must be provided."
    with assert_raises_message(ValueError, expected):
        build_graph(
            data,
            graphtype="knn",
            knn=None,
            bandwidth=None,
            thresh=1e-4,
        )
def test_transform_sparse_no_pca():
    """Exercise ``transform`` on a sparse graph built with ``n_pca=None``.

    Checks that the round trip reproduces ``data_nu`` exactly and that
    inputs sliced to 1 or 15 columns raise ``ValueError``.
    """
    graph = build_graph(data, sparse=True, n_pca=None)
    mismatches = np.sum(graph.data_nu != graph.transform(graph.data))
    assert mismatches == 0

    one_column_msg = "data of shape {} cannot be transformed to graph built on data of shape {}".format(
        graph.data.tocsr()[:, 0].shape, graph.data.shape
    )
    with assert_raises_message(ValueError, one_column_msg):
        graph.transform(sp.csr_matrix(graph.data)[:, 0])

    fifteen_column_msg = "data of shape {} cannot be transformed to graph built on data of shape {}".format(
        graph.data.tocsr()[:, :15].shape, graph.data.shape
    )
    with assert_raises_message(ValueError, fifteen_column_msg):
        graph.transform(sp.csr_matrix(graph.data)[:, :15])
def test_shortest_path_precomputed_no_decay_data():
    """``shortest_path(distance="data")`` must raise on a graph built from a precomputed affinity."""
    # Setup lives outside the context manager so that only `shortest_path`
    # itself can satisfy the expected error.
    # A random quarter of the rows is used for the initial graph.
    data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)]
    G = build_graph(data_small, knn=5, decay=None)
    # Rebuild from the first graph's kernel as a precomputed affinity matrix.
    G = graphtools.Graph(G.K, precomputed="affinity")
    with assert_raises_message(
        ValueError,
        "Graph shortest path with data distance not valid for precomputed graphs. For precomputed graphs, use `distance='constant'` for unweighted graphs and `distance='affinity'` for weighted graphs.",
    ):
        G.shortest_path(distance="data")
def test_knn_interpolate_wrong_shape():
    """``extend_to_data`` must reject inputs that are not 2D or have the wrong width.

    The expected messages are formatted from ``data.shape`` and the ``n_pca``
    used to build the graph instead of hard-coding 64 and 10, so the test
    follows the fixture.
    """
    n_pca = 10
    n_samples, n_features = data.shape[0], data.shape[1]

    G = build_graph(data, n_pca=n_pca, decay=None)
    with assert_raises_message(
        ValueError, "Expected a 2D matrix. Y has shape ({},)".format(n_samples)
    ):
        G.extend_to_data(data[:, 0])
    with assert_raises_message(
        ValueError,
        "Expected a 2D matrix. Y has shape ({}, {}, 1)".format(n_samples, n_features),
    ):
        G.extend_to_data(data[:, :, None])
    # With PCA, the message offers both the original and the reduced width.
    with assert_raises_message(
        ValueError,
        "Y must be of shape either (n, {}) or (n, {})".format(n_features, n_pca),
    ):
        G.extend_to_data(data[:, : n_features // 2])

    # Without PCA, only the original width is offered.
    G = build_graph(data, n_pca=None, decay=None)
    with assert_raises_message(
        ValueError, "Y must be of shape (n, {})".format(n_features)
    ):
        G.extend_to_data(data[:, : n_features // 2])
def test_sample_idx_unique():
    """A constant ``sample_idx`` raises for an explicit ``MNNGraph`` and warns for ``graphtype="mnn"``."""
    error_text = "sample_idx must contain more than one unique value"
    with assert_raises_message(ValueError, error_text):
        build_graph(
            data,
            graph_class=graphtools.graphs.MNNGraph,
            sample_idx=np.ones(len(data)),
        )

    warning_text = "Only one unique sample. Not using MNNGraph"
    with assert_warns_message(UserWarning, warning_text):
        build_graph(data, sample_idx=np.ones(len(data)), graphtype="mnn")
def test_build_exact_with_sample_idx():
    """Combining ``graphtype="exact"`` with a ``sample_idx`` must raise ``ValueError``."""
    expected = "TraditionalGraph does not support batch correction. Use `graphtype='mnn'` or `sample_idx=None`"
    with assert_raises_message(ValueError, expected):
        build_graph(
            data,
            graphtype="exact",
            sample_idx=np.arange(len(data)),
            decay=10,
        )
def test_sample_idx_and_precomputed():
    """Combining ``sample_idx`` with ``precomputed`` must raise ``ValueError``."""
    expected = "MNNGraph does not support precomputed values. Use `graphtype='exact'` and `sample_idx=None` or `precomputed=None`"
    with assert_raises_message(ValueError, expected):
        build_graph(
            data,
            n_pca=None,
            sample_idx=np.arange(10),
            precomputed="distance",
        )
def test_knn_graph_invalid_symm():
    """An unrecognised ``kernel_symm`` must raise ``ValueError`` for a knn graph."""
    expected = "kernel_symm 'invalid' not recognized. Choose from '+', '*', 'mnn', or 'none'."
    with assert_raises_message(ValueError, expected):
        build_graph(
            data,
            graphtype="knn",
            knn=5,
            thresh=1e-4,
            kernel_symm="invalid",
        )
def test_set_params():
    """Check ``get_params`` / ``set_params`` on ``graphtools.base.Data``.

    ``random_state`` may be updated; changing ``n_pca`` must raise
    ``ValueError`` while re-submitting the current ``n_pca`` is accepted.
    """
    # NOTE(review): an earlier `test_set_params` is defined in this file; if
    # both really live in one module this definition replaces the earlier one
    # and only this test is collected -- confirm.
    container = graphtools.base.Data(data, n_pca=20)
    expected_params = {"n_pca": 20, "random_state": None}
    assert container.get_params() == expected_params

    container.set_params(random_state=13)
    assert container.random_state == 13

    refusal = "Cannot update n_pca. Please create a new graph"
    with assert_raises_message(ValueError, refusal):
        container.set_params(n_pca=10)

    # Passing the value already held must not raise.
    container.set_params(n_pca=container.n_pca)
def test_shortest_path_decay_data():
    """``shortest_path(distance="data")`` must raise ``NotImplementedError`` on a graph built with ``decay=15``."""
    # Setup lives outside the context manager so that only `shortest_path`
    # itself can satisfy the expected error.
    # A random quarter of the rows is used for the graph.
    data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)]
    G = build_graph(data_small, knn=5, decay=15)
    with assert_raises_message(
        NotImplementedError,
        "Graph shortest path with constant or data distance only implemented for unweighted graphs. For weighted graphs, use `distance='affinity'`.",
    ):
        G.shortest_path(distance="data")
def test_exact_interpolate():
    """``interpolate`` on a ``decay=10`` graph: needs ``Y`` or ``transitions``, and both routes agree."""
    graph = build_graph(data, decay=10, thresh=0)

    missing_args = "Either `transitions` or `Y` must be provided."
    with assert_raises_message(ValueError, missing_args):
        graph.interpolate(data)

    pca_data = PCA(2).fit_transform(data)
    transitions = graph.extend_to_data(data)

    # Interpolating via Y must equal interpolating via the transitions
    # returned by extend_to_data for that same Y.
    via_y = graph.interpolate(pca_data, Y=data)
    via_transitions = graph.interpolate(pca_data, transitions=transitions)
    assert np.all(via_y == via_transitions)
def test_knn_interpolate():
    """``interpolate`` on a ``decay=None`` graph: needs ``Y`` or ``transitions``, and both routes agree."""
    graph = build_graph(data, decay=None)

    missing_args = "Either `transitions` or `Y` must be provided."
    with assert_raises_message(ValueError, missing_args):
        graph.interpolate(data)

    pca_data = PCA(2).fit_transform(data)
    transitions = graph.extend_to_data(data)

    # Interpolating via Y must equal interpolating via the transitions
    # returned by extend_to_data for that same Y.
    via_y = graph.interpolate(pca_data, Y=data)
    via_transitions = graph.interpolate(pca_data, transitions=transitions)
    np.testing.assert_equal(via_y, via_transitions)
def test_shortest_path_precomputed_decay_constant():
    """``shortest_path(distance="constant")`` must raise ``NotImplementedError`` on a precomputed graph derived from a ``decay=15`` kernel."""
    # Setup lives outside the context manager so that only `shortest_path`
    # itself can satisfy the expected error.
    # A random quarter of the rows is used for the initial graph.
    data_small = data[np.random.choice(len(data), len(data) // 4, replace=False)]
    G = build_graph(data_small, knn=5, decay=15)
    # Rebuild from the first graph's kernel as a precomputed affinity matrix.
    G = graphtools.Graph(G.K, precomputed="affinity")
    with assert_raises_message(
        NotImplementedError,
        "Graph shortest path with constant distance only implemented for unweighted graphs. For weighted graphs, use `distance='affinity'`.",
    ):
        G.shortest_path(distance="constant")
def test_mnn_with_string_theta():
    """A string ``theta`` must raise ``TypeError`` when building with ``kernel_symm="mnn"``."""
    expected = "Expected `theta` as a float. Got <class 'str'>."
    with assert_raises_message(TypeError, expected):
        graph_kwargs = {
            "thresh": 0,
            "n_pca": 20,
            "decay": 10,
            "knn": 5,
            "random_state": 42,
            "sample_idx": digits["target"],
            "kernel_symm": "mnn",
            "theta": "invalid",
        }
        build_graph(data, **graph_kwargs)
def test_inverse_transform_sparse_svd():
    """Exercise ``inverse_transform`` on a sparse graph with ``n_pca`` one below the feature count.

    Verifies the full round trip, column selection, the ``IndexError`` for an
    out-of-range column, the ``TypeError`` for sparse input, and the
    ``ValueError`` for dense input of the wrong shape.
    """
    G = build_graph(data, sparse=True, n_pca=data.shape[1] - 1)
    np.testing.assert_allclose(data, G.inverse_transform(G.data_nu), atol=1e-12)
    np.testing.assert_allclose(
        data[:, -1, None], G.inverse_transform(G.data_nu, columns=-1), atol=1e-12
    )
    np.testing.assert_allclose(
        data[:, 5:7], G.inverse_transform(G.data_nu, columns=[5, 6]), atol=1e-12
    )

    # The requested column index equals the feature count, so format the
    # message from the data shape rather than hard-coding 64; this matches
    # how the dense PCA test in this file builds the same message.
    with assert_raises_message(
        IndexError,
        "index {0} is out of bounds for axis 1 with size {0}".format(data.shape[1]),
    ):
        G.inverse_transform(G.data_nu, columns=data.shape[1])

    # Sparse input is rejected whatever its width.
    sparse_input_msg = "A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array."
    with assert_raises_message(TypeError, sparse_input_msg):
        G.inverse_transform(sp.csr_matrix(G.data)[:, 0])
    with assert_raises_message(TypeError, sparse_input_msg):
        G.inverse_transform(sp.csr_matrix(G.data)[:, :15])

    with assert_raises_message(
        ValueError,
        "data of shape ({0},) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
            data.shape[0], G.n_pca
        ),
    ):
        G.inverse_transform(data[:, 0])
    with assert_raises_message(
        ValueError,
        "data of shape ({0}, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
            data.shape[0], G.n_pca
        ),
    ):
        G.inverse_transform(data[:, :15])