def test_mnn_graph_matrix_gamma():
    """Compare a hand-built gamma-symmetrized kernel against graphtools.

    A reference affinity matrix is assembled block by block (one block per
    ordered pair of samples) with an alpha-decaying kernel, symmetrized with
    a per-sample-pair ``gamma`` matrix, and wrapped in a ``pygsp`` graph.
    The result must match ``graphtools.Graph`` built with
    ``kernel_symm='gamma'`` and the same matrix-valued ``gamma``.
    """
    X, sample_idx = generate_swiss_roll()
    bs = 0.8
    # gamma[i, j] is the symmetrization weight between the i-th and j-th
    # sample, with samples taken in sorted order (see ``samples`` below).
    gamma = np.array([
        [1, bs],
        [bs, 1],
    ])
    k = 10
    a = 20
    metric = 'euclidean'
    beta = 0
    samples = np.unique(sample_idx)

    # Start from NaN so that any block left unfilled would be noticeable.
    K = np.zeros((len(X), len(X)))
    K[:] = np.nan
    K = pd.DataFrame(K)

    for si in samples:
        X_i = X[sample_idx == si]  # observations in sample i
        for sj in samples:
            X_j = X[sample_idx == sj]  # observations in sample j
            pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
            kdx_ij = np.sort(pdx_ij, axis=1)  # row-wise sorted distances
            e_ij = kdx_ij[:, k]  # distance at sorted position k (0-based)
            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize per row
            k_ij = np.exp(-1 * (pdxe_ij**a))  # alpha-decaying kernel
            if si == sj:
                # within-sample (diagonal) block, scaled by (1 - beta)
                K.iloc[sample_idx == si, sample_idx == sj] = \
                    k_ij * (1 - beta)
            else:
                # between-sample (off-diagonal) block
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij

    K = np.array(K)

    # Expand the per-sample-pair gamma to a per-observation-pair matrix.
    # Iterate over the sorted ``samples`` rather than ``set(sample_idx)``:
    # set iteration order is unspecified, and gamma is indexed positionally.
    matrix_gamma = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx))))
    for ix, si in enumerate(samples):
        for jx, sj in enumerate(samples):
            matrix_gamma.iloc[sample_idx == si, sample_idx == sj] = \
                gamma[ix, jx]

    # gamma-weighted blend of the elementwise min and max of K and K.T
    W = np.array((matrix_gamma * np.minimum(K, K.T)) +
                 ((1 - matrix_gamma) * np.maximum(K, K.T)))
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = graphtools.Graph(
        X,
        knn=k + 1,
        decay=a,
        beta=1 - beta,
        kernel_symm='gamma',
        gamma=gamma,
        distance=metric,
        sample_idx=sample_idx,
        thresh=0,
        use_pygsp=True,
    )
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
def test_verbose():
    """Build a graph with sample labels and ``verbose=True``.

    Nothing is asserted; the test passes as long as construction with
    verbose output enabled does not raise.
    """
    data, batch_labels = generate_swiss_roll()
    # Blank line first so the verbose output starts on its own line.
    print()
    print("Verbose test: MNN")
    graph_options = dict(
        sample_idx=batch_labels,
        kernel_symm="mnn",
        theta=0.5,
        n_pca=None,
        verbose=True,
    )
    build_graph(data, **graph_options)
def test_set_params():
    """Check ``get_params`` / ``set_params`` on a graph built with theta.

    Mutable settings (``n_jobs``, ``random_state``, ``verbose``) must be
    applied to the graph and to every entry of ``G.subgraphs``; settings
    that define the kernel must raise ``ValueError`` with a specific
    message when changed, and be accepted when re-set to their current
    values.

    NOTE(review): another ``test_set_params`` is defined later in this
    file view; if both live in the same module the later definition
    replaces this one and this test never runs -- confirm and rename.
    """
    data, batch_labels = generate_swiss_roll()
    mnn = build_graph(
        data,
        sample_idx=batch_labels,
        kernel_symm="mnn",
        theta=0.5,
        n_pca=None,
        thresh=1e-4,
    )

    expected_params = {
        "n_pca": None,
        "random_state": 42,
        "kernel_symm": "mnn",
        "theta": 0.5,
        "anisotropy": 0,
        "beta": 1,
        "knn": 3,
        "decay": 10,
        "bandwidth": None,
        "distance": "euclidean",
        "thresh": 1e-4,
        "n_jobs": 1,
    }
    assert mnn.get_params() == expected_params

    # n_jobs must reach each subgraph and its knn_tree.
    mnn.set_params(n_jobs=4)
    assert mnn.n_jobs == 4
    for sub in mnn.subgraphs:
        assert sub.n_jobs == 4
        assert sub.knn_tree.n_jobs == 4

    mnn.set_params(random_state=13)
    assert mnn.random_state == 13
    for sub in mnn.subgraphs:
        assert sub.random_state == 13

    mnn.set_params(verbose=2)
    assert mnn.verbose == 2
    for sub in mnn.subgraphs:
        assert sub.verbose == 2
    mnn.set_params(verbose=0)

    # Changing any of these must be rejected with the given message.
    rejected_updates = [
        ({"knn": 15}, "Cannot update knn. Please create a new graph"),
        ({"decay": 15}, "Cannot update decay. Please create a new graph"),
        (
            {"distance": "manhattan"},
            "Cannot update distance. Please create a new graph",
        ),
        ({"thresh": 1e-3}, "Cannot update thresh. Please create a new graph"),
        ({"beta": 0.2}, "Cannot update beta. Please create a new graph"),
    ]
    for update, message in rejected_updates:
        with assert_raises_message(ValueError, message):
            mnn.set_params(**update)

    # Re-setting the same parameters to their current values is allowed.
    unchanged = {
        name: getattr(mnn, name)
        for name in ("knn", "decay", "thresh", "distance", "beta")
    }
    mnn.set_params(**unchanged)
def test_landmark_mnn_graph():
    """Build a graph with both ``sample_idx`` and ``n_landmark`` set.

    The resulting object must expose a square ``landmark_op`` of size
    ``n_landmark`` and be an instance of both ``MNNGraph`` and
    ``LandmarkGraph``.
    """
    X, sample_idx = generate_swiss_roll()
    n_landmark = 150
    G = build_graph(
        X,
        n_landmark=n_landmark,
        thresh=1e-5,
        n_pca=None,
        decay=10,
        knn=5,
        random_state=42,
        sample_idx=sample_idx,
    )
    expected_shape = (n_landmark, n_landmark)
    assert G.landmark_op.shape == expected_shape
    for graph_class in (
        graphtools.graphs.MNNGraph,
        graphtools.graphs.LandmarkGraph,
    ):
        assert isinstance(G, graph_class)
def test_set_params():
    """Check ``get_params`` / ``set_params`` on a graph built with gamma.

    Mutable settings (``n_jobs``, ``random_state``, ``verbose``) must be
    applied to the graph and to every entry of ``G.subgraphs``; settings
    that define the kernel must raise ``ValueError`` when changed, and be
    accepted when re-set to their current values.

    NOTE(review): another ``test_set_params`` is defined earlier in this
    file view; if both live in the same module this definition replaces
    the earlier one -- confirm and rename one of them.
    """
    data, batch_labels = generate_swiss_roll()
    mnn = build_graph(
        data,
        sample_idx=batch_labels,
        kernel_symm='gamma',
        gamma=0.5,
        n_pca=None,
        thresh=1e-4,
    )

    expected_params = {
        'n_pca': None,
        'random_state': 42,
        'kernel_symm': 'gamma',
        'gamma': 0.5,
        'beta': 1,
        'adaptive_k': 'sqrt',
        'knn': 3,
        'decay': 10,
        'distance': 'euclidean',
        'thresh': 1e-4,
        'n_jobs': 1,
    }
    assert mnn.get_params() == expected_params

    # n_jobs must reach each subgraph and its knn_tree.
    mnn.set_params(n_jobs=4)
    assert mnn.n_jobs == 4
    for sub in mnn.subgraphs:
        assert sub.n_jobs == 4
        assert sub.knn_tree.n_jobs == 4

    mnn.set_params(random_state=13)
    assert mnn.random_state == 13
    for sub in mnn.subgraphs:
        assert sub.random_state == 13

    mnn.set_params(verbose=2)
    assert mnn.verbose == 2
    for sub in mnn.subgraphs:
        assert sub.verbose == 2
    mnn.set_params(verbose=0)

    # Changing any of these must be rejected with ValueError.
    rejected_updates = (
        ('knn', 15),
        ('decay', 15),
        ('distance', 'manhattan'),
        ('thresh', 1e-3),
        ('beta', 0.2),
        ('adaptive_k', 'min'),
    )
    for name, value in rejected_updates:
        assert_raises(ValueError, mnn.set_params, **{name: value})

    # Re-setting the same parameters to their current values is allowed.
    unchanged = {
        name: getattr(mnn, name)
        for name in (
            'knn', 'decay', 'thresh', 'distance', 'beta', 'adaptive_k'
        )
    }
    mnn.set_params(**unchanged)
def test_landmark_mnn_pygsp_graph():
    """Build a landmark graph with ``sample_idx`` and ``use_pygsp=True``.

    The resulting object must expose a square ``landmark_op`` of size
    ``n_landmark`` and be an instance of ``MNNGraph``, ``LandmarkGraph``
    and ``pygsp.graphs.Graph`` at the same time.
    """
    X, sample_idx = generate_swiss_roll()
    n_landmark = 150
    build_options = dict(
        n_landmark=n_landmark,
        thresh=1e-3,
        n_pca=None,
        decay=10,
        knn=3 - 1,
        random_state=42,
        sample_idx=sample_idx,
        use_pygsp=True,
    )
    G = build_graph(X, **build_options)

    expected_shape = (n_landmark, n_landmark)
    assert G.landmark_op.shape == expected_shape
    for graph_class in (
        graphtools.graphs.MNNGraph,
        graphtools.graphs.LandmarkGraph,
        pygsp.graphs.Graph,
    ):
        assert isinstance(G, graph_class)
def test_mnn_with_non_zero_indexed_sample_idx():
    """Shifting every sample label by one must not change the graph.

    Two graphs are built from the same data, one with the original
    ``sample_idx`` and one with each label incremented by 1; their size,
    degrees and weight matrices must be identical.
    """
    X, sample_idx = generate_swiss_roll()
    G = build_graph(
        X,
        sample_idx=sample_idx,
        kernel_symm='gamma',
        gamma=0.5,
        n_pca=None,
        use_pygsp=True,
    )
    # Build the shifted labels as a NEW array. An in-place ``+= 1`` would
    # also modify the array already handed to ``G``; if ``G`` keeps a
    # reference to it, both graphs would end up sharing the same labels
    # and the comparison below would no longer test anything.
    shifted_idx = sample_idx + 1
    G2 = build_graph(
        X,
        sample_idx=shifted_idx,
        kernel_symm='gamma',
        gamma=0.5,
        n_pca=None,
        use_pygsp=True,
    )
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
def test_mnn_with_string_sample_idx():
    """String sample labels must give the same graph as the originals.

    Two graphs are built from the same data, one with the original
    ``sample_idx`` and one where label 0 becomes "a" and every other
    label becomes "b"; their size, degrees and weight matrices must be
    identical.
    """
    data, numeric_labels = generate_swiss_roll()
    shared_options = dict(
        kernel_symm="mnn",
        theta=0.5,
        n_pca=None,
        use_pygsp=True,
    )
    reference = build_graph(data, sample_idx=numeric_labels, **shared_options)

    string_labels = np.where(numeric_labels == 0, "a", "b")
    relabelled = build_graph(data, sample_idx=string_labels, **shared_options)

    assert reference.N == relabelled.N
    assert np.all(reference.d == relabelled.d)
    # Compare the weight matrices in both directions.
    assert (reference.W != relabelled.W).nnz == 0
    assert (relabelled.W != reference.W).sum() == 0
    assert isinstance(relabelled, graphtools.graphs.MNNGraph)
def test_mnn_graph_decay():
    """Compare a hand-built theta-symmetrized decay kernel to graphtools.

    A reference affinity matrix is assembled block by block (one block per
    ordered pair of samples) with an alpha-decaying kernel, the
    between-sample blocks are rescaled by ``beta`` relative to the
    within-sample row norms, and the result is symmetrized with ``theta``.
    The resulting ``pygsp`` graph must match ``graphtools.Graph`` built with
    ``kernel_symm="mnn"`` exactly (array equality, no tolerance).
    """
    X, sample_idx = generate_swiss_roll()
    theta = 0.9
    k = 10
    a = 20
    metric = "euclidean"
    beta = 0.2
    samples = np.unique(sample_idx)

    # Start from NaN so that any block left unfilled would be noticeable.
    K = np.zeros((len(X), len(X)))
    K[:] = np.nan
    K = pd.DataFrame(K)

    for si in samples:
        X_i = X[sample_idx == si]  # get observations in sample i
        for sj in samples:
            # Within a sample, sorted position 0 is the point itself
            # (distance 0), so the between-sample index is one smaller.
            batch_k = k if si == sj else k - 1
            X_j = X[sample_idx == sj]  # get observation in sample j
            pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
            kdx_ij = np.sort(pdx_ij, axis=1)  # row-wise sorted distances
            e_ij = kdx_ij[:, batch_k]  # dist at sorted position batch_k
            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize
            k_ij = np.exp(-1 * (pdxe_ij ** a))  # apply alpha-decaying kernel
            if si == sj:
                # within-sample (diagonal) block: average with transpose
                K.iloc[sample_idx == si, sample_idx == sj] = (k_ij + k_ij.T) / 2
            else:
                # between-sample (off-diagonal) block: used as computed
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij

    # Rescale each between-sample block row by row; Kn receives the
    # results so that the norms are always taken from the unscaled K.
    Kn = K.copy()
    for i in samples:
        curr_K = K.iloc[sample_idx == i, sample_idx == i]
        # presumably the row-wise 1-norm; `norm` is imported outside this
        # view -- confirm which linalg implementation it is
        i_norm = norm(curr_K, 1, axis=1)
        for j in samples:
            if i == j:
                continue
            else:
                curr_K = K.iloc[sample_idx == i, sample_idx == j]
                curr_norm = norm(curr_K, 1, axis=1)
                # never scale up (capped at 1), then apply beta
                scale = np.minimum(1, i_norm / curr_norm) * beta
                Kn.iloc[sample_idx == i, sample_idx == j] = (
                    curr_K.values * scale[:, None]
                )

    K = Kn
    # theta-weighted blend of the elementwise min and max of K and K.T
    W = np.array((theta * np.minimum(K, K.T)) + ((1 - theta) * np.maximum(K, K.T)))
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = graphtools.Graph(
        X,
        knn=k,
        decay=a,
        beta=beta,
        kernel_symm="mnn",
        theta=theta,
        distance=metric,
        sample_idx=sample_idx,
        thresh=0,
        use_pygsp=True,
    )
    assert G.N == G2.N
    np.testing.assert_array_equal(G.dw, G2.dw)
    # every stored entry of the difference must be exactly zero
    np.testing.assert_array_equal((G.W - G2.W).data, 0)
    assert isinstance(G2, graphtools.graphs.MNNGraph)