def test_mnn_graph_matrix_gamma():
    """Compare a hand-built kernel using a matrix ``gamma`` with graphtools.

    A reference affinity matrix is assembled block by block from the pairwise
    distances between every pair of samples, symmetrized entry-wise with a
    per-sample-pair ``gamma`` weight and wrapped in a ``pygsp`` graph.  The
    graph built by ``graphtools.Graph`` with ``kernel_symm='gamma'`` must have
    the same size, degrees and weights, and must be an ``MNNGraph``.
    """
    X, sample_idx = generate_swiss_roll()
    bs = 0.8
    # gamma[i, j] is the symmetrization weight between the i-th and j-th
    # entries of ``samples`` (sorted unique sample labels)
    gamma = np.array([[1, bs],
                      [bs, 1]])
    k = 10
    a = 20
    metric = 'euclidean'
    beta = 0
    samples = np.unique(sample_idx)

    # start from NaN so that any block left unfilled would be noticeable
    K = np.zeros((len(X), len(X)))
    K[:] = np.nan
    K = pd.DataFrame(K)
    for si in samples:
        X_i = X[sample_idx == si]  # observations in sample i
        for sj in samples:
            X_j = X[sample_idx == sj]  # observations in sample j
            pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
            kdx_ij = np.sort(pdx_ij, axis=1)  # row-wise sorted distances
            e_ij = kdx_ij[:, k]  # distance at sorted position k
            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize per row
            k_ij = np.exp(-1 * (pdxe_ij ** a))  # alpha-decaying kernel
            if si == sj:
                # within-sample (diagonal) block, scaled by (1 - beta)
                K.iloc[sample_idx == si, sample_idx == sj] = \
                    k_ij * (1 - beta)
            else:
                # between-sample (off-diagonal) block
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij
    K = np.array(K)

    # Expand gamma to one weight per pair of observations.  Iterate over the
    # sorted ``samples`` array rather than ``set(sample_idx)``: set iteration
    # order is not guaranteed, so enumerating a set could pair a sample label
    # with the wrong row/column of ``gamma``.
    matrix_gamma = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx))))
    for ix, si in enumerate(samples):
        for jx, sj in enumerate(samples):
            matrix_gamma.iloc[sample_idx == si,
                              sample_idx == sj] = gamma[ix, jx]

    # gamma-weighted blend of the element-wise min and max of K and K.T
    W = np.array((matrix_gamma * np.minimum(K, K.T)) +
                 ((1 - matrix_gamma) * np.maximum(K, K.T)))
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=1 - beta,
                          kernel_symm='gamma', gamma=gamma,
                          distance=metric, sample_idx=sample_idx, thresh=0,
                          use_pygsp=True)
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
def test_mnn_with_vector_gamma():
    """Call ``build_graph`` with a 1-D ``gamma`` and ``kernel_symm='gamma'``.

    The vector has one entry fewer than the number of unique labels in
    ``digits['target']``.

    NOTE(review): nothing in this function asserts on the result or on an
    expected exception -- confirm how the outcome is checked.
    """
    labels = digits['target']
    n_labels = len(np.unique(labels))
    # vector gamma: n_labels - 1 evenly spaced values in [0, 1]
    vector_gamma = np.linspace(0, 1, n_labels - 1)
    build_graph(
        data,
        thresh=0,
        n_pca=20,
        decay=10,
        knn=5,
        random_state=42,
        sample_idx=labels,
        kernel_symm='gamma',
        gamma=vector_gamma,
    )
def test_mnn_with_square_gamma_wrong_length():
    """Call ``build_graph`` with a 2-D ``gamma`` whose shape mismatches labels.

    The matrix has shape ``(n_labels - 1, n_labels)`` where ``n_labels`` is
    the number of unique values in ``digits['target']``.

    NOTE(review): nothing in this function asserts on the result or on an
    expected exception -- confirm how the outcome is checked.
    """
    labels = digits['target']
    n_labels = len(np.unique(labels))
    # n_labels - 1 evenly spaced values, repeated n_labels times and folded
    # into an (n_labels - 1) x n_labels matrix
    base_values = np.linspace(0, 1, n_labels - 1)
    matrix_gamma = np.tile(base_values, n_labels).reshape(n_labels - 1,
                                                          n_labels)
    build_graph(
        data,
        thresh=0,
        n_pca=20,
        decay=10,
        knn=5,
        random_state=42,
        sample_idx=labels,
        kernel_symm='gamma',
        gamma=matrix_gamma,
    )
def test_mnn_with_vector_theta():
    """Call ``build_graph`` with a 1-D ``theta`` and ``kernel_symm="mnn"``.

    The vector has one entry fewer than the number of unique labels in
    ``digits["target"]``.

    NOTE(review): nothing in this function asserts on the result or on an
    expected exception -- confirm how the outcome is checked.
    """
    labels = digits["target"]
    n_labels = len(np.unique(labels))
    # vector theta: n_labels - 1 evenly spaced values in [0, 1]
    vector_theta = np.linspace(0, 1, n_labels - 1)
    build_graph(
        data,
        thresh=0,
        n_pca=20,
        decay=10,
        knn=5,
        random_state=42,
        sample_idx=labels,
        kernel_symm="mnn",
        theta=vector_theta,
    )
def test_mnn_with_matrix_theta():
    """Call ``build_graph`` with a 2-D ``theta`` and ``kernel_symm="mnn"``.

    The matrix has shape ``(n_labels, n_labels)`` where ``n_labels`` is the
    number of unique values in ``digits["target"]``.

    NOTE(review): nothing in this function asserts on the result or on an
    expected exception -- confirm how the outcome is checked.
    """
    labels = digits["target"]
    n_labels = len(np.unique(labels))
    # n_labels evenly spaced values, repeated n_labels times and folded into
    # an n_labels x n_labels matrix
    base_values = np.linspace(0, 1, n_labels)
    matrix_theta = np.tile(base_values, n_labels).reshape(n_labels, n_labels)
    build_graph(
        data,
        thresh=0,
        n_pca=20,
        decay=10,
        knn=5,
        random_state=42,
        sample_idx=labels,
        kernel_symm="mnn",
        theta=matrix_theta,
    )
def test_mnn_with_vector_theta():
    """Exercise ``build_graph`` with a 1-D ndarray ``theta``.

    The whole body runs inside ``assert_raises_message`` configured with
    ``TypeError`` and the "Expected `theta` as a float" message.
    """
    expected_message = (
        "Expected `theta` as a float. Got <class 'numpy.ndarray'>."
    )
    with assert_raises_message(TypeError, expected_message):
        labels = digits["target"]
        n_labels = len(np.unique(labels))
        # vector theta: n_labels - 1 evenly spaced values in [0, 1]
        vector_theta = np.linspace(0, 1, n_labels - 1)
        build_graph(
            data,
            thresh=0,
            n_pca=20,
            decay=10,
            knn=5,
            random_state=42,
            sample_idx=labels,
            kernel_symm="mnn",
            theta=vector_theta,
        )
def test_mnn_with_matrix_theta():
    """Exercise ``build_graph`` with a 2-D ndarray ``theta``.

    The whole body runs inside ``assert_raises_message`` configured with
    ``TypeError`` and the "Expected `theta` as a float" message.
    """
    expected_message = (
        "Expected `theta` as a float. Got <class 'numpy.ndarray'>."
    )
    with assert_raises_message(TypeError, expected_message):
        labels = digits["target"]
        n_labels = len(np.unique(labels))
        # n_labels evenly spaced values, repeated n_labels times and folded
        # into an n_labels x n_labels matrix
        base_values = np.linspace(0, 1, n_labels)
        matrix_theta = np.tile(base_values, n_labels).reshape(
            n_labels, n_labels
        )
        build_graph(
            data,
            thresh=0,
            n_pca=20,
            decay=10,
            knn=5,
            random_state=42,
            sample_idx=labels,
            kernel_symm="mnn",
            theta=matrix_theta,
        )
def test_landmark_exact_graph():
    """Check landmark attributes and interpolation on a ``thresh=0`` graph.

    Builds a graph with ``n_landmark=100`` and verifies the shapes of
    ``landmark_op``, ``transitions`` and ``clusters``, the graph's classes,
    the shape of an interpolated random signal, and that calling
    ``_reset_landmarks`` twice in a row does not raise.
    """
    n_landmark = 100
    n_obs = data.shape[0]
    # exact graph
    G = build_graph(
        data,
        n_landmark=n_landmark,
        thresh=0,
        n_pca=20,
        decay=10,
        knn=5 - 1,
        random_state=42,
    )
    assert G.landmark_op.shape == (n_landmark, n_landmark)
    for expected_cls in (
        graphtools.graphs.TraditionalGraph,
        graphtools.graphs.LandmarkGraph,
    ):
        assert isinstance(G, expected_cls)
    assert G.transitions.shape == (n_obs, n_landmark)
    assert G.clusters.shape == (n_obs,)
    # there can never be more distinct clusters than landmarks requested
    n_clusters = len(np.unique(G.clusters))
    assert n_clusters <= n_landmark

    # a signal defined on the landmarks is interpolated to every observation
    landmark_signal = np.random.normal(0, 1, [n_landmark, 10])
    full_signal = G.interpolate(landmark_signal)
    assert full_signal.shape == (n_obs, landmark_signal.shape[1])

    # no error on double delete
    for _ in range(2):
        G._reset_landmarks()
def test_mnn_graph_decay():
    """Compare a hand-built MNN decay kernel with ``graphtools.Graph``.

    The reference kernel is assembled per pair of samples from pairwise
    distances, the between-sample blocks are rescaled row-wise relative to
    the within-sample block and multiplied by ``beta``, and the result is
    symmetrized with a scalar ``theta``.  The ``pygsp`` graph built from it
    must match the graph produced with ``kernel_symm="mnn"``.
    """
    X, sample_idx = generate_swiss_roll()
    theta = 0.9
    k = 10
    a = 20
    metric = "euclidean"
    beta = 0.2
    samples = np.unique(sample_idx)
    n_obs = len(X)

    # NaN-initialised so that every block has to be filled explicitly
    K = pd.DataFrame(np.full((n_obs, n_obs), np.nan))
    for si in samples:
        rows = sample_idx == si
        X_i = X[rows]  # observations in sample i
        for sj in samples:
            cols = sample_idx == sj
            within_sample = si == sj
            # between-sample blocks use the distance at sorted position k - 1
            batch_k = k if within_sample else k - 1
            pdx_ij = cdist(X_i, X[cols], metric=metric)  # pairwise distances
            e_ij = np.sort(pdx_ij, axis=1)[:, batch_k]  # bandwidth per row
            scaled = pdx_ij / e_ij[:, np.newaxis]  # normalize per row
            k_ij = np.exp(-1 * (scaled ** a))  # alpha-decaying kernel
            if within_sample:
                # within-sample (diagonal) block is averaged with its transpose
                K.iloc[rows, cols] = (k_ij + k_ij.T) / 2
            else:
                # between-sample (off-diagonal) block is stored as computed
                K.iloc[rows, cols] = k_ij

    # rescale each between-sample block relative to the within-sample block
    Kn = K.copy()
    for i in samples:
        rows = sample_idx == i
        i_norm = norm(K.iloc[rows, rows], 1, axis=1)
        for j in samples:
            if i == j:
                continue
            cols = sample_idx == j
            curr_K = K.iloc[rows, cols]
            curr_norm = norm(curr_K, 1, axis=1)
            # per-row factor: capped at 1, then multiplied by beta
            scale = np.minimum(1, i_norm / curr_norm) * beta
            Kn.iloc[rows, cols] = curr_K.values * scale[:, None]
    K = Kn

    # theta-weighted blend of the element-wise min and max of K and K.T
    W = np.array(
        (theta * np.minimum(K, K.T)) + ((1 - theta) * np.maximum(K, K.T))
    )
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = graphtools.Graph(
        X,
        knn=k,
        decay=a,
        beta=beta,
        kernel_symm="mnn",
        theta=theta,
        distance=metric,
        sample_idx=sample_idx,
        thresh=0,
        use_pygsp=True,
    )
    assert G.N == G2.N
    np.testing.assert_array_equal(G.dw, G2.dw)
    np.testing.assert_array_equal((G.W - G2.W).data, 0)
    assert isinstance(G2, graphtools.graphs.MNNGraph)