def test_likelihood_kernel_L_gram_factor(self):
    """Check k-DPP sample cardinality for a kernel given by ``L_gram_factor``.

    A random ``self.rank x self.N`` factor defines a non-projection
    likelihood DPP.  For every size in ``self.sizes``:

    - each exact mode draws ``self.nb_samples`` samples and the whole
      ``list_of_samples`` is handed to ``check_right_cardinality``;
    - the MCMC k-DPP sampler is run with ``nb_iter=self.nb_samples`` and
      its first recorded entry is handed to ``check_right_cardinality``.
    """
    gram_factor = rndm.randn(self.rank, self.N)
    dpp = FiniteDPP(kernel_type='likelihood',
                    projection=False,
                    L_gram_factor=gram_factor)

    exact_modes = ('GS', 'GS_bis', 'KuTa12')
    mcmc_labels = ('AED', 'AD')

    for k in self.sizes:
        for exact_mode in exact_modes:
            dpp.flush_samples()
            for _ in range(self.nb_samples):
                dpp.sample_exact_k_dpp(k, exact_mode)
            self.check_right_cardinality(dpp, dpp.list_of_samples)

        # NOTE(review): the label is not forwarded to sample_mcmc_k_dpp, so
        # both passes call the sampler with identical arguments -- confirm
        # whether a mode was meant to be passed.
        for _label in mcmc_labels:
            dpp.flush_samples()
            dpp.sample_mcmc_k_dpp(k, nb_iter=self.nb_samples)
            self.check_right_cardinality(dpp, dpp.list_of_samples[0])
def test_likelihood_kernel(self):
    """Check k-DPP sample cardinality for a DPP given by its full ``L`` kernel.

    ``L`` is assembled as ``(V * w) V^T`` where ``w`` is one plus geometric
    draws (so every entry is at least 2) and ``V`` is the Q factor of a
    random ``self.N x self.rank`` matrix.  For every size in ``self.sizes``
    the exact samplers and the MCMC k-DPP sampler are run and their output
    is passed to ``check_right_cardinality``.
    """
    weights = 1 + rndm.geometric(p=0.5, size=self.rank)
    basis = qr(rndm.randn(self.N, self.rank), mode='economic')[0]
    likelihood = (basis * weights).dot(basis.T)

    dpp = FiniteDPP(kernel_type='likelihood',
                    projection=False,
                    L=likelihood)

    exact_modes = ('GS', 'GS_bis', 'KuTa12')
    mcmc_labels = ('AED', 'AD')

    for k in self.sizes:
        for exact_mode in exact_modes:
            dpp.flush_samples()
            for _ in range(self.nb_samples):
                dpp.sample_exact_k_dpp(k, exact_mode)
            self.check_right_cardinality(dpp, dpp.list_of_samples)

        # NOTE(review): the label is not forwarded to sample_mcmc_k_dpp, so
        # both passes call the sampler with identical arguments -- confirm
        # whether a mode was meant to be passed.
        for _label in mcmc_labels:
            dpp.flush_samples()
            dpp.sample_mcmc_k_dpp(k, nb_iter=self.nb_samples)
            self.check_right_cardinality(dpp, dpp.list_of_samples[0])
def test_kernel_eig(self):
    """Check k-DPP sample cardinality for a DPP given by ``K_eig_dec``.

    The correlation kernel is supplied as a pair (eigenvalues,
    eigenvectors): uniform draws from ``rndm.rand`` and the Q factor of a
    random ``self.N x self.rank`` matrix.  For every size in ``self.sizes``
    the exact samplers and the MCMC k-DPP sampler are run and their output
    is passed to ``check_right_cardinality``.
    """
    spectrum = rndm.rand(self.rank)
    basis = qr(rndm.randn(self.N, self.rank), mode='economic')[0]

    dpp = FiniteDPP(kernel_type='correlation',
                    projection=False,
                    K_eig_dec=(spectrum, basis))

    exact_modes = ('GS', 'GS_bis', 'KuTa12')
    mcmc_labels = ('AED', 'AD')

    for k in self.sizes:
        for exact_mode in exact_modes:
            dpp.flush_samples()
            for _ in range(self.nb_samples):
                dpp.sample_exact_k_dpp(k, exact_mode)
            self.check_right_cardinality(dpp, dpp.list_of_samples)

        # NOTE(review): the label is not forwarded to sample_mcmc_k_dpp, so
        # both passes call the sampler with identical arguments -- confirm
        # whether a mode was meant to be passed.
        for _label in mcmc_labels:
            dpp.flush_samples()
            dpp.sample_mcmc_k_dpp(k, nb_iter=self.nb_samples)
            self.check_right_cardinality(dpp, dpp.list_of_samples[0])
def sample_dpp_multiple_ts(kernel, k, num_masks):
    """Draw ``num_masks`` exact k-DPP samples from a likelihood kernel.

    Args:
        kernel: likelihood kernel ``L`` handed to ``FiniteDPP``.
        k: size requested for every sample.
        num_masks: number of samples to draw.

    Returns:
        The sampler's ``list_of_samples`` after ``num_masks`` draws, i.e.
        one entry per draw (each expected to hold ``k`` items).
    """
    sampler = FiniteDPP('likelihood', L=kernel)
    remaining = num_masks
    while remaining > 0:
        sampler.sample_exact_k_dpp(size=k)
        remaining -= 1
    return sampler.list_of_samples
def select(
    self, x: np.ndarray, a_x: np.ndarray, batch_size: int
) -> Tuple[np.ndarray, np.ndarray]:
    """Pick ``batch_size`` points by drawing one exact k-DPP sample.

    The likelihood kernel is ``self.kernel(x)`` plus ``self.alpha`` times
    the identity.

    Args:
        x: candidate points; indexed along its first axis.
        a_x: values aligned with ``x``; indexed with the same indices.
        batch_size: size of the k-DPP sample.

    Returns:
        ``(x[indices], a_x[indices])`` for the sampled indices.
    """
    gram = self.kernel(x)
    ridge = self.alpha * np.eye(len(x))
    sampler = FiniteDPP("likelihood", L=gram + ridge)
    sampler.sample_exact_k_dpp(size=batch_size)
    chosen = sampler.list_of_samples[0]
    return x[chosen], a_x[chosen]
def test_proj_dpp_sampler_as_kDPP_with_correlation_kernel_A_zono(self):
    """Projection DPP from ``A_zono`` sampled as a k-DPP with k = rank.

    The DPP is defined by the orthogonal projection correlation kernel
    built from ``A_zono``: K = A.T (A A.T)^-1 A.  After drawing
    ``self.nb_samples`` samples of size ``self.rank``, the test asserts the
    1-point and 2-point inclusion checks (``singleton_adequation`` and
    ``doubleton_adequation``).
    """
    zono_matrix = rndm.randn(self.rank, self.N)
    dpp = FiniteDPP(kernel_type='correlation',
                    projection=True,
                    A_zono=zono_matrix)

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact_k_dpp(self.rank)

    # Singleton check first, then doubleton, as two separate assertions.
    for adequation in (self.singleton_adequation, self.doubleton_adequation):
        self.assertTrue(adequation(dpp, dpp.list_of_samples))
def test_proj_dpp_sampler_as_kDPP_with_correlation_kernel(self):
    """Projection DPP from a full ``K`` sampled as a k-DPP with k = rank.

    ``K`` is built as ``(V * 1) V^T`` with ``V`` the Q factor of a random
    ``self.N x self.rank`` matrix, i.e. an orthogonal projection
    correlation kernel.  After drawing ``self.nb_samples`` samples of size
    ``self.rank``, the test asserts the 1-point and 2-point inclusion
    checks (``singleton_adequation`` and ``doubleton_adequation``).
    """
    unit_spectrum = np.ones(self.rank)
    basis = qr(rndm.randn(self.N, self.rank), mode='economic')[0]
    correlation = (basis * unit_spectrum).dot(basis.T)

    dpp = FiniteDPP(kernel_type='correlation',
                    projection=True,
                    K=correlation)

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact_k_dpp(self.rank)

    # Singleton check first, then doubleton, as two separate assertions.
    for adequation in (self.singleton_adequation, self.doubleton_adequation):
        self.assertTrue(adequation(dpp, dpp.list_of_samples))
def select_with_dpp(pred_c, k):
    """Draw one exact k-DPP sample from the Gram matrix of ``pred_c``.

    The likelihood kernel is ``P P^T`` where ``P = np.array(pred_c)``.  The
    sampler is driven by a ``RandomState`` seeded with 1 that is recreated
    on every call.  The sample is printed and then returned.

    Args:
        pred_c: array-like of predictions, one row per candidate.
        k: size of the sample to draw.

    Returns:
        Whatever ``sample_exact_k_dpp`` returns (per the original note,
        something like ``[7, 1, 5, 9]``).
    """
    seeded_rng = np.random.RandomState(1)

    # Original note: pred shape is (10, 100), where 100 is the size of the
    # model output -- TODO confirm against the caller.
    predictions = np.array(pred_c)
    gram = predictions.dot(predictions.T)

    sampler = FiniteDPP('likelihood', L=gram)
    chosen = sampler.sample_exact_k_dpp(size=k, random_state=seeded_rng)
    print(chosen)
    return chosen
def test_proj_dpp_sampler_as_kDPP_with_likelihood_kernel_eig_proj_true(
        self):
    """Projection likelihood kernel (``L_eig_dec``) sampled as a k-DPP.

    The DPP is declared with ``projection=True`` from all-ones eigenvalues
    and the Q factor of a random ``self.N x self.rank`` matrix.  After
    drawing ``self.nb_samples`` samples of size ``self.rank``, ``L`` is
    computed and copied into ``dpp.K`` before the 1-point and 2-point
    inclusion checks are asserted.
    """
    unit_spectrum = np.ones(self.rank)
    basis = qr(rndm.randn(self.N, self.rank), mode='economic')[0]

    dpp = FiniteDPP(kernel_type='likelihood',
                    projection=True,
                    L_eig_dec=(unit_spectrum, basis))

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact_k_dpp(self.rank)

    # The adequation checks receive the dpp object; K is set to L first.
    dpp.compute_L()
    dpp.K = dpp.L

    for adequation in (self.singleton_adequation, self.doubleton_adequation):
        self.assertTrue(adequation(dpp, dpp.list_of_samples))
def test_proj_dpp_sampler_as_kDPP_with_likelihood_kernel_eig_proj_false(
        self):
    """Projection likelihood kernel declared with ``projection=False``.

    The kernel is given by its eigendecomposition: ``self.rank`` unit
    eigenvalues followed by zeros, with a full ``self.N x self.N``
    orthogonal basis.  ``projection`` is set to False in order to go
    through the computation of elementary symmetric polynomials etc.
    After drawing ``self.nb_samples`` samples of size ``self.rank``, ``L``
    is computed and copied into ``dpp.K`` before the 1-point and 2-point
    inclusion checks are asserted.
    """
    spectrum = np.zeros(self.N)
    spectrum[:self.rank] = 1.0
    basis = qr(rndm.randn(self.N, self.N), mode='economic')[0]

    dpp = FiniteDPP(kernel_type='likelihood',
                    projection=False,
                    L_eig_dec=(spectrum, basis))

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact_k_dpp(self.rank)

    # The adequation checks receive the dpp object; K is set to L first.
    dpp.compute_L()
    dpp.K = dpp.L

    for adequation in (self.singleton_adequation, self.doubleton_adequation):
        self.assertTrue(adequation(dpp, dpp.list_of_samples))
def get_diverse(X, y, size=10):
    """Return a k-DPP subsample of the rows of ``X`` and ``y``.

    This is a heuristic for when the covariance matrix is ill conditioned.
    Inputs with 10 rows or fewer are returned untouched.  Otherwise half of
    the rows (rounded down) are sampled from a DPP whose likelihood kernel
    is ``K (I - K)^-1`` with ``K = rbf_kernel(X, X)``.  If the sampler
    raises ``ValueError`` the inputs are returned untouched.

    Args:
        X: 2-D array, one row per point.
        y: 2-D array aligned with ``X`` row by row.
        size: never read -- NOTE(review): the sample size is always
            recomputed as half the number of rows; confirm whether this
            argument was meant to be honoured.

    Returns:
        A tuple ``(X_subset, y_subset)``, or ``(X, y)`` unchanged.
    """
    n_rows = X.shape[0]
    if n_rows <= 10:
        return (X, y)
    size = int(n_rows / 2)

    gram = rbf_kernel(X, X)
    identity = np.eye(gram.shape[0])
    likelihood = np.matmul(gram, np.linalg.inv(identity - gram))

    sampler = FiniteDPP('likelihood', L=likelihood)
    sampler.flush_samples()
    try:
        sampler.sample_exact_k_dpp(size=size)
    except ValueError:
        return (X, y)

    picked = sampler.list_of_samples[0]
    return (X[picked, :], y[picked, :])
def DPP_kernel(adjacency, kernel, k=50):
    """Sample ``k`` nodes of a graph with a k-DPP.

    The likelihood kernel ``L`` is obtained by applying ``kernel`` to the
    adjacency matrix.

    Input:
        - adjacency: adjacency matrix
        - kernel: callable mapping the adjacency matrix to ``L``
        - k: number of nodes to sample
    Output:
        - the value returned by ``sample_exact_k_dpp`` (the indices of the
          sample)
    """
    likelihood = kernel(adjacency)
    sampler = FiniteDPP('likelihood', L=likelihood)
    return sampler.sample_exact_k_dpp(k)
def sample_dpp(kernel, k):
    """Draw a single exact k-DPP sample from likelihood kernel ``kernel``.

    Args:
        kernel: likelihood kernel ``L`` handed to ``FiniteDPP``.
        k: requested sample size.

    Returns:
        The first entry of the sampler's ``list_of_samples``.
    """
    sampler = FiniteDPP('likelihood', L=kernel)
    sampler.sample_exact_k_dpp(size=k)
    recorded = list(sampler.list_of_samples)
    # A ``len(sample) == k`` sanity check exists in the original but is
    # disabled.
    return recorded[0]
# Script fragment: for each (N, M) pair, pick M inducing inputs from the
# first N data points with a k-DPP and set up a full GP regression model.
# NOTE(review): this excerpt looks truncated -- the three accumulators below
# are never appended to in the visible part.
gaps = []
t_gap = []
avgs = []
# Generate a dataset of size Max N, Y is sampled from the prior
X = np.random.randn(N_sequence[-1], 1)
Kff = k.compute_K_symm(X)
# Draw from a zero-mean Gaussian with covariance Kff + sn^2 * I and keep the
# result as a column vector.
Y = np.random.multivariate_normal(
    mean=np.zeros(N_sequence[-1]),
    cov=Kff + np.square(sn) * np.eye(N_sequence[-1]))[:, None]
for N, M in zip(N_sequence, M_sequence):
    # Work on the first N points only.
    X_cur = X[:N, :]
    kff = k.compute_K_symm(X_cur)
    # Likelihood kernel is the kernel matrix plus 1e-6 on the diagonal.
    DPP = FiniteDPP('likelihood', **{'L': kff + np.eye(N) * 1e-6})
    DPP.flush_samples()
    DPP.sample_exact_k_dpp(size=M)
    # Indices of the M sampled points and the corresponding inputs.
    ind = DPP.list_of_samples[0]
    Z_cur = X_cur[ind]
    Y_cur = Y[:N, :]
    # NOTE(review): the original indentation was lost; the statements below
    # are assumed to all sit inside the ``with`` block -- confirm.
    with gpflow.settings.temp_settings(low_jitter):
        # bound from theorem 4
        avg_kl = KL_bound2(k_var=k.variance.value, k_ls=lengthscale,
                           sigma_n=sn, N=N, p_sd=1, p_success=0.5, M=M)
        # We set the GP to have the parameters used in generating data
        full_m = gpflow.models.GPR(X_cur, Y_cur, k)
        full_m.likelihood.variance = np.square(sn)
def erun(self):
    """Train on ``self.data_name`` and save the best link-prediction embedding.

    Steps, in order:

    1. Load the dataset with ``format_data`` and build the placeholders.
    2. Choose anchor node indices (one k-DPP sample of size 21 on the
       adjacency matrix for 'cora', fixed lists for 'citeseer' and
       'pubmed'), reduce ``features_dense`` with PCA to ``FLAGS.hidden2``
       components and fit a ``KernelDensity`` on the anchor rows.
    3. Build the model and optimizer and train for ``self.iteration``
       epochs, printing validation ROC/AP every epoch and test ROC/AP
       every 10th epoch.
    4. Save the embedding of the recorded epoch with the highest test ROC
       to ``result/<data_name>_link_64_64_new.mat`` and print its scores.

    The TensorFlow session is always closed on exit, including on error.

    Raises:
        ValueError: if ``self.data_name`` is not one of 'cora', 'citeseer',
            'pubmed', or if no test evaluation was recorded (fewer than 10
            training epochs).
    """
    model_str = self.model

    # Load and format the dataset.
    feas = format_data(self.data_name)

    # Define placeholders (original note: get_placeholder relies on the
    # shape of the adjacency matrix).
    placeholders = get_placeholder(feas['adj'], feas['num_features'])

    pca = PCA(n_components=FLAGS.hidden2)

    # Anchor nodes for the mixture defined via the DPP and density
    # estimation.
    if self.data_name == 'cora':
        # Only this branch samples from the DPP, so the dense adjacency
        # kernel is built here rather than for every dataset.
        dpp = FiniteDPP('correlation', **{'K': feas['adj'].toarray()})
        dpp.sample_exact_k_dpp(size=21)
        anchor_index = dpp.list_of_samples[0]
    elif self.data_name == 'citeseer':
        anchor_index = np.array([
            1782, 741, 3258, 3189, 3112, 2524, 2895, 1780, 1100, 2735,
            1318, 2944, 1825, 18, 987, 2564, 463, 6, 3173, 701,
            1901, 2349, 2786, 2412, 646, 2626, 2648, 1793, 432, 538,
            1729, 1217, 1397, 1932, 2850, 458, 2129, 702, 2934, 2030,
            2882, 1393, 308, 1271, 1106, 2688, 629, 1145, 3251, 1903,
            1004, 1149, 1385, 285, 858, 2977, 844, 335, 532, 404,
            3174, 528,
        ])
    elif self.data_name == 'pubmed':
        anchor_index = np.array(
            [842, 3338, 5712, 17511, 10801, 2714, 6970, 13296, 5466, 2230])
    else:
        # Previously this fell through and failed later with an unbound
        # local variable.
        raise ValueError(
            "unsupported data_name {!r}; expected 'cora', 'citeseer' or "
            "'pubmed'".format(self.data_name))

    # Density estimate over the PCA-compressed features of the anchors.
    feature_sample = pca.fit_transform(feas['features_dense'])
    featuresCompress = np.array([feature_sample[i] for i in anchor_index])
    kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)

    # construct model
    d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(
        model_str, placeholders, feas['num_features'], feas['num_nodes'],
        feas['features_nonzero'])

    # Optimizer
    opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph,
                        discriminator, placeholders, feas['pos_weight'],
                        feas['norm'], d_real, feas['num_nodes'], GD_real)

    # Initialize session; try/finally guarantees it is released.
    sess = tf.Session()
    try:
        sess.run(tf.global_variables_initializer())

        record = []      # [test ROC, test AP] for every 10th epoch
        record_emb = []  # embedding at the same epochs, aligned with record

        # Train model
        for epoch in range(self.iteration):
            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                   feas['adj_label'], feas['features'],
                                   placeholders, feas['adj'], kde,
                                   feas['features_dense'])

            lm_train = linkpred_metrics(feas['val_edges'],
                                        feas['val_edges_false'])
            roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)

            print(
                "Epoch:", '%04d' % (epoch + 1),
                "train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}"
                .format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3],
                        avg_cost[4]),
                "val_roc=", "{:.5f}".format(roc_curr),
                "val_ap=", "{:.5f}".format(ap_curr))

            if (epoch + 1) % 10 == 0:
                lm_test = linkpred_metrics(feas['test_edges'],
                                           feas['test_edges_false'])
                roc_score, ap_score, _ = lm_test.get_roc_score(emb, feas)
                print('Test ROC score: ' + str(roc_score))
                print('Test AP score: ' + str(ap_score))
                record.append([roc_score, ap_score])
                record_emb.append(emb)

        if not record:
            # Previously an opaque IndexError from indexing an empty array.
            raise ValueError(
                "no test evaluation was recorded; self.iteration must be "
                "at least 10 (got {})".format(self.iteration))

        # First recorded epoch with the highest test ROC.
        best = max(range(len(record)), key=lambda i: record[i][0])
        emb = record_emb[best]
        ana = record[best]

        scio.savemat('result/{}_link_64_64_new.mat'.format(self.data_name), {
            'embedded': emb,
            'labels': feas['true_labels']
        })
        # NOTE(review): the label says val_roc but the values come from the
        # test-set evaluations stored in record.
        print('The peak val_roc=%f, ap = %f' % (ana[0], ana[1]))
    finally:
        sess.close()
def erun(self):
    """Train on ``self.data_name`` and save the best clustering embedding.

    Steps, in order:

    1. Load the dataset with ``format_data`` and build the placeholders.
    2. Choose anchor node indices (one k-DPP sample of size 24 on the
       adjacency matrix for 'cora', fixed lists for 'citeseer' and
       'pubmed'), reduce ``features_dense`` with PCA to ``FLAGS.hidden2``
       components and fit a ``KernelDensity`` on the anchor rows.
    3. Build the model and optimizer and train for ``self.iteration``
       epochs.  Every 2nd epoch the embedding is clustered with KMeans
       (``self.n_clusters`` clusters) and scored against the true labels.
    4. Save the embedding of the recorded epoch with the highest first
       score (printed as ACC) to ``result/<data_name>.mat`` and print its
       scores.

    The TensorFlow session is always closed on exit, including on error.

    Raises:
        ValueError: if ``self.data_name`` is not one of 'cora', 'citeseer',
            'pubmed', or if no clustering evaluation was recorded (fewer
            than 2 training epochs).
    """
    model_str = self.model

    # Load and format the dataset.
    feas = format_data(self.data_name)

    # Define placeholders
    placeholders = get_placeholder(feas['adj'], feas['num_features'])

    pca = PCA(n_components=FLAGS.hidden2)

    # Anchor nodes for the mixture defined via the DPP and density
    # estimation.
    if self.data_name == 'cora':
        # Only this branch samples from the DPP, so the dense adjacency
        # kernel is built here rather than for every dataset.
        dpp = FiniteDPP('correlation', **{'K': feas['adj'].toarray()})
        dpp.sample_exact_k_dpp(size=24)
        anchor_index = dpp.list_of_samples[0]
    elif self.data_name == 'citeseer':
        # The original assigned three candidate lists in a row; only the
        # last one took effect and is kept as the active choice.
        # Earlier candidates, with the original author's notes:
        #   [481, 1763, 1701, 171, 1425, 842]     best at epoch 36, 0.571
        #   [3165, 589, 1283, 1756, 2221, 2409]   reaches 0.545 at 50
        # Disabled candidate noted as "highest performance":
        #   [1718, 3241, 787, 2727, 624, 3110, 1503, 1867, 2410, 1594,
        #    1203, 2711, 171, 1790, 1778, 294, 685, 39, 1700, 2650,
        #    2028, 2573, 375, 2744, 2302, 1876, 784, 2233, 2546, 1793,
        #    1677, 3278, 2587, 2623, 1018, 1160, 3166, 668, 1663, 3007,
        #    864, 2893, 743, 3129, 3104, 3277, 1643, 3047, 322, 298,
        #    2894, 35, 2578, 2031, 3316, 1815, 361, 1868, 1546, 1895,
        #    1514, 636]
        anchor_index = np.array([2300, 2725, 3313, 1216, 2821, 2432])  # 50
    elif self.data_name == 'pubmed':
        anchor_index = np.array([
            842, 3338, 5712, 17511, 10801, 2714, 6970, 13296, 5466, 2230,
            14052])
    else:
        # Previously this fell through and failed later with an unbound
        # local variable.
        raise ValueError(
            "unsupported data_name {!r}; expected 'cora', 'citeseer' or "
            "'pubmed'".format(self.data_name))

    # Density estimate over the PCA-compressed features of the anchors.
    feature_sample = pca.fit_transform(feas['features_dense'])
    featuresCompress = np.array([feature_sample[i] for i in anchor_index])
    kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)

    # construct model
    d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(
        model_str, placeholders, feas['num_features'], feas['num_nodes'],
        feas['features_nonzero'])

    # Optimizer
    opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph,
                        discriminator, placeholders, feas['pos_weight'],
                        feas['norm'], d_real, feas['num_nodes'], GD_real)

    # Initialize session; try/finally guarantees it is released.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    try:
        sess.run(tf.global_variables_initializer())

        record = []      # three clustering scores for every 2nd epoch
        record_emb = []  # embedding at the same epochs, aligned with record

        # Train model
        for epoch in range(self.iteration):
            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                   feas['adj_label'], feas['features'],
                                   placeholders, feas['adj'], kde,
                                   feas['features_dense'])

            if (epoch + 1) % 2 == 0:
                record_emb.append(emb)
                kmeans = KMeans(n_clusters=self.n_clusters,
                                random_state=0).fit(emb)
                print("Epoch:", '%04d' % (epoch + 1))
                predict_labels = kmeans.predict(emb)
                cm = clustering_metrics(feas['true_labels'], predict_labels)
                # Printed at the end as ACC, NMI and ADJ_RAND_SCORE.
                acc, nmi, ari = cm.evaluationClusterModelFromLabel()
                record.append([acc, nmi, ari])

        if not record:
            # Previously an opaque IndexError from indexing an empty array.
            raise ValueError(
                "no clustering evaluation was recorded; self.iteration "
                "must be at least 2 (got {})".format(self.iteration))

        # First recorded epoch with the highest first score.
        best = max(range(len(record)), key=lambda i: record[i][0])
        ana = record[best]
        print('------------------------------------', best)
        emb = record_emb[best]

        scio.savemat('result/{}.mat'.format(self.data_name),
                     {'embedded': emb, 'labels': feas['true_labels']})
        print('The peak ACC=%f, NMI=%f, ADJ_RAND_SCORE=%f'
              % (ana[0], ana[1], ana[2]))
    finally:
        sess.close()