def test_likelihood_kernel_L_gram_factor(self):
        """Check sample cardinality for a likelihood DPP built from a Gram factor.

        A random ``rank x N`` factor is passed as ``L_gram_factor``.  For every
        requested size, ``nb_samples`` exact k-DPP samples are drawn with each
        exact sampling mode, then an MCMC run of ``nb_samples`` iterations is
        performed; the resulting samples are handed to
        ``self.check_right_cardinality``.
        """
        gram_factor = rndm.randn(self.rank, self.N)
        dpp = FiniteDPP(kernel_type='likelihood',
                        projection=False,
                        L_gram_factor=gram_factor)

        exact_modes = ('GS', 'GS_bis', 'KuTa12')
        mcmc_modes = ('AED', 'AD')

        for size in self.sizes:

            # Exact samplers: one fresh batch of samples per mode.
            for exact_mode in exact_modes:
                dpp.flush_samples()
                for _ in range(self.nb_samples):
                    dpp.sample_exact_k_dpp(size, exact_mode)
                self.check_right_cardinality(dpp, dpp.list_of_samples)

            # NOTE(review): the mode name is not forwarded to
            # sample_mcmc_k_dpp, so both iterations issue the same call --
            # confirm whether that is intended.
            for _mcmc_mode in mcmc_modes:
                dpp.flush_samples()
                dpp.sample_mcmc_k_dpp(size, nb_iter=self.nb_samples)
                # Only the first entry of list_of_samples is checked here.
                self.check_right_cardinality(dpp, dpp.list_of_samples[0])
    def test_likelihood_kernel(self):
        """Check sample cardinality for a likelihood DPP given by its full kernel.

        The kernel ``L`` is assembled from random geometric eigenvalues
        (shifted by one) and orthonormal columns obtained from a QR
        factorisation.  For every requested size, exact k-DPP samples and an
        MCMC run are produced and passed to ``self.check_right_cardinality``.
        """
        spectrum = 1 + rndm.geometric(p=0.5, size=self.rank)
        basis, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
        likelihood_kernel = (basis * spectrum).dot(basis.T)

        dpp = FiniteDPP(kernel_type='likelihood',
                        projection=False,
                        L=likelihood_kernel)

        exact_modes = ('GS', 'GS_bis', 'KuTa12')
        mcmc_modes = ('AED', 'AD')

        for size in self.sizes:

            # Exact samplers: one fresh batch of samples per mode.
            for exact_mode in exact_modes:
                dpp.flush_samples()
                for _ in range(self.nb_samples):
                    dpp.sample_exact_k_dpp(size, exact_mode)
                self.check_right_cardinality(dpp, dpp.list_of_samples)

            # NOTE(review): the mode name is not forwarded to
            # sample_mcmc_k_dpp, so both iterations issue the same call --
            # confirm whether that is intended.
            for _mcmc_mode in mcmc_modes:
                dpp.flush_samples()
                dpp.sample_mcmc_k_dpp(size, nb_iter=self.nb_samples)
                # Only the first entry of list_of_samples is checked here.
                self.check_right_cardinality(dpp, dpp.list_of_samples[0])
    def test_kernel_eig(self):
        """Check sample cardinality for a correlation DPP given by its eigendecomposition.

        Eigenvalues are drawn uniformly with ``rndm.rand`` and the eigenvectors
        are the orthonormal columns of a QR factorisation; the pair is passed as
        ``K_eig_dec``.  For every requested size, exact k-DPP samples and an
        MCMC run are produced and passed to ``self.check_right_cardinality``.
        """
        spectrum = rndm.rand(self.rank)
        basis, _ = qr(rndm.randn(self.N, self.rank), mode='economic')

        dpp = FiniteDPP(kernel_type='correlation',
                        projection=False,
                        K_eig_dec=(spectrum, basis))

        exact_modes = ('GS', 'GS_bis', 'KuTa12')
        mcmc_modes = ('AED', 'AD')

        for size in self.sizes:

            # Exact samplers: one fresh batch of samples per mode.
            for exact_mode in exact_modes:
                dpp.flush_samples()
                for _ in range(self.nb_samples):
                    dpp.sample_exact_k_dpp(size, exact_mode)
                self.check_right_cardinality(dpp, dpp.list_of_samples)

            # NOTE(review): the mode name is not forwarded to
            # sample_mcmc_k_dpp, so both iterations issue the same call --
            # confirm whether that is intended.
            for _mcmc_mode in mcmc_modes:
                dpp.flush_samples()
                dpp.sample_mcmc_k_dpp(size, nb_iter=self.nb_samples)
                # Only the first entry of list_of_samples is checked here.
                self.check_right_cardinality(dpp, dpp.list_of_samples[0])
Ejemplo n.º 4
0
def sample_dpp_multiple_ts(kernel, k, num_masks):
    """Draw ``num_masks`` exact k-DPP samples of size ``k`` from ``kernel``.

    ``kernel`` is used as the likelihood kernel ``L`` of a ``FiniteDPP``.
    The sampler's ``list_of_samples`` is returned after ``num_masks`` calls
    to ``sample_exact_k_dpp``, i.e. one entry per draw, each entry being the
    sampling result of size ``k``.
    """
    sampler = FiniteDPP('likelihood', L=kernel)
    for _draw in range(num_masks):
        sampler.sample_exact_k_dpp(size=k)
    samples = sampler.list_of_samples
    return samples
Ejemplo n.º 5
0
    def select(
        self, x: np.ndarray, a_x: np.ndarray, batch_size: int
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Pick ``batch_size`` points from ``x`` by drawing one k-DPP sample.

        The likelihood kernel is ``self.kernel(x)`` with ``self.alpha`` added
        on the diagonal.  The sampled indices select the matching rows of
        both ``x`` and ``a_x``, which are returned as a pair.
        """
        kernel_matrix = self.kernel(x)
        diagonal_shift = self.alpha * np.eye(len(x))
        sampler = FiniteDPP("likelihood", **{"L": kernel_matrix + diagonal_shift})

        sampler.sample_exact_k_dpp(size=batch_size)
        chosen = sampler.list_of_samples[0]

        selected_x = x[chosen]
        selected_a_x = a_x[chosen]
        return selected_x, selected_a_x
Ejemplo n.º 6
0
    def test_proj_dpp_sampler_as_kDPP_with_correlation_kernel_A_zono(self):
        """Projection DPP from ``A_zono`` sampled as a k-DPP with k = rank(K).

        The DPP is defined by the orthogonal projection correlation kernel
        built from ``A_zono``: K = A.T (A A.T)^-1 A.  The test asserts that
        the samples have the right 1-point and 2-point inclusion
        probabilities.
        """
        zono_matrix = rndm.randn(self.rank, self.N)
        dpp = FiniteDPP(kernel_type='correlation',
                        projection=True,
                        A_zono=zono_matrix)

        dpp.flush_samples()
        for _draw in range(self.nb_samples):
            dpp.sample_exact_k_dpp(self.rank)

        samples = dpp.list_of_samples
        singletons_ok = self.singleton_adequation(dpp, samples)
        self.assertTrue(singletons_ok)
        doubletons_ok = self.doubleton_adequation(dpp, samples)
        self.assertTrue(doubletons_ok)
Ejemplo n.º 7
0
    def test_proj_dpp_sampler_as_kDPP_with_correlation_kernel(self):
        """Projection DPP from a full kernel ``K`` sampled as a k-DPP with k = rank(K).

        ``K`` is an orthogonal projection correlation kernel assembled from
        unit eigenvalues and orthonormal columns of a QR factorisation.  The
        test asserts that the samples have the right 1-point and 2-point
        inclusion probabilities.
        """
        unit_spectrum = np.ones(self.rank)
        basis, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
        projection_kernel = (basis * unit_spectrum).dot(basis.T)

        dpp = FiniteDPP(kernel_type='correlation',
                        projection=True,
                        K=projection_kernel)

        dpp.flush_samples()
        for _draw in range(self.nb_samples):
            dpp.sample_exact_k_dpp(self.rank)

        samples = dpp.list_of_samples
        singletons_ok = self.singleton_adequation(dpp, samples)
        self.assertTrue(singletons_ok)
        doubletons_ok = self.doubleton_adequation(dpp, samples)
        self.assertTrue(doubletons_ok)
Ejemplo n.º 8
0
def select_with_dpp(pred_c, k):
    """Sample ``k`` row indices of ``pred_c`` with an exact k-DPP.

    The likelihood kernel is the Gram matrix ``pred_c @ pred_c.T``.  A
    ``RandomState`` seeded with 1 is created on every call and handed to the
    sampler.  The sample is printed and then returned.
    """
    seeded_rng = np.random.RandomState(1)
    predictions = np.array(pred_c)
    # Original note: pred shape = (10, 100), where 100 is the size of the
    # model output.
    gram = predictions.dot(predictions.T)
    sampler = FiniteDPP('likelihood', L=gram)
    chosen = sampler.sample_exact_k_dpp(size=k, random_state=seeded_rng)
    # Original note: the result looks like [7, 1, 5, 9].
    print(chosen)
    return chosen
Ejemplo n.º 9
0
    def test_proj_dpp_sampler_as_kDPP_with_likelihood_kernel_eig_proj_true(
            self):
        """Projection likelihood kernel (``projection=True``) sampled as a k-DPP.

        The DPP is defined by an orthogonal projection likelihood kernel L,
        given through its eigendecomposition, and sampled with k = rank.  The
        test asserts that the samples have the right 1-point and 2-point
        inclusion probabilities.
        """
        unit_spectrum = np.ones(self.rank)
        basis, _ = qr(rndm.randn(self.N, self.rank), mode='economic')

        dpp = FiniteDPP(kernel_type='likelihood',
                        projection=True,
                        L_eig_dec=(unit_spectrum, basis))

        dpp.flush_samples()
        for _draw in range(self.nb_samples):
            dpp.sample_exact_k_dpp(self.rank)

        # The adequation checks run after K has been set to the computed L.
        dpp.compute_L()
        dpp.K = dpp.L

        samples = dpp.list_of_samples
        singletons_ok = self.singleton_adequation(dpp, samples)
        self.assertTrue(singletons_ok)
        doubletons_ok = self.doubleton_adequation(dpp, samples)
        self.assertTrue(doubletons_ok)
Ejemplo n.º 10
0
    def test_proj_dpp_sampler_as_kDPP_with_likelihood_kernel_eig_proj_false(
            self):
        """Projection likelihood kernel declared with ``projection=False``.

        The DPP is defined by an orthogonal projection likelihood kernel L,
        given through its eigendecomposition, and sampled as a k-DPP with
        k = rank.  ``projection`` is set to False on purpose so that sampling
        goes through the computation of elementary symmetric polynomials etc.
        The test asserts that the samples have the right 1-point and 2-point
        inclusion probabilities.
        """
        # First `rank` eigenvalues are one, the remaining ones are zero.
        spectrum = np.zeros(self.N)
        spectrum[:self.rank] = 1.0

        basis, _ = qr(rndm.randn(self.N, self.N), mode='economic')

        dpp = FiniteDPP(kernel_type='likelihood',
                        projection=False,
                        L_eig_dec=(spectrum, basis))

        dpp.flush_samples()
        for _draw in range(self.nb_samples):
            dpp.sample_exact_k_dpp(self.rank)

        # The adequation checks run after K has been set to the computed L.
        dpp.compute_L()
        dpp.K = dpp.L

        samples = dpp.list_of_samples
        singletons_ok = self.singleton_adequation(dpp, samples)
        self.assertTrue(singletons_ok)
        doubletons_ok = self.doubleton_adequation(dpp, samples)
        self.assertTrue(doubletons_ok)
Ejemplo n.º 11
0
Archivo: pb2.py Proyecto: os-popt/PB2
def get_diverse(X, y, size=10):
    """Return a diverse subset of the rows of ``X`` and ``y`` via a k-DPP.

    This is a heuristic for the case where the covariance matrix is ill
    conditioned.  Inputs with at most 10 rows are returned unchanged.
    Otherwise half of the rows (rounded down) are selected by one exact
    k-DPP sample whose likelihood kernel is ``L = K (I - K)^-1``, with
    ``K = rbf_kernel(X, X)``.

    Args:
        X: 2-D array of inputs, one row per point.
        y: 2-D array with one row per point (it is indexed as ``y[idx, :]``).
        size: Kept for interface compatibility.  NOTE: the value passed in is
            never used -- the sample size is always ``X.shape[0] // 2``.

    Returns:
        A tuple ``(newX, newy)`` with the selected rows, or ``(X, y)``
        unchanged when the input is small, when ``I - K`` cannot be inverted,
        or when the sampler raises ``ValueError``.
    """
    n_rows = X.shape[0]
    if n_rows <= 10:
        return (X, y)

    size = n_rows // 2

    K = rbf_kernel(X, X)

    # A singular (I - K) is exactly the ill-conditioned situation this
    # heuristic exists for, so it takes the same fallback as a sampling
    # failure instead of propagating out of the function.
    try:
        L = np.matmul(K, np.linalg.inv(np.eye(K.shape[0]) - K))
    except np.linalg.LinAlgError:
        return (X, y)

    DPP = FiniteDPP('likelihood', **{'L': L})
    DPP.flush_samples()
    try:
        DPP.sample_exact_k_dpp(size=size)
    except ValueError:
        return (X, y)

    chosen = DPP.list_of_samples[0]
    newX = X[chosen, :]
    newy = y[chosen, :]
    return (newX, newy)
Ejemplo n.º 12
0
def DPP_kernel(adjacency, kernel, k=50):
    """
        DPP on graph using the kernel as L

        Input:
        - adjacency: adjacency matrix
        - kernel: callable applied to the adjacency matrix; its result is
          used as the likelihood kernel L
        - k: number of nodes to sample

        Output:
        - indices of the sample
    """
    L = kernel(adjacency)
    DPP = FiniteDPP('likelihood', **{'L': L})

    DPP.sample_exact_k_dpp(k)
    # Read the sample back from the sampler's sample list rather than from
    # the call's return value.  NOTE(review): the return value of
    # sample_exact_k_dpp is believed to be None on older DPPy releases --
    # confirm against the pinned version.  The freshly drawn sample is the
    # last entry either way.
    inds = DPP.list_of_samples[-1]
    return inds
Ejemplo n.º 13
0
def sample_dpp(kernel, k):
    """Draw one exact k-DPP sample of size ``k`` with ``kernel`` as likelihood kernel.

    Returns the first entry of the sampler's ``list_of_samples``.  The
    sample length is not checked against ``k`` here.
    """
    sampler = FiniteDPP('likelihood', L=kernel)
    sampler.sample_exact_k_dpp(size=k)
    first_sample = sampler.list_of_samples[0]
    return first_sample
Ejemplo n.º 14
0
    gaps = []
    t_gap = []
    avgs = []
    # Generate a dataset of size Max N, Y is sampled from the prior
    X = np.random.randn(N_sequence[-1], 1)
    Kff = k.compute_K_symm(X)
    Y = np.random.multivariate_normal(
        mean=np.zeros(N_sequence[-1]),
        cov=Kff + np.square(sn) * np.eye(N_sequence[-1]))[:, None]

    for N, M in zip(N_sequence, M_sequence):
        X_cur = X[:N, :]
        kff = k.compute_K_symm(X_cur)
        DPP = FiniteDPP('likelihood', **{'L': kff + np.eye(N) * 1e-6})
        DPP.flush_samples()
        DPP.sample_exact_k_dpp(size=M)
        ind = DPP.list_of_samples[0]
        Z_cur = X_cur[ind]
        Y_cur = Y[:N, :]
        with gpflow.settings.temp_settings(low_jitter):
            # bound from theorem 4
            avg_kl = KL_bound2(k_var=k.variance.value,
                               k_ls=lengthscale,
                               sigma_n=sn,
                               N=N,
                               p_sd=1,
                               p_success=0.5,
                               M=M)
            # We set the GP to have the parameters used in generating data
            full_m = gpflow.models.GPR(X_cur, Y_cur, k)
            full_m.likelihood.variance = np.square(sn)
Ejemplo n.º 15
0
    def erun(self):
        """Train the model for link prediction and save the best embedding.

        Steps, in order:
          1. Load the dataset named ``self.data_name`` with ``format_data``.
          2. Choose a set of node indices: sampled with a k-DPP (size 21) on
             the dense adjacency matrix for 'cora', hard-coded for 'citeseer'
             and 'pubmed'.
          3. Fit a ``KernelDensity`` on the PCA-compressed features of those
             nodes.
          4. Build model and optimizer, then train for ``self.iteration``
             epochs, printing validation scores every epoch and test scores
             every 10th epoch.
          5. Save the embedding with the highest recorded test ROC score to
             ``result/<data_name>_link_64_64_new.mat``.

        Raises:
            ValueError: if ``self.data_name`` is not one of 'cora',
                'citeseer' or 'pubmed'.
        """
        model_str = self.model
        # formatted data
        feas = format_data(self.data_name)

        # Define placeholders.  (Original note: get_placeholder only needs
        # `adj` because it uses adj.shape; the call below also passes
        # num_features.)
        placeholders = get_placeholder(feas['adj'], feas['num_features'])

        pca = PCA(n_components=FLAGS.hidden2)

        # Node indices used to define the Gaussian mixture estimated from the
        # DPP sample and the density estimation.
        if self.data_name == 'cora':
            # The dense adjacency matrix and the DPP object are only needed
            # for this dataset, so they are built here instead of
            # unconditionally for every dataset.
            DPP = FiniteDPP('correlation', **{'K': feas['adj'].toarray()})
            DPP.sample_exact_k_dpp(size=21)
            index = DPP.list_of_samples[0]
        elif self.data_name == 'citeseer':
            index = np.array([
                1782, 741, 3258, 3189, 3112, 2524, 2895, 1780, 1100, 2735,
                1318, 2944, 1825, 18, 987, 2564, 463, 6, 3173, 701, 1901, 2349,
                2786, 2412, 646, 2626, 2648, 1793, 432, 538, 1729, 1217, 1397,
                1932, 2850, 458, 2129, 702, 2934, 2030, 2882, 1393, 308, 1271,
                1106, 2688, 629, 1145, 3251, 1903, 1004, 1149, 1385, 285, 858,
                2977, 844, 335, 532, 404, 3174, 528
            ])
        elif self.data_name == 'pubmed':
            index = np.array(
                [842, 3338, 5712, 17511, 10801, 2714, 6970, 13296, 5466, 2230])
        else:
            # Previously this surfaced later as a NameError on `index`.
            raise ValueError(
                'Unsupported data_name {!r}; expected one of '
                "'cora', 'citeseer', 'pubmed'".format(self.data_name))

        feature_sample = feas['features_dense']
        feature_sample = pca.fit_transform(feature_sample)

        featuresCompress = np.array([feature_sample[i] for i in index])
        kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)

        # construct model
        d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(
            model_str, placeholders, feas['num_features'], feas['num_nodes'],
            feas['features_nonzero'])

        # Optimizer
        opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph,
                            discriminator, placeholders, feas['pos_weight'],
                            feas['norm'], d_real, feas['num_nodes'], GD_real)

        # Initialize session
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

        val_roc_score = []
        record = []      # [roc, ap] of every test evaluation
        record_emb = []  # embedding matching each entry of `record`
        # Train model
        for epoch in range(self.iteration):

            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                   feas['adj_label'], feas['features'],
                                   placeholders, feas['adj'], kde,
                                   feas['features_dense'])

            lm_train = linkpred_metrics(feas['val_edges'],
                                        feas['val_edges_false'])
            roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)
            val_roc_score.append(roc_curr)
            print(
                "Epoch:", '%04d' % (epoch + 1),
                "train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}"
                .format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3],
                        avg_cost[4]), "val_roc=",
                "{:.5f}".format(val_roc_score[-1]), "val_ap=",
                "{:.5f}".format(ap_curr))

            # Test-set evaluation happens every 10th epoch only.
            if (epoch + 1) % 10 == 0:
                lm_test = linkpred_metrics(feas['test_edges'],
                                           feas['test_edges_false'])
                roc_score, ap_score, _ = lm_test.get_roc_score(emb, feas)
                print('Test ROC score: ' + str(roc_score))
                print('Test AP score: ' + str(ap_score))
                record.append([roc_score, ap_score])
                record_emb.append(emb)

        if not record:
            # Fewer than 10 epochs were run, so there is no test evaluation
            # to choose from; previously this failed with an IndexError.
            print('No test evaluation was recorded '
                  '(self.iteration < 10); nothing saved.')
            return

        # Pick the first evaluation with the highest test ROC score.
        rec = np.array(record)
        roc_scores = rec[:, 0].tolist()
        best = roc_scores.index(max(roc_scores))
        emb = record_emb[best]
        ana = record[best]
        scio.savemat('result/{}_link_64_64_new.mat'.format(self.data_name), {
            'embedded': emb,
            'labels': feas['true_labels']
        })
        print('The peak val_roc=%f, ap = %f' % (ana[0], ana[1]))
Ejemplo n.º 16
0
    def erun(self):
        """Train the model for clustering and save the best embedding.

        Steps, in order:
          1. Load the dataset named ``self.data_name`` with ``format_data``.
          2. Choose a set of node indices: sampled with a k-DPP (size 24) on
             the dense adjacency matrix for 'cora', hard-coded for 'citeseer'
             and 'pubmed'.
          3. Fit a ``KernelDensity`` on the PCA-compressed features of those
             nodes.
          4. Build model and optimizer, then train for ``self.iteration``
             epochs; every 2nd epoch the embedding is clustered with KMeans
             and scored against ``feas['true_labels']``.
          5. Save the embedding whose first recorded metric is highest to
             ``result/<data_name>.mat``.

        Raises:
            ValueError: if ``self.data_name`` is not one of 'cora',
                'citeseer' or 'pubmed'.
        """
        model_str = self.model

        # formatted data
        feas = format_data(self.data_name)

        # Define placeholders
        placeholders = get_placeholder(feas['adj'], feas['num_features'])

        pca = PCA(n_components=FLAGS.hidden2)

        # Node indices used to define the Gaussian mixture estimated from the
        # DPP sample and the density estimation.
        if self.data_name == 'cora':
            # The dense adjacency matrix and the DPP object are only needed
            # for this dataset, so they are built here instead of
            # unconditionally for every dataset.
            DPP = FiniteDPP('correlation', **{'K': feas['adj'].toarray()})
            DPP.sample_exact_k_dpp(size=24)
            index = DPP.list_of_samples[0]
        elif self.data_name == 'citeseer':
            # Earlier candidate index sets, kept for reference only (the
            # remarks are the original author's notes):
            #   [481, 1763, 1701, 171, 1425, 842]
            #       -- highest at epoch 36: 0.571
            #   [3165, 589, 1283, 1756, 2221, 2409]
            #       -- can reach 0.545 at 50
            #   "this one has the best performance":
            #   [1718, 3241, 787, 2727, 624, 3110, 1503, 1867, 2410, 1594,
            #    1203, 2711, 171, 1790, 1778, 294, 685, 39, 1700, 2650, 2028,
            #    2573, 375, 2744, 2302, 1876, 784, 2233, 2546, 1793, 1677,
            #    3278, 2587, 2623, 1018, 1160, 3166, 668, 1663, 3007, 864,
            #    2893, 743, 3129, 3104, 3277, 1643, 3047, 322, 298, 2894, 35,
            #    2578, 2031, 3316, 1815, 361, 1868, 1546, 1895, 1514, 636]
            # Only the set below was ever effective: the earlier assignments
            # were immediately overwritten.  (Original note on it: "50".)
            index = np.array([2300, 2725, 3313, 1216, 2821, 2432])
        elif self.data_name == 'pubmed':
            index = np.array([842, 3338, 5712, 17511, 10801, 2714, 6970,
                              13296, 5466, 2230, 14052])
        else:
            # Previously this surfaced later as a NameError on `index`.
            raise ValueError(
                'Unsupported data_name {!r}; expected one of '
                "'cora', 'citeseer', 'pubmed'".format(self.data_name))

        feature_sample = feas['features_dense']
        feature_sample = pca.fit_transform(feature_sample)

        featuresCompress = np.array([feature_sample[i] for i in index])
        kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)

        # construct model
        d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(
            model_str, placeholders, feas['num_features'], feas['num_nodes'],
            feas['features_nonzero'])

        # Optimizer
        opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph,
                            discriminator, placeholders, feas['pos_weight'],
                            feas['norm'], d_real, feas['num_nodes'], GD_real)

        # Initialize session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        sess.run(tf.global_variables_initializer())

        record = []      # metric triples returned by the clustering evaluation
        record_emb = []  # embedding matching each entry of `record`
        # Train model
        for epoch in range(self.iteration):
            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                   feas['adj_label'], feas['features'],
                                   placeholders, feas['adj'], kde,
                                   feas['features_dense'])

            # Clustering evaluation happens every 2nd epoch only.
            if (epoch + 1) % 2 == 0:
                record_emb.append(emb)
                kmeans = KMeans(n_clusters=self.n_clusters,
                                random_state=0).fit(emb)
                print("Epoch:", '%04d' % (epoch + 1))
                predict_labels = kmeans.predict(emb)
                cm = clustering_metrics(feas['true_labels'], predict_labels)
                [a, b, c] = cm.evaluationClusterModelFromLabel()
                record.append([a, b, c])

        if not record:
            # Fewer than 2 epochs were run, so there is no evaluation to
            # choose from; previously this failed with an IndexError.
            print('No clustering evaluation was recorded '
                  '(self.iteration < 2); nothing saved.')
            return

        # Pick the first evaluation whose first metric is highest.
        rec = np.array(record)
        first_metric = rec[:, 0].tolist()
        best = first_metric.index(max(first_metric))
        ana = record[best]
        print('------------------------------------', best)
        emb = record_emb[best]
        scio.savemat('result/{}.mat'.format(self.data_name),
                     {'embedded': emb,
                      'labels': feas['true_labels']})
        print('The peak ACC=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (ana[0], ana[1], ana[2]))