Beispiel #1
0
    def test_moments(self):
        """
        Test the moments of categorical nodes.
        """

        # Simple test
        X = Categorical([0.7, 0.2, 0.1])
        u = X._message_to_child()
        self.assertEqual(len(u), 1)
        self.assertAllClose(u[0], [0.7, 0.2, 0.1])

        # Test plates in p
        p = np.random.dirichlet([1, 1], size=3)
        X = Categorical(p)
        u = X._message_to_child()
        self.assertAllClose(u[0], p)

        # Test with Dirichlet prior
        P = Dirichlet([7, 3])
        logp = P._message_to_child()[0]
        p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1]))
        p1 = np.exp(logp[1]) / (np.exp(logp[0]) + np.exp(logp[1]))
        X = Categorical(P)
        u = X._message_to_child()
        p = np.array([p0, p1])
        self.assertAllClose(u[0], p)

        # Test with broadcasted plates
        P = Dirichlet([7, 3], plates=(10, ))
        X = Categorical(P)
        u = X._message_to_child()
        self.assertAllClose(u[0] * np.ones(X.get_shape(0)), p * np.ones(
            (10, 1)))

        pass
Beispiel #2
0
    def test_moments(self):
        """
        Test the moments of multinomial nodes.
        """

        # Simple test
        X = Multinomial(1, [0.7, 0.2, 0.1])
        u = X._message_to_child()
        self.assertEqual(len(u), 1)
        self.assertAllClose(u[0], [0.7, 0.2, 0.1])

        # Test n
        X = Multinomial(10, [0.7, 0.2, 0.1])
        u = X._message_to_child()
        self.assertAllClose(u[0], [7, 2, 1])

        # Test plates in p
        n = np.random.randint(1, 10)
        p = np.random.dirichlet([1, 1], size=3)
        X = Multinomial(n, p)
        u = X._message_to_child()
        self.assertAllClose(u[0], p * n)

        # Test plates in n
        n = np.random.randint(1, 10, size=(3, ))
        p = np.random.dirichlet([1, 1, 1, 1])
        X = Multinomial(n, p)
        u = X._message_to_child()
        self.assertAllClose(u[0], p * n[:, None])

        # Test plates in p and n
        n = np.random.randint(1, 10, size=(4, 1))
        p = np.random.dirichlet([1, 1], size=3)
        X = Multinomial(n, p)
        u = X._message_to_child()
        self.assertAllClose(u[0], p * n[..., None])

        # Test with Dirichlet prior
        P = Dirichlet([7, 3])
        logp = P._message_to_child()[0]
        p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1]))
        p1 = np.exp(logp[1]) / (np.exp(logp[0]) + np.exp(logp[1]))
        X = Multinomial(1, P)
        u = X._message_to_child()
        p = np.array([p0, p1])
        self.assertAllClose(u[0], p)

        # Test with broadcasted plates
        P = Dirichlet([7, 3], plates=(10, ))
        X = Multinomial(5, P)
        u = X._message_to_child()
        self.assertAllClose(u[0] * np.ones(X.get_shape(0)), 5 * p * np.ones(
            (10, 1)))

        pass
Beispiel #3
0
    def test_moments(self):
        """
        Test the moments of Dirichlet nodes.
        """

        p = Dirichlet([2, 3, 4])
        u = p._message_to_child()
        self.assertAllClose(u[0],
                            special.psi([2,3,4]) - special.psi(2+3+4))
        
        pass
Beispiel #4
0
    def test_constant(self):
        """
        Test the constant moments of Dirichlet nodes.
        """

        p = Dirichlet([1, 1, 1])
        p.initialize_from_value([0.5, 0.4, 0.1])
        u = p._message_to_child()
        self.assertAllClose(u[0], np.log([0.5, 0.4, 0.1]))

        pass
Beispiel #5
0
    def test_moments(self):
        """
        Test the moments of Dirichlet nodes.
        """

        p = Dirichlet([2, 3, 4])
        u = p._message_to_child()
        self.assertAllClose(u[0],
                            special.psi([2, 3, 4]) - special.psi(2 + 3 + 4))

        pass
Beispiel #6
0
    def test_constant(self):
        """
        Test the constant moments of Dirichlet nodes.
        """

        p = Dirichlet([1, 1, 1])
        p.initialize_from_value([0.5, 0.4, 0.1])
        u = p._message_to_child()
        self.assertAllClose(u[0],
                            np.log([0.5, 0.4, 0.1]))

        pass
Beispiel #7
0
    def _run(self, x, K=25, beta=0.5, alpha=0.00001, hinton_plot=False, end=False):
        '''Only to be used when doing parameter optimization.'''

        self.participant_list = x[0]
        
        N = len(x[0])            #number of data points (i.e. WCS participants)
        D = np.shape(x[1])[1]    #number of features
        #K = 20            #number of initial clusters
        
        R = Dirichlet(K*[alpha],
                      name='R')
        Z = Categorical(R,
                        plates=(N,1),
                        name='Z')
        
        P = Beta([beta, beta],
                 plates=(D,K),
                 name='P')
        
        X = Mixture(Z, Bernoulli, P)
        
        Q = VB(Z, R, X, P)
        P.initialize_from_random()
        X.observe(x[1])
        Q.update(repeat=1000)

        log_likelihood = Q.L[Q.iter-1]

        if hinton_plot:
            bpplt.hinton(Z)
            bpplt.pyplot.show()
            
            bpplt.hinton(R)
            bpplt.pyplot.show()

        #Get the weight matrix stored in Z (weights determine which cluster data point belongs to)
        z = Z._message_to_child()[0]
        z = z * np.ones(Z.plates+(1,))
        z = np.squeeze(z)
        self.z = z

        #Get the weights stored in R (proportional to the size of the clusters)
        r = np.exp(R._message_to_child()[0])
        r = r * np.ones(R.plates+(1,))
        r = np.squeeze(r)
        self.r = r

        #Get the cluster assignment of each data point
        self.c_assign = np.argmax(self.z, axis=1)

        return log_likelihood
Beispiel #8
0
    def test_moments(self):
        """
        Test the moments of categorical nodes.
        """

        # Simple test
        X = Categorical([0.7,0.2,0.1])
        u = X._message_to_child()
        self.assertEqual(len(u), 1)
        self.assertAllClose(u[0],
                            [0.7,0.2,0.1])

        # Test plates in p
        p = np.random.dirichlet([1,1], size=3)
        X = Categorical(p)
        u = X._message_to_child()
        self.assertAllClose(u[0],
                            p)
        
        # Test with Dirichlet prior
        P = Dirichlet([7, 3])
        logp = P._message_to_child()[0]
        p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1]))
        p1 = np.exp(logp[1]) / (np.exp(logp[0]) + np.exp(logp[1]))
        X = Categorical(P)
        u = X._message_to_child()
        p = np.array([p0, p1])
        self.assertAllClose(u[0],
                            p)

        # Test with broadcasted plates
        P = Dirichlet([7, 3], plates=(10,))
        X = Categorical(P)
        u = X._message_to_child()
        self.assertAllClose(u[0] * np.ones(X.get_shape(0)),
                            p*np.ones((10,1)))

        pass
    def test_moments(self):
        """
        Test the moments of multinomial nodes.
        """

        # Simple test
        X = Multinomial(1, [0.7,0.2,0.1])
        u = X._message_to_child()
        self.assertEqual(len(u), 1)
        self.assertAllClose(u[0],
                            [0.7,0.2,0.1])

        # Test n
        X = Multinomial(10, [0.7,0.2,0.1])
        u = X._message_to_child()
        self.assertAllClose(u[0],
                            [7,2,1])

        # Test plates in p
        n = np.random.randint(1, 10)
        p = np.random.dirichlet([1,1], size=3)
        X = Multinomial(n, p)
        u = X._message_to_child()
        self.assertAllClose(u[0],
                            p*n)
        
        # Test plates in n
        n = np.random.randint(1, 10, size=(3,))
        p = np.random.dirichlet([1,1,1,1])
        X = Multinomial(n, p)
        u = X._message_to_child()
        self.assertAllClose(u[0],
                            p*n[:,None])

        # Test plates in p and n
        n = np.random.randint(1, 10, size=(4,1))
        p = np.random.dirichlet([1,1], size=3)
        X = Multinomial(n, p)
        u = X._message_to_child()
        self.assertAllClose(u[0],
                            p*n[...,None])

        # Test with Dirichlet prior
        P = Dirichlet([7, 3])
        logp = P._message_to_child()[0]
        p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1]))
        p1 = np.exp(logp[1]) / (np.exp(logp[0]) + np.exp(logp[1]))
        X = Multinomial(1, P)
        u = X._message_to_child()
        p = np.array([p0, p1])
        self.assertAllClose(u[0],
                            p)

        # Test with broadcasted plates
        P = Dirichlet([7, 3], plates=(10,))
        X = Multinomial(5, P)
        u = X._message_to_child()
        self.assertAllClose(u[0] * np.ones(X.get_shape(0)),
                            5*p*np.ones((10,1)))

        pass
Beispiel #10
0
    def run(self, K=25, beta=0.5, alpha=0.00001, foci_thresh=0, num_neigh=4, hinton_plot=False, end=False):
        '''Performs one run of the BBDP according to the specified parameters.'''

        print("Transforming WCS participant data into binary vectors...")
        x = u.transform_data_all(self.langs, norm=False, end=end, foci=True, foci_thresh=foci_thresh, num_neigh=num_neigh)
        print("Finished transforming participant data") 
        self.participant_list = x[0]
        
        N = len(x[0])            #number of data points (i.e. WCS participants)
        D = np.shape(x[1])[1]    #number of features
        #K = 20            #number of initial clusters
        
        R = Dirichlet(K*[alpha],
                      name='R')
        Z = Categorical(R,
                        plates=(N,1),
                        name='Z')
        
        P = Beta([beta, beta],
                 plates=(D,K),
                 name='P')
        
        X = Mixture(Z, Bernoulli, P)
        
        Q = VB(Z, R, X, P)
        P.initialize_from_random()
        X.observe(x[1])
        Q.update(repeat=1000)

        if hinton_plot:
            bpplt.hinton(Z)
            bpplt.pyplot.show()
            
            bpplt.hinton(R)
            bpplt.pyplot.show()

        #Get the weight matrix stored in Z (weights determine which cluster data point belongs to)
        z = Z._message_to_child()[0]
        z = z * np.ones(Z.plates+(1,))
        z = np.squeeze(z)
        self.z = z

        #Get the weights stored in R (proportional to the size of the clusters)
        r = np.exp(R._message_to_child()[0])
        r = r * np.ones(R.plates+(1,))
        r = np.squeeze(r)
        self.r = r

        #Get the cluster assignment of each data point
        self.c_assign = np.argmax(self.z, axis=1)

        #Write cluster results to a file
        if self.write_to_file:
            if end:
                save_path = "cluster_results_end_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh)
            else:
                save_path = "cluster_results_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh)
            while path.exists(save_path+".txt"):
                #save_path already exists
                try:
                    old_file_num = int(save_path[save_path.find('(')+1:-1])
                    new_file_num = old_file_num + 1
                    save_path = save_path[0:save_path.find('(')] + '(' + str(new_file_num) + ')'
                except ValueError:
                    save_path = save_path + " (1)"

            self.save_path = save_path       
            file = open(path.abspath(self.save_path+".txt"), 'w')
            
            #Write cluster assignment matrix Z (gives the probability that observation i belongs to cluster j)
            if 'Z' not in self.in_file:
                for i in range(len(self.z)):
                    line = "\t".join([str(x) for x in self.z[i]]) + "\n"
                    file.write(line)
                file.write('---Z\n')
                self.in_file.append('Z')

            #Write cluster weights matrix R (proportional to the size of the resulting clusters)
            if 'R' not in self.in_file:
                line = "\t".join([str(x) for x in self.r]) + "\n"
                file.write(line)
                file.write('---R\n')
                self.in_file.append('R')

            #Write deterministic cluster assignments with the corresponding participant key
            if 'C' not in self.in_file:
                line1 = "\t".join([str(x) for x in self.participant_list]) + "\n"
                line2 = "\t".join([str(x) for x in self.c_assign]) + "\n"              
                file.write(line1)
                file.write(line2)
                file.write('---C\n')
                self.in_file.append('C')
            
            file.close()

        return self.c_assign
Beispiel #11
0
# coding: utf-8

## Gaussian mixture model

# Do some stuff:

# In[2]:

from bayespy.nodes import Dirichlet
alpha = Dirichlet([1e-3, 1e-3, 1e-3])
print(alpha._message_to_child())


# Nice!