def test_observed(self):
    """
    Check the moments of categorical nodes after data has been observed.

    Covers a node without plates, with one plate axis and with two plate
    axes (the expected first moment is a one-hot encoding of the observed
    category in each case), then checks that invalid observations raise
    ValueError.
    """

    # No plates: one observed category
    node = Categorical([0.7, 0.2, 0.1])
    node.observe(2)
    moments = node._message_to_child()
    self.assertAllClose(moments[0], [0, 0, 1])

    # A single plate axis of length two
    node = Categorical([0.7, 0.2, 0.1], plates=(2,))
    node.observe([2, 1])
    moments = node._message_to_child()
    self.assertAllClose(
        moments[0],
        [[0, 0, 1],
         [0, 1, 0]],
    )

    # Two plate axes (2 x 3) over four categories
    node = Categorical([0.7, 0.1, 0.1, 0.1], plates=(2, 3))
    node.observe([[2, 1, 1],
                  [0, 2, 3]])
    moments = node._message_to_child()
    self.assertAllClose(
        moments[0],
        [[[0, 0, 1, 0], [0, 1, 0, 0], [0, 1, 0, 0]],
         [[1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]],
    )

    # A negative index, an index past the last category and a
    # non-integer value are each expected to raise ValueError.
    node = Categorical([0.7, 0.2, 0.1])
    for invalid in (-1, 3, 1.5):
        self.assertRaises(ValueError, node.observe, invalid)
def test_moments(self):
    """
    Check the first moment of categorical nodes for several parent types.

    The node is built from a fixed probability vector, from a randomly
    drawn probability array, from a Dirichlet parent, and from a Dirichlet
    parent with plates.
    """

    # Fixed probability vector: exactly one moment, equal to the vector
    node = Categorical([0.7, 0.2, 0.1])
    moments = node._message_to_child()
    self.assertEqual(len(moments), 1)
    self.assertAllClose(moments[0], [0.7, 0.2, 0.1])

    # Probability array drawn with size=3, so the rows act as plates
    probs = np.random.dirichlet([1, 1], size=3)
    node = Categorical(probs)
    moments = node._message_to_child()
    self.assertAllClose(moments[0], probs)

    # Dirichlet parent: the expected moment is the normalised exponential
    # of the first component of the parent's message.
    prior = Dirichlet([7, 3])
    logp = prior._message_to_child()[0]
    weight0 = np.exp(logp[0])
    weight1 = np.exp(logp[1])
    normaliser = weight0 + weight1
    expected = np.array([weight0 / normaliser, weight1 / normaliser])
    node = Categorical(prior)
    moments = node._message_to_child()
    self.assertAllClose(moments[0], expected)

    # Dirichlet parent with plates: compare after broadcasting both sides
    # to the full shape, reusing the expectation computed above.
    prior = Dirichlet([7, 3], plates=(10,))
    node = Categorical(prior)
    moments = node._message_to_child()
    broadcast_actual = moments[0] * np.ones(node.get_shape(0))
    broadcast_expected = expected * np.ones((10, 1))
    self.assertAllClose(broadcast_actual, broadcast_expected)
def test_observed(self):
    """
    Test observed categorical nodes.

    Each case constructs a node, observes data and compares the first
    moment with the expected one-hot array. Afterwards, invalid
    observations are checked to raise ValueError.
    """

    # (probabilities, extra constructor kwargs, observation, expected u[0])
    cases = [
        # Single observation
        ([0.7, 0.2, 0.1], {}, 2, [0, 0, 1]),
        # One plate axis
        (
            [0.7, 0.2, 0.1],
            {"plates": (2,)},
            [2, 1],
            [[0, 0, 1],
             [0, 1, 0]],
        ),
        # Several plate axes
        (
            [0.7, 0.1, 0.1, 0.1],
            {"plates": (2, 3)},
            [[2, 1, 1],
             [0, 2, 3]],
            [[[0, 0, 1, 0], [0, 1, 0, 0], [0, 1, 0, 0]],
             [[1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]],
        ),
    ]
    for probabilities, extra_kwargs, observation, expected in cases:
        node = Categorical(probabilities, **extra_kwargs)
        node.observe(observation)
        message = node._message_to_child()
        self.assertAllClose(message[0], expected)

    # Check invalid observations
    node = Categorical([0.7, 0.2, 0.1])
    for bad_value in (-1, 3, 1.5):
        with self.assertRaises(ValueError):
            node.observe(bad_value)
def _run(self, x, K=25, beta=0.5, alpha=0.00001, hinton_plot=False, end=False):
    '''Fit the Bernoulli mixture to already-transformed data.

    Only to be used when doing parameter optimization.

    Args:
        x: Indexable pair. ``x[0]`` is stored as the participant list and
            its length is the number of data points (i.e. WCS
            participants); ``x[1]`` is the observed data, whose second
            dimension is the number of features.
        K: Number of initial clusters.
        beta: Value used for both parameters of the Beta prior ``P``.
        alpha: Value repeated ``K`` times as the Dirichlet parameter of
            ``R``.
        hinton_plot: If true, show Hinton diagrams of ``Z`` and ``R``.
        end: Accepted for signature compatibility; not used in this method.

    Returns:
        ``Q.L[Q.iter-1]``, i.e. the entry of the inference engine's ``L``
        array for the last completed iteration (called ``log_likelihood``
        here; presumably the variational lower bound -- confirm against
        the BayesPy documentation).

    Side effects:
        Sets ``self.participant_list``, ``self.z``, ``self.r`` and
        ``self.c_assign``.
    '''
    self.participant_list = x[0]
    N = len(x[0])  # number of data points (i.e. WCS participants)
    D = np.shape(x[1])[1]  # number of features

    # Model definition
    R = Dirichlet(K*[alpha], name='R')
    Z = Categorical(R, plates=(N, 1), name='Z')
    P = Beta([beta, beta], plates=(D, K), name='P')
    X = Mixture(Z, Bernoulli, P)

    # Inference
    Q = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(x[1])
    Q.update(repeat=1000)

    log_likelihood = Q.L[Q.iter-1]

    if hinton_plot:
        bpplt.hinton(Z)
        bpplt.pyplot.show()

        bpplt.hinton(R)
        bpplt.pyplot.show()

    # Get the weight matrix stored in Z (weights determine which cluster a
    # data point belongs to).
    z = Z._message_to_child()[0]
    z = z * np.ones(Z.plates+(1,))
    # Drop only the singleton plate axis. A bare np.squeeze would also
    # remove the data axis when N == 1 or the cluster axis when K == 1,
    # which breaks the argmax over axis 1 below.
    z = np.squeeze(z, axis=-2)
    self.z = z

    # Get the weights stored in R (proportional to the size of the clusters).
    r = np.exp(R._message_to_child()[0])
    r = r * np.ones(R.plates+(1,))
    # Keep r one-dimensional even when K == 1 (squeeze alone would yield a
    # 0-d array that cannot be iterated).
    r = np.atleast_1d(np.squeeze(r))
    self.r = r

    # Get the cluster assignment of each data point.
    self.c_assign = np.argmax(self.z, axis=1)

    return log_likelihood
def test_moments(self):
    """
    Test the moments of categorical nodes.

    Exercises a constant probability vector, a probability array with
    plates, a Dirichlet prior and a Dirichlet prior with broadcasted
    plates.
    """

    # Simple test
    simple_probs = [0.7, 0.2, 0.1]
    cat = Categorical(simple_probs)
    msg = cat._message_to_child()
    self.assertEqual(len(msg), 1)
    self.assertAllClose(msg[0], [0.7, 0.2, 0.1])

    # Test plates in p
    plate_probs = np.random.dirichlet([1, 1], size=3)
    cat = Categorical(plate_probs)
    msg = cat._message_to_child()
    self.assertAllClose(msg[0], plate_probs)

    # Test with Dirichlet prior: the reference probabilities are the
    # exponentials of the prior's first message component, normalised
    # so that they sum to one.
    dirichlet = Dirichlet([7, 3])
    logp = dirichlet._message_to_child()[0]
    numerators = [np.exp(logp[k]) for k in (0, 1)]
    denominator = numerators[0] + numerators[1]
    reference = np.array([value / denominator for value in numerators])
    cat = Categorical(dirichlet)
    msg = cat._message_to_child()
    self.assertAllClose(msg[0], reference)

    # Test with broadcasted plates
    dirichlet = Dirichlet([7, 3], plates=(10,))
    cat = Categorical(dirichlet)
    msg = cat._message_to_child()
    self.assertAllClose(
        msg[0] * np.ones(cat.get_shape(0)),
        reference * np.ones((10, 1)),
    )
def test_initialization(self):
    """
    Check random initialization of a categorical node.

    Every row of the probability array puts all mass on one category, so
    the moment after ``initialize_from_random`` is expected to equal the
    probability array itself.
    """

    # RuntimeWarning is silenced for the duration of the check
    # (presumably raised because of the zero probabilities -- confirm).
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        node = Categorical([[0.0, 1.0, 0.0],
                            [0.0, 0.0, 1.0]])
        node.initialize_from_random()
        moments = node._message_to_child()
        self.assertAllClose(
            moments[0],
            [[0, 1, 0],
             [0, 0, 1]],
        )
def test_initialization(self):
    """
    Check random initialization of a categorical node.

    The probability rows are degenerate (all mass on one category), so the
    first moment after ``initialize_from_random`` must match them.
    """

    # Warnings raised inside the block are recorded instead of shown; the
    # recorded list is not inspected by this test.
    with warnings.catch_warnings(record=True):
        degenerate_probs = [[0.0, 1.0, 0.0],
                            [0.0, 0.0, 1.0]]
        node = Categorical(degenerate_probs)
        node.initialize_from_random()
        message = node._message_to_child()
        self.assertAllClose(
            message[0],
            [[0, 1, 0],
             [0, 0, 1]],
        )
def test_initialization(self):
    """
    Test initialization of categorical nodes.

    With one-hot probability rows the randomly initialized moment is
    expected to reproduce those rows.
    """

    one_hot_rows = [[0.0, 1.0, 0.0],
                    [0.0, 0.0, 1.0]]
    expected_moment = [[0, 1, 0],
                       [0, 0, 1]]

    # Test initialization from random, ignoring RuntimeWarning while the
    # node is created, initialized and queried.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        Z = Categorical(one_hot_rows)
        Z.initialize_from_random()
        first_moment = Z._message_to_child()[0]
        self.assertAllClose(first_moment, expected_moment)
def run(self, K=25, beta=0.5, alpha=0.00001, foci_thresh=0, num_neigh=4, hinton_plot=False, end=False):
    '''Performs one run of the BBDP according to the specified parameters.

    The WCS participant data is transformed via ``u.transform_data_all``,
    a Bernoulli mixture model is fitted with variational inference, and
    (when ``self.write_to_file`` is true) the results are written to a
    text file whose name encodes the parameters.

    Args:
        K: Number of initial clusters.
        beta: Value used for both parameters of the Beta prior ``P``.
        alpha: Value repeated ``K`` times as the Dirichlet parameter of
            ``R``.
        foci_thresh: Forwarded to ``u.transform_data_all``.
        num_neigh: Forwarded to ``u.transform_data_all``.
        hinton_plot: If true, show Hinton diagrams of ``Z`` and ``R``.
        end: Forwarded to ``u.transform_data_all``; also selects the
            ``cluster_results_end_`` file name prefix.

    Returns:
        ``self.c_assign``: for each data point, the index of the cluster
        with the largest weight in ``self.z``.

    Side effects:
        Sets ``self.participant_list``, ``self.z``, ``self.r``,
        ``self.c_assign`` and, when writing, ``self.save_path``; appends
        section tags to ``self.in_file``.
    '''
    print("Transforming WCS participant data into binary vectors...")
    x = u.transform_data_all(self.langs, norm=False, end=end, foci=True,
                             foci_thresh=foci_thresh, num_neigh=num_neigh)
    print("Finished transforming participant data")
    self.participant_list = x[0]

    N = len(x[0])  # number of data points (i.e. WCS participants)
    D = np.shape(x[1])[1]  # number of features

    # Model definition
    R = Dirichlet(K*[alpha], name='R')
    Z = Categorical(R, plates=(N, 1), name='Z')
    P = Beta([beta, beta], plates=(D, K), name='P')
    X = Mixture(Z, Bernoulli, P)

    # Inference
    Q = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(x[1])
    Q.update(repeat=1000)

    if hinton_plot:
        bpplt.hinton(Z)
        bpplt.pyplot.show()

        bpplt.hinton(R)
        bpplt.pyplot.show()

    # Get the weight matrix stored in Z (weights determine which cluster a
    # data point belongs to).
    z = Z._message_to_child()[0]
    z = z * np.ones(Z.plates+(1,))
    # Drop only the singleton plate axis. A bare np.squeeze would also
    # remove the data axis when N == 1 or the cluster axis when K == 1,
    # which breaks the argmax over axis 1 below.
    z = np.squeeze(z, axis=-2)
    self.z = z

    # Get the weights stored in R (proportional to the size of the clusters).
    r = np.exp(R._message_to_child()[0])
    r = r * np.ones(R.plates+(1,))
    # Keep r one-dimensional even when K == 1 so it can be iterated when
    # written out below.
    r = np.atleast_1d(np.squeeze(r))
    self.r = r

    # Get the cluster assignment of each data point.
    self.c_assign = np.argmax(self.z, axis=1)

    # Write cluster results to a file.
    if self.write_to_file:
        if end:
            save_path = "cluster_results_end_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh)
        else:
            save_path = "cluster_results_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh)

        # Avoid overwriting an existing file: append " (1)" the first time,
        # then bump the number inside the parentheses until the name is free.
        while path.exists(save_path+".txt"):
            try:
                old_file_num = int(save_path[save_path.find('(')+1:-1])
                new_file_num = old_file_num + 1
                save_path = save_path[0:save_path.find('(')] + '(' + str(new_file_num) + ')'
            except ValueError:
                # No numeric "(n)" suffix yet.
                save_path = save_path + " (1)"

        self.save_path = save_path

        # NOTE(review): a section is skipped when its tag is already in
        # self.in_file. If that list is not reset between runs, a later
        # run would create an empty file -- confirm this is intended.
        # The context manager closes the file even if a write fails.
        with open(path.abspath(self.save_path+".txt"), 'w') as out_file:
            # Write cluster assignment matrix Z (gives the probability that
            # observation i belongs to cluster j).
            if 'Z' not in self.in_file:
                for row in self.z:
                    out_file.write("\t".join([str(v) for v in row]) + "\n")
                out_file.write('---Z\n')
                self.in_file.append('Z')

            # Write cluster weights matrix R (proportional to the size of
            # the resulting clusters).
            if 'R' not in self.in_file:
                out_file.write("\t".join([str(v) for v in self.r]) + "\n")
                out_file.write('---R\n')
                self.in_file.append('R')

            # Write deterministic cluster assignments with the
            # corresponding participant key.
            if 'C' not in self.in_file:
                line1 = "\t".join([str(v) for v in self.participant_list]) + "\n"
                line2 = "\t".join([str(v) for v in self.c_assign]) + "\n"
                out_file.write(line1)
                out_file.write(line2)
                out_file.write('---C\n')
                self.in_file.append('C')

    return self.c_assign