def test_moments(self):
    """
    Test the moments of categorical nodes.
    """

    # Simple test
    X = Categorical([0.7, 0.2, 0.1])
    u = X._message_to_child()
    self.assertEqual(len(u), 1)
    self.assertAllClose(u[0], [0.7, 0.2, 0.1])

    # Test plates in p
    p = np.random.dirichlet([1, 1], size=3)
    X = Categorical(p)
    u = X._message_to_child()
    self.assertAllClose(u[0], p)

    # Test with Dirichlet prior
    P = Dirichlet([7, 3])
    logp = P._message_to_child()[0]
    p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1]))
    p1 = np.exp(logp[1]) / (np.exp(logp[0]) + np.exp(logp[1]))
    X = Categorical(P)
    u = X._message_to_child()
    p = np.array([p0, p1])
    self.assertAllClose(u[0], p)

    # Test with broadcasted plates
    P = Dirichlet([7, 3], plates=(10,))
    X = Categorical(P)
    u = X._message_to_child()
    self.assertAllClose(u[0] * np.ones(X.get_shape(0)),
                        p * np.ones((10, 1)))

    pass
def test_init(self):
    """
    Test the creation of Dirichlet nodes.
    """

    # Some simple initializations
    p = Dirichlet([1.5, 4.2, 3.5])

    # Check that plates are correct
    p = Dirichlet([2, 3, 4], plates=(4, 3))
    self.assertEqual(p.plates, (4, 3))
    p = Dirichlet(np.ones((4, 3, 5)))
    self.assertEqual(p.plates, (4, 3))

    # Parent not a vector
    self.assertRaises(ValueError, Dirichlet, 4)

    # Parent vector has invalid values
    self.assertRaises(ValueError, Dirichlet, [-2, 3, 1])

    # Plates inconsistent
    self.assertRaises(ValueError, Dirichlet, np.ones((4, 3)), plates=(3,))

    # Explicit plates too small
    self.assertRaises(ValueError, Dirichlet, np.ones((4, 3)), plates=(1,))

    pass
def test_moments(self):
    """
    Test the moments of multinomial nodes.
    """

    # Simple test
    X = Multinomial(1, [0.7, 0.2, 0.1])
    u = X._message_to_child()
    self.assertEqual(len(u), 1)
    self.assertAllClose(u[0], [0.7, 0.2, 0.1])

    # Test n
    X = Multinomial(10, [0.7, 0.2, 0.1])
    u = X._message_to_child()
    self.assertAllClose(u[0], [7, 2, 1])

    # Test plates in p
    n = np.random.randint(1, 10)
    p = np.random.dirichlet([1, 1], size=3)
    X = Multinomial(n, p)
    u = X._message_to_child()
    self.assertAllClose(u[0], p * n)

    # Test plates in n
    n = np.random.randint(1, 10, size=(3,))
    p = np.random.dirichlet([1, 1, 1, 1])
    X = Multinomial(n, p)
    u = X._message_to_child()
    self.assertAllClose(u[0], p * n[:, None])

    # Test plates in p and n
    n = np.random.randint(1, 10, size=(4, 1))
    p = np.random.dirichlet([1, 1], size=3)
    X = Multinomial(n, p)
    u = X._message_to_child()
    self.assertAllClose(u[0], p * n[..., None])

    # Test with Dirichlet prior
    P = Dirichlet([7, 3])
    logp = P._message_to_child()[0]
    p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1]))
    p1 = np.exp(logp[1]) / (np.exp(logp[0]) + np.exp(logp[1]))
    X = Multinomial(1, P)
    u = X._message_to_child()
    p = np.array([p0, p1])
    self.assertAllClose(u[0], p)

    # Test with broadcasted plates
    P = Dirichlet([7, 3], plates=(10,))
    X = Multinomial(5, P)
    u = X._message_to_child()
    self.assertAllClose(u[0] * np.ones(X.get_shape(0)),
                        5 * p * np.ones((10, 1)))

    pass
def test_moments(self):
    """
    Test the moments of Dirichlet nodes.
    """

    p = Dirichlet([2, 3, 4])
    u = p._message_to_child()
    self.assertAllClose(u[0],
                        special.psi([2, 3, 4]) - special.psi(2 + 3 + 4))

    pass
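# Sketch (not part of the test suite above, added for illustration): the
# moment checked there is the standard Dirichlet identity
# E[log p_i] = psi(a_i) - psi(sum_j a_j), which a quick Monte Carlo
# estimate can confirm.
import numpy as np
from scipy import special

a = np.array([2.0, 3.0, 4.0])
samples = np.random.dirichlet(a, size=200000)
print(np.log(samples).mean(axis=0))           # Monte Carlo estimate
print(special.psi(a) - special.psi(a.sum()))  # closed-form moment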
def test_constant(self):
    """
    Test the constant moments of Dirichlet nodes.
    """

    p = Dirichlet([1, 1, 1])
    p.initialize_from_value([0.5, 0.4, 0.1])
    u = p._message_to_child()
    self.assertAllClose(u[0], np.log([0.5, 0.4, 0.1]))

    pass
def _run(self, x, K=25, beta=0.5, alpha=0.00001, hinton_plot=False, end=False):
    '''Only to be used when doing parameter optimization.'''

    self.participant_list = x[0]

    N = len(x[0])           # number of data points (i.e. WCS participants)
    D = np.shape(x[1])[1]   # number of features
    #K = 20                 # number of initial clusters

    R = Dirichlet(K * [alpha], name='R')
    Z = Categorical(R, plates=(N, 1), name='Z')
    P = Beta([beta, beta], plates=(D, K), name='P')
    X = Mixture(Z, Bernoulli, P)

    Q = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(x[1])
    Q.update(repeat=1000)

    log_likelihood = Q.L[Q.iter - 1]

    if hinton_plot:
        bpplt.hinton(Z)
        bpplt.pyplot.show()
        bpplt.hinton(R)
        bpplt.pyplot.show()

    # Get the weight matrix stored in Z (the weights determine which cluster
    # each data point belongs to)
    z = Z._message_to_child()[0]
    z = z * np.ones(Z.plates + (1,))
    z = np.squeeze(z)
    self.z = z

    # Get the weights stored in R (proportional to the size of the clusters)
    r = np.exp(R._message_to_child()[0])
    r = r * np.ones(R.plates + (1,))
    r = np.squeeze(r)
    self.r = r

    # Get the cluster assignment of each data point
    self.c_assign = np.argmax(self.z, axis=1)

    return log_likelihood
def test_init(self):
    """
    Test the creation of categorical nodes.
    """

    # Some simple initializations
    X = Categorical([0.1, 0.3, 0.6])
    X = Categorical(Dirichlet([5, 4, 3]))

    # Check that plates are correct
    X = Categorical([0.1, 0.3, 0.6], plates=(3, 4))
    self.assertEqual(X.plates, (3, 4))
    X = Categorical(0.25 * np.ones((2, 3, 4)))
    self.assertEqual(X.plates, (2, 3))
    X = Categorical(Dirichlet([2, 1, 9], plates=(3, 4)))
    self.assertEqual(X.plates, (3, 4))

    # Probabilities not a vector
    self.assertRaises(ValueError, Categorical, 0.5)

    # Invalid probability
    self.assertRaises(ValueError, Categorical, [-0.5, 1.5])
    self.assertRaises(ValueError, Categorical, [0.5, 1.5])

    # Inconsistent plates
    self.assertRaises(ValueError, Categorical, 0.25 * np.ones((2, 4)),
                      plates=(3,))

    # Explicit plates too small
    self.assertRaises(ValueError, Categorical, 0.25 * np.ones((2, 4)),
                      plates=(1,))

    pass
def test_gaussian_mixture_plot():
    """
    Test the gaussian_mixture plotting function.

    The code is from http://www.bayespy.org/examples/gmm.html
    """

    np.random.seed(1)
    y0 = np.random.multivariate_normal([0, 0], [[1, 0], [0, 0.02]], size=50)
    y1 = np.random.multivariate_normal([0, 0], [[0.02, 0], [0, 1]], size=50)
    y2 = np.random.multivariate_normal([2, 2], [[1, -0.9], [-0.9, 1]], size=50)
    y3 = np.random.multivariate_normal([-2, -2], [[0.1, 0], [0, 0.1]], size=50)
    y = np.vstack([y0, y1, y2, y3])

    bpplt.pyplot.plot(y[:, 0], y[:, 1], 'rx')

    N = 200
    D = 2
    K = 10

    alpha = Dirichlet(1e-5 * np.ones(K), name='alpha')
    Z = Categorical(alpha, plates=(N,), name='z')
    mu = Gaussian(np.zeros(D), 1e-5 * np.identity(D), plates=(K,), name='mu')
    Lambda = Wishart(D, 1e-5 * np.identity(D), plates=(K,), name='Lambda')
    Y = Mixture(Z, Gaussian, mu, Lambda, name='Y')
    Z.initialize_from_random()

    Q = VB(Y, mu, Lambda, Z, alpha)
    Y.observe(y)
    Q.update(repeat=1000)

    bpplt.gaussian_mixture_2d(Y, scale=2)
def _setup_bernoulli_mixture():
    """
    Setup code for the hinton tests.

    This code is from http://www.bayespy.org/examples/bmm.html
    """

    np.random.seed(1)
    p0 = [0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9]
    p1 = [0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.9]
    p2 = [0.9, 0.9, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1]
    p = np.array([p0, p1, p2])

    z = random.categorical([1 / 3, 1 / 3, 1 / 3], size=100)
    x = random.bernoulli(p[z])

    N = 100
    D = 10
    K = 10

    R = Dirichlet(K * [1e-5], name='R')
    Z = Categorical(R, plates=(N, 1), name='Z')
    P = Beta([0.5, 0.5], plates=(D, K), name='P')
    X = Mixture(Z, Bernoulli, P)

    Q = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(x)
    Q.update(repeat=1000)

    return (R, P, Z)
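# Sketch: one way the nodes returned by _setup_bernoulli_mixture() might be
# inspected, matching the Hinton-test purpose stated in its docstring. This
# usage example is an assumption added for illustration, not original code.
import bayespy.plot as bpplt

(R, P, Z) = _setup_bernoulli_mixture()
bpplt.hinton(R)   # cluster weights; the sparse prior collapses unused clusters
bpplt.pyplot.show()
bpplt.hinton(P)   # per-cluster Bernoulli parameters
bpplt.pyplot.show()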
def test_init(self):
    """
    Test the creation of multinomial nodes.
    """

    # Some simple initializations
    X = Multinomial(10, [0.1, 0.3, 0.6])
    X = Multinomial(10, Dirichlet([5, 4, 3]))

    # Check that plates are correct
    X = Multinomial(10, [0.1, 0.3, 0.6], plates=(3, 4))
    self.assertEqual(X.plates, (3, 4))
    X = Multinomial(10, 0.25 * np.ones((2, 3, 4)))
    self.assertEqual(X.plates, (2, 3))
    n = 10 * np.ones((3, 4), dtype=int)
    X = Multinomial(n, [0.1, 0.3, 0.6])
    self.assertEqual(X.plates, (3, 4))
    X = Multinomial(n, Dirichlet([2, 1, 9], plates=(3, 4)))
    self.assertEqual(X.plates, (3, 4))

    # Probabilities not a vector
    self.assertRaises(ValueError, Multinomial, 10, 0.5)

    # Invalid probability
    self.assertRaises(ValueError, Multinomial, 10, [-0.5, 1.5])
    self.assertRaises(ValueError, Multinomial, 10, [0.5, 1.5])

    # Invalid number of trials
    self.assertRaises(ValueError, Multinomial, -1, [0.5, 0.5])
    self.assertRaises(ValueError, Multinomial, 8.5, [0.5, 0.5])

    # Inconsistent plates
    self.assertRaises(ValueError, Multinomial, 10, 0.25 * np.ones((2, 4)),
                      plates=(3,))

    # Explicit plates too small
    self.assertRaises(ValueError, Multinomial, 10, 0.25 * np.ones((2, 4)),
                      plates=(1,))

    pass
def hidden_markov_model(distribution, *args, K=3, N=100):

    # Prior for initial state probabilities
    alpha = Dirichlet(1e-3 * np.ones(K), name='alpha')

    # Prior for state transition probabilities
    A = Dirichlet(1e-3 * np.ones(K), plates=(K,), name='A')

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities)
    Z = CategoricalMarkovChain(alpha, A, states=N, name='Z')

    # Emission/observation distribution
    Y = Mixture(Z, distribution, *args, name='Y')

    Q = VB(Y, Z, alpha, A)

    return Q
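# Sketch: one possible way to use hidden_markov_model() above, fitting an HMM
# with fixed categorical emission probabilities to a synthetic symbol
# sequence. The emission table and data are assumptions for illustration
# only; node lookup by name (Q['Y']) is standard bayespy VB behavior.
import numpy as np
from bayespy.nodes import Categorical

K, N, V = 3, 100, 4                               # states, length, symbols
p_emit = np.random.dirichlet(np.ones(V), size=K)  # fixed per-state emissions
Q = hidden_markov_model(Categorical, p_emit, K=K, N=N)
Q['Y'].observe(np.random.randint(V, size=N))      # observe a symbol sequence
Q.update(repeat=100)                              # learns alpha, A and Z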
def mixture_model(distribution, *args, K=3, N=100):

    # Prior for state probabilities
    alpha = Dirichlet(1e-3 * np.ones(K), name='alpha')

    # Cluster assignments
    Z = Categorical(alpha, plates=(N,), name='Z')

    # Observation distribution
    Y = Mixture(Z, distribution, *args, name='Y')

    Q = VB(Y, Z, alpha)

    return Q
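# Sketch: one possible way to use mixture_model() above, clustering toy 2-D
# data with fixed Gaussian component parameters. The means, precision matrix
# and synthetic data are assumptions for illustration only.
import numpy as np
from bayespy.nodes import Gaussian

K, N, D = 3, 90, 2
means = np.array([[-3.0, 0.0], [0.0, 3.0], [3.0, 0.0]])   # fixed means, (K, D)
data = np.vstack([m + np.random.randn(N // K, D) for m in means])
Q = mixture_model(Gaussian, means, np.identity(D), K=K, N=N)
Q['Y'].observe(data)
Q.update(repeat=100)   # learns the assignments Z and the mixture weights alpha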
data.append([
    ageEnum[x[0]], genderEnum[x[1]], familyHistoryEnum[x[2]],
    dietEnum[x[3]], lifeStyleEnum[x[4]], cholesterolEnum[x[5]],
    heartDiseaseEnum[x[6]]
])
"""
data --> [[0, 0, 0, 1, 3, 0, 0], [0, 1, 0, 1, 3, 0, 0], [1, 0, 1, 0, 2, 1, 0],
          [4, 0, 0, 1, 3, 2, 1], [3, 1, 0, 0, 0, 2, 1], [2, 0, 0, 1, 1, 0, 0],
          [4, 0, 0, 0, 2, 0, 0], [0, 0, 0, 1, 3, 0, 0], [3, 1, 0, 0, 0, 2, 1],
          [1, 1, 1, 0, 0, 2, 0], [4, 1, 1, 1, 2, 0, 0]]
"""
data = np.array(data)
N = len(data)
print(N)

# Symmetric Dirichlet prior over the five age categories
p_age = Dirichlet(1.0 * np.ones(5))
# One categorical age variable per data point
age = Categorical(p_age, plates=(N,))
age.observe(data[:, 0])

p_gender = Dirichlet(1.0 * np.ones(2))
gender = Categorical(p_gender, plates=(N,))
gender.observe(data[:, 1])

p_familyhistory = Dirichlet(1.0 * np.ones(2))
familyhistory = Categorical(p_familyhistory, plates=(N,))
familyhistory.observe(data[:, 2])

p_diet = Dirichlet(1.0 * np.ones(3))
diet = Categorical(p_diet, plates=(N,))
import numpy as np
from bayespy.nodes import CategoricalMarkovChain, Mixture
from bayespy.inference import VB
import bayespy.plot as bpplt

# Assumed from the surrounding example context (not in this fragment):
# the emission means and standard deviation used to generate the data
mu = np.array([[0, 0], [3, 4], [6, 0]])
std = 2.0

K = 3
N = 200

p0 = np.ones(K) / K

q = 0.9
r = (1 - q) / (K - 1)
P = q * np.identity(K) + r * (np.ones((K, K)) - np.identity(K))

y = np.zeros((N, 2))
z = np.zeros(N)
state = np.random.choice(K, p=p0)
for n in range(N):
    z[n] = state
    y[n, :] = std * np.random.randn(2) + mu[state]
    state = np.random.choice(K, p=P[state])

from bayespy.nodes import Dirichlet
a0 = Dirichlet(1e-3 * np.ones(K))
A = Dirichlet(1e-3 * np.ones((K, K)))
Z = CategoricalMarkovChain(a0, A, states=N)

Lambda = std ** (-2) * np.identity(2)

from bayespy.nodes import Gaussian
Y = Mixture(Z, Gaussian, mu, Lambda)

Y.observe(y)
Q = VB(Y, Z, A, a0)
Q.update(repeat=1000)

bpplt.pyplot.figure()
bpplt.pyplot.axis('equal')
colors = Y.parents[0].get_moments()[0]
bpplt.pyplot.plot(y[:, 0], y[:, 1], 'k-', zorder=-10)
bpplt.pyplot.scatter(y[:, 0], y[:, 1], c=colors, s=40)
# coding: utf-8

## Gaussian mixture model

# Do some stuff:

# In[2]:

from bayespy.nodes import Dirichlet
alpha = Dirichlet([1e-3, 1e-3, 1e-3])
print(alpha._message_to_child())

# Nice!
# (fragment: this first block runs inside a loop over observations that
# binds o_n and n; the loop header is not part of this excerpt)
if n in obs_nodes:
    X = obs_nodes[n]
else:
    mu = Gaussian(np.zeros(O_D), 1e-5 * np.identity(O_D))
    lambda_ = Wishart(O_D, np.identity(O_D))
    O_n = Gaussian(mu, lambda_, name=f"O_{n}")
    obs_nodes[n] = O_n
    X = O_n  # use the newly created node so the observation lands on it
X.observe(o_n)

for action in actions:
    trial, agent, a_n, n = action
    if a_n < 0:  # action reset
        continue
    if n in action_nodes:
        A = action_nodes[n]
    else:
        category_prob = Dirichlet(1e-3 * np.ones(A_D), name='category_prob')
        # FIXME: Unconfirmed!
        A = Categorical(category_prob)
        action_nodes[n] = A
    A.observe(a_n)

# In[139]:

action_nodes[0].__dict__

# In[ ]:

Dirichlet(1e-3 * np.ones(A_D))

# In[120]:

np.prod(env.action_space.shape)
def get_community_assignments_by(self, method=None,
                                 temp_dfile_file="gibbsldapp.dfile",
                                 params={}):

    if method == "HMM":
        """
        model = hmm.MultinomialHMM(n_components=3)
        model.startprob_ = np.array([0.6, 0.3, 0.1])
        model.transmat_ = np.array([[0.7, 0.2, 0.1],
                                    [0.3, 0.5, 0.2],
                                    [0.3, 0.3, 0.4]])
        model.emissionprob_ = np.array([[0.4, 0.2, 0.1, 0.3],
                                        [0.3, 0.4, 0.1, 0.2],
                                        [0.1, 0.3, 0.5, 0.1]])

        X, Z = model.sample(1000)
        print(np.asarray(X).T)
        print(Z)
        """
        """
        remodel = hmm.MultinomialHMM(n_components=3, n_iter=100)
        remodel.fit(X)
        Z2 = remodel.predict(X)
        print(Z2)
        """
        """
        seqs = []
        lens = []
        for walk in self._walks:
            s = [[int(w)-1] for w in walk]
            seqs.extend(s)
            lens.append(len(s))

        model = hmm.MultinomialHMM(n_components=params['number_of_topics'],
                                   tol=0.001, n_iter=5000)
        model.fit(seqs, lens)

        posteriors = model.predict_proba(
            np.asarray([[i] for i in range(self.g.number_of_nodes())]))
        comms = np.argmax(posteriors, 1)

        node2comm = {}
        for id in range(len(comms)):
            node2comm[str(id+1)] = comms[id]

        return node2comm
        """

        seqs = []
        lens = []
        for walk in self._walks:
            s = [int(w) - 1 for w in walk]
            seqs.append(s)
            lens.append(len(s))

        pipi = np.asarray([0.5, 0.5], dtype=float)
        AA = np.asarray([[0.2, 0.8], [0.5, 0.5]], dtype=float)
        OO = np.asarray([[0.9, 0.05, 0.05], [0.05, 0.05, 0.9]], dtype=float)

        seqs = []
        for i in range(31):
            seq = []
            s = np.random.choice(range(2), p=pipi)
            o = np.random.choice(range(3), p=OO[s, :])
            seq.append(o)
            for _ in range(59):
                s = np.random.choice(range(2), p=AA[s, :])
                o = np.random.choice(range(3), p=OO[s, :])
                seq.append(o)
            seqs.append(seq)
        seqs = np.vstack(seqs)
        #print(seqs)

        from bayespy.nodes import Categorical, Mixture
        from bayespy.nodes import CategoricalMarkovChain
        from bayespy.nodes import Dirichlet
        from bayespy.inference import VB

        K = params['number_of_topics']  # the number of hidden states
        N = self.g.number_of_nodes()    # the number of observations
        #p0 = np.ones(K) / K

        D = 31       # len(lens)
        states = 60

        a0 = Dirichlet(1e+1 * np.ones(K), plates=())
        A = Dirichlet(1e+1 * np.ones(K), plates=(2,), name='A')
        P = Dirichlet(1e+1 * np.ones((K, N)))

        Z = CategoricalMarkovChain(a0, A, states=states, plates=(D,))
        Y = Mixture(Z, Categorical, P)
        Y.observe(seqs)

        #a0.random()
        #A.random()
        #P.random()

        Ainit = np.random.random((2, 2))
        Ainit = np.divide(Ainit.T, np.sum(Ainit, 1)).T
        #A.initialize_from_value(Ainit)
        #print(Ainit)

        Q = VB(Y, Z, P, A, a0)
        Q.update(repeat=1000, plot=False, verbose=True)
        #print(Z.random())
        print(Q['A'])

        return {}

    if method == "LDA":
        # Run GibbsLDA++
        lda_exe_path = c._GIBBSLDA_PATH

        if not os.path.exists(lda_exe_path):
            raise ValueError("Invalid path of GibbsLDA++!")

        temp_lda_folder = "./temp"
        if not os.path.exists(temp_lda_folder):
            os.makedirs(temp_lda_folder)

        temp_dfile_path = os.path.join(temp_lda_folder, temp_dfile_file)

        if not os.path.exists(temp_dfile_path):
            # Save the walks into the dfile
            n = len(self._walks)
            with open(temp_dfile_path, 'w') as f:
                f.write("{}\n".format(n))
                for walk in self._walks:
                    f.write("{}\n".format(" ".join(str(w) for w in walk)))

        initial_time = time.time()
        cmd = "{} -est ".format(lda_exe_path)
        cmd += "-alpha {} ".format(params['lda_alpha'])
        cmd += "-beta {} ".format(params['lda_beta'])
        cmd += "-ntopics {} ".format(params['number_of_topics'])
        cmd += "-niters {} ".format(params['lda_number_of_iters'])
        cmd += "-savestep {} ".format(params['lda_number_of_iters'] + 1)
        cmd += "-dfile {} ".format(temp_dfile_path)
        os.system(cmd)
        print("-> The LDA algorithm ran in {:.2f} secs".format(
            time.time() - initial_time))

        # Read wordmap file
        id2node = {}
        temp_wordmap_path = os.path.join(temp_lda_folder, "wordmap.txt")
        with open(temp_wordmap_path, 'r') as f:
            f.readline()  # skip the first line
            for line in f.readlines():
                tokens = line.strip().split()
                id2node[int(tokens[1])] = tokens[0]

        # Read phi file
        phi = np.zeros(shape=(params['number_of_topics'], len(id2node)),
                       dtype=float)
        temp_phi_path = os.path.join(temp_lda_folder, "model-final.phi")
        with open(temp_phi_path, 'r') as f:
            for topicId, line in enumerate(f.readlines()):
                phi[topicId, :] = [float(value)
                                   for value in line.strip().split()]

        max_topics = np.argmax(phi, axis=0)

        node2comm = {}
        for nodeId in id2node:
            node2comm[id2node[nodeId]] = max_topics[int(nodeId)]

        return node2comm
import numpy as np
from csv import reader
# Assuming MultiMixture is exported by bayespy.nodes, as used below
from bayespy.nodes import Dirichlet, Categorical, MultiMixture

a = {'SuperSeniorCitizen': 0, 'SeniorCitizen': 1, 'MiddleAged': 2,
     'Youth': 3, 'Teen': 4}
b = {'Male': 0, 'Female': 1}
c = {'Yes': 0, 'No': 1}
d = {'High': 0, 'Medium': 1, 'Low': 2}
e = {'Athlete': 0, 'Active': 1, 'Moderate': 2, 'Sedetary': 3}
f = {'High': 0, 'BorderLine': 1, 'Normal': 2}
g = {'Yes': 0, 'No': 1}

dataset = list(reader(open('Dataset7.csv')))
dataset = [[a[x[0]], b[x[1]], c[x[2]], d[x[3]], e[x[4]], f[x[5]], g[x[6]]]
           for x in dataset]
dataset = np.array(dataset)

attr = [5, 2, 2, 3, 4, 3]
n = len(dataset)

arr = []
for i in range(6):
    dirichlet = Dirichlet(np.ones(attr[i]))
    arr.append(Categorical(dirichlet, plates=(n,)))
    arr[i].observe(dataset[:, i])

target = Dirichlet(np.ones(2), plates=(5, 2, 2, 3, 4, 3))
model = MultiMixture(arr, Categorical, target)
model.observe(dataset[:, -1])
target.update()

tup = [int(input()) for i in range(6)]
result = MultiMixture(tup, Categorical, target).get_moments()[0][0]
print(result)
import numpy as np
from bayespy.nodes import Dirichlet, Categorical
from bayespy.nodes import Gaussian, Wishart
from bayespy.nodes import Mixture
from bayespy.inference import VB
import bayespy.plot as bpplt

y0 = np.random.multivariate_normal([0, 0], [[2, 0], [0, 0.1]], size=50)
y1 = np.random.multivariate_normal([0, 0], [[0.1, 0], [0, 2]], size=50)
y2 = np.random.multivariate_normal([2, 2], [[2, -1.5], [-1.5, 2]], size=50)
y3 = np.random.multivariate_normal([-2, -2], [[0.5, 0], [0, 0.5]], size=50)
y = np.vstack([y0, y1, y2, y3])

N = 200
D = 2
K = 10

alpha = Dirichlet(1e-5 * np.ones(K), name='alpha')
Z = Categorical(alpha, plates=(N,), name='z')
mu = Gaussian(np.zeros(D), 1e-5 * np.identity(D), plates=(K,), name='mu')
Lambda = Wishart(D, 1e-5 * np.identity(D), plates=(K,), name='Lambda')
Y = Mixture(Z, Gaussian, mu, Lambda, name='Y')
Z.initialize_from_random()

Q = VB(Y, mu, Lambda, Z, alpha)
Y.observe(y)
Q.update(repeat=1000)

bpplt.gaussian_mixture_2d(Y, alpha=alpha, scale=2)
from pprint import pprint
from csv import reader
import numpy as np
# Assuming MultiMixture is exported by bayespy.nodes, as used below
from bayespy.nodes import Dirichlet, Categorical, MultiMixture

with open('7-dataset.csv') as f:
    dataset = np.array(list(reader(f)))

enum = [list(set(column)) for column in dataset.T]
dataset = np.array([[enum[i].index(j) for i, j in enumerate(row)]
                    for row in dataset])
n = len(dataset)

categoricals = []
for i in range(len(enum) - 1):
    dirichlet = Dirichlet(np.ones(len(enum[i])))
    categoricals.append(Categorical(dirichlet, plates=(n,)))
    categoricals[i].observe(dataset[:, i])

target = Dirichlet(np.ones(2), plates=tuple([len(x) for x in enum[:-1]]))
model = MultiMixture(categoricals, Categorical, target)
model.observe(dataset[:, -1])
target.update()

while True:
    tup = [enum[i].index(j)
           for i, j in enumerate(input('Tuple : ').split(','))]
    result = MultiMixture(tup, Categorical,
                          target).get_moments()[0][enum[-1].index("Y")]
    print(result)
def run(N=100000, N_batch=50, seed=42, maxiter=100, plot=True):
    """
    Run the stochastic variational inference demo for a Gaussian mixture.
    """

    if seed is not None:
        np.random.seed(seed)

    # Number of clusters in the model
    K = 20

    # Dimensionality of the data
    D = 5

    # Generate data
    K_true = 10
    spread = 5
    means = spread * np.random.randn(K_true, D)
    z = random.categorical(np.ones(K_true), size=N)
    data = np.empty((N, D))
    for n in range(N):
        data[n] = means[z[n]] + np.random.randn(D)

    #
    # Standard VB-EM algorithm
    #

    # Full model
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K,), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha, plates=(N,), name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Put the data in
    Y.observe(data)

    # Run inference
    Q = VB(Y, Z, mu, alpha)
    Q.save(mu)
    Q.update(repeat=maxiter)

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    max_cputime = np.sum(Q.cputime[~np.isnan(Q.cputime)])

    #
    # Stochastic variational inference
    #

    # Construct smaller model (size of the mini-batch)
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K,), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha,
                    plates=(N_batch,),
                    plates_multiplier=(N / N_batch,),
                    name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Inference engine
    Q = VB(Y, Z, mu, alpha, autosave_filename=Q.autosave_filename)
    Q.load(mu)

    # Because we are using mini-batches, messages need to be multiplied
    # appropriately (handled by plates_multiplier above)
    print("Stochastic variational inference...")
    Q.ignore_bound_checks = True

    maxiter *= int(N / N_batch)
    delay = 1
    forgetting_rate = 0.7
    for n in range(maxiter):

        # Observe a mini-batch
        subset = np.random.choice(N, N_batch)
        Y.observe(data[subset, :])

        # Learn intermediate variables
        Q.update(Z)

        # Set step length
        step = (n + delay) ** (-forgetting_rate)

        # Stochastic gradient for the global variables
        Q.gradient_step(mu, alpha, scale=step)

        if np.sum(Q.cputime[:n]) > max_cputime:
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')
        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Stochastic inference'],
                            loc='lower right')
        bpplt.pyplot.title('VB for Gaussian mixture model')

    return
import numpy as np
np.random.seed(1)

p0 = [0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9]
p1 = [0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.9]
p2 = [0.9, 0.9, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1]
p = np.array([p0, p1, p2])

from bayespy.utils import random
z = random.categorical([1 / 3, 1 / 3, 1 / 3], size=100)
x = random.bernoulli(p[z])

N = 100
D = 10
K = 10

from bayespy.nodes import Categorical, Dirichlet
R = Dirichlet(K * [1e-5], name='R')
Z = Categorical(R, plates=(N, 1), name='Z')

from bayespy.nodes import Beta
P = Beta([0.5, 0.5], plates=(D, K), name='P')

from bayespy.nodes import Mixture, Bernoulli
X = Mixture(Z, Bernoulli, P)

from bayespy.inference import VB
Q = VB(Z, R, X, P)

P.initialize_from_random()
X.observe(x)
Q.update(repeat=1000)

import bayespy.plot as bpplt
bpplt.hinton(P)
bpplt.pyplot.show()
def run(self, K=25, beta=0.5, alpha=0.00001, foci_thresh=0,
        num_neigh=4, hinton_plot=False, end=False):
    '''Performs one run of the BBDP according to the specified parameters.'''

    print("Transforming WCS participant data into binary vectors...")
    x = u.transform_data_all(self.langs, norm=False, end=end, foci=True,
                             foci_thresh=foci_thresh, num_neigh=num_neigh)
    print("Finished transforming participant data")

    self.participant_list = x[0]

    N = len(x[0])           # number of data points (i.e. WCS participants)
    D = np.shape(x[1])[1]   # number of features
    #K = 20                 # number of initial clusters

    R = Dirichlet(K * [alpha], name='R')
    Z = Categorical(R, plates=(N, 1), name='Z')
    P = Beta([beta, beta], plates=(D, K), name='P')
    X = Mixture(Z, Bernoulli, P)

    Q = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(x[1])
    Q.update(repeat=1000)

    if hinton_plot:
        bpplt.hinton(Z)
        bpplt.pyplot.show()
        bpplt.hinton(R)
        bpplt.pyplot.show()

    # Get the weight matrix stored in Z (the weights determine which cluster
    # each data point belongs to)
    z = Z._message_to_child()[0]
    z = z * np.ones(Z.plates + (1,))
    z = np.squeeze(z)
    self.z = z

    # Get the weights stored in R (proportional to the size of the clusters)
    r = np.exp(R._message_to_child()[0])
    r = r * np.ones(R.plates + (1,))
    r = np.squeeze(r)
    self.r = r

    # Get the cluster assignment of each data point
    self.c_assign = np.argmax(self.z, axis=1)

    # Write cluster results to a file
    if self.write_to_file:
        if end:
            save_path = "cluster_results_end_K={}_B={}_a={}_t={}_nn={}".format(
                K, beta, alpha, foci_thresh, num_neigh)
        else:
            save_path = "cluster_results_K={}_B={}_a={}_t={}_nn={}".format(
                K, beta, alpha, foci_thresh, num_neigh)

        while path.exists(save_path + ".txt"):
            # save_path already exists; bump the copy number
            try:
                old_file_num = int(save_path[save_path.find('(') + 1:-1])
                new_file_num = old_file_num + 1
                save_path = (save_path[0:save_path.find('(')] +
                             '(' + str(new_file_num) + ')')
            except ValueError:
                save_path = save_path + " (1)"
        self.save_path = save_path

        file = open(path.abspath(self.save_path + ".txt"), 'w')

        # Write cluster assignment matrix Z (gives the probability that
        # observation i belongs to cluster j)
        if 'Z' not in self.in_file:
            for i in range(len(self.z)):
                line = "\t".join([str(x) for x in self.z[i]]) + "\n"
                file.write(line)
            file.write('---Z\n')
            self.in_file.append('Z')

        # Write cluster weights matrix R (proportional to the size of the
        # resulting clusters)
        if 'R' not in self.in_file:
            line = "\t".join([str(x) for x in self.r]) + "\n"
            file.write(line)
            file.write('---R\n')
            self.in_file.append('R')

        # Write deterministic cluster assignments with the corresponding
        # participant key
        if 'C' not in self.in_file:
            line1 = "\t".join([str(x) for x in self.participant_list]) + "\n"
            line2 = "\t".join([str(x) for x in self.c_assign]) + "\n"
            file.write(line1)
            file.write(line2)
            file.write('---C\n')
            self.in_file.append('C')

        file.close()

    return self.c_assign
def model(M=20, N=100, D=10, K=3):
    """
    Construct the linear state-space model with switching dynamics.
    """

    #
    # Switching dynamics (HMM)
    #

    # Prior for initial state probabilities
    rho = Dirichlet(1e-3 * np.ones(K), name='rho')

    # Prior for state transition probabilities
    V = Dirichlet(1e-3 * np.ones(K), plates=(K,), name='V')
    v = 10 * np.identity(K) + 1 * np.ones((K, K))
    v /= np.sum(v, axis=-1, keepdims=True)
    V.initialize_from_value(v)

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities)
    Z = CategoricalMarkovChain(rho, V, states=N - 1, name='Z',
                               plotter=bpplt.CategoricalMarkovChainPlotter(),
                               initialize=False)
    Z.u[0] = np.random.dirichlet(np.ones(K))
    Z.u[1] = np.reshape(np.random.dirichlet(0.5 * np.ones(K * K),
                                            size=(N - 2)),
                        (N - 2, K, K))

    #
    # Linear state-space models
    #

    # Dynamics matrix with ARD
    # (K,D) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(K, 1, D), name='alpha')
    # (K,1,1,D) x (D)
    A = GaussianARD(0, alpha, shape=(D,), plates=(K, D), name='A',
                    plotter=bpplt.GaussianHintonPlotter())
    A.initialize_from_value(np.identity(D) * np.ones((K, D, D)) +
                            0.1 * np.random.randn(K, D, D))

    # Latent states with dynamics
    # (K,1) x (N,D)
    X = SwitchingGaussianMarkovChain(np.zeros(D),            # mean of x0
                                     1e-3 * np.identity(D),  # prec of x0
                                     A,                      # dynamics
                                     Z,                      # dynamics selection
                                     np.ones(D),             # innovation
                                     n=N,                    # time instances
                                     name='X',
                                     plotter=bpplt.GaussianMarkovChainPlotter())
    X.initialize_from_value(10 * np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    # (K,1,1,D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma')
    # (K,M,1) x (D)
    C = GaussianARD(0, gamma, shape=(D,), plates=(M, 1), name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=-3, cols=-1))
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Underlying noiseless function
    # (K,M,N) x ()
    F = SumMultiply('i,i', C, X, name='F')

    #
    # Mixing the models
    #

    # Observation noise
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Emission/observation distribution
    Y = GaussianARD(F, tau, name='Y')

    Q = VB(Y, F, Z, rho, V, C, gamma, X, A, alpha, tau)

    return Q
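# Sketch: one possible way to drive model() above. The (M, N) observation
# shape follows from Y = GaussianARD(F, tau) with F = SumMultiply('i,i', C, X);
# the random data here is an assumption purely for illustration.
import numpy as np

M, N = 20, 100
Q = model(M=M, N=N, D=10, K=3)
Q['Y'].observe(np.random.randn(M, N))  # (M, N) observation matrix
Q.update(repeat=10)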
def test_message_to_parent(self):
    """
    Test the message to parents of Mixture node.
    """

    K = 3

    # Broadcasting the moments on the cluster axis
    Mu = GaussianARD(2, 1, ndim=0, plates=(K,))
    (mu, mumu) = Mu._message_to_child()
    Alpha = Gamma(3, 1, plates=(K,))
    (alpha, logalpha) = Alpha._message_to_child()
    z = Categorical(np.ones(K) / K)
    X = Mixture(z, GaussianARD, Mu, Alpha)
    tau = 4
    Y = GaussianARD(X, tau)
    y = 5
    Y.observe(y)
    (x, xx) = X._message_to_child()
    m = z._message_from_children()
    self.assertAllClose(m[0] * np.ones(K),
                        random.gaussian_logpdf(xx * alpha,
                                               x * alpha * mu,
                                               mumu * alpha,
                                               logalpha,
                                               0) * np.ones(K))
    m = Mu._message_from_children()
    self.assertAllClose(m[0], 1/K * (alpha * x) * np.ones(3))
    self.assertAllClose(m[1], -0.5 * 1/K * alpha * np.ones(3))

    # Some parameters do not have cluster plate axis
    Mu = GaussianARD(2, 1, ndim=0, plates=(K,))
    (mu, mumu) = Mu._message_to_child()
    Alpha = Gamma(3, 1)  # Note: no cluster plate axis!
    (alpha, logalpha) = Alpha._message_to_child()
    z = Categorical(np.ones(K) / K)
    X = Mixture(z, GaussianARD, Mu, Alpha)
    tau = 4
    Y = GaussianARD(X, tau)
    y = 5
    Y.observe(y)
    (x, xx) = X._message_to_child()
    m = z._message_from_children()
    self.assertAllClose(m[0] * np.ones(K),
                        random.gaussian_logpdf(xx * alpha,
                                               x * alpha * mu,
                                               mumu * alpha,
                                               logalpha,
                                               0) * np.ones(K))
    m = Mu._message_from_children()
    self.assertAllClose(m[0], 1/K * (alpha * x) * np.ones(3))
    self.assertAllClose(m[1], -0.5 * 1/K * alpha * np.ones(3))

    # Cluster assignments do not have as many plate axes as parameters.
    M = 2
    Mu = GaussianARD(2, 1, ndim=0, plates=(K, M))
    (mu, mumu) = Mu._message_to_child()
    Alpha = Gamma(3, 1, plates=(K, M))
    (alpha, logalpha) = Alpha._message_to_child()
    z = Categorical(np.ones(K) / K)
    X = Mixture(z, GaussianARD, Mu, Alpha, cluster_plate=-2)
    tau = 4
    Y = GaussianARD(X, tau)
    y = 5 * np.ones(M)
    Y.observe(y)
    (x, xx) = X._message_to_child()
    m = z._message_from_children()
    self.assertAllClose(m[0] * np.ones(K),
                        np.sum(random.gaussian_logpdf(xx * alpha,
                                                      x * alpha * mu,
                                                      mumu * alpha,
                                                      logalpha,
                                                      0) * np.ones((K, M)),
                               axis=-1))
    m = Mu._message_from_children()
    self.assertAllClose(m[0] * np.ones((K, M)),
                        1/K * (alpha * x) * np.ones((K, M)))
    self.assertAllClose(m[1] * np.ones((K, M)),
                        -0.5 * 1/K * alpha * np.ones((K, M)))

    # Mixed distribution broadcasts g
    # This tests for a found bug. The bug caused an error.
    Z = Categorical([0.3, 0.5, 0.2])
    X = Mixture(Z, Categorical, [[0.2, 0.8], [0.1, 0.9], [0.3, 0.7]])
    m = Z._message_from_children()

    #
    # Test nested mixtures
    #
    t1 = [1, 1, 0, 3, 3]
    t2 = [2]
    p = Dirichlet([1, 1], plates=(4, 3))
    X = Mixture(t1, Mixture, t2, Categorical, p)
    X.observe([1, 1, 0, 0, 0])
    p.update()
    self.assertAllClose(
        p.phi[0],
        [
            [[1, 1], [1, 1], [2, 1]],
            [[1, 1], [1, 1], [1, 3]],
            [[1, 1], [1, 1], [1, 1]],
            [[1, 1], [1, 1], [3, 1]],
        ]
    )

    # Test sample plates in nested mixtures
    t1 = Categorical([0.3, 0.7], plates=(5,))
    t2 = [[1], [1], [0], [3], [3]]
    t3 = 2
    p = Dirichlet([1, 1], plates=(2, 4, 3))
    X = Mixture(t1, Mixture, t2, Mixture, t3, Categorical, p)
    X.observe([1, 1, 0, 0, 0])
    p.update()
    self.assertAllClose(
        p.phi[0],
        [
            [
                [[1, 1], [1, 1], [1.3, 1]],
                [[1, 1], [1, 1], [1, 1.6]],
                [[1, 1], [1, 1], [1, 1]],
                [[1, 1], [1, 1], [1.6, 1]],
            ],
            [
                [[1, 1], [1, 1], [1.7, 1]],
                [[1, 1], [1, 1], [1, 2.4]],
                [[1, 1], [1, 1], [1, 1]],
                [[1, 1], [1, 1], [2.4, 1]],
            ]
        ]
    )

    # Check that Gate and nested Mixture are equal
    t1 = Categorical([0.3, 0.7], plates=(5,))
    t2 = Categorical([0.1, 0.3, 0.6], plates=(5, 1))
    p = Dirichlet([1, 2, 3, 4], plates=(2, 3))
    X = Mixture(t1, Mixture, t2, Categorical, p)
    X.observe([3, 3, 1, 2, 2])
    t1_msg = t1._message_from_children()
    t2_msg = t2._message_from_children()
    p_msg = p._message_from_children()
    t1 = Categorical([0.3, 0.7], plates=(5,))
    t2 = Categorical([0.1, 0.3, 0.6], plates=(5, 1))
    p = Dirichlet([1, 2, 3, 4], plates=(2, 3))
    X = Categorical(Gate(t1, Gate(t2, p)))
    X.observe([3, 3, 1, 2, 2])
    t1_msg2 = t1._message_from_children()
    t2_msg2 = t2._message_from_children()
    p_msg2 = p._message_from_children()
    self.assertAllClose(t1_msg[0], t1_msg2[0])
    self.assertAllClose(t2_msg[0], t2_msg2[0])
    self.assertAllClose(p_msg[0], p_msg2[0])

    pass
    'Sedetary': 3
}, {
    'H': 0,
    'B': 1,
    'N': 2
}, {
    'Y': 0,
    'N': 1
}]

data = np.array([[enum[i][j] for i, j in enumerate(k)]
                 for k in reader(open('7-dataset.csv'))])
n = len(data)

categoricals = []
for i in range(len(enum) - 1):
    dirichlet = Dirichlet(np.ones(len(enum[i])))
    categoricals.append(Categorical(dirichlet, plates=(n,)))
    categoricals[i].observe(data[:, i])

target = Dirichlet(np.ones(2), plates=(5, 2, 2, 3, 4, 3))
model = MultiMixture(categoricals, Categorical, target)
model.observe(data[:, -1])
target.update()

tup = [enum[i][j] for i, j in enumerate(input('Tuple: ').split(','))]
result = MultiMixture(tup, Categorical, target).get_moments()[0][0]
print(result)