def append_features(self, features, facts, relationships, descriptions):
    # Normally factor_age is a flat prior, but here we make it very non-flat,
    # as we know the answer. Ideally we'd manipulate the other probability
    # distributions to integrate out age, but that's quite tricky
    # (programmatically).
    if 'age' in facts:
        if 'factor_age' not in features:
            # TODO: We need to overwrite factor_age with this more certain
            # distribution
            age = facts['age']
            if age >= 0:
                if age > 99:
                    age = 100
                p = np.zeros(101)
                p[age] = 1  # certain
                features['factor_age'] = pm.Categorical('factor_age', p)
    if 'gender' in facts:
        if 'factor_gender' not in features:
            # TODO: We need to overwrite factor_gender with this more certain
            # distribution
            ratio = [0.5, 0.5]  # prior...
            if facts['gender'].lower() == 'male':
                ratio = [1.0, 0]
            if facts['gender'].lower() == 'female':
                ratio = [0, 1.0]
            if facts['gender'].lower() == 'other':
                ratio = [0.5, 0.5]  # don't know what to do, as the census etc.
                # doesn't have data for this situation.
            features['factor_gender'] = pm.Categorical(
                'factor_gender', np.array(ratio))  # male, female...
    descriptions['factor_age'] = {'desc': 'Your age'}
    descriptions['factor_gender'] = {'desc': 'Your gender'}
def inference(self, iter_=5000, burn=1000):
    theta = pm.Container([
        pm.CompletedDirichlet(
            "theta_%s" % d, pm.Dirichlet("ptheta_%s" % d, theta=self.alpha))
        for d in range(self.D)
    ])
    phi = pm.Container([
        pm.CompletedDirichlet(
            "phi_%s" % k, pm.Dirichlet("pphi_%s" % k, theta=self.beta))
        for k in range(self.K)
    ])
    z_d = pm.Container([
        pm.Categorical(
            "z_%s" % d,
            p=theta[d],
            value=np.random.randint(self.K, size=len(self.bw[d])),
            size=len(self.bw[d])) for d in range(self.D)
    ])
    w_z = pm.Container([
        pm.Categorical(
            "w_%s_%s" % (d, w),
            p=phi[z_d[d][w].get_value()],
            value=self.bw[d][w],
            observed=True) for d in range(self.D)
        for w in range(len(self.bw[d]))
    ])
    model = pm.Model([theta, phi, z_d, w_z])
    self.mcmc = pm.MCMC(model)
    self.mcmc.sample(iter=iter_, burn=burn)
def append_features(self, features, facts): """Alters the features dictionary in place, adds: - age - gender - this instance's feature Args: features (dictionary): Dictionary of pyMC probability distributions. Raises: DuplicateFeatureException: If an identically named feature already exists that clashes with this instance """ #age: 0-100 if not 'factor_age' in features: p = np.ones(101) #flat prior p = p / p.sum() features['factor_age'] = pm.Categorical('factor_age', p) #gender: male or female if not 'factor_gender' in features: #flat prior features['factor_gender'] = pm.Categorical('factor_gender', np.array([0.5, 0.5])) if self.featurename in features: raise DuplicateFeatureException( 'The "%s" feature is already in the feature list.' % self.featurename) seen = ohf.true_string(self.answer) features[self.featurename] = pm.Categorical( self.featurename, self.get_pymc_function(features), value=seen, observed=True)
def get_z_data(self, p, p_pos, q):
    K = 2  # Num topics
    M = p  # Num documents
    N = q  # Total num of unique words across all documents
    alpha = 1.0  # Concentration parameter for distribution over
    #              distributions over topics (one for each document)
    beta = 1.0  # Concentration parameter for distribution over
    #             distributions over words (one for each topic)
    phi = pymc.Container([
        pymc.CompletedDirichlet(
            name="phi_" + str(k),
            D=pymc.Dirichlet(name="phi_temp_" + str(k),
                             theta=beta * numpy.ones(N)),
        ) for k in range(K)
    ])
    theta = pymc.Container([
        pymc.CompletedDirichlet(
            name="theta_" + str(m),
            D=pymc.Dirichlet(name="theta_temp_" + str(m),
                             theta=alpha * numpy.ones(K)),
        ) for m in range(M)
    ])
    z = pymc.Container([
        pymc.Categorical(name="z_" + str(m), p=theta[m], size=N)
        for m in range(M)
    ])
    w = pymc.Container([
        pymc.Categorical(
            name="w_" + str(m) + "_" + str(n),
            p=pymc.Lambda(
                "phi_z_" + str(m) + str(n),
                lambda z_in=z[m][n], phi_in=phi: phi_in[z_in],
            ),
        ) for m in range(M) for n in range(N)
    ])
    lda = pymc.Model([w, z, theta, phi])
    z_rvs = []
    for m in range(M):
        metadata = {"doc_idx": m, "num_unique_words": N}
        rv = WordCountVecRV(model=lda, name="w_0_0", metadata=metadata)
        # Note: w_0_0 is just a dummy argument that must be present in
        # the pymc.Model
        z_rvs += [rv]
    return z_rvs
def __init__(self, corpus, K=10, iterations=1000, burn=100):
    print("Building model ...")
    self.K = K
    self.V = corpus.wordCount + 1
    self.M = corpus.documentCount
    self.alpha = np.ones(self.K)
    self.beta = np.ones(self.V)
    self.corpus = corpus
    self.observations = np.array(corpus.observations)

    self.phi = np.empty(self.K, dtype=object)
    for i in range(self.K):
        self.phi[i] = pm.CompletedDirichlet(
            "Phi[%i]" % i, pm.Dirichlet("phi[%i]" % i, theta=self.beta))
    self.phi = pm.Container(self.phi)

    self.theta = np.empty(self.M, dtype=object)
    for i in range(self.M):
        self.theta[i] = pm.CompletedDirichlet(
            "Theta[%i]" % i, pm.Dirichlet("theta[%i]" % i, theta=self.alpha))
    self.theta = pm.Container(self.theta)

    self.z = np.empty(self.observations.shape, dtype=object)
    for i in range(self.M):
        self.z[i] = pm.Categorical(
            "z[%i]" % i,
            size=len(self.observations[i]),
            p=self.theta[i],
            value=np.random.randint(self.K, size=len(self.observations[i])))
    self.z = pm.Container(self.z)

    self.w = []
    for i in range(self.M):
        self.w.append([])
        for j in range(len(self.observations[i])):
            self.w[i].append(
                pm.Categorical(
                    "w[%i][%i]" % (i, j),
                    p=pm.Lambda(
                        "phi[z[%i][%i]]" % (i, j),
                        lambda z=self.z[i][j], phi=self.phi: phi[z]),
                    value=self.observations[i][j],
                    observed=True))
    self.w = pm.Container(self.w)

    self.mcmc = pm.MCMC(pm.Model([self.theta, self.phi, self.z, self.w]))

    print("Fitting model ...")
    self.mcmc.sample(iter=iterations, burn=burn)
def cartesian_categorical_child(name, parents, levels, value=None, N=None,
                                return_coeffs=False, fixed={}):
    if value is None and N is None:
        raise ValueError('either "value" or "N" must be specified')
    if value is not None:
        value = mask_missing(value)
    ranges = [range(get_levels_count(p)) for p in parents]
    parents2index = {}
    coeffs = []
    for i, parent_vals in enumerate(product(*ranges)):
        parents2index[parent_vals] = i
        parents_repr = ' '.join(
            '%s=%s' % (parent, v) for parent, v in zip(parents, parent_vals))
        coeff_name = COEFFS_PREFIX + 'p(%s | %s)' % (name, parents_repr)
        coeff = fixed.get(coeff_name,
                          pymc.Dirichlet(coeff_name, theta=[1] * levels))
        coeffs.append(coeff)

    intify = lambda x: tuple(map(int, x))

    @pymc.deterministic
    def child_prob(parents=parents, coeffs=coeffs):
        probs = np.array([
            coeffs[parents2index[intify(parent_vals)]]
            for parent_vals in zip(*parents)
        ])
        remainders = 1 - probs.sum(axis=1)
        remainders = remainders.reshape((len(remainders), 1))
        return np.hstack([probs, remainders])

    child_prob.__name__ = 'p(%s)' % name

    if value is None:
        child = pymc.Categorical(name, p=child_prob, value=np.zeros(N))
    else:
        child = pymc.Categorical(name, p=child_prob, value=value,
                                 observed=True)
    set_levels_count(child, levels)
    if return_coeffs:
        return child, coeffs + [child_prob]
    else:
        return child
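# A hypothetical wiring sketch for cartesian_categorical_child above, assuming
# it lives in the same module as make_categorical and the level-count helpers
# (see the make_categorical snippet later in this file). A four-level child
# gets one Dirichlet CPT row per combination of its two categorical parents.
a = make_categorical('a', levels=2, N=100)
b = make_categorical('b', levels=3, N=100)
c, cpt = cartesian_categorical_child('c', parents=[a, b], levels=4, N=100,
                                     return_coeffs=True)
# len(cpt) == 2 * 3 Dirichlet rows plus the deterministic p(c) node.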
def getModel():
    nA, nB, nK = 5, 2, 10
    B = pm.Beta('1-Beta', alpha=nA / nK, beta=nB * (nK - 1) / nK)  #@UndefinedVariable
    # p_B = pm.Lambda('p_Bern', lambda b=B: np.where(b == 0, 0.9, 0.1), doc='Pr[Bern|Beta]')
    C = pm.Categorical('2-Cat', [1 - B, B])  #@UndefinedVariable
    # C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3], observed=True, value=3)  #@UndefinedVariable
    return pm.Model([B, C])
def test_1d_w(self):
    nd = self.nd
    npop = self.npop
    mus = self.mus
    size = 100
    with pm.Model() as model:
        m = pm.NormalMixture("m",
                             w=np.ones(npop) / npop,
                             mu=mus,
                             sigma=1e-5,
                             comp_shape=(nd, npop),
                             shape=nd)
        z = pm.Categorical("z", p=np.ones(npop) / npop)
        latent_m = pm.Normal("latent_m", mu=mus[..., z], sigma=1e-5, shape=nd)

    m_val = m.random(size=size)
    latent_m_val = latent_m.random(size=size)
    assert m_val.shape == latent_m_val.shape
    # Test that each element in axis = -1 comes from the same mixture
    # component
    assert all(np.all(np.diff(m_val) < 1e-3, axis=-1))
    assert all(np.all(np.diff(latent_m_val) < 1e-3, axis=-1))

    self.samples_from_same_distribution(m_val, latent_m_val)
    self.logp_matches(m, latent_m, z, npop, model=model)
def run_Categorical_Normal():
    nC = 3  # Num. Clusters
    aD = [0, 1, 8, 9, 20, 21]  # Data Points
    nPts = len(aD) + 1  # data points plus one held-out query point

    aUh = [
        pm.Uniform('UnifH' + str(i), lower=-50, upper=50) for i in range(nC)
    ]  # @UndefinedVariable
    aNc = [pm.Normal('NormC' + str(i), mu=aUh[i], tau=1)
           for i in range(nC)]  # @UndefinedVariable

    # Dirichlet & Categorical Nodes
    Dir = pm.Dirichlet('Dirichlet', theta=[1] * nC)  # @UndefinedVariable
    aC = [pm.Categorical('Cat' + str(i), Dir)
          for i in range(nPts)]  # @UndefinedVariable
    aL = [
        pm.Lambda('p_Norm' + str(i), lambda k=aC[i], aNcl=aNc: aNcl[int(k)])
        for i in range(nPts)
    ]  # @UndefinedVariable

    # Points
    aN = [
        pm.Normal('NormX' + str(i), mu=aL[i], tau=1, observed=True,
                  value=aD[i]) for i in range(nPts - 1)
    ]  # @UndefinedVariable
    Nz = pm.Normal('NormZ', mu=aL[-1], tau=1)  # @UndefinedVariable

    return np.concatenate([[Nz, Dir], aUh, aNc, aC, aN])
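# A hedged usage sketch for the cluster model above (my assumption: PyMC2
# imported as `pm` with `numpy as np` in scope). Sampling the returned nodes
# lets us inspect where the held-out point NormZ lands.
nodes = run_Categorical_Normal()
mcmc = pm.MCMC(list(nodes))
mcmc.sample(20000, burn=5000, progress_bar=False)
print("NormZ posterior mean:", mcmc.trace('NormZ')[:].mean())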
def getModel():
    nA, nK = 0.05, 4
    aDir = [nA / nK] * nK
    D = pm.Dirichlet('1-Dirichlet', theta=aDir)  #@UndefinedVariable
    C1 = pm.Categorical('2-Cat', D)  #@UndefinedVariable
    # C2 = pm.Categorical('10-Cat', D)  #@UndefinedVariable
    # C3 = pm.Categorical('11-Cat', D)  #@UndefinedVariable
    # C4 = pm.Categorical('14-Cat', D)  #@UndefinedVariable
    # C5 = pm.Categorical('15-Cat', D)  #@UndefinedVariable
    # G0_0 = pm.Gamma('4-Gamma0_1', alpha=1, beta=1.5)  #@UndefinedVariable
    N0_1 = pm.Normal('5-Norm0_1', mu=10, tau=1)  #@UndefinedVariable
    N0_2 = pm.Normal('6-Norm0_2', mu=-10, tau=1)  #@UndefinedVariable
    N0_3 = pm.Normal('7-Norm0_3', mu=30, tau=1)  #@UndefinedVariable
    N0_4 = pm.Normal('16-Norm0_4', mu=-30, tau=1)  #@UndefinedVariable
    aMu = [N0_1.value, N0_2.value, N0_3.value, N0_4.value]
    p_N1 = pm.Lambda('p_Norm1', lambda n=C1: aMu[n], doc='Pr[Norm|Cat]')
    # p_N2 = pm.Lambda('p_Norm2', lambda n=C2: aMu[n], doc='Pr[Norm|Cat]')
    # p_N3 = pm.Lambda('p_Norm3', lambda n=C3: aMu[n], doc='Pr[Norm|Cat]')
    # p_N4 = pm.Lambda('p_Norm4', lambda n=C4: aMu[n], doc='Pr[Norm|Cat]')
    # p_N5 = pm.Lambda('p_Norm6', lambda n=C5: aMu[n], doc='Pr[Norm|Cat]')
    N = pm.Normal('3-Norm', mu=p_N1, tau=1)  #@UndefinedVariable
    # obsN1 = pm.Normal('8-Norm', mu=p_N2, tau=1, observed=True, value=40)  #@UndefinedVariable @UnusedVariable
    # obsN2 = pm.Normal('9-Norm', mu=p_N3, tau=1, observed=True, value=40)  #@UndefinedVariable @UnusedVariable
    # obsN3 = pm.Normal('12-Norm', mu=p_N4, tau=1, observed=True, value=-40)  #@UndefinedVariable @UnusedVariable
    # obsN4 = pm.Normal('13-Norm', mu=p_N5, tau=1, observed=True, value=-40)  #@UndefinedVariable @UnusedVariable
    return pm.Model([D, C1, N, N0_1, N0_2, N0_3, N0_4])
def test_2d_w(self):
    nd = self.nd
    npop = self.npop
    mus = self.mus
    size = 100
    with pm.Model() as model:
        m = pm.NormalMixture(
            "m",
            w=np.ones((nd, npop)) / npop,
            mu=mus,
            sigma=1e-5,
            comp_shape=(nd, npop),
            shape=nd,
        )
        z = pm.Categorical("z", p=np.ones(npop) / npop, shape=nd)
        mu = at.as_tensor_variable([mus[i, z[i]] for i in range(nd)])
        latent_m = pm.Normal("latent_m", mu=mu, sigma=1e-5, shape=nd)

    m_val = m.random(size=size)
    latent_m_val = latent_m.random(size=size)
    assert m_val.shape == latent_m_val.shape
    # Test that each element in axis = -1 can come from independent
    # components
    assert not all(np.all(np.diff(m_val) < 1e-3, axis=-1))
    assert not all(np.all(np.diff(latent_m_val) < 1e-3, axis=-1))

    self.samples_from_same_distribution(m_val, latent_m_val)
    self.logp_matches(m, latent_m, z, npop, model=model)
def run_Categorical_Normal():
    C = pm.Categorical('Cat', [0.2, 0.4, 0.1, 0.3])  # @UndefinedVariable
    p_N = pm.Lambda('p_Norm', lambda node=C: [-5, 0, 5, 10][node])
    N = pm.Normal('Norm', mu=p_N, tau=1)  # @UndefinedVariable
    return [C, N]
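# A minimal sampling sketch for the snippet above (assumes PyMC2 imported as
# `pm`). The posterior mean of Norm should land near the prior mixture of
# [-5, 0, 5, 10] weighted by [0.2, 0.4, 0.1, 0.3].
C, N = run_Categorical_Normal()
mcmc = pm.MCMC(pm.Model([C, N]))
mcmc.sample(5000, burn=1000, progress_bar=False)
print("C:", C.stats()["mean"])
print("N:", N.stats()["mean"])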
def append_features(self, features, facts, relationships, descriptions): """Alters the features dictionary in place, adds: - age - gender - this instance's feature Args: features (dictionary): Dictionary of pyMC probability distributions. Raises: DuplicateFeatureException: If an identically named feature already exists that clashes with this instance """ #if we're not in the us then we just skip if self.prob_in_us(facts) < 0.01: return self.calc_probs_age(facts) if not 'factor_age' in features: p = np.ones( 101 ) #flat prior, will be unflattened by US stats (TODO confirm) p = p / p.sum() features['factor_age'] = pm.Categorical('factor_age', p) if not 'blockgroup' in features: p = self.get_list_of_bg_probs(facts) features['blockgroup'] = pm.Categorical('blockgroup', p) if self.featurename + "_age" in features: raise DuplicateFeatureException( 'The "%s" feature is already in the feature list.' % self.featurename + "_age") features[self.featurename + "_blockgroup"] = pm.Categorical( self.featurename + "_age", self.get_pymc_function_age(features), value=True, observed=True) relationship = {'parent': 'factor_age', 'child': 'blockgroup'} relationships.append(relationship) descriptions['factor_age'] = {'desc': 'Your age'} descriptions['blockgroup'] = {'desc': 'Your geographical location'} descriptions[self.featurename + "_blockgroup"] = { 'desc': 'Probability of being in this block group given your features' } #TODO Figure this out
def test_unobserved_categorical(self):
    with pm.Model() as m:
        mu = pm.Categorical("mu", p=[0.1, 0.3, 0.6], size=2)
        pm.Normal("like", mu=mu, sigma=0.1, observed=[1, 2])

        trace = pm.sample_smc(chains=1, return_inferencedata=False)

    assert np.all(np.median(trace["mu"], axis=0) == [1, 2])
def append_features(self, features, facts, relationships, descriptions): """Alters the features dictionary in place, adds: - age - gender - this instance's feature Args: features (dictionary): Dictionary of pyMC probability distributions. facts (dictionary): should already be populated with facts Raises: DuplicateFeatureException: If an identically named feature already exists that clashes with this instance """ #age: 0-100 if 'first_name' not in facts: #we don't know their first name return logging.info('Appending babynames features') if 'first_name' in facts: self.answer = facts['first_name'] else: self.answer = None if not 'factor_age' in features: p = np.ones(101) #flat prior p = p / p.sum() features['factor_age'] = pm.Categorical('factor_age', p) if not 'factor_gender' in features: #flat prior features['factor_gender'] = pm.Categorical('factor_gender', np.array([0.5, 0.5])) if self.featurename in features: raise DuplicateFeatureException( 'The "%s" feature is already in the feature list.' % self.featurename) features[self.featurename] = pm.Categorical( self.featurename, self.get_pymc_function(features), value=True, observed=True) relationships.append({ 'parent': 'factor_gender', 'child': 'first_name' }) relationships.append({'parent': 'factor_age', 'child': 'first_name'})
def make_categorical(name, levels, value=None, N=None, return_coeffs=False,
                     fixed={}):
    """
    Creates a Categorical random variable with a Dirichlet parent.

    :param name: name of the variable
    :param levels: integer - how many levels does the variable have
    :param value: optional - list of observed values of the variable. Must
        consist of integers from 0 to levels - 1. May be a masked array if
        the variable has missing values.
    :param N: size of the variable (number of values). Either N or value
        must be specified.
    :param return_coeffs: if True, will return the Dirichlet parent as well
        as the Categorical child. False by default.
    :param fixed: optional dictionary of values of coefficients to be fixed.
    :return: Categorical pymc random variable, or (if return_coeffs == True)
        a tuple (categorical variable; a list with a single element - the
        Dirichlet parent)
    """
    if value is None and N is None:
        raise ValueError('either "value" or "N" must be specified')
    if value is not None:
        value = mask_missing(value)
    N = N or len(value)
    coeff_name = COEFFS_PREFIX + 'p(%s)' % name
    if coeff_name in fixed:
        probs = fixed[coeff_name]
        parent = list(probs) + [1 - sum(probs)]
    else:
        parent = pymc.Dirichlet(coeff_name, theta=[1] * levels)
    if value is None:
        child = pymc.Categorical(name, p=parent, value=np.zeros(N))
    else:
        child = pymc.Categorical(name, p=parent, observed=True, value=value)
    set_levels_count(child, levels)
    if return_coeffs:
        return child, [parent]
    else:
        return child
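# A hypothetical usage sketch for make_categorical, assuming numpy is imported
# as np and the module's mask_missing helper leaves an already-masked array's
# masked entries treated as missing. A three-level variable with one missing
# observation:
obs = np.ma.masked_values([0, 1, 2, 1, -1, 2], value=-1)
colour, coeffs = make_categorical('colour', levels=3, value=obs,
                                  return_coeffs=True)
# `colour` is an observed pymc.Categorical; `coeffs` holds its Dirichlet parent.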
def getData(self, dfTrack, nTrackId, avgSpeed, nProb_Gait, sGait):
    # g = pgm.Graph()
    # cpt1 = [.5, .5]
    # cpt2 = {"['False']": [.5, .5], "['True']": [nProb_Gait, 1 - nProb_Gait]}
    # g.addnode(pgm.Node(sGait, ["False", "True"], [None], cpt1))
    # g.addnode(pgm.Node("Speed=%.2f" % avgSpeed, ["False", "True"], [g.node[sGait]], cpt2))
    # g.setup()
    G_obs = [1.]
    N = len(G_obs)
    gait = pm.Categorical(sGait, [0.5, 0.5], value=pl.ones(N))
    p_speed = pm.Lambda(
        'p_' + sGait,
        lambda gait=gait: pl.where(gait, nProb_Gait, [0.5, 0.5]))
    speed = pm.Categorical("Speed=%.2f" % avgSpeed, p_speed, value=G_obs,
                           observed=True)
    model = pm.Model([gait, speed])
    g = pm.graph.graph(model)
    g.write_pdf("./Models/Graph2_" + str(int(nTrackId)) + "_" + str(sGait) +
                ".pdf")
    # g.write2pdf("./Models/Graph_" + str(int(nTrackId)) + "_" + str(sGait) + ".pdf")
    data = {"TrackId": nTrackId, "Type": sGait, "Belief": nProb_Gait,
            "Obs": ["Speed"], "Obs_Vals": [avgSpeed],
            "MEs": ["Walk", "Stand"], "Graph": g}
    return data
def test_discrete_not_allowed():
    mu_true = np.array([-2, 0, 2])
    z_true = np.random.randint(len(mu_true), size=100)
    y = np.random.normal(mu_true[z_true], np.ones_like(z_true))

    with pm.Model():
        mu = pm.Normal("mu", mu=0, sigma=10, size=3)
        z = pm.Categorical("z", p=at.ones(3) / 3, size=len(y))
        pm.Normal("y_obs", mu=mu[z], sigma=1.0, observed=y)
        with pytest.raises(opvi.ParametrizationError):
            pm.fit(n=1)  # fails
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[3, 2, 4])  #@UndefinedVariable
    C1 = pm.Categorical('2-Cat', D)  #@UndefinedVariable
    C2 = pm.Categorical('10-Cat', D)  #@UndefinedVariable
    C3 = pm.Categorical('11-Cat', D)  #@UndefinedVariable
    W0_0 = pm.WishartCov('4-Wishart0_1', n=5, C=np.eye(2))  #@UndefinedVariable
    N0_1 = pm.MvNormalCov('5-Norm0_1', mu=[-20, -20], C=np.eye(2))  #@UndefinedVariable
    N0_2 = pm.MvNormalCov('6-Norm0_2', mu=[0, 0], C=np.eye(2))  #@UndefinedVariable
    N0_3 = pm.MvNormalCov('7-Norm0_3', mu=[20, 20], C=np.eye(2))  #@UndefinedVariable
    aMu = [N0_1.value, N0_2.value, N0_3.value]
    fL1 = lambda n=C1: np.select([n == 0, n == 1, n == 2], aMu)
    fL2 = lambda n=C2: np.select([n == 0, n == 1, n == 2], aMu)
    fL3 = lambda n=C3: np.select([n == 0, n == 1, n == 2], aMu)
    p_N1 = pm.Lambda('p_Norm1', fL1, doc='Pr[Norm|Cat]')
    p_N2 = pm.Lambda('p_Norm2', fL2, doc='Pr[Norm|Cat]')
    p_N3 = pm.Lambda('p_Norm3', fL3, doc='Pr[Norm|Cat]')
    N = pm.MvNormalCov('3-Norm', mu=p_N1, C=W0_0)  #@UndefinedVariable
    obsN1 = pm.MvNormalCov('8-Norm', mu=p_N2, C=W0_0, observed=True,
                           value=[-20, -20])  #@UndefinedVariable @UnusedVariable
    obsN2 = pm.MvNormalCov('9-Norm', mu=p_N3, C=W0_0, observed=True,
                           value=[20, 20])  #@UndefinedVariable @UnusedVariable
    return pm.Model(
        [D, C1, C2, C3, N, W0_0, N0_1, N0_2, N0_3, obsN1, obsN2])
def get_Models():
    # Full Model (Dirichlet & Categorical)
    aAlphas = [1, 2, 8, 2]
    aD = [0, 3, 1]
    Dir = pm.Dirichlet('Dir', theta=aAlphas)  # @UndefinedVariable
    CatD = [
        pm.Categorical('CatD_' + str(i), p=Dir, observed=True, value=aD[i])
        for i in range(len(aD))
    ]  # @UndefinedVariable @UnusedVariable
    CatQ = pm.Categorical('CatQ', p=Dir)  # @UndefinedVariable

    # Collapsed Model (Categorical)
    aP = []
    for i in range(len(aAlphas)):
        # For each category, get its probability p_i
        aP.append((aAlphas[i] + aD.count(i)) / (sum(aAlphas) + len(aD)))
    CatQ2 = pm.Categorical('CatQ2', p=aP)  # @UndefinedVariable

    return np.concatenate([[Dir, CatQ, CatQ2], CatD])
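# A small sanity-check sketch (assumes PyMC2 imported as `pm`): sampled
# through the Dirichlet, CatQ should agree in distribution with the collapsed
# CatQ2, whose probabilities were computed analytically above.
nodes = get_Models()
mcmc = pm.MCMC(list(nodes))
mcmc.sample(20000, burn=5000, progress_bar=False)
print("E[CatQ] :", mcmc.trace('CatQ')[:].mean())
print("E[CatQ2]:", mcmc.trace('CatQ2')[:].mean())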
def run_Categorical_Normal():
    C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3])  # @UndefinedVariable
    p_N = pm.Lambda('p_Norm',
                    lambda node=C: np.select(
                        [node == 0, node == 1, node == 2, node == 3],
                        [-5, 0, 5, 10]),
                    doc='Pr[Norm|Cat]')
    N = pm.Normal('2-Norm', mu=p_N, tau=1)  # @UndefinedVariable
    model = pm.Model([C, N])
    mcmc = pm.MCMC(model)
    mcmc.sample(5000, progress_bar=True)
    print("C:", C.stats()["mean"], C.value)
    print("N:", N.stats()["mean"], N.value)
    plot_Samples(mcmc, aBins=[2, 500])
def __setup_eqv(self):
    """Populates the self.eqv list for each classifier by assigning it a
    categorical distribution.
    """
    # per_class = self.num_classifiers / self.num_equiv
    self.eqv = pymc.Container([
        pymc.Categorical('categ_%s' % i,
                         p=self.theta[i],
                         value=numpy.random.randint(0, self.num_equiv))
        # value=min(i / per_class, self.num_equiv - 1))
        for i in xrange(0, self.num_classifiers)
    ])
def append_features(self, features, facts):
    # Normally factor_age is a flat prior, but here we make it very non-flat,
    # as we know the answer. Ideally we'd manipulate the other probability
    # distributions to integrate out age, but that's quite tricky
    # (programmatically).
    if 'age' in facts:
        age = facts['age']
        if age >= 0:
            if age > 99:
                age = 100
            p = np.zeros(101)
            p[age] = 1  # certain
            features['factor_age'] = pm.Categorical('factor_age', p)
    if 'gender' in facts:
        ratio = [0.5, 0.5]  # prior
        if facts['gender'] == 'Male':
            ratio = [1.0, 0]
        if facts['gender'] == 'Female':
            ratio = [0, 1.0]
        if facts['gender'] == 'Other':
            ratio = [0.5, 0.5]  # don't know what to do, as the census etc.
            # doesn't have data for this situation.
        features['factor_gender'] = pm.Categorical('factor_gender',
                                                   np.array(ratio))
def getModel():
    C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3])  #@UndefinedVariable
    # C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3], observed=True, value=3)  #@UndefinedVariable
    p_N = pm.Lambda('p_Norm',
                    lambda n=C: np.select([n == 0, n == 1, n == 2, n == 3],
                                          [-5, 0, 5, 10]),
                    doc='Pr[Norm|Cat]')
    N = pm.Normal('2-Norm', mu=p_N, tau=1)  #@UndefinedVariable
    # N = pm.Normal('2-Norm', mu=p_N, tau=1, observed=True, value=2.5)  #@UndefinedVariable
    return pm.Model([C, N])
def main():
    data = np.loadtxt("data/mixture_data.csv", delimiter=",")

    p = pm.Uniform("p", 0, 1)
    assignment = pm.Categorical("assignment", [p, 1 - p], size=data.shape[0])
    taus = 1.0 / pm.Uniform("stds", 0, 100, size=2)**2
    centers = pm.Normal("centers", [120, 190], [0.01, 0.01], size=2)
    """
    The below deterministic functions map an assignment, in this case 0 or 1,
    to a set of parameters, located in the (1,2) arrays `taus` and `centers`.
    """

    @pm.deterministic
    def center_i(assignment=assignment, centers=centers):
        return centers[assignment]

    @pm.deterministic
    def tau_i(assignment=assignment, taus=taus):
        return taus[assignment]

    # and to combine it with the observations:
    observations = pm.Normal("obs", center_i, tau_i, value=data,
                             observed=True)

    # below we create a model class
    model = pm.Model([p, assignment, observations, taus, centers])

    map_ = pm.MAP(model)
    map_.fit()  # stores the fitted variables' values in foo.value

    mcmc = pm.MCMC(model)
    # Where 50000 is the burn-in iterations where fitting is
    # started but the results are not counted to the end model
    mcmc.sample(100000, 50000)

    p_trace = mcmc.trace("p")[:]
    center_trace = mcmc.trace("centers")[:]
    std_trace = mcmc.trace("stds")[:]

    x = 175
    v = ((p_trace * stats.norm.pdf(x, loc=center_trace[:, 0],
                                   scale=std_trace[:, 0])) >
         (1 - p_trace) * stats.norm.pdf(x, loc=center_trace[:, 1],
                                        scale=std_trace[:, 1]))

    # If you try this without the 50000 burn-in iterations, the certainty is
    # much less that the pixel belongs to cluster 0
    print("Probability of belonging to cluster 1:", v.mean())
    print("Probability of belonging to cluster 0:", 1 - v.mean())

    mcmc.sample(25000, 0, 10)
    mcplot(mcmc.trace("centers", 2), common_scale=False)
def getModel():
    C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3])  #@UndefinedVariable
    # C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3], observed=True, value=3)  #@UndefinedVariable
    p_N = pm.Lambda(
        'p_Norm',
        lambda n=C: np.select([n == 0, n == 1, n == 2, n == 3],
                              [[-5, -5], [0, 0], [5, 5], [10, 10]]),
        doc='Pr[Norm|Cat]')
    N = pm.MvNormal('2-Norm_2D', mu=p_N, tau=np.eye(2, 2))  #@UndefinedVariable
    # N = pm.MvNormal('2-Norm', mu=p_N, tau=np.eye(2, 2), observed=True, value=[2.5, 2.5])  #@UndefinedVariable
    return pm.Model([C, N])
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[3, 2, 4])  #@UndefinedVariable
    C1 = pm.Categorical('2-Cat', D)  #@UndefinedVariable
    C2 = pm.Categorical('10-Cat', D)  #@UndefinedVariable
    C3 = pm.Categorical('11-Cat', D)  #@UndefinedVariable
    G0_0 = pm.Gamma('4-Gamma0_1', alpha=1, beta=1.5)  #@UndefinedVariable
    U1 = pm.Uniform('12-Unif', lower=-100, upper=500)  #@UndefinedVariable
    U2 = pm.Uniform('13-Unif', lower=-100, upper=500)  #@UndefinedVariable
    U3 = pm.Uniform('14-Unif', lower=-100, upper=500)  #@UndefinedVariable
    N0_1 = pm.Normal('5-Norm0_1', mu=U1, tau=1)  #@UndefinedVariable
    N0_2 = pm.Normal('6-Norm0_2', mu=U2, tau=1)  #@UndefinedVariable
    N0_3 = pm.Normal('7-Norm0_3', mu=U3, tau=1)  #@UndefinedVariable
    aMu = [N0_1.value, N0_2.value, N0_3.value]
    fL1 = lambda n=C1: np.select([n == 0, n == 1, n == 2], aMu)
    fL2 = lambda n=C2: np.select([n == 0, n == 1, n == 2], aMu)
    fL3 = lambda n=C3: np.select([n == 0, n == 1, n == 2], aMu)
    p_N1 = pm.Lambda('p_Norm1', fL1, doc='Pr[Norm|Cat]')
    p_N2 = pm.Lambda('p_Norm2', fL2, doc='Pr[Norm|Cat]')
    p_N3 = pm.Lambda('p_Norm3', fL3, doc='Pr[Norm|Cat]')
    N = pm.Normal('3-Norm', mu=p_N1, tau=1)  #@UndefinedVariable
    obsN1 = pm.Normal('8-Norm', mu=p_N2, tau=1, observed=True,
                      value=0)  #@UndefinedVariable @UnusedVariable
    obsN2 = pm.Normal('9-Norm', mu=p_N3, tau=1, observed=True,
                      value=150)  #@UndefinedVariable @UnusedVariable
    return pm.Model(
        [D, C1, C2, C3, N, G0_0, N0_1, N0_2, N0_3, obsN1, obsN2])
def logistic_categorical_child(name, parents, levels, value=None, N=None,
                               return_coeffs=False, fixed={}):
    if value is None and N is None:
        raise ValueError('either "value" or "N" must be specified')
    if value is not None:
        value = mask_missing(value)
    N = N or len(value)
    all_coeffs, one_v_all = [], []
    for i in range(levels):
        theta, coeffs = _linearised_many(parents, '%s==%s' % (name, i), True,
                                         fixed)
        level_prob = pymc.InvLogit('p(%s==%s)' % (name, i), theta)
        one_v_all.append(level_prob)
        all_coeffs.extend(coeffs)

    @pymc.deterministic
    def child_prob(level_probs=one_v_all):
        ret = [np.array(probs) / sum(probs) for probs in zip(*level_probs)]
        return ret

    child_prob.__name__ = 'p(%s)' % name

    if value is None:
        child = pymc.Categorical(name, p=child_prob, value=np.zeros(N))
    else:
        child = pymc.Categorical(name, p=child_prob, value=value,
                                 observed=True)
    set_levels_count(child, levels)
    if return_coeffs:
        return child, all_coeffs
    else:
        return child
def runMCMC(df, cents, show=False):
    """ Run the MCMC algo for as many centers as needed """
    if type(cents) is not list:
        cents = [cents]
    numCents = len(cents)
    p = None
    # Tau = the precision of the normal distribution (of the above peaks)
    taus = 1. / pm.Uniform('stds', 0, 100, size=numCents)**2  # tau = 1/sigma**2
    centers = pm.Normal('centers', cents, [0.0025 for i in cents],
                        size=numCents)

    if numCents == 2:
        # Assignment probability
        p = pm.Uniform('p', 0, 1)
        assignment = pm.Categorical('assignment', [p, 1 - p],
                                    size=len(df.intervals))

        @pm.deterministic
        def center_i(assignment=assignment, centers=centers):
            return centers[assignment]

        @pm.deterministic
        def tau_i(assignment=assignment, taus=taus):
            return taus[assignment]

        observations = pm.Normal('obs', center_i, tau_i, value=df.intervals,
                                 observed=True)
        # Create the model, 2 peaks
        mcmc = pm.MCMC([p, assignment, observations, taus, centers])
    else:
        # Single peak: every observation comes from the one center
        observations = pm.Normal('obs', centers[0], taus[0],
                                 value=df.intervals, observed=True)
        # Create the model, 1 peak
        mcmc = pm.MCMC([observations, taus, centers])

    # Run the model
    mcmc.sample(50000)

    center_trace = mcmc.trace("centers")[:]
    try:
        clusts = [center_trace[:, i] for i in range(numCents)]
    except IndexError:  # single peak: the trace is one-dimensional
        clusts = [center_trace]

    if show:
        for i in range(numCents):
            plt.hist(center_trace[:, i], bins=50, histtype='stepfilled',
                     color=['blue', 'red'][i], alpha=0.7)
        plt.show()

    print('Evolved clusters at:')
    print([np.mean(c) for c in clusts])
    return clusts
def get_Models():
    # Full Model (DP [Dirichlet] & Categorical)
    aD = [1, 0, 1]  # Data Points
    nA, nC = 0.3, 3  # Alpha & Max No. Clusters
    aAlphas = [nA / nC] * nC
    Dir = pm.Dirichlet('Dir', theta=aAlphas)  # @UndefinedVariable
    CatD = [
        pm.Categorical('CatD_' + str(i), p=Dir, observed=True, value=aD[i])
        for i in range(len(aD))
    ]  # @UndefinedVariable @UnusedVariable
    CatQ = pm.Categorical('CatQ', p=Dir)  # @UndefinedVariable

    # Collapsed Model (Categorical)
    aP = []
    for i in range(len(aAlphas)):
        # For each category, get its probability p_i
        aP.append((aAlphas[i] + aD.count(i)) / (sum(aAlphas) + len(aD)))
    CatQ2 = pm.Categorical('CatQ2', p=aP)  # @UndefinedVariable

    return np.concatenate([[Dir, CatQ, CatQ2], CatD])