Example 1
    def append_features(self, features, facts, relationships, descriptions):
        # Normally factor_age is a flat prior, but here we make it very
        # non-flat because we know the answer. Ideally we would manipulate the
        # other probability distributions to integrate age out, but doing that
        # programmatically is tricky.
        if 'age' in facts:
            if 'factor_age' not in features:  # TODO: overwrite factor_age with this more certain distribution
                age = facts['age']
                if age >= 0:
                    if age > 99:
                        age = 100
                    p = np.zeros(101)
                    p[age] = 1  # certain: all mass on the known age
                    features['factor_age'] = pm.Categorical('factor_age', p)
        if 'gender' in facts:
            if 'factor_gender' not in features:  # TODO: overwrite factor_gender with this more certain distribution
                ratio = [0.5, 0.5]  # prior: [male, female]
                if facts['gender'].lower() == 'male':
                    ratio = [1.0, 0.0]
                elif facts['gender'].lower() == 'female':
                    ratio = [0.0, 1.0]
                elif facts['gender'].lower() == 'other':
                    # The census etc. has no data for this situation, so keep
                    # the flat prior.
                    ratio = [0.5, 0.5]
                features['factor_gender'] = pm.Categorical(
                    'factor_gender', np.array(ratio))

        descriptions['factor_age'] = {'desc': 'Your age'}
        descriptions['factor_gender'] = {'desc': 'Your gender'}
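
A one-hot probability vector makes the categorical node effectively deterministic. A minimal sketch of that behaviour, assuming PyMC2 imported as pm and a hypothetical facts input:

import numpy as np
import pymc as pm

facts = {'age': 34}      # hypothetical input
p = np.zeros(101)
p[facts['age']] = 1.0    # all probability mass on the known age
factor_age = pm.Categorical('factor_age', p)
print(factor_age.random())  # always draws 34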
Example 2
    def inference(self, iter_=5000, burn=1000):
        theta = pm.Container([
            pm.CompletedDirichlet(
                "theta_%s" % d, pm.Dirichlet("ptheta_%s" % d,
                                             theta=self.alpha))
            for d in range(self.D)
        ])
        phi = pm.Container([
            pm.CompletedDirichlet("phi_%s" % k,
                                  pm.Dirichlet("pphi_%s" % k, theta=self.beta))
            for k in range(self.K)
        ])
        z_d = pm.Container([
            pm.Categorical("z_%s" % d,
                           p=theta[d],
                           value=np.random.randint(self.K,
                                                   size=len(self.bw[d])),
                           size=len(self.bw[d])) for d in range(self.D)
        ])
        w_z = pm.Container([
            pm.Categorical("w_%s_%s" % (d, w),
                           # Bind the topic through a Lambda so p tracks z_d
                           # during sampling; calling .get_value() here would
                           # freeze the assignment at build time.
                           p=pm.Lambda("phi_z_%s_%s" % (d, w),
                                       lambda z=z_d[d], phi=phi, w=w:
                                       phi[z[w]]),
                           value=self.bw[d][w],
                           observed=True) for d in range(self.D)
            for w in range(len(self.bw[d]))
        ])

        model = pm.Model([theta, phi, z_d, w_z])
        self.mcmc = pm.MCMC(model)
        self.mcmc.sample(iter=iter_, burn=burn)
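
After sampling, the per-document topic mixtures can be read back from the PyMC2 trace. A minimal sketch, assuming an instance of the enclosing class named lda (a hypothetical name):

lda.inference(iter_=2000, burn=500)
theta_0 = lda.mcmc.trace('theta_0')[:]  # draws of document 0's topic mixture
print(theta_0.mean(axis=0))             # posterior mean topic proportions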
Example 3
    def append_features(self, features, facts):
        """Alters the features dictionary in place; adds:
         - age
         - gender
         - this instance's feature

        Args:
          features (dictionary): Dictionary of pyMC probability distributions.

        Raises:
          DuplicateFeatureException: If an identically named feature already
            exists that clashes with this instance.
        """
        # age: 0-100
        if 'factor_age' not in features:
            p = np.ones(101)  # flat prior
            p = p / p.sum()
            features['factor_age'] = pm.Categorical('factor_age', p)
        # gender: male or female
        if 'factor_gender' not in features:
            # flat prior
            features['factor_gender'] = pm.Categorical('factor_gender',
                                                       np.array([0.5, 0.5]))
        if self.featurename in features:
            raise DuplicateFeatureException(
                'The "%s" feature is already in the feature list.' %
                self.featurename)
        seen = ohf.true_string(self.answer)
        features[self.featurename] = pm.Categorical(
            self.featurename,
            self.get_pymc_function(features),
            value=seen,
            observed=True)
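
These append_features methods raise a DuplicateFeatureException that is not defined in the snippets; a minimal stand-in consistent with this usage (an assumption, not the project's actual class):

class DuplicateFeatureException(Exception):
    """Raised when a feature name is already present in the features dict."""
    pass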
Example 4
    def get_z_data(self, p, p_pos, q):
        K = 2  # Num topics
        M = p  # Num documents
        N = q  # Total num of unique words across all documents

        # Concentration parameter for the per-document distributions over
        # topics (one for each document)
        alpha = 1.0
        # Concentration parameter for the per-topic distributions over words
        # (one for each topic)
        beta = 1.0

        phi = pymc.Container([
            pymc.CompletedDirichlet(
                name="phi_" + str(k),
                D=pymc.Dirichlet(name="phi_temp_" + str(k),
                                 theta=beta * numpy.ones(N)),
            ) for k in range(K)
        ])

        theta = pymc.Container([
            pymc.CompletedDirichlet(
                name="theta_" + str(m),
                D=pymc.Dirichlet(name="theta_temp_" + str(m),
                                 theta=alpha * numpy.ones(K)),
            ) for m in range(M)
        ])

        z = pymc.Container([
            pymc.Categorical(name="z_" + str(m), p=theta[m], size=N)
            for m in range(M)
        ])

        w = pymc.Container([
            pymc.Categorical(
                name="w_" + str(m) + "_" + str(n),
                # phi_in[z_in] picks topic z's word distribution; the default
                # arguments bind each (m, n) pair's parents to the Lambda.
                p=pymc.Lambda(
                    "phi_z_" + str(m) + str(n),
                    lambda z_in=z[m][n], phi_in=phi: phi_in[z_in],
                ),
            ) for m in range(M) for n in range(N)
        ])
        lda = pymc.Model([w, z, theta, phi])

        z_rvs = []
        for m in range(M):
            metadata = {"doc_idx": m, "num_unique_words": N}
            # Note: w_0_0 is just a dummy argument that must be present in
            # the pymc.Model.
            rv = WordCountVecRV(model=lda, name="w_0_0", metadata=metadata)
            z_rvs += [rv]
        return z_rvs
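
The pymc.Lambda above binds its parents through default arguments (z_in=z[m][n], phi_in=phi): PyMC2 reads a Lambda's parents from those defaults, and the binding also sidesteps Python's late-binding closures. A plain-Python illustration of that pitfall, with hypothetical names:

probs = [0.2, 0.8]
bad = [lambda: probs[k] for k in range(2)]       # k is looked up at call time,
                                                 # so both return probs[1]
good = [lambda k=k: probs[k] for k in range(2)]  # defaults freeze k per item
print(bad[0](), good[0]())                       # 0.8 0.2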
Example 5
    def __init__(self, corpus, K=10, iterations=1000, burn=100):
        print("Building model ...")
        self.K = K
        self.V = corpus.wordCount + 1
        self.M = corpus.documentCount
        self.alpha = np.ones(self.K)
        self.beta = np.ones(self.V)
        self.corpus = corpus
        self.observations = np.array(corpus.observations)

        self.phi = np.empty(self.K, dtype=object)
        for i in range(self.K):
            self.phi[i] = pm.CompletedDirichlet(
                "Phi[%i]" % i, pm.Dirichlet("phi[%i]" % i, theta=self.beta))
        self.phi = pm.Container(self.phi)

        self.theta = np.empty(self.M, dtype=object)
        for i in range(self.M):
            self.theta[i] = pm.CompletedDirichlet(
                "Theta[%i]" % i, pm.Dirichlet("theta[%i]" % i,
                                              theta=self.alpha))
        self.theta = pm.Container(self.theta)

        self.z = np.empty(self.observations.shape, dtype=object)
        for i in range(self.M):
            self.z[i] = pm.Categorical("z[%i]" % i,
                                       size=len(self.observations[i]),
                                       p=self.theta[i],
                                       value=np.random.randint(
                                           self.K,
                                           size=len(self.observations[i])))
        self.z = pm.Container(self.z)

        self.w = []
        for i in range(self.M):
            self.w.append([])
            for j in range(len(self.observations[i])):
                self.w[i].append(
                    pm.Categorical(
                        "w[%i][%i]" % (i, j),
                        p=pm.Lambda(
                            "phi[z[%i][%i]]" % (i, j),
                            lambda z=self.z[i][j], phi=self.phi: phi[z]),
                        value=self.observations[i][j],
                        observed=True))
        self.w = pm.Container(self.w)

        self.mcmc = pm.MCMC(pm.Model([self.theta, self.phi, self.z, self.w]))

        print("Fitting model ...")
        self.mcmc.sample(iter=iterations, burn=burn)
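
A sketch of driving this constructor, assuming the enclosing class is named LDA (a hypothetical name) and that corpus exposes wordCount, documentCount, and observations exactly as used above:

class ToyCorpus:
    wordCount = 4  # vocabulary indices 0..4 (V becomes wordCount + 1)
    documentCount = 2
    observations = [[0, 1, 1], [3, 4]]  # word indices per document

lda = LDA(ToyCorpus(), K=2, iterations=500, burn=100)
print(lda.mcmc.trace('Theta[0]')[:].mean(axis=0))  # document 0's topic mix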
Example 6
def cartesian_categorical_child(name,
                                parents,
                                levels,
                                value=None,
                                N=None,
                                return_coeffs=False,
                                fixed={}):
    if value is None and N is None:
        raise ValueError('either "value" or "N" must be specified')
    if value is not None:
        value = mask_missing(value)

    ranges = [range(get_levels_count(p)) for p in parents]
    parents2index = {}
    coeffs = []
    for i, parent_vals in enumerate(product(*ranges)):
        parents2index[parent_vals] = i
        parents_repr = ' '.join('%s=%s' % (parent, v)
                                for parent, v in zip(parents, parent_vals))
        coeff_name = COEFFS_PREFIX + 'p(%s | %s)' % (name, parents_repr)
        coeff = fixed.get(coeff_name,
                          pymc.Dirichlet(coeff_name, theta=[1] * levels))
        coeffs.append(coeff)

    intify = lambda x: tuple(map(int, x))

    @pymc.deterministic
    def child_prob(parents=parents, coeffs=coeffs):
        probs = np.array([
            coeffs[parents2index[intify(parent_vals)]]
            for parent_vals in zip(*parents)
        ])
        remainders = 1 - probs.sum(axis=1)
        remainders = remainders.reshape((len(remainders), 1))
        return np.hstack([probs, remainders])

    child_prob.__name__ = 'p(%s)' % name

    if value is None:
        child = pymc.Categorical(name, p=child_prob, value=np.zeros(N))
    else:
        child = pymc.Categorical(name,
                                 p=child_prob,
                                 value=value,
                                 observed=True)
    set_levels_count(child, levels)

    if return_coeffs:
        return child, coeffs + [child_prob]
    else:
        return child
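
COEFFS_PREFIX, get_levels_count, set_levels_count, and mask_missing come from the surrounding module and are not shown. Minimal stand-ins consistent with how they are used here (assumptions, not the originals):

import numpy as np

COEFFS_PREFIX = 'coeff_'

def set_levels_count(var, levels):
    var._levels = levels  # stash the cardinality on the pymc node

def get_levels_count(var):
    return var._levels

def mask_missing(value):
    # Treat negative entries as missing observations.
    return np.ma.masked_less(np.asarray(value), 0)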
Example 7
def getModel():
    nA, nB, nK = 5, 2, 10
    B = pm.Beta('1-Beta', alpha=nA / nK, beta=nB * (nK - 1) / nK)
    # p_B = pm.Lambda('p_Bern', lambda b=B: np.where(b==0, 0.9, 0.1), doc='Pr[Bern|Beta]')
    C = pm.Categorical('2-Cat', [1 - B, B])
    # C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3], observed=True, value=3)
    return pm.Model([B, C])
Example 8
    def test_1d_w(self):
        nd = self.nd
        npop = self.npop
        mus = self.mus
        size = 100
        with pm.Model() as model:
            m = pm.NormalMixture("m",
                                 w=np.ones(npop) / npop,
                                 mu=mus,
                                 sigma=1e-5,
                                 comp_shape=(nd, npop),
                                 shape=nd)
            z = pm.Categorical("z", p=np.ones(npop) / npop)
            latent_m = pm.Normal("latent_m",
                                 mu=mus[..., z],
                                 sigma=1e-5,
                                 shape=nd)

        m_val = m.random(size=size)
        latent_m_val = latent_m.random(size=size)
        assert m_val.shape == latent_m_val.shape
        # Test that each element in axis = -1 comes from the same mixture
        # component
        assert all(np.all(np.diff(m_val) < 1e-3, axis=-1))
        assert all(np.all(np.diff(latent_m_val) < 1e-3, axis=-1))

        self.samples_from_same_distribution(m_val, latent_m_val)
        self.logp_matches(m, latent_m, z, npop, model=model)
Example 9
def run_Categorical_Normal():
    nC = 3  # num clusters
    aD = [0, 1, 8, 9, 20, 21]  # data points
    nPts = len(aD) + 1
    # Clusters
    aUh = [
        pm.Uniform('UnifH' + str(i), lower=-50, upper=50) for i in range(nC)
    ]
    aNc = [pm.Normal('NormC' + str(i), mu=aUh[i], tau=1) for i in range(nC)]
    # Dirichlet & categorical nodes
    Dir = pm.Dirichlet('Dirichlet', theta=[1] * nC)
    aC = [pm.Categorical('Cat' + str(i), Dir) for i in range(nPts)]
    aL = [
        pm.Lambda('p_Norm' + str(i), lambda k=aC[i], aNcl=aNc: aNcl[int(k)])
        for i in range(nPts)
    ]
    # Points
    aN = [
        pm.Normal('NormX' + str(i),
                  mu=aL[i],
                  tau=1,
                  observed=True,
                  value=aD[i]) for i in range(nPts - 1)
    ]
    Nz = pm.Normal('NormZ', mu=aL[-1], tau=1)
    return np.concatenate([[Nz, Dir], aUh, aNc, aC, aN])
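
The function returns the model's nodes rather than a pm.Model, so sampling looks like this (a sketch, assuming PyMC2):

nodes = run_Categorical_Normal()
mcmc = pm.MCMC(list(nodes))
mcmc.sample(iter=5000, burn=1000)
print(mcmc.trace('NormZ')[:].mean())  # posterior mean for the held-out point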
Example 10
def getModel():
    nA, nK = 0.05, 4
    aDir = [nA / nK] * nK
    D = pm.Dirichlet('1-Dirichlet', theta=aDir)
    C1 = pm.Categorical('2-Cat', D)
    # C2 = pm.Categorical('10-Cat', D)
    # C3 = pm.Categorical('11-Cat', D)
    # C4 = pm.Categorical('14-Cat', D)
    # C5 = pm.Categorical('15-Cat', D)
    # G0_0 = pm.Gamma('4-Gamma0_1', alpha=1, beta=1.5)
    N0_1 = pm.Normal('5-Norm0_1', mu=10, tau=1)
    N0_2 = pm.Normal('6-Norm0_2', mu=-10, tau=1)
    N0_3 = pm.Normal('7-Norm0_3', mu=30, tau=1)
    N0_4 = pm.Normal('16-Norm0_4', mu=-30, tau=1)
    # Note: .value freezes the component means at model-build time.
    aMu = [N0_1.value, N0_2.value, N0_3.value, N0_4.value]
    p_N1 = pm.Lambda('p_Norm1', lambda n=C1: aMu[n], doc='Pr[Norm|Cat]')
    # p_N2 = pm.Lambda('p_Norm2', lambda n=C2: aMu[n], doc='Pr[Norm|Cat]')
    # p_N3 = pm.Lambda('p_Norm3', lambda n=C3: aMu[n], doc='Pr[Norm|Cat]')
    # p_N4 = pm.Lambda('p_Norm4', lambda n=C4: aMu[n], doc='Pr[Norm|Cat]')
    # p_N5 = pm.Lambda('p_Norm6', lambda n=C5: aMu[n], doc='Pr[Norm|Cat]')
    N = pm.Normal('3-Norm', mu=p_N1, tau=1)
    # obsN1 = pm.Normal('8-Norm', mu=p_N2, tau=1, observed=True, value=40)
    # obsN2 = pm.Normal('9-Norm', mu=p_N3, tau=1, observed=True, value=40)
    # obsN3 = pm.Normal('12-Norm', mu=p_N4, tau=1, observed=True, value=-40)
    # obsN4 = pm.Normal('13-Norm', mu=p_N5, tau=1, observed=True, value=-40)
    return pm.Model([D, C1, N, N0_1, N0_2, N0_3, N0_4])
Example 11
    def test_2d_w(self):
        nd = self.nd
        npop = self.npop
        mus = self.mus
        size = 100
        with pm.Model() as model:
            m = pm.NormalMixture(
                "m",
                w=np.ones((nd, npop)) / npop,
                mu=mus,
                sigma=1e-5,
                comp_shape=(nd, npop),
                shape=nd,
            )
            z = pm.Categorical("z", p=np.ones(npop) / npop, shape=nd)
            mu = at.as_tensor_variable([mus[i, z[i]] for i in range(nd)])
            latent_m = pm.Normal("latent_m", mu=mu, sigma=1e-5, shape=nd)

        m_val = m.random(size=size)
        latent_m_val = latent_m.random(size=size)
        assert m_val.shape == latent_m_val.shape
        # Test that each element in axis = -1 can come from independent
        # components
        assert not all(np.all(np.diff(m_val) < 1e-3, axis=-1))
        assert not all(np.all(np.diff(latent_m_val) < 1e-3, axis=-1))

        self.samples_from_same_distribution(m_val, latent_m_val)
        self.logp_matches(m, latent_m, z, npop, model=model)
Example 12
def run_Categorical_Normal():
    C = pm.Categorical('Cat', [0.2, 0.4, 0.1, 0.3])
    p_N = pm.Lambda('p_Norm', lambda node=C: [-5, 0, 5, 10][node])
    N = pm.Normal('Norm', mu=p_N, tau=1)
    return [C, N]
Example 13
    def append_features(self, features, facts, relationships, descriptions):
        """Alters the features dictionary in place; adds:
         - age
         - blockgroup
         - this instance's feature

        Args:
          features (dictionary): Dictionary of pyMC probability distributions.

        Raises:
          DuplicateFeatureException: If an identically named feature already
            exists that clashes with this instance.
        """
        # If we're probably not in the US then we just skip.
        if self.prob_in_us(facts) < 0.01:
            return

        self.calc_probs_age(facts)
        if 'factor_age' not in features:
            # Flat prior; will be unflattened by US stats (TODO confirm).
            p = np.ones(101)
            p = p / p.sum()
            features['factor_age'] = pm.Categorical('factor_age', p)
        if 'blockgroup' not in features:
            p = self.get_list_of_bg_probs(facts)
            features['blockgroup'] = pm.Categorical('blockgroup', p)

        if self.featurename + "_blockgroup" in features:
            raise DuplicateFeatureException(
                'The "%s" feature is already in the feature list.' %
                (self.featurename + "_blockgroup"))

        features[self.featurename + "_blockgroup"] = pm.Categorical(
            self.featurename + "_blockgroup",
            self.get_pymc_function_age(features),
            value=True,
            observed=True)

        relationship = {'parent': 'factor_age', 'child': 'blockgroup'}
        relationships.append(relationship)

        descriptions['factor_age'] = {'desc': 'Your age'}
        descriptions['blockgroup'] = {'desc': 'Your geographical location'}
        descriptions[self.featurename + "_blockgroup"] = {
            'desc':
            'Probability of being in this block group given your features'
        }  #TODO Figure this out
Example 14
    def test_unobserved_categorical(self):
        with pm.Model() as m:
            mu = pm.Categorical("mu", p=[0.1, 0.3, 0.6], size=2)
            pm.Normal("like", mu=mu, sigma=0.1, observed=[1, 2])

            trace = pm.sample_smc(chains=1, return_inferencedata=False)

        assert np.all(np.median(trace["mu"], axis=0) == [1, 2])
Example 15
    def append_features(self, features, facts, relationships, descriptions):
        """Alters the features dictionary in place; adds:
         - age
         - gender
         - this instance's feature

        Args:
          features (dictionary): Dictionary of pyMC probability distributions.
          facts (dictionary): should already be populated with facts

        Raises:
          DuplicateFeatureException: If an identically named feature already
            exists that clashes with this instance.
        """
        # age: 0-100

        if 'first_name' not in facts:  # we don't know their first name
            return
        logging.info('Appending babynames features')
        self.answer = facts['first_name']
        if 'factor_age' not in features:
            p = np.ones(101)  # flat prior
            p = p / p.sum()
            features['factor_age'] = pm.Categorical('factor_age', p)
        if 'factor_gender' not in features:
            # flat prior
            features['factor_gender'] = pm.Categorical('factor_gender',
                                                       np.array([0.5, 0.5]))
        if self.featurename in features:
            raise DuplicateFeatureException(
                'The "%s" feature is already in the feature list.' %
                self.featurename)
        features[self.featurename] = pm.Categorical(
            self.featurename,
            self.get_pymc_function(features),
            value=True,
            observed=True)

        relationships.append({
            'parent': 'factor_gender',
            'child': 'first_name'
        })
        relationships.append({'parent': 'factor_age', 'child': 'first_name'})
Example 16
def make_categorical(name,
                     levels,
                     value=None,
                     N=None,
                     return_coeffs=False,
                     fixed={}):
    """ creates a Bernoulli random variable with a Dirichlet parent

    :param name: name of the variable
    :param levels: integer - how many levels does the variable have
    :param value: optional - list of observed values of the variable. Must consist of integers
        from 0 to levels - 1. May be a masked array - if the variable has missing values
    :param N: size of the variable (number of values). Either N or value must be specified
    :param return_coeffs: if true, will return the parent Beta variable as well as the bernoulli
        child. False by defaut.
    :param fixed: optional dictionary of values of coefficients to be fixed.
    :return: Categorical pymc random variable, or (if return_coeffs == True) a tuple
        (categorical variable; a list with a single element - the Dirichlet parent)
    """
    if value is None and N is None:
        raise ValueError('either "value" or "N" must be specified')
    if value is not None:
        value = mask_missing(value)

    N = N or len(value)
    coeff_name = COEFFS_PREFIX + 'p(%s)' % name
    if coeff_name in fixed:
        probs = fixed[coeff_name]
        parent = list(probs) + [1 - sum(probs)]
    else:
        parent = pymc.Dirichlet(coeff_name, theta=[1] * levels)

    if value is None:
        child = pymc.Categorical(name, p=parent, value=np.zeros(N))
    else:
        child = pymc.Categorical(name, p=parent, observed=True, value=value)

    set_levels_count(child, levels)

    if return_coeffs:
        return child, [parent]
    else:
        return child
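
A sketch of calling make_categorical both ways, assuming NumPy imported as np and the module helpers described above:

obs = np.array([0, 2, 1, 1, 2, 0])
x = make_categorical('x', levels=3, value=obs)   # observed child
y, coeffs = make_categorical('y', levels=3, N=100,
                             return_coeffs=True)  # latent child + Dirichlet parent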
Example 17
    def getData(self, dfTrack, nTrackId, avgSpeed, nProb_Gait, sGait):
        # g = pgm.Graph()
        # cpt1 = [.5, .5]
        # cpt2 = {"['False']": [.5, .5], "['True']": [nProb_Gait, 1-nProb_Gait]}
        # g.addnode(pgm.Node(sGait, ["False", "True"], [None], cpt1))
        # g.addnode(pgm.Node("Speed=%.2f" % avgSpeed, ["False", "True"], [g.node[sGait]], cpt2))
        # g.setup()
        G_obs = [1.]
        N = len(G_obs)
        gait = pm.Categorical(sGait, [0.5, 0.5], value=pl.ones(N))
        p_speed = pm.Lambda('p_' + sGait,
                            lambda gait=gait: pl.where(gait, nProb_Gait,
                                                       [0.5, 0.5]))
        speed = pm.Categorical("Speed=%.2f" % avgSpeed, p_speed,
                               value=G_obs, observed=True)
        model = pm.Model([gait, speed])
        g = pm.graph.graph(model)
        g.write_pdf("./Models/Graph2_" + str(int(nTrackId)) + "_" +
                    str(sGait) + ".pdf")
        # g.write2pdf("./Models/Graph_" + str(int(nTrackId)) + "_" + str(sGait) + ".pdf")
        data = {"TrackId": nTrackId, "Type": sGait, "Belief": nProb_Gait,
                "Obs": ["Speed"], "Obs_Vals": [avgSpeed],
                "MEs": ["Walk", "Stand"], "Graph": g}
        return data
Example 18
def test_discrete_not_allowed():
    mu_true = np.array([-2, 0, 2])
    z_true = np.random.randint(len(mu_true), size=100)
    y = np.random.normal(mu_true[z_true], np.ones_like(z_true))

    with pm.Model():
        mu = pm.Normal("mu", mu=0, sigma=10, size=3)
        z = pm.Categorical("z", p=at.ones(3) / 3, size=len(y))
        pm.Normal("y_obs", mu=mu[z], sigma=1.0, observed=y)
        with pytest.raises(opvi.ParametrizationError):
            pm.fit(n=1)  # fails
Example 19
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[3, 2, 4])
    C1 = pm.Categorical('2-Cat', D)
    C2 = pm.Categorical('10-Cat', D)
    C3 = pm.Categorical('11-Cat', D)
    W0_0 = pm.WishartCov('4-Wishart0_1', n=5, C=np.eye(2))
    N0_1 = pm.MvNormalCov('5-Norm0_1', mu=[-20, -20], C=np.eye(2))
    N0_2 = pm.MvNormalCov('6-Norm0_2', mu=[0, 0], C=np.eye(2))
    N0_3 = pm.MvNormalCov('7-Norm0_3', mu=[20, 20], C=np.eye(2))
    # Note: .value freezes the component means at model-build time.
    aMu = [N0_1.value, N0_2.value, N0_3.value]
    fL1 = lambda n=C1: np.select([n == 0, n == 1, n == 2], aMu)
    fL2 = lambda n=C2: np.select([n == 0, n == 1, n == 2], aMu)
    fL3 = lambda n=C3: np.select([n == 0, n == 1, n == 2], aMu)
    p_N1 = pm.Lambda('p_Norm1', fL1, doc='Pr[Norm|Cat]')
    p_N2 = pm.Lambda('p_Norm2', fL2, doc='Pr[Norm|Cat]')
    p_N3 = pm.Lambda('p_Norm3', fL3, doc='Pr[Norm|Cat]')
    N = pm.MvNormalCov('3-Norm', mu=p_N1, C=W0_0)
    obsN1 = pm.MvNormalCov('8-Norm', mu=p_N2, C=W0_0, observed=True,
                           value=[-20, -20])
    obsN2 = pm.MvNormalCov('9-Norm', mu=p_N3, C=W0_0, observed=True,
                           value=[20, 20])
    return pm.Model([D, C1, C2, C3, N, W0_0, N0_1, N0_2, N0_3, obsN1, obsN2])
Example 20
def get_Models():
    # Full model (Dirichlet & Categorical)
    aAlphas = [1, 2, 8, 2]
    aD = [0, 3, 1]
    Dir = pm.Dirichlet('Dir', theta=aAlphas)
    CatD = [
        pm.Categorical('CatD_' + str(i), p=Dir, observed=True, value=aD[i])
        for i in range(len(aD))
    ]
    CatQ = pm.Categorical('CatQ', p=Dir)
    # Collapsed model (Categorical): each category's posterior predictive
    # probability is p_i = (alpha_i + n_i) / (sum(alpha) + N).
    aP = []
    for i in range(len(aAlphas)):
        aP.append((aAlphas[i] + aD.count(i)) / (sum(aAlphas) + len(aD)))
    CatQ2 = pm.Categorical('CatQ2', p=aP)
    return np.concatenate([[Dir, CatQ, CatQ2], CatD])
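
A sketch checking that the full and collapsed models agree, assuming PyMC2 sampling over the returned nodes:

nodes = get_Models()
mcmc = pm.MCMC(list(nodes))
mcmc.sample(iter=20000, burn=5000)
print(mcmc.trace('CatQ')[:].mean())   # full model's predictive draw mean
print(mcmc.trace('CatQ2')[:].mean())  # collapsed model's predictive draw mean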
Example 21
def run_Categorical_Normal():
    C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3])
    p_N = pm.Lambda('p_Norm',
                    lambda node=C: np.select(
                        [node == 0, node == 1, node == 2, node == 3],
                        [-5, 0, 5, 10]),
                    doc='Pr[Norm|Cat]')
    N = pm.Normal('2-Norm', mu=p_N, tau=1)
    model = pm.Model([C, N])
    mcmc = pm.MCMC(model)
    mcmc.sample(5000, progress_bar=True)
    print("C:", C.stats()["mean"], C.value)
    print("N:", N.stats()["mean"], N.value)
    plot_Samples(mcmc, aBins=[2, 500])
Example 22
    def __setup_eqv(self):
        """Populates the self.eqv list for each classifier by assigning it
        a categorical distribution.
        """
        # per_class = self.num_classifiers / self.num_equiv
        self.eqv = pymc.Container(
            [pymc.Categorical('categ_%s' % i,
                              p=self.theta[i],
                              value=numpy.random.randint(0, self.num_equiv))
             # value=min(i / per_class, self.num_equiv - 1))
             for i in range(0, self.num_classifiers)])
Example 23
    def append_features(self, features, facts):
        # Normally factor_age is a flat prior, but here we make it very
        # non-flat because we know the answer. Ideally we would manipulate the
        # other probability distributions to integrate age out, but doing that
        # programmatically is tricky.
        if 'age' in facts:
            age = facts['age']
            if age >= 0:
                if age > 99:
                    age = 100
                p = np.zeros(101)
                p[age] = 1  # certain
                features['factor_age'] = pm.Categorical('factor_age', p)
        if 'gender' in facts:
            ratio = [0.5, 0.5]  # fallback for unrecognised values
            if facts['gender'] == 'Male':
                ratio = [1.0, 0.0]
            elif facts['gender'] == 'Female':
                ratio = [0.0, 1.0]
            elif facts['gender'] == 'Other':
                # The census etc. has no data for this situation, so keep the
                # flat prior.
                ratio = [0.5, 0.5]
            features['factor_gender'] = pm.Categorical('factor_gender',
                                                       np.array(ratio))
Example 24
def getModel():
    C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3])
    # C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3], observed=True, value=3)
    p_N = pm.Lambda('p_Norm',
                    lambda n=C: np.select([n == 0, n == 1, n == 2, n == 3],
                                          [-5, 0, 5, 10]),
                    doc='Pr[Norm|Cat]')
    N = pm.Normal('2-Norm', mu=p_N, tau=1)
    # N = pm.Normal('2-Norm', mu=p_N, tau=1, observed=True, value=2.5)
    return pm.Model([C, N])
Example 25
def main():
    data = np.loadtxt("data/mixture_data.csv", delimiter=",")

    p = pm.Uniform("p", 0, 1)

    assignment = pm.Categorical("assignment", [p, 1 - p], size=data.shape[0])

    taus = 1.0 / pm.Uniform("stds", 0, 100, size=2)**2
    centers = pm.Normal("centers", [120, 190], [0.01, 0.01], size=2)
    """
    The below deterministic functions map an assignment, in this case 0 or 1,
    to a set of parameters, located in the (1,2) arrays `taus` and `centers`.
    """
    @pm.deterministic
    def center_i(assignment=assignment, centers=centers):
        return centers[assignment]

    @pm.deterministic
    def tau_i(assignment=assignment, taus=taus):
        return taus[assignment]

    # and to combine it with the observations:
    observations = pm.Normal("obs", center_i, tau_i, value=data, observed=True)

    # below we create a model class
    model = pm.Model([p, assignment, observations, taus, centers])

    map_ = pm.MAP(model)
    map_.fit()  # stores the fitted MAP values in each variable's .value

    mcmc = pm.MCMC(model)
    # 100000 iterations, of which the first 50000 are burn-in: sampling runs
    # but those draws are discarded from the trace.
    mcmc.sample(100000, 50000)

    p_trace = mcmc.trace("p")[:]
    center_trace = mcmc.trace("centers")[:]
    std_trace = mcmc.trace("stds")[:]
    x = 175

    v = ((p_trace *
          stats.norm.pdf(x, loc=center_trace[:, 0], scale=std_trace[:, 0])) >
         (1 - p_trace) *
         stats.norm.pdf(x, loc=center_trace[:, 1], scale=std_trace[:, 1]))

    # Without the 50000 burn-in iterations, the certainty that the pixel
    # belongs to cluster 0 is much lower.
    print("Probability of belonging to cluster 1:", v.mean())
    print("Probability of belonging to cluster 0:", 1 - v.mean())

    mcmc.sample(25000, 0, 10)
    mcplot(mcmc.trace("centers", 2), common_scale=False)
Example 26
def getModel():
    C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3])
    # C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3], observed=True, value=3)
    p_N = pm.Lambda(
        'p_Norm',
        lambda n=C: np.select([n == 0, n == 1, n == 2, n == 3],
                              [[-5, -5], [0, 0], [5, 5], [10, 10]]),
        doc='Pr[Norm|Cat]')
    N = pm.MvNormal('2-Norm_2D', mu=p_N, tau=np.eye(2, 2))
    # N = pm.MvNormal('2-Norm', mu=p_N, tau=np.eye(2, 2), observed=True, value=[2.5, 2.5])
    return pm.Model([C, N])
Example 27
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[3, 2, 4])
    C1 = pm.Categorical('2-Cat', D)
    C2 = pm.Categorical('10-Cat', D)
    C3 = pm.Categorical('11-Cat', D)
    G0_0 = pm.Gamma('4-Gamma0_1', alpha=1, beta=1.5)
    U1 = pm.Uniform('12-Unif', lower=-100, upper=500)
    U2 = pm.Uniform('13-Unif', lower=-100, upper=500)
    U3 = pm.Uniform('14-Unif', lower=-100, upper=500)
    N0_1 = pm.Normal('5-Norm0_1', mu=U1, tau=1)
    N0_2 = pm.Normal('6-Norm0_2', mu=U2, tau=1)
    N0_3 = pm.Normal('7-Norm0_3', mu=U3, tau=1)
    # Note: .value freezes the component means at model-build time.
    aMu = [N0_1.value, N0_2.value, N0_3.value]
    fL1 = lambda n=C1: np.select([n == 0, n == 1, n == 2], aMu)
    fL2 = lambda n=C2: np.select([n == 0, n == 1, n == 2], aMu)
    fL3 = lambda n=C3: np.select([n == 0, n == 1, n == 2], aMu)
    p_N1 = pm.Lambda('p_Norm1', fL1, doc='Pr[Norm|Cat]')
    p_N2 = pm.Lambda('p_Norm2', fL2, doc='Pr[Norm|Cat]')
    p_N3 = pm.Lambda('p_Norm3', fL3, doc='Pr[Norm|Cat]')
    N = pm.Normal('3-Norm', mu=p_N1, tau=1)
    obsN1 = pm.Normal('8-Norm', mu=p_N2, tau=1, observed=True, value=0)
    obsN2 = pm.Normal('9-Norm', mu=p_N3, tau=1, observed=True, value=150)
    return pm.Model(
        [D, C1, C2, C3, N, G0_0, N0_1, N0_2, N0_3, obsN1, obsN2])
Example 28
def logistic_categorical_child(name,
                               parents,
                               levels,
                               value=None,
                               N=None,
                               return_coeffs=False,
                               fixed={}):
    if value is None and N is None:
        raise ValueError('either "value" or "N" must be specified')
    if value is not None:
        value = mask_missing(value)
    N = N or len(value)
    all_coeffs, one_v_all = [], []
    for i in range(levels):
        theta, coeffs = _linearised_many(parents, '%s==%s' % (name, i), True,
                                         fixed)
        level_prob = pymc.InvLogit('p(%s==%s)' % (name, i), theta)
        one_v_all.append(level_prob)
        all_coeffs.extend(coeffs)

    @pymc.deterministic
    def child_prob(level_probs=one_v_all):
        ret = [np.array(probs) / sum(probs) for probs in zip(*level_probs)]
        return ret

    child_prob.__name__ = 'p(%s)' % name

    if value is None:
        child = pymc.Categorical(name, p=child_prob, value=np.zeros(N))
    else:
        child = pymc.Categorical(name,
                                 p=child_prob,
                                 value=value,
                                 observed=True)
    set_levels_count(child, levels)

    if return_coeffs:
        return child, all_coeffs
    else:
        return child
Example 29
def runMCMC(df, cents, show=False):
    """
    Run the MCMC algo for as many centers as needed
    """
    if type(cents) is not list:
        cents = [cents]
    numCents = len(cents)

    # tau = the precision of the normal distribution (of the above peaks)
    taus = 1. / pm.Uniform('stds', 0, 100, size=numCents)**2  # tau = 1/sigma**2
    centers = pm.Normal('centers', cents, [0.0025 for i in cents],
                        size=numCents)

    if numCents == 2:  # assignment probability
        p = pm.Uniform('p', 0, 1)
        assignment = pm.Categorical('assignment', [p, 1 - p],
                                    size=len(df.intervals))

        @pm.deterministic
        def center_i(assignment=assignment, centers=centers):
            return centers[assignment]

        @pm.deterministic
        def tau_i(assignment=assignment, taus=taus):
            return taus[assignment]

        observations = pm.Normal('obs', center_i, tau_i, value=df.intervals,
                                 observed=True)
        # Create the model with 2 peaks
        mcmc = pm.MCMC([p, assignment, observations, taus, centers])

    else:
        # Single peak: use the lone center and precision directly (assumed
        # fix; the original call omitted the required mu and tau parents).
        observations = pm.Normal('obs', centers[0], taus[0],
                                 value=df.intervals, observed=True)
        mcmc = pm.MCMC([observations, taus, centers])  # create model, 1 peak

    # Run the model
    mcmc.sample(50000)
    center_trace = mcmc.trace("centers")[:]
    try:
        clusts = [center_trace[:, i] for i in range(numCents)]
    except IndexError:
        clusts = [center_trace]

    if show:
        for i in range(numCents):
            plt.hist(center_trace[:, i], bins=50, histtype='stepfilled',
                     color=['blue', 'red'][i], alpha=0.7)
        plt.show()

    print('Evolved clusters at:')
    print([np.mean(c) for c in clusts])
    return clusts
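
A sketch of driving runMCMC, assuming pandas imported as pd and a DataFrame whose intervals column holds the values to cluster:

import numpy as np
import pandas as pd

df = pd.DataFrame({'intervals': np.r_[np.random.normal(100, 5, 200),
                                      np.random.normal(300, 5, 200)]})
clusts = runMCMC(df, [100, 300])  # two candidate centers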
Example 30
def get_Models():
    # Full model (DP [Dirichlet] & Categorical)
    aD = [1, 0, 1]  # data points
    nA, nC = 0.3, 3  # alpha & max no. of clusters
    aAlphas = [nA / nC] * nC
    Dir = pm.Dirichlet('Dir', theta=aAlphas)
    CatD = [
        pm.Categorical('CatD_' + str(i), p=Dir, observed=True, value=aD[i])
        for i in range(len(aD))
    ]
    CatQ = pm.Categorical('CatQ', p=Dir)
    # Collapsed model (Categorical): each category's posterior predictive
    # probability is p_i = (alpha_i + n_i) / (sum(alpha) + N).
    aP = []
    for i in range(len(aAlphas)):
        aP.append((aAlphas[i] + aD.count(i)) / (sum(aAlphas) + len(aD)))
    CatQ2 = pm.Categorical('CatQ2', p=aP)
    return np.concatenate([[Dir, CatQ, CatQ2], CatD])