Code example #1
File: test_mixture.py  Project: aloctavodia/pymc3
    def test_mixture_of_mvn(self):
        mu1 = np.asarray([0., 1.])
        cov1 = np.diag([1.5, 2.5])
        mu2 = np.asarray([1., 0.])
        cov2 = np.diag([2.5, 3.5])
        obs = np.asarray([[.5, .5], mu1, mu2])
        with Model() as model:
            w = Dirichlet('w', floatX(np.ones(2)), transform=None)
            mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
            mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
            y = Mixture('x_obs', w, [mvncomp1, mvncomp2],
                    observed=obs)

        # check logp of each component
        complogp_st = np.vstack((st.multivariate_normal.logpdf(obs, mu1, cov1),
                                 st.multivariate_normal.logpdf(obs, mu2, cov2))
                                ).T
        complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
        assert_allclose(complogp, complogp_st)

        # check logp of mixture
        testpoint = model.test_point
        mixlogp_st = logsumexp(np.log(testpoint['w']) + complogp_st,
                               axis=-1, keepdims=True)
        assert_allclose(y.logp_elemwise(testpoint),
                        mixlogp_st)

        # check logp of model
        priorlogp = st.dirichlet.logpdf(x=testpoint['w'],
                                        alpha=np.ones(2),
                                        )
        assert_allclose(model.logp(testpoint),
                        mixlogp_st.sum() + priorlogp)
Code example #2
File: test_mixture.py  Project: xiaoxi0920/pymc3
    def test_mixture_of_mvn(self):
        mu1 = np.asarray([0.0, 1.0])
        cov1 = np.diag([1.5, 2.5])
        mu2 = np.asarray([1.0, 0.0])
        cov2 = np.diag([2.5, 3.5])
        obs = np.asarray([[0.5, 0.5], mu1, mu2])
        with Model() as model:
            w = Dirichlet("w", floatX(np.ones(2)), transform=None, shape=(2, ))
            mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
            mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
            y = Mixture("x_obs", w, [mvncomp1, mvncomp2], observed=obs)

        # check logp of each component
        complogp_st = np.vstack((
            st.multivariate_normal.logpdf(obs, mu1, cov1),
            st.multivariate_normal.logpdf(obs, mu2, cov2),
        )).T
        complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
        assert_allclose(complogp, complogp_st)

        # check logp of mixture
        testpoint = model.test_point
        mixlogp_st = logsumexp(np.log(testpoint["w"]) + complogp_st,
                               axis=-1,
                               keepdims=False)
        assert_allclose(y.logp_elemwise(testpoint), mixlogp_st)

        # check logp of model
        priorlogp = st.dirichlet.logpdf(
            x=testpoint["w"],
            alpha=np.ones(2),
        )
        assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp)
Code example #3
File: diffpri.py  Project: DPBayes/robust-private-lr
def doMCMC(n, nxx, nxy, nyy, x):

    # Optional fixed random seed for reproducibility
    use_seed = False
    seed = 42

    d = nxx.shape[0]
    ns = 2000

    # Disable printing
    sys.stdout = open(os.devnull, 'w')

    # Sufficient statistics
    NXX = shared(nxx)
    NXY = shared(nxy)
    NYY = shared(nyy)

    # Define model and perform MCMC sampling
    with Model() as model:

        # Fixed hyperparameters for priors
        b0 = Deterministic('b0', th.zeros((d), dtype='float64'))
        ide = Deterministic('ide', th.eye(d, m=d, k=0, dtype='float64'))

        # Priors for parameters
        l0 = Gamma('l0', alpha=2.0, beta=2.0)
        l = Gamma('l', alpha=2.0, beta=2.0)
        b = MvNormal('b', mu=b0, tau=l0 * ide, shape=d)

        # Custom log likelihood
        def logp(xtx, xty, yty):
            return (n / 2.0) * th.log(l / (2 * np.pi)) + (-l / 2.0) * (
                th.dot(th.dot(b, xtx), b) - 2 * th.dot(b, xty) + yty)

        # Likelihood
        delta = DensityDist('delta',
                            logp,
                            observed={
                                'xtx': NXX,
                                'xty': NXY,
                                'yty': NYY
                            })

        # Inference
        print('doMCMC: start NUTS')
        step = NUTS()
        if use_seed:
            trace = sample(ns, step, progressbar=True, random_seed=seed)
        else:
            trace = sample(ns, step, progressbar=True)

    # Enable printing
    sys.stdout = sys.__stdout__

    # Compute prediction over posterior
    return np.mean([np.dot(x, trace['b'][i]) for i in range(ns)], 0)
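The doMCMC excerpt omits the module-level imports of diffpri.py; a plausible set, inferred only from the names used in the function (an assumption, the actual file may differ), is:

# Presumed imports for doMCMC / doADVI (inferred from usage; not shown in the source excerpt)
import os
import sys

import numpy as np
import theano.tensor as th
from theano import shared
from pymc3 import (Model, Deterministic, Gamma, MvNormal, DensityDist,
                   NUTS, sample)
# doADVI below additionally uses advi and sample_vp from the old PyMC3 variational module:
# from pymc3.variational import advi, sample_vp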
Code example #4
def doADVI(n, xx, xy, yy, x):

    d = xx.shape[0]
    ns = 5000
    seed = 42  # for reproducibility

    # Disable printing
    sys.stdout = open(os.devnull, 'w')

    # Sufficient statistics
    NXX = shared(xx)
    NXY = shared(xy)
    NYY = shared(yy)

    # Define model and perform ADVI
    with Model() as model:

        # Fixed hyperparameters for priors
        b0 = Deterministic('b0', th.zeros((d), dtype='float64'))
        ide = Deterministic('ide', th.eye(d, m=d, k=0, dtype='float64'))

        # Priors for parameters
        l0 = Gamma('l0', alpha=2.0, beta=2.0)
        l = Gamma('l', alpha=2.0, beta=2.0)
        b = MvNormal('b', mu=b0, tau=l0 * ide, shape=d)

        # Custom log likelihood
        def logp(xtx, xty, yty):
            return (n / 2.0) * th.log(l / (2 * np.pi)) + (-l / 2.0) * (
                th.dot(th.dot(b, xtx), b) - 2 * th.dot(b, xty) + yty)

        # Likelihood
        delta = DensityDist('delta',
                            logp,
                            observed={
                                'xtx': NXX,
                                'xty': NXY,
                                'yty': NYY
                            })

        # Inference
        v_params = advi(n=ns, random_seed=seed)
        trace = sample_vp(v_params, draws=ns, random_seed=seed)

    # Enable printing
    sys.stdout = sys.__stdout__

    # Compute prediction over posterior
    return np.mean([np.dot(x, trace['b'][i]) for i in range(ns)], 0)
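A minimal usage sketch; the data below is illustrative (an assumption, not from the source project). The sufficient statistics passed in are X'X, X'y, and y'y, and the last argument holds the inputs to predict for; the same call pattern applies to doMCMC above.

# Hypothetical call to doADVI with synthetic data (illustrative only)
rng = np.random.RandomState(0)
n, d = 100, 3
X = rng.randn(n, d)                      # training inputs
y = X.dot(np.ones(d)) + rng.randn(n)     # training targets
x_new = rng.randn(5, d)                  # inputs to predict for

pred = doADVI(n, X.T.dot(X), X.T.dot(y), float(y.dot(y)), x_new)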
Code example #5
File: g_model.py  Project: BIIG-UC3M/MMITB
def run_mv_model(data, K=3, n_feats=2, mus=None, mc_samples=10000, jobs=1):
    with pm.Model() as model:
        n_samples = len(data)
        tau = pm.Deterministic('tau', pm.floatX(tt.eye(n_feats) * 10))
        mus = 0. if mus is None else mus
        mus = MvNormal('mus', mu=mus, tau=tau, shape=(K, n_feats))
        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        category = pm.Categorical('category', p=pi, shape=n_samples)
        xs = pm.MvNormal('x',
                         mu=mus[category],
                         tau=tt.eye(n_feats),
                         observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, step2, njobs=jobs)

    pm.traceplot(trace, varnames=['mus', 'pi', 'tau'])
    plt.title('mv model')
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])

    return model, mod, trace
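A usage sketch with synthetic two-feature data (illustrative values; assumes numpy is imported as np, as the excerpt implies):

# Illustrative call with synthetic 2-D data drawn from three clusters
rng = np.random.RandomState(0)
centers = np.array([[0., 0.], [4., 4.], [-4., 4.]])
data = np.vstack([rng.multivariate_normal(c, np.eye(2), size=200) for c in centers])

model, mode_labels, trace = run_mv_model(data, K=3, n_feats=2, mc_samples=2000, jobs=1)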
Code example #6
                ]
                return tt.sum(
                    logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0))

            return logp_

        # Sparse model with diagonal covariance:
        with pm.Model() as model:

            # Weights of each component:
            w = Dirichlet('w', a=pm.floatX(alpha), shape=(n_components, ))

            # Impose sparse structure onto mean with off-diagonal elements all being the same, because background should be the same throughout.
            mus_signal = MvNormal(
                'mus_signal',
                mu=pm.floatX(signalMean_priorMean),
                tau=pm.floatX(np.eye(n_dimensions) / signalMean_priorSD**2),
                shape=n_dimensions)
            mus_background = MvNormal('mus_background',
                                      mu=pm.floatX(backgroundMean_priorMean),
                                      tau=pm.floatX(
                                          np.eye(n_dimensions) /
                                          backgroundMean_priorSD**2),
                                      shape=n_dimensions)
            mus = tt.fill_diagonal(
                tt.reshape(tt.tile(mus_background, n_components),
                           (n_components, n_dimensions)),
                0) + tt.eye(n_components, n_dimensions) * mus_signal

            # Impose structure for covariance as well, with off-diagonal elements being zero, just because that model is easier to fit.
            sigmas_signal = pm.Gamma('sigmas_signal',
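The excerpt is cut off at both ends, but the structured-mean construction above can be illustrated with a small numpy analogue (the shapes and values are assumptions, for illustration only):

# Numpy analogue of the mus construction above (illustrative values)
import numpy as np

n_components = n_dimensions = 3
mus_background = np.array([1., 1., 1.])   # shared background mean, one entry per dimension
mus_signal = np.array([5., 6., 7.])       # per-dimension signal mean

mus = np.tile(mus_background, n_components).reshape(n_components, n_dimensions)
np.fill_diagonal(mus, 0.)
mus = mus + np.eye(n_components, n_dimensions) * mus_signal
# Component i keeps the background mean in every dimension except dimension i,
# which takes the signal mean:
# [[5. 1. 1.]
#  [1. 6. 1.]
#  [1. 1. 7.]]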
Code example #7
n_comp = 2
concentration = 1

with pm.Model() as model:
    # Prior for covariance matrix
    
    # packed_L = [pm.LKJCholeskyCov('packedL_%d' % i, n=dimensions, eta=1., sd_dist=pm.Gamma.dist(mu = 2, sigma = 1)) for i in range(n_comp)]
    # L = [pm.expand_packed_triangular(dimensions, packed_L[i]) for i in range(n_comp)]
    # Σ = [pm.Deterministic('Σ_%d' % i, L[i].dot(L[i].T)) for i in range(n_comp)]
   
    packed_L = pm.LKJCholeskyCov('packedL', n=dimensions, eta=1., sd_dist=pm.Gamma.dist(mu = 2, sigma = 1))
    L = pm.expand_packed_triangular(dimensions, packed_L)
    Σ = pm.Deterministic('Σ', L.dot(L.T))
    
    # Prior for mean:
    mus = [MvNormal('mu_%d' % i, mu=pm.floatX(np.zeros(dimensions)), tau=pm.floatX(0.1 * np.eye(2)), shape=(dimensions,)) for i in range(n_comp)]
    # Prior for weights:
    pi = Dirichlet('pi', a=pm.floatX(concentration * np.ones(n_comp)), shape=(n_comp,))   
    prior = sample_prior()
    x = pm.DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data)
    
# Plot prior for some parameters:
# print(prior.keys())
# plt.hist(prior['Σ'][:,0,1])

with model:
    %time hmc_trace = pm.sample(draws=250, tune=100, cores=4)

with model:
    %time fit_advi = pm.fit(n=50000, obj_optimizer=pm.adagrad(learning_rate=1e-1), method = 'advi')
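This notebook cell relies on helpers defined elsewhere (logp_gmix, sample_prior, data, dimensions). A plausible logp_gmix with a single shared precision matrix, written in the style of the standard PyMC3 GMM/ADVI tutorial (an assumption; the notebook's own helper may differ), is:

# Assumed helpers in the style of the PyMC3 GMM/ADVI tutorial (not from the notebook itself)
import numpy as np
import theano.tensor as tt
from pymc3.math import logsumexp

def logp_normal(mu, tau, value):
    # Log density of a multivariate normal with precision matrix tau,
    # evaluated row-wise for every sample in `value`.
    k = tau.shape[0]
    delta = value - mu
    return (-1 / 2.) * (k * tt.log(2 * np.pi) - tt.log(tt.nlinalg.det(tau)) +
                        (delta.dot(tau) * delta).sum(axis=1))

def logp_gmix(mus, pi, tau):
    # Mixture log likelihood: sum over samples of logsumexp_i [log pi_i + log N(x | mu_i, tau)].
    def logp_(value):
        logps = [tt.log(pi[i]) + logp_normal(mu, tau, value)
                 for i, mu in enumerate(mus)]
        return tt.sum(logsumexp(tt.stacklists(logps), axis=0))
    return logp_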
Code example #8
                         (delta(mu).dot(tau) * delta(mu)).sum(axis=1))

# Log likelihood of Gaussian mixture distribution
def logp_gmix(mus, pi, taus, n_components):
    def logp_(value):        
        logps = [tt.log(pi[i]) + logp_normal(mus[i,:], taus[i], value) for i in range(n_components)]
        return tt.sum(logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0))
    return logp_

## Prior for model:

componentMean = ms + np.random.uniform(0,5,n_dimensions)
componentTau = np.random.uniform(0,2,n_dimensions) * np.eye(n_dimensions)

with pm.Model() as model:
    mus = MvNormal('mu', mu=pm.floatX(componentMean), tau=pm.floatX(componentTau), shape=(n_components, n_dimensions))
    pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(n_components)), shape=(n_components,))
    packed_L = [pm.LKJCholeskyCov('packed_L_%d' % i, n=n_dimensions, eta=2., sd_dist=pm.HalfCauchy.dist(2.5)) for i in range(n_components)]
    L = [pm.expand_packed_triangular(n_dimensions, packed_L[i]) for i in range(n_components)]
    sigmas = [pm.Deterministic('sigma_%d' % i, tt.dot(L[i],L[i].T)) for i in range(n_components)]
    taus = [tt.nlinalg.matrix_inverse(sigmas[i]) for i in range(n_components)]
    xs = DensityDist('x', logp_gmix(mus, pi, taus, n_components), observed=data)
    
with model:
    advi_fit = pm.fit(n=500000, obj_optimizer=pm.adagrad(learning_rate=1e-1))  
    
advi_trace = advi_fit.sample(10000)    
advi_summary = pm.summary(advi_trace)

with open("advi_summary.pickle", "wb") as pickle_out:
    pickle.dump(advi_summary, pickle_out)
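The saved summary can later be reloaded the same way (illustrative):

# Reload the pickled ADVI summary (illustrative)
with open("advi_summary.pickle", "rb") as f:
    advi_summary = pickle.load(f)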
Code example #9
File: mvn_pymc3.py  Project: BIIG-UC3M/MMITB
    xs = [z[:, np.newaxis] * rng.multivariate_normal(m, np.eye(2), size=n_samples)
          for z, m in zip(zs, ms)]
    data = np.sum(np.dstack(xs), axis=2)
    
    plt.figure(figsize=(5, 5))
    plt.scatter(data[:, 0], data[:, 1], c='g', alpha=0.5)
    plt.scatter(ms[0, 0], ms[0, 1], c='r', s=100)
    plt.scatter(ms[1, 0], ms[1, 1], c='b', s=100)
    
    from pymc3.math import logsumexp


    #Model original
    with pm.Model() as model:
        mus = [MvNormal('mu_%d' % i,
                        mu=pm.floatX(np.zeros(2)),
                        tau=pm.floatX(0.1 * np.eye(2)),
                        shape=(2,))
               for i in range(2)]
        pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(2)), shape=(2,))
        
        xs = DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data)
        
#   
#    #Model for GMM clustering
#    with pm.Model() as model:
#        # cluster sizes
#        p = pm.Dirichlet('p', a=np.array([1., 1.]), shape=2)
#        # ensure all clusters have some points
#        p_min_potential = pm.Potential('p_min_potential', tt.switch(tt.min(p) < .1, -np.inf, 0))
#    
#