Example #1
def _build_prior(self, name, X, reparameterize=True, **kwargs):
    mu = self.mean_func(X)
    cov = stabilize(self.cov_func(X))
    shape = infer_shape(X, kwargs.pop("shape", None))
    if reparameterize:
        # Non-centered parameterization: a rotated Gaussian scaled by a ChiSquared draw
        chi2 = pm.ChiSquared(name + "_chi2_", self.nu)
        v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, shape=shape, **kwargs)
        f = pm.Deterministic(name, (tt.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v)))
    else:
        # Centered parameterization: draw directly from the multivariate Student-t
        f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, shape=shape, **kwargs)
    return f
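The builder above is internal to PyMC3's Student-t process. A minimal sketch of reaching it through the public pm.gp.TP API, with made-up inputs and kernel settings:

import numpy as np
import pymc3 as pm

X = np.linspace(0, 1, 20)[:, None]       # hypothetical inputs
with pm.Model():
    cov = pm.gp.cov.ExpQuad(1, ls=0.2)   # any covariance function
    tp = pm.gp.TP(cov_func=cov, nu=5)    # nu = Student-t degrees of freedom
    f = tp.prior("f", X=X)               # calls _build_prior under the hood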
Example #2
    def _sample_pymc3(cls, dist, size, seed):
        """Sample from PyMC3."""

        import pymc3
        # Map distribution class names to equivalent PyMC3 constructors
        pymc3_rv_map = {
            'BetaDistribution':
            lambda dist: pymc3.Beta(
                'X', alpha=float(dist.alpha), beta=float(dist.beta)),
            'CauchyDistribution':
            lambda dist: pymc3.Cauchy(
                'X', alpha=float(dist.x0), beta=float(dist.gamma)),
            'ChiSquaredDistribution':
            lambda dist: pymc3.ChiSquared('X', nu=float(dist.k)),
            'ExponentialDistribution':
            lambda dist: pymc3.Exponential('X', lam=float(dist.rate)),
            'GammaDistribution':
            lambda dist: pymc3.Gamma(
                'X', alpha=float(dist.k), beta=1 / float(dist.theta)),
            'LogNormalDistribution':
            lambda dist: pymc3.Lognormal(
                'X', mu=float(dist.mean), sigma=float(dist.std)),
            'NormalDistribution':
            lambda dist: pymc3.Normal('X', float(dist.mean), float(dist.std)),
            'GaussianInverseDistribution':
            lambda dist: pymc3.Wald(
                'X', mu=float(dist.mean), lam=float(dist.shape)),
            'ParetoDistribution':
            lambda dist: pymc3.Pareto(
                'X', alpha=float(dist.alpha), m=float(dist.xm)),
            'UniformDistribution':
            lambda dist: pymc3.Uniform(
                'X', lower=float(dist.left), upper=float(dist.right))
        }

        dist_list = pymc3_rv_map.keys()

        if dist.__class__.__name__ not in dist_list:
            return None

        with pymc3.Model():
            pymc3_rv_map[dist.__class__.__name__](dist)
            return pymc3.sample(size,
                                chains=1,
                                progressbar=False,
                                random_seed=seed)[:]['X']
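For reference, the ChiSquaredDistribution entry of this map corresponds to drawing samples directly in PyMC3 roughly as follows (a standalone sketch, not part of the original class):

import pymc3

with pymc3.Model():
    pymc3.ChiSquared('X', nu=3.0)
    draws = pymc3.sample(500, chains=1, progressbar=False, random_seed=0)[:]['X']
print(draws.mean())  # should land near nu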
Example #3
def model_setup(visible,
                normalized_weights,
                estimateexplanterms={},
                estimatesdexplanterms={},
                inferenceparams={}):
    nsensors = visible['y'].shape[0]
    n = visible['y'].shape[1]

    model = pm.Model()
    with model:
        doft = inferenceparams.get('doft', 4)
        priorfactor = inferenceparams.get('priorfactor', 1.0)

        # m and l for reference product 0
        m0 = tt.zeros((1)) + 0.0
        l0 = tt.zeros((1)) + 1.0
        # m and l priors for the remaining products
        mest = pm.StudentT('mest',
                           doft,
                           mu=0,
                           sd=0.3 * priorfactor,
                           shape=(nsensors - 1))
        lest = pm.StudentT('lest',
                           doft,
                           mu=1,
                           sd=0.3 * priorfactor,
                           shape=(nsensors - 1))

        # define mu and M for remaining products depending on whether mu is estimated or set to zero
        if 'mu' in estimatesdexplanterms and estimatesdexplanterms[
                'mu'] and normalized_weights['mu']['weight'].shape[0] > 0:
            sdmu = pm.Exponential('sdmu', 1 / (0.3 * priorfactor))
        else:
            sdmu = pm.Deterministic('sdmu', tt.ones(1) * 0.3 * priorfactor)
        if 'mu' in estimateexplanterms and estimateexplanterms[
                'mu'] and normalized_weights['mu']['weight'].shape[0] > 0:
            muestnondim = pm.StudentT('muestnondim',
                                      doft,
                                      mu=0,
                                      sd=1,
                                      shape=(normalized_weights['mu']['nfac'],
                                             nsensors - 1))
            muest = pm.Deterministic('muest', muestnondim * sdmu)
            Mest = mest[:, np.newaxis] + tt.sum(
                muest[:, :, np.newaxis] *
                normalized_weights['mu']['weight'][:, np.newaxis, :],
                axis=0
            )  #inside the sum: first dimension: explan. factor, second dimension: product, third dimension: time
        else:
            muest = pm.Deterministic(
                'muest',
                tt.zeros((normalized_weights['mu']['nfac'], nsensors - 1)))
            Mest = mest[:, np.newaxis]
        # same for mu0 and M0
        if 'mu0' in estimateexplanterms and estimateexplanterms[
                'mu0'] and normalized_weights['mu']['weight'].shape[0] > 0:
            mu0estnondim = pm.StudentT(
                'mu0estnondim',
                doft,
                mu=0,
                sd=1,
                shape=(normalized_weights['mu']['nfac']))
            mu0est = pm.Deterministic('mu0est', mu0estnondim * sdmu)
            M0est = m0 + tt.sum(
                mu0est[:, np.newaxis] * normalized_weights['mu']['weight'],
                axis=0)
        else:
            mu0est = pm.Deterministic(
                'mu0est', tt.zeros(normalized_weights['mu']['nfac']))
            M0est = m0

        # define lambda and L for remaining products depending on whether lambda is estimated or set to zero
        if 'lambda' in estimatesdexplanterms and estimatesdexplanterms[
                'lambda'] and normalized_weights['lambda']['weight'].shape[
                    0] > 0:
            sdlambda = pm.Exponential('sdlambda', 1 / (0.3 * priorfactor))
        else:
            sdlambda = pm.Deterministic('sdlambda',
                                        tt.ones(1) * 0.3 * priorfactor)
        if 'lambda' in estimateexplanterms and estimateexplanterms[
                'lambda'] and normalized_weights['lambda']['weight'].shape[
                    0] > 0:
            lambdaestnondim = pm.StudentT(
                'lambdaestnondim',
                doft,
                mu=0,
                sd=1,
                shape=(normalized_weights['lambda']['nfac'], nsensors - 1))
            lambdaest = pm.Deterministic('lambdaest',
                                         lambdaestnondim * sdlambda)
            Lest = lest[:, np.newaxis] + tt.sum(
                lambdaest[:, :, np.newaxis] *
                normalized_weights['lambda']['weight'][:, np.newaxis, :],
                axis=0)
        else:
            lambdaest = pm.Deterministic(
                'lambdaest',
                tt.zeros((normalized_weights['lambda']['nfac'], nsensors - 1)))
            Lest = lest[:, np.newaxis]
        # same for lambda0 and L0
        if 'lambda0' in estimateexplanterms and estimateexplanterms[
                'lambda0'] and normalized_weights['lambda']['weight'].shape[
                    0] > 0:
            lambda0estnondim = pm.StudentT(
                'lambda0estnondim',
                doft,
                mu=0,
                sd=1,
                shape=(normalized_weights['lambda']['nfac']))
            lambda0est = pm.Deterministic('lambda0est',
                                          lambda0estnondim * sdlambda)
            L0est = l0 + tt.sum(lambda0est[:, np.newaxis] *
                                normalized_weights['lambda']['weight'],
                                axis=0)
        else:
            lambda0est = pm.Deterministic(
                'lambda0est', tt.zeros(normalized_weights['lambda']['nfac']))
            L0est = l0

        # product variance
        if 'sigmap0' in estimateexplanterms and not estimateexplanterms[
                'sigmap0']:
            # set sigmap0 to fixed value
            sigmap0value = inferenceparams.get('sigmap0', 0.01)
            sigmap0squared = pm.Deterministic('sigmap0squared',
                                              tt.ones(1) * sigmap0value**2)
            sigmapsquaredest = pm.Exponential('sigmapsquaredest',
                                              1 / (0.1 * priorfactor),
                                              shape=(nsensors - 1))
            sigmapsquared = tt.concatenate([sigmap0squared, sigmapsquaredest],
                                           axis=0)
        else:
            if 'sigmap0prior' in inferenceparams and inferenceparams[
                    'sigmap0prior'] is not None:
                sigmap0squared = pm.Exponential(
                    'sigmap0squared',
                    1 / (inferenceparams['sigmap0prior'] * priorfactor),
                    shape=(1))
                sigmapsquaredest = pm.Exponential('sigmapsquaredest',
                                                  1 / (0.1 * priorfactor),
                                                  shape=(nsensors - 1))
                sigmapsquared = tt.concatenate(
                    [sigmap0squared, sigmapsquaredest], axis=0)
            else:
                # prior for product noise variance (all explanatory factors set to 1)
                sigmapsquared = pm.Exponential('sigmapsquared',
                                               1 / (0.1 * priorfactor),
                                               shape=(nsensors))
        # associated standard deviation for ease of reference
        sigmap = pm.Deterministic('sigmap', tt.sqrt(sigmapsquared))
        # define kappa and predicted product noise variance depending on how/whether kappa is estimated or not
        if 'kappa' in estimatesdexplanterms and estimatesdexplanterms[
                'kappa'] and normalized_weights['kappa']['weight'].shape[0] > 0:
            sdkappa = pm.Exponential('sdkappa', 1 / (1.0 * priorfactor))
        else:
            sdkappa = pm.Deterministic('sdkappa',
                                       tt.ones(1) * 1.0 * priorfactor)
        if 'kappa' in estimateexplanterms and estimateexplanterms[
                'kappa'] and normalized_weights['kappa']['weight'].shape[0] > 0:
            if 'kappa0' in estimateexplanterms and estimateexplanterms[
                    'kappa0']:
                kappaestnondim = pm.StudentT(
                    'kappaestnondim',
                    doft,
                    mu=0,
                    sd=1,
                    shape=(normalized_weights['kappa']['nfac'], nsensors)
                )  #note that for kappa all sensors (including reference sensor) are represented in the same variable
                kappaest = pm.Deterministic('kappaest',
                                            kappaestnondim * sdkappa)
                kappa = pm.Deterministic('kappa', 1.0 * kappaest)
            else:
                kappaestnondim = pm.StudentT(
                    'kappaestnondim',
                    doft,
                    mu=0,
                    sd=1,
                    shape=(normalized_weights['kappa']['nfac'], nsensors - 1))
                kappaest = pm.Deterministic('kappaest',
                                            kappaestnondim * sdkappa)
                kappa0 = tt.zeros((normalized_weights['kappa']['nfac'], 1))
                kappa = pm.Deterministic(
                    'kappa', tt.concatenate([kappa0, kappaest], axis=1))
            sigmasquaredtotal = (sigmapsquared[:, np.newaxis] * tt.prod(
                tt.pow(normalized_weights['kappa']['weight'][:, np.newaxis, :],
                       kappa[:, :, np.newaxis]),
                axis=0))
        else:
            kappa = pm.Deterministic('kappa', tt.zeros((1, nsensors)))
            sigmasquaredtotal = sigmapsquared[:, np.newaxis] * tt.ones(
                (nsensors, n))

        # porosity, i.e. maximum soil moisture content: T prior
        porosity = pm.StudentT('porosity', doft, mu=0.4, sd=0.1 * priorfactor)

        thetamodel = inferenceparams.get('thetamodel', 'beta')
        dofchi = inferenceparams.get('dofchi', 3)
        softabsvalue = inferenceparams.get('softabsvalue', 0.01)
        # distribution of theta
        if thetamodel == 'beta':
            # beta distribution (A=alpha and B=beta estimated), can vary with explan. factors
            a = pm.ChiSquared('a', dofchi)
            b = pm.ChiSquared('b', dofchi)
            if 'alphabeta' in estimatesdexplanterms and estimatesdexplanterms[
                    'alphabeta']:
                sdalphabeta = pm.Exponential('sdalphabeta',
                                             1 / (0.3 * priorfactor))
            else:
                sdalphabeta = pm.Deterministic('sdalphabeta',
                                               tt.ones(1) * 0.3 * priorfactor)
            if 'alphabeta' in estimateexplanterms and estimateexplanterms[
                    'alphabeta']:
                alphanondim = pm.StudentT(
                    'alphanondim',
                    doft,
                    mu=0,
                    sd=1,
                    shape=(normalized_weights['alphabeta']['nfac']))
                betanondim = pm.StudentT(
                    'betanondim',
                    doft,
                    mu=0,
                    sd=1,
                    shape=(normalized_weights['alphabeta']['nfac']))
                alpha = pm.Deterministic('alpha', alphanondim * sdalphabeta)
                beta = pm.Deterministic('beta', betanondim * sdalphabeta)
                A = tt.sqrt(
                    softabsvalue**2 + tt.pow(
                        a + tt.sum(alpha[:, np.newaxis] *
                                   normalized_weights['alphabeta']['weight'],
                                   axis=0), 2)
                )  # soft absolute value; A and B should be >> softabsvalue
                B = tt.sqrt(softabsvalue**2 + tt.pow(
                    b + tt.sum(beta[:, np.newaxis] *
                               normalized_weights['alphabeta']['weight'],
                               axis=0), 2))
            else:
                alpha = pm.Deterministic('alpha', tt.zeros(1) * 0.0)
                beta = pm.Deterministic('beta', tt.zeros(1) * 0.0)
                A = a
                B = b
            thetaub = pm.Beta('thetaub', alpha=A, beta=B, shape=(n))
            theta = pm.Deterministic('theta', porosity * thetaub)
        elif thetamodel == 'logistic':
            # spline with logistic link
            a = pm.StudentT('a', doft, mu=0.0, sd=3.0 * priorfactor)
            b = pm.Exponential('b', 1. / (3 * priorfactor))
            if 'alphabeta' in estimatesdexplanterms and estimatesdexplanterms[
                    'alphabeta']:
                sdalphabeta = pm.Exponential('sdalphabeta',
                                             1.0 / (1.0 * priorfactor))
            else:
                sdalphabeta = pm.Deterministic('sdalphabeta',
                                               tt.ones(1) * 1.0 * priorfactor)
            if 'alphabeta' in estimateexplanterms and estimateexplanterms[
                    'alphabeta']:
                alphanondim = pm.StudentT(
                    'alphanondim',
                    doft,
                    mu=0,
                    sd=1,
                    shape=(normalized_weights['alphabeta']['nfac']))
                betanondim = pm.StudentT(
                    'betanondim',
                    doft,
                    mu=0,
                    sd=1,
                    shape=(normalized_weights['alphabeta']['nfac']))
                alpha = pm.Deterministic('alpha', alphanondim * sdalphabeta)
                beta = pm.Deterministic('beta', betanondim * sdalphabeta)
                A = a + tt.sum(alpha[:, np.newaxis] *
                               normalized_weights['alphabeta']['weight'],
                               axis=0)
                B = tt.sqrt(softabsvalue**2 + tt.pow(
                    b + tt.sum(beta[:, np.newaxis] *
                               normalized_weights['alphabeta']['weight'],
                               axis=0), 2))
            else:
                alpha = pm.Deterministic('alpha', tt.zeros(1) * 0.0)
                beta = pm.Deterministic('beta', tt.zeros(1) * 0.0)
                A = a
                B = b
            thetaubnondim = pm.Normal('thetaubnondim', mu=0, sd=1, shape=(n))
            thetaub = pm.Deterministic('thetaub', A + B * thetaubnondim)
            theta = pm.Deterministic(
                'theta', porosity * tt.pow(1 + tt.exp(-thetaub), -1))

        # assemble mean of observed products
        thetaoffset = inferenceparams.get('thetaoffset', 0.15)
        if thetaoffset == 'observedmean':
            thetaoffset = np.mean(visible['y'][0, :])
        y0 = M0est + L0est * (theta[np.newaxis, :] - thetaoffset)
        yrest = Mest + Lest * (theta[np.newaxis, :] - thetaoffset)
        yest = tt.concatenate([y0, yrest], axis=0) + thetaoffset

        # model for observed products
        studenterrors = inferenceparams.get('studenterrors', False)
        if not studenterrors:
            y = pm.Normal('y',
                          mu=yest,
                          sd=tt.sqrt(sigmasquaredtotal),
                          observed=visible['y'])
        else:
            studenterrors_dof = inferenceparams.get('studenterrors_dof', doft)
            lam = tt.pow(sigmasquaredtotal, -1) * (studenterrors_dof /
                                                   (studenterrors_dof - 2))
            y = pm.StudentT('y',
                            studenterrors_dof,
                            mu=yest,
                            lam=lam,
                            observed=visible['y'])
    return model
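A hypothetical smoke test for model_setup; the input layout is inferred from how the function indexes its arguments, and the names and sizes are made up:

import numpy as np
import pymc3 as pm  # the function also expects theano.tensor as tt at module level

nsensors, n = 3, 100
visible = {'y': np.random.rand(nsensors, n)}
# one explanatory factor per term, estimation of all terms left switched off
normalized_weights = {
    key: {'weight': np.ones((1, n)), 'nfac': 1}
    for key in ('mu', 'lambda', 'kappa', 'alphabeta')
}
model = model_setup(visible, normalized_weights)
print(model.free_RVs)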
Example #4
def build_biallelic_model4(g, n, s):
    # EXPERIMENTAL: Underlying allele freqs overdispersed before errors.
    a = 2  # number of alleles at a biallelic site

    with pm.Model() as model:
        # Fraction
        pi = pm.Dirichlet(
            'pi',
            a=np.ones(s),
            shape=(n, s),
            transform=stick_breaking,
        )
        pi_hyper = pm.Data('pi_hyper', value=0.0)
        pm.Potential('heterogeneity_penalty',
                     -(pm.math.sqrt(pi).sum(0).sum()**2) * pi_hyper)

        rho_hyper = pm.Data('rho_hyper', value=0.0)
        pm.Potential('diversity_penalty',
                     -(pm.math.sqrt(pi.sum(0)).sum()**2) * rho_hyper)

        # Genotype
        gamma_ = pm.Uniform('gamma_', 0, 1, shape=(g * s, 1))
        gamma = pm.Deterministic(
            'gamma',
            (pm.math.concatenate([gamma_, 1 - gamma_], axis=1).reshape(
                (g, s, a))))
        gamma_hyper = pm.Data('gamma_hyper', value=0.0)
        pm.Potential(
            'ambiguity_penalty',
            -(pm.math.sqrt(gamma).sum(2)**2).sum(0).sum(0) * gamma_hyper)

        # Product of fraction and genotype
        true_p = pm.Deterministic('true_p', pm.math.dot(pi, gamma))

        # Overdispersion term
        # TODO: Consider making it different between samples.  How to shape?
        alpha = pm.ChiSquared('alpha', nu=50)
        _true_p = true_p.reshape((-1, a))[:, 0]
        _true_q = 1 - _true_p
        overdispersed_p_ = pm.Beta('overdispersed_p_',
                                   alpha=_true_p * alpha,
                                   beta=_true_q * alpha,
                                   shape=(n * g, ))
        overdispersed_p = pm.Deterministic(
            'overdispersed_p',
            pm.math.concatenate([
                overdispersed_p_.reshape(
                    (-1, 1)), 1 - overdispersed_p_.reshape((-1, 1))
            ],
                                axis=1).reshape((n, g, a)))

        # Sequencing error
        # epsilon_hyper = pm.Gamma('epsilon_hyper', alpha=100, beta=1)
        epsilon_hyper = pm.Data('epsilon_hyper', value=100)
        epsilon = pm.Beta('epsilon', alpha=2, beta=epsilon_hyper, shape=n)
        epsilon_ = epsilon.reshape((n, 1, 1))
        p_with_error = (overdispersed_p * (1 - epsilon_) +
                        (1 - overdispersed_p) * (epsilon_ / (a - 1)))

        # Observation
        # _p = p_with_error.reshape((-1, a))[:,0]
        # _q = 1 - _p

        observed = pm.Data('observed', value=np.empty((g * n, a)))
        pm.Binomial('data',
                    p=p_with_error.reshape((-1, a))[:, 0],
                    n=observed.reshape((-1, a)).sum(1),
                    observed=observed[:, 0])

    return model
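A hypothetical way to exercise this builder; stick_breaking is assumed to be the transform instance shipped with PyMC3, and the counts are made-up placeholders:

import numpy as np
import pymc3 as pm
from pymc3.distributions.transforms import stick_breaking  # name the snippet relies on

g, n, s = 20, 5, 3                     # sites, samples, strains
model = build_biallelic_model4(g, n, s)
counts = np.random.randint(0, 30, size=(g * n, 2)).astype(float)
with model:
    pm.set_data({'observed': counts})  # replace the empty placeholder
    # trace = pm.sample(...) would follow here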
Example #5
""")
plt.figure(dpi=100)

##### COMPUTATION #####
# DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE
k_real = 2

# DRAW A SAMPLE OF N=1000
np.random.seed(42)
sample = stats.chi2.rvs(df=k_real, size=1000)

##### SIMULATION #####
# MODEL BUILDING
with pm.Model() as model:
    k = pm.DiscreteUniform("k", lower=1, upper=np.mean(sample)*7)  # generous upper bound; nu must be positive
    chi_2 = pm.ChiSquared("chi2", nu=k, observed=sample)
    

# MODEL RUN
with model:
    trace = pm.sample(50000)
    burned_trace = trace[45000:]

# K - 95% CONF INTERVAL
ks = burned_trace["k"]
k_est_95 = np.mean(ks) - 2*np.std(ks), np.mean(ks) + 2*np.std(ks)
print("95% of sampled k values are between {} and {}".format(*k_est_95))

##### PLOTTING #####
# SAMPLE DISTRIBUTION
plt.hist(sample, bins=50, density=True, alpha=.25)
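To visualize the fit, one could overlay the chi-squared density at the posterior-mean k (an illustrative continuation, not part of the original script):

x = np.linspace(0, sample.max(), 200)
plt.plot(x, stats.chi2.pdf(x, df=np.mean(ks)), label="chi2 pdf at posterior-mean k")
plt.legend()
plt.show()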