Example #1
import pymc3 as pmc
import theano.tensor as tns

def get_model_GP3(t, K, nsamples, cov_fcn):
    """Build K latent GPs over t; must be called inside a pmc.Model() context."""
    # Block-diagonal base covariance: one copy of cov_fcn(t, t') per cluster.
    M = tns.slinalg.kron(tns.eye(K), cov_fcn(t, t[:, None], 1.0))
    tau = pmc.Gamma('tau', 1.0, 1.0, shape=K, testval=1.0)
    h2 = pmc.Gamma('h2', 1.0, 1.0, shape=K, testval=1.0)
    # Per-cluster scale h2 and element-wise power tau, plus jitter for stability.
    Q = tns.repeat(h2, nsamples) * M ** tns.repeat(tau, nsamples) + tns.eye(nsamples * K) * 1e-6
    L = tns.slinalg.cholesky(Q)
    # Non-centered parameterization: phi = invlogit(L psi) with psi ~ N(0, 1).
    psi = pmc.Normal('psi', mu=0.0, sd=1.0, shape=K * nsamples, testval=0.0)
    phi = pmc.Deterministic('phi', pmc.invlogit(L.dot(psi)).reshape((K, nsamples)))
    return phi
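A minimal usage sketch (not part of the original example): t, sqexp, and the hyperparameters below are hypothetical; any kernel with the signature cov_fcn(x, x', scale) should work.

import numpy as np
import pymc3 as pmc
import theano.tensor as tns

def sqexp(x, xp, scale):
    # toy squared-exponential kernel with unit lengthscale
    return scale * tns.exp(-0.5 * (x - xp) ** 2)

t = np.linspace(0.0, 1.0, 20)
with pmc.Model() as model:
    phi = get_model_GP3(t, K=3, nsamples=20, cov_fcn=sqexp)
    # trace = pmc.sample(1000)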
Example #2
def stickbreak_prior(name, a, shape):
    """Truncated stick-breaking construction (pm = pymc3, tt = theano.tensor)."""
    gamma = pm.Gamma('gamma_{}'.format(name), 1., 1.)
    delta = pm.Gamma('delta_{}'.format(name), 1., a)
    # Independent stick proportions; GEM turns them into mixture weights.
    beta_prime = tt.stack([
        pm.Beta('beta_prime_{}_{}'.format(name, k), 1., gamma)
        for k in range(shape)
    ])
    beta = GEM(beta_prime)  # GEM is assumed defined elsewhere; a sketch follows
    return beta * delta
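GEM is not defined in this snippet. A minimal sketch of the standard stick-breaking transform it presumably implements (the same construction Example #8 writes inline):

import theano.tensor as tt

def GEM(beta_prime):
    # w_k = beta'_k * prod_{j<k} (1 - beta'_j)
    remaining = tt.concatenate([[1.0], tt.extra_ops.cumprod(1.0 - beta_prime)[:-1]])
    return beta_prime * remaining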
Example #3
def nmf_gpp_hmc(X, M, **kwargs):
    """
    Samples posterior of NMF GPP with Hamiltonian Monte Carlo using the Leapfrog method for .
    :param X: Data matrix
    :param M: Number of latent factors (ie. sources)
    :param kwargs:  Options:
                    'numSamples': Number of samples to be drawn
                    'linkH'     : Link function for H, callable. Inverse link
                    'argsH'     : Extra arguments for linkH. Should be a list.
                    'linkD'     : Link function for D, callable. Inverse link
                    'argsD'     : Extra arguments for linkD. Should be a list.
                    'sigma_N'   : Variance of Gaussian noise.
                    'burn'      : Burn-in to be discarded
                    'dimD'      : Dimension of covariance for D
                    'dimH'      : Dimension of covariance for H

    :return: Traces of NN matrix factors, D and H
    """
    # parse arguments
    try:
        numSamples = kwargs['numSamples']
        dimD = kwargs['dimD']
        dimH = kwargs['dimH']
        numChains = kwargs['numChains']
        db_name = kwargs['db_name']
    except KeyError:
        print("Missing parameter with no default. Terminating")
        sys.exit(1)

    K, L = X.shape

    d_in = np.arange(K*M)[:, None]
    h_in = np.arange(M*L)[:, None]

    # begin actual model
    with pm.Model() as mod:
        ls_D = pm.Gamma(name='lsd', alpha=3, beta=1, shape=(dimD,))
        #covD = pm.gp.cov.ExpQuad(input_dim=dimD, ls=ls_D, active_dims=400)
        covD = pm.gp.cov.Exponential(input_dim=dimD, ls=ls_D)
        gpD = CustomLatent(cov_func=covD)
        d = gpD.prior("d", X=d_in.reshape(K, M))

        ls_H = pm.Gamma(name='lsh', alpha=3, beta=1, shape=(dimH,))
        covH = pm.gp.cov.ExpQuad(input_dim=dimH, ls=ls_H)
        gpH = CustomLatent(cov_func=covH)
        h = gpH.prior("h", X=h_in.reshape(L, M))

        # loglik_X and CustomLatent are assumed defined in the surrounding module
        X_ = pm.DensityDist('X', loglik_X, observed={'X': X,
                                                     'd': d,
                                                     'h': h})

        db = pm.backends.Text(db_name)
        trace = pm.sample(numSamples, cores=1, trace=db,
                          chains=numChains)  # njobs is deprecated; use cores

    return trace
Example #4
    def create_model(self):
        """ Creates and returns the PyMC3 model.

        Note: The size of the shared variables must match the size of the
        training data. Otherwise, setting the shared variables later will raise
        an error. See http://docs.pymc.io/advanced_theano.html

        Returns
        ----------
        model : the PyMC3 model
        """
        model_input = theano.shared(np.zeros([self.num_training_samples,
                                              self.num_pred]))

        model_output = theano.shared(np.zeros(self.num_training_samples))

        self.shared_vars = {
            'model_input': model_input,
            'model_output': model_output,
        }

        self.gp = None
        model = pm.Model()

        with model:
            length_scale = pm.Gamma('length_scale', alpha=2, beta=0.5,
                                    shape=(1, self.num_pred))
            signal_variance = pm.HalfCauchy('signal_variance', beta=2,
                                            shape=1)
            noise_variance = pm.HalfCauchy('noise_variance', beta=2,
                                           shape=1)
            degrees_of_freedom = pm.Gamma('degrees_of_freedom', alpha=2,
                                          beta=0.1, shape=1)

            if self.kernel is None:
                cov_function = signal_variance ** 2 * RBF(
                    input_dim=self.num_pred,
                    ls=length_scale)
            else:
                cov_function = self.kernel

            if self.prior_mean is None:
                mean_function = pm.gp.mean.Zero()
            else:
                mean_function = pm.gp.mean.Constant(c=self.prior_mean)

            self.gp = pm.gp.Latent(mean_func=mean_function,
                                   cov_func=cov_function)

            f = self.gp.prior('f', X=model_input.get_value())

            # use the noise variance for the observation precision; as originally
            # written this used signal_variance, leaving noise_variance unused
            y = pm.StudentT('y', mu=f, lam=1 / noise_variance,
                            nu=degrees_of_freedom, observed=model_output)

        return model
Example #5
    def sample_posterior(self, t, T, n_samp, n_burnin=None):
        """
        Get samples from the posterior, e.g. for posterior inference or computing Bayesian credible intervals.
        This routine samples via the random walk Metropolis (RWM) algorithm using the ``pymc3`` library.

        The function returns a ``pymc3.MultiTrace`` object that can be indexed much like a ``numpy.array``.
        Furthermore, ``pymc3`` can be used to create traceplots, for example via

        .. code-block:: python

            from matplotlib import pyplot as plt
            import pymc3

            trace = uvb.fit(t, T)
            pymc3.traceplot(trace, varnames=["mu"])

            plt.plot(trace["mu"], trace["alpha"])

        :param numpy.array[float] t: Observation timestamps of the process up to time T. 1-d array of timestamps,
            must be sorted in ascending order.
        :param T: (optional) maximum time
        :type T: float or None
        :param int n_samp: number of posterior samples to take
        :param int n_burnin: number of samples to discard (as the burn-in samples)

        :rtype: pymc3.MultiTrace
        :return: the posterior samples for mu, alpha and theta as a trace object
        """

        t, T = self._prep_t_T(t, T)

        if n_burnin is None:
            n_burnin = int(n_samp / 5)

        with pm.Model() as model:
            mu = pm.Gamma("mu", alpha=self.mu_hyp[0], beta=1. / self.mu_hyp[1])
            theta = pm.Gamma("theta",
                             alpha=self.theta_hyp[0],
                             beta=1. / self.theta_hyp[1])
            alpha = pm.Beta("alpha",
                            alpha=self.alpha_hyp[0],
                            beta=self.alpha_hyp[1])

            # HPLLOp (assumed defined elsewhere) is a custom Theano op
            # evaluating the process log-likelihood
            op = HPLLOp(t, T)
            a = pm.Deterministic('a', op(mu, alpha, theta))
            llop = pm.Potential('ll', a)

            trace = pm.sample(n_samp,
                              step=pm.Metropolis(),
                              cores=1,
                              chains=1,  # `chains` is the pm.sample kwarg (not `nchains`)
                              tune=n_burnin,
                              discard_tuned_samples=True)

        # Tuning samples were already discarded above, so this slice drops a
        # further n_burnin draws from the retained trace.
        return trace[n_burnin:]
Example #6
def logistic2D(y, coords, knots, pred_coords):
    """Returns an instance of a logistic geostatistical model with
       Matern32 covariance.
       
       Let $y_i, i=1,\dots,n$ be a set of binary observations at locations $x_i, i=1,\dots,n$.
       We model $y_i$ as
       $$
       y_i \sim Bernoulli(p_i)
       $$
       with
       $$
       \mbox{logit}(p_i) = \alpha + S(x_i).
       $$
       
       $S(x_i)$ is a latent Gaussian process defined as
       $$
       S(\bm{x}) \sim \mbox{MultivariateNormal}(\bm{0}, \Sigma^2)
       $$
       where
       $$
       \Sigma^2_{ij} = \sigma^2 \left(1 + \frac{\sqrt{3}\,|x_i - x_j|}{\ell}\right)
                  \exp\left[- \frac{\sqrt{3}\,|x_i - x_j|}{\ell}\right]
       $$

       The model evaluates $S(x_i)$ approximately using a set of inducing points $x^\star_i, i=1,\dots,m$ for $m$ auxiliary locations.  See [Banerjee \emph{et al.} (2009)](https://dx.doi.org/10.1111%2Fj.1467-9868.2008.00663.x) for further details.

       :param y: a vector of binary outcomes {0, 1}
       :param coords: a matrix of coordinates of `y` of shape `[n, d]` for `d`-dimensions and `n` observations.
       :param knots: a matrix of inducing point coordinates of shape `[m, d]`.
       :param pred_coords: a matrix of coordinates at which predictions are required.
       :returns: a function that runs the sampler and returns a dictionary containing the PyMC3 `model` and the `posterior` PyMC3 `MultiTrace` object.
       """
    model = pm.Model()
    with model:
        alpha = pm.Normal('alpha', mu=0., sd=1.)
        sigma_sq = pm.Gamma('sigma_sq', 1., 1.)
        phi = pm.Gamma('phi', 2., 0.1)

        spatial_cov = sigma_sq * pm.gp.cov.Matern32(2, phi)
        spatial_gp = pm.gp.Latent(cov_func=spatial_cov)
        s = spatial_gp.prior('s', X=knots)
        s_star_ = pm.Deterministic('s_star',
                                   project(s, knots, pred_coords, spatial_cov))

        # project() kriges the latent field from the knots onto arbitrary
        # coordinates; a sketch follows this example
        eta = alpha + project(s, knots, coords, spatial_cov)
        y_rv = pm.Bernoulli('y', p=pm.invlogit(eta), observed=y)

    def sample_fn(*args, **kwargs):
        with model:
            trace = pm.sample(*args, **kwargs)
        return {'model': model, 'posterior': trace}

    return sample_fn
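project is not shown in this example. A minimal sketch of the kriging projection the calls above imply, K(X*, Xu) K(Xu, Xu)^{-1} s, under the assumption that cov is a PyMC3 covariance object:

import pymc3 as pm
import theano.tensor as tt

def project(s, knots, coords, cov):
    # posterior-mean projection of the latent values s at the knots onto coords
    Kuu = pm.gp.util.stabilize(cov(knots))
    Ksu = cov(coords, knots)
    return tt.dot(Ksu, tt.slinalg.solve(Kuu, s))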
Example #7
def big_model(splits,
              stakes,
              actions,
              p_prior='normal',
              f_prior='uniform',
              gamma_param=1.,
              sd=10.,
              f_mean=1.,
              unif_upper=10,
              st_t_param=1.):
    model = pm.Model()
    with model:
        # Specify priors
        # t = pm.Uniform('t', lower=0, upper=1)
        # temp = pm.Uniform('temp', lower=0, upper=1000)
        # st = pm.Beta('st', alpha=1, beta=1)
        if f_prior == 'normal':
            r = pm.Normal('r', mu=0, sd=sd)
            p = pm.Normal('p', mu=0, sd=sd)
            f = pm.Normal('f', mu=f_mean, sd=sd)
            # st_t = pm.Normal('st_t', mu=0, sd=sd)
        else:
            # r = pm.Uniform('r', lower=0, upper=unif_upper)
            # p = pm.Uniform('p', lower=0.0, upper=10)
            # f = pm.Uniform('f', lower=0.0, upper=10)
            p = pm.Gamma('p', alpha=0.1 * 4., beta=0.1 * 4.)
            f = pm.Gamma('f', alpha=0.1 * 4 * gamma_param, beta=0.1 * 4)
            # st_t = pm.Uniform('st_t', lower=0, upper=unif_upper)

        # Specify model
        # soft_indicator_num = np.exp((0.5-t/2 - splits)*temp)
        # soft_indicator = soft_indicator_num / (soft_indicator_num + 1)
        # soft_indicator = (0.5-t/2>splits)
        soft_indicator = (0.4 > splits)
        # odds_a = pm.math.exp(2*r*splits - f*soft_indicator)
        # pm.math.exp keeps the expression a symbolic Theano tensor
        odds_a = pm.math.exp(splits - f * soft_indicator)
        odds_r = pm.math.exp(p * soft_indicator)
        # odds_r = 1
        prob = odds_a / (odds_r + odds_a)
        a = pm.Binomial('a', 1, prob, observed=actions)

        # Fit and sample
        # fitted = pm.fit(method='fullrank_advi')
        # trace_big = fitted.sample(2000)
        trace_big = pm.sample(20000,
                              chains=1,
                              cores=4,
                              random_seed=3,  # pm.sample takes random_seed, not seed
                              target_accept=0.95)
        prior = pm.sample_prior_predictive(20000)
    return trace_big, prior, model
Example #8
def density(x):
    """
    Input: a list of one-dimensional samples.
    Output: a DP-mixture density estimate over the range -3 to 3.
    """
    values = x
    values = np.array(values)
    # standardize the data
    values = (values - values.mean()) / values.std()

    N = len(values)
    K = 30  # truncation level of the stick-breaking approximation
    SEED = int(time.time())
    x_plot = np.linspace(-3, 3, 200)

    def stick_breaking(beta):
        portion_remaining = tt.concatenate([[1],
                                            tt.extra_ops.cumprod(1 - beta)[:-1]
                                            ])
        return beta * portion_remaining

    with pm.Model() as model:
        alpha = pm.Gamma('alpha', 1., 1.)
        beta = pm.Beta('beta', 1., alpha, shape=K)
        w = pm.Deterministic('w', stick_breaking(beta))

        tau = pm.Gamma('tau', 1., 1., shape=K)
        lambda_ = pm.Uniform('lambda', 0, 5, shape=K)
        mu = pm.Normal('mu', 0, tau=lambda_ * tau, shape=K)
        obs = pm.NormalMixture('obs',
                               w,
                               mu,
                               tau=lambda_ * tau,
                               observed=values)

    with model:
        trace = pm.sample(1000, random_seed=SEED, init='advi')

    fig, ax = plt.subplots(figsize=(8, 6))
    plot_w = np.arange(K) + 1
    ax.bar(plot_w - 0.5, trace['w'].mean(axis=0), width=1., lw=0)
    ax.set_xlim(0.5, K)
    ax.set_xlabel('Component')
    ax.set_ylabel('Posterior expected mixture weight')

    post_pdf_contribs = sp.stats.norm.pdf(
        np.atleast_3d(x_plot), trace['mu'][:, np.newaxis, :],
        1. / np.sqrt(trace['lambda'] * trace['tau'])[:, np.newaxis, :])
    post_pdfs = (trace['w'][:, np.newaxis, :] * post_pdf_contribs).sum(axis=-1)
    # 95% pointwise credible band (computed but not returned)
    post_pdf_low, post_pdf_high = np.percentile(post_pdfs, [2.5, 97.5], axis=0)
    return post_pdfs
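A quick usage sketch (synthetic bimodal data; the module-level imports of numpy, scipy, pymc3, theano.tensor, matplotlib, and time are assumed):

samples = np.concatenate([np.random.normal(-1.0, 0.5, 300),
                          np.random.normal(2.0, 0.7, 200)])
post_pdfs = density(samples)       # (n_draws, 200) posterior density curves
mean_pdf = post_pdfs.mean(axis=0)  # posterior-mean density on the -3..3 grid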
Example #9
def specify_model(training_df):
    '''
    This function sets up some basic parameters from the training_df, and then
    uses PyMC to fit the Dixon-Coles model.
    '''

    teams = training_df['home_team'].unique()
    teams = pd.DataFrame(teams, columns=['team'])
    teams['i'] = teams.index

    observed_home_goals = training_df['home_score'].values
    observed_away_goals = training_df['away_score'].values

    home_team = training_df['i_home'].values
    away_team = training_df['i_away'].values

    num_teams = len(training_df['i_home'].drop_duplicates())
    num_games = len(home_team)

    # empirical starting points (computed here but not passed to the sampler)
    g = training_df.groupby('i_away')
    att_starting_points = np.log(g.away_score.mean())
    g = training_df.groupby('i_home')
    def_starting_points = -np.log(g.away_score.mean())

    print('Specifying Model')
    # specify model
    with pm.Model() as model:
        # global model parameters (0.0001 is a precision, so pass it as tau
        # rather than as the positional sd argument)
        home = pm.Normal('home', mu=0, tau=0.0001)
        tau_att = pm.Gamma('tau_att', .1, .1)
        tau_def = pm.Gamma('tau_def', .1, .1)
        intercept = pm.Normal('intercept', mu=0, tau=0.0001)

        # team specific parameters
        atts_star = pm.Normal('atts_star', mu=0, tau=tau_att, shape=num_teams)
        defs_star = pm.Normal('defs_star', mu=0, tau=tau_def, shape=num_teams)

        # zero-center the team effects so the model is identifiable
        atts = pm.Deterministic('atts', atts_star - tt.mean(atts_star))
        defs = pm.Deterministic('defs', defs_star - tt.mean(defs_star))

        home_theta = tt.exp(intercept + home + atts[home_team] + defs[away_team])
        away_theta = tt.exp(intercept + atts[away_team] + defs[home_team])

        # likelihood of observed data
        home_goals = pm.Poisson('home_goals', mu=home_theta, observed=observed_home_goals)
        away_goals = pm.Poisson('away_goals', mu=away_theta, observed=observed_away_goals)

    return model
Example #10
    def __init__(self,
                 X,
                 a_alpha=1e-3,
                 b_alpha=1e-3,
                 a_tau=1e-3,
                 b_tau=1e-3,
                 beta=1e-3):
        # data, # of samples, dims
        self.X = X
        self.d = self.X.shape[1]
        self.N = self.X.shape[0]
        self.q = self.d - 1

        # hyperparameters
        self.a_alpha = a_alpha
        self.b_alpha = b_alpha
        self.a_tau = a_tau
        self.b_tau = b_tau
        self.beta = beta

        with pm.Model() as model:
            z = pm.MvNormal('z',
                            mu=np.zeros(self.q),
                            cov=np.eye(self.q),
                            shape=(self.N, self.q))
            mu = pm.MvNormal('mu',
                             mu=np.zeros(self.d),
                             cov=np.eye(self.d) / self.beta,
                             shape=self.d)
            alpha = pm.Gamma('alpha',
                             alpha=self.a_alpha,
                             beta=self.b_alpha,
                             shape=self.q)
            w = pm.MatrixNormal('w',
                                mu=np.zeros((self.d, self.q)),
                                rowcov=np.eye(self.d),
                                # diag() was undefined; theano.tensor (tt) builds
                                # the diagonal ARD column covariance symbolically
                                colcov=tt.diag(1 / alpha),
                                shape=(self.d, self.q))
            tau = pm.Gamma('tau', alpha=self.a_tau, beta=self.b_tau)
            x = pm.math.dot(z, w.T) + mu
            obs_x = pm.MatrixNormal('obs_x',
                                    mu=x,
                                    rowcov=np.eye(self.N),
                                    colcov=np.eye(self.d) / tau,
                                    shape=(self.N, self.d),
                                    observed=self.X)

        self.model = model
Example #11
    def create_model(self):
        with pm.Model() as self.model:
            self.mean = pm.gp.mean.Zero()

            # covariance function
            l = pm.Gamma("l_L", alpha=2, beta=2, shape=self.dim)
            # positive half-normal prior on the kernel amplitude
            eta = pm.HalfNormal("eta_L", sd=5)
            self.cov = eta * pm.gp.cov.ExpQuad(self.dim, l)

            Kuu = self.cov(self.Xu)
            Kuf = self.cov(self.Xu, self.X)
            Luu = tt.slinalg.cholesky(pm.gp.util.stabilize(Kuu))

            vu = pm.Normal("u_rotated_", mu=0.0, sd=1.0, shape=pm.gp.util.infer_shape(self.Xu))
            u = pm.Deterministic("u", Luu.dot(vu))

            Luuinv_u = pm.gp.util.solve_lower(Luu, u)
            A = pm.gp.util.solve_lower(Luu, Kuf)

            # Nystrom approximation Qff = Kfu Kuu^{-1} Kuf
            Qff = tt.dot(tt.transpose(A), A)

            Kffd = self.cov(self.X, diag=True)
            # FITC diagonal correction plus observation noise
            Lamd = pm.gp.util.stabilize(tt.diag(tt.clip(Kffd - tt.diag(Qff) + self.sigma**2, 0.0, np.inf)))


            v = pm.Normal("fp_rotated_", mu=0.0, sd=1.0, shape=pm.gp.util.infer_shape(self.X))
            fp = pm.Deterministic("fp", tt.dot(tt.transpose(A), Luuinv_u) + tt.sqrt(Lamd).dot(v))

            p = pm.Deterministic("p", pm.math.invlogit(fp))
            y = pm.Bernoulli("y", p=p, observed=self.Y)
Example #12
    def fit(self, X, Y):
        with pm.Model() as self.model:
            global X_New_shared
            global Y_New_shared
            X_New_shared = theano.shared(X)
            Y_New_shared = theano.shared(Y)
            lm = pm.Gamma("l", alpha=2, beta=1)
            offset = 0.1
            nu = pm.HalfCauchy("nu", beta=1)
            d = 2

            cov = nu**2 * pm.gp.cov.Polynomial(
                X_New_shared.get_value().shape[1], lm, d, offset)

            self.gp = pm.gp.Marginal(cov_func=cov)

            sigma = pm.HalfCauchy("sigma", beta=1)
            y_ = self.gp.marginal_likelihood("y",
                                             X=X_New_shared,
                                             y=Y_New_shared,
                                             noise=sigma)

            self.map_trace = [pm.find_MAP()]

            global f_pred
            f_pred = self.gp.conditional(
                "f_pred",
                X_New_shared,
                shape=X_New_shared.get_value().shape[0])
Example #13
def add_1level_hierarchy_var(var,
                             var_dict,
                             lv1_sd,
                             lv0_sd,
                             nind,
                             dist='halfnorm'):
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    import pymc3 as pm
    import theano.tensor as tt

    n1 = len(np.unique(nind))
    n2 = len(nind)  # not used below
    var_l1 = var + '_l1'

    var_l0 = var + '_l0'
    var_l0c = var_l0 + 'c'
    dvar_l0 = 'd' + var_l0
    if (False):  # disabled alternative parameterization, kept for reference
        var_dict[var_l1] = pm.HalfNormal(var_l1, sd=lv1_sd)
        var_dict[var_l0] = pm.HalfNormal(var_l0, sd=lv0_sd)
        var_dict[dvar_l0] = pm.Deterministic(
            dvar_l0,
            tt.exp(var_dict[var_l0] * pm.Normal(var_l0c, sd=1, shape=n1)))

        var_dict[var] = pm.Deterministic(var,
                                         var_dict[var_l1] * var_dict[dvar_l0])

    else:
        var_dict[var_l0] = pm.HalfNormal(var_l0, sd=lv1_sd)
        #var_dict[dvar_l0 ] = pm.HalfNormal   ( dvar_l0,  sd=lv0_sd )
        var_dict[dvar_l0] = pm.Gamma(dvar_l0, mu=lv0_sd, sd=lv0_sd * 0.4)
        var_dict[var] = pm.Deterministic(
            var, var_dict[var_l0] *
            tt.exp(var_dict[dvar_l0] * pm.Normal(var_l0c, sd=1, shape=n1)))
    return var_dict[var]
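A usage sketch (hypothetical data: nind maps each observation to a group, as the function expects):

import numpy as np
import pymc3 as pm

nind = np.array([0, 0, 1, 1, 2, 2])  # group index per observation
var_dict = {}
with pm.Model() as model:
    sigma = add_1level_hierarchy_var('sigma', var_dict,
                                     lv1_sd=1.0, lv0_sd=0.5, nind=nind)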
Example #14
    def get_dist(self, distvarname, distparams, dtype):
        # NOTE to developers: make sure any distribution you add passes on
        # the `dtype` argument, so that distributions match the type of the
        # variable for which they are a prior.

        if distparams.dist in ['normal', 'expnormal', 'lognormal']:
            if shim.isscalar(distparams.loc) and shim.isscalar(distparams.scale):
                mu = distparams.loc; sd = distparams.scale
                # Ensure parameters are true scalars rather than arrays
                if isinstance(mu, np.ndarray):
                    mu = mu.flat[0]
                if isinstance(sd, np.ndarray):
                    sd = sd.flat[0]
                distvar = pymc.Normal(distvarname, mu=mu, sd=sd, dtype=dtype)
            else:
                # Because the distribution is 'normal' and not 'mvnormal',
                # we sample the parameters independently, hence the
                # diagonal covariance matrix.
                assert(distparams.loc.shape == distparams.scale.shape)
                kwargs = {'shape' : distparams.loc.flatten().shape, # Required
                          'mu'    : distparams.loc.flatten(),
                          'cov'   : np.diag(distparams.scale.flat)}
                distvar = pymc.MvNormal(distvarname, dtype=dtype, **kwargs)

        elif distparams.dist in ['exp', 'exponential']:
            lam = 1/distparams.scale
            distvar = pymc.Exponential(distvarname, lam=lam, shape=distparams.shape,
                                       dtype=dtype)

        elif distparams.dist == 'gamma':
            a = distparams.a; b = 1/distparams.scale
            distvar = pymc.Gamma(distvarname, alpha=a, beta=b, shape=distparams.shape,
                                 dtype=dtype)

        else:
            raise ValueError("Unrecognized distribution type '{}'."
                                .format(distparams.dist))

        if distparams.dist == 'expnormal':
            distvar = shim.exp(distvar)
        elif distparams.dist == 'lognormal':
            distvar = shim.log(distvar)

        # The assignment to 'olddistvar' prevents inplace multiplication, which
        # can create a recurrent dependency where 'distvar' depends on 'distvar'.
        factor = getattr(distparams, 'factor', 1)
        olddistvar = distvar
        distvar = factor * olddistvar

        if 'transform' in distparams:
            retvar = TransformedVar(distparams.transform, new=distvar)
            if retvar.names.new != distvarname:
                # Probably because a suffix was set.
                raise NotImplementedError
                # retvar.rename(orig=retvar.names.orig + name_suffix,
                #               new =retvar.names.new  + name_suffix)
        else:
            retvar = NonTransformedVar(distvarname, orig=distvar)
        assert(retvar.names.new == distvarname)
        return retvar
Example #15
    def _make_model(self):
        tumorInd = self.pheno['Tumor'] == 1
        tumorTCs = self.pheno.loc[tumorInd, 'tcEst'].values
        tumorRes = self.pheno.loc[tumorInd, 'tcRes'].values
        nTumor = np.round(tumorTCs * tumorRes).astype(int)
        freeInd = self.pheno['Tumor'] == 0
        freeTCs = self.pheno.loc[freeInd, 'tcEst'].values
        freeRes = self.pheno.loc[freeInd, 'tcRes'].values
        nFree = np.round(freeTCs * freeRes).astype(int)
        mu = np.mean(list(tumorRes) + list(freeRes))
        sig = np.std(list(tumorRes) + list(freeRes))
        # moment-matching for the Gamma prior; note sig is a standard deviation,
        # so these formulas implicitly treat it as a variance
        alpha_gamma = mu**2 / sig
        beta_gamma = mu / sig

        with pm.Model() as model:
            u = pm.Uniform('u', 0, 1, testval = .5, shape = 2)
            v = pm.Gamma('v', alpha = alpha_gamma, beta = beta_gamma,
                         testval = 100, shape = 2)
            alpha = pm.Deterministic('alpha', v * u)
            beta = pm.Deterministic('beta', v * (1 - u))
            p = pm.Beta('p', alpha = alpha, beta = beta, shape = 2)
            obsTumor = [pm.Binomial('obsTumor' + str(i), n = tumorRes[i], p = p[0],
                                     observed = nTumor[i])
                        for i in range(len(nTumor))]
            obsFree = [pm.Binomial('obsFree' + str(i), n = freeRes[i], p = p[1],
                                    observed = nFree[i])
                       for i in range(len(nFree))]
        return model
Example #16
def trial1():
    radon = pd.read_csv('data/radon.csv')[['county', 'floor', 'log_radon']]
    # print(radon.head())
    county = pd.Categorical(radon['county']).codes
    # print(county)

    niter = 1000
    with pm.Model() as hm:
        # County hyperpriors
        mu_a = pm.Normal('mu_a', mu=0, sd=10)
        sigma_a = pm.HalfCauchy('sigma_a', beta=1)
        mu_b = pm.Normal('mu_b', mu=0, sd=10)
        sigma_b = pm.HalfCauchy('sigma_b', beta=1)

        # County slopes and intercepts
        a = pm.Normal('slope', mu=mu_a, sd=sigma_a, shape=len(set(county)))
        b = pm.Normal('intercept', mu=mu_b, sd=sigma_b, shape=len(set(county)))

        # Household errors
        sigma = pm.Gamma("sigma", alpha=10, beta=1)

        # Model prediction of radon level
        mu = a[county] + b[county] * radon.floor.values

        # Data likelihood
        y = pm.Normal('y', mu=mu, sd=sigma, observed=radon.log_radon)

        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        hm_trace = pm.sample(niter, step, start=start)

        plt.figure(figsize=(8, 60))
        pm.forestplot(hm_trace, varnames=['slope', 'intercept'])
Example #17
    def create_model(self):
        """ Creates and returns the PyMC3 model.

        Note: The size of the shared variables must match the size of the
        training data. Otherwise, setting the shared variables later will
        raise an error. See http://docs.pymc.io/advanced_theano.html

        Returns
        ----------
        model : the PyMC3 model
        """
        model_input = theano.shared(np.zeros([self.num_training_samples,
                                              self.num_pred]))

        model_output = theano.shared(np.zeros(self.num_training_samples))

        self.shared_vars = {
            'model_input': model_input,
            'model_output': model_output,
        }

        self.gp = None
        model = pm.Model()

        with model:
            length_scale = pm.Gamma('length_scale', alpha=2, beta=1,
                                    shape=(1, self.num_pred))
            signal_variance = pm.HalfCauchy('signal_variance', beta=5,
                                            shape=1)
            noise_variance = pm.HalfCauchy('noise_variance', beta=5,
                                           shape=1)

            if self.kernel is None:
                cov_function = signal_variance ** 2 * RBF(
                    input_dim=self.num_pred,
                    ls=length_scale)
            else:
                cov_function = self.kernel

            if self.prior_mean is None:
                mean_function = pm.gp.mean.Zero()
            else:
                mean_function = pm.gp.mean.Constant(c=self.prior_mean)

            self.gp = pm.gp.MarginalSparse(mean_func=mean_function,
                                           cov_func=cov_function,
                                           approx="FITC")

            # initialize 20 inducing points with K-means
            # gp.util
            Xu = pm.gp.util.kmeans_inducing_points(20,
                                                   X=model_input.get_value())

            y = self.gp.marginal_likelihood('y',
                                            X=model_input.get_value(),
                                            Xu=Xu,
                                            y=model_output.get_value(),
                                            # despite its name, this enters as a
                                            # noise standard deviation
                                            sigma=noise_variance)

        return model
Example #18
def get_model(x, r, R, vaf0, K=10):
    # aliases: pmc = pymc3, tns = theano.tensor, nmp = numpy; aux is a
    # project-local helper module (a sketch of cov_quad_exp follows this example)
    nsamples = r.shape[1]
    r, R, vaf0 = r[:, :, None], R[:, :, None], vaf0[:, :, None]
    idxs = aux.corr_vector_to_matrix_indices(K)
    with pmc.Model() as model:
        w = pmc.Dirichlet('w', nmp.ones(K))
        lw = tns.log(w)

        # alpha = pmc.Gamma('alpha', 1.0, 1.0)
        # u = pmc.Beta('u', 1.0, alpha, shape=K-1)
        # lw = aux.stick_breaking_log(u)

        rho = pmc.Gamma('rho', 1.0, 1.0)
        Cc = tns.fill_diagonal(pmc.LKJCorr('C', eta=2.0, n=K)[idxs], 1.0)
        Cr = aux.cov_quad_exp(x, 1.0, rho)
        mu_psi = pmc.MatrixNormal('mu_psi',
                                  mu=nmp.zeros((nsamples, K)),
                                  rowcov=Cr,
                                  colcov=Cc,
                                  shape=(nsamples, K))
        psi = pmc.Normal('psi', mu=mu_psi, sd=0.1, shape=(nsamples, K))
        phi = pmc.Deterministic('phi', pmc.invlogit(psi))

        # psi = pmc.MvNormal('psi', mu=nmp.zeros(K), tau=nmp.eye(K), shape=(nsamples, K))
        # phi = pmc.Deterministic('phi', pmc.invlogit(psi))

        theta = pmc.Deterministic('theta', vaf0 * phi[None, :, :])
        pmc.DensityDist('r', aux.binmixND_logp_fcn(R, theta, lw), observed=r)
    return model
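Of the aux helpers, cov_quad_exp(x, h2, rho) is presumably a squared-exponential covariance over the sample coordinates; a sketch under that assumption (rho may be a symbolic tensor, so Theano ops are used):

import theano.tensor as tns

def cov_quad_exp(x, h2, rho):
    # h2 * exp(-(x_i - x_j)^2 / (2 * rho^2))
    d2 = (x[:, None] - x[None, :]) ** 2
    return h2 * tns.exp(-0.5 * d2 / rho ** 2)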
Example #19
def get_bayesian_model(cat_cols, num_cols):

    # Preprocessing for numerical data
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())])

    # Preprocessing for categorical data

    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore', sparse=False))])

    # Bundle preprocessing for numerical and categorical data
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, num_cols),
            ('cat', categorical_transformer, cat_cols)])

    with pm.Model() as linear_model:
        # NOTE: as written, `weights` and `noise` are sampled but never enter the
        # likelihood, `y_test` must already be defined in the enclosing scope, and
        # the 'classifier' step below receives posterior-predictive samples
        # rather than a fitted estimator.
        weights = pm.Normal('weights', mu=0, sigma=1)
        noise = pm.Gamma('noise', alpha=2, beta=1)
        y_observed = pm.Normal('y_observed',
                               mu=0,
                               sigma=10,
                               observed=y_test)

        prior = pm.sample_prior_predictive()
        posterior = pm.sample()
        posterior_pred_clf = pm.sample_posterior_predictive(posterior)

        # Bundle preprocessing and modeling code in a pipeline
        model = Pipeline(steps=[('preprocessor', preprocessor),
                                ('classifier', posterior_pred_clf)])

    return model
Example #20
    def fit(self, X, y):
        """
        Train the model.
        :param X: training inputs, array of shape [n_samples, n_features]
        :param y: training targets, array of shape [n_samples]
        :return: None; the fitted GP and trace are stored on self
        """

        # bayesian matern kernel using gaussian processes
        with pm.Model() as self.model:
            l = pm.Gamma("l", alpha=2, beta=1, shape=X.shape[1])
            nu = pm.HalfCauchy("nu", beta=1)

            cov = nu ** 2 * pm.gp.cov.ExpQuad(X.shape[1], l)

            self.gp = pm.gp.Marginal(cov_func=cov)

            sigma = pm.HalfCauchy("sigma", beta=1)
            y_ = self.gp.marginal_likelihood("y", X=X, y=y, noise=sigma)

            if self.is_MAP:
                self.map_trace = [pm.find_MAP()]

            else:
                self.map_trace = pm.sample(500, tune=500)
Example #21
    def create_model(self,
                     x=None,
                     mu_D=None,
                     sd_D=None,
                     mu_A=None,
                     sd_A=None,
                     delta_t=None,
                     N=None):
        with pm.Model() as model:
            D = pm.Gamma('D', mu=mu_D, sd=sd_D)
            A = pm.Gamma('A', mu=mu_A, sd=sd_A)

            B = pm.Deterministic('B', pm.math.exp(-delta_t * D / A))

            # Ornstein_Uhlenbeck is a custom distribution; a sketch follows this example
            path = Ornstein_Uhlenbeck('path', D=D, A=A, B=B, observed=x)
        return model
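Ornstein_Uhlenbeck is not shown here. A minimal sketch of one plausible definition, assuming B = exp(-delta_t * D / A) is the AR(1) coefficient and A the stationary variance:

import pymc3 as pm
import theano.tensor as tt

class Ornstein_Uhlenbeck(pm.Continuous):
    """OU path: x_i | x_{i-1} ~ N(B * x_{i-1}, A * (1 - B**2))."""

    def __init__(self, D=None, A=None, B=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.D, self.A, self.B = D, A, B

    def logp(self, x):
        A, B = self.A, self.B
        # transition density for each step plus the stationary density of x[0]
        ou_like = pm.Normal.dist(mu=x[:-1] * B, tau=1.0 / (A * (1 - B ** 2))).logp(x[1:])
        return pm.Normal.dist(mu=0.0, tau=1.0 / A).logp(x[0]) + tt.sum(ou_like)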
Example #22
    def fit(self, size=5000, nodes=10, traceplot_name=None):
        '''
        Args:
            size (int): the length of the Markov chain
            nodes (int): number of kernels (inducing points) for the approximation
            traceplot_name (str): name of the traceplot file; if None, no
                traceplot is generated
        '''
        self.model = pm.Model()
        with self.model:

            tau = pm.Gamma("tau", alpha=2, beta=1, shape=3)
            eta = pm.HalfCauchy("eta", beta=5)
            cov = eta**2 * pm.gp.cov.Matern52(3, tau)

            self.gp = pm.gp.MarginalSparse(cov_func=cov, approx="VFE")
            Xu = pm.gp.util.kmeans_inducing_points(nodes, self.X_train)

            sigma = pm.HalfNormal('sigma', sd=4)
            y_ = self.gp.marginal_likelihood("y",
                                             X=self.X_train,
                                             Xu=Xu,
                                             y=np.log(self.y_train),
                                             noise=sigma)

            start = pm.find_MAP()
            self.trace = pm.sample(size, start=start)

        if traceplot_name:
            fig, axs = plt.subplots(3, 2)  # one row per RV: tau, eta, sigma
            pm.traceplot(self.trace, varnames=['tau', 'eta', 'sigma'], ax=axs)
            fig.savefig(traceplot_name)
            fig_path = os.path.join(os.getcwd(), traceplot_name)
            print(f'the traceplot has been saved to {fig_path}')
Example #23
def mixture_model(random_seed=1234):
    """Sample mixture model to use in benchmarks"""
    # aet is assumed to be aesara.tensor (import aesara.tensor as aet)
    np.random.seed(random_seed)  # was hard-coded to 1234, ignoring the argument
    size = 1000
    w_true = np.array([0.35, 0.4, 0.25])
    mu_true = np.array([0.0, 2.0, 5.0])
    sigma = np.array([0.5, 0.5, 1.0])
    component = np.random.choice(mu_true.size, size=size, p=w_true)
    x = np.random.normal(mu_true[component], sigma[component], size=size)

    with pm.Model() as model:
        w = pm.Dirichlet("w", a=np.ones_like(w_true))
        mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape)
        enforce_order = pm.Potential(
            "enforce_order",
            aet.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf) +
            aet.switch(mu[1] - mu[2] <= 0, 0.0, -np.inf),
        )
        tau = pm.Gamma("tau", alpha=1.0, beta=1.0, shape=w_true.shape)
        pm.NormalMixture("x_obs", w=w, mu=mu, tau=tau, observed=x)

    # Initialization can be poorly specified, this is a hack to make it work
    start = {
        "mu": mu_true.copy(),
        "tau_log__": np.log(1.0 / sigma**2),
        "w_stickbreaking__": np.array([-0.03, 0.44]),
    }
    return model, start
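Usage sketch (the returned start dict works around the poor default initialization the comment above mentions; pm is pymc3):

model, start = mixture_model()
with model:
    trace = pm.sample(1000, start=start, chains=2)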
Example #24
def trial1():
    radon = pd.read_csv('data/radon.csv')[['county', 'floor', 'log_radon']]
    # print(radon.head())
    county = pd.Categorical(radon['county']).codes
    # print(county)

    with pm.Model() as hm:
        # County hyperpriors
        mu_a = pm.Normal('mu_a', mu=0, tau=1.0 / 100**2)
        sigma_a = pm.Uniform('sigma_a', lower=0, upper=100)
        mu_b = pm.Normal('mu_b', mu=0, tau=1.0 / 100**2)
        sigma_b = pm.Uniform('sigma_b', lower=0, upper=100)

        # County slopes and intercepts
        a = pm.Normal('slope', mu=mu_a, sd=sigma_a, shape=len(set(county)))
        b = pm.Normal('intercept',
                      mu=mu_b,
                      tau=1.0 / sigma_b**2,
                      shape=len(set(county)))

        # Household errors
        sigma = pm.Gamma("sigma", alpha=10, beta=1)

        # Model prediction of radon level
        mu = a[county] + b[county] * radon.floor.values

        # Data likelihood
        y = pm.Normal('y', mu=mu, sd=sigma, observed=radon.log_radon)
        print(y)
Example #25
def fit_counts_model(counts, mins_played):
    ## estimates a hierarchical poisson model for count data
    ## takes as input:
    ##      counts, a numpy array of shape (num_players,) containing the total numbers of actions completed (across all games)
    ##      mins_played, a numpy array of shape (num_players,) containing the total number of minutes each player was observed for
    ## returns:
    ##      sl, a numpy array of shape (6000,N) containing 6000 posterior samples of actions per 90 (N is the number of players in the
    ##      original data frame who have actually played minutes)
    ##      sb, a numpy array of shape (6000,2) containing 6000 posterior samples of the population-level gamma shape parameter &
    ##                                          the population-level mean
    ##      kk, boolean indicating which players have actually played minutes
    import numpy as np
    import pymc3 as pm
    kk = (mins_played > 0) & np.isfinite(counts)
    mins_played = mins_played[kk]
    counts = counts[kk]
    N = counts.shape[0]

    with pm.Model() as model:
        beta = pm.HalfNormal('beta', sigma=100)
        mu = pm.HalfFlat('mu')
        lambdas = pm.Gamma('lambdas', alpha=mu * beta, beta=beta, shape=N)
        lambda_tilde = lambdas * mins_played
        y = pm.Poisson('y', lambda_tilde, observed=counts)
        approx = pm.fit(n=30000)
    # draw once and reuse, so sl and sb come from the same posterior draws
    draws = approx.sample(6000)
    sl = draws['lambdas'] * 90
    sb = np.c_[draws['beta'], draws['mu']]
    return [sl, sb, kk, 'count']
Example #26
	def run_factorization(self, N, S, X, K, num_cov, k, n):
		# Smart initialization
		rat = k/n
		nans = np.isnan(rat)
		conc_inits = np.zeros((1, S))
		beta_inits = np.zeros((num_cov, S))
		for index_s in range(S):
			column_rat = rat[:, index_s]
			column_nans = np.isnan(column_rat)
			valid_rat = column_rat[~column_nans]
			conc_init = min(1.0/np.var(valid_rat), 1000.0)
			m_init = min(max(np.mean(valid_rat), 1.0/1000 ), 1.0-(1.0/1000))
			conc_inits[0, index_s] = conc_init
			beta_inits[0, index_s] = np.log(m_init/(1.0-m_init))
		# Run bb-mf
		with pm.Model() as bb_glm:
			CONC = pm.Gamma('CONC', alpha=1e-4, beta=1e-4, shape=(1,S), testval=conc_inits)
			BETA = pm.Normal('BETA', mu=0, tau=(1/1000000.0), shape=(S, num_cov), testval=beta_inits.T)
			U = pm.Normal('U', mu=0, tau=(1/1000.0), shape=(N, K), testval=np.random.randn(N, K))
			V = pm.Normal('V', mu=0, tau=(1/1000.0), shape=(S, K), testval=np.random.randn(S, K))
			p = pm.math.invlogit(pm.math.dot(X, BETA.T) + pm.math.dot(U,V.T))
			conc_mat = pm.math.dot(np.ones((N,1)), CONC)
			R = pm.BetaBinomial('like',alpha=(p*conc_mat)[~nans], beta=((1.0-p)*conc_mat)[~nans], n=n[~nans], observed=k[~nans])
			approx = pm.fit(method='advi', n=30000)
		pickle.dump(approx, open(self.output_root + '_model', 'wb'))
		#approx = pickle.load( open(self.output_root + '_model', "rb" ) )
		means_dict = approx.bij.rmap(approx.params[0].eval())
		np.savetxt(self.output_root + '_temper_U.txt', (means_dict['U']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_V.txt', (means_dict['V'].T), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_BETA.txt', (means_dict['BETA'].T), fmt="%s", delimiter='\t')
Example #27
def test_matrix_multiplication():
    # Check matrix multiplication works between RVs, transformed RVs,
    # Deterministics, and numpy arrays
    with pm.Model() as linear_model:
        matrix = pm.Normal('matrix', shape=(2, 2))
        transformed = pm.Gamma('transformed', alpha=2, beta=1, shape=2)
        rv_rv = pm.Deterministic('rv_rv', matrix @ transformed)
        np_rv = pm.Deterministic('np_rv', np.ones((2, 2)) @ transformed)
        rv_np = pm.Deterministic('rv_np', matrix @ np.ones(2))
        rv_det = pm.Deterministic('rv_det', matrix @ rv_rv)
        det_rv = pm.Deterministic('det_rv', rv_rv @ transformed)

        posterior = pm.sample(10,
                              tune=0,
                              compute_convergence_checks=False,
                              progressbar=False)
        for point in posterior.points():
            npt.assert_almost_equal(point['matrix'] @ point['transformed'],
                                    point['rv_rv'])
            npt.assert_almost_equal(
                np.ones((2, 2)) @ point['transformed'], point['np_rv'])
            npt.assert_almost_equal(point['matrix'] @ np.ones(2),
                                    point['rv_np'])
            npt.assert_almost_equal(point['matrix'] @ point['rv_rv'],
                                    point['rv_det'])
            npt.assert_almost_equal(point['rv_rv'] @ point['transformed'],
                                    point['det_rv'])
Example #28
File: crv.py, Project: weakit/sympy
    def _sample_pymc3(cls, dist, size):
        """Sample from PyMC3."""

        import pymc3
        pymc3_rv_map = {
            'BetaDistribution': lambda dist:
                pymc3.Beta('X', alpha=float(dist.alpha), beta=float(dist.beta)),
            'CauchyDistribution': lambda dist:
                pymc3.Cauchy('X', alpha=float(dist.x0), beta=float(dist.gamma)),
            'ChiSquaredDistribution': lambda dist:
                pymc3.ChiSquared('X', nu=float(dist.k)),
            'ExponentialDistribution': lambda dist:
                pymc3.Exponential('X', lam=float(dist.rate)),
            'GammaDistribution': lambda dist:
                pymc3.Gamma('X', alpha=float(dist.k), beta=1/float(dist.theta)),
            'LogNormalDistribution': lambda dist:
                pymc3.Lognormal('X', mu=float(dist.mean), sigma=float(dist.std)),
            'NormalDistribution': lambda dist:
                pymc3.Normal('X', float(dist.mean), float(dist.std)),
            'GaussianInverseDistribution': lambda dist:
                pymc3.Wald('X', mu=float(dist.mean), lam=float(dist.shape)),
            'ParetoDistribution': lambda dist:
                pymc3.Pareto('X', alpha=float(dist.alpha), m=float(dist.xm)),
            'UniformDistribution': lambda dist:
                pymc3.Uniform('X', lower=float(dist.left), upper=float(dist.right))
        }

        dist_list = pymc3_rv_map.keys()

        if dist.__class__.__name__ not in dist_list:
            return None

        with pymc3.Model():
            pymc3_rv_map[dist.__class__.__name__](dist)
            return pymc3.sample(size, chains=1, progressbar=False)[:]['X']
Example #29
def get_model(dist, data) -> pm.Model:
    means = data.mean(0)
    n_exp = data.shape[1]
    if dist == "Poisson":
        with pm.Model() as poi_model:
            lam = pm.Exponential("lam", lam=means, shape=(1, n_exp))
            poi = pm.Poisson(
                "poi",
                mu=lam,
                observed=data,
            )
        return poi_model
    if dist == "ZeroInflatedPoisson":
        with pm.Model() as zip_model:
            psi = pm.Uniform("psi", shape=(1, n_exp))
            lam = pm.Exponential("lam", lam=means, shape=(1, n_exp))
            zip = pm.ZeroInflatedPoisson(
                "zip",
                psi=psi,
                theta=lam,
                observed=data,
            )
        return zip_model
    if dist == "NegativeBinomial":
        with pm.Model() as nb_model:
            gamma = pm.Gamma("gm", 0.01, 0.01, shape=(1, n_exp))
            lam = pm.Exponential("lam", lam=means, shape=(1, n_exp))
            nb = pm.NegativeBinomial(
                "nb",
                alpha=gamma,
                mu=lam,
                observed=data,
            )
        return nb_model
    if dist == "ZeroInflatedNegativeBinomial":
        with pm.Model() as zinb_model:
            gamma = pm.Gamma("gm", 0.01, 0.01, shape=(1, n_exp))
            lam = pm.Exponential("lam", lam=means, shape=(1, n_exp))
            psi = pm.Uniform("psi", shape=(1, n_exp))
            zinb = pm.ZeroInflatedNegativeBinomial(
                "zinb",
                psi=psi,
                alpha=gamma,
                mu=lam,
                observed=data,
            )
        return zinb_model
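Usage sketch (hypothetical data: a 2-D count array with one column per experiment):

import numpy as np
import pymc3 as pm

data = np.random.poisson(3.0, size=(100, 4))
model = get_model("Poisson", data)
with model:
    trace = pm.sample(1000, chains=2)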
Example #30
def group_static_ucb_mes_model(X,
                               explore_param_alpha=.1,
                               explore_param_beta=.1,
                               temperature_alpha=1.,
                               temperature_beta=1.,
                               explore_method_p=.5,
                               maxk=10,
                               samples=200):

    nparticipants = X.shape[3]
    actions = theano.shared(X[0])
    mean = theano.shared(X[1])
    var = theano.shared(X[2])
    mes = theano.shared(X[3])
    with pm.Model() as model:

        #explore_param = pm.Gamma('explore_param', explore_param_alpha, explore_param_beta, shape=maxk)
        var_param = pm.Gamma('var_param',
                             explore_param_alpha,
                             explore_param_beta,
                             shape=maxk)
        mes_param = pm.Gamma('mes_param',
                             explore_param_alpha,
                             explore_param_beta,
                             shape=maxk)
        temperature = pm.Gamma('temperature',
                               temperature_alpha,
                               temperature_beta,
                               shape=maxk)
        #explore_method = pm.Bernoulli('explore_method', p=explore_method_p, shape=maxk)

        alpha = pm.Gamma('alpha', 10**-10., 10**-10.)
        #alpha = pm.HalfNormal('alpha', sd = 1000000)
        beta = pm.Beta('beta', 1., alpha, shape=maxk)
        weights = pm.Deterministic('w', stick_breaking(beta))
        assignments = pm.Categorical('assignments',
                                     weights,
                                     shape=nparticipants)

        # stick_breaking and group_static_ucb_mes_likelihood are assumed to be
        # defined in the surrounding module; a sketch of the former follows
        obs = pm.Potential(
            'obs',
            group_static_ucb_mes_likelihood(actions, mean, var, mes, var_param,
                                            mes_param, temperature,
                                            assignments, maxk))
        #step = pm.Metropolis()
        trace = pm.sample(samples, cores=4)  #, step=step)  # njobs is deprecated; use cores
        return trace