Example No. 1
def fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs):
    callbacks = vi_params.get("callbacks", [])
    for i, c in enumerate(callbacks):
        if isinstance(c, CheckParametersConvergence):
            params = c.__dict__
            params.pop("_diff")
            params.pop("prev")
            params.pop("ord")
            params["diff"] = "absolute"
            callbacks[i] = CheckParametersConvergence(**params)
    if sampler == "variational":
        with self.model:
            try:
                self.trace_ = pm.sample(chains=2, cores=8, tune=5, draws=5)
                vi_params["start"] = self.trace_[-1]
                self.trace_vi_ = pm.fit(**vi_params)
                self.trace_ = self.trace_vi_.sample(draws=draws)
            except Exception as e:
                if hasattr(e, "message"):
                    message = e.message
                else:
                    message = e
                logger.error(message)
                self.trace_vi_ = None
        if self.trace_vi_ is None and self.trace_ is None:
            with self.model:
                logger.info(
                    "Error in ADVI sampler (requested {} draws), falling back to NUTS"
                    .format(draws))
                self.trace_ = pm.sample(chains=1,
                                        cores=4,
                                        tune=20,
                                        draws=20,
                                        step=pm.NUTS())
    elif sampler == "metropolis":
        with self.model:
            start = pm.find_MAP()
            self.trace_ = pm.sample(
                chains=2,
                cores=8,
                tune=tune,
                draws=draws,
                **kwargs,
                step=pm.Metropolis(),
                start=start,
            )
    else:
        with self.model:
            self.trace_ = pm.sample(chains=2,
                                    cores=8,
                                    tune=tune,
                                    draws=draws,
                                    **kwargs,
                                    step=pm.NUTS())
Example No. 2
def get_step_for_trace(trace=None, model=None, regularize=True, regular_window=5, regular_variance=1e-3, **kwargs):
    """ Define a tuning procedure that adapts off-diagonal mass matrix terms
        adapted from a blog post by Dan Foreman-Mackey here:
        https://dfm.io/posts/pymc3-mass-matrix/

       Args:
           trace (trace): pymc3 trace object
           model (model): pymc3 model object
           
           regularize (bool): flag to turn on covariance matrix regularization
           regular_window (int): size of parameter space at which regularization becomes important
           regular_variance (float): magnitude of covariance floor
           
       Returns:
           pymc3 step_methods object

    """

    model = pm.modelcontext(model)
    
    # If not given, use the trivial metric
    if trace is None:
        potential = pm.step_methods.hmc.quadpotential.QuadPotentialFull(np.eye(model.ndim))
        return pm.NUTS(potential=potential, **kwargs)
    
    # Loop over samples and convert to the relevant parameter space
    # while removing divergent samples
    div_mask = np.invert(np.copy(trace.diverging))
    samples = np.empty((div_mask.sum() * trace.nchains, model.ndim))
    i = 0
    imask = 0
    for chain in trace._straces.values():
        for p in chain:
            if div_mask[imask]:
                samples[i] = model.bijection.map(p)
                i += 1
            imask += 1
    
    # Compute the sample covariance
    cov = np.cov(samples, rowvar=0)
    
    # Stan uses a regularized estimator for the covariance matrix to
    # be less sensitive to numerical issues for large parameter spaces.
    if regularize:
        N = len(samples)
        cov = cov * N / (N + regular_window)
        cov[np.diag_indices_from(cov)] += regular_variance * regular_window / (N + regular_window)
    
    # Use the sample covariance as the inverse metric
    potential = pm.step_methods.hmc.quadpotential.QuadPotentialFull(cov)

    return pm.NUTS(potential=potential, **kwargs)
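
A minimal usage sketch (not from the source) of the windowed warm-up loop this helper is designed for, following the linked blog post: sample short tuning windows, rebuild the NUTS step from the accumulated trace, then run the final chain. The toy model, window sizes, and draw counts below are assumptions.

import numpy as np
import pymc3 as pm

with pm.Model() as toy_model:
    pm.Normal("x", mu=0.0, sd=1.0, shape=3)

    # Grow the tuning window, re-estimating the full mass matrix each round.
    burnin_trace = None
    for n_tune in (25, 50, 100, 200):
        step = get_step_for_trace(burnin_trace, model=toy_model)
        burnin_trace = pm.sample(draws=2, tune=n_tune, step=step, chains=1)

    # Final run with the adapted mass matrix.
    step = get_step_for_trace(burnin_trace, model=toy_model)
    trace = pm.sample(draws=1000, tune=100, step=step, chains=1)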
Example No. 3
def fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs):
    callbacks = vi_params.get('callbacks', [])
    for i, c in enumerate(callbacks):
        if isinstance(c, CheckParametersConvergence):
            params = c.__dict__
            params.pop('_diff')
            params.pop('prev')
            params.pop('ord')
            params['diff'] = 'absolute'
            callbacks[i] = CheckParametersConvergence(**params)
    if sampler == 'variational':
        with self.model:
            try:
                self.trace = pm.sample(chains=2, cores=8, tune=5, draws=5)
                vi_params['start'] = self.trace[-1]
                self.trace_vi = pm.fit(**vi_params)
                self.trace = self.trace_vi.sample(draws=draws)
            except Exception as e:
                if hasattr(e, 'message'):
                    message = e.message
                else:
                    message = e
                self.logger.error(message)
                self.trace_vi = None
        if self.trace_vi is None and self.trace is None:
            with self.model:
                self.logger.info(
                    "Error in ADVI sampler (requested {} draws), falling back to NUTS"
                    .format(draws))
                self.trace = pm.sample(chains=1,
                                       cores=4,
                                       tune=20,
                                       draws=20,
                                       step=pm.NUTS())
    elif sampler == 'metropolis':
        with self.model:
            start = pm.find_MAP()
            self.trace = pm.sample(chains=2,
                                   cores=8,
                                   tune=tune,
                                   draws=draws,
                                   **kwargs,
                                   step=pm.Metropolis(),
                                   start=start)
    else:
        with self.model:
            self.trace = pm.sample(chains=2,
                                   cores=8,
                                   tune=tune,
                                   draws=draws,
                                   **kwargs,
                                   step=pm.NUTS())
Example No. 4
def get_step_for_trace(init_cov=None,
                       trace=None,
                       model=None,
                       regularize_cov=True,
                       regular_window=5,
                       regular_variance=1e-3,
                       **kwargs):
    """
    Construct an estimate of the mass matrix based on the sample covariance,
    which is either provided directly via `init_cov` or generated from a
    `MultiTrace` object from PyMC3. This is then used to initialize a `NUTS`
    object to use in `sample`.
    """

    model = pm.modelcontext(model)

    # If no trace or covariance is provided, just use the identity.
    if trace is None and init_cov is None:
        potential = QuadPotentialFull(np.eye(model.ndim))

        return pm.NUTS(potential=potential, **kwargs)

    # If the trace is provided, loop over samples
    # and convert to the relevant parameter space.
    if trace is not None:
        samples = np.empty((len(trace) * trace.nchains, model.ndim))
        i = 0
        for chain in trace._straces.values():
            for p in chain:
                samples[i] = model.bijection.map(p)
                i += 1

        # Compute the sample covariance.
        cov = np.cov(samples, rowvar=False)

        # Stan uses a regularized estimator for the covariance matrix to
        # be less sensitive to numerical issues for large parameter spaces.
        if regularize_cov:
            N = len(samples)
            cov = cov * N / (N + regular_window)
            diags = np.diag_indices_from(cov)
            cov[diags] += ((regular_variance * regular_window) /
                           (N + regular_window))
    else:
        # Otherwise, just copy `init_cov`.
        cov = np.array(init_cov)

    # Use the sample covariance as the inverse metric.
    potential = QuadPotentialFull(cov)

    return pm.NUTS(potential=potential, **kwargs)
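
A short, hedged sketch of the init_cov path: seed the mass matrix from a covariance guess instead of a trace. The toy model and the covariance values are illustrative assumptions, not from the source.

import numpy as np
import pymc3 as pm

with pm.Model() as toy_model:
    pm.Normal("theta", mu=0.0, sd=1.0, shape=2)

    # Diagonal guess for the posterior covariance (assumed values).
    guess_cov = np.diag([0.5, 2.0])
    step = get_step_for_trace(init_cov=guess_cov, model=toy_model)
    trace = pm.sample(draws=500, tune=500, step=step, chains=2)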
Example No. 5
def test_explicit_sample():
    with pm.Model() as model:
        a = pm.Normal('a', shape=1)
        pm.HalfNormal('b')
        step1 = pm.NUTS([a])
        step2 = pm.Metropolis([model.b_log__])

    step = pm.CompoundStep([step1, step2])

    proc = ps.ProcessAdapter(10,
                             10,
                             step,
                             chain=3,
                             seed=1,
                             start={
                                 'a': 1.,
                                 'b_log__': 2.
                             })
    proc.start()
    while True:
        proc.write_next()
        out = ps.ProcessAdapter.recv_draw([proc])
        view = proc.shared_point_view
        for name in view:
            view[name].copy()
        if out[1]:
            break
    proc.join()
Example No. 6
def sample_posterior(x, y, n_samples=1000, random_seed=0):
    '''
    A general linear model.
    
    Parameters
    ---------
    x: A numpy array
    y: A numpy array
    n_samples: The number of samples to draw in pymc3.sample().
               Defaults to 1000.
    random_seed: An int. Used in pymc3.sample().
                 Defaults to 0.
                 
    Returns
    -------
    A pymc3.MultiTrace object with access to sampling values.
    '''

    df = pd.DataFrame({'x': x, 'y': y})
    #Create Bayesian linear model
    with pm.Model() as model_glm:
        family = pm.glm.families.Normal()
        pm.glm.glm('y ~ x', df, family=family)
        #Estimates model parameters
        start = pm.find_MAP()
        #generate posterior samples
        step = pm.NUTS()
        trace = pm.sample(n_samples,
                          step=step,
                          start=start,
                          progressbar=True,
                          random_seed=random_seed)

    return trace
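
A hedged usage sketch for sample_posterior above. The synthetic data are invented for illustration, and the trace variable names ('Intercept', 'x') assume the old pm.glm formula API used in the snippet.

import numpy as np

rng = np.random.RandomState(0)
x = np.linspace(0.0, 1.0, 50)
y = 1.0 + 2.0 * x + rng.normal(scale=0.1, size=x.size)

trace = sample_posterior(x, y, n_samples=500, random_seed=0)
print(trace['Intercept'].mean(), trace['x'].mean())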
Example No. 7
    def learn_bayesian_linear_model(self,
                                    encoded_plans,
                                    prior_weights,
                                    number_of_dimensions,
                                    sd=1,
                                    sampling_count=2000,
                                    num_chains=3,
                                    bias_preference=0,
                                    uninformative_prior_var=None):

        # TODO/NOTE: prior weights are not currently used; the model works just as well without them
        # each entry of encoded_plans is a list of [<encoding>, <rating>]
        input_dataset = np.array([x[0] for x in encoded_plans], dtype=np.float)
        output_dataset = np.array([x[1] for x in encoded_plans],
                                  dtype=np.float)

        bias_preference = tt.constant(bias_preference)
        # TODO: make the bias a learnable parameter
        with pm.Model() as linear_model:
            # Intercept
            # alpha = pm.Normal('alpha', mu=0.0, sd=sd)
            alpha = pm.Deterministic('alpha', bias_preference)
            # TODO: add support for a much larger covariance on unknown features? sd = 1.0 seems to be enough
            # Slope
            # prior_weights = np.random.rand(number_of_dimensions)
            betas = pm.MvNormal('betas',
                                mu=prior_weights,
                                cov=uninformative_prior_var,
                                shape=(number_of_dimensions, ))
            # Standard deviation
            sigma = pm.HalfNormal('sigma', sd=sd)
            # sigma = sd #unfair knowledge
            # Estimate of mean
            mean = alpha + tt.dot(input_dataset, betas)
            # Observed values
            Y_obs = pm.Normal('Y_obs',
                              mu=mean,
                              sd=sigma,
                              observed=output_dataset)
            # Sampler
            step = pm.NUTS()
            # step = pm.Metropolis()
            # step = pm.HamiltonianMC()
            # Posterior distribution
            linear_params_trace = pm.sample(sampling_count,
                                            step,
                                            chains=num_chains,
                                            cores=num_chains)

            # TODO/NOTE: do not add tuning when alpha is deterministic; it fails spectacularly, not its intended use
            # TODO/NOTE: consider making mu and cov parameters sampled from distributions too
            # mu = pm.MvNormal('mu', mu=prior_weights, cov=cov, shape=(number_of_dimensions,))
        #end with
        # TODO: look into the alpha values that were sampled; they didn't appear in the plot
        self.full_param_trace = linear_params_trace  # keep the full trace
        # TODO: this is only from one chain; trace.get_values() returns values per chain, which could then be mixed
        self.linear_params_values = linear_params_trace[
            -2000:]  # we only take the last 2000, and assume it is after sufficient mixing and good values.
        self.set_normal_distr_params(num_chains=num_chains,
                                     num_last_samples=None)
Example No. 8
def test_abort():
    with pm.Model() as model:
        a = pm.Normal("a", shape=1)
        pm.HalfNormal("b")
        step1 = pm.NUTS([a])
        step2 = pm.Metropolis([model.b_log__])

    step = pm.CompoundStep([step1, step2])

    ctx = multiprocessing.get_context()
    proc = ps.ProcessAdapter(
        10,
        10,
        step,
        chain=3,
        seed=1,
        mp_ctx=ctx,
        start={"a": 1.0, "b_log__": 2.0},
        step_method_pickled=None,
        pickle_backend="pickle",
    )
    proc.start()
    proc.write_next()
    proc.abort()
    proc.join()
Example No. 9
def sample1000():
    print('----------010')
    # Estimate the mean parameter of a simple Gaussian distribution
    with pm.Model() as model_10:
        mu = pm.Normal('mu', mu=0., sd=0.1)
        print('===001')
        print(mu)
        x = pm.Normal('x', mu=mu, sd=1., observed=x_sample_1000)
        print('===002')
        print(x)
    with model_10:
        # Use the MAP estimate as the starting point for sampling
        start = pm.find_MAP()
        print('===003')
        print(start)
        # NUTS: the No-U-Turn Sampler, a sampling method
        step = pm.NUTS()
        print('===004')
        print(step)
        # 100 iterations
        trace = pm.sample(100, step=step, start=start)
        print('===005')
        print(trace)
    print('===006')
    print(pm.traceplot(trace))
    print(pm.summary(trace).round(2))
    plt.savefig('result_1000')
Example No. 10
def test_l2hmc_matches_leapfrog():
    with pm.Model():
        x = pm.Normal('x', 0, 1)
        y = pm.Normal('y', x, 1)
        step = pm.NUTS()

    q_func, p_func = default_aux_functions()
    l2hmc_integrator = L2HMCLeapfrogIntegrator(step.potential,
                                               step._logp_dlogp_func,
                                               q_func=q_func,
                                               p_func=p_func)
    hmc_integrator = pm.step_methods.hmc.integration.CpuLeapfrogIntegrator(
        step.potential, step._logp_dlogp_func)

    points = []
    p0 = step.potential.random()
    for integrator in (l2hmc_integrator, hmc_integrator):
        point = {'x': np.array([1.]), 'y': np.array([1.])}
        integrator._logp_dlogp_func.set_extra_values(point)
        q0 = integrator._logp_dlogp_func.dict_to_array(point)
        state = integrator.compute_state(q0, p0)
        points.append(integrator._step(0.1, state))

    l2hmc_state, hmc_state = points
    npt.assert_array_almost_equal(l2hmc_state.q, hmc_state.q)
    npt.assert_array_almost_equal(l2hmc_state.p, hmc_state.p)
    npt.assert_array_almost_equal(l2hmc_state.v, hmc_state.v)
    npt.assert_array_almost_equal(l2hmc_state.q_grad, hmc_state.q_grad)
    assert l2hmc_state.energy == hmc_state.energy
Example No. 11
    def _estimate_model(self):
        #If user provides model, use that. Otherwise, create default Bayesian Model
        self.x_shared = theano.shared(self.x_train.values)
        if self.model_provided is not None:
            self.model = self.model_provided
        else:
            self.model = pymc3.Model()
            with self.model:

                # Priors for unknown model parameters
                alpha = pymc3.Normal('alpha', mu=0, sd=1)
                beta = pymc3.Normal('beta', mu=0, sd=1, shape=self.number_feat)
                sigma = pymc3.HalfNormal('sigma', sd=1)

                # Expected value of outcome
                #mu = alpha + x_shared[:,0]*beta[0] + x_shared[:,1]*beta[1] + x_shared[:,2]*beta[2] + x_shared[:,3]*beta[3] + x_shared[:,4]*beta[4] + x_shared[:,5]*beta[5]  + x_shared[:,6]*beta[6] + x_shared[:,7]*beta[7]
                mu = alpha + theano.tensor.dot(self.x_shared, beta)

                # Likelihood (sampling distribution) of observations
                Y_obs = pymc3.Normal('Y_obs',
                                     mu=mu,
                                     sd=sigma,
                                     observed=self.y_train.values)

        with self.model:
            self.start = pymc3.find_MAP(fmin=scipy.optimize.fmin_powell)
            step = pymc3.NUTS(scaling=self.start)
            self.trace = pymc3.sample(self.niter, step)

        return self.model
Example No. 12
def linear_posterior(X, y, n_samples=1000, random_seed=0):
    """
    A general linear model.

    Parameters
    ---------
    X: A numpy array
    y: A numpy array
    n_samples: The number of samples to draw in pymc3.sample().
               Defaults to 1000.
    random_seed: An int. Used in pymc3.sample().
                 Defaults to 0.

    Returns
    -------
    A pymc3.MultiTrace object with access to sampling values.
    """

    df = {'x': X, 'y': y}

    with pm.Model() as model_glm:
        pm.glm.glm('y ~ x', df, family=pm.glm.families.StudentT())
        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        trace = pm.sample(n_samples, start=start, step=step, model=model_glm, random_seed=random_seed, progressbar=True)

    return trace
Example No. 13
def get_trace(X, y, n_samples=1000, random_seed=0):
    '''
    A simple Bayesian linear regression model with normal priors.
    
    Parameters
    ---------
    X: A numpy array
    y: A numpy array
    n_samples: The number of samples to draw in pymc3.sample().
               Defaults to 1000.
    random_seed: An int. Used in pymc3.sample().
                 Defaults to 0.
                 
    Returns
    -------
    A pymc3.MultiTrace object with access to sampling values.
    '''
    #Create linear model with alpha, beta, and sigma defined as above
    with pm.Model() as linear_model:
        alpha = pm.Normal('alpha', mu = 0, sd = 1.0)
        beta = pm.Normal('beta', mu = 10, sd = 1.0)
        sigma = pm.Uniform('sigma', lower = 0, upper = 100)
        y_exp =  alpha+beta*X
        likelihood = pm.Normal('y', mu=y_exp, sd=sigma, observed=y)
        #Estimates model parameters
        start = pm.find_MAP()
        #generate posterior samples
        step = pm.NUTS(scaling=start)
        trace = pm.sample(n_samples, step=step, start=start, progressbar=True, random_seed=random_seed)
        
    return trace
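
A small, assumed usage sketch for get_trace above, with synthetic data roughly matching the priors (slope near 10, intercept near 0); the numbers are made up for illustration.

import numpy as np

rng = np.random.RandomState(0)
X = np.linspace(0.0, 1.0, 100)
y = 10.0 * X + rng.normal(scale=0.5, size=X.size)

trace = get_trace(X, y, n_samples=1000, random_seed=0)
print(trace['alpha'].mean(), trace['beta'].mean(), trace['sigma'].mean())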
Example No. 14
def get_trace(X, y, n_samples=1000, random_seed=0):
    """
    A simple Bayesian linear regression model with normal priors.

    Parameters
    ---------
    X: A numpy array
    y: A numpy array
    n_samples: The number of samples to draw in pymc3.sample().
               Defaults to 1000.
    random_seed: An int. Used in pymc3.sample().
                 Defaults to 0.

    Returns
    -------
    A pymc3.MultiTrace object with access to sampling values.
    """

    with pm.Model() as linear_model:
        alpha = pm.Normal('alpha', mu=0.0, sd=1.0)
        beta = pm.Normal('beta', mu=10.0, sd=1.0)
        sigma = pm.Uniform('sigma', lower=0, upper=100)
        mu = alpha + beta * X
        y = pm.Normal('y', mu=mu, sd=sigma, observed=y)
        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        trace = pm.sample(n_samples, start=start, step=step, model=linear_model, random_seed=random_seed,
                          progressbar=True)

    return trace
Example No. 15
def fit(x,y,meanVec,stdVec,errors):
    
    aMu,bMu,cMu = meanVec
    aStd,bStd,cStd = stdVec
    
    model = pm.Model()
    
    if False:    
        df = pd.DataFrame(np.transpose([x,y,errors]),columns=['x','y','error'])
        print(df)
    
    with model:    
        # Priors for unknown model parameters
        a = pm.Normal('a', mu=aMu, sd=aStd)
        b = pm.Normal('b', mu=bMu, sd=bStd)
        c = pm.Normal('c', mu=cMu, sd=cStd)    
            
        # Expected value of outcome
        mu = Model(x,a,b,c)
    
        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=errors, observed=y)
        
        # do sampling        
        trace = pm.sample(1000,progressbar=False,init='ADVI',step = pm.NUTS(),njobs=1)
        
        # give summary
        summary = pm.df_summary(trace)
        
        return summary
Example No. 16
def fit(x,y,errors,signA):
        
    model = pm.Model()
    
    if False:    
        df = pd.DataFrame(np.transpose([x,y,errors]),columns=['x','y','error'])
        print(df)
    
    with model:    
        # Priors for unknown model parameters
        LowerA = 0.
        UpperA = 0.1
        if signA == -1.0:
            UpperA = 0.
            LowerA = -0.1
            
        a = pm.Uniform('a', lower=LowerA, upper=UpperA)        
        b = pm.Uniform('b', lower=0., upper=1.0)
        c = pm.Uniform('c', lower=0., upper=1.0)    
            
        # Expected value of outcome
        mu = Model(x,a,b,c)
    
        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=errors, observed=y)
        
        # do sampling        
        trace = pm.sample(1000,progressbar=False,init='ADVI',step = pm.NUTS(),njobs=1)
        
        # give summary
        summary = pm.df_summary(trace)
        
        return summary
Example No. 17
def glm_mcmc_inference(df, formula, family, I):
    """
    Calculates the Markov Chain Monte Carlo trace of
    a Generalised Linear Model Bayesian linear regression
    model on supplied data.

    df: DataFrame containing the data
    formula: Regressing equation in terms of columns of DataFrame df
    family: Type of linear model, given as a string ('normal', 'logistic', or 'poisson').
    I: Number of iterations for MCMC
    """
    
    if family.lower() == 'normal':
        family_object = pm.glm.families.Normal()
    elif family.lower() == 'logistic':
        family_object = pm.glm.families.Binomial()
    elif family.lower() == 'poisson':
        family_object = pm.glm.families.Poisson()
    else:
        print("Family {} is not a supported family".format(family))
        raise NameError("Invalid family")

    # Use PyMC3 to construct a model context
    basic_model = pm.Model()
    with basic_model:
        # Create the glm using the Patsy model syntax
        pm.glm.GLM.from_formula(str(formula), df.dropna(), family=family_object)
        step = pm.NUTS()

        trace = pm.sample(I, step, progressbar=False, tune=50)
        return(trace)
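
A minimal, assumed usage of glm_mcmc_inference above: a toy DataFrame and a Patsy formula. The column names, coefficients, and sample size are invented.

import numpy as np
import pandas as pd

rng = np.random.RandomState(1)
df = pd.DataFrame({'x': rng.normal(size=200)})
df['y'] = 1.5 * df['x'] + rng.normal(scale=0.3, size=200)

trace = glm_mcmc_inference(df, 'y ~ x', family='normal', I=2000)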
Example No. 18
def samplePosterior(model, N, fit_intercept=False, fit_slope=True):
    """
    Monte Carlo for the posterior. Sample posterior predictive
    """
    RANDOM_SEED = 58
    with model:
        step = pm.NUTS()
        trace = pm.sample(N, step)

        if fit_intercept and not fit_slope:
            var_names = ["Intercept", "Y_obs"]
            summary_names = ["Intercept"]
        elif not fit_intercept and fit_slope:
            var_names = ["slope", "Y_obs"]
            summary_names = ["slope"]
        else:
            var_names = ["Intercept", "slope", "Y_obs"]
            summary_names = ["Intercept", "slope"]

        ppc = pm.sample_posterior_predictive(trace,
                                             var_names=var_names,
                                             random_seed=RANDOM_SEED)

    summary = az.summary(trace, var_names=summary_names, round_to=3)
    print(summary)

    params = {}
    for name in summary_names:
        params[name] = {}
        params[name]['hpd_3%'] = summary['hpd_3%'][name]
        params[name]['hpd_mean'] = summary['mean'][name]
        params[name]['hpd_97%'] = summary['hpd_97%'][name]

    return params, ppc['Y_obs']
Example No. 19
def model_returns_normal(data, samples=500):
    """
    Run Bayesian model assuming returns are normally distributed.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    samples : int (optional)
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc3.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """

    with pm.Model() as model:
        mu = pm.Normal('mean returns', mu=0, sd=.01, testval=data.mean())
        sigma = pm.HalfCauchy('volatility', beta=1, testval=data.std())
        returns = pm.Normal('returns', mu=mu, sd=sigma, observed=data)
        pm.Deterministic('annual volatility',
                         returns.distribution.variance**.5 * np.sqrt(252))
        pm.Deterministic(
            'sharpe', returns.distribution.mean /
            returns.distribution.variance**.5 * np.sqrt(252))

        start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(samples, step, start=start)
    return model, trace
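
A hedged sketch calling model_returns_normal above with synthetic, daily-return-like data; the Series values are made up, and the names queried from the trace are the Deterministics defined in the snippet.

import numpy as np
import pandas as pd

rng = np.random.RandomState(42)
data = pd.Series(rng.normal(loc=0.0005, scale=0.01, size=252))

model, trace = model_returns_normal(data, samples=500)
print(trace['annual volatility'].mean(), trace['sharpe'].mean())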
Example No. 20
def posterior_distribution(X, y, N):
    reg = LinearRegression(fit_intercept=True).fit(X.values.reshape(-1, 1),
                                                   y.values)
    # Set up
    with pm.Model() as model:
        # Intercept
        intercept = pm.Normal('Intercept', mu=.5, sd=1)
        # sd = 25

        # Slope
        slope = pm.Normal('slope', mu=float(2 * reg.coef_), sd=1)
        # sd = 1

        # Standard Deviation
        sigma = pm.HalfNormal('sigma', sd=1)
        # sd = 25

        # Estimate of Mean
        mean = intercept + (slope * X.values)

        # Observed Values
        Y_obs = pm.Normal('Y_obs', mu=mean, sd=sigma, observed=y.values)

        # Sampler
        step = pm.NUTS()

        # Posterior distribution
        return pm.sample(N, step)
Example No. 21
def bayes_linregress(y, x=None, nsamples=1000, showtrace=False, ):
  """
  Linear regression. Regress y onto x (or linspace(0,1,len(y)) if None).
  """
  # Default to a unit grid when no regressor is given (as the docstring says).
  if x is None:
    x = np.linspace(0, 1, len(y))

  with pm.Model() as model:
    a = pm.Normal('a', mu=0, sd=20)
    b = pm.Normal('b', mu=0, sd=20)
    sigma = pm.Uniform('sigma', lower=0, upper=20)
    # y_estimate
    y_est = a*x + b
    likelihood = pm.Normal('y', mu=y_est, sd=sigma, observed=y)
    # Inference and MCMC sampling
    start = pm.find_MAP() # Max a post. inference
    step = pm.NUTS() # Hamiltonian MCMC with No U-Turn Sampler
    trace = pm.sample(nsamples, step=step, start=start, random_seed=123,
                      progressbar=True)

  # Show/print the results
  if showtrace:
    pm.traceplot(trace)
    plt.show()
  print('\n') # Report the findings
  for obj in trace.varnames:
    print('%s: %.3f +/- %.3f' %(obj, np.mean(trace[obj]), np.std(trace[obj])))
  
  return trace
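
An assumed usage sketch for bayes_linregress above, regressing noisy y onto x; the data values are illustrative only.

import numpy as np

rng = np.random.RandomState(7)
x = np.linspace(0.0, 1.0, 80)
y = 3.0 * x - 1.0 + rng.normal(scale=0.2, size=x.size)

trace = bayes_linregress(y, x=x, nsamples=500)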
Example No. 22
def train_BLM_model(X, y, random_seed = None):
    if random_seed is not None:
        np.random.seed(random_seed)

    with pm.Model() as model:
        # Intercept
        alpha = pm.Normal('alpha', mu = 0, sd = sd_for_priors)

        mu = np.zeros(number_of_dimensions)
        cov = np.diag(np.full(number_of_dimensions, sd_for_priors))
        # Slope
        betas = pm.MvNormal('betas', mu=mu, cov=cov, shape=(number_of_dimensions,))

        # Standard deviation
        sigma = pm.HalfNormal('sigma', sd = sd_for_priors)

        # Estimate of mean
        mean = alpha + tt.dot(X, betas)

        # Observed values
        Y_obs = pm.Normal('Y_obs', mu = mean, sd = sigma, observed = y)

        # Sampler
        step = pm.NUTS()

        # Posterior distribution
        linear_trace = pm.sample(no_of_samples, step, chains=chains)
    return linear_trace
Example No. 23
def test_explicit_sample():
    with pm.Model() as model:
        a = pm.Normal("a", shape=1)
        pm.HalfNormal("b")
        step1 = pm.NUTS([a])
        step2 = pm.Metropolis([model.b_log__])

    step = pm.CompoundStep([step1, step2])

    ctx = multiprocessing.get_context()
    proc = ps.ProcessAdapter(
        10,
        10,
        step,
        chain=3,
        seed=1,
        mp_ctx=ctx,
        start={"a": 1.0, "b_log__": 2.0},
        step_method_pickled=None,
        pickle_backend="pickle",
    )
    proc.start()
    while True:
        proc.write_next()
        out = ps.ProcessAdapter.recv_draw([proc])
        view = proc.shared_point_view
        for name in view:
            view[name].copy()
        if out[1]:
            break
    proc.join()
Example No. 24
def halo_posteriors(n_halos_in_sky, galaxy_data, samples=500000, burn_in=500):
    #set the size of the halo's mass
    with pm.Model() as model:
        mass_large = pm.Uniform("mass_large", 40, 180)

        mass_small_1 = 20
        mass_small_2 = 20

        masses = np.array([mass_large,mass_small_1, mass_small_2], dtype=object)

        #set the initial prior positions of the halos, it's a 2-d Uniform dist.
        halo_positions = pm.Uniform("halo_positions", 0, 4200, shape=(n_halos_in_sky,2)) #notice this size

        fdist_constants = np.array([240, 70, 70])

        _sum = 0
        for i in range(n_halos_in_sky):
            _sum += masses[i]/f_distance(galaxy_data[:,:2], halo_positions[i, :], fdist_constants[i])*\
                tangential_distance(galaxy_data[:,:2], halo_positions[i, :])

        mean = pm.Deterministic("mean", _sum)

        ellpty = pm.Normal("ellipticity", mu=mean, tau=1./0.05, observed=galaxy_data[:,2:])

        mu, sds, elbo = pm.variational.advi(n=50000)
        step = pm.NUTS(scaling=model.dict_to_array(sds), is_cov=True)
        trace = pm.sample(samples, step=step, start=mu)

    burned_trace = trace[burn_in:]
    return burned_trace["halo_positions"]
Example No. 25
def test_nuts_tuning():
    model = pymc3.Model()
    with model:
        mu = pymc3.Normal("mu", mu=0, sd=1)
        step = pymc3.NUTS()
        trace = pymc3.sample(10, step=step, tune=5, progressbar=False)
    assert not step.tune
Example No. 26
def fit(x, y, lowerVec, upperVec):

    lA, lB, lC = lowerVec
    uA, uB, uC = upperVec

    model = pm.Model()

    with model:
        # Priors for unknown model parameters

        a = pm.Uniform('a', lower=lA, upper=uA)
        b = pm.Uniform('b', lower=lB, upper=uB)
        c = pm.Uniform('c', lower=lC, upper=uC)

        # Expected value of outcome
        mu = Model(x, a, b, c)

        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=0.1 * np.ones_like(y), observed=y)

        # do sampling
        trace = pm.sample(1000,
                          progressbar=False,
                          init='ADVI',
                          step=pm.NUTS(),
                          njobs=1)

        # give summary
        summary = pm.df_summary(trace)

        return summary
Example No. 27
    def simple_model_error_dist(self, ymincentroid):

        import pymc3 as pm

        #import seaborn as sns
        #f, ax = pyplot.subplots(figsize=(6, 6))
        #sns.distplot(ymincentroid)
        #sns.kdeplot(ymincentroid, ax=ax, shade=True, color="g")
        #sns.rugplot(ymincentroid, color="black", ax=ax)
        #ax.set(xlabel= "Peak Minima Magnitude", ylabel= "Density")
        #pyplot.show()

        with pm.Model() as model:

            #mu = pm.Uniform('mu', lower=-1, upper=1)
            lower = ymincentroid.min()
            upper = ymincentroid.max()

            sd = pm.Uniform('sd', lower=lower, upper=upper)

            y = pm.HalfNormal('y', sd=sd, observed=ymincentroid)

            start = pm.find_MAP()
            step = pm.NUTS()  # Hamiltonian MCMC with No U-Turn Sampler
            trace = pm.sample(1000,
                              step=step,
                              start=start,
                              random_seed=123,
                              progressbar=True,
                              tune=1000)

            print(pm.summary(trace))

            return pm.summary(trace)['mean'].values[0]
Example No. 28
def fitFlat(x, y):

    model = pm.Model()

    with model:
        # Priors for unknown model parameters
        a = pm.Flat('a')
        b = pm.Flat('b')
        c = pm.Flat('c')

        # Expected value of outcome
        mu = Model(x, a, b, c)

        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=0.01 * np.ones_like(y), observed=y)

        # do sampling
        trace = pm.sample(1000,
                          progressbar=False,
                          init='ADVI',
                          step=pm.NUTS(),
                          njobs=1)

        # give summary
        summary = pm.df_summary(trace)

        return summary
Example No. 29
def cgpt(y, mdl, idx):
    timesteps = np.arange(len(y))  # array, so it can be compared against the changepoint tensor
    exp_scale = y.mean()

    with mdl:
        # Exponential priors
        lambda_1 = lambdas[idx]  # pm.Exponential('lambda_1', lam=1 / exp_scale)
        lambda_2 = lambdas[idx + 1 ]  #pm.Exponential('lambda_2', lam=1 / exp_scale)

        lambda_diff = pm.Deterministic('lambda_diff', lambda_2 - lambda_1)

        # Change point
        changepoint = pm.DiscreteUniform('changepoint', lower=0, upper=max(timesteps), testval=len(y) // 2)

        # The first rate applies strictly before the changepoint, the second after it
        lambda_selected = tt.switch(timesteps < changepoint, lambda_1, lambda_2)

        # Observations come from Poisson distributions with one of the two rates
        obs = pm.Poisson('obs', mu=lambda_selected, observed=y)

        # sample
        samples = 1000
        step_method = pm.NUTS(target_accept=0.90, max_treedepth=15)
        cpt_trace = pm.sample(samples, chains=None, step=step_method, tune=1000)
        cpt_smry = pm.summary(cpt_trace)
        pm.traceplot(cpt_trace)
        spp = pm.sample_posterior_predictive(cpt_trace, samples=samples * 2, progressbar=False,
                                             var_names=['changepoint'])
        return np.round(spp['changepoint'].mean(), 0)
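
Because cgpt depends on externally defined lambdas, mdl, and idx, here is a self-contained sketch (an assumption, not the author's setup) of the same Poisson change-point pattern, with the Exponential priors the commented-out lines hint at.

import numpy as np
import pymc3 as pm
import theano.tensor as tt

# Synthetic counts with a rate change halfway through (values are made up).
rng = np.random.RandomState(0)
y = np.concatenate([rng.poisson(2.0, 50), rng.poisson(6.0, 50)])
timesteps = np.arange(len(y))

with pm.Model():
    lambda_1 = pm.Exponential('lambda_1', lam=1.0 / y.mean())
    lambda_2 = pm.Exponential('lambda_2', lam=1.0 / y.mean())
    changepoint = pm.DiscreteUniform('changepoint', lower=0, upper=len(y) - 1)
    rate = tt.switch(tt.lt(timesteps, changepoint), lambda_1, lambda_2)
    pm.Poisson('obs', mu=rate, observed=y)
    # NUTS handles the continuous rates; pm.sample assigns a Metropolis-type
    # step to the discrete changepoint automatically.
    trace = pm.sample(1000, tune=1000)

print(pm.summary(trace))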
Example No. 30
def trial1():
    radon = pd.read_csv('data/radon.csv')[['county', 'floor', 'log_radon']]
    # print(radon.head())
    county = pd.Categorical(radon['county']).codes
    # print(county)

    niter = 1000
    with pm.Model() as hm:
        # County hyperpriors
        mu_a = pm.Normal('mu_a', mu=0, sd=10)
        sigma_a = pm.HalfCauchy('sigma_a', beta=1)
        mu_b = pm.Normal('mu_b', mu=0, sd=10)
        sigma_b = pm.HalfCauchy('sigma_b', beta=1)

        # County slopes and intercepts
        a = pm.Normal('slope', mu=mu_a, sd=sigma_a, shape=len(set(county)))
        b = pm.Normal('intercept', mu=mu_b, sd=sigma_b, shape=len(set(county)))

        # Household errors
        sigma = pm.Gamma("sigma", alpha=10, beta=1)

        # Model prediction of radon level
        mu = a[county] + b[county] * radon.floor.values

        # Data likelihood
        y = pm.Normal('y', mu=mu, sd=sigma, observed=radon.log_radon)

        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        hm_trace = pm.sample(niter, step, start=start)

        plt.figure(figsize=(8, 60))
        pm.forestplot(hm_trace, varnames=['slope', 'intercept'])