Example #1
    def _fit_time_series_model(self, signal, target, samples):
        
        model_randomwalk = pm.Model()
        with model_randomwalk:

            sigma_alpha = pm.Exponential('sigma_alpha', 1. / .02, testval=.1)
            sigma_beta = pm.Exponential('sigma_beta', 1. / .02, testval=.1)

            alpha = GaussianRandomWalk('alpha', sigma_alpha ** -2, shape=len(target))
            beta = GaussianRandomWalk('beta', sigma_beta ** -2, shape=len(target))

            # Define regression
            regression = alpha + beta * signal.values

            # Assume prices are Normally distributed, the mean comes from the regression.
            sd = pm.Uniform('sd', 0, 20)
            likelihood = pm.Normal('y', 
                                   mu=regression, 
                                   sd=sd, 
                                   observed=target.values)
        
            # First optimize random walk
            start = pm.find_MAP(vars=[alpha, beta], fmin=optimize.fmin_l_bfgs_b)
            step = pm.NUTS(scaling=start)
            trace = pm.sample(10, step, start)

            # Sample
            start2 = trace.point(-1)
            step = pm.NUTS(scaling=start2)
            trace_rw = pm.sample(samples, step, start=start2)
            
            
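Example #1 relies on names defined elsewhere in its module (GaussianRandomWalk from pymc3.distributions.timeseries, optimize from scipy) and follows the two-stage pattern used throughout these examples: find a MAP point, use it to scale a NUTS step, draw a short pilot trace, then restart sampling from the pilot's last point. Below is a minimal, self-contained sketch of that pattern on a toy model; it assumes the legacy PyMC3 API (find_MAP with an fmin argument, NUTS scaled by a point dict).

import numpy as np
import pymc3 as pm
from scipy import optimize

y = np.random.normal(1.0, 2.0, size=200)

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=10)
    sd = pm.HalfNormal('sd', sd=10)
    pm.Normal('obs', mu=mu, sd=sd, observed=y)

    # Stage 1: MAP starting point and a short pilot run scaled by it.
    start = pm.find_MAP(fmin=optimize.fmin_powell)
    step = pm.NUTS(scaling=start)
    pilot = pm.sample(100, step, start=start, progressbar=False)

    # Stage 2: restart from the pilot's last point with a re-scaled step.
    step = pm.NUTS(scaling=pilot[-1])
    trace = pm.sample(1000, step, start=pilot[-1], progressbar=False)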
Example #2
def init_nuts(init='advi', n_init=500000, model=None, **kwargs):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed is extremely
    dependent on the choice of mass/scaling matrix. In our experience, using ADVI
    to estimate a diagonal covariance matrix and using this as the scaling matrix
    produces robust results over a wide class of continuous models.

    Parameters
    ----------
    init : str {'advi', 'advi_map', 'map', 'nuts'}
        Initialization method to use.
        * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point.
        * nuts : Run NUTS and estimate posterior mean and covariance matrix.
    n_init : int
        Number of iterations of initializer
        If 'advi', number of ADVI iterations; if 'nuts', number of draws.
    model : Model (optional if in `with` context)
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start, nuts_sampler

    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """

    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if init == 'advi':
        v_params = pm.variational.advi(n=n_init)
        start = pm.variational.sample_vp(v_params, 1, progressbar=False)[0]
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'advi_map':
        start = pm.find_MAP()
        v_params = pm.variational.advi(n=n_init, start=start)
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)

    elif init == 'nuts':
        init_trace = pm.sample(step=pm.NUTS(), draws=n_init)
        cov = pm.trace_cov(init_trace[n_init//2:])

        start = {varname: np.mean(init_trace[varname]) for varname in init_trace.varnames}
    else:
        raise NotImplementedError('Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)

    return start, step
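A hedged usage sketch for the init_nuts() helper above, assuming it is importable and that the legacy pm.variational.advi interface it calls is available; the toy model and the n_init value are illustrative only.

import numpy as np
import pymc3 as pm

y = np.random.randn(100)

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=5)
    pm.Normal('obs', mu=mu, sd=1, observed=y)

    # ADVI estimates a diagonal covariance that is used to scale NUTS.
    start, step = init_nuts(init='advi', n_init=20000)
    trace = pm.sample(1000, step=step, start=start)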
Example #3
def model_returns_t(data, samples=500):
    """Run Bayesian model assuming returns are normally distributed.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    """

    with pm.Model():
        mu = pm.Normal('mean returns', mu=0, sd=.01, testval=data.mean())
        sigma = pm.HalfCauchy('volatility', beta=1, testval=data.std())
        nu = pm.Exponential('nu_minus_two', 1. / 10., testval=3.)

        returns = pm.T('returns', nu=nu + 2, mu=mu, sd=sigma, observed=data)
        pm.Deterministic('annual volatility',
                         returns.distribution.variance**.5 * np.sqrt(252))

        pm.Deterministic('sharpe', returns.distribution.mean /
                         returns.distribution.variance**.5 *
                         np.sqrt(252))

        start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(samples, step, start=start)
    return trace
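A hedged usage sketch for model_returns_t() above, on a synthetic daily returns Series; it assumes the legacy PyMC3 API (pm.T, testval, fmin-based find_MAP) that the function itself uses.

import numpy as np
import pandas as pd

returns = pd.Series(np.random.normal(0.0005, 0.01, size=252))  # synthetic daily returns

trace = model_returns_t(returns, samples=500)
print(trace['mean returns'].mean())
print(trace['annual volatility'].mean())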
Example #4
    def fit(self, x, y, mcmc_samples=1000):
        t = x.shape[0] - 1  # number of additive components
        varnames = ["xc", "w", "decay", "sigma", "b", "lam"]

        with pm.Model() as model:
            # Priors for additive predictor
            w = pm.Normal("w", mu=0, sd=1, shape=t)
            decay = pm.HalfNormal("decay", sd=200, shape=t)
            # Prior for likelihood
            sigma = pm.Uniform("sigma", 0, 0.3)
            b = pm.Normal("b", mu=0, sd=20)
            lam = pm.Uniform("lam", 0, 0.3)

            # Building linear predictor
            lin_pred = 0
            for ii in range(1, t + 1):
                lin_pred += self.bias(w[ii - 1], decay[ii - 1])(x[ii, :])

            phi2 = pm.Deterministic("phi2", 0.5 * lam + (1 - lam) * phi(b + lin_pred + x[0, :] / sigma))
            y = pm.Bernoulli("y", p=phi2, observed=y)

        with model:
            # Inference
            start = pm.find_MAP()  # Find starting value by optimization
            print("MAP found:")
            # step = pm.NUTS(scaling = start)
            # step = pm.Slice()
            step = pm.NUTS(scaling=start)
            trace = pm.sample(mcmc_samples, step, start=start, progressbar=True)  # draw posterior samples

        return trace, model
Example #5
	def fit(self,xdata,ydata,yerr,arange=[-100.,100],brange=[-100.,100]):
		trace = None
		with pm.Model() as model:
		    # alpha = pm.Normal('alpha', mu=1.0e7, sd=1.0e6)
		    # beta  = pm.Normal('beta', mu=1.0e7, sd=1.0e6)
		    # sigma = pm.Uniform('sigma', lower=0, upper=20)
		    alpha = pm.Uniform('alpha', lower=arange[0], upper=arange[1])
		    beta  = pm.Uniform('beta',  lower=brange[0], upper=brange[1])
		    sigma = yerr
		    
		    y_est = alpha + beta * xdata
		    
		    likelihood = pm.Normal('y', mu=y_est, sd=sigma, observed=ydata)
		    
		    # obtain starting values via MAP
		    start = pm.find_MAP()
		    step  = pm.NUTS(scaling=start)
		    trace = pm.sample(2000, step, start=start, progressbar=False)
		    
		    # pm.traceplot(trace)

		# plt.show()
		# pprint(trace['alpha'].mean())
		# pprint(trace['alpha'].std())
		# print pm.summary(trace)
		# print pm.summary(trace, ['alpha'])
		# print pm.stats()
		# print(trace.__dict__)

		# Return the traces
		return [trace['alpha'], trace['beta']]
Example #6
def sample_pymc3(d, samples=2000, njobs=2):
    with pm.Model() as model:
        dfc = pm.Normal(mu=0.0, sd=d['sigma_fc'], name='dfc')
        Q = pm.Gamma(mu=d['mu_Q'], sd=d['sigma_Q'], name='Q')
        Pdet = pm.Gamma(mu=d['mu_Pdet'], sd=d['sigma_Pdet'], name='Pdet')
        kc = pm.Gamma(mu=d['mu_kc'], sd=d['sigma_kc'], name='kc')

        M = d['M']
        T = d['T']
        scale=d['scale']
        mu_fc = d['mu_fc']
        f = d['f']


        like = pm.Gamma(alpha=M, beta=(M/(((2 * 1.381e-5 * T) / (np.pi * Q * kc)) / scale * (dfc + mu_fc)**3 /
                    ((f * f - (dfc + mu_fc)**2) * (f * f - (dfc + mu_fc)**2) + f * f * (dfc + mu_fc)**2 / Q**2)
                    + Pdet)),
                            observed=d['y'],
                            name='like')

        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        
        trace = pm.sample(samples, step=step, start=start, progressbar=True, njobs=njobs)
    return trace
Example #7
    def run(self, samples=1000, find_map=True, verbose=True, step='nuts',
            burn=0.5, **kwargs):
        ''' Run the model.
        Args:
            samples (int): Number of MCMC samples to generate
            find_map (bool): if True, obtain the starting point via pm.find_MAP()
            verbose (bool): if True, prints additional information
            step (str or PyMC3 Sampler): either an instantiated PyMC3 sampler,
                or the name of the sampler to use (either 'nuts' or
                'metropolis').
            start: Optional starting point to pass onto sampler.
            burn (int or float): Number or proportion of samples to treat as
                burn-in; passed onto the BayesianModelResults instance returned
                by this method.
            kwargs (dict): optional keyword arguments passed on to the sampler.

        Returns: an instance of class BayesianModelResults.

        '''
        with self.model:
            njobs = kwargs.pop('njobs', 1)
            start = kwargs.pop('start', pm.find_MAP() if find_map else None)
            chain = kwargs.pop('chain', 0)
            if isinstance(step, string_types):
                step = {
                    'nuts': pm.NUTS,
                    'metropolis': pm.Metropolis
                }[step.lower()](**kwargs)

            self.start = start
            trace = pm.sample(
                samples, start=start, step=step, progressbar=verbose, njobs=njobs, chain=chain)
            self.last_trace = trace  # for convenience
            return BayesianModelResults(trace)
Example #8
    def test_linear_component(self):
        vars_to_create = {
            'sigma',
            'sigma_interval__',
            'y_obs',
            'lm_x0',
            'lm_Intercept'
        }
        with Model() as model:
            lm = LinearComponent(
                self.data_linear['x'],
                self.data_linear['y'],
                name='lm'
            )   # yields lm_x0, lm_Intercept
            sigma = Uniform('sigma', 0, 20)     # yields sigma_interval__
            Normal('y_obs', mu=lm.y_est, sigma=sigma, observed=self.y_linear)  # yields y_obs
            start = find_MAP(vars=[sigma])
            step = Slice(model.vars)
            trace = sample(500, tune=0, step=step, start=start,
                           progressbar=False, random_seed=self.random_seed)

            assert round(abs(np.mean(trace['lm_Intercept'])-self.intercept), 1) == 0
            assert round(abs(np.mean(trace['lm_x0'])-self.slope), 1) == 0
            assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0
        assert vars_to_create == set(model.named_vars.keys())
Example #9
def lin_fit(t, y, yerr=None, samples=10000, sampler="NUTS", alphalims=[-100,100]):
    """
    Bayesian linear fitting function.
    See Jake Vanderplas' blog post on how to be a
    Bayesian in Python for more details

    uses pymc3 MCMC sampling

    inputs:
        t    ::    Vector of values at which the function is evaluated ("x" values)
        y    ::    Vector of dependent values (observed y(t))
        yerr (optional = None) :: Errors on y values.  If not provided, errors are taken to be the same for each data point,
            with a 1/sigma (Jeffreys) prior.
        samples (optional = 10000)  :: Number of samples to draw from MCMC
        sampler (optional = "NUTS")  :: Type of MCMC sampler to use.  "NUTS" or "Metropolis"
        alphalims (optional = [-100,100])  ::  Length 2 vector of endpoints for uniform prior on intercept of the line
    """
    with pm.Model() as model:
            #Use uninformative priors on slope/intercept of line
            alpha = pm.Uniform('alpha',alphalims[0],alphalims[1])
            #this defines an uninformative prior on slope.  See Jake's blog post
            beta = pm.DensityDist('beta',lambda value: -1.5 * T.log(1 + value**2.),testval=0)
            #if yerr not given, assume all values have same errorbar
            if yerr is None:
                sigma = pm.DensityDist('sigma', lambda value: -T.log(T.abs_(value)),testval=1)
            else:
                sigma = yerr
            like = pm.Normal('likelihood',mu=alpha+beta*t, sd=sigma, observed=y)
            #start the sampler at the maximum a-posteriori value
            start = pm.find_MAP()
            step = select_sampler(sampler,start)
            trace = pm.sample(draws=samples,start=start,step=step)
    return trace
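A hedged usage sketch for lin_fit() above; select_sampler is assumed to be the project's helper that turns the sampler name and MAP point into a pm.NUTS or pm.Metropolis step, and theano.tensor is assumed to be imported as T.

import numpy as np

t = np.linspace(0, 10, 50)
y = 2.0 + 0.5 * t + np.random.normal(0, 0.3, size=t.size)  # noisy line

trace = lin_fit(t, y, yerr=0.3 * np.ones_like(t), samples=2000, sampler="NUTS")
print(trace['alpha'].mean(), trace['beta'].mean())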
Example #10
def run(n=5000):
    with model_1:
        xstart = pm.find_MAP()
        xstep = pm.Slice()
        trace = pm.sample(n, xstep, xstart, random_seed=123, progressbar=True)

        pm.summary(trace)
Example #11
def model_returns_t_alpha_beta(data, bmark, samples=2000):
    """Run Bayesian alpha-beta-model with T distributed returns.

    This model estimates intercept (alpha) and slope (beta) of two
    return sets. Usually, these will be algorithm returns and
    benchmark returns (e.g. S&P500). The data is assumed to be T
    distributed and thus is robust to outliers and takes tail events
    into account.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    bmark : pandas.Series
        Series of simple returns of a benchmark like the S&P500.
        If bmark has more recent returns than data, these dates
        will be treated as missing values and predictions will be
        generated for them taking market correlations into account.
    samples : int (optional)
        Number of posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """

    if len(data) != len(bmark):
        # pad missing data
        data = pd.Series(data, index=bmark.index)

    data_no_missing = data.dropna()

    with pm.Model():
        sigma = pm.HalfCauchy(
            'sigma',
            beta=1,
            testval=data_no_missing.values.std())
        nu = pm.Exponential('nu_minus_two', 1. / 10., testval=.3)

        # alpha and beta
        beta_init, alpha_init = sp.stats.linregress(
            bmark.loc[data_no_missing.index],
            data_no_missing)[:2]

        alpha_reg = pm.Normal('alpha', mu=0, sd=.1, testval=alpha_init)
        beta_reg = pm.Normal('beta', mu=0, sd=1, testval=beta_init)

        pm.T('returns',
             nu=nu + 2,
             mu=alpha_reg + beta_reg * bmark,
             sd=sigma,
             observed=data)
        start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(samples, step, start=start)

    return trace
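A hedged usage sketch for model_returns_t_alpha_beta() above, with synthetic algorithm and benchmark return Series sharing a date index (legacy PyMC3 API assumed).

import numpy as np
import pandas as pd

idx = pd.date_range('2017-01-02', periods=252, freq='B')
bmark = pd.Series(np.random.normal(0.0004, 0.01, 252), index=idx)
algo = 0.0002 + 1.2 * bmark + np.random.normal(0, 0.005, 252)  # stays aligned to idx

trace = model_returns_t_alpha_beta(algo, bmark, samples=500)
print(trace['alpha'].mean(), trace['beta'].mean())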
Example #12
def run(n=1000):
    if n == "short":
        n = 50
    with model:
        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        trace = pm.sample(n, step=step, start=start)
    return trace
Example #13
    def _inference(self, reinit=True):
        with self.cached_model:
            if reinit or (self.cached_start is None) or (self.cached_sampler is None):
                self.cached_start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
                self.cached_sampler = pm.NUTS(scaling=self.cached_start)

            trace = pm.sample(self.samples, self.cached_sampler, start=self.cached_start)

        return trace
Example #14
 def fit(self, X, y, sampling_iterations):
     X = self._force_shape(X)
     self.input_data_dimension = len(X[0])
     model, w, b = self._build_model(X, y)
     with model:
         self.map_estimate = pymc3.find_MAP(model=model, vars=[w, b])
         step = pymc3.NUTS(scaling=self.map_estimate)
         trace = pymc3.sample(sampling_iterations, step, start=self.map_estimate)
     self.samples = trace
Example #15
def learn_model(model, draws=50000):
    with model:
        start = pm.find_MAP()
        #step = pm.Slice()  # It is very slow when the model has many parameters
        #step = pm.HamiltonianMC(scaling=start)  # It leads to constant samples
        #step = pm.NUTS(scaling=start)           # It leads to constant samples
        step = pm.Metropolis()
        trace = pm.sample(draws, step, start=start)
    return trace
Example #16
    def test_run(self):
        model = self.build_model()
        with model:
            # move the chain to the MAP which should be a good starting point
            start = pm.find_MAP()
            H = model.fastd2logp()  # find a good orientation using the hessian at the MAP
            h = H(start)

            step = pm.HamiltonianMC(model.vars, h)
            pm.sample(50, step, start)
Example #17
def test_errors():
    _, model, _ = exponential_beta(2)
    with model:
        try:
            newstart = find_MAP(Point(x=[-0.5, 0.01], y=[0.5, 4.4]))
        except ValueError as e:
            msg = str(e)
            assert "x.logp" in msg, msg
            assert "x.value" not in msg, msg
        else:
            assert False, newstart
Example #18
def run(n=2000):
    if n == "short":
        n = 50
    import matplotlib.pyplot as plt

    with model:
        start = find_MAP(fmin=opt.fmin_powell)
        trace = sample(n, Slice(), start=start)

    plt.plot(x, y, 'x')
    glm.plot_posterior_predictive(trace)
Example #19
    def test_glm_from_formula(self):
        with Model() as model:
            NAME = 'glm'
            GLM.from_formula('y ~ x', self.data_linear, name=NAME)
            start = find_MAP()
            step = Slice(model.vars)
            trace = sample(500, step=step, start=start, progressbar=False, random_seed=self.random_seed)

            assert round(abs(np.mean(trace['%s_Intercept' % NAME])-self.intercept), 1) == 0
            assert round(abs(np.mean(trace['%s_x' % NAME])-self.slope), 1) == 0
            assert round(abs(np.mean(trace['%s_sd' % NAME])-self.sd), 1) == 0
Example #20
    def test_linear_component(self):
        with Model() as model:
            y_est, _ = glm.linear_component('y ~ x', self.data_linear)
            sigma = Uniform('sigma', 0, 20)
            Normal('y_obs', mu=y_est, sd=sigma, observed=self.y_linear)
            start = find_MAP(vars=[sigma])
            step = Slice(model.vars)
            trace = sample(500, step, start, progressbar=False, random_seed=self.random_seed)

            self.assertAlmostEqual(np.mean(trace['Intercept']), self.intercept, 1)
            self.assertAlmostEqual(np.mean(trace['x']), self.slope, 1)
            self.assertAlmostEqual(np.mean(trace['sigma']), self.sd, 1)
Example #21
def _find_map(self):
    """Find mode of posterior using Powell optimization."""
    tstart = time.time()
    with self.model:
        logging.info('finding PMF MAP using Powell optimization...')
        self._map = pm.find_MAP(fmin=sp.optimize.fmin_powell, disp=True)

    elapsed = int(time.time() - tstart)
    logging.info('found PMF MAP in %d seconds' % elapsed)

    # This is going to take a good deal of time to find, so let's save it.
    save_np_vars(self._map, self.map_dir)
Example #22
    def test_linear_component_from_formula(self):
        with Model() as model:
            lm = LinearComponent.from_formula('y ~ x', self.data_linear)
            sigma = Uniform('sigma', 0, 20)
            Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear)
            start = find_MAP(vars=[sigma])
            step = Slice(model.vars)
            trace = sample(500, step=step, start=start, progressbar=False, random_seed=self.random_seed)

            assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0
            assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0
            assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0
Example #23
def BayesianLearning(fig, path, measurements,
                     pos_min=-50, pos_max=50, subplot=133):
    with pm.Model() as model:
        # Compute bounds based on measurements
        pos_min_x, pos_max_x, pos_min_y, pos_max_y = boundsFromPath(path)
        minPos = min(pos_min_x, pos_min_y)
        maxPos = max(pos_max_x, pos_max_y)

        # Priors
        # See: http://stackoverflow.com/q/25342899
        thermal_position_x = pm.Uniform('thermal_position_x',
                                      lower=pos_min_x, upper=pos_max_x)
        thermal_position_y = pm.Uniform('thermal_position_y',
                                      lower=pos_min_y, upper=pos_max_y)
        thermal_amplitude = pm.Uniform('thermal_amplitude',
                                       lower=-10, upper=10)
        thermal_sd = pm.Uniform('sd', lower=0.1, upper=100)

        # When sampling, look at the values of the test thermal field at the points
        # we have taken measurements at.
        velocity = deterministicVelocity(path, measurements,
                                         thermal_position_x, thermal_position_y,
                                         thermal_amplitude, thermal_sd)

        # Observe the vertical velocities
        thermal_vert_vel = pm.Normal('thermal_vert_vel', mu=velocity,
                                     observed=measurements)

        # Sample this to find the posterior, note Metropolis works with discrete
        step = pm.Metropolis()
        start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        trace = pm.sample(2000, step=step, progressbar=True, start=start)

        # Find the most probable surface and plot that for comparison
        x = lameMAP(trace['thermal_position_x'])
        y = lameMAP(trace['thermal_position_y'])
        amp = lameMAP(trace['thermal_amplitude'])
        sd = lameMAP(trace['sd'])
        eq = thermalEq((x,y), amp, sd)

        # Plot it
        prev = plt.gca()
        visualizeThermalField([eq], path, measurements, trace, pos_min, pos_max,
                              only2d=False, fig=fig, subplot=subplot, lines=False,
                              limits=[prev.get_xlim(),prev.get_ylim(),prev.get_zlim()])

        # Really, we have more information than just this MAP estimate.
        # We have probability distributions over all the parameters.
        # It's hard to visualize this in one figure that we can directly
        # compare with the GPR though.
        pm.traceplot(trace, ['thermal_position_x','thermal_position_y',
                             'thermal_amplitude','sd'])
Example #24
 def too_slow(self):
     model = self.build_model()
     start = {'groupmean': self.obs_means.mean(),
              'groupsd_interval_': 0,
              'sd_interval_': 0,
              'means': self.obs_means,
              'floor_m': 0.,
              }
     with model:
         start = pm.find_MAP(start=start,
                             vars=[model['groupmean'], model['sd_interval_'], model['floor_m']])
         step = pm.NUTS(model.vars, scaling=start)
         pm.sample(50, step, start)
Example #25
 def _get_norm_params(self):
     trace = []
     for i, s in zip([0, 1], ['+', '-']):
         print "Estimating Gaussian parameters in %s strand" % s
         with pm.Model() as model:
             model.verbose = 0
             mu = pm.Uniform('mu')
             sigma = pm.Uniform('sigma')
             tau = 1 / sigma**2
             y_pred = pm.Normal('y_pred', mu=mu, tau=tau)
             y_est = pm.Normal('y_est', mu=mu, tau=tau, observed=self.cleanFcArray[i])
             start = pm.find_MAP()
             step = pm.Metropolis()
             trace.append(pm.sample(self.mcmcSteps, step, start=start, progressbar=self.showProgress))
         print
     return trace
Example #26
def model_stoch_vol(data, samples=2000):
    """Run stochastic volatility model.

    This model estimates the volatility of a returns series over time.
    Returns are assumed to be T-distributed. lambda (the scale of the
    T distribution) is assumed to follow a random walk.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """
    from pymc3.distributions.timeseries import GaussianRandomWalk

    with pm.Model() as model:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(data))
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.exp(-2 * s))
        pm.StudentT('r', nu, lam=volatility_process, observed=data)
        start = pm.find_MAP(vars=[s], fmin=sp.optimize.fmin_l_bfgs_b)

        step = pm.NUTS(scaling=start)
        trace = pm.sample(100, step, progressbar=False)

        # Start next run at the last sampled position.
        step = pm.NUTS(scaling=trace[-1], gamma=.25)
        trace = pm.sample(samples, step, start=trace[-1],
                          progressbar=False)

    return model, trace
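A hedged usage sketch for model_stoch_vol() above, on a synthetic return series; it assumes the legacy PyMC3 API (pm.exp, testval, positional step/start arguments) that the function relies on.

import numpy as np
import pandas as pd

returns = pd.Series(np.random.standard_t(df=5, size=400) * 0.01)

model, trace = model_stoch_vol(returns, samples=500)
vol_path = trace['volatility_process'].mean(axis=0)  # posterior-mean volatility per time step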
Example #27
    def too_slow(self):
        model = self.build_model()
        with model:
            start = pm.Point({
                'groupmean': self.obs_means.mean(),
                'groupsd_interval_': 0,
                'sd_interval_': 0,
                'means': np.array(self.obs_means),
                'u_m': np.array([.72]),
                'floor_m': 0.,
            })

            start = pm.find_MAP(start, model.vars[:-1])
            H = model.fastd2logp()
            h = np.diag(H(start))

            step = pm.HamiltonianMC(model.vars, h)
            pm.sample(50, step, start)
Example #28
def test_bernoulli():
    data = [random.randint(0,1) for i in range(200)]

    model = pymc3.Model()

    with model:
        p = pymc3.Uniform(lower=0,upper=1, name='p')
        X = pymc3.Bernoulli(p=p, name='X', observed=data)

        start = pymc3.find_MAP()

        # instantiate sampler
        step = pymc3.NUTS(scaling=start)

        # draw 500 posterior samples
        trace = pymc3.sample(10000, step, start=start)

    pymc3.traceplot(trace)
    plt.show()
Example #29
def main():
  X, Y = generate_sample()

  with pm.Model() as model:
    alpha = pm.Normal('alpha', mu=0, sd=20)
    beta = pm.Normal('beta', mu=0, sd=20)
    sigma = pm.Uniform('sigma', lower=0)
    y = pm.Normal('y', mu=beta*X+alpha, sd=sigma, observed=Y)
    start = pm.find_MAP()
    step = pm.NUTS(scaling=start)

  with model:
    if (multicore):
      trace = pm.sample(itenum, step, start=start,
        njobs=chainnum, random_seed=range(chainnum), progressbar=progress)
    else:
      ts = [pm.sample(itenum, step, chain=i, progressbar=progress)
            for i in range(chainnum)]
      trace = merge_traces(ts)

    if (saveimage):
      pm.traceplot(trace).savefig("simple_linear_trace.png")
    print "Rhat = {0}".format(pm.gelman_rubin(trace))

  t1 = time.clock()
  print "elapsed time = {0}".format(t1 - t0)

  #trace
  if(not multicore):
  	trace=ts[0]
  with model:
  	pm.traceplot(trace,model.vars)

  pm.forestplot(trace)

  with open("simplelinearregression_model.pkl","w") as fpw:
  	pkl.dump(model,fpw)
  with open("simplelinearregression_trace.pkl","w") as fpw:
  	pkl.dump(trace,fpw)
  with open("simplelinearregression_model.pkl") as fp:
  	model=pkl.load(fp)
  with open("simplelinearregression_trace.pkl") as fp:
  	trace=pkl.load(fp)
Example #30
    def __init__(self,X_train,y_train,n_hidden,lam=1):
        n_train = y_train.shape[0]
        n_dim = X_train.shape[1]
        print X_train.shape
        with pm.Model() as rbfnn:
            C = pm.Normal('C',mu=0,sd=10,shape=(n_hidden))
            #beta = pm.Gamma('beta',1,1)
            w = pm.Normal('w',mu=0,sd=10,shape=(n_hidden+1))
            
            #component, updates = theano.scan(fn=lambda x: T.sum(C-x)**2,sequences=[X_train])
            y_out=[]
            for x in X_train:
                #rbf_out =  T.exp(-lam*T.sum((C-x)**2,axis=1)) 
                #1d speed up
                rbf_out =  T.exp(-lam*(C-x)**2)
                #rbf_out = theano.printing.Print(rbf_out)                 
                rbf_out_biased = \
                        T.concatenate([ rbf_out, T.alloc(1,1) ], 0)
                y_out.append(T.dot(w,rbf_out_biased))
            
            y = pm.Normal('y',mu=y_out,sd=0.01,observed=y_train)
            
            start = pm.find_MAP(fmin=scipy.optimize.fmin_l_bfgs_b)
            print start
            step = pm.NUTS(scaling=start)
            trace = pm.sample(2000, step, progressbar=False)
            step = pm.NUTS(scaling=trace[-1])
            trace = pm.sample(20000,step,start=trace[-1])
            

            print summary(trace, vars=['C', 'w'])

            vars = trace.varnames   
            for i, v in enumerate(vars):
                for d in trace.get_values(v, combine=False, squeeze=False):
                    d=np.squeeze(d)
                    with open(str(v)+".txt","w+") as thefile:
                        for item in d:
                            print>>thefile, item

            traceplot(trace)
            plt.show()
Example #31
    data2 = T131A[:, 5]
    cov2 = np.diag(T131A[:, 6]**2)
    llk2 = pm.MvNormal('llk2',
                       mu=synthetic2(x0, y0, z0, dV),
                       cov=cov2,
                       observed=data2)
    data3 = T130A[:, 5]
    cov3 = np.diag(T130A[:, 6]**2)
    llk3 = pm.MvNormal('llk3',
                       mu=synthetic3(x0, y0, z0, dV),
                       cov=cov3,
                       observed=data3)

    niter = 1000

    start = pm.find_MAP(model=basic_model)
    step = pm.NUTS(scaling=start)
    trace = pm.sample(niter, start=start, step=step)

    #n_chains = 100
    #n_steps = 50
    #tune_interval = 10
    #n_jobs = 1
    #trace = smc.sample_smc(
    #	n_steps=n_steps,
    #	n_chains=n_chains,
    #	tune_interval=tune_interval,
    #	n_jobs=n_jobs,
    #	#start=start,
    #	progressbar=False,
    #	stage=0,
Example #32
def n_polyfit_MCMC(n,
                   data,
                   init_guess,
                   n_tuning_steps=1500,
                   n_draws=2500,
                   n_chains=4,
                   nosetest=False,
                   compute_traces=False):
    """
    Fits the data to a polynomial function of degree n  using pymc3
    Errors on temperature are considered in the model
    model: temp = C_0 + C_1 * depth + C_2 * depth ^2 + ... + C_n * depth^n — uniform priors on all parameters bounded by Antarctic ice temps
    Plots the traces in the MCMC (if n_chains > 2)

    Parameters
    ----------
    data : pandas DataFrame
        data and metadata contained in pandas DataFrame
        Format described in tutorial notebook
    init_guess : dict
        dictionary containing initial values for each of the parameters in the model (C_0, C_1, C_2))
    n_tuning_steps : int (>= 0)
        number of tuning steps used in MCMC (default = 1500)
        NOTE: Number of tuning steps must be >= 0
        If < 0, n_tuning_steps will automatically be set to the default (1500)
    n_draws : int (> 0)
        number of draws used in MCMC (default = 2500)
        NOTE: n_draws must be >= 4 for convergence checks and > 0 in general
        If < 1, n_draws will automatically be set to the default (2500)
    n_chains : int (> 0)
        number of walkers used to sample posterior in MCMC (default = 4)
        NOTE: number of chains must be >= 2 to visualize traces and must be > 0 in general
        If < 1, n_chains will automatically be set to the default (4)
    nosetest : bool
        bool that specifies whether or not a test is being conducted
        if testing is being run, then sampling will not be performed
    compute_traces : bool
        bool that indicates whether or not to compute the traces

    Returns
    -------
    traces : pymc3 MultiTrace object, OR int (depending on compute_traces)
        Traces generated from MCMC sampling
        0 if compute_traces == False
    best_fit : dict
        dictionary containing best-fit parameters and covariance matrix

    NOTE: when testing, None is returned, as no sampling/inference is performed

    """
    # error checking for MCMC-related parameters
    # if parameters outside allowed values, set them to the default
    if n_tuning_steps < 0:
        print(
            "You have entered an invalid value for n_tuning_steps (must be >= 0). Reverting to default (1500)"
        )
        n_tuning_steps = 1500
    if n_draws < 1:
        print(
            "You have entered an invalid value for n_draws (must be >= 1). Reverting to default (2500)"
        )
        n_draws = 2500
    if n_chains < 1:
        print(
            "You have entered an invalid value for n_chains (must be >= 1). Reverting to default (4)"
        )
        n_chains = 4

    # prepare data
    depth = data['Depth'].values
    temp = data['Temperature'].values
    sigma_y = data['temp_errors'].values

    with pm.Model() as poly_model:
        # define priors for each parameter in the polynomial fit (e.g C_0 + C_1*x + C_2*x^2 + ...)
        C_0 = pm.Uniform(
            'C_0', -60, -40
        )  # not expected to change more than +/- 5 deg C according to base camp measurements
        C_n = [
            pm.Uniform('C_{}'.format(i), -60 / 800**i, 10 / 800**i)
            for i in range(1, n + 1)
        ]
        polynomial = C_0 + np.sum([C_n[i] * depth**(i + 1) for i in range(n)])

        # define likelihood
        sigma_T = 1.
        y_obs = pm.Normal("temp_pred",
                          mu=polynomial,
                          sd=sigma_T,
                          observed=temp)

    if not nosetest:
        with poly_model:
            # unleash the inference
            if compute_traces == True:
                traces = pm.sample(
                    init="adapt_diag",
                    tune=n_tuning_steps,
                    draws=n_draws,
                    chains=n_chains)  # need at least two chains to plot traces
                #az.plot_pair(traces, divergences=True)

                if n_chains >= 2:
                    az.plot_trace(traces)
            else:
                traces = 0

            best_fit, scipy_output = pm.find_MAP(start=init_guess,
                                                 return_raw=True)
            covariance_matrix = np.flip(scipy_output.hess_inv.todense() /
                                        sigma_y[0])
            best_fit['covariance matrix'] = covariance_matrix

    return (traces, best_fit) if not nosetest else None
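A hedged usage sketch for n_polyfit_MCMC() above; the column names follow what the function reads ('Depth', 'Temperature', 'temp_errors') and, as its docstring describes, init_guess holds initial values for the polynomial coefficients.

import numpy as np
import pandas as pd

depth = np.linspace(0, 800, 40)
data = pd.DataFrame({
    'Depth': depth,
    'Temperature': -50 + 0.01 * depth + np.random.normal(0, 1.0, depth.size),
    'temp_errors': np.full(depth.size, 1.0),
})
init_guess = {'C_0': -50.0, 'C_1': 0.01}

# Linear fit (n=1); skip trace computation and just return the MAP-based best fit.
traces, best_fit = n_polyfit_MCMC(1, data, init_guess,
                                  n_tuning_steps=500, n_draws=1000,
                                  n_chains=2, compute_traces=False)
print(best_fit)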
Example #33
def init_nuts(init='ADVI',
              njobs=1,
              n_init=500000,
              model=None,
              random_seed=-1,
              progressbar=True,
              **kwargs):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed is extremely
    dependent on the choice of mass/scaling matrix. In our experience, using ADVI
    to estimate a diagonal covariance matrix and using this as the scaling matrix
    produces robust results over a wide class of continuous models.

    Parameters
    ----------
    init : str {'ADVI', 'ADVI_MAP', 'MAP', 'NUTS'}
        Initialization method to use.
        * ADVI : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * ADVI_MAP: Initialize ADVI with MAP and use MAP as starting point.
        * MAP : Use the MAP as starting point.
        * NUTS : Run NUTS and estimate posterior mean and covariance matrix.
    njobs : int
        Number of parallel jobs to start.
    n_init : int
        Number of iterations of initializer
        If 'ADVI', number of ADVI iterations; if 'NUTS', number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """

    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    random_seed = int(np.atleast_1d(random_seed)[0])

    if init is not None:
        init = init.lower()

    if init == 'advi':
        v_params = pm.variational.advi(n=n_init,
                                       random_seed=random_seed,
                                       progressbar=progressbar)
        start = pm.variational.sample_vp(v_params,
                                         njobs,
                                         progressbar=False,
                                         hide_transformed=False,
                                         random_seed=random_seed)
        if njobs == 1:
            start = start[0]
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'advi_map':
        start = pm.find_MAP()
        v_params = pm.variational.advi(n=n_init,
                                       start=start,
                                       random_seed=random_seed)
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
    elif init == 'nuts':
        init_trace = pm.sample(step=pm.NUTS(),
                               draws=n_init,
                               random_seed=random_seed)[n_init // 2:]
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = np.random.choice(init_trace, njobs)
        if njobs == 1:
            start = start[0]
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)

    return start, step
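A hedged usage sketch for this njobs-aware init_nuts() variant; it assumes the legacy pm.variational.advi interface and a PyMC3 version whose pm.sample accepts a list of per-chain start points when njobs > 1.

import numpy as np
import pymc3 as pm

with pm.Model():
    theta = pm.Normal('theta', mu=0, sd=1)
    pm.Normal('obs', mu=theta, sd=1, observed=np.random.randn(50))

    start, step = init_nuts(init='ADVI', njobs=2, n_init=20000)
    trace = pm.sample(1000, step=step, start=start, njobs=2)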
Example #34
        for train_index, test_index in kf.split(X):
            # prepare train/test batch
            pred_train, y_train, X_train = pred[train_index], y[
                train_index], X[train_index]
            pred_test, y_test, X_test = pred[test_index], y[test_index], X[
                test_index]

            pred_tt.set_value(pred_train)
            y_tt.set_value(y_train)
            X_tt.set_value(X_train)

            # fit model
            # with model_spec:
            #     model_fit = pm.fit(n=100000, method=pm.ADVI())
            #     trace = model_fit.sample(1000)
            model_fit = pm.find_MAP(model=model_spec)

            # do prediction
            w_tr = model_fit["w"]  # np.mean(trace["w"], axis=0)
            w_cv = ensemble_pred(X_test,
                                 X_train,
                                 model_est=model_fit,
                                 P=P,
                                 ls=ls)
            w_or = ensemble_pred(X_pred,
                                 X_train,
                                 model_est=model_fit,
                                 P=P,
                                 ls=ls)

            # training error
Example #35
    p_grid, posterior = posterior_grid_approx(ps, w, n)
    ax[idx].plot(p_grid, posterior, "o-", label=f"success = {w}\ntosses = {n}")
    ax[idx].set_xlabel("probability of water")
    ax[idx].set_ylabel("posterior probability")
    ax[idx].set_title(f"{ps} points")
    ax[idx].legend(loc=0)

# %%
data = np.repeat((0, 1), (3, 6))

# %%
with pm.Model() as normal_approximation:
    p = pm.Uniform("p", 0, 1)
    w = pm.Binomial("w", n=len(data), p=p, observed=data.sum())
    mean_q = pm.find_MAP()
    std_q = ((1 / pm.find_hessian(mean_q, vars=[p]))**0.5)[0]
mean_q["p"], std_q

# %%
w, n = 6, 9
x = np.linspace(0, 1, 100)
plt.plot(x, stats.beta.pdf(x, w + 1, n - w + 1), label="True posterior")

# quadratic approximation
plt.plot(x,
         stats.norm.pdf(x, mean_q["p"], std_q),
         label="Quadratic approximation")
plt.legend(loc=0)

plt.title(f"n = {n}")
Example #36
    atts_star = pm3.Normal("atts_star", mu=0, tau=tau_att, shape=num_teams)
    defs_star = pm3.Normal("defs_star", mu=0, tau=tau_def, shape=num_teams)

    atts = pm3.Deterministic('atts', atts_star - tt.mean(atts_star))
    defs = pm3.Deterministic('defs', defs_star - tt.mean(defs_star))
    home_theta = tt.exp(intercept + home + atts[away_team] + defs[home_team])
    away_theta = tt.exp(intercept + atts[away_team] + defs[home_team])

    # likelihood of observed data
    home_points = pm3.Poisson('home_points',
                              mu=home_theta,
                              observed=observed_home_goals)
    away_points = pm3.Poisson('away_points',
                              mu=away_theta,
                              observed=observed_away_goals)

# * We specified the model and the likelihood function
# * Now we need to fit our model using the Maximum A Posteriori algorithm to decide where to start our No-U-Turn Sampler

# In[6]:

with model:

    start = pm3.find_MAP()
    step = pm3.NUTS(scaling=start)
    trace = pm3.sample(2000, step, start=start, progressbar=True)

    pm3.traceplot(trace)

# In[ ]:
Example #37
 def test_run(self):
     with self.build_model():
         start = pm.find_MAP(method="Powell")
         pm.sample(50, pm.Slice(), start=start)
Example #38
from plot_post import plot_post

# Generate the data
y1 = np.array([1, 1, 1, 1, 1, 0, 0])  # 5 heads and 2 tails
y2 = np.array([1, 1, 0, 0, 0, 0, 0])  # 2 heads and 5 tails

with pm.Model() as model:
    # define the prior
    theta1 = pm.Beta('theta1', 3, 3)  # prior
    theta2 = pm.Beta('theta2', 3, 3)  # prior
    # define the likelihood
    y1 = pm.Bernoulli('y1', p=theta1, observed=y1)
    y2 = pm.Bernoulli('y2', p=theta2, observed=y2)

    # Generate a MCMC chain
    start = pm.find_MAP()  # Find starting value by optimization
    trace = pm.sample(10000, pm.Metropolis(),
                      progressbar=False)  # Use Metropolis sampling
#    start = pm.find_MAP()  # Find starting value by optimization
#    step = pm.NUTS()  # Instantiate NUTS sampler
#    trace = pm.sample(10000, step, start=start, progressbar=False)

# create an array with the posterior sample
theta1_sample = trace['theta1']
theta2_sample = trace['theta2']

# Plot the trajectory of the last 500 sampled values.
plt.plot(theta1_sample[-500:], theta2_sample[-500:], marker='o')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$\theta1$')
Example #39
            x_dim, y_dim = image.shape
            pixel_values = np.concatenate(image)  # grey scale between 0 and 1

            N, wmat, amat = create_matrices(x_dim, y_dim)

            with pm.Model() as model:
                beta0 = pm.Normal('beta0', mu=0., tau=1e-2)
                tau = pm.Gamma('tau_c', alpha=1.0, beta=1.0)
                mu_phi = CAR2('mu_phi', w=wmat, a=amat, tau=tau, shape=N)
                phi = pm.Deterministic('phi', mu_phi -
                                       tt.mean(mu_phi))  # zero-center phi

                mu = pm.Deterministic('mu', beta0 + phi)
                Yi = pm.LogitNormal('Yi', mu=mu, observed=pad(pixel_values))

                max_a_post = pm.find_MAP()
                step = pm.NUTS()
                trace = pm.sample(draws=N_SAMPLES,
                                  step=step,
                                  start=max_a_post,
                                  cores=2,
                                  tune=N_TUNE,
                                  chains=N_CHAINS)
                posterior_pred = pm.sample_posterior_predictive(trace)

                prefix_file_name = 'mnist_digit{}(label{})_'.format(i, label)

                np.save(
                    new_name(name=prefix_file_name + 'phi_values',
                             suffix='.npy',
                             directory=DIRECTORY), trace.get_values('phi'))
Example #40
    # wide variance = less information for prior, data is stronger
    # alpha=100, beta=1: QUITE CLOSE to observed data! Also more stable?
    c = pm.Normal('c', mu=hyper_mu, sd=hyper_sd, shape=(config.K, config.K))
    # mask = np.ones((config.K, config.K))
    # np.fill_diagonal(mask, 0)

    # p = pm.Deterministic('p', config.sigmoid(a + b + offset))
    # p = config.sigmoid(a + b + offset)
    #p = config.sigmoid(a + b + c)
    p = config.sigmoid(c)

    # Likelihood (sampling distribution) of observations
    pm.Binomial('L', n=N_data, p=p, observed=s_with_obs)

# MAP
MAP = pm.find_MAP(model=model)  # Find starting point of MCMC
#a_ = np.tile(np.squeeze(MAP['a']), (4, 1)).T
#b_ = np.tile(np.squeeze(MAP['b']), (4, 1))
c_ = np.squeeze(MAP['c'])
#intercept_ = MAP['intercept']
# y_MAP = config.sigmoid(a_ + b_ + c_ + intercept_)
y_MAP = config.sigmoid(c_)

#tic()
# Draw posterior samples
with model:
    # THIS TAKES A WHILE TO RUN!
    trace = pm.sample(1000, nuts_kwargs=dict(target_accept=.9,
                      max_treedepth=20), chains=config.N_MCMC_CHAINS)

#toc()
Example #41
	T0= Uniform('T0',0,24)
	tau= Gamma('tau',0.0001, 0.0001)

	mu_temp= c*T*((T-T0)*(T0<T))*np.sqrt((Tm-T)*(Tm>T))
	mu= 0*(mu_temp<0) + mu_temp*(mu_temp>0)

	Y_obs = Normal('Y_obs',mu=mu, sd=tau, observed= Y)


from pymc3 import Metropolis, sample, find_MAP
from scipy import optimize

with basic_model_GCR:  

    # obtain starting values via MAP
    start = find_MAP(fmin=optimize.fmin_powell)

    # draw 5000 posterior samples

    trace= sample(sample_size, step= Metropolis(), start=start)
  



#thin the samples by selecting every 5 samples
thin_factor=5

#summary(trace)
#traceplot(trace); 

Example #42
    def find_MAP(self,
                 start=None,
                 points=1,
                 plot=False,
                 return_points=False,
                 display=True,
                 powell=True):
        points_list = list()
        if start is None:
            start = self.get_params_current()
        if type(start) is list:
            i = 0
            for s in start:
                i += 1
                points_list.append(('start' + str(i), self.model.logp(s), s))
        else:
            points_list.append(('start', self.model.logp(start), start))
        if self.outputs.get_value() is None:
            print('For find_MAP it is necessary to have observations')
            return start
        if display:
            print('Starting function value (-logp): ' +
                  str(-self.model.logp(points_list[0][2])))
        if plot:
            plt.figure(0)
            self.plot(params=points_list[0][2], title='start')
            plt.show()
        with self.model:
            i = -1
            while i < points:
                i += 1
                try:
                    if powell:
                        name, logp, start = points_list[i // 2]
                    else:
                        name, logp, start = points_list[i]
                    if i % 2 == 0 or not powell:  #
                        if name.endswith('_bfgs'):
                            if i > 0:
                                points += 1
                            continue
                        name += '_bfgs'
                        if display:
                            print('\n' + name)
                        new = pm.find_MAP(fmin=sp.optimize.fmin_bfgs,
                                          vars=self.sampling_vars,
                                          start=start,
                                          disp=display)
                    else:
                        if name.endswith('_powell'):
                            if i > 1:
                                points += 1
                            continue
                        name += '_powell'
                        if display:
                            print('\n' + name)
                        new = pm.find_MAP(fmin=sp.optimize.fmin_powell,
                                          vars=self.sampling_vars,
                                          start=start,
                                          disp=display)
                    points_list.append((name, self.model.logp(new), new))
                    if plot:
                        plt.figure(i + 1)
                        self.plot(params=new, title=name)
                        plt.show()
                except:
                    pass

        optimal = points_list[0]
        for test in points_list:
            if test[1] > optimal[1]:
                optimal = test
        name, logp, params = optimal
        if display:
            #print(params)
            pass
        if return_points is False:
            return params
        else:
            return params, points_list
Example #43
def get_posterior(data,
                  n=100,
                  draws=2000,
                  n_init=200000,
                  progressbar=True,
                  *args,
                  **kwargs):
    with pm.Model() as model:

        # Define Priors
        p_err = pm.Uniform('p_err', 0, 0.1)  # Upper limit due to normalization
        p_ent = pm.Uniform('p_ent', 0, 1 - 6 * p_err)
        p_a = pm.Uniform('p_a', 0, 1 - 6 * p_err - p_ent)
        p_e = pm.Uniform('p_e', 0, 1 - 6 * p_err - p_ent)
        p_o = pm.Uniform('p_o', 0, 1 - 6 * p_err - p_ent)
        p_i = pm.Uniform('p_i', 0, 1 - 6 * p_err - p_ent)
        nvc_a = pm.Deterministic('nvc_a', 1 - p_a - 6 * p_err - p_ent)
        nvc_i = pm.Deterministic('nvc_i', 1 - p_i - 6 * p_err - p_ent)
        nvc_e = pm.Deterministic('nvc_e', 1 - p_e - 6 * p_err - p_ent)
        nvc_o = pm.Deterministic('nvc_o', 1 - p_o - 6 * p_err - p_ent)

        # Model specification: define all possible moods
        # syll tt-syntax a     i       e      o     NVC
        aa = [p_a, p_ent, p_err, p_err, nvc_a]
        ai = [p_err, p_a, p_err, p_ent, nvc_a]
        ia = ai
        ae = [p_err, p_err, p_a, p_ent, nvc_a]
        ea = ae
        ao = [p_err, p_ent, p_err, p_a, nvc_a]
        oa = ao
        ii = [p_err, p_i, p_err, p_ent, nvc_i]
        ie = [p_err, p_err, p_i, p_ent, nvc_i]
        ei = ie
        io = [p_err, p_ent, p_err, p_i, nvc_i]
        oi = io
        ee = [p_err, p_err, p_e, p_ent, nvc_e]
        eo = [p_err, p_ent, p_err, p_e, nvc_e]
        oe = eo
        oo = [p_err, p_ent, p_err, p_o, nvc_o]

        # Define the relationship between moods and syllogisms
        moods = [
            aa, ai, ae, ao, ia, ii, ie, io, ea, ei, ee, eo, oa, oi, oe, oo
        ]
        syllogs = []
        for m in moods:
            # Figure 1
            line = m[0:4] + [p_err] * 4 + [m[-1]]
            syllogs += [line]
            # Figure 2
            line = [p_err] * 4 + m[0:4] + [m[-1]]
            syllogs += [line]

            line = []
            for para in m[0:4]:
                if para == p_err:
                    line += [p_err]
                else:
                    line += [para / 2]
            # Paste this two times
            line *= 2
            # Add NVC
            line += [m[-1]]

            syllogs += [line] * 2

        model_matrix = tt.stack(syllogs)

        # Define likelihood
        pm.Multinomial(name='rates', n=n, p=model_matrix, observed=data)
        map_estimate = pm.find_MAP(model=model)

        trace = pm.sample(draws=draws,
                          njobs=1,
                          start=map_estimate,
                          n_init=n_init,
                          progressbar=progressbar)

        print('Model logp = ', model.logp(map_estimate))
        return model, trace
Example #44
import pymc3 as pm

basic_model = pm.Model()

with basic_model:

    # Priors for unknown model parameters
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=10, shape=2)
    sigma = pm.HalfNormal('sigma', sd=1)

    # Expected value of outcome
    mu = alpha + beta[0] * X1 + beta[1] * X2

    # Likelihood (sampling distribution) of observations
    Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=Y)

map_estimate = pm.find_MAP(model=basic_model)
# map_estimate = pm.find_MAP(model=basic_model, fmin=optimize.fmin_powell)
print(map_estimate)

alpha1 = map_estimate['alpha']
beta1 = map_estimate['beta']
sigma1 = map_estimate['sigma']
yp = (alpha1 + beta1[0] * TX1 + beta1[1] * TX2)
# print(TY)
# print(yp)
plt.plot(FY, color="green")
plt.plot(yp, color="pink")
plt.show()
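The snippet above assumes module-level training arrays X1, X2, Y and held-out arrays TX1, TX2, FY, plus matplotlib, all defined elsewhere in its script; a minimal, assumed setup along these lines would make it runnable.

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)
X1, X2 = rng.randn(100), rng.randn(100) * 0.2
Y = 1.0 + 1.5 * X1 + 2.5 * X2 + rng.randn(100) * 0.5   # training target
TX1, TX2 = rng.randn(30), rng.randn(30) * 0.2           # test inputs
FY = 1.0 + 1.5 * TX1 + 2.5 * TX2 + rng.randn(30) * 0.5  # test target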
Example #45
def calc_MAP(mcmc: MCMCSpec, model):
    with model:
        return pm.find_MAP()
Example #46
                                    eta=2,
                                    sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(numberOfFeatures, chol_packed)
    cov_mx = pm.Deterministic('estimated_cov', chol.dot(chol.T))

    # observations x1, x0 are supposed to be P(x|y=class1)=N(mu1,cov_both), P(x|y=class0)=N(mu0,cov_both)
    # here is where the Dataset (x1,x0) comes to influence the choice of paramters (mu1,mu0, cov_both)
    # this is done through the "observed = ..." argument; note that above we didn't have that
    x1_obs = pm.MvNormal('x1', mu=mu1, chol=chol, observed=x1)
    x0_obs = pm.MvNormal('x0', mu=mu0, chol=chol, observed=x0)

# done with setting up the model

# now perform maximum likelihood (actually, maximum a posteriori (MAP), since we have priors) estimation
# map_estimate1 is a dictionary: "parameter name" -> "its estimated value"
map_estimate1 = pm.find_MAP(model=basic_model)

#compare map_estimate1['estimated_mu1'] with true_mu1
#same for mu_2, cov

# we can also do MCMC sampling from the distribution over the parameters
# and e.g. get confidence intervals
#
with basic_model:

    # obtain starting values via MAP
    start = pm.find_MAP()

    # instantiate sampler
    step = pm.Slice()
Example #47
    )  #account for removal of burn in at the beginning of each chain
    with pm.Model() as model:
        # define priors
        # Based on P. Barbera's work we know ideology scores tend to be normally distributed around 0
        # For standard deviation, typically an exponential distribution is used
        mu = pm.Normal('mu', mu=0, sigma=2, shape=sample_mu.shape)
        sigma = pm.Exponential('sigma', lam=2, shape=sample_sigma.shape)

        # define likelihood
        observed_data = pm.Normal('observed_data',
                                  mu=mu,
                                  sigma=sigma,
                                  observed=samples)

        # inference
        map_estimate = pm.find_MAP()
        step = pm.NUTS(target_accept=0.90)
        trace = pm.sample(draws=niter,
                          start=None,
                          init='advi_map',
                          step=step,
                          random_seed=323,
                          cores=n_cores,
                          chains=n_cores)

    print("Done with inference!")

    # Get samples of population-level posterior for use as prior in individual-level inference later
    del samples
    posterior_mu = trace.get_values('mu', burn=burn_in, combine=True)
    posterior_sigma = trace.get_values('sigma', burn=burn_in, combine=True)
Example #48
plt.scatter(x, y, marker='+', c='r')
plt.plot(x, true_y, 'b')
plt.show()

# pymc modeling
with pm.Model() as model:
    amp = pm.HalfCauchy("amp", 1)
    ls = pm.HalfCauchy("ls", 1)
    cov_func = amp**2 * pm.gp.cov.ExpQuad(1, ls)  # input_dim=1,ls=ls
    M = pm.gp.mean.Linear(coeffs=(y / x).mean())
    gp = pm.gp.Marginal(M, cov_func)
    noise = pm.HalfCauchy("noise", 2)
    gp.marginal_likelihood("f", X=x.reshape(-1, 1), y=y, noise=noise)
    trace = pm.sample(1000, chains=1)

map_ = pm.find_MAP(model=model)
X_new = np.linspace(0, np.pi * 2, 150).reshape(-1, 1)

# .predict method: return the mean and variance given a particular point
mu, var = gp.predict(X_new, point=map_, diag=True, pred_noise=True)
sd = np.sqrt(var)

# plot
# draw plot
plt.figure(figsize=(4, 3))
# plot mean and 2σ intervals
plt.ylim(-2, 2)
plt.xlim(0, np.pi * 2)
plt.plot(X_new, mu, lw=2, c='r', label="mean and 2σ region")
plt.plot(X_new, mu - 2 * sd, lw=1, c='r')
plt.plot(X_new, mu + 2 * sd, lw=1, c='r')
Example #49
    dislike = pm.Poisson('dislike',
                         mu=lambda_minus,
                         observed=df_videos['低評価数'])

    trace = pm.sample(1500, tune=3000, chains=5, random_seed=57)
# -

pm.traceplot(trace)

# +
df_trace = pm.summary(trace)

df_trace
# -

model_map = pm.find_MAP(model=model)
model_map

df_trace.loc['fun[0]':'beta_plus', ['mean']].sort_values('mean',
                                                         ascending=False)

# +
df_videos['fun'] = model_map['fun']

df_videos = df_videos.sort_values(by='fun', ascending=False)

print('top 5 fun videos!')
display(df_videos.head(5))

print('worst 5 fun videos...')
display(df_videos.tail(5))
Beispiel #50
0
        observedRenewed = data[0, :]
        observedReleased = data[1, :]

        # Released entries every year
        released = tt.mul(p[1:].log(), observedReleased[1:])

        # Renewed entries every year
        renewed = s[-1].log() * observedRenewed[-1]
        return released.sum() + renewed

    retention = pm.DensityDist('retention', logp, observed=data)
    step = pm.DEMetropolis()
    trace = pm.sample(10000, step=step, tune=2000)

# Maximum a posteriori estimators for the model
mapValues = pm.find_MAP(model=BdWwithcfromNorm)

# Extract alpha and beta MAP-estimators
betaParams = mapValues.get('alpha').item(), mapValues.get('beta').item()

theta = stats.beta.mean(betaParams[0], betaParams[1])
cHat = mapValues.get('c').item()
rvar = stats.beta.var(betaParams[0], betaParams[1])


# Define a Discrete Weibull distribution
def DiscreteWeibull(q, b, x):
    return (1 - q)**(x**b) - (1 - q)**((x + 1)**b)


# Plot stuff
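# A minimal sketch of the plotting step (an assumption, not part of the original code):
# visualize the fitted Discrete Weibull PMF, taking q = theta and b = cHat from the
# MAP-based estimates above.
import numpy as np
import matplotlib.pyplot as plt

x_vals = np.arange(0, 15)
plt.bar(x_vals, DiscreteWeibull(theta, cHat, x_vals))
plt.xlabel('x')
plt.ylabel('P(X = x)')
plt.show()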
Beispiel #51
0
def init_nuts(init='advi', n_init=500000, model=None, **kwargs):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed is extremely
    dependent on the choice of mass/scaling matrix. In our experience, using ADVI
    to estimate a diagonal covariance matrix and using this as the scaling matrix
    produces robust results over a wide class of continuous models.

    Parameters
    ----------
    init : str {'advi', 'advi_map', 'map', 'nuts'}
        Initialization method to use.
        * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point.
        * nuts : Run NUTS and estimate posterior mean and covariance matrix.
    n_init : int
        Number of iterations of initializer
        If 'advi', number of iterations, if 'metropolis', number of draws.
    model : Model (optional if in `with` context)
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start, nuts_sampler

    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """

    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if init == 'advi':
        v_params = pm.variational.advi(n=n_init)
        start = pm.variational.sample_vp(v_params,
                                         1,
                                         progressbar=False,
                                         hide_transformed=False)[0]
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'advi_map':
        start = pm.find_MAP()
        v_params = pm.variational.advi(n=n_init, start=start)
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)

    elif init == 'nuts':
        init_trace = pm.sample(step=pm.NUTS(), draws=n_init)
        cov = pm.trace_cov(init_trace[n_init // 2:])

        start = {
            varname: np.mean(init_trace[varname])
            for varname in init_trace.varnames
        }
    else:
        raise NotImplementedError('Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)

    return start, step
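
# Usage sketch (an assumption, not part of the original snippet; it relies on the older
# PyMC3 API used above, in which pm.variational.advi / sample_vp are available).
import numpy as np
import pymc3 as pm

with pm.Model() as toy_model:
    mu = pm.Normal('mu', mu=0., sd=1.)
    pm.Normal('obs', mu=mu, sd=1., observed=np.random.randn(100))
    start, nuts_step = init_nuts(init='advi', n_init=20000)
    trace = pm.sample(1000, step=nuts_step, start=start)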
Beispiel #52
0
def init_nuts(init='auto',
              chains=1,
              n_init=500000,
              model=None,
              random_seed=None,
              progressbar=True,
              **kwargs):
    """Set up the mass matrix initialization for NUTS.

    NUTS convergence and sampling speed is extremely dependent on the
    choice of mass/scaling matrix. This function implements different
    methods for choosing or adapting the mass matrix.

    Parameters
    ----------
    init : str
        Initialization method to use.

        * auto : Choose a default initialization method automatically.
          Currently, this is `'jitter+adapt_diag'`, but this can change in
          the future. If you depend on the exact behaviour, choose an
          initialization method explicitly.
        * adapt_diag : Start with a identity mass matrix and then adapt
          a diagonal based on the variance of the tuning samples. All
          chains use the test value (usually the prior mean) as starting
          point.
        * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter
          in [-1, 1] to the starting point in each chain.
        * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal
          mass matrix based on the sample variance of the tuning samples.
        * advi+adapt_diag_grad : Run ADVI and then adapt the resulting
          diagonal mass matrix based on the variance of the gradients
          during tuning. This is **experimental** and might be removed
          in a future release.
        * advi : Run ADVI to estimate posterior mean and diagonal mass
          matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point. This is discouraged.
        * nuts : Run NUTS and estimate posterior mean and mass matrix from
          the trace.
    chains : int
        Number of jobs to start.
    n_init : int
        Number of iterations of initializer
        If 'ADVI', number of iterations, if 'nuts', number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    vars = kwargs.get('vars', model.vars)
    if set(vars) != set(model.vars):
        raise ValueError('Must use init_nuts on all variables of a model.')
    if not pm.model.all_continuous(vars):
        raise ValueError('init_nuts can only be used for models with only '
                         'continuous variables.')

    if not isinstance(init, str):
        raise TypeError('init must be a string.')

    if init is not None:
        init = init.lower()

    if init == 'auto':
        init = 'jitter+adapt_diag'

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if random_seed is not None:
        random_seed = int(np.atleast_1d(random_seed)[0])
        np.random.seed(random_seed)

    cb = [
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='absolute'),
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='relative'),
    ]

    if init == 'adapt_diag':
        start = [model.test_point] * chains
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'jitter+adapt_diag':
        start = []
        for _ in range(chains):
            mean = {var: val.copy() for var, val in model.test_point.items()}
            for val in mean.values():
                val[...] += 2 * np.random.rand(*val.shape) - 1
            start.append(mean)
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'advi+adapt_diag_grad':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init,
            method='advi',
            model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdaptGrad(
            model.ndim, mean, cov, weight)
    elif init == 'advi+adapt_diag':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init,
            method='advi',
            model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, cov, weight)
    elif init == 'advi':
        approx = pm.fit(random_seed=random_seed,
                        n=n_init,
                        method='advi',
                        model=model,
                        callbacks=cb,
                        progressbar=progressbar,
                        obj_optimizer=pm.adagrad_window)  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'advi_map':
        start = pm.find_MAP(include_transformed=True)
        approx = pm.MeanField(model=model, start=start)
        pm.fit(random_seed=random_seed,
               n=n_init,
               method=pm.KLqp(approx),
               callbacks=cb,
               progressbar=progressbar,
               obj_optimizer=pm.adagrad_window)
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'map':
        start = pm.find_MAP(include_transformed=True)
        cov = pm.find_hessian(point=start)
        start = [start] * chains
        potential = quadpotential.QuadPotentialFull(cov)
    elif init == 'nuts':
        init_trace = pm.sample(draws=n_init,
                               step=pm.NUTS(),
                               tune=n_init // 2,
                               random_seed=random_seed)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = list(np.random.choice(init_trace, chains))
        potential = quadpotential.QuadPotentialFull(cov)
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    step = pm.NUTS(potential=potential, model=model, **kwargs)

    return start, step
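
# Usage sketch (an assumption, not part of the original snippet; it assumes the original
# module-level imports, e.g. numpy and quadpotential, are present): this variant returns
# one starting point per chain together with a NUTS step method built around the chosen
# mass-matrix potential.
import numpy as np
import pymc3 as pm

with pm.Model() as toy_model:
    mu = pm.Normal('mu', mu=0., sd=1.)
    pm.Normal('obs', mu=mu, sd=1., observed=np.random.randn(100))
    start, nuts_step = init_nuts(init='jitter+adapt_diag', chains=2, n_init=20000)
    trace = pm.sample(1000, step=nuts_step, start=start, chains=2)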
Beispiel #53
0
y = np.repeat([1, 0], [z, N - z])

# THE MODEL.
with pm.Model() as model:
    # Hyperprior on model index:
    model_index = pm.DiscreteUniform('model_index', lower=0, upper=1)
    # Prior
    nu = pm.Normal('nu', mu=0, tau=0.1)  # it is possible to use tau or sd
    eta = pm.Gamma('eta', .1, .1)
    theta0 = 1 / (1 + pm.exp(-nu))  # theta from model index 0
    theta1 = pm.exp(-eta)  # theta from model index 1
    theta = pm.switch(pm.eq(model_index, 0), theta0, theta1)
    # Likelihood
    y = pm.Bernoulli('y', p=theta, observed=y)
    # Sampling
    start = pm.find_MAP()
    step1 = pm.Metropolis(model.vars[1:])
    step2 = pm.ElemwiseCategoricalStep(var=model_index, values=[0, 1])
    trace = pm.sample(10000, [step1, step2], start=start, progressbar=False)

# EXAMINE THE RESULTS.
burnin = 1000
thin = 5

## Print summary for each trace
#pm.summary(trace[burnin::thin])
#pm.summary(trace)

## Check for mixing and autocorrelation
#pm.autocorrplot(trace[burnin::thin], vars =[nu, eta])
#pm.autocorrplot(trace, vars =[nu, eta])
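
# A small follow-up sketch (an assumption, not part of the original code): estimate the
# posterior probability of each model from the sampled model index.
model_idx_sample = trace['model_index'][burnin::thin]
p_model0 = (model_idx_sample == 0).mean()
p_model1 = (model_idx_sample == 1).mean()
print('P(model 0 | data) = {:.3f},  P(model 1 | data) = {:.3f}'.format(p_model0, p_model1))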
Beispiel #54
0
    vert_des2 = strSource2 * coeff_up_des2
    east_des = east_des1 + east_des2
    north_des = north_des1 + north_des2
    vert_des = vert_des1 + vert_des2
    Ulos_des = east_des * UEast_des + north_des * UNorth_des + vert_des * UVert_des

    UObs_asc = mv.MvNormal('Uobs_asc',
                           mu=Ulos_asc,
                           cov=covariance_asc,
                           observed=U_asc)
    UObs_des = mv.MvNormal('Uobs_des',
                           mu=Ulos_des,
                           cov=covariance_des,
                           observed=U_des)
    step = pm.Metropolis()
    trace = pm.sample(Niter, step)
trace = trace[Nburn:]  # discard the first Nburn samples of each chain
print(pm.summary(trace))
map_estimate = pm.find_MAP(model=model)
print(map_estimate)

results = {}
results['MAP'] = map_estimate
results['trace'] = trace
results['ref_coord'] = data['ref_coord']
results['iterations'] = Niter
pickle.dump(
    results,
    open(pathgg_results + 'Mogi_Metropolis_' + str(Niter) + '_2mogi.pickle',
         'wb'))
Beispiel #55
0
    a = pm.Normal(
        "a",
        0.0,
        10.0,
        transform=pm.distributions.transforms.ordered,
        shape=6,
        testval=np.arange(6) - 2.5,
    )

    resp_obs = pm.OrderedLogistic(
        "resp_obs", 0.0, a, observed=trolley_df.response.values - 1
    )

# %%
with m11_1:
    map_11_1 = pm.find_MAP()

# %%
map_11_1["a"]

# %%
sp.special.expit(map_11_1["a"])

# %%
with m11_1:
    trace_11_1 = pm.sample(1000, tune=1000)

# %%


def ordered_logistic_proba(a):
Beispiel #56
0
def model(sim_data, prior_data, keys=[], out_fold='test'):

    #x = tt.as_tensor(np.ones(4))
    #y = tt.as_tensor(np.ones(3))
    #z = tt.concatenate([x,y],axis=0)
    #print z.eval()

    for file in os.listdir(out_fold):
        name = "%s/%s" % (out_fold, file)
        os.remove(name)
        #print file

    mod = pm.Model()
    with mod:

        #probabilities of each primitive

        ps = []
        #weights = pm.Dirichlet("weights", np.ones(len(prim_type)))
        for i in range(len(prim_type)):
            #weight = weights[i]
            #weight = 1.
            #prob = np.ones(prim_type[i])/float(prim_type[i])

            name = "p_%s" % i
            name_w = name + "_w"
            weight = pm.Exponential(name_w, 1.0) * np.ones(prim_type[i])
            prob = pm.Dirichlet(name, np.ones(prim_type[i]))

            ps.append(weight * prob)

        probs = tt.concatenate(ps, axis=0)

        #probs = np.ones(N_PRIM)/float(N_PRIM)

        #copy the probability vector a number of times
        #so that it becomes a tensor
        #with the probabilities of each primitive
        #for each hypothesis for each sequence pair
        probs = tt.tile(probs, (N_TOP, 1))
        probs = tt.tile(probs, (N_PAIRS, 1, 1))

        #and now convert the probabilities assigned
        #to each hypothesis for a given sequence pair
        #into entropy

        ents = tt.pow(probs, prior_data)
        ents = tt.log(ents)
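        # (added note) tt.pow followed by tt.log equals prior_data * tt.log(probs);
        # summing over the last axis below gives each hypothesis' log-probability
        # under the primitive weights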
        """
        x1 = tt.sum(ents,axis=2)
        x2 = tt.exp(x1)
        norms = tt.sum(x2)
        x2 = x2/norms
        ents = -1.0 * x1 * x2
        ents = tt.sum(ents, axis=1)
        """

        ents = tt.sum(ents, axis=2)
        ents = tt.max(ents, axis=1)
        #ents = tt.exp(ents)

        ents = pm.Deterministic('ents', ents)

        mean_pr = tt.mean(ents)
        #ents = ents - mean_pr
        std_pr = tt.std(ents)
        ents = (ents - mean_pr) / std_pr
        ents = ents[assigns]

        #intercept
        #alpha = pm.Uniform('alpha', 0,1) * 5.
        alpha = pm.Normal('alpha', mu=3, sd=1)
        #slope
        beta = pm.Normal('beta', mu=0.0, sd=10)
        #standard deviation
        sigma = pm.HalfNormal('sigma', sd=1)
        #expected value of similarity
        mu = alpha + beta * ents

        #compare fit to observed similarity data
        Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=sim_data_lst)

        db = pm.backends.Text(out_fold)
        trace = pm.sample(MCMC_STEPS, tune=BURNIN, thin=MCMC_THIN, trace=db)

    map_estimate = pm.find_MAP(model=mod)
    print(map_estimate)
    c = 0

    if len(keys) > 0:
        for z in map_estimate['ents']:
            print(keys[c], z)
            c += 1

    return trace
Beispiel #57
0
ydata = theta_true[0] + theta_true[1] * xdata

# add scatter to points
xdata = np.random.normal(xdata, 10)
ydata = np.random.normal(ydata, 10)
data = {'x': xdata, 'y': ydata}

with pymc3.Model() as model:
    alpha = pymc3.Uniform('intercept', -100, 100)
    # Create custom densities
    beta = pymc3.DensityDist('slope', lambda value: -1.5 * T.log(1 + value**2), testval=0)
    sigma = pymc3.DensityDist('sigma', lambda value: -T.log(T.abs_(value)), testval=1)
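    # (added note) these log-densities correspond to p(slope) ~ (1 + slope^2)^(-3/2),
    # a prior that is flat in the angle of the line, and p(sigma) ~ 1/|sigma|,
    # a Jeffreys-type scale prior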
    # Create likelihood
    like = pymc3.Normal('y_est', mu=alpha + beta * xdata, sd=sigma, observed=ydata)

    start = pymc3.find_MAP()
    step = pymc3.NUTS(scaling=start) # Instantiate sampler
    trace = pymc3.sample(10000, step, start=start)


#################################################
# Create some convenience routines for plotting
# All functions below written by Jake Vanderplas

def compute_sigma_level(trace1, trace2, nbins=20):
    """From a set of traces, bin by number of standard deviations"""
    L, xbins, ybins = np.histogram2d(trace1, trace2, nbins)
    L[L == 0] = 1E-16
    logL = np.log(L)

    shape = L.shape
Beispiel #58
0
            ### END OF NEW FOR COUPLED CALIBRATION WITH DLM-GASP

            # Likelihood (sampling distribution) of observations
            #
            #y_obs = pm.Normal('y_obs', mu=muGP[0], sd=sigma, observed=0)
            y_obs = pm.Normal('y_obs', mu=mu, sd=sigma, observed=0)
            #y_obs = pm.Normal('y_obs', mu=mu,sd=sigma, observed=90)

            #y = pm.DensityDist('y', logp(var1,var2,var3))

        #basic_model.logp({'y': 0.})

        with basic_model:
            print "Starting ...."
            start = pm.find_MAP(fmin=optimize.fmin_powell)
            #start = {'var1': 0.5, 'var2': 0.5, 'var3': 0.5, 'var4': 0.5, 'var5': 0.5, 'var6': 0.5, 'var7': 0.5, 'var8': 0.5, 'var9': 0.5, 'var10': 0.5, 'var11': 0.5, 'var12': 0.5, 'var13': 0.5, 'var14': 0.5, 'var15': 0.5, 'var16': 0.5, 'var17': 0.5, 'var18': 0.5, 'var19': 0.5, 'var20': 0.5, 'var21': 0.5, 'sigma': 4000}
            #C = approx_hessian(model.test_point)
            #step = pm.HamiltonianMC([var1,var2,var3,var4,var5,var6,var7,var8,var9,var10,var11,var12,var13,var14,var15,var16,var17,var18,var19,var20,var21,sigma,err])
            print "Assigning step method...."
            #step1 = pm.Metropolis(vars=[var1,var2,var3,var4,var5,var6,var7,var8,var9,var10,var11,var12,var13,var14,var15,var16,var17,var18,var19,var20,var21])
            #step2 = pm.Metropolis(vars=[muGP,sigma])
            step = pm.Metropolis()
            #step = pm.NUTS()
            print "Running sample algorightm...."
            trace = pm.sample(7500, tune=500, step=step, njobs=1)
            pm.traceplot(trace)
            pm.backends.text.dump(os.getcwd(), trace)

            for csvfile in glob.glob(os.path.join('.', 'chain-0.csv')):
                worksheet = workbook.add_worksheet('Case ' + str(case))
Beispiel #59
0
    def fit(
        self,
        draws: int = 500,
        chains: int = 4,
        trace_size: int = 500,
        method: Sampler = Sampler.NUTS,
        map_initialization: bool = False,
        finalize: bool = True,
        step_kwargs: Dict = None,
        sample_kwargs: Dict = None,
    ):
        """Fit the PMProphet model.

        Parameters
        ----------
        draws : int, > 0
            The number of MCMC samples.
        chains : int, =4
            The number of MCMC chains.
        trace_size : int, =500
            The last N samples to keep in the trace.
        method : Sampler
            The sampler of your choice
        map_initialization : bool
            Initialize the model with maximum a posteriori estimates.
        finalize : bool
            Finalize the model.
        step_kwargs : dict
            Additional arguments for the sampling algorithms
            (`NUTS` or `Metropolis`).
        sample_kwargs : dict
            Additional arguments for the PyMC3 `sample` function.
        """

        if sample_kwargs is None:
            sample_kwargs = {}
        if step_kwargs is None:
            step_kwargs = {}
        if chains * draws < trace_size and method != Sampler.ADVI:
            raise Exception(
                "Desired trace size should be smaller than the sampled data points"
            )

        self.skip_first = (chains *
                           draws) - trace_size if method != Sampler.ADVI else 0
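        # e.g. with draws=500, chains=4, trace_size=500 this keeps only the last 500 of
        # the 4 * 500 = 2000 sampled points (skip_first = 1500)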
        self.chains = chains

        if finalize:
            self.finalize_model()

        with self.model:
            if map_initialization:
                self.start = pm.find_MAP(maxeval=10000)
                if draws == 0:
                    self.trace = {
                        k: np.array([v])
                        for k, v in self.start.items()
                    }

            if draws:
                if method != Sampler.ADVI:
                    step_method = method.value(**step_kwargs)
                    self.trace = pm.sample(
                        draws,
                        chains=chains,
                        step=step_method,
                        start=self.start if map_initialization else None,
                        **sample_kwargs)
                else:
                    res = pm.fit(
                        draws,
                        start=self.start if map_initialization else None)
                    self.trace = res.sample(trace_size)
Beispiel #60
0
alpha_A = 400.0 / 16.0
beta_A = 1.0 / 16.0
alpha_N = 400.0 / 16.0
beta_N = 1.0 / 16.0
alpha_D = 2.0 + 1.0 / 1.6
beta_D = 100 * (alpha_D - 1)
delta_t = 0.802

with pm.Model() as model:
    D = pm.InverseGamma('D', alpha=alpha_D, beta=beta_D)
    A = pm.Gamma('A', alpha=alpha_A, beta=beta_A)
    B = pm.Deterministic('B', pm.exp(-delta_t * D / A))
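    # (added note) B = exp(-delta_t * D / A), presumably the one-step autocorrelation of
    # the OU process with relaxation time A / D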

    path = lcm.Ornstein_Uhlenbeck('path', D=D, A=A, B=B, observed=time_series)

    start = pm.find_MAP(fmin=sp.optimize.fmin_powell)

    trace = pm.sample(100000, start=start)

pm.summary(trace)

data_dict = {
    'D': trace['D'],
    'A': trace['A'],
    'B': trace['B'],
}

df = pd.DataFrame(data_dict)
df.to_csv(datadir + 'LIG' + region + str(voxel) + '.csv', index=False)

pm.traceplot(trace)