Example #1
def do_mcmc_bmlingam(xs, hparams, mcmc_params):
    """Do MCMC for sampling posterior of bmlingam coefficient.

    Example: 

    .. code:: python

        mcmc_params = MCMCParams(
            n_burn=10000,     # Samples in burn-in period
            n_mcmc_samples=10000, # Samples in MCMC (after burn-in)
            seed_burn=1, # Random seed for burn-in period
            seed=2 # Random seed for MCMC
        ) 
        trace = do_mcmc_bmlingam(data['xs'], hparams, mcmc_params)
        b_post = np.mean(trace['b'])

    :code:`xs` is a numpy.ndarray containing the samples.

    :param xs: Data array. 
    :type xs: numpy.ndarray, shape=(n_samples, 2)

    :code:`hparams` is a dict including hyperparameters. 
    See :func:`bmlingam.hparam.define_hparam_searchspace`. 

    :param hparams: Set of hyperparameters.
    :type hparams: dict

    :code:`mcmc_params` includes parameters for MCMC. 

    :param mcmc_params: Parameters for MCMC. 
    :type mcmc_params: :class:`bmlingam.MCMCParams`
    """
    assert isinstance(mcmc_params, MCMCParams)

    # ---- Import PyMC3 modules when required ----
    from pymc3 import Metropolis, sample

    # ---- Standardization ----
    # Ratio of scales, used below to map the coefficient back to the original scale
    scale_ratio = np.std(xs[:, 1]) / np.std(xs[:, 0])
    xs = standardize_samples(xs, hparams['standardize'])

    model = get_pm3_model_bmlingam(xs, hparams, mcmc_params.verbose)

    # ---- MCMC sampling ----
    with model:
        # Burn-in
        # start = find_MAP()
        step = Metropolis()
        trace = sample(
            mcmc_params.n_burn, step, random_seed=mcmc_params.seed_burn, 
            progressbar=False
        )

        # Sampling
        trace = sample(
            mcmc_params.n_mcmc_samples, step, start=trace[-1], 
            random_seed=mcmc_params.seed, progressbar=False
        )

    trace_b = np.array(trace['b'])
    if hparams['standardize']:
        # Map the sampled coefficient back to the original scale of the data
        if hparams['causality'] == [1, 2]:
            trace_b *= scale_ratio
        elif hparams['causality'] == [2, 1]:
            trace_b /= scale_ratio
        else:
            raise ValueError("Invalid value of causality: %s" %
                hparams['causality'])

    return {'b': trace_b}

Example #2
from pymc3 import Metropolis, sample, find_MAP
from scipy import optimize

trace_copy = {}
with basic_model:

    # obtain starting values via MAP
    start = find_MAP(fmin=optimize.fmin_powell)

    # instantiate the Metropolis sampler and draw 100 posterior samples
    trace = sample(100, step=Metropolis(), start=start)
    trace_copy = trace

# thin the samples by keeping every 2nd draw
thin_factor = 2

print(trace['c'][0:9])
trace = trace[::thin_factor]
print(trace['c'][0:9])

#summary(trace)
#traceplot(trace); 




Example #3
import numpy as np
from pymc3 import Metropolis

def simple_init():
    start, model, moments = simple_model()
    # Metropolis step over all model variables with an identity proposal covariance
    step = Metropolis(model.vars, np.diag([1.]), model=model)
    return model, start, step, moments
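
# A minimal usage sketch (hypothetical; `simple_model` is assumed to come from
# the surrounding test module and is not shown here):
from pymc3 import sample

model, start, step, moments = simple_init()
with model:
    trace = sample(1000, step=step, start=start, progressbar=False)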
Example #4
'''
Metropolis Hastings Sampler
'''

# Maximum-likelihood starting point and Hessian from the helper routines
MLpoint = ML(useToAs)
hess = hessian(useToAs)

from pymc3 import Metropolis, sample

with basic_model:

    # Use starting ML point
    start = MLpoint

    #hess = hessian(useToAs)

    # Proposal covariance for Metropolis taken from the Hessian diagonal
    step1 = Metropolis(vars=[amplitude, offset, noise, phase],
                       S=np.diag(basic_model.dict_to_array(hess)))

    # draw 10000 posterior samples
    trace = sample(10000, start=start, step=step1)

from pymc3 import traceplot
import matplotlib.pyplot as plt

traceplot(trace)
plt.show()

# Estimate the acceptance rate from the fraction of draws that moved
accept = np.float64(np.sum(trace['phase'][1:] != trace['phase'][:-1]))
print("Acceptance Rate: ", accept / trace['phase'].shape[0])
'''
HMC Sampler
'''
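# The HMC code is truncated in this excerpt. A minimal sketch of such a
# sampler setup in pymc3, reusing basic_model and the ML starting point from
# above (the sampler settings are our assumptions, not the original author's):
from pymc3 import HamiltonianMC, sample

with basic_model:
    step_hmc = HamiltonianMC()  # gradient-based Hamiltonian Monte Carlo step
    trace_hmc = sample(10000, start=start, step=step_hmc)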
Example #5
#    
#        # latent cluster of each observation
#        category = pm.Categorical('category',p=p,shape=data.shape[0])
#    
#        # likelihood for each observed value
#        points = pm.Normal('obs',
#                           mu=means[category],
#                           sd=sd,
#                           observed=data)
    
    
    
    # For comparison with ADVI, run MCMC.
    with model:
        start = find_MAP()
        step = Metropolis()
        trace = sample(1000, step, start=start)
        
    plt.figure(figsize=(5, 5))
    plt.scatter(data[:, 0], data[:, 1], alpha=0.5, c='g')
    mu_0, mu_1 = trace['mu_0'], trace['mu_1']
    plt.scatter(mu_0[-500:, 0], mu_0[-500:, 1], c="r", s=10)
    plt.scatter(mu_1[-500:, 0], mu_1[-500:, 1], c="b", s=10)
    plt.xlim(-6, 6)
    plt.ylim(-6, 6)
    plt.figure()
    sns.barplot([1, 2], np.mean(trace['pi'][-5000:], axis=0), palette=['red', 'blue'])
    
    # We can use the same model with ADVI as follows.
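    # The ADVI code itself is missing from this excerpt. A minimal,
    # hypothetical sketch using pymc3's variational API (pm.fit with
    # method='advi'); the iteration and draw counts are placeholders.
    import pymc3 as pm
    with model:
        approx = pm.fit(n=50000, method='advi')  # mean-field ADVI fit
        trace_advi = approx.sample(1000)         # draws from the approximation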
    
Example #6
from pymc3 import Model, Gamma, Uniform, Normal, Metropolis, sample, summary, traceplot
import matplotlib.pyplot as pp

basic_model = Model()
with basic_model:
    # Priors for unknown model parameters
    lf = Gamma('lf', alpha=2, beta=4)

    sigma = Uniform('sigma', lower=0.1, upper=50)

    #you can print searched values from every draw
    #lf_print = T.printing.Print('lf')(lf)

    #Deterministic value (RT in ms) established by the ACT-R model
    mu = model(lf)

    # Likelihood (sampling distribution) of observations
    Normal('Y_obs', mu=mu, sd=sigma, observed=Y)

    # Metropolis algorithm for the simulation steps
    step = Metropolis(basic_model.vars)

    trace = sample(1000, tune=1000, step=step, init='auto')

print(summary(trace))
traceplot(trace)
pp.savefig("plot_u8_estimating_using_pymc3.png")
print(trace['lf'], trace['sigma'])
print("Latency factor: mean ", np.mean(trace['lf']))
print("This value should be close to 0.1")
print("Sigma estimate: mean ", np.mean(trace['sigma']))
print("This value should be close to 10")

# Of course, much more can be explored this way: more parameters could be
# studied, different priors could be used, etc.; a sketch follows below.
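# For instance, a hypothetical sketch that swaps in a different prior for the
# latency factor, reusing the ACT-R `model` function and data `Y` from above
# (the Uniform bounds are illustrative, not from the original tutorial):
with Model() as alt_model:
    lf = Uniform('lf', lower=0.01, upper=1)  # flat prior instead of Gamma(2, 4)
    sigma = Uniform('sigma', lower=0.1, upper=50)
    Normal('Y_obs', mu=model(lf), sd=sigma, observed=Y)
    alt_trace = sample(1000, tune=1000, step=Metropolis(alt_model.vars))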
Example #7
    # Quadratic thermal response: -c * (T - T0) * (T - Tm) inside (T0, Tm), zero outside
    mu = -c * ((T - T0) * (T0 < T)) * ((T - Tm) * (Tm > T))

    Y_obs = Normal('Y_obs', mu=mu, sd=tau, observed=Y)

from pymc3 import Metropolis, sample, find_MAP
from scipy import optimize

with basic_model_lf_Aeg:

    # obtain starting values via MAP
    start = find_MAP(fmin=optimize.fmin_powell)

    # draw `sample_size` posterior samples
    trace = sample(sample_size, step=Metropolis(), start=start)

# thin the samples by selecting every 5th sample
thin_factor = 5

#summary(trace)
#traceplot(trace);

# Plot the histograms

figure_count = mua.create_2x2_histograms(trace, figure_count)

# Create the temperature-response curves (here the quadratic form)
Temps = np.arange(0, 50, 0.1)
lf_Aeg_samps = mua.make_sims_temp_resp("quad", trace, Temps, thin_factor)
samps = {}
Example #8
def run_phi(data, **kwargs):
    # Accept either a CSV path or an array-like of ratings
    if isinstance(data, str):
        data = csv(data)
    data = np.array(data)

    # Check limits in **kwargs; default to the observed min/max
    limits = kwargs.get("limits")
    if limits is None:
        limits = (np.nanmin(list(itertools.chain.from_iterable(data))),
                  np.nanmax(list(itertools.chain.from_iterable(data))))

    verbose = kwargs.get("verbose", False)

    # binning=False removes binning on the borders
    if (kwargs.get("binning") is not None) and not kwargs.get("binning"):
        print("removing binning on borders")
        binning_multiplier = 2
    else:
        binning_multiplier = 1

    # Remaining keyword arguments and their defaults
    seed = kwargs.get("seed", 123)
    table = kwargs.get("table", False)
    N = kwargs.get("N", 1000)
    keep_missing = kwargs.get("keep_missing")  # None means AUTO
    fast = kwargs.get("fast", True)
    njobs = kwargs.get("njobs", 2)
    sd = kwargs.get("sd", 1000000)

    # Check gt (ground truth) in **kwargs
    gt = kwargs.get("gt")
    if gt is None:
        gt = [None] * len(data)

    if verbose: print("Computing Phi")
    idx_of_gt = np.array([x is not None for x in gt])
    idx_of_not_gt = np.array([x is None for x in gt])
    num_of_gt = np.sum(idx_of_gt)

    basic_model = Model()

    for i, g in enumerate(gt):
        if g is not None:
            gt[i] = scale_mat(np.array([[gt[i]] * len(data[i])]),
                              limits,
                              binning_multiplier=binning_multiplier)[0][0]

    num_of_docs = len(data)  # number of documents

    rectangular = True
    sparse = False
    if np.isnan(data).any():
        sparse = True
        data = np.ma.masked_invalid(data)
        data = minimal_matrix(data)

    scaled = scale_mat(data, limits, binning_multiplier=binning_multiplier)

    if (np.count_nonzero(np.isnan(scaled)) /
            scaled.size) > 0.2:  # a lot of nans
        if verbose:
            print(
                "WARNING: a lot of missing values: we are going to set keep_missing=False to improve convergence (if not manually overridden)"
            )
        if keep_missing is None:
            keep_missing = False

    if sparse and keep_missing is False:
        rectangular = False
        scaled = [doc[~np.isnan(doc)].tolist()
                  for doc in scaled]  #make data a list of lists

    NUM_OF_ITERATIONS = N

    with basic_model:
        precision = Normal('precision', mu=2, sd=sd)
        #precision = Gamma('precision',mu=2,sd=1)

        if num_of_docs - num_of_gt == 1:
            mu = Normal('mu', mu=1 / 2, sd=sd)
        else:
            mu = Normal('mu', mu=1 / 2, sd=sd, shape=num_of_docs - num_of_gt)
        alpha = mu * precision
        beta = precision * (1 - mu)

        if rectangular:
            masked = pd.DataFrame(
                scaled[idx_of_not_gt])  #needed to keep nan working
            if num_of_docs - num_of_gt == 1:
                Beta('beta_obs', observed=masked, alpha=alpha, beta=beta)
            else:
                Beta('beta_obs',
                     observed=masked.T,
                     alpha=alpha,
                     beta=beta,
                     shape=num_of_docs - num_of_gt)
        else:
            for i, doc in enumerate(scaled):
                Beta('beta_obs' + str(i),
                     observed=doc,
                     alpha=alpha[i],
                     beta=beta[i])

        for i, g in enumerate(gt):
            if g is not None:
                mu = Normal('mu' + str(i), mu=gt[i], sd=1)
                alpha = mu * precision
                beta = precision * (1 - mu)
                Beta('beta_obs_g' + str(i),
                     observed=scaled[i],
                     alpha=alpha,
                     beta=beta)  #alpha=a,beta=b,observed=beta)

        # Statistical inference: try a NUTS warm-up, fall back to Metropolis
        beg = time()
        try:
            # In fast mode, skip the expensive NUTS warm-up entirely and use
            # the Metropolis fallback in the except branch below.
            if fast:
                raise RuntimeError("fast mode: use Metropolis")

            # Warm-up: repeated short NUTS runs to estimate posterior scales
            stds = np.ones(basic_model.ndim)
            for _ in range(5):
                args = {'scaling': stds**2, 'is_cov': True}
                trace = pm.sample(round(NUM_OF_ITERATIONS / 10),
                                  tune=round(NUM_OF_ITERATIONS / 10),
                                  init=None,
                                  nuts_kwargs=args,
                                  chains=10,
                                  progressbar=verbose,
                                  random_seed=seed)
                samples = [basic_model.dict_to_array(p) for p in trace]
                stds = np.array(samples).std(axis=0)

            # Main NUTS run, scaled by the estimated standard deviations
            step = pm.NUTS(scaling=stds**2, is_cov=True, target_accept=0.9)
            start = trace[0]
            trace = sample(NUM_OF_ITERATIONS,
                           tune=round(NUM_OF_ITERATIONS / 2),
                           njobs=njobs,
                           chains=8,
                           init=None,
                           step=step,
                           start=start,
                           progressbar=verbose,
                           random_seed=seed)
        except Exception:
            # Fallback (and the fast path): plain Metropolis sampling
            step = Metropolis()
            trace = sample(NUM_OF_ITERATIONS,
                           progressbar=verbose,
                           random_seed=seed,
                           njobs=njobs,
                           step=step)
        #pm.summary(trace,include_transformed=True)
        # df_summary was deprecated in favor of summary after pymc3 3.3;
        # compare (major, minor) tuples since the version string may have a
        # patch component that float() cannot parse
        if tuple(int(v) for v in pymc3.__version__.split('.')[:2]) <= (3, 3):
            res = pm.stats.df_summary(trace, include_transformed=True)
        else:
            res = pm.summary(trace, include_transformed=True)
        res.drop(["sd", "mc_error"], axis=1, inplace=True)
        res = res.transpose()
        res["agreement"] = agreement(res['precision'])
        # ----

        #sub_res = res.copy()

        # Mu rescaling

        col_agreement = res["agreement"]
        col_precision = res["precision"]

        res.drop("agreement", inplace=True, axis=1)
        res.drop("precision", inplace=True, axis=1)

        if table:
            col_names = res.columns[0:len(data) - 1]
            for i, name in enumerate(col_names):
                n_vals = len(scaled[i]) * binning_multiplier
                for j in range(3):
                    b = res[name].iloc[j]
                    mu_res = (b * n_vals - 0.5) / (n_vals - 1)
                    res[name].iloc[j] = np.clip(mu_res, 0, 1) * (limits[1] - limits[0])

        res["agreement"] = col_agreement
        res.insert(0, "precision", col_precision)
    computation_time = time() - beg
    if verbose: print("Elapsed time for computation: ", computation_time)

    convergence = True
    rhat = res.loc['Rhat']['precision']
    if np.isnan(rhat) or np.abs(rhat - 1) > 1e-1:
        print("Warning! You need more iterations!")
        convergence = False
    if table:
        return {
            'agreement': col_agreement['mean'],
            'interval': col_agreement[['hpd_2.5', 'hpd_97.5']].values,
            "computation_time": computation_time,
            "convergence_test": convergence,
            'table': res
        }
    else:
        return {
            'agreement': col_agreement['mean'],
            'interval': col_agreement[['hpd_2.5', 'hpd_97.5']].values,
            "computation_time": computation_time,
            "convergence_test": convergence
        }
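
A minimal usage sketch for run_phi (the file name and keyword values below are hypothetical):

# ratings.csv: one row per document, one column per rater; NaNs mark missing votes
result = run_phi("ratings.csv", N=2000, seed=42, table=True, verbose=True)
print(result['agreement'])  # posterior mean agreement
print(result['interval'])   # 95% HPD interval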