Example No. 1
 def test_value_n_eff_rhat(self):
     mu = -2.1
     tau = 1.3
     with Model():
         Normal('x0', mu, tau, testval=floatX_array(.1)) # 0d
         Normal('x1', mu, tau, shape=2, testval=floatX_array([.1, .1]))# 1d
         Normal('x2', mu, tau, shape=(2, 2),
                testval=floatX_array(np.tile(.1, (2, 2))))# 2d
         Normal('x3', mu, tau, shape=(2, 2, 3),
                testval=floatX_array(np.tile(.1, (2, 2, 3))))# 3d
         trace = pm.sample(100, step=pm.Metropolis())
     for varname in trace.varnames:
         # test effective_n value
         n_eff = pm.effective_n(trace, varnames=[varname])[varname]
         n_eff_df = np.asarray(
                 pm.summary(trace, varnames=[varname])['n_eff']
                              ).reshape(n_eff.shape)
         npt.assert_equal(n_eff, n_eff_df)
         
         # test Rhat value
         rhat = pm.gelman_rubin(trace, varnames=[varname])[varname]
         rhat_df = np.asarray(
                 pm.summary(trace, varnames=[varname])['Rhat']
                              ).reshape(rhat.shape)
         npt.assert_equal(rhat, rhat_df)
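A note on the two call paths compared above: pm.gelman_rubin and pm.effective_n return dicts keyed by variable name, each value shaped like the variable itself, while pm.summary flattens every variable into one row per element. A minimal sketch of the same check outside the test harness, assuming a multi-chain trace object named trace that contains an 'x2' variable of shape (2, 2):

import numpy as np
import pymc3 as pm

# dict of per-variable arrays; rhat['x2'] has shape (2, 2)
rhat = pm.gelman_rubin(trace, varnames=['x2'])

# one flattened summary row per element (x2__0_0, x2__0_1, ...)
rhat_col = pm.summary(trace, varnames=['x2'])['Rhat']

# same numbers, different layout
np.testing.assert_equal(rhat['x2'],
                        np.asarray(rhat_col).reshape(rhat['x2'].shape))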
Example No. 2
def main():
  X, Y = generate_sample()

  with pm.Model() as model:
    alpha = pm.Normal('alpha', mu=0, sd=20)
    beta = pm.Normal('beta', mu=0, sd=20)
    sigma = pm.Uniform('sigma', lower=0)
    y = pm.Normal('y', mu=beta*X+alpha, sd=sigma, observed=Y)
    start = pm.find_MAP()
    step = pm.NUTS(scaling=start)

  with model:
    if (multicore):
      trace = pm.sample(itenum, step, start=start,
        njobs=chainnum, random_seed=list(range(chainnum)), progressbar=progress)
    else:
      ts = [pm.sample(itenum, step, chain=i, progressbar=progress)
            for i in range(chainnum)]
      trace = merge_traces(ts)

    if saveimage:
      pm.traceplot(trace).savefig("simple_linear_trace.png")
    print("Rhat = {0}".format(pm.gelman_rubin(trace)))

  t1 = time.clock()
  print("elapsed time = {0}".format(t1 - t0))

  # plot the trace
  if not multicore:
    trace = ts[0]
  with model:
    pm.traceplot(trace, model.vars)

  pm.forestplot(trace)

  with open("simplelinearregression_model.pkl","w") as fpw:
  	pkl.dump(model,fpw)
  with open("simplelinearregression_trace.pkl","w") as fpw:
  	pkl.dump(trace,fpw)
  with open("simplelinearregression_model.pkl") as fp:
  	model=pkl.load(fp)
  with open("simplelinearregression_trace.pkl") as fp:
  	trace=pkl.load(fp)
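The snippet above targets an early PyMC3 API (NUTS seeded with find_MAP output, per-chain sampling merged by merge_traces). On later PyMC3 3.x releases the same multi-chain check is a single sample() call; a rough sketch, assuming the generate_sample() helper from above is in scope, that the installed version accepts the chains/cores keywords, and with a HalfCauchy prior on sigma in place of the original Uniform:

import pymc3 as pm

X, Y = generate_sample()

with pm.Model() as model:
    alpha = pm.Normal('alpha', mu=0, sd=20)
    beta = pm.Normal('beta', mu=0, sd=20)
    sigma = pm.HalfCauchy('sigma', beta=10)
    y = pm.Normal('y', mu=beta * X + alpha, sd=sigma, observed=Y)
    # run several chains at once; gelman_rubin needs at least two
    trace = pm.sample(2000, chains=4, cores=4)

print("Rhat = {0}".format(pm.gelman_rubin(trace)))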
Example No. 3
def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None,
               extend=False, include_transformed=False,
               alpha=0.05, start=0, batches=None):
    R"""Create a data frame with summary statistics.

    Parameters
    ----------
    trace : MultiTrace instance
    varnames : list
        Names of variables to include in summary
    transform : callable
        Function to transform data (defaults to identity)
    stat_funcs : None or list
        A list of functions used to calculate statistics. By default,
        the mean, standard deviation, simulation standard error, and
        highest posterior density intervals are included.

        The functions will be given one argument, the samples for a
        variable as a 2 dimensional array, where the first axis
        corresponds to sampling iterations and the second axis
        represents the flattened variable (e.g., x__0, x__1,...). Each
        function should return either

        1) A `pandas.Series` instance containing the result of
           calculating the statistic along the first axis. The name
           attribute will be taken as the name of the statistic.
        2) A `pandas.DataFrame` where each column contains the
           result of calculating the statistic along the first axis.
           The column names will be taken as the names of the
           statistics.
    extend : boolean
        If True, use the statistics returned by `stat_funcs` in
        addition to, rather than in place of, the default statistics.
        This is only meaningful when `stat_funcs` is not None.
    include_transformed : bool
        Flag for reporting automatically transformed variables in addition
        to original variables (defaults to False).
    alpha : float
        The alpha level for generating posterior intervals. Defaults
        to 0.05. This is only meaningful when `stat_funcs` is None.
    start : int
        The starting index from which to summarize (each) chain. Defaults
        to zero.
    batches : None or int
        Batch size for calculating standard deviation for non-independent
        samples. Defaults to the smaller of 100 or the number of samples.
        This is only meaningful when `stat_funcs` is None.

    Returns
    -------
    `pandas.DataFrame` with summary statistics for each variable. The default
    statistics are `mean`, `sd`, `mc_error`, `hpd_2.5`, `hpd_97.5`, `n_eff`
    and `Rhat`; the last two are only computed for traces with 2 or more chains.

    Examples
    --------
    .. code:: ipython

        >>> import pymc3 as pm
        >>> trace.mu.shape
        (1000, 2)
        >>> pm.summary(trace, ['mu'])
                   mean        sd  mc_error     hpd_5    hpd_95
        mu__0  0.106897  0.066473  0.001818 -0.020612  0.231626
        mu__1 -0.046597  0.067513  0.002048 -0.174753  0.081924

                  n_eff      Rhat
        mu__0     487.0   1.00001
        mu__1     379.0   1.00203

    Other statistics can be calculated by passing a list of functions.

    .. code:: ipython

        >>> import pandas as pd
        >>> def trace_sd(x):
        ...     return pd.Series(np.std(x, 0), name='sd')
        ...
        >>> def trace_quantiles(x):
        ...     return pd.DataFrame(pm.quantiles(x, [5, 50, 95]))
        ...
        >>> pm.summary(trace, ['mu'], stat_funcs=[trace_sd, trace_quantiles])
                     sd         5        50        95
        mu__0  0.066473  0.000312  0.105039  0.214242
        mu__1  0.067513 -0.159097 -0.045637  0.062912
    """
    from .backends import tracetab as ttab

    if varnames is None:
        varnames = get_default_varnames(trace.varnames,
                       include_transformed=include_transformed)

    if batches is None:
        batches = min([100, len(trace)])

    funcs = [lambda x: pd.Series(np.mean(x, 0), name='mean'),
             lambda x: pd.Series(np.std(x, 0), name='sd'),
             lambda x: pd.Series(mc_error(x, batches), name='mc_error'),
             lambda x: _hpd_df(x, alpha)]

    if stat_funcs is not None:
        if extend:
            funcs = funcs + stat_funcs
        else:
            funcs = stat_funcs

    var_dfs = []
    for var in varnames:
        vals = transform(trace.get_values(var, burn=start, combine=True))
        flat_vals = vals.reshape(vals.shape[0], -1)
        var_df = pd.concat([f(flat_vals) for f in funcs], axis=1)
        var_df.index = ttab.create_flat_names(var, vals.shape[1:])
        var_dfs.append(var_df)
    dforg = pd.concat(var_dfs, axis=0)

    if (stat_funcs is not None) and (not extend):
        return dforg
    elif trace.nchains < 2:
        return dforg
    else:
        n_eff = pm.effective_n(trace,
                               varnames=varnames,
                               include_transformed=include_transformed)
        n_eff_pd = dict2pd(n_eff, 'n_eff')
        rhat = pm.gelman_rubin(trace,
                               varnames=varnames,
                               include_transformed=include_transformed)
        rhat_pd = dict2pd(rhat, 'Rhat')
        return pd.concat([dforg, n_eff_pd, rhat_pd],
                         axis=1, join_axes=[dforg.index])
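The docstring example above only exercises stat_funcs as a replacement for the default columns. The extend flag described in the parameters takes the other branch (funcs = funcs + stat_funcs) and appends the custom columns to the defaults; a short sketch, assuming a trace with a 'mu' variable as in the docstring example and reusing its trace_quantiles helper:

import pandas as pd
import pymc3 as pm

def trace_quantiles(x):
    # x is the (draws, flattened elements) array that summary passes to each stat func
    return pd.DataFrame(pm.quantiles(x, [5, 50, 95]))

# extend=True keeps mean, sd, mc_error and the hpd columns and appends 5/50/95
df = pm.summary(trace, ['mu'], stat_funcs=[trace_quantiles], extend=True)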
Example No. 4
 def posterior_rhat(self):
     return pm.gelman_rubin(self.posterior_)
Example No. 5
 def test_Rhat(self):
     rhat = pm.gelman_rubin(self.trace[self.burn:])
     for var in rhat:
         npt.assert_allclose(rhat[var], 1, rtol=0.01)
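What this assertion encodes: once the chains have converged and mixed, the Gelman-Rubin statistic is expected to sit within about 1% of 1 for every variable. A minimal end-to-end sketch of producing such a check, with model details that are illustrative rather than taken from the test fixture, and assuming a PyMC3 release whose sample() accepts the chains keyword:

import numpy.testing as npt
import pymc3 as pm

with pm.Model():
    pm.Normal('mu', mu=0, sd=1)
    # gelman_rubin requires at least two chains
    trace = pm.sample(1000, tune=1000, chains=2)

rhat = pm.gelman_rubin(trace)
for var in rhat:
    npt.assert_allclose(rhat[var], 1, rtol=0.01)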
Example No. 6
with pm.Model() as model:
	# Palatability slopes, one for each time point (one set for each laser condition)
	coeff_pal = pm.Normal("coeff_pal", mu = 0, sd = 1, shape = (len(analyze_indices), unique_lasers[0].shape[0]))
	# Observation standard deviation
	sd = pm.HalfCauchy("sd", 1)
	# Regression equation for the mean observation
	regression = coeff_pal[tt.cast(firing_data["Time"], 'int32'), tt.cast(firing_data["Laser"], 'int32')]*firing_data["Palatability"]
	# Actual observations
	obs = pm.Normal("obs", mu = regression, sd = sd, observed = firing_data["Firing"])

	# Metropolis sampling works best!
	tr = pm.sample(tune = 10000, draws = 50000, cores = 4, start = pm.find_MAP(), step = pm.Metropolis())

# Print the Gelman-Rubin rhat convergence statistics to a file
f = open("palatability_regression_convergence.txt", "w")
print(str(pm.gelman_rubin(tr)), file = f)
f.close()

# Save the trace to the output folder as a numpy array, for later reference
# Save every 10th sample from the trace, to avoid any autocorrelation issues
np.save("palatability_regression_trace.npy", tr[::10]["coeff_pal"])

# Convert the trace to a dataframe, and save that too
# Save every 10th sample from the trace, to avoid any autocorrelation issues
tr_df = pm.trace_to_dataframe(tr[::10])
tr_df.to_csv("palatability_regression_trace.csv")

# Plot the results of the palatability regression analysis
# First just plot the mean regression coefficients for every laser condition, across time
fig = plt.figure()
mean_coeff = np.mean(tr[::10]["coeff_pal"], axis = 0)
Example No. 7
# Change to the correct laser/taste directory
os.chdir('MCMC_switch/Laser{:d}/Taste{:d}'.format(laser_condition, taste_num))

# Choose the switch function according to the laser condition being used
switch_functions = {'0': fn.laser_off_trials, '1': fn.laser_early_trials, '2': fn.laser_middle_trials, '3': fn.laser_late_trials}

# Get the model and trace after fitting the switching model with MCMC
model, tr = switch_functions[str(laser_condition)](spikes_cat[laser_condition, taste_num, trial, :], num_emissions)

# Set up things to plot the traceplot for this trial
fig, axarr = plt.subplots(4, 2)
axarr = pm.traceplot(tr, ax = axarr)
fig.savefig("Trial{:d}.png".format(trial + 1))
plt.close('all')

# Save the trace for this trial
with open('Trial{:d}_trace.pickle'.format(trial + 1), 'wb') as handle:
	pickle.dump(tr, handle, protocol = pickle.HIGHEST_PROTOCOL)

# Save the Gelman-Rubin convergence statistics for this trial
with open('Trial{:d}_Gelman_Rubin.pickle'.format(trial + 1), 'wb') as handle:
	pickle.dump(pm.gelman_rubin(tr), handle, protocol = pickle.HIGHEST_PROTOCOL)

hf5.close()
Example No. 8
 def test_Rhat(self):
     rhat = pm.gelman_rubin(self.trace[self.burn:])
     for var in rhat:
         npt.assert_allclose(rhat[var], 1, rtol=0.01)
Example No. 9
	observed = pm.Normal("observed", mu = regression, sd = sd[state], observed = data_after_700)
	
with model_after_700:
	trace_after_700 = pm.sample(tune = 10000, draws = 2000, njobs = 3)
'''

# Save the traces and gelman_rubin statistics to file
# First the 2500ms condition
os.chdir("/media/patience/resorted_data/Plots/2500ms_EMG")
trace_2500_df = pm.backends.tracetab.trace_to_dataframe(trace_2500)
trace_2500_df.to_csv("trace_2500.csv")
np.save("trace_2500_switchpoints", trace_2500["switchpoints"])
np.save("trace_2500_alpha", trace_2500["alpha"])
np.save("trace_2500_beta", trace_2500["beta"])
with open("trace_2500_gelman_rubin.txt", "w") as f:
    print(pm.gelman_rubin(trace_2500), file=f)
np.save("kl_2500.npy", kl_2500)
# Then the 500ms condition
os.chdir("/media/patience/resorted_data/Plots/500ms_EMG")
trace_500_df = pm.backends.tracetab.trace_to_dataframe(trace_500)
trace_500_df.to_csv("trace_500.csv")
np.save("trace_500_switchpoints", trace_500["switchpoints"])
np.save("trace_500_alpha", trace_500["alpha"])
np.save("trace_500_beta", trace_500["beta"])
with open("trace_500_gelman_rubin.txt", "w") as f:
    print(pm.gelman_rubin(trace_500), file=f)
np.save("kl_500.npy", kl_500)
# Then the Jenn data condition
os.chdir("/media/patience/resorted_data/Jenn_Data/EMG_Plots")
trace_Jenn_df = pm.backends.tracetab.trace_to_dataframe(trace_Jenn)
trace_Jenn_df.to_csv("trace_Jenn.csv")
Example No. 10
llV = list(range(-lenUV + end_bits, -end_bits))[0::1000]  # from end_bits to end_bits from the end, steps of 1000
xV = [lenUV + ll for ll in llV]  # list of iteration numbers

# find the last to cross

last_x = 0  # initialise
last_i = 0
last_grV = list()
# we don't do the last variable U_T because it's always zero
for i in range(len(years_mod) - 1):

    # calculate GR on sections

    grV = [
        pymc3.gelman_rubin(np.vstack((log_UV1[-ll:, i], log_UV2[-ll:, i])))
        for ll in llV
    ]

    # if the Gelman-Rubin statistic falls below the threshold later than the latest we've found so far
    # then store

    threshold_x = next(x for x, gr in zip(xV, grV) if gr <= GR_threshold)

    if threshold_x > last_x:

        last_x = threshold_x
        last_i = i
        last_grV = grV

# plot the traces and the Gelman-Rubin statistic for the variable that was last to fall below the $\hat{R}$ threshold
Example No. 11
def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None,
               extend=False, include_transformed=False,
               alpha=0.05, start=0, batches=None):
    R"""Create a data frame with summary statistics.

    Parameters
    ----------
    trace : MultiTrace instance
    varnames : list
        Names of variables to include in summary
    transform : callable
        Function to transform data (defaults to identity)
    stat_funcs : None or list
        A list of functions used to calculate statistics. By default,
        the mean, standard deviation, simulation standard error, and
        highest posterior density intervals are included.

        The functions will be given one argument, the samples for a
        variable as a 2 dimensional array, where the first axis
        corresponds to sampling iterations and the second axis
        represents the flattened variable (e.g., x__0, x__1,...). Each
        function should return either

        1) A `pandas.Series` instance containing the result of
           calculating the statistic along the first axis. The name
           attribute will be taken as the name of the statistic.
        2) A `pandas.DataFrame` where each column contains the
           result of calculating the statistic along the first axis.
           The column names will be taken as the names of the
           statistics.
    extend : boolean
        If True, use the statistics returned by `stat_funcs` in
        addition to, rather than in place of, the default statistics.
        This is only meaningful when `stat_funcs` is not None.
    include_transformed : bool
        Flag for reporting automatically transformed variables in addition
        to original variables (defaults to False).
    alpha : float
        The alpha level for generating posterior intervals. Defaults
        to 0.05. This is only meaningful when `stat_funcs` is None.
    start : int
        The starting index from which to summarize (each) chain. Defaults
        to zero.
    batches : None or int
        Batch size for calculating standard deviation for non-independent
        samples. Defaults to the smaller of 100 or the number of samples.
        This is only meaningful when `stat_funcs` is None.

    See also
    --------
    summary : Generate a pretty-printed summary of a trace.

    Returns
    -------
    `pandas.DataFrame` with summary statistics for each variable. The default
    statistics are `mean`, `sd`, `mc_error`, `hpd_2.5`, `hpd_97.5`, `n_eff`
    and `Rhat`; the last two are only computed for traces with 2 or more chains.

    Examples
    --------
    .. code:: ipython

        >>> import pymc3 as pm
        >>> trace.mu.shape
        (1000, 2)
        >>> pm.summary(trace, ['mu'])
                   mean        sd  mc_error     hpd_5    hpd_95
        mu__0  0.106897  0.066473  0.001818 -0.020612  0.231626
        mu__1 -0.046597  0.067513  0.002048 -0.174753  0.081924

                  n_eff      Rhat
        mu__0     487.0   1.00001
        mu__1     379.0   1.00203

    Other statistics can be calculated by passing a list of functions.

    .. code:: ipython

        >>> import pandas as pd
        >>> def trace_sd(x):
        ...     return pd.Series(np.std(x, 0), name='sd')
        ...
        >>> def trace_quantiles(x):
        ...     return pd.DataFrame(pm.quantiles(x, [5, 50, 95]))
        ...
        >>> pm.summary(trace, ['mu'], stat_funcs=[trace_sd, trace_quantiles])
                     sd         5        50        95
        mu__0  0.066473  0.000312  0.105039  0.214242
        mu__1  0.067513 -0.159097 -0.045637  0.062912
    """
    from .backends import tracetab as ttab

    if varnames is None:
        varnames = get_default_varnames(trace.varnames,
                       include_transformed=include_transformed)

    if batches is None:
        batches = min([100, len(trace)])

    funcs = [lambda x: pd.Series(np.mean(x, 0), name='mean'),
             lambda x: pd.Series(np.std(x, 0), name='sd'),
             lambda x: pd.Series(mc_error(x, batches), name='mc_error'),
             lambda x: _hpd_df(x, alpha)]

    if stat_funcs is not None:
        if extend:
            funcs = funcs + stat_funcs
        else:
            funcs = stat_funcs

    var_dfs = []
    for var in varnames:
        vals = transform(trace.get_values(var, burn=start, combine=True))
        flat_vals = vals.reshape(vals.shape[0], -1)
        var_df = pd.concat([f(flat_vals) for f in funcs], axis=1)
        var_df.index = ttab.create_flat_names(var, vals.shape[1:])
        var_dfs.append(var_df)
    dforg = pd.concat(var_dfs, axis=0)

    if (stat_funcs is not None) and (not extend):
        return dforg
    elif trace.nchains < 2:
        return dforg
    else:
        n_eff = pm.effective_n(trace,
                               varnames=varnames,
                               include_transformed=include_transformed)
        n_eff_pd = dict2pd(n_eff, 'n_eff')
        rhat = pm.gelman_rubin(trace,
                               varnames=varnames,
                               include_transformed=include_transformed)
        rhat_pd = dict2pd(rhat, 'Rhat')
        return pd.concat([dforg, n_eff_pd, rhat_pd],
                         axis=1, join_axes=[dforg.index])
	# Categorical observations
	obs = pm.DensityDist('obs', logp, observed = {'value': spikes_cat[0, :, :150]})

	# Inference button :D
	tr = pm.sample(1000000, init = None, step = pm.Metropolis(), njobs = 2, trace = [t1, t2], start = {'t1': np.ones(num_trials)*25.0, 't2': np.ones(num_trials)*120.0})

# Make a list to save the converged trial numbers and switchpoints for this laser condition
this_converged_trial_nums = []
this_switchpoints = []
# Lists for palatability ranks and firing rates in this laser condition
this_pal = []
this_firing = []
# Get the spiking data for this laser condition
inactivated_spikes.append(spikes[0, :, :150, :])
# Get the Gelman-Rubin convergence statistics
converged = pm.gelman_rubin(tr)
# Run through the trials in this condition
for i in range(num_trials):
	# Check if this trial converged
	if converged['t1'][i] < 1.1 and converged['t2'][i] < 1.1:
		# Save 1.) Trial number
		this_converged_trial_nums.append(i)
		# 2.) Switchpoints (averaged over the last 100k samples, skipping 100 samples at a time)
		start = int(np.mean(tr[-100000::100]['t1'][:, i]))
		end = int(np.mean(tr[-100000::100]['t2'][:, i]))
		#start = int(Counter(tr[-100000::100]['t1'][:, i].astype('int')).most_common()[0][0])
		#end = int(Counter(tr[-100000::100]['t2'][:, i].astype('int')).most_common()[0][0])
		this_switchpoints.append([start, end])
		# 3.) Palatability rank
		this_pal.append(palatability[0, i])
		# 4.) Firing rates