Example #1
def pymc_linear_fit(data1, data2, data1err=None, data2err=None,
        print_results=False, intercept=True, nsample=5000, burn=1000,
        thin=10, return_MC=False, guess=None, ignore_nans=True,
        progress_bar=True):
    import numpy as np
    old_errsettings = np.geterr()
    import pymc # pymc breaks np error settings
    np.seterr(**old_errsettings)

    if ignore_nans:
        data1,data2,data1err,data2err = remove_nans(data1,data2,data1err,data2err)

    if guess is None:
        guess = (0,0)

    xmu = pymc.distributions.Uninformative(name='x_observed',value=0)
    if data1err is None:
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                value=data1, tau=1, trace=False)
    else:
        xtau = pymc.distributions.Uninformative(name='x_tau',
                value=1.0/data1err**2, observed=True, trace=False)
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                value=data1, tau=xtau, trace=False)

    d={'slope':pymc.distributions.Uninformative(name='slope', value=guess[0]), 
       }
    if intercept:
        d['intercept'] = pymc.distributions.Uninformative(name='intercept',
                value=guess[1])

        @pymc.deterministic(trace=False)
        def model(x=xdata,slope=d['slope'],intercept=d['intercept']):
            return x*slope+intercept
    else:
        @pymc.deterministic(trace=False)
        def model(x=xdata,slope=d['slope']):
            return x*slope

    d['f'] = model

    if data2err is None:
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                value=data2, tau=1, trace=False)
    else:
        ytau = pymc.distributions.Uninformative(name='y_tau',
                value=1.0/data2err**2, observed=True, trace=False)
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                value=data2, tau=ytau, trace=False)
    d['y'] = ydata
    
    MC = pymc.MCMC(d)
    MC.sample(nsample,burn=burn,thin=thin,progress_bar=progress_bar)

    MCs = MC.stats()
    m,em = MCs['slope']['mean'],MCs['slope']['standard deviation']
    if intercept: 
        b,eb = MCs['intercept']['mean'],MCs['intercept']['standard deviation']

    if print_results:
        if intercept:
            print("MCMC Best fit y = %g x + %g" % (m, b))
        else:
            print("MCMC Best fit y = %g x" % m)
        print("m = %g +/- %g" % (m, em))
        if intercept:
            print("b = %g +/- %g" % (b, eb))
        # include the fitted intercept (if any) in the chi^2 residuals
        chi2 = ((data2 - (data1*m + (b if intercept else 0)))**2).sum()
        print("Chi^2 = %g, N = %i" % (chi2, data1.shape[0] - 1))

    if return_MC: 
        return MC
    if intercept:
        return m,b
    else:
        return m
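
A minimal usage sketch for pymc_linear_fit on synthetic data (illustrative only; ignore_nans=False avoids the remove_nans helper, which is not defined in this snippet):

import numpy as np

x = np.linspace(0, 10, 100)
y = 2.0 * x + 1.0 + np.random.randn(100) * 0.5   # true slope 2, intercept 1
m, b = pymc_linear_fit(x, y, data2err=np.full(100, 0.5),
                       print_results=True, ignore_nans=False)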
mut_region_length = config.getint('Input','mut_region_length')
expname = config.get('Input','expname') # e.g. "MscS mut1"; describes the experiment without the batch numbers.

# The file to save the trace to is passed as a command-line argument.
# The config file should also be passed on the command line; this enables
# some rudimentary error checking that everything is consistent.
parser = argparse.ArgumentParser()
parser.add_argument('runnum', help='Run number appended to the output filenames.')
parser.add_argument('savefn', help='Base filename of the database file to save traces to.')
#parser.add_argument('cfg_fn',help='Filename pymc config file for the run.')

args = parser.parse_args()

# make sure the generic_energy_matrix module and this script are
# looking at the same data
# cli_cfg = os.path.abspath(args.cfg_fn)
# module_cfg = os.path.abspath(generic_energy_matrix.cfg_fn)
# if cli_cfg != module_cfg:
#     raise ValueError("generic_energy_matrix module %s and script cli %s are not consistent" % (module_cfg,cli_cfg))

#print args.db_fn
fulldbname = '/home/wireland/mscS4-8-15/results/' + str(args.savefn) + str(args.runnum) + '.sql'
#fulldbname = '/home/wireland/lassoresults/MCMC/MCMCtest3_' + str(args.runnum) + '.sql'
M = pymc.MCMC(generic_energy_matrix_nonunique,db='sqlite',dbname=fulldbname)
M.use_step_method(stepper.GaugePreservingStepper,generic_energy_matrix_nonunique.emat)
f = open('/home/wireland/mscS4-8-15/runsdetails/' + str(args.savefn) + str(args.runnum) + '.txt','w')
f.writelines(['dbname = ' + fulldbname + '\n', 'mut_region_start = ' + str(mut_region_start) + '\n','mut_region_length = ' + str(mut_region_length) + '\n', 'exp_name = ' + str(expname)])
f.close()
M.sample(30000,thin=10)

    #    return p.evaluate(a)

    @pm.stochastic(dtype=float, observed=True)
    def loglike(value=1.0, fg=fg, gamma=gamma):
        f = fg[0]
        g = fg[1:]
        return gamma * 20. * f + gamma * (min(0., g[0]))
        # return gamma * f + \
        #         np.sum(np.log(1.0 / (1.0 + np.exp(-gamma*10. * g))))

    return locals()


if __name__ == '__main__':
    model = make_model()
    mcmc = pm.MCMC(model)
    mcmc.use_step_method(ps.RandomWalk, model['a'], proposal_sd=0.002)
    smc = SteadyPaceSMC(mcmc,
                        num_particles=400,
                        num_mcmc=5,
                        verbose=4,
                        gamma_is_an_exponent=True,
                        ess_reduction=0.9,
                        adapt_proposal_step=True,
                        mpi=mpi)
    smc.initialize(0.1)
    results = []
    for gamma in np.linspace(0., 20, 200)[1:]:
        smc.move_to(gamma)
        pa = smc.get_particle_approximation().gather()
        if mpi.COMM_WORLD.Get_rank() == 0:
Example #4
                                           Tleaf=df["Tleaf"],
                                           Jmax=Jmax,
                                           Vcmax=Vcmax,
                                           Rd=Rd)
    return An


y = pymc.Normal('y',
                mu=farquhar_wrapper,
                tau=1.0 / obs_sigma**2,
                value=obs,
                observed=True)

N = 100000
model = pymc.Model([y, df, Vcmax, Jmax, Rd])
M = pymc.MCMC(model)
M.sample(iter=N, burn=int(N * 0.1), thin=10)

Vcmax = M.stats()["Vcmax"]['mean']
Jmax = M.stats()["Jmax"]['mean']
Rd = M.stats()["Rd"]['mean']
(An, Anc, Anj) = F.calc_photosynthesis(Ci=df["Ci"],
                                       Tleaf=df["Tleaf"],
                                       Jmax=Jmax,
                                       Vcmax=Vcmax,
                                       Rd=Rd)
rmse = np.sqrt(((obs - An)**2).mean(0))
print("RMSE: %.4f" % (rmse))

# Get the fits
Vcmax = M.trace('Vcmax').gettrace()
    analysis_power = observed_power_spectrum[fftfreq >= 0][1:-1]

    # get a simple estimate of the power law index
    estimate = rn_utils.do_simple_fit(analysis_frequencies, analysis_power)
    c_estimate = estimate[0]
    m_estimate = estimate[1]

    # plot the power law spectrum
    power_fit = c_estimate * analysis_frequencies**(-m_estimate)

    # Define the MCMC model
    this_model = pymcmodels.single_power_law(analysis_frequencies,
                                             analysis_power, m_estimate)

    # Set up the MCMC model
    M1 = pymc.MCMC(this_model)

    # Run the sampler
    M1.sample(iter=50000, burn=1000, thin=10, progress_bar=False)

    # Get the power law index and save the results
    pli = M1.trace("power_law_index")[:]
    s_pli = rn_utils.summary_stats(pli, 0.05, bins=40)
    pi95 = rn_utils.credible_interval(pli, ci=0.95)

    cli = M1.trace("power_law_norm")[:]
    s_cli = rn_utils.summary_stats(cli, 0.05, bins=40)
    ci95 = rn_utils.credible_interval(cli, ci=0.95)

    bayes_mean_fit = np.exp(s_cli["mean"]) * (analysis_frequencies**
                                              -s_pli["mean"])
Example #6
    def setUpClass(self):
        self.S = pymc.MCMC(disaster_model,
                           db='pickle',
                           dbname=os.path.join(testdir, 'Disaster.pickle'),
                           dbmode='w')
        self.S.use_step_method(pymc.Metropolis, self.S.early_mean, tally=True)
Example #7
    def setUpClass(self):
        self.S = pymc.MCMC(disaster_model, db='base')
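
These test fixtures use the coal-mining disasters model that ships with PyMC2; a self-contained version would set up its imports roughly like this (testdir is an assumed scratch directory, not part of the original snippet):

import os
import pymc
from pymc.examples import disaster_model

testdir = 'testresults'  # hypothetical directory for the database files
os.makedirs(testdir, exist_ok=True)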
alpha_of_beta_after_map, beta_of_beta_after_map = alpha_of_beta.value, beta_of_beta.value
alpha_of_beta_after_map, beta_of_beta_after_map

# In[34]:

a_list = []
b_list = []

alpha_of_beta.value, beta_of_beta.value = (alpha_of_beta.random(),
                                           beta_of_beta.random())
alpha_of_beta_random, beta_of_beta_random = alpha_of_beta.value, beta_of_beta.value
a_list.append(alpha_of_beta_random)
b_list.append(beta_of_beta_random)
print(alpha_of_beta_random, beta_of_beta_random)
bin_beta_MCMC = mc.MCMC(bin_beta_model)

# In[35]:

bin_beta_MCMC.sample(iter=40000, burn=20000, thin=1)

alpha_of_beta_after_mcmc, beta_of_beta_after_mcmc = alpha_of_beta.value, beta_of_beta.value

a_list.append(alpha_of_beta_after_mcmc)
b_list.append(beta_of_beta_after_mcmc)

alpha_of_beta_after_mcmc, beta_of_beta_after_mcmc

# In[36]:

mc.Matplot.plot(bin_beta_MCMC)

y = pymc.Normal('y',
                mu=y_model,
                tau=1.0 / (sigma**2.0),
                value=ydata,
                observed=True)

DelayLin_model = dict(kappa=kappa,
                      tau=tau,
                      Omega=Omega,
                      sigma=sigma,
                      y_model=y_model,
                      y=y)

MDL = pymc.MCMC(DelayLin_model)
MDL.sample(niter, burn)

kappa_trace = MDL.trace('kappa')[:]
tau_trace = MDL.trace('tau')[:]
Omega_trace = MDL.trace('Omega')[:]

kappa_est = np.median(kappa_trace)
tau_est = np.median(tau_trace)
Omega_est = np.median(Omega_trace)

a_est = ((2 * np.pi) * Omega_est / np.tan(
    (2 * np.pi) * Omega_est * tau_est)) - epsilon
b_est = -(2 * np.pi) * Omega_est / np.sin((2 * np.pi) * Omega_est * tau_est)

Power_est = func_DelayLin(w=w, kappa=kappa_est, a=a_est, b=b_est, tau=tau_est)
import pymc
import numpy as np
import matplotlib.pyplot as plt

#---------------------------- Run Time Params --------------------------------#

# TODO: probably going to use command-line flag conventions with __init__(self, *args, **kwargs)

#---------------------------- Load Data --------------------------------------#
data_matrix = np.loadtxt("../data/Y_alpha0.1_K5_N20.txt", delimiter=',')
num_people = 20
num_groups = 5
alpha = np.ones(num_groups).ravel() * 0.1
#B = np.eye(num_groups)*0.85
#B = B + np.random.random(size=[num_groups,num_groups])*0.1

B = np.eye(num_groups) * 0.8
B = B + np.ones([num_groups, num_groups]) * 0.2 - np.eye(num_groups) * 0.2
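# B now has within-group link probability 0.8 on the diagonal and 0.2 between groups.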

#---------------------------- Setup Model -----------------------------------#
raw_model = model.create_model(data_matrix, num_people, num_groups, alpha, B)
#model_instance = pymc.Model(raw_model)

#---------------------------- Call MAP to initialize MCMC -------------------#
#pymc.MAP(model_instance).fit(method='fmin_powell')
print('---------- Finished Running MAP to Set MCMC Initial Values ----------')
#---------------------------- Run MCMC --------------------------------------#
print('--------------------------- Starting MCMC ---------------------------')
M = pymc.MCMC(raw_model)
M.sample(100000, 50000, thin=5, verbose=0)
def main():
    print('Starting MCMC simulation and fit.')
    global params
    # Check if path to configfile is provided and if file exists.
    if len(sys.argv) > 1:
        if os.path.isfile(sys.argv[1]):
            params['NAME_CONFIGFILE_TEMPLATE'] = sys.argv[1]
            params['RUN_SCRIPT'] = 'RUN_HELPER.sh'
        else:
            print('* ERROR:', sys.argv[1], 'does not exist.')
        if len(sys.argv) > 2:
            if os.path.isfile(sys.argv[2]):
                params['RUN_SCRIPT'] = sys.argv[2]
            else:
                print('* ERROR: Optional run script', sys.argv[2],
                      'does not exist.')
                print('Aborting.')
                exit()
    else:
        print('* ERROR: No command line argument for configfile provided.')

    if params['NAME_CONFIGFILE_TEMPLATE'] == '':
        print('Usage: python3', sys.argv[0],
              '<PATH/TO/CONFIGFILE> [<PATH/TO/RUN/SCRIPT>]')
        print('Aborting.')
        exit()

    name = create_testcase_name(TESTED_VARIABLES, params)
    db_name = name + '.pickle'
    parse_pymc_from_config_file(params)

    sample_iterations = params['ITERATIONS']
    sample_burns = params['BURNS']
    print('Number of sample iterations: {}.'.format(sample_iterations))
    print('Number of sample burns: {}.'.format(sample_burns))
    targetValues = [params['T_NORMAL'], params['T_TUMOR'], params['T_VESSEL']]
    print('Target values: [T_normal, T_tumor, T_vessel]')
    print('Target values for this dataset: {}.'.format(targetValues))

    # Apply MCMC sampler.
    MDL = pymc.MCMC(fitSimulation(targetValues), db='pickle',
                    dbname=db_name)
    MDL.sample(iter=sample_iterations, burn=sample_burns)
    print()

    # Extract and plot results.
    temperatures = MDL.stats()['callScaFES']['mean']
    lambda_bt = MDL.stats()['lambda_bt']['mean']
    pymc.Matplot.plot(MDL)
    print()
    print('T_final: [T_normal, T_tumor, T_vessel]')
    print('T_final:', temperatures)
    print()
    print('lambda_bt:', lambda_bt)

    graph = pymc.graph.graph(MDL)
    graph.write_png('graph.png')

    print()
    print('Number of ScaFES calls:', count)
    print()

    T_normal = MDL.trace('callScaFES')[:,0]
    T_tumor = MDL.trace('callScaFES')[:,1]
    T_vessel = MDL.trace('callScaFES')[:,2]
    lambda_bt = MDL.trace('lambda_bt')[:]

    l2_norm = np.linalg.norm(np.subtract(MDL.trace('callScaFES')[:],
                                         targetValues), 2, axis=1)
    iterations = l2_norm.shape[0]
    nc_file = create_mcmc_netcdf_file('pymc_' + name + '.nc', iterations)
    save_vector_to_mcmc_file(nc_file, lambda_bt, 'lambda_bt')
    save_vector_to_mcmc_file(nc_file, l2_norm, 'L2-norm')
    save_vector_to_mcmc_file(nc_file, T_normal, 'T_normal')
    save_vector_to_mcmc_file(nc_file, T_tumor, 'T_tumor')
    save_vector_to_mcmc_file(nc_file, T_vessel, 'T_vessel')
    write_ini_file_to_nc_file(nc_file, params['NAME_CONFIGFILE_TEMPLATE'])
    close_nc_file(nc_file)

    MDL.db.close()
    print()
    print('Done.')
Example #12
                pod = 1.-stats.norm.cdf((np.log(ai)-lmd)/beta)
                pinrange = stats.norm.cdf(2.0619, loc=ai, scale=sigmae)-\
                        stats.norm.cdf(1.8243, loc=ai, scale=sigmae)
                if value == 0:
                    p = (1.-pod)+pod*(1.-pinrange)
                    return np.log(p)
                else:
                    return np.log(pod*pinrange)
            def random(ai):
                pod = 1.-stats.norm.cdf((np.log(ai)-lmd)/beta)
                pinrange = stats.norm.cdf(2.0619, loc=ai, scale=sigmae)-\
                        stats.norm.cdf(1.8243, loc=ai, scale=sigmae)
                if np.random.rand()<=pod:
                    return stats.bernoulli.rvs(p=pinrange,size=1)
                else:
                    return 0

        return locals()

    M = pymc.MCMC(make_pymc_model(), db='pickle', dbname='soliman_pymc.pickle')
    # graph = pymc.graph.graph(M)
    # graph.write_png('soliman2014_pymc.png')
    # import os
    # os.system('eog soliman2014_pymc.png')

    nchain = 4; niter = int(1e6); nburn = niter // 2; nthin = 2
    for i in range(nchain):
        M.sample(iter=niter, burn=nburn, thin=nthin)
    pymc.Matplot.plot(M)
    M.db.close()
Example #13
def pymc_linear_fit_mixture(data1, data2, data1err=None, data2err=None,
        print_results=False, intercept=True, nsample=5000, burn=1000,
        thin=10, return_MC=False, guess=None, verbose=0):
    """
    ***NOT TESTED***  ***MAY IGNORE X ERRORS***
    Use pymc to fit a line to data with outliers, assuming outliers
    come from a broad, uniform distribution that covers all the data.

    The model doesn't work exactly right; it over-rejects "outliers" because
    'bady' is an unobserved stochastic parameter that can therefore move. 

    Maybe it should be implemented as a "potential" or a mixture model, e.g.
    as in Hogg 2010 / VanderPlas's astroML example:
    http://astroml.github.com/book_figures/chapter8/fig_outlier_rejection.html
    """
    import pymc

    if guess is None:
        guess = (0,0)

    xmu = pymc.distributions.Uninformative(name='x_observed',value=0)
    if data1err is None:
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                value=data1, tau=1, trace=False)
    else:
        xtau = pymc.distributions.Uninformative(name='x_tau',
                value=1.0/data1err**2, observed=True, trace=False)
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                value=data1, tau=xtau, trace=False)

    d={'slope':pymc.distributions.Uninformative(name='slope', value=guess[0]), 
        #d['badvals'] = pymc.distributions.Binomial('bad',len(data2),0.5,value=[False]*len(data2))
        #d['badx'] = pymc.distributions.Uniform('badx',min(data1-data1err),max(data1+data1err),value=data1)
        #'badvals':pymc.distributions.DiscreteUniform('bad',0,1,value=[False]*len(data2)),
        #'bady':pymc.distributions.Uniform('bady',min(data2-data2err),max(data2+data2err),value=data2),
       }

    # set up "mixture model" from Hogg et al 2010:
    # uniform prior on Pb, the fraction of bad points
    Pb = pymc.Uniform('Pb', 0, 1.0, value=0.1)

    # uniform prior on Yb, the centroid of the outlier distribution
    Yb = pymc.Uniform('Yb', -10000, 10000, value=0)

    # uniform prior on log(sigmab), the spread of the outlier distribution
    log_sigmab = pymc.Uniform('log_sigmab', -10, 10, value=5)

    @pymc.deterministic
    def sigmab(log_sigmab=log_sigmab):
        return np.exp(log_sigmab)

    MixtureNormal = pymc.stochastic_from_dist('mixturenormal',
                                              logp=mixture_likelihood,
                                              dtype=float,
                                              mv=True)



    if intercept:
        d['intercept'] = pymc.distributions.Uninformative(name='intercept',
                value=guess[1])

        @pymc.deterministic(trace=False)
        def model(x=xdata,slope=d['slope'],intercept=d['intercept']):
            return (x*slope+intercept) 

    else:

        @pymc.deterministic(trace=False)
        def model(x=xdata,slope=d['slope']):
            return x*slope

    d['f'] = model

    dyi = data2err if data2err is not None else np.ones(data2.shape)

    y_mixture = MixtureNormal('y_mixture', model=model, dyi=dyi,
                              Pb=Pb, Yb=Yb, sigmab=sigmab,
                              observed=True, value=data2)

    d['y'] = y_mixture


    #if data2err is None:
    #    ydata = pymc.distributions.Normal('y', mu=model, observed=True,
    #            value=data2, tau=1, trace=False)
    #else:
    #    ytau = pymc.distributions.Uninformative(name='y_tau',
    #            value=1.0/data2err**2, observed=True, trace=False)
    #    ydata = pymc.distributions.Normal('y', mu=model, observed=True,
    #            value=data2, tau=ytau, trace=False) 
    #d['y'] = ydata
    
    MC = pymc.MCMC(d)
    MC.sample(nsample,burn=burn,thin=thin,verbose=verbose)
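
mixture_likelihood is referenced above but never defined in this example. A minimal sketch of what it could look like, following the Hogg, Bovy & Lang (2010) inlier/outlier mixture (the name, signature, and body here are assumptions, not the original code):

import numpy as np

def mixture_likelihood(value, model, dyi, Pb, Yb, sigmab):
    # Hypothetical reconstruction: log-likelihood of a two-component mixture.
    # Inliers: Normal(model, dyi); outliers: Normal(Yb, sqrt(sigmab^2 + dyi^2)).
    var_in = dyi**2
    var_out = sigmab**2 + dyi**2
    logL_in = -0.5 * ((value - model)**2 / var_in + np.log(2 * np.pi * var_in))
    logL_out = -0.5 * ((value - Yb)**2 / var_out + np.log(2 * np.pi * var_out))
    # Sum the log of the Pb-weighted mixture over all data points.
    return np.sum(np.logaddexp(np.log(1 - Pb) + logL_in,
                               np.log(Pb) + logL_out))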
Example #14
def pymc_linear_fit_withoutliers(data1, data2, data1err=None, data2err=None,
        print_results=False, intercept=True, nsample=5000, burn=1000,
        thin=10, return_MC=False, guess=None, verbose=0, ignore_nans=True,
        progress_bar=True):
    """
    Use pymc to fit a line to data with outliers, assuming outliers
    come from a broad, uniform distribution that covers all the data.

    The model doesn't work exactly right; it over-rejects "outliers" because
    'bady' is an unobserved stochastic parameter that can therefore move. 

    Maybe it should be implemented as a "potential" or a mixture model, e.g.
    as in Hogg 2010 / VanderPlas's astroML example:
    http://astroml.github.com/book_figures/chapter8/fig_outlier_rejection.html
    """
    import pymc

    if ignore_nans:
        data1,data2,data1err,data2err = remove_nans(data1,data2,data1err,data2err)

    if guess is None:
        guess = (0,0)

    xmu = pymc.distributions.Uninformative(name='x_observed',value=0)
    if data1err is None:
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                value=data1, tau=1, trace=False)
    else:
        xtau = pymc.distributions.Uninformative(name='x_tau',
                value=1.0/data1err**2, observed=True, trace=False)
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                value=data1, tau=xtau, trace=False)

    d={'slope':pymc.distributions.Uninformative(name='slope', value=guess[0]), 
        #d['badvals'] = pymc.distributions.Binomial('bad',len(data2),0.5,value=[False]*len(data2))
        #d['badx'] = pymc.distributions.Uniform('badx',min(data1-data1err),max(data1+data1err),value=data1)
        'badvals':pymc.distributions.DiscreteUniform('bad',0,1,value=[False]*len(data2)),
        'bady':pymc.distributions.Uniform('bady',min(data2-data2err),max(data2+data2err),value=data2),
       }
    if intercept:
        d['intercept'] = pymc.distributions.Uninformative(name='intercept',
                value=guess[1])

        @pymc.deterministic(trace=False)
        def model(x=xdata,slope=d['slope'],intercept=d['intercept'],
                badvals=d['badvals'], bady=d['bady']):
            return (x*slope+intercept) * (True-badvals) + badvals*bady

    else:

        @pymc.deterministic(trace=False)
        def model(x=xdata,slope=d['slope'], badvals=d['badvals'], bady=d['bady']):
            return x*slope*(True-badvals) + badvals*bady

    d['f'] = model

    if data2err is None:
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                value=data2, tau=1, trace=False)
    else:
        ytau = pymc.distributions.Uninformative(name='y_tau',
                value=1.0/data2err**2, observed=True, trace=False)
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                value=data2, tau=ytau, trace=False) 
    d['y'] = ydata
    
    MC = pymc.MCMC(d)
    MC.sample(nsample,burn=burn,thin=thin,verbose=verbose,progress_bar=progress_bar)

    MCs = MC.stats()
    m,em = MCs['slope']['mean'],MCs['slope']['standard deviation']
    if intercept: 
        b,eb = MCs['intercept']['mean'],MCs['intercept']['standard deviation']

    if print_results:
        if intercept:
            print("MCMC Best fit y = %g x + %g" % (m, b))
        else:
            print("MCMC Best fit y = %g x" % m)
        print("m = %g +/- %g" % (m, em))
        if intercept:
            print("b = %g +/- %g" % (b, eb))
        # include the fitted intercept (if any) in the chi^2 residuals
        chi2 = ((data2 - (data1*m + (b if intercept else 0)))**2).sum()
        print("Chi^2 = %g, N = %i" % (chi2, data1.shape[0] - 1))

    if return_MC: 
        return MC
    if intercept:
        return m,b
    else:
        return m
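
A hypothetical usage sketch for the outlier-tolerant fit (synthetic data with injected outliers; data2err is required here because the bady prior is built from it, and ignore_nans=False bypasses the undefined remove_nans helper):

import numpy as np

x = np.linspace(0, 10, 50)
y = 2.5 * x + 1.0 + np.random.randn(50) * 0.5
y[::10] += 15.0                                  # inject a few outliers
m, b = pymc_linear_fit_withoutliers(x, y, data2err=np.full(50, 0.5),
                                    ignore_nans=False, print_results=True)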
Example #15
    -0.8762523, 0.47377688, 0.76516415, 0.27890419, -0.07819642, -0.13399348,
    0.82877293, 0.22308624, 0.7485783, -0.14700254, -1.03145657, 0.85641097,
    0.43396285, 0.47901653, 0.80137086, 0.33566812, 0.71443253, -1.57590815,
    -0.24090179, -2.0128344, 0.34503324, 0.12944091, -1.5327008, 0.06363034,
    0.21042021, -0.81425636, 0.20209279, -1.48130423, -1.04983523, 0.16001774,
    -0.75239072, 0.33427956, -0.10224921, 0.26463561, -1.09374674, -0.72749811,
    -0.54892116, -1.89631844, -0.94393545, -0.2521341, 0.26840341, 0.23563219,
    0.35333094
])

# Model: the data are truncated-normally distributed with unknown upper bound.
mu = pm.Normal('mu', 0, .01, value=0)
tau = pm.Exponential('tau', .01, value=1)
cutoff = pm.Exponential('cutoff', 1, value=1.3)
D = pm.TruncatedNormal('D',
                       mu,
                       tau,
                       -np.inf,
                       cutoff,
                       value=data,
                       observed=True)

M = pm.MCMC([mu, tau, cutoff, D])

# Use a TruncatedMetropolis step method that will never propose jumps below D's maximum value.
M.use_step_method(TruncatedMetropolis, cutoff, D.value.max(), np.inf)
# Get a handle to the step method handling cutoff to investigate its behavior.
S = M.step_method_dict[cutoff][0]

M.isample(10000, 0, 10)
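
TruncatedMetropolis is not defined in this snippet. A minimal sketch of such a step method, assuming it subclasses pymc.Metropolis, proposes from a truncated normal, and corrects the acceptance ratio with a Hastings factor (the details are an assumption in the spirit of the PyMC2 user guide):

import pymc

class TruncatedMetropolis(pymc.Metropolis):
    def __init__(self, stochastic, low_bound, up_bound, *args, **kwargs):
        self.low_bound = low_bound
        self.up_bound = up_bound
        pymc.Metropolis.__init__(self, stochastic, *args, **kwargs)

    def propose(self):
        # Propose from a normal truncated to (low_bound, up_bound).
        tau = 1.0 / (self.adaptive_scale_factor * self.proposal_sd)**2
        self.stochastic.value = pymc.rtruncated_normal(
            self.stochastic.value, tau, self.low_bound, self.up_bound)

    def hastings_factor(self):
        # The truncated proposal is asymmetric, so correct the MH ratio.
        tau = 1.0 / (self.adaptive_scale_factor * self.proposal_sd)**2
        cur_val, last_val = self.stochastic.value, self.stochastic.last_value
        lp_for = pymc.truncated_normal_like(cur_val, last_val, tau,
                                            self.low_bound, self.up_bound)
        lp_bak = pymc.truncated_normal_like(last_val, cur_val, tau,
                                            self.low_bound, self.up_bound)
        return lp_bak - lp_for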
Example #16
lifetime_[censor] = 10 - birth[censor]  #we only see this part of their lives.

#this begins the model
alpha = pm.Uniform("alpha", 0, 20)
#lets just use uninformative priors
beta = pm.Uniform("beta", 0, 20)


@pm.observed
def survival(value=lifetime_, alpha=alpha, beta=beta):
    # Censored Weibull log-likelihood: uncensored points contribute the full
    # log-pdf; every point (censored or not) contributes -(t/beta)**alpha.
    return np.sum((1 - censor) *
                  (np.log(alpha / beta) + (alpha - 1) * np.log(value / beta))
                  - (value / beta)**alpha)


mcmc = pm.MCMC([alpha, beta, survival])
mcmc.sample(50000, 30000)

alpha_samples = mcmc.trace('alpha')[:]
beta_samples = mcmc.trace('beta')[:]

# histogram of the samples:

plt.hist(alpha_samples, normed=True)
plt.show()
plt.hist(beta_samples, normed=True)
plt.show()

medianlifetime_samples = beta_samples * (np.log(2)**(1 / alpha_samples))

plt.hist(medianlifetime_samples, normed=True)
Example #17
    def approximate_map(self,
                        individual_subjs=True,
                        minimizer='Powell',
                        use_basin=False,
                        fall_to_simplex=True,
                        cycles=1,
                        debug=False,
                        minimizer_kwargs=None,
                        basin_kwargs=None):
        """Set model to its approximate MAP.

        :Arguments:
            individual_subjs : bool <default=True>
                Optimize each subject individually.
            minimizer : str <default='Powell'>
                Optimize using Powell. See scipy.optimize.minimize.
                Another choice might be 'Nelder-Mead'.
            use_basin : bool <default=False>
                Use basin hopping optimization to avoid local minima.
            fall_to_simplex : bool <default=True>
                Whether MAP should fall back to the simplex algorithm if the
                Powell method fails.
            cycles : int <default=1>
                How many times to optimize the model.
                Since lower level nodes depend on higher level nodes,
                they might be estimated differently in a second pass.
            minimizer_kwargs : dict <default={}>
                Keyword arguments passed to minimizer.
                See scipy.optimize.minimize for options.
            basin_kwargs : dict <default={}>
                Keyword arguments passed to basinhopping.
                See scipy.optimize.basinhopping for options.
            debug : bool <default=False>
                Whether to print current values and neg logp at each
                iteration.

        """
        ###############################
        # In order to find the MAP of a hierarchical model one needs
        # to integrate over the subj nodes. Since this is difficult we
        # optimize the generations iteratively on the generation below.

        # only need this to get at the generations
        # TODO: Find out how to get this from pymc.utils.find_generations()
        m = pm.MCMC(self.nodes_db.node)
        generations_unsorted = m.generations
        generations_unsorted.append(self.get_observeds().node)
        # Filter out empty generations
        generations_unsorted = [
            gen for gen in generations_unsorted if len(gen) != 0
        ]
        # Sort generations according to order of nodes_db
        generations = []
        for gen in generations_unsorted:
            generations.append([
                row.node for name, row in self.nodes_db.iterrows()
                if name in [node.__name__ for node in gen]
            ])

        for cyc in range(cycles):
            for i in range(len(generations) - 1, 0, -1):
                if self.is_group_model and individual_subjs and (
                        i == len(generations) - 1):
                    self._approximate_map_subj(
                        fall_to_simplex=fall_to_simplex,
                        minimizer=minimizer,
                        use_basin=use_basin,
                        debug=debug,
                        minimizer_kwargs=minimizer_kwargs,
                        basin_kwargs=basin_kwargs)
                    continue
                # Optimize the generation at i-1 evaluated over the generation at i
                self._partial_optimize(generations[i - 1],
                                       generations[i],
                                       fall_to_simplex=fall_to_simplex,
                                       minimizer=minimizer,
                                       use_basin=use_basin,
                                       debug=debug,
                                       minimizer_kwargs=minimizer_kwargs,
                                       basin_kwargs=basin_kwargs)

        #update map in nodes_db
        self.nodes_db['map'] = np.NaN
        for name, value in self.values.items():
            try:
                self.nodes_db['map'].loc[name] = value
            # Some values can be series which we'll just ignore
            except (AttributeError, ValueError):
                pass
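
A hypothetical usage sketch (assumes model is a kabuki-style hierarchical model instance exposing this method):

# Two passes let lower-level nodes re-optimize against updated group nodes.
model.approximate_map(minimizer='Powell', cycles=2)
print(model.nodes_db['map'])  # per-node MAP estimates recorded above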
Example #18
    def run_SPXcentered(self, sigma_x, n_subjs, size, mu_value, mu_step_method,
                        seed):
        """ run a single Spxcentered test"""

        #init basic  mcmc
        if np.isscalar(mu_value):
            n_conds = 1
        else:
            n_conds = len(mu_value)

        max_tries = 5
        iter = 10000  #100000
        burnin = 5000  #90000
        nodes, t_values = self.create_hierarchical_model(sigma_x=sigma_x,
                                                         n_subjs=n_subjs,
                                                         size=size,
                                                         mu_value=mu_value,
                                                         seed=seed)
        mcmc = pm.MCMC(nodes)
        [mcmc.use_step_method(mu_step_method, node) for node in nodes['mu']]

        #init mcmc with SPX step method
        nodes_spx, t_values = self.create_hierarchical_model(sigma_x=sigma_x,
                                                             n_subjs=n_subjs,
                                                             size=size,
                                                             mu_value=mu_value,
                                                             seed=seed)
        mcmc_spx = pm.MCMC(nodes_spx)
        mcmc_spx.use_step_method(kabuki.steps.SPXcentered,
                                 loc=nodes_spx['mu'],
                                 scale=nodes_spx['sigma'],
                                 loc_step_method=mu_step_method)

        #init mcmc with spx on vec model
        nodes_vpx, t_values = self.create_hierarchical_model(sigma_x=sigma_x,
                                                             n_subjs=n_subjs,
                                                             size=size,
                                                             mu_value=mu_value,
                                                             seed=seed,
                                                             vec=True)
        mcmc_vpx = pm.MCMC(nodes_vpx)
        mcmc_vpx.use_step_method(kabuki.steps.SPXcentered,
                                 loc=nodes_vpx['mu'],
                                 scale=nodes_vpx['sigma'],
                                 loc_step_method=mu_step_method)

        #run all the models until they converge to the same values
        i_try = 0
        while i_try < max_tries:
            print("~~~~~ trying for the %d time ~~~~~~" % (i_try + 1))

            #run spx mcmc
            i_t = time()
            mcmc_spx.sample(iter, burnin)
            print("spx sampling took %.2f seconds" % (time() - i_t))
            stats = dict([('mu%d spx' % x, mcmc_spx.mu[x].stats())
                          for x in range(n_conds)])

            #run vpx mcmc
            i_t = time()
            mcmc_vpx.sample(iter, burnin)
            print("vpx sampling took %.2f seconds" % (time() - i_t))
            stats.update(
                dict([('mu%d vpx' % x, mcmc_vpx.mu[x].stats())
                      for x in range(n_conds)]))

            #run basic mcmc
            i_t = time()
            mcmc.sample(iter, burnin)
            print("basic sampling took %.2f seconds" % (time() - i_t))
            stats.update(
                dict([('mu%d basic' % x, mcmc.mu[x].stats())
                      for x in range(n_conds)]))

            df = DataFrame(stats, index=['mean', 'standard deviation']).T
            df = df.rename(columns={
                'mean': 'mean',
                'standard deviation': 'std'
            })
            print(df)

            #check if all the results are close enough
            try:
                for i in range(len(df) // 3):
                    np.testing.assert_allclose(df[(3 * i + 0):(3 * i + 1)],
                                               df[(3 * i + 1):(3 * i + 2)],
                                               atol=0.1,
                                               rtol=0.01)
                    np.testing.assert_allclose(df[(3 * i + 1):(3 * i + 2)],
                                               df[(3 * i + 2):(3 * i + 3)],
                                               atol=0.1,
                                               rtol=0.01)
                    np.testing.assert_allclose(df[(3 * i + 2):(3 * i + 3)],
                                               df[(3 * i + 0):(3 * i + 1)],
                                               atol=0.1,
                                               rtol=0.01)

                break
            #if not add more runs
            except AssertionError:
                print("Failed to reach agreement. trying again")
                i_try += 1

        assert (i_try < max_tries
                ), "could not replicate values using different mcmc samplers"
Example #19
    def setUpClass(self):

        self.S = pymc.MCMC(disaster_model,
                           db='txt',
                           dbname=os.path.join(testdir, 'Disaster.txt'),
                           dbmode='w')
Example #20
    def run_SliceStep(self,
                      sigma_x,
                      n_subjs,
                      size,
                      mu_value,
                      seed,
                      left=None,
                      max_tries=5):

        #init basic  mcmc
        if np.isscalar(mu_value):
            n_conds = 1
        else:
            n_conds = len(mu_value)

        iter = 10000  #100000
        burnin = 5000  #90000

        #init basic mcmc
        nodes, t_values = self.create_hierarchical_model(sigma_x=sigma_x,
                                                         n_subjs=n_subjs,
                                                         size=size,
                                                         mu_value=mu_value,
                                                         seed=seed)
        mcmc = pm.MCMC(nodes)
        [
            mcmc.use_step_method(kabuki.steps.kNormalNormal, node)
            for node in nodes['mu']
        ]

        #init mcmc with slice step
        nodes_s, t_values = self.create_hierarchical_model(sigma_x=sigma_x,
                                                           n_subjs=n_subjs,
                                                           size=size,
                                                           mu_value=mu_value,
                                                           seed=seed)
        mcmc_s = pm.MCMC(nodes_s)
        [
            mcmc_s.use_step_method(kabuki.steps.kNormalNormal, node)
            for node in nodes_s['mu']
        ]
        mcmc_s.use_step_method(kabuki.steps.SliceStep,
                               nodes_s['sigma'],
                               width=3,
                               left=left)

        #run all the models until they converge to the same values
        i_try = 0
        stats = {}
        while i_try < max_tries:
            print("~~~~~ trying for the %d time ~~~~~~" % (i_try + 1))

            #run slice mcmc
            i_t = time()
            mcmc_s.sample(iter, burnin)
            print("slice sampling took %.2f seconds" % (time() - i_t))
            stats.update(
                dict([('mu%d S' % x, mcmc_s.mu[x].stats())
                      for x in range(n_conds)]))

            #run basic mcmc
            i_t = time()
            mcmc.sample(iter, burnin)
            print("basic sampling took %.2f seconds" % (time() - i_t))
            stats.update(
                dict([('mu%d basic' % x, mcmc.mu[x].stats())
                      for x in range(n_conds)]))

            df = DataFrame(stats, index=['mean', 'standard deviation']).T
            df = df.rename(columns={
                'mean': 'mean',
                'standard deviation': 'std'
            })
            print(df)

            #check if all the results are close enough
            try:
                for i in range(len(df) // 2):
                    np.testing.assert_allclose(df[(2 * i + 0):(2 * i + 1)],
                                               df[(2 * i + 1):(2 * i + 2)],
                                               atol=0.1,
                                               rtol=0.01)
                break
            #if not add more runs
            except AssertionError:
                print("Failed to reach agreement In:")
                print(df[(2 * i):(2 * (i + 1))])
                print("trying again")

            i_try += 1

        assert (i_try < max_tries
                ), "could not replicate values using different mcmc samplers"

        return mcmc, mcmc_s
Example #21
    def setUpClass(self):
        self.S = pymc.MCMC(disaster_model, db='ram')
        self.S.use_step_method(pymc.Metropolis, self.S.early_mean, tally=True)
Example #22
def mk_multi_bayes(tree, chars, nregime, qidx, pi="Equal", seglen=0.02, stepsize=0.05):
    """
    Create a Bayesian multi-mk model. User specifies which regime models
    to use and the Bayesian model finds the switchpoints.

    Args:
        tree (Node): Root node of tree.
        chars (dict): Dict mapping tip labels to discrete character
          states. Character states must be in the form of [0,1,2...]

        nregime (int): The number of distinct regimes to test. Set to
          1 for an Mk model, or greater than 1 for a multi-regime Mk model.
        qidx (np.array): Index specifying the model to test

            columns:
                0, 1, 2 - index axes of q
                3 - index of params
            This scheme allows flexible specification of models. E.g.:
            Symmetric mk2:
                params = [0.2]; qidx = [[0,0,1,0],
                                        [0,1,0,0]]

            Asymmetric mk2:
                params = [0.2,0.6]; qidx = [[0,0,1,0],
                                            [0,1,0,1]]
           NOTE:
             The qidx corresponding to the first q matrix (first column 0)
             is always the root regime
        pi (str or np.array): Option to weight the root node by given values.
           Either a string containing the method or an array
           of weights. Weights should be given in order.

           Accepted methods of weighting root:

           Equal: flat prior
           Equilibrium: Prior equal to stationary distribution
             of Q matrix
           Fitzjohn: Root states weighted by how well they
             explain the data at the tips.
        seglen (float): Size of segments to break tree into. The smaller this
          value, the more "fine-grained" the analysis will be. Optional,
          defaults to 2% of the root-to-tip length.
        stepsize (float): Maximum size of steps for switchpoints to take.
          Optional, defaults to 5% of root-to-tip length.


    """
    if type(chars) == dict:
        data = chars.copy()
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    else:
        data = dict(zip([n.label for n in tree.leaves()],chars))
    # Preparations
    nchar = len(set(chars))
    nparam = len(set([n[-1] for n in qidx]))
    # This model has 2 components: Q parameters and switchpoints
    # They are combined in a custom likelihood function
    ###########################################################################
    # Switchpoint:
    ###########################################################################
    # Modeling the movement of the regime shift(s) is the tricky part
    # Regime shifts will only be allowed to happen at a node
    seg_map = tree_map(tree, seglen)
    switch = [None] * (nregime - 1)
    for regime in range(nregime - 1):
        switch[regime] = make_switchpoint_stoch(seg_map, name=str("switch_{}".format(regime)))
    ###########################################################################
    # Qparams:
    ###########################################################################
    # Each Q parameter is an exponential
    Qparams = [None] * nparam
    for i in range(nparam):
        Qparams[i] = pymc.Exponential(name=str("Qparam_{}".format(i)), beta=1.0, value=0.1*(i+1))


    ###########################################################################
    # Likelihood
    ###########################################################################
    # The likelihood function
    l = cyexpokit.make_mklnl_func(tree, data,nchar,nregime,qidx)

    @pymc.deterministic
    def likelihood(q=Qparams, s=switch, name="likelihood"):
        return l(np.array(q), np.array([x[0].ni for x in s], dtype=np.intp),
                 np.array([x[1] for x in s]))

    @pymc.potential
    def multi_mklik(lnl=likelihood):
        if not (np.isnan(lnl)):
            return lnl
        else:
            return -np.inf
    mod = pymc.MCMC(locals())
    for s in switch:
        mod.use_step_method(SwitchpointMetropolis, s, tree, seg_map, stepsize=stepsize, seglen=seglen)
    return mod
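
A hypothetical usage sketch reusing the asymmetric mk2 scheme from the docstring (assumes an ivy-style tree object and a matching chars dict, neither of which is defined in this example):

import numpy as np

qidx = np.array([[0, 0, 1, 0],
                 [0, 1, 0, 1]])   # asymmetric mk2, single regime
mod = mk_multi_bayes(tree, chars, nregime=1, qidx=qidx)
mod.sample(10000, burn=1000)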
Example #23
    def test(self):
        S = pymc.MCMC(input=BinaryTestModel)
        S.sample(1000, 500)
        f = S.fair.trace()
        assert (1.0 * f.sum() / len(f) > .5)
Example #24
def test_RAM1d():
    """
    Test the RobustAdaptiveMetro step from my_pymc_steps.py.

    :param gaussian_data1d: A fixture function that generates some data to use for testing.
    """
    data = gaussian_data1d()

    # Setup PyMC model
    @pymc.stochastic
    def Mu(value=data.mean()):
        """
        Normal parameters (mu,variance).
        """
        return 0.0  # Uniform prior, so log(prior) is just zero.

    @pymc.stochastic(observed=True)
    def normal_data(value=data, Mu=Mu):
        """
        Data generated from a normal distribution.
        """
        data_mean = np.mean(value)
        ndata = value.size
        # Normal log-likelihood: the normalization term uses log, not sqrt.
        loglik = -ndata / 2.0 * np.log(2.0 * np.pi * true_variance) - \
            0.5 * ndata * (data_mean - Mu) ** 2 / true_variance
        return loglik

    # Now let's run the MCMC sampler and make sure the RAM algorithm behaves as expected

    mcmc = pymc.MCMC({
        'Mu': Mu,
        'normal_data': normal_data
    })  # The MCMC sampler

    covar_guess = data.var() / data.size
    target_rate = 0.40
    # Make sure we use the RAM step to update the normal mean.
    mcmc.use_step_method(RobustAdaptiveMetro,
                         Mu,
                         target_rate,
                         proposal_covar=covar_guess,
                         proposal_distribution='T')

    # Before we start the MCMC sampler, let's run some tests to make sure that the RAM step is properly initialized.
    RAM = mcmc.step_method_dict[Mu][0]
    assert RAM._dim == 1
    assert RAM._proposal_distribution == 'T'
    assert np.abs(RAM._cholesky_factor -
                  np.sqrt(data.var() / data.size)) < 1e-5

    niter = 100000
    nburn = 10000

    mcmc.sample(niter, burn=nburn)
    mu_draws = mcmc.trace('Mu')[:]
    assert RAM._current_iter == niter
    assert RAM._accepted + RAM._rejected == niter

    # Make sure acceptance rate is within 2% of the target rate
    acceptance_rate = RAM._accepted / float(RAM._current_iter)
    assert abs(acceptance_rate - target_rate) / abs(target_rate) < 0.02

    # Compare the histogram of the draws obtained from the RAM algorithm
    # with the expected distribution
    xgrid = np.linspace(data.mean() - 5.0 * np.std(data) / np.sqrt(data.size),
                        data.mean() + 5.0 * np.std(data) / np.sqrt(data.size))
    post_var = true_variance / data.size
    post_mean = data.mean()

    post_pdf = 1.0 / np.sqrt(2.0 * np.pi * post_var) * np.exp(
        -0.5 * (xgrid - post_mean)**2 / post_var)

    plt.subplot(111)
    plt.hist(mu_draws, bins=25, normed=True)
    plt.plot(xgrid, post_pdf, 'r', lw=2)
    plt.title("Normal Model: Test of Metropolis step method")
    plt.xlabel("Mean")
    plt.ylabel("PDF")
    plt.show()
        c_estimate = estimate[0]
        m_estimate = estimate[1]

        # Keep the least squares fit
        lstsqr_fit[i, j, 0] = c_estimate
        lstsqr_fit[i, j, 1] = m_estimate

        # plot the power law spectrum
        power_fit = c_estimate * analysis_frequencies**(-m_estimate)

        # Define the model we are going to use
        single_power_law_model = pymcmodels.single_power_law(
            analysis_frequencies, analysis_power, m_estimate)

        # Set up the MCMC model
        M1 = pymc.MCMC(single_power_law_model)

        # Run the sampler
        M1.sample(iter=iterations, burn=burn, thin=thin, progress_bar=False)

        # Get the power law index and the normalization
        pli = M1.trace("power_law_index")[:]
        bayes_mean[i, j, 1] = np.mean(pli)
        cli = M1.trace("power_law_norm")[:]
        bayes_mean[i, j, 0] = np.mean(cli)

        # Get various properties of the power law index marginal distribution
        # and save them
        ci_keep68[i, j, :] = rn_utils.credible_interval(pli, ci=0.68)

        for k in range(0, 100):
Example #26
def test_RAM2d():

    data = gaussian_data2d()

    # Setup PyMC model
    @pymc.stochastic
    def Mu(value=data.mean(axis=0)):
        """
        Normal parameters (mu,variance).
        """
        return 0.0  # Uniform prior, so log(prior) is just zero.

    @pymc.stochastic(observed=True)
    def normal_data(value=data, Mu=Mu):
        """
        Data generated from a normal distribution.
        """
        data_mean = value.mean(axis=0)
        ndata = value.shape[0]
        zcent = data_mean - Mu
        loglik = -0.5 * ndata * np.dot(zcent.T,
                                       np.linalg.inv(true_covar).dot(zcent))
        return loglik

    mcmc = pymc.MCMC({'Mu': Mu, 'normal_data': normal_data})

    covar_guess = np.cov(data, rowvar=0) / data.shape[0]
    target_rate = 0.4
    # Make sure we use the RAM step to update the normal mean.
    mcmc.use_step_method(RobustAdaptiveMetro,
                         Mu,
                         proposal_covar=covar_guess,
                         target_rate=target_rate)

    # Before we start the MCMC sampler, let's run some tests to make sure that the RAM step is properly initialized.
    RAM = mcmc.step_method_dict[Mu][0]
    assert RAM._dim == 2
    assert RAM._proposal_distribution == 'Normal'

    # Now let's run the MCMC sampler and make sure the RAM algorithm behaves as expected
    niter = 100000
    nburn = 50000

    mcmc.sample(niter, nburn)
    assert RAM._current_iter == niter

    mu_draws = mcmc.trace(Mu)[:]

    # Make sure acceptance rate is within 2% of the target rate
    acceptance_rate = RAM._accepted / float(RAM._current_iter)
    assert abs(acceptance_rate - target_rate) / abs(target_rate) < 0.02

    # Compare covariance matrix of proposals with posterior covariance
    post_covar = true_covar / data.shape[0]

    covar_n = RAM._cholesky_factor.T.dot(RAM._cholesky_factor)
    eigenval_n, eigenvect_n = linalg.eig(covar_n)
    eigenval_n = np.diagflat(eigenval_n)
    covroot_n = np.dot(
        eigenvect_n.T,
        np.sqrt(eigenval_n).dot(
            eigenvect_n))  # Matrix square-root of proposal covariance
    covroot_n = covroot_n.real

    eigenval, eigenvect = linalg.eig(post_covar)
    eigenval_inv = np.diagflat(1.0 / eigenval)
    covroot_inv = np.dot(eigenvect.T, np.sqrt(eigenval_inv).dot(eigenvect))
    covroot_inv = covroot_inv.real

    evals, evects = linalg.eig(covroot_n.dot(covroot_inv))
    # Compute the 'suboptimality factor'. This should be unity if the two matrices are proportional.
    subopt_factor = evals.size * np.sum(1.0 / evals**2) / (np.sum(
        1.0 / evals))**2

    assert subopt_factor < 1.01  # Test for proportionality of proposal covariance and posterior covariance

    # Now make nice plots comparing sampled values with the true posterior
    data_mean = data.mean(axis=0)

    xgrid = np.linspace(data_mean[0] - 4.0 * np.sqrt(post_covar[0, 0]),
                        data_mean[0] + 4.0 * np.sqrt(post_covar[0, 0]), 100)
    ygrid = np.linspace(data_mean[1] - 4.0 * np.sqrt(post_covar[1, 1]),
                        data_mean[1] + 4.0 * np.sqrt(post_covar[1, 1]), 100)

    X, Y = np.meshgrid(xgrid, ygrid)

    true_pdf = mlab.bivariate_normal(X, Y, np.sqrt(post_covar[0, 0]),
                                     np.sqrt(post_covar[1, 1]), data_mean[0],
                                     data_mean[1], post_covar[0, 1])

    plt.subplot(211)
    post_pdf = 1.0 / np.sqrt(2.0 * np.pi * post_covar[0, 0]) * \
        np.exp(-0.5 * (xgrid - data_mean[0]) ** 2 / post_covar[0, 0])

    plt.hist(mu_draws[:, 0], bins=25, normed=True)
    plt.plot(xgrid, post_pdf, 'r', lw=2)
    plt.title(
        "Bivariate Normal Model: Test of Robust Adaptive Metropolis step method"
    )
    plt.xlabel("Mean 1")
    plt.ylabel("PDF")

    plt.subplot(212)
    post_pdf = 1.0 / np.sqrt(2.0 * np.pi * post_covar[1, 1]) * \
        np.exp(-0.5 * (ygrid - data_mean[1]) ** 2 / post_covar[1, 1])

    plt.hist(mu_draws[:, 1], bins=25, normed=True)
    plt.plot(ygrid, post_pdf, 'r', lw=2)
    plt.xlabel("Mean 2")
    plt.ylabel("PDF")
    plt.show()

    plt.figure()
    plt.plot(mu_draws[:, 0], mu_draws[:, 1], '.', ms=2)
    plt.contour(X, Y, true_pdf, linewidths=5)
    plt.title(
        'Test of Robust Adaptive Metropolis Algorithms: Bivariate Normal Model'
    )
    plt.ylabel('Mean 2')
    plt.xlabel('Mean 1')
    plt.show()
Example #27
def get_Bayes(measurements=[], chunksize=5, Ndp=5, iter=50000, burn=5000):

    sc = pymc.Uniform('sc', 0.1, 2.0, value=0.24)
    tau = pymc.Uniform('tau', 0.0, 1.0, value=0.5)

    concinit = 1.0
    conclo = 0.1
    conchi = 10.0
    concentration = pymc.Uniform('concentration',
                                 lower=conclo,
                                 upper=conchi,
                                 value=concinit)

    # The stick-breaking construction: requires Ndp beta draws dependent on the
    # concentration, before the probability mass function is actually constructed.
    #betas = pymc.Beta('betas', alpha=1, beta=concentration, size=Ndp)
    betas = pymc.Beta('betas', alpha=1, beta=1, size=Ndp - 1)

    @pymc.deterministic
    def pmf(betas=betas):
        "Construct a probability mass function for the truncated Dirichlet process"
        # prod = lambda x: np.exp(np.sum(np.log(x))) # Slow but more accurate(?)
        prod = np.prod
        value = [u * prod(1.0 - betas[:i]) for i, u in enumerate(betas)]
        value.append(1.0 - sum(value[:]))  # force value to sum to 1
        return value

    # The cluster assignments: each data point's estimated cluster ID.
    # Remove idinit to allow clusterid to be randomly initialized:
    Ndata = len(measurements)
    idinit = np.zeros(Ndata, dtype=np.int64)
    clusterid = pymc.Categorical('clusterid', p=pmf, size=Ndata, value=idinit)

    @pymc.deterministic(name='clustermean')
    def clustermean(clusterid=clusterid, sc=sc, Ndp=Ndp):
        return sc * np.arange(1, Ndp + 1)[clusterid]

    @pymc.deterministic(name='clusterprec')
    def clusterprec(clusterid=clusterid, sc=sc, tau=tau, Ndp=Ndp):
        return 1.0 / (sc * sc * tau * tau * (np.arange(1, Ndp + 1)[clusterid]))

    y = pymc.Normal('y',
                    mu=clustermean,
                    tau=clusterprec,
                    observed=True,
                    value=measurements)

    ## for predictive posterior simulation
    @pymc.deterministic(name='y_sim')
    def y_sim(value=[0], sc=sc, tau=tau, clusterid=clusterid, Ndp=Ndp):
        n = np.arange(1, Ndp + 1)[np.random.choice(clusterid)]
        return np.random.normal(loc=sc * n, scale=sc * tau * n)

    m = pymc.Model({
        "scale": sc,
        "tau": tau,
        "betas": betas,
        "clusterid": clusterid,
        "normal": y,
        "pred": y_sim
    })

    sc_samples = []
    modes = []
    simulations = []

    for i in range(0, chunksize):
        mc = pymc.MCMC(m)
        mc.sample(iter=iter, burn=burn)
        plot(mc)

        sc_sample = mc.trace('sc')[:]
        sc_samples.append(sc_sample)

        simulation = mc.trace('y_sim')[:]
        simulations.append(simulation)

        plt.hist(measurements,
                 50,
                 fc='gray',
                 histtype='stepfilled',
                 alpha=0.3,
                 normed=False)
        plt.hist(simulation,
                 30,
                 fc='blue',
                 histtype='stepfilled',
                 alpha=0.3,
                 normed=True)
        hist, edges = np.histogram(
            measurements,
            bins=100,
            range=[np.min(measurements) - 0.25,
                   np.max(measurements) + 0.25])

        argm = hist.argmax()
        modes.append((edges[argm] + edges[argm + 1]) / 2)

    if chunksize <= 1:
        gr = np.nan
    else:
        gr = pymc.gelman_rubin(sc_samples)

    dic = {
        'gelman_rubin': gr,
        'modes': modes,
        'simulations': simulations,
        'sc_samples': sc_samples
    }
    return dic
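
A quick standalone check of the stick-breaking identity used in pmf() above (illustrative only):

import numpy as np

betas = np.random.beta(1, 1, size=4)
weights = [u * np.prod(1.0 - betas[:i]) for i, u in enumerate(betas)]
weights.append(1.0 - sum(weights))       # leftover stick mass
assert abs(sum(weights) - 1.0) < 1e-12   # weights form a valid pmf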
Example #28
    cutoff_b = 27.0
    print(cutoff_b)
    masked_values = np.ma.masked_values(full_data_day_wind_speed, value=None)
    print(masked_values.mask.sum())
    print(masked_values.data.max())
    wind_speed_day = TruncatedNormal('dws',
                                     mu,
                                     tau,
                                     a=cutoff_a,
                                     b=cutoff_b,
                                     value=masked_values,
                                     observed=True)
    return locals()


M_missing_day = pm.MCMC(day_missing_model())
M_missing_day.sample(iter=no_of_iterations, burn=burn, thin=thin)
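# PyMC2 treats masked entries of an observed stochastic as unobserved and
# samples them, so averaging recent trace samples below yields point
# estimates for the missing wind-speed values.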
missing_values = np.mean(np.array(M_missing_day.trace('dws')[-50:-1]), axis=0)
print(missing_values.dtype)
# print len(missing_values)
# print len(full_data_day.index)
# full_data_day.to_csv('/media/kiruba/New Volume/ACCUWA_Data/weather_station/KSNDMC/Tubgere_prior.csv')
masked_values = np.ma.masked_values(full_data_day_wind_speed, value=None)
masked_values[masked_values.mask] = missing_values
full_data_day['corrected_wind_speed'] = np.array(masked_values.data.tolist())
full_data_day['corrected_wind_speed'] = full_data_day[
    'corrected_wind_speed'].astype(int)
# full_data_day.to_csv('/media/kiruba/New Volume/ACCUWA_Data/weather_station/KSNDMC/Tubgere_after.csv')


def night_missing_model():
Example #29
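This example begins mid-function. A hypothetical reconstruction of the missing head (assumed, following PyMC2's classic switchpoint model; the names match the traces plotted below) would be:

    switchpoint = pm.DiscreteUniform('switchpoint', lower=0,
                                     upper=len(stormsNumbers) - 1)
    early_mean = pm.Exponential('early_mean', beta=1.0)
    late_mean = pm.Exponential('late_mean', beta=1.0)

    @pm.deterministic(plot=False)
    def rate(s=switchpoint, e=early_mean, l=late_mean):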
        # Concatenate Poisson means
        out = np.zeros(len(stormsNumbers))
        out[:s] = e
        out[s:] = l
        return out

    storms = pm.Poisson('storms',
                        mu=rate,
                        value=stormsNumbers,
                        observed=True)

    storms_model = pm.Model([storms,
                             early_mean,
                             late_mean, rate])

    strmsM = pm.MCMC(storms_model)
    strmsM.sample(iter=40000, burn=1000, thin=20)

    plt.hist(strmsM.trace('late_mean')[:], edgecolor="k")
    general.set_grid_to_plot()
    plt.savefig(general.folderPath2 + "exp2_late_mean.png")
    plt.clf()

    plt.hist(strmsM.trace('early_mean')[:], edgecolor="k")
    general.set_grid_to_plot()
    plt.savefig(general.folderPath2 + "exp2_early_mean.png")
    plt.clf()

    plt.hist(strmsM.trace('switchpoint')[:], edgecolor="k")
    general.set_grid_to_plot()
    plt.savefig(general.folderPath2 + "exp2_switchpoint.png")
Example #30
def run_mcmc(gp, img, compare_img, transverse_sigma=1.0, motion_angle=0.0):
    """Estimate PSF using Markov Chain Monte Carlo

    gp - Gaussian priors - array of N objects with attributes
                           a, b, sigma

    img  - image to apply PSF to
    compare_img - comparison image
    transverse_sigma - prior
    motion_angle - prior


    Model a Point Spread Function consisting of the sum of N
    collinear Gaussians, blurred in the transverse direction
    and the result rotated.  Each of the collinear Gaussians
    is parameterized by a (amplitude), b (center), and sigma (std. deviation).

    The Point Spread Function is applied to the image img
    and the result compared with the image  compare_img.
    """

    print "gp.shape", gp.shape
    print "gp", gp

    motion_angle = np.deg2rad(motion_angle)
    motion_angle = pm.VonMises("motion_angle",
                               motion_angle,
                               1.0,
                               value=motion_angle)

    transverse_sigma = pm.Exponential("transverse_sigma",
                                      1.0,
                                      value=transverse_sigma)
    N = gp.shape[0]

    mixing_coeffs = pm.Exponential("mixing_coeffs", 1.0, size=N)
    #mixing_coeffs.set_value(gp['a'])
    mixing_coeffs.value = gp['a']
    longitudinal_sigmas = pm.Exponential("longitudinal_sigmas", 1.0, size=N)
    #longitudinal_sigmas.set_value(gp['sigma'])
    longitudinal_sigmas.value = gp['sigma']

    b = np.array(sorted(gp['b']), dtype=float)
    cut_points = (b[1:] + b[:-1]) * 0.5
    long_means = [None] * b.shape[0]
    print(long_means)
    left_mean = pm.Gamma("left_mean", 1.0, 2.5 * gp['sigma'][0])
    long_means[0] = cut_points[0] - left_mean
    right_mean = pm.Gamma("right_mean", 1.0, 2.5 * gp['sigma'][-1])
    long_means[-1] = cut_points[-1] + right_mean
    for ix in range(1, N - 1):
        long_means[ix] = pm.Uniform("mid%d_mean" % ix,
                                    lower=cut_points[ix - 1],
                                    upper=cut_points[ix])
    print "cut_points", cut_points
    print "long_means", long_means

    #longitudinal_means = pm.Normal("longitudinal_means", 0.0, 0.04, size=N)
    #longitudinal_means.value = gp['b']

    dtype = np.dtype([('a', float), ('b', float), ('sigma', float)])

    @pm.deterministic
    def psf(mixing_coeffs=mixing_coeffs, longitudinal_sigmas=longitudinal_sigmas, \
            longitudinal_means=long_means, transverse_sigma=transverse_sigma, motion_angle=motion_angle):
        gp = np.ones((N, ), dtype=dtype)
        gp['a'] = mixing_coeffs
        gp['b'] = longitudinal_means
        gp['sigma'] = longitudinal_sigmas
        motion_angle_deg = np.rad2deg(motion_angle)
        if True:
            print "gp: a", mixing_coeffs
            print "    b", longitudinal_means
            print "    s", longitudinal_sigmas
            print "tr-sigma", transverse_sigma, "angle=", motion_angle_deg
        return generate_sum_gauss(gp, transverse_sigma, motion_angle_deg)

    @pm.deterministic
    def image_fitness(psf=psf, img=img, compare_img=compare_img):
        img_convolved = ndimage.convolve(img, psf)
        img_diff = img_convolved.astype(int) - compare_img
        return img_diff.std()

    if False:
        trial_psf = generate_sum_gauss(gp,
                                       2.0,
                                       50.0,
                                       plot_unrot_kernel=True,
                                       plot_rot_kernel=True,
                                       verbose=True)
        print "trial_psf", trial_psf.min(), trial_psf.mean(), trial_psf.max(
        ), trial_psf.std()
        obs_psf = pm.Uniform("obs_psf",
                             lower=-1.0,
                             upper=1.0,
                             doc="Point Spread Function",
                             value=trial_psf,
                             observed=True,
                             verbose=False)

    print "image_fitness value started at", image_fitness.value
    known_fitness = pm.Exponential("fitness",
                                   image_fitness + 0.001,
                                   value=0.669,
                                   observed=True)

    #mcmc = pm.MCMC([motion_angle, transverse_sigma, mixing_coeffs, longitudinal_sigmas, longitudinal_means, image_fitness, known_fitness], verbose=2)
    mcmc = pm.MCMC([
        motion_angle, transverse_sigma, mixing_coeffs, longitudinal_sigmas,
        image_fitness, known_fitness, left_mean, right_mean
    ] + long_means,
                   verbose=2)
    pm.graph.dag(mcmc, format='png')
    plt.show()
    #mcmc.sample(20000, 1000)
    mcmc.sample(2000)

    motion_angle_samples = mcmc.trace("motion_angle")[:]
    transverse_sigma_samples = mcmc.trace("transverse_sigma")[:]
    image_fitness_samples = mcmc.trace("image_fitness")[:]

    best_fit = np.percentile(image_fitness_samples, 1.0)
    best_fit_selection = image_fitness_samples < best_fit

    print(mcmc.db.trace_names)
    for k in [k for k in mcmc.stats().keys() if k != "known_fitness"]:
        #samples = mcmc.trace(k)[:]
        samples = mcmc.trace(k).gettrace()
        print(samples.shape)
        selected_samples = samples[best_fit_selection]
        print(k, samples.mean(axis=0), samples.std(axis=0),
              selected_samples.mean(axis=0), selected_samples.std(axis=0))

    ax = plt.subplot(211)
    plt.hist(motion_angle_samples,
             histtype='stepfilled',
             bins=25,
             alpha=0.85,
             label="posterior of $p_\\theta$",
             color="#A60628",
             normed=True)
    plt.legend(loc="upper right")
    plt.title("Posterior distributions of $p_\\theta$, $p_\\sigma$")

    ax = plt.subplot(212)
    plt.hist(transverse_sigma_samples,
             histtype='stepfilled',
             bins=25,
             alpha=0.85,
             label="posterior of $p_\\sigma$",
             color="#467821",
             normed=True)
    plt.legend(loc="upper right")
    plt.show()

    for k, v in mcmc.stats().items():
        print(k, v)
    # deprecated?  use discrepancy...  print mcmc.goodness()
    mcmc.write_csv("out.csv")
    pm.Matplot.plot(mcmc)
    plt.show()