Example #1
def convergence_diagnose_birt(m, a):
    # m is a fitted sampler, e.g. birt_model = pymc.MCMC(...)

    # Raftery-Lewis: burn-in, iterations and thinning needed to estimate the
    # 0.025 quantile of m.a to within +/-0.01
    pymc.raftery_lewis(m.a, q=0.025, r=0.01)
    # Geweke: z-scores comparing early and late segments of the chain
    scores = pymc.geweke(m.a, intervals=20)
    pymc.Matplot.geweke_plot(scores)
    # Gelman-Rubin statistic across chains
    pymc.gelman_rubin(m)
Example #2
def analyzeConvergence(dbFilename, db, pn, burnin):
	print 'ANALYZING {0}'.format(pn)

	vals = np.array(getParameter(db, pn, burnin=burnin))
	
	convDict = OrderedDict()
	gw = pymc.geweke(vals)
	gwDict = OrderedDict()
	gwDict['scores'] = gw

	frac2SD = len([x for x in gw if x[1] > -2 and x[1] < 2]) / float(len(gw))
	gwDict['frac_2sd'] = frac2SD
	convDict['geweke'] = gwDict

	rl = pymc.raftery_lewis(vals, 0.025, r=0.01)
	rlDict = OrderedDict()
	rlDict['iter_req_acc'] = rl[0]
	rlDict['thin_first_ord'] = rl[1]
	rlDict['burnin'] = rl[2]
	rlDict['iter_total'] = rl[3]
	rlDict['thin_ind'] = rl[4]

	convDict['raftery_lewis'] = rlDict

	print ''

	return convDict
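
analyzeConvergence relies on a PyMC pickle database and a project-specific getParameter helper that are not shown in this snippet. A hypothetical driver follows (the file name, parameter name and burn-in are purely illustrative), together with a note on reading the result:

# Hypothetical driver; the database layout and getParameter are assumptions.
db = pymc.database.pickle.load('chain.pickle')
diag = analyzeConvergence('chain.pickle', db, 'alpha', burnin=5000)

# Roughly 95% of Geweke z-scores should fall within +/-2 for a stationary
# chain, so a frac_2sd close to 0.95 is a good sign.
print diag['geweke']['frac_2sd']
print diag['raftery_lewis']['burnin'], diag['raftery_lewis']['iter_total']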
Example #3
    def test_simple(self):

        nmin, kthin, nburn, nprec, kmind = pymc.raftery_lewis(S.a,
                                                              0.5,
                                                              .05,
                                                              verbose=0)

        # nmin should approximately be the same as nprec/kmind
        assert (0.8 < (float(nprec) / kmind) / nmin < 1.2)
Example #4
def fit_two_mcmc(time, 
                 signal, 
                 height_th,
                 one_pulse,
                 sigma0,    # signal noise
                 sum_mu, 
                 sum_tau, 
                 sum_a,
                 sum_b,
                 diff_tau,
                 diff_a,
                 diff_b, 
                 sampling, 
                 burn, 
                 thin,
                 Plot=False, 
                 debug=False, 
                 auto=False):
    
    # LIMIT SEARCH FOR OFFSETS
    _t_initial=time[pd.srlatch_rev(signal,0,height_th)][0] 
    _t_final=time[pd.srlatch_rev(signal,0,height_th)][-1] 
    
    def model(x, f): 
        # PRIORS
        y_err = sigma0
        # print (_t_initial,_t_final, one_x_offset_init)
        one_x_offset = pymc.Uniform("one_x_offset", _t_initial, time[np.argmax(signal)], value=_t_initial)
        two_x_offset = pymc.Uniform("two_x_offset", _t_initial, _t_final, value=_t_final)
        sum_of_amps = pymc.TruncatedNormal("sum_amps", 
                                           mu=sum_mu, 
                                           tau=sum_tau, 
                                           a=sum_a, 
                                           b=sum_b, 
                                           value=sum_mu) #sigma/mu is the n=1 std deviation in units of n=1 amplitude
        diff_of_amps = pymc.TruncatedNormal("diff_amps", 
                                            mu=0, 
                                            tau=diff_tau, 
                                            a=diff_a, 
                                            b=diff_b, 
                                            value=0)
        one_x_amplitude = (sum_of_amps+diff_of_amps)/2
        two_x_amplitude = (sum_of_amps-diff_of_amps)/2
        # MODEL
        @pymc.deterministic(plot=False)
        def mod_two_pulse(x=time,
                          one_x_offset=one_x_offset,
                          two_x_offset=two_x_offset,
                          one_x_amplitude=one_x_amplitude,
                          two_x_amplitude=two_x_amplitude):
            return one_pulse(x, x_offset=one_x_offset, amplitude=one_x_amplitude) + \
                   one_pulse(x, x_offset=two_x_offset, amplitude=two_x_amplitude)

        # likelihood
        y = pymc.Normal("y", mu=mod_two_pulse, tau=1.0/y_err**2, value=signal, observed=True)
        return locals()

    MDL = pymc.MCMC(model(time,signal), db='pickle') # The sample is stored in a Python serialization (pickle) database
    # MDL.use_step_method(pymc.AdaptiveMetropolis, 
    #     [MDL.sum_of_amps, MDL.diff_of_amps],
    #     scales={MDL.sum_of_amps:np.sqrt(1/sum_tau), 
    #             MDL.diff_of_amps:np.sqrt(1/diff_tau)}, 
    #     )
    if auto: 
        # use Raftery-Lewis to determine the sampling parameters for this trace:
        # https://pymc-devs.github.io/pymc/modelchecking.html#convergence-diagnostics

        # pilot run
        InitSamples = 4*len(time)
        InitMDL = MDL
        InitMDL.sample(iter=InitSamples, burn=int(InitSamples*.5), thin=10)
        pymc_diagnostic = pymc.raftery_lewis(InitMDL, q=0.025, r=0.02, verbose=0)
        [EstBurn, EstSampling, EstThin] = np.max(
            np.array(
                [pymc_diagnostic[i] for i in pymc_diagnostic.keys()[1:]]  # skip the first key (mod_two_pulse), which is irrelevant here
            ),
            axis=0)[2:]  # drop the first two entries (first-order Markov chain quantities); keep burn-in, iterations and thinning
        # print [EstBurn, EstSampling, EstThin]
        # actual run
        MDL.sample(iter=EstSampling, burn=EstBurn, thin=EstThin, verbose=0)
    else:
        MDL.sample(iter=sampling, burn=burn, thin=thin, verbose=-1)  
    # thin: consider every 'thin' samples
    # burn: number of samples to discard: decide by num of samples to run till parameters stabilise at desired precision
    if Plot:
        y_fit = MDL.mod_two_pulse.value #get mcmc fitted values
        plt.plot(time, signal, 'b', marker='o', ls='-', lw=1, label='Observed')
        plt.plot(time,y_fit,'k', marker='+', ls='--', ms=5, mew=2, label='Bayesian Fit Values')
        plt.legend()
        pymc.Matplot.plot(MDL)      
    if debug:
        for i in np.arange(10):
            MDL.sample(iter=sampling, burn=burn, thin=thin, verbose=0)
            pymc.gelman_rubin(MDL)
            pymc.Matplot.summary_plot(MDL)
    return MDL #usage: MDL.one_x_offset.value for fitted result
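
Beyond the point estimate mentioned in the final comment, the returned sampler exposes full traces and summary statistics. A minimal sketch, assuming PyMC 2's trace()/stats() interface and a fitted MDL returned by fit_two_mcmc above:

offset_trace = MDL.trace('one_x_offset')[:]   # posterior samples after burn-in and thinning
offset_stats = MDL.one_x_offset.stats()       # dict with mean, standard deviation, HPD interval, ...
print offset_stats['mean'], offset_stats['95% HPD interval']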
Example #5
    # return (cur_x, funEvals['funevals']) if returnFunEvals else cur_x


if __name__ == '__main__':
    npr.seed(1)

    import pylab as pl
    import pymc

    D = 10
    fn = lambda x: -0.5 * np.sum(x**2)

    iters = 1000
    samps = np.zeros((iters, D))
    for ii in xrange(1, iters):
        samps[ii, :] = slice_sample(samps[ii - 1, :],
                                    fn,
                                    sigma=0.1,
                                    step_out=False,
                                    doubling_step=True,
                                    verbose=False)

    ll = -0.5 * np.sum(samps**2, axis=1)

    scores = pymc.geweke(ll)
    pymc.Matplot.geweke_plot(scores, 'test')

    pymc.raftery_lewis(ll, q=0.025, r=0.01)

    pymc.Matplot.autocorrelation(ll, 'test')
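
Because the target density above is a standard normal in D = 10 dimensions, the summed log-density has expectation -D/2, which gives a quick sanity check alongside the formal diagnostics (a minimal addition, not part of the original script):

    # Sanity check: for samples from N(0, I_D), E[-0.5 * sum(x**2)] = -D/2.
    print 'mean log-density: %.2f (expected about %.1f)' % (ll.mean(), -0.5 * D)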
Example #6
import pymc as pm

# Instantiate and run sampler
S = pm.MCMC(my_model)
S.sample(10000, burn=5000)

# Calculate and plot Geweke scores
scores = pm.geweke(S, intervals=20)
pm.Matplot.geweke_plot(scores)

# Geweke plot for a single parameter
trace = S.trace('alpha')[:]
alpha_scores = pm.geweke(trace, intervals=20)
pm.Matplot.geweke_plot(alpha_scores, 'alpha')

# Calculate Raftery-Lewis diagnostics
pm.raftery_lewis(S, q=0.025, r=0.01)

"""
Sample output:

========================
Raftery-Lewis Diagnostic
========================

937 iterations required (assuming independence) to achieve 0.01 accuracy
with 95 percent probability.

Thinning factor of 1 required to produce a first-order Markov chain.

39 iterations to be discarded at the beginning of the simulation (burn-in).
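
The figures in this report can also be captured programmatically: with verbose=0, raftery_lewis returns the tuple (nmin, kthin, nburn, nprec, kmind) for a single trace, as the test snippets elsewhere on this page show. A minimal sketch (the parameter name 'alpha' is illustrative):

nmin, kthin, nburn, nprec, kmind = pm.raftery_lewis(S.trace('alpha')[:],
                                                    q=0.025, r=0.01, verbose=0)
print nmin    # iterations required assuming independence
print nburn   # burn-in to discard
print nprec   # subsequent iterations required
print kmind   # thinning needed for an approximately independent chain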
Example #7
        direction = direction / np.sqrt(np.sum(direction**2))
        new_x = direction_slice(direction, init_x)

    if scalar:
        return float(new_x[0])
    else:
        return new_x
                    
if __name__ == '__main__':
    npr.seed(1)

    import pylab as pl
    import pymc

    D  = 10
    fn = lambda x: -0.5*np.sum(x**2)

    iters = 1000
    samps = np.zeros((iters,D))
    for ii in xrange(1,iters):
        samps[ii,:] = slice_sample(samps[ii-1,:], fn, sigma=0.1, step_out=False, doubling_step=True, verbose=False)

    ll = -0.5*np.sum(samps**2, axis=1)

    scores = pymc.geweke(ll)
    pymc.Matplot.geweke_plot(scores, 'test')

    pymc.raftery_lewis(ll, q=0.025, r=0.01)

    pymc.Matplot.autocorrelation(ll, 'test')

Example #8
from pymc import raftery_lewis

def print_diagn(M, q, r, s):
    return raftery_lewis(M, q, r, s, verbose=0)
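
A hypothetical call of the wrapper (trace is any 1-D array of samples; the quantile, accuracy and confidence values are illustrative):

nmin, kthin, nburn, nprec, kmind = print_diagn(trace, 0.025, 0.01, 0.95)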
Example #9
    def test_simple(self):

        nmin, kthin, nburn, nprec, kmind = pymc.raftery_lewis(S.a, 0.5, .05, verbose=0)

        # nmin should approximately be the same as nprec/kmind
        assert(0.8 < (float(nprec)/kmind) / nmin < 1.2)
Example #10
        print
        print "Thinning factor of %i required to produce a first-order Markov chain." % kthin
        print
        print "%i iterations to be discarded at the beginning of the simulation (burn-in)." % nburn
        print
        print "%s subsequent iterations required." % nprec
        print
        print "Thinning factor of %i required to produce an independence chain." % kmind

See Raftery and Lewis (StatSci 1992.pdf): the dependence factor I measures the increase in the number of iterations required due to dependence in the sequence.
"""
for s in list(M.stochastics):
    if 'predictive' in str(s): continue
    elif 'mu_r_i' in str(s) or 'mu_L0_i' in str(s) or 'mu_Lmax_i' in str(s):
        print "Line    : ", str(s)
        pymc.raftery_lewis(s, q=quantile, r=accuracy)
        Nmin, kthin, Nburn, Nprec, Kmind = pymc.raftery_lewis(s,
                                                              q=quantile,
                                                              r=accuracy,
                                                              verbose=0)
        print "  Calculated Dependence Factor = ", Nprec / float(Nmin)
        print "-----------------------------------"
        # NOTE: if there are too many parameters, you may need to save them in a .csv file
    elif 'e_' in str(s):
        print "Plant ID : ", str(s)
        pymc.raftery_lewis(s, q=quantile, r=accuracy)
        Nmin, kthin, Nburn, Nprec, Kmind = pymc.raftery_lewis(s,
                                                              q=quantile,
                                                              r=accuracy,
                                                              verbose=0)
        print "  Calculated Dependence Factor = ", Nprec / float(Nmin)
Example #11
from pymc import raftery_lewis

def print_diagn(M, q, r, s):
    return raftery_lewis(M, q, r, s, verbose=0)
        return samples, log_probs

    num_samples = 8192
    samples, log_probs = run_mcmc(test_means, num_samples)

    emp_means = np.mean(samples, axis=0)
    emp_cov = np.cov(samples.T)
    print(emp_means, '\n\n', test_cov, '\n\n', emp_cov, '\n\n',
          test_cov / emp_cov)

    import pylab as pl
    import pymc
    scores = pymc.geweke(log_probs)
    pymc.Matplot.geweke_plot(scores, 'test')
    pymc.raftery_lewis(log_probs, q=0.025, r=0.01)
    pymc.Matplot.autocorrelation(log_probs, 'test')
    pl.show()

    bp()

# ==============================================
#                            Developers' Section
# ==============================================
# ------------ Scrap work goes here ------------
'''
# 'Stepping in' with a more sophisticated stopping criterion.
def step_in(self, local_fn, threshold, upper, lower, dtype=None):
	if (dtype is None): dtype = threshold.dtype
	with tf.name_scope('step_in') as scope:
		# Source: https://github.com/HIPS/Spearmint/blob/master/spearmint/sampling/mcmc.py
Example #13
def get_mcmc_stats(all_samples,v_names,out_file_base,debug):
    """
    Generate statistics of the passed in MCMC samples.
    Assumes that the first column of all_samples contains the step number, and the last two
    columns contain the acceptance probability and the posterior probability for each sampled state.

    Inputs:
        all_samples   : Array with all samples (one sample set per row). Has step number in first
                        column and acceptance probability and posterior in last two columns
        v_names       : Actual variable names
        out_file_base : Base for output file names
        debug         : Writes out more info for larger values

    Outputs:
        Various statistics written to the screen
        Correlation functions written to pdf files
        Returns array of map values
    """

    # Number of variables, columns, samples in the file
    n_vars = len(v_names)
    n_cols = all_samples.shape[1]
    n_sam  = all_samples.shape[0]

    # Extract all MCMC chain variables in separate array
    var_samples = all_samples[:,1:1+n_vars]
    if (debug > 0):
        print var_samples.shape

    # Compute mean parameter values
    par_mean = np.mean(var_samples,axis=0,dtype=np.float64)

    #print "\nParameter mean values:\n"
    #for i_v in range(n_vars):
    #    print "  ", v_names[i_v], ":", par_mean[i_v]

    # Compute the covariance
    par_cov = np.cov(var_samples,rowvar=0)

    print "\nParameter covariances:\n"
    print par_cov

    # write out covariance matrix to file
    cov_file_name = out_file_base + ".covariance.dat"
    np.savetxt(cov_file_name,par_cov)

    # print the square root of the diagonal entries of the covariance
    #print "\nParameter standard deviations (proposal width estimates):\n"
    #for i_v in range(n_vars):
    #    print "  ", v_names[i_v], ":", math.sqrt(par_cov[i_v,i_v])

    #
    # Compute the MAP values
    # (could also get this from the last line of the MCMC output file
    # but this line is not always there; and it is more fun
    # to do it with Python)
    #

    # Sample index with max posterior prob (last column in MCMC file):
    i_map = all_samples[:,-1].argmax()

    print "\n",
    print '%27s' % "Parameter :", '%15s' % "Mean Value", '%15s' % "MAP values", '%15s' % "Std. Dev."
    for i_v in range(n_vars):
        print '%25s' % v_names[i_v], ":", '%15.8e' % par_mean[i_v], '%15.8e' % var_samples[i_map,i_v],
        print '%15.8e' % math.sqrt(par_cov[i_v,i_v])

    # Write mean and MAP to file
    mean_file_name = out_file_base + ".mean.dat"
    np.savetxt(mean_file_name,par_mean)

    map_file_name = out_file_base + ".map.dat"
    np.savetxt(map_file_name,var_samples[i_map,:])

    # Compute mean and standard deviation of acceptance probability
    print "\nAcceptance Probability:\n"

    # In some cases, the next to last column contains the ratio of posterior
    # values rather than the acceptance probability. First convert this number
    # to acceptance probabilities: acc_prob = min(alpha,1)
    # (This does no harm if the next to last column already contains the actual acceptance probability)
    acc_prob = np.minimum(all_samples[:,-2],np.ones_like(all_samples[:,-2]))
    # In some cases, a very large negative number is shown in the column for acceptance
    # probability to indicate a proposed value was out of bounds. In that case, replace
    # the value with 0. Again, this does no harm if the next to last column already contains
    # the actual acceptance probability.
    acc_prob = np.maximum(acc_prob,np.zeros_like(acc_prob))
    print "Mean     :",acc_prob.mean(),
    print "Std. Dev.:",acc_prob.std()

    # #
    # # Compute effective sample size  (ESS)
    # #
    # print "\nEffective Sample Sizes:\n"
    #
    # ess = effective_sample_sizes(var_samples,par_mean,par_cov)
    #
    # for i_v in range(n_vars):
    #     print "  ",v_names[i_v],":",int(ess[i_v]),"out of",n_sam

    #
    # Compute autocorrelations and effective sample size  (ESS)
    #
    print "\nAutocorrelations and Effective Sample Sizes:\n"

    # Number of variable samples in this file
    n_sam = var_samples.shape[0]

    # Cut-off point for autocorrelation
    # Ideally, n_a should be chosen such that the autocorrelation goes to 0 at this lag.
    # Choosing n_a too low will give inaccurate results (overpredicting ESS), but going
    # to a much higher lag will create a lot of noise in the ESS estimate.
    n_a = min(1000,n_sam)

    # Autocorrelation computation
    auto_corr_vars = compute_group_auto_corr(var_samples,n_a)

    # Plotting and computation of effective sample size
    for i_v in range(n_vars):
      # Plot autocorrelation to see if n_a is large enough
      plot_auto_corr(auto_corr_vars[:,i_v],v_names[i_v])
      # Effective Sample Size (Number of samples divided by integral of autocorrelation)
      ESS = compute_effective_sample_size(n_sam,auto_corr_vars[:,i_v])
      print "  ",v_names[i_v],":",ESS,"out of",n_sam," ; skip factor:",n_sam/ESS

    print "\n  See plots corr-*.pdf for autocorrelations of chain samples for all variables."

    # The following operations rely on PyMC
    if have_pymc:
        #
        # Compute Raftery-Lewis convergence test
        #
        print "\nComputing Raftery-Lewis criteria for all variables\n"
        quant = 0.025 # Quantile level to be estimated
        relacc = 0.01  # Error in quantile level (relative to the mean of the parameter)
        conf = 0.95  # Confidence in the achieved accuracy
        print "  Computing # of samples needed to compute quantile",quant
        print "  for an accuracy",relacc*100,"% relative to parameter mean, with confidence",conf*100,"%:\n"
        print "  Variable name: # initial samples to skip, # additional samples to take, thinning factor"
        for i_v in range(n_vars):
            output = pymc.raftery_lewis(var_samples[:,i_v], q=quant, r=relacc*par_mean[i_v], s=conf, verbose=0)
            print "  ",'%25s' % v_names[i_v], ":", '%8d' % output[2],",",'%8d' % output[3],",",'%8d' % output[4]

        print "\n"

        quant = 0.5 # Quantile level to be estimated
        print "  Computing # of samples needed to compute quantile",quant
        print "  for an accuracy",relacc*100,"% relative to parameter mean, with confidence",conf*100,"%:\n"
        print "  Variable name: # initial samples to skip, # additional samples to take, thinning factor"
        for i_v in range(n_vars):
            output = pymc.raftery_lewis(var_samples[:,i_v], q=quant, r=relacc*par_mean[i_v], s=conf, verbose=0)
            print "  ",'%25s' % v_names[i_v], ":", '%8d' % output[2],",",'%8d' % output[3],",",'%8d' % output[4]

        #
        # Geweke test
        #
        print "\nComputing Geweke test for all variables\n"
        print "Geweke Test temporarily disabled. Needs to be debugged."
        # for i_v in range(n_vars):
        #     var_scores = pymc.geweke(var_samples[:,i_v], intervals=20)
        #     pymc.Matplot.geweke_plot(var_scores, v_names[i_v])
        # print "  See plots *-diagnostic.png"

        #
        # Autocorrelations (done above already)
        #
        # print "\nComputing autocorrelations for all variables\n"
        # for i_v in range(n_vars):
        #     pymc.Matplot.autocorrelation(var_samples[:,i_v], v_names[i_v])
        # print "  See plots *-acf.png"

    return var_samples[i_map,:]
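
A hypothetical way to drive this routine, assuming an MCMC output file laid out as the docstring describes (step number first, then the variables, then acceptance and posterior probability in the last two columns); the file name and variable names are purely illustrative:

# Illustrative only: the file name, layout and variable names are assumptions.
all_samples = np.loadtxt('mcmc_chain.dat')
v_names = ['k_rate', 'E_act']
map_values = get_mcmc_stats(all_samples, v_names, 'mcmc_chain', debug=0)
print "MAP values:", map_values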
Example #14
mcmc.cont(AllBAOh.trace('omega_M_0')[:],AllBAOh.trace('w')[:],color='red',nsmooth=sm)
xx=np.linspace(0,1,1000)
plot(xx,xx*0-1,'k:')

#### convergence checking

# Geweke: mean in segments compared with global mean
scores=pymc.geweke(BAOh,intervals=10)
pymc.Matplot.geweke_plot(scores)

# Raftery-Lewis
pymc.raftery_lewis(BAOh,q=0.68,r=0.01)

ft=scipy.fft(BAOh.trace('w')[:])
ps=abs(ft)**2
clf()
xscale('log')
yscale('log')
plot(ps)
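
The power spectrum above is a useful visual check because it is the Fourier transform of the chain's autocorrelation (Wiener-Khinchin theorem); excess power at low frequencies indicates a strongly correlated chain. A complementary minimal sketch, not part of the original script, estimates the normalized autocorrelation of the 'w' trace and a rough effective sample size (assumes numpy is available as np):

w = BAOh.trace('w')[:]
w = w - w.mean()
acf = np.correlate(w, w, mode='full')[len(w)-1:]   # autocorrelation at lags 0..N-1
acf /= acf[0]                                      # normalize so acf[0] == 1
cutoff = np.argmax(acf < 0.05) or len(acf)         # first lag where correlation is small
tau_int = 1 + 2 * acf[1:cutoff].sum()              # integrated autocorrelation time
print 'effective sample size ~', len(w) / tau_int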