def convergence_diagnose_birt(m, a):
    # birt_model = pymc.MCMC(...)
    # Raftery-Lewis: required chain length, burn-in and thinning
    pymc.raftery_lewis(m.a, q=0.025, r=0.01)
    # Geweke z-scores for drift between early and late parts of the chain
    scores = pymc.geweke(m.a, intervals=20)
    pymc.Matplot.geweke_plot(scores)
    # Gelman-Rubin R-hat (requires multiple chains in the sampler)
    pymc.gelman_rubin(m)
def analyzeConvergence(dbFilename, db, pn, burnin):
    print 'ANALYZING {0}'.format(pn)
    vals = np.array(getParameter(db, pn, burnin=burnin))
    convDict = OrderedDict()

    gw = pymc.geweke(vals)
    gwDict = OrderedDict()
    gwDict['scores'] = gw
    frac2SD = len([x for x in gw if x[1] > -2 and x[1] < 2]) / float(len(gw))
    gwDict['frac_2sd'] = frac2SD
    convDict['geweke'] = gwDict

    rl = pymc.raftery_lewis(vals, 0.025, r=0.01)
    rlDict = OrderedDict()
    rlDict['iter_req_acc'] = rl[0]
    rlDict['thin_first_ord'] = rl[1]
    rlDict['burnin'] = rl[2]
    rlDict['iter_total'] = rl[3]
    rlDict['thin_ind'] = rl[4]
    convDict['raftery_lewis'] = rlDict

    print ''
    return convDict
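# A minimal usage sketch for analyzeConvergence. The pickle database name,
# the parameter name 'alpha', and the helper getParameter (defined elsewhere
# in the original project) are assumptions made for illustration; only the
# keys of the returned dict come from the function above.
db = pymc.database.pickle.load('chain.pickle')
conv = analyzeConvergence('chain.pickle', db, 'alpha', burnin=1000)
print conv['geweke']['frac_2sd']       # fraction of Geweke z-scores inside (-2, 2)
print conv['raftery_lewis']['burnin']  # burn-in suggested by Raftery-Lewis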
def test_simple(self):
    nmin, kthin, nburn, nprec, kmind = pymc.raftery_lewis(S.a, 0.5, .05, verbose=0)
    # nmin should be approximately the same as nprec/kmind
    assert (0.8 < (float(nprec) / kmind) / nmin < 1.2)
def fit_two_mcmc(time, signal, height_th, one_pulse,
                 sigma0,  # signal noise
                 sum_mu, sum_tau, sum_a, sum_b,
                 diff_tau, diff_a, diff_b,
                 sampling, burn, thin,
                 Plot=False, debug=False, auto=False):
    # LIMIT SEARCH FOR OFFSETS
    _t_initial = time[pd.srlatch_rev(signal, 0, height_th)][0]
    _t_final = time[pd.srlatch_rev(signal, 0, height_th)][-1]

    def model(x, f):
        # PRIORS
        y_err = sigma0
        # print (_t_initial, _t_final, one_x_offset_init)
        one_x_offset = pymc.Uniform("one_x_offset", _t_initial,
                                    time[np.argmax(signal)], value=_t_initial)
        two_x_offset = pymc.Uniform("two_x_offset", _t_initial, _t_final,
                                    value=_t_final)
        # sigma/mu is the n=1 std deviation in units of n=1 amplitude
        sum_of_amps = pymc.TruncatedNormal("sum_amps", mu=sum_mu, tau=sum_tau,
                                           a=sum_a, b=sum_b, value=sum_mu)
        diff_of_amps = pymc.TruncatedNormal("diff_amps", mu=0, tau=diff_tau,
                                            a=diff_a, b=diff_b, value=0)
        one_x_amplitude = (sum_of_amps + diff_of_amps) / 2
        two_x_amplitude = (sum_of_amps - diff_of_amps) / 2

        # MODEL
        @pymc.deterministic(plot=False)
        def mod_two_pulse(x=time, one_x_offset=one_x_offset,
                          two_x_offset=two_x_offset,
                          one_x_amplitude=one_x_amplitude,
                          two_x_amplitude=two_x_amplitude):
            return one_pulse(x, x_offset=one_x_offset, amplitude=one_x_amplitude) + \
                   one_pulse(x, x_offset=two_x_offset, amplitude=two_x_amplitude)

        # LIKELIHOOD
        y = pymc.Normal("y", mu=mod_two_pulse, tau=1.0 / y_err**2,
                        value=signal, observed=True)
        return locals()

    # The sample is stored in a Python serialization (pickle) database
    MDL = pymc.MCMC(model(time, signal), db='pickle')
    # MDL.use_step_method(pymc.AdaptiveMetropolis,
    #                     [MDL.sum_of_amps, MDL.diff_of_amps],
    #                     scales={MDL.sum_of_amps: np.sqrt(1/sum_tau),
    #                             MDL.diff_of_amps: np.sqrt(1/diff_tau)})

    if auto:
        # Use Raftery-Lewis on a pilot run to choose burn-in, chain length and
        # thinning per trace:
        # https://pymc-devs.github.io/pymc/modelchecking.html#convergence-diagnostics
        InitSamples = 4 * len(time)
        InitMDL = MDL
        InitMDL.sample(iter=InitSamples, burn=int(InitSamples * .5), thin=10)
        pymc_diagnostic = pymc.raftery_lewis(InitMDL, q=0.025, r=0.02, verbose=0)
        [EstBurn, EstSampling, EstThin] = np.max(
            np.array(
                # skip first key (mod_two_pulse): irrelevant here
                [pymc_diagnostic[i] for i in pymc_diagnostic.keys()[1:]]
            ),
            axis=0)[2:]  # first two entries (first-order Markov chain diagnostics) are irrelevant here
        # print [EstBurn, EstSampling, EstThin]
        # actual run
        MDL.sample(iter=EstSampling, burn=EstBurn, thin=EstThin, verbose=0)
    else:
        # thin: keep every 'thin'-th sample
        # burn: number of initial samples to discard; choose by the number of
        # samples needed for the parameters to stabilise at the desired precision
        MDL.sample(iter=sampling, burn=burn, thin=thin, verbose=-1)

    if Plot:
        y_fit = MDL.mod_two_pulse.value  # get MCMC fitted values
        plt.plot(time, signal, 'b', marker='o', ls='-', lw=1, label='Observed')
        plt.plot(time, y_fit, 'k', marker='+', ls='--', ms=5, mew=2,
                 label='Bayesian Fit Values')
        plt.legend()
        pymc.Matplot.plot(MDL)

    if debug:
        for i in np.arange(10):
            MDL.sample(iter=sampling, burn=burn, thin=thin, verbose=0)
        pymc.gelman_rubin(MDL)
        pymc.Matplot.summary_plot(MDL)

    return MDL  # usage: MDL.one_x_offset.value for fitted result
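# The auto branch above condenses the per-variable pymc.raftery_lewis output
# into a single (burn, iterations, thin) triple. A minimal sketch of the same
# idea for one pilot trace; `trace` is a hypothetical 1-D array of samples,
# and the tuple order follows the other examples in this collection:
nmin, kthin, nburn, nprec, kmind = pymc.raftery_lewis(trace, q=0.025, r=0.02,
                                                      verbose=0)
est_burn = nburn   # iterations to discard at the start
est_iter = nprec   # subsequent iterations required
est_thin = kmind   # thinning factor for an approximately independent chain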
# return (cur_x, funEvals['funevals']) if returnFunEvals else cur_x

if __name__ == '__main__':
    npr.seed(1)

    import pylab as pl
    import pymc

    D = 10
    fn = lambda x: -0.5 * np.sum(x**2)
    iters = 1000
    samps = np.zeros((iters, D))
    for ii in xrange(1, iters):
        samps[ii, :] = slice_sample(samps[ii - 1, :], fn, sigma=0.1,
                                    step_out=False, doubling_step=True,
                                    verbose=False)

    ll = -0.5 * np.sum(samps**2, axis=1)

    scores = pymc.geweke(ll)
    pymc.Matplot.geweke_plot(scores, 'test')
    pymc.raftery_lewis(ll, q=0.025, r=0.01)
    pymc.Matplot.autocorrelation(ll, 'test')
# Instantiate and run sampler
S = pm.MCMC(my_model)
S.sample(10000, burn=5000)

# Calculate and plot Geweke scores
scores = pm.geweke(S, intervals=20)
pm.Matplot.geweke_plot(scores)

# Geweke plot for a single parameter
trace = S.trace('alpha')[:]
alpha_scores = pm.geweke(trace, intervals=20)
pm.Matplot.geweke_plot(alpha_scores, 'alpha')

# Calculate Raftery-Lewis diagnostics
pm.raftery_lewis(S, q=0.025, r=0.01)

"""
Sample output:

========================
Raftery-Lewis Diagnostic
========================

937 iterations required (assuming independence) to achieve 0.01 accuracy
with 95 percent probability.

Thinning factor of 1 required to produce a first-order Markov chain.

39 iterations to be discarded at the beginning of the simulation (burn-in).

11380 subsequent iterations required.

Thinning factor of 11 required to produce an independence chain.
"""
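# With verbose=0 the same numbers are returned as a tuple instead of printed.
# A minimal sketch reusing the sampler S above; the dependence factor
# I = nprec/nmin measures the inflation in required iterations caused by
# autocorrelation in the chain (I close to 1 means nearly independent draws):
nmin, kthin, nburn, nprec, kmind = pm.raftery_lewis(S.trace('alpha')[:],
                                                    q=0.025, r=0.01, verbose=0)
print 'Dependence factor I =', nprec / float(nmin)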
    direction = direction / np.sqrt(np.sum(direction**2))
    new_x = direction_slice(direction, init_x)

    if scalar:
        return float(new_x[0])
    else:
        return new_x

if __name__ == '__main__':
    npr.seed(1)

    import pylab as pl

    D = 10
    fn = lambda x: -0.5 * np.sum(x**2)
    iters = 1000
    samps = np.zeros((iters, D))
    for ii in xrange(1, iters):
        samps[ii, :] = slice_sample(samps[ii - 1, :], fn, sigma=0.1,
                                    step_out=False, doubling_step=True,
                                    verbose=False)

    ll = -0.5 * np.sum(samps**2, axis=1)

    scores = pymc.geweke(ll)
    pymc.Matplot.geweke_plot(scores, 'test')
    pymc.raftery_lewis(ll, q=0.025, r=0.01)
    pymc.Matplot.autocorrelation(ll, 'test')
def print_diagn(M, q, r, s):
    return raftery_lewis(M, q, r, s, verbose=0)
    print
    print "Thinning factor of %i required to produce a first-order Markov chain." % kthin
    print
    print "%i iterations to be discarded at the beginning of the simulation (burn-in)." % nburn
    print
    print "%s subsequent iterations required." % nprec
    print
    print "Thinning factor of %i required to produce an independence chain." % kmind

    Read: Raftery and Lewis, StatSci 1992.pdf. The dependence factor I
    measures the increase in the number of iterations due to dependence
    in the sequence.
    """
    for s in list(M.stochastics):
        if 'predictive' in str(s):
            continue
        elif 'mu_r_i' in str(s) or 'mu_L0_i' in str(s) or 'mu_Lmax_i' in str(s):
            print "Line : ", str(s)
            pymc.raftery_lewis(s, q=quantile, r=accuracy)
            Nmin, kthin, Nburn, Nprec, Kmind = pymc.raftery_lewis(s, q=quantile, r=accuracy, verbose=0)
            print " Calculated Dependence Factor = ", Nprec / float(Nmin)
            print "-----------------------------------"
        # NOTE: if there are too many parameters, you may need to save them in a .csv file
        elif 'e_' in str(s):
            print "Plant ID : ", str(s)
            pymc.raftery_lewis(s, q=quantile, r=accuracy)
            Nmin, kthin, Nburn, Nprec, Kmind = pymc.raftery_lewis(s, q=quantile, r=accuracy, verbose=0)
            print " Calculated Dependence Factor = ", Nprec / float(Nmin)
    return samples, log_probs

num_samples = 8192
samples, log_probs = run_mcmc(test_means, num_samples)
emp_means = np.mean(samples, axis=0)
emp_cov = np.cov(samples.T)
print(emp_means, '\n\n', test_cov, '\n\n', emp_cov, '\n\n', test_cov / emp_cov)

import pylab as pl
import pymc

scores = pymc.geweke(log_probs)
pymc.Matplot.geweke_plot(scores, 'test')
pymc.raftery_lewis(log_probs, q=0.025, r=0.01)
pymc.Matplot.autocorrelation(log_probs, 'test')
pl.show()
bp()  # debugger breakpoint (bp defined elsewhere in this project)

# ==============================================
# Developers' Section
# ==============================================
# ------------ Scrap work goes here ------------
'''
# 'Stepping in' with a more sophisticated stopping criterion.
def step_in(self, local_fn, threshold, upper, lower, dtype=None):
    if dtype is None:
        dtype = threshold.dtype
    with tf.name_scope('step_in') as scope:
        # Source: https://github.com/HIPS/Spearmint/blob/master/spearmint/sampling/mcmc.py
def get_mcmc_stats(all_samples, v_names, out_file_base, debug):
    """
    Generate statistics of the passed-in MCMC samples.

    Assumes that the first column of all_samples contains the step number,
    and the last two columns contain the acceptance probability and the
    posterior probability for each sampled state.

    Inputs:
        all_samples   : Array with all samples (one sample set per row).
                        Has the step number in the first column and the
                        acceptance probability and posterior in the last
                        two columns.
        v_names       : Actual variable names
        out_file_base : Base for output file names
        debug         : Writes out more info if number is larger

    Outputs:
        Various statistics written to the screen.
        Correlation functions written to PDF files.
        Returns array of MAP values.
    """
    # Number of variables, columns, samples in the file
    n_vars = len(v_names)
    n_cols = all_samples.shape[1]
    n_sam = all_samples.shape[0]

    # Extract all MCMC chain variables in separate array
    var_samples = all_samples[:, 1:1 + n_vars]
    if debug > 0:
        print var_samples.shape

    # Compute mean parameter values
    par_mean = np.mean(var_samples, axis=0, dtype=np.float64)
    # print "\nParameter mean values:\n"
    # for i_v in range(n_vars):
    #     print " ", v_names[i_v], ":", par_mean[i_v]

    # Compute the covariance
    par_cov = np.cov(var_samples, rowvar=0)
    print "\nParameter covariances:\n"
    print par_cov

    # Write out covariance matrix to file
    cov_file_name = out_file_base + ".covariance.dat"
    np.savetxt(cov_file_name, par_cov)

    # Print the square root of the diagonal entries of the covariance
    # print "\nParameter standard deviations (proposal width estimates):\n"
    # for i_v in range(n_vars):
    #     print " ", v_names[i_v], ":", math.sqrt(par_cov[i_v, i_v])

    #
    # Compute the MAP values
    # (could also get this from the last line of the MCMC output file,
    # but this line is not always there, and it is more fun
    # to do it with Python)
    #
    # Sample index with max posterior prob (last column in MCMC file):
    i_map = all_samples[:, -1].argmax()
    print "\n",
    print '%27s' % "Parameter :", '%15s' % "Mean Value", '%15s' % "MAP values", '%15s' % "Std. Dev."
    for i_v in range(n_vars):
        print '%25s' % v_names[i_v], ":", '%15.8e' % par_mean[i_v], '%15.8e' % var_samples[i_map, i_v],
        print '%15.8e' % math.sqrt(par_cov[i_v, i_v])

    # Write mean and MAP to file
    mean_file_name = out_file_base + ".mean.dat"
    np.savetxt(mean_file_name, par_mean)
    map_file_name = out_file_base + ".map.dat"
    np.savetxt(map_file_name, var_samples[i_map, :])

    # Compute mean and standard deviation of acceptance probability
    print "\nAcceptance Probability:\n"
    # In some cases, the next-to-last column contains the ratio of posterior
    # values rather than the acceptance probability. First convert this number
    # to acceptance probabilities: acc_prob = min(alpha, 1).
    # (This does no harm if the next-to-last column already contains the
    # actual acceptance probability.)
    acc_prob = np.minimum(all_samples[:, -2], np.ones_like(all_samples[:, -2]))
    # In some cases, a very large negative number is shown in the column for
    # acceptance probability to indicate a proposed value was out of bounds.
    # In that case, replace the value with 0. Again, this does no harm if the
    # next-to-last column already contains the actual acceptance probability.
    acc_prob = np.maximum(acc_prob, np.zeros_like(acc_prob))
    print "Mean :", acc_prob.mean(),
    print "Std. Dev.:", acc_prob.std()

    # # Compute effective sample size (ESS)
    # print "\nEffective Sample Sizes:\n"
    # ess = effective_sample_sizes(var_samples, par_mean, par_cov)
    # for i_v in range(n_vars):
    #     print " ", v_names[i_v], ":", int(ess[i_v]), "out of", n_sam

    #
    # Compute autocorrelations and effective sample size (ESS)
    #
    print "\nAutocorrelations and Effective Sample Sizes:\n"

    # Number of variable samples in this file
    n_sam = var_samples.shape[0]

    # Cut-off point for autocorrelation.
    # Ideally, n_a should be chosen such that the autocorrelation goes to 0
    # at this lag. Choosing n_a too low will give inaccurate results
    # (overpredicting ESS), but going to much higher lag will create a lot
    # of noise in the ESS estimate.
    n_a = min(1000, n_sam)

    # Autocorrelation computation
    auto_corr_vars = compute_group_auto_corr(var_samples, n_a)

    # Plotting and computation of effective sample size
    for i_v in range(n_vars):
        # Plot autocorrelation to see if n_a is large enough
        plot_auto_corr(auto_corr_vars[:, i_v], v_names[i_v])
        # Effective Sample Size
        # (number of samples divided by the integral of the autocorrelation)
        ESS = compute_effective_sample_size(n_sam, auto_corr_vars[:, i_v])
        print " ", v_names[i_v], ":", ESS, "out of", n_sam, " ; skip factor:", n_sam / ESS
    print "\n See plots corr-*.pdf for autocorrelations of chain samples for all variables."

    # The following operations rely on PyMC
    if have_pymc:
        #
        # Compute Raftery-Lewis convergence test
        #
        print "\nComputing Raftery-Lewis criteria for all variables\n"
        quant = 0.025   # Quantile level to be estimated
        relacc = 0.01   # Error in quantile level (relative to the mean of the parameter)
        conf = 0.95     # Confidence in the achieved accuracy
        print " Computing # of samples needed to compute quantile", quant
        print " for an accuracy", relacc * 100, "% relative to parameter mean, with confidence", conf * 100, "%:\n"
        print " Variable name: # initial samples to skip, # additional samples to take, thinning factor"
        for i_v in range(n_vars):
            output = pymc.raftery_lewis(var_samples[:, i_v], q=quant,
                                        r=relacc * par_mean[i_v], s=conf, verbose=0)
            print " ", '%25s' % v_names[i_v], ":", '%8d' % output[2], ",", '%8d' % output[3], ",", '%8d' % output[4]
        print "\n"

        quant = 0.5  # Quantile level to be estimated
        print " Computing # of samples needed to compute quantile", quant
        print " for an accuracy", relacc * 100, "% relative to parameter mean, with confidence", conf * 100, "%:\n"
        print " Variable name: # initial samples to skip, # additional samples to take, thinning factor"
        for i_v in range(n_vars):
            output = pymc.raftery_lewis(var_samples[:, i_v], q=quant,
                                        r=relacc * par_mean[i_v], s=conf, verbose=0)
            print " ", '%25s' % v_names[i_v], ":", '%8d' % output[2], ",", '%8d' % output[3], ",", '%8d' % output[4]

        #
        # Geweke test
        #
        print "\nComputing Geweke test for all variables\n"
        print "Geweke Test temporarily disabled. Needs to be debugged."
        # for i_v in range(n_vars):
        #     var_scores = pymc.geweke(var_samples[:, i_v], intervals=20)
        #     pymc.Matplot.geweke_plot(var_scores, v_names[i_v])
        # print " See plots *-diagnostic.png"

        #
        # Autocorrelations (done above already)
        #
        # print "\nComputing autocorrelations for all variables\n"
        # for i_v in range(n_vars):
        #     pymc.Matplot.autocorrelation(var_samples[:, i_v], v_names[i_v])
        # print " See plots *-acf.png"

    return var_samples[i_map, :]
mcmc.cont(AllBAOh.trace('omega_M_0')[:], AllBAOh.trace('w')[:],
          color='red', nsmooth=sm)
xx = np.linspace(0, 1, 1000)
plot(xx, xx * 0 - 1, 'k:')

#### Convergence checking
# Geweke: mean of early chain segments compared with the mean of the
# final part of the chain
scores = pymc.geweke(BAOh, intervals=10)
pymc.Matplot.geweke_plot(scores)

# Raftery-Lewis
pymc.raftery_lewis(BAOh, q=0.68, r=0.01)

# Power spectrum of the w trace as an informal check of mixing
ft = scipy.fft(BAOh.trace('w')[:])
ps = abs(ft)**2
clf()
xscale('log')
yscale('log')
plot(ps)
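# A simplified sketch of the statistic behind pymc.geweke: a z-score
# comparing the mean of an early segment with the mean of the last half of
# the chain. (The original Geweke diagnostic uses spectral-density variance
# estimates; plain sample variances are used here for illustration.)
def geweke_z(trace, first=0.1, last=0.5):
    trace = np.asarray(trace, dtype=float)
    n = len(trace)
    a = trace[:int(first * n)]        # early segment
    b = trace[int((1 - last) * n):]   # final segment
    return (a.mean() - b.mean()) / np.sqrt(a.var() + b.var())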