def test_convergence(run_model, chains, saveplots=False):
    """
    Uses an ipyparallel client to run <chains> model fits, then computes the
    R-hat (Gelman-Rubin) statistic on the resulting traces. Finally, the
    models are concatenated and their posteriors are plotted for assessing
    convergence.
    """
    from ipyparallel import Client

    c = Client()[:]                          # direct view over all available engines
    jobs = c.map(run_model, range(chains))   # dispatch one model fit per chain id
    models = jobs.get()                      # block until all chains have finished

    # Calculate convergence statistics
    from kabuki.analyze import gelman_rubin
    rhat = gelman_rubin(models)

    # Create a new model that contains all traces concatenated from the individual models
    from kabuki.utils import concat_models
    combined_model = concat_models(models)

    # Plot posteriors
    combined_model.plot_posteriors(save=saveplots)

    return rhat
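# --- Usage sketch (illustrative only, not part of the original script): this
# assumes an ipyparallel cluster is already running (e.g. started with
# `ipcluster start -n 4`) and that the fitting function can be pickled and
# shipped to the engines. The function name and data file are hypothetical. ---
def fit_one_chain(chain_id):
    import hddm
    data = hddm.load_csv('mydata.csv')       # hypothetical data file
    m = hddm.HDDM(data)
    m.sample(2000, burn=500)                 # short run, for illustration only
    return m

rhat = test_convergence(fit_one_chain, chains=4)
print(rhat)                                  # Rhat values near 1.0 suggest convergence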
# ************************************************************
# create group maps
from glob import glob
from multiprocessing import Pool
from os import makedirs, path, rename

from kabuki.analyze import gelman_rubin

pool = Pool()
#mp_results = pool.map(assess_convergence, hddm_fun_dict.keys())
mp_results = pool.map(assess_convergence, ['stroop'])
pool.close()
pool.join()

results = {}
for d in mp_results:
    results.update(d)

# save plots of traces
# make sure the output directory exists
makedirs(path.join('hddm_output', 'Plots'), exist_ok=True)

gelman_vals = {}
for k, v in results.items():
    gelman_vals[k + '_base'] = gelman_rubin([i[0] for i in v])
    # plot posteriors
    v[0][0].plot_posteriors(['a', 't', 'v'], save=True)
    plots = glob('*png')
    for p in plots:
        rename(p, path.join('hddm_output', 'Plots', '%s_base_%s' % (k, p)))
    if v[0][1] is not None:
        gelman_vals[k + '_condition'] = gelman_rubin([i[1] for i in v])
        v[0][1].plot_posteriors(['a', 't', 'v'], save=True)
        plots = glob('*png')
        for p in plots:
            rename(p, path.join('hddm_output', 'Plots', '%s_condition_%s' % (k, p)))
# save gelman vals
    ss.sample(samples, burn=burnIn, thin=thinning, tune_throughout=False,
              db='pickle', dbname='local_traces' + str(i+1) + '.db')
    local_models.append(ss)
    if i == (n_runs-1):
        print('Waiting for the other chains to finish...')

if num_remote > 0:
    models = results.get()
for i in range(n_runs):
    models.append(local_models[i])
#print(len(models))
print('Finished sampling!')

if numberOfChains > 1:
    Rhat = gelman_rubin(models)
    print('\n Gelman-Rubin Rhat diagnostic:')
    for key in Rhat.keys():
        print((key, Rhat[key]))

name_dataFile = dataFile.replace(".csv", "")
for i in range(numberOfChains):
    save_csv(get_traces(models[i]), 'parameters' + str(i+1) + '.csv', sep=';')
print('Posterior samples are saved to file.')

if deviance:
    for i in range(numberOfChains):
        dev = models[i].mc.db.trace('deviance')()
        numpy.savetxt('deviance' + str(i+1) + '.csv', dev, delimiter=";")
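# --- Follow-up sketch (not in the original script): a common rule of thumb is
# that Rhat values above ~1.1 flag chains that have not yet converged. This
# assumes Rhat is the dict returned by gelman_rubin() above, i.e. that it was
# computed (numberOfChains > 1). ---
unconverged = {k: v for k, v in Rhat.items() if v > 1.1}
if unconverged:
    print('Parameters with Rhat > 1.1 (possible non-convergence):')
    for key, val in unconverged.items():
        print('  %s: %.3f' % (key, val))
else:
    print('All Rhat values are below 1.1; chains look converged.')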
import csv
import os
import pickle

import hddm
import kabuki
from kabuki.analyze import gelman_rubin

# Patch HDDM with a simple pickle-based save method
def savePatch(self, fname):
    with open(fname, 'wb') as f:
        pickle.dump(self, f)

hddm.HDDM.savePatch = savePatch

docdir = 'D:\\all hddm analysis\\BL\\informative priors 40K\\ic'
saved_model_name = 'BL_ic_no_tttv'
os.chdir(docdir)

models = []
for i in range(12):
    print(i)
    m = pickle.load(open('submodel' + str(i), 'rb'))
    models.append(m)
print("That's it!!")

gelman_rubin_stats = gelman_rubin(models)
with open('gelman_rubin_stats.csv', 'w') as csv_file:
    writer = csv.writer(csv_file)
    for key, value in gelman_rubin_stats.items():
        writer.writerow([key, value])

# The combined model:
running_model = kabuki.utils.concat_models(models)
# Saving the model:
running_model.savePatch(saved_model_name)
# Saving the traces:
running_model.get_traces().to_csv('traces.csv')
# Saving the model's stats:
running_model_stats = running_model.gen_stats()
running_model_stats.to_csv('stats_csv_' + saved_model_name + '.csv')
import hddm
import pandas as pd
from kabuki.analyze import gelman_rubin

models = []
for i in range(5):
    m = hddm.models.HDDMRegressor(data, 'v ~ other_poss_loss',
                                  depends_on={'v': 'condition',
                                              'z': 'condition',
                                              'a': 'condition'},
                                  bias=True,
                                  include=['z', 'st', 'sz', 'sv'],
                                  p_outlier=0.05)
    m.find_starting_values()
    m.sample(7000, burn=5000, dbname='traces.db', db='pickle')
    models.append(m)

gelman_rubin(models)

# retrieve plausible t-values by repeatedly sampling from the posterior distribution
# pick some new samples to calculate with these plausible values
new_sample_n = 5000
new_sample_burn = 10
new_samples = m_all_vza.sample(new_sample_n, burn=new_sample_burn)
# new_samples = m_all_vza

# for every sample create a data frame
# and compare the conditions, save the t values (example: z-parameter)
t_values_z_mm_con = []
t_values_z_mm_emp = []
t_values_z_mm_rec = []
for s in range(0, new_sample_n - new_sample_burn):
    this_z_sample_df = pd.DataFrame()
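# --- Illustrative sketch (not from the original script): instead of
# re-sampling, condition effects can also be assessed directly on the stored
# traces. The node names 'z(cond_A)' and 'z(cond_B)' are hypothetical and
# depend on how depends_on was specified. ---
import numpy as np

traces = m.get_traces()             # DataFrame: one column of posterior samples per node
z_diff = traces['z(cond_A)'] - traces['z(cond_B)']
# posterior probability that z is larger in condition A than in condition B
p_greater = float(np.mean(z_diff > 0))
print('P(z_A > z_B | data) = %.3f' % p_greater)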
# (ppc_data is assumed to come from an earlier posterior-predictive step,
# e.g. hddm.utils.post_pred_gen)
ppc_compare = hddm.utils.post_pred_stats(data, ppc_data)
ppc_stats = hddm.utils.post_pred_stats(data, ppc_data, call_compare=False)
print(ppc_compare)
ppc_data.to_csv(os.path.join(fig_dir, 'diagnostics', 'ppc_210518.csv'))

# Get Gelman-Rubin stats
models = []
for i in range(5):
    m = hddm.HDDMStimCoding(data, include={'z'}, stim_col='stimulus',
                            split_param='z',
                            depends_on={'v': 'cond_v', 'a': 'session',
                                        't': 'session', 'z': 'session'},
                            p_outlier=.05)
    m.find_starting_values()
    m.sample(5000, burn=2000)
    models.append(m)

model_base_name = '2018_GNG_HDDM'
fig_dir = os.path.join(model_base_name)
gelman = gelman_rubin(models)
gelman = pd.DataFrame.from_dict(gelman, orient='index')
gelman.to_csv(os.path.join(fig_dir, 'diagnostics', 'gelman_GNG.csv'))

##### Below: saving data and creating figures
results = model.gen_stats()  # check the outputs in comparison with the true params
results.to_csv(os.path.join(fig_dir, 'diagnostics', 'results_ARC.csv'))
#data.to_csv(os.path.join(fig_dir, 'diagnostics', 'simdataTest.csv'))
text_file = open(os.path.join(fig_dir, 'diagnostics', 'DIC.txt'), 'w')
if not os.path.exists(outputPath):
    os.makedirs(outputPath)

def run_model(id):
    m = hddm.HDDMRegressor(data, modelList, bias=includeBias,
                           include='p_outlier', group_only_regressors=False)
    m.find_starting_values()
    m.sample(nSample, burn=nBurned,
             dbname=outputPath + '/' + modelName + '_' + str(id) + '.db',
             db='pickle')
    m.savePatch(outputPath + '/' + modelName + '_' + str(id))
    return m

pool = multiprocessing.Pool()
models = pool.map(run_model, range(5))
pool.close()

m_rhat = gelman_rubin(models)
pd.DataFrame.from_dict(m_rhat, orient='index').to_csv(outputPath + '/' + modelName + '_RHat.csv')

m_comb = concat_models(models)
m_comb_export = m_comb.get_traces()
m_comb_export.to_csv(outputPath + '/' + modelName + '_traces.csv')

print("DIC: %f" % m_comb.dic)

results = m_comb.get_traces()
results.to_csv(outputPath + '/' + modelName + '_Results.csv')
summary = m_comb.gen_stats()
summary.to_csv(outputPath + '/' + modelName + '_Summary.csv')
except Exception as e:
    print(e)
    if str(e) == 'unsupported pickle protocol: 3':
        try:
            convert(mod + '_chain' + str(chain) + '_' + code + 'Code.model')
            mods.append(this_model)
            traces = this_model.get_traces()
            traces.to_csv(outputdir + '/../diagnostics/' + mod + '_traces_' + str(chain) + '.csv')
        except Exception as ee:
            print(ee)

try:
    gel_rub = pd.DataFrame(list(gelman_rubin(mods).items()),
                           columns=['parameter', 'rhat'])
    gel_rub.to_csv(outputdir + '/../diagnostics/gr_' + mod + '.csv')
    dic = this_model.dic
    print(dic)
    dics = pd.concat([dics, pd.DataFrame([[mod, dic]], columns=['model', 'DIC'])])
    dics.to_csv(outputdir + '/../diagnostics/dics_all.csv')
except Exception as e:
    print('Ran into issues with these settings: ' + samp + ' ' + task + ' ' + samp_size)