def drift_diffusion_hddm(data, samples=10000, n_jobs=6, run=True, parallel=True,
                         model_name='model', model_dir='.', accuracy_coding=False):
    import hddm
    import os
    import time
    import pp  # Parallel Python, used to run the chains in parallel

    # run the model:
    if run:
        if parallel:
            job_server = pp.Server(ppservers=(), ncpus=n_jobs)
            start_time = time.time()
            jobs = [(trace_id, job_server.submit(run_model,
                                                 (trace_id, data, model_dir, model_name, samples, accuracy_coding),
                                                 (), ('hddm',)))
                    for trace_id in range(n_jobs)]
            results = []
            for s, job in jobs:
                results.append(job())
            print("Time elapsed: ", time.time() - start_time, "s")
            job_server.print_stats()

            # save:
            for i in range(n_jobs):
                model = results[i]
                model.save(os.path.join(model_dir, '{}_{}'.format(model_name, i)))
        else:
            model = run_model(1, data, model_dir, model_name, samples, accuracy_coding)
            model.save(os.path.join(model_dir, model_name))

    # load the models:
    else:
        print('loading existing model(s)')
        if parallel:
            model = []
            for i in range(n_jobs):
                model.append(hddm.load(os.path.join(model_dir, '{}_{}'.format(model_name, i))))
        else:
            model = hddm.load(os.path.join(model_dir, model_name))
    return model
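# Usage sketch (added, not part of the original source): a hypothetical call of
# drift_diffusion_hddm() above. It assumes a trial-level DataFrame read from a
# hypothetical 'mydata.csv' and a module-level run_model() helper, which the
# function itself relies on.
if __name__ == '__main__':
    import pandas as pd
    df = pd.read_csv('mydata.csv')  # hypothetical input with 'rt'/'response' columns
    # fit n_jobs chains in parallel and save each one to model_dir
    drift_diffusion_hddm(df, samples=5000, n_jobs=3, run=True, parallel=True,
                         model_name='model', model_dir='models')
    # later: reload the saved chains as a list of models
    loaded_models = drift_diffusion_hddm(df, n_jobs=3, run=False, parallel=True,
                                         model_name='model', model_dir='models')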
def load_model(empty_model, dbfile):
    import hddm
    from glob import glob
    # load_concat_models is assumed to be defined/imported elsewhere (e.g. from kabuki)

    loadfile = sorted(glob(dbfile))
    if len(loadfile) > 1:
        models = []
        for l in loadfile:
            m = hddm.load(empty_model)
            m.load_db(l, db='pickle')
            models.append(m)
        m = load_concat_models(models)
        return m, models
    else:
        m = hddm.load(empty_model)
        m.load_db(loadfile[0], db='pickle')
        return m
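# Usage sketch (added, not part of the original source): hypothetical calls of
# load_model() above. The file names are illustrative; a pattern matching a single
# trace database returns one model, while a pattern matching several databases
# returns the concatenated model plus the list of per-chain models.
m_single = load_model('empty_model.pickle', 'traces_chain0.db')
m_comb, chain_models = load_model('empty_model.pickle', 'traces_chain*.db')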
def concat_models(mypath, model_name):
    import os, hddm, time, kabuki

    # ============================================ #
    # APPEND MODELS
    # ============================================ #

    allmodels = []
    print("appending models")
    for trace_id in range(60):  # loop over all chains that were run
        model_filename = os.path.join(mypath, model_name, 'modelfit-md%d.model' % trace_id)
        modelExists = os.path.isfile(model_filename)
        assert modelExists == True  # if not, this model has to be rerun
        starttime = time.time()
        print(model_filename)
        thism = hddm.load(model_filename)
        # now append
        allmodels.append(thism)
        elapsed = time.time() - starttime
        print("Elapsed time: %f seconds." % elapsed)

    # ============================================ #
    # MANUALLY APPEND CHAINS only
    # ============================================ #
    # 1. construct the model object with the original data and parameters
def PPC(samples, n=None):
    import hddm
    import pandas as pd
    import pickle
    import os

    # set filepath
    filepath = './models/'

    # import model
    m = hddm.load(filepath + 'm_all')
    print('model loaded')

    # set savepath
    savepath = './ppc/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)

    # start PP sampling
    print('starting...')
    ppc = hddm.utils.post_pred_gen(m, samples=samples)
    ppc.reset_index(inplace=True)

    print('\nsaving...')
    ppc.to_csv('%s/ppc_%d.csv' % (savepath, n), sep=',', encoding='utf-8')
    print('PP samples saved')
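# Usage sketch (added, not part of the original source): a hypothetical call of
# PPC() above. It draws 500 posterior-predictive datasets from the model saved at
# './models/m_all' and writes them to './ppc/ppc_0.csv'; the argument values are
# illustrative only.
PPC(samples=500, n=0)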
def test_init_sample_save_load_regressor(self):
    for model in self.models:
        if len(hddm.model_config.model_config[model]["choices"]) == 2:
            # Define Link Function
            def id_link(x):
                return x

            # Define Regression Model
            v_reg = {"model": "v ~ 1 + theta", "link_func": id_link}

            # Initialize HDDM Model (using cavanagh data)
            model_ = hddm.HDDMnnRegressor(
                self.cav_data,
                [v_reg],
                include=hddm.model_config.model_config[model]["hddm_include"],
                model=model,
                group_only_regressors=True,
            )

            # Sample
            model_.sample(
                self.nmcmc,
                burn=self.nburn,
                dbname=self.filepath + "test_" + model + ".db",
                db="pickle",
            )

            # Save Model
            print("Saving Model: ")
            model_.save(self.filepath + "test_" + model + ".pickle")

            # Load Model
            print("Loading Model: ")
            model__ = hddm.load(self.filepath + "test_" + model + ".pickle")
            self.assertTrue(model__.nn == True)

            # Check if id_link is preserved correctly
            print("Checking if link func is correctly recovered")
            self.assertTrue(model__.model_descrs[0]["model"]["link_func"] == id_link)

            del model_
            del model__
        else:
            print("Skipping n > 2 choice models for this test for now !")
            pass
def test_init_sample_save_load_stimcoding(self):
    for model in self.models:
        if len(hddm.model_config.model_config[model]["choices"]) == 2:
            # Generate Data
            data, gt = hddm.simulators.hddm_dataset_generators.simulator_stimcoding(
                model=model,
                n_samples_by_condition=self.n_samples_per_trial,
                split_by="v",
            )

            # Initialize HDDM Model
            model_ = hddm.HDDMnnStimCoding(
                data,
                model=model,
                split_param="v",
                drift_criterion=True,
                stim_col="stim",
            )

            # Sample
            model_.sample(
                self.nmcmc,
                burn=self.nburn,
                dbname=self.filepath + "test_" + model + ".db",
                db="pickle",
            )

            # Save Model
            print("Saving Model: ")
            model_.save(self.filepath + "test_" + model + ".pickle")

            # Load Model
            print("Loading Model: ")
            model__ = hddm.load(self.filepath + "test_" + model + ".pickle")
            self.assertTrue(model__.nn == True)

            del model_
            del model__
        else:
            print("Skipping n > 2 choice models for this test for now !")
            pass
    pass
def test_HDDM_load_save(self):
    include = ['z', 'sz', 'st', 'sv']
    dbs = ['pickle', 'sqlite']
    params = hddm.generate.gen_rand_params(include=include)
    data, params_true = hddm.generate.gen_rand_data(params, size=10, subjs=2)
    data = pd.DataFrame(data)
    data['cov'] = 1.
    for db, model_class in itertools.product(dbs, self.model_classes):
        if model_class is hddm.models.HDDMRegressor:
            model = model_class(data, 'v ~ cov', include=include, is_group_model=True)
        else:
            model = model_class(data, include=include, is_group_model=True)
        model.sample(100, dbname='test.db', db=db)
        model.save('test.model')
        m_load = hddm.load('test.model')
        os.remove('test.db')
        os.remove('test.model')
def test_HDDM_load_save(self, assert_=False):
    include = ['z', 'sz', 'st', 'sv']
    dbs = ['pickle', 'sqlite']
    model_classes = [hddm.models.HDDMTruncated, hddm.models.HDDM, hddm.models.HDDMRegressor]
    reg_func = lambda args, cols: args[0] + args[1] * cols[:, 0]
    reg = {'func': reg_func, 'args': ['v_slope', 'v_inter'], 'covariates': 'cov', 'outcome': 'v'}
    params = hddm.generate.gen_rand_params(include=include)
    data, params_true = hddm.generate.gen_rand_data(params, size=10, subjs=2)
    data = pd.DataFrame(data)
    data['cov'] = 1.
    for db, model_class in itertools.product(dbs, model_classes):
        if model_class is hddm.models.HDDMRegressor:
            model = model_class(data, regressor=reg, include=include, is_group_model=True)
        else:
            model = model_class(data, include=include, is_group_model=True)
        model.sample(20, dbname='test.db', db=db)
        model.save('test.model')
        m_load = hddm.load('test.model')
        os.remove('test.db')
        os.remove('test.model')
def get_stats():
    """
    Chop up the stats data frame for running a repeated-measures ANOVA.
    """
    data = pd.read_csv('data.csv', index_col=None)
    model = load('model.pickle')
    results = model.gen_stats()
    print(results.index.tolist())

    params = 'avtz'
    isis = sorted(data.isi.unique().tolist())
    deltas = sorted(data.delta.unique().tolist())
    subjects = sorted(data.subj_idx.unique().tolist())

    df = pd.DataFrame()
    for x in product(params, deltas, isis):
        y = '%s_subj(%s.%s).' % x
        vals = []
        for s in subjects:
            z = y + s
            vals.append(results.loc[z, 'mean'])
        df['%s_%s_%s' % x] = vals
    df.to_csv('for_anovas.csv', index=False)
def test_HDDM_load_save(self): include = ["z", "sz", "st", "sv"] dbs = ["pickle", "sqlite"] params = hddm.generate.gen_rand_params(include=include) data, params_true = hddm.generate.gen_rand_data(params, size=10, subjs=2) data = pd.DataFrame(data) data["cov"] = 1.0 for db, model_class in itertools.product(dbs, self.model_classes): if model_class is hddm.models.HDDMRegressor: model = model_class(data, "v ~ cov", include=include, is_group_model=True) else: model = model_class(data, include=include, is_group_model=True) model.sample(100, dbname="test.db", db=db) model.save("test.model") m_load = hddm.load("test.model") os.remove("test.db") os.remove("test.model")
def test_init_sample_save_load_single_subj(self):
    for model in self.models:
        # Get simulations
        data = self.get_data_single_subj(model=model)

        # Initialize HDDMnn Model
        print("Loading Model: " + model)
        model_ = hddm.HDDMnn(
            data,
            model=model,
            informative=False,
            include=hddm.model_config.model_config[model]["hddm_include"],
            is_group_model=False,
            depends_on={},
            p_outlier=0.00,
        )

        # Sample
        print("Sampling: ")
        model_.sample(
            self.nmcmc,
            burn=self.nburn,
            dbname=self.filepath + "test_" + model + ".db",
            db="pickle",
        )

        # Save Model
        print("Saving Model: ")
        model_.save(self.filepath + "test_" + model + ".pickle")

        # Load Model
        print("Loading Model: ")
        model__ = hddm.load(self.filepath + "test_" + model + ".pickle")
        self.assertTrue(model__.nn == True)

        del model_
        del model__
if len(modelformula.split('a')) == 2:
    aformula.append(modelformula.split('a')[-1])
    modelformula = modelformula.split('a')[0]
else:
    aformula.append(None)

if len(modelformula.split('v')) == 2:
    vformula.append(modelformula.split('v')[-1])
    modelformula = modelformula.split('v')[0]
else:
    vformula.append(None)

for i in range(5):
    m = hddm.load(path + x + '/' + x + '_' + str(i))
    models.append(m)
m_comb = concat_models(models)

print(modelname)
print("****DIC: %f" % m_comb.dic)
print("****BPIC: %f" % (m_comb.dic_info['pD'] + m_comb.dic))

dic_dict[modelname] = m_comb.dic
bdic_dict[modelname] = m_comb.dic_info['pD'] + m_comb.dic
dic.append(m_comb.dic)
bdic.append(m_comb.dic_info['pD'] + m_comb.dic)

# Transform into data frame and store in dic.csv
"""
start_time = time.time()  # the start time of the processing

#### model 1, free v,t,z
C_Id_vtz = hddm.HDDM(dat_C_Id,
                     depends_on={'v': ['val', 'id'],
                                 'z': ['val', 'id'],
                                 't': ['val', 'id']},
                     include=['v', 'z', 't'],
                     p_outlier=.05)
C_Id_vtz.find_starting_values()
C_Id_vtz.sample(10000, burn=1000, dbname='traces_id_vtz.db', db='pickle')

# save the model
C_Id_vtz.save('C_Id_vtz')
C_Id_vtz = hddm.load('C_Id_vtz')

# check convergence of MCMC
#### output of gelman_rubin ######
models_vtz = list()
for i in range(5):
    m = hddm.HDDM(dat_C_Id,
                  depends_on={'v': ['val', 'id'],
                              'z': ['val', 'id'],
                              't': ['val', 'id']},
                  include=['v', 'z', 't'],
                  p_outlier=.05)
    m.find_starting_values()
    m.sample(10000, burn=1000)
    models_vtz.append(m)
a_reg = {'model': 'a ~ 1 + stimulus + prevpe_bin_noern + postpe_bin_noern', 'link_func': lambda x: x}
v_reg = {'model': 'v ~ 1 + stimulus + prevpe_bin_noern + postpe_bin_noern', 'link_func': v_link_func}
reg_descr = [a_reg, v_reg]
m = hddm.HDDMRegressor(data, reg_descr, group_only_regressors=False, p_outlier=.05)
m.find_starting_values()
m.sample(samples, burn=samples / 10, thin=2,
         dbname=os.path.join(model_dir, 'ERPall_binnedPE_traces_1'), db='pickle')
m.save(os.path.join(model_dir, 'ERPall_binnedPE_1'))

goOn = False
if goOn == True:
    import kabuki
    import numpy
    import seaborn as sns
    import matplotlib.pyplot as plt

    models = []
    for i in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:  # this assumes you ran 10 different models above and changed the index when saving
        models.append(hddm.load('ERPall_binnedPE_%s' % i))
    m = kabuki.utils.concat_models(models)  # concatenate these 10 models

    # Run the same sanity checks as reported in hddm_fit.py, these are not reiterated here

    # Extract the data and save these
    results = m.gen_stats()
    results.to_csv(os.path.join(model_dir, 'eeg_binnedPe_HDDMestimates.csv'))

    a2, a3, a4, a5 = m.nodes_db.node[['a_prevpe_bin_noern[T.B_bin]', 'a_prevpe_bin_noern[T.C_bin]',
                                      'a_prevpe_bin_noern[T.D_bin]', 'a_prevpe_bin_noern[T.E_bin]']]
    p2, p3, p4, p5 = m.nodes_db.node[['a_postpe_bin_noern[T.B_bin]', 'a_postpe_bin_noern[T.C_bin]',
                                      'a_postpe_bin_noern[T.D_bin]', 'a_postpe_bin_noern[T.E_bin]']]
    v2, v3, v4, v5 = m.nodes_db.node[['v_prevpe_bin_noern[T.B_bin]', 'v_prevpe_bin_noern[T.C_bin]',
                                      'v_prevpe_bin_noern[T.D_bin]', 'v_prevpe_bin_noern[T.E_bin]']]
    vp2, vp3, vp4, vp5 = m.nodes_db.node[['v_postpe_bin_noern[T.B_bin]', 'v_postpe_bin_noern[T.C_bin]',
                                          'v_postpe_bin_noern[T.D_bin]', 'v_postpe_bin_noern[T.E_bin]']]

    numpy.savetxt("PEonly_noern_a_prevcj_bin2.csv", a2.trace(), delimiter=",")
    numpy.savetxt("PEonly_noern_a_prevcj_bin3.csv", a3.trace(), delimiter=",")
pdf[i] = np.exp(node.logp) * 10

# plot
plt.plot(xlim, pdf)
plt.xlabel(name)
sns.despine(offset=2, trim=True)
# # Hide the right and top spines
# ax.spines['right'].set_visible(False)
# ax.spines['top'].set_visible(False)
#
# # Only show ticks on the left and bottom spines
# ax.yaxis.set_ticks_position('left')
# ax.xaxis.set_ticks_position('bottom')

# add suptitle
plt.suptitle('HDDM priors')

# save the figure
plt.savefig(os.path.join(mypath, 'priorPlot.pdf'))

## LOAD MODEL WITH THE MOST PARAMETERS WE HAVE
mypath = os.path.realpath(os.path.expanduser('/nfs/aeurai/HDDM/JW_PNAS'))
m = hddm.load(os.path.join(mypath, 'stimcoding_dc_z_prevresp_st', 'modelfit-combined.model'))
# print(m)
# shell()
plot_all_priors(m)
start_time = time.time()  # the start time of the processing

#### model 1, free v,t,z
M_match_vtz = hddm.HDDM(dat_M_match,
                        depends_on={'v': ['val', 'id'],
                                    'z': ['val', 'id'],
                                    't': ['val', 'id']},
                        include=['v', 'z', 't'],
                        p_outlier=.05)
M_match_vtz.find_starting_values()
M_match_vtz.sample(10000, burn=1000, dbname='traces_m_vtz.db', db='pickle')

# save the model
M_match_vtz.save('exp7_rep_match_vtz')
M_match_vtz = hddm.load('exp7_rep_match_vtz')

# check convergence of MCMC
#### output of gelman_rubin ######
models_vtz = list()
for i in range(5):
    m = hddm.HDDM(dat_M_match,
                  depends_on={'v': ['val', 'id'],
                              'z': ['val', 'id'],
                              't': ['val', 'id']},
                  include=['v', 'z', 't'],
                  p_outlier=.05)
    m.find_starting_values()
    m.sample(10000, burn=1000)
    models_vtz.append(m)
import hddm
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle

try:
    import IPython
    shell = IPython.get_ipython()
    shell.enable_matplotlib(gui='inline')
except:
    pass

i = int(input('Load which model? '))
model = hddm.load('Z://Work//UW//projects//RR_TMS//hddm//models//by_cond//fullsplit%i' % i)
model.plot_posterior_predictive(figsize=(14, 10))
print("Model DIC: %f" % model.dic)
model.plot_posterior_predictive()
plt.savefig('foo.pdf')  # save before plt.show(), otherwise an empty figure is written
plt.show()
def plot_model(mypath, model_name, trace_id):

    # load in the model that was run
    m = hddm.load(os.path.join(mypath, model_name, 'modelfit-md%d.model' % trace_id))

    # ============================================ #
    # save plots
    # ============================================ #

    # plot some output stuff in figures subfolder
    figpath = os.path.join(mypath, model_name, 'figures-md%d' % trace_id)
    if not os.path.exists(figpath):
        os.mkdir(figpath)

    # 1. plot the traces and posteriors for each parameter
    m.plot_posteriors(save=True, path=figpath, format='pdf')

    # 2. plot posterior predictive
    print('plotting posterior predictive')
    plot_posterior_predictive_anne(m, path=figpath)

    # 3. plot the actual posteriors and the way they depend on the variables we specified
    if model_name in ['prevresp_prevrt_dc', 'prevresp_prevpupil_dc']:
        print('plotting the posteriors by previous response and rt')
        dc_prevresp0_prevRTlow, dc_prevresp0_prevRTmed, dc_prevresp0_prevRThigh, \
            dc_prevresp1_prevRTlow, dc_prevresp1_prevRTmed, dc_prevresp1_prevRThigh \
            = m.nodes_db.node[['dc(-1.0.1.0)', 'dc(-1.0.2.0)', 'dc(-1.0.3.0)',
                               'dc(1.0.1.0)', 'dc(1.0.2.0)', 'dc(1.0.3.0)']]

        # plot these myself
        plot_posterior_nodes_anne([dc_prevresp0_prevRTlow, dc_prevresp0_prevRTmed, dc_prevresp0_prevRThigh,
                                   dc_prevresp1_prevRTlow, dc_prevresp1_prevRTmed, dc_prevresp1_prevRThigh])
        plt.xlabel('Drift criterion')
        plt.ylabel('Posterior probability')
        plt.title('Posterior of drift-criterion group means')
        plt.savefig(os.path.join(figpath, 'driftcriterion_posteriors.pdf'))

    elif model_name in ['prevresp_prevrt_z', 'prevresp_prevpupil_z']:
        print('plotting the posteriors by previous response and rt')
        z_prevresp0_prevRTlow, z_prevresp0_prevRTmed, z_prevresp0_prevRThigh, \
            z_prevresp1_prevRTlow, z_prevresp1_prevRTmed, z_prevresp1_prevRThigh \
            = m.nodes_db.node[['z(-1.0.1.0)', 'z(-1.0.2.0)', 'z(-1.0.3.0)',
                               'z(1.0.1.0)', 'z(1.0.2.0)', 'z(1.0.3.0)']]

        # plot these myself
        plot_posterior_nodes_anne([z_prevresp0_prevRTlow, z_prevresp0_prevRTmed, z_prevresp0_prevRThigh,
                                   z_prevresp1_prevRTlow, z_prevresp1_prevRTmed, z_prevresp1_prevRThigh])
        plt.xlabel('Starting point')
        plt.ylabel('Posterior probability')
        plt.title('Posterior of starting-point group means')
        plt.savefig(os.path.join(figpath, 'startingpoint_posteriors.pdf'))

    elif model_name in ['prevresp_z']:
        print('plotting the posteriors by previous response')
        z_prevresp0, z_prevresp1 = m.nodes_db.node[['z(-1.0)', 'z(1.0)']]

        # plot these myself
        plot_posterior_nodes_anne([z_prevresp0, z_prevresp1])
        plt.xlabel('Starting point')
        plt.ylabel('Posterior probability')
        plt.title('Posterior of starting-point group means')
        plt.savefig(os.path.join(figpath, 'startingpoint_posteriors.pdf'))

    elif model_name in ['prevresp_dc']:
        print('plotting the posteriors by previous response')
        dc_prevresp0, dc_prevresp1 = m.nodes_db.node[['dc(-1.0)', 'dc(1.0)']]

        # plot these myself
        plot_posterior_nodes_anne([dc_prevresp0, dc_prevresp1])
        plt.xlabel('Drift criterion')
        plt.ylabel('Posterior probability')
        plt.title('Posterior of drift-criterion group means')
        plt.savefig(os.path.join(figpath, 'driftcriterion_posteriors.pdf'))
# written by Liangying, 20/2/2020
import hddm
import pandas as pd
import matplotlib.pyplot as plt

model = hddm.load('model_avtz2_all_StimCoding_2')
# model.plot_posterior_predictive(figsize=(40, 40), save=True, path="D:\\brainbnu\\haiyang\\hddm\\result\\all_StimCoding\\split_z\\avz")
# model.plot_posteriors(['a', 't', 'v', 'z'], save=True, path="D:\\brainbnu\\haiyang\\hddm\\result\\all_StimCoding\\avtz2")

# Between-group drift-rate comparisons
data = hddm.load_csv('D://brainbnu//haiyang//hddm//hddm_all_StimCoding.csv')

# Posterior predictive check
ppc_data = hddm.utils.post_pred_gen(model)
ppc_compare = hddm.utils.post_pred_stats(data, ppc_data)
ppc_stats = hddm.utils.post_pred_stats(data, ppc_data, call_compare=False)

'''
# 0 back
Stress_v_0back, Control_v_0back = model.nodes_db.node[['v(0.stress)', 'v(0.control)']]
print("P_v(Stress_v_0back > Control_v_0back) =", (Stress_v_0back.trace() > Control_v_0back.trace()).mean())
print("P_v(Control_v_0back > Stress_v_0back) =", (Control_v_0back.trace() > Stress_v_0back.trace()).mean())
hddm.analyze.plot_posterior_nodes([Stress_v_0back, Control_v_0back], 10)
plt.xlabel('drift-rate')
plt.ylabel('Posterior probability')
plt.title('Posterior of drift-rate group means')
plt.show()

# 2 back
Stress_v_2back, Control_v_2back = model.nodes_db.node[['v(2.stress)', 'v(2.control)']]
print("P_v(Stress_v_2back > Control_v_2back) =", (Stress_v_2back.trace() > Control_v_2back.trace()).mean())
         burn=samples / 10,
         thin=2,
         dbname=os.path.join(model_dir, 'Experiment1_traces_1'),
         db='pickle')
m.save(os.path.join(model_dir, 'Experiment1_1'))

goOn = False
if goOn == True:
    import kabuki
    import seaborn as sns
    import matplotlib.pyplot as plt

    models = []
    for i in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:  # this assumes you ran 10 different models above and changed the index when saving
        models.append(hddm.load('Experiment1_%s' % i))
    m = kabuki.utils.concat_models(models)  # concatenate these 10 models

    gelman_rubin(models)  # check R-hat

    # diagnostics
    m.plot_posteriors()

    # Simulate data and compare to actual data
    data['response'] = data['cor']
    ppc_data = post_pred_gen(m, append_data=True)
    ppc_data['resp_sampled'] = 1
    ppc_data['resp_sampled'][ppc_data.rt_sampled > 0] = 0
    hddm.utils.post_pred_stats(data, ppc_data)
    ppc_data.rt = abs(ppc_data.rt)
    ppc_data.rt[ppc_data.cor == 1] = -ppc_data.rt[ppc_data.cor == 1]
import hddm
import pandas as pd
import pickle

models = []
for i in range(5):
    # note: this loads the same saved model file for every "chain"; for a meaningful
    # Gelman-Rubin check, each iteration would normally load a separately run chain
    m = hddm.load('Z://Work//UW//projects//RR_TMS//hddm//models//by_cond//testmodel')
    models.append(m)

hddm.analyze.gelman_rubin(models)
def drift_diffusion_hddm(data, samples=10000, n_jobs=6, run=True, parallel=True,
                         model_name='model', model_dir='.', accuracy_coding=False):
    import hddm
    import os
    import time
    import pp  # Parallel Python, used to run the chains in parallel
    # note: run_model, fig_dir and m are assumed to be defined in the enclosing module

    # run the model:
    if run:
        if parallel:
            job_server = pp.Server(ppservers=(), ncpus=n_jobs)
            start_time = time.time()
            jobs = [(trace_id, job_server.submit(run_model,
                                                 (trace_id, data, model_dir, model_name, samples, accuracy_coding),
                                                 (), ('hddm',)))
                    for trace_id in range(n_jobs)]
            results = []
            # shell()  # debugging breakpoint left in the original (requires an IPython shell helper)
            for s, job in jobs:
                results.append(job())
            print("Time elapsed: ", time.time() - start_time, "s")
            job_server.print_stats()

            # save:
            for i in range(n_jobs):
                model = results[i]
                model.save(os.path.join(model_dir, '{}_{}'.format(model_name, i)))
        else:
            start_time = time.time()
            model = run_model(3, data, model_dir, model_name, samples, accuracy_coding)
            model.save(os.path.join(model_dir, '{}_md{}'.format(model_name, 3)))

            # print point estimates
            results = model.gen_stats()
            results.to_csv(os.path.join(fig_dir, 'diagnostics', 'results3.csv'))

            # dic:
            text_file = open(os.path.join(fig_dir, 'diagnostics', 'DIC3.txt'), 'w')
            text_file.write("Model {}: {}\n".format(m, model.dic))
            text_file.close()
            print("Time elapsed: ", time.time() - start_time, "s")

    # load the models:
    else:
        print('loading existing model(s)')
        if parallel:
            model = []
            for i in range(n_jobs):
                model.append(hddm.load(os.path.join(model_dir, '{}_{}'.format(model_name, i))))
        else:
            model = hddm.load(os.path.join(model_dir, '{}_md{}'.format(model_name, 1)))
    return model
import hddm
import pickle

model = hddm.load('Z://Work//UW//projects//RR_TMS//hddm//models//by_cond//va_stim0')

# init just a couple of vars
# conditions in DRI_TMS task
# inf/ins - X/Y
# sym/fin - S/F
# early/late/no stim - E/L/S
# PMd/Vertex stim - P/V
v_XFEP, v_XFEV, v_XFLP, v_XFLV, v_XFNP, v_XFNV, \
    v_XSEP, v_XSEV, v_XSLP, v_XSLV, v_XSNP, v_XSNV, \
    v_YFEP, v_YFEV, v_YFLP, v_YFLV, v_YFNP, v_YFNV, \
    v_YSEP, v_YSEV, v_YSLP, v_YSLV, v_YSNP, v_YSNV = \
    model.nodes_db.node[['v( XFEP)', 'v( XFEV)', 'v( XFLP)', 'v( XFLV)', 'v( XFNP)', 'v( XFNV)',
                         'v( XSEP)', 'v( XSEV)', 'v( XSLP)', 'v( XSLV)', 'v( XSNP)', 'v( XSNV)',
                         'v( YFEP)', 'v( YFEV)', 'v( YFLP)', 'v( YFLV)', 'v( YFNP)', 'v( YFNV)',
                         'v( YSEP)', 'v( YSEV)', 'v( YSLP)', 'v( YSLV)', 'v( YSNP)', 'v( YSNV)']]

a_XFEP, a_XFEV, a_XFLP, a_XFLV, a_XFNP, a_XFNV, \
    a_XSEP, a_XSEV, a_XSLP, a_XSLV, a_XSNP, a_XSNV, \
    a_YFEP, a_YFEV, a_YFLP, a_YFLV, a_YFNP, a_YFNV, \
    a_YSEP, a_YSEV, a_YSLP, a_YSLV, a_YSNP, a_YSNV = \
    model.nodes_db.node[['a( XFEP)', 'a( XFEV)', 'a( XFLP)', 'a( XFLV)', 'a( XFNP)', 'a( XFNV)',
                         'a( XSEP)', 'a( XSEV)', 'a( XSLP)', 'a( XSLV)', 'a( XSNP)', 'a( XSNV)',
                         'a( YFEP)', 'a( YFEV)', 'a( YFLP)', 'a( YFLV)', 'a( YFNP)', 'a( YFNV)',
                         'a( YSEP)', 'a( YSEV)', 'a( YSLP)', 'a( YSLV)', 'a( YSNP)', 'a( YSNV)']]
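# Usage sketch (added, not part of the original script): with the drift-rate nodes
# unpacked above, two conditions can be compared through the overlap of their
# posterior traces, mirroring the approach used elsewhere in this collection.
# The pair chosen here (v_XFEP vs. v_XFEV) and the output file name are illustrative.
import matplotlib.pyplot as plt
print("P(v_XFEP > v_XFEV) =", (v_XFEP.trace() > v_XFEV.trace()).mean())
hddm.analyze.plot_posterior_nodes([v_XFEP, v_XFEV])
plt.xlabel('drift rate v')
plt.ylabel('Posterior probability')
plt.savefig('v_XFEP_vs_XFEV.pdf')  # hypothetical output file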
Anne Urai, 2016
adapted from JW de Gee
"""

import hddm, os

nr_traces = 3
model_name = 'basic_stimcoding'

# find path depending on local/klimag
usr = os.environ.get('USER')
if usr in ['anne']:
    mypath = '/Users/anne/Data/projects/0/neurodec/Data/MEG-PL/Data/HDDM'
if usr in ['aurai']:
    mypath = '/home/aurai/Data/MEG-PL/Data/HDDM'

thispath = os.path.join(mypath, model_name)

print("appending models")
models = []
for t in range(nr_traces):  # the models were run serially
    models.append(hddm.load(os.path.join(thispath, 'modelfit-md%d' % t)))

print("computing gelman-rubin convergence statistics")
# compute the Gelman-Rubin statistic
gr = hddm.analyze.gelman_rubin(models)
text_file = open(os.path.join(thispath, 'gelman_rubic.txt'), 'w')
for p in gr.items():
    text_file.write("%s:%s\n" % p)
text_file.close()

print("DONE!")
a_reg = {'model': 'a ~ 1 + z_x', 'link_func': lambda x: x}
# a_reg_within = {'model': 'a ~ 1 + x + C(condition)', 'link_func': lambda x: x}
# for including and estimating within-subject effects of condition
v_reg = {'model': 'v ~ 1 + z_x', 'link_func': lambda x: x}
reg_comb = [a_reg, v_reg]
# m_reg = hddm.HDDMRegressor(data_group, reg_comb, group_only_regressors=['true'])
m_reg = hddm.HDDMRegressor(data_group, a_reg, group_only_regressors=['true'])
m_reg.find_starting_values()
m_reg.sample(3000, burn=500, dbname='a_bwsubs_t200.db', db='pickle')
m_reg.save('a_bwsubs_model_t200')

m_reg.print_stats()

# check values of reg coefficients against the generated ones
m_reg = hddm.load('a_bwsubs_model')
data_group = pd.read_csv('data_group.csv')

# look at correlation of recovered parameter with original
subjdf = data_group.groupby('subj_idx').first().reset_index()

## check for residual correlation with x
a_int_recovered = []
pp = []
from scipy import stats
for i in range(0, (1 + max(x_range)) * subjs_per_bin):
    a = 'a_Intercept_subj.'
    a += str(i)
    a += '.0'
    xx = i // subjs_per_bin
import multiprocessing
# note: this fragment also relies on sys, pandas (pd), hddm, pymc (pm) and a
# concat_models() helper being imported/defined earlier in the script

# File paths
model_name = sys.argv[1]
model_path = '../output/Study1/ae_only/' + sys.argv[1] + '/' + sys.argv[1]
datafile_name = '../data/Study1/' + sys.argv[2]

df = pd.read_csv(datafile_name, low_memory=False)
data = hddm.utils.flip_errors(df)

model_list = []
for model_index in range(5):
    sub_model_name = model_path + '_' + str(model_index)
    sub_model = hddm.load(sub_model_name)
    model_list.append(sub_model)

m_comb = concat_models(model_list)
print("DIC: %f" % m_comb.dic)
print("BPIC: %f" % (m_comb.dic_info['pD'] + m_comb.dic))


def _parents_to_random_posterior_sample(bottom_node, pos=None):
    """Walks through parents and sets them to pos sample."""
    for i, parent in enumerate(bottom_node.extended_parents):
        if not isinstance(parent, pm.Node):  # Skip non-stochastic nodes
            continue
    time.sleep(60)

    # concatenate the different chains, will save disk space
    concat_models(mypath, models[vx])

elif runMe == 2:

    # ============================================ #
    # POSTERIOR PREDICTIVES TO ASSESS MODEL FIT
    # ============================================ #

    starttime = time.time()
    print("computing ppc")

    # specify how many samples are needed
    m = hddm.load(os.path.join(mypath, models[vx], 'modelfit-combined.model'))
    print(os.path.join(mypath, models[vx], 'modelfit-combined.model'))
    if 'MEG' in datasets[dx]:
        nsmp = 50
    else:
        nsmp = 100
    ppc = hddm.utils.post_pred_gen(m, append_data=True, samples=nsmp)

    # make the csv smaller, save disk space
    savecols = list(set(ppc.columns) & set(['rt', 'rt_sampled', 'response_sampled', 'index',
                                            'stimulus', 'response', 'prevresp', 'subj_idx',
                                            'transitionprob', 'coherence', 'prevcorrect']))
# ============================================ #
# post-processing
# ============================================ #

import hddm
import matplotlib.pyplot as plt

print("HDDM imported, starting post-processing")

models = []
for trace_id in range(nr_traces):  # the models were run serially
    thism = hddm.load(os.path.join(mypath, model_name, 'modelfit-md%d.model' % trace_id))
    print(os.path.join(mypath, model_name, 'modelfit-md%d.model' % trace_id))

    # plot some output stuff in figures subfolder
    figpath = os.path.join(mypath, model_name, 'figures-md%d' % trace_id)
    if not os.path.exists(figpath):
        os.mkdir(figpath)

    thism.plot_posteriors(save=True, path=figpath, format='pdf')
    plt.close('all')  # plotting leaves figures open, make sure to close them all
    models.append(thism)

# Gelman-Rubin on the list of models
gr = hddm.analyze.gelman_rubin(models)
text_file = open(os.path.join(mypath, model_name, 'gelman_rubic.txt'), 'w')
for p in gr.items():
    text_file.write("%s:%s\n" % p)
text_file.close()

# ============================================ #
def concat_models(mypath, model_name):

    nchains = 30

    # CHECK IF COMBINED MODEL EXISTS
    if os.path.isfile(os.path.join(mypath, model_name, 'modelfit-combined.model')):
        print(os.path.join(mypath, model_name, 'modelfit-combined.model'))
    else:
        # ============================================ #
        # APPEND MODELS
        # ============================================ #

        allmodels = []
        print("appending models for %s" % model_name)
        for trace_id in range(nchains):  # how many chains were run?
            model_filename = os.path.join(mypath, model_name, 'modelfit-md%d.model' % trace_id)
            modelExists = os.path.isfile(model_filename)
            if modelExists:  # if not, this model has to be rerun
                print(model_filename)
                thism = hddm.load(model_filename)
                allmodels.append(thism)  # now append into a list

        # ============================================ #
        # CHECK CONVERGENCE
        # ============================================ #

        if len(allmodels) == 0:
            return allmodels

        try:
            gr = hddm.analyze.gelman_rubin(allmodels)

            # save
            text_file = open(os.path.join(mypath, model_name, 'gelman_rubin.txt'), 'w')
            for p in gr.items():
                text_file.write("%s,%s\n" % p)
                # print a warning when non-convergence is detected:
                # values should be close to 1 and not larger than 1.02,
                # which would indicate convergence problems.
                # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3731670/
                if abs(p[1] - 1) > 0.02:
                    print("non-convergence found, %s:%s" % p)
            text_file.close()
            print("written gelman rubin stats to file")
        except:
            pass

        # now actually concatenate them, see email Gilles
        m = kabuki.utils.concat_models(allmodels)

        # ============================================ #
        # SAVE THE FULL MODEL
        # ============================================ #

        print("concatenated models")
        m.save(os.path.join(mypath, model_name, 'modelfit-combined.model'))  # save the model to disk

        # DELETE FILES to save space
        # note: models[vx] is taken from the enclosing script scope here
        print("deleting separate chains...")
        for fl in glob.glob(os.path.join(mypath, models[vx], 'modelfit-md*.model')):
            print(fl)
            os.remove(fl)
        for fl in glob.glob(os.path.join(mypath, models[vx], 'modelfit-md*.db')):
            if '-md0.db' not in fl:
                print(fl)
                os.remove(fl)

        # ============================================ #
        # SAVE POINT ESTIMATES
        # ============================================ #

        print("saving stats")
        results = m.gen_stats()  # point estimate for each parameter and subject
        results.to_csv(os.path.join(mypath, model_name, 'results-combined.csv'))

        # save the DIC for this model
        text_file = open(os.path.join(mypath, model_name, 'DIC-combined.txt'), 'w')
        text_file.write("Combined model: {}\n".format(m.dic))
        text_file.close()

        # ============================================ #
        # SAVE TRACES
        # ============================================ #

        print("saving traces")
        # get the names for all nodes that are available here
        group_traces = m.get_group_traces()
        group_traces.to_csv(os.path.join(mypath, model_name, 'group_traces.csv'))
        all_traces = m.get_traces()
        all_traces.to_csv(os.path.join(mypath, model_name, 'all_traces.csv'))

        # ============================================ #
        # CONCATENATE MODEL COMPARISON
        # ============================================ #

        # average model comparison values across chains
        print('concatenating model comparison')
        nchains = 30
        for trace_id in range(nchains):  # how many chains were run?
            filename = os.path.join(mypath, models[vx], 'model_comparison_md%d.csv' % trace_id)
            df = pd.read_csv(filename)
            if trace_id == 0:
                df2 = df
            else:
                df2 = df2.append(df, ignore_index=True)

        # average over chains
        df3 = df2.mean()
        df3 = df2.describe().loc[['mean']]
        df3.to_csv(os.path.join(mypath, models[vx], 'model_comparison.csv'))

        for fl in glob.glob(os.path.join(mypath, models[vx], 'model_comparison_md*.csv')):
            print(fl)
            os.remove(fl)
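# Usage sketch (added, not part of the original script): a hypothetical call of
# concat_models() above. It expects per-chain files named 'modelfit-md<chain>.model'
# under <mypath>/<model_name>, and relies on os, glob, hddm, kabuki, pandas (pd)
# and the script-level models/vx variables being available.
if __name__ == '__main__':
    concat_models('/path/to/HDDM', 'stimcoding_dc_prevresp')  # hypothetical paths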
# workdir = '/home/mikkel/PM-volition/Dataanalysis'
# outdir = '/home/mikkel/PM-volition/Datafiles'
workdir = 'C:\\Users\\Mikkel\\Documents\\PM-volition\\Dataanalysis'
outdir = 'C:\\Users\\Mikkel\\Documents\\PM-volition\\Datafiles'

sys.path.append(workdir)
import PM_volition_utilfun as pm  # plot_posterior_diff, plot_posterior_nodes2, get_posteriorP

# %% Plot options
dpi = 600

# %% Load model
chdir(outdir)  # Must be in folder to load databases
mod = hddm.load(op.join(outdir, 'ddm_model31'))
# f = open(op.join(outdir, "ddm_model22"), "rb")
# mod = pickle.load(f)

# %% Generate posteriors
v_fixPM, v_freePM, v_fixFil, v_freeFil = mod.nodes_db.node[['v(pm.fix)', 'v(pm.free)', 'v(filler.fix)', 'v(filler.free)']]
a_fix, a_free = mod.nodes_db.node[['a(fix)', 'a(free)']]
t_int = mod.nodes_db.node['t']

## Difference between posteriors: PM
_, v_PMdiff = pm.get_posteriorP(v_fixPM, v_freePM, plot=0)
_, v_Fildiff = pm.get_posteriorP(v_fixFil, v_freeFil, plot=0)
_, a_diff = pm.get_posteriorP(a_fix, a_free, plot=0)
# _, z_PMdiff = pm.get_posteriorP(z_fixPM, z_freePM, plot=0)
fig = plt.figure()
ax = fig.add_subplot(111, xlabel='RT', ylabel='count', title='RT distributions')
for i, subj_data in dat_M_Categ_id.groupby('subj_idx'):
    subj_data.rt.hist(bins=20, histtype='step', ax=ax)
plt.savefig('plot_MS_Categ_id_flipped.pdf')

start_time = time.time()  # the start time of the processing

#### model 1 for valence-based categorization, free v,t,z
M_Categ_val_vtz = hddm.HDDM(dat_M_Categ_val,
                            depends_on={'v': ['val', 'id'], 'z': ['val', 'id'], 't': ['val', 'id']},
                            include=['v', 'z', 't'], p_outlier=.05)
M_Categ_val_vtz.find_starting_values()
M_Categ_val_vtz.sample(10000, burn=1000, dbname='traces_val_vtz.db', db='pickle')

# save the model
M_Categ_val_vtz.save('M_Categ_val_vtz')
M_Categ_val_vtz = hddm.load('M_Categ_val_vtz')

# doing Gelman-Rubin statistic
models_categ_val = []
for i in range(5):
    m_stim = hddm.HDDM(dat_M_Categ_val,
                       depends_on={'v': ['val', 'id'], 'z': ['val', 'id'], 't': ['val', 'id']},
                       include=['v', 'z', 't'], p_outlier=.05)
    m_stim.find_starting_values()
    m_stim.sample(10000, burn=1000)
    models_categ_val.append(m_stim)

Categ_val_R_hat_vtz = hddm.analyze.gelman_rubin(models_categ_val)

# save Categ_R_hat_vtz
with open('Categ_val_R_hat_vtz.csv', 'w') as f:
    w = csv.writer(f)
    w.writerows(Categ_val_R_hat_vtz.items())