def run_model(trace_id, data, model_dir, model_name, samples=10000, accuracy_coding=False):
    """Fit an HDDM regression model (v ~ stimulus + BS_nMod + BS_C) and persist its trace DB.

    Parameters
    ----------
    trace_id : int
        Chain index; used to suffix the trace database filename.
    data : pandas.DataFrame
        Trial-level data in HDDM format (rt, response, subj_idx, regressors).
    model_dir : str
        Directory the pickle trace database is written to.
    model_name : str
        Base name of the trace database file.
    samples : int
        Number of MCMC samples; the first 10% are discarded as burn-in.
    accuracy_coding : bool
        Unused here; kept for interface compatibility with callers.

    Returns
    -------
    hddm.HDDMRegressor
        The fitted model.
    """
    import os
    import hddm

    # Drift rate depends on the stimulus plus the two between-subject covariates.
    v_reg = {'model': 'v ~ 1 + stimulus + BS_nMod + BS_C', 'link_func': lambda x: x}
    reg_descr = [v_reg]
    # FIX: include=('z') was just the string 'z'; spell the tuple explicitly.
    m = hddm.HDDMRegressor(data, reg_descr, include=('z',), p_outlier=.05,
                           group_only_regressors=False)
    m.find_starting_values()
    # FIX: samples / 10 is a float under Python 3; burn must be an integer.
    m.sample(samples, burn=samples // 10, thin=2,
             dbname=os.path.join(model_dir, model_name + '_db{}'.format(trace_id)),
             db='pickle')
    return m
def test_group_only(self):
    """group_only_regressors=True: intercept stays subject-level, covariate gets one group node."""
    params = hddm.generate.gen_rand_params()
    data, _ = hddm.generate.gen_rand_data(params, size=10, subjs=4)
    data = pd.DataFrame(data)
    data["cov"] = 1.0
    model = hddm.HDDMRegressor(data, "v ~ cov", group_only_regressors=True)
    model.sample(self.iter, burn=self.burn)
    v_parent = model.nodes_db.loc["wfpt.0"]["node"].parents["v"]
    intercept = v_parent.parents["args"][0]
    coefficient = v_parent.parents["args"][1]
    self.assertTrue(isinstance(intercept, pm.Normal))
    self.assertEqual(intercept.__name__, "v_Intercept_subj.0")
    self.assertTrue(isinstance(coefficient, pm.Normal))
    self.assertEqual(coefficient.__name__, "v_cov")
    self.assertEqual(len(np.unique(v_parent.value)), 1)
def test_group_only(self):
    """With group-only regressors the slope is a single group node named 'v_cov'."""
    params = hddm.generate.gen_rand_params()
    data, _true = hddm.generate.gen_rand_data(params, size=10, subjs=4)
    data = pd.DataFrame(data)
    data['cov'] = 1.
    reg_model = hddm.HDDMRegressor(data, 'v ~ cov', group_only_regressors=True)
    reg_model.sample(self.iter, burn=self.burn)
    drift = reg_model.nodes_db.loc['wfpt.0']['node'].parents['v']
    args = drift.parents['args']
    self.assertTrue(isinstance(args[0], pm.Normal))
    self.assertEqual(args[0].__name__, 'v_Intercept_subj.0')
    self.assertTrue(isinstance(args[1], pm.Normal))
    self.assertEqual(args[1].__name__, 'v_cov')
    self.assertEqual(len(np.unique(drift.value)), 1)
def test_categorical_wo_intercept(self):
    """Without an intercept ('0 +'), each condition main effect on a keeps its Gamma prior; interaction and covariate terms do not."""
    params = hddm.generate.gen_rand_params(cond_dict={'a': [1, 2, 3]})
    data, _ = hddm.generate.gen_rand_data(params[0], size=10, subjs=4)
    data = pd.DataFrame(data)
    data['cov'] = 1.
    model = hddm.HDDMRegressor(data, 'a ~ 0 + C(condition) * cov',
                               group_only_regressors=False)
    model.sample(self.iter, burn=self.burn)
    node_of = lambda name: model.nodes_db.loc[name]['node']
    # Main-effect dummy nodes inherit the Gamma prior of a.
    for cond in ('c0', 'c1', 'c2'):
        self.assertIsInstance(node_of('a_C(condition)[%s]_subj.0' % cond), pm.Gamma)
    # Interaction and plain covariate terms must not be Gamma-distributed.
    for cond in ('c1', 'c2'):
        self.assertNotIsInstance(node_of('a_C(condition)[T.%s]:cov_subj.0' % cond), pm.Gamma)
    self.assertNotIsInstance(node_of('a_cov_subj.0'), pm.Gamma)
def test_contrast_coding(self):
    """Treatment-coded condition effects on v yield per-subject nodes while a depends on condition."""
    params = hddm.generate.gen_rand_params(cond_dict={'v': [1, 2, 3]})
    data, _ = hddm.generate.gen_rand_data(params[0], size=10, subjs=4)
    data = pd.DataFrame(data)
    data['cov'] = 1.
    model = hddm.HDDMRegressor(data, 'v ~ cov * C(condition)',
                               depends_on={'a': 'condition'},
                               group_only_regressors=False)
    model.sample(self.iter, burn=self.burn)
    v_parent = model.nodes_db.loc['wfpt(c1).0']['node'].parents['v']
    args = v_parent.parents['args']
    self.assertTrue(isinstance(args[0], pm.Normal))
    self.assertEqual(args[0].__name__, 'v_Intercept_subj.0')
    self.assertTrue(isinstance(args[1], pm.Normal))
    self.assertEqual(args[1].__name__, 'v_C(condition)[T.c1]_subj.0')
    self.assertEqual(len(np.unique(v_parent.value)), 1)
def test_HDDMRegressor_two_regressors(self):
    """Two independent linear regressions (on v and a) each get their own covariate column
    and subject-level slope/intercept nodes.

    FIX: DataFrame.ix was deprecated in pandas 0.20 and removed in 1.0; replaced
    with label-based .loc throughout.
    """
    reg_func1 = lambda args, cols: args[0] + args[1] * cols[:, 0]
    reg1 = {'func': reg_func1, 'args': ['v_slope', 'v_inter'], 'covariates': 'cov1', 'outcome': 'v'}
    reg_func2 = lambda args, cols: args[0] + args[1] * cols[:, 0]
    reg2 = {'func': reg_func2, 'args': ['a_slope', 'a_inter'], 'covariates': 'cov2', 'outcome': 'a'}
    params = hddm.generate.gen_rand_params()
    data, params_true = hddm.generate.gen_rand_data(params, size=500, subjs=5)
    data = pd.DataFrame(data)
    data['cov1'] = 1.
    data['cov2'] = -1
    m = hddm.HDDMRegressor(data, regressor=[reg1, reg2])
    m.sample(self.iter, burn=self.burn)
    wfpt = m.nodes_db.loc['wfpt.0']['node']
    # Each regression sees its own covariate column.
    self.assertTrue(all(wfpt.parents['v'].parents['cols'][:, 0] == 1))
    self.assertTrue(all(wfpt.parents['a'].parents['cols'][:, 0] == -1))
    v_args = wfpt.parents['v'].parents['args']
    a_args = wfpt.parents['a'].parents['args']
    self.assertTrue(isinstance(v_args[0], pm.Normal))
    self.assertTrue(isinstance(a_args[0], pm.Normal))
    self.assertEqual(v_args[0].__name__, 'v_slope_subj.0')
    self.assertEqual(a_args[0].__name__, 'a_slope_subj.0')
    self.assertTrue(isinstance(v_args[1], pm.Normal))
    self.assertTrue(isinstance(a_args[1], pm.Normal))
    self.assertEqual(v_args[1].__name__, 'v_inter_subj.0')
    self.assertEqual(a_args[1].__name__, 'a_inter_subj.0')
def run_model(id):
    """Fit one chain of the globally configured HDDM regression model, then save trace and model.

    Relies on module-level globals: data, modelList, includeBias, nSample,
    nBurned, outputPath, modelName.
    """
    model = hddm.HDDMRegressor(data, modelList, bias=includeBias,
                               include='p_outlier', group_only_regressors=False)
    model.find_starting_values()
    stem = outputPath + '/' + modelName + '_' + str(id)
    model.sample(nSample, burn=nBurned, dbname=stem + '.db', db='pickle')
    model.savePatch(stem)
    return model
def test_two_regressors(self):
    """Separate formulas for v and a produce independent intercept and slope nodes per parameter."""
    params = hddm.generate.gen_rand_params()
    data, _ = hddm.generate.gen_rand_data(params, size=10, subjs=4)
    data = pd.DataFrame(data)
    data["cov1"] = 1.0
    data["cov2"] = -1
    model = hddm.HDDMRegressor(data, ["v ~ cov1", "a ~ cov2"],
                               group_only_regressors=False)
    model.sample(self.iter, burn=self.burn)
    wfpt = model.nodes_db.loc["wfpt.0"]["node"]
    v_args = wfpt.parents["v"].parents["args"]
    a_args = wfpt.parents["a"].parents["args"]
    self.assertTrue(isinstance(v_args[0], pm.Normal))
    self.assertTrue(isinstance(a_args[0], pm.Gamma))
    self.assertEqual(v_args[0].__name__, "v_Intercept_subj.0")
    self.assertEqual(a_args[0].__name__, "a_Intercept_subj.0")
    self.assertTrue(isinstance(v_args[1], pm.Normal))
    self.assertTrue(isinstance(a_args[1], pm.Normal))
    self.assertEqual(v_args[1].__name__, "v_cov1_subj.0")
    self.assertEqual(a_args[1].__name__, "a_cov2_subj.0")
def main(derivatives, ds):
    """Fit an HDDM regression (z ~ cue congruency, v ~ difficulty) to one dataset and save the traces.

    Parameters: `derivatives` is the pipeline output root; `ds` selects the dataset.
    Six chains are fit in parallel (joblib) and their traces concatenated.
    """
    df = pd.read_pickle(op.join(derivatives, 'behavior.pkl'))
    df = df[df.ds == ds]

    def get_cue_congruency(row):
        # Classify each trial's cue relative to the stimulus.
        if row.cue == 'neutral':
            return 'neutral'
        elif row.stimulus == row.cue:
            return 'congruent'
        else:
            return 'incongruent'

    def z_link_func(x):
        # Logistic link keeps the starting point z in (0, 1).
        return 1 / (1 + np.exp(-x))

    df['cue congruency'] = df.apply(get_cue_congruency, 1)
    df['response'] = df.correct.map({0: 'error', 1: 'correct'})
    # rt arrives in milliseconds; HDDM expects seconds.
    df['rt'] = df['rt'] / 1000.
    # Signed contrast: congruent +1, neutral 0, incongruent -1.
    df['cue_regressor'] = df['cue congruency'].map({
        'congruent': 1,
        'neutral': 0,
        'incongruent': -1
    })
    df['subj_idx'] = df['subject']
    z_reg = {'model': 'z ~ 0 + cue_regressor', 'link_func': z_link_func}
    v_reg = {'model': 'v ~ 0 + C(difficulty)', 'link_func': lambda x: x}
    # NOTE(review): this model object is never used (fit_model builds its own) —
    # possibly kept as a construction sanity check; confirm before removing.
    model = hddm.HDDMRegressor(df[df.rt > 0.15], [z_reg, v_reg],
                               include='z',
                               group_only_regressors=False)

    def fit_model(i):
        # One MCMC chain; trials faster than 150 ms are excluded as anticipations.
        model = hddm.HDDMRegressor(df[df.rt > 0.15], [z_reg, v_reg],
                                   include='z',
                                   group_only_regressors=False)
        model.sample(20000, 10000,
                     dbname='/tmp/traces{}.db'.format(i),
                     db='pickle')
        return model

    # Six independent chains in parallel; traces pooled afterwards.
    results = Parallel(n_jobs=6)(delayed(fit_model)(i) for i in range(6))
    traces = pd.concat([r.get_traces() for r in results])
    if not op.exists(op.join(derivatives, 'ddm_results')):
        os.makedirs(op.join(derivatives, 'ddm_results'))
    traces.to_pickle(
        op.join(derivatives, 'ddm_results', 'traces_{}.pkl'.format(ds)))
def fit_model(i):
    """Fit one MCMC chain of the z/v regression model on trials with rt > 0.15 s.

    Uses module-level df, z_reg and v_reg; trace DB is written to /tmp.
    """
    chain = hddm.HDDMRegressor(df[df.rt > 0.15], [z_reg, v_reg],
                               include='z', group_only_regressors=False)
    chain.sample(20000, 10000, dbname='/tmp/traces{}.db'.format(i), db='pickle')
    return chain
def test_link_func_on_z(self):
    """A custom sigmoid link on z wraps the intercept node and is retained in model_descrs."""
    params = hddm.generate.gen_rand_params()
    data, _ = hddm.generate.gen_rand_data(params, size=10, subjs=4)
    data = pd.DataFrame(data)
    data["cov"] = 1.0
    link_func = lambda x: 1 / (1 + np.exp(-x))
    model = hddm.HDDMRegressor(
        data,
        {"model": "z ~ cov", "link_func": link_func},
        group_only_regressors=False,
        include="z",
    )
    model.sample(self.iter, burn=self.burn)
    self.assertIn("z", model.include)
    self.assertIn("z_Intercept", model.nodes_db.knode_name)
    z_parent = model.nodes_db.loc["wfpt.0"]["node"].parents["z"]
    args = z_parent.parents["args"]
    self.assertTrue(isinstance(args[0].parents["ltheta"], pm.Normal))
    self.assertEqual(args[0].__name__, "z_Intercept_subj.0")
    self.assertTrue(isinstance(args[1], pm.Normal))
    self.assertEqual(args[1].__name__, "z_cov_subj.0")
    self.assertEqual(len(np.unique(z_parent.value)), 1)
    self.assertEqual(model.model_descrs[0]["link_func"](2), link_func(2))
def test_HDDMRegressorGroupOnly(self):
    """group_only_nodes places subject coefficients under shared group means v_slope / v_inter.

    FIX: DataFrame.ix was deprecated in pandas 0.20 and removed in 1.0;
    replaced with label-based .loc.
    """
    reg_func = lambda args, cols: args[0] + args[1] * cols[:, 0]
    reg = {'func': reg_func, 'args': ['v_slope', 'v_inter'], 'covariates': 'cov', 'outcome': 'v'}
    params = hddm.generate.gen_rand_params()
    data, params_true = hddm.generate.gen_rand_data(params, size=500, subjs=5)
    data = pd.DataFrame(data)
    data['cov'] = 1.
    m = hddm.HDDMRegressor(data, regressor=reg, group_only_nodes=['v_slope', 'v_inter'])
    m.sample(self.iter, burn=self.burn)
    v_parent = m.nodes_db.loc['wfpt.0']['node'].parents['v']
    self.assertTrue(all(v_parent.parents['cols'][:, 0] == 1))
    self.assertTrue(isinstance(v_parent.parents['args'][0], pm.Normal))
    # Subject nodes hang off the group-level means when group_only_nodes is used.
    self.assertEqual(v_parent.parents['args'][0].parents['mu'].__name__, 'v_slope')
    self.assertTrue(isinstance(v_parent.parents['args'][1], pm.Normal))
    self.assertEqual(v_parent.parents['args'][1].parents['mu'].__name__, 'v_inter')
    self.assertEqual(len(np.unique(v_parent.value)), 1)
def test_contrast_coding(self):
    """Treatment coding of condition in the v formula yields per-subject contrast nodes."""
    params = hddm.generate.gen_rand_params(cond_dict={"v": [1, 2, 3]})
    data, _true = hddm.generate.gen_rand_data(params[0], size=10, subjs=4)
    data = pd.DataFrame(data)
    data["cov"] = 1.0
    reg_model = hddm.HDDMRegressor(
        data,
        "v ~ cov * C(condition)",
        depends_on={"a": "condition"},
        group_only_regressors=False,
    )
    reg_model.sample(self.iter, burn=self.burn)
    drift = reg_model.nodes_db.loc["wfpt(c1).0"]["node"].parents["v"]
    intercept, contrast = drift.parents["args"][0], drift.parents["args"][1]
    self.assertTrue(isinstance(intercept, pm.Normal))
    self.assertEqual(intercept.__name__, "v_Intercept_subj.0")
    self.assertTrue(isinstance(contrast, pm.Normal))
    self.assertEqual(contrast.__name__, "v_C(condition)[T.c1]_subj.0")
    self.assertEqual(len(np.unique(drift.value)), 1)
def test_HDDMRegressor_no_group(self):
    """A single-subject (non-group) regressor model exposes flat v_slope / v_inter nodes.

    FIXES: Python-2 `print` statement (a SyntaxError on Python 3) converted to a
    print() call; DataFrame.ix (removed from pandas) replaced with .loc.
    """
    reg_func = lambda args, cols: args[0] + args[1] * cols[:, 0]
    reg = {'func': reg_func, 'args': ['v_slope', 'v_inter'], 'covariates': 'cov', 'outcome': 'v'}
    params = hddm.generate.gen_rand_params()
    data, params_true = hddm.generate.gen_rand_data(params, size=500, subjs=1)
    data = pd.DataFrame(data)
    data['cov'] = 1.
    # Drop subj_idx so the model is built as a non-group (single subject) model.
    del data['subj_idx']
    m = hddm.HDDMRegressor(data, regressor=reg, is_group_model=False, depends_on={})
    m.sample(self.iter, burn=self.burn)
    print(m.nodes_db.index)
    v_parent = m.nodes_db.loc['wfpt']['node'].parents['v']
    self.assertTrue(all(v_parent.parents['cols'][:, 0] == 1))
    self.assertTrue(isinstance(v_parent.parents['args'][0], pm.Normal))
    self.assertEqual(v_parent.parents['args'][0].__name__, 'v_slope')
    self.assertTrue(isinstance(v_parent.parents['args'][1], pm.Normal))
    self.assertEqual(v_parent.parents['args'][1].__name__, 'v_inter')
    self.assertEqual(len(np.unique(v_parent.value)), 1)
def test_two_regressors(self):
    """Formulas on v and a each get their own intercept (Normal / Gamma) and slope nodes."""
    params = hddm.generate.gen_rand_params()
    data, _true = hddm.generate.gen_rand_data(params, size=10, subjs=4)
    data = pd.DataFrame(data)
    data['cov1'] = 1.
    data['cov2'] = -1
    reg_model = hddm.HDDMRegressor(data, ['v ~ cov1', 'a ~ cov2'],
                                   group_only_regressors=False)
    reg_model.sample(self.iter, burn=self.burn)
    wfpt = reg_model.nodes_db.loc['wfpt.0']['node']
    v_args = wfpt.parents['v'].parents['args']
    a_args = wfpt.parents['a'].parents['args']
    self.assertTrue(isinstance(v_args[0], pm.Normal))
    self.assertTrue(isinstance(a_args[0], pm.Gamma))
    self.assertEqual(v_args[0].__name__, 'v_Intercept_subj.0')
    self.assertEqual(a_args[0].__name__, 'a_Intercept_subj.0')
    self.assertTrue(isinstance(v_args[1], pm.Normal))
    self.assertTrue(isinstance(a_args[1], pm.Normal))
    self.assertEqual(v_args[1].__name__, 'v_cov1_subj.0')
    self.assertEqual(a_args[1].__name__, 'a_cov2_subj.0')
def run_model(id):
    """Load the PMT dataset, recode the design columns, and fit one chain of the joint t/v/a model.

    Parameters
    ----------
    id : int
        Chain index; ids < 4 fit model M1, any other id returns NaN.

    Returns
    -------
    hddm.HDDMRegressor or float
        The fitted (and saved) model, or NaN for an unrecognized id.
    """
    from patsy import dmatrix
    import numpy as np
    import hddm

    dataHDDM = hddm.load_csv('DDM/dataHDDM_pmt.csv')
    dataHDDM["subj_idx"] = dataHDDM["participant"]
    del dataHDDM["participant"]
    # Recode design factors to numeric contrasts.
    dataHDDM["SAT"] = dataHDDM.apply(lambda row: 0 if row['SAT'] == "Accuracy" else 1, axis=1)
    dataHDDM["FC"] = dataHDDM.apply(lambda row: -0.5 if row['FC'] == "low" else 0.5, axis=1)
    dataHDDM["contrast"] = dataHDDM.contrast.replace([1, 2, 3, 4, 5, 6],
                                                     [-.5, -.3, -.1, .1, .3, .5])
    dataHDDM["givenResp"] = dataHDDM["response"]
    dataHDDM["stim"] = dataHDDM.apply(lambda row: 1 if row['stim'] == 'Right' else 0, axis=1)
    dataHDDM["response"] = dataHDDM.apply(lambda row: 1 if row['givenResp'] == 'Right' else 0, axis=1)

    def v_link_func(x, data=dataHDDM):
        # Sign-flip drift by stimulus side (+1 / -1 coding).
        # FIX: Series.ix was removed from pandas; use label-based .loc.
        stim = (np.asarray(dmatrix('0 + C(s, [[1], [-1]])',
                                   {'s': data.stim.loc[x.index]})))
        return x * stim

    if id < 4:
        # Model M1: full factorial on t, stimulus-linked v, a by FC/SAT.
        LM = [{'model': 't ~ SAT + FC + contrast + SAT:FC + SAT:contrast + FC:contrast + SAT:FC:contrast',
               'link_func': lambda x: x},
              {'model': 'v ~ contrast', 'link_func': v_link_func},
              {'model': 'a ~ FC + SAT + SAT:FC', 'link_func': lambda x: x}]
        deps = {'sz': 'SAT'}
        inc = ['sv', 'sz', 'st', 'z']
        model_name = "Joint_t0"
    else:
        # FIX: np.nan is a float, not a callable — np.nan() raised TypeError.
        return np.nan
    name = 'light_reg_PMT_%s' % str(id)
    m = hddm.HDDMRegressor(dataHDDM, LM, depends_on=deps, include=inc,
                           group_only_nodes=['sv', 'sz', 'st', "sz_SAT"],
                           group_only_regressors=False, keep_regressor_trace=True)
    m.find_starting_values()
    m.sample(iter=10000, burn=8500, thin=1, dbname='DDM/traces/db_%s' % name, db='pickle')
    m.save('DDM/Fits/%s' % name)
    return m
def test_link_func_on_z(self):
    """The sigmoid link on z is applied via an 'ltheta' wrapper and survives into model_descrs."""
    params = hddm.generate.gen_rand_params()
    data, _true = hddm.generate.gen_rand_data(params, size=10, subjs=4)
    data = pd.DataFrame(data)
    data['cov'] = 1.
    link_func = lambda x: 1 / (1 + np.exp(-x))
    descr = {'model': 'z ~ cov', 'link_func': link_func}
    reg_model = hddm.HDDMRegressor(data, descr,
                                   group_only_regressors=False,
                                   include='z')
    reg_model.sample(self.iter, burn=self.burn)
    self.assertIn('z', reg_model.include)
    self.assertIn('z_Intercept', reg_model.nodes_db.knode_name)
    bias = reg_model.nodes_db.loc['wfpt.0']['node'].parents['z']
    args = bias.parents['args']
    self.assertTrue(isinstance(args[0].parents['ltheta'], pm.Normal))
    self.assertEqual(args[0].__name__, 'z_Intercept_subj.0')
    self.assertTrue(isinstance(args[1], pm.Normal))
    self.assertEqual(args[1].__name__, 'z_cov_subj.0')
    self.assertEqual(len(np.unique(bias.value)), 1)
    self.assertEqual(reg_model.model_descrs[0]['link_func'](2), link_func(2))
return 1 / (1 + np.exp(-(x * condition))) ##### Estimate models: step 1 simple base models v = {'model': "v ~ 1 + C(stim, Treatment(0))", 'link_func': lambda x: x} #v = {'model': "v ~ 1 ", 'link_func': lambda x: x} v_block = {'model': "v ~ 1 + C(stim, Treatment(0)) * C(CongruentBlock, Treatment(0))", 'link_func': lambda x: x} sv = {'model': "sv ~ 1 ", 'link_func': lambda x: x} # z = {'model': "z ~ 1", 'link_func': z_link_func} # sz = {'model': "sz ~ 1", 'link_func': lambda x: x} a = {'model': "a ~ 1", 'link_func': lambda x: x} # t = {'model': "t ~ 1", 'link_func': lambda x: x} st = {'model': "st ~ 1", 'link_func': lambda x: x} mod_dict = {'v_reg':hddm.HDDMRegressor(data, v), 'vsv_reg':hddm.HDDMRegressor(data, [v,sv], include = 'sv')}#,#, # 'vz_reg':hddm.HDDMRegressor(data, [v,z], include = 'z')} #'vsvz_reg':hddm.HDDMRegressor(data, [v,sv,z], include = ('sv','z')), # 'v_block_reg':hddm.HDDMRegressor(data, v_block)}#, #'v_blocksv_reg':hddm.HDDMRegressor(data, [v_block,sv], include = 'sv'), #'v_blockz_reg':hddm.HDDMRegressor(data, [v_block,z], include = 'z'), #'v_blocksvz_reg':hddm.HDDMRegressor(data, [v_block,sv,z], include = ('sv', 'z'))} mod_dict all_models = {} # from __future__ import print_function import pymp ex_array = pymp.shared.array((100,), dtype='uint8')
v_stimblock_trial_runtrial_prev_rt = { 'model': "v ~ 1 + C(stimblock, Treatment('congruent')) + trial_z + run_trial_z + prev_rt", 'link_func': lambda x: x } ################################### ##### create dict to index that includes different combinations of the design matrices from above ################################### #import pdb; pdb.set_trace() mod_dict = { ##################### start with no st or st #### Simplest model: stim type only IV 'v': hddm.HDDMRegressor(data, v, group_only_regressors=False), 'v_stimblock': hddm.HDDMRegressor(data, v_stimblock, group_only_regressors=False), #### single IV (not including stim) # stimulus x block ixn 'v_block': hddm.HDDMRegressor(data, v_block, group_only_regressors=False), # standard stimulus contrasts 'v_trial': hddm.HDDMRegressor(data, v_trial, group_only_regressors=False), 'v_runtrial': hddm.HDDMRegressor(data, v_runtrial, group_only_regressors=False), 'v_prev_rt': hddm.HDDMRegressor(data, v_prev_rt, group_only_regressors=False), # stimblock contrasts 'v_stimblock_trial':
#v_standard = hddm.HDDM(data, depends_on={'v': 'condition'}) # Don't use. Produced the following error: # RuntimeWarning: invalid value encountered in double_scalars # tmp2 = (x - v) * (fx - fw) #Warning: Powell optimization failed. Falling back to simplex. #v_standard.sample(100, burn=10, dbname='gng_standard.db', db='pickle') # for quicker debugging. #v_standard.sample(2000, burn=400, dbname='gng_standard.db', db='pickle') #v_standard.save('gng_standard_model') #import pdb; pdb.set_trace() # load fitted model #v_standard = hddm.load('gng_standard_model') # simple regressoion model v_regression = { 'model': "v ~ 1 + C(condition, Treatment('OneGo'))", 'link_func': lambda x: x } v_reg = hddm.HDDMRegressor(data, v_regression, group_only_regressors=['true']) #v_reg.find_starting_values() v_reg.sample(10, burn=2, dbname='gng_regressor.db', db='pickle') v_reg.save('gng_regressor') import pdb pdb.set_trace()
data.postpe_bin_noern[data.postpe_bin_noern==5] = 'E_bin' # #define the specific link functions for z and v def z_link_func(x, data=data): stim = (np.asarray(dmatrix('0 + C(s,[[1],[-1]])', {'s':data.stimulus.ix[x.index]}))) return 1 / (1 + np.exp(-(x * stim))) def v_link_func(x, data=data): stim = (np.asarray(dmatrix('0 + C(s,[[1],[-1]])', {'s':data.stimulus.ix[x.index]}))) return x * stim #fit different models for previous and following cj a_reg = {'model': 'a ~ 1 + stimulus + prevpe_bin_noern + postpe_bin_noern', 'link_func': lambda x: x} v_reg = {'model': 'v ~ 1 + stimulus + prevpe_bin_noern + postpe_bin_noern', 'link_func': v_link_func} reg_descr = [a_reg, v_reg] m = hddm.HDDMRegressor(data, reg_descr, group_only_regressors=False, p_outlier=.05) m.find_starting_values() m.sample(samples, burn=samples/10, thin=2, dbname=os.path.join(model_dir, 'ERPall_binnedPE_traces_1'), db='pickle') m.save(os.path.join(model_dir, 'ERPall_binnedPE_1')) goOn = False if goOn == True: import kabuki import seaborn as sns import matplotlib.pyplot as plt models = [] for i in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]: #this assumes you ran 10 different models above and changed the index when saving models.append(hddm.load('ERPall_binnedPE_%s' %i)) m = kabuki.utils.concat_models(models) #contact these 10 models #Run the same sanity checks as reported in hddm_fit.py, these are not reiterated here
if 'depends_on' in model_json['HDDMmodel']['nodes']['HDDM']['arguments']: depends_on_fixed = {} for d in model_json['HDDMmodel']['nodes']['HDDM']['arguments']['depends_on']: depends_on_fixed[d.encode('ascii','ignore')]=model_json['HDDMmodel']['nodes']['HDDM']['arguments']['depends_on'][d].encode('ascii','ignore') model = hddm.HDDM(df, depends_on=depends_on_fixed) elif 'regressors' in model_json['HDDMmodel']['nodes']['HDDM']: regressors_fixed = [] for r in model_json['HDDMmodel']['nodes']['HDDM']['regressors']: for k in r: regressors_fixed={k.encode('ascii','ignore'):r[k].encode('ascii','ignore'),'link_func':lambda x: x} print(regressors_fixed) model = hddm.HDDMRegressor(df,regressors_fixed) if estimation_method == 'mcmc': model.sample(500) if not op.exists(op.join(derivatives, 'hddm_mcmc')): os.makedirs(op.join(derivatives, 'hddm_mcmc')) for key, row in model.get_group_nodes().iterrows(): fn = op.join(derivatives, 'hddm_mcmc', 'group_par-{}_traces.tsv'.format(key)) trace = pd.DataFrame(row.node.trace[:]) trace.to_csv(fn, sep='\t', header=False, index=False) for key, row in model.get_subj_nodes().iterrows(): subject = row.subj_idx
def z_link_func(x, data=mydata): stim = (np.asarray( dmatrix('0 + C(s, [[1], [-1]])', {'s': data.condition.ix[x.index]}))) return 1 / (1 + np.exp(-(x * stim))) # if you assume other parameter changes as well, we need to add other parameters z_reg = {'model': 'z ~ 1 + C(session)', 'link_func': z_link_func} v_reg = {'model': 'v ~ 1 + C(cond_v)', 'link_func': lambda x: x} a_reg = {'model': 'a ~ 1 + C(session)', 'link_func': lambda a: a} t_reg = {'model': 't ~ 1 + C(session)', 'link_func': lambda t: t} reg_descr = [z_reg, v_reg, a_reg, t_reg] m_reg = hddm.HDDMRegressor(mydata, reg_descr, include='z') m_reg.sample(2000, burn=200) m_reg.print_stats() # check the outputs in comparision with the true param """ Example of one run. Note that the z values should be passed through the transition function to get its face values. mean std 2.5q 25q 50q 75q 97.5q mc err z_Intercept 0.346673 0.0564847 0.239106 0.307751 0.3441 0.386205 0.461575 0.00439742 z_Intercept_std 0.211743 0.0405919 0.124576 0.186462 0.212431 0.240409 0.289452 0.00228978 z_Intercept_subj.0 0.468137 0.076576 0.313579 0.416551 0.469894 0.523116 0.61058 0.00546733 z_Intercept_subj.1 0.2409 0.0505806 0.150262 0.204568 0.238209 0.273871 0.346782 0.00324306 z_Intercept_subj.2 0.271563 0.0573848 0.166577 0.230793 0.269063 0.311191 0.38972 0.00391868 z_Intercept_subj.3 0.310187 0.0620826 0.195229 0.267373 0.306494 0.353033 0.431924 0.00445056 z_Intercept_subj.4 0.44693 0.0736314 0.298593 0.395914 0.449606 0.499134 0.589479 0.00533299
# NOTE(review): data_a_df is never used below — confirm it can be dropped.
data_a_df = pd.DataFrame(data=[data_a])
data_group = data_group.append([data_a], ignore_index=True)
data_group.to_csv('data_group.csv')

## Recover
# Between-subject covariate z_x enters both threshold (a) and drift (v).
a_reg = {'model': 'a ~ 1 + z_x', 'link_func': lambda x: x}
# a_reg_within = {'model': 'a ~ 1+x + C(condition)', 'link_func': lambda x: x}
# for including and estimating within subject effects of condition
v_reg = {'model': 'v ~ 1 + z_x', 'link_func': lambda x: x}
reg_comb = [a_reg, v_reg]
# m_reg = hddm.HDDMRegressor(data_group, reg_comb, group_only_regressors=['true'])
# NOTE(review): group_only_regressors=['true'] is a truthy list, not the bool True — confirm intent.
m_reg = hddm.HDDMRegressor(data_group, a_reg, group_only_regressors=['true'])
m_reg.find_starting_values()
m_reg.sample(3000, burn=500, dbname='a_bwsubs_t200.db', db='pickle')
m_reg.save('a_bwsubs_model_t200')
m_reg.print_stats()

# Check values of reg coefficients against the generated ones.
# NOTE(review): loads 'a_bwsubs_model', not the 'a_bwsubs_model_t200' saved above — confirm.
m_reg = hddm.load('a_bwsubs_model')
data_group = pd.read_csv('data_group.csv')
# Look at correlation of recovered parameter with original.
subjdf = data_group.groupby('subj_idx').first().reset_index()
## check for residual correlation with x
a_int_recovered =[]
pp=[]
# Posterior probability that congruent drift exceeds incongruent drift.
print("P(CONGRUENT > INCONGRUENT)_v = ", (v_C.trace() > v_I.trace()).mean())
# Information criteria for the three depends_on model variants.
print("Lumped model DIC: %f" % simp.dic)
print("Lumped model BIC: %f" % simp.bic)
print("Lumped model AIC: %f" % simp.aic)
print("threshold by stimulus DIC: %f" % simp_a_stim.dic)
print("threshold by stimulus BIC: %f" % simp_a_stim.bic)
print("threshold by stimulus AIC: %f" % simp_a_stim.aic)
print("drift rate by stimulus DIC: %f" % simp_v_stim.dic)
print("drift rate by stimulus BIC: %f" % simp_v_stim.bic)
print("drift rate by stimulus AIC: %f" % simp_v_stim.aic)

####################################################################################################################################
## Modelling within-subjects effects (rather than sampling from separate group priors with depends_on).
from patsy import dmatrix
dmatrix("C(stim, Treatment('CONGRUENT'))", data.head(10))

# Drift rate: treatment-coded stimulus effect, CONGRUENT as reference level.
within_v = hddm.HDDMRegressor(data, "v ~ C(stim, Treatment('CONGRUENT'))")
within_v.find_starting_values()
within_v.sample(10000, burn=100, thin=3, dbname='traces.db', db='pickle')
# NOTE(review): .ix was removed from modern pandas — this requires an old pandas.
v_C, v_I = within_v.nodes_db.ix[
    ["v_Intercept", "v_C(stim, Treatment('CONGRUENT'))[T.INCONGRUENT]"], 'node']
hddm.analyze.plot_posterior_nodes([v_C, v_I])
plt.xlabel('drift-rate')
plt.ylabel('Posterior probability')
plt.title('Group mean posteriors of within-subject drift-rate effects')
plt.savefig('posteriorDist_within_v.pdf')

# Same within-subject contrast for the threshold.
within_a = hddm.HDDMRegressor(data, "a ~ C(stim, Treatment('CONGRUENT'))")
within_a.find_starting_values()
within_a.sample(10000, burn=100, thin=3, dbname='traces.db', db='pickle')
# ============================================ # # MANUALLY APPEND CHAINS # only # ============================================ # 1. construct the model object with the original data and parameters 2. call model.load_db(dbname, db='pickle') # or sqlite, whatever you used 3. call model.gen_stats() if model_name == 'regress_z_prevresp': mydata.ix[mydata['stimulus']==0,'stimulus'] = -1 # recode the stimuli into signed mydata = mydata.dropna(subset=['prevresp']) # dont use trials with nan in prevresp # specify that we want individual parameters for all regressors, see email Gilles 22.02.2017 m = hddm.HDDMRegressor(mydata, reg_both, include=['z', 'sv'], group_only_nodes=['sv'], group_only_regressors=False, p_outlier=0.05) elif model_name == 'regress_z_prevresp_prevpupil_prevrt': mydata.ix[mydata['stimulus']==0,'stimulus'] = -1 # recode the stimuli into signed mydata = mydata.dropna(subset=['prevresp', 'prevpupil']) # dont use trials with nan in prevresp or prevpupil # specify that we want individual parameters for all regressors, see email Gilles 22.02.2017 m = hddm.HDDMRegressor(mydata, reg_both, include=['z', 'sv'], group_only_nodes=['sv'], group_only_regressors=False, p_outlier=0.05) # ============================================ # # CHECK CONVERGENCE # ============================================ #
nsample = 5000 nburn = 500 os.chdir('/Users/nth7/ics/PD_Inhibition_DDM/model_outputs_nth_full_2') baseline_v = { 'model': "v ~ 1 + C(con, Treatment('Congruent'))", 'link_func': lambda x: x } baseline_a = { 'model': "a ~ 1 + C(con, Treatment('Congruent'))", 'link_func': lambda x: x } baseline_comb = [baseline_a, baseline_v] baseline_v_reg = hddm.HDDMRegressor(data, baseline_v) baseline_v_reg.sample(nsample, burn=nburn, dbname='baseline_v_reg.db', db='pickle') baseline_v_reg.save('baseline_v_reg.model') baseline_a_reg = hddm.HDDMRegressor(data, baseline_a) baseline_a_reg.sample(nsample, burn=nburn, dbname='baseline_a_reg.db', db='pickle') baseline_a_reg.save('baseline_a_reg.model') baseline_comb_reg = hddm.HDDMRegressor(data, baseline_comb) baseline_comb_reg.sample(nsample,
##### generate design matrices for HDDMRegressor v = {'model': "v ~ 1 + C(stim, Treatment(0))", 'link_func': lambda x: x} #v = {'model': "v ~ 1 ", 'link_func': lambda x: x} v_block = {'model': "v ~ 1 + C(stim, Treatment(0)) * C(CongruentBlock, Treatment(0))", 'link_func': lambda x: x} sv = {'model': "sv ~ 1 ", 'link_func': lambda x: x} # z = {'model': "z ~ 1", 'link_func': z_link_func} # sz = {'model': "sz ~ 1", 'link_func': lambda x: x} a = {'model': "a ~ 1", 'link_func': lambda x: x} # t = {'model': "t ~ 1", 'link_func': lambda x: x} st = {'model': "st ~ 1", 'link_func': lambda x: x} ##### create dict to index that includes different combinations of the design matrices from above mod_dict = {'v_reg':hddm.HDDMRegressor(data, v, group_only_regressors = False), 'vsv_reg':hddm.HDDMRegressor(data, [v,sv], include = 'sv', group_only_regressors = False), 'v_block_reg':hddm.HDDMRegressor(data, v_block, group_only_regressors = False), 'v_blocksv_reg':hddm.HDDMRegressor(data, [v_block,sv], include = 'sv',group_only_regressors = False), 'vst_reg':hddm.HDDMRegressor(data, [v,st], include = 'st', group_only_regressors = False), 'vsvst_reg':hddm.HDDMRegressor(data, [v,sv,st], include = ('sv','st'), group_only_regressors = False), 'v_blockst_reg':hddm.HDDMRegressor(data, [v_block, st], include = 'st', group_only_regressors = False), 'v_blocksvst_reg':hddm.HDDMRegressor(data, [v_block,sv,st], include = ('sv', 'st'),group_only_regressors = False)} # 'vz_reg':hddm.HDDMRegressor(data, [v,z], include = 'z')} #'vsvz_reg':hddm.HDDMRegressor(data, [v,sv,z], include = ('sv','z')), #'v_blockz_reg':hddm.HDDMRegressor(data, [v_block,z], include = 'z'), #'v_blocksvz_reg':hddm.HDDMRegressor(data, [v_block,sv,z], include = ('sv', 'z'))} mod_dict = {'v_blocksvst_reg':hddm.HDDMRegressor(data, [v_block,sv,st], include = ('sv', 'st'),group_only_regressors = False)} ##### parallel loop over models and number of chains for gelman rubin statistic. Sample and save
import pandas as pd
import hddm
import pickle

# Load trial-level data for the RR-TMS experiment.
data = hddm.load_csv(
    'Z://Work//UW//projects//RR_TMS//hddm//data//fullsplit_hddm.csv')

# Starting point z regressed on rule-RT by TMS condition ('NV' = reference level);
# v and a estimated separately per stimulus via depends_on.
regm = hddm.HDDMRegressor(data,
                          "z ~ rulert:C(tms, Treatment('NV'))",
                          depends_on={
                              'v': 'stim',
                              'a': 'stim'
                          },
                          bias=True,
                          include='all',
                          p_outlier=0.05)
regm.find_starting_values()
# 2000 samples, 200 burn-in; trace DB pickled for later chain concatenation.
regm.sample(
    2000,
    burn=200,
    dbname='Z://Work//UW//projects//RR_TMS//hddm//db//testmodel_traces.db',
    db='pickle')
fname = 'Z://Work//UW//projects//RR_TMS//hddm//models//by_cond//testmodel'
regm.save(fname)
#test.fillna(999, inplace=True) mydata = mydata[np.isfinite(mydata['rt'])] def z_link_func(x, data=mydata): stim = (np.asarray( dmatrix('0 + C(s, [[1], [-1]])', {'s': data.stim.ix[x.index]}))) return 1 / (1 + np.exp(-(x * stim))) z_reg = {'model': 'z ~ 1 + C(pupil)', 'link_func': z_link_func} v_reg = {'model': 'v ~ 1 + C(pupil)', 'link_func': lambda x: x} reg_descr = [z_reg, v_reg] m_reg = hddm.HDDMRegressor(mydata, reg_descr, include='z') m_reg.sample(5000, burn=200) mydata = mydata[np.logical_or( np.logical_and(mydata.dprobe > -17, mydata.stim == 0), np.logical_and(mydata.dprobe > -3, mydata.stim == 1))] mydata2 = hddm.utils.flip_errors(mydata) mydata2 = mydata2[mydata2.state != 4] mydata2["state"] = mydata2["state"].astype('category') m = hddm.models.HDDMRegressor(mydata2, 't ~ pupil') m_within_subj = hddm.HDDMRegressor(mydata2, "t ~ C(stim, state('1'))") m_reg = hddm.HDDMRegressor(mydata2,