def drift_diffusion_hddm(data, samples=10000, n_jobs=6, run=True, parallel=True, model_name='model', model_dir='.', accuracy_coding=False):
    
    import hddm
    import os
    import time
    import pp  # Parallel Python, used below for running chains in parallel
    
    # run the model:
    if run:
        if parallel:
            job_server = pp.Server(ppservers=(), ncpus=n_jobs)
            start_time = time.time()
            jobs = [(trace_id, job_server.submit(run_model,(trace_id, data, model_dir, model_name, samples, accuracy_coding), (), ('hddm',))) for trace_id in range(n_jobs)]
            results = []
            for s, job in jobs:
                results.append(job())
            print "Time elapsed: ", time.time() - start_time, "s"
            job_server.print_stats()
            
            # save:
            for i in range(n_jobs):
                model = results[i]
                model.save(os.path.join(model_dir, '{}_{}'.format(model_name,i)))
        else:
            model = run_model(1, data, model_dir, model_name, samples, accuracy_coding)
            model.save(os.path.join(model_dir, model_name))
    
    # load the models:
    else:
        print('loading existing model(s)')
        if parallel:
            model = []
            for i in range(n_jobs):
                model.append(hddm.load(os.path.join(model_dir, '{}_{}'.format(model_name,i))))
        else:
            model = hddm.load(os.path.join(model_dir, model_name))
    return model
Example #2
def load_model(empty_model, dbfile):
    from glob import glob
    import hddm
    # load_concat_models is assumed to be importable in the original script
    # (a kabuki/user helper that merges the separately loaded chains)
    loadfile = sorted(glob(dbfile))
    if len(loadfile) > 1:
        models = []
        for l in loadfile:
            m = hddm.load(empty_model)
            m.load_db(l, db='pickle')
            models.append(m)
        m = load_concat_models(models)
        return m, models
    else:
        m = hddm.load(empty_model)
        m.load_db(loadfile[0], db='pickle')
        return m
Example #3
def concat_models(mypath, model_name):

    import os, hddm, time, kabuki

    # ============================================ #
    # APPEND MODELS
    # ============================================ #

    allmodels = []
    print("appending models")
    for trace_id in range(60):  # one file per chain that was run
        model_filename              = os.path.join(mypath, model_name, 'modelfit-md%d.model'%trace_id)
        modelExists                 = os.path.isfile(model_filename)
        assert modelExists, 'model %s is missing and has to be rerun' % model_filename

        starttime = time.time()
        print(model_filename)
        thism                       = hddm.load(model_filename)
        thism                       = hddm.load(model_filename)

        # now append
        allmodels.append(thism)
        elapsed = time.time() - starttime
        print( "Elapsed time: %f seconds." %elapsed )

    # ============================================ #
    # MANUALLY APPEND CHAINS
    # ============================================ #

    # 1. construct the model object with the original data and parameters
Example #4
def PPC(samples, n=None):
    import hddm

    import pandas as pd
    import pickle
    import os

    #set filepath
    filepath = './models/'

    #import model
    m = hddm.load(filepath + 'm_all')
    print('model loaded')

    #set savepath
    savepath = './ppc/'

    if not os.path.exists(savepath):
        os.makedirs(savepath)

    #start PP sampling
    print('starting...')
    ppc = hddm.utils.post_pred_gen(m, samples=samples)
    ppc.reset_index(inplace=True)

    print('\nsaving...')
    ppc.to_csv('%s/ppc_%d.csv' % (savepath, n), sep=',', encoding='utf-8')

    print('PP samples saved')
Example #5
    def test_init_sample_save_load_regressor(self):
        for model in self.models:
            if len(hddm.model_config.model_config[model]["choices"]) == 2:
                # Define Link Function
                def id_link(x):
                    return x

                # Define Regression Model
                v_reg = {"model": "v ~ 1 + theta", "link_func": id_link}

                # Initialize HDDM Model (using cavanagh data)
                model_ = hddm.HDDMnnRegressor(
                    self.cav_data,
                    [v_reg],
                    include=hddm.model_config.model_config[model]
                    ["hddm_include"],
                    model=model,
                    group_only_regressors=True,
                )

                # Sample
                model_.sample(
                    self.nmcmc,
                    burn=self.nburn,
                    dbname=self.filepath + "test_" + model + ".db",
                    db="pickle",
                )

                # Save Model
                print("Saving Model: ")
                model_.save(self.filepath + "test_" + model + ".pickle")

                # Load Model
                print("Loading Model: ")
                model__ = hddm.load(self.filepath + "test_" + model +
                                    ".pickle")
                self.assertTrue(model__.nn == True)

                # Check if id_link is preserved correctly
                print("Checking if link func is correctly recovered")
                self.assertTrue(
                    model__.model_descrs[0]["model"]["link_func"] == id_link)

                del model_
                del model__

            else:
                print("Skipping n > 2 choice models for this test for now !")
        pass
Example #6
    def test_init_sample_save_load_stimcoding(self):
        for model in self.models:
            if len(hddm.model_config.model_config[model]["choices"]) == 2:
                # Generate Data
                data, gt = hddm.simulators.hddm_dataset_generators.simulator_stimcoding(
                    model=model,
                    n_samples_by_condition=self.n_samples_per_trial,
                    split_by="v",
                )
                # Initialize HDDM Model
                model_ = hddm.HDDMnnStimCoding(
                    data,
                    model=model,
                    split_param="v",
                    drift_criterion=True,
                    stim_col="stim",
                )
                # Sample
                model_.sample(
                    self.nmcmc,
                    burn=self.nburn,
                    dbname=self.filepath + "test_" + model + ".db",
                    db="pickle",
                )

                # Save Model
                print("Saving Model: ")
                model_.save(self.filepath + "test_" + model + ".pickle")

                # Load Model
                print("Loading Model: ")
                model__ = hddm.load(self.filepath + "test_" + model +
                                    ".pickle")
                self.assertTrue(model__.nn == True)

                del model_
                del model__

            else:
                print("Skipping n > 2 choice models for this test for now !")
        pass
Example #7
    def test_HDDM_load_save(self):
        include = ['z', 'sz', 'st', 'sv']
        dbs = ['pickle', 'sqlite']
        params = hddm.generate.gen_rand_params(include=include)
        data, params_true = hddm.generate.gen_rand_data(params, size=10, subjs=2)
        data = pd.DataFrame(data)
        data['cov'] = 1.

        for db, model_class in itertools.product(dbs, self.model_classes):
            if model_class is hddm.models.HDDMRegressor:
                model = model_class(data, 'v ~ cov', include=include, is_group_model=True)
            else:
                model = model_class(data, include=include, is_group_model=True)
            model.sample(100, dbname='test.db', db=db)
            model.save('test.model')
            m_load = hddm.load('test.model')
            os.remove('test.db')
            os.remove('test.model')
Example #8
    def test_HDDM_load_save(self, assert_=False):
        include = ['z', 'sz','st','sv']
        dbs = ['pickle', 'sqlite']
        model_classes = [hddm.models.HDDMTruncated, hddm.models.HDDM, hddm.models.HDDMRegressor]
        reg_func = lambda args, cols: args[0] + args[1]*cols[:,0]
        reg = {'func': reg_func, 'args':['v_slope','v_inter'], 'covariates': 'cov', 'outcome':'v'}
        params = hddm.generate.gen_rand_params(include=include)
        data, params_true = hddm.generate.gen_rand_data(params, size=10, subjs=2)
        data = pd.DataFrame(data)
        data['cov'] = 1.

        for db, model_class in itertools.product(dbs, model_classes):
            if model_class is hddm.models.HDDMRegressor:
                model = model_class(data, regressor=reg, include=include, is_group_model=True)
            else:
                model = model_class(data, include=include, is_group_model=True)
            model.sample(20, dbname='test.db', db=db)
            model.save('test.model')
            m_load = hddm.load('test.model')
            os.remove('test.db')
            os.remove('test.model')
Example #9
def get_stats():
    """
    Chop up the stats data frame for running a repeated-measures ANOVA.
    """
    data = pd.read_csv('data.csv', index_col=None)
    model = load('model.pickle')
    results = model.gen_stats()
    print(results.index.tolist())
    params = 'avtz'
    isis = sorted(data.isi.unique().tolist())
    deltas = sorted(data.delta.unique().tolist())
    subjects = sorted(data.subj_idx.unique().tolist())
    df = pd.DataFrame()
    for x in product(params, deltas, isis):
        y = '%s_subj(%s.%s).' % x
        data = []
        for s in subjects:
            z = y + s
            data.append(results.loc[z, 'mean'])
        df['%s_%s_%s' % x] = data
    df.to_csv('for_anovas.csv', index=False)
Example #10
    def test_HDDM_load_save(self):
        include = ['z', 'sz', 'st', 'sv']
        dbs = ['pickle', 'sqlite']
        params = hddm.generate.gen_rand_params(include=include)
        data, params_true = hddm.generate.gen_rand_data(params,
                                                        size=10,
                                                        subjs=2)
        data = pd.DataFrame(data)
        data['cov'] = 1.

        for db, model_class in itertools.product(dbs, self.model_classes):
            if model_class is hddm.models.HDDMRegressor:
                model = model_class(data,
                                    'v ~ cov',
                                    include=include,
                                    is_group_model=True)
            else:
                model = model_class(data, include=include, is_group_model=True)
            model.sample(100, dbname='test.db', db=db)
            model.save('test.model')
            m_load = hddm.load('test.model')
            os.remove('test.db')
            os.remove('test.model')
Example #11
    def test_HDDM_load_save(self):
        include = ["z", "sz", "st", "sv"]
        dbs = ["pickle", "sqlite"]
        params = hddm.generate.gen_rand_params(include=include)
        data, params_true = hddm.generate.gen_rand_data(params,
                                                        size=10,
                                                        subjs=2)
        data = pd.DataFrame(data)
        data["cov"] = 1.0

        for db, model_class in itertools.product(dbs, self.model_classes):
            if model_class is hddm.models.HDDMRegressor:
                model = model_class(data,
                                    "v ~ cov",
                                    include=include,
                                    is_group_model=True)
            else:
                model = model_class(data, include=include, is_group_model=True)
            model.sample(100, dbname="test.db", db=db)
            model.save("test.model")
            m_load = hddm.load("test.model")
            os.remove("test.db")
            os.remove("test.model")
Example #12
    def test_init_sample_save_load_single_subj(self):
        for model in self.models:
            # Get simulations
            data = self.get_data_single_subj(model=model)

            # Initialize HDDMnn Model
            print("Loading Model: " + model)
            model_ = hddm.HDDMnn(
                data,
                model=model,
                informative=False,
                include=hddm.model_config.model_config[model]["hddm_include"],
                is_group_model=False,
                depends_on={},
                p_outlier=0.00,
            )

            # Sample
            print("Sampling: ")
            model_.sample(
                self.nmcmc,
                burn=self.nburn,
                dbname=self.filepath + "test_" + model + ".db",
                db="pickle",
            )

            # Save Model
            print("Saving Model: ")
            model_.save(self.filepath + "test_" + model + ".pickle")

            # Load Model
            print("Loading Model: ")
            model__ = hddm.load(self.filepath + "test_" + model + ".pickle")
            self.assertTrue(model__.nn == True)

            del model_
            del model__
    if len(modelformula.split('a')) == 2:

        aformula.append(modelformula.split('a')[-1])
        modelformula = modelformula.split('a')[0]
    else:
        aformula.append(None)

    if len(modelformula.split('v')) == 2:

        vformula.append(modelformula.split('v')[-1])
        modelformula = modelformula.split('v')[0]
    else:
        vformula.append(None)

    for i in range(5):
        m = hddm.load(path + x + '/' + x + '_' + str(i))
        models.append(m)
    m_comb = concat_models(models)
    print(modelname)
    print("****DIC: %f" %m_comb.dic)
    print("****BPIC: %f" %(m_comb.dic_info['pD'] + m_comb.dic))
    
    dic_dict[modelname] = m_comb.dic
    bdic_dict[modelname] = m_comb.dic_info['pD'] + m_comb.dic

    dic.append(m_comb.dic)
    bdic.append(m_comb.dic_info['pD'] + m_comb.dic)


# Transform into data frame and store in dic.csv
"""
Example #14
start_time = time.time()  # the start time of the processing

#### model 1, free v,t,z
C_Id_vtz = hddm.HDDM(dat_C_Id,
                     depends_on={
                         'v': ['val', 'id'],
                         'z': ['val', 'id'],
                         't': ['val', 'id']
                     },
                     include=['v', 'z', 't'],
                     p_outlier=.05)
C_Id_vtz.find_starting_values()
C_Id_vtz.sample(10000, burn=1000, dbname='traces_id_vtz.db', db='pickle')
# save the model
C_Id_vtz.save('C_Id_vtz')
C_Id_vtz = hddm.load('C_Id_vtz')

# check convergence of MCMC  #### out put of gelman_rubin ######
models_vtz = list()
for i in range(5):
    m = hddm.HDDM(dat_C_Id,
                  depends_on={
                      'v': ['val', 'id'],
                      'z': ['val', 'id'],
                      't': ['val', 'id']
                  },
                  include=['v', 'z', 't'],
                  p_outlier=.05)
    m.find_starting_values()
    m.sample(10000, burn=1000)
    models_vtz.append(m)
Example #15
a_reg = {'model': 'a ~ 1 + stimulus + prevpe_bin_noern + postpe_bin_noern', 'link_func': lambda x: x}
v_reg = {'model': 'v ~ 1 + stimulus + prevpe_bin_noern + postpe_bin_noern', 'link_func': v_link_func}
reg_descr = [a_reg, v_reg]
m = hddm.HDDMRegressor(data, reg_descr, group_only_regressors=False, p_outlier=.05)
m.find_starting_values()
m.sample(samples, burn=samples/10, thin=2, dbname=os.path.join(model_dir, 'ERPall_binnedPE_traces_1'), db='pickle')
m.save(os.path.join(model_dir, 'ERPall_binnedPE_1'))

goOn = False
if goOn == True:
    import kabuki
    import seaborn as sns
    import matplotlib.pyplot as plt
    models = []
    for i in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]: #this assumes you ran 10 different models above and changed the index when saving
        models.append(hddm.load('ERPall_binnedPE_%s' %i))
    m = kabuki.utils.concat_models(models) #concatenate these 10 models

    #Run the same sanity checks as reported in hddm_fit.py, these are not reiterated here  
    
    #Extract the data and save these
    results = m.gen_stats()
    results.to_csv(os.path.join(model_dir, 'eeg_binnedPe_HDDMestimates.csv'))
    
    a2, a3, a4, a5 = m.nodes_db.node[['a_prevpe_bin_noern[T.B_bin]', 'a_prevpe_bin_noern[T.C_bin]', 'a_prevpe_bin_noern[T.D_bin]', 'a_prevpe_bin_noern[T.E_bin]']]
    p2, p3, p4, p5 = m.nodes_db.node[['a_postpe_bin_noern[T.B_bin]', 'a_postpe_bin_noern[T.C_bin]', 'a_postpe_bin_noern[T.D_bin]', 'a_postpe_bin_noern[T.E_bin]']]
    v2, v3, v4, v5 = m.nodes_db.node[['v_prevpe_bin_noern[T.B_bin]', 'v_prevpe_bin_noern[T.C_bin]', 'v_prevpe_bin_noern[T.D_bin]', 'v_prevpe_bin_noern[T.E_bin]']]
    vp2, vp3, vp4, vp5 = m.nodes_db.node[['v_postpe_bin_noern[T.B_bin]', 'v_postpe_bin_noern[T.C_bin]', 'v_postpe_bin_noern[T.D_bin]', 'v_postpe_bin_noern[T.E_bin]']]

    numpy.savetxt("PEonly_noern_a_prevcj_bin2.csv", a2.trace(), delimiter=",")
    numpy.savetxt("PEonly_noern_a_prevcj_bin3.csv", a3.trace(), delimiter=",")
Example #16
                    pdf[i] = np.exp(node.logp) * 10

            #plot the prior density
            plt.plot(xlim, pdf)
            plt.xlabel(name)
            sns.despine(offset=2, trim=True)

            # # Hide the right and top spines
#             ax.spines['right'].set_visible(False)
#             ax.spines['top'].set_visible(False)
#
#             # Only show ticks on the left and bottom spines
#             ax.yaxis.set_ticks_position('left')
#             ax.xaxis.set_ticks_position('bottom')

#add suptitle
        plt.suptitle('HDDM priors')

# save the figure
    plt.savefig(os.path.join(mypath, 'priorPlot.pdf'))


## LOAD MODEL WITH THE MOST PARAMETERS WE HAVE
mypath = os.path.realpath(os.path.expanduser('/nfs/aeurai/HDDM/JW_PNAS'))
m = hddm.load(
    os.path.join(mypath, 'stimcoding_dc_z_prevresp_st',
                 'modelfit-combined.model'))
#print(m)
#shell()
plot_all_priors(m)
Example #17
start_time = time.time()  # the start time of the processing

#### model 1, free v,t,z
M_match_vtz = hddm.HDDM(dat_M_match,
                        depends_on={
                            'v': ['val', 'id'],
                            'z': ['val', 'id'],
                            't': ['val', 'id']
                        },
                        include=['v', 'z', 't'],
                        p_outlier=.05)
M_match_vtz.find_starting_values()
M_match_vtz.sample(10000, burn=1000, dbname='traces_m_vtz.db', db='pickle')
# save the model
M_match_vtz.save('exp7_rep_match_vtz')
M_match_vtz = hddm.load('exp7_rep_match_vtz')

# check convergence of MCMC  #### out put of gelman_rubin ######
models_vtz = list()
for i in range(5):
    m = hddm.HDDM(dat_M_match,
                  depends_on={
                      'v': ['val', 'id'],
                      'z': ['val', 'id'],
                      't': ['val', 'id']
                  },
                  include=['v', 'z', 't'],
                  p_outlier=.05)
    m.find_starting_values()
    m.sample(10000, burn=1000)
    models_vtz.append(m)
Example #18
import hddm
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle

try:
    import IPython
    shell = IPython.get_ipython()
    shell.enable_matplotlib(gui='inline')
except:
    pass

i = int(input('Load which model? '))

model = hddm.load('Z://Work//UW//projects//RR_TMS//hddm//models//by_cond//fullsplit%i'%i)

model.plot_posterior_predictive(figsize=(14,10))

print("Model DIC: %f"%model.dic)

model.plot_posterior_predictive()
plt.savefig('foo.pdf')  # save before show(), otherwise the saved figure is blank
plt.show()
Example #19
def plot_model(mypath, model_name, trace_id):

    # load in the model that was ran
    m = hddm.load(os.path.join(mypath, model_name, 'modelfit-md%d.model'%trace_id))

    # ============================================ #
    # save plots
    # ============================================ #

    # plot some output stuff in figures subfolder
    figpath = os.path.join(mypath, model_name, 'figures-md%d'%trace_id)
    if not os.path.exists(figpath):
        os.mkdir(figpath)

    # 1. plot the traces and posteriors for each parameter
    m.plot_posteriors(save=True, path=figpath, format='pdf')

    # 2. plot posterior predictive
    print('plotting posterior predictive')
    plot_posterior_predictive_anne(m, path=figpath)

    # 3. plot the actual posteriors and the way they depend on the variables we specified
    if model_name in ['prevresp_prevrt_dc', 'prevresp_prevpupil_dc']:
        print('plotting the posteriors by previous response and rt')
        dc_prevresp0_prevRTlow, dc_prevresp0_prevRTmed, dc_prevresp0_prevRThigh, \
            dc_prevresp1_prevRTlow, dc_prevresp1_prevRTmed, dc_prevresp1_prevRThigh \
            = m.nodes_db.node[['dc(-1.0.1.0)', 'dc(-1.0.2.0)', 'dc(-1.0.3.0)', 'dc(1.0.1.0)','dc(1.0.2.0)','dc(1.0.3.0)']]

        # plot these myself
        plot_posterior_nodes_anne([dc_prevresp0_prevRTlow, dc_prevresp0_prevRTmed, dc_prevresp0_prevRThigh, \
            dc_prevresp1_prevRTlow, dc_prevresp1_prevRTmed, dc_prevresp1_prevRThigh])

        plt.xlabel('Drift criterion')
        plt.ylabel('Posterior probability')
        plt.title('Posterior of drift-criterion group means')
        plt.savefig(os.path.join(figpath, 'driftcriterion_posteriors.pdf'))

    elif model_name in ['prevresp_prevrt_z', 'prevresp_prevpupil_z']:
        print('plotting the posteriors by previous response and rt')
        dc_prevresp0_prevRTlow, dc_prevresp0_prevRTmed, dc_prevresp0_prevRThigh, \
            dc_prevresp1_prevRTlow, dc_prevresp1_prevRTmed, dc_prevresp1_prevRThigh \
            = m.nodes_db.node[['z(-1.0.1.0)', 'z(-1.0.2.0)', 'z(-1.0.3.0)', 'z(1.0.1.0)','z(1.0.2.0)','z(1.0.3.0)']]

        # plot these myself
        plot_posterior_nodes_anne([dc_prevresp0_prevRTlow, dc_prevresp0_prevRTmed, dc_prevresp0_prevRThigh, \
            dc_prevresp1_prevRTlow, dc_prevresp1_prevRTmed, dc_prevresp1_prevRThigh])

        plt.xlabel('Starting point')
        plt.ylabel('Posterior probability')
        plt.title('Posterior of starting-point group means')
        plt.savefig(os.path.join(figpath, 'startingpoint_posteriors.pdf'))

    elif model_name in ['prevresp_z']:
        print('plotting the posteriors by previous response')
        dc_prevresp0, dc_prevresp1 \
            = m.nodes_db.node[['z(-1.0)', 'z(1.0)']]

        # plot these myself
        plot_posterior_nodes_anne([dc_prevresp0, dc_prevresp1])
        plt.xlabel('Starting point')
        plt.ylabel('Posterior probability')
        plt.title('Posterior of starting-point group means')
        plt.savefig(os.path.join(figpath, 'startingpoint_posteriors.pdf'))

    elif model_name in ['prevresp_dc']:
        print('plotting the posteriors by previous response')
        dc_prevresp0, dc_prevresp1 \
            = m.nodes_db.node[['dc(-1.0)', 'dc(1.0)']]

        # plot these myself
        plot_posterior_nodes_anne([dc_prevresp0, dc_prevresp1])
        plt.xlabel('Drift criterion')
        plt.ylabel('Posterior probability')
        plt.title('Posterior of drift-criterion group means')
        plt.savefig(os.path.join(figpath, 'driftcriterion_posteriors.pdf'))
# written by Liangying, 20/2/2020
import hddm
import pandas as pd
import matplotlib.pyplot as plt

model = hddm.load('model_avtz2_all_StimCoding_2')
#model.plot_posterior_predictive(figsize=(40,40),save = True, path = "D:\\brainbnu\\haiyang\\hddm\\result\\all_StimCoding\\split_z\\avz")
#model.plot_posteriors(['a', 't', 'v', 'z'],save = True, path = "D:\\brainbnu\\haiyang\\hddm\\result\\all_StimCoding\\avtz2")
# Between group drift rate comparisons

data = hddm.load_csv('D://brainbnu//haiyang//hddm//hddm_all_StimCoding.csv')

#Posterior predictive check
ppc_data = hddm.utils.post_pred_gen(model)
ppc_compare = hddm.utils.post_pred_stats(data, ppc_data)
ppc_stats = hddm.utils.post_pred_stats(data, ppc_data, call_compare=False)
'''
# 0 back
Stress_v_0back, Control_v_0back = model.nodes_db.node[['v(0.stress)', 'v(0.control)']]
print "P_v(Stress_v_0back > Control_v_0back) =", (Stress_v_0back.trace()> Control_v_0back.trace()).mean()
print "P_v(Control_v_0back > Stress_v_0back) =", (Control_v_0back.trace() > Stress_v_0back.trace()).mean()
hddm.analyze.plot_posterior_nodes([Stress_v_0back, Control_v_0back],10)
plt.xlabel('drift-rate')
plt.ylabel('Posterior probability')
plt.title('Posterior of drift-rate group means')
plt.show()


# 2 back
Stress_v_2back, Control_v_2back = model.nodes_db.node[['v(2.stress)', 'v(2.control)']]
print "P_v(Stress_v_2back > Control_v_2back) =", (Stress_v_2back.trace()> Control_v_2back.trace()).mean()
         burn=samples / 10,
         thin=2,
         dbname=os.path.join(model_dir, 'Experiment1_traces_1'),
         db='pickle')
m.save(os.path.join(model_dir, 'Experiment1_1'))

goOn = False
if goOn == True:
    import kabuki
    import seaborn as sns
    import matplotlib.pyplot as plt
    models = []
    for i in [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10
    ]:  #this assumes you ran 10 different models above and changed the index when saving
        models.append(hddm.load('Experiment1_%s' % i))
    m = kabuki.utils.concat_models(models)  #concatenate these 10 models
    gelman_rubin(models)  #check R hat

    #diagnostics
    m.plot_posteriors()

    #Simulate data and compare to actual data
    data['response'] = data['cor']
    ppc_data = post_pred_gen(m, append_data=True)
    ppc_data['resp_sampled'] = 1
    ppc_data['resp_sampled'][ppc_data.rt_sampled > 0] = 0
    hddm.utils.post_pred_stats(data, ppc_data)

    ppc_data.rt = abs(ppc_data.rt)
    ppc_data.rt[ppc_data.cor == 1] = -ppc_data.rt[ppc_data.cor == 1]
Example #22
import hddm
import pandas as pd
import pickle

models = []

for i in range(5):
    m = hddm.load(
        'Z://Work//UW//projects//RR_TMS//hddm//models//by_cond//testmodel')
    models.append(m)

hddm.analyze.gelman_rubin(models)
def drift_diffusion_hddm(data,
                         samples=10000,
                         n_jobs=6,
                         run=True,
                         parallel=True,
                         model_name='model',
                         model_dir='.',
                         accuracy_coding=False):

    import hddm
    import os
    import time
    import pp  # Parallel Python, used below for running chains in parallel

    # run the model:
    if run:
        if parallel:
            job_server = pp.Server(ppservers=(), ncpus=n_jobs)
            start_time = time.time()
            jobs = [(trace_id,
                     job_server.submit(run_model,
                                       (trace_id, data, model_dir, model_name,
                                        samples, accuracy_coding), (),
                                       ('hddm', )))
                    for trace_id in range(n_jobs)]
            results = []
            # shell()  # leftover interactive-debugging hook, disabled
            for s, job in jobs:
                results.append(job())
            print "Time elapsed: ", time.time() - start_time, "s"
            job_server.print_stats()

            # save:
            for i in range(n_jobs):
                model = results[i]
                model.save(
                    os.path.join(model_dir, '{}_{}'.format(model_name, i)))
        else:
            start_time = time.time()
            model = run_model(3, data, model_dir, model_name, samples,
                              accuracy_coding)
            model.save(os.path.join(model_dir, '{}_md{}'.format(model_name,
                                                                3)))

            # print point estimates
            results = model.gen_stats()
            results.to_csv(os.path.join(fig_dir, 'diagnostics',
                                        'results3.csv'))

            # dic:
            text_file = open(os.path.join(fig_dir, 'diagnostics', 'DIC3.txt'),
                             'w')
            text_file.write("Model {}: {}\n".format(m, model.dic))
            text_file.close()
            print "Time elapsed: ", time.time() - start_time, "s"

    # load the models:
    else:
        print('loading existing model(s)')
        if parallel:
            model = []
            for i in range(n_jobs):
                model.append(
                    hddm.load(
                        os.path.join(model_dir, '{}_{}'.format(model_name,
                                                               i))))
        else:
            model = hddm.load(
                os.path.join(model_dir, '{}_md{}'.format(model_name, 1)))
    return model
Example #24
import hddm
import pickle

model = hddm.load(
    'Z://Work//UW//projects//RR_TMS//hddm//models//by_cond//va_stim0')

# init just a couple of vars
# conditions in DRI_TMS task
# inf/ins - X/Y
# sym/fin - S/F
# early/late/no stim - E/L/S
# PMd/Vertex stim - P/V

v_XFEP, v_XFEV, v_XFLP, v_XFLV, v_XFNP, v_XFNV, \
v_XSEP, v_XSEV, v_XSLP, v_XSLV, v_XSNP, v_XSNV, \
v_YFEP, v_YFEV, v_YFLP, v_YFLV, v_YFNP, v_YFNV, \
v_YSEP, v_YSEV, v_YSLP, v_YSLV, v_YSNP, v_YSNV = \
    model.nodes_db.node[['v( XFEP)', 'v( XFEV)', 'v( XFLP)', 'v( XFLV)', 'v( XFNP)', 'v( XFNV)',
                         'v( XSEP)', 'v( XSEV)', 'v( XSLP)', 'v( XSLV)', 'v( XSNP)', 'v( XSNV)',
                         'v( YFEP)', 'v( YFEV)', 'v( YFLP)', 'v( YFLV)', 'v( YFNP)', 'v( YFNV)',
                         'v( YSEP)', 'v( YSEV)', 'v( YSLP)', 'v( YSLV)', 'v( YSNP)', 'v( YSNV)']]

a_XFEP, a_XFEV, a_XFLP, a_XFLV, a_XFNP, a_XFNV, \
a_XSEP, a_XSEV, a_XSLP, a_XSLV, a_XSNP, a_XSNV, \
a_YFEP, a_YFEV, a_YFLP, a_YFLV, a_YFNP, a_YFNV, \
a_YSEP, a_YSEV, a_YSLP, a_YSLV, a_YSNP, a_YSNV = \
    model.nodes_db.node[['a( XFEP)', 'a( XFEV)', 'a( XFLP)', 'a( XFLV)', 'a( XFNP)', 'a( XFNV)',
                         'a( XSEP)', 'a( XSEV)', 'a( XSLP)', 'a( XSLV)', 'a( XSNP)', 'a( XSNV)',
                         'a( YFEP)', 'a( YFEV)', 'a( YFLP)', 'a( YFLV)', 'a( YFNP)', 'a( YFNV)',
                         'a( YSEP)', 'a( YSEV)', 'a( YSLP)', 'a( YSLV)', 'a( YSNP)', 'a( YSNV)']]
Example #25
"""
Anne Urai, 2016
adapted from JW de Gee
"""

import hddm, os

nr_traces = 3
model_name = 'basic_stimcoding'

# find path depending on local/klimag
usr = os.environ.get('USER')
if usr in ['anne']:
    mypath = '/Users/anne/Data/projects/0/neurodec/Data/MEG-PL/Data/HDDM'
if usr in ['aurai']:
    mypath = '/home/aurai/Data/MEG-PL/Data/HDDM'
thispath = os.path.join(mypath, model_name)

print "appending models"
models = []
for t in range(nr_traces):  # run the models serially
    models.append(hddm.load(os.path.join(thispath, 'modelfit-md%d' % t)))

print "computing gelman-rubin convergence statistics"
# compute gelman rubic
gr = hddm.analyze.gelman_rubin(models)
text_file = open(os.path.join(thispath, 'gelman_rubic.txt'), 'w')
for p in gr.items():
    text_file.write("%s:%s\n" % p)
text_file.close()
print "DONE!"
Example #26
a_reg = {'model': 'a ~ 1 + z_x', 'link_func': lambda x: x}
# a_reg_within = {'model': 'a ~ 1+x + C(condition)', 'link_func': lambda x: x}
# for including and estimating within subject effects of  condition

v_reg = {'model': 'v ~ 1 + z_x', 'link_func': lambda x: x}
reg_comb = [a_reg, v_reg]
# m_reg = hddm.HDDMRegressor(data_group, reg_comb, group_only_regressors=['true']) 

m_reg = hddm.HDDMRegressor(data_group, a_reg, group_only_regressors=['true'])
m_reg.find_starting_values()
m_reg.sample(3000, burn=500, dbname='a_bwsubs_t200.db', db='pickle')
m_reg.save('a_bwsubs_model_t200')

m_reg.print_stats()  # check values of reg coefficients against the generated ones

m_reg = hddm.load('a_bwsubs_model')
data_group = pd.read_csv('data_group.csv')

#look at correlation of recovered parameter with original
subjdf = data_group.groupby('subj_idx').first().reset_index()

## check for residual correlation with x 
a_int_recovered =[]
pp=[]

from scipy import stats
for i in range(0,(1+max(x_range))*subjs_per_bin):
    a='a_Intercept_subj.'
    a+=str(i)
    a+='.0'
    xx=i//subjs_per_bin
import multiprocessing

# File paths

model_name = sys.argv[1]
model_path = '../output/Study1/ae_only/' + sys.argv[1] + '/' + sys.argv[1]
datafile_name = '../data/Study1/' + sys.argv[2]

df = pd.read_csv(datafile_name, low_memory=False)
data = hddm.utils.flip_errors(df)

model_list = []

for model_index in range(5):
    sub_model_name = model_path + '_' + str(model_index)
    sub_model = hddm.load(sub_model_name)
    model_list.append(sub_model)

m_comb = concat_models(model_list)

print("DIC: %f" % m_comb.dic)

print("BPIC: %f" % (m_comb.dic_info['pD'] + m_comb.dic))


def _parents_to_random_posterior_sample(bottom_node, pos=None):
    """Walks through parents and sets them to pos sample."""
    for i, parent in enumerate(bottom_node.extended_parents):
        if not isinstance(parent, pm.Node):  # Skip non-stochastic nodes
            continue
                        time.sleep(60)

                # concatenate the different chains, will save disk space
                concat_models(mypath, models[vx])

        elif runMe == 2:

            # ============================================ #
            # POSTERIOR PREDICTIVES TO ASSESS MODEL FIT
            # ============================================ #

            starttime = time.time()
            print "computing ppc"

            # specify how many samples are needed
            m = hddm.load(
                os.path.join(mypath, models[vx], 'modelfit-combined.model'))
            print(os.path.join(mypath, models[vx], 'modelfit-combined.model'))
            if 'MEG' in datasets[dx]:
                nsmp = 50
            else:
                nsmp = 100

            ppc = hddm.utils.post_pred_gen(m, append_data=True, samples=nsmp)

            # make the csv smaller, save disk space
            savecols = list(
                set(ppc.columns) & set([
                    'rt', 'rt_sampled', 'response_sampled', 'index',
                    'stimulus', 'response', 'prevresp', 'subj_idx',
                    'transitionprob', 'coherence', 'prevcorrect'
                ]))
# ============================================ #
# post-processing
# ============================================ #

import hddm
import matplotlib.pyplot as plt

print "HDDM imported, starting post-processing"
models = []
for trace_id in range(nr_traces):  # run the models serially
    thism = hddm.load(
        os.path.join(mypath, model_name, 'modelfit-md%d.model' % trace_id))
    print(os.path.join(mypath, model_name, 'modelfit-md%d.model' % trace_id))

    # plot some output stuff in figures subfolder
    figpath = os.path.join(mypath, model_name, 'figures-md%d' % trace_id)
    if not os.path.exists(figpath):
        os.mkdir(figpath)
    thism.plot_posteriors(save=True, path=figpath, format='pdf')
    plt.close('all')  # otherwise figures stay open in memory; close them all
    models.append(thism)

# gelman-rubin on the list of models
gr = hddm.analyze.gelman_rubin(models)
text_file = open(os.path.join(mypath, model_name, 'gelman_rubic.txt'), 'w')
for p in gr.items():
    text_file.write("%s:%s\n" % p)
text_file.close()

# ============================================ #
def concat_models(mypath, model_name):

    nchains = 30

    # CHECK IF COMBINED MODEL EXISTS
    if os.path.isfile(
            os.path.join(mypath, model_name, 'modelfit-combined.model')):
        print(os.path.join(mypath, model_name, 'modelfit-combined.model'))

    else:
        # ============================================ #
        # APPEND MODELS
        # ============================================ #

        allmodels = []
        print("appending models for %s" % model_name)
        for trace_id in range(nchains):  # how many chains were run?
            model_filename = os.path.join(mypath, model_name,
                                          'modelfit-md%d.model' % trace_id)
            modelExists = os.path.isfile(model_filename)

            if modelExists:  # if not, this model has to be rerun
                print(model_filename)
                thism = hddm.load(model_filename)
                allmodels.append(thism)  # now append into a list

        # ============================================ #
        # CHECK CONVERGENCE
        # ============================================ #

        if len(allmodels) == 0:
            return allmodels

        try:
            gr = hddm.analyze.gelman_rubin(allmodels)

            # save
            text_file = open(
                os.path.join(mypath, model_name, 'gelman_rubin.txt'), 'w')
            for p in gr.items():
                text_file.write("%s,%s\n" % p)
                # print a warning when non-convergence is detected
                # Values should be close to 1 and not larger than 1.02 which would indicate convergence problems.
                # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3731670/
                if abs(p[1] - 1) > 0.02:
                    print "non-convergence found, %s:%s" % p
            text_file.close()
            print "written gelman rubin stats to file"
        except:
            pass

        # now actually concatenate them, see email Gilles
        m = kabuki.utils.concat_models(allmodels)

        # ============================================ #
        # SAVE THE FULL MODEL
        # ============================================ #

        print "concatenated models"
        m.save(
            os.path.join(mypath, model_name,
                         'modelfit-combined.model'))  # save the model to disk

        # DELETE FILES to save space
        print "deleting separate chains..."
        for fl in glob.glob(
                os.path.join(mypath, model_name, 'modelfit-md*.model')):
            print(fl)
            os.remove(fl)
        for fl in glob.glob(os.path.join(mypath, model_name,
                                         'modelfit-md*.db')):
            if not '-md0.db' in fl:
                print(fl)
                os.remove(fl)

        # ============================================ #
        # SAVE POINT ESTIMATES
        # ============================================ #

        print "saving stats"
        results = m.gen_stats(
        )  # point estimate for each parameter and subject
        results.to_csv(os.path.join(mypath, model_name,
                                    'results-combined.csv'))

        # save the DIC for this model
        text_file = open(os.path.join(mypath, model_name, 'DIC-combined.txt'),
                         'w')
        text_file.write("Combined model: {}\n".format(m.dic))
        text_file.close()

        # ============================================ #
        # SAVE TRACES
        # ============================================ #

        print "saving traces"
        # get the names for all nodes that are available here
        group_traces = m.get_group_traces()
        group_traces.to_csv(
            os.path.join(mypath, model_name, 'group_traces.csv'))

        all_traces = m.get_traces()
        all_traces.to_csv(os.path.join(mypath, model_name, 'all_traces.csv'))

        # ============================================ #
        # CONCATENATE MODEL COMPARISON
        # ============================================ #

        # average model comparison values across chains
        print('concatenating model comparison')

        nchains = 30
        for trace_id in range(nchains):  # how many chains were run?
            filename = os.path.join(mypath, model_name,
                                    'model_comparison_md%d.csv' % trace_id)
            df = pd.read_csv(filename)

            if trace_id == 0:
                df2 = df
            else:
                df2 = df2.append(df, ignore_index=True)

        # average over chains
        df3 = df2.describe().loc[['mean']]
        df3.to_csv(os.path.join(mypath, model_name, 'model_comparison.csv'))

        for fl in glob.glob(
                os.path.join(mypath, model_name, 'model_comparison_md*.csv')):
            print(fl)
            os.remove(fl)
Example #31
#workdir = '/home/mikkel/PM-volition/Dataanalysis'
#outdir = '/home/mikkel/PM-volition/Datafiles'

workdir = 'C:\\Users\\Mikkel\\Documents\\PM-volition\\Dataanalysis'
outdir = 'C:\\Users\\Mikkel\\Documents\\PM-volition\\Datafiles'

sys.path.append(workdir)
import PM_volition_utilfun as pm # plot_posterior_diff, plot_posterior_nodes2, get_posteriorP

# %% Plot options
dpi=600

# %% Load model
chdir(outdir)                   #Must be in folder to load databases
mod = hddm.load(op.join(outdir, 'ddm_model31'))

#f = open(op.join(outdir,"ddm_model22"),"rb")
#mod = pickle.load(f)

#%% Generate posteriors
v_fixPM, v_freePM, v_fixFil, v_freeFil = mod.nodes_db.node[['v(pm.fix)', 'v(pm.free)','v(filler.fix)','v(filler.free)']]
a_fix, a_free = mod.nodes_db.node[['a(fix)', 'a(free)']]
t_int  = mod.nodes_db.node['t']

## Difference between posteriors: PM
_, v_PMdiff = pm.get_posteriorP(v_fixPM, v_freePM, plot=0)
_, v_Fildiff = pm.get_posteriorP(v_fixFil, v_freeFil, plot=0)
_, a_diff = pm.get_posteriorP(a_fix, a_free, plot=0)

#_, z_PMdiff = pm.get_posteriorP(z_fixPM, z_freePM, plot=0)
fig = plt.figure()
ax = fig.add_subplot(111, xlabel='RT', ylabel='count', title='RT distributions')
for i, subj_data in dat_M_Categ_id.groupby('subj_idx'):
    subj_data.rt.hist(bins=20, histtype='step', ax=ax)
plt.savefig('plot_MS_Categ_id_flipped.pdf')

start_time = time.time() # the start time of the processing
    
#### model 1 for valence based categorization, free v,t,z
M_Categ_val_vtz = hddm.HDDM(dat_M_Categ_val,depends_on = {'v':['val','id'],'z':['val','id'],'t':['val','id']}, include=['v', 'z', 't'],p_outlier=.05)
M_Categ_val_vtz.find_starting_values()
M_Categ_val_vtz.sample(10000,burn = 1000, dbname='traces_val_vtz.db', db='pickle')
   
# save the model
M_Categ_val_vtz.save('M_Categ_val_vtz')
M_Categ_val_vtz = hddm.load('M_Categ_val_vtz')

# doing Gelman-Rubin statistic
models_categ_val = []
for i in range(5):
    m_stim = hddm.HDDM(dat_M_Categ_val,depends_on = {'v':['val','id'],'z':['val','id'],'t':['val','id']}, include=['v', 'z', 't'],p_outlier=.05)
    m_stim.find_starting_values()
    m_stim.sample(10000,burn = 1000)
    models_categ_val.append(m_stim)

Categ_val_R_hat_vtz = hddm.analyze.gelman_rubin(models_categ_val)

# save Categ_R_hat_vtz
with open('Categ_val_R_hat_vtz.csv','w') as f:
    w = csv.writer(f)
    w.writerows(Categ_val_R_hat_vtz.items())