def main(source=False):
    from skimage import io
    from os import path
    from chr_helpers import get_config_file

    config = get_config_file(localpath=path.dirname(path.realpath(__file__))+"/")

    #IMPORT VARIABLES
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    compare_files = config.get("Data", "compare_files")
    #END IMPORT VARIABLES

    data_path = path.expanduser(data_path)
    compare_files = compare_files.split('; ')
    compare_files = [location_pair.split(', ') for location_pair in compare_files]
    for location_pair in compare_files:
        if location_pair[0][-1] != "/":
            location_pair[0] = location_pair[0] + "/" # fix trailing slashes for file-pair directories, in case the user forgot to specify them under gen.cfg
        for data_file in location_pair[1:]:
            file_path = data_path + location_pair[0] + data_file
            image = io.imread(file_path, as_grey=True)

def sequence_check(source=False):
    from os import path
    import sys
    import pandas as pd
    import numpy as np
    import math
    from chr_helpers import get_config_file

    config = get_config_file(localpath=path.dirname(path.realpath(__file__))+'/')

    #IMPORT VARIABLES
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    eye_tracking = config.get('Data', 'eye_tracking')
    fmri_logfile = config.get('Data', 'fmri_logfile')
    preprocessed_path = config.get('Data', 'df_dir')
    #END IMPORT VARIABLES

    if source == 'local':
        from os import listdir
        eye_tracking = path.expanduser(data_path+eye_tracking)
        fmri_logfile = path.expanduser(data_path+fmri_logfile)
        eye_pre_fileslist = listdir(eye_tracking)
        fmri_pre_fileslist = listdir(fmri_logfile)

    et_files = sorted([lefile for lefile in eye_pre_fileslist if lefile.endswith('.txt')])[:9]
    fmri_files = sorted([lefile for lefile in fmri_pre_fileslist if lefile.endswith('OM.log') and not lefile.startswith('KP') and not lefile.startswith('ET_')])[:9]
    files = np.array([[a for a in et_files], [b for b in fmri_files]]).T

    for et, fmri in files:
        et_file = pd.DataFrame.from_csv(eye_tracking+et, header=42, sep='\t').reset_index()
        # CUTOFF PREVIOUS EXPERIMENTS
        cutoff = et_file[(et_file['L Raw X [px]'] == '# Message: pulse_start')].index.tolist()
        cutoff = int(cutoff[-1])
        et_file = et_file[cutoff:]
        # END CUTOFF PREVIOUS EXPERIMENTS
        et_file = et_file[(et_file['Type'] == 'MSG')].reset_index()
        et_file = et_file[['L Raw X [px]']].ix[1:].reset_index() # eliminate first row ("pulse_start")
        fmri_file = pd.DataFrame.from_csv(fmri_logfile+fmri, header=3, sep='\t').reset_index()
        fmri_file = fmri_file[(fmri_file['Event Type'] == 'Picture')].reset_index()
        fmri_file = fmri_file[['Code']]
        seq_file = pd.concat([et_file, fmri_file], axis=1).drop(['index'], 1) # drop old index
        seq_file.columns = ['ET', 'fMRI']
        seq_file.to_csv(fmri_logfile+"/sequence-check/seq-chk_"+fmri)

def corr(source=False, num_bins=False, keep_scrambling=False, make_tight=True, print_title=True, linewidth=0.5, fontscale=1):
    # relies on module-level imports (os.path, listdir, numpy as np, matplotlib.pyplot as plt,
    # pylab-style figure, scipy.stats.pearsonr) and the module's get_scatterdata helper
    config = get_config_file(localpath=path.dirname(path.realpath(__file__))+'/')

    #IMPORT VARIABLES
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    reaction_times = config.get('Addresses', 'reaction_times')
    #END IMPORT VARIABLES

    data_path = path.expanduser(data_path)
    rt_path = data_path + reaction_times
    files = [lefile for lefile in listdir(rt_path) if lefile.endswith('.csv')]
    ids = [t.split('_',2)[0]+'_'+t.split('_',2)[1] for t in files]
    ids = np.unique(ids)
    id_list = l_dist # l_dist (the participant whitelist) is expected to be defined at module scope - see ett()
    isspec = False
    conts = get_scatterdata(id_list, rt_path)
    if isspec:
        spec_conts = get_scatterdata(spec, rt_path)
        print pearsonr(spec_conts['ratediff'], spec_conts['RTdiff'])
    print pearsonr(conts['ratediff'], conts['RTdiff'])

    fig = figure(dpi=80, facecolor='#eeeeee', tight_layout=True)
    ax = fig.add_subplot(1,1,1)
    width = 0.3
    ax.yaxis.grid(True, linestyle='-', which='major', color='#dddddd', alpha=0.5, zorder=1)
    if isspec:
        ax.plot(conts['RTdiff'], conts['ratediff'], 'o', markersize=3, markeredgecolor='#7ECC5A', markerfacecolor='#7ECC5A', alpha=0.5, zorder=1)
        A = np.vstack([conts['RTdiff'], np.ones(len(conts['RTdiff']))]).T
        m, c = np.linalg.lstsq(A, conts['ratediff'])[0]
        plt.plot(conts['RTdiff'], conts['RTdiff']*m+c, color='#7ECC5A', antialiased=True)
        ax.plot(spec_conts['RTdiff'], spec_conts['ratediff'], '.', markersize=8, markeredgecolor='m', markerfacecolor='m', zorder=2)
        s_A = np.vstack([spec_conts['RTdiff'], np.ones(len(spec_conts['RTdiff']))]).T
        s_m, s_c = np.linalg.lstsq(s_A, spec_conts['ratediff'])[0]
        plt.plot(spec_conts['RTdiff'], spec_conts['RTdiff']*s_m+s_c, color='m', antialiased=True)
    else:
        ax.plot(conts['RTdiff'], conts['ratediff'], 'o', markersize=3, markeredgecolor='#7ECC5A', markerfacecolor='#7ECC5A', alpha=0.5, zorder=1)
        A = np.vstack([conts['RTdiff'], np.ones(len(conts['RTdiff']))]).T
        m, c = np.linalg.lstsq(A, conts['ratediff'])[0]
        plt.plot(conts['RTdiff'], conts['RTdiff']*m+c, color='#7ECC5A', antialiased=True)
    ax.set_ylabel('Rating difference (stimulus side - distractor side)')
    ax.set_xlabel(r'$\mathsf{RT - \overline{RT}_{aa;uu}}$ [s]', fontsize=13)
    return m, c

def main(experiment=False, source=False, prepixelation='not specified', num_bins=False, keep_scrambling=False, make_tight=True, print_title=True, linewidth=0.5, fontscale=1):
    data_all = get_and_filter_results(experiment, source, prepixelation, remove='no-response')
    localpath = path.dirname(path.realpath(__file__)) + '/'
    config = get_config_file(localpath)

    #IMPORT VARIABLES
    if not num_bins:
        num_bins = config.getint('RTdistribution', 'num_bins')
    if not keep_scrambling:
        keep_scrambling = [int(i) for i in config.get('RTdistribution', 'keep_scrambling').split(',')]
    #END IMPORT VARIABLES

    data_filtered = pd.DataFrame()
    for scrambling in keep_scrambling:
        data_scrambling = data_all[(data_all['scrambling'] == scrambling)]
        data_filtered = pd.concat([data_filtered, data_scrambling], ignore_index=True)

    fig = figure(figsize=(data_filtered['RT'].max()*4, 5), dpi=300, facecolor='#eeeeee', tight_layout=make_tight)
    ax = fig.add_subplot(1,1,1)
    ax.yaxis.grid(True, linestyle='-', which='major', color='#dddddd', alpha=0.6, zorder=0)
    ax.set_axisbelow(True)

    # the histogram of the data
    n, bins, patches = plt.hist(data_filtered['RT'], num_bins, normed=True, facecolor='green', alpha=0.5, linewidth=linewidth)

    # add a 'best fit' line
    mu = data_filtered['RT'].mean()
    sigma = np.std(data_filtered['RT'])
    norm_fit = mlab.normpdf(bins, mu, sigma)
    plt.plot(bins, norm_fit, 'm')

    plt.xlabel('RT [s]')
    plt.ylabel('Probability')
    keep_scrambling = [str(i) for i in keep_scrambling]
    if print_title:
        plt.title('Histogram of RTs for scrambling = '+', '.join(keep_scrambling)+r' $\mu\approx$'+str(np.around(mu, decimals=2))+r' s, $\sigma\approx$'+str(np.around(sigma, decimals=2))+' s')
    plt.subplots_adjust(left=0.15) # tweak spacing to prevent clipping of the ylabel
    axis.Axis.zoom(ax.yaxis, -0.5) # sets y margins further apart from the content, proportional to its length
    ax.set_ylim(bottom=0) # after scaling, to disregard padding underneath zero
    legend(('Fitted normal distribution', 'RT bins'), loc='upper center', bbox_to_anchor=(0.5, 1.065), ncol=3, fancybox=False, shadow=False, prop=FontProperties(size=str(int(9*fontscale))))
    return data_filtered

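# A minimal usage sketch for the RT histogram above. The argument values are
# illustrative assumptions rather than project defaults, and plt.show() presumes
# an interactive matplotlib backend:
if __name__ == '__main__':
    filtered = main(source='local', num_bins=20, keep_scrambling=[0, 100])
    plt.show()
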
def main(input_file, output_dir="", iptc_profile=None, do_fullsize=True, do_minis=True, template_name="", mini_width="", style=""):
    # tweak do_fullsize and do_minis here to control what output you get
    import subprocess
    from os import path, walk, makedirs, remove
    from string import Template
    from chr_helpers import get_config_file, save_gen

    localpath = path.dirname(path.realpath(__file__)) + '/'
    config = get_config_file(localpath)

    #IMPORT VARIABLES
    if not output_dir:
        output_dir = localpath + config.get('Directories', 'output_dir')
    else:
        output_dir = path.expanduser(output_dir)
    batch_profile_dir = config.get('Directories', 'batch_profile_dir')
    template_dir = config.get('Directories', 'template_dir')
    RT_profiles_dir = config.get('Directories', 'RT_profiles_dir')
    minis_dir = config.get('Directories', 'minis_dir')
    mini_name = config.get('Directories', 'mini_name')
    rt_command = config.get('Parameters', 'rt_command')
    stock_iptc_profile = config.get('Parameters', 'iptc_profile')
    pictures_link_path = config.get('Directories', 'pictures_link_path')
    if template_name and 'octopress' in template_name:
        style = config.get('Parameters', 'style')
    if not mini_width:
        mini_width = config.getint('Parameters', 'mini_width')
    #END IMPORT VARIABLES

    if style == 'NONE':
        style = ""
    if not iptc_profile:
        iptc_profile = batch_profile_dir + stock_iptc_profile
    iptc_profile = path.abspath(path.expanduser(iptc_profile))
    input_file = path.abspath(path.expanduser(input_file))
    batch_profile_dir = localpath + batch_profile_dir

    profile_list = []
    for lepath, subdirs, files in walk(RT_profiles_dir):
        for name in files:
            profile_list += [path.join(lepath, name)]

    if template_name:
        template_file = localpath + template_dir + template_name + '.txt'
        the_template = Template(open(template_file, 'r').read())
        output_template_file = output_dir + path.splitext(path.basename(input_file))[0] + '-temp'
        outfile = save_gen(output_template_file, extension='.txt')

    if do_minis:
        mini_profile_file = batch_profile_dir + mini_name
        mini_temp_profile_location = batch_profile_dir + 'tmp_' + mini_name
        mini_temp_profile_file = save_gen(mini_temp_profile_location)
        mini_profile = Template(open(mini_profile_file, 'r').read())
        mini_temp_profile_file.write(mini_profile.substitute(WIDTH=mini_width))
        mini_temp_profile_file.close()

    for profile in profile_list:
        source = input_file
        out_name = path.splitext(path.basename(input_file))[0] + '-' + path.basename(profile) + '.jpg'
        out_name_minis = out_name
        if do_fullsize:
            fullsize_destination = output_dir + out_name.replace(" ", "_")
            subprocess.call([rt_command, '-o', fullsize_destination, '-p', iptc_profile, '-p', profile, '-j[100]', '-Y', '-c', source])
        if do_minis:
            minis_folder = output_dir + minis_dir
            if not path.isdir(minis_folder):
                makedirs(minis_folder)
            minis_destination = output_dir + minis_dir + out_name_minis.replace(" ", "_")
            subprocess.call([rt_command, '-o', minis_destination, '-p', iptc_profile, '-p', profile, '-p', mini_temp_profile_location, '-j[100]', '-Y', '-c', source])
        if template_name:
            full_size_link = pictures_link_path + out_name.replace(" ", "_")
            mini_path = '/images/photos/minis/' + out_name_minis.replace(" ", "_")
            outfile.write(the_template.substitute(STYLE=style, PATH=mini_path, WIDTH=mini_width, HEIGHT=0, CAPTION=path.splitext(profile)[0], LINK=full_size_link))

    if do_minis:
        remove(mini_temp_profile_location)

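# A usage sketch for the batch developer above (the file and template names are
# illustrative assumptions): develop one RAW file against every profile under
# RT_profiles_dir, writing full-size JPEGs plus width-constrained minis, and emit
# a templated snippet for embedding the results:
if __name__ == '__main__':
    main('~/photos/example.NEF', template_name='octopress')
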
def parse_results(input_file=False, source=False, data_format=""):
    import fnmatch
    import pandas as pd
    from os import path, listdir
    from chr_helpers import get_config_file

    localpath = path.dirname(path.realpath(__file__)) + '/'
    config = get_config_file(localpath)

    #IMPORT VARIABLES
    output_dir = config.get('Directories', 'output_dir')
    formats_dir = config.get('Directories', 'formats_dir')
    keys_dir = config.get('Directories', 'keys_dir')
    if not input_file:
        if not source:
            source = config.get('Source', 'source')
        data_path = config.get('Addresses', source)
        file_name = config.get('Addresses', 'file_name')
        input_file = data_path + file_name
    if not data_format:
        data_format = config.get('Parameters', 'data_format')
    #END IMPORT VARIABLES

    if type(input_file) is list:
        input_file = [path.expanduser(input_file_item) for input_file_item in input_file]
    else:
        input_file = path.expanduser(input_file)
    keys_dir = localpath + keys_dir
    formats_dir = localpath + formats_dir
    keys_list = [path.splitext(i)[0] for i in listdir(keys_dir) if i.endswith('.csv')] # list of questionnaire IDs for which we provide decoding

    if data_format == 'testmaker':
        format_processing = pd.read_csv(formats_dir + data_format + '.csv')
        import key_functions
        raw_data = pd.read_csv(input_file, sep='/').set_index(['ID_v'])
        question_ids = set([i.partition('_')[0] for i in raw_data.columns])
        question_ids = [i for i in question_ids if i in keys_list]
        results = pd.DataFrame(index=raw_data.index, columns=question_ids)
        for sub_test in question_ids:
            test_key = pd.read_csv(keys_dir + sub_test + '.csv') # load the key used to score the answers
            test_fields = fnmatch.filter(raw_data.columns, sub_test + '_*')
            test_questions = raw_data[test_fields] # slice the relevant answers from the raw results
            test_questions = test_questions + format_processing['add'][0] # preprocess data, typically for testmaker
            results = getattr(key_functions, sub_test)(test_questions, test_key, results)
    elif data_format in ['surveygizmo', 'surveymonkey']:
        raise ValueError('The \'surveygizmo\' format is not yet supported. If you cannot make do without this, please direct your query to [email protected].')
    elif data_format == 'cuQuest1':
        response_data = pd.read_csv(input_file[0], sep=';')
        self_data = pd.read_csv(input_file[1], sep=';')
        self_data = self_data[(self_data['curiosity'].notnull()) & (self_data['knowledge'].notnull())]
        participant_list = list(set(self_data['participant'].values.tolist()))
        correlation_df_columns = [
            'participant',
            'pCvK', 'pCvCo', 'pCvM', 'pKvCo', 'pKvM', 'pMvCo',
            'kCvK', 'kCvCo', 'kCvM', 'kKvCo', 'kKvM', 'kMvCo',
            'sCvK', 'sCvCo', 'sCvM', 'sKvCo', 'sKvM', 'sMvCo',
        ]
        correlations_df = pd.DataFrame(index=participant_list, columns=correlation_df_columns)
        correlations_df = correlations_df.fillna(0) # with 0s rather than NaNs
        correlations_df['participant'] = participant_list
        for participant in participant_list:
            for correlation_type in ['pearson', 'kendall', 'spearman']:
                correlation = self_data[(self_data['participant'] == participant)][['curiosity', 'knowledge']].corr(method=correlation_type, min_periods=1)['curiosity']['knowledge']
                correlations_df.loc[(correlations_df['participant'] == participant), correlation_type[0] + 'CvK'] = correlation
        print(correlations_df)

def main(input_file, output_dir="", iptc_profile=None, do_fullsize=True, do_minis=True, template_name="", mini_width="", style=""): #tweak do_template and do_export here to controll what output you get localpath = path.dirname(path.realpath(__file__)) + '/' config = get_config_file(localpath) #IMPORT VARIABLES if not output_dir: output_dir = localpath+config.get('Directories', 'output_dir') else: output_dir = path.expanduser(output_dir) batch_profile_dir = config.get('Directories', 'batch_profile_dir') template_dir = config.get('Directories', 'template_dir') RT_profiles_dir = config.get('Directories', 'RT_profiles_dir') minis_dir = config.get('Directories', 'minis_dir') mini_name = config.get('Directories', 'mini_name') rt_command = config.get('Parameters', 'rt_command') stock_iptc_profile = config.get('Parameters', 'iptc_profile') pictures_link_path = config.get('Directories', 'pictures_link_path') if template_name and 'octopress' in template_name: style = config.get('Parameters', 'style') if not mini_width: mini_width = config.getint('Parameters', 'mini_width') #END IMPORT VARIABLES if style == 'NONE': style = "" if not iptc_profile: iptc_profile = batch_profile_dir + stock_iptc_profile iptc_profile = path.abspath(path.expanduser(iptc_profile)) input_file = path.abspath(path.expanduser(input_file)) batch_profile_dir = localpath + batch_profile_dir profile_list = [] for lepath, subdirs, files in walk(RT_profiles_dir): for name in files: profile_list += [path.join(lepath, name)] if bool(template_name): template_file = localpath + template_dir + template_name + '.txt' the_template = Template(open(template_file, 'r').read()) output_template_file = output_dir + path.splitext(path.basename(input_file))[0] + '-temp' outfile = save_gen(output_template_file, extension='.txt') if do_minis: mini_profile_file = batch_profile_dir + mini_name mini_temp_profile_location = batch_profile_dir + 'tmp_' + mini_name mini_temp_profile_file = save_gen(mini_temp_profile_location) mini_profile = Template(open(mini_profile_file, 'r').read()) mini_temp_profile_file.write(mini_profile.substitute(WIDTH=mini_width)) mini_temp_profile_file.close() for profile in profile_list: source = input_file out_name = path.splitext(path.basename(input_file))[0]+'-'+path.basename(profile)+'.jpg' out_name_minis = path.splitext(path.basename(input_file))[0]+'-'+path.basename(profile)+'.jpg' if do_fullsize: fullsize_destination = output_dir + out_name.replace(" ", "_") subprocess.call([rt_command, '-o', fullsize_destination, '-p', iptc_profile, '-p', profile, '-j[100]', '-Y', '-c', source]) if do_minis: minis_folder = output_dir + minis_dir if not path.isdir(minis_folder): makedirs(minis_folder) minis_destination = output_dir + minis_dir + out_name_minis.replace(" ", "_") subprocess.call([rt_command, '-o', minis_destination, '-p', iptc_profile, '-p', profile, '-p', mini_temp_profile_location, '-j[100]', '-Y', '-c', source]) if bool(template_name): full_size_link = pictures_link_path+out_name.replace(" ", "_") mini_path = '/images/photos/minis/'+out_name_minis.replace(" ", "_") outfile.write(the_template.substitute(STYLE=style, PATH=mini_path, WIDTH = mini_width, HEIGHT=0, CAPTION=path.splitext(profile)[0], LINK=full_size_link)) if do_minis: remove(mini_temp_profile_location)
def ett(plot_ax="Y", source=False, id_list="final", make_tight=True, print_title=True, linewidth=0.8, fontscale=0.5, isspec=False, make_sem=True): config = get_config_file(localpath=path.dirname(path.realpath(__file__))+'/') #IMPORT VARIABLES if not source: source = config.get('Source', 'source') data_path = config.get('Addresses', source) reaction_times = config.get('Addresses', 'reaction_times') #END IMPORT VARIABLES if plot_ax == "X": plot_axis = 'GazePointX' direction = "(towards stimulus)" if plot_ax == "Y": plot_axis = 'GazePointY' direction = "(up)" data_path = path.expanduser(data_path) rt_path = data_path + reaction_times files = [lefile for lefile in listdir(rt_path) if lefile.endswith('.tsv')] ids = [t.split('_',2)[0]+'_'+t.split('_',2)[1] for t in files] ids = np.unique(ids) eye_data_total = pd.DataFrame([]) stimulus_datas = pd.DataFrame([]) spec = ['6245247_f'] h_dist = ['1236345_f','6779353_f','7310001_f','7714775_m','7816097_m','7865828_m','7922847_m'] l_dist = ['1975801_m','4724273_f','6268973_m','8963557_f','8286497_m','8963557_m','9651558_m','8240877_m','6887665_m','5559429_f','8582941_f','8582941_m','9302438_f','4276763_f','3878418_m','3537898_f','1247497_f','8717741_m','4744495_f','7117377_m'] best = ['1975801_m','4724273_f','6268973_m','8963557_f','8286497_m','8963557_m','6887665_m','5559429_f','8582941_f','9302438_f','1247497_f','4744495_f','7117377_m'] test = ['chr1_f','chr2_f'] if id_list=="final": id_list = l_dist for fileidx, fileid in enumerate(id_list): ratings = open_csv(rt_path+fileid+'_p') ratings = pd.DataFrame(ratings[1:], columns=ratings[0], dtype=float) ratings = ratings.groupby('picture').mean() sorted_scores = sorted(ratings['score']) score_top, score_bottom = sorted_scores[-20], sorted_scores [19] stimulus_data = pd.DataFrame stimulus_data = stimulus_data.from_csv(rt_path+fileid+'_wm.csv') stimulus_data['rateL'] = stimulus_data['rateL'].astype(np.float64) stimulus_data['RTL'] = stimulus_data['RTL'].astype(np.float64) stimulus_data['orderL'] = stimulus_data['orderL'].astype(np.float64) stimulus_data['rateR'] = stimulus_data['rateR'].astype(np.float64) stimulus_data['RTR'] = stimulus_data['RTR'].astype(np.float64) stimulus_data['orderR'] = stimulus_data['orderR'].astype(np.float64) stimulus_data['RT'] = stimulus_data['RT'].astype(np.float64) stimulus_data['session'] = stimulus_data['session'].astype(np.float64) stimulus_data = stimulus_data[stimulus_data['RT'] >=0] stimulus_data['block'] = '' stimulus_data.ix[(stimulus_data['rateL'] >= score_top) & (stimulus_data['rateR'] >= score_top), 'block'] = 'aa' stimulus_data.ix[(stimulus_data['rateL'] >= score_top) & (stimulus_data['rateR'] <= score_bottom), 'block'] = 'au' stimulus_data.ix[(stimulus_data['rateL'] <= score_bottom) & (stimulus_data['rateR'] >= score_top), 'block'] = 'ua' stimulus_data.ix[(stimulus_data['rateL'] <= score_bottom) & (stimulus_data['rateR'] <= score_bottom), 'block'] = 'uu' aa_trials = list(stimulus_data[(stimulus_data['block'] == 'aa')]['session']) au_trials = list(stimulus_data[(stimulus_data['block'] == 'au')]['session']) ua_trials = list(stimulus_data[(stimulus_data['block'] == 'ua')]['session']) uu_trials = list(stimulus_data[(stimulus_data['block'] == 'uu')]['session']) stimleft_trials = list(stimulus_data[(stimulus_data['isstimleft'] == True)]['session']) stimright_trials = list(stimulus_data[(stimulus_data['isstimleft'] == False)]['session']) stimatt_trials = list(stimulus_data[(stimulus_data['isstimleft'] == True) & (stimulus_data['rateL'] >= score_top) & 
(stimulus_data['rateR'] <= score_bottom)]['session']) stimatt_trials = stimatt_trials + list(stimulus_data[(stimulus_data['isstimleft'] == False) & (stimulus_data['rateL'] <= score_bottom) & (stimulus_data['rateR'] >= score_top)]['session']) stimNatt_trials = list(stimulus_data[(stimulus_data['isstimleft'] == False) & (stimulus_data['rateL'] >= score_top) & (stimulus_data['rateR'] <= score_bottom)]['session']) stimNatt_trials = stimNatt_trials + list(stimulus_data[(stimulus_data['isstimleft'] == True) & (stimulus_data['rateL'] <= score_bottom) & (stimulus_data['rateR'] >= score_top)]['session']) pat = 'TimeStamp GazePointXLeft GazePointYLeft ValidityLeft GazePointXRight GazePointYRight ValidityRight GazePointX GazePointY Event' with open(rt_path+fileid+'_wmet.tsv') as infile: eye_data = infile.read().split(pat) eye_data = eye_data[1:] # remove header (describing date etc) eye_data = [trial.split('\r\n') for trial in eye_data] # split at '\r' for idx, i in enumerate(eye_data): # crop to 447 ACTUAL time frames - the first one is empty eye_data[idx] = i[:448] for idx, trial in enumerate(eye_data): trial = [row.split('\t') for row in trial] eye_data[idx] = trial eye_data = [name[1:] for name in eye_data] # crop the first, empty line eye_data = np.array(eye_data) eye_data = eye_data[:,:,[0,3,6,7,8]].astype(np.float64) # convert to float, we don't need separate eye coordinates eye_data[:,:,3:] = eye_data[:,:,3:] / 2 - 0.5 # the integrated left-right gaze coordinates are the sum of the per-eye screen percentages - divide by 2 (2 eyes) and normalize to: 50% = 0 for a in np.arange(np.shape(eye_data)[0]): # assume that when neither of the eyes is detected the subject looks at the fixation for i in np.arange(np.shape(eye_data)[1]): if eye_data[a,i,1] == 4 and eye_data[a,i,2] == 4: eye_data[a,i,3] = 0 eye_data[a,i,4] = 0 for i in stimleft_trials: # invert stimleft trial coordinates - equates 'right' with 'stimside' eye_data[i,:,3:] = -eye_data[i,:,3:] eye_data = eye_data[:,:,[0,3,4]] # we can't work with eye detection indices in the subsequent sumation, discard them here eye_data_aa = eye_data[aa_trials,:,:] eye_data_uu = eye_data[uu_trials,:,:] eye_data_uas = eye_data[stimatt_trials,:,:] eye_data_aus = eye_data[stimNatt_trials,:,:] eye_data_aa = np.sum(eye_data_aa, axis=0) / np.shape(eye_data_aa)[0] eye_data_uu = np.sum(eye_data_uu, axis=0) / np.shape(eye_data_uu)[0] eye_data_uas = np.sum(eye_data_uas, axis=0) / np.shape(eye_data_uas)[0] eye_data_aus = np.sum(eye_data_aus, axis=0) / np.shape(eye_data_aus)[0] eye_data_aa = pd.DataFrame(eye_data_aa, columns=['time','GazePointX','GazePointY']) eye_data_aa['stimuli'] = 'aa' eye_data_uu = pd.DataFrame(eye_data_uu, columns=['time','GazePointX','GazePointY']) eye_data_uu['stimuli'] = 'uu' eye_data_uas = pd.DataFrame(eye_data_uas, columns=['time','GazePointX','GazePointY']) eye_data_uas['stimuli'] = 'uas' eye_data_aus = pd.DataFrame(eye_data_aus, columns=['time','GazePointX','GazePointY']) eye_data_aus['stimuli'] = 'aus' eye_data = pd.concat([eye_data_aa, eye_data_uu, eye_data_uas, eye_data_aus]) eye_data['ID'] = fileid eye_data["timepoint"] = eye_data.index if fileidx == 0: eye_data_total = eye_data[["timepoint",'time','GazePointX','GazePointY','stimuli',"ID"]] else: eye_data_total = pd.concat([eye_data_total, eye_data[["timepoint",'time','GazePointX','GazePointY','stimuli',"ID"]]]) # load reaction times (to plot as lines) here: conts = get_dataframes(id_list, rt_path) sa_reaction_time = conts[(conts['subblock'] == 'uas+sau')]['RT'].mean()*1000 
su_reaction_time = conts[(conts['subblock'] == 'aus+sua')]['RT'].mean()*1000 aa_reaction_time = conts[(conts['block'] == 'aa')]['RT'].mean()*1000 uu_reaction_time = conts[(conts['block'] == 'uu')]['RT'].mean()*1000 # end load reaction times et_means = eye_data_total.groupby(["stimuli","timepoint"]).mean() if make_sem: et_sem = eye_data_total.groupby(["stimuli","timepoint"]).aggregate(sem) fig = figure(figsize=(3, 4), dpi=300, facecolor='#eeeeee', tight_layout=make_tight) ax1=fig.add_subplot(2,1,1) matplotlib.rcParams.update({'font.size': 12*fontscale}) ax1.set_xlim(0, et_means['time'].max()) tc = et_means.ix["aa"]['time'] v = et_means.ix["aa"][plot_axis] ax1.plot(tc, v, color='g') if make_sem: se = et_sem.ix["aa"][plot_axis]/2 ax1.fill_between(tc, v+se, v-se, facecolor="g", edgecolor="none", alpha=0.1, zorder=0) tc = et_means.ix["uu"]['time'] v = et_means.ix["uu"][plot_axis] ax1.plot(tc, v, color='m') if make_sem: se = et_sem.ix["uu"][plot_axis]/2 ax1.fill_between(tc, v+se, v-se, facecolor="m", edgecolor="none", alpha=0.1, zorder=0) legend(('Attractive - Attractive','Unattractive - Unattractive'), bbox_to_anchor=(0.94, 0.99), shadow=False, frameon=False, prop=FontProperties(size=str(9*fontscale))) ax1.axhline(0, color='k', alpha = 0.1, linewidth=linewidth) ax1.axvline(aa_reaction_time, color='g', alpha = 0.3, linewidth=linewidth) ax1.axvline(uu_reaction_time, color='m', alpha = 0.3, linewidth=linewidth) ax1.set_ylabel(plot_ax+'-axis % '+direction) ax1.set_xlabel('Time [ms]') ax2 = fig.add_subplot(212) ax2.set_xlim(0, et_means['time'].max()) tc = et_means.ix["uas"]['time'] v = et_means.ix["uas"][plot_axis] ax2.plot(tc, v, color='g') if make_sem: se = et_sem.ix["uas"][plot_axis]/2 ax2.fill_between(tc, v+se, v-se, facecolor="g", edgecolor="none", alpha=0.1, zorder=0) tc = et_means.ix["aus"]['time'] v = et_means.ix["aus"][plot_axis] ax2.plot(tc, v, color='m') if make_sem: se = et_means.ix["aus"][plot_axis]/2 ax2.fill_between(tc, v+se, v-se, facecolor="m", edgecolor="none", alpha=0.1, zorder=0) legend(('Attractive on Target Side','Unattractive on Target Side'), bbox_to_anchor=(0.94, 0.99), shadow=False, frameon=False, prop=FontProperties(size=str(9*fontscale))) ax2.axhline(0, color='k', alpha = 0.1, linewidth=linewidth) ax2.axvline(sa_reaction_time, color='g', alpha = 0.3, linewidth=linewidth) ax2.axvline(su_reaction_time, color='m', alpha = 0.3, linewidth=linewidth) ax2.set_ylabel(plot_ax+'-axis % '+direction) ax2.set_xlabel('Time [ms]')
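# A usage sketch for ett(), assuming the pylab-style module-level imports that
# the bare figure()/legend() calls above imply:
#
#   ett(plot_ax="X", id_list="final", make_sem=True)
#   show()
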
def get_et_data(source=False, make='timecourse', pre_cutoff=0, make_categories='', diff=False, savefile='', baseline="", force_new=False):
    from os import path
    import sys
    import pandas as pd
    import numpy as np
    import math
    from chr_helpers import get_config_file

    config = get_config_file(localpath=path.dirname(path.realpath(__file__))+'/')

    #IMPORT VARIABLES
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    eye_tracking = config.get('Data', 'eye_tracking')
    preprocessed_path = config.get('Data', 'df_dir')
    regressor_dir = config.get('Data', 'regressor_dir')
    if make == "regressor":
        make = config.getint("Settings", "make")
    if isinstance(make, int) and not pre_cutoff:
        pre_cutoff = config.getint('Settings', 'pre_cutoff')
    #END IMPORT VARIABLES

    if source == 'local':
        from os import listdir
        data_path = path.expanduser(data_path+eye_tracking)
        pre_fileslist = listdir(data_path)
    regressor_path = path.expanduser(data_path+preprocessed_path+regressor_dir)
    if savefile:
        preprocessed_file = path.expanduser(data_path+preprocessed_path+savefile)
        if path.exists(preprocessed_file) and not force_new:
            data_all = pd.DataFrame.from_csv(preprocessed_file)
            data_all = data_all.set_index(['CoI'], append=True, drop=True)
            data_all = data_all.set_index(['measurement'], append=True, drop=True)
            data_all = data_all.reorder_levels(['ID','CoI','measurement'])
            return data_all

    print('Loading data from '+data_path)
    if pre_fileslist == []:
        raise InputError('For some reason the list of results files could not be populated.')

    files = [lefile for lefile in pre_fileslist if lefile.endswith('.txt')]
    usually_empty = ["L Mapped Diameter [mm]", "L Validity", "R Validity", " Pupil Confidence"]
    data_all = [] # empty container list for collecting per-file dataframes
    for lefile in files:
        print("Processing " + lefile + ":")
        data_lefile = pd.DataFrame.from_csv(data_path+lefile, header=42, sep='\t')
        data_lefile = data_lefile.reset_index()
        data_lefile = data_lefile.dropna(axis=1, how='all', thresh=3) # remove non-informative (null) columns
        for field in usually_empty:
            if field in data_lefile.columns.tolist():
                data_lefile = data_lefile.drop(field, 1) # these columns contain no useful values and are not in all files; dropna, however, fails to remove them

        #CUTOFF AT 'pulse_start'
        cutoff = data_lefile[(data_lefile['L Raw X [px]'] == '# Message: pulse_start')].index.tolist()
        cutoff = int(cutoff[-1])
        data_lefile = data_lefile[cutoff-pre_cutoff:]
        data_lefile = data_lefile.reset_index() # make new index
        data_lefile = data_lefile.drop(['index'], 1) # drop old index

        if isinstance(make, int):
            data_lefile = data_lefile[(data_lefile["Type"] != "MSG")]
            data_lefile["Pupil"] = ((data_lefile["L Dia Y [px]"] + data_lefile["L Dia X [px]"])/2)**2 # compute pupil ~area
            data_lefile_single = data_lefile["Pupil"]
            if diff == "prebin":
                data_lefile_single = data_lefile_single.diff()
            data_lefile_single = downsample(data_lefile_single, sample=make)
            if diff == "postbin":
                data_lefile_single = data_lefile_single.diff()
                data_lefile_single = data_lefile_single.ix[1:,1]
                data_lefile_single.to_csv(regressor_path+lefile.split('_')[0]+'_diff_postbin.csv', index=False, header=False, index_label=None)
            elif diff == "prebin":
                data_lefile_single = data_lefile_single.ix[:,1]
                data_lefile_single.to_csv(regressor_path+lefile.split('_')[0]+'_diff_prebin.csv', index=False, header=False, index_label=None)
            else:
                data_lefile_single = data_lefile_single.ix[:,1]
                data_lefile_single.to_csv(regressor_path+lefile.split('_')[0]+'.csv', index=False, header=False, index_label=None)
            continue

        data_lefile.index.name = 'measurement'
        data_lefile['Trial'] = data_lefile['Trial'] - data_lefile['Trial'].ix[0] # trial number relative to first remaining trial
        data_lefile['Time'] = (data_lefile['Time'] - data_lefile['Time'].ix[0])/1000 # time relative to first remaining time point; convert to milliseconds
        #END CUTOFF AT 'pulse_start'

        if make_categories:
            new_category_names = set([new_cat[0] for new_cat in make_categories])
            for new_category_name in new_category_names:
                data_lefile[new_category_name] = ''
            trial_key = data_lefile[(data_lefile['Type'] == 'MSG')][['Trial','L Raw X [px]']] # crop
            trial_key = np.array(trial_key) # for easier iteration
            for category in make_categories:
                criterion = category[-1]
                for trial in trial_key:
                    if '-' in criterion:
                        if criterion.split('-')[0] in trial[1] and criterion.split('-')[1] not in trial[1]:
                            data_lefile.ix[(data_lefile['Trial']==trial[0]), category[0]] = category[1]
                    elif '|' in criterion:
                        if criterion.split('|')[0] in trial[1] or criterion.split('|')[1] in trial[1]:
                            data_lefile.ix[(data_lefile['Trial']==trial[0]), category[0]] = category[1]
                    else:
                        if criterion in trial[1]:
                            data_lefile.ix[(data_lefile['Trial']==trial[0]), category[0]] = category[1]

        #MAKE MULTIINDEX
        grouped = data_lefile.set_index(['Trial'])
        # measurement is the ordinal number of a capture frame within a trial:
        data_lefile['measurement'] = grouped.groupby(level=0).cumcount().tolist()
        data_lefile.set_index(['Trial', 'measurement'], inplace=True)
        #END MAKE MULTIINDEX

        if make == 'timecourse':
            groups_all = []
            for category in make_categories:
                print("Binning category: \""+category[1]+"\"")
                group = data_lefile[(data_lefile[category[0]] == category[1])]
                group = group.groupby(level=1).mean() # make per-category means
                group = group.ix[:240] # means for timepoints with missing values would be smaller
                group['Time'] = group['Time']-group['Time'].ix[0]
                group['CoI'] = category[1]
                group = group.set_index(['CoI'], append=True, drop=True)
                group = group.reorder_levels(['CoI','measurement'])
                group["Pupil"] = ((group["L Dia Y [px]"] + group["L Dia X [px]"])/2)**2 # compute pupil ~area
                groups_all.append(group)
            data_lefile = pd.concat(groups_all)
        else:
            print('Please specify the "make" argument as either "timecourse" or an integer.')

        if baseline == "participant":
            baseline_mean = data_lefile.ix["fix"]["Pupil"].mean() # baseline (fixcross) mean
            data_lefile["Pupil"] = data_lefile["Pupil"]/baseline_mean

        #ADD ID
        data_lefile['ID'] = lefile.split('_')[0]
        data_lefile = data_lefile.set_index(['ID'], append=True, drop=True)
        if make == "timecourse":
            data_lefile = data_lefile.reorder_levels(['ID','CoI','measurement'])
        #END ADD ID
        data_all.append(data_lefile)

    if make == "timecourse":
        data_all = pd.concat(data_all)
        data_all.reorder_levels(['ID','CoI','measurement'])
        if baseline == "global":
            baseline_mean = data_all.groupby(level=1).mean().ix["fix"]["Pupil"].mean() # baseline (fixcross) mean
            data_all["Pupil"] = data_all["Pupil"]/baseline_mean
        if savefile:
            data_all.to_csv(preprocessed_file)
    return data_all

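# A usage sketch for get_et_data(). Each make_categories entry is a
# (column_name, value, criterion) triple matched against the trial message:
# a criterion 'a-b' means "contains a but not b", 'a|b' means "contains a or b",
# and anything else is a plain substring match. The labels below are illustrative
# assumptions, except for 'fix', which the baseline normalization above expects:
#
#   categories = [
#       ('CoI', 'easy', 'cell30|cell22'),
#       ('CoI', 'hard', 'cell10-cell100'),
#       ('CoI', 'fix', 'fixCross'),
#   ]
#   timecourses = get_et_data(make='timecourse', make_categories=categories,
#       baseline='participant', savefile='timecourses.csv')
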
def get_rt_data(source=False, make_categories=False, no_response="", make_scrambled_yn=False):
    from os import path
    import sys
    import pandas as pd
    import numpy as np
    import math
    from chr_helpers import get_config_file

    config = get_config_file(localpath=path.dirname(path.realpath(__file__))+'/')

    #IMPORT VARIABLES
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    eye_tracking = config.get('Data', 'eye_tracking')
    preprocessed_path = config.get('Data', 'df_dir')
    rt_dir = config.get('Data', 'rt_dir')
    #END IMPORT VARIABLES

    if source == 'local':
        from os import listdir
        data_path = path.expanduser(data_path+rt_dir)
        pre_fileslist = listdir(data_path)

    print('Loading data from '+data_path)
    if pre_fileslist == []:
        raise InputError('For some reason the list of results files could not be populated.')

    files = [lefile for lefile in pre_fileslist if lefile.endswith('OM.log') and not lefile.startswith("KP") and not lefile.startswith("ET_")]
    data_all = [] # empty container list for collecting per-file dataframes
    for lefile in files:
        with open(data_path+lefile, 'rb') as infile:
            # skip the preamble, which ends with two consecutive blank lines
            a = 0
            for line in infile:
                if line == '\r\n':
                    a += 1
                else:
                    a = 0
                if a == 2:
                    break
            df_file = pd.read_csv(infile, engine='python', sep="\t")
        if len(set(df_file["Type"])) <= 2:
            continue # skip files which do not contain RT data (identified by a monotonous "Type" column)
        df_file = df_file[["Code","RT","Type"]]
        df_file = df_file[(df_file["Code"] != "fixCross")]
        df_file.reset_index(inplace=True)
        if isinstance(no_response, int):
            df_file.ix[(df_file["Type"] == "miss"), "RT"] = no_response # replace missed trials' RT with the int penalty value
        else:
            df_file = df_file[(df_file["Type"] != "miss")] # remove missed trials
        if make_categories:
            new_category_names = set([new_cat[0] for new_cat in make_categories])
            for new_category_name in new_category_names:
                df_file[new_category_name] = ''
            trial_key = df_file[['index','Code']] # crop
            trial_key = np.array(trial_key) # for easier iteration
            for category in make_categories:
                criterion = category[-1]
                for trial in trial_key:
                    if '-' in criterion:
                        if criterion.split('-')[0] in trial[1] and criterion.split('-')[1] not in trial[1]:
                            df_file.ix[(df_file['index']==trial[0]), category[0]] = category[1]
                    elif '|' in criterion:
                        if criterion.split('|')[0] in trial[1] or criterion.split('|')[1] in trial[1]:
                            df_file.ix[(df_file['index']==trial[0]), category[0]] = category[1]
                    else:
                        if criterion in trial[1]:
                            df_file.ix[(df_file['index']==trial[0]), category[0]] = category[1]
        if make_scrambled_yn:
            df_file["scrambled"] = ""
            df_file.ix[(df_file["emotion"] == "scrambled"), "scrambled"] = "yes"
            df_file.ix[(df_file["emotion"] != "scrambled"), "scrambled"] = "no"
        df_file = df_file.drop(['index'], 1) # drop old index
        #ADD ID
        df_file['ID'] = lefile.split('-')[0]
        df_file = df_file.set_index(['ID'], append=True, drop=True)
        df_file = df_file.reorder_levels([1,0])
        #END ADD ID
        data_all.append(df_file)
    data_all = pd.concat(data_all)
    data_all["RT"] = data_all["RT"] / 10000 # convert to seconds
    return data_all

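# A usage sketch for get_rt_data(); the criterion strings are illustrative
# assumptions. Note that make_scrambled_yn keys on an 'emotion' category, so one
# has to be created via make_categories first:
#
#   rts = get_rt_data(
#       no_response=100000, # assumed penalty in raw log units (10 s after the /10000 conversion above)
#       make_categories=[('emotion', 'scrambled', 'cell'), ('emotion', 'happy', 'HA|ha')],
#       make_scrambled_yn=True,
#   )
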
def main(output_format=False, scrambling_steps_id=False):
    import numpy as np
    import pandas as pd
    from numpy.random import permutation, choice, sample
    from os import path, listdir
    from itertools import product
    from chr_helpers import get_config_file, save_pd_csv, save_gen, save_pd_tsv
    from routines import hariri
    from string import Template

    localpath = path.dirname(path.realpath(__file__)) + '/'
    config = get_config_file(localpath)

    #IMPORT VARIABLES
    sequence_name = config.get('Files', 'sequence_name')
    output_subdir = config.get('Directories', 'output_subdir')
    stimuli_dir = config.get('Directories', 'stimuli_dir')
    templates_dir = config.get('Directories', 'templates_dir')
    scrambling_id = config.get('Decoding', 'scrambling_id')
    scrambling_a_id = config.get('Decoding', 'scrambling_a_id')
    scrambling_b_id = config.get('Decoding', 'scrambling_b_id')
    fearful_id = config.get('Decoding', 'fearful_id')
    happy_id = config.get('Decoding', 'happy_id')
    female_id = config.get('Decoding', 'female_id')
    male_id = config.get('Decoding', 'male_id')
    easy_em_id = config.get('Decoding', 'easy_em_id')
    hard_em_id = config.get('Decoding', 'hard_em_id')
    if not scrambling_steps_id:
        scrambling_steps_id = [int(i) for i in config.get('Decoding', 'scrambling_steps_id').split(',')]
    scrambling_steps_prefix = config.get('Decoding', 'scrambling_steps_prefix')
    make_blocks = config.getboolean('Parameters', 'make_blocks')
    block_size = config.getint('Parameters', 'block_size')
    if not output_format:
        output_format = [str(i) for i in config.get('Parameters', 'output_format').split(',')]
    keep_oldsequence = config.getboolean('Parameters', 'keep_oldsequence')
    #END IMPORT VARIABLES

    scrambling_steps_id_withprefix = [scrambling_steps_prefix+str(i) for i in scrambling_steps_id]
    local_dir = path.dirname(path.realpath(__file__)) + '/'
    output_dir = local_dir + output_subdir

    #check for an already existing sequence file
    if keep_oldsequence and path.isfile(output_dir + '.' + sequence_name + '-last_exported_sequence.csv'):
        sequence = pd.DataFrame.from_csv(output_dir + '.' + sequence_name + '-last_exported_sequence.csv')
        print('Keeping old sequence found at '+output_dir+'.'+sequence_name+'-last_exported_sequence.csv . Please unset "keep_oldsequence" in '+path.dirname(path.realpath(__file__))+'/gen.cfg to avoid this.')
    else:
        ### START CREATING THE NEW STIMLIST DATAFRAME
        stimuli_dir = path.expanduser(stimuli_dir) # expands the path if it is specified with a tilde for "home"
        stimlist = permutation(listdir(stimuli_dir))
        sequence = pd.DataFrame([]) # blank dataframe to add the stimuli lists to
        if make_blocks:
            sequence_name = sequence_name + '_' + str(block_size) + 'block'

        # 100% emotion trials
        for variable_categories in product([male_id, female_id], [[happy_id, fearful_id], [fearful_id, happy_id]]):
            top_stimuli = [a for a in stimlist if variable_categories[0] in a and variable_categories[1][0] in a and easy_em_id in a and scrambling_id not in a]
            distractors = [a for a in stimlist if variable_categories[0] in a and variable_categories[1][1] in a and easy_em_id in a and scrambling_id not in a]
            subsequence = hariri(top_stimuli, top_stimuli, distractors, suffix_characters=11) # with suffix-identifier skipping, because we don't want the same person twice in a slide
            sequence = pd.concat([sequence, subsequence], ignore_index=True)

        # 40% emotion trials
        for variable_categories in product([male_id, female_id], [[happy_id, fearful_id], [fearful_id, happy_id]]):
            top_stimuli = [a for a in stimlist if variable_categories[0] in a and variable_categories[1][0] in a and easy_em_id in a and scrambling_id not in a]
            distractors = [a for a in stimlist if variable_categories[0] in a and variable_categories[1][1] in a and hard_em_id in a and scrambling_id not in a]
            targets = [a for a in stimlist if variable_categories[0] in a and variable_categories[1][0] in a and hard_em_id in a and scrambling_id not in a]
            subsequence = hariri(top_stimuli, targets, distractors, suffix_characters=11) # with suffix-identifier skipping, because we don't want the same person twice in a slide
            sequence = pd.concat([sequence, subsequence], ignore_index=True)

        # scrambling trials
        for variable_categories in product(scrambling_steps_id_withprefix, [[scrambling_a_id, scrambling_b_id]]):
            top_stimuli = [a for a in stimlist if variable_categories[0] in a and variable_categories[1][0] in a and easy_em_id in a]
            distractors = [a for a in stimlist if variable_categories[0] in a and variable_categories[1][0] in a and easy_em_id in a]
            for idx, i in enumerate(distractors):
                distractors[idx] = distractors[idx][:-5]+'b.jpg'
            targets = top_stimuli # we use identical images for identification
            subsequence = hariri(top_stimuli, targets, distractors, forbid_identical_targets=False)
            sequence = pd.concat([sequence, subsequence], ignore_index=True)

        # fill out meta-fields
        sequence.ix[(sequence['top face'].map(lambda x: happy_id in x)), 'emotion'] = 'happiness'
        sequence.ix[(sequence['top face'].map(lambda x: fearful_id in x)), 'emotion'] = 'fear'
        sequence.ix[(sequence['right face'].map(lambda x: easy_em_id in x)), 'emotion intensity'] = 100 # intensity is decoded from a bottom face, since the top face is always C100
        sequence.ix[(sequence['right face'].map(lambda x: hard_em_id in x)), 'emotion intensity'] = 40
        sequence.ix[(sequence['top face'].map(lambda x: male_id in x)), 'gender'] = 'm'
        sequence.ix[(sequence['top face'].map(lambda x: female_id in x)), 'gender'] = 'f'
        for scrambling_step in scrambling_steps_id_withprefix:
            sequence.ix[(sequence['top face'].map(lambda x: scrambling_step in x)), 'scrambling'] = int(scrambling_step.strip('cell'))
        sequence.ix[(sequence['top face'].map(lambda x: scrambling_id not in x)), 'scrambling'] = 0

        #make blocks
        if make_blocks:
            block_number = 0 # start value for iteration
            for step in range(0, len(sequence)*block_size, block_size):
                sequence.ix[step:step+block_size, 'block'] = block_number
                block_number += 1

        #save raw stimulus list
        output_file = output_dir + '.' + sequence_name + '-last_exported_sequence'
        save_pd_csv(sequence, output_file)

    # BEGIN OUTPUT FILE FORMATTING
    if 'christian' in output_format:
        # 'christian' format (dataframe, versatile; used amongst others for faceRT)
        output_file = output_dir + sequence_name
        save_pd_csv(sequence, output_file)
    if 'gabriela1' in output_format:
        # 'gabriela1' format (for "Presentation")
        template_subdir = 'gabriela1/'
        header = open(local_dir+templates_dir+template_subdir+'header.txt', 'r').read()
        footer = open(local_dir+templates_dir+template_subdir+'footer.txt', 'r').read()
        module = Template(open(local_dir+templates_dir+template_subdir+'module.txt', 'r').read())
        for condition_file_id in ['cont_hard', 'cont_easy', 'em_hard', 'em_easy']:
            #START REMAP SOME VALUES
            sequence.ix[(sequence['correct answer'] == 'right'), 'correct answer'] = 1
            sequence.ix[(sequence['correct answer'] == 'left'), 'correct answer'] = 2
            #END REMAP SOME VALUES
            output_file = output_dir + sequence_name + '_gabriela1_' + condition_file_id
            with save_gen(output_file, extension='.txt') as outfile:
                outfile.write(header)
                if condition_file_id == 'cont_hard':
                    for idx, trial in sequence[(sequence['scrambling'] == scrambling_steps_id[0])].iterrows():
                        format_module(outfile, module, trial, idx)
                elif condition_file_id == 'cont_easy':
                    for idx, trial in sequence[(sequence['scrambling'] == scrambling_steps_id[1])].iterrows():
                        format_module(outfile, module, trial, idx)
                elif condition_file_id == 'em_hard':
                    for idx, trial in sequence[(sequence['scrambling'] == 0) & (sequence['emotion intensity'] == 40)].iterrows():
                        format_module(outfile, module, trial, idx)
                elif condition_file_id == 'em_easy':
                    for idx, trial in sequence[(sequence['scrambling'] == 0) & (sequence['emotion intensity'] == 100)].iterrows():
                        format_module(outfile, module, trial, idx)
                else:
                    raise InputError('Your condition_file_id values do not correspond to the script\'s expectations.')
                outfile.write(footer)
    if 'gabriela2' in output_format:
        sequence['name'] = sequence['top face']
        for pos, le_name in enumerate(sequence['name']):
            sequence['name'].ix[pos] = path.splitext(le_name)[0] + ' ;'
        sequence.ix[(sequence['correct answer'] == 'right'), 'correct answer'] = 1
        sequence.ix[(sequence['correct answer'] == 'left'), 'correct answer'] = 2
        sequence = sequence.rename(columns={'top face': 'fname_up', 'left face': 'fname_down_left', 'right face': 'fname_down_right', 'correct answer': 'rating'})
        output_file = output_dir + sequence_name + '_gabriela2'
        save_pd_tsv(sequence, output_file)

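# A usage sketch: export the generated sequence in two of the formats handled
# above (the format ids are the ones this script recognizes):
if __name__ == '__main__':
    main(output_format=['christian', 'gabriela1'])
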
def get_and_filter_results(experiment=False, source=False, prepixelation='not specified', remove='', mismeasurement='remove', apply_correct_values=False, make_CoI=False):
    import pandas as pd
    from os import path
    import sys
    from chr_helpers import get_config_file

    config = get_config_file(localpath=path.dirname(path.realpath(__file__))+'/')
    if isinstance(prepixelation, (int, long)):
        print(prepixelation)

    #IMPORT VARIABLES
    if prepixelation == 'not specified': # compare against a sentinel string, since 'not prepixelation' would also trigger for a legitimate value of 0
        prepixelation = config.getint('Data', 'prepixelation')
    if not experiment:
        experiment = config.get('Data', 'experiment')
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    ignore_filename = config.get('Data', 'ignore_filename')
    #END IMPORT VARIABLES

    if source == 'server':
        from HTMLParser import HTMLParser
        import urllib
        class ChrParser(HTMLParser):
            def handle_starttag(self, tag, attrs):
                if tag == 'a':
                    for key, value in attrs:
                        if key == 'href' and value.endswith('.csv'):
                            pre_fileslist.append(value)
        results_dir = data_path+experiment+'/px'+str(prepixelation)+'/'
        print(results_dir)
        data_url = urllib.urlopen(results_dir).read()
        parser = ChrParser()
        pre_fileslist = []
        parser.feed(data_url) # pre_fileslist gets populated here
    elif source == 'live':
        from os import listdir
        results_dir = path.dirname(path.dirname(path.realpath(__file__))) + data_path + str(prepixelation) + '/'
        results_dir = path.expanduser(results_dir)
        pre_fileslist = listdir(results_dir)
    elif source == 'local':
        from os import listdir
        results_dir = data_path + experiment + '/px' + str(prepixelation) + '/'
        results_dir = path.expanduser(results_dir)
        pre_fileslist = listdir(results_dir)

    print('Loading data from '+results_dir)
    if pre_fileslist == []:
        raise InputError('For some reason the list of results files could not be populated.')

    files = [lefile for lefile in pre_fileslist if lefile.endswith('.csv') and not lefile.endswith(ignore_filename+'.csv')]
    data_all = pd.DataFrame([]) # empty container frame for concatenating input from multiple files
    for lefile in files:
        data_lefile = pd.DataFrame.from_csv(results_dir+lefile)
        data_lefile['ID'] = path.splitext(lefile)[0]
        scrambling_list = set(data_lefile['scrambling'])
        if apply_correct_values: # relevant only for some legacy data where the script miswrote values to the results file
            data_lefile = correct_values(data_lefile)
        if make_CoI:
            data_lefile = categories_of_interest(data_lefile, scrambling_list)
        elif mismeasurement == 'fix':
            make_CoI = True
            data_lefile = categories_of_interest(data_lefile, scrambling_list)
        if mismeasurement == 'remove':
            data_lefile = data_lefile[data_lefile['RT'] > 0] # remove entries with instant RTs here
        elif mismeasurement == 'nan':
            data_lefile.ix[(data_lefile['RT'] <= 0), 'RT'] = False # unset the RTs of entries with instant RTs here
        elif mismeasurement == 'fix':
            import numpy as np
            for CoI in set(data_lefile['CoI']):
                data_lefile.ix[(data_lefile['RT'] <= 0) & (data_lefile['CoI'] == CoI), 'RT'] = np.median(data_lefile[data_lefile['CoI'] == CoI]['RT']) # replace missing values with the median of the respective CoI
        if 'no-response' in remove:
            data_lefile = data_lefile[data_lefile['keypress'] != 'none'] # remove entries with no answers here
        if 'incorrect' in remove:
            data_lefile = data_lefile[data_lefile['correct answer'] == data_lefile['keypress']] # remove entries with incorrect answers here
        data_all = pd.concat([data_all, data_lefile], ignore_index=True)
    return data_all

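# A usage sketch for get_and_filter_results(); the argument values are
# illustrative. This loads local results, drops trials without a response or
# with a wrong answer, and median-fixes mismeasured (instant) RTs per category
# of interest:
#
#   data = get_and_filter_results(source='local', remove='no-response incorrect',
#       mismeasurement='fix')
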
def fa(source=False, use_filter="default", data_file="latest", participant_subset="", drop_metadata=True, drop=[], clean=7, factors=5, facecolor="#ffffff"):
    # relies on module-level imports (pandas DataFrame/Series, numpy as np, matplotlib,
    # rpy2's r and numpy2ri, mpl_toolkits' make_axes_locatable) and the module's
    # remappedColorMap helper
    config = get_config_file(localpath=path.dirname(path.realpath(__file__))+'/')

    #IMPORT VARIABLES
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    filter_dir = config.get('Paths', 'filter_dir')
    filter_name = config.get('Filters', use_filter)
    #END IMPORT VARIABLES

    filter_path = path.dirname(path.realpath(__file__)) + '/' + filter_dir + filter_name + '.csv'
    filters = DataFrame.from_csv(filter_path, header=None).transpose() # transpose filters because of .csv file formatting
    all_data = DataFrame.from_csv(data_path + data_file + ".csv")
    all_data = all_data.reset_index(level=0)

    # clean data of respondents who only check extreme answers:
    all_data = all_data[map(lambda y: len(set(y)) > clean, np.array(all_data))]
    if drop_metadata == True:
        # drop metadata:
        all_data = all_data.drop(filters["metadata"][Series.notnull(filters["metadata"])], axis=1)
    drop_list = []
    for drop_item in drop:
        # compile a list of column names to be dropped:
        drop_list += list(filters[drop_item][Series.notnull(filters[drop_item])])
    # get unique column names (the list may contain duplicates if overlaying multiple filters):
    drop_list = list(set(drop_list))
    all_data = all_data.drop(drop_list, axis=1)

    if participant_subset == "odd":
        # select only odd indexes (keep the other dataset half for validation)
        keep_rows = all_data.index.values[1::2]
        filtered_data = all_data.ix[keep_rows]
    elif participant_subset == "even":
        # select only even indexes (keep the other dataset half for validation)
        keep_rows = all_data.index.values[0::2]
        filtered_data = all_data.ix[keep_rows]
    elif participant_subset == "male":
        # select only male participants
        filtered_data = all_data[all_data['My legal gender:'] == 'Male']
    elif participant_subset == "female":
        # select only female participants
        filtered_data = all_data[all_data['My legal gender:'] == 'Female']
    else:
        filtered_data = all_data

    # convert to the correct type for analysis:
    filtered_data_array = np.array(filtered_data, dtype='float64')
    filtered_data_array = filtered_data_array / 100

    fit = r.factanal(filtered_data_array, factors, rotation='promax')
    load = r.loadings(fit)
    load = numpy2ri.ri2numpy(load)
    load = r.t(load)
    remapped_cmap = remappedColorMap(cm.PiYG, start=(np.max(load)-abs(np.min(load)))/(2*np.max(load)), midpoint=abs(np.min(load))/(np.max(load)+abs(np.min(load))), name='shrunk')

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(17.5, 5), facecolor=facecolor)
    graphic = ax.imshow(load, cmap=remapped_cmap, interpolation='none')
    ax.xaxis.set_major_locator(matplotlib.ticker.MultipleLocator(base=1.0))
    ax.yaxis.set_major_locator(matplotlib.ticker.MultipleLocator(base=1.0))
    ax.set_xticklabels([0]+filtered_data.columns.tolist(), fontsize=8, rotation=90)
    ax.set_yticklabels(np.arange(factors+1))
    ax.set_ylabel('Factors')
    ax.set_title("Question Loadings on Factors")

    # recolor plot spines:
    for spine_side in ["bottom", "top", "left", "right"]:
        ax.spines[spine_side].set_color("#777777")
    # remove ticks:
    plt.tick_params(axis='both', which='both', left="off", right="off", bottom='off', top='off')

    divider = make_axes_locatable(ax)
    # calculate the cbar width so that it equals the question column width:
    cbar_width = str(100/np.shape(load)[1]) + "%"
    cax = divider.append_axes("right", size=cbar_width, pad=0.05)
    cbar = colorbar(graphic, cax=cax, drawedges=True)
    # limit the number of ticks:
    tick_locator = ticker.MaxNLocator(nbins=6)
    cbar.locator = tick_locator
    cbar.update_ticks()
    # right-align tick labels so that negative values are not misaligned:
    for t in cbar.ax.get_yticklabels():
        t.set_horizontalalignment('right')
        t.set_x(0.045*(np.shape(load)[1]+6))
    # tweak color bar borders:
    cbar.outline.set_color("#666666")
    cbar.dividers.set_linewidth(0)

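# A usage sketch for fa(); the subset and factor values are illustrative.
# Running on the odd half of the respondents keeps the even half for validating
# the factor structure:
#
#   fa(participant_subset="odd", factors=5, clean=7)
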
def coi(source=False, make_tight=True, print_title=True, elinewidth=3, fontscale=1, isspec=False, spec=None):
    # `spec` selects participants to report separately when `isspec` is set
    from os import path, listdir
    import numpy as np
    import pandas as pd
    import matplotlib
    import matplotlib.pyplot as plt
    from matplotlib import axis
    from matplotlib.font_manager import FontProperties
    from matplotlib.pyplot import figure, errorbar, legend
    from scipy.stats import sem, ttest_rel
    from chr_helpers import get_config_file

    config = get_config_file(localpath=path.dirname(path.realpath(__file__))+'/')

    #IMPORT VARIABLES
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    reaction_times = config.get('Addresses', 'reaction_times')
    #END IMPORT VARIABLES

    data_path = path.expanduser(data_path)
    rt_path = data_path + reaction_times

    files = [lefile for lefile in listdir(rt_path) if lefile.endswith('.csv')]
    ids = [t.split('_',2)[0]+'_'+t.split('_',2)[1] for t in files]
    id_list = np.unique(ids)

    conts = get_dataframes(id_list, rt_path) # per-participant RT dataframes (helper defined elsewhere in this module)
    if isspec:
        spec_conts = get_dataframes(spec, rt_path)
        meanscont = spec_conts.groupby('subblock').mean()
        print(meanscont)
        cat1 = spec_conts[spec_conts['subblock']=='aus+sua']
        cat2 = spec_conts[spec_conts['subblock']=='uas+sau']
        print(ttest_rel(cat1['RTdiff'], cat2['RTdiff']))

    meanscont = conts.groupby(['ID','subblock']).mean()
    meanscont = meanscont.reset_index()

    ids = sorted(list(set(conts.set_index('ID').index)))
    pos_ids = np.arange(len(ids))

    # per-participant and total means with standard errors, for both subblock categories
    sa_means = conts[(conts['subblock'] == 'uas+sau')].groupby('ID')['RTdiff'].mean()
    sa_std = conts[(conts['subblock'] == 'uas+sau')].groupby('ID')['RTdiff'].aggregate(sem)
    sa_t_means = meanscont[(meanscont['subblock'] == 'uas+sau')]['RTdiff'].mean()
    sa_t_std = sem(meanscont[(meanscont['subblock'] == 'uas+sau')]['RTdiff'])
    su_means = conts[(conts['subblock'] == 'aus+sua')].groupby('ID')['RTdiff'].mean()
    su_std = conts[(conts['subblock'] == 'aus+sua')].groupby('ID')['RTdiff'].aggregate(sem)
    su_t_means = meanscont[(meanscont['subblock'] == 'aus+sua')]['RTdiff'].mean()
    su_t_std = sem(meanscont[(meanscont['subblock'] == 'aus+sua')]['RTdiff'])
    if isspec:
        sa_spec_means = spec_conts[(spec_conts['subblock'] == 'uas+sau')]['RTdiff'].mean()
        sa_spec_std = sem(spec_conts[(spec_conts['subblock'] == 'uas+sau')]['RTdiff'])
        su_spec_means = spec_conts[(spec_conts['subblock'] == 'aus+sua')]['RTdiff'].mean()
        su_spec_std = sem(spec_conts[(spec_conts['subblock'] == 'aus+sua')]['RTdiff'])

    fig = figure(figsize=(pos_ids.max()*3, 4), dpi=300, facecolor='#eeeeee', tight_layout=make_tight)
    ax = fig.add_subplot(1,1,1)
    matplotlib.rcParams.update({'font.size': 12*fontscale})
    width = 0.3
    ax.yaxis.grid(True, linestyle='-', which='major', color='#dddddd', alpha=0.5, zorder=1)

    sa_bar = plt.bar(pos_ids, sa_means, width, color='m', alpha=0.4, zorder=1, linewidth=0)
    sa_err = errorbar(pos_ids+(width/2), sa_means, yerr=sa_std, ecolor='0.55', elinewidth=elinewidth, capsize=0, linestyle='None', zorder=2)
    sa_t_bar = plt.bar(pos_ids[-1]+1, sa_t_means, width, color='m', alpha=0.8, zorder=1, linewidth=0)
    sa_t_err = errorbar(pos_ids[-1]+1+(width/2), sa_t_means, yerr=sa_t_std, ecolor='0.1', elinewidth=elinewidth, capsize=0, linestyle='None', zorder=2)
    su_bar = plt.bar(pos_ids+width, su_means, width, color='g', alpha=0.4, zorder=1, linewidth=0)
    su_err = errorbar(pos_ids+(width*3/2), su_means, yerr=su_std, ecolor='0.55', elinewidth=elinewidth, capsize=0, linestyle='None', zorder=2)
    su_t_bar = plt.bar(pos_ids[-1]+1+width, su_t_means, width, color='g', alpha=0.8, zorder=1, linewidth=0)
    su_t_err = errorbar(pos_ids[-1]+1+(width*3/2), su_t_means, yerr=su_t_std, ecolor='0.1', elinewidth=elinewidth, capsize=0, linestyle='None', zorder=2)
    if isspec:
        sa_spec_bar = plt.bar(pos_ids[-1]+2, sa_spec_means, width, color='m', alpha=0.4, zorder=1, linewidth=0)
        sa_spec_err = errorbar(pos_ids[-1]+2+(width/2), sa_spec_means, yerr=sa_spec_std, ecolor='0.55', elinewidth=elinewidth, capsize=0, linestyle='None', zorder=2)
        su_spec_bar = plt.bar(pos_ids[-1]+2+width, su_spec_means, width, color='g', alpha=0.4, zorder=1, linewidth=0)
        su_spec_err = errorbar(pos_ids[-1]+2+(width*3/2), su_spec_means, yerr=su_spec_std, ecolor='0.55', elinewidth=elinewidth, capsize=0, linestyle='None', zorder=2)

    if isspec:
        ids = ids + ['total', spec]
    else:
        ids = ids + ['TOTAL '] # blank space at the end so that it doesn't overlap with the x-axis
    pos_ids = np.arange(len(ids))
    ax.set_xlim(0, pos_ids.max())
    ax.set_ylabel(r'$\mathsf{\overline{RT}}$ [s]')
    ax.set_xlabel('Participant ID')
    ax.set_xticks(pos_ids + width)
    ax.set_xticklabels(ids, fontsize=8*fontscale, rotation=90)
    for tick in ax.axes.get_xticklines():
        tick.set_visible(False)
    axis.Axis.zoom(ax.xaxis, -0.5)
    legend((sa_t_bar, su_t_bar), ('Target flanking attractive face', 'Target flanking unattractive face'), bbox_to_anchor=(0.92, 0.2), shadow=False, frameon=False, prop=FontProperties(size=str(11*fontscale)))
    print(meanscont[meanscont['subblock']=='uas+sau']['RTdiff'].mean() - meanscont[meanscont['subblock']=='aus+sua']['RTdiff'].mean())
    return meanscont
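# Usage sketch for coi(): with paths configured under gen.cfg, a call such as
# the following (argument values hypothetical) plots per-participant mean RT
# differences for the two subblock categories and returns the per-participant,
# per-subblock means; the figure can then be shown or saved via pyplot:
#
#     import matplotlib.pyplot as plt
#     meanscont = coi(elinewidth=2, fontscale=1.2)
#     plt.savefig("coi.pdf")  # or plt.show()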
def coni(source=False, make_tight=True, print_title=True, elinewidth=2, fontscale=1, isspec=False):
    from os import path, listdir
    import numpy as np
    import pandas as pd
    import matplotlib
    import matplotlib.pyplot as plt
    from matplotlib import axis
    from matplotlib.font_manager import FontProperties
    from matplotlib.pyplot import figure, errorbar, legend
    from scipy.stats import sem
    from chr_helpers import get_config_file

    config = get_config_file(localpath=path.dirname(path.realpath(__file__))+'/')

    #IMPORT VARIABLES
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    reaction_times = config.get('Addresses', 'reaction_times')
    #END IMPORT VARIABLES

    data_path = path.expanduser(data_path)
    rt_path = data_path + reaction_times

    files = [lefile for lefile in listdir(rt_path) if lefile.endswith('.csv')]
    ids = [t.split('_',2)[0]+'_'+t.split('_',2)[1] for t in files]
    id_list = np.unique(ids)

    conts = pd.DataFrame([])
    for i in id_list:
        # per-picture mean ratings; the 20th-highest and 20th-lowest scores serve as attractiveness cutoffs
        ratings = open_csv(rt_path+i+'_p') # open_csv() is a CSV helper defined elsewhere in this module
        ratings = pd.DataFrame(ratings[1:], columns=ratings[0], dtype=float)
        ratings = ratings.groupby('picture').mean()
        sorted_scores = sorted(ratings['score'])
        score_top, score_bottom = sorted_scores[-20], sorted_scores[19]

        cont = open_csv(rt_path+i+'_wm')
        cont = pd.DataFrame(cont[1:], columns=cont[0])
        for column in ['rateL', 'RTL', 'orderL', 'rateR', 'RTR', 'orderR', 'RT', 'session']:
            cont[column] = cont[column].astype(np.float64)
        cont = cont[cont['RT'] >= 0]
        cont.loc[cont['isstimleft'] == 'False', 'isstimleft'] = False
        cont.loc[cont['isstimleft'] == 'True', 'isstimleft'] = True
        cont['ID'] = i
        # keep only trials where the keypress points away from the stimulus side
        cont1 = cont[(cont['isstimleft'] == False) & (cont['keypress'] == 'right')]
        cont2 = cont[(cont['isstimleft'] == True) & (cont['keypress'] == 'left')]
        cont = pd.concat([cont1, cont2])
        cont['block'] = ''
        cont.loc[(cont['rateL'] >= score_top) & (cont['rateR'] >= score_top), 'block'] = 'aa'
        cont.loc[(cont['rateL'] >= score_top) & (cont['rateR'] <= score_bottom), 'block'] = 'au'
        cont.loc[(cont['rateL'] <= score_bottom) & (cont['rateR'] >= score_top), 'block'] = 'ua'
        cont.loc[(cont['rateL'] <= score_bottom) & (cont['rateR'] <= score_bottom), 'block'] = 'uu'
        conts = pd.concat([conts, cont], ignore_index=True)
        #cat1 = cont[cont['block']=='aa']
        #cat2 = cont[cont['block']=='uu']
        #print(ttest_ind(cat1['RT'], cat2['RT']))

    ids = sorted(list(set(conts.set_index('ID').index)))
    pos_ids = np.arange(len(ids))
    meanscont = conts.groupby(['ID','block']).mean()
    meanscont = meanscont.reset_index()

    aa_means = conts[(conts['block'] == 'aa')].groupby('ID')['RT'].mean()
    aa_std = conts[(conts['block'] == 'aa')].groupby('ID')['RT'].aggregate(sem)
    aa_t_means = meanscont[(meanscont['block'] == 'aa')]['RT'].mean()
    aa_t_std = sem(meanscont[(meanscont['block'] == 'aa')]['RT'])
    uu_means = conts[(conts['block'] == 'uu')].groupby('ID')['RT'].mean()
    uu_std = conts[(conts['block'] == 'uu')].groupby('ID')['RT'].aggregate(sem)
    uu_t_means = meanscont[(meanscont['block'] == 'uu')]['RT'].mean()
    uu_t_std = sem(meanscont[(meanscont['block'] == 'uu')]['RT'])

    fig = figure(figsize=(pos_ids.max()*3, 4), dpi=300, facecolor='#eeeeee', tight_layout=make_tight)
    ax = fig.add_subplot(1,1,1)
    matplotlib.rcParams.update({'font.size': 12*fontscale})
    width = 0.3
    ax.yaxis.grid(True, linestyle='-', which='major', color='#dddddd', alpha=0.5, zorder=1)

    aa_bar = plt.bar(pos_ids, aa_means, width, color='g', alpha=0.4, zorder=1, linewidth=0)
    aa_err = errorbar(pos_ids+(width/2), aa_means, yerr=aa_std, ecolor='0.55', elinewidth=elinewidth, capsize=0, linestyle='None', zorder=2)
    aa_t_bar = plt.bar(pos_ids[-1]+1, aa_t_means, width, color='g', alpha=0.8, zorder=1, linewidth=0)
    aa_t_err = errorbar(pos_ids[-1]+1+(width/2), aa_t_means, yerr=aa_t_std, ecolor='0.1', elinewidth=elinewidth, capsize=0, linestyle='None', zorder=2)
    uu_bar = plt.bar(pos_ids+width, uu_means, width, color='m', alpha=0.4, zorder=1, linewidth=0)
    uu_err = errorbar(pos_ids+(width*3/2), uu_means, yerr=uu_std, ecolor='0.55', elinewidth=elinewidth, capsize=0, linestyle='None', zorder=2)
    uu_t_bar = plt.bar(pos_ids[-1]+1+width, uu_t_means, width, color='m', alpha=0.8, zorder=1, linewidth=0)
    uu_t_err = errorbar(pos_ids[-1]+1+(width*3/2), uu_t_means, yerr=uu_t_std, ecolor='0.1', elinewidth=elinewidth, capsize=0, linestyle='None', zorder=2)

    ids = ids + ['TOTAL '] # blank space at the end so that it doesn't overlap with the x-axis
    pos_ids = np.arange(len(ids))
    ax.set_xlim(0, pos_ids.max())
    ax.set_ylim(0, 1.1)
    ax.set_ylabel(r'$\mathsf{\overline{RT}}$ [s]')
    ax.set_xlabel('Participant ID')
    ax.set_xticks(pos_ids + width)
    ax.set_xticklabels(ids, fontsize=8*fontscale, rotation=90)
    for tick in ax.axes.get_xticklines():
        tick.set_visible(False)
    axis.Axis.zoom(ax.xaxis, -0.5)
    legend((aa_t_bar, uu_t_bar), ('Only attractive faces', 'Only unattractive faces'), bbox_to_anchor=(0.92, 1), shadow=False, frameon=False, prop=FontProperties(size=str(11*fontscale)))
    return meanscont
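# Hypothetical entry point, assuming this module is run directly and that
# gen.cfg points at a local copy of the reaction-time data; it renders both
# bar plots and prints the returned per-participant means:
if __name__ == '__main__':
    import matplotlib.pyplot as plt
    print(coi())
    print(coni())
    plt.show()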