mydata = drop_df_nan_rows_according2cols(mydata, col_to_dropna)

# drop too-fast and too-slow responses
if drop_fastandslow_resp:
    col_to_drop_rows = "key_resp.rt"
    min_rt = 0.15
    max_rt = 3
    mydata = drop_df_rows_according2_one_col(mydata, col_to_drop_rows, min_rt, max_rt)

# add numerosity difference between D1 and D2
mydata["dff_D1D2"] = mydata["D1numerosity"] - mydata["D2numerosity"]

# add correct answer
insert_new_col_from_two_cols(mydata, "ref_first", "key_resp.keys", "is_resp_ref_more", insert_is_resp_ref_more)
insert_new_col(mydata, "is_resp_ref_more", "is_resp_probe_more", insert_is_resp_probe_more)

# add probe numerosity
insert_new_col_from_three_cols(mydata, "D1numerosity", "D2numerosity", "ref_first", "probeN", insert_probeN)

# add ref numerosity
insert_new_col_from_three_cols(mydata, "D1numerosity", "D2numerosity", "ref_first", "refN", insert_refN)

# add probe crowding condition
insert_new_col_from_three_cols(mydata, "D1Crowding", "D2Crowding", "ref_first", "probeCrowding", insert_probeCrowding)

# add ref crowding condition
insert_new_col_from_three_cols(mydata, "D1Crowding", "D2Crowding", "ref_first", "refCrowding", insert_refCrowding)
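# For reference, a minimal sketch of the range filter used above. The real
# drop_df_rows_according2_one_col is defined elsewhere in this repo; treating
# the bounds as inclusive is an assumption here.
def drop_df_rows_according2_one_col_sketch(df, col_name, min_val, max_val):
    """Keep only rows whose value in col_name lies within [min_val, max_val]."""
    return df[(df[col_name] >= min_val) & (df[col_name] <= max_val)].copy()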
    if winsize == 0.4:
        return 29
    else:
        return 27


if __name__ == '__main__':
    to_excel = False

    # read data
    PATH = "../data/ms2_mix_prolific_2_data/"
    DATA = "ms2_mix_2_preprocessed.xlsx"
    data = pd.read_excel(PATH + DATA)

    # process the cols
    rename_df_col(data, "Unnamed: 0", "n")

    # convert perceptpairs to percent_triplets
    insert_new_col(data, "perceptpairs", "percent_triplets", get_percent_triplets)

    dv = "deviation_score"
    dv2 = "percent_change"
    indv = "numerosity"
    indv2 = "protectzonetype"
    indv3 = "winsize"
    indv4 = "percent_triplets"
    indv5 = "participant"

    # average data: mean and SD of deviation and percent change for each condition per participant
    data_1 = data.groupby([indv, indv2, indv3, indv4, indv5])[[dv, dv2]] \
        .agg({dv: ['mean', 'std'], dv2: ['mean', 'std']}) \
        .reset_index(level=[indv, indv2, indv3, indv4, indv5])
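    # Note: aggregating with a dict of lists leaves a MultiIndex on the columns
    # (e.g. ('deviation_score', 'mean')). If flat column names are preferred
    # downstream, one option (an addition, not part of the original script) is:
    data_1_flat = data_1.copy()
    data_1_flat.columns = ['_'.join(filter(None, map(str, col)))
                           for col in data_1_flat.columns.to_flat_index()]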
PATH_STIM = "../displays/"
FILENAME_STIM = "update_stim_info_full.xlsx"

data_to_merge = pd.read_excel(PATH_DATA + FILENAME_DATA)
stimuli_to_merge = pd.read_excel(PATH_STIM + FILENAME_STIM)

# keep needed cols
stimuli_to_merge = keep_valid_columns(stimuli_to_merge, KEPT_COL_NAMES4)

# merge data with stimuli info
all_df = pd.merge(data_to_merge,
                  stimuli_to_merge,
                  how="left",
                  on=["index_stimuliInfo", "N_disk", "crowdingcons", "winsize"])

# preprocess
my_data = keep_valid_columns(all_df, KEPT_COL_NAMES5)

# add color codes for crowding and no-crowding displays
insert_new_col(my_data, "crowdingcons", 'colorcode', add_color_code_by_crowdingcons)
insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons", "colorcode5levels", add_color_code_5levels)

# %% correlations
winsize_list = [0.3, 0.4, 0.5, 0.6, 0.7]
my_data = get_analysis_dataframe(my_data, crowding=crowdingcons)
df_list_beforegb = [
    get_sub_df_according2col_value(my_data, "winsize", winsize)
    for winsize in winsize_list
]
df_list = [
    get_data_to_analysis(df, "deviation_score", "a_values", "N_disk",
                         "list_index", "colorcode", "colorcode5levels")
    for df in df_list_beforegb
]
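# Optional sanity check on the left merge (an addition, not in the original
# script): pandas' indicator flag marks any trial that found no stimulus match.
merge_check = pd.merge(data_to_merge, stimuli_to_merge, how="left",
                       on=["index_stimuliInfo", "N_disk", "crowdingcons", "winsize"],
                       indicator=True)
assert (merge_check["_merge"] == "both").all(), "some trials lack stimulus info"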
    if winsize == 0.4:
        return 34
    else:
        return 32


if __name__ == '__main__':
    write_to_excel = False

    # read data
    PATH = "../data/ms2_uniform_prolific_1_data/"
    DATA = "preprocessed_prolific.xlsx"
    data = pd.read_excel(PATH + DATA)

    # process the cols
    rename_df_col(data, "Unnamed: 0", "n")

    # convert perceptpairs to percent_triplets
    insert_new_col(data, "perceptpairs", "percent_triplets", get_percent_triplets)

    dv = "deviation_score"
    dv2 = "percent_change"
    indv = "numerosity"
    indv2 = "protectzonetype"
    indv3 = "winsize"
    indv4 = "percent_triplets"
    indv5 = "perceptpairs"
    indv6 = "participant"

    # averaged data: mean and SD of deviation for each condition per participant
    # data_1 = data.groupby(["percent_triplets", "numerosity", "protectzonetype", "participant", "winsize"])[
    #     "deviation_score"] \
    #     .agg(['mean', 'std']) \
# remove duplicated cols
stimuli_to_merge = keep_valid_columns(stimuli_to_merge_ori, KEPT_COL_NAMES_STIMU_DF)

# merge data with stimuli info
all_df = pd.merge(data_to_merge,
                  stimuli_to_merge,
                  how="left",
                  on=["index_stimuliInfo", "N_disk", "crowdingcons", "winsize"])

# %% preprocess
my_data = keep_valid_columns(all_df, KEPT_COL_NAMES)

# add color codes for crowding and no-crowding displays
insert_new_col(my_data, "crowdingcons", 'colorcode', add_color_code_by_crowdingcons)
insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons", "colorcode5levels", add_color_code_5levels)

# %% correlation
# crowding = 0, 1, 2 for no-crowding, crowding, and all data
my_data = get_analysis_dataframe(my_data, crowding=crowdingcons)
winsize = [0.3, 0.4, 0.5, 0.6, 0.7]
my_data_list = [
    get_sub_df_according2col_value(my_data, "winsize", ws)
    for ws in winsize
]
# data to calculate partial correlations
my_data_list2analysis = [
    get_data_to_analysis(data, "deviation_score", alignment[indx_align_n],
    elif percentpairs == 0:
        return 1
    else:
        raise Exception(f"percentpairs value {percentpairs} is unexpected")


if __name__ == '__main__':
    to_excel = False

    # read data
    PATH = "../data/ms2_uniform_mix_3_data/"
    DATA = "preprocessed_uniform_mix_3.xlsx"
    data = pd.read_excel(PATH + DATA)

    # convert perceptpairs to percent_triplets
    insert_new_col(data, "perceptpairs", "percent_triplets", get_percent_triplets)

    dv = "deviation_score"
    dv2 = "percent_change"
    indv = "numerosity"
    indv2 = "protectzonetype"
    indv3 = "winsize"
    indv4 = "percent_triplets"
    indv5 = "contrast"
    indv6 = "contrast_full"
    indv7 = "participant"

    # average data: mean and SD of deviation and percent change for each condition per participant
    data_1 = data.groupby([indv, indv2, indv3, indv4, indv5, indv6, indv7])[[dv, dv2]] \
        .agg({dv: ['mean', 'std'], dv2: ['mean', 'std']}) \
# drop obviously wrong responses
min_res = 10
max_res = 128
df_list_prepro = list()
for df in df_list_t1:
    df_list_prepro.append(drop_df_rows_according2_one_col(df, "responseN", min_res, max_res))

# concat all participants
df_data = pd.concat(df_list_prepro)

# keep data within 3 standard deviations
n_discs = [34, 36, 38, 40, 42, 44, 54, 56, 58, 60, 62, 64]
df_list_by_num = [get_sub_df_according2col_value(df_data, "numerosity", n) for n in n_discs]
prepro_df_list = list()
for sub_df in df_list_by_num:
    lower_boundary = get_mean(sub_df, "responseN") - 3 * get_std(sub_df, "responseN")
    upper_boundary = get_mean(sub_df, "responseN") + 3 * get_std(sub_df, "responseN")
    new_sub_df = drop_df_rows_according2_one_col(sub_df, "responseN", lower_boundary, upper_boundary)
    prepro_df_list.append(new_sub_df)

# 1.20% of trials were removed
df_data_prepro = pd.concat(prepro_df_list, ignore_index=True)

insert_new_col(df_data_prepro, "blockOrder", "contrast", convert_blockOrdertocontrast1)
insert_new_col(df_data_prepro, "blockOrder", "contrast_full", convert_blockOrdertocontrast2)

if write_to_excel:
    df_data_prepro.to_excel("preprocessed_uniform_mix_3.xlsx", index=False)
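# An equivalent vectorized form of the per-numerosity 3-SD filter above,
# shown for comparison (a sketch, not the repo's helper-based version):
grp = df_data.groupby("numerosity")["responseN"]
mu, sd = grp.transform("mean"), grp.transform("std")
within_3sd = df_data["responseN"].between(mu - 3 * sd, mu + 3 * sd)
df_data_prepro_alt = df_data[within_3sd].reset_index(drop=True)
print(f"{100 * (1 - within_3sd.mean()):.2f}% of trials removed")  # script reports ~1.20%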
# remove rows with NaN
totalData = totalData.dropna()

# check the value types in the responseN column
totalData['responseN'].apply(type).value_counts()
# totalData['responseN_Type'] = totalData['responseN'].apply(lambda x: type(x).__name__)

# convert responses to int
totalData['responseN'] = totalData['responseN'].apply(raw_resp_to_int)

# remove rows that became NaN after the response conversion
totalData = totalData.dropna()

# map all needed columns from imageFile
insert_new_col(totalData, "imageFile", "Ndisplay", imageFile_to_number3)
totalData['Ndisplay'] = totalData['Ndisplay'].astype(int)  # change to int

# reset index
totalData = totalData.reset_index(drop=True)

# read stimuli file
stimuli = pd.read_excel('../../../displays/exp2_stim_info.xlsx')

# map totalData with stimulus properties
totalData = pd.merge(totalData, stimuli, how='left', on=['Ndisplay'])

# get deviation
totalData['deviation'] = totalData['responseN'] - totalData['Numerosity']

# write to excel
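# A hedged sketch of what raw_resp_to_int may look like (the real helper is
# defined elsewhere in the repo): coerce free-text responses to int, returning
# NaN on failure so the dropna() above can remove unparseable trials.
import numpy as np

def raw_resp_to_int_sketch(resp):
    try:
        return int(str(resp).strip())
    except (ValueError, TypeError):
        return np.nan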
# keep data within 3 standard deviations
col_to_process = "response"
prepro_df_list = list()
for sub_df in df_list:
    lower_boundary = get_mean(sub_df, col_to_process) - 3 * get_std(sub_df, col_to_process)
    upper_boundary = get_mean(sub_df, col_to_process) + 3 * get_std(sub_df, col_to_process)
    new_sub_df = drop_df_rows_according2_one_col(sub_df, col_to_process, lower_boundary, upper_boundary)
    prepro_df_list.append(new_sub_df)
mydata = pd.concat(prepro_df_list, ignore_index=True)

# add columns / rename columns
insert_new_col(mydata, "Display", "winsize", imageFile_to_number2)
insert_new_col(mydata, "Display", "index_stimuliInfo", imageFile_to_number)
rename_df_col(mydata, "Numerosity", "N_disk")
rename_df_col(mydata, "Crowding", "crowdingcons")

# DV: deviation
insert_new_col_from_two_cols(mydata, "response", "N_disk", "deviation_score", get_deviation)

# make sure col value types are correct
change_col_value_type(mydata, "crowdingcons", int)
change_col_value_type(mydata, "winsize", float)
change_col_value_type(mydata, "index_stimuliInfo", str)
change_col_value_type(mydata, "N_disk", int)

# groupby data to make bar plot
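# Minimal sketches of two helpers used above, inferred from how the scripts in
# this repo call them (assumptions, not the canonical definitions): deviation
# mirrors the explicit response-minus-numerosity subtraction in the exp2
# script, and the type changer is a thin astype wrapper.
def get_deviation_sketch(response, n_disk):
    return response - n_disk

def change_col_value_type_sketch(df, col_name, value_type):
    df[col_name] = df[col_name].astype(value_type)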