def cal_ds_std(df, crowdingcon, col_name="crowdingcons"):
    """Return the column-wise std of the rows where `col_name` == `crowdingcon`.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the condition column `col_name`.
    crowdingcon : int
        Condition selector; must be 0, 1 or 2.
    col_name : str
        Column used to select rows (default "crowdingcons").

    Raises
    ------
    Exception
        If `crowdingcon` is not 0, 1 or 2.  BUGFIX: the original built the
        Exception but never raised it, so invalid values silently fell
        through and returned None.
    """
    # NOTE(review): elsewhere in this file crowding == 2 means "all data"
    # (see get_analysis_dataframe); here it filters rows where the column
    # equals 2 — confirm that difference is intended.
    if crowdingcon not in (0, 1, 2):
        raise Exception(
            f"crowdingcon = {crowdingcon} is not recognized, use 1 or 0 or 2")
    # All three valid branches did the same call with the literal value,
    # so a single parameterized call replaces the if/elif chain.
    return get_sub_df_according2col_value(df, col_name, crowdingcon).std()
def get_analysis_dataframe(my_data, crowding):
    """Select the trials matching a crowding condition.

    crowding == 1 -> rows where "crowdingcons" is 1 (crowding displays)
    crowding == 0 -> rows where "crowdingcons" is 0 (no-crowding displays)
    crowding == 2 -> the full dataframe, unchanged (all data)

    Any other value raises an Exception.
    """
    # Guard-clause style: handle each recognized value and fall through
    # to the error at the end.
    if crowding == 1:
        return get_sub_df_according2col_value(my_data, "crowdingcons", 1)
    if crowding == 0:
        return get_sub_df_according2col_value(my_data, "crowdingcons", 0)
    if crowding == 2:
        return my_data
    raise Exception(
        f"crowding == {crowding} is not recognized. 0 for no-crowding, 1 for crowding, 2 for all"
    )
alignment = ["align_v_size12", "align_v_size11", "align_v_size10", "align_v_size9", "align_v_size8", "align_v_size7", "align_v_size6", "align_v_size5", "align_v_size4", "align_v_size3", "align_v_size2", "align_v_size1"] # get data to cal partial corr my_data = exp1_alignment.my_data winsize03 = get_sub_df_according2col_value(my_data, "winsize", 0.3) winsize04 = get_sub_df_according2col_value(my_data, "winsize", 0.4) winsize05 = get_sub_df_according2col_value(my_data, "winsize", 0.5) winsize06 = get_sub_df_according2col_value(my_data, "winsize", 0.6) winsize07 = get_sub_df_according2col_value(my_data, "winsize", 0.7) def get_partial_corr_df(indx_align_n = 0, w03 = winsize03, w04 = winsize04, w05 = winsize05, w06 = winsize06, w07 = winsize07): """ get one partial corr dataframe for given angle size, indicated by indx_align_n """ w03 = get_data_to_analysis(w03, "deviation_score", alignment[indx_align_n], "N_disk", "list_index", "colorcode", "colorcode5levels") w04 = get_data_to_analysis(w04, "deviation_score", alignment[indx_align_n], "N_disk", "list_index", "colorcode", "colorcode5levels")
# NOTE(review): this append presumably closes a per-participant loop whose
# header is above this view; `df` and `df_list_t1` come from upstream.
# Keeps only trials with more than 4 discs.
df_list_t1.append(df.loc[df["numerosity"] > 4])

# Drop obviously wrong responses (outside the plausible 10-150 range).
min_res = 10
max_res = 150
df_list_prepro = list()
for df in df_list_t1:
    df_list_prepro.append(drop_df_rows_according2_one_col(df, "responseN",
                                                          min_res, max_res))
# concat all participant
df_data = pd.concat(df_list_prepro)

# keep data within 3 sd
# NOTE(review): 75 is absent between 72 and 78 - confirm that is intended.
n_discs = [51, 54, 57, 60, 63, 66, 69, 72, 78, 81, 84, 87, 90, 93, 96, 99]
df_list_by_num = [get_sub_df_according2col_value(df_data, "numerosity", n)
                  for n in n_discs]
prepro_df_list = list()
for sub_df in df_list_by_num:
    # mean +/- 3 SD bounds of responseN within this numerosity bin
    lower_bondary = get_mean(sub_df, "responseN") - 3 * get_std(sub_df,
                                                                "responseN")
    upper_bondary = get_mean(sub_df, "responseN") + 3 * get_std(sub_df,
                                                                "responseN")
    new_sub_df = drop_df_rows_according2_one_col(sub_df, "responseN",
                                                 lower_bondary,
                                                 upper_bondary)
    prepro_df_list.append(new_sub_df)
df_data_prepro = pd.concat(prepro_df_list,
                           ignore_index = True)  # 3.71% of trials were removed
# Unfiltered (pre-outlier-removal) data, kept for comparison.
df_full = pd.concat(df_list_t1, ignore_index = True)
if to_excel:
    df_data_prepro.to_excel("preprocessed_triplets_4.xlsx", index = False)
# Tail of a helper whose def line is above this view: code a display as 1
# when its RA score is at or above the mean, else 0.
    if ra_score >= mean:
        return 1
    else:
        return 0


if __name__ == '__main__':
    PATH = "../displays/"
    # NOTE(review): "uodate" looks like a typo for "update", but the name
    # must match the file on disk - do not rename here.
    FILE = "uodate_stim_info_RAscores.xlsx"
    stimuli_df = pd.read_excel(PATH + FILE)
    # calculate mean of the RA scores for each numerosity range
    winsize_list = [0.3, 0.4, 0.5, 0.6, 0.7]
    stimuli_df_list = [
        get_sub_df_according2col_value(stimuli_df, col_name="winsize",
                                       col_value=winsize)
        for winsize in winsize_list
    ]
    # Per-winsize mean of the "align_v_size6" RA score.
    mean_ra_score_list = list()
    for df in stimuli_df_list:
        mean_ra_score_list.append(df["align_v_size6"].mean())
    # Repeat each winsize mean 50 times - assumes exactly 50 displays per
    # winsize; TODO confirm.  NOTE(review): `mean` is a confusing name (a
    # list here, also used as a scalar in the helper above).
    mean = list()
    for i in mean_ra_score_list:
        mean += ([i] * 50)
    # Sort by winsize so rows line up with the winsize-ordered `mean` list.
    stimuli_df.sort_values(by=["winsize"], inplace=True)
    stimuli_df["mean_ra"] = mean
# drop obvious wrong response:
min_res = 10
max_res = 128
# Per-participant frames with impossible responses (outside 10..128)
# removed.
df_list_prepro = []
for df in df_list_t1:
    trimmed = drop_df_rows_according2_one_col(df, "responseN",
                                              min_res, max_res)
    df_list_prepro.append(trimmed)
# see trials number before removing any trials
df_data = pd.concat(df_list_prepro, ignore_index=True)
# Split by the true numerosity shown on each display.
n_discs = [34, 36, 38, 40, 42, 44, 54, 56, 58, 60, 62, 64]
df_list_by_num = [
    get_sub_df_according2col_value(df_data, "numerosity", n)
    for n in n_discs
]
# Keep responses within mean +/- 3 SD per numerosity bin.
prepro_df_list = []
for sub_df in df_list_by_num:
    avg = get_mean(sub_df, "responseN")
    spread = get_std(sub_df, "responseN")
    lower_bondary = avg - 3 * spread
    upper_bondary = avg + 3 * spread
    prepro_df_list.append(
        drop_df_rows_according2_one_col(sub_df, "responseN",
                                        lower_bondary, upper_bondary))
# 1.13% trials were removed
df_data_prepro = pd.concat(prepro_df_list, ignore_index=True)
# Beam-count labels, 250 repetitions each (presumably matching the 250
# rows of each n_beams_to_plot* frame - confirm upstream).
# NOTE(review): nl1-nl4 and n_beams_to_plot1-6 are defined above this view.
nl5 = [5 for i in range(0, 250)]
nl6 = [6 for i in range(0, 250)]
n_beams_to_plot1["N_disc_in_beam"] = nl1
n_beams_to_plot2["N_disc_in_beam"] = nl2
n_beams_to_plot3["N_disc_in_beam"] = nl3
n_beams_to_plot4["N_disc_in_beam"] = nl4
n_beams_to_plot5["N_disc_in_beam"] = nl5
n_beams_to_plot6["N_disc_in_beam"] = nl6
# get the dataframe, to be plot
n_beams_to_plot = pd.concat(
    [n_beams_to_plot1, n_beams_to_plot2, n_beams_to_plot3,
     n_beams_to_plot4, n_beams_to_plot5, n_beams_to_plot6],
    axis = 0, sort = True)
# separate for each numerosity range
winsize_list = [0.3, 0.4, 0.5, 0.6, 0.7]
n_beams_to_plot_list = [get_sub_df_according2col_value(n_beams_to_plot,
                                                       "winsize", winsize)
                        for winsize in winsize_list]
# Plots start here: a 2 x 3 grid - the first five panels show one winsize
# each, the sixth pools all winsizes.
fig, axes = plt.subplots(2, 3, figsize = (13, 6), sharex = True,
                         sharey = True)
axes = axes.ravel()
for i, ax in enumerate(axes):
    if i < 5:
        sns.boxplot(x = "N_disc_in_beam", y = "n_beam",
                    data = n_beams_to_plot_list[i], ax = ax,
                    hue = "crowdingcons",
                    palette = ["royalblue", "orangered"])
    else:
        sns.boxplot(x = "N_disc_in_beam", y = "n_beam",
                    data = n_beams_to_plot, ax = ax,
                    hue = "crowdingcons",
                    palette = ["royalblue", "orangered"])
# set x,y label
# NOTE(review): truncated here - the body of this loop continues outside
# this view.
for i, ax in enumerate(axes):
    if i == 4:
# Trailing keyword arguments of a pd.merge(...) call that starts above
# this view (left join on the display-identifying columns).
                  how="left", on=["index_stimuliInfo", "N_disk",
                                  "crowdingcons", "winsize"])
# preprocess
my_data = keep_valid_columns(all_df, KEPT_COL_NAMES5)
# add color coded for crowding and no-crowding displays
insert_new_col(my_data, "crowdingcons", 'colorcode',
               add_color_code_by_crowdingcons)
# color coded
insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons",
                             "colorcode5levels", add_color_code_5levels)
# %% correlations
winsize_list = [0.3, 0.4, 0.5, 0.6, 0.7]
# Restrict to the requested crowding condition
# (0/1/2 = no-crowding/crowding/all).
my_data = get_analysis_dataframe(my_data, crowding=crowdingcons)
df_list_beforegb = [
    get_sub_df_according2col_value(my_data, "winsize", winsize)
    for winsize in winsize_list
]
df_list = [
    get_data_to_analysis(df, "deviation_score", "a_values", "N_disk",
                         "list_index", "colorcode", "colorcode5levels")
    for df in df_list_beforegb
]
# correlation parameters
method = "pearson"
x = "a_values"
y = "deviation_score"
covar = "N_disk"
# corr: a values and numerosity
# NOTE(review): this empty-list init is immediately overwritten by the
# list literal that starts on the next line (continues past this view).
corr_av_ndisc = list()
corr_av_ndisc = [
# %% preprocess my_data = keep_valid_columns(all_df, KEPT_COL_NAMES) # add color coded for crowding and no-crowding displays insert_new_col(my_data, "crowdingcons", 'colorcode', add_color_code_by_crowdingcons) # color coded insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons", "colorcode5levels", add_color_code_5levels) # %% correlation # crowding = 0, 1, 2 for no-crowding, crowding and all data my_data = get_analysis_dataframe(my_data, crowding=crowdingcons) winsize = [0.3, 0.4, 0.5, 0.6, 0.7] my_data_list = [ get_sub_df_according2col_value(my_data, "winsize", ws) for ws in winsize ] # data to calcualte partial corr my_data_list2analysis = [ get_data_to_analysis(data, "deviation_score", alignment[indx_align_n], "N_disk", "list_index", "colorcode", "colorcode5levels") for data in my_data_list ] # partial corr between deviation score and alignment scores method = "pearson" partial_corr_list = [ pg.partial_corr(data, x="deviation_score", y=alignment[indx_align_n],
# read the totalData file
all_df = pd.read_excel('../../data/exp2_data_online/clean_totalData.xlsx',
                       index_col=0)
# drop obvious wrong response
col_to_drop_rows = "responseN"
min_res = 10
max_res = 100
all_df = drop_df_rows_according2_one_col(all_df, col_to_drop_rows,
                                         min_res, max_res)
# drop outside 3 strd
n_discs = [34, 36, 38, 40, 42, 44, 58, 60, 62, 64, 66, 68]
df_list = [
    get_sub_df_according2col_value(all_df, "Numerosity", n)
    for n in n_discs
]
col_to_process = "responseN"
prepro_df_list = []
# NOTE(review): `numerosity` from the zip is never used inside the loop.
for numerosity, sub_df in zip(n_discs, df_list):
    avg = get_mean(sub_df, col_to_process)
    spread = get_std(sub_df, col_to_process)
    lower_bondary = avg - 3 * spread
    upper_bondary = avg + 3 * spread
    prepro_df_list.append(
        drop_df_rows_according2_one_col(sub_df, col_to_process,
                                        lower_bondary, upper_bondary))
stimuli_to_merge = keep_valid_columns(stim_to_merge, KEPT_COL_NAMES3) # merge data with stimuli info all_df = pd.merge(data_to_merge, stimuli_to_merge, how = "left", on = ["index_stimuliInfo", "N_disk", "crowdingcons", "winsize"]) # %% preprocess my_data = keep_valid_columns(all_df, KEPT_COL_NAMES3) # add color coded for crowding and no-crowding displays insert_new_col(my_data, "crowdingcons", 'colorcode', add_color_code_by_crowdingcons) # color coded insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons", "colorcode5levels", add_color_code_5levels) # %% correlations my_data = get_analysis_dataframe(my_data, crowding = crowdingcons) # data for each winsize df_list_beforegb = [get_sub_df_according2col_value(my_data, "winsize", winsize) for winsize in winsize_list] df_list = [get_data_to_analysis(df, "deviation_score", "count_number", "N_disk", "list_index", "colorcode", "colorcode5levels") for df in df_list_beforegb] # partial corr parameters method = "pearson" x = "count_number" y = "deviation_score" covar = "N_disk" partial_corr_res_list = [pg.partial_corr(df, x = x, y = y, covar = covar, method = method) for df in df_list] # %% normalization df_list_norm_deviation = [normalize_deviation(df) for df in df_list] df_list_norm_countn = [normalize_zerotoone(df, to_normalize_col = "count_number") for df in df_list] # rename normed cols old_name_dev = "deviation_score" new_name_dev = "deviation_score_norm" old_name_countn = "count_number"
save_fig = False
see_clustering_level = True
# read data
PATH = "../data/exp2_data_online/"
DATA = "exp2_online_preprocessed.xlsx"
data = pd.read_excel(PATH + DATA)
# data to plot: different clustering levels
if see_clustering_level:
    # Mean deviation per crowding x winsize x clustering x participant cell.
    data2plot = data["deviation"].groupby(
        [data["crowding"], data["winsize"], data["clustering"],
         data["participantID"]]).mean()
    data2plot = data2plot.reset_index(level = ["crowding", "winsize",
                                               "clustering",
                                               "participantID"])
    # data sep winsize - NOTE(review): only 0.4 and 0.6 are plotted here.
    data_sep_ws = [get_sub_df_according2col_value(data2plot, "winsize",
                                                  winsize)
                   for winsize in [0.4, 0.6]]
    # some parameters
    x = "clustering"
    y = "deviation"
    hue = "crowding"
    errwidth = 2
    capsize = 0.05
    alpha = 0.5
    palette = ["royalblue", "orangered", "grey"]
    ci = 68  # 68% CI (roughly one SEM)
    # Plot starts here - NOTE(review): this call is truncated in this
    # view and continues outside it.
    plot_with_clustering(data2plot_sep_ws = data_sep_ws, x = x, y = y,
                         hue = hue,
if __name__ == "__main__":
    is_debug = True
    see_4condi_in_all = False
    save_plots = True
    PATH = "../data/exp3_data/exp3_pilot_data/"
    DATAFILE = "exp3a_preprocessed.xlsx"
    mydata = pd.read_excel(PATH + DATAFILE)
    # exclude subject TODO
    # NOTE(review): exclusion is declared but not applied in this view.
    exclude = False
    exclude_n = 8
    # exp conditions in separate df for plots
    refc = get_sub_df_according2col_value(mydata, "refCrowding", 1)
    refnc = get_sub_df_according2col_value(mydata, "refCrowding", 0)
    probec = get_sub_df_according2col_value(mydata, "probeCrowding", 1)
    probenc = get_sub_df_according2col_value(mydata, "probeCrowding", 0)
    # below are four exp conditions (ref x probe crowding crossed)
    refcprobec = get_sub_df_according2col_value(refc, "probeCrowding", 1)
    refcprobenc = get_sub_df_according2col_value(refc, "probeCrowding", 0)
    refncprobec = get_sub_df_according2col_value(refnc, "probeCrowding", 1)
    refncprobenc = get_sub_df_according2col_value(refnc, "probeCrowding", 0)
    # %% plots - see all together
    # all data
    x = "probeN"
    y = "is_resp_probe_more"
    hue = "ref_probe_condi"
    # NOTE(review): truncated here - the body of this branch continues
    # outside this view.
    if see_4condi_in_all: