# drop practice trials: drop all rows with NaNs in the key_resp.keys column
col_to_dropna = ['key_resp.keys']
mydata = drop_df_nan_rows_according2cols(mydata, col_to_dropna)

# drop too-fast and too-slow responses
if drop_fastandslow_resp:
    col_to_drop_rows = "key_resp.rt"
    min_rt = 0.15
    max_rt = 3
    mydata = drop_df_rows_according2_one_col(mydata, col_to_drop_rows, min_rt, max_rt)

# add numerosity difference between D1 and D2
mydata["dff_D1D2"] = mydata["D1numerosity"] - mydata["D2numerosity"]

# add correct answer
insert_new_col_from_two_cols(mydata, "ref_first", "key_resp.keys", "is_resp_ref_more",
                             insert_is_resp_ref_more)
insert_new_col(mydata, "is_resp_ref_more", "is_resp_probe_more", insert_is_resp_probe_more)

# add probe numerosity
insert_new_col_from_three_cols(mydata, "D1numerosity", "D2numerosity", "ref_first",
                               "probeN", insert_probeN)

# add ref numerosity
insert_new_col_from_three_cols(mydata, "D1numerosity", "D2numerosity", "ref_first",
                               "refN", insert_refN)

# add probe crowding condition
insert_new_col_from_three_cols(mydata, "D1Crowding", "D2Crowding", "ref_first",
                               "probeCrowding", insert_probeCrowding)

# add ref crowding condition
insert_new_col_from_three_cols(mydata, "D1Crowding", "D2Crowding", "ref_first",
                               "refCrowding", insert_refCrowding)
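# --- Note: the insert_new_col* helpers used above come from the project's
# src.commons.process_dataframe module and are not shown in this fragment.
# A minimal sketch of what they presumably do, assuming each applies a
# row-wise function to the named source column(s) and stores the result in a
# new column (an illustrative sketch, not the project's actual implementation):
import pandas as pd


def insert_new_col(df: pd.DataFrame, col: str, new_col: str, func) -> None:
    # apply func to each value of the source column
    df[new_col] = [func(v) for v in df[col]]


def insert_new_col_from_two_cols(df: pd.DataFrame, col1: str, col2: str,
                                 new_col: str, func) -> None:
    # apply func element-wise across two source columns
    df[new_col] = [func(a, b) for a, b in zip(df[col1], df[col2])]


def insert_new_col_from_three_cols(df: pd.DataFrame, col1: str, col2: str, col3: str,
                                   new_col: str, func) -> None:
    # same idea with three source columns
    df[new_col] = [func(a, b, c) for a, b, c in zip(df[col1], df[col2], df[col3])]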
indv2 = "protectzonetype"
indv3 = "winsize"
indv4 = "percent_triplets"
indv5 = "participant"

# average data: mean deviation and percent change for each condition per participant
data_1 = data.groupby([indv, indv2, indv3, indv4, indv5])[[dv, dv2]] \
    .agg({dv: ['mean', 'std'], dv2: ['mean', 'std']}) \
    .reset_index(level=[indv, indv2, indv3, indv4, indv5])

# flatten the MultiIndex column names (e.g. ('deviation_score', 'std') -> 'deviation_scorestd')
data_1.columns = [''.join(x) for x in data_1.columns]
data_1["samplesize"] = [5] * data_1.shape[0]  # each participant repeats each condition 5 times (5 displays)
insert_new_col_from_two_cols(data_1, "deviation_scorestd", "samplesize", "SEM_deviation_score", cal_SEM)
insert_new_col_from_two_cols(data_1, "percent_changestd", "samplesize", "SEM_percent_change", cal_SEM)

# average across participants
data_2 = data.groupby([indv, indv2, indv3, indv4])[[dv, dv2]] \
    .agg({dv: ['mean', 'std'], dv2: ['mean', 'std']}) \
    .reset_index(level=[indv, indv2, indv3, indv4])

# flatten the MultiIndex column names
data_2.columns = [''.join(x) for x in data_2.columns]
insert_new_col(data_2, indv3, "samplesize", get_samplesize)  # 50 participants: 29 for winsize 0.4, 21 for winsize 0.6
insert_new_col_from_two_cols(data_2, "deviation_scorestd", "samplesize", "SEM_deviation_score", cal_SEM)
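# --- Note: cal_SEM (imported from src.commons.process_number elsewhere in
# this repo) is not shown here. A minimal sketch, assuming the usual
# standard-error formula applied to the flattened std column and the sample size:
import math


def cal_SEM(std: float, samplesize: int) -> float:
    # standard error of the mean: SD divided by the square root of n
    return std / math.sqrt(samplesize)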
PATH = "../../data/ms2_triplets_4_data/rawdata/"

# list raw data files
files = os.listdir(PATH)
df_list_all = [pd.read_csv(PATH + file) for file in files if file.endswith("csv")]

# preprocess
df_list = list()
for df in df_list_all:
    # keep useful cols
    df = keep_valid_columns(df, KEEP_COLs)
    df = pre_process_indi_data(df)
    df_list.append(df)

# add deviation score and percent change
for df in df_list:
    insert_new_col_from_two_cols(df, "responseN", "numerosity", "deviation_score", get_deviation)
    insert_new_col_from_two_cols(df, "deviation_score", "numerosity", "percent_change", get_percent_change)

# check subitizing results
subitizing_df_list = list()
for df in df_list:
    sub_df = df.loc[df["numerosity"] <= 4]
    subitizing_df_list.append(sub_df)

# 36 subitizing trials per participant: all participants score above 90% correct;
# the worst gets 34 out of 36 correct
correct_trial_list = list()
for sub_df in subitizing_df_list:
    correct_trial_list.append((sub_df["deviation_score"] == 0).sum())

# remove subitizing trials
df_list_t1 = list()
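# --- Note: get_deviation and get_percent_change are project helpers not shown
# in this fragment. Judging from the argument order in the calls above
# (responseN vs. numerosity, then deviation_score vs. numerosity), plausible
# sketches follow; whether percent change is scaled by 100 is an assumption:


def get_deviation(response_n: float, numerosity: float) -> float:
    # signed difference between the reported and the true numerosity;
    # 0 means a correct response (used for the subitizing check above)
    return response_n - numerosity


def get_percent_change(deviation_score: float, numerosity: float) -> float:
    # deviation expressed relative to the true numerosity
    return deviation_score / numerosity * 100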
    for winsize in winsize_list
]

mean_ra_score_list = list()
for df in stimuli_df_list:
    mean_ra_score_list.append(df["align_v_size6"].mean())

mean = list()
for i in mean_ra_score_list:
    mean += [i] * 50

stimuli_df.sort_values(by=["winsize"], inplace=True)
stimuli_df["mean_ra"] = mean

# new column: split displays into high and low RA-score groups
insert_new_col_from_two_cols(stimuli_df, "align_v_size6", "mean_ra", "ra_group", is_ra_score_high)

# high and low RA groups
high_ra_df = stimuli_df[stimuli_df["ra_group"] == 1]
low_ra_df = stimuli_df[stimuli_df["ra_group"] == 0]

h = {
    k: g['result_density_projection'].tolist()
    for k, g in high_ra_df.groupby('N_disk')
}
l = {
    k: g['result_density_projection'].tolist()
    for k, g in low_ra_df.groupby('N_disk')
}

a = str_to_list(h[25][0])
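# --- Note: is_ra_score_high and str_to_list are also undefined in this
# fragment. Sketches under the assumption that the former compares a display's
# alignment (RA) score against the group mean, and the latter parses a list
# that Excel stored as a plain string:
import ast


def is_ra_score_high(ra_score: float, mean_ra: float) -> int:
    # 1 = above the mean RA score (high-RA group), 0 = at or below it
    return int(ra_score > mean_ra)


def str_to_list(s: str) -> list:
    # cells like "[0.1, 0.2, ...]" come back from the spreadsheet as strings
    return ast.literal_eval(s)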
import pandas as pd

from src.commons.process_dataframe import insert_new_col_from_two_cols
from src.commons.process_number import cal_SEM

if __name__ == '__main__':
    to_excel = False
    # read data
    PATH = "../data/ms1_encircle/"
    DATA = "ms1_encircle_data.xlsx"
    data = pd.read_excel(PATH + DATA)

    dv = "average_group"
    indv = "numerosity"
    indv2 = "crowdingcons"
    indv3 = "winsize"

    data_1 = data.groupby([indv, indv2, indv3, "displayN"])[dv] \
        .agg(['mean', 'std']) \
        .reset_index(level=[indv, indv2, indv3])
    data_1["samplesize"] = [5 * 3] * data_1.shape[0]  # 5 displays, 3 participants
    # SEM is computed from the std column and the sample size
    insert_new_col_from_two_cols(data_1, "std", "samplesize", "SEM", cal_SEM)

    if to_excel:
        data_1.to_excel("ms1_encircle_by_num.xlsx")
stimuli_to_merge = pd.read_excel(PATH_STIM + FILENAME_STIM)

# keep needed cols
stimuli_to_merge = keep_valid_columns(stimuli_to_merge, KEPT_COL_NAMES4)

# merge data with stimuli info
all_df = pd.merge(data_to_merge,
                  stimuli_to_merge,
                  how="left",
                  on=["index_stimuliInfo", "N_disk", "crowdingcons", "winsize"])

# preprocess
my_data = keep_valid_columns(all_df, KEPT_COL_NAMES5)

# add color codes for crowding and no-crowding displays
insert_new_col(my_data, "crowdingcons", 'colorcode', add_color_code_by_crowdingcons)
insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons", "colorcode5levels",
                             add_color_code_5levels)

# %% correlations
winsize_list = [0.3, 0.4, 0.5, 0.6, 0.7]
my_data = get_analysis_dataframe(my_data, crowding=crowdingcons)
df_list_beforegb = [
    get_sub_df_according2col_value(my_data, "winsize", winsize)
    for winsize in winsize_list
]
df_list = [
    get_data_to_analysis(df, "deviation_score", "a_values", "N_disk", "list_index",
                         "colorcode", "colorcode5levels")
    for df in df_list_beforegb
]

# correlation parameters
method = "pearson"
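# --- Note: the correlation step itself is cut off in this fragment. With
# pandas, a per-winsize Pearson correlation between deviation scores and the
# stimulus a_values could look like the following (an illustration, assuming
# each entry of df_list keeps the column names passed to get_data_to_analysis
# above):
for winsize, df in zip(winsize_list, df_list):
    r = df["deviation_score"].corr(df["a_values"], method=method)
    print(f"winsize {winsize}: Pearson r = {r:.3f}")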
    upper_bondary = get_mean(sub_df, col_to_process) + 3 * get_std(sub_df, col_to_process)
    new_sub_df = drop_df_rows_according2_one_col(sub_df, col_to_process,
                                                 lower_bondary, upper_bondary)
    prepro_df_list.append(new_sub_df)

mydata = pd.concat(prepro_df_list, ignore_index=True)

# add columns / rename columns
insert_new_col(mydata, "Display", "winsize", imageFile_to_number2)
insert_new_col(mydata, "Display", "index_stimuliInfo", imageFile_to_number)
rename_df_col(mydata, "Numerosity", "N_disk")
rename_df_col(mydata, "Crowding", "crowdingcons")

# DV: deviation
insert_new_col_from_two_cols(mydata, "response", "N_disk", "deviation_score", get_deviation)

# make sure column value types are correct
change_col_value_type(mydata, "crowdingcons", int)
change_col_value_type(mydata, "winsize", float)
change_col_value_type(mydata, "index_stimuliInfo", str)
change_col_value_type(mydata, "N_disk", int)

# group data for the bar plot
crwdng = "crowdingcons"
ws = "winsize"
pp = "participant_N"
groupby_data = mydata["deviation_score"].groupby(
    [mydata[crwdng], mydata[ws], mydata[pp]]).mean()
groupby_data = groupby_data.reset_index(level=[crwdng, ws, pp])
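# --- Note: drop_df_rows_according2_one_col (used above for the mean +/- 3 SD
# outlier trimming, and elsewhere for the RT filter) is not defined in this
# fragment. A plausible sketch, assuming the boundaries are inclusive:
import pandas as pd


def drop_df_rows_according2_one_col(df: pd.DataFrame, col: str,
                                    min_val: float, max_val: float) -> pd.DataFrame:
    # keep only rows whose value in `col` lies within [min_val, max_val]
    return df[(df[col] >= min_val) & (df[col] <= max_val)]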