return df if __name__ == '__main__': to_excel = False # read data PATH = "../../data/ms2_triplets_4_data/rawdata/" # list raw data files files = os.listdir(PATH) df_list_all = [pd.read_csv(PATH + file) for file in files if file.endswith("csv")] # preprocess df_list = list() for df in df_list_all: # keep useful cols df = keep_valid_columns(df, KEEP_COLs) df = pre_process_indi_data(df) df_list.append(df) # add deviation score and percent change for df in df_list: insert_new_col_from_two_cols(df, "responseN", "numerosity", "deviation_score", get_deviation) insert_new_col_from_two_cols(df, "deviation_score", "numerosity", "percent_change", get_percent_change) # check subitizing results subitizing_df_list = list() for df in df_list: sub_df = df.loc[df["numerosity"] <=4] subitizing_df_list.append(sub_df) # 36 subitizing trials per participant: all participants are above 90%, the worst 34 out of 36 are correct
all_df = merge_all_data.merge_all_file2dataframe(data_path, filetype, filename_prefix) return all_df if __name__ == '__main__': DATA_PATH = "../../data/exp3_data/exp3_pilot_data/rawdata/" FILENAME_PREFIX = "P" FILETYPE = ".csv" drop_fastandslow_resp = False save_preprocessed = True # load raw data mydata = preprocess_exp3a_func(DATA_PATH, FILETYPE, FILENAME_PREFIX) # preprocess starts here mydata = keep_valid_columns(mydata, KEPT_COL_NAMES) # drop practice trials: drop all rows with NaNs in key_resp.keys col_to_dropna = ['key_resp.keys'] mydata = drop_df_nan_rows_according2cols(mydata, col_to_dropna) # drop too fast and too slow response if drop_fastandslow_resp: col_to_drop_rows = "key_resp.rt" min_rt = 0.15 max_rt = 3 mydata = drop_df_rows_according2_one_col(mydata, col_to_drop_rows, min_rt, max_rt) # add numerosity difference between D1 and D2 mydata["dff_D1D2"] = mydata["D1numerosity"] - mydata["D2numerosity"]
FILENAME_DATA = "cleanedTotalData_fullinfo_v3.xlsx"
ENCIRCLE_DATA = "../data/ms1_encircle/preprocessed_encircle.csv"

# Stimulus metadata comes from the display module; estimation data from Excel.
stimuli_to_merge = exp1_radial_display2.stimuli_df
data_to_merge = pd.read_excel(PATH_DATA + FILENAME_DATA)

# Attach the stimulus properties to every estimation row.
merge_keys = ["index_stimuliInfo", "N_disk", "crowdingcons", "winsize"]
all_df = pd.merge(data_to_merge, stimuli_to_merge, how="left", on=merge_keys)

# keep valid columns estimation data
all_df = keep_valid_columns(all_df, KEEP_COL_Coor)

# encircle data
encircle_df = pd.read_csv(ENCIRCLE_DATA)

# Average the estimation responses per display.
estimation_keys = ["displayN", "crowdingcons", "list_index", "N_disk", "winsize"]
data = all_df.groupby(estimation_keys)["response"] \
    .agg(["mean", "std"]) \
    .reset_index(level=estimation_keys)
rename_df_col(df=data, old_col_name="mean", new_col_name="response_mean")
rename_df_col(df=data, old_col_name="std", new_col_name="response_std")

# Average the encircle (perceived-grouping) data across participants.
encircle_keys = ["displayN", "crowdingcons", "list_index", "numerosity", "winsize"]
data_encircle = encircle_df.groupby(encircle_keys)["groups_n"] \
    .agg(["mean", "std"]) \
    .reset_index(level=encircle_keys)
from src.preprocess.sub.get_data2analysis import drop_df_rows_according2_one_col

if __name__ == '__main__':
    write_to_excel = False

    # Raw Prolific data: one CSV per participant.
    PATH = "../../data/ms2_mix_prolific_2_data/raw/"
    dir_list = os.listdir(PATH)
    df_list_all = [
        pd.read_csv(PATH + file) for file in dir_list if file.endswith(".csv")
    ]

    # preprocess
    df_list = list()
    for df in df_list_all:
        # keep useful cols
        df = keep_valid_columns(df=df, kept_columns_list=KEEP_COLS)

        # drop practice and ref trials (no trial counter recorded for them)
        df = df.dropna(subset=["trials.thisN"])

        # BUG FIX: the .str accessor raises AttributeError on numeric dtypes,
        # so the non-numeric filtering must live inside the same object-dtype
        # guard as the whitespace strip. A column pandas already parsed as
        # numeric contains only valid numbers and needs no filtering.
        if df["responseN"].dtypes == "object":
            # remove spaces
            df["responseN"] = df["responseN"].str.strip()
            # remove non numeric responses
            df["is_num"] = df["responseN"].str.isnumeric()
            drop_index = df[df["is_num"] == False].index
            df.drop(drop_index, inplace=True)

        # change responseN to float
        change_col_value_type(df, "responseN", float)
columns=[to_normalize_col]) if __name__ == '__main__': # TODO set parameters crowdingcons = 2 # 0, 1, 2 for no-crowding, crowding and all data parrtial_corr = True # read stimuli info and data PATH_DATA = "../data/exp1_rerun_data/" FILENAME_DATA = "cleanedTotalData_fullinfo_v2.xlsx" PATH_STIM = "../displays/" FILENAME_STIM = "update_stim_info_full.xlsx" data_to_merge = pd.read_excel(PATH_DATA + FILENAME_DATA) stimuli_to_merge = pd.read_excel(PATH_STIM + FILENAME_STIM) # keep needed cols stimuli_to_merge = keep_valid_columns(stimuli_to_merge, KEPT_COL_NAMES4) # merge data with stimuli info all_df = pd.merge( data_to_merge, stimuli_to_merge, how="left", on=["index_stimuliInfo", "N_disk", "crowdingcons", "winsize"]) # preprocess my_data = keep_valid_columns(all_df, KEPT_COL_NAMES5) # add color coded for crowding and no-crowding displays insert_new_col(my_data, "crowdingcons", 'colorcode', add_color_code_by_crowdingcons) # color coded insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons", "colorcode5levels", add_color_code_5levels)
# Stimulus metadata from the display-definition module; behavioural data from Excel.
stimuli_to_merge_ori = exp1_radial_display2.stimuli_df
data_to_merge = pd.read_excel(PATH_DATA + FILENAME_DATA)
# unify col value type: both frames must hold identical dtypes on the merge
# keys, otherwise the left-merge below silently fails to match rows
change_col_value_type(stimuli_to_merge_ori, "crowdingcons", int)
change_col_value_type(stimuli_to_merge_ori, "winsize", float)
change_col_value_type(stimuli_to_merge_ori, "index_stimuliInfo", str)
change_col_value_type(stimuli_to_merge_ori, "N_disk", int)
change_col_value_type(data_to_merge, "crowdingcons", int)
change_col_value_type(data_to_merge, "winsize", float)
change_col_value_type(data_to_merge, "index_stimuliInfo", str)
change_col_value_type(data_to_merge, "N_disk", int)
# remove duplicated cols before merging
stimuli_to_merge = keep_valid_columns(stimuli_to_merge_ori, KEPT_COL_NAMES_STIMU_DF)
# merge data with stimuli info (left join keeps every behavioural row)
all_df = pd.merge(
    data_to_merge,
    stimuli_to_merge,
    how="left",
    on=["index_stimuliInfo", "N_disk", "crowdingcons", "winsize"])
# %% preprocess
my_data = keep_valid_columns(all_df, KEPT_COL_NAMES)
# add color coded for crowding and no-crowding displays
insert_new_col(my_data, "crowdingcons", 'colorcode', add_color_code_by_crowdingcons)
# color coded (5 levels); NOTE(review): this call is truncated at the chunk
# boundary — its remaining arguments continue beyond this view
insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons",
# NOTE(review): the next two statements are the tail of a function whose
# `def` lies above this chunk; presumably it merges all raw files matching
# the prefix/filetype into one DataFrame — confirm against the full file.
all_df = merge_all_data.merge_all_file2dataframe(data_path, filetype, filename_prefix)
return all_df

if __name__ == "__main__":
    write_to_excel = False
    # Raw data location and file-matching parameters for the exp1 rerun.
    DATA_PATH = "../../data/exp1_rerun_data/rawdata/"
    FILENAME_PREFIX = "a"
    FILETYPE = ".csv"
    # read raw data
    all_df = preprocess_exp1rerun_func(DATA_PATH, FILETYPE, FILENAME_PREFIX)
    # preprocess: keep only the analysis columns
    all_df = keep_valid_columns(all_df, KEPT_COL_NAMES_exp1)
    # drop obvious wrong response (keep responses within [10, 100])
    col_to_drop_rows = "response"
    min_res = 10
    max_res = 100
    all_df = drop_df_rows_according2_one_col(all_df, col_to_drop_rows, min_res,
                                             max_res)
    # drop response outside 3 strd
    # presumably the tested disc counts of the experiment; verify against the
    # display-generation code
    n_discs = [
        21, 22, 23, 24, 25, 31, 32, 33, 34, 35, 41, 42, 43, 44, 45, 49, 50,
        51, 52, 53, 54, 55, 56, 57, 58
    ]
    # NOTE(review): this list comprehension is truncated at the chunk
    # boundary — its body continues beyond this view
    df_list = [