コード例 #1
0
    return df


if __name__ == '__main__':
    # Switch for writing results out to Excel (not used in the visible
    # portion of this script).
    to_excel = False
    # read data
    PATH = "../../data/ms2_triplets_4_data/rawdata/"
    # list raw data files
    files = os.listdir(PATH)
    # Load every CSV in the raw-data folder into its own DataFrame,
    # one per file (presumably one per participant — TODO confirm).
    df_list_all = [pd.read_csv(PATH + file) for file in files if file.endswith("csv")]

    # preprocess
    df_list = list()
    for df in df_list_all:
        # keep useful cols
        df = keep_valid_columns(df, KEEP_COLs)
        df = pre_process_indi_data(df)
        df_list.append(df)

    # add deviation score and percent change
    for df in df_list:
        # deviation_score is derived from (responseN, numerosity);
        # percent_change from (deviation_score, numerosity).  The helper
        # functions are defined elsewhere in the project.
        insert_new_col_from_two_cols(df, "responseN", "numerosity", "deviation_score", get_deviation)
        insert_new_col_from_two_cols(df, "deviation_score", "numerosity", "percent_change", get_percent_change)

    # check subitizing results
    subitizing_df_list = list()
    for df in df_list:
        # Subitizing range: keep only trials with numerosity <= 4.
        sub_df = df.loc[df["numerosity"] <=4]
        subitizing_df_list.append(sub_df)

    # 36 subitizing trials per participant: all participants are above 90%, the worst 34 out of 36 are correct
コード例 #2
0
    all_df = merge_all_data.merge_all_file2dataframe(data_path, filetype,
                                                     filename_prefix)
    return all_df


if __name__ == '__main__':
    # Input location and file-matching parameters for the raw data.
    DATA_PATH = "../../data/exp3_data/exp3_pilot_data/rawdata/"
    FILENAME_PREFIX = "P"
    FILETYPE = ".csv"
    # Processing switches: optionally drop responses by reaction time,
    # and save the preprocessed output (saving happens past the visible
    # portion of this script).
    drop_fastandslow_resp = False
    save_preprocessed = True

    # load raw data
    mydata = preprocess_exp3a_func(DATA_PATH, FILETYPE, FILENAME_PREFIX)
    # preprocess starts here
    mydata = keep_valid_columns(mydata, KEPT_COL_NAMES)

    # drop practice trials: drop all rows with NaNs in key_resp.keys
    col_to_dropna = ['key_resp.keys']
    mydata = drop_df_nan_rows_according2cols(mydata, col_to_dropna)

    # drop too fast and too slow response
    if drop_fastandslow_resp:
        # RT bounds (presumably seconds — TODO confirm); rows whose
        # key_resp.rt falls outside the bounds are removed by the helper.
        col_to_drop_rows = "key_resp.rt"
        min_rt = 0.15
        max_rt = 3
        mydata = drop_df_rows_according2_one_col(mydata, col_to_drop_rows,
                                                 min_rt, max_rt)

    # add numerosity difference between D1 and D2
    mydata["dff_D1D2"] = mydata["D1numerosity"] - mydata["D2numerosity"]
コード例 #3
0
    # Input file names/paths used below (PATH_DATA is defined earlier,
    # outside the visible portion of this script).
    FILENAME_DATA = "cleanedTotalData_fullinfo_v3.xlsx"

    ENCIRCLE_DATA = "../data/ms1_encircle/preprocessed_encircle.csv"

    # Stimuli info is provided by the exp1_radial_display2 module;
    # estimation data is read from Excel.
    stimuli_to_merge = exp1_radial_display2.stimuli_df
    data_to_merge = pd.read_excel(PATH_DATA + FILENAME_DATA)

    # merge stimuli file with estimation data
    # Left merge keeps every estimation row and attaches stimulus
    # attributes via the four shared key columns.
    all_df = pd.merge(
        data_to_merge,
        stimuli_to_merge,
        how="left",
        on=["index_stimuliInfo", "N_disk", "crowdingcons", "winsize"])

    # keep valid columns estimation data
    all_df = keep_valid_columns(all_df, KEEP_COL_Coor)

    # encircle data
    encircle_df = pd.read_csv(ENCIRCLE_DATA)

    # group by the estimation data by each display
    # Per-display mean/std of "response"; reset_index turns the group
    # keys back into ordinary columns.
    data = all_df.groupby(["displayN", "crowdingcons", "list_index", "N_disk", "winsize"])["response"] \
        .agg(["mean", "std"]).reset_index(level = ["displayN", "crowdingcons", "list_index", "N_disk", "winsize"])

    # Rename the generic agg columns to self-describing names.
    rename_df_col(df=data, old_col_name="mean", new_col_name="response_mean")
    rename_df_col(df=data, old_col_name="std", new_col_name="response_std")

    # group by the encircle data - average participant
    # Same aggregation for the encircle data, keyed by "numerosity"
    # instead of "N_disk".
    data_encircle = encircle_df.groupby(["displayN", "crowdingcons", "list_index", "numerosity", "winsize"])["groups_n"] \
        .agg(["mean", "std"]).reset_index(level = ["displayN", "crowdingcons", "list_index", "numerosity", "winsize"])
コード例 #4
0
from src.preprocess.sub.get_data2analysis import drop_df_rows_according2_one_col

if __name__ == '__main__':
    # Switch for writing results out to Excel (not used in the visible
    # portion of this script).
    write_to_excel = False
    PATH = "../../data/ms2_mix_prolific_2_data/raw/"
    dir_list = os.listdir(PATH)

    # Load every CSV in the raw-data folder into its own DataFrame.
    df_list_all = [
        pd.read_csv(PATH + file) for file in dir_list if file.endswith(".csv")
    ]

    # preprocess
    df_list = list()
    for df in df_list_all:
        # keep useful cols
        df = keep_valid_columns(df=df, kept_columns_list=KEEP_COLS)

        # drop practice and ref trials
        # (those rows have NaN in trials.thisN)
        df = df.dropna(subset=["trials.thisN"])

        # remove spaces
        # Only string-typed response columns need cleaning; a numeric
        # dtype means pandas already parsed every value.
        if df["responseN"].dtypes == "object":
            df["responseN"] = df["responseN"].str.strip()
            # remove non numeric responses
            df["is_num"] = df["responseN"].str.isnumeric()
            drop_index = df[df["is_num"] == False].index
            df.drop(drop_index, inplace=True)

            # change responseN to float
            change_col_value_type(df, "responseN", float)
コード例 #5
0
                        columns=[to_normalize_col])


if __name__ == '__main__':
    # TODO set parameters
    crowdingcons = 2  # 0, 1, 2 for no-crowding, crowding and all data
    parrtial_corr = True
    # read stimuli info and data
    PATH_DATA = "../data/exp1_rerun_data/"
    FILENAME_DATA = "cleanedTotalData_fullinfo_v2.xlsx"
    PATH_STIM = "../displays/"
    FILENAME_STIM = "update_stim_info_full.xlsx"
    data_to_merge = pd.read_excel(PATH_DATA + FILENAME_DATA)
    stimuli_to_merge = pd.read_excel(PATH_STIM + FILENAME_STIM)
    # keep needed cols
    stimuli_to_merge = keep_valid_columns(stimuli_to_merge, KEPT_COL_NAMES4)
    # merge data with stimuli info
    # Left merge keeps every response row; stimulus attributes are
    # attached via the four shared key columns.
    all_df = pd.merge(
        data_to_merge,
        stimuli_to_merge,
        how="left",
        on=["index_stimuliInfo", "N_disk", "crowdingcons", "winsize"])
    # preprocess
    my_data = keep_valid_columns(all_df, KEPT_COL_NAMES5)
    # add color coded for crowding and no-crowding displays
    # (the helper maps crowdingcons values to plot color codes)
    insert_new_col(my_data, "crowdingcons", 'colorcode',
                   add_color_code_by_crowdingcons)
    # color coded
    # Five-level color code derived from (N_disk, crowdingcons).
    insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons",
                                 "colorcode5levels", add_color_code_5levels)
コード例 #6
0
# Stimuli info comes from the exp1_radial_display2 module; estimation data
# is read from Excel (PATH_DATA / FILENAME_DATA are defined earlier,
# outside the visible portion of this script).
stimuli_to_merge_ori = exp1_radial_display2.stimuli_df
data_to_merge = pd.read_excel(PATH_DATA + FILENAME_DATA)

# unify col value type
# The merge keys must have matching dtypes on both sides, otherwise
# pd.merge silently fails to match rows.
change_col_value_type(stimuli_to_merge_ori, "crowdingcons", int)
change_col_value_type(stimuli_to_merge_ori, "winsize", float)
change_col_value_type(stimuli_to_merge_ori, "index_stimuliInfo", str)
change_col_value_type(stimuli_to_merge_ori, "N_disk", int)

change_col_value_type(data_to_merge, "crowdingcons", int)
change_col_value_type(data_to_merge, "winsize", float)
change_col_value_type(data_to_merge, "index_stimuliInfo", str)
change_col_value_type(data_to_merge, "N_disk", int)

# remove duplicated cols
stimuli_to_merge = keep_valid_columns(stimuli_to_merge_ori,
                                      KEPT_COL_NAMES_STIMU_DF)

# merge data with stimuli info
# Left merge keeps every response row; stimulus attributes are attached
# via the four shared key columns.
all_df = pd.merge(
    data_to_merge,
    stimuli_to_merge,
    how="left",
    on=["index_stimuliInfo", "N_disk", "crowdingcons", "winsize"])

# %% preprocess
my_data = keep_valid_columns(all_df, KEPT_COL_NAMES)
# add color coded for crowding and no-crowding displays
insert_new_col(my_data, "crowdingcons", 'colorcode',
               add_color_code_by_crowdingcons)
# color coded
insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons",
コード例 #7
0
    all_df = merge_all_data.merge_all_file2dataframe(data_path, filetype,
                                                     filename_prefix)
    return all_df


if __name__ == "__main__":
    write_to_excel = False
    DATA_PATH = "../../data/exp1_rerun_data/rawdata/"
    FILENAME_PREFIX = "a"
    FILETYPE = ".csv"

    # read raw data
    all_df = preprocess_exp1rerun_func(DATA_PATH, FILETYPE, FILENAME_PREFIX)

    # preprocess
    all_df = keep_valid_columns(all_df, KEPT_COL_NAMES_exp1)

    # drop obvious wrong response
    col_to_drop_rows = "response"
    min_res = 10
    max_res = 100
    all_df = drop_df_rows_according2_one_col(all_df, col_to_drop_rows, min_res,
                                             max_res)

    # drop response outside 3 strd
    n_discs = [
        21, 22, 23, 24, 25, 31, 32, 33, 34, 35, 41, 42, 43, 44, 45, 49, 50, 51,
        52, 53, 54, 55, 56, 57, 58
    ]

    df_list = [