# 36 subitizing trials per participant: all participants are above 90% correct; the worst scored 34 out of 36
    correct_trial_list = list()
    for sub_df in subitizing_df_list:
        correct_trial_list.append((sub_df["deviation_score"] == 0).sum())

    # remove subitizing trials (numerosity <= 4)
    df_list_t1 = list()
    for df in df_list:
        df_list_t1.append(df.loc[df["numerosity"] > 4])

    min_res = 10
    max_res = 150
    df_list_prepro = list()
    for df in df_list_t1:
        df_list_prepro.append(drop_df_rows_according2_one_col(df, "responseN", min_res, max_res))

    # concatenate all participants
    df_data = pd.concat(df_list_prepro)

    # keep responses within 3 SD of the mean, per numerosity level
    n_discs = [51, 54, 57, 60, 63, 66, 69, 72,
               78, 81, 84, 87, 90, 93, 96, 99]

    df_list_by_num = [get_sub_df_according2col_value(df_data, "numerosity", n) for n in n_discs]
    prepro_df_list = list()
    for sub_df in df_list_by_num:
        lower_boundary = get_mean(sub_df, "responseN") - 3 * get_std(sub_df, "responseN")
        upper_boundary = get_mean(sub_df, "responseN") + 3 * get_std(sub_df, "responseN")
        new_sub_df = drop_df_rows_according2_one_col(sub_df, "responseN", lower_boundary, upper_boundary)
        prepro_df_list.append(new_sub_df)
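
The snippet above leans on a few shared helpers whose bodies are not shown. A minimal sketch under stated assumptions: `drop_df_rows_according2_one_col` keeps rows whose value lies in a [min, max] range, and `get_sub_df_according2col_value` selects rows matching one value. The names and signatures come from the call sites; the bodies and the inclusive bounds are assumptions.

def drop_df_rows_according2_one_col(df, col_name, min_val, max_val):
    # keep only rows whose col_name value falls inside [min_val, max_val];
    # inclusive bounds are an assumption, the original may use strict ones
    return df.loc[(df[col_name] >= min_val) & (df[col_name] <= max_val)]


def get_sub_df_according2col_value(df, col_name, value):
    # return the subset of rows where col_name equals the given value
    return df.loc[df[col_name] == value]
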
Example #2
    # load raw data
    mydata = preprocess_exp3a_func(DATA_PATH, FILETYPE, FILENAME_PREFIX)
    # preprocess starts here
    mydata = keep_valid_columns(mydata, KEPT_COL_NAMES)

    # drop practice trials: drop all rows with NaNs in key_resp.keys
    col_to_dropna = ['key_resp.keys']
    mydata = drop_df_nan_rows_according2cols(mydata, col_to_dropna)

    # drop too-fast and too-slow responses
    if drop_fastandslow_resp:
        col_to_drop_rows = "key_resp.rt"
        min_rt = 0.15
        max_rt = 3
        mydata = drop_df_rows_according2_one_col(mydata, col_to_drop_rows,
                                                 min_rt, max_rt)

    # add numerosity difference between D1 and D2
    mydata["dff_D1D2"] = mydata["D1numerosity"] - mydata["D2numerosity"]
    # add correct answer
    insert_new_col_from_two_cols(mydata, "ref_first", "key_resp.keys",
                                 "is_resp_ref_more", insert_is_resp_ref_more)
    insert_new_col(mydata, "is_resp_ref_more", "is_resp_probe_more",
                   insert_is_resp_probe_more)
    # add probe numerosity
    insert_new_col_from_three_cols(mydata, "D1numerosity", "D2numerosity",
                                   "ref_first", "probeN", insert_probeN)
    # add ref numerosity
    insert_new_col_from_three_cols(mydata, "D1numerosity", "D2numerosity",
                                   "ref_first", "refN", insert_refN)
    # add probe crowding condition
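
Example #2 builds its derived columns with a family of insert_new_col* helpers that apply a row-wise function to one, two, or three source columns. A plausible sketch of `insert_new_col_from_two_cols`, assuming it mutates the frame in place; the signature matches the call site above, while the body and the in-place behavior are assumptions.

def insert_new_col_from_two_cols(df, col1, col2, new_col, func):
    # derive new_col by applying func row-wise to col1 and col2, in place
    df[new_col] = df.apply(lambda row: func(row[col1], row[col2]), axis=1)
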
Example #3
def get_mean(df, col_name):
    # mean of one column; the def line is reconstructed from the call sites below
    return df[col_name].mean()


if __name__ == '__main__':
    debug = False
    write_to_excel = False

    # read the totalData file
    all_df = pd.read_excel('../../data/exp2_data_online/clean_totalData.xlsx',
                           index_col=0)

    # drop obviously wrong responses
    col_to_drop_rows = "responseN"
    min_res = 10
    max_res = 100
    all_df = drop_df_rows_according2_one_col(all_df, col_to_drop_rows, min_res,
                                             max_res)

    # drop responses outside 3 SD of the per-numerosity mean
    n_discs = [34, 36, 38, 40, 42, 44, 58, 60, 62, 64, 66, 68]

    df_list = [
        get_sub_df_according2col_value(all_df, "Numerosity", n)
        for n in n_discs
    ]

    col_to_process = "responseN"
    prepro_df_list = list()
    for numerosity, sub_df in zip(n_discs, df_list):
        lower_boundary = get_mean(
            sub_df, col_to_process) - 3 * get_std(sub_df, col_to_process)
        upper_boundary = get_mean(
            sub_df, col_to_process) + 3 * get_std(sub_df, col_to_process)
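
Examples #1 and #3 repeat the same per-numerosity 3-SD filter. A self-contained sketch that packages that loop into one function; the logic mirrors the loops above, but the packaging and the name filter_within_3sd are assumptions, not part of the original code.

import pandas as pd


def filter_within_3sd(df, group_col, value_col, group_values):
    # per group, keep responses within mean +/- 3 SD, then recombine
    kept = []
    for v in group_values:
        sub = df.loc[df[group_col] == v]
        mean, std = sub[value_col].mean(), sub[value_col].std()
        kept.append(sub.loc[sub[value_col].between(mean - 3 * std,
                                                   mean + 3 * std)])
    return pd.concat(kept)

With the data above it would be called as filter_within_3sd(all_df, "Numerosity", "responseN", n_discs), replacing the explicit boundary bookkeeping in each loop.
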