y_test_ids_df,
                                  on=id_cols)
        # Keep only the lab and MEWS rows whose ids appear in y_test_ids_df
        # (pd.merge defaults to an inner join on id_cols).
        test_labs_df = pd.merge(collapsed_labs_df, y_test_ids_df, on=id_cols)
        test_mews_df = pd.merge(mews_df, y_test_ids_df, on=id_cols)

        # Join vitals and labs on the id columns; ids missing from either
        # table are dropped by the inner join.
        test_collapsed_features_df = pd.merge(test_vitals_df,
                                              test_labs_df,
                                              on=id_cols,
                                              how='inner')
        # Attach the demographics columns to the joined vitals/labs features.
        test_features_df = pd.merge(test_collapsed_features_df,
                                    demographics_df,
                                    on=id_cols)
        # The merged data dict is built only when p == 0 and reused afterwards.
        # NOTE(review): presumably p is the index of an enclosing loop and the
        # schema is identical for every p -- confirm against the caller.
        if p == 0:
            test_features_dict = merge_data_dicts([
                collapsed_vitals_data_dict, collapsed_labs_data_dict,
                demographics_data_dict
            ])

        # Select the outcome rows for exactly the ids present in the final
        # feature table.
        test_outcomes_df = pd.merge(test_features_df[id_cols],
                                    outcomes_df,
                                    on=id_cols,
                                    how='inner')

        # Extract the arrays used to compute performance metrics.
        # NOTE(review): test_mews_df is filtered only by y_test_ids_df, while
        # the features/outcomes also pass through the vitals/labs/demographics
        # joins, so mews_test may not be row-aligned with x_test / y_test --
        # confirm.
        feature_cols = parse_feature_cols(test_features_dict['schema'])
        mews_score_col = parse_feature_cols(mews_data_dict['schema'])
        x_test = test_features_df[feature_cols].values
        y_test = test_outcomes_df[outcome_col].values
        mews_test = test_mews_df[mews_score_col].values

        # bootstrap test set inds without replacement
예제 #2
0
    # Keep one row per unique id_cols combination and renumber the index.
    chosen_stay_ids_df = chosen_stay_ids_df.drop_duplicates(subset=id_cols).reset_index(drop=True)
    
    # for each patient get their vitals, labs, demographics
    # (tables and their data dicts are loaded from args.preproc_data_dir)
    labs_df, labs_data_dict, vitals_df, vitals_data_dict, \
        demographics_df, demographics_data_dict, outcomes_df, outcomes_data_dict = get_preprocessed_data(args.preproc_data_dir)

    # presumably the vital-sign and lab feature column names -- depends on
    # what parse_feature_cols returns; confirm
    vitals = parse_feature_cols(vitals_data_dict)
    labs = parse_feature_cols(labs_data_dict)
    
    # Restrict labs and vitals to the chosen stays (inner join on id_cols).
    chosen_stay_labs_df = pd.merge(labs_df, chosen_stay_ids_df, on=id_cols, how='inner')
    chosen_stay_vitals_df = pd.merge(vitals_df, chosen_stay_ids_df, on=id_cols, how='inner')
    # Outer join on ids + time columns: a row is kept when either a lab or a
    # vital measurement exists at that time; the other side is filled with NaN.
    chosen_stay_highfreq_df = pd.merge(chosen_stay_labs_df, chosen_stay_vitals_df, on = id_cols + ['hours_since_admission', 
                                                                                                'timestamp'], how='outer')
    
    
    # Combine the lab and vital data dicts, then expose the schema's field
    # list under a top-level 'fields' key as well.
    highfreq_features_dict = merge_data_dicts([labs_data_dict, vitals_data_dict])
    highfreq_features_dict['fields'] = highfreq_features_dict['schema']['fields']
    
    # choose a subject
    # Hard-coded subject ids, stored as strings.
    # NOTE(review): '1379931' and '1339889' have seven digits while every
    # other id has eight -- possibly truncated; confirm against the data.
    chosen_short_stay_subj_list = ['14343967', '18115638', '18826316', '17245153', 
                             '17557700', '11212084', '11163358', '17684794', 
                             '12751842', '11528888', '1379931', '17745211', 
                             '12862019', '14201044', '14917356', '17682462', 
                             '1339889', '17995864', '15787542', '13007083', 
                             '18239690', '11692208', '19352552', '19438165', 
                             '14858518']
    chosen_long_stay_subj_list = ['12702290', '19160387', '19806342', '19222313', '17863017']
    
    # Process the short-stay subjects first, then the long-stay ones.
    chosen_stay_subj_list = chosen_short_stay_subj_list + chosen_long_stay_subj_list
# Disabled override that limits the run to the first two long-stay subjects:
#     chosen_stay_subj_list = chosen_long_stay_subj_list[0:2]
    for idx in chosen_stay_subj_list:
        else:
            # In this branch the feature table is the inner join of the
            # vitals and labs tables on the id columns.
            test_collapsed_features_df = pd.merge(test_vitals_df,
                                                  test_labs_df,
                                                  on=id_cols,
                                                  how='inner')

            # Data dicts describing the columns of the tables that make up
            # the feature set; they are merged into one dict further down.
            data_dicts_list = [
                collapsed_vitals_data_dict, collapsed_labs_data_dict,
                demographics_data_dict
            ]

        # Attach the demographics columns to the joined features
        # (pd.merge defaults to an inner join on id_cols).
        test_features_df = pd.merge(test_collapsed_features_df,
                                    demographics_df,
                                    on=id_cols)
        # The merged data dict is built only when p == 0 and reused afterwards.
        # NOTE(review): presumably p is the index of an enclosing loop and the
        # schema is identical for every p -- confirm against the caller.
        if p == 0:
            test_features_dict = merge_data_dicts(data_dicts_list)

        # Select the outcome rows for exactly the ids present in the final
        # feature table.
        test_outcomes_df = pd.merge(test_features_df[id_cols],
                                    outcomes_df,
                                    on=id_cols,
                                    how='inner')

        # Extract the arrays used to compute performance metrics.
        feature_cols = parse_feature_cols(test_features_dict['schema'])
        mews_score_col = parse_feature_cols(mews_data_dict['schema'])
        x_test = test_features_df[feature_cols].values
        y_test = test_outcomes_df[outcome_col].values

        # load the scaler
        # The context manager closes the file handle as soon as unpickling
        # finishes (a bare open() passed to pickle.load is never closed
        # explicitly).
        # NOTE: pickle.load can execute arbitrary code; only load scaler
        # files produced by this project's own training run.
        with open(os.path.join(clf_models_dir, 'scaler.pkl'),
                  'rb') as scaler_file:
            scaler = pickle.load(scaler_file)