# save results result_table.to_pickle('data/result/initial_catboost.pkl') if __name__ == "__main__": parser = argparse.ArgumentParser(description='hypoxemia prediction') parser.add_argument('--hypoxemia_thresh', type=int, default=90) parser.add_argument('--hypoxemia_window', type=int, default=10) parser.add_argument('--prediction_window', type=int, default=5) parser.add_argument('--static_feature_file', type=str, default='static-bow.csv') parser.add_argument('--random_state', type=int, default=1) args = parser.parse_args() print(args) X, y, pos_rate = prepare_data( df_static=pd.read_csv(config.get('processed', 'df_static_file')), df_dynamic=pd.read_csv(config.get('processed', 'df_dynamic_file')), static_feature=pd.read_csv('data/features/' + args.static_feature_file), args=args) cat_features = np.array([0, 4, 5, 7, 8, 9]) X.iloc[:, cat_features] = X.iloc[:, cat_features].astype('str') # normal validation X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=args.random_state, stratify=y) train_gbtree(X_train, y_train)
# generate DataFrame for static and real-time data import pandas as pd from utils.utility_parsing_raw import ParseDynamicData, ParseStaticData, ParseICD from file_config.config import config import json # path vitals_dir = config.get('data', 'vitals_dir') demographic_file = config.get('data', 'demographic_file') icd_file = config.get('data', 'icd_file') df_static_file = config.get('processed', 'df_static_file') df_dynamic_file = config.get('processed', 'df_dynamic_file') def gen_static_dataframe(): # parse demographic data into DataFrame print('Start parsing static data...') static_parser = ParseStaticData(demographic_file) df_static = static_parser.gen_static_dataframe() # parse ICD data into DataFrame, lines with PatientID not in demographic are removed print('Start parsing ICD...') id_converter = static_parser.id_converter icd_parser = ParseICD(id_converter, icd_file) df_icd = icd_parser.parse_icd() # merge ICD data to demographic DataFrame print('Merging...') df = pd.merge(df_static, df_icd, how='outer', on='pid')
if __name__ == "__main__":
    # CLI options: label-construction parameters, feature file selection, and
    # gradient-boosting hyper-parameters.
    parser = argparse.ArgumentParser(description='hypoxemia prediction')
    parser.add_argument('--hypoxemia_thresh', type=int, default=90)
    parser.add_argument('--hypoxemia_window', type=int, default=10)
    parser.add_argument('--prediction_window', type=int, default=5)
    parser.add_argument('--filter_mode', type=str, default='exclude')
    parser.add_argument('--feature_file', type=str, default='dynamic-ewm-notxt-nonimp.csv')
    parser.add_argument('--random_state', type=int, default=1)
    parser.add_argument('--n_jobs', type=int, default=-1)
    parser.add_argument('--lr', type=float, default=0.02)
    parser.add_argument('--depth', type=int, default=6)
    parser.add_argument('--l2', type=int, default=3)
    args = parser.parse_args()
    print(args)

    # Build the train/test split and positive-class rate from the processed
    # tables plus the chosen dynamic feature file.
    X_train, X_test, y_train, y_test, pos_rate = prepare_data(
        df_static=pd.read_csv(config.get('processed', 'df_static_file')),
        df_dynamic=pd.read_csv(config.get('processed', 'df_dynamic_file')),
        dynamic_feature=pd.read_csv('data/features/' + args.feature_file),
        args=args)

    model = param_tuning(X_train, y_train)

    # Persist the tuned model. Using `with` guarantees the file handle is
    # closed even if pickling raises (the original `pickle.dump(model,
    # open(...))` left the handle dangling until GC).
    with open(config.get('processed', 'realtime_model_file'), 'wb') as f:
        pickle.dump(model, f)

    evaluate(model, X_test, y_test, pos_rate, args)
save_name = 'data/result/initial_models_random.pkl' # save results result_table.to_pickle(save_name) if __name__ == "__main__": parser = argparse.ArgumentParser(description='hypoxemia prediction') parser.add_argument('--hypoxemia_thresh', type=int, default=90) parser.add_argument('--hypoxemia_window', type=int, default=10) parser.add_argument('--prediction_window', type=int, default=5) parser.add_argument('--filter_mode', type=str, default='exclude') parser.add_argument('--static_feature_file', type=str, default='static-bow.csv') parser.add_argument('--random_state', type=int, default=1) args = parser.parse_args() print(args) X, y, pos_rate = prepare_data(df_static=pd.read_csv(config.get('processed', 'df_static_file')), df_dynamic=pd.read_csv(config.get('processed', 'df_dynamic_file')), static_feature=pd.read_csv('data/features/' + args.static_feature_file), args=args) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=args.random_state, stratify=y) train_models(X_train, y_train, pos_rate)
if __name__ == '__main__':
    # CLI options selecting which feature set to extract and how text/imputation
    # are handled; all flags are string-encoded booleans/choices.
    parser = argparse.ArgumentParser(description='feature extraction')
    parser.add_argument(
        '--type', type=str, default='static')  # 'static' OR 'dynamic-ewm' OR 'dynamic-sta'
    parser.add_argument('--if_impute', type=str, default='True')  # 'True' OR 'False'
    parser.add_argument('--static_txt', type=str, default='rbow')  # 'bow' OR 'rbow'
    parser.add_argument('--dynamic_txt', type=str, default='notxt')  # 'notxt' OR 'rbow'
    args = parser.parse_args()
    print(args)

    # path
    df_static_file = config.get('processed', 'df_static_file')
    df_dynamic_file = config.get('processed', 'df_dynamic_file')

    # save name
    # Output file names encode the chosen text representation and whether
    # imputation was applied, e.g. 'dynamic-ewm-notxt-nonimp.csv'.
    token_impute = 'imp' if args.if_impute == 'True' else 'nonimp'
    static_feature_file = 'data/features/static-' + args.static_txt + '.csv'
    ewm_feat_file = 'data/features/dynamic-ewm-' + args.dynamic_txt + '-' + token_impute + '.csv'
    sta_feat_file = 'data/features/dynamic-sta-' + args.dynamic_txt + '-' + token_impute + '.csv'
    lstm_feat_file = 'data/features/dynamic-lstm-' + args.dynamic_txt + '-' + token_impute + '.csv'

    # load DataFrame real-time data
    df_static = pd.read_csv(df_static_file)
    df_dynamic = pd.read_csv(df_dynamic_file)

    # feature extraction
    # NOTE(review): the script appears to continue past this chunk — `imputer`
    # is created but not yet used here.
    imputer = DataImputation()
# CLI options: label-construction parameters, feature file selection, the
# gradient-boosting backend, and its hyper-parameters.
parser = argparse.ArgumentParser(description='hypoxemia prediction')
parser.add_argument('--hypoxemia_thresh', type=int, default=90)
parser.add_argument('--hypoxemia_window', type=int, default=10)
parser.add_argument('--prediction_window', type=int, default=5)
parser.add_argument('--filter_mode', type=str, default='exclude')
parser.add_argument('--feature_file', type=str, default='dynamic-ewm-notxt-nonimp.csv')
parser.add_argument('--random_state', type=int, default=1)
parser.add_argument('--gb_tool', type=str, default='catboost')
parser.add_argument('--if_tuning', type=str, default='True')
parser.add_argument('--n_jobs', type=int, default=-1)
parser.add_argument('--lr', type=float, default=0.02)
parser.add_argument('--depth', type=int, default=6)
parser.add_argument('--l2', type=int, default=3)
args = parser.parse_args()
print(args)

# Build the train/test split and positive-class rate from the processed
# tables plus the chosen dynamic feature file.
X_train, X_test, y_train, y_test, pos_rate = prepare_data(
    df_static=pd.read_csv(config.get('processed', 'df_static_file')),
    df_dynamic=pd.read_csv(config.get('processed', 'df_dynamic_file')),
    dynamic_feature=pd.read_csv('data/features/' + args.feature_file),
    args=args)

# Hyper-parameter search unless tuning is disabled, in which case train
# directly with the supplied lr/depth/l2 settings.
if args.if_tuning == 'True':
    model = param_tuning(X_train, y_train)
else:
    model = train_gbtree(X_train, y_train, pos_rate, args)

# Persist the model. Using `with` guarantees the file handle is closed even
# if pickling raises (the original `pickle.dump(model, open(...))` left the
# handle dangling until GC).
with open(config.get('processed', 'realtime_model_file'), 'wb') as f:
    pickle.dump(model, f)

evaluate(model, X_test, y_test, pos_rate, args)
model_explain(model, X_test)