metrics.print_metrics_log_bins(labels, predictions) if args.partition == 'custom': predictions = [metrics.get_estimate_custom(x, 10) for x in predictions] metrics.print_metrics_custom_bins(labels, predictions) if args.partition == 'none': metrics.print_metrics_regression(labels, predictions) predictions = [x[0] for x in predictions] path = os.path.join( os.path.join(args.output_dir, "test_predictions", os.path.basename(args.load_state)) + ".csv") if stochastic: ee = np.mean(np.array(epistemic)) aa = np.mean(np.array(aleatoric)) print("Epistemic uncertainty =", ee) print("Aleatoric uncertainty =", aa) print("Uncertainty =", ee + aa) utils.save_results(names, ts, predictions, labels, path, aleatoric=aleatoric, epistemic=epistemic) else: utils.save_results(names, ts, predictions, labels, path) else: raise ValueError("Wrong value for args.mode")
def main():
    """Train and evaluate a linear-regression baseline for length-of-stay.

    Command-line options:
        --period      which part of each stay the features are extracted from.
        --features    which feature group to extract.
        --data        root directory of the length-of-stay task data; it must
                      contain ``train/``, ``test/`` and the three
                      ``*_listfile.csv`` files read below.
        --output_dir  directory under which ``results/`` and ``predictions/``
                      are written.

    Side effects:
        * prints the parsed arguments, progress messages and the metrics
          reported by ``print_metrics_regression``;
        * writes ``results/{train,val,test}_<period>.<features>.json`` with
          the metrics of each split (values converted to ``float``);
        * hands the test-set predictions to ``save_results`` with the target
          path ``predictions/<period>.<features>.csv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--period', type=str, default='all',
                        help='specifies which period extract features from',
                        choices=['first4days', 'first8days', 'last12hours',
                                 'first25percent', 'first50percent', 'all'])
    parser.add_argument('--features', type=str, default='all',
                        help='specifies what features to extract',
                        choices=['all', 'len', 'all_but_len'])
    parser.add_argument('--data', type=str,
                        help='Path to the data of length-of-stay task',
                        default=os.path.join(os.path.dirname(__file__),
                                             '../../../data/length-of-stay/'))
    parser.add_argument('--output_dir', type=str,
                        help='Directory relative which all output files are stored',
                        default='.')
    args = parser.parse_args()
    print(args)

    # The validation split lives in the same directory as the training split;
    # only the listfile differs.
    train_reader = LengthOfStayReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'train_listfile.csv'))
    val_reader = LengthOfStayReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'val_listfile.csv'))
    test_reader = LengthOfStayReader(
        dataset_dir=os.path.join(args.data, 'test'),
        listfile=os.path.join(args.data, 'test_listfile.csv'))

    print('Reading data and extracting features ...')
    # Train and validation are capped at 100000 examples each; the test set
    # is always read in full.
    n_train = min(100000, train_reader.get_number_of_examples())
    n_val = min(100000, val_reader.get_number_of_examples())

    (train_X, train_y, train_names, train_ts) = read_and_extract_features(
        train_reader, n_train, args.period, args.features)
    (val_X, val_y, val_names, val_ts) = read_and_extract_features(
        val_reader, n_val, args.period, args.features)
    (test_X, test_y, test_names, test_ts) = read_and_extract_features(
        test_reader, test_reader.get_number_of_examples(),
        args.period, args.features)

    # Informational only: execution continues into imputation and training.
    print(train_X.shape)

    print('Imputing missing values ...')
    # NOTE(review): `Imputer` is whatever this module imports under that name;
    # the import is outside this block, so the call is left as it was.
    imputer = Imputer(missing_values=np.nan, strategy='mean', axis=0,
                      verbose=0, copy=True)
    # Statistics come from the training split only and are reused for
    # validation and test.
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    file_name = "{}.{}".format(args.period, args.features)

    linreg = LinearRegression()
    linreg.fit(train_X, train_y)

    result_dir = os.path.join(args.output_dir, 'results')
    common_utils.create_directory(result_dir)

    with open(os.path.join(result_dir, 'train_{}.json'.format(file_name)), "w") as res_file:
        ret = print_metrics_regression(train_y, linreg.predict(train_X))
        # Cast every metric to float before handing it to json.dump.
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    with open(os.path.join(result_dir, 'val_{}.json'.format(file_name)), 'w') as res_file:
        ret = print_metrics_regression(val_y, linreg.predict(val_X))
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    # Test predictions are kept: they are both scored and saved below.
    prediction = linreg.predict(test_X)

    with open(os.path.join(result_dir, 'test_{}.json'.format(file_name)), 'w') as res_file:
        ret = print_metrics_regression(test_y, prediction)
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    # The 'predictions' directory is not created here; presumably
    # save_results takes care of it -- TODO confirm.
    save_results(test_names, test_ts, prediction, test_y,
                 os.path.join(args.output_dir, 'predictions', file_name + '.csv'))
ret = test_data_gen.next(return_y_true=True) (x, y_processed, y) = ret["data"] cur_names = ret["names"] cur_ts = ret["ts"] x = np.array(x) pred = model.predict_on_batch(x) predictions += list(pred) labels += list(y) names += list(cur_names) ts += list(cur_ts) if args.partition == 'log': predictions = [metrics.get_estimate_log(x, 10) for x in predictions] metrics.print_metrics_log_bins(labels, predictions) if args.partition == 'custom': predictions = [metrics.get_estimate_custom(x, 10) for x in predictions] metrics.print_metrics_custom_bins(labels, predictions) if args.partition == 'none': metrics.print_metrics_regression(labels, predictions) predictions = [x[0] for x in predictions] path = os.path.join( os.path.join(args.output_dir, "test_predictions", os.path.basename(args.load_state)) + ".csv") utils.save_results(names, ts, predictions, labels, path) else: raise ValueError("Wrong value for args.mode")
ret = test_data_gen.next(return_y_true=True) (x, _, y) = ret["data"] cur_names = ret["names"] cur_ts = ret["ts"] x = torch.tensor(x, dtype=torch.float).to(device) pred = model(x) pred = pred.cpu().data.numpy() predictions += list(pred) labels += list(y) names += list(cur_names) ts += list(cur_ts) if args.small_part: break if args.partition == 'log': predictions = [metrics.get_estimate_log(x, 10) for x in predictions] metrics.print_metrics_log_bins(labels, predictions) if args.partition == 'custom': predictions = [metrics.get_estimate_custom(x, 10) for x in predictions] metrics.print_metrics_custom_bins(labels, predictions) if args.partition == 'none': metrics.print_metrics_regression(labels, predictions) predictions = [x[0] for x in predictions] utils.save_results(names, ts, predictions, labels, os.path.join(save_path, 'test_predictions.csv')) else: raise ValueError("Wrong value for args.mode")
# pheno if args.pheno_C > 0: print("\n =================== phenotype ==================") pheno_pred = np.array(pheno_pred) pheno_ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred) print("Saving the predictions in test_predictions/task directories ...") # ihm ihm_path = os.path.join(os.path.join(args.output_dir, "test_predictions/ihm", os.path.basename(args.load_state)) +experiment_name+ ".csv") ihm_utils.save_results(ihm_names, ihm_pred, ihm_y_true, ihm_path) # decomp decomp_path = os.path.join(os.path.join(args.output_dir, "test_predictions/decomp", os.path.basename(args.load_state)) +experiment_name+ ".csv") decomp_utils.save_results(decomp_names, decomp_ts, decomp_pred, decomp_y_true, decomp_path) # los los_path = os.path.join(os.path.join(args.output_dir, "test_predictions/los", os.path.basename(args.load_state)) +experiment_name+ ".csv") los_utils.save_results(los_names, los_ts, los_pred, los_y_true, los_path) # pheno pheno_path = os.path.join(os.path.join(args.output_dir, "test_predictions/pheno", os.path.basename(args.load_state)) +experiment_name+ ".csv") pheno_utils.save_results(pheno_names, pheno_ts, pheno_pred, pheno_y_true, pheno_path) else: raise ValueError("Wrong value for args.mode")
los_ret = metrics.print_metrics_custom_bins(los_y_true, los_pred) if args.partition == 'none': los_ret = metrics.print_metrics_regression(los_y_true, los_pred) # pheno if args.pheno_C > 0: print "\n =================== phenotype ==================" pheno_pred = np.array(pheno_pred) pheno_ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred) print "Saving the predictions in test_predictions/task directories ..." # ihm ihm_path = os.path.join("test_predictions/ihm", os.path.basename(args.load_state)) + ".csv" ihm_utils.save_results(ihm_names, ihm_pred, ihm_y_true, ihm_path) # decomp decomp_path = os.path.join("test_predictions/decomp", os.path.basename(args.load_state)) + ".csv" decomp_utils.save_results(decomp_names, decomp_ts, decomp_pred, decomp_y_true, decomp_path) # los los_path = os.path.join("test_predictions/los", os.path.basename(args.load_state)) + ".csv" los_utils.save_results(los_names, los_ts, los_pred, los_y_true, los_path) # pheno pheno_path = os.path.join("test_predictions/pheno", os.path.basename(args.load_state)) + ".csv" pheno_utils.save_results(pheno_names, pheno_ts, pheno_pred, pheno_y_true, pheno_path) else: raise ValueError("Wrong value for args.mode")
def main():
    """Fit one logistic regression per length-of-stay bin and score the result.

    Command-line options select the feature period, the feature group, the
    data root, the output directory and whether to sweep a fixed grid of
    (penalty, C) pairs or to use the single default pair.

    For every (penalty, C) pair the function trains ``n_bins`` binary
    classifiers, turns the per-bin positive-class probabilities into a
    prediction with ``metrics.get_estimate_custom``, writes the metrics of the
    train/val/test splits as JSON files under ``<output_dir>/cf_results`` and
    passes the test predictions to ``save_results`` with a path under
    ``<output_dir>/cf_predictions``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--period', type=str, default='all',
        help='specifies which period extract features from',
        choices=['first4days', 'first8days', 'last12hours',
                 'first25percent', 'first50percent', 'all'])
    arg_parser.add_argument(
        '--features', type=str, default='all',
        help='specifies what features to extract',
        choices=['all', 'len', 'all_but_len'])
    arg_parser.add_argument('--grid-search', dest='grid_search', action='store_true')
    arg_parser.add_argument('--no-grid-search', dest='grid_search', action='store_false')
    arg_parser.set_defaults(grid_search=False)
    arg_parser.add_argument(
        '--data', type=str,
        help='Path to the data of length-of-stay task',
        default=os.path.join(os.path.dirname(__file__), '../../../data/length-of-stay/'))
    arg_parser.add_argument(
        '--output_dir', type=str,
        help='Directory relative which all output files are stored',
        default='.')
    args = arg_parser.parse_args()
    print(args)

    # Hyper-parameter pairs to evaluate, walked in lockstep below.
    if not args.grid_search:
        penalties = ['l2']
        coefs = [0.00001]
    else:
        penalties = ['l2', 'l2', 'l2', 'l2', 'l2', 'l2',
                     'l1', 'l1', 'l1', 'l1', 'l1']
        coefs = [1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001,
                 1.0, 0.1, 0.01, 0.001, 0.0001]

    train_dir = os.path.join(args.data, 'train')
    train_reader = LengthOfStayReader(
        dataset_dir=train_dir,
        listfile=os.path.join(args.data, 'train_listfile.csv'))
    # Validation examples are read from the training directory as well.
    val_reader = LengthOfStayReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'val_listfile.csv'))
    test_reader = LengthOfStayReader(
        dataset_dir=os.path.join(args.data, 'test'),
        listfile=os.path.join(args.data, 'test_listfile.csv'))

    print('Reading data and extracting features ...')
    # At most 100000 examples are used for train and for validation.
    n_train = min(100000, train_reader.get_number_of_examples())
    n_val = min(100000, val_reader.get_number_of_examples())

    train_X, train_y, train_actual, train_names, train_ts = read_and_extract_features(
        train_reader, n_train, args.period, args.features)
    val_X, val_y, val_actual, val_names, val_ts = read_and_extract_features(
        val_reader, n_val, args.period, args.features)
    test_X, test_y, test_actual, test_names, test_ts = read_and_extract_features(
        test_reader, test_reader.get_number_of_examples(), args.period, args.features)

    print("train set shape: {}".format(train_X.shape))
    print("validation set shape: {}".format(val_X.shape))
    print("test set shape: {}".format(test_X.shape))

    print('Imputing missing values ...')
    imputer = Imputer(missing_values=np.nan, strategy='mean', axis=0,
                      verbose=0, copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    result_dir = os.path.join(args.output_dir, 'cf_results')
    common_utils.create_directory(result_dir)

    for penalty, C in zip(penalties, coefs):
        model_name = '{}.{}.{}.C{}'.format(args.period, args.features, penalty, C)

        # One column of positive-class probabilities per bin, per split.
        train_activations = np.zeros(train_y.shape, dtype=float)
        val_activations = np.zeros(val_y.shape, dtype=float)
        test_activations = np.zeros(test_y.shape, dtype=float)
        split_inputs = (
            (train_X, train_activations),
            (val_X, val_activations),
            (test_X, test_activations),
        )

        for bin_idx in range(n_bins):
            classifier = LogisticRegression(penalty=penalty, C=C, random_state=42)
            classifier.fit(train_X, train_y[:, bin_idx])
            for features, activations in split_inputs:
                activations[:, bin_idx] = classifier.predict_proba(features)[:, 1]

        train_predictions = np.array(
            [metrics.get_estimate_custom(row, n_bins) for row in train_activations])
        val_predictions = np.array(
            [metrics.get_estimate_custom(row, n_bins) for row in val_activations])
        test_predictions = np.array(
            [metrics.get_estimate_custom(row, n_bins) for row in test_activations])

        scored_splits = (
            ('train', train_actual, train_predictions),
            ('val', val_actual, val_predictions),
            ('test', test_actual, test_predictions),
        )
        for split, actual, predicted in scored_splits:
            json_path = os.path.join(result_dir, '{}_{}.json'.format(split, model_name))
            with open(json_path, 'w') as f:
                scores = metrics.print_metrics_custom_bins(actual, predicted)
                # Cast every metric to float before handing it to json.dump.
                json.dump({key: float(value) for key, value in scores.items()}, f)

        save_results(
            test_names, test_ts, test_predictions, test_actual,
            os.path.join(args.output_dir, 'cf_predictions', model_name + '.csv'))
def main():
    """Run the linear-regression length-of-stay baseline on the default data.

    Data is read from fixed paths under ``../../../data/length-of-stay/``.
    Metrics of the train/val/test splits are written as JSON files into
    ``results/`` and the test predictions are passed to ``save_results`` with
    a path under ``predictions/``.  ``--period`` and ``--features`` control
    feature extraction and also form the output file name.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--period', type=str, default='all',
        help='specifies which period extract features from',
        choices=['first4days', 'first8days', 'last12hours',
                 'first25percent', 'first50percent', 'all'])
    cli.add_argument(
        '--features', type=str, default='all',
        help='specifies what features to extract',
        choices=['all', 'len', 'all_but_len'])
    args = cli.parse_args()
    print(args)

    # Validation examples are read from the training directory; only the
    # listfile differs.
    train_reader = LengthOfStayReader(
        dataset_dir='../../../data/length-of-stay/train/',
        listfile='../../../data/length-of-stay/train_listfile.csv')
    val_reader = LengthOfStayReader(
        dataset_dir='../../../data/length-of-stay/train/',
        listfile='../../../data/length-of-stay/val_listfile.csv')
    test_reader = LengthOfStayReader(
        dataset_dir='../../../data/length-of-stay/test/',
        listfile='../../../data/length-of-stay/test_listfile.csv')

    print('Reading data and extracting features ...')
    # Train and validation are capped at 100000 examples each.
    n_train = min(100000, train_reader.get_number_of_examples())
    n_val = min(100000, val_reader.get_number_of_examples())

    train_X, train_y, train_names, train_ts = read_and_extract_features(
        train_reader, n_train, args.period, args.features)
    val_X, val_y, val_names, val_ts = read_and_extract_features(
        val_reader, n_val, args.period, args.features)
    test_X, test_y, test_names, test_ts = read_and_extract_features(
        test_reader, test_reader.get_number_of_examples(), args.period, args.features)

    print('Imputing missing values ...')
    imputer = Imputer(missing_values=np.nan, strategy='mean', axis=0,
                      verbose=0, copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    file_name = "{}.{}".format(args.period, args.features)

    model = LinearRegression()
    model.fit(train_X, train_y)

    common_utils.create_directory('results')

    def dump_scores(res_file, y_true, y_pred):
        # Print the regression metrics and store them as plain floats.
        scores = print_metrics_regression(y_true, y_pred)
        json.dump({name: float(value) for name, value in scores.items()}, res_file)

    # Train and validation predictions are only needed for scoring.
    for split, features, targets in (('train', train_X, train_y),
                                     ('val', val_X, val_y)):
        json_path = os.path.join('results', '{}_{}.json'.format(split, file_name))
        with open(json_path, 'w') as res_file:
            dump_scores(res_file, targets, model.predict(features))

    # Test predictions are kept because they are scored and saved.
    prediction = model.predict(test_X)
    with open(os.path.join('results', 'test_{}.json'.format(file_name)), 'w') as res_file:
        dump_scores(res_file, test_y, prediction)

    save_results(test_names, test_ts, prediction, test_y,
                 os.path.join('predictions', file_name + '.csv'))
def main():
    """Run the per-bin logistic-regression length-of-stay baseline.

    Data is read from fixed paths under ``../../../data/length-of-stay/``.
    For each configured (penalty, C) pair, ``n_bins`` binary classifiers are
    trained, their positive-class probabilities are combined into a
    prediction by ``metrics.get_estimate_custom``, the metrics of the
    train/val/test splits are written to ``cf_results/`` as JSON and the test
    predictions are passed to ``save_results`` with a path under
    ``cf_predictions/``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--period', type=str, default='all',
        help='specifies which period extract features from',
        choices=['first4days', 'first8days', 'last12hours',
                 'first25percent', 'first50percent', 'all'])
    cli.add_argument(
        '--features', type=str, default='all',
        help='specifies what features to extract',
        choices=['all', 'len', 'all_but_len'])
    args = cli.parse_args()
    print(args)

    # Only one configuration is active.  The wider grid that was tried is
    #   penalties: 6 x 'l2' followed by 5 x 'l1'
    #   C values : 1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001 (l2)
    #              1.0, 0.1, 0.01, 0.001, 0.0001          (l1)
    configurations = [('l2', 0.00001)]

    # Validation examples are read from the training directory; only the
    # listfile differs.
    train_reader = LengthOfStayReader(
        dataset_dir='../../../data/length-of-stay/train/',
        listfile='../../../data/length-of-stay/train_listfile.csv')
    val_reader = LengthOfStayReader(
        dataset_dir='../../../data/length-of-stay/train/',
        listfile='../../../data/length-of-stay/val_listfile.csv')
    test_reader = LengthOfStayReader(
        dataset_dir='../../../data/length-of-stay/test/',
        listfile='../../../data/length-of-stay/test_listfile.csv')

    print('Reading data and extracting features ...')
    # Train and validation are capped at 100000 examples each.
    n_train = min(100000, train_reader.get_number_of_examples())
    n_val = min(100000, val_reader.get_number_of_examples())

    train_X, train_y, train_actual, train_names, train_ts = read_and_extract_features(
        train_reader, n_train, args.period, args.features)
    val_X, val_y, val_actual, val_names, val_ts = read_and_extract_features(
        val_reader, n_val, args.period, args.features)
    test_X, test_y, test_actual, test_names, test_ts = read_and_extract_features(
        test_reader, test_reader.get_number_of_examples(), args.period, args.features)

    print("train set shape: {}".format(train_X.shape))
    print("validation set shape: {}".format(val_X.shape))
    print("test set shape: {}".format(test_X.shape))

    print('Imputing missing values ...')
    imputer = Imputer(missing_values=np.nan, strategy='mean', axis=0,
                      verbose=0, copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    common_utils.create_directory('cf_results')

    for penalty, C in configurations:
        model_name = '{}.{}.{}.C{}'.format(args.period, args.features, penalty, C)

        # One column of positive-class probabilities per bin, per split.
        train_activations = np.zeros(train_y.shape, dtype=float)
        val_activations = np.zeros(val_y.shape, dtype=float)
        test_activations = np.zeros(test_y.shape, dtype=float)
        split_inputs = (
            (train_X, train_activations),
            (val_X, val_activations),
            (test_X, test_activations),
        )

        for bin_idx in range(n_bins):
            classifier = LogisticRegression(penalty=penalty, C=C, random_state=42)
            classifier.fit(train_X, train_y[:, bin_idx])
            for features, activations in split_inputs:
                activations[:, bin_idx] = classifier.predict_proba(features)[:, 1]

        train_predictions = np.array(
            [metrics.get_estimate_custom(row, n_bins) for row in train_activations])
        val_predictions = np.array(
            [metrics.get_estimate_custom(row, n_bins) for row in val_activations])
        test_predictions = np.array(
            [metrics.get_estimate_custom(row, n_bins) for row in test_activations])

        scored_splits = (
            ('train', train_actual, train_predictions),
            ('val', val_actual, val_predictions),
            ('test', test_actual, test_predictions),
        )
        for split, actual, predicted in scored_splits:
            json_path = os.path.join('cf_results', '{}_{}.json'.format(split, model_name))
            with open(json_path, 'w') as f:
                scores = metrics.print_metrics_custom_bins(actual, predicted)
                # Cast every metric to float before handing it to json.dump.
                json.dump({key: float(value) for key, value in scores.items()}, f)

        save_results(test_names, test_ts, test_predictions, test_actual,
                     os.path.join('cf_predictions', model_name + '.csv'))
for i in range(test_data_gen.steps): print "\rpredicting {} / {}".format(i, test_data_gen.steps), ret = test_data_gen.next(return_y_true=True) (x, y_processed, y) = ret["data"] cur_names = ret["names"] cur_ts = ret["ts"] x = np.array(x) pred = model.predict_on_batch(x) predictions += list(pred) labels += list(y) names += list(cur_names) ts += list(cur_ts) if args.partition == 'log': predictions = [metrics.get_estimate_log(x, 10) for x in predictions] metrics.print_metrics_log_bins(labels, predictions) if args.partition == 'custom': predictions = [metrics.get_estimate_custom(x, 10) for x in predictions] metrics.print_metrics_custom_bins(labels, predictions) if args.partition == 'none': metrics.print_metrics_regression(labels, predictions) predictions = [x[0] for x in predictions] path = os.path.join("test_predictions", os.path.basename(args.load_state)) + ".csv" utils.save_results(names, ts, predictions, labels, path) else: raise ValueError("Wrong value for args.mode")