def calc_metrics(self, data_gen, history, dataset, logs):
    y_true = []
    predictions = []
    for i in range(data_gen.steps):
        if self.verbose == 1:
            print("\tdone {}/{}".format(i, data_gen.steps), end='\r')
        if self.use_time:
            ([x, t], y) = next(data_gen)
            outputs = self.model.predict([x, t], batch_size=self.batch_size)
        else:
            (x, y) = next(data_gen)
            outputs = self.model.predict(x, batch_size=self.batch_size)
        if data_gen.target_repl:
            y_true += list(y[0])
            predictions += list(outputs[0])
        else:
            y_true += list(y)
            predictions += list(outputs)
    print('\n')
    predictions = np.array(predictions)
    ret = metrics.print_metrics_multilabel(y_true, predictions)
    for k, v in ret.items():
        logs[dataset + '_' + k] = v
    history.append(ret)
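# A minimal sketch (not part of the original file) of how a method like
# calc_metrics above could be hooked into training as a Keras callback.
# The class name, constructor arguments, and attribute names are hypothetical;
# only keras.callbacks.Callback and on_epoch_end are real API, and calc_metrics
# is assumed to be added to this class exactly as defined above.
from keras.callbacks import Callback

class MultilabelMetricsLogger(Callback):  # hypothetical wrapper class
    def __init__(self, train_data_gen, val_data_gen, batch_size=8,
                 verbose=1, use_time=False):
        super(MultilabelMetricsLogger, self).__init__()
        self.train_data_gen = train_data_gen
        self.val_data_gen = val_data_gen
        self.batch_size = batch_size
        self.verbose = verbose
        self.use_time = use_time
        self.train_history = []
        self.val_history = []

    # calc_metrics (as defined above) would be a method of this class

    def on_epoch_end(self, epoch, logs=None):
        logs = logs if logs is not None else {}
        self.calc_metrics(self.train_data_gen, self.train_history, 'train', logs)
        self.calc_metrics(self.val_data_gen, self.val_history, 'val', logs)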
def do_epoch(mode, epoch):
    # mode is 'train' or 'test'
    y_true = []
    predictions = []

    avg_loss = 0.0
    sum_loss = 0.0
    prev_time = time.time()

    batches_per_epoch = network.get_batches_per_epoch(mode)

    for i in range(0, batches_per_epoch):
        step_data = network.step(mode)
        prediction = step_data["prediction"]
        answers = step_data["answers"]
        current_loss = step_data["current_loss"]
        log = step_data["log"]

        avg_loss += current_loss
        sum_loss += current_loss

        for x in answers:
            y_true.append(x)
        for x in prediction:
            predictions.append(x)

        if ((i + 1) % args.log_every == 0):
            cur_time = time.time()
            print (" %sing: %d.%d / %d \t loss: %.3f \t avg_loss: %.3f \t"
                   "%s \t time: %.2fs" % (mode, epoch, i * args.batch_size,
                                          batches_per_epoch * args.batch_size,
                                          current_loss,
                                          avg_loss / args.log_every,
                                          log, cur_time - prev_time))
            avg_loss = 0
            prev_time = cur_time

        if np.isnan(current_loss):
            raise Exception("current loss IS NaN. This should never happen :)")

    sum_loss /= batches_per_epoch
    print "\n %s loss = %.5f" % (mode, sum_loss)
    metrics.print_metrics_multilabel(y_true, predictions)
    return sum_loss
test_data_gen = utils.BatchGen(test_reader, discretizer,
                               normalizer, args.batch_size,
                               args.small_part, target_repl,
                               shuffle=False, return_names=True)

names = []
ts = []
labels = []
predictions = []

for i in range(test_data_gen.steps):
    print "\rpredicting {} / {}".format(i, test_data_gen.steps),
    ret = next(test_data_gen)
    x = ret["data"][0]
    y = ret["data"][1]
    cur_names = ret["names"]
    cur_ts = ret["ts"]

    x = np.array(x)
    pred = model.predict_on_batch(x)
    predictions += list(pred)
    labels += list(y)
    names += list(cur_names)
    ts += list(cur_ts)

metrics.print_metrics_multilabel(labels, predictions)

path = os.path.join("test_predictions", os.path.basename(args.load_state)) + ".csv"
utils.save_results(names, ts, predictions, labels, path)

else:
    raise ValueError("Wrong value for args.mode")
        continue

    # Make sure only one file for this task
    assert (not PRED_TASKS[matches[0]])
    PRED_TASKS[matches[0]] = True

    print("Evaluating {}".format(matches[0]))
    match_pred, match_Y = read_file(os.path.join(indir, filename))

    if merged_pred is None:
        merged_pred = np.expand_dims(match_pred.copy(), axis=0)
        merged_Y = np.expand_dims(match_Y.copy(), axis=0)
    else:
        merged_pred = np.concatenate((merged_pred, np.expand_dims(match_pred, axis=0)), axis=0)
        merged_Y = np.concatenate((merged_Y, np.expand_dims(match_Y, axis=0)), axis=0)

    # print(merged_X.shape)
    # print(merged_Y.shape)

    metrics.print_metrics_binary(match_Y, match_pred)
    print("----------------------------------------")

print("\n==========================================")
print("Evaluating all together:")
metrics.print_metrics_multilabel(merged_Y.T, merged_pred.T)

for key in PRED_TASKS:
    if PRED_TASKS[key] != True:
        print("WARNING: Data for task {} missing?".format(key))
header += "ave_auc_micro,ave_auc_macro,ave_auc_weighted," header += ','.join(["auc_%d" % i for i in range(NTASKS)]) resfile.write(header + "\n") def write_results(resfile, ret): resfile.write("%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f," % (ret['ave_prec_micro'], ret['ave_prec_macro'], ret['ave_prec_weighted'], ret['ave_recall_micro'], ret['ave_recall_macro'], ret['ave_recall_weighted'], ret['ave_auc_micro'], ret['ave_auc_macro'], ret['ave_auc_weighted'])) resfile.write(",".join(["%.6f" % x for x in ret['auc_scores']]) + "\n") print "\nAverage results on train" ret = metrics.print_metrics_multilabel(train_y, train_activations) write_results(resfile, ret) print "\nAverage results on val" ret = metrics.print_metrics_multilabel(val_y, val_activations) write_results(resfile, ret) print "\nAverage results on test" ret = metrics.print_metrics_multilabel(test_y, test_activations) write_results(resfile, ret) np.savetxt(os.path.join("activations", model_name + ".csv"), test_activations, delimiter=',') print "==================== Done (penalty = %s, C = %f) ====================\n" % (
predictions = []
for i in range(test_data_gen.steps):
    print "\rpredicting {} / {}".format(i, test_data_gen.steps),
    ret = next(test_data_gen)
    x = ret["data"][0]
    y = ret["data"][1]
    cur_names = ret["names"]
    cur_ts = ret["ts"]

    x = np.array(x)
    pred = model.predict_on_batch(x)
    predictions += list(pred)
    labels += list(y)
    names += list(cur_names)
    ts += list(cur_ts)

ret = metrics.print_metrics_multilabel(labels, predictions)

with open("results.txt", "w") as resfile:
    header = "ave_prec_micro,ave_prec_macro,ave_prec_weighted,"
    header += "ave_recall_micro,ave_recall_macro,ave_recall_weighted,"
    header += "ave_auc_micro,ave_auc_macro,ave_auc_weighted,"
    header += ','.join(["auc_%d" % i for i in range(args_dict['num_classes'])])
    resfile.write(header + "\n")
    resfile.write("%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f," % (
        ret['ave_prec_micro'], ret['ave_prec_macro'], ret['ave_prec_weighted'],
        ret['ave_recall_micro'], ret['ave_recall_macro'], ret['ave_recall_weighted'],
        ret['ave_auc_micro'], ret['ave_auc_macro'], ret['ave_auc_weighted']))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--period', type=str, default='all',
                        help='specifies which period extract features from',
                        choices=['first4days', 'first8days', 'last12hours',
                                 'first25percent', 'first50percent', 'all'])
    parser.add_argument('--features', type=str, default='all',
                        help='specifies what features to extract',
                        choices=['all', 'len', 'all_but_len'])
    parser.add_argument('--grid-search', dest='grid_search', action='store_true')
    parser.add_argument('--no-grid-search', dest='grid_search', action='store_false')
    parser.set_defaults(grid_search=False)
    parser.add_argument('--data', type=str,
                        help='Path to the data of phenotyping task',
                        default=os.path.join(os.path.dirname(__file__), '../../../data/phenotyping/'))
    parser.add_argument('--output_dir', type=str,
                        help='Directory relative which all output files are stored',
                        default='.')
    args = parser.parse_args()
    print(args)

    if args.grid_search:
        penalties = ['l2', 'l2', 'l2', 'l2', 'l2', 'l2', 'l1', 'l1', 'l1', 'l1', 'l1']
        coefs = [1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 1.0, 0.1, 0.01, 0.001, 0.0001]
    else:
        penalties = ['l1']
        coefs = [0.1]

    train_reader = PhenotypingReader(dataset_dir=os.path.join(args.data, 'train'),
                                     listfile=os.path.join(args.data, 'train_listfile.csv'))
    val_reader = PhenotypingReader(dataset_dir=os.path.join(args.data, 'train'),
                                   listfile=os.path.join(args.data, 'val_listfile.csv'))
    test_reader = PhenotypingReader(dataset_dir=os.path.join(args.data, 'test'),
                                    listfile=os.path.join(args.data, 'test_listfile.csv'))

    print('Reading data and extracting features ...')
    (train_X, train_y, train_names, train_ts) = read_and_extract_features(
        train_reader, args.period, args.features)
    train_y = np.array(train_y)

    (val_X, val_y, val_names, val_ts) = read_and_extract_features(
        val_reader, args.period, args.features)
    val_y = np.array(val_y)

    (test_X, test_y, test_names, test_ts) = read_and_extract_features(
        test_reader, args.period, args.features)
    test_y = np.array(test_y)

    print("train set shape: {}".format(train_X.shape))
    print("validation set shape: {}".format(val_X.shape))
    print("test set shape: {}".format(test_X.shape))

    print('Imputing missing values ...')
    imputer = Imputer(missing_values=np.nan, strategy='mean', axis=0, verbose=0, copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    n_tasks = 25
    result_dir = os.path.join(args.output_dir, 'results')
    common_utils.create_directory(result_dir)

    for (penalty, C) in zip(penalties, coefs):
        model_name = '{}.{}.{}.C{}'.format(args.period, args.features, penalty, C)

        train_activations = np.zeros(shape=train_y.shape, dtype=float)
        val_activations = np.zeros(shape=val_y.shape, dtype=float)
        test_activations = np.zeros(shape=test_y.shape, dtype=float)

        for task_id in range(n_tasks):
            print('Starting task {}'.format(task_id))

            logreg = LogisticRegression(penalty=penalty, C=C, random_state=42)
            logreg.fit(train_X, train_y[:, task_id])

            train_preds = logreg.predict_proba(train_X)
            train_activations[:, task_id] = train_preds[:, 1]

            val_preds = logreg.predict_proba(val_X)
            val_activations[:, task_id] = val_preds[:, 1]

            test_preds = logreg.predict_proba(test_X)
            test_activations[:, task_id] = test_preds[:, 1]

        with open(os.path.join(result_dir, 'train_{}.json'.format(model_name)), 'w') as f:
            ret = metrics.print_metrics_multilabel(train_y, train_activations)
            ret = {k: float(v) for k, v in ret.items() if k != 'auc_scores'}
            json.dump(ret, f)

        with open(os.path.join(result_dir, 'val_{}.json'.format(model_name)), 'w') as f:
            ret = metrics.print_metrics_multilabel(val_y, val_activations)
            ret = {k: float(v) for k, v in ret.items() if k != 'auc_scores'}
            json.dump(ret, f)

        with open(os.path.join(result_dir, 'test_{}.json'.format(model_name)), 'w') as f:
            ret = metrics.print_metrics_multilabel(test_y, test_activations)
            ret = {k: float(v) for k, v in ret.items() if k != 'auc_scores'}
            json.dump(ret, f)

        save_results(test_names, test_ts, test_activations, test_y,
                     os.path.join(args.output_dir, 'predictions', model_name + '.csv'))
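# Illustrative only: one of the per-split JSON summaries written above can be
# loaded back to compare configurations. The result_dir and model_name values
# below are hypothetical examples of the naming pattern used in the loop above.
import json
import os

result_dir = 'results'
model_name = 'all.all.l1.C0.1'
with open(os.path.join(result_dir, 'val_{}.json'.format(model_name))) as f:
    val_metrics = json.load(f)
print(val_metrics.get('ave_auc_macro'))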
def calc_metrics(self, data_gen, history, dataset, logs):
    ihm_y_true = []
    decomp_y_true = []
    los_y_true = []
    pheno_y_true = []

    ihm_pred = []
    decomp_pred = []
    los_pred = []
    pheno_pred = []

    for i in range(data_gen.steps):
        if self.verbose == 1:
            print("\tdone {}/{}".format(i, data_gen.steps), end='\r')
        (X, y, los_y_reg) = data_gen.next(return_y_true=True)
        outputs = self.model.predict(X, batch_size=self.batch_size)

        ihm_M = X[1]
        decomp_M = X[2]
        los_M = X[3]

        if not data_gen.target_repl:  # no target replication
            (ihm_p, decomp_p, los_p, pheno_p) = outputs
            (ihm_t, decomp_t, los_t, pheno_t) = y
        else:  # target replication
            (ihm_p, _, decomp_p, los_p, pheno_p, _) = outputs
            (ihm_t, _, decomp_t, los_t, pheno_t, _) = y

        los_t = los_y_reg  # real value not the label

        # ihm
        for (m, t, p) in zip(ihm_M.flatten(), ihm_t.flatten(), ihm_p.flatten()):
            if np.equal(m, 1):
                ihm_y_true.append(t)
                ihm_pred.append(p)

        # decomp
        for (m, t, p) in zip(decomp_M.flatten(), decomp_t.flatten(), decomp_p.flatten()):
            if np.equal(m, 1):
                decomp_y_true.append(t)
                decomp_pred.append(p)

        # los
        if los_p.shape[-1] == 1:  # regression
            for (m, t, p) in zip(los_M.flatten(), los_t.flatten(), los_p.flatten()):
                if np.equal(m, 1):
                    los_y_true.append(t)
                    los_pred.append(p)
        else:  # classification
            for (m, t, p) in zip(los_M.flatten(), los_t.flatten(), los_p.reshape((-1, 10))):
                if np.equal(m, 1):
                    los_y_true.append(t)
                    los_pred.append(p)

        # pheno
        for (t, p) in zip(pheno_t.reshape((-1, 25)), pheno_p.reshape((-1, 25))):
            pheno_y_true.append(t)
            pheno_pred.append(p)

    print('\n')

    # ihm
    print("\n ================= 48h mortality ================")
    ihm_pred = np.array(ihm_pred)
    ihm_pred = np.stack([1 - ihm_pred, ihm_pred], axis=1)
    ret = metrics.print_metrics_binary(ihm_y_true, ihm_pred)
    for k, v in ret.items():
        logs[dataset + '_ihm_' + k] = v

    # decomp
    print("\n ================ decompensation ================")
    decomp_pred = np.array(decomp_pred)
    decomp_pred = np.stack([1 - decomp_pred, decomp_pred], axis=1)
    ret = metrics.print_metrics_binary(decomp_y_true, decomp_pred)
    for k, v in ret.items():
        logs[dataset + '_decomp_' + k] = v

    # los
    print("\n ================ length of stay ================")
    if self.partition == 'log':
        los_pred = [metrics.get_estimate_log(x, 10) for x in los_pred]
        ret = metrics.print_metrics_log_bins(los_y_true, los_pred)
    if self.partition == 'custom':
        los_pred = [metrics.get_estimate_custom(x, 10) for x in los_pred]
        ret = metrics.print_metrics_custom_bins(los_y_true, los_pred)
    if self.partition == 'none':
        ret = metrics.print_metrics_regression(los_y_true, los_pred)
    for k, v in ret.items():
        logs[dataset + '_los_' + k] = v

    # pheno
    print("\n =================== phenotype ==================")
    pheno_pred = np.array(pheno_pred)
    ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred)
    for k, v in ret.items():
        logs[dataset + '_pheno_' + k] = v

    history.append(logs)
def do_epoch(mode, epoch):
    # mode is 'train' or 'test'
    ihm_predictions = []
    ihm_answers = []

    los_predictions = []
    los_answers = []

    ph_predictions = []
    ph_answers = []

    decomp_predictions = []
    decomp_answers = []

    avg_loss = 0.0
    sum_loss = 0.0
    prev_time = time.time()

    batches_per_epoch = network.get_batches_per_epoch(mode)

    for i in range(0, batches_per_epoch):
        step_data = network.step(mode)

        ihm_pred = step_data["ihm_prediction"]
        los_pred = step_data["los_prediction"]
        ph_pred = step_data["ph_prediction"]
        decomp_pred = step_data["decomp_prediction"]

        current_loss = step_data["loss"]
        ihm_loss = step_data["ihm_loss"]
        los_loss = step_data["los_loss"]
        ph_loss = step_data["ph_loss"]
        decomp_loss = step_data["decomp_loss"]
        reg_loss = step_data["reg_loss"]

        data = step_data["data"]

        ihm_data = data[1]
        ihm_mask = [x[1] for x in ihm_data]
        ihm_label = [x[2] for x in ihm_data]

        los_data = data[2]
        los_mask = [x[0] for x in los_data]
        los_label = [x[1] for x in los_data]

        ph_data = data[3]
        ph_label = ph_data

        decomp_data = data[4]
        decomp_mask = [x[0] for x in decomp_data]
        decomp_label = [x[1] for x in decomp_data]

        avg_loss += current_loss
        sum_loss += current_loss

        for (x, mask, y) in zip(ihm_pred, ihm_mask, ihm_label):
            if (mask == 1):
                ihm_predictions.append(x)
                ihm_answers.append(y)

        for (sx, smask, sy) in zip(los_pred, los_mask, los_label):
            for (x, mask, y) in zip(sx, smask, sy):
                if (mask == 1):
                    los_predictions.append(x)
                    los_answers.append(y)

        for (x, y) in zip(ph_pred, ph_label):
            ph_predictions.append(x)
            ph_answers.append(y)

        for (sx, smask, sy) in zip(decomp_pred, decomp_mask, decomp_label):
            for (x, mask, y) in zip(sx, smask, sy):
                if (mask == 1):
                    decomp_predictions.append(x)
                    decomp_answers.append(y)

        if ((i + 1) % args.log_every == 0):
            cur_time = time.time()
            print " {}ing {}.{} / {} loss: {:8.4f} = {:1.2f} + {:8.2f} + {:1.2f} + "\
                  "{:1.2f} + {:.2f} avg_loss: {:6.4f} time: {:6.4f}".format(
                      mode, epoch, i * args.batch_size,
                      batches_per_epoch * args.batch_size,
                      float(current_loss), float(ihm_loss), float(los_loss),
                      float(ph_loss), float(decomp_loss), float(reg_loss),
                      float(avg_loss / args.log_every),
                      float(cur_time - prev_time))
            avg_loss = 0
            prev_time = cur_time

        if np.isnan(current_loss):
            print "loss: {:6.4f} = {:1.2f} + {:8.2f} + {:1.2f} + {:1.2f} + {:.2f}".format(
                float(current_loss), float(ihm_loss), float(los_loss),
                float(ph_loss), float(decomp_loss), float(reg_loss))
            raise Exception("current loss IS NaN. This should never happen :)")

    sum_loss /= batches_per_epoch
    print "\n %s loss = %.5f" % (mode, sum_loss)

    eps = 1e-13
    if args.ihm_C > eps:
        print "\n ================= 48h mortality ================"
        metrics.print_metrics_binary(ihm_answers, ihm_predictions)

    if args.los_C > eps:
        print "\n ================ length of stay ================"
        if args.partition == 'log':
            metrics.print_metrics_log_bins(los_answers, los_predictions)
        else:
            metrics.print_metrics_custom_bins(los_answers, los_predictions)

    if args.ph_C > eps:
        print "\n =================== phenotype =================="
        metrics.print_metrics_multilabel(ph_answers, ph_predictions)

    if args.decomp_C > eps:
        print "\n ================ decompensation ================"
        metrics.print_metrics_binary(decomp_answers, decomp_predictions)

    return sum_loss
if args.los_C > 0:
    print "\n ================ length of stay ================"
    if args.partition == 'log':
        los_pred = [metrics.get_estimate_log(x, 10) for x in los_pred]
        los_ret = metrics.print_metrics_log_bins(los_y_true, los_pred)
    if args.partition == 'custom':
        los_pred = [metrics.get_estimate_custom(x, 10) for x in los_pred]
        los_ret = metrics.print_metrics_custom_bins(los_y_true, los_pred)
    if args.partition == 'none':
        los_ret = metrics.print_metrics_regression(los_y_true, los_pred)

# pheno
if args.pheno_C > 0:
    print "\n =================== phenotype =================="
    pheno_pred = np.array(pheno_pred)
    pheno_ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred)

print "Saving the predictions in test_predictions/task directories ..."

# ihm
ihm_path = os.path.join("test_predictions/ihm", os.path.basename(args.load_state)) + ".csv"
ihm_utils.save_results(ihm_names, ihm_pred, ihm_y_true, ihm_path)

# decomp
decomp_path = os.path.join("test_predictions/decomp", os.path.basename(args.load_state)) + ".csv"
decomp_utils.save_results(decomp_names, decomp_ts, decomp_pred, decomp_y_true, decomp_path)

# los
los_path = os.path.join("test_predictions/los", os.path.basename(args.load_state)) + ".csv"
los_utils.save_results(los_names, los_ts, los_pred, los_y_true, los_path)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--period', type=str, default='all',
                        help='specifies which period extract features from',
                        choices=['first4days', 'first8days', 'last12hours',
                                 'first25percent', 'first50percent', 'all'])
    parser.add_argument('--features', type=str, default='all',
                        help='specifies what features to extract',
                        choices=['all', 'len', 'all_but_len'])
    args = parser.parse_args()
    print(args)

    # penalties = ['l2', 'l2', 'l2', 'l2', 'l2', 'l2', 'l1', 'l1', 'l1', 'l1', 'l1']
    # Cs = [1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 1.0, 0.1, 0.01, 0.001, 0.0001]
    penalties = ['l1']
    Cs = [0.1]

    train_reader = PhenotypingReader(dataset_dir='../../../data/phenotyping/train/',
                                     listfile='../../../data/phenotyping/train_listfile.csv')
    val_reader = PhenotypingReader(dataset_dir='../../../data/phenotyping/train/',
                                   listfile='../../../data/phenotyping/val_listfile.csv')
    test_reader = PhenotypingReader(dataset_dir='../../../data/phenotyping/test/',
                                    listfile='../../../data/phenotyping/test_listfile.csv')

    print('Reading data and extracting features ...')
    (train_X, train_y, train_names, train_ts) = read_and_extract_features(
        train_reader, args.period, args.features)
    train_y = np.array(train_y)

    (val_X, val_y, val_names, val_ts) = read_and_extract_features(
        val_reader, args.period, args.features)
    val_y = np.array(val_y)

    (test_X, test_y, test_names, test_ts) = read_and_extract_features(
        test_reader, args.period, args.features)
    test_y = np.array(test_y)

    print("train set shape: {}".format(train_X.shape))
    print("validation set shape: {}".format(val_X.shape))
    print("test set shape: {}".format(test_X.shape))

    print('Imputing missing values ...')
    imputer = Imputer(missing_values=np.nan, strategy='mean', axis=0, verbose=0, copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    n_tasks = 25
    common_utils.create_directory('results')

    for (penalty, C) in zip(penalties, Cs):
        model_name = '{}.{}.{}.C{}'.format(args.period, args.features, penalty, C)

        train_activations = np.zeros(shape=train_y.shape, dtype=float)
        val_activations = np.zeros(shape=val_y.shape, dtype=float)
        test_activations = np.zeros(shape=test_y.shape, dtype=float)

        for task_id in range(n_tasks):
            print('Starting task {}'.format(task_id))

            logreg = LogisticRegression(penalty=penalty, C=C, random_state=42)
            logreg.fit(train_X, train_y[:, task_id])

            train_preds = logreg.predict_proba(train_X)
            train_activations[:, task_id] = train_preds[:, 1]

            val_preds = logreg.predict_proba(val_X)
            val_activations[:, task_id] = val_preds[:, 1]

            test_preds = logreg.predict_proba(test_X)
            test_activations[:, task_id] = test_preds[:, 1]

        with open(os.path.join('results', 'train_{}.json'.format(model_name)), 'w') as f:
            ret = metrics.print_metrics_multilabel(train_y, train_activations)
            ret = {k: float(v) for k, v in ret.items() if k != 'auc_scores'}
            json.dump(ret, f)

        with open(os.path.join('results', 'val_{}.json'.format(model_name)), 'w') as f:
            ret = metrics.print_metrics_multilabel(val_y, val_activations)
            ret = {k: float(v) for k, v in ret.items() if k != 'auc_scores'}
            json.dump(ret, f)

        with open(os.path.join('results', 'test_{}.json'.format(model_name)), 'w') as f:
            ret = metrics.print_metrics_multilabel(test_y, test_activations)
            ret = {k: float(v) for k, v in ret.items() if k != 'auc_scores'}
            json.dump(ret, f)

        save_results(test_names, test_ts, test_activations, test_y,
                     os.path.join('predictions', model_name + '.csv'))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('prediction', type=str)
    parser.add_argument('--test_listfile', type=str,
                        default='../data/phenotyping/test/listfile.csv')
    parser.add_argument('--n_iters', type=int, default=10000)
    parser.add_argument('--save_file', type=str, default='pheno_results.json')
    args = parser.parse_args()

    pred_df = pd.read_csv(args.prediction, index_col=False, dtype={'period_length': np.float32})
    test_df = pd.read_csv(args.test_listfile, index_col=False, dtype={'period_length': np.float32})

    n_tasks = 25
    labels_cols = ["label_{}".format(i) for i in range(1, n_tasks + 1)]
    test_df.columns = list(test_df.columns[:2]) + labels_cols

    df = test_df.merge(pred_df, left_on='stay', right_on='stay', how='left', suffixes=['_l', '_r'])
    assert (df['pred_1'].isnull().sum() == 0)
    assert (df['period_length_l'].equals(df['period_length_r']))
    for i in range(1, n_tasks + 1):
        assert (df['label_{}_l'.format(i)].equals(df['label_{}_r'.format(i)]))

    metrics = [('Macro ROC AUC', 'ave_auc_macro'),
               ('Micro ROC AUC', 'ave_auc_micro'),
               ('Weighted ROC AUC', 'ave_auc_weighted')]

    data = np.zeros((df.shape[0], 50))
    for i in range(1, n_tasks + 1):
        data[:, i - 1] = df['pred_{}'.format(i)]
        data[:, 25 + i - 1] = df['label_{}_l'.format(i)]

    results = dict()
    results['n_iters'] = args.n_iters

    ret = print_metrics_multilabel(data[:, 25:], data[:, :25], verbose=0)
    for (m, k) in metrics:
        results[m] = dict()
        results[m]['value'] = ret[k]
        results[m]['runs'] = []

    for i in range(1, n_tasks + 1):
        m = 'ROC AUC of task {}'.format(i)
        results[m] = dict()
        results[m]['value'] = print_metrics_binary(data[:, 25 + i - 1], data[:, i - 1], verbose=0)['auroc']
        results[m]['runs'] = []

    for iteration in range(args.n_iters):
        cur_data = sk_utils.resample(data, n_samples=len(data))
        ret = print_metrics_multilabel(cur_data[:, 25:], cur_data[:, :25], verbose=0)
        for (m, k) in metrics:
            results[m]['runs'].append(ret[k])
        for i in range(1, n_tasks + 1):
            m = 'ROC AUC of task {}'.format(i)
            cur_auc = print_metrics_binary(cur_data[:, 25 + i - 1], cur_data[:, i - 1], verbose=0)['auroc']
            results[m]['runs'].append(cur_auc)

    reported_metrics = [m for m, k in metrics]
    reported_metrics += ['ROC AUC of task {}'.format(i) for i in range(1, n_tasks + 1)]

    for m in reported_metrics:
        runs = results[m]['runs']
        results[m]['mean'] = np.mean(runs)
        results[m]['median'] = np.median(runs)
        results[m]['std'] = np.std(runs)
        results[m]['2.5% percentile'] = np.percentile(runs, 2.5)
        results[m]['97.5% percentile'] = np.percentile(runs, 97.5)
        del results[m]['runs']

    print "Saving the results (including task specific metrics) in {} ...".format(args.save_file)
    with open(args.save_file, 'w') as f:
        json.dump(results, f)

    print "Printing the summary of results (task specific metrics are skipped) ..."
    for i in range(1, n_tasks + 1):
        m = 'ROC AUC of task {}'.format(i)
        del results[m]
    print results
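# The evaluation above uses a nonparametric bootstrap: resample stays with
# replacement, recompute each metric, and report the 2.5/97.5 percentiles as a
# 95% confidence interval. A small self-contained sketch of that pattern is
# given below; the function and variable names are illustrative, not from the
# script above.
import numpy as np
from sklearn import utils as sk_utils

def bootstrap_ci(data, metric_fn, n_iters=1000, seed=0):
    # data: (n_samples, ...) array; metric_fn maps a resampled array to a float
    rng = np.random.RandomState(seed)
    runs = []
    for _ in range(n_iters):
        sample = sk_utils.resample(data, n_samples=len(data), random_state=rng)
        runs.append(metric_fn(sample))
    return np.percentile(runs, 2.5), np.median(runs), np.percentile(runs, 97.5)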
test_data_gen = utils.BatchGen(test_reader, discretizer,
                               normalizer, args.batch_size,
                               args.small_part, target_repl,
                               shuffle=False, return_names=True)

names = []
ts = []
labels = []
predictions = []

for i in range(test_data_gen.steps):
    print("predicting {} / {}".format(i, test_data_gen.steps), end='\r')
    ret = next(test_data_gen)
    x = ret["data"][0]
    y = ret["data"][1]
    cur_names = ret["names"]
    cur_ts = ret["ts"]

    x = np.array(x)
    pred = model.predict_on_batch(x)
    predictions += list(pred)
    labels += list(y)
    names += list(cur_names)
    ts += list(cur_ts)

metrics.print_metrics_multilabel(labels, predictions, stochastic=stochastic)

path = os.path.join(args.output_dir, "test_predictions", os.path.basename(args.load_state)) + ".csv"
utils.save_results(names, ts, predictions, labels, path, stochastic=stochastic)

else:
    raise ValueError("Wrong value for args.mode")
# Find specific files within the directory for this task
task_str_path = retrieve_matching_file(task_prefix, args.str_path)
task_ustr_path = retrieve_matching_file(task_prefix, args.ustr_path)
task_test_str_path = retrieve_matching_file(task_prefix, args.test_str_path)
task_test_ustr_path = retrieve_matching_file(task_prefix, args.test_ustr_path)

print("\n\n-----------------------\nFitting Model For {}\n".format(task_prefix))
labels, preds = load_fit_save(args.mode, task_str_path, task_ustr_path,
                              task_test_str_path, task_test_ustr_path,
                              args.outdir, out_filename_prefix=task_prefix)

if merged_pred is None:
    merged_pred = np.expand_dims(preds, axis=1)
    merged_Y = np.expand_dims(labels, axis=1)
else:
    merged_pred = np.concatenate((merged_pred, np.expand_dims(preds, axis=1)), axis=1)
    merged_Y = np.concatenate((merged_Y, np.expand_dims(labels, axis=1)), axis=1)

print('\n============================')
print("Overall performance:")
metrics.print_metrics_multilabel(merged_Y, merged_pred)
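# Shape check (illustrative, not from the script above): appending per-task
# vectors along axis=1 yields an (n_samples, n_tasks) matrix that
# print_metrics_multilabel can consume directly, whereas the axis=0 variant
# earlier in this file builds (n_tasks, n_samples) and therefore needs the
# final .T transpose. The sizes below are arbitrary.
import numpy as np

n_samples, n_tasks = 4, 3
cols = [np.random.rand(n_samples) for _ in range(n_tasks)]
merged = np.concatenate([np.expand_dims(c, axis=1) for c in cols], axis=1)
assert merged.shape == (n_samples, n_tasks)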
if args.los_C > 0:
    print "\n ================ length of stay ================"
    if args.partition == 'log':
        los_pred = [metrics.get_estimate_log(x, 10) for x in los_pred]
        los_ret = metrics.print_metrics_log_bins(los_y_true, los_pred)
    if args.partition == 'custom':
        los_pred = [metrics.get_estimate_custom(x, 10) for x in los_pred]
        los_ret = metrics.print_metrics_custom_bins(los_y_true, los_pred)
    if args.partition == 'none':
        los_ret = metrics.print_metrics_regression(los_y_true, los_pred)

## pheno
if args.pheno_C > 0:
    print "\n =================== phenotype =================="
    pheno_pred = np.array(pheno_pred)
    pheno_ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred)

# TODO: save activations if needed

elif args.mode == 'test_single':
    # ensure that the code uses test_reader
    del train_reader
    del val_reader
    del train_data_gen
    del val_data_gen

    # Testing ihm
    from mimic3benchmark.readers import InHospitalMortalityReader
    from mimic3models.in_hospital_mortality.utils import read_chunk
    from mimic3models import nn_utils
if ((i + 1) % args.log_every == 0):
    cur_time = time.time()
    print (" testing: %d / %d \t loss: %.3f \t avg_loss: %.3f \t"
           " time: %.2fs" % ((i + 1) * args.batch_size,
                             n_batches * args.batch_size,
                             current_loss,
                             avg_loss / args.log_every,
                             cur_time - prev_time))
    avg_loss = 0
    prev_time = cur_time

if np.isnan(current_loss):
    raise Exception("current loss IS NaN. This should never happen :)")

sum_loss /= n_batches
print "\n test loss = %.5f" % sum_loss

ret = metrics.print_metrics_multilabel(y_true, predictions)

with open("results.txt", "w") as resfile:
    header = "ave_prec_micro,ave_prec_macro,ave_prec_weighted,"
    header += "ave_recall_micro,ave_recall_macro,ave_recall_weighted,"
    header += "ave_auc_micro,ave_auc_macro,ave_auc_weighted,"
    header += ','.join(["auc_%d" % i for i in range(NTASKS)])
    resfile.write(header + "\n")
    resfile.write("%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f," % (
        ret['ave_prec_micro'], ret['ave_prec_macro'], ret['ave_prec_weighted'],
        ret['ave_recall_micro'], ret['ave_recall_macro'], ret['ave_recall_weighted'],
        ret['ave_auc_micro'], ret['ave_auc_macro'], ret['ave_auc_weighted']))
    resfile.write(",".join(["%.6f" % x for x in ret['auc_scores']]) + "\n")
sum_loss /= batches_per_epoch
print "\n %s loss = %.5f" % (args.mode, sum_loss)

eps = 1e-13
if args.ihm_C > eps:
    print "\n ================= 48h mortality ================"
    metrics.print_metrics_binary(ihm_answers, ihm_predictions)

if args.los_C > eps:
    print "\n ================ length of stay ================"
    if args.partition == 'log':
        metrics.print_metrics_log_bins(los_answers, los_predictions)
    else:
        metrics.print_metrics_custom_bins(los_answers, los_predictions)

if args.ph_C > eps:
    print "\n =================== phenotype =================="
    metrics.print_metrics_multilabel(ph_answers, ph_predictions)

if args.decomp_C > eps:
    print "\n ================ decompensation ================"
    metrics.print_metrics_binary(decomp_answers, decomp_predictions)

with open("los_activations.txt", "w") as fout:
    # newline added so the header does not run into the first row
    fout.write("prediction, y_true\n")
    for (x, y) in zip(los_predictions, los_answers):
        fout.write("%.6f, %.6f\n" % (x, y))

else:
    raise Exception("unknown mode")