def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('prediction', type=str)
    parser.add_argument('--test_listfile', type=str,
                        default=os.path.join(os.path.dirname(__file__),
                                             '../../data/length-of-stay/test/listfile.csv'))
    parser.add_argument('--n_iters', type=int, default=1000)
    parser.add_argument('--save_file', type=str, default='los_results.json')
    args = parser.parse_args()

    pred_df = pd.read_csv(args.prediction, index_col=False,
                          dtype={'period_length': np.float32, 'y_true': np.float32})
    test_df = pd.read_csv(args.test_listfile, index_col=False,
                          dtype={'period_length': np.float32, 'y_true': np.float32})

    df = test_df.merge(pred_df, on=['stay', 'period_length'],
                       how='left', suffixes=['_l', '_r'])
    # Instead of asserting that every test row has a prediction and that both
    # y_true columns agree, report the number of mismatches and drop unmatched rows.
    print((df['y_true_l'] != df['y_true_r']).sum())
    df = df.dropna(subset=['y_true_r'])

    metrics = [('Kappa', 'kappa'), ('MAD', 'mad'), ('MSE', 'mse'), ('MAPE', 'mape')]

    data = np.zeros((df.shape[0], 2))
    data[:, 0] = np.array(df['prediction'])
    data[:, 1] = np.array(df['y_true_l'])

    results = dict()
    results['n_iters'] = args.n_iters
    ret = print_metrics_regression(data[:, 1], data[:, 0], verbose=0)
    for (m, k) in metrics:
        results[m] = dict()
        results[m]['value'] = ret[k]
        results[m]['runs'] = []

    for i in range(args.n_iters):
        cur_data = sk_utils.resample(data, n_samples=len(data))
        ret = print_metrics_regression(cur_data[:, 1], cur_data[:, 0], verbose=0)
        for (m, k) in metrics:
            results[m]['runs'].append(ret[k])

    for (m, k) in metrics:
        runs = results[m]['runs']
        results[m]['mean'] = np.mean(runs)
        results[m]['median'] = np.median(runs)
        results[m]['std'] = np.std(runs)
        results[m]['2.5% percentile'] = np.percentile(runs, 2.5)
        results[m]['97.5% percentile'] = np.percentile(runs, 97.5)
        del results[m]['runs']

    print("Saving the results in {} ...".format(args.save_file))
    with open(args.save_file, 'w') as f:
        json.dump(results, f)
    print(results)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('prediction', type=str)
    parser.add_argument('--test_listfile', type=str,
                        default='../data/length-of-stay/test/listfile.csv')
    parser.add_argument('--n_iters', type=int, default=1000)
    parser.add_argument('--save_file', type=str, default='los_results.json')
    args = parser.parse_args()

    pred_df = pd.read_csv(args.prediction, index_col=False,
                          dtype={'period_length': np.float32, 'y_true': np.float32})
    test_df = pd.read_csv(args.test_listfile, index_col=False,
                          dtype={'period_length': np.float32, 'y_true': np.float32})

    df = test_df.merge(pred_df, on=['stay', 'period_length'],
                       how='left', suffixes=['_l', '_r'])
    assert df['prediction'].isnull().sum() == 0
    assert df['y_true_l'].equals(df['y_true_r'])

    metrics = [('Kappa', 'kappa'), ('MAD', 'mad'), ('MSE', 'mse'), ('MAPE', 'mape')]

    data = np.zeros((df.shape[0], 2))
    data[:, 0] = np.array(df['prediction'])
    data[:, 1] = np.array(df['y_true_l'])

    results = dict()
    results['n_iters'] = args.n_iters
    ret = print_metrics_regression(data[:, 1], data[:, 0], verbose=0)
    for (m, k) in metrics:
        results[m] = dict()
        results[m]['value'] = ret[k]
        results[m]['runs'] = []

    for i in range(args.n_iters):
        cur_data = sk_utils.resample(data, n_samples=len(data))
        ret = print_metrics_regression(cur_data[:, 1], cur_data[:, 0], verbose=0)
        for (m, k) in metrics:
            results[m]['runs'].append(ret[k])

    for (m, k) in metrics:
        runs = results[m]['runs']
        results[m]['mean'] = np.mean(runs)
        results[m]['median'] = np.median(runs)
        results[m]['std'] = np.std(runs)
        results[m]['2.5% percentile'] = np.percentile(runs, 2.5)
        results[m]['97.5% percentile'] = np.percentile(runs, 97.5)
        del results[m]['runs']

    print("Saving the results in {} ...".format(args.save_file))
    with open(args.save_file, 'w') as f:
        json.dump(results, f)
    print(results)
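# A minimal, self-contained sketch of the bootstrap percentile-interval pattern used in
# the two evaluation scripts above. It assumes only numpy and scikit-learn;
# sklearn.metrics.mean_absolute_error stands in for the repo's print_metrics_regression,
# so treat it as an illustration of the resampling recipe, not the benchmark's exact
# evaluation code.
import numpy as np
from sklearn import utils as sk_utils
from sklearn.metrics import mean_absolute_error


def bootstrap_mad_ci(y_true, y_pred, n_iters=1000, seed=0):
    """Point estimate plus a 95% bootstrap percentile interval for MAD."""
    rng = np.random.RandomState(seed)
    data = np.stack([np.asarray(y_true), np.asarray(y_pred)], axis=1)
    point = mean_absolute_error(data[:, 0], data[:, 1])
    runs = []
    for _ in range(n_iters):
        sample = sk_utils.resample(data, n_samples=len(data), random_state=rng)
        runs.append(mean_absolute_error(sample[:, 0], sample[:, 1]))
    return point, np.percentile(runs, 2.5), np.percentile(runs, 97.5)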
def calc_metrics(self, data_gen, history, dataset, logs):
    y_true = []
    predictions = []
    # Predict over the whole generator in a single call; this replaces the older
    # per-batch loop (with its deep-supervision masking) that used to live here.
    pred = self.model.predict(data_gen, batch_size=self.batch_size,
                              verbose=self.verbose, steps=data_gen.steps,
                              workers=self.workers, use_multiprocessing=True)
    y = data_gen.get_y(len(pred))
    y_true += list(y)
    predictions += list(pred)
    print('\n')
    if self.partition == 'log':
        predictions = [metrics.get_estimate_log(x, 10) for x in predictions]
        ret = metrics.print_metrics_log_bins(y_true, predictions)
    if self.partition == 'custom':
        predictions = [metrics.get_estimate_custom(x, 10) for x in predictions]
        ret = metrics.print_metrics_custom_bins(y_true, predictions)
    if self.partition == 'none':
        ret = metrics.print_metrics_regression(y_true, predictions)
    for k, v in ret.items():
        logs[dataset + '_' + k] = v
    history.append(ret)
def calc_metrics(self, data_gen, history, dataset, logs):
    y_true = []
    predictions = []
    for i in range(data_gen.steps):
        if self.verbose == 1:
            print("\tdone {}/{}".format(i, data_gen.steps), end='\r')
        if self.use_time:
            ([x, t], y_processed, y) = data_gen.next(return_y_true=True)
            pred = self.model.predict([x, t], batch_size=self.batch_size)
        else:
            (x, y_processed, y) = data_gen.next(return_y_true=True)
            pred = self.model.predict(x, batch_size=self.batch_size)
        if isinstance(x, list) and len(x) == 2:  # deep supervision
            if pred.shape[-1] == 1:  # regression
                pred_flatten = pred.flatten()
            else:  # classification
                pred_flatten = pred.reshape((-1, 10))
            for m, t, p in zip(x[1].flatten(), y.flatten(), pred_flatten):
                if np.equal(m, 1):
                    y_true.append(t)
                    predictions.append(p)
        else:
            if pred.shape[-1] == 1:
                y_true += list(y.flatten())
                predictions += list(pred.flatten())
            else:
                y_true += list(y)
                predictions += list(pred)
    print('\n')
    if self.partition == 'log':
        predictions = [metrics.get_estimate_log(x, 10) for x in predictions]
        ret = metrics.print_metrics_log_bins(y_true, predictions)
    if self.partition == 'custom':
        predictions = [metrics.get_estimate_custom(x, 10) for x in predictions]
        ret = metrics.print_metrics_custom_bins(y_true, predictions)
    if self.partition == 'none':
        ret = metrics.print_metrics_regression(y_true, predictions)
    for k, v in ret.items():
        logs[dataset + '_' + k] = v
    history.append(ret)
        names += list(cur_names)
        ts += list(cur_ts)

    if stochastic:
        aleatoric = [np.mean(x * (1. - x), axis=0) for x in predictions]
        epistemic = [np.var(x, axis=0) for x in predictions]
        predictions = [np.mean(x, axis=0) for x in predictions]

    if args.partition == 'log':
        predictions = [metrics.get_estimate_log(x, 10) for x in predictions]
        metrics.print_metrics_log_bins(labels, predictions)
    if args.partition == 'custom':
        predictions = [metrics.get_estimate_custom(x, 10) for x in predictions]
        metrics.print_metrics_custom_bins(labels, predictions)
    if args.partition == 'none':
        metrics.print_metrics_regression(labels, predictions)
        predictions = [x[0] for x in predictions]

    path = os.path.join(args.output_dir, "test_predictions",
                        os.path.basename(args.load_state)) + ".csv"

    if stochastic:
        ee = np.mean(np.array(epistemic))
        aa = np.mean(np.array(aleatoric))
        print("Epistemic uncertainty =", ee)
        print("Aleatoric uncertainty =", aa)
        print("Uncertainty =", ee + aa)

    utils.save_results(names, ts, predictions, labels, path)
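# A small numpy sketch of the uncertainty split used above: given K stochastic forward
# passes per example (e.g. MC dropout), aleatoric uncertainty is the mean of p * (1 - p)
# over the samples and epistemic uncertainty is the variance of p over the samples.
# The (n_examples, n_mc_samples) layout below is an assumption made for illustration;
# the code above keeps one array of samples per example instead.
import numpy as np


def split_uncertainty(mc_probs):
    """mc_probs: array of shape (n_examples, n_mc_samples) with predicted probabilities."""
    aleatoric = np.mean(mc_probs * (1.0 - mc_probs), axis=1)
    epistemic = np.var(mc_probs, axis=1)
    point_pred = np.mean(mc_probs, axis=1)
    return point_pred, aleatoric, epistemic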
        print("\n ================ decompensation ================")
        decomp_pred = np.array(decomp_pred)
        decomp_pred = np.stack([1 - decomp_pred, decomp_pred], axis=1)
        decomp_ret = metrics.print_metrics_binary(decomp_y_true, decomp_pred)

    ## los
    if args.los_C > 0:
        print("\n ================ length of stay ================")
        if args.partition == 'log':
            los_pred = [metrics.get_estimate_log(x, 10) for x in los_pred]
            los_ret = metrics.print_metrics_log_bins(los_y_true, los_pred)
        if args.partition == 'custom':
            los_pred = [metrics.get_estimate_custom(x, 10) for x in los_pred]
            los_ret = metrics.print_metrics_custom_bins(los_y_true, los_pred)
        if args.partition == 'none':
            los_ret = metrics.print_metrics_regression(los_y_true, los_pred)

    ## pheno
    if args.pheno_C > 0:
        print("\n =================== phenotype ==================")
        pheno_pred = np.array(pheno_pred)
        pheno_ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred)

    # TODO: save activations if needed

elif args.mode == 'test_single':
    # ensure that the code uses test_reader
    del train_reader
    del val_reader
    del train_data_gen
    del val_data_gen
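# Small numpy sketch of the stacking step above: the snippet expands a vector of
# positive-class probabilities p into rows of [1 - p, p] before handing it to
# print_metrics_binary. Purely illustrative; the helper name below is made up.
import numpy as np


def to_two_column_probs(pos_probs):
    """Turn a flat vector of positive-class probabilities into an (N, 2) probability matrix."""
    pos_probs = np.asarray(pos_probs, dtype=np.float64).flatten()
    return np.stack([1.0 - pos_probs, pos_probs], axis=1)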
if not os.path.exists("activations"):
    os.mkdir("activations")
if not os.path.exists("results"):
    os.mkdir("results")

with open(os.path.join("results", "log_" + file_name + ".txt"), "w") as resfile:
    resfile.write("mad, mse, mape, kappa\n")

    print("Scores on train set")
    pred = linreg.predict(train_X)
    pred[pred > 8] = 8  # cap predictions in log space before inverting
    ret = metrics.print_metrics_regression(train_y, np.exp(pred) - 1)
    resfile.write("%.6f,%.6f,%.6f,%.6f\n" % (ret['mad'], ret['mse'],
                                             ret['mape'], ret['kappa']))

    print("Scores on validation set")
    pred = linreg.predict(val_X)
    pred[pred > 8] = 8
    ret = metrics.print_metrics_regression(val_y, np.exp(pred) - 1)
    resfile.write("%.6f,%.6f,%.6f,%.6f\n" % (ret['mad'], ret['mse'],
                                             ret['mape'], ret['kappa']))

############################### TESTING #############################

# predict on test
del train_reader
del val_reader
del train_X
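# Sketch of the log-space regression recipe used above, assuming the model was fit on
# log(1 + y): predictions are clipped in log space (the cap of 8 mirrors the code above)
# and mapped back with exp(p) - 1 before scoring. Function and variable names are
# illustrative, not part of the repo.
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


def fit_and_score_log_target(train_X, train_y, val_X, val_y, cap=8.0):
    model = LinearRegression()
    model.fit(train_X, np.log(1.0 + train_y))   # train in log space
    pred = model.predict(val_X)
    pred = np.minimum(pred, cap)                # avoid absurdly long stays
    pred = np.exp(pred) - 1.0                   # back to the original scale
    return model, mean_squared_error(val_y, pred)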
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--period', type=str, default='all',
                        help='specifies which period to extract features from',
                        choices=['first4days', 'first8days', 'last12hours',
                                 'first25percent', 'first50percent', 'all'])
    parser.add_argument('--features', type=str, default='all',
                        help='specifies what features to extract',
                        choices=['all', 'len', 'all_but_len'])
    parser.add_argument('--data', type=str,
                        help='Path to the data of length-of-stay task',
                        default=os.path.join(os.path.dirname(__file__),
                                             '../../../data/length-of-stay/'))
    parser.add_argument('--output_dir', type=str,
                        help='Directory relative to which all output files are stored',
                        default='.')
    args = parser.parse_args()
    print(args)

    train_reader = LengthOfStayReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'train_listfile.csv'))
    val_reader = LengthOfStayReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'val_listfile.csv'))
    test_reader = LengthOfStayReader(
        dataset_dir=os.path.join(args.data, 'test'),
        listfile=os.path.join(args.data, 'test_listfile.csv'))

    print('Reading data and extracting features ...')
    n_train = min(100000, train_reader.get_number_of_examples())
    n_val = min(100000, val_reader.get_number_of_examples())

    (train_X, train_y, train_names, train_ts) = read_and_extract_features(
        train_reader, n_train, args.period, args.features)
    (val_X, val_y, val_names, val_ts) = read_and_extract_features(
        val_reader, n_val, args.period, args.features)
    (test_X, test_y, test_names, test_ts) = read_and_extract_features(
        test_reader, test_reader.get_number_of_examples(), args.period, args.features)
    print(train_X.shape)

    print('Imputing missing values ...')
    imputer = Imputer(missing_values=np.nan, strategy='mean', axis=0, verbose=0, copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    file_name = "{}.{}".format(args.period, args.features)

    linreg = LinearRegression()
    linreg.fit(train_X, train_y)

    result_dir = os.path.join(args.output_dir, 'results')
    common_utils.create_directory(result_dir)

    with open(os.path.join(result_dir, 'train_{}.json'.format(file_name)), "w") as res_file:
        ret = print_metrics_regression(train_y, linreg.predict(train_X))
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    with open(os.path.join(result_dir, 'val_{}.json'.format(file_name)), 'w') as res_file:
        ret = print_metrics_regression(val_y, linreg.predict(val_X))
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    prediction = linreg.predict(test_X)

    with open(os.path.join(result_dir, 'test_{}.json'.format(file_name)), 'w') as res_file:
        ret = print_metrics_regression(test_y, prediction)
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    save_results(test_names, test_ts, prediction, test_y,
                 os.path.join(args.output_dir, 'predictions', file_name + '.csv'))
def calc_metrics(self, data_gen, history, dataset, logs):
    ihm_y_true = []
    decomp_y_true = []
    los_y_true = []
    pheno_y_true = []

    ihm_pred = []
    decomp_pred = []
    los_pred = []
    pheno_pred = []

    for i in range(data_gen.steps):
        if self.verbose == 1:
            print("\tdone {}/{}".format(i, data_gen.steps), end='\r')
        (X, y, los_y_reg) = data_gen.next(return_y_true=True)
        outputs = self.model.predict(X, batch_size=self.batch_size)

        ihm_M = X[1]
        decomp_M = X[2]
        los_M = X[3]

        if not data_gen.target_repl:  # no target replication
            (ihm_p, decomp_p, los_p, pheno_p) = outputs
            (ihm_t, decomp_t, los_t, pheno_t) = y
        else:  # target replication
            (ihm_p, _, decomp_p, los_p, pheno_p, _) = outputs
            (ihm_t, _, decomp_t, los_t, pheno_t, _) = y

        los_t = los_y_reg  # real value, not the label

        # ihm
        for (m, t, p) in zip(ihm_M.flatten(), ihm_t.flatten(), ihm_p.flatten()):
            if np.equal(m, 1):
                ihm_y_true.append(t)
                ihm_pred.append(p)

        # decomp
        for (m, t, p) in zip(decomp_M.flatten(), decomp_t.flatten(), decomp_p.flatten()):
            if np.equal(m, 1):
                decomp_y_true.append(t)
                decomp_pred.append(p)

        # los
        if los_p.shape[-1] == 1:  # regression
            for (m, t, p) in zip(los_M.flatten(), los_t.flatten(), los_p.flatten()):
                if np.equal(m, 1):
                    los_y_true.append(t)
                    los_pred.append(p)
        else:  # classification
            for (m, t, p) in zip(los_M.flatten(), los_t.flatten(), los_p.reshape((-1, 10))):
                if np.equal(m, 1):
                    los_y_true.append(t)
                    los_pred.append(p)

        # pheno
        for (t, p) in zip(pheno_t.reshape((-1, 25)), pheno_p.reshape((-1, 25))):
            pheno_y_true.append(t)
            pheno_pred.append(p)
    print('\n')

    # ihm
    print("\n ================= 48h mortality ================")
    ihm_pred = np.array(ihm_pred)
    ihm_pred = np.stack([1 - ihm_pred, ihm_pred], axis=1)
    ret = metrics.print_metrics_binary(ihm_y_true, ihm_pred)
    for k, v in ret.items():
        logs[dataset + '_ihm_' + k] = v

    # decomp
    print("\n ================ decompensation ================")
    decomp_pred = np.array(decomp_pred)
    decomp_pred = np.stack([1 - decomp_pred, decomp_pred], axis=1)
    ret = metrics.print_metrics_binary(decomp_y_true, decomp_pred)
    for k, v in ret.items():
        logs[dataset + '_decomp_' + k] = v

    # los
    print("\n ================ length of stay ================")
    if self.partition == 'log':
        los_pred = [metrics.get_estimate_log(x, 10) for x in los_pred]
        ret = metrics.print_metrics_log_bins(los_y_true, los_pred)
    if self.partition == 'custom':
        los_pred = [metrics.get_estimate_custom(x, 10) for x in los_pred]
        ret = metrics.print_metrics_custom_bins(los_y_true, los_pred)
    if self.partition == 'none':
        ret = metrics.print_metrics_regression(los_y_true, los_pred)
    for k, v in ret.items():
        logs[dataset + '_los_' + k] = v

    # pheno
    print("\n =================== phenotype ==================")
    pheno_pred = np.array(pheno_pred)
    ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred)
    for k, v in ret.items():
        logs[dataset + '_pheno_' + k] = v

    history.append(logs)
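# Vectorized numpy equivalent of the masked-selection loops above: keep only the
# timesteps whose task mask equals 1. The shapes are illustrative assumptions,
# (batch, time) for masks and targets, and (batch, time, n_bins) or (batch, time, 1)
# for the predictions.
import numpy as np


def select_masked(mask, target, pred):
    keep = np.equal(mask.flatten(), 1)
    if pred.shape[-1] == 1:                      # regression head
        return target.flatten()[keep], pred.flatten()[keep]
    n_bins = pred.shape[-1]                      # classification head over bins
    return target.flatten()[keep], pred.reshape((-1, n_bins))[keep]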
    else:
        if pred.shape[-1] == 1:
            y_true += list(y.flatten())
            predictions += list(pred.flatten())
        else:
            y_true += list(y)
            predictions += list(pred)
print('\n')

if args.partition == 'log':
    predictions = [metrics.get_estimate_log(x, 10) for x in predictions]
    ret = metrics.print_metrics_log_bins(y_true, predictions)
if args.partition == 'custom':
    predictions = [metrics.get_estimate_custom(x, 10) for x in predictions]
    ret = metrics.print_metrics_custom_bins(y_true, predictions)
if args.partition == 'none':
    ret = metrics.print_metrics_regression(y_true, predictions)

cur_val = ret['mse']
scheduler.step(cur_val)
current_lr = optimizer.param_groups[0]['lr']
if current_lr < 1e-5:
    with open(os.path.join(save_path, 'log.txt'), 'a') as fout:
        print('Early stop at step {}'.format(step), file=fout)
    exit()

with open(os.path.join(save_path, 'log.txt'), 'a') as fout:
    print(ret, file=fout)

is_best = cur_val < best_val
if is_best:
    best_val = cur_val
    ### save model
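# Minimal PyTorch sketch of the learning-rate-based early stop above: step a
# ReduceLROnPlateau scheduler on the validation MSE and stop once the learning rate
# has decayed below a threshold. The tiny linear model and the hyperparameters are
# placeholders, not the repo's configuration.
import torch
import torch.nn as nn

model = nn.Linear(16, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                       factor=0.5, patience=3)


def should_stop(val_mse, min_lr=1e-5):
    """Decay the LR when val MSE plateaus; report whether the caller should stop training."""
    scheduler.step(val_mse)
    return optimizer.param_groups[0]['lr'] < min_lr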
if args.decomp_C > 0:
    print("\n ================ decompensation ================")
    decomp_pred = np.array(decomp_pred)
    decomp_ret = metrics.print_metrics_binary(decomp_y_true, decomp_pred)

# los
if args.los_C > 0:
    print("\n ================ length of stay ================")
    if args.partition == 'log':
        los_pred = [metrics.get_estimate_log(x, 10) for x in los_pred]
        los_ret = metrics.print_metrics_log_bins(los_y_true, los_pred)
    if args.partition == 'custom':
        los_pred = [metrics.get_estimate_custom(x, 10) for x in los_pred]
        los_ret = metrics.print_metrics_custom_bins(los_y_true, los_pred)
    if args.partition == 'none':
        los_ret = metrics.print_metrics_regression(los_y_true, los_pred)

# pheno
if args.pheno_C > 0:
    print("\n =================== phenotype ==================")
    pheno_pred = np.array(pheno_pred)
    pheno_ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred)

print("Saving the predictions in test_predictions/task directories ...")

# ihm
ihm_path = os.path.join("test_predictions/ihm", os.path.basename(args.load_state)) + ".csv"
ihm_utils.save_results(ihm_names, ihm_pred, ihm_y_true, ihm_path)

# decomp
decomp_path = os.path.join("test_predictions/decomp", os.path.basename(args.load_state)) + ".csv"
linreg = LinearRegression()
linreg.fit(train_X, train_y)

if not os.path.exists("activations"):
    os.mkdir("activations")
if not os.path.exists("results"):
    os.mkdir("results")

with open(os.path.join("results", file_name + ".txt"), "w") as resfile:
    resfile.write("mad, mse, mape, kappa\n")

    print("Scores on train set")
    ret = metrics.print_metrics_regression(train_y, linreg.predict(train_X))
    resfile.write("%.6f,%.6f,%.6f,%.6f\n" % (ret['mad'], ret['mse'],
                                             ret['mape'], ret['kappa']))

    print("Scores on validation set")
    ret = metrics.print_metrics_regression(val_y, linreg.predict(val_X))
    resfile.write("%.6f,%.6f,%.6f,%.6f\n" % (ret['mad'], ret['mse'],
                                             ret['mape'], ret['kappa']))

############################### TESTING #############################
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--period', type=str, default='all',
                        help='specifies which period extract features from',
                        choices=['first4days', 'first8days', 'last12hours',
                                 'first25percent', 'first50percent', 'all'])
    parser.add_argument('--features', type=str, default='all',
                        help='specifies what features to extract',
                        choices=['all', 'len', 'all_but_len'])
    args = parser.parse_args()
    print(args)

    train_reader = LengthOfStayReader(dataset_dir='../../../data/length-of-stay/train/',
                                      listfile='../../../data/length-of-stay/train_listfile.csv')
    val_reader = LengthOfStayReader(dataset_dir='../../../data/length-of-stay/train/',
                                    listfile='../../../data/length-of-stay/val_listfile.csv')
    test_reader = LengthOfStayReader(dataset_dir='../../../data/length-of-stay/test/',
                                     listfile='../../../data/length-of-stay/test_listfile.csv')

    print('Reading data and extracting features ...')
    n_train = min(100000, train_reader.get_number_of_examples())
    n_val = min(100000, val_reader.get_number_of_examples())

    (train_X, train_y, train_names, train_ts) = read_and_extract_features(
        train_reader, n_train, args.period, args.features)
    (val_X, val_y, val_names, val_ts) = read_and_extract_features(
        val_reader, n_val, args.period, args.features)
    (test_X, test_y, test_names, test_ts) = read_and_extract_features(
        test_reader, test_reader.get_number_of_examples(), args.period, args.features)

    print('Imputing missing values ...')
    imputer = Imputer(missing_values=np.nan, strategy='mean', axis=0, verbose=0, copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    file_name = "{}.{}".format(args.period, args.features)

    linreg = LinearRegression()
    linreg.fit(train_X, train_y)

    common_utils.create_directory('results')

    with open(os.path.join("results", 'train_{}.json'.format(file_name)), "w") as res_file:
        ret = print_metrics_regression(train_y, linreg.predict(train_X))
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    with open(os.path.join('results', 'val_{}.json'.format(file_name)), 'w') as res_file:
        ret = print_metrics_regression(val_y, linreg.predict(val_X))
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    prediction = linreg.predict(test_X)

    with open(os.path.join('results', 'test_{}.json'.format(file_name)), 'w') as res_file:
        ret = print_metrics_regression(test_y, prediction)
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    save_results(test_names, test_ts, prediction, test_y,
                 os.path.join('predictions', file_name + '.csv'))
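# The Imputer class used above comes from older scikit-learn releases; a hedged sketch of
# the same impute -> standardize -> linear regression chain with the current API would
# look roughly like this (SimpleImputer replaces Imputer and does the same column-wise
# mean fill). This is an alternative formulation, not the script's original code.
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

los_pipeline = make_pipeline(
    SimpleImputer(missing_values=np.nan, strategy='mean'),
    StandardScaler(),
    LinearRegression(),
)
# Usage: los_pipeline.fit(train_X, train_y); prediction = los_pipeline.predict(test_X)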
def process_one_chunk(mode, chunk_index):
    assert (mode == "train" or mode == "test")

    if mode == "train":
        reader = train_reader
    if mode == "test":
        reader = val_reader

    (data, ts, ys, header) = utils.read_chunk(reader, chunk_size)
    data = utils.preprocess_chunk(data, ts, discretizer, normalizer)

    if mode == "train":
        network.set_datasets((data, ys), None)
    if mode == "test":
        network.set_datasets(None, (data, ys))

    network.shuffle_train_set()

    y_true = []
    predictions = []
    avg_loss = 0.0
    sum_loss = 0.0
    prev_time = time.time()

    n_batches = network.get_batches_per_epoch(mode)

    for i in range(0, n_batches):
        step_data = network.step(mode)
        prediction = step_data["prediction"]
        answers = step_data["answers"]
        current_loss = step_data["current_loss"]
        current_loss_mse = step_data["loss_mse"]
        current_loss_reg = step_data["loss_reg"]
        log = step_data["log"]

        avg_loss += current_loss
        sum_loss += current_loss

        for x in answers:
            y_true.append(x)
        for x in prediction:
            predictions.append(x)

        if (i + 1) % args.log_every == 0:
            cur_time = time.time()
            print(" %sing: %d.%d / %d \t loss: %.3f = %.3f + %.3f \t avg_loss: %.3f \t"
                  "%s \t time: %.2fs" % (mode, chunk_index, i * args.batch_size,
                                         n_batches * args.batch_size,
                                         current_loss, current_loss_mse, current_loss_reg,
                                         avg_loss / args.log_every,
                                         log, cur_time - prev_time))
            avg_loss = 0
            prev_time = cur_time

        if np.isnan(current_loss):
            raise Exception("current loss IS NaN. This should never happen :)")

    sum_loss /= n_batches
    print("\n %s loss = %.5f" % (mode, sum_loss))

    if args.network in ['lstm', 'lstm_log']:
        metrics.print_metrics_regression(y_true, predictions)
    if args.network == 'lstm_cf_log':
        metrics.print_metrics_log_bins(y_true, predictions)
    if args.network == 'lstm_cf_custom':
        metrics.print_metrics_custom_bins(y_true, predictions)

    return sum_loss
            predictions.append(x)

        if (i + 1) % args.log_every == 0:
            cur_time = time.time()
            print(" testing: %d / %d \t loss: %.3f \t avg_loss: %.3f \t"
                  " time: %.2fs" % ((i + 1) * args.batch_size,
                                    n_batches * args.batch_size,
                                    current_loss, avg_loss / args.log_every,
                                    cur_time - prev_time))
            avg_loss = 0
            prev_time = cur_time

        if np.isnan(current_loss):
            raise Exception("current loss IS NaN. This should never happen :)")

    sum_loss /= n_batches
    print("\n test loss = %.5f" % sum_loss)

    if args.network in ['lstm', 'lstm_log']:
        metrics.print_metrics_regression(y_true, predictions)
    if args.network == 'lstm_cf_log':
        metrics.print_metrics_log_bins(y_true, predictions)
    if args.network == 'lstm_cf_custom':
        metrics.print_metrics_custom_bins(y_true, predictions)

    with open("activations.txt", "w") as fout:
        fout.write("prediction, y_true\n")
        for (x, y) in zip(predictions, y_true):
            fout.write("%.6f, %.6f\n" % (x, y))

else:
    raise Exception("unknown mode")
    for i in range(test_data_gen.steps):
        print("\rpredicting {} / {}".format(i, test_data_gen.steps), end=' ')
        ret = test_data_gen.next(return_y_true=True)
        (x, y_processed, y) = ret["data"]
        cur_names = ret["names"]
        cur_ts = ret["ts"]

        x = np.array(x)
        pred = model.predict_on_batch(x)
        predictions += list(pred)
        labels += list(y)
        names += list(cur_names)
        ts += list(cur_ts)

    if args.partition == 'log':
        predictions = [metrics.get_estimate_log(x, 10) for x in predictions]
        metrics.print_metrics_log_bins(labels, predictions)
    if args.partition == 'custom':
        predictions = [metrics.get_estimate_custom(x, 10) for x in predictions]
        metrics.print_metrics_custom_bins(labels, predictions)
    if args.partition == 'none':
        metrics.print_metrics_regression(labels, predictions)
        predictions = [x[0] for x in predictions]

    path = os.path.join("test_predictions", os.path.basename(args.load_state)) + ".csv"
    utils.save_results(names, ts, predictions, labels, path)

else:
    raise ValueError("Wrong value for args.mode")
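# Hedged sketch of what turning a 10-bin probability vector into a point estimate
# (the role played by get_estimate_log / get_estimate_custom above) generally looks like:
# an expectation over per-bin representative values. The bin values in the usage comment
# are made-up placeholders, not the benchmark's actual bin definitions.
import numpy as np


def estimate_from_bins(bin_probs, bin_values):
    """bin_probs: shape (n_bins,) probabilities; bin_values: representative value per bin."""
    bin_probs = np.asarray(bin_probs, dtype=np.float64)
    return float(np.dot(bin_probs, np.asarray(bin_values, dtype=np.float64)))

# Usage (placeholder values, in days):
# estimate_from_bins(softmax_output, bin_values=[0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 10.0, 20.0])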