def main():
    """Evaluate saved in-hospital-mortality predictions with bootstrap CIs.

    Reads a prediction CSV and the test listfile, joins them on the 'stay'
    column, computes AUROC / AUPRC / min(+P, Se) on the full test set, then
    draws ``--n_iters`` bootstrap resamples to estimate mean, median, std
    and a 95% percentile interval for each metric.  The result dict is
    written as JSON to ``--save_file`` and echoed to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('prediction', type=str)
    parser.add_argument('--test_listfile', type=str,
                        default=os.path.join(
                            os.path.dirname(__file__),
                            '../../data/in-hospital-mortality/test/listfile.csv'))
    parser.add_argument('--n_iters', type=int, default=10000)
    parser.add_argument('--save_file', type=str, default='ihm_results.json')
    args = parser.parse_args()

    pred_df = pd.read_csv(args.prediction, index_col=False)
    test_df = pd.read_csv(args.test_listfile, index_col=False)

    df = test_df.merge(pred_df, left_on='stay', right_on='stay',
                       how='left', suffixes=['_l', '_r'])
    # Explicit raises instead of assert: asserts vanish under `python -O`,
    # and a silent mismatch here would invalidate every number below.
    if df['prediction'].isnull().sum() != 0:
        raise ValueError("some test stays have no matching prediction")
    if not df['y_true_l'].equals(df['y_true_r']):
        raise ValueError("labels in prediction file disagree with test listfile")

    metrics = [('AUC of ROC', 'auroc'),
               ('AUC of PRC', 'auprc'),
               ('min(+P, Se)', 'minpse')]

    # Column 0: predicted probability, column 1: true label.
    data = np.zeros((df.shape[0], 2))
    data[:, 0] = np.array(df['prediction'])
    data[:, 1] = np.array(df['y_true_l'])

    results = dict()
    results['n_iters'] = args.n_iters

    # Point estimate on the full (un-resampled) test set.
    ret = print_metrics_binary(data[:, 1], data[:, 0], verbose=0)
    for (m, k) in metrics:
        results[m] = dict()
        # float(): metric values are typically numpy scalars (e.g. float32),
        # which json.dump cannot serialize.
        results[m]['value'] = float(ret[k])
        results[m]['runs'] = []

    # Bootstrap: resample rows (prediction, label) with replacement.
    for i in range(args.n_iters):
        cur_data = sk_utils.resample(data, n_samples=len(data))
        ret = print_metrics_binary(cur_data[:, 1], cur_data[:, 0], verbose=0)
        for (m, k) in metrics:
            results[m]['runs'].append(float(ret[k]))

    for (m, k) in metrics:
        runs = results[m]['runs']
        results[m]['mean'] = float(np.mean(runs))
        results[m]['median'] = float(np.median(runs))
        results[m]['std'] = float(np.std(runs))
        results[m]['2.5% percentile'] = float(np.percentile(runs, 2.5))
        results[m]['97.5% percentile'] = float(np.percentile(runs, 97.5))
        # Per-run values are only needed for the summary stats above.
        del results[m]['runs']

    print("Saving the results in {} ...".format(args.save_file))
    with open(args.save_file, 'w') as f:
        json.dump(results, f)
    print(results)
def calc_metrics(self, data, history, dataset, logs):
    """Run the model over `data` in mini-batches and record binary metrics.

    Predictions are collected batch by batch, stacked into a two-column
    (negative, positive) probability array, scored with
    ``metrics.print_metrics_binary`` and written into both `logs`
    (prefixed with the dataset name) and `history`.
    """
    labels = []
    probs = []
    step = self.batch_size
    total = len(data[0])
    for start in range(0, total, step):
        if self.verbose == 1:
            print("\tdone {}/{}".format(start, total), end='\r')
        stop = start + step
        x = data[0][start:stop]
        if self.target_repl:
            # Targets are (main label, replicated label); only the main
            # output/label stream is scored.
            y = data[1][0][start:stop]
            y_repl = data[1][1][start:stop]
        else:
            y = data[1][start:stop]
        outputs = self.model.predict(x, batch_size=step)
        raw = outputs[0] if self.target_repl else outputs
        probs.extend(np.array(raw).flatten())
        labels.extend(np.array(y).flatten())
    print('\n')
    pos = np.array(probs)
    # Two-column layout expected by the metric helper: P(neg), P(pos).
    scores = np.stack([1 - pos, pos], axis=1)
    ret = metrics.print_metrics_binary(labels, scores)
    for k, v in ret.items():
        logs[dataset + '_' + k] = v
    history.append(ret)
def main():
    """Train and evaluate a 3-NN classifier for in-hospital mortality.

    Extracts period/feature subsets from the benchmark readers, imputes
    missing values with the training mean, standardizes features, fits a
    ``KNeighborsClassifier(n_neighbors=3)`` and dumps train/val/test
    metrics as JSON plus per-stay test predictions as CSV under
    ``--output_dir``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--C', type=float, default=1.0,
                        help='inverse of L1 / L2 regularization')
    parser.add_argument('--l1', dest='l2', action='store_false')
    parser.add_argument('--l2', dest='l2', action='store_true')
    parser.set_defaults(l2=True)
    parser.add_argument('--period', type=str, default='all',
                        help='specifies which period extract features from',
                        choices=['first4days', 'first8days', 'last12hours',
                                 'first25percent', 'first50percent', 'all'])
    parser.add_argument('--features', type=str, default='all',
                        help='specifies what features to extract',
                        choices=['all', 'len', 'all_but_len'])
    parser.add_argument('--data', type=str,
                        help='Path to the data of in-hospital mortality task',
                        default=os.path.join(
                            os.path.dirname(__file__),
                            '../../../data/in-hospital-mortality/'))
    parser.add_argument('--output_dir', type=str,
                        help='Directory relative which all output files are stored',
                        default='.')
    args = parser.parse_args()
    print(args)

    # Build readers from --data instead of machine-specific absolute paths,
    # so the script is runnable outside the original author's home directory.
    train_dir = os.path.join(args.data, 'train')
    test_dir = os.path.join(args.data, 'test')
    train_reader = InHospitalMortalityReader(
        dataset_dir=train_dir,
        listfile=os.path.join(train_dir, 'train_listfile.csv'),
        period_length=48.0)
    # NOTE(review): the original pointed the validation reader at the *train*
    # listfile; use the validation split instead — confirm val_listfile.csv
    # exists in the benchmark layout being used.
    val_reader = InHospitalMortalityReader(
        dataset_dir=train_dir,
        listfile=os.path.join(train_dir, 'val_listfile.csv'),
        period_length=48.0)
    test_reader = InHospitalMortalityReader(
        dataset_dir=test_dir,
        listfile=os.path.join(test_dir, 'test_listfile.csv'),
        period_length=48.0)

    print('Reading data and extracting features ...')
    (train_X, train_y, train_names) = read_and_extract_features(
        train_reader, args.period, args.features)
    (val_X, val_y, val_names) = read_and_extract_features(
        val_reader, args.period, args.features)
    (test_X, test_y, test_names) = read_and_extract_features(
        test_reader, args.period, args.features)
    print('  train data shape = {}'.format(train_X.shape))
    print('  validation data shape = {}'.format(val_X.shape))
    print('  test data shape = {}'.format(test_X.shape))

    print('Imputing missing values ...')
    # Fit on train only; apply the same means to val/test (no leakage).
    imputer = Imputer(missing_values=np.nan, strategy='mean', axis=0,
                      verbose=0, copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    # File-name stem kept in the historical logistic-regression format so
    # downstream tooling that parses these names keeps working.
    penalty = ('l2' if args.l2 else 'l1')
    file_name = '{}.{}.{}.C{}'.format(args.period, args.features, penalty,
                                      args.C)

    neigh = KNeighborsClassifier(n_neighbors=3)
    neigh.fit(train_X, train_y)

    result_dir = os.path.join(args.output_dir, 'results')
    common_utils.create_directory(result_dir)

    with open(os.path.join(result_dir, 'train_{}.json'.format(file_name)),
              'w') as res_file:
        ret = print_metrics_binary(train_y, neigh.predict_proba(train_X))
        # float(): metric values may be numpy scalars, not JSON-serializable.
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    with open(os.path.join(result_dir, 'val_{}.json'.format(file_name)),
              'w') as res_file:
        ret = print_metrics_binary(val_y, neigh.predict_proba(val_X))
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    # Positive-class probability only for the test metrics and saved CSV.
    prediction = neigh.predict_proba(test_X)[:, 1]

    with open(os.path.join(result_dir, 'test_{}.json'.format(file_name)),
              'w') as res_file:
        ret = print_metrics_binary(test_y, prediction)
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    save_results(test_names, prediction, test_y,
                 os.path.join(args.output_dir, 'predictions',
                              file_name + '.csv'))
verbose=args.verbose, batch_size=args.batch_size) elif args.mode == 'test': # ensure that the code uses test_reader del train_reader del val_reader del train_raw del val_raw test_reader = InHospitalMortalityReader(dataset_dir=os.path.join(args.data, 'test'), listfile=os.path.join(args.data, 'test_listfile.csv'), period_length=48.0) ret = utils.load_data(test_reader, discretizer, normalizer, args.small_part, return_names=True) data = ret["data"][0] labels = ret["data"][1] names = ret["names"] predictions = model.predict(data, batch_size=args.batch_size, verbose=1) predictions = np.array(predictions)[:, 0] metrics.print_metrics_binary(labels, predictions) path = os.path.join(args.output_dir, "test_predictions", os.path.basename(args.load_state)) + ".csv" utils.save_results(names, predictions, labels, path) else: raise ValueError("Wrong value for args.mode")