Ejemplo n.º 1
0
def main():
    """Bootstrap summary statistics for in-hospital-mortality predictions.

    Command-line arguments:
        prediction: CSV file with the model predictions.
        --test_listfile: CSV listfile of the test split.
        --n_iters: number of bootstrap resamples (default 10000).
        --save_file: path of the JSON file to write.

    The two CSV files are joined on their ``stay`` column. The metrics
    returned by ``print_metrics_binary`` under the keys ``auroc``, ``auprc``
    and ``minpse`` are computed once on the full data and once per bootstrap
    resample. For each metric the full-data value plus the mean, median,
    standard deviation and the 2.5% / 97.5% percentiles over the resamples
    are written to ``--save_file`` as JSON and printed.

    Raises:
        ValueError: if a stay of the listfile has no prediction, or if the
            ``y_true`` columns of the two files differ.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('prediction', type=str)
    parser.add_argument(
        '--test_listfile',
        type=str,
        default=os.path.join(
            os.path.dirname(__file__),
            '../../data/in-hospital-mortality/test/listfile.csv'))
    parser.add_argument('--n_iters', type=int, default=10000)
    parser.add_argument('--save_file', type=str, default='ihm_results.json')
    args = parser.parse_args()

    pred_df = pd.read_csv(args.prediction, index_col=False)
    test_df = pd.read_csv(args.test_listfile, index_col=False)

    # Left join: every stay of the listfile is kept, so a missing prediction
    # shows up as a null in the 'prediction' column.
    df = test_df.merge(pred_df,
                       left_on='stay',
                       right_on='stay',
                       how='left',
                       suffixes=['_l', '_r'])

    # Explicit exceptions instead of `assert`: asserts are removed when Python
    # runs with -O, which would let inconsistent inputs through silently.
    n_missing = int(df['prediction'].isnull().sum())
    if n_missing != 0:
        raise ValueError(
            '{} stay(s) from {} have no prediction in {}'.format(
                n_missing, args.test_listfile, args.prediction))
    if not df['y_true_l'].equals(df['y_true_r']):
        raise ValueError(
            'y_true in {} does not match y_true in {}'.format(
                args.test_listfile, args.prediction))

    metrics = [('AUC of ROC', 'auroc'), ('AUC of PRC', 'auprc'),
               ('min(+P, Se)', 'minpse')]

    # Column 0 holds the predictions, column 1 the labels; keeping both in one
    # array lets a single resample call draw matching (prediction, label) rows.
    data = np.zeros((df.shape[0], 2))
    data[:, 0] = np.array(df['prediction'])
    data[:, 1] = np.array(df['y_true_l'])

    results = dict()
    results['n_iters'] = args.n_iters
    ret = print_metrics_binary(data[:, 1], data[:, 0], verbose=0)
    for (m, k) in metrics:
        # float(...) keeps the values JSON-serializable whatever NumPy scalar
        # type the metric function returns.
        results[m] = {'value': float(ret[k])}

    # Per-metric values over the bootstrap resamples; kept outside `results`
    # so they never end up in the saved file.
    runs = {m: [] for (m, _) in metrics}
    for _ in range(args.n_iters):
        cur_data = sk_utils.resample(data, n_samples=len(data))
        ret = print_metrics_binary(cur_data[:, 1], cur_data[:, 0], verbose=0)
        for (m, k) in metrics:
            runs[m].append(ret[k])

    for (m, _) in metrics:
        results[m]['mean'] = float(np.mean(runs[m]))
        results[m]['median'] = float(np.median(runs[m]))
        results[m]['std'] = float(np.std(runs[m]))
        results[m]['2.5% percentile'] = float(np.percentile(runs[m], 2.5))
        results[m]['97.5% percentile'] = float(np.percentile(runs[m], 97.5))

    print("Saving the results in {} ...".format(args.save_file))
    with open(args.save_file, 'w') as f:
        json.dump(results, f)

    print(results)
Ejemplo n.º 2
0
 def calc_metrics(self, data, history, dataset, logs):
     """Predict over `data` in batches and record binary metrics.

     `data[0]` is fed to `self.model.predict` in slices of
     `self.batch_size`. Labels come from `data[1]`, or from `data[1][0]`
     when `self.target_repl` is set (in which case the first element of
     the model output is used as the prediction).

     The flattened predictions are turned into a two-column `[1 - p, p]`
     array and passed, together with the flattened labels, to
     `metrics.print_metrics_binary`. Each entry of the returned dict is
     stored in `logs` under `dataset + '_' + key`, and the dict itself is
     appended to `history`.
     """
     step = self.batch_size
     labels = []
     scores = []
     for start in range(0, len(data[0]), step):
         stop = start + step
         if self.verbose == 1:
             print("\tdone {}/{}".format(start, len(data[0])), end='\r')

         x = data[0][start:stop]
         if self.target_repl:
             y = data[1][0][start:stop]
             # The replicated targets are sliced as well, but the metrics
             # computed below do not use them.
             _ = data[1][1][start:stop]
         else:
             y = data[1][start:stop]

         outputs = self.model.predict(x, batch_size=step)
         batch_scores = outputs[0] if self.target_repl else outputs
         scores.extend(np.array(batch_scores).flatten())
         labels.extend(np.array(y).flatten())

     # Moves past the '\r'-terminated progress line (printed regardless of
     # the verbosity setting).
     print('\n')

     scores = np.array(scores)
     two_column = np.stack([1 - scores, scores], axis=1)
     ret = metrics.print_metrics_binary(labels, two_column)
     for name, value in ret.items():
         logs[dataset + '_' + name] = value
     history.append(ret)
Ejemplo n.º 3
0
def main():
    """Fit a 3-nearest-neighbours baseline on the in-hospital mortality task.

    Steps:
        1. Read the train / validation / test splits found under ``--data``
           and extract features for the chosen ``--period`` / ``--features``.
        2. Mean-impute missing values and standardize, both fitted on the
           training features only.
        3. Fit ``KNeighborsClassifier(n_neighbors=3)``.
        4. Write ``results/{train,val,test}_<name>.json`` with the metrics
           returned by ``print_metrics_binary`` and
           ``predictions/<name>.csv`` with the test predictions, all under
           ``--output_dir``.

    ``<name>`` is ``<period>.<features>.<penalty>.C<C>``. ``--C``, ``--l1``
    and ``--l2`` only influence this file name; the k-NN model does not use
    them.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--C',
                        type=float,
                        default=1.0,
                        help='inverse of L1 / L2 regularization')
    parser.add_argument('--l1', dest='l2', action='store_false')
    parser.add_argument('--l2', dest='l2', action='store_true')
    parser.set_defaults(l2=True)
    parser.add_argument('--period',
                        type=str,
                        default='all',
                        help='specifies which period extract features from',
                        choices=[
                            'first4days', 'first8days', 'last12hours',
                            'first25percent', 'first50percent', 'all'
                        ])
    parser.add_argument('--features',
                        type=str,
                        default='all',
                        help='specifies what features to extract',
                        choices=['all', 'len', 'all_but_len'])
    parser.add_argument('--data',
                        type=str,
                        help='Path to the data of in-hospital mortality task',
                        default=os.path.join(
                            os.path.dirname(__file__),
                            '../../../data/in-hospital-mortality/'))
    parser.add_argument(
        '--output_dir',
        type=str,
        help='Directory relative which all output files are stored',
        default='.')
    args = parser.parse_args()
    print(args)

    # Every path is derived from --data so the script runs on any machine.
    # Each listfile is expected inside its own split directory:
    #   <data>/train/train_listfile.csv and <data>/test/test_listfile.csv
    train_dir = os.path.join(args.data, 'train')
    test_dir = os.path.join(args.data, 'test')
    train_listfile = os.path.join(train_dir, 'train_listfile.csv')
    test_listfile = os.path.join(test_dir, 'test_listfile.csv')

    train_reader = InHospitalMortalityReader(dataset_dir=train_dir,
                                             listfile=train_listfile,
                                             period_length=48.0)

    # NOTE(review): the validation reader points at exactly the same files as
    # the training reader, so the "val" metrics are measured on training
    # data. Point `listfile` at a held-out listfile once one is available.
    val_reader = InHospitalMortalityReader(dataset_dir=train_dir,
                                           listfile=train_listfile,
                                           period_length=48.0)

    test_reader = InHospitalMortalityReader(dataset_dir=test_dir,
                                            listfile=test_listfile,
                                            period_length=48.0)

    print('Reading data and extracting features ...')
    (train_X, train_y,
     train_names) = read_and_extract_features(train_reader, args.period,
                                              args.features)
    (val_X, val_y,
     val_names) = read_and_extract_features(val_reader, args.period,
                                            args.features)
    (test_X, test_y,
     test_names) = read_and_extract_features(test_reader, args.period,
                                             args.features)
    print('  train data shape = {}'.format(train_X.shape))
    print('  validation data shape = {}'.format(val_X.shape))
    print('  test data shape = {}'.format(test_X.shape))
    print("---------------")
    print(train_X[0])

    print("---------------")
    print(train_names[0])
    print('Imputing missing values ...')
    # NOTE(review): if `Imputer` is sklearn.preprocessing.Imputer, that class
    # (and its `axis` argument) no longer exists in recent scikit-learn
    # releases - confirm the pinned version before upgrading.
    imputer = Imputer(missing_values=np.nan,
                      strategy='mean',
                      axis=0,
                      verbose=0,
                      copy=True)
    # Fitted on the training features only, then applied to every split.
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    # Penalty and C are only used to build the output file name.
    penalty = ('l2' if args.l2 else 'l1')
    file_name = '{}.{}.{}.C{}'.format(args.period, args.features, penalty,
                                      args.C)

    neigh = KNeighborsClassifier(n_neighbors=3)
    neigh.fit(train_X, train_y)

    result_dir = os.path.join(args.output_dir, 'results')
    common_utils.create_directory(result_dir)

    # Train and validation metrics are computed from the full predict_proba
    # output (one column per class).
    for split, split_X, split_y in (('train', train_X, train_y),
                                    ('val', val_X, val_y)):
        json_path = os.path.join(result_dir,
                                 '{}_{}.json'.format(split, file_name))
        with open(json_path, 'w') as res_file:
            ret = print_metrics_binary(split_y, neigh.predict_proba(split_X))
            ret = {k: float(v) for k, v in ret.items()}
            json.dump(ret, res_file)

    # For the test split only column 1 of predict_proba is kept; the same
    # vector is written to the predictions CSV below.
    prediction = neigh.predict_proba(test_X)[:, 1]

    with open(os.path.join(result_dir, 'test_{}.json'.format(file_name)),
              'w') as res_file:
        ret = print_metrics_binary(test_y, prediction)
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    save_results(
        test_names, prediction, test_y,
        os.path.join(args.output_dir, 'predictions', file_name + '.csv'))
Ejemplo n.º 4
0
              verbose=args.verbose,
              batch_size=args.batch_size)

elif args.mode == 'test':
    # Test mode: run `model` on the test split, print the binary metrics and
    # save the predictions to a CSV file.

    # ensure that the code uses test_reader
    # (after these `del`s, any later use of the train/validation objects
    # fails with a NameError instead of silently evaluating on the wrong data)
    del train_reader
    del val_reader
    del train_raw
    del val_raw

    # The test split lives in <args.data>/test, with its listfile directly
    # under <args.data>.
    test_reader = InHospitalMortalityReader(dataset_dir=os.path.join(args.data, 'test'),
                                            listfile=os.path.join(args.data, 'test_listfile.csv'),
                                            period_length=48.0)
    # return_names=True: the result is indexed below by "data" and "names".
    ret = utils.load_data(test_reader, discretizer, normalizer, args.small_part,
                          return_names=True)

    # ret["data"] is indexed as (inputs, labels).
    data = ret["data"][0]
    labels = ret["data"][1]
    names = ret["names"]

    predictions = model.predict(data, batch_size=args.batch_size, verbose=1)
    # Keep only column 0 of the model output
    # (presumably the output has a single column - TODO confirm).
    predictions = np.array(predictions)[:, 0]
    metrics.print_metrics_binary(labels, predictions)

    # The output file is named after the loaded state file:
    # <output_dir>/test_predictions/<basename of args.load_state>.csv
    path = os.path.join(args.output_dir, "test_predictions", os.path.basename(args.load_state)) + ".csv"
    utils.save_results(names, predictions, labels, path)

else:
    # Any mode not handled by the branches above is rejected.
    raise ValueError("Wrong value for args.mode")