Code Example #1
File: main.py  Project: klainfo/mimic3-benchmarks
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--period', type=str, default='all', help='specifies which period to extract features from',
                        choices=['first4days', 'first8days', 'last12hours', 'first25percent', 'first50percent', 'all'])
    parser.add_argument('--features', type=str, default='all', help='specifies what features to extract',
                        choices=['all', 'len', 'all_but_len'])
    args = parser.parse_args()
    print(args)

    # penalties = ['l2', 'l2', 'l2', 'l2', 'l2', 'l2', 'l1', 'l1', 'l1', 'l1', 'l1']
    # Cs = [1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 1.0, 0.1, 0.01, 0.001, 0.0001]
    penalties = ['l2']
    Cs = [0.001]

    train_reader = DecompensationReader(dataset_dir='../../../data/decompensation/train/',
                                        listfile='../../../data/decompensation/train_listfile.csv')

    val_reader = DecompensationReader(dataset_dir='../../../data/decompensation/train/',
                                      listfile='../../../data/decompensation/val_listfile.csv')

    test_reader = DecompensationReader(dataset_dir='../../../data/decompensation/test/',
                                       listfile='../../../data/decompensation/test_listfile.csv')

    print('Reading data and extracting features ...')
    n_train = min(100000, train_reader.get_number_of_examples())
    n_val = min(100000, val_reader.get_number_of_examples())

    (train_X, train_y, train_names, train_ts) = read_and_extract_features(
        train_reader, n_train, args.period, args.features)

    (val_X, val_y, val_names, val_ts) = read_and_extract_features(
        val_reader, n_val, args.period, args.features)

    (test_X, test_y, test_names, test_ts) = read_and_extract_features(
        test_reader, test_reader.get_number_of_examples(), args.period, args.features)

    print('Imputing missing values ...')
    imputer = Imputer(missing_values=np.nan, strategy='mean', axis=0, verbose=0, copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    common_utils.create_directory('results')

    for (penalty, C) in zip(penalties, Cs):
        file_name = '{}.{}.{}.C{}'.format(args.period, args.features, penalty, C)

        logreg = LogisticRegression(penalty=penalty, C=C, random_state=42)
        logreg.fit(train_X, train_y)

        with open(os.path.join('results', 'train_{}.json'.format(file_name)), "w") as res_file:
            ret = print_metrics_binary(train_y, logreg.predict_proba(train_X))
            ret = {k: float(v) for k, v in ret.items()}
            json.dump(ret, res_file)

        with open(os.path.join('results', 'val_{}.json'.format(file_name)), 'w') as res_file:
            ret = print_metrics_binary(val_y, logreg.predict_proba(val_X))
            ret = {k: float(v) for k, v in ret.items()}
            json.dump(ret, res_file)

        prediction = logreg.predict_proba(test_X)[:, 1]

        with open(os.path.join('results', 'test_{}.json'.format(file_name)), 'w') as res_file:
            ret = print_metrics_binary(test_y, prediction)
            ret = {k: float(v) for k, v in ret.items()}
            json.dump(ret, res_file)

        save_results(test_names, test_ts, prediction, test_y, os.path.join('predictions', file_name + '.csv'))
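
Note: the Imputer used above comes from the old sklearn.preprocessing module, which was deprecated in scikit-learn 0.20 and removed in 0.22. Below is a minimal sketch of the same imputation step with the current SimpleImputer API, assuming the train_X, val_X and test_X arrays produced above:

import numpy as np
from sklearn.impute import SimpleImputer

# Mean-impute missing values column-wise; SimpleImputer has no axis/verbose arguments.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean', copy=True)
imputer.fit(train_X)
train_X = np.array(imputer.transform(train_X), dtype=np.float32)
val_X = np.array(imputer.transform(val_X), dtype=np.float32)
test_X = np.array(imputer.transform(test_X), dtype=np.float32)

Given the relative ../../../data/decompensation paths, the script would presumably be run from its own directory, roughly as: python main.py --period all --features all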
Code Example #2
        use_time=args.use_time,
        return_names=True)  # put steps = None for a full test

    for i in range(test_data_gen.steps):
        print("predicting {} / {}".format(i, test_data_gen.steps), end='\r')
        ret = next(test_data_gen)
        if args.use_time:
            [x, t], y = ret["data"]
        else:
            x, y = ret["data"]
        cur_names = ret["names"]
        cur_ts = ret["ts"]

        x = np.array(x)
        if args.use_time:
            pred = model.predict_on_batch([x, t])[:, 0]
        else:
            pred = model.predict_on_batch(x)[:, 0]
        predictions += list(pred)
        labels += list(y)
        names += list(cur_names)
        ts += list(cur_ts)

    metrics.print_metrics_binary(labels, predictions)
    path = os.path.join(args.output_dir, 'test_predictions',
                        os.path.basename(args.load_state)) + '.csv'
    utils.save_results(names, ts, predictions, labels, path)

else:
    raise ValueError("Wrong value for args.mode")
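
The branching on args.use_time above can be wrapped in a small helper so the prediction call stays uniform. A minimal sketch; unpack_batch is a hypothetical name, not part of the project:

import numpy as np

def unpack_batch(ret, use_time):
    # Hypothetical helper: returns (inputs, labels) ready for model.predict_on_batch,
    # using a two-element input list only when the time channel is present.
    if use_time:
        [x, t], y = ret["data"]
        return [np.array(x), np.array(t)], y
    x, y = ret["data"]
    return np.array(x), y

With this, the loop body reduces to inputs, y = unpack_batch(ret, args.use_time) followed by pred = model.predict_on_batch(inputs)[:, 0].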
Code Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--period',
                        type=str,
                        default='all',
                        help='specifies which period to extract features from',
                        choices=[
                            'first4days', 'first8days', 'last12hours',
                            'first25percent', 'first50percent', 'all'
                        ])
    parser.add_argument('--features',
                        type=str,
                        default='all',
                        help='specifies what features to extract',
                        choices=['all', 'len', 'all_but_len'])
    args = parser.parse_args()
    print(args)

    # penalties = ['l2', 'l2', 'l2', 'l2', 'l2', 'l2', 'l1', 'l1', 'l1', 'l1', 'l1']
    # Cs = [1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 1.0, 0.1, 0.01, 0.001, 0.0001]
    penalties = ['l2']
    Cs = [0.001]

    train_reader = DecompensationReader(
        dataset_dir='../../../data/decompensation/train/',
        listfile='../../../data/decompensation/train_listfile.csv')

    val_reader = DecompensationReader(
        dataset_dir='../../../data/decompensation/train/',
        listfile='../../../data/decompensation/val_listfile.csv')

    test_reader = DecompensationReader(
        dataset_dir='../../../data/decompensation/test/',
        listfile='../../../data/decompensation/test_listfile.csv')

    print('Reading data and extracting features ...')
    n_train = min(100000, train_reader.get_number_of_examples())
    n_val = min(100000, val_reader.get_number_of_examples())

    (train_X, train_y, train_names,
     train_ts) = read_and_extract_features(train_reader, n_train, args.period,
                                           args.features)

    (val_X, val_y, val_names,
     val_ts) = read_and_extract_features(val_reader, n_val, args.period,
                                         args.features)

    (test_X, test_y, test_names,
     test_ts) = read_and_extract_features(test_reader,
                                          test_reader.get_number_of_examples(),
                                          args.period, args.features)

    print('Imputing missing values ...')
    imputer = Imputer(missing_values=np.nan,
                      strategy='mean',
                      axis=0,
                      verbose=0,
                      copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    common_utils.create_directory('results')

    for (penalty, C) in zip(penalties, Cs):
        file_name = '{}.{}.{}.C{}'.format(args.period, args.features, penalty,
                                          C)

        logreg = LogisticRegression(penalty=penalty, C=C, random_state=42)
        logreg.fit(train_X, train_y)

        with open(os.path.join('results', 'train_{}.json'.format(file_name)),
                  "w") as res_file:
            ret = print_metrics_binary(train_y, logreg.predict_proba(train_X))
            ret = {k: float(v) for k, v in ret.items()}
            json.dump(ret, res_file)

        with open(os.path.join('results', 'val_{}.json'.format(file_name)),
                  'w') as res_file:
            ret = print_metrics_binary(val_y, logreg.predict_proba(val_X))
            ret = {k: float(v) for k, v in ret.items()}
            json.dump(ret, res_file)

        prediction = logreg.predict_proba(test_X)[:, 1]

        with open(os.path.join('results', 'test_{}.json'.format(file_name)),
                  'w') as res_file:
            ret = print_metrics_binary(test_y, prediction)
            ret = {k: float(v) for k, v in ret.items()}
            json.dump(ret, res_file)

        save_results(test_names, test_ts, prediction, test_y,
                     os.path.join('predictions', file_name + '.csv'))
Code Example #4
File: main.py  Project: amoldwin/notes_benchmark
    # pheno
    if args.pheno_C > 0:
        print("\n =================== phenotype ==================")
        pheno_pred = np.array(pheno_pred)
        pheno_ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred)

    print("Saving the predictions in test_predictions/task directories ...")

    # ihm
    ihm_path = os.path.join(args.output_dir, "test_predictions/ihm",
                            os.path.basename(args.load_state) + experiment_name + ".csv")
    ihm_utils.save_results(ihm_names, ihm_pred, ihm_y_true, ihm_path)

    # decomp
    decomp_path = os.path.join(args.output_dir, "test_predictions/decomp",
                               os.path.basename(args.load_state) + experiment_name + ".csv")
    decomp_utils.save_results(decomp_names, decomp_ts, decomp_pred, decomp_y_true, decomp_path)

    # los
    los_path = os.path.join(args.output_dir, "test_predictions/los",
                            os.path.basename(args.load_state) + experiment_name + ".csv")
    los_utils.save_results(los_names, los_ts, los_pred, los_y_true, los_path)

    # pheno
    pheno_path = os.path.join(args.output_dir, "test_predictions/pheno",
                              os.path.basename(args.load_state) + experiment_name + ".csv")
    pheno_utils.save_results(pheno_names, pheno_ts, pheno_pred, pheno_y_true, pheno_path)

else:
    raise ValueError("Wrong value for args.mode")
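
The four path expressions above repeat the same output_dir/test_predictions/<task>/<state basename><experiment>.csv pattern. A minimal consolidation sketch; make_pred_path is a hypothetical helper, not part of the project:

import os

def make_pred_path(output_dir, task, load_state, experiment_name):
    # Hypothetical helper: output_dir/test_predictions/<task>/<state basename><experiment>.csv
    filename = os.path.basename(load_state) + experiment_name + ".csv"
    return os.path.join(output_dir, "test_predictions", task, filename)

# e.g. ihm_path = make_pred_path(args.output_dir, "ihm", args.load_state, experiment_name)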
Code Example #5
File: main.py  Project: klainfo/mimic3-benchmarks
            los_ret = metrics.print_metrics_custom_bins(los_y_true, los_pred)
        if args.partition == 'none':
            los_ret = metrics.print_metrics_regression(los_y_true, los_pred)

    # pheno
    if args.pheno_C > 0:
        print "\n =================== phenotype =================="
        pheno_pred = np.array(pheno_pred)
        pheno_ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred)

    print "Saving the predictions in test_predictions/task directories ..."

    # ihm
    ihm_path = os.path.join("test_predictions/ihm", os.path.basename(args.load_state)) + ".csv"
    ihm_utils.save_results(ihm_names, ihm_pred, ihm_y_true, ihm_path)

    # decomp
    decomp_path = os.path.join("test_predictions/decomp", os.path.basename(args.load_state)) + ".csv"
    decomp_utils.save_results(decomp_names, decomp_ts, decomp_pred, decomp_y_true, decomp_path)

    # los
    los_path = os.path.join("test_predictions/los", os.path.basename(args.load_state)) + ".csv"
    los_utils.save_results(los_names, los_ts, los_pred, los_y_true, los_path)

    # pheno
    pheno_path = os.path.join("test_predictions/pheno", os.path.basename(args.load_state)) + ".csv"
    pheno_utils.save_results(pheno_names, pheno_ts, pheno_pred, pheno_y_true, pheno_path)

else:
    raise ValueError("Wrong value for args.mode")
Code Example #6
File: main.py  Project: klainfo/mimic3-benchmarks
    else:
        del train_reader
        del val_reader
        test_reader = DecompensationReader(dataset_dir='../../data/decompensation/test/',
                                           listfile='../../data/decompensation/test_listfile.csv')

        test_data_gen = utils.BatchGen(test_reader, discretizer,
                                       normalizer, args.batch_size,
                                       None, shuffle=False, return_names=True)  # put steps = None for a full test

        for i in range(test_data_gen.steps):
            print "\rpredicting {} / {}".format(i, test_data_gen.steps),
            ret = next(test_data_gen)
            x, y = ret["data"]
            cur_names = ret["names"]
            cur_ts = ret["ts"]

            x = np.array(x)
            pred = model.predict_on_batch(x)[:, 0]
            predictions += list(pred)
            labels += list(y)
            names += list(cur_names)
            ts += list(cur_ts)

    metrics.print_metrics_binary(labels, predictions)
    path = os.path.join("test_predictions", os.path.basename(args.load_state)) + ".csv"
    utils.save_results(names, ts, predictions, labels, path)

else:
    raise ValueError("Wrong value for args.mode")
Code Example #7
            return_names=True)  # put steps = None for a full test

        for i in range(test_data_gen.steps):
            print("predicting {} / {}".format(i, test_data_gen.steps),
                  end='\r')
            ret = next(test_data_gen)
            x, y = ret["data"]
            cur_names = ret["names"]
            cur_ts = ret["ts"]

            x = np.array(x)
            pred = model.predict_on_batch(x)
            pred = np.squeeze(pred)
            predictions += list(pred)
            labels += list(y)
            names += list(cur_names)
            ts += list(cur_ts)

    metrics.print_metrics_binary(labels, predictions, stochastic=stochastic)
    path = os.path.join(args.output_dir, 'test_predictions',
                        os.path.basename(args.load_state)) + '.csv'
    utils.save_results(names,
                       ts,
                       predictions,
                       labels,
                       path,
                       stochastic=stochastic)

else:
    raise ValueError("Wrong value for args.mode")