Ejemplo n.º 1
0
def general_training_target_test(data_path,
                                 model_id,
                                 data_id,
                                 output,
                                 n_patients,
                                 target_icu=None):
    """
    Train on all ICU and evaluate on test ICU

    For every fold returned by ``load_data`` for ICU types 1-4, a model is
    trained, evaluated once on the whole test split (printed only) and then
    once per test ICU. One CSV row per (ICU, fold) is written to ``output``.

    Args:
        data_path: forwarded unchanged to ``load_data``.
        model_id: string id forwarded to ``models.create_freezable_layers``;
            also part of the model name ``"mt-<model_id>-<data_id>"``.
        data_id: string id forwarded to ``load_data``; also part of the model
            name.
        output: object with a ``write(str)`` method receiving the CSV rows.
        n_patients: forwarded unchanged to ``load_data``.
        target_icu: when given, only this ICU id is evaluated; otherwise every
            id present in the fold's ``icu_ids_test`` is evaluated.

    Returns:
        list: AUC per evaluated ICU, averaged over the folds that completed.
        An empty list is returned when no fold completed (e.g. because an
        exception was raised before the first fold finished).
    """
    auc = []
    try:
        print("\n\n\n=== General training ===")
        model_name = "mt-" + model_id + "-" + data_id
        folds = load_data(data_path, data_id, [1, 2, 3, 4], n_patients)
        for fold, (x_train, y_train, x_val, y_val, x_test, y_test,
                   icu_ids_train, icu_ids_val,
                   icu_ids_test) in enumerate(folds):
            layers, freezable = models.create_freezable_layers(
                model_id, x_train.shape[1], x_train.shape[2])
            checkpoint, early_stopping, model = models.create_model(
                model_name, layers)

            model.fit(x_train,
                      y_train,
                      epochs=n_epochs,
                      validation_data=(x_val, y_val),
                      callbacks=[early_stopping, checkpoint])

            if target_icu is not None:
                icu_test_list = [target_icu]
            else:
                icu_test_list = np.unique(icu_ids_test)
            icu_auc = []

            # NOTE(review): evaluate_model receives the model *name*, not the
            # fitted object -- presumably it reloads the checkpoint; confirm.
            _, general_score = models.evaluate_model(model_name, x_test,
                                                     y_test)

            print("\n\n=== General Score - \tFold %d - \tAUC %f" %
                  (fold, general_score))

            for test_icu in icu_test_list:
                icu_mask = icu_ids_test == test_icu
                y_icu_test = y_test[icu_mask]
                score, auc_score = models.evaluate_model(
                    model_name, x_test[icu_mask], y_icu_test)
                icu_auc.append(auc_score)

                print("\n=== ICU Test %d - \tFold %d - \tAUC %f" %
                      (test_icu, fold, auc_score))

                # Class counts describe the ICU subset the scores were
                # computed on, not the whole test split.
                output.write(
                    "Mixed-%d,%d,%d,%d,%f,%f,%f,%f,%f,%f\n" %
                    (test_icu, fold, np.count_nonzero(y_icu_test == 0),
                     np.count_nonzero(y_icu_test == 1), score[0], score[1],
                     score[2], score[3], score[4], auc_score))
            auc.append(icu_auc)

    except Exception as e:
        # Best effort: keep the folds that completed, but show where the
        # failure happened instead of only its message.
        import traceback
        print(e)
        traceback.print_exc()

    if not auc:
        # Nothing completed; averaging an empty array would yield a bare NaN.
        return []

    avg_auc = np.array(auc).mean(axis=0).tolist()
    return avg_auc
def mixed(model_id, data_id, output, n_patients, feature_sequence):
    """Train one model per fold on ICU types 1-4 and report AUC scores.

    For each fold a line is written to ``output`` of the form
    ``<fold>,<overall AUC>,<AUC of ICU a>,<AUC of ICU b>,...`` where the
    per-ICU entries follow the order of ``np.unique(icu_ids_test)``. The same
    scores are echoed to stdout.

    Args:
        model_id: string id forwarded to ``models.create_freezable_layers``
            and used in the model name ``"mt-<model_id>-<data_id>"``.
        data_id: string id forwarded to ``load_data`` and used in the model
            name.
        output: object with a ``write(str)`` method.
        n_patients: forwarded unchanged to ``load_data``.
        feature_sequence: forwarded unchanged to ``load_data``.
    """
    all_folds = load_data(data_id, [1, 2, 3, 4], n_patients, feature_sequence)
    for fold_idx, fold_data in enumerate(all_folds):
        (x_train, y_train, x_val, y_val, x_test, y_test, _ids_train,
         _ids_val, icu_ids_test) = fold_data

        layers, _freezable = models.create_freezable_layers(
            model_id, x_train.shape[1], x_train.shape[2])
        run_name = "mt-" + model_id + "-" + data_id
        checkpoint, early_stopping, model = models.create_model(
            run_name, layers)

        model.fit(x_train,
                  y_train,
                  epochs=n_epochs,
                  validation_data=(x_val, y_val),
                  callbacks=[early_stopping, checkpoint])

        # Overall score on the complete test split of this fold.
        _, overall_auc = models.evaluate_model(run_name, x_test, y_test)
        print("ALL ," + str(overall_auc))
        output.write(str(fold_idx) + "," + str(overall_auc))

        # One extra column per ICU found in the test split.
        for icu in np.unique(icu_ids_test):
            selector = icu_ids_test == icu
            _, icu_auc = models.evaluate_model(run_name, x_test[selector],
                                               y_test[selector])
            print("ICU " + str(icu) + "," + str(icu_auc))
            output.write("," + str(icu_auc))
        output.write("\n")
Ejemplo n.º 3
0
def test_model(model, test_data, transformation_type=TRANSFORMATION.clean):
    """Transform the test inputs and evaluate ``model`` on them.

    Args:
        model: passed unchanged to ``models.evaluate_model``.
        test_data: pair ``(X, Y)`` of inputs and labels.
        transformation_type: passed to ``transform`` together with the
            inputs; defaults to ``TRANSFORMATION.clean``.

    Returns:
        None. The result of ``models.evaluate_model`` is not returned.
    """
    inputs, labels = test_data

    print('Transforming test data set...')
    transformed_inputs = transform(inputs, transformation_type)

    print(f'Testing model [{transformation_type}]...')
    models.evaluate_model(model, transformed_inputs, labels)
Ejemplo n.º 4
0
def train_on_target(data_path, model_id, data_id, output, n_patients):
    """
    Train and evaluate only on target ICU

    For each ICU type in ``[1, 2, 3, 4]`` the folds for that ICU alone are
    loaded, a model is trained per fold and evaluated on the fold's test
    split. One CSV row per (ICU, fold) is written to ``output``.

    Args:
        data_path: forwarded unchanged to ``load_data``.
        model_id: string id forwarded to ``models.create_freezable_layers``;
            also part of the model name
            ``"fc-<model_id>-<data_id>-<icu>"``.
        data_id: string id forwarded to ``load_data``; also part of the model
            name.
        output: object with a ``write(str)`` method receiving the CSV rows.
        n_patients: forwarded unchanged to ``load_data``.

    Returns:
        list: mean AUC over folds, one entry per ICU that completed all of
        its folds. An empty list is returned when no ICU completed.
    """
    auc = []
    try:
        icu_types = [1, 2, 3, 4]

        for held_out_icu in icu_types:
            print("=== Target: " + str(held_out_icu) + " ===")
            model_name = ("fc-" + model_id + "-" + data_id + "-" +
                          str(held_out_icu))
            folds = load_data(data_path, data_id, [held_out_icu], n_patients)
            fold_auc = []
            for fold, (x_train, y_train, x_val, y_val, x_test, y_test,
                       icu_ids_train, icu_ids_val,
                       icu_ids_test) in enumerate(folds):
                layers, freezable = models.create_freezable_layers(
                    model_id, x_train.shape[1], x_train.shape[2])

                print("=== Train on Target ===")
                checkpoint, early_stopping, model = models.create_model(
                    model_name, layers)
                model.fit(x_train,
                          y_train,
                          epochs=n_epochs,
                          validation_data=(x_val, y_val),
                          callbacks=[early_stopping, checkpoint])

                score, auc_score = models.evaluate_model(
                    model_name, x_test, y_test)
                fold_auc.append(auc_score)

                print("=== Fold: " + str(fold) + ",Loss: " + str(score[0]) +
                      ",Acc: " + str(score[1]) + ",Prec: " + str(score[2]) +
                      ",Rec: " + str(score[3]) + ",F1: " + str(score[4]) +
                      ",AUC: " + str(auc_score) + " ====")

                output.write(
                    "Target-%d,%d,%d,%d,%f,%f,%f,%f,%f,%f\n" %
                    (held_out_icu, fold, np.count_nonzero(y_test == 0),
                     np.count_nonzero(y_test == 1), score[0], score[1],
                     score[2], score[3], score[4], auc_score))
            # Only ICUs whose folds all finished contribute to the average.
            auc.append(fold_auc)
    except Exception as e:
        # Best effort: keep the ICUs that completed, but show where the
        # failure happened instead of only its message.
        import traceback
        print(e)
        traceback.print_exc()

    if not auc:
        # np.array([]).mean(axis=1) would raise AxisError and hide the
        # original failure printed above.
        return []

    avg_auc = np.array(auc).mean(axis=1).tolist()
    return avg_auc
Ejemplo n.º 5
0
def general_training(data_path, model_id, data_id, output, n_patients):
    """
    Train and evaluate on all ICU

    A model is trained per fold on the data loaded for ICU types 1-4 and
    evaluated on the fold's complete test split. One CSV row per fold is
    written to ``output``.

    Args:
        data_path: forwarded unchanged to ``load_data``.
        model_id: string id forwarded to ``models.create_freezable_layers``;
            also part of the model name ``"mtt-<model_id>-<data_id>"``.
        data_id: string id forwarded to ``load_data``; also part of the model
            name.
        output: object with a ``write(str)`` method receiving the CSV rows.
        n_patients: forwarded unchanged to ``load_data``.

    Returns:
        float: mean test AUC over the folds that completed; NaN when none
        completed. Exceptions raised while training are printed, not raised.
    """
    fold_aucs = []
    try:
        print("=== General training and testing ===")
        all_folds = load_data(data_path, data_id, [1, 2, 3, 4], n_patients)
        for fold_idx, fold_data in enumerate(all_folds):
            (x_train, y_train, x_val, y_val, x_test, y_test, _ids_train,
             _ids_val, _ids_test) = fold_data

            layers, _freezable = models.create_freezable_layers(
                model_id, x_train.shape[1], x_train.shape[2])
            run_name = "mtt-" + model_id + "-" + data_id
            checkpoint, early_stopping, model = models.create_model(
                run_name, layers)

            model.fit(x_train,
                      y_train,
                      epochs=n_epochs,
                      validation_data=(x_val, y_val),
                      callbacks=[early_stopping, checkpoint])

            score, fold_auc = models.evaluate_model(run_name, x_test, y_test)
            fold_aucs.append(fold_auc)

            # %s renders each value with str(), like the concatenation it
            # replaces.
            print("=== General Test, Fold: %s,Loss: %s,Acc: %s,Prec: %s,"
                  "Rec: %s,F1: %s,AUC: %s ====" %
                  (fold_idx, score[0], score[1], score[2], score[3],
                   score[4], fold_auc))

            n_negative = np.count_nonzero(y_test == 0)
            n_positive = np.count_nonzero(y_test == 1)
            row = "General-Test-%d,%d,%d,%f,%f,%f,%f,%f,%f\n" % (
                fold_idx, n_negative, n_positive, score[0], score[1],
                score[2], score[3], score[4], fold_auc)
            output.write(row)

    except Exception as err:
        print(err)

    return np.array(fold_aucs).mean().tolist()
Ejemplo n.º 6
0
def train_pipeline(training_pipeline_params: TrainingPipelineParams):
    """Run the training pipeline end to end and save its artifacts.

    Steps, in order: load and read the raw data, split it into train and
    validation frames, fit the feature transformer on the train frame, build
    features and targets for both frames, train the model, predict on the
    validation features, evaluate the predictions and save the artifacts.

    Args:
        training_pipeline_params: configuration object. This function reads
            its ``input_data_path``, ``input_data_url``, ``splitting_params``,
            ``feature_params`` and ``train_params`` attributes and passes the
            whole object to ``save_artifacts``.

    Returns:
        Tuple ``(path_to_model, metrics)``: the value returned by
        ``save_artifacts`` and the value returned by ``evaluate_model``.
    """
    logger.info(f"Start training with params: {training_pipeline_params}")

    # NOTE(review): presumably fetches the data from the URL to the input
    # path before it is read -- confirm against load_data.
    load_data(training_pipeline_params.input_data_path,
              training_pipeline_params.input_data_url)
    data = read_data(training_pipeline_params.input_data_path)
    logger.info(f"Raw data shape: {data.shape}")

    train_df, val_df = split_train_val_data(
        data, training_pipeline_params.splitting_params)
    logger.info(f"Train df shape: {train_df.shape}")
    logger.info(f"Val df shape: {val_df.shape}")

    # The transformer is fitted on the train frame only and then reused for
    # the validation frame.
    pipeline = build_transformer(training_pipeline_params.feature_params)
    pipeline.fit(train_df)
    logger.info("Transform fitted.")

    train_features = make_features(pipeline, train_df)
    train_target = extract_target(train_df,
                                  training_pipeline_params.feature_params)
    logger.info(f"Train features shape: {train_features.shape}")

    val_features = make_features(pipeline, val_df)
    val_target = extract_target(val_df,
                                training_pipeline_params.feature_params)
    # Log the validation features' own shape (previously the train shape was
    # logged under this label).
    logger.info(f"Val features shape: {val_features.shape}")

    model = get_model(training_pipeline_params.train_params)
    model = train_model(train_features, train_target, model)
    logger.info("Model trained.")

    predictions = predict_model(val_features, model)

    metrics = evaluate_model(predictions, val_target)

    path_to_model = save_artifacts(metrics, model, pipeline,
                                   training_pipeline_params)

    return path_to_model, metrics
Ejemplo n.º 7
0
                    train_list, val_list, test_list, model_type, config_dict)
                test_loader = loader_dict['test']

                if ablate == "F":
                    mask = np.isin(qty_full, force_features, invert=False)
                    test_loader.dataset.mask_labels(mask)
                if ablate == "P":
                    mask = np.isin(qty_full, pos_features, invert=False)
                    test_loader.dataset.mask_labels(mask)

                if use_predlist:
                    predictions = predlist[i]
                else:
                    #plt.close('all')
                    predictions = mdl.evaluate_model(model,
                                                     test_loader,
                                                     model_type=model_type,
                                                     encode=encode)
                    predictions_list.append(predictions)

                np.savetxt('encodings_031621/encode_' + str(i + 1) + '.txt',
                           predictions)  # uncomment to save encodings.

        else:
            for i, (test, condition) in enumerate(
                    zip(test_list_full, condition_list), 0):
                test_list = [test]
                loader_dict, loader_sizes = dat.init_dataset(
                    train_list, val_list, test_list, model_type, config_dict)
                test_loader = loader_dict['test']
                dataset_mean = np.loadtxt('train_dataset_mean.dat')
                dataset_stdev = np.loadtxt('train_dataset_sd.dat')
Ejemplo n.º 8
0
    'crop_list': None,
    'trans_function': None
}

# Build the data loaders from the current lists and config; only the "test"
# loader is used in the statements that follow.
loader_dict, loader_sizes = dat.init_dataset(train_list, val_list, test_list,
                                             model_type, config_dict)
test_loader = loader_dict["test"]
# Replace the dataset's normalization statistics with mean1/std1, reset the
# labels to the raw label array and normalize again with those statistics.
# NOTE(review): presumably mean1/std1 are the statistics the PSM1 model was
# trained with -- confirm where they are defined.
test_loader.dataset.mean = mean1
test_loader.dataset.stdev = std1
test_loader.dataset.label_array = test_loader.dataset.raw_label_array
test_loader.dataset.normalize_state(mean1, std1)

# NOTE(review): 54 and 3 look like the state-input and output dimensions --
# confirm against mdl.StateModel.
model = mdl.StateModel(54, 3)
# Restore the saved weights for this model.
model.load_state_dict(torch.load("best_modelweights_S_PSM1.dat"))

# Run the model over the test loader and keep the predictions.
predictions1 = mdl.evaluate_model(model, test_loader, model_type=model_type)

# Write the predictions to a text file.
np.savetxt("dual_PSM1_1.pred", predictions1)

#%% Do this for PSM2

# Configuration for the next run; val_list and test_list are both [1].
file_dir = '../experiment_data'  # define the file directory for dataset
model_type = "S"
train_list = [1, 3, 5, 7, 8, 10, 12, 14, 15, 17, 19, 21]
val_list = [1]
test_list = [1]
config_dict = {
    'file_dir': file_dir,
    'include_torque': False,
    'spatial_forces': False,
    'custom_state': None,
Ejemplo n.º 9
0
def domain_adaptation(data_path,
                      model_id,
                      data_id,
                      output,
                      n_patients,
                      shuffle,
                      use_target,
                      target_icu=None):
    """
    Train on all ICU, fine tuning and evaluate on target ICU

    For each target ICU and each fold, a model is first trained on the
    general folds and scored ("GT"); the saved model is then fine-tuned on the
    target ICU's own fold and scored on the target ICU's test split ("FT").
    One CSV row per (ICU, fold) with the fine-tuned scores is written to
    ``output``.

    Args:
        data_path: forwarded unchanged to ``load_data``.
        model_id: string id forwarded to ``models.create_freezable_layers``;
            also part of the model name
            ``"ft-<model_id>-<data_id>-<icu>"``.
        data_id: string id forwarded to ``load_data``; also part of the model
            name.
        output: object with a ``write(str)`` method receiving the CSV rows.
        n_patients: forwarded unchanged to ``load_data``.
        shuffle: forwarded to ``models.create_model`` for the fine-tuning
            model and written into the CSV row label.
        use_target: when false, the target ICU is removed from the ICUs used
            for general training; when true it is kept and the general model
            is scored only on the target ICU's part of the test split.
        target_icu: when given, only this ICU is processed; otherwise ICU
            types 1-4 are processed in turn.

    Returns:
        list: one ``[mean GT AUC, mean FT AUC]`` pair per ICU that completed
        all of its folds. An empty list is returned when no ICU completed.
    """
    auc = []
    try:
        icu_types = [1, 2, 3, 4]

        target_icu_list = [target_icu] if target_icu is not None else icu_types
        for held_out_icu in target_icu_list:
            icus = list(icu_types)
            if not use_target:
                icus.remove(held_out_icu)
            folds = load_data(data_path, data_id, icus, n_patients)
            icu_folds = load_data(data_path, data_id, [held_out_icu],
                                  n_patients)
            model_name = "ft-" + model_id + "-" + data_id + "-" + str(
                held_out_icu)

            fold_auc = []
            # k_fold is a module-level setting defined outside this function.
            for fold in range(k_fold):
                (x_train, y_train, x_val, y_val, x_test, y_test,
                 icu_ids_train, icu_ids_val, icu_ids_test) = folds[fold]

                print("=== Held out: " + str(held_out_icu) + " ===")
                layers, freezable = models.create_freezable_layers(
                    model_id, x_train.shape[1], x_train.shape[2])

                print("=== General training ===")
                checkpoint, early_stopping, model = models.create_model(
                    model_name, layers)
                model.fit(x_train,
                          y_train,
                          epochs=n_epochs,
                          validation_data=(x_val, y_val),
                          callbacks=[early_stopping, checkpoint])
                if use_target:
                    # The general test split contains the target ICU; score
                    # only that part.
                    target_mask = icu_ids_test == held_out_icu
                    score, auc_score_gt = models.evaluate_model(
                        model_name, x_test[target_mask], y_test[target_mask])
                else:
                    score, auc_score_gt = models.evaluate_model(
                        model_name, x_test, y_test)

                print("=== Fine tuning ===")
                model = models.get_model(model_name)

                # Switch to the target ICU's own fold for fine-tuning.
                (x_icu_train, y_icu_train, x_icu_val, y_icu_val, x_icu_test,
                 y_icu_test, _, _, _) = icu_folds[fold]
                checkpoint, early_stopping, new_model = models.create_model(
                    model_name, model.layers, freezable, shuffle)

                new_model.fit(x_icu_train,
                              y_icu_train,
                              epochs=n_epochs,
                              validation_data=(x_icu_val, y_icu_val),
                              callbacks=[early_stopping, checkpoint])

                score, auc_score = models.evaluate_model(
                    model_name, x_icu_test, y_icu_test)
                fold_auc.append([auc_score_gt, auc_score])

                print("\n=== ICU %d - \tFold %d - \tAUC GT %f - \tAUC FT %f" %
                      (held_out_icu, fold, auc_score_gt, auc_score))

                y_zero_count = np.count_nonzero(y_icu_test == 0)
                y_one_count = np.count_nonzero(y_icu_test == 1)

                output.write("DA-%d-%s-%s,%d,%d,%d,%f,%f,%f,%f,%f,%f\n" %
                             (held_out_icu, shuffle, use_target, fold,
                              y_zero_count, y_one_count, score[0], score[1],
                              score[2], score[3], score[4], auc_score))
            # Only ICUs whose folds all finished contribute to the average.
            auc.append(fold_auc)
    except Exception as e:
        # Best effort: keep the ICUs that completed, but show where the
        # failure happened instead of only its message.
        import traceback
        print(e)
        traceback.print_exc()

    if not auc:
        # np.array([]).mean(axis=1) would raise AxisError and hide the
        # original failure printed above.
        return []

    # auc is indexed [icu][fold][GT/FT]; average over folds.
    avg_auc = np.array(auc).mean(axis=1)

    auc_df = pd.DataFrame(avg_auc.mean(axis=1))
    print(auc_df)

    return avg_auc.tolist()
Ejemplo n.º 10
0
def run_ablations(model, num_ablations):
    """Repeatedly drop the top-ranked feature, rebuild, retrain and score.

    The model passed in is scored first. Then, ``num_ablations`` times: GBP
    values are computed on the test loader, features are ranked by their mean
    ``gbp`` value, the top-ranked feature is added to the removal list, the
    global ``config_dict['custom_state']`` is updated to exclude all removed
    features, and a new model with one fewer state input is built, retrained
    and scored.

    Relies on module-level names: ``qty``, ``train_list``, ``val_list``,
    ``model_type``, ``config_dict``, ``feat_extract`` and ``weight_file``.

    Args:
        model: the full model to score before any ablation.
        num_ablations: number of ablation rounds to run.

    Returns:
        Tuple ``(removed features, parameter counts, metrics)``: the features
        removed in order, the parameter count of the initial model followed
        by one count per round, and the metrics of the initial model followed
        by one entry per round.
    """
    removed = []  # features dropped so far, in order
    metric_history = []  # one metrics entry per scored model
    state_indices = np.arange(7, 61)  # 54 candidate state indices
    n_state_inputs = 54
    criterion = nn.MSELoss(reduction='sum')
    optimizer = opt.Adam(model.parameters(), lr=0.01)
    param_history = [count_params(model)]
    active_features = np.array(qty)

    # val_list is passed for both the validation and the test lists.
    dataloaders, dataset_sizes = dat.init_dataset(train_list, val_list,
                                                  val_list, model_type,
                                                  config_dict)

    print('evaluating full model predictions...')
    predictions = mdl.evaluate_model(model,
                                     dataloaders['test'],
                                     model_type=model_type,
                                     no_pbar=True)
    print('computing full model performance metrics...')
    test_labels = dataloaders['test'].dataset.label_array[:, 1:4]
    metrics = model_eval.compute_loss_metrics(predictions, test_labels)
    metric_history.append(metrics)
    print('Performance Summary of Full Model:')
    print(metrics)

    print('Running ablation study on model type:' + model_type)

    for run_no in range(1, num_ablations + 1):
        banner = '-' * 10
        print(banner)
        print(f'Begin ablation run: {run_no}/{num_ablations}')
        print(banner)

        # Rank the currently active features by their mean GBP value.
        gbp_data = model_eval.compute_GBP(model,
                                          dataloaders['test'],
                                          num_state_inputs=n_state_inputs,
                                          model_type=model_type,
                                          no_pbar=True)
        ranking = model_eval.compute_and_plot_gbp(gbp_data,
                                                  active_features,
                                                  True,
                                                  suppress_plots=True)
        ranking = ranking.groupby('feature').mean()
        ranking = ranking.sort_values(by='gbp', ascending=False)
        ranking = ranking.reset_index()

        # The top-ranked feature is the next one to drop.
        top_feature = ranking.iloc[0, 0]
        print("removing " + top_feature + "...")
        removed.append(top_feature)

        # Keep every feature that has not been removed; the mask is applied
        # to the global config so the rebuilt loaders use the reduced state.
        keep = np.isin(qty, removed, invert=True)
        config_dict['custom_state'] = state_indices[keep]
        active_features = np.array(qty)[keep]
        n_state_inputs -= 1

        print(f'redefining model with input state dims: {n_state_inputs}')
        if model_type == "VS":
            model = mdl.StateVisionModel(30,
                                         n_state_inputs,
                                         3,
                                         feature_extract=feat_extract)
        elif model_type == "S":
            model = mdl.StateModel(n_state_inputs, 3)

        param_history.append(count_params(model))

        # New optimizer and loaders for the rebuilt model.
        optimizer = opt.Adam(model.parameters(), lr=0.01)
        dataloaders, dataset_sizes = dat.init_dataset(train_list, val_list,
                                                      val_list, model_type,
                                                      config_dict)

        model, _train_history, _val_history = mdl.train_model(
            model,
            criterion,
            optimizer,
            dataloaders,
            dataset_sizes,
            num_epochs=50,
            model_type=model_type,
            weight_file=weight_file,
            no_pbar=True)
        print('retraining completed')

        print('evaluating model predictions...')
        predictions = mdl.evaluate_model(model,
                                         dataloaders['test'],
                                         model_type=model_type,
                                         no_pbar=True)
        print('computing performance metrics...')
        test_labels = dataloaders['test'].dataset.label_array[:, 1:4]
        metrics = model_eval.compute_loss_metrics(predictions, test_labels)
        metric_history.append(metrics)
        print('Performance Summary:')
        print(metrics)

    return removed, param_history, metric_history