Example #1
0
def xstacked_data(
    uuids=(),  # immutable default: avoids the shared mutable-default pitfall of []
    epochs=config.EPOCHS,
):
    """Build shared generators, labels, and per-model predictions for stacking.

    For each run id in *uuids*, loads the matching ``XResult`` record, rebuilds
    the train/validation/test generators for that model's fold and input form
    (one set of generators per distinct ``input_form``), draws the label
    sequences once from the first generator of each kind, then runs every
    model's ``predict_generator`` over the shared generators.

    Args:
        uuids: iterable of run ids to stack.  All referenced results must have
            been trained on the same ``split`` and the same ``label_form``.
        epochs: number of epochs' worth of (shuffled, augmented) training
            batches to draw for training labels/predictions.

    Returns:
        Tuple of (train_predictions, validation_predictions, test_predictions,
        train_labels, validation_labels, test_labels, train_fixed_predictions,
        training_fixed_labels); the prediction entries are lists with one
        flattened array per model in *uuids*.
    """
    results = [
        XResult.query.filter(XResult.run_id == uuid).first() for uuid in uuids
    ]
    assert len(results) > 0, "no models found"
    assert len({result.split for result in results
                }) == 1, "all models must be trained on the same split"
    assert len({result.label_form for result in results
                }) == 1, "all models must be trained on the same label"

    training = dict()
    training_fixed = dict()
    validation = dict()
    test = dict()
    # holdout = dict()

    for result in results:
        # One generator set per input form; models sharing an input form
        # reuse the generators built on the first encounter.
        if result.input_form in training:
            continue

        split = UUID(result.split)

        # splitting the initial training and holdout test sets
        f = pandas.read_pickle(config.FEATURES)

        put_in_training = pandas.read_csv(config.MULTIPLE_LESIONS)
        multiple_ids = list(put_in_training['ID'])
        multiple = f[f['patient'].isin(multiple_ids)]
        multiple_y = multiple[result.label_form].values

        new_df = f[~f.patient.isin(multiple_ids)]
        y = new_df[result.label_form].values

        # Set up the k-fold process.
        # NOTE(review): random_state has no effect while shuffle=False, and
        # recent scikit-learn raises a ValueError for this combination.  Do
        # not add shuffle=True here alone — the fold partition must stay in
        # sync with the training-time fold construction, or predictions will
        # be made on data the model trained on.  Confirm against the trainer.
        skf = StratifiedKFold(n_splits=config.NUMBER_OF_FOLDS,
                              random_state=int(split) % 2**32)

        # get the folds and loop over each fold
        fold_number = 0
        for train_index, test_index in skf.split(new_df, y):
            fold_number += 1

            # only rebuild data for the fold this model was trained on
            if fold_number != result.fold:
                continue

            # get the training and testing set for the fold
            X_train = new_df.iloc[train_index]
            testing_set = new_df.iloc[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # append multiple-lesion patients into training/validation
            # (pandas.concat replaces the deprecated/removed DataFrame.append;
            # indexes are preserved, matching append(ignore_index=False))
            X_train = pandas.concat([X_train, multiple])
            y_train = numpy.concatenate((y_train, multiple_y))

            # split the training set into training and validation
            training_set, validation_set, result_train, result_test = train_test_split(
                X_train,
                y_train,
                test_size=config.SPLIT_TRAINING_INTO_VALIDATION,
                stratify=y_train,
                random_state=int(split) % 2**32)

            # shuffled + augmented generators: used for the epoch-based
            # training predictions/labels
            train_generator, validation_generator, test_generator = xdata(
                fold_number,
                training_set,
                validation_set,
                testing_set,  # holdout_test,
                split,
                input_form=result.input_form,
                label_form=result.label_form,
                train_shuffle=True,
                validation_shuffle=False,
                train_augment=True,
                validation_augment=False)

            # fixed (unshuffled, unaugmented) generators: predictions from
            # these line up 1:1 with training_fixed_labels
            train_generator_f, validation_generator_f, test_generator_f = xdata(
                fold_number,
                training_set,
                validation_set,
                testing_set,  # holdout_test,
                split,
                input_form=result.input_form,
                label_form=result.label_form,
                train_shuffle=False,
                validation_shuffle=False,
                train_augment=False,
                validation_augment=False)

            training[result.input_form] = train_generator
            training_fixed[result.input_form] = train_generator_f
            validation[result.input_form] = validation_generator
            test[result.input_form] = test_generator
            # holdout[result.input_form] = holdout_test_generator

    # generate labels — drawn once from the first generator of each kind,
    # since all generator sets share the same split/fold and label_form
    train_labels = list()
    training_fixed_labels = list()
    validation_labels = list()
    test_labels = list()
    holdout_labels = list()  # kept for the commented-out holdout path below

    first_training = next(iter(training.values()))
    first_training_fixed = next(iter(training_fixed.values()))
    first_validation = next(iter(validation.values()))
    first_test = next(iter(test.values()))
    # first_holdout = next(iter(holdout.values()))

    # training labels: one batch per epoch, mirroring predict_generator's
    # steps=epochs below so labels and predictions stay aligned
    for _ in range(epochs):
        train_labels += first_training.next()[1].tolist()

    for _ in range(math.ceil(len(first_validation) / config.BATCH_SIZE)):
        validation_labels += first_validation.next()[1].tolist()

    for _ in range(math.ceil(len(first_training_fixed) / config.BATCH_SIZE)):
        training_fixed_labels += first_training_fixed.next()[1].tolist()

    for _ in range(math.ceil(len(first_test) / config.BATCH_SIZE)):
        test_labels += first_test.next()[1].tolist()

    # for _ in range(math.ceil(len(first_holdout)/config.BATCH_SIZE)):
    #    holdout_labels += first_holdout.next()[1].tolist()

    # rewind the generators so prediction starts from the same batches
    first_training.reset()
    first_training_fixed.reset()
    first_validation.reset()
    first_test.reset()
    # first_holdout.reset()

    # generate predictions — one flattened array per model
    train_predictions = list()
    train_fixed_predictions = list()
    validation_predictions = list()
    test_predictions = list()
    # holdout_predictions = list()

    for result in results:
        model = xload(result)

        t = training[result.input_form]
        tf = training_fixed[result.input_form]
        v = validation[result.input_form]
        te = test[result.input_form]
        # h = holdout[result.input_form]

        train_predictions.append(
            model.predict_generator(t, steps=epochs).flatten())
        train_fixed_predictions.append(
            model.predict_generator(
                tf, steps=math.ceil(len(tf) / config.BATCH_SIZE)).flatten())
        validation_predictions.append(
            model.predict_generator(v,
                                    steps=math.ceil(
                                        len(v) / config.BATCH_SIZE)).flatten())
        test_predictions.append(
            model.predict_generator(
                te, steps=math.ceil(len(te) / config.BATCH_SIZE)).flatten())
        # holdout_predictions.append(model.predict_generator(h, steps=math.ceil(len(h) / config.BATCH_SIZE)).flatten())

        # rewind shared generators for the next model, then free the
        # Keras session/model so successive loads don't accumulate memory
        t.reset()
        tf.reset()
        v.reset()
        te.reset()
        # h.reset()
        K.clear_session()
        del model

    return train_predictions, validation_predictions, test_predictions, train_labels, validation_labels, test_labels, train_fixed_predictions, training_fixed_labels  # holdout_predictions, holdout_labels
Example #2
0
 # get the training and testing set for the fold
 X_train, testing = f.iloc[train_index], f.iloc[test_index]
 y_train, y_test = y[train_index], y[test_index]
 # split the training into training and validation
 training, validation, result_train, result_test = train_test_split(
     X_train,
     y_train,
     test_size=config.SPLIT_TRAINING_INTO_VALIDATION,
     stratify=y_train,
     random_state=int(split) % 2**32)
 # get the data
 # training_data, validation_data, testing_data, holdout_test_data = xdata(fold_number, training, validation, testing, holdout_test, split, input_form=FLAGS.form, label_form=FLAGS.label)
 training_data, validation_data, testing_data = xdata(
     fold_number,
     training,
     validation,
     testing,
     split,
     input_form=FLAGS.form,
     label_form=FLAGS.label)
 # run the training, each trial
 for _ in range(FLAGS.trials):
     # in each trial, run for each hyperparameter combination
     for hyperparameters in parameters:
         # xrun(fold_number, (training_data, validation_data, testing_data, holdout_test_data), model, FLAGS.description, FLAGS.form, FLAGS.label, split, hyperparameters=hyperparameters)
         xrun(fold_number,
              (training_data, validation_data, testing_data),
              model,
              FLAGS.description,
              FLAGS.form,
              FLAGS.label,
              split,