import math
from uuid import UUID

import numpy
import pandas
from keras import backend as K
from sklearn.model_selection import StratifiedKFold, train_test_split

# config, XResult, xdata, and xload are project-local and assumed to be
# imported elsewhere in this module.


def xstacked_data(uuids=[], epochs=config.EPOCHS):
    results = [
        XResult.query.filter(XResult.run_id == uuid).first() for uuid in uuids
    ]
    assert len(results) > 0, "no models found"
    assert len(set(result.split for result in results)) == 1, \
        "all models must be trained on the same split"
    assert len(set(result.label_form for result in results)) == 1, \
        "all models must be trained on the same label"

    training = dict()
    training_fixed = dict()
    validation = dict()
    test = dict()
    # holdout = dict()

    for result in results:
        if result.input_form in training:
            continue
        split = UUID(result.split)

        # splitting the initial training and holdout test sets
        f = pandas.read_pickle(config.FEATURES)
        put_In_Training = pandas.read_csv(config.MULTIPLE_LESIONS)
        df_List = list(put_In_Training['ID'])
        multiple = f[f['patient'].isin(df_List)]
        multiple_y = multiple[result.label_form].values
        new_df = f[~f.patient.isin(df_List)]
        y = new_df[result.label_form].values

        # set up the k-fold process
        skf = StratifiedKFold(n_splits=config.NUMBER_OF_FOLDS,
                              random_state=int(split) % 2**32)

        # get the folds and loop over each fold
        fold_number = 0
        for train_index, test_index in skf.split(new_df, y):
            fold_number += 1
            if fold_number != result.fold:
                continue

            # get the training and testing set for the fold
            X_train, testingSet = new_df.iloc[train_index], new_df.iloc[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # append multiple lesions into training/validation
            X_train = X_train.append(multiple, ignore_index=False)
            y_train = numpy.concatenate((y_train, multiple_y))

            # split the training set into training and validation
            trainingSet, validationSet, result_train, result_test = train_test_split(
                X_train,
                y_train,
                test_size=config.SPLIT_TRAINING_INTO_VALIDATION,
                stratify=y_train,
                random_state=int(split) % 2**32)

            # shuffled/augmented generators, used for the per-epoch training predictions
            # train_generator, validation_generator, test_generator, holdout_test_generator
            train_generator, validation_generator, test_generator = xdata(
                fold_number,
                trainingSet,
                validationSet,
                testingSet,
                # holdout_test,
                split,
                input_form=result.input_form,
                label_form=result.label_form,
                train_shuffle=True,
                validation_shuffle=False,
                train_augment=True,
                validation_augment=False)

            # fixed (unshuffled, unaugmented) generators
            # train_generator_f, validation_generator_f, test_generator_f, holdout_test_generator_f
            train_generator_f, validation_generator_f, test_generator_f = xdata(
                fold_number,
                trainingSet,
                validationSet,
                testingSet,
                # holdout_test,
                split,
                input_form=result.input_form,
                label_form=result.label_form,
                train_shuffle=False,
                validation_shuffle=False,
                train_augment=False,
                validation_augment=False)

            training[result.input_form] = train_generator
            training_fixed[result.input_form] = train_generator_f
            validation[result.input_form] = validation_generator
            test[result.input_form] = test_generator
            # holdout[result.input_form] = holdout_test_generator

    # generate labels
    train_labels = list()
    training_fixed_labels = list()
    validation_labels = list()
    test_labels = list()
    holdout_labels = list()
    first_training = list(training.values())[0]
    first_training_fixed = list(training_fixed.values())[0]
    first_validation = list(validation.values())[0]
    first_test = list(test.values())[0]
    # first_holdout = list(holdout.values())[0]
    for _ in range(epochs):
        train_labels += first_training.next()[1].tolist()
    for _ in range(math.ceil(len(first_validation) / config.BATCH_SIZE)):
        validation_labels += first_validation.next()[1].tolist()
    for _ in range(math.ceil(len(first_training_fixed) / config.BATCH_SIZE)):
        training_fixed_labels += first_training_fixed.next()[1].tolist()
    for _ in range(math.ceil(len(first_test) / config.BATCH_SIZE)):
        test_labels += first_test.next()[1].tolist()
    # for _ in range(math.ceil(len(first_holdout) / config.BATCH_SIZE)):
    #     holdout_labels += first_holdout.next()[1].tolist()
    first_training.reset()
    first_training_fixed.reset()
    first_validation.reset()
    first_test.reset()
    # first_holdout.reset()

    # generate predictions
    train_predictions = list()
    train_fixed_predictions = list()
    validation_predictions = list()
    test_predictions = list()
    # holdout_predictions = list()
    for result in results:
        model = xload(result)
        t = training[result.input_form]
        tf = training_fixed[result.input_form]
        v = validation[result.input_form]
        te = test[result.input_form]
        # h = holdout[result.input_form]
        train_predictions.append(
            model.predict_generator(t, steps=epochs).flatten())
        train_fixed_predictions.append(
            model.predict_generator(
                tf, steps=math.ceil(len(tf) / config.BATCH_SIZE)).flatten())
        validation_predictions.append(
            model.predict_generator(
                v, steps=math.ceil(len(v) / config.BATCH_SIZE)).flatten())
        test_predictions.append(
            model.predict_generator(
                te, steps=math.ceil(len(te) / config.BATCH_SIZE)).flatten())
        # holdout_predictions.append(
        #     model.predict_generator(
        #         h, steps=math.ceil(len(h) / config.BATCH_SIZE)).flatten())
        t.reset()
        tf.reset()
        v.reset()
        te.reset()
        # h.reset()
        K.clear_session()
        del model

    return (train_predictions, validation_predictions, test_predictions,
            train_labels, validation_labels, test_labels,
            train_fixed_predictions, training_fixed_labels)
    # holdout_predictions, holdout_labels
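# Usage sketch (not from the original source): one plausible way to consume the
# outputs of xstacked_data is to stack the per-model prediction vectors
# column-wise and fit a simple meta-learner on the fixed (unshuffled,
# unaugmented) training predictions. The LogisticRegression meta-learner and
# the assumption of binary labels are illustrative choices, not part of the
# project; fit_stacking_meta_learner is a hypothetical helper name.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score


def fit_stacking_meta_learner(uuids):
    (train_predictions, validation_predictions, test_predictions,
     train_labels, validation_labels, test_labels,
     train_fixed_predictions, training_fixed_labels) = xstacked_data(uuids=uuids)

    # one row per sample, one column per base model
    X_train = numpy.column_stack(train_fixed_predictions)
    X_validation = numpy.column_stack(validation_predictions)
    X_test = numpy.column_stack(test_predictions)

    meta_learner = LogisticRegression()
    meta_learner.fit(X_train, training_fixed_labels)

    validation_auc = roc_auc_score(
        validation_labels, meta_learner.predict_proba(X_validation)[:, 1])
    test_auc = roc_auc_score(
        test_labels, meta_learner.predict_proba(X_test)[:, 1])
    return meta_learner, validation_auc, test_auc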
# Fragment from the per-fold training loop: train_index, test_index, f, y,
# split, fold_number, model, parameters, and FLAGS come from the surrounding
# scope.

# get the training and testing set for the fold
X_train, testing = f.iloc[train_index], f.iloc[test_index]
y_train, y_test = y[train_index], y[test_index]

# split the training set into training and validation
training, validation, result_train, result_test = train_test_split(
    X_train,
    y_train,
    test_size=config.SPLIT_TRAINING_INTO_VALIDATION,
    stratify=y_train,
    random_state=int(split) % 2**32)

# get the data
# training_data, validation_data, testing_data, holdout_test_data = xdata(
#     fold_number, training, validation, testing, holdout_test, split,
#     input_form=FLAGS.form, label_form=FLAGS.label)
training_data, validation_data, testing_data = xdata(
    fold_number,
    training,
    validation,
    testing,
    split,
    input_form=FLAGS.form,
    label_form=FLAGS.label)

# run the training, one pass per trial
for _ in range(FLAGS.trials):
    # in each trial, run for each hyperparameter combination
    for hyperparameters in parameters:
        # xrun(fold_number,
        #      (training_data, validation_data, testing_data, holdout_test_data),
        #      model, FLAGS.description, FLAGS.form, FLAGS.label, split,
        #      hyperparameters=hyperparameters)
        xrun(fold_number,
             (training_data, validation_data, testing_data),
             model,
             FLAGS.description,
             FLAGS.form,
             FLAGS.label,
             split,
             hyperparameters=hyperparameters)
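# Hyperparameter grid sketch (not from the original source): the loop above
# expects `parameters` to be an iterable of hyperparameter dictionaries. One
# straightforward way to build such a list is sklearn's ParameterGrid; the
# variable name `example_parameters` and the keys and values below are
# placeholders, not the grid actually used in this project.
from sklearn.model_selection import ParameterGrid

example_parameters = list(
    ParameterGrid({
        "learning_rate": [1e-3, 1e-4],
        "dropout": [0.25, 0.5],
        "batch_size": [16, 32],
    }))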