def CrossValidateModelParameters(splitTrainSet, matricesPL, labelsPL,
                                 trainingPL, predictionLayer, trainOperation,
                                 lossFunction, savePath, saveName,
                                 numberOfSteps, batchSize):
    """
    Runs 5-fold cross validation (unshuffled KFold) of one model
    configuration on the images/labels of splitTrainSet.

    For each fold a new tf.Session is opened, the global variables are
    initialised, and TrainModel is called with that fold's training and
    validation DataSet and the checkpoint path
    savePath + '_split<fold>.ckpt'. On the last fold the result of
    performanceCI on that fold's validation set is printed as well.

    The per-fold loss curves returned by TrainModel are passed to
    PlotTrainingValidationLoss with the path 'plots/<saveName>.png'; when
    numberOfSteps >= 1000 the last 1000 entries of every curve are passed a
    second time with the path 'plots/<saveName>last1000.png'.

    Returns the mean over the folds of the last recorded validation loss.
    """
    ########## DEFINE DATA ##########
    numberOfFolds = 5
    allImages = splitTrainSet.images
    allLabels = splitTrainSet.labels
    splitter = KFold(n_splits=numberOfFolds, shuffle=False)
    trainingCurves = []
    validationCurves = []

    folds = enumerate(splitter.split(allImages), start=1)
    for foldNumber, (trainRows, validationRows) in folds:
        ########## TRAIN THE MODEL ##########
        print('-------------Split: %i-------------' % foldNumber)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            checkpointPath = savePath + '_split%i.ckpt' % foldNumber
            foldTrainSet = DataSet(images=allImages[trainRows],
                                   labels=allLabels[trainRows],
                                   numClasses=1)
            foldValidationSet = DataSet(images=allImages[validationRows],
                                        labels=allLabels[validationRows],
                                        numClasses=1)
            foldTrainingLosses, foldValidationLosses = TrainModel(
                sess, foldTrainSet, foldValidationSet, matricesPL, labelsPL,
                trainingPL, predictionLayer, trainOperation, lossFunction,
                checkpointPath, numberOfSteps, batchSize)
            trainingCurves.append(foldTrainingLosses)
            validationCurves.append(foldValidationLosses)

            # The confidence interval is only reported for the final fold,
            # while its session is still open.
            if foldNumber == numberOfFolds:
                ciResult = performanceCI(sess, foldValidationSet,
                                         lossFunction, matricesPL, labelsPL,
                                         trainingPL)
                (point, lower, upper) = ciResult
                print("Confidence Interval Performance: %f (%f, %f)" %
                      (point, lower, upper))

    ########## PLOT THE RESULTS OF CROSS VALIDATION ##########
    trainingCurves = np.array(trainingCurves)
    validationCurves = np.array(validationCurves)
    PlotTrainingValidationLoss(trainingCurves, validationCurves, saveName,
                               'plots/' + saveName + '.png')

    if numberOfSteps >= 1000:
        trainingTail = trainingCurves[:, -1000:]
        validationTail = validationCurves[:, -1000:]
        PlotTrainingValidationLoss(trainingTail, validationTail, saveName,
                                   'plots/' + saveName + 'last1000.png')

    ########## GET AVERAGE VALIDATION PERFORMANCE ##########
    finalValidationLosses = validationCurves[:, -1]
    return np.mean(finalValidationLosses)
# Example #2
# 0
def TrainModelXY(sess, X, Y, imagesPL, labelsPL, predictionLayer,
                 trainOperation, lossFunction):
    """
    Trains on a random 80/20 train/test split of (X, Y).

    The number of steps and the batch size are read from the configuration
    keys 'TRAIN.CNN.NB_STEPS' and 'TRAIN.CNN.BATCH_SIZE'. Each step runs
    trainOperation on the next training batch and then calls ReportProgress.
    predictionLayer is accepted but not used in this function.

    Returns a tuple (trainingLoss, testLoss), each obtained from
    GetEvaluatedLoss after the last step.
    """
    trainImages, testImages, trainLabels, testLabels = train_test_split(
        X, Y, test_size=0.2)
    trainSet = DataSet(trainImages, trainLabels)
    testSet = DataSet(testImages, testLabels)

    numberOfSteps = get('TRAIN.CNN.NB_STEPS')
    for step in range(numberOfSteps):
        images, labels = trainSet.next_batch(get('TRAIN.CNN.BATCH_SIZE'))
        batch = DataSet(images, labels)
        sess.run(trainOperation,
                 feed_dict=DefineFeedDict(batch, imagesPL, labelsPL))
        ReportProgress(sess, step, lossFunction, imagesPL, labelsPL,
                       trainSet, testSet)

    finalTrainingLoss = GetEvaluatedLoss(sess, trainSet, lossFunction,
                                         imagesPL, labelsPL)
    finalTestLoss = GetEvaluatedLoss(sess, testSet, lossFunction, imagesPL,
                                     labelsPL)
    return (finalTrainingLoss, finalTestLoss)
 def returnNIIDataset(self):
     """
     Build the training and test DataSet objects from the stored images.

     self.train_images / self.test_images are converted to arrays and given
     a trailing singleton axis. The label of every subject in
     self.train_subject / self.test_subject is the 'AgeYears' value of the
     row of self._df whose 'Subject' equals that subject; the label vectors
     are then passed through self.copy_labels. Array shapes are printed
     along the way.

     Returns:
         (train, test): two DataSet objects built with reshape=True and
         fMRI=True.

     Raises:
         ValueError: if a subject does not match exactly one row of
         self._df.
     """
     def _add_trailing_axis(volumes):
         # (n, a, b, c) -> (n, a, b, c, 1)
         return np.reshape(volumes, (volumes.shape[0], volumes.shape[1],
                                     volumes.shape[2], volumes.shape[3], 1))

     def _age_labels(subjects):
         labels = np.zeros(len(subjects))
         for idx, subject in enumerate(subjects):
             matches = self._df.loc[self._df['Subject'] == subject,
                                    'AgeYears']
             # Extract the scalar explicitly: assigning a one-element Series
             # into an array slot relies on an implicit array-to-scalar
             # conversion that NumPy has deprecated. item() raises
             # ValueError unless exactly one row matched.
             labels[idx] = matches.item()
         return self.copy_labels(labels)

     train_mats = np.array(self.train_images)
     print(train_mats.shape)
     train_mats = _add_trailing_axis(train_mats)
     train_labels = _age_labels(self.train_subject)
     print(train_labels.shape)
     print(train_mats.shape)

     test_mats = _add_trailing_axis(np.array(self.test_images))
     test_labels = _age_labels(self.test_subject)
     print(test_labels.shape)
     print(test_mats.shape)

     train_set = DataSet(train_mats, train_labels, reshape=True, fMRI=True)
     test_set = DataSet(test_mats, test_labels, reshape=True, fMRI=True)
     return train_set, test_set
def TrainModel(sess, splitTrainSet, splitValidationSet, matricesPL, labelsPL,
               trainingPL, predictionLayer, trainOperation, lossFunction,
               savePath, numberOfSteps, batchSize):
    """
    Runs numberOfSteps training steps with batches of size batchSize drawn
    from splitTrainSet.

    Every step runs trainOperation together with the ops collected under
    tf.GraphKeys.UPDATE_OPS, then calls ReportProgress and SaveModel.
    predictionLayer is accepted but not used in this function.

    Returns a tuple (trainingLosses, validationLosses) holding the losses
    reported by ReportProgress on the steps where it flagged them as usable.
    """
    updateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    ############# Define tf saver #############
    # Whatever saveModel.restore returns is later handed to SaveModel.
    saver = saveModel.restore(sess, savePath)

    ############# DEFINE ARRAYS TO HOLD LOSS #############
    trainingLossHistory = []
    validationLossHistory = []

    for step in range(numberOfSteps):
        ############# RUN TRAINING OPERATIONS #############
        images, labels = splitTrainSet.next_batch(batchSize)
        batch = DataSet(images, labels)
        feedDict = DefineFeedDict(batch, matricesPL, labelsPL, trainingPL,
                                  isTraining=True)
        sess.run([trainOperation, updateOps], feed_dict=feedDict)

        ############# REPORT TRAINING PROGRESS #############
        progress = ReportProgress(sess, step, lossFunction, matricesPL,
                                  labelsPL, splitTrainSet,
                                  splitValidationSet, trainingPL)
        trainingLoss, validationLoss, shouldUse = progress
        if shouldUse:
            trainingLossHistory.append(trainingLoss)
            validationLossHistory.append(validationLoss)

        ############# SAVE TRAINED MODEL #############
        SaveModel(sess, step, saver, savePath)

    return (trainingLossHistory, validationLossHistory)
def performanceCI(sess, dataSet, lossFunction, matricesPL, labelsPL,
                  trainingPL):
    """
    Bootstraps the loss on dataSet.

    Draws 1000 resamples of dataSet (same size, with replacement), evaluates
    each one with GetEvaluatedLoss and sorts the resulting losses.

    Returns a tuple (mean, lower, upper): the mean of the 1000 losses and
    the sorted entries at positions 25 and 975.
    """
    numberOfResamples = 1000
    images = dataSet.images
    labels = dataSet.labels
    sampleCount = images.shape[0]
    rowIds = np.arange(sampleCount)
    losses = np.zeros(numberOfResamples)

    for draw in range(numberOfResamples):
        picked = np.random.choice(rowIds, size=sampleCount, replace=True)
        resampled = DataSet(images[picked], labels[picked])
        losses[draw] = GetEvaluatedLoss(sess, resampled, lossFunction,
                                        matricesPL, labelsPL, trainingPL)

    losses.sort()
    return (losses.mean(), losses[25], losses[975])
def RunCrossValidation(dataSet, matricesPL, labelsPL, predictionLayers,
                       trainOperations, lossFunctions, trainingPL,
                       numberOfStepsArray, batchSizes, saveNames):
    """
    Cross validates several candidate models and reports the best one.

    dataSet is split 90/10 into a training part and a held-out test part.
    Candidate i is described by the i-th entries of predictionLayers,
    trainOperations, lossFunctions, numberOfStepsArray, batchSizes and
    saveNames, and is passed to CrossValidateModelParameters together with
    the training part. For the candidate with the lowest returned value,
    saveModel.restore is called on its '<checkpoint dir><saveName>_split1.ckpt'
    path and the loss on the held-out part is printed. Finally all returned
    values are handed to PlotComparisonBarChart.

    If saveNames is empty, or no candidate's value compares below infinity
    (for example because every value is NaN), no best model is reported and
    the held-out evaluation is skipped.

    Nothing is returned; the results are printed and plotted.
    """
    ########## SPLIT DATA INTO TRAIN AND TEST ##########
    X_train, X_test, y_train, y_test = train_test_split(dataSet.images,
                                                        dataSet.labels,
                                                        test_size=0.1)
    splitTrainSet = DataSet(X_train, y_train)
    splitTestSet = DataSet(X_test, y_test)

    ########## ITERATE OVER ALL MODELS ##########
    # None (rather than -1) marks "nothing selected" so that it can never be
    # mistaken for a valid list index.
    bestIndex = None
    lowestLoss = math.inf
    finalValidationPerformances = []
    for index, saveName in enumerate(saveNames):
        print('===================%s===================' % saveName)
        savePath = get('TRAIN.ROI_BASELINE.CHECKPOINT_DIR') + saveName

        ########## GET CROSS VALIDATION PERFORMANCE OF MODEL ##########
        averageFinalValidationPerformance = CrossValidateModelParameters(
            splitTrainSet, matricesPL, labelsPL, trainingPL,
            predictionLayers[index], trainOperations[index],
            lossFunctions[index], savePath, saveName,
            numberOfStepsArray[index], batchSizes[index])
        finalValidationPerformances.append(averageFinalValidationPerformance)

        ########## DETERMINE BEST MODEL SO FAR ##########
        if averageFinalValidationPerformance < lowestLoss:
            lowestLoss = averageFinalValidationPerformance
            bestIndex = index

    ########## PRINT CROSS VALIDATION RESULTS ##########
    print('===================CROSS VALIDATION RESULTS===================')
    for saveName, performance in zip(saveNames, finalValidationPerformances):
        print('Model %s had validation performance: %f' %
              (saveName, performance))

    print('===================BEST MODEL===================')
    if bestIndex is None:
        print('No best model could be selected')
    else:
        bestName = saveNames[bestIndex]
        print('Best model was %s with validation performance of %f' %
              (bestName, finalValidationPerformances[bestIndex]))

        ########## EVALUATE BEST MODEL ON THE HELD-OUT PART ##########
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            fileSavePath = get('TRAIN.ROI_BASELINE.CHECKPOINT_DIR'
                               ) + bestName + '_split1.ckpt'
            print(fileSavePath)
            saveModel.restore(sess, fileSavePath)
            testLoss = GetEvaluatedLoss(sess, splitTestSet,
                                        lossFunctions[bestIndex], matricesPL,
                                        labelsPL, trainingPL)
            print('Best model had test loss: %f' % testLoss)

    # NOTE(review): '%I:%M%p' puts a ':' into the file name, which Windows
    # does not allow in file names -- confirm the target platform.
    savePath = 'plots/modelComparison%s.png' % datetime.now().strftime(
        '%I:%M%p_%B_%d_%Y')
    PlotComparisonBarChart(performances=finalValidationPerformances,
                           names=saveNames,
                           savePath=savePath)
 def returnDataSet(self):
     """
     Wrap self.matrices and the 'AgeYears' column of self._df in a DataSet.

     The matrices are converted to an array and reshaped to
     (shape[0], shape[1], shape[2], 1); the ages are copied and reshaped to
     a column of shape (number of rows, 1).
     """
     matrixStack = np.array(self.matrices)
     count = matrixStack.shape[0]
     height = matrixStack.shape[1]
     width = matrixStack.shape[2]
     matrixStack = matrixStack.reshape((count, height, width, 1))

     ages = np.array(self._df['AgeYears'].values.copy())
     ageColumn = ages.reshape((ages.shape[0], 1))
     return DataSet(matrixStack, ageColumn)