Ejemplo n.º 1
0
    def ValidateModel(self, sess, updateOp, printOps, name, numIters=5):
        """
        Restore the checkpoint of each run and report validation/test metrics.

        For every run i in range(numIters) the variables are re-initialized,
        the checkpoint under '<GlobalOpts.validationDir>run_<i>/' is restored,
        and every op listed in printOps.names is evaluated on the validation
        and test sets through self.GetPerformanceThroughSet. The mean, the
        standard deviation and the raw per-run values of each op are printed,
        first for the validation set and then for the test set.

        Args:
            sess: session used for initialization, restoring and evaluation.
            updateOp: not used by this method; kept for interface
                compatibility with existing callers.
            printOps: object exposing `names` (iterable of op names); it is
                passed through to GetPerformanceThroughSet.
            name: model name, only used in the printed header.
            numIters: number of runs (checkpoints) to evaluate.

        Returns:
            None. Results are printed to stdout.
        """
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # The queue-runner threads must be stopped even when restoring or
        # evaluating fails, otherwise they keep running after the error.
        try:
            print('Model: {}'.format(name))
            bestValdOpDict = {}
            bestTestOpDict = {}
            for opName in printOps.names:
                bestValdOpDict[opName] = []
                bestTestOpDict[opName] = []

            for i in range(numIters):
                sess.run(tf.global_variables_initializer())

                saver = tf.train.Saver()
                savePath = '{}run_{}/'.format(GlobalOpts.validationDir, i)
                saveModel.restore(sess, saver, savePath)

                valdOpDict, _ = self.GetPerformanceThroughSet(sess, printOps, setType='vald')
                testOpDict, _ = self.GetPerformanceThroughSet(sess, printOps, setType='test')
                for opName in printOps.names:
                    bestValdOpDict[opName].append(valdOpDict[opName])
                    bestTestOpDict[opName].append(testOpDict[opName])

            # Both summaries share one format: "<op>: <mean> +- <std>\t<values>".
            summaries = (
                ("==============Validation Set Operations, Best==============", bestValdOpDict),
                ("==============Test Set Operations, Best==============", bestTestOpDict),
            )
            for header, opDict in summaries:
                print(header)
                for opName in opDict:
                    values = opDict[opName]
                    outputString = '{}: {} +- {}\t{}'.format(opName, np.mean(values), np.std(values), values)
                    print(outputString)
        finally:
            coord.request_stop()
            coord.join(threads)
Ejemplo n.º 2
0
def TrainModel(sess, splitTrainSet, splitValidationSet, matricesPL, labelsPL,
               trainingPL, predictionLayer, trainOperation, lossFunction,
               savePath, numberOfSteps, batchSize):
    """
    Run numberOfSteps training steps with batches of size batchSize.

    Each step draws a batch from splitTrainSet, runs trainOperation together
    with the graph's UPDATE_OPS collection, asks ReportProgress for the
    current training/validation loss, and hands the saver to SaveModel.

    Returns a tuple (trainingLosses, validationLosses) holding the losses of
    the steps for which ReportProgress signalled that they should be kept.
    predictionLayer is accepted but not used here.
    """
    # Ops registered in UPDATE_OPS are run alongside the training op.
    updateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # The saver is whatever saveModel.restore returns for this path.
    saver = saveModel.restore(sess, savePath)

    trainingLosses, validationLosses = [], []

    for step in range(numberOfSteps):
        # ---- one optimisation step on a fresh batch ----
        images, labels = splitTrainSet.next_batch(batchSize)
        batch = DataSet(images, labels)
        stepFeed = DefineFeedDict(batch, matricesPL, labelsPL, trainingPL,
                                  isTraining=True)
        sess.run([trainOperation, updateOps], feed_dict=stepFeed)

        # ---- progress report; keep the losses only when asked to ----
        progress = ReportProgress(sess, step, lossFunction, matricesPL,
                                  labelsPL, splitTrainSet, splitValidationSet,
                                  trainingPL)
        stepTrainingLoss, stepValidationLoss, keepLosses = progress
        if keepLosses:
            trainingLosses.append(stepTrainingLoss)
            validationLosses.append(stepValidationLoss)

        # ---- checkpointing is delegated to SaveModel on every step ----
        SaveModel(sess, step, saver, savePath)

    return (trainingLosses, validationLosses)
Ejemplo n.º 3
0
def RunCrossValidation(dataSet, matricesPL, labelsPL, predictionLayers,
                       trainOperations, lossFunctions, trainingPL,
                       numberOfStepsArray, batchSizes, saveNames):
    """
    Cross-validate several models, report the best one and plot a comparison.

    The data set is split 90/10 into a train and a held-out test part. Every
    model (one entry per index in the parallel lists) is cross-validated on
    the train part via CrossValidateModelParameters; the model with the
    lowest returned performance value is considered best. The best model's
    '<CHECKPOINT_DIR><saveName>_split1.ckpt' checkpoint is then restored and
    its loss on the held-out test part is printed. Finally a bar chart of all
    validation performances is written under 'plots/'.

    Args:
        dataSet: object exposing `images` and `labels`.
        matricesPL, labelsPL, trainingPL: placeholders forwarded to the
            cross-validation and evaluation helpers.
        predictionLayers, trainOperations, lossFunctions,
        numberOfStepsArray, batchSizes, saveNames: parallel sequences with
            one entry per model; indexed by the position in saveNames.

    Returns:
        None. Results are printed and plotted.
    """
    ########## SPLIT DATA INTO TRAIN AND TEST ##########
    X_train, X_test, y_train, y_test = train_test_split(dataSet.images,
                                                        dataSet.labels,
                                                        test_size=0.1)
    splitTrainSet = DataSet(X_train, y_train)
    splitTestSet = DataSet(X_test, y_test)

    ########## ITERATE OVER ALL MODELS ##########
    bestIndex = -1
    lowestLoss = math.inf
    finalValidationPerformances = []
    for index, saveName in enumerate(saveNames):
        # Index (rather than zip) so a too-short parallel list still raises.
        predictionLayer = predictionLayers[index]
        lossFunction = lossFunctions[index]
        trainOperation = trainOperations[index]
        numberOfSteps = numberOfStepsArray[index]
        batchSize = batchSizes[index]

        print('===================%s===================' % saveName)
        savePath = get('TRAIN.ROI_BASELINE.CHECKPOINT_DIR') + saveName

        ########## GET CROSS VALIDATION PERFORMANCE OF MODEL ##########
        averageFinalValidationPerformance = CrossValidateModelParameters(
            splitTrainSet, matricesPL, labelsPL, trainingPL, predictionLayer,
            trainOperation, lossFunction, savePath, saveName, numberOfSteps,
            batchSize)
        finalValidationPerformances.append(averageFinalValidationPerformance)

        ########## DETERMINE BEST MODEL SO FAR ##########
        if averageFinalValidationPerformance < lowestLoss:
            lowestLoss = averageFinalValidationPerformance
            bestIndex = index

    ########## PRINT CROSS VALIDATION RESULTS ##########
    print('===================CROSS VALIDATION RESULTS===================')
    for saveName, performance in zip(saveNames, finalValidationPerformances):
        print('Model %s had validation performance: %f' %
              (saveName, performance))
    print('===================BEST MODEL===================')
    if bestIndex < 0:
        # No performance compared below infinity (e.g. every value was NaN or
        # inf, or there were no models). Indexing with -1 would wrongly
        # report the last model as best, so report the situation instead.
        print('No model had a validation performance below infinity; '
              'skipping test evaluation.')
    else:
        bestName = saveNames[bestIndex]
        print('Best model was %s with validation performance of %f' %
              (bestName, finalValidationPerformances[bestIndex]))

        ########## EVALUATE BEST MODEL ON THE HELD-OUT TEST SPLIT ##########
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            fileSavePath = get('TRAIN.ROI_BASELINE.CHECKPOINT_DIR'
                               ) + bestName + '_split1.ckpt'
            print(fileSavePath)
            saveModel.restore(sess, fileSavePath)
            testLoss = GetEvaluatedLoss(sess, splitTestSet,
                                        lossFunctions[bestIndex],
                                        matricesPL, labelsPL, trainingPL)
            print('Best model had test loss: %f' % testLoss)

    ########## PLOT COMPARISON OF ALL MODELS ##########
    plotPath = 'plots/modelComparison%s.png' % datetime.now().strftime(
        '%I:%M%p_%B_%d_%Y')
    PlotComparisonBarChart(performances=finalValidationPerformances,
                           names=saveNames,
                           savePath=plotPath)
Ejemplo n.º 4
0
    def TrainModel(self, sess, updateOp, printOps, name):
        """
        Train for self.numberOfSteps steps, checkpointing the best model.

        Every step runs updateOp together with the graph's UPDATE_OPS
        collection. Every self.batchStepsBetweenSummary steps the gradient
        summary is written, train and validation performance are evaluated
        through self.GetPerformanceThroughSet, written as summaries and
        printed, and the model is saved whenever the validation 'loss' is the
        lowest seen so far. After training, the checkpoint in the save
        directory is restored and evaluated on the test set.

        Args:
            sess: session used for training and evaluation.
            updateOp: training op run on every step.
            printOps: object exposing `gradientSummary`, `mergedTrainSummary`
                and `mergedValdSummary`; it is also passed through to
                GetPerformanceThroughSet.
            name: model name; used as the sub-directory of self.summaryDir
                and self.checkpointDir.

        Returns:
            (bestValdOpDict, testOpValueDict): the validation op values at
            the step with the lowest validation loss (empty dict if no step
            improved on the initial sentinel) and the test-set op values of
            the restored model.
        """
        writer = tf.summary.FileWriter('{}{}/'.format(self.summaryDir, name))
        # Close the writer even if training fails so buffered events are
        # flushed and the file handle is released.
        try:
            # Initialize relevant variables
            sess.run(tf.global_variables_initializer())

            # Collect summary and graph update operations
            extraUpdateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

            saver = tf.train.Saver()
            savePath = '{}{}/'.format(self.checkpointDir, name)
            if not os.path.exists(savePath):
                os.makedirs(savePath)

            # Infinity instead of a large magic number, so the first evaluated
            # validation loss always produces a checkpoint to restore later.
            bestValidationLoss = float('inf')
            bestValdOpDict = {}
            bestLossStepIndex = 0

            for batchIndex in range(self.numberOfSteps):
                batchTrainFeedDict = self.GetFeedDict(sess)

                if batchIndex % self.batchStepsBetweenSummary != 0:
                    # Plain training step, no reporting.
                    _, _ = sess.run([updateOp, extraUpdateOps], feed_dict=batchTrainFeedDict)
                else:
                    # Training step that also records the gradient summary.
                    _, _, gradSummary = sess.run([updateOp, extraUpdateOps, printOps.gradientSummary], feed_dict=batchTrainFeedDict)
                    writer.add_summary(gradSummary, batchIndex)

                    # Training-set performance for this step.
                    opValueDict, summaryFeedDict = self.GetPerformanceThroughSet(sess, printOps,
                                        setType='train', batchTrainFeedDict=batchTrainFeedDict)
                    writer.add_summary(
                        sess.run(
                            printOps.mergedTrainSummary,
                            feed_dict=summaryFeedDict),
                        batchIndex)
                    print("==============Train Set Operations, Step {}==============".format(batchIndex))
                    for opName in opValueDict:
                        print('{}: {}'.format(opName, opValueDict[opName]))

                    # Validation-set performance; relies on the default
                    # setType of GetPerformanceThroughSet.
                    opValueDict, summaryFeedDict = self.GetPerformanceThroughSet(sess, printOps)
                    writer.add_summary(
                        sess.run(
                            printOps.mergedValdSummary,
                            feed_dict=summaryFeedDict),
                        batchIndex)
                    print("==============Validation Set Operations, Step {}==============".format(batchIndex))
                    for opName in opValueDict:
                        print('{}: {}'.format(opName, opValueDict[opName]))

                    # Checkpoint only when the validation loss improves.
                    validationLoss = opValueDict['loss']
                    if validationLoss < bestValidationLoss:
                        bestLossStepIndex = batchIndex
                        bestValidationLoss = validationLoss
                        bestValdOpDict = opValueDict
                        self.SaveModel(sess, batchIndex, saver, savePath)

            # Evaluate the restored checkpoint on the test set.
            saveModel.restore(sess, saver, savePath)
            testOpValueDict, _ = self.GetPerformanceThroughSet(sess, printOps, setType='test')
        finally:
            writer.close()

        return bestValdOpDict, testOpValueDict
Ejemplo n.º 5
0
    def getPatientPerformances(self, sess, predictionOp, name, numIters=1):
        """
        Predict every test subject with each run's checkpoint and save a CSV.

        For run i the checkpoint under '<GlobalOpts.validationDir>run_<i>/'
        is restored and self.testSet[i] is dequeued item by item; the true
        label, the prediction, the absolute error and the squared error of
        each item are recorded. The results are written to
        '/data3/hyhuang/brain_age_prediction/reports/<name>.csv' and the mean
        absolute / mean squared error over the recorded arrays are printed.

        When GlobalOpts.pretrained is set, the CSV has one row per subject
        of run 0 with one column group per run (Run_1..Run_5 etc.); otherwise
        the per-run results are flattened with resultConcat into one row per
        subject.

        Args:
            sess: session used for restoring, dequeuing and prediction.
            predictionOp: op producing the prediction for a fed batch.
            name: model name; printed and used as the CSV file name.
            numIters: number of runs to evaluate.

        NOTE(review): only the first 5 entries of self.testSet are collected,
        so numIters > 5 raises IndexError, and the pretrained branch reads
        rows 0-4 of the result arrays, so it needs numIters >= 5 (the default
        of 1 would fail there). Confirm the intended contract with callers.
        NOTE(review): the result arrays are sized from
        self.testSet[0].maxItemsInQueue + 1; a later test set with more items
        than that would overflow them. Confirm all test sets fit.
        """
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # Stop and join the queue-runner threads even if evaluation fails.
        try:
            nameOp, labelOp, imageOp = [], [], []
            numbersIters = []
            for i in range(5):
                nameOp.append(self.testSet[i].dequeueOp)
                labelOp.append(self.testSet[i].labelBatchOperation)
                imageOp.append(self.testSet[i].imageBatchOperation)
                numbersIters.append(self.testSet[i].maxItemsInQueue)
            numberIters = self.testSet[0].maxItemsInQueue + 1
            predictedAges = np.zeros((numIters, numberIters))
            absoluteErrors = np.zeros((numIters, numberIters))
            squaredErrors = np.zeros((numIters, numberIters))
            trueAges = np.zeros((numIters, numberIters))
            name_arr = []
            print('Model: {}'.format(name))

            print('SUBJECT ID\tTRUE AGE\tPREDICTED AGE')
            for i in range(numIters):
                sess.run(tf.global_variables_initializer())
                saver = tf.train.Saver()
                savePath = '{}run_{}/'.format(GlobalOpts.validationDir, i)
                saveModel.restore(sess, saver, savePath)
                numberIters = numbersIters[i]
                print("This model has {} test images.".format(numberIters))

                for j in range(numberIters):
                    images, labels, names = sess.run(
                        [imageOp[i], labelOp[i], nameOp[i]])
                    feed_dict = {
                        self.imagesPL: images,
                        self.labelsPL: labels,
                        self.trainingPL: False
                    }
                    names = names[0].decode('UTF-8')
                    # With pretrained models the subject names are recorded
                    # for the first run only; otherwise for every run.
                    if i == 0 or not GlobalOpts.pretrained:
                        name_arr.append(names)
                    labels = labels[0, 0]
                    predictions = sess.run([predictionOp], feed_dict=feed_dict)
                    predictions = np.squeeze(predictions[0])
                    trueAges[i, j] = labels
                    predictedAges[i, j] = predictions
                    absoluteErrors[i, j] = np.abs(predictions - labels)
                    squaredErrors[i, j] = np.square(predictions - labels)

            if GlobalOpts.pretrained:
                # One row per subject, one column group per run.
                df = pd.DataFrame(
                    data={
                        'Subject': np.array(name_arr),
                        'TrueAge': trueAges[0],
                        'Run_1': predictedAges[0, :],
                        'Run_2': predictedAges[1, :],
                        'Run_3': predictedAges[2, :],
                        'Run_4': predictedAges[3, :],
                        'Run_5': predictedAges[4, :],
                        'MinPredicted': np.min(predictedAges, axis=0),
                        'MaxPredicted': np.max(predictedAges, axis=0),
                        'sdPredicted': np.std(predictedAges, axis=0),
                        'AE_1': absoluteErrors[0, :],
                        'AE_2': absoluteErrors[1, :],
                        'AE_3': absoluteErrors[2, :],
                        'AE_4': absoluteErrors[3, :],
                        'AE_5': absoluteErrors[4, :],
                        'SE_1': squaredErrors[0, :],
                        'SE_2': squaredErrors[1, :],
                        'SE_3': squaredErrors[2, :],
                        'SE_4': squaredErrors[3, :],
                        'SE_5': squaredErrors[4, :],
                    })
            else:
                # Flatten the per-run rows into one sequence per metric.
                trueAges = resultConcat(trueAges, numbersIters)
                predictedAges = resultConcat(predictedAges, numbersIters)
                absoluteErrors = resultConcat(absoluteErrors, numbersIters)
                squaredErrors = resultConcat(squaredErrors, numbersIters)
                df = pd.DataFrame(
                    data={
                        'Subject': np.array(name_arr),
                        'TrueAge': trueAges,
                        'PredictedAges': predictedAges,
                        'AbsoluteErrors': absoluteErrors,
                        'SquaredErrors': squaredErrors,
                    })

            MAE = np.mean(absoluteErrors)
            MSE = np.mean(squaredErrors)
            print("MAE: " + str(MAE))
            print("MSE: " + str(MSE))
            df.to_csv('{}{}.csv'.format(
                "/data3/hyhuang/brain_age_prediction/reports/", name),
                      index=False)
        finally:
            coord.request_stop()
            coord.join(threads)