def ValidateModel(self, sess, updateOp, printOps, name, numIters=5):
    """Evaluate `numIters` saved runs on the validation and test sets.

    For each run index ``i`` in ``range(numIters)`` the variables are
    re-initialized, the checkpoint under
    ``'{GlobalOpts.validationDir}run_{i}/'`` is restored, and
    ``self.GetPerformanceThroughSet`` is called with ``setType='vald'`` and
    ``setType='test'``. For every name in ``printOps.names`` the per-run
    values are collected and printed as ``mean +- std`` followed by the raw
    list of values.

    Args:
        sess: session used to run initialization, restore and evaluation.
        updateOp: not used by this method; kept so existing callers work.
        printOps: object exposing ``names`` (iterable of operation names);
            passed through to ``GetPerformanceThroughSet``.
        name: model name, only used in the printed header.
        numIters: number of saved runs to restore and evaluate.

    Returns:
        None. Results are printed.
    """
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        print('Model: {}'.format(name))

        # One list of per-run values for every reported operation.
        bestValdOpDict = {}
        bestTestOpDict = {}
        for opName in printOps.names:
            bestValdOpDict[opName] = []
            bestTestOpDict[opName] = []

        for i in range(numIters):
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            savePath = '{}run_{}/'.format(GlobalOpts.validationDir, i)
            saveModel.restore(sess, saver, savePath)

            valdOpDict, _ = self.GetPerformanceThroughSet(sess, printOps, setType='vald')
            testOpDict, _ = self.GetPerformanceThroughSet(sess, printOps, setType='test')
            for opName in printOps.names:
                bestValdOpDict[opName].append(valdOpDict[opName])
                bestTestOpDict[opName].append(testOpDict[opName])

        print("==============Validation Set Operations, Best==============")
        for opName in bestValdOpDict:
            outputString = '{}: {} +- {}\t{}'.format(
                opName,
                np.mean(bestValdOpDict[opName]),
                np.std(bestValdOpDict[opName]),
                bestValdOpDict[opName])
            print(outputString)

        print("==============Test Set Operations, Best==============")
        for opName in bestTestOpDict:
            outputString = '{}: {} +- {}\t{}'.format(
                opName,
                np.mean(bestTestOpDict[opName]),
                np.std(bestTestOpDict[opName]),
                bestTestOpDict[opName])
            print(outputString)
    finally:
        # Always stop the queue-runner threads, even when restoring or
        # evaluating raised, so they are not left running in the background.
        coord.request_stop()
        coord.join(threads)
def TrainModel(sess, splitTrainSet, splitValidationSet, matricesPL, labelsPL,
               trainingPL, predictionLayer, trainOperation, lossFunction,
               savePath, numberOfSteps, batchSize):
    """
    Run `numberOfSteps` training steps with mini-batches of size `batchSize`.

    Each step draws a batch from `splitTrainSet`, runs `trainOperation`
    together with the graph's UPDATE_OPS collection, asks `ReportProgress`
    for the current training/validation loss and hands the step index to
    `SaveModel` along with the saver obtained from `saveModel.restore`.

    `predictionLayer` is accepted but not used inside this function.

    Returns a tuple `(trainingLosses, validationLosses)` holding the losses
    of every step for which `ReportProgress` set its third result to a
    truthy value.
    """
    updateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    ############# Define tf saver #############
    saver = saveModel.restore(sess, savePath)

    ############# DEFINE ARRAYS TO HOLD LOSS #############
    trainLossHistory = []
    validLossHistory = []

    for step in range(numberOfSteps):
        ############# RUN TRAINING OPERATIONS #############
        images, labels = splitTrainSet.next_batch(batchSize)
        batch = DataSet(images, labels)
        feeds = DefineFeedDict(batch, matricesPL, labelsPL, trainingPL,
                               isTraining=True)
        sess.run([trainOperation, updateOps], feed_dict=feeds)

        ############# REPORT TRAINING PROGRESS #############
        progress = ReportProgress(sess, step, lossFunction, matricesPL,
                                  labelsPL, splitTrainSet, splitValidationSet,
                                  trainingPL)
        stepTrainLoss, stepValidLoss, keepLosses = progress
        if keepLosses:
            trainLossHistory.append(stepTrainLoss)
            validLossHistory.append(stepValidLoss)

        ############# SAVE TRAINED MODEL #############
        # NOTE(review): kept as a per-step call because it receives the
        # step index -- confirm against the original file layout.
        SaveModel(sess, step, saver, savePath)

    return (trainLossHistory, validLossHistory)
def RunCrossValidation(dataSet, matricesPL, labelsPL, predictionLayers,
                       trainOperations, lossFunctions, trainingPL,
                       numberOfStepsArray, batchSizes, saveNames):
    """
    Cross-validate several models, report the best one and plot a comparison.

    `dataSet.images` / `dataSet.labels` are split 90/10 into a training and
    a held-out test set. For every entry of `saveNames` the matching
    prediction layer, train operation, loss function, step count and batch
    size (same index in the parallel lists) are passed to
    `CrossValidateModelParameters`; the model with the lowest returned value
    is considered best. The best model is then restored from
    '<checkpoint dir><saveName>_split1.ckpt' and its loss on the held-out
    test set is printed. Finally a bar chart of all validation performances
    is written under 'plots/'.

    If no model yields a performance that compares below infinity (e.g. all
    results are NaN), this is reported and the test evaluation is skipped
    instead of mislabelling the last model as the best one.

    Returns None.
    """
    ########## SPLIT DATA INTO TRAIN AND TEST ##########
    X_train, X_test, y_train, y_test = train_test_split(
        dataSet.images, dataSet.labels, test_size=0.1)
    splitTrainSet = DataSet(X_train, y_train)
    splitTestSet = DataSet(X_test, y_test)

    ########## ITERATE OVER ALL MODELS ##########
    bestIndex = -1
    lowestLoss = math.inf
    finalValidationPerformances = []
    for index, saveName in enumerate(saveNames):
        print('===================%s===================' % saveName)
        savePath = get('TRAIN.ROI_BASELINE.CHECKPOINT_DIR') + saveName

        ########## GET CROSS VALIDATION PERFORMANCE OF MODEL ##########
        averageFinalValidationPerformance = CrossValidateModelParameters(
            splitTrainSet, matricesPL, labelsPL, trainingPL,
            predictionLayers[index], trainOperations[index],
            lossFunctions[index], savePath, saveName,
            numberOfStepsArray[index], batchSizes[index])
        finalValidationPerformances.append(averageFinalValidationPerformance)

        ########## DETERMINE BEST MODEL SO FAR ##########
        if averageFinalValidationPerformance < lowestLoss:
            lowestLoss = averageFinalValidationPerformance
            bestIndex = index

    ########## PRINT CROSS VALIDATION RESULTS ##########
    print('===================CROSS VALIDATION RESULTS===================')
    for saveName, performance in zip(saveNames, finalValidationPerformances):
        print('Model %s had validation performance: %f' % (saveName, performance))

    print('===================BEST MODEL===================')
    if bestIndex < 0:
        # Nothing ever compared below math.inf (no models, or only NaN/inf
        # results); indexing with -1 would silently pick the last model.
        print('No model produced a usable validation performance; '
              'skipping test evaluation')
    else:
        bestName = saveNames[bestIndex]
        print('Best model was %s with validation performance of %f' %
              (bestName, finalValidationPerformances[bestIndex]))

        ########## EVALUATE BEST MODEL ON THE HELD-OUT TEST SET ##########
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            fileSavePath = get('TRAIN.ROI_BASELINE.CHECKPOINT_DIR') + bestName + '_split1.ckpt'
            print(fileSavePath)
            saveModel.restore(sess, fileSavePath)
            testLoss = GetEvaluatedLoss(sess, splitTestSet, lossFunctions[bestIndex],
                                        matricesPL, labelsPL, trainingPL)
            print('Best model had test loss: %f' % testLoss)

    ########## PLOT COMPARISON OF ALL MODELS ##########
    savePath = 'plots/modelComparison%s.png' % datetime.now().strftime(
        '%I:%M%p_%B_%d_%Y')
    PlotComparisonBarChart(performances=finalValidationPerformances,
                           names=saveNames,
                           savePath=savePath)
def TrainModel(self, sess, updateOp, printOps, name):
    """Train for `self.numberOfSteps` steps, checkpointing on best validation loss.

    Every step runs `updateOp` together with the graph's UPDATE_OPS
    collection on a feed dict from `self.GetFeedDict`. Every
    `self.batchStepsBetweenSummary` steps the gradient summary and the
    train/validation summaries are written under
    ``'{self.summaryDir}{name}/'``, the operation values are printed, and
    the model is saved via `self.SaveModel` to
    ``'{self.checkpointDir}{name}/'`` whenever the validation value stored
    under the key ``'loss'`` improves. After training, the checkpoint is
    restored through `saveModel.restore` and the test set is evaluated.

    Args:
        sess: session used for all graph executions.
        updateOp: training operation run at every step.
        printOps: object providing `gradientSummary`, `mergedTrainSummary`
            and `mergedValdSummary`; also passed to
            `self.GetPerformanceThroughSet`.
        name: model name used to build the summary and checkpoint paths.

    Returns:
        Tuple ``(bestValdOpDict, testOpValueDict)``: the validation values
        at the best validation loss (empty dict if no summary step ran) and
        the test-set values after restoring.
    """
    writer = tf.summary.FileWriter('{}{}/'.format(self.summaryDir, name))
    try:
        # Initialize relevant variables
        sess.run(tf.global_variables_initializer())

        # Collect summary and graph update operations
        extraUpdateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        saver = tf.train.Saver()
        savePath = '{}{}/'.format(self.checkpointDir, name)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(savePath, exist_ok=True)

        # Start from +inf so the first evaluated validation loss always
        # counts as an improvement and produces a checkpoint, however large.
        bestValidationLoss = float('inf')
        bestValdOpDict = {}

        for batchIndex in range(self.numberOfSteps):
            batchTrainFeedDict = self.GetFeedDict(sess)

            if batchIndex % self.batchStepsBetweenSummary != 0:
                # Plain training step, no summaries.
                sess.run([updateOp, extraUpdateOps], feed_dict=batchTrainFeedDict)
                continue

            # Summary step: train and record the gradient summary.
            _, _, gradSummary = sess.run(
                [updateOp, extraUpdateOps, printOps.gradientSummary],
                feed_dict=batchTrainFeedDict)
            writer.add_summary(gradSummary, batchIndex)

            # Training-set performance.
            opValueDict, summaryFeedDict = self.GetPerformanceThroughSet(
                sess, printOps, setType='train',
                batchTrainFeedDict=batchTrainFeedDict)
            writer.add_summary(
                sess.run(printOps.mergedTrainSummary, feed_dict=summaryFeedDict),
                batchIndex)
            print("==============Train Set Operations, Step {}==============".format(batchIndex))
            for opName in opValueDict:
                print('{}: {}'.format(opName, opValueDict[opName]))

            # Validation-set performance (GetPerformanceThroughSet's default set).
            opValueDict, summaryFeedDict = self.GetPerformanceThroughSet(sess, printOps)
            writer.add_summary(
                sess.run(printOps.mergedValdSummary, feed_dict=summaryFeedDict),
                batchIndex)
            print("==============Validation Set Operations, Step {}==============".format(batchIndex))
            for opName in opValueDict:
                print('{}: {}'.format(opName, opValueDict[opName]))

            # Keep only the checkpoint with the lowest validation loss.
            validationLoss = opValueDict['loss']
            if validationLoss < bestValidationLoss:
                bestValidationLoss = validationLoss
                bestValdOpDict = opValueDict
                self.SaveModel(sess, batchIndex, saver, savePath)

        # Evaluate the test set with the saved checkpoint restored.
        saveModel.restore(sess, saver, savePath)
        testOpValueDict, _ = self.GetPerformanceThroughSet(sess, printOps, setType='test')
    finally:
        # Close the event file even if training or evaluation raised.
        writer.close()

    return bestValdOpDict, testOpValueDict
def getPatientPerformances(self, sess, predictionOp, name, numIters=1,
                           reportDir="/data3/hyhuang/brain_age_prediction/reports/"):
    """Predict every test subject with each saved run and write a CSV report.

    For run ``i`` in ``range(numIters)`` the checkpoint under
    ``'{GlobalOpts.validationDir}run_{i}/'`` is restored and
    ``self.testSet[i].maxItemsInQueue`` items are pulled from that test
    set's queue operations; `predictionOp` is evaluated for each item and
    the true value, prediction, absolute error and squared error are
    recorded.

    When ``GlobalOpts.pretrained`` is truthy the report has one row per
    subject of run 0 with per-run columns ``Run_k``, ``AE_k``, ``SE_k``
    (k = 1..numIters) plus min/max/std of the predictions across runs.
    Otherwise the per-run results are combined with `resultConcat` into
    single columns. MAE and MSE are printed and the table is written to
    ``'{reportDir}{name}.csv'``.

    Args:
        sess: session used for restore, dequeueing and prediction.
        predictionOp: operation producing the model prediction.
        name: model name; used in the printed header and the CSV file name.
        numIters: number of saved runs to evaluate. Only the first five
            entries of ``self.testSet`` are read, so values above 5 fail.
        reportDir: directory prefix for the CSV; the file name is appended
            directly, so it must end with a path separator.

    Returns:
        None.
    """
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Queue operations and item counts of the (five) test sets.
        nameOp, labelOp, imageOp = [], [], []
        numbersIters = []
        for i in range(5):
            testSet = self.testSet[i]
            nameOp.append(testSet.dequeueOp)
            labelOp.append(testSet.labelBatchOperation)
            imageOp.append(testSet.imageBatchOperation)
            numbersIters.append(testSet.maxItemsInQueue)

        # NOTE(review): result arrays are sized from test set 0 only; a run
        # whose test set holds more than this many items would index out of
        # bounds -- confirm all sets fit.
        maxItems = self.testSet[0].maxItemsInQueue + 1
        predictedAges = np.zeros((numIters, maxItems))
        absoluteErrors = np.zeros((numIters, maxItems))
        squaredErrors = np.zeros((numIters, maxItems))
        trueAges = np.zeros((numIters, maxItems))
        name_arr = []

        print('Model: {}'.format(name))
        print('SUBJECT ID\tTRUE AGE\tPREDICTED AGE')
        for i in range(numIters):
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            savePath = '{}run_{}/'.format(GlobalOpts.validationDir, i)
            saveModel.restore(sess, saver, savePath)

            runItems = numbersIters[i]
            print("This model has {} test images.".format(runItems))
            for j in range(runItems):
                images, labels, names = sess.run(
                    [imageOp[i], labelOp[i], nameOp[i]])
                feed_dict = {
                    self.imagesPL: images,
                    self.labelsPL: labels,
                    self.trainingPL: False
                }
                subject = names[0].decode('UTF-8')
                # With pretrained models the subject list is recorded once
                # (run 0); otherwise every run contributes its own subjects.
                if i == 0 or not GlobalOpts.pretrained:
                    name_arr.append(subject)
                trueAge = labels[0, 0]
                predictions = sess.run([predictionOp], feed_dict=feed_dict)
                prediction = np.squeeze(predictions[0])

                trueAges[i, j] = trueAge
                predictedAges[i, j] = prediction
                absoluteErrors[i, j] = np.abs(prediction - trueAge)
                squaredErrors[i, j] = np.square(prediction - trueAge)

        if GlobalOpts.pretrained:
            # Build the per-run columns from numIters instead of assuming
            # exactly five runs; for numIters == 5 the columns and their
            # order are unchanged.
            data = {
                'Subject': np.array(name_arr),
                'TrueAge': trueAges[0],
            }
            for run in range(numIters):
                data['Run_{}'.format(run + 1)] = predictedAges[run, :]
            data['MinPredicted'] = np.min(predictedAges, axis=0)
            data['MaxPredicted'] = np.max(predictedAges, axis=0)
            data['sdPredicted'] = np.std(predictedAges, axis=0)
            for run in range(numIters):
                data['AE_{}'.format(run + 1)] = absoluteErrors[run, :]
            for run in range(numIters):
                data['SE_{}'.format(run + 1)] = squaredErrors[run, :]
            df = pd.DataFrame(data=data)
        else:
            trueAges = resultConcat(trueAges, numbersIters)
            predictedAges = resultConcat(predictedAges, numbersIters)
            absoluteErrors = resultConcat(absoluteErrors, numbersIters)
            squaredErrors = resultConcat(squaredErrors, numbersIters)
            df = pd.DataFrame(
                data={
                    'Subject': np.array(name_arr),
                    'TrueAge': trueAges,
                    'PredictedAges': predictedAges,
                    'AbsoluteErrors': absoluteErrors,
                    'SquaredErrors': squaredErrors,
                })

        MAE = np.mean(absoluteErrors)
        MSE = np.mean(squaredErrors)
        print("MAE: " + str(MAE))
        print("MSE: " + str(MSE))

        df.to_csv('{}{}.csv'.format(reportDir, name), index=False)
    finally:
        # Always stop the queue-runner threads, even when restoring,
        # predicting or writing the report raised.
        coord.request_stop()
        coord.join(threads)