def add_trainers(self, nb):
    old_length = len(self.trainers)
    for index in range(old_length, old_length + nb):
        self.trainers.append(
            Trainer(id_=index,
                    training_queue=self.training_queue,
                    batch_size=self.training_batch_size,
                    model=self.impala,
                    optimizer=self.optimizer,
                    statistics_queue=self.statistics_queue,
                    learning_step=self.learning_step,
                    sequence_length=self.sequence_length))
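
# A self-contained sketch of the pool-growing pattern add_trainers uses:
# new ids continue from the current pool length, so repeated calls never
# reuse an id. TrainerStub and TrainerPool are illustrative stand-ins for
# the real Trainer and its owner (which also wires up the queues, model,
# optimizer, etc.); they are not part of the snippet above.
class TrainerStub:
    def __init__(self, id_):
        self.id_ = id_


class TrainerPool:
    def __init__(self):
        self.trainers = []

    def add_trainers(self, nb):
        old_length = len(self.trainers)
        for index in range(old_length, old_length + nb):
            self.trainers.append(TrainerStub(id_=index))


pool = TrainerPool()
pool.add_trainers(2)
pool.add_trainers(3)
assert [trainer.id_ for trainer in pool.trainers] == [0, 1, 2, 3, 4]
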
import pickle

import tensorflow as tf
import taskplan

# Model, Data and Trainer are project-local modules.


class Task(taskplan.Task):

    def __init__(self, config, logger, metadata):
        super(Task, self).__init__(config, logger, metadata)
        self.sum = 0
        self.model = Model(config.get_with_prefix("model"))
        self.data = Data(config.get_with_prefix("data"))
        self.trainer = Trainer(config.get_with_prefix("trainer"),
                               self.model, self.data)
        self.best_val_acc = 0
        self.number_worse_iterations = 0

    def save(self, path):
        self.model.save_weights(str(path / "model.h5py"))
        with open(str(path / "best_model.pkl"), "wb") as file:
            pickle.dump(self.best_val_acc, file)

    def step(self, tensorboard_writer, current_iteration):
        with tensorboard_writer.as_default():
            val_acc = self.trainer.step(current_iteration)
            if val_acc is not None:
                if val_acc > self.best_val_acc:
                    # New best model: save it and reset the patience counter.
                    # task_dir is presumably provided by the taskplan.Task base class.
                    self.best_val_acc = val_acc
                    self.model.save_weights(str(self.task_dir / "model.h5py"))
                    self.number_worse_iterations = 0
                else:
                    self.number_worse_iterations += 1
                    if self.number_worse_iterations > 5:
                        # Early stopping after more than 5 non-improving validations.
                        self.pause_computation = True
                tf.summary.scalar('val/best_acc', self.best_val_acc,
                                  step=current_iteration)

    def load(self, path):
        self.model.load_weights(str(path / "model.h5py"))
        with open(str(path / "best_model.pkl"), "rb") as file:
            self.best_val_acc = pickle.load(file)
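
# A minimal, self-contained sketch of the early-stopping rule Task.step
# applies above (track the best validation accuracy; pause once more than
# 5 consecutive validations fail to improve it). should_pause and its
# inputs are illustrative names, not part of the snippet.
def should_pause(val_accuracies, patience=5):
    best_acc = 0
    worse = 0
    for acc in val_accuracies:
        if acc > best_acc:
            best_acc = acc
            worse = 0
        else:
            worse += 1
            if worse > patience:
                return True
    return False


assert should_pause([0.5, 0.6] + [0.6] * 6) is True   # 6 non-improving checks
assert should_pause([0.5, 0.6, 0.7, 0.8]) is False    # still improving
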
import os
import time

import tensorflow as tf

# Classifier, Trainer, Evaluator, dataSettings and trainSettings are
# project-local modules.


class Main:

    def __init__(self):
        classifier = Classifier()
        classifier.Build()

        # Trainer, Evaluator
        print("Reading Training set...")
        self.trainer = Trainer(classifier)
        self.trainEvaluator = Evaluator("train",
                                        dataSettings.PATH_TO_TRAIN_SET_CATELOG,
                                        classifier)
        print("\t Done.\n")

        print("Reading Validation set...")
        self.validationEvaluator = Evaluator("validation",
                                             dataSettings.PATH_TO_VAL_SET_CATELOG,
                                             classifier)
        print("\t Done.\n")

        print("Reading Test set...")
        self.testEvaluator = Evaluator("test",
                                       dataSettings.PATH_TO_TEST_SET_CATELOG,
                                       classifier)
        print("\t Done.\n")

        # Summary
        summaryOp = tf.summary.merge_all()
        self.trainer.SetMergedSummaryOp(summaryOp)
        self.trainEvaluator.SetMergedSummaryOp(summaryOp)
        self.validationEvaluator.SetMergedSummaryOp(summaryOp)
        self.bestThreshold = None
        self.testEvaluator.SetMergedSummaryOp(summaryOp)

        # Time
        self._startTrainEpochTime = time.time()
        self._trainCountInOneEpoch = 0

        # Saver
        self.modelSaver = tf.train.Saver(
            max_to_keep=trainSettings.MAX_TRAINING_SAVE_MODEL)

        # Session
        self.session = tf.Session()
        init = tf.global_variables_initializer()
        self.session.run(init)
        self.trainer.SetGraph(self.session.graph)
        self.validationEvaluator.SetGraph(self.session.graph)

    def __del__(self):
        self.session.close()

    def Run(self):
        self.recoverFromPretrainModelIfRequired()
        self.calculateValidationBeforeTraining()
        self.resetTimeMeasureVariables()

        print("Path to save model: ", trainSettings.PATH_TO_SAVE_MODEL)
        print("\nStart Training...\n")

        while self.trainer.currentEpoch < trainSettings.MAX_TRAINING_EPOCH:
            self.trainer.PrepareNewBatchData()
            self.trainer.Train(self.session)
            self._trainCountInOneEpoch += 1

            if self.trainer.isNewEpoch:
                print("Epoch:", self.trainer.currentEpoch,
                      "======================================"
                      + "======================================"
                      + "======================================")
                self.printTimeMeasurement()

                self.trainer.PauseDataLoading()
                self.evaluateValidationSetAndPrint(self.trainer.currentEpoch)
                self.evaluateTrainingSetAndPrint(self.trainer.currentEpoch)

                if trainSettings.PERFORM_DATA_AUGMENTATION:
                    # Preload the next TrainBatch while evaluating the TestSet.
                    self.trainer.ContinueDataLoading()

                self.evaluateTestSetAndPrint(self.trainer.currentEpoch)
                self.trainer.ContinueDataLoading()
                self.resetTimeMeasureVariables()

                if self.trainer.currentEpoch >= trainSettings.EPOCHS_TO_START_SAVE_MODEL:
                    self.saveCheckpoint(self.trainer.currentEpoch)

        print("Optimization finished.")
        self.trainer.Release()
        self.trainEvaluator.Release()
        self.validationEvaluator.Release()
        self.testEvaluator.Release()

    def recoverFromPretrainModelIfRequired(self):
        if trainSettings.PRETRAIN_MODEL_PATH_NAME != "":
            print("Load Pretrain model from: "
                  + trainSettings.PRETRAIN_MODEL_PATH_NAME)
            listOfAllVariables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            variablesToBeRecovered = [
                eachVariable for eachVariable in listOfAllVariables
                if eachVariable.name.split('/')[0] not in
                trainSettings.NAME_SCOPES_NOT_TO_RECOVER_FROM_CHECKPOINT
            ]
            modelLoader = tf.train.Saver(variablesToBeRecovered)
            modelLoader.restore(self.session,
                                trainSettings.PRETRAIN_MODEL_PATH_NAME)

    def evaluateTrainingSetAndPrint(self, currentEpoch_):
        '''
        Since BATCH_SIZE may be small (= 4 in my case), the BatchLoss or
        BatchAccuracy may fluctuate.  Calculate the loss over the whole
        Training set instead.
        Note: If one wants to calculate the BatchLoss ONLY, use
        Trainer.EvaluateTrainLoss().
        '''
        startEvaluateTime = time.time()
        loss, frameAccuracy, threshold, videoAccuracy = self.trainEvaluator.Evaluate(
            self.session,
            currentEpoch_=currentEpoch_,
            threshold_=self.bestThreshold)
        endEvaluateTime = time.time()

        self.printCalculationResults(
            jobType_='train',
            loss_=loss,
            frameAccuracy_=frameAccuracy,
            isThresholdOptimized_=False,
            threshold_=threshold,
            videoAccuracy_=videoAccuracy,
            duration_=(endEvaluateTime - startEvaluateTime))

    def calculateValidationBeforeTraining(self):
        if trainSettings.PRETRAIN_MODEL_PATH_NAME != "":
            print("Validation before Training ",
                  "============================="
                  + "======================================"
                  + "======================================")
            self.evaluateValidationSetAndPrint(currentEpoch_=0)

    def evaluateValidationSetAndPrint(self, currentEpoch_):
        startEvaluateTime = time.time()
        loss, frameAccuracy, threshold, videoAccuracy = self.validationEvaluator.Evaluate(
            self.session,
            currentEpoch_=currentEpoch_,
            threshold_=None)
        endEvaluateTime = time.time()

        self.bestThreshold = threshold
        self.printCalculationResults(
            jobType_='validation',
            loss_=loss,
            frameAccuracy_=frameAccuracy,
            isThresholdOptimized_=True,
            threshold_=threshold,
            videoAccuracy_=videoAccuracy,
            duration_=(endEvaluateTime - startEvaluateTime))

    def evaluateTestSetAndPrint(self, currentEpoch_):
        startEvaluateTime = time.time()
        loss, frameAccuracy, threshold, videoAccuracy = self.testEvaluator.Evaluate(
            self.session,
            currentEpoch_=currentEpoch_,
            threshold_=self.bestThreshold)
        endEvaluateTime = time.time()

        self.printCalculationResults(
            jobType_='test',
            loss_=loss,
            frameAccuracy_=frameAccuracy,
            isThresholdOptimized_=False,
            threshold_=threshold,
            videoAccuracy_=videoAccuracy,
            duration_=(endEvaluateTime - startEvaluateTime))

    def printTimeMeasurement(self):
        timeForTrainOneEpoch = time.time() - self._startTrainEpochTime
        print("\t Back Propagation time measurement:")
        print("\t\t duration: ", "{0:.2f}".format(timeForTrainOneEpoch), "s/epoch")
        averagedTrainTime = timeForTrainOneEpoch / self._trainCountInOneEpoch
        print("\t\t average: ", "{0:.2f}".format(averagedTrainTime), "s/batch")
        print()

    def resetTimeMeasureVariables(self):
        self._startTrainEpochTime = time.time()
        self._trainCountInOneEpoch = 0

    def printCalculationResults(self, jobType_, loss_, frameAccuracy_,
                                isThresholdOptimized_, threshold_,
                                videoAccuracy_, duration_):
        floatPrecision = "{0:.4f}"
        print("\t " + jobType_ + ":")
        if isThresholdOptimized_:
            print("\t loss:", floatPrecision.format(loss_),
                  " frame accuracy:", floatPrecision.format(frameAccuracy_),
                  " best frame threshold:", threshold_,
                  " video accuracy:", floatPrecision.format(videoAccuracy_),
                  " duration:", "{0:.2f}".format(duration_) + "(s)\n")
        else:
            print("\t loss:", floatPrecision.format(loss_),
                  " frame accuracy:", floatPrecision.format(frameAccuracy_),
                  " given frame threshold:", threshold_,
                  " video accuracy:", floatPrecision.format(videoAccuracy_),
                  " duration:", "{0:.2f}".format(duration_) + "(s)\n")

    def saveCheckpoint(self, currentEpoch_):
        pathToSaveCheckpoint = os.path.join(trainSettings.PATH_TO_SAVE_MODEL,
                                            "save_epoch_" + str(currentEpoch_))
        checkpointPathFileName = os.path.join(pathToSaveCheckpoint,
                                              "ViolenceNet.ckpt")
        self.modelSaver.save(self.session, checkpointPathFileName)
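
# A typical entry point for the Main class above. This guard is a sketch
# only: the original snippet does not show how Main is launched.
if __name__ == "__main__":
    main = Main()
    main.Run()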