def report(self):
    if self.train_params.VERBOSE_MODE:
        print("[Training Parameters Overview] ------------------------------------------------------------------------")
        self.train_params.report()
        print("[Optimizer Overview] ----------------------------------------------------------------------------------")
        if self.optimizer is not None:
            print("[%s] Start learning rate: %f" % (type(self.optimizer), dl_util.get_learning_rate(self.optimizer)))
def report(self):
    self.print_protected_god_animal()
    print("[Training Parameters Overview] ------------------------------------------------------------------------")
    self.train_params.report()
    print("[Optimizer Overview] ----------------------------------------------------------------------------------")
    if self.optimizer is not None:
        print("[%s] Start learning rate: %f" % (type(self.optimizer), dl_util.get_learning_rate(self.optimizer)))
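# Both report() variants above query the current learning rate through
# dl_util.get_learning_rate(), whose implementation is not shown in this
# section. A minimal sketch of such a helper, assuming a standard PyTorch
# optimizer, reads the 'lr' entry of the first parameter group:
def get_learning_rate(optimizer):
    """Return the learning rate of the first parameter group (illustrative sketch)."""
    return optimizer.param_groups[0]['lr']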
    return feature_map_loss


""" Do Training -------------------------------------------------------------------------------------------------------- """
time_start = datetime.datetime.now()
itr = 0
for epoch in range(0, train_params.MAX_EPOCHS):
    try:
        # Iterate over the training batches
        for train_batch_idx, train_dict in tqdm(
                enumerate(train_loader), total=len(train_loader),
                desc='Train epoch = %d, lr=%f' % (epoch, dl_util.get_learning_rate(optimizer)),
                ncols=100, leave=False):
            itr += 1

            # Switch to train mode
            net.train()

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Pre-process the variables:
            # generate 3 pyramid levels, each with 2000 randomly selected samples
            I_a, d_a, sel_a_indices, K, I_b, q_gt, t_gt, se3_gt, T_gt = ba_tracknet_preprocess(
                train_dict, 3, 2000)
            I_a = I_a.cuda()
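# The snippet above pins tensors to the GPU with .cuda(), which fails on a
# CPU-only host. A device-agnostic alternative (an illustrative variant, not
# part of the original code) selects the device once and moves tensors with
# .to(device):
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
I_a = torch.zeros(1, 3, 480, 640)  # placeholder standing in for the image batch
I_a = I_a.to(device)               # equivalent to I_a.cuda() when CUDA is present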
def train_loop(self, train_data: Union[dataset.Dataset, dataloader.DataLoader], valid_data=None):
    # prepare the dataloader if the input parameter is an instance of dataset
    if isinstance(train_data, dataset.Dataset):
        train_loader = dataloader.DataLoader(train_data,
                                             batch_size=self.train_params.LOADER_BATCH_SIZE,
                                             shuffle=self.train_params.LOADER_SHUFFLE,
                                             pin_memory=self.train_params.LOADER_PIN_MEM,
                                             num_workers=self.train_params.LOADER_NUM_THREADS,
                                             drop_last=False)
        if self.train_params.VERBOSE_MODE:
            print("[Dataset Overview] ----------------------------------------------------------------------------------------")
            print("Train set: %d items" % len(train_data))
    elif isinstance(train_data, dataloader.DataLoader):
        train_loader = train_data
    else:
        raise TypeError('train_data must be a Dataset or a DataLoader')

    if valid_data is not None and isinstance(valid_data, dataset.Dataset):
        valid_loader = dataloader.DataLoader(valid_data,
                                             batch_size=self.train_params.LOADER_VALID_BATCH_SIZE,
                                             shuffle=self.train_params.LOADER_SHUFFLE,
                                             pin_memory=self.train_params.LOADER_PIN_MEM,
                                             num_workers=self.train_params.LOADER_NUM_THREADS,
                                             drop_last=False)
        if self.train_params.VERBOSE_MODE:
            print("Validation set: %d items" % len(valid_data))
    elif valid_data is not None and isinstance(valid_data, dataloader.DataLoader):
        valid_loader = valid_data
    else:
        valid_loader = None

    # prepare the training process
    self._prepare_train_loop()

    epoch, itr = 0, 0
    self.train_start_time = datetime.datetime.now()
    print('[Running] -----------------------------------------------------------------------------------------')
    try:
        for epoch in range(0, self.train_params.MAX_EPOCHS):
            if self.train_params.TQDM_PROGRESS:
                progress = tqdm(total=len(train_loader), ncols=100, leave=False)

            for train_batch_idx, train_sample in enumerate(train_loader):
                itr += 1
                if self.train_params.TQDM_PROGRESS:
                    progress.update(1)
                    progress.set_description('[Train] epoch = %d, lr=%f' %
                                             (epoch, dl_util.get_learning_rate(self.optimizer)))

                # prepare feeding the samples
                if self.model is not None:
                    self.model.train()
                self.optimizer.zero_grad()

                # update the optimizer
                self._optimizer_update()

                # forward and backward
                log_dict = self._train_feed(train_sample, epoch, itr)

                # optimize the parameters
                self.optimizer.step()

                # log the training information
                if log_dict is not None and self.logger is not None and self.check_log_step(itr):
                    log_dict['Iteration'] = itr + 1
                    log_dict['Epoch'] = epoch
                    log_dict['Event'] = 'Training'
                    self.logger.log(log_dict)

                # save a training checkpoint every 'checkpoint_steps' iterations
                if self.check_checkpoint_step(itr):
                    self.save_checkpoint(epoch, itr)

                # run validation
                if self.check_valid_step(itr) and valid_loader is not None:
                    if self.train_params.TQDM_PROGRESS:
                        progress.set_description('[Valid]')
                    with torch.no_grad():
                        valid_log_dict = self._valid_loop(valid_loader, epoch, itr)
                    torch.cuda.empty_cache()

                    # log the validation
                    if valid_log_dict is not None and self.logger is not None:
                        valid_log_dict['Iteration'] = itr + 1
                        valid_log_dict['Epoch'] = epoch
                        valid_log_dict['Event'] = 'Validating'
                        self.logger.log(valid_log_dict)

                    # save the checkpoint
                    self.save_checkpoint(epoch, itr)

            if self.train_params.TQDM_PROGRESS:
                progress.close()
    except Exception as e:
        import traceback
        print(traceback.format_exc())
        print('[Exception]: ' + str(e))
        self.save_checkpoint(epoch, itr)
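# A hypothetical usage sketch for train_loop() above; 'TrainParameters' and
# 'Trainer' are illustrative stand-ins, since the owning class and parameter
# type are not shown in this section. train_loop() accepts either a raw
# Dataset (it then builds the DataLoader itself from the LOADER_* fields) or
# a pre-built DataLoader:
#
#     params = TrainParameters()
#     params.MAX_EPOCHS = 20
#     params.LOADER_BATCH_SIZE = 8
#
#     trainer = Trainer(params)
#     trainer.train_loop(train_data=my_train_set, valid_data=my_valid_set)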
def train_loop(self, train_loader, valid_loader=None):
    # prepare the training process (e.g. adding more dict keys)
    self._prepare_train()

    epoch, itr = 0, 0
    self.train_start_time = datetime.datetime.now()
    print('[Running] -----------------------------------------------------------------------------------------')
    try:
        for epoch in range(0, self.train_params.MAX_EPOCHS):
            progress = tqdm(total=len(train_loader), ncols=100, leave=False)
            for train_batch_idx, train_sample in enumerate(train_loader):
                itr += 1
                progress.update(1)
                progress.set_description('[Train] epoch = %d, lr=%f' %
                                         (epoch, dl_util.get_learning_rate(self.optimizer)))

                # prepare feeding the samples
                self.model.train()
                self.optimizer.zero_grad()

                # update the optimizer
                self._optimizer_update()

                # forward and backward
                log_dict = self._train_feed(train_sample, epoch, itr)

                # optimize the parameters
                self.optimizer.step()

                # log the training information
                if log_dict is not None:
                    log_dict['Iteration'] = itr + 1
                    log_dict['Epoch'] = epoch
                    log_dict['Event'] = 'Training'
                    self.logger.log(log_dict)

                # save a training checkpoint every 'checkpoint_steps' iterations
                if self.check_checkpoint_step(itr):
                    self.save_checkpoint(epoch, itr)

                # run validation
                if self.check_valid_step(itr) and valid_loader is not None:
                    progress.set_description('[Valid]')
                    valid_log_dict = self._valid_loop(valid_loader, epoch, itr)

                    # log the validation
                    if valid_log_dict is not None:
                        valid_log_dict['Iteration'] = itr + 1
                        valid_log_dict['Epoch'] = epoch
                        valid_log_dict['Event'] = 'Validating'
                        self.logger.log(valid_log_dict)

                    # save the checkpoint
                    self.save_checkpoint(epoch, itr)

            progress.close()
    except Exception as e:
        import traceback
        print(traceback.format_exc())
        print('[Exception]: ' + str(e))
        self.save_checkpoint(epoch, itr)
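# Both train_loop() variants gate checkpointing and validation on
# check_checkpoint_step() and check_valid_step(), whose bodies are not shown
# here. A minimal sketch, assuming the training parameters carry
# CHECKPOINT_STEPS and VALID_STEPS interval fields (hypothetical names), is a
# simple modulo test:
def check_checkpoint_step(self, itr):
    # true every CHECKPOINT_STEPS iterations
    return itr % self.train_params.CHECKPOINT_STEPS == 0

def check_valid_step(self, itr):
    # true every VALID_STEPS iterations
    return itr % self.train_params.VALID_STEPS == 0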