def run_train_epoch(self, data_loader, epoch):
    """Run a single training epoch over `data_loader`.

    Networks whose optimizer is None are kept frozen (eval mode); the rest
    are switched to train mode. Per-iteration stats come from the subclass
    hook `train_step`. Returns the averaged training statistics (dict).
    """
    self.logger.info('Training: %s' % os.path.basename(self.exp_dir))
    self.dloader = data_loader
    self.dataset_train = data_loader.dataset

    for key, network in self.networks.items():
        # A network with no optimizer is frozen for this experiment.
        if self.optimizers[key] is None:
            network.eval()
        else:
            network.train()

    disp_step = self.opt['disp_step'] if ('disp_step' in self.opt) else 50
    train_stats = utils.DAverageMeter()
    self.bnumber = len(data_loader())  # number of batches per epoch

    for idx, batch in enumerate(tqdm(data_loader(epoch))):
        self.biter = idx  # batch iteration within this epoch
        # Global iteration counter across epochs (for schedulers/loggers).
        self.global_iter = self.curr_epoch * self.bnumber + self.biter
        train_stats_this = self.train_step(batch)
        train_stats.update(train_stats_this)
        if (idx + 1) % disp_step == 0:
            self.logger.info('==> Iteration [%3d][%4d / %4d]: %s' %
                             (epoch + 1, idx + 1, self.bnumber,
                              train_stats.average()))

    return train_stats.average()
def evaluate_conf_matrix(self, dloader):
    """Evaluate the model and accumulate per-layer predictions and labels.

    Runs `evaluation_step_conf_matrix` over every batch and concatenates
    the CPU predictions of each prediction head, together with the
    ground-truth labels, so a confusion matrix can be built afterwards.

    Returns:
        (avg_stats, pred_var_stack, labels_var_stack)
    """
    self.logger.info('Evaluating: %s' % os.path.basename(self.exp_dir))
    self.dloader = dloader
    self.dataset_eval = dloader.dataset
    self.logger.info('==> Dataset: %s [%d images]' %
                     (dloader.dataset.name, len(dloader)))
    for key, network in self.networks.items():
        network.eval()
    eval_stats = utils.DAverageMeter()
    self.bnumber = len(dloader())

    num_layers = 5  # number of prediction heads -- TODO: derive from model
    # Build with a comprehension so each slot holds a DISTINCT tensor:
    # `[torch.Tensor()] * 5` would alias one tensor object five times.
    pred_var_stack = [torch.Tensor() for _ in range(num_layers)]
    labels_var_stack = torch.Tensor()

    for idx, batch in enumerate(tqdm(dloader())):
        self.biter = idx
        eval_stats_this, pred_var, labels_var = \
            self.evaluation_step_conf_matrix(batch)
        # Accumulate predictions per head, always on CPU to save GPU memory.
        for layer_id in range(num_layers):
            pred_var_stack[layer_id] = torch.cat(
                (pred_var_stack[layer_id], pred_var[layer_id].cpu()), dim=0)
        labels_var_stack = torch.cat((labels_var_stack, labels_var.cpu()),
                                     dim=0)
        eval_stats.update(eval_stats_this)

    self.logger.info('==> Results: %s' % eval_stats.average())
    return eval_stats.average(), pred_var_stack, labels_var_stack
def run_train_epoch(self, data_loader, epoch):
    """Run one training epoch, mirroring per-record scalars to TensorBoard
    on every 100th training step and logging averaged stats every
    `disp_step` iterations. Returns the averaged training statistics.
    """
    self.logger.info('Training: %s' % os.path.basename(self.exp_dir))
    self.dloader = data_loader
    self.dataset_train = data_loader.dataset

    for key, network in self.networks.items():
        # Networks without an optimizer stay frozen during training.
        if self.optimizers[key] is None:
            network.eval()
        else:
            network.train()

    disp_step = self.opt['disp_step'] if ('disp_step' in self.opt) else 50
    train_stats = utils.DAverageMeter()
    self.bnumber = len(data_loader())

    for idx, batch in enumerate(tqdm(data_loader(epoch))):
        self.biter = idx
        train_stats_this = self.train_step(batch)
        # Subsample TensorBoard logging: one write per 100 training steps.
        if (self.writer_train_run % 100 == 0):
            for record in train_stats_this:
                self.writer.add_scalar('data/' + record + ' train',
                                       train_stats_this[record],
                                       self.writer_train_log)
            self.writer_train_log += 1
        self.writer_train_run += 1
        train_stats.update(train_stats_this)
        if (idx + 1) % disp_step == 0:
            self.logger.info('==> Iteration [%3d][%4d / %4d]: %s' %
                             (epoch + 1, idx + 1, len(data_loader),
                              train_stats.average()))
    return train_stats.average()
def run_train_epoch(self, data_loader, epoch):
    """Run one training epoch over `data_loader` and return averaged stats."""
    self.logger.info('Training: %s' % os.path.basename(self.exp_dir))
    self.dloader = data_loader
    self.dataset_train = data_loader.dataset

    # Iterate over every registered network; those without an optimizer
    # (frozen components) stay in eval mode, the rest are set to train.
    # (train()/eval() are nn.Module methods defined elsewhere.)
    for key, network in self.networks.items():
        if self.optimizers[key] is None:
            network.eval()
        else:
            network.train()

    # How often (in iterations) to emit a progress log line.
    disp_step = self.opt['disp_step'] if ('disp_step' in self.opt) else 50
    train_stats = utils.DAverageMeter()  # running average of training stats
    # Number of batches per epoch; the loader overloads __len__ so that
    # len(data_loader()) == epoch_size / batch_size.
    self.bnumber = len(data_loader())

    # Each epoch is split into batches yielded by data_loader(epoch).
    for idx, batch in enumerate(tqdm(data_loader(epoch))):
        self.biter = idx
        # Subclass hook: performs one optimization step and returns a dict
        # with the loss and any other statistics worth tracking.
        train_stats_this = self.train_step(batch)
        train_stats.update(train_stats_this)  # fold into the running average
        if (idx + 1) % disp_step == 0:
            self.logger.info('==> Iteration [%3d][%4d / %4d]: %s' %
                             (epoch + 1, idx + 1, len(data_loader),
                              train_stats.average()))
    return train_stats.average()  # averaged statistics for the epoch
def run_train_epoch(self, data_loader, epoch):
    """Run one training epoch; per-step stats are also mirrored to
    Weights & Biases when USE_WANDB is enabled. Returns averaged stats.
    """
    self.logger.info('Training: %s' % os.path.basename(self.exp_dir))
    self.dloader = data_loader
    self.dataset_train = data_loader.dataset

    for key, network in self.networks.items():
        # Networks without an optimizer stay frozen (eval mode).
        if self.optimizers[key] is None:
            network.eval()
        else:
            network.train()

    disp_step = self.opt['disp_step'] if ('disp_step' in self.opt) else 50
    train_stats = utils.DAverageMeter()
    self.bnumber = len(data_loader())  # batches per epoch

    for idx, batch in enumerate(tqdm(data_loader(epoch))):
        self.biter = idx
        train_stats_this = self.train_step(batch)
        train_stats.update(train_stats_this)
        if (idx + 1) % disp_step == 0:
            self.logger.info(
                '==> Iteration [%3d][%4d / %4d]: %s' %
                (epoch + 1, idx + 1, self.bnumber, train_stats.average()))
        if USE_WANDB:
            wandb.log(train_stats_this)
    return train_stats.average()
def run_train_epoch(self, data_loader, epoch):
    """Run one training epoch over `data_loader` and return averaged stats."""
    self.logger.info('Training: %s' % os.path.basename(self.exp_dir))
    self.logger.info(
        '==> Dataset: %s [%s images]' %
        (data_loader.dataset.name, str(len(data_loader.dataset))))
    self.logger.info(
        "==> Iteration steps in one epoch: %d [batch size %d]" %
        (len(data_loader), data_loader.batch_size))

    for key, network in self.networks.items():
        # A network with no optimizer is kept frozen during training.
        if self.optimizers[key] is None:
            network.eval()
        else:
            network.train()

    disp_step = self.opt['disp_step'] if ('disp_step' in self.opt) else 1
    train_stats = utils.DAverageMeter()
    for idx, batch in enumerate(
            tqdm(data_loader(epoch), total=len(data_loader))):
        train_stats_this = self.train_step(batch)
        train_stats.update(train_stats_this)
        if (idx + 1) % disp_step == 0:
            self.logger.info('==> Iteration [%3d][%4d / %4d]: %s' %
                             (epoch + 1, idx + 1, len(data_loader),
                              train_stats.average()))
    return train_stats.average()
def evaluate(self, dloader):
    """Evaluate all networks on `dloader`, periodically mirroring the
    per-step statistics (and the best-model metric) to TensorBoard.
    Returns the averaged evaluation statistics.
    """
    self.logger.info('Evaluating: %s' % os.path.basename(self.exp_dir))
    self.dloader = dloader
    self.dataset_eval = dloader.dataset
    self.logger.info('==> Dataset: %s [%d images]' %
                     (dloader.dataset.name, len(dloader)))

    # Inference only: switch every network to eval mode.
    for network in self.networks.values():
        network.eval()

    stats = utils.DAverageMeter()
    self.bnumber = len(dloader())
    for batch_idx, batch in enumerate(tqdm(dloader())):
        self.biter = batch_idx
        step_stats = self.evaluation_step(batch)
        # Write every 25th evaluation step to TensorBoard.
        if self.writer_eval_run % 25 == 0:
            for record in step_stats:
                self.writer.add_scalar('data/' + record + ' val',
                                       step_stats[record],
                                       self.writer_eval_log)
            self.writer_eval_log += 1
        self.writer_eval_run += 1
        stats.update(step_stats)

    self.logger.info('==> Results: %s' % stats.average())
    # Also track the metric used for best-model selection, if configured.
    if self.keep_best_model_metric_name is not None:
        metric = self.keep_best_model_metric_name
        self.writer.add_scalar('data/avg_val ' + str(metric),
                               stats.average()[metric],
                               self.writer_precision_log_idx)
        self.writer_precision_log_idx += 1
    return stats.average()
def evaluate(self, dloader, is_test=False):
    """Run evaluation over `dloader` and push the averaged metrics to
    TensorBoard under the 'test' or 'eval' namespace.
    Returns the averaged evaluation statistics.
    """
    self.logger.info('Evaluating: %s' % os.path.basename(self.exp_dir))
    self.dloader = dloader
    self.dataset_eval = dloader.dataset
    self.logger.info('==> Dataset: %s [%d batches]' %
                     (dloader.dataset.name, len(dloader)))

    for network in self.networks.values():
        network.eval()

    meter = utils.DAverageMeter()
    self.bnumber = len(dloader)
    for batch_idx, batch in enumerate(tqdm(dloader())):
        self.biter = batch_idx
        meter.update(self.evaluation_step(batch))

    self.logger.info('==> Results: %s' % meter.average())

    # Mirror the averaged metrics to TensorBoard, one scalar per key.
    averaged = meter.average()
    prefix = 'test' if is_test else 'eval'
    for name, value in averaged.items():
        self.tb_writer.add_scalar('{}/avg_{}'.format(prefix, name),
                                  value, self.curr_epoch)
    self.tb_writer.flush()
    return meter.average()
def evaluate(self, data_loader):
    """Run inference over `data_loader` and return the averaged stats."""
    self.logger.info('Evaluating: %s' % os.path.basename(self.exp_dir))
    self.dataset_eval = data_loader.dataset

    # Inference only: switch every network to eval mode.
    for network in self.networks.values():
        network.eval()

    meter = utils.DAverageMeter()
    for batch in tqdm(data_loader()):
        meter.update(self.inference(batch))

    self.logger.info('==> Results [%d images]: %s' %
                     (len(data_loader), meter.average()))
    return meter.average()
def process_batch(self, batch, do_train=True):
    """Process one batch, optionally performing an optimization step.

    The batch is split into chunks of `batch_split_size` so it fits in
    GPU memory; gradients are accumulated over the chunks and a single
    optimizer step is taken at the end when `do_train` is True.

    Returns the averaged loss dict.
    """
    opt = self.opt
    self.set_tensors(batch)
    tensors = self.tensors
    input = tensors['input']
    target = tensors['target']

    # Because the entire batch might not fit in the GPU memory,
    # we split it in chunks of @batch_split_size.
    batch_size = input.size(0)
    batch_split_size = opt['batch_split_size'] if ('batch_split_size'
                                                   in opt) else batch_size
    # Integer (floor) division: a bare `/` yields a float under Python 3
    # and tensor.chunk() requires an integer chunk count.
    num_chunks = batch_size // batch_split_size
    input_chunks = input.chunk(num_chunks, 0)
    target_chunks = target.chunk(num_chunks, 0)
    network = self.networks['net']
    criterion = self.criterions['net']

    optimizer = None
    if do_train:
        # Get the optimizer and zero the gradients once for the whole batch.
        optimizer = self.optimizers['net']
        optimizer.zero_grad()

    losses = utils.DAverageMeter()
    # Process each chunk.  NOTE(review): Variable/volatile is the legacy
    # (pre-0.4) PyTorch autograd API; on modern PyTorch this should become
    # a torch.no_grad() context -- kept as-is to match the file's version.
    for input_chunk, target_chunk in zip(input_chunks, target_chunks):
        var_input = torch.autograd.Variable(input_chunk,
                                            volatile=(not do_train))
        var_target = torch.autograd.Variable(target_chunk,
                                             volatile=(not do_train))
        # Forward through the network.
        var_output = network(var_input)
        # Compute the objective loss.
        var_loss = criterion(var_output, var_target)
        if do_train:
            # Backpropagate: gradients accumulate across the chunks.
            var_loss.backward()
        # Record the scalar loss value for averaging.
        losses.update({'loss net': var_loss.data.squeeze()[0]})

    if do_train:
        # Single gradient-descent step over the accumulated gradients.
        optimizer.step()

    return losses.average()
def evaluate(self, dloader):
    """Evaluate the (single) network on `dloader`; results are emitted
    through the logger and echoed to stdout. Returns the averaged stats.
    """
    self.logger.info('Evaluating: %s' % os.path.basename(self.exp_dir))
    self.dloader = dloader
    self.dataset_eval = dloader.dataset
    self.logger.info('==> Dataset: %s [%d images]' %
                     (dloader.dataset.name, len(dloader)))

    # Inference only.
    self.network.eval()

    meter = utils.DAverageMeter()
    self.bnumber = len(dloader())
    for batch_idx, batch in enumerate(tqdm(dloader())):
        self.biter = batch_idx
        meter.update(self.evaluation_step(batch))

    self.logger.info('==> Results: %s' % meter.average())
    print('==> Results: %s' % meter.average())
    return meter.average()
def evaluate(self, dloader):
    """Evaluate every network on `dloader` and return the averaged stats."""
    self.logger.info('Evaluating: %s' % os.path.basename(self.exp_dir))
    self.logger.info('==> Dataset: %s [%d images]' %
                     (dloader.dataset.name, len(dloader.dataset)))
    self.logger.info(
        "==> Iteration steps in one epoch: %d [batch size %d]" %
        (len(dloader), dloader.batch_size))

    # Inference only: switch every network to eval mode.
    for network in self.networks.values():
        network.eval()

    meter = utils.DAverageMeter()
    for batch in tqdm(dloader(), total=len(dloader)):
        meter.update(self.evaluation_step(batch))

    self.logger.info('==> Results: %s' % meter.average())
    return meter.average()
def evaluate(self, dloader, **finetune_opt):
    """Evaluate with fine-tuning; `finetune_opt` is forwarded verbatim to
    `evaluation_step`. Calls `on_done_evaluation` before returning the
    averaged evaluation statistics.
    """
    self.logger.info('Evaluating (with fine-tuning): %s' %
                     os.path.basename(self.exp_dir))
    self.dloader = dloader
    self.dataset_eval = dloader.dataset
    self.logger.info('==> Dataset: %s [%d batches]' %
                     (dloader.dataset.name, len(dloader)))

    for network in self.networks.values():
        network.eval()

    # Fresh scratch space for samples collected during fine-tuning.
    self.finetune_data = defaultdict(list)

    meter = utils.DAverageMeter()
    self.bnumber = len(dloader)
    batches = dloader.get_iterator_with_global_ids_and_labels()
    for batch_idx, batch in enumerate(tqdm(batches)):
        self.biter = batch_idx
        meter.update(self.evaluation_step(batch, **finetune_opt))

    self.logger.info('==> Results: %s' % meter.average())
    self.on_done_evaluation()
    return meter.average()