def train_one_epoch(self, domain_name):
    """
    One epoch of training of the discriminator.

    :param domain_name: name of the domain being trained, used for logging
    :return: None
    """
    self.model.train()
    # Running loss over the epoch (AverageMeter is a project helper).
    epoch_lossD = AverageMeter()
    for batch_idx, (data, target) in enumerate(self.data_loader.train_loader):
        data, target = data.to(self.device), target.to(self.device)
        self.optimizer.zero_grad()
        output = self.model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        self.optimizer.step()
        epoch_lossD.update(loss.item())
        if batch_idx % self.config.log_interval == 0:
            self.logger.info(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    self.current_epoch, batch_idx * len(data),
                    len(self.data_loader.train_loader.dataset),
                    100. * batch_idx / len(self.data_loader.train_loader),
                    loss.item()))
        self.current_iteration += 1
    # Per-epoch scalar, logged once after the loop at the final iteration
    # counter (original indentation was ambiguous — TODO confirm placement).
    self.summary_writer.add_scalar("epoch/Discriminator_loss",
                                   epoch_lossD.val, self.current_iteration)
    # BUGFIX: the original message was missing the space before
    # "for domain", producing e.g. "0.5for domain X".
    self.logger.info("Training at epoch-" + str(self.current_epoch) + " | " +
                     " - Discriminator Loss-: " + str(epoch_lossD.val) +
                     " for domain " + domain_name)
def _predict_list(self, file_list, save_dir=None):
    """Run prediction on an explicit list of file paths.

    :param file_list: list of input file paths; must be non-empty
    :param save_dir: optional directory for per-file outputs (the output
        filename mirrors the input file's basename)
    """
    assert isinstance(file_list, list),\
        "In 'list' mode the input must be a valid file_path list!"
    consume_time = AverageMeter()  # tracks per-file inference time
    assert not len(file_list) == 0, "The input file list is empty!"
    pb = tqdm(file_list)  # processbar
    for idx, path in enumerate(pb):
        data = self._load_data(path)
        name = os.path.basename(path)
        # Add a batch dim and move to the target device before inference.
        data = self._np2tensor(data).unsqueeze(0).to(self.device)
        # NOTE: `path` is rebound here from input path to output path (or None).
        path = os.path.join(save_dir, name) if save_dir else None
        _, su_time = self.predict_base(model=self.model, data=data, path=path)
        consume_time.update(su_time, n=1)
        # logger
        description = (
            "[{}/{}] speed: {time.val:.4f}s({time.avg:.4f}s)".format(
                idx + 1, len(file_list), time=consume_time))
        pb.set_description(description)
        self.logger.dump(description)
def do_train(epoch):
    """Train the sequence model for one epoch over `train_loader`.

    :param epoch: zero-based epoch index, used only for logging
    """
    logging.info("-" * 30)
    logging.info(f"[TRAIN] Epoch: {epoch + 1} / {args.train_iter}")
    logging.info("\tLearning rate: {:.5f}".format(
        optimizer.param_groups[0]["lr"]))
    model.train()
    loss_meter = AverageMeter()
    criterion = nn.CrossEntropyLoss(
        ignore_index=0)  # non-zero elements in `labels` are learning targets
    for batch_idx, batch in enumerate(train_loader):
        batch = [x.cuda() for x in batch]
        training_seq, label_seq = batch
        optimizer.zero_grad()
        logits = model(training_seq)  # [B, L, #item]
        # Fold the sequence dimension into the batch for the criterion.
        logits = logits.view(-1, logits.size(-1))  # [B*L, #item]
        label_seq = label_seq.view(-1)  # [B*L]
        loss = criterion(logits, label_seq)
        loss.backward()
        # Clip gradient norm to stabilize training.
        nn.utils.clip_grad_norm_(model.parameters(),
                                 args.train_grad_clip_norm)
        optimizer.step()
        loss_meter.update(loss.item())
        if need_to_log(batch_idx):
            # NOTE: `loss` is rebound to the running average for logging.
            cur, total, loss = batch_idx + 1, len(train_loader), loss_meter.avg
            logging.info("\tStep: {:5d} / {:5d}\tLoss: {:.4f}".format(
                cur, total, loss))
def train_one_epoch(self, domain_name):
    """
    One epoch of fine-tuning on the given domain.

    :param domain_name: domain identifier, used to tag the TensorBoard scalar
    :return: None
    """
    self.model.train()
    epoch_lossD = AverageMeter()  # running fine-tuning loss for this epoch
    for batch_idx, data in enumerate(self.data_loader.train_loader):
        # credit assignment
        self.optimizer.zero_grad()  # clear the gradients
        imgs, labels = data
        imgs = imgs.to(self.device)
        # BUGFIX: labels previously stayed on CPU, which crashes the
        # criterion when the model outputs live on GPU.
        labels = labels.to(self.device)
        predicted_labels = self.model(imgs)
        loss = self.criterion(predicted_labels, labels)
        loss.backward()
        # update model weights
        self.optimizer.step()
        epoch_lossD.update(loss.item())
        if batch_idx % self.config.log_interval == 0:
            # BUGFIX: progress count used len(data) (always 2, the tuple
            # length) instead of the batch size, and the percentage divided
            # by the dataset size instead of the number of batches.
            self.logger.info(
                'Finetune Epoch: {} [{}/{} ({:.0f}%)] Loss: {:6f}'.format(
                    self.current_epoch, batch_idx * len(imgs),
                    len(self.data_loader.train_loader.dataset),
                    100. * batch_idx / len(self.data_loader.train_loader),
                    loss.item()))
        self.current_iteration += 1
    self.summary_writer.add_scalar(
        "epoch/Finetune_Training_Loss_" + domain_name, epoch_lossD.val,
        self.current_iteration)
    self.logger.info("Finetuning at epoch-" + str(self.current_epoch) +
                     " | " + " - Finetuning Loss-: " + str(epoch_lossD.val))
def train_one_epoch(self):
    """Run one masked-LM training epoch.

    :return: loss of the final batch (AverageMeter.val, not the epoch
        average) — NOTE(review): confirm `.val` rather than `.avg` is intended.
    """
    loss_meter = AverageMeter()
    for batch, categories in tqdm(self.loader):
        # Build BERT-style auxiliary inputs from the raw batch.
        token_type_ids = cat_to_token_type(categories, batch.shape[1])
        attention_mask = get_attention_mask(batch)
        # Randomly mask tokens; `targets` carries the original token ids.
        inputs, targets = mask_tokens(batch, self.tokenizer)
        inputs = inputs.to(self.device)
        attention_mask = attention_mask.to(self.device)
        token_type_ids = token_type_ids.to(self.device)
        targets = targets.to(self.device)
        loss, prediction_scores = self.model(
            inputs,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            masked_lm_labels=targets)
        # --------------------------------------------------------
        # Greedy per-position predictions, zeroed at padding positions.
        # NOTE(review): `pred` is computed but never used afterwards.
        _, pred = torch.max(prediction_scores, 2)
        pred = torch.mul(pred, attention_mask.long())
        # --------------------------------------------------------
        loss_meter.update(loss.item())
        loss.backward()
        # nn.utils.clip_grad_norm_(self.model.parameters(),
        #                          self.max_grad_norm)
        self.optimizer.step()
        self.scheduler.step()
        # Gradients are cleared after stepping, before the next batch.
        self.model.zero_grad()
    return loss_meter.val
def quantitative_eval(self):
    """Routine to save quantitative results: loss + scores"""
    loss = AverageMeter()  # mean min-distance-to-prototype over the set
    scores_path = self.run_dir / FINAL_SCORES_FILE
    scores = Scores(self.n_classes, self.n_prototypes)
    # Write the TSV header first; the result row is appended at the end.
    with open(scores_path, mode="w") as f:
        f.write("loss\t" + "\t".join(scores.names) + "\n")
    dataset = get_dataset(self.dataset_name)("train",
                                             eval_mode=True,
                                             **self.dataset_kwargs)
    loader = DataLoader(dataset,
                        batch_size=self.batch_size,
                        num_workers=self.n_workers)
    for images, labels in loader:
        images = images.to(self.device)
        # model(...)[1] appears to be per-prototype distances [B, n_prototypes]
        # — TODO confirm against the model's forward signature.
        distances = self.model(images)[1]
        # Closest prototype per sample: value -> loss, index -> prediction.
        dist_min_by_sample, argmin_idx = distances.min(1)
        loss.update(dist_min_by_sample.mean(), n=len(dist_min_by_sample))
        scores.update(labels.long().numpy(), argmin_idx.cpu().numpy())
    scores = scores.compute()
    self.print_and_log_info("final_loss: {:.5}".format(loss.avg))
    self.print_and_log_info(
        "final_scores: " +
        ", ".join(["{}={:.4f}".format(k, v) for k, v in scores.items()]))
    with open(scores_path, mode="a") as f:
        f.write("{:.5}\t".format(loss.avg) +
                "\t".join(map("{:.4f}".format, scores.values())) + "\n")
def train_epoch(self, epoch=0):
    """Train the sensitivity classifier for one epoch.

    :param epoch: epoch index, used only for TensorBoard step offsets
    """
    losses = AverageMeter()
    len_train = len(self.train_loader)
    pb = tqdm(self.train_loader)
    self.model.train()
    for i, (_, hsi) in enumerate(pb):
        hsi = hsi.to(self.device)
        # Sample a sensitivity curve from the 'D' set and its class index.
        sens, idx = create_sensitivity('D')
        sens, idx = sens.to(self.device), torch.LongTensor([idx]).to(
            self.device)
        # Render an RGB image from the hyperspectral cube, then classify it.
        rgb = create_rgb(sens, hsi)
        kls = self.model(rgb)
        loss = self.criterion(kls.unsqueeze(0), idx)
        losses.update(loss.item(), n=self.batch_size)
        # Compute gradients and do SGD step
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        desc = self.logger.make_desc(
            i + 1,
            len_train,
            ('loss', losses, '.4f'),
        )
        pb.set_description(desc)
        self.logger.dump(desc)
        # @zjw: tensorboard
        self.logger.add_scalar('Classifier-Loss/train/losses', losses.val,
                               len_train * epoch + i)
        self.logger.add_scalar('Classifier-Lr',
                               self.optimizer.param_groups[0]['lr'],
                               epoch * len_train + i)
def train_by_epoch(self):
    """Train the model for one epoch, then step the LR scheduler on the
    recorded loss and switch the model to eval mode.
    """
    tqdm_batch = tqdm(self.dataloader,
                      total=self.total_iter,
                      desc="epoch-{}".format(self.epoch))
    avg_loss = AverageMeter()
    for curr_it, (X, y) in enumerate(tqdm_batch):
        self.accumulate_iter += 1
        self.model.train()
        free(self.model)  # project helper — presumably unfreezes params; confirm
        # BUGFIX: `async` became a reserved keyword in Python 3.7, making
        # the original a SyntaxError; PyTorch renamed the .cuda() argument
        # to `non_blocking`.
        X = X.cuda(non_blocking=self.config.async_loading)
        logits = self.model(X)
        loss = self.loss_disc(logits, y)
        # NOTE(review): no zero_grad() before backward — gradients
        # accumulate across iterations unless free() clears them; confirm.
        loss.backward()
        self.opt.step()
        # Store a plain float, not the loss tensor, so the autograd graph
        # isn't kept alive by the meter.
        avg_loss.update(loss.item())
    tqdm_batch.close()
    self.scheduler.step(avg_loss.val)
    with torch.no_grad():
        self.model.eval()
def validate_by_epoch(self):
    """Validate the box regressor for one epoch; checkpoint on improvement
    once past the pretraining phase.
    """
    tqdm_batch_val = tqdm(self.val_loader,
                          total=self.val_iter,
                          desc='epoch_val-{}'.format(self.epoch))
    with torch.no_grad():
        self.reg.eval()
        val_loss = AverageMeter()
        for curr_it, data in enumerate(tqdm_batch_val):
            # BUGFIX: `async` is a reserved keyword since Python 3.7, making
            # the original a SyntaxError; PyTorch renamed the .cuda()
            # argument to `non_blocking`.
            edge = data['edge'].float().cuda(
                non_blocking=self.config.async_loading)
            corner = data['corner'].float().cuda(
                non_blocking=self.config.async_loading)
            box = data['box'].float().cuda(
                non_blocking=self.config.async_loading)
            reg_out = self.reg(torch.cat((edge, corner), dim=1))
            loss = self.mse(reg_out, box)
            # Store a plain float rather than the loss tensor.
            val_loss.update(loss.item())
        tqdm_batch_val.close()
        self.summary_writer.add_scalar('reg_val/loss', val_loss.val,
                                       self.epoch)
        # Track the best validation loss; only save once past pretraining.
        if val_loss.val < self.best_val_loss:
            self.best_val_loss = val_loss.val
            if self.epoch > self.pretraining_step_size:
                self.save_checkpoint()
def validate_epoch(self, epoch=0, store=False):
    """Run one validation pass of the two-input change-detection model.

    :param epoch: epoch index (for logging / saved-image naming)
    :param store: if True, save the predicted change map of each sample
    :return: the first metric's running average when metrics are configured,
        otherwise max(1 - mean loss, initial best accuracy)
    """
    self.logger.show_nl("Epoch: [{0}]".format(epoch))
    losses = AverageMeter()
    len_val = len(self.val_loader)
    pb = tqdm(self.val_loader)
    self.model.eval()
    with torch.no_grad():
        for i, (name, t1, t2, label) in enumerate(pb):
            t1, t2, label = t1.to(self.device), t2.to(
                self.device), label.to(self.device)
            prob = self.model(t1, t2)
            loss = self.criterion(prob, label)
            losses.update(loss.item(), n=self.batch_size)
            # Convert to numpy arrays
            CM = to_array(torch.argmax(prob, 1)).astype('uint8')
            label = to_array(label[0]).astype('uint8')
            for m in self.metrics:
                m.update(CM, label)
            desc = self.logger.make_desc(
                i + 1, len_val, ('loss', losses, '.4f'),
                *((m.__name__, m, '.4f') for m in self.metrics))
            pb.set_description(desc)
            self.logger.dump(desc)
            if store:
                self.save_image(name[0], CM.squeeze(-1), epoch)
    return self.metrics[0].avg if len(self.metrics) > 0 else max(
        1.0 - losses.avg, self._init_max_acc)
def train_by_epoch(self):
    """Train the box regressor for one epoch and step the LR scheduler."""
    tqdm_batch = tqdm(self.dataloader,
                      total=self.total_iter,
                      desc='epoch-{}'.format(self.epoch))
    avg_loss = AverageMeter()
    for curr_it, data in enumerate(tqdm_batch):
        self.reg.train()
        # BUGFIX: `async` is a reserved keyword since Python 3.7, making the
        # original a SyntaxError; PyTorch renamed the .cuda() argument to
        # `non_blocking`.
        edge = data['edge'].float().cuda(
            non_blocking=self.config.async_loading)
        corner = data['corner'].float().cuda(
            non_blocking=self.config.async_loading)
        box = data['box'].float().cuda(
            non_blocking=self.config.async_loading)
        reg_out = self.reg(torch.cat((edge, corner), dim=1))
        loss = self.mse(reg_out, box)
        self.opt.zero_grad()
        loss.backward()
        self.opt.step()
        # Store a plain float rather than the loss tensor.
        avg_loss.update(loss.item())
    tqdm_batch.close()
    self.summary_writer.add_scalar('reg/loss', avg_loss.val, self.epoch)
    self.scheduler.step(avg_loss.val)
    self.logger.warning('info - lr: {}, loss: {}'.format(
        get_lr(self.opt), avg_loss.val))
def test(self, epoch):
    """Evaluate the model on the test split for one pass (TensorFlow).

    :param epoch: epoch number, used only for progress-bar text and printing
    """
    self.data_loader.initialize(self.sess, is_train=False, is_val=False)
    tt = tqdm(range(self.data_loader.num_iterations_test),
              total=self.data_loader.num_iterations_test,
              desc="Val-{}-".format(epoch))
    loss_per_epoch = AverageMeter()
    acc_per_epoch = AverageMeter()
    for cur_it in tt:
        # Run the graph in inference mode.
        loss, acc = self.sess.run([self.loss_node, self.acc_node],
                                  feed_dict={self.is_training: False})
        loss_per_epoch.update(loss)
        acc_per_epoch.update(acc)
    summaries_dict = {
        'test/loss_per_epoch': loss_per_epoch.val,
        'test/acc_per_epoch': acc_per_epoch.val
    }
    self.summarizer.summarize(
        self.model.global_step_tensor.eval(self.sess), summaries_dict)
    print("""Testing -> Val-{} loss:{:.4f} -- acc:{:.4f}""".format(
        epoch, loss_per_epoch.val, acc_per_epoch.val))
    tt.close()
def train_epoch(self, epoch=None):
    """Train for one epoch (TensorFlow), summarize, and save a checkpoint.

    :param epoch: epoch number, used only for progress-bar text and printing
    """
    self.data_loader.initialize(self.sess, is_train=True, is_val=False)
    tt = tqdm(range(self.data_loader.num_iterations_train),
              total=self.data_loader.num_iterations_train,
              desc="epoch-{}-".format(epoch))
    loss_per_epoch = AverageMeter()
    acc_per_epoch = AverageMeter()
    for cur_it in tt:
        loss, acc = self.train_step()
        loss_per_epoch.update(loss)
        acc_per_epoch.update(acc)
    # Bump the model's epoch counter after the full pass.
    self.sess.run(self.model.global_epoch_inc)
    summaries_dict = {
        'train/loss_per_epoch': loss_per_epoch.val,
        'train/acc_per_epoch': acc_per_epoch.val
    }
    self.summarizer.summarize(
        self.model.global_step_tensor.eval(self.sess), summaries_dict)
    self.model.save(self.sess)
    print("""Training -> Epoch-{} loss:{:.4f} -- acc:{:.4f}""".format(
        epoch, loss_per_epoch.val, acc_per_epoch.val))
    tt.close()
def train_one_epoch(self):
    """Run a single training epoch, tracking loss and accuracy meters,
    and log the last-batch values at the end."""
    self.model.train()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    for inputs, targets in self.train_loader:
        inputs = inputs.float().to(self.device)
        targets = targets.to(self.device)
        preds = self.model(inputs)
        batch_loss = self.loss(preds, targets)
        # Standard backprop step.
        self.optimizer.zero_grad()
        batch_loss.backward()
        self.optimizer.step()
        loss_meter.update(batch_loss.item())
        acc_meter.update(get_accuracy(preds, targets), targets.shape[0])
    # if self.config.mode == 'crossval':
    message = 'Training epoch {} | loss: {} - accuracy: {}'.format(
        self.cur_epoch, round(loss_meter.val, 5), round(acc_meter.val, 5))
    # print_and_log(self.logger, s)
    self.logger.info(message)
def train_epoch(self, epoch=0):
    """Train the RGB-to-HSI reconstruction model for one epoch.

    :param epoch: epoch index, used only for TensorBoard step offsets
    """
    losses = AverageMeter()
    len_train = len(self.train_loader)
    pb = tqdm(self.train_loader)
    self.model.train()
    for i, (rgb, hsi) in enumerate(pb):
        hsi = hsi.to(self.device)
        rgb = rgb.to(self.device)
        # Reconstruct the hyperspectral cube from the RGB input.
        recon = self.model(rgb)
        loss = self.criterion(recon, hsi)
        losses.update(loss.item(), n=self.batch_size)
        # Compute gradients and do SGD step
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        desc = self.logger.make_desc(
            i + 1,
            len_train,
            ('loss', losses, '.4f'),
        )
        pb.set_description(desc)
        self.logger.dump(desc)
        # @zjw: tensorboard
        self.logger.add_scalar('Onestep-Loss/train/', losses.val,
                               len_train * epoch + i)
        self.logger.add_scalar('Onestep-Lr',
                               self.optimizer.param_groups[0]['lr'],
                               epoch * len_train + i)
def validate(self):
    """Validate the point-completion model over the validation loader.

    :return: loss of the last validation batch (AverageMeter.val) —
        NOTE(review): confirm `.val` rather than `.avg` is intended.
    """
    self.model.eval()
    tqdm_batch = tqdm(self.validate_dataloader.loader,
                      total=self.validate_dataloader.num_iterations,
                      desc="Validation at -{}-".format(self.current_epoch))
    model_loss_epoch = AverageMeter()
    with torch.no_grad():
        for curr_it, x in enumerate(tqdm_batch):
            ids, input_points, gt_points = x
            if self.cuda:
                input_points = input_points.cuda(
                    non_blocking=self.config.async_loading)
                gt_points = gt_points.cuda(
                    non_blocking=self.config.async_loading)
            # The model predicts a coarse and a fine (refined) point cloud.
            coarse, fine = self.model(input_points)
            loss, loss_coarse, loss_fine = self.update_loss(
                coarse, fine, gt_points)
            model_loss_epoch.update(loss.item())
    self.logger.info(
        "Validation at epoch-{:d} | Network loss: {:.3f}".format(
            self.current_epoch, model_loss_epoch.val))
    self.summary_writer.add_scalar("epoch-validation/loss",
                                   model_loss_epoch.val,
                                   self.current_epoch)
    tqdm_batch.close()
    return model_loss_epoch.val
def train_epoch(self, epoch=None):
    """
    Train one epoch
    :param epoch: cur epoch number
    :return:
    """
    # Re-initialize the dataset iterator with the configured input files.
    self.sess.run(
        self.data_loader.iterator.initializer,
        feed_dict={self.data_loader.filenames: self.config.file_names})
    loss_per_epoch = AverageMeter()
    # Iterate until the input pipeline is exhausted.
    while True:
        try:
            loss = self.train_step()
            loss_per_epoch.update(loss)
        except tf.errors.OutOfRangeError:
            break
    self.sess.run(self.model.increment_cur_epoch_tensor)
    summaries_dict = {'train/loss_per_epoch': loss_per_epoch.val}
    self.logger.summarize(self.model.global_step_tensor.eval(self.sess),
                          summaries_dict)
    # self.model.save(self.sess)
    print(""" Epoch={} loss: {:.4f} """.format(epoch, loss_per_epoch.val))
def validate(self):
    """Evaluate support and location accuracy of the net on the
    validation set."""
    # Eval mode
    self.net.eval()
    # Init Average Meters
    epoch_s_acc = AverageMeter()  # support accuracy
    epoch_l_acc = AverageMeter()  # location accuracy
    tqdm_batch = tqdm(self.valid_loader, f'Epoch-{self.current_epoch}-')
    with torch.no_grad():
        for data in tqdm_batch:
            # Prepare data
            x = torch.tensor(data['x'], dtype=torch.float32,
                             device=self.device)
            y = torch.tensor(data['y'], dtype=torch.float32,
                             device=self.device)
            # Ground-truth support mask: 1 wherever x is non-zero.
            s_gt = torch.zeros_like(x)
            s_gt[x != 0] = 1
            # Forward pass
            s_pred = self.net(y)
            # Metrics
            s_acc = eval_s_acc(s_pred, s_gt)
            l_acc = eval_l_acc(s_pred, s_gt, self.cfg.NUM_Y)
            batch_size = x.shape[0]
            epoch_s_acc.update(s_acc.item(), batch_size)
            epoch_l_acc.update(l_acc.item(), batch_size)
    tqdm_batch.close()
    print(f'Validate at epoch- {self.current_epoch} |'
          f's_acc: {epoch_s_acc.val} - l_acc: {epoch_l_acc.val}')
def test(epoch):
    """Evaluate clean and adversarial accuracy on the test set.

    :param epoch: epoch number, used only for logging
    """
    progress_bar = tqdm(testloader)
    net.eval()
    acc_clean = AverageMeter()  # accuracy on unperturbed images (%)
    acc_adv = AverageMeter()  # accuracy under the test-time attack (%)
    for batch_idx, (images, labels) in enumerate(progress_bar):
        images, labels = images.cuda(), labels.cuda()
        # Craft adversarial examples without tracking parameter gradients.
        with ctx_noparamgrad_and_eval(net):
            images_adv = test_attacker.perturb(images, labels)
        pred_clean = net(images).argmax(dim=1)
        pred_adv = net(images_adv).argmax(dim=1)
        acc_clean.update((pred_clean == labels).float().mean().item() * 100.0,
                         images.size(0))
        acc_adv.update((pred_adv == labels).float().mean().item() * 100.0,
                       images.size(0))
        progress_bar.set_description(
            'Test Epoch: [{0}] '
            'Clean Acc: {acc_clean.val:.3f} ({acc_clean.avg:.3f}) '
            'Adv Acc: {acc_adv.val:.3f} ({acc_adv.avg:.3f}) '.format(
                epoch, acc_clean=acc_clean, acc_adv=acc_adv))
    logging.info(
        f'Epoch: {epoch} | Clean: {acc_clean.avg:.2f} % | Adv: {acc_adv.avg:.2f} %'
    )
def validate(self):
    """
    One cycle of model validation
    :return: (mean validation loss, mean IoU over classes)
    """
    self.model.eval()
    ave_loss = AverageMeter()
    metrics = IOUMetric(self.num_classes)
    with torch.no_grad():
        for _, batch in enumerate(self.testloader):
            images, labels = batch
            size = labels.size()
            images = images.cuda()
            labels = labels.long().cuda()
            pred = self.model(images)
            losses = self.criterion(pred, labels)
            # pred = F.upsample(input=pred, size=(
            #     size[-2], size[-1]), mode='bilinear')
            loss = losses.mean()
            ave_loss.update(loss.item())
            # Per-pixel argmax -> predicted class map for IoU accumulation.
            _, pred_max = torch.max(pred, 1)
            metrics.add_batch(pred_max.cpu().numpy(), labels.cpu().numpy())
    epoch_acc, _, epoch_iou_class, epoch_mean_iou, _ = metrics.evaluate()
    writer = self.writer_dict['writer']
    global_steps = self.writer_dict['valid_global_steps']
    writer.add_scalar('val_loss', ave_loss.val, global_steps)
    writer.add_scalar('val_acc', epoch_acc, global_steps)
    writer.add_scalar('val_mean_iou', epoch_mean_iou, global_steps)
    self.writer_dict['valid_global_steps'] = global_steps + 1
    return ave_loss.val, epoch_mean_iou
def train_one_epoch(self):
    """Train for one epoch; accuracy is computed on detached numpy copies
    of the predictions and targets."""
    self.model.train()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    # Wrap the loader in a progress bar only when verbose is on.
    data_iter = tqdm(self.train_loader) if self.verbose else self.train_loader
    for inputs, targets in data_iter:
        inputs = inputs.to(self.device)
        targets = targets.to(self.device)
        preds = self.model(inputs)
        batch_loss = self.loss(preds, targets)
        # Standard backprop step.
        self.optimizer.zero_grad()
        batch_loss.backward()
        self.optimizer.step()
        loss_meter.update(batch_loss.item())
        preds_np = preds.detach().cpu().numpy()
        targets_np = targets.cpu().numpy()
        acc_meter.update(get_accuracy(preds_np, targets_np),
                         targets_np.shape[0])
    # if self.mode == 'crossval':
    message = 'Training epoch {} | loss: {} - accuracy: {}'.format(
        self.cur_epoch, round(loss_meter.val, 5), round(acc_meter.val, 5))
    print_and_log(self.logger, message)
def test(self, epoch):
    """Run one evaluation pass; summarize loss and kappa (TensorFlow).

    :param epoch: epoch number, used only for progress-bar text and printing
    """
    # initialize dataset
    self.data_loader.initialize(self.sess, mode='test')
    # initialize tqdm
    tt = tqdm(range(self.data_loader.num_iterations_test),
              total=self.data_loader.num_iterations_test,
              desc="Val-{}-".format(epoch))
    loss_per_epoch = AverageMeter()
    kappa_per_epoch = AverageMeter()
    # Iterate over batches
    for cur_it in tt:
        # One step on the current batch
        loss, kappa = self.step()
        # update metrics returned from step func
        loss_per_epoch.update(loss)
        kappa_per_epoch.update(kappa)
    # summarize
    summaries_dict = {
        'test/loss_per_epoch': loss_per_epoch.val,
        'test/kappa_per_epoch': kappa_per_epoch.val
    }
    self.summarizer.summarize(
        self.model.global_step_tensor.eval(self.sess), summaries_dict)
    print(""" Epoch-{} loss:{:.4f} -- kappa:{:.4f} """.format(
        epoch, loss_per_epoch.val, kappa_per_epoch.val))
    tt.close()
def train_model(model, dataloader, loss_fn, optimizer, epoch, is_lstm,
                use_cuda=False, verbose=False):
    """Train `model` for one epoch over `dataloader`.

    :param model: network to train (switched to train mode here)
    :param dataloader: yields (X, y); X is a dict with key 'X' when is_lstm
    :param loss_fn: criterion mapping (predictions, y) -> scalar loss
    :param optimizer: optimizer stepped once per batch
    :param epoch: epoch number, used only for logging
    :param is_lstm: if True, inputs arrive as a dict under X['X']
    :param use_cuda: move tensors to GPU when True
    :param verbose: extra per-batch debug logging / progress prints
    :return: (mean loss per sample, top-1 training accuracy)
    """
    # set model to train mode
    model.train()
    top1 = AverageMeter()
    total_loss = 0
    count = 0  # number of samples processed so far
    # loop through data batches
    for batch_idx, (X, y) in enumerate(tqdm(dataloader)):
        batch_size = -1
        # Utilize GPU if enabled
        if use_cuda:
            if is_lstm:
                X['X'] = X['X'].cuda()
            else:
                X = X.cuda()
            # BUGFIX: `async` is a reserved keyword since Python 3.7, making
            # the original a SyntaxError; PyTorch renamed the argument to
            # `non_blocking`.
            y = y.cuda(non_blocking=True)
        if is_lstm:
            batch_size = X['X'].size(0)
        else:
            batch_size = X.size(0)
        # Compute loss
        predictions = model(X)
        count += predictions.shape[0]
        loss = loss_fn(predictions, y)
        total_loss += loss.item()
        if verbose:
            logging.debug('mini-batch loss: {}'.format(loss))
            logging.debug('y: {}'.format(y))
        # Compute running accuracy
        acc1 = accuracy(predictions.data, y, (1, ))
        top1.update(acc1[0], batch_size)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if verbose:
            print('Progress [{0}/{1} ({2:.0f}%)]\tLoss:{3}'.format(
                count, len(dataloader.dataset),
                100. * batch_idx / len(dataloader), loss.item()))
    # Normalize the accumulated loss by the number of samples seen.
    total_loss /= count
    train_acc = top1.avg
    logging.info(
        'Train Epoch: {} \tLoss: {:.6f} \t Training Acc: {:.2f}'.format(
            epoch, total_loss, train_acc))
    return total_loss, train_acc
def train_by_epoch(self):
    """Train the feature and corner nets for one epoch, then evaluate them
    on the test set and log both losses."""
    tqdm_batch = tqdm(self.dataloader,
                      total=self.total_iter,
                      desc="epoch-{}".format(self.epoch))
    avg_loss = AverageMeter()
    for curr_it, (X, corner) in enumerate(tqdm_batch):
        self.feature.train()
        self.corner.train()
        self.opt.zero_grad()
        # BUGFIX: `async` is a reserved keyword since Python 3.7, making the
        # original a SyntaxError; PyTorch renamed the .cuda() argument to
        # `non_blocking`.
        X = X.cuda(non_blocking=self.config.async_loading)
        corner = corner.cuda(non_blocking=self.config.async_loading)
        feat = self.feature(X)
        pred_cor = self.corner(feat)
        loss = self.loss(corner, pred_cor)
        loss.backward()
        self.opt.step()
        # Store a plain float rather than the loss tensor.
        avg_loss.update(loss.item())
        # Dump a fixed sample batch for visual inspection.
        if curr_it == 4:
            self.record_image(X, pred_cor, corner)
    tqdm_batch.close()
    self.summary_writer.add_scalar('train/loss', avg_loss.val, self.epoch)
    self.scheduler.step(avg_loss.val)
    # Evaluation pass over the test set with gradients disabled.
    with torch.no_grad():
        tqdm_batch = tqdm(
            self.testloader,
            total=(len(self.dataset_test) + self.config.batch_size - 1) //
            self.config.batch_size,
            desc="epoch-{}".format(self.epoch))
        avg_loss = AverageMeter()
        for curr_it, (X, corner) in enumerate(tqdm_batch):
            self.feature.eval()
            self.corner.eval()
            X = X.cuda(non_blocking=self.config.async_loading)
            corner = corner.cuda(non_blocking=self.config.async_loading)
            feat = self.feature(X)
            pred_cor = self.corner(feat)
            loss = self.loss(corner, pred_cor)
            avg_loss.update(loss.item())
            if curr_it == 2:
                self.record_image(X, pred_cor, corner, 'test')
        tqdm_batch.close()
        self.summary_writer.add_scalar('eval/loss', avg_loss.val, self.epoch)
def validate(model, optimizer, criterion, metrics, options):
    """Evaluate `model` on the validation loader.

    :param model: network to evaluate (switched to eval mode)
    :param optimizer: unused here; kept for a uniform train/validate signature
    :param criterion: loss function mapping (output, target) -> scalar
    :param metrics: metric objects with reset()/update()/average()
    :param options: config carrying val_loader, use_cuda, max_batch_per_epoch
    :return: (list of metric averages, globally averaged loss)
    """
    model.eval()
    losses = AverageMeter()
    for metric in metrics:
        metric.reset()
    # zip with maybe_range caps the number of batches when configured.
    for batch_idx, (data, target) in zip(
            maybe_range(options.max_batch_per_epoch), options.val_loader):
        if options.use_cuda:
            data, target = data.cuda(), target.cuda()
        with torch.no_grad():
            output = model(data)
            loss = criterion(output, target)
            losses.update(loss.item(), data.size(0))
            for metric in metrics:
                metric_value = metric(output, target)
                metric.update(metric_value, data.size(0))
    metrics_averages = [metric.average().item() for metric in metrics]
    # presumably averages across distributed workers — confirm against
    # global_average's definition.
    loss_average = global_average(losses.sum, losses.count).item()
    return metrics_averages, loss_average
def train_one_epoch(self):
    """Train the patch classifier for one epoch."""
    # initialize tqdm batch
    tqdm_batch = tqdm(self.trainloader.loader,
                      total=self.trainloader.num_iterations,
                      desc="epoch-{}-".format(self.current_epoch))
    self.net.train()
    epoch_loss = AverageMeter()
    for curr_it, (patches, labels) in enumerate(tqdm_batch):
        #y = torch.full((self.batch_size,), self.real_label)
        if self.cuda:
            patches = patches.cuda()
            labels = labels.cuda()
        # NOTE(review): torch.autograd.Variable is a no-op wrapper in
        # modern PyTorch; kept for behavioral fidelity.
        patches = Variable(patches)
        labels = Variable(labels).long()
        self.net.zero_grad()
        output_logits, output_prob = self.net(patches)
        loss = self.criterion(output_logits, labels)
        loss.backward()
        self.optimizer.step()
        epoch_loss.update(loss.item())
        self.current_iteration += 1
        print("Epoch: {0}, Iteration: {1}/{2}, Loss: {3}".format(self.current_epoch, self.current_iteration,\
            self.trainloader.num_iterations, loss.item()))
    tqdm_batch.close()
    self.logger.info("Training at epoch-" + str(self.current_epoch) + " | " +
                     "Model loss: " + str(epoch_loss.val))
def train_epoch(self, epoch):
    """Run one training epoch of the two-input change-detection model."""
    self.model.train()
    loss_meter = AverageMeter()
    progress = tqdm(self.train_loader)
    n_batches = len(self.train_loader)
    for step, (img_a, img_b, target) in enumerate(progress, start=1):
        img_a = img_a.to(self.device)
        img_b = img_b.to(self.device)
        target = target.to(self.device)
        prediction = self.model(img_a, img_b)
        batch_loss = self.criterion(prediction, target)
        loss_meter.update(batch_loss.item(), n=self.batch_size)
        # Compute gradients and do SGD step
        self.optimizer.zero_grad()
        batch_loss.backward()
        self.optimizer.step()
        status = self.logger.make_desc(step, n_batches,
                                       ('loss', loss_meter, '.4f'))
        progress.set_description(status)
        self.logger.dump(status)
def _predict_folder(self, folder, save_dir=None):
    """Run prediction on every file found directly inside `folder`.

    :param folder: path to an existing directory containing input files
    :param save_dir: optional directory for per-file outputs (the output
        filename mirrors the input file's basename)
    """
    assert folder is not None and os.path.isdir(folder),\
        "In 'folder' mode the input must be a valid path of a folder!"
    consume_time = AverageMeter()  # tracks per-file inference time
    file_list = glob.glob(os.path.join(folder, '*'))
    assert not len(file_list) == 0, "The input folder is empty"
    pb = tqdm(file_list)  # processbar
    for idx, file in enumerate(pb):
        img = self._load_data(file)
        name = os.path.basename(file)
        # Add a batch dim and move to the target device before inference.
        img = self._np2tensor(img).unsqueeze(0).to(self.device)
        save_path = os.path.join(save_dir, name) if save_dir else None
        _, su_time = self.predict_base(model=self.model,
                                       data=img,
                                       path=save_path)
        consume_time.update(su_time)
        # logger
        description = (
            "[{}/{}] speed: {time.val:.4f}s({time.avg:.4f}s)".format(
                idx + 1, len(file_list), time=consume_time))
        pb.set_description(description)
        self.logger.dump(description)
def test(self, state='test'):
    """Evaluate loss/error over the test iterations (TensorFlow).

    :param state: dataset split passed to the data loader initializer
    :return: (loss_per_epoch.val, error_per_epoch.val) — last-batch values
    """
    # get loss and error_nodes
    loss_node, = tf.get_collection('losses')
    error_node, = tf.get_collection('error')
    # initialize dataset
    self.data_loader.initialize(self.sess, state=state)
    # initialize tqdm
    tt = tqdm(range(self.data_loader.num_iterations_test),
              total=self.data_loader.num_iterations_test,
              desc="test")
    loss_per_epoch = AverageMeter()
    error_per_epoch = AverageMeter()
    # Iterate over batches
    for _ in tt:
        # One Train step on the current batch
        loss, acc = self.sess.run([loss_node, error_node])
        # update metrics returned from train_step func
        loss_per_epoch.update(loss)
        error_per_epoch.update(acc)
    print("Test loss: {:.3E}, error: {:.2f}".format(
        loss_per_epoch.val, error_per_epoch.val))
    tt.close()
    return loss_per_epoch.val, error_per_epoch.val
def test(self, epoch):
    """Run one evaluation pass; summarize loss and accuracy (TensorFlow).

    :param epoch: epoch number, used only for progress-bar text and printing
    """
    # initialize dataset
    self.data_loader.initialize(self.sess, mode='eval')
    # initialize tqdm
    tt = tqdm(range(self.data_loader.num_iterations_test),
              total=self.data_loader.num_iterations_test,
              desc="Val-{}-".format(epoch))
    loss_per_epoch = AverageMeter()
    acc_per_epoch = AverageMeter()
    # Iterate over batches
    for cur_it in tt:
        # One Train step on the current batch
        loss, acc = self.sess.run([self.loss_node, self.acc_node],
                                  feed_dict={self.is_training: False})
        # update metrics returned from train_step func
        loss_per_epoch.update(loss)
        acc_per_epoch.update(acc)
    # summarize
    summaries_dict = {'eval/loss_per_epoch': loss_per_epoch.val,
                      'eval/acc_per_epoch': acc_per_epoch.val}
    self.summarizer.summarize(self.model.global_step_tensor.eval(self.sess),
                              summaries_dict)
    print(""" Val-{} Eval loss:{:.4f} -- acc:{:.4f} """.format(
        epoch, loss_per_epoch.val, acc_per_epoch.val))
    tt.close()
def validate(model, optimizer, criterion, metrics, options):
    """Evaluate `model` on the validation loader, with dtype conversion.

    :param model: network to evaluate (switched to eval mode)
    :param optimizer: unused here; kept for a uniform train/validate signature
    :param criterion: loss function mapping (output, target) -> scalar
    :param metrics: metric objects with reset()/update()/average()/name
    :param options: config carrying val_loader, dtype, force_target_dtype,
        use_cuda, max_batch_per_epoch
    :return: (dict of metric name -> average, globally averaged loss)
    """
    model.eval()
    losses = AverageMeter()
    for metric in metrics:
        metric.reset()
    # zip with maybe_range caps the number of batches when configured.
    for batch_idx, (data, target) in zip(
            maybe_range(options.max_batch_per_epoch), options.val_loader):
        data = convert_dtype(options.dtype, data)
        # Targets are converted only when explicitly requested.
        if options.force_target_dtype:
            target = convert_dtype(options.dtype, target)
        if options.use_cuda:
            data, target = data.cuda(), target.cuda()
        with torch.no_grad():
            output = model(data)
            loss = criterion(output, target)
            losses.update(loss.item(), data.size(0))
            for metric in metrics:
                metric_value = metric(output, target)
                metric.update(metric_value, data.size(0))
    metrics_averages = {metric.name: metric.average().item()
                        for metric in metrics}
    # presumably averages across distributed workers — confirm against
    # global_average's definition.
    loss_average = global_average(losses.sum, losses.count).item()
    return metrics_averages, loss_average