def training_step(self, data_loader):
    """Run one training epoch over `data_loader`, updating the model in place.

    Maintains a running average of the batch loss and displays it on the
    tqdm progress bar.
    """
    # Running (count-weighted) average of the batch losses.
    losses = AverageMeter()
    # Train mode: enables dropout and batch-norm statistic updates.
    self.model.train()
    # TRAINING LOOP
    tk0 = tqdm(data_loader, total=len(data_loader))
    for _, data in enumerate(tk0):
        # Load token ids, attention masks and labels from the batch dict.
        # (Despite the original "IMAGES" comment, this consumes text data.)
        ids = data["ids"]
        masks = data["masks"]
        labels = data["labels"]
        ids = ids.to(self.device)
        masks = masks.to(self.device)
        labels = labels.to(self.device)
        # Reset gradients left over from the previous step.
        self.model.zero_grad()
        # Forward pass and loss.
        output = self.model(ids, masks)
        loss = self.criterion(output, labels)
        # Backward pass and parameter update.
        loss.backward()
        self.optimizer.step()
        # Update the running loss average, weighted by the batch size.
        losses.update(loss.item(), ids.size(0))
        tk0.set_postfix(loss=losses.avg)
def bleuScore(dataset, model):
    """Evaluate `model` on `dataset` with beam-search translation and BLEU.

    For each (source, target) pair, strips the start/end tokens from the
    reference, translates the source, and accumulates the BLEU score.
    Per-sentence results are dumped to ``results.pkl``.

    Returns the average BLEU over the dataset.
    """
    import pickle

    model.eval()
    bleu = AverageMeter()
    allResults = []
    with torch.no_grad():
        # BUG FIX: the original iterated the global `val_dataset`, silently
        # ignoring the `dataset` argument; iterate the argument instead.
        for i, item in enumerate(dataset):
            source, target = item[0], item[1].tolist()
            # Strip <sos> and <eos> from the reference before scoring.
            del target[0]
            del target[-1]
            results = translate(model, source, args.max_len,
                                train_dataset.engStartTokenID(),
                                train_dataset.engEndTokenID(),
                                args.beam, target)
            bleu.update(results["bleu"])
            source = source.tolist()
            del source[-1]  # drop the trailing end token from the source too
            allResults.append((results["bleu"], source, target,
                               results["finals"][0][1]))
            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'BLEU {bleu.val:.4f} ({bleu.avg:.4f})'.format(
                          i, len(dataset), bleu=bleu))
    print(' * BLEU {bleu.avg:.3f}'.format(bleu=bleu))
    # protocol=4 handles large result lists (Python >= 3.4 readers).
    pickle.dump(allResults, open("results.pkl", "wb"), protocol=4)
    return bleu.avg
def validate(val_loader, model, criterion):
    """Evaluate `model` on `val_loader` and return the average loss."""
    model.eval()  # disable dropout / freeze batch-norm statistics
    losses = AverageMeter()
    with torch.no_grad():
        for step, (seq, labels) in enumerate(val_loader):
            # Tensors arrive sequence-first: (seq_len, N).
            labels = labels.cuda(non_blocking=True)
            seq = seq.cuda()
            n = seq.size(1)
            # Forward pass; criterion also takes the batch size.
            losses.update(criterion(model(seq), labels, n).item(), n)
            if step % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                          step, len(val_loader), loss=losses))
    print(' * Loss {loss.avg:.3f}'.format(loss=losses))
    return losses.avg
def validate(val_loader, net):
    """Evaluate a student/teacher distillation network.

    Returns (student_top1, student_top5, teacher_top1, teacher_top5)
    accuracies averaged over the validation set.
    """
    # Index 0 tracks the student branch, index 1 the teacher branch.
    top1 = [AverageMeter(), AverageMeter()]
    top5 = [AverageMeter(), AverageMeter()]
    # switch to evaluate mode
    net.eval()
    prefetcher = DataPrefetcher(val_loader)
    inputs, labels = prefetcher.next()
    with torch.no_grad():
        # The prefetcher yields None once the loader is exhausted.
        while inputs is not None:
            inputs = inputs.float().cuda()
            labels = labels.cuda()
            stu_outputs, tea_outputs = net(inputs)
            # The last element of each output list is used as the final logits.
            pred_s = accuracy(stu_outputs[-1], labels, topk=(1, 5))
            pred_t = accuracy(tea_outputs[-1], labels, topk=(1, 5))
            top1[0].update(pred_s[0].item(), inputs.size(0))
            top5[0].update(pred_s[1].item(), inputs.size(0))
            top1[1].update(pred_t[0].item(), inputs.size(0))
            top5[1].update(pred_t[1].item(), inputs.size(0))
            inputs, labels = prefetcher.next()
    return top1[0].avg, top5[0].avg, top1[1].avg, top5[1].avg
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one train epoch of the sequence model, printing running loss."""
    losses = AverageMeter()  # running average of the batch loss
    # Switch to train mode
    model.train()
    for i, (input, target) in enumerate(train_loader):
        # input: seq_len, N
        # target: seq_len, N
        target = target.cuda(non_blocking=True)
        input = input.cuda()
        # Sequence-first layout: the batch size is dimension 1.
        batch_size = input.size(1)
        # Forward
        output = model(input)  # seq_len, N, ntokens
        loss = criterion(output, target, batch_size)
        # Backward
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients (stabilises recurrent training), then update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        # Measure loss
        losses.update(loss.item(), batch_size)
        # Print Training Information
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), loss=losses))
def evaluate_testset(test_data_loader, generator, loss_fn, args):
    """Compute the average `loss_fn` of `generator` over the test loader.

    The generator is temporarily switched to eval mode and restored to
    train mode before returning.
    """
    generator.eval()  # equivalent to generator.train(False)
    loss_meter = AverageMeter('loss')
    t0 = time.time()
    with torch.no_grad():
        for _, batch in enumerate(test_data_loader, 0):
            # Only the text, lengths and target vectors feed this metric;
            # the audio stream and aux info are unpacked but unused.
            in_text, text_lengths, target_vec, in_audio, aux_info = batch
            n = target_vec.size(0)
            text_dev = in_text.to(device)
            target = target_vec.to(device)
            predicted = generator(text_dev, text_lengths, target, None)
            loss_meter.update(loss_fn(predicted, target).item(), n)
    generator.train()  # back to training mode
    # print
    logging.info('[VAL] loss: {:.3f} / {:.1f}s'.format(
        loss_meter.avg, time.time() - t0))
    return loss_meter.avg
def evaluate_testset(test_data_loader, generator):
    """Validate `generator` via `eval_embed`; return {'loss': average_loss}."""
    # to evaluation mode
    generator.train(False)
    losses = AverageMeter('loss')
    start = time.time()
    with torch.no_grad():
        for iter_idx, data in enumerate(test_data_loader, 0):
            # target_poses is unpacked but unused; only the vectorised
            # targets are evaluated.
            target_poses, target_vec = data
            batch_size = target_vec.size(0)
            target = target_vec.to(device)
            # NOTE(review): eval_embed's first three arguments are passed as
            # None here — confirm its contract against its definition.
            loss, _ = eval_embed(None, None, None, target, generator)
            losses.update(loss.item(), batch_size)
    # back to training mode
    generator.train(True)
    # print
    ret_dict = {'loss': losses.avg}
    elapsed_time = time.time() - start
    logging.info('[VAL] loss: {:.3f} / {:.1f}s'.format(losses.avg, elapsed_time))
    return ret_dict
def train(train_loader, model: seq2seq.Seq2seq, criterion, optimizer, epoch, teacher_forcing_ratio):
    """Run one train epoch of the seq2seq model with optional teacher forcing."""
    losses = AverageMeter()
    # Switch to train mode
    model.train()
    for i, batch in enumerate(train_loader):
        # data: seq_len, N
        # data_mask: seq_len, N
        # target: seq_len, N
        data, data_mask, target = batch
        target = target.cuda(non_blocking=True)
        data_mask = data_mask.cuda(non_blocking=True)
        data = data.cuda()
        batch_size = data.size(1)
        target_len = target.size(0)
        # Forward
        # Encoder
        source_hs, hidden = model.encoder(data)
        # Decoder
        ctx = None
        # Map the encoder's final hidden state into the decoder's space.
        hidden = model.transformHidden(hidden)
        outputs = []
        # Decide once per batch whether to feed ground-truth tokens
        # (teacher forcing) or the model's own greedy predictions.
        use_teacher_forcing = random.random() < teacher_forcing_ratio
        x = target[0]  # decoding starts from the first target token
        for j in range(1, target_len):
            output, hidden, ctx = model.decoder(x, hidden, ctx, source_hs, data_mask)
            outputs.append(output)
            # The next input token carries no gradient in either mode.
            with torch.no_grad():
                if use_teacher_forcing:
                    x = target[j]
                else:
                    topi = torch.topk(output, 1, dim=1)[1]  # N, 1
                    x = topi.squeeze()  # N
        outputs = torch.stack(outputs)  # seq_len, N, n_tokens
        # Targets are shifted by one: position 0 is never predicted.
        loss = criterion(outputs, target[1:], batch_size)
        # Backward
        optimizer.zero_grad()
        loss.backward()
        # Update
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        # Measure loss
        losses.update(loss.item(), batch_size)
        # Print Training Information
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), loss=losses))
def init_fn(self, shared_model=None, **kwargs):
    """Build the model, optimizer, LR scheduler, loss and evaluators.

    If `shared_model` is given (e.g. from a co-running trainer) it is
    reused as-is instead of constructing and wrapping a new network.
    """
    if self.options.model.name == "pixel2mesh":
        # Visualization renderer
        self.renderer = MeshRenderer(self.options.dataset.camera_f, self.options.dataset.camera_c,
                                     self.options.dataset.mesh_pos)
        # create ellipsoid
        self.ellipsoid = Ellipsoid(self.options.dataset.mesh_pos)
    else:
        self.renderer = None
    if shared_model is not None:
        self.model = shared_model
    else:
        if self.options.model.name == "pixel2mesh":
            # create model
            self.model = P2MModel(self.options.model, self.ellipsoid,
                                  self.options.dataset.camera_f, self.options.dataset.camera_c,
                                  self.options.dataset.mesh_pos)
        elif self.options.model.name == "classifier":
            self.model = Classifier(self.options.model, self.options.dataset.num_classes)
        else:
            raise NotImplementedError("Your model is not found")
        # Wrap the freshly built model in DataParallel across the GPUs.
        self.model = torch.nn.DataParallel(self.model, device_ids=self.gpus).cuda()
    # Setup a joint optimizer for the 2 models
    if self.options.optim.name == "adam":
        self.optimizer = torch.optim.Adam(
            params=list(self.model.parameters()),
            lr=self.options.optim.lr,
            betas=(self.options.optim.adam_beta1, 0.999),
            weight_decay=self.options.optim.wd
        )
    elif self.options.optim.name == "sgd":
        self.optimizer = torch.optim.SGD(
            params=list(self.model.parameters()),
            lr=self.options.optim.lr,
            momentum=self.options.optim.sgd_momentum,
            weight_decay=self.options.optim.wd
        )
    else:
        raise NotImplementedError("Your optimizer is not found")
    # Step-wise LR decay at the configured milestones.
    self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        self.optimizer, self.options.optim.lr_step, self.options.optim.lr_factor
    )
    # Create loss functions
    if self.options.model.name == "pixel2mesh":
        self.criterion = P2MLoss(self.options.loss, self.ellipsoid).cuda()
    elif self.options.model.name == "classifier":
        self.criterion = CrossEntropyLoss()
    else:
        raise NotImplementedError("Your loss is not found")
    # Create AverageMeters for losses
    self.losses = AverageMeter()
    # Evaluators
    self.evaluators = [Evaluator(self.options, self.logger, self.summary_writer,
                                 shared_model=self.model)]
def valid_epoch(self, epoch):
    """Validate for one epoch; checkpoint to model_best.pth on improvement."""
    batch_time = AverageMeter()
    total_losses = AverageMeter()
    # NOTE(review): top1/top5 are created but never updated in this method.
    top1 = AverageMeter()
    top5 = AverageMeter()
    correct = 0
    """ validate """
    self.model.eval()
    test_loss = 0
    end = time.time()
    for batch_idx, (inputs, label) in enumerate(self.valid_dataloader):
        with torch.no_grad():
            inputs = inputs.to(self.opt.device)
            label = label.to(self.opt.device)
            output = self.model(inputs)
            loss = self.criterion(output, label)
            total_losses.update(loss.item(), inputs.size(0))
            # Count correct top-1 predictions on the CPU.
            pred = output.data.max(1, keepdim=True)[1].cpu()
            correct += pred.eq(label.cpu().view_as(pred)).sum()
            batch_time.update(time.time() - end)
            end = time.time()
            test_loss += loss.item()
            if batch_idx % self.opt.print_freq_eval == 0:
                print('Validation[%d/%d] Total Loss: %.4f[%.4f]' %
                      (batch_idx, len(self.valid_dataloader), loss.item(),
                       test_loss / (batch_idx + 1)))
    num_test_data = len(self.valid_dataloader.dataset)
    accuracy = 100. * correct / num_test_data
    test_loss /= len(self.valid_dataloader)
    if test_loss < self.best_loss:
        print('Saving..')
        state = {
            'model': self.model.module.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_loss': test_loss,
            # NOTE(review): a near-duplicate of this method elsewhere in the
            # file saves `epoch + 1` — confirm which convention is intended.
            'epoch': epoch,
        }
        torch.save(state, os.path.join(self.opt.expr_dir, 'model_best.pth'))
        self.best_loss = test_loss
    print('[*] Model %s,\tCurrent Loss: %f\tBest Loss: %f' %
          (self.opt.model, test_loss, self.best_loss))
    print('Val Accuracy: {}/{} ({:.0f}%)'.format(correct, num_test_data, accuracy))
def average_of_average_meters(self, average_meters):
    """Combine a list of AverageMeters into one summary meter.

    The weighted mean weights each meter by its sample count; the plain
    mean averages the per-meter averages.  ``self.weighted_mean`` decides
    which of the two lands in ``avg`` (the other goes to ``val``).

    Returns a fresh AverageMeter carrying (val, avg); both are 0. when
    the input carries no samples / no meters.
    """
    total = sum(meter.sum for meter in average_meters)
    count = sum(meter.count for meter in average_meters)
    weighted_avg = total / count if count > 0 else 0.
    # BUG FIX: guard against an empty meter list — the original raised
    # ZeroDivisionError on len(average_meters) == 0.
    avg = (sum(meter.avg for meter in average_meters) / len(average_meters)
           if average_meters else 0.)
    ret = AverageMeter()
    if self.weighted_mean:
        ret.val, ret.avg = avg, weighted_avg
    else:
        ret.val, ret.avg = weighted_avg, avg
    return ret
def evaluate(self): self.logger.info("Running evaluations...") # clear evaluate_step_count, but keep total count uncleared self.evaluate_step_count = 0 test_data_loader = DataLoader(self.dataset, batch_size=self.options.test.batch_size * self.options.num_gpus, num_workers=self.options.num_workers, pin_memory=self.options.pin_memory, shuffle=self.options.test.shuffle, collate_fn=self.dataset_collate_fn) if self.options.model.name == "pixel2mesh": self.chamfer_distance = [ AverageMeter() for _ in range(self.num_classes) ] self.f1_tau = [AverageMeter() for _ in range(self.num_classes)] self.f1_2tau = [AverageMeter() for _ in range(self.num_classes)] elif self.options.model.name == "classifier": self.acc_1 = AverageMeter() self.acc_5 = AverageMeter() # Iterate over all batches in an epoch for step, batch in enumerate(test_data_loader): # Send input to GPU batch = { k: v.cuda() if isinstance(v, torch.Tensor) else v for k, v in batch.items() } # Run evaluation step out = self.evaluate_step(batch) # Tensorboard logging every summary_steps steps if self.evaluate_step_count % self.options.test.summary_steps == 0: self.evaluate_summaries(batch, out) # add later to log at step 0 self.evaluate_step_count += 1 self.total_step_count += 1 for key, val in self.get_result_summary().items(): scalar = val if isinstance(val, AverageMeter): scalar = val.avg self.logger.info("Test [%06d] %s: %.6f" % (self.total_step_count, key, scalar)) self.summary_writer.add_scalar("eval_" + key, scalar, self.total_step_count + 1)
def train(train_loader, model, criterion, optimizer, epoch):
    """Train `model` for one epoch, logging loss and top-1 precision."""
    loss_meter, acc_meter = AverageMeter(), AverageMeter()
    model.train()
    for step, (images, labels) in enumerate(train_loader):
        labels = labels.cuda(non_blocking=True)
        images = images.cuda()
        # Forward pass.
        logits = model(images)
        loss = criterion(logits, labels)
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Bookkeeping: top-1 precision and running loss.
        prec1 = accuracy(logits.data, labels)
        n = images.size(0)
        loss_meter.update(loss.item(), n)
        acc_meter.update(prec1[0], n)
        if step % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, step, len(train_loader),
                      loss=loss_meter, top1=acc_meter))
def validate(val_loader, model, criterion):
    """Evaluate `model` on `val_loader`; return the average top-1 precision."""
    loss_meter, acc_meter = AverageMeter(), AverageMeter()
    model.eval()  # evaluation mode: no dropout / frozen batch-norm stats
    with torch.no_grad():
        for step, (images, labels) in enumerate(val_loader):
            labels = labels.cuda(non_blocking=True)
            images = images.cuda()
            # Forward pass.
            logits = model(images)
            # Loss and top-1 precision on this batch.
            loss = criterion(logits, labels)
            prec1 = accuracy(logits, labels)
            n = images.size(0)
            loss_meter.update(loss.item(), n)
            acc_meter.update(prec1[0], n)
            if step % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          step, len(val_loader), loss=loss_meter, top1=acc_meter))
    print(' * Prec@1 {top1.avg:.3f}'.format(top1=acc_meter))
    return acc_meter.avg
def eval_step(self, data_loader, metric):
    """Evaluate the model on `data_loader`.

    `metric` is a key into the module-level `metrics_dict`; the metric is
    computed on arg-max predictions.

    Returns (average_loss, average_metric) over the whole loader.
    """
    # Running averages of the loss and of the requested metric.
    losses = AverageMeter()
    metrics_avg = AverageMeter()
    # Eval mode: no dropout, frozen batch-norm statistics.
    self.model.eval()
    # VALIDATION LOOP
    with torch.no_grad():
        tk0 = tqdm(data_loader, total=len(data_loader))
        for _, data in enumerate(tk0):
            # Load images & labels onto the training device.
            images = data["images"].to(self.device)
            labels = data["labels"].to(self.device)
            # Forward pass and loss.
            output = self.model(images)
            loss = self.criterion(output, labels)
            # Metric on the arg-max class predictions.
            metric_used = metrics_dict[metric]
            predictions = torch.softmax(output, dim=1)
            _, predictions = torch.max(predictions, dim=1)
            metric_value = metric_used(labels, predictions)
            losses.update(loss.item(), images.size(0))
            metrics_avg.update(metric_value.item(), images.size(0))
            tk0.set_postfix(loss=losses.avg)
    print(f"Validation Loss = {losses.avg}")
    # BUG FIX: return the epoch-average loss rather than the raw loss
    # tensor of the final batch, consistent with the averaged metric.
    return losses.avg, metrics_avg.avg
def validate(val_loader, model, criterion):
    """Evaluate the seq2seq model with greedy decoding; return average loss."""
    losses = AverageMeter()
    # Switch to evaluate mode
    model.eval()
    with torch.no_grad():
        for i, batch in enumerate(val_loader):
            # data: seq_len, N
            # data_mask: seq_len, N
            # target: seq_len, N
            data, data_mask, target = batch
            target = target.cuda(non_blocking=True)
            data_mask = data_mask.cuda(non_blocking=True)
            data = data.cuda()
            batch_size = data.size(1)
            target_len = target.size(0)
            # Forward
            # Encoder
            source_hs, hidden = model.encoder(data)
            # Decoder
            ctx = None
            # Map the encoder's final hidden state into the decoder's space.
            hidden = model.transformHidden(hidden)
            outputs = []
            x = target[0]  # decoding starts from the first target token
            for j in range(1, target_len):
                output, hidden, ctx = model.decoder(x, hidden, ctx, source_hs, data_mask)
                outputs.append(output)
                # Greedy decoding: feed back the arg-max token (no teacher
                # forcing at validation time).
                topi = torch.topk(output, 1, dim=1)[1]  # N, 1
                x = topi.squeeze()  # N
            outputs = torch.stack(outputs)  # seq_len, N, n_tokens
            # Measure loss (targets shifted by one: position 0 not predicted).
            loss = criterion(outputs, target[1:], batch_size)
            losses.update(loss.item(), batch_size)
            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                          i, len(val_loader), loss=losses))
    print(' * Loss {loss.avg:.3f}'.format(loss=losses))
    return losses.avg
def __init__(self, configer):
    """Store configuration and pre-declare training state attributes.

    Heavy objects (network, data loaders, transforms) are left as None
    and populated by later initialisation steps.
    """
    self.configer = configer
    self.data_path = configer.get(
        "data", "data_path")  #: str: Path to data directory
    # Train val and test accuracy
    self.accuracy = AverageMeter()
    # DataLoaders
    self.data_loader = None
    # Module load and save utility
    self.device = self.configer.get("device")
    self.model_utility = ModuleUtilizer(
        self.configer)  #: Model utility for load, save and update optimizer
    self.net = None
    # Training procedure
    self.transforms = None
    # Other useful data
    self.in_planes = np.prod(self.configer.get(
        "data", "n_features"))  #: int: Input channels
    self.window = self.configer.get(
        "data", "n_frames")  #: int: Number of frames per sequence
    self.n_classes = self.configer.get(
        "data", "n_classes")  #: int: Total number of classes for dataset
    self.dataset = self.configer.get(
        "dataset").lower()  #: str: Type of dataset
def __init__(self, configer):
    """Initialise segmentation-training helpers from the configer.

    Network, loaders and optimizer are declared here but built later.
    """
    self.configer = configer
    # Timing and loss meters.
    self.batch_time = AverageMeter()
    self.data_time = AverageMeter()
    self.train_losses = AverageMeter()
    self.val_losses = AverageMeter()
    # Visualisation / loss / model / data managers driven by the configer.
    self.seg_visualizer = SegVisualizer(configer)
    self.seg_loss_manager = SegLossManager(configer)
    self.module_utilizer = ModuleUtilizer(configer)
    self.seg_model_manager = SegModelManager(configer)
    self.seg_data_loader = SegDataLoader(configer)
    # Populated by a later init step.
    self.seg_net = None
    self.train_loader = None
    self.val_loader = None
    self.optimizer = None
    self.lr = None
    self.iters = None
def __init__(self, configer):
    """Initialise pose-estimation training helpers from the configer.

    Network, loaders and optimizer are declared here but built later.
    """
    self.configer = configer
    # Timing and loss meters.
    self.batch_time = AverageMeter()
    self.data_time = AverageMeter()
    self.train_losses = AverageMeter()
    self.val_losses = AverageMeter()
    # Visualisation / loss / model / data managers driven by the configer.
    self.vis = PoseVisualizer(configer)
    self.loss_manager = PoseLossManager(configer)
    self.model_manager = PoseModelManager(configer)
    self.data_loader = PoseDataLoader(configer)
    self.module_utilizer = ModuleUtilizer(configer)
    # Populated by a later init step.
    self.pose_net = None
    self.train_loader = None
    self.val_loader = None
    self.optimizer = None
    self.lr = None
    self.iters = None
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one train epoch of the reading-comprehension model."""
    losses = AverageMeter()
    # Switch to train mode
    model.train()
    for i, batch in enumerate(train_loader):
        # Unpack document, query and answer/candidate tensors from the batch.
        documents, documents_mask, documents_len = batch["documents"], batch["documents_mask"], batch["documents_len"]
        query, query_mask = batch["query"], batch["query_mask"]
        answer, candidates = batch["answer"], batch["candidates"]
        answer, candidates = answer.cuda(non_blocking=True), candidates.cuda(non_blocking=True)
        documents, documents_mask, documents_len = documents.cuda(), documents_mask.cuda(), documents_len.cuda()
        query, query_mask = query.cuda(), query_mask.cuda()
        # Sequence-first layout: the batch size is dimension 1.
        batch_size = documents.size(1)
        # Forward
        probs = model(documents, documents_mask, documents_len, query, query_mask, candidates)
        loss = criterion(probs, answer, candidates)
        # Backward
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients before the update to stabilise training.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        # Measure loss
        losses.update(loss.item(), batch_size)
        # Print Training Information
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), loss=losses))
def init_fn(self, shared_model=None, **kwargs):
    """Build (or reuse) the model plus optimizer, scheduler and criterion."""
    # Create auxiliary models
    self.init_auxiliary()
    if shared_model is not None:
        self.model = shared_model
    else:
        self.model = self.init_model()
        # NOTE(review): DataParallelModel is paired with DataParallelCriterion
        # below — presumably outputs stay per-GPU until the criterion reduces
        # them; confirm against the parallel library in use.
        self.model = DataParallelModel(self.model.cuda(), device_ids=self.gpus)
        # self.model = torch.nn.DataParallel(self.model, device_ids=self.gpus).cuda()
    # Setup a joint optimizer for the 2 models
    self.optimizer = self.init_optimizer(self.options.optim.name)
    self.lr_scheduler = self.init_lr(self.options.optim.lr_scheduler)
    # Create loss functions
    self.criterion = self.init_loss_functions()
    self.criterion = DataParallelCriterion(self.criterion.cuda(), device_ids=self.gpus)
    # Create AverageMeters for losses
    self.losses = AverageMeter()
    # Evaluators
    # self.evaluators = [Evaluator(self.options, self.logger, self.summary_writer, shared_model=self.model)]
    self.dataset_size = None
def train_epoch(module, config, writer, logger):
    """Train `module.model` for a single epoch, printing/logging progress."""
    timer = AverageMeter()
    loss_meter = AverageMeter()
    # Unpack what we need from the module container.
    model = module.model
    device = module.device
    loader = module.train_loader
    loss_fn = module.loss_fn
    optimizer = module.optimizer
    model.train()
    # 1-based step counter, matching the original manual `idx += 1`.
    for step, (images, labels) in enumerate(loader, 1):
        tic = time.time()
        images = images.to(device)
        labels = labels.to(device)
        # Standard step: zero grads, forward, backward, update.
        optimizer.zero_grad()
        loss = loss_fn(input=model(images), target=labels)
        loss.backward()
        optimizer.step()
        timer.update(time.time() - tic)
        loss_meter.update(loss.data.item())
        if step % config.training.disp_iter == 0:
            # This is the iteration to display the information.
            msg = "Iter {:d} Loss: {:.4f} Time/Batch: {:.4f}".format(
                step, loss_meter.average(), timer.average())
            print(msg)
            logger.info(msg)
def test_network(cfg, network, data_loader, checkpoint, result_set):
    """Evaluate `network` with weights from `checkpoint`.

    Writes the resulting JF-Mean into ``result_set['JF-Mean']`` (0 signals
    that every video failed).
    """
    _checkpoint = torch.load(checkpoint)
    # Strip the 'module.' prefix that DataParallel adds when saving.
    _checkpoint = {k.replace('module.', ''): v for k, v in _checkpoint['rmnet'].items()}
    network.load_state_dict(_checkpoint)
    network.eval()
    checkpoint = os.path.basename(checkpoint)
    test_metrics = AverageMeter(Metrics.names())
    # Assumes the whole network lives on exactly one device
    # (the unpack raises if parameters span multiple devices).
    device, = list(set(p.device for p in network.parameters()))
    for idx, (video_name, n_objects, frames, masks, optical_flows) in enumerate(
            tqdm(data_loader, leave=False,
                 desc='%s on GPU %d' % (checkpoint, device.index),
                 position=device.index)):
        with torch.no_grad():
            try:
                est_probs = network(frames, masks, optical_flows, n_objects,
                                    cfg.TEST.MEMORIZE_EVERY, device)
                est_probs = est_probs.permute(0, 2, 1, 3, 4)
                # Collapse the one-hot object channel to index masks.
                masks = torch.argmax(masks, dim=2)
                est_masks = torch.argmax(est_probs, dim=1)
            except Exception as ex:
                # Best-effort: log and skip videos that fail (e.g. OOM).
                logging.warning('Error occurred during testing Checkpoint[Name=%s]: %s' %
                                (checkpoint, ex))
                continue
            metrics = Metrics.get(est_masks[0], masks[0])
            test_metrics.update(metrics, torch.max(n_objects[0]).item())
    jf_mean = test_metrics.avg(2)
    if jf_mean != 0:
        logging.info('Checkpoint[Name=%s] has been tested successfully, JF-Mean = %.4f.' %
                     (checkpoint, jf_mean))
    else:
        logging.warning('Exception occurred during testing Checkpoint[Name=%s]' % checkpoint)
    result_set['JF-Mean'] = jf_mean
def validate(self):
    """Run detection validation: decode predicted boxes per batch.

    NOTE(review): this method looks unfinished — several meters and the
    gt/pred accumulators are filled but never evaluated, and it ends with
    a leftover debug print.
    """
    batch_time = AverageMeter()
    total_losses = AverageMeter()
    loc_losses = AverageMeter()
    cls_losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    """ validate """
    self.model.eval()
    test_loss = 0
    gt_boxes = []
    gt_labels = []
    end = time.time()
    log = ''
    for batch_idx, (inputs, loc_targets, cls_targets, fname) in enumerate(self.test_dataloader):
        # Image size as floats: (height, width) of the input tensor.
        img_size = [inputs.size()[2] * 1.0, inputs.size()[3] * 1.0]
        # img_size = img_size.numpy()
        # NOTE(review): this locally built box_coder is never used below —
        # decoding goes through self.box_coder; confirm which is intended.
        box_coder = S3FDBoxCoder(input_size=img_size)
        pred_boxes = []
        pred_labels = []
        pred_scores = []
        with torch.no_grad():
            gt_boxes.append(loc_targets.squeeze(0))
            gt_labels.append(cls_targets.squeeze(0))
            inputs = inputs.to(self.opt.device)
            loc_targets = loc_targets.to(self.opt.device)
            cls_targets = cls_targets.to(self.opt.device)
            loc_preds, cls_preds = self.model(inputs)
            # Decode predictions: confidence threshold 0.6, NMS IoU 0.45.
            box_preds, label_preds, score_preds = self.box_coder.decode(
                loc_preds.cpu().data.squeeze(),
                F.softmax(cls_preds.squeeze(), dim=1).cpu().data,
                score_thresh=0.6,
                nms_thresh=0.45)
            pred_boxes.append(box_preds)
            pred_labels.append(label_preds)
            pred_scores.append(score_preds)
        print('debug')  # NOTE(review): leftover debug output
def validate(val_loader, model, criterion):
    """Evaluate the reading-comprehension model; return the average loss."""
    losses = AverageMeter()
    top1 = AverageMeter()
    # Switch to evaluate mode
    model.eval()
    with torch.no_grad():
        for i, batch in enumerate(val_loader):
            # Unpack document, query and answer/candidate tensors.
            documents, documents_mask, documents_len = batch["documents"], batch["documents_mask"], batch["documents_len"]
            query, query_mask = batch["query"], batch["query_mask"]
            answer, candidates = batch["answer"], batch["candidates"]
            answer, candidates = answer.cuda(non_blocking=True), candidates.cuda(non_blocking=True)
            documents, documents_mask, documents_len = documents.cuda(), documents_mask.cuda(), documents_len.cuda()
            query, query_mask = query.cuda(), query_mask.cuda()
            # Sequence-first layout: the batch size is dimension 1.
            batch_size = documents.size(1)
            # Forward
            probs = model(documents, documents_mask, documents_len, query, query_mask, candidates)
            # Measure loss
            loss = criterion(probs, answer, candidates)
            losses.update(loss.item(), batch_size)
            # Top-1 accuracy over the candidate set.
            prec1 = accuracy(probs, answer, candidates)
            top1.update(prec1, batch_size)
            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          i, len(val_loader), loss=losses, top1=top1))
    print(' * Loss {loss.avg:.3f}\tPrec@1 {top1.avg:.3f}'.format(loss=losses, top1=top1))
    return losses.avg
def validate(val_loader, net):
    """Return (top-1, top-5) accuracy of the student branch of `net`."""
    acc1_meter, acc5_meter = AverageMeter(), AverageMeter()
    net.eval()  # switch to evaluate mode
    prefetcher = DataPrefetcher(val_loader)
    batch, targets = prefetcher.next()
    with torch.no_grad():
        # The prefetcher yields None once the loader is exhausted.
        while batch is not None:
            batch = batch.float().cuda()
            targets = targets.cuda()
            # Only the student outputs are needed; the teacher is discarded.
            stu_outputs, _ = net(batch)
            pred1, pred5 = accuracy(stu_outputs[-1], targets, topk=(1, 5))
            n = batch.size(0)
            acc1_meter.update(pred1.item(), n)
            acc5_meter.update(pred5.item(), n)
            batch, targets = prefetcher.next()
    return acc1_meter.avg, acc5_meter.avg
def __init__(self, configer):
    """Store configuration and pre-declare training state attributes.

    Heavy objects (network, data loaders, transforms) are left as None
    and populated by later initialisation steps.
    """
    self.configer = configer
    self.data_path = configer.get(
        "data", "data_path")  #: str: Path to data directory
    # Train val and test accuracy
    self.accuracy = AverageMeter()
    # DataLoaders
    self.data_loader = None
    # Module load and save utility
    self.device = self.configer.get("device")
    self.model_utility = ModuleUtilizer(
        self.configer)  #: Model utility for load, save and update optimizer
    self.net = None
    # Training procedure
    self.transforms = None
    # Other useful data
    self.backbone = self.configer.get("network", "backbone")  #: str: Backbone type
    self.in_planes = None  #: int: Input channels
    self.clip_length = self.configer.get(
        "data", "n_frames")  #: int: Number of frames per sequence
    self.n_classes = self.configer.get(
        "data", "n_classes")  #: int: Total number of classes for dataset
    self.data_type = self.configer.get(
        "data", "type")  #: str: Type of data (rgb, depth, ir, leapmotion)
    self.dataset = self.configer.get(
        "dataset").lower()  #: str: Type of dataset
    # Optical flow defaults to enabled when the config omits it.
    self.optical_flow = self.configer.get("data", "optical_flow")
    if self.optical_flow is None:
        self.optical_flow = True
def validate(module, epoch, best_iou, num_classes, writer, logger):
    """Validate one epoch; checkpoint when mean IoU improves.

    Returns the (possibly updated) best mean IoU.
    """
    # Unpack the module
    model = module.model
    device = module.device
    val_loader = module.val_loader
    loss_fn = module.loss_fn
    avg_loss = AverageMeter()
    running_score = RunningScore(num_classes)
    model.eval()
    with torch.no_grad():
        for idx, (images, labels) in tqdm(enumerate(val_loader)):
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = loss_fn(input=outputs, target=labels)
            avg_loss.update(loss.data.item())
            # Per-pixel argmax predictions vs. ground truth for the IoU score.
            pred = outputs.data.max(1)[1].cpu().numpy()
            gt = labels.data.cpu().numpy()
            running_score.update(gt, pred)
    writer.add_scalar("Val Loss", avg_loss.average(), epoch)
    logger.info("Epoch: {} Loss: {:.4f}".format(epoch, avg_loss.average()))
    mean_iou, disp_score = running_score.get_scores()
    logger.info(disp_score)
    if mean_iou >= best_iou:
        # Saves the model if the current mean_iou is better.
        best_iou = mean_iou
        path = os.path.join(writer.file_writer.get_logdir(), "best_model.pkl")
        save_model(model=model, optimizer=module.optimizer, epoch=epoch,
                   best_iou=best_iou, path=path)
    return best_iou
def valid_epoch(self, epoch):
    """Validate one epoch, checkpoint on improvement, print a confusion matrix."""
    batch_time = AverageMeter()
    total_losses = AverageMeter()
    # NOTE(review): top1/top5 are created but never updated in this method.
    top1 = AverageMeter()
    top5 = AverageMeter()
    correct = 0
    """ validate """
    self.model.eval()
    test_loss = 0
    end = time.time()
    for batch_idx, (inputs, label) in enumerate(self.valid_dataloader):
        with torch.no_grad():
            inputs = inputs.to(self.opt.device)
            label = label.to(self.opt.device)
            output = self.model(inputs)
            loss = self.criterion(output, label)
            total_losses.update(loss.item(), inputs.size(0))
            pred = output.data.max(1, keepdim=True)[1].cpu()
            label_array_temp = label.cpu().numpy()
            # Accumulate all predictions/labels for the confusion matrix.
            if batch_idx == 0:
                pred_array = pred.numpy()[:]
                label_array = label_array_temp
            else:
                pred_array = np.concatenate((pred_array, pred.numpy()[:]))
                label_array = np.concatenate(
                    (label_array, label_array_temp))
            correct += pred.eq(label.cpu().view_as(pred)).sum()
            batch_time.update(time.time() - end)
            end = time.time()
            test_loss += loss.item()
            if batch_idx % self.opt.print_freq_eval == 0:
                print('Validation[%d/%d] Total Loss: %.4f[%.4f]' %
                      (batch_idx, len(self.valid_dataloader), loss.item(),
                       test_loss / (batch_idx + 1)))
    num_test_data = len(self.valid_dataloader.dataset)
    accuracy = 100. * correct / num_test_data
    test_loss /= len(self.valid_dataloader)
    if test_loss < self.best_loss:
        print('Saving..')
        state = {
            'model': self.model.module.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_loss': test_loss,
            # Saved as the next epoch to resume from.
            'epoch': epoch + 1,
        }
        torch.save(state, os.path.join(self.opt.expr_dir, 'model_best.pth'))
        self.best_loss = test_loss
    print('[*] Model %s,\tCurrent Loss: %f\tBest Loss: %f' %
          (self.opt.model, test_loss, self.best_loss))
    print('Val Accuracy: {}/{} ({:.0f}%)'.format(correct, num_test_data, accuracy))
    # Map class indices to alphabet characters and print the confusion matrix.
    ALPHABET_list = list(ALPHABET)
    y_true_list = []
    y_pred_list = []
    for i in range(len(pred_array)):
        y_true = label_array[i]
        y_pred = pred_array[i][0]
        y_true_list.append(ALPHABET_list[y_true])
        y_pred_list.append(ALPHABET_list[y_pred])
    cm = confusion_matrix(y_true_list, y_pred_list, ALPHABET_list)
    print_cm(cm, ALPHABET_list)
def test_net(cfg, epoch_idx=-1, test_data_loader=None, test_writer=None, model=None):
    """Test a point-cloud completion model.

    Builds the test loader and/or model from `cfg` when not supplied,
    prints per-taxonomy metrics, optionally logs to TensorBoard, and
    returns the average cd3 loss.
    """
    # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use
    torch.backends.cudnn.benchmark = True
    if test_data_loader is None:
        # Set up data loader
        dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[cfg.DATASET.TEST_DATASET](cfg)
        test_data_loader = torch.utils.data.DataLoader(
            dataset=dataset_loader.get_dataset(utils.data_loaders.DatasetSubset.TEST),
            batch_size=1,
            num_workers=cfg.CONST.NUM_WORKERS,
            collate_fn=utils.data_loaders.collate_fn,
            pin_memory=True,
            shuffle=False)
    # Setup networks and initialize networks
    if model is None:
        model = Model(dataset=cfg.DATASET.TRAIN_DATASET)
        if torch.cuda.is_available():
            model = torch.nn.DataParallel(model).cuda()
        assert 'WEIGHTS' in cfg.CONST and cfg.CONST.WEIGHTS
        logging.info('Recovering from %s ...' % (cfg.CONST.WEIGHTS))
        checkpoint = torch.load(cfg.CONST.WEIGHTS)
        model.load_state_dict(checkpoint['model'])
    # Switch models to evaluation mode
    model.eval()
    n_samples = len(test_data_loader)
    # Multi-valued meters: one loss per decoder stage plus the pmd term.
    test_losses = AverageMeter(['cd1', 'cd2', 'cd3', 'pmd'])
    test_metrics = AverageMeter(Metrics.names())
    category_metrics = dict()
    # Testing loop
    with tqdm(test_data_loader) as t:
        for model_idx, (taxonomy_id, model_id, data) in enumerate(t):
            taxonomy_id = taxonomy_id[0] if isinstance(taxonomy_id[0], str) else taxonomy_id[0].item()
            model_id = model_id[0]
            with torch.no_grad():
                for k, v in data.items():
                    data[k] = utils.helpers.var_or_cuda(v)
                partial = data['partial_cloud']
                gt = data['gtcloud']
                # Repeat each partial cloud 8x then subsample to fixed size.
                partial = random_subsample(
                    partial.repeat((1, 8, 1)).reshape(-1, 16384, 3))  # b*8, 2048, 3
                b, n, _ = partial.shape
                pcds, deltas = model(partial.contiguous())
                # Chamfer distances (x1e3) for the three decoder stages.
                cd1 = chamfer_sqrt(pcds[0].reshape(-1, 16384, 3).contiguous(), gt).item() * 1e3
                cd2 = chamfer_sqrt(pcds[1].reshape(-1, 16384, 3).contiguous(), gt).item() * 1e3
                cd3 = chamfer_sqrt(pcds[2].reshape(-1, 16384, 3).contiguous(), gt).item() * 1e3
                # pmd loss
                pmd_losses = []
                for delta in deltas:
                    pmd_losses.append(torch.sum(delta**2))
                pmd = torch.sum(torch.stack(pmd_losses)) / 3
                pmd_item = pmd.item()
                _metrics = [pmd_item, cd3]
                test_losses.update([cd1, cd2, cd3, pmd_item])
                test_metrics.update(_metrics)
                # Also track metrics per taxonomy (category).
                if taxonomy_id not in category_metrics:
                    category_metrics[taxonomy_id] = AverageMeter(Metrics.names())
                category_metrics[taxonomy_id].update(_metrics)
                t.set_description(
                    'Test[%d/%d] Taxonomy = %s Sample = %s Losses = %s Metrics = %s' %
                    (model_idx + 1, n_samples, taxonomy_id, model_id,
                     ['%.4f' % l for l in test_losses.val()],
                     ['%.4f' % m for m in _metrics]))
    # Print testing results
    print('============================ TEST RESULTS ============================')
    print('Taxonomy', end='\t')
    print('#Sample', end='\t')
    for metric in test_metrics.items:
        print(metric, end='\t')
    print()
    for taxonomy_id in category_metrics:
        print(taxonomy_id, end='\t')
        print(category_metrics[taxonomy_id].count(0), end='\t')
        for value in category_metrics[taxonomy_id].avg():
            print('%.4f' % value, end='\t')
        print()
    print('Overall', end='\t\t\t')
    for value in test_metrics.avg():
        print('%.4f' % value, end='\t')
    print('\n')
    # Add testing results to TensorBoard
    if test_writer is not None:
        test_writer.add_scalar('Loss/Epoch/cd1', test_losses.avg(0), epoch_idx)
        test_writer.add_scalar('Loss/Epoch/cd2', test_losses.avg(1), epoch_idx)
        test_writer.add_scalar('Loss/Epoch/cd3', test_losses.avg(2), epoch_idx)
        test_writer.add_scalar('Loss/Epoch/delta', test_losses.avg(3), epoch_idx)
        for i, metric in enumerate(test_metrics.items):
            test_writer.add_scalar('Metric/%s' % metric, test_metrics.avg(i), epoch_idx)
    return test_losses.avg(2)
class OpenPose(object):
    """
    The class for Pose Estimation. Include train, val, test & predict.
    """

    def __init__(self, configer):
        self.configer = configer
        # Running meters for per-interval timing and loss statistics.
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()

        # Project helpers: visualization, loss/model construction, data, I/O.
        self.vis = PoseVisualizer(configer)
        self.loss_manager = PoseLossManager(configer)
        self.model_manager = PoseModelManager(configer)
        self.data_loader = PoseDataLoader(configer)
        self.module_utilizer = ModuleUtilizer(configer)

        # Populated by init_model().
        self.pose_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """Build the network, restore weights, and create data loaders.

        Must be called before train(). Exits the process if the configured
        dataset is not supported.
        """
        self.pose_net = self.model_manager.pose_detector()
        self.iters = 0
        self.pose_net, _ = self.module_utilizer.load_net(self.pose_net)
        self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.pose_net, self.iters)

        if self.configer.get('dataset') == 'coco':
            self.train_loader = self.data_loader.get_trainloader(OPCocoLoader)
            self.val_loader = self.data_loader.get_valloader(OPCocoLoader)
        else:
            Log.error('Dataset: {} is not valid!'.format(self.configer.get('dataset')))
            exit(1)

        self.mse_loss = self.loss_manager.get_pose_loss('mse_loss')

    def __train(self):
        """
        Train function of every epoch during train phase.
        """
        self.pose_net.train()
        start_time = time.time()

        # data_tuple: (inputs, heatmap, maskmap, vecmap)
        for i, data_tuple in enumerate(self.train_loader):
            self.data_time.update(time.time() - start_time)

            # Change the data type.
            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # BUGFIX: was exit(0); an error path must not report success.
                exit(1)

            # BUGFIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7
            # (`async` is a reserved keyword); the kwarg was renamed to
            # `non_blocking` in PyTorch 0.4. The `Variable` wrapper is a
            # no-op since 0.4 and has been dropped.
            inputs = data_tuple[0].cuda(non_blocking=True)
            heatmap = data_tuple[1].cuda(non_blocking=True)
            maskmap = None
            if len(data_tuple) > 2:
                maskmap = data_tuple[2].cuda(non_blocking=True)

            # Forward pass.
            paf_out, heatmap_out = self.pose_net(inputs)
            self.vis.vis_paf(paf_out, inputs.data.cpu().squeeze().numpy().transpose(1, 2, 0), name='paf_out')

            # Compute the loss of the train batch & backward.
            loss_heatmap = self.mse_loss(heatmap_out, heatmap, maskmap)
            loss = loss_heatmap
            if len(data_tuple) > 3:
                # The PAF (part-affinity-field) branch is supervised only
                # when the loader provides a vecmap target.
                vecmap = data_tuple[3].cuda(non_blocking=True)
                self.vis.vis_paf(vecmap, inputs.data.cpu().squeeze().numpy().transpose(1, 2, 0), name='paf')
                loss_associate = self.mse_loss(paf_out, vecmap, maskmap)
                loss += loss_associate

            # BUGFIX: `loss.data[0]` raises on 0-dim tensors (PyTorch >= 0.4);
            # use .item() instead.
            self.train_losses.update(loss.item(), inputs.size(0))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                # BUGFIX: `{data_time.avg:3f}` was missing the dot (width-3
                # float, default 6-digit precision); intended spec is `:.3f`.
                Log.info('Train Iteration: {0}\t'
                         'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                         'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                         'Learning rate = {2}\n'
                         'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                             self.iters, self.configer.get('solver', 'display_iter'), self.lr,
                             batch_time=self.batch_time, data_time=self.data_time,
                             loss=self.train_losses))
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and \
                    self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()

            # Adjust the learning rate after every iteration.
            self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.pose_net, self.iters)

    def __val(self):
        """
        Validation function during the train phase.
        """
        self.pose_net.eval()
        start_time = time.time()

        # BUGFIX: `Variable(..., volatile=True)` was removed in PyTorch 0.4;
        # inference without autograd is expressed with torch.no_grad().
        with torch.no_grad():
            for j, data_tuple in enumerate(self.val_loader):
                # Change the data type.
                inputs = data_tuple[0].cuda(non_blocking=True)
                heatmap = data_tuple[1].cuda(non_blocking=True)
                maskmap = None
                if len(data_tuple) > 2:
                    maskmap = data_tuple[2].cuda(non_blocking=True)

                # Forward pass.
                paf_out, heatmap_out = self.pose_net(inputs)

                # Compute the loss of the val batch.
                loss_heatmap = self.mse_loss(heatmap_out, heatmap, maskmap)
                loss = loss_heatmap
                if len(data_tuple) > 3:
                    vecmap = data_tuple[3].cuda(non_blocking=True)
                    loss_associate = self.mse_loss(paf_out, vecmap, maskmap)
                    loss = loss_heatmap + loss_associate

                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        # Checkpoint the current weights after every validation pass.
        self.module_utilizer.save_net(self.pose_net, self.iters)

        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        self.batch_time.reset()
        self.val_losses.reset()
        # Return the network to train mode for the caller.
        self.pose_net.train()

    def train(self):
        """Run __train() epochs until the configured max iteration count."""
        cudnn.benchmark = True
        while self.iters < self.configer.get('solver', 'max_iter'):
            self.__train()
            if self.iters == self.configer.get('solver', 'max_iter'):
                break
class FCNSegmentor(object):
    """
    The class for Semantic Segmentation. Include train, val & predict.

    (Docstring fixed: it was copy-pasted from the pose-estimation class.)
    """

    def __init__(self, configer):
        self.configer = configer
        # Running meters for per-interval timing and loss statistics.
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()

        # Project helpers: visualization, loss/model construction, data, I/O.
        self.seg_visualizer = SegVisualizer(configer)
        self.seg_loss_manager = SegLossManager(configer)
        self.module_utilizer = ModuleUtilizer(configer)
        self.seg_model_manager = SegModelManager(configer)
        self.seg_data_loader = SegDataLoader(configer)

        # Populated by init_model().
        self.seg_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """Build the segmentation net, restore weights, create data loaders.

        Must be called before train(). Exits the process if the configured
        dataset is not supported.
        """
        self.seg_net = self.seg_model_manager.seg_net()
        self.iters = 0
        self.seg_net, _ = self.module_utilizer.load_net(self.seg_net)
        self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.seg_net, self.iters)

        if self.configer.get('dataset') == 'cityscape':
            self.train_loader = self.seg_data_loader.get_trainloader(FSCityScapeLoader)
            self.val_loader = self.seg_data_loader.get_valloader(FSCityScapeLoader)
        else:
            Log.error('Dataset: {} is not valid!'.format(self.configer.get('dataset')))
            exit(1)

        self.pixel_loss = self.seg_loss_manager.get_seg_loss('cross_entropy_loss')

    def __train(self):
        """
        Train function of every epoch during train phase.
        """
        self.seg_net.train()
        start_time = time.time()

        # data_tuple: (inputs, heatmap, maskmap, tagmap, num_objects)
        for i, data_tuple in enumerate(self.train_loader):
            self.data_time.update(time.time() - start_time)

            # Change the data type.
            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # BUGFIX: was exit(0); an error path must not report success.
                exit(1)

            # BUGFIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7
            # (`async` is a reserved keyword); the kwarg was renamed to
            # `non_blocking` in PyTorch 0.4. The `Variable` wrapper is a
            # no-op since 0.4 and has been dropped.
            inputs = data_tuple[0].cuda(non_blocking=True)
            targets = data_tuple[1].cuda(non_blocking=True)

            # Forward pass.
            outputs = self.seg_net(inputs)

            # Compute the loss of the train batch & backward.
            loss_pixel = self.pixel_loss(outputs, targets)
            loss = loss_pixel
            # BUGFIX: `loss.data[0]` raises on 0-dim tensors (PyTorch >= 0.4);
            # use .item() instead.
            self.train_losses.update(loss.item(), inputs.size(0))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                # BUGFIX: `{data_time.avg:3f}` was missing the dot (width-3
                # float, default 6-digit precision); intended spec is `:.3f`.
                Log.info('Train Iteration: {0}\t'
                         'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                         'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                         'Learning rate = {2}\n'
                         'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                             self.iters, self.configer.get('solver', 'display_iter'), self.lr,
                             batch_time=self.batch_time, data_time=self.data_time,
                             loss=self.train_losses))
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and \
                    self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()

            # Adjust the learning rate after every iteration.
            self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.seg_net, self.iters)

    def __val(self):
        """
        Validation function during the train phase.
        """
        self.seg_net.eval()
        start_time = time.time()

        # BUGFIX: `Variable(..., volatile=True)` was removed in PyTorch 0.4;
        # inference without autograd is expressed with torch.no_grad().
        with torch.no_grad():
            for j, data_tuple in enumerate(self.val_loader):
                # Change the data type.
                inputs = data_tuple[0].cuda(non_blocking=True)
                targets = data_tuple[1].cuda(non_blocking=True)

                # Forward pass.
                outputs = self.seg_net(inputs)

                # Compute the loss of the val batch.
                loss_pixel = self.pixel_loss(outputs, targets)
                loss = loss_pixel

                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        # Checkpoint the current weights after every validation pass.
        self.module_utilizer.save_net(self.seg_net, self.iters)

        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        self.batch_time.reset()
        self.val_losses.reset()
        # Return the network to train mode for the caller.
        self.seg_net.train()

    def train(self):
        """Run __train() epochs until the configured max iteration count."""
        cudnn.benchmark = True
        while self.iters < self.configer.get('solver', 'max_iter'):
            self.__train()
            if self.iters == self.configer.get('solver', 'max_iter'):
                break
class ConvPoseMachine(object): """ The class for Pose Estimation. Include train, val, val & predict. """ def __init__(self, configer): self.configer = configer self.batch_time = AverageMeter() self.data_time = AverageMeter() self.train_losses = AverageMeter() self.val_losses = AverageMeter() self.pose_visualizer = PoseVisualizer(configer) self.loss_manager = PoseLossManager(configer) self.model_manager = PoseModelManager(configer) self.train_utilizer = ModuleUtilizer(configer) self.pose_net = None self.train_loader = None self.val_loader = None self.optimizer = None self.best_model_loss = None self.is_best = None self.lr = None self.iters = None def init_model(self, train_loader=None, val_loader=None): self.pose_net = self.model_manager.pose_detector() self.pose_net, self.iters = self.train_utilizer.load_net(self.pose_net) self.optimizer = self.train_utilizer.update_optimizer(self.pose_net, self.iters) self.train_loader = train_loader self.val_loader = val_loader self.heatmap_loss = self.loss_manager.get_pose_loss('heatmap_loss') def __train(self): """ Train function of every epoch during train phase. """ self.pose_net.train() start_time = time.time() # data_tuple: (inputs, heatmap, maskmap, tagmap, num_objects) for i, data_tuple in enumerate(self.train_loader): self.data_time.update(time.time() - start_time) # Change the data type. if len(data_tuple) < 2: Log.error('Train Loader Error!') exit(0) inputs = Variable(data_tuple[0].cuda(async=True)) heatmap = Variable(data_tuple[1].cuda(async=True)) maskmap = None if len(data_tuple) > 2: maskmap = Variable(data_tuple[2].cuda(async=True)) self.pose_visualizer.vis_tensor(heatmap, name='heatmap') self.pose_visualizer.vis_tensor((inputs*256+128)/255, name='image') # Forward pass. outputs = self.pose_net(inputs) self.pose_visualizer.vis_tensor(outputs, name='output') self.pose_visualizer.vis_peaks(inputs, outputs, name='peak') # Compute the loss of the train batch & backward. 
loss_heatmap = self.heatmap_loss(outputs, heatmap, maskmap) loss = loss_heatmap self.train_losses.update(loss.data[0], inputs.size(0)) self.optimizer.zero_grad() loss.backward() self.optimizer.step() # Update the vars of the train phase. self.batch_time.update(time.time() - start_time) start_time = time.time() self.iters += 1 # Print the log info & reset the states. if self.iters % self.configer.get('solver', 'display_iter') == 0: Log.info('Train Iteration: {0}\t' 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t' 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n' 'Learning rate = {2}\n' 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format( self.iters, self.configer.get('solver', 'display_iter'), self.lr, batch_time=self.batch_time, data_time=self.data_time, loss=self.train_losses)) self.batch_time.reset() self.data_time.reset() self.train_losses.reset() # Check to val the current model. if self.val_loader is not None and \ self.iters % self.configer.get('solver', 'test_interval') == 0: self.__val() self.optimizer = self.train_utilizer.update_optimizer(self.pose_net, self.iters) def __val(self): """ Validation function during the train phase. """ self.pose_net.eval() start_time = time.time() for j, data_tuple in enumerate(self.val_loader): # Change the data type. inputs = Variable(data_tuple[0].cuda(async=True), volatile=True) heatmap = Variable(data_tuple[1].cuda(async=True), volatile=True) maskmap = None if len(data_tuple) > 2: maskmap = Variable(data_tuple[2].cuda(async=True), volatile=True) # Forward pass. outputs = self.pose_net(inputs) self.pose_visualizer.vis_peaks(inputs, outputs, name='peak_val') # Compute the loss of the val batch. loss_heatmap = self.heatmap_loss(outputs, heatmap, maskmap) loss = loss_heatmap self.val_losses.update(loss.data[0], inputs.size(0)) # Update the vars of the val phase. self.batch_time.update(time.time() - start_time) start_time = time.time() # Print the log info & reset the states. 
Log.info( 'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t' 'Loss {loss.avg:.8f}\n'.format( batch_time=self.batch_time, loss=self.val_losses)) self.batch_time.reset() self.val_losses.reset() self.pose_net.train() def train(self): cudnn.benchmark = True while self.iters < self.configer.get('solver', 'max_iter'): self.__train() if self.iters == self.configer.get('solver', 'max_iter'): break def test(self, img_path=None, img_dir=None): if img_path is not None and os.path.exists(img_path): image = Image.open(img_path).convert('RGB')