def validate(val_loader, model, criterion):
    """Evaluate the model on ``val_loader`` and return the average loss."""
    loss_meter = AverageMeter()
    model.eval()  # disable dropout / use running batch-norm statistics
    with torch.no_grad():
        for step, (src, tgt) in enumerate(val_loader):
            tgt = tgt.cuda(non_blocking=True)
            src = src.cuda()
            n = src.size(1)  # batch is dim 1 (sequence-first layout)
            loss = criterion(model(src), tgt, n)
            loss_meter.update(loss.item(), n)
            if step % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                          step, len(val_loader), loss=loss_meter))
    print(' * Loss {loss.avg:.3f}'.format(loss=loss_meter))
    return loss_meter.avg
def bleuScore(dataset, model):
    """Translate every (source, target) pair in ``dataset`` and report BLEU.

    Also pickles per-sentence results to ``results.pkl`` in the working
    directory. Returns the average BLEU over the dataset.
    """
    import pickle

    model.eval()
    bleu = AverageMeter()
    allResults = []
    with torch.no_grad():
        # BUG FIX: previously iterated the global `val_dataset`, silently
        # ignoring the `dataset` argument. Use the parameter instead.
        for i, item in enumerate(dataset):
            source, target = item[0], item[1].tolist()
            # Strip the start/end sentinel tokens from the reference.
            del target[0]
            del target[-1]
            results = translate(model, source, args.max_len,
                                train_dataset.engStartTokenID(),
                                train_dataset.engEndTokenID(),
                                args.beam, target)
            bleu.update(results["bleu"])
            source = source.tolist()
            del source[-1]  # drop the end token from the source too
            allResults.append((results["bleu"], source, target,
                               results["finals"][0][1]))
            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'BLEU {bleu.val:.4f} ({bleu.avg:.4f})'.format(
                          i, len(dataset), bleu=bleu))
    print(' * BLEU {bleu.avg:.3f}'.format(bleu=bleu))
    # FIX: use a context manager so the file handle is always closed.
    with open("results.pkl", "wb") as f:
        pickle.dump(allResults, f, protocol=4)
    return bleu.avg
def validate(val_loader, model, criterion):
    """Evaluate the classifier; returns the average top-1 precision."""
    loss_meter, acc_meter = AverageMeter(), AverageMeter()
    model.eval()
    with torch.no_grad():
        for step, (images, labels) in enumerate(val_loader):
            labels = labels.cuda(non_blocking=True)
            images = images.cuda()
            logits = model(images)
            loss = criterion(logits, labels)
            prec1 = accuracy(logits, labels)
            n = images.size(0)
            loss_meter.update(loss.item(), n)
            acc_meter.update(prec1[0], n)
            if step % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          step, len(val_loader), loss=loss_meter, top1=acc_meter))
    print(' * Prec@1 {top1.avg:.3f}'.format(top1=acc_meter))
    return acc_meter.avg
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one train epoch"""
    loss_meter, acc_meter = AverageMeter(), AverageMeter()
    model.train()
    for step, (images, labels) in enumerate(train_loader):
        labels = labels.cuda(non_blocking=True)
        images = images.cuda()
        # Forward / backward / parameter update.
        logits = model(images)
        loss = criterion(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Bookkeeping: running loss and top-1 precision.
        prec1 = accuracy(logits.data, labels)
        n = images.size(0)
        loss_meter.update(loss.item(), n)
        acc_meter.update(prec1[0], n)
        if step % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, step, len(train_loader), loss=loss_meter, top1=acc_meter))
def eval_step(self, data_loader, metric):
    """Run one validation pass.

    Returns (average loss, average metric value) over the whole loader.
    """
    losses = AverageMeter()
    metrics_avg = AverageMeter()
    # Resolve the metric callable once — it is loop-invariant.
    metric_fn = metrics_dict[metric]
    self.model.eval()
    with torch.no_grad():
        tk0 = tqdm(data_loader, total=len(data_loader))
        for _, data in enumerate(tk0):
            images = data["images"].to(self.device)
            labels = data["labels"].to(self.device)
            output = self.model(images)
            loss = self.criterion(output, labels)
            predictions = torch.softmax(output, dim=1)
            _, predictions = torch.max(predictions, dim=1)
            metric_value = metric_fn(labels, predictions)
            losses.update(loss.item(), images.size(0))
            metrics_avg.update(metric_value.item(), images.size(0))
            tk0.set_postfix(loss=losses.avg)
    print(f"Validation Loss = {losses.avg}")
    # BUG FIX: previously returned the *last batch* loss tensor; return the
    # epoch-average loss instead, matching metrics_avg.avg alongside it.
    return losses.avg, metrics_avg.avg
def evaluate_testset(test_data_loader, generator, loss_fn, args):
    """Compute and log the average loss of ``generator`` on the test split."""
    generator.train(False)  # evaluation mode
    losses = AverageMeter('loss')
    started_at = time.time()
    with torch.no_grad():
        for _, batch in enumerate(test_data_loader, 0):
            in_text, text_lengths, target_vec, in_audio, aux_info = batch
            n = target_vec.size(0)
            in_text = in_text.to(device)
            target = target_vec.to(device)
            out_poses = generator(in_text, text_lengths, target, None)
            losses.update(loss_fn(out_poses, target).item(), n)
    generator.train(True)  # restore training mode
    elapsed_time = time.time() - started_at
    logging.info('[VAL] loss: {:.3f} / {:.1f}s'.format(losses.avg, elapsed_time))
    return losses.avg
def evaluate_testset(test_data_loader, generator):
    """Evaluate via the embedding-space loss; returns {'loss': average}."""
    generator.train(False)  # switch to evaluation mode
    losses = AverageMeter('loss')
    started_at = time.time()
    with torch.no_grad():
        for _, batch in enumerate(test_data_loader, 0):
            target_poses, target_vec = batch
            n = target_vec.size(0)
            target = target_vec.to(device)
            loss, _ = eval_embed(None, None, None, target, generator)
            losses.update(loss.item(), n)
    generator.train(True)  # back to training mode
    ret_dict = {'loss': losses.avg}
    elapsed_time = time.time() - started_at
    logging.info('[VAL] loss: {:.3f} / {:.1f}s'.format(losses.avg, elapsed_time))
    return ret_dict
def training_step(self, data_loader):
    """Train ``self.model`` for one epoch over ``data_loader``."""
    losses = AverageMeter()
    self.model.train()
    progress = tqdm(data_loader, total=len(data_loader))
    for _, batch in enumerate(progress):
        # Move the batch to the configured device.
        ids = batch["ids"].to(self.device)
        masks = batch["masks"].to(self.device)
        labels = batch["labels"].to(self.device)
        # Forward / backward / update.
        self.model.zero_grad()
        output = self.model(ids, masks)
        loss = self.criterion(output, labels)
        loss.backward()
        self.optimizer.step()
        # Track the running average loss on the progress bar.
        losses.update(loss.item(), ids.size(0))
        progress.set_postfix(loss=losses.avg)
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one train epoch"""
    loss_meter = AverageMeter()
    model.train()
    for step, (src, tgt) in enumerate(train_loader):
        # src / tgt: (seq_len, N) — sequence-first layout.
        tgt = tgt.cuda(non_blocking=True)
        src = src.cuda()
        n = src.size(1)
        logits = model(src)  # (seq_len, N, ntokens)
        loss = criterion(logits, tgt, n)
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients before the update to keep training stable.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        loss_meter.update(loss.item(), n)
        if step % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, step, len(train_loader), loss=loss_meter))
def train(train_loader, model: seq2seq.Seq2seq, criterion, optimizer, epoch, teacher_forcing_ratio):
    """Run one train epoch of the attention seq2seq model.

    Decodes step-by-step, choosing (per batch) between teacher forcing and
    feeding back the model's own greedy prediction.
    """
    losses = AverageMeter()
    # Switch to train mode
    model.train()
    for i, batch in enumerate(train_loader):
        # data / data_mask / target: (seq_len, N) — sequence-first layout.
        data, data_mask, target = batch
        target = target.cuda(non_blocking=True)
        data_mask = data_mask.cuda(non_blocking=True)
        data = data.cuda()
        batch_size = data.size(1)
        target_len = target.size(0)
        # Encoder: full source pass.
        source_hs, hidden = model.encoder(data)
        # Decoder: map encoder state into decoder space, then unroll.
        ctx = None
        hidden = model.transformHidden(hidden)
        outputs = []
        # One coin flip per batch decides teacher forcing for the whole sequence.
        use_teacher_forcing = random.random() < teacher_forcing_ratio
        x = target[0]  # first decoder input is the start token row
        for j in range(1, target_len):
            output, hidden, ctx = model.decoder(x, hidden, ctx, source_hs, data_mask)
            outputs.append(output)
            # Selecting the next input must not track gradients.
            with torch.no_grad():
                if use_teacher_forcing:
                    x = target[j]
                else:
                    topi = torch.topk(output, 1, dim=1)[1]  # N, 1
                    x = topi.squeeze()  # N
        outputs = torch.stack(outputs)  # seq_len, N, n_tokens
        # target[1:] — predictions are aligned one step after the inputs.
        loss = criterion(outputs, target[1:], batch_size)
        # Backward
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients, then update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        # Measure loss
        losses.update(loss.item(), batch_size)
        # Print Training Information
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), loss=losses))
def valid_epoch(self, epoch):
    """Validate for one epoch; checkpoint the model when mean loss improves."""
    batch_time = AverageMeter()
    total_losses = AverageMeter()
    top1 = AverageMeter()   # NOTE(review): never updated in this method
    top5 = AverageMeter()   # NOTE(review): never updated in this method
    correct = 0
    """ validate """
    self.model.eval()
    test_loss = 0
    end = time.time()
    for batch_idx, (inputs, label) in enumerate(self.valid_dataloader):
        with torch.no_grad():
            inputs = inputs.to(self.opt.device)
            label = label.to(self.opt.device)
            output = self.model(inputs)
            loss = self.criterion(output, label)
            total_losses.update(loss.item(), inputs.size(0))
            # Count correct top-1 predictions on CPU.
            pred = output.data.max(1, keepdim=True)[1].cpu()
            correct += pred.eq(label.cpu().view_as(pred)).sum()
            batch_time.update(time.time() - end)
            end = time.time()
            test_loss += loss.item()
            if batch_idx % self.opt.print_freq_eval == 0:
                print('Validation[%d/%d] Total Loss: %.4f[%.4f]' %
                      (batch_idx, len(self.valid_dataloader), loss.item(),
                       test_loss / (batch_idx + 1)))
    num_test_data = len(self.valid_dataloader.dataset)
    accuracy = 100. * correct / num_test_data
    # Mean loss per batch (not per sample).
    test_loss /= len(self.valid_dataloader)
    if test_loss < self.best_loss:
        print('Saving..')
        # self.model.module — assumes the model is wrapped (e.g. DataParallel);
        # TODO confirm against the trainer setup.
        state = {
            'model': self.model.module.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_loss': test_loss,
            'epoch': epoch,
        }
        torch.save(state, os.path.join(self.opt.expr_dir, 'model_best.pth'))
        self.best_loss = test_loss
    print('[*] Model %s,\tCurrent Loss: %f\tBest Loss: %f' %
          (self.opt.model, test_loss, self.best_loss))
    print('Val Accuracy: {}/{} ({:.0f}%)'.format(correct, num_test_data, accuracy))
def validate(val_loader, model, criterion):
    """Evaluate the seq2seq model with greedy decoding; return average loss."""
    losses = AverageMeter()
    # Switch to evaluate mode
    model.eval()
    with torch.no_grad():
        for i, batch in enumerate(val_loader):
            # data / data_mask / target: (seq_len, N) — sequence-first layout.
            data, data_mask, target = batch
            target = target.cuda(non_blocking=True)
            data_mask = data_mask.cuda(non_blocking=True)
            data = data.cuda()
            batch_size = data.size(1)
            target_len = target.size(0)
            # Encoder: full source pass.
            source_hs, hidden = model.encoder(data)
            # Decoder: always feeds back its own greedy prediction (no teacher
            # forcing at validation time).
            ctx = None
            hidden = model.transformHidden(hidden)
            outputs = []
            x = target[0]  # start token row
            for j in range(1, target_len):
                output, hidden, ctx = model.decoder(x, hidden, ctx, source_hs, data_mask)
                outputs.append(output)
                topi = torch.topk(output, 1, dim=1)[1]  # N, 1
                x = topi.squeeze()  # N
            outputs = torch.stack(outputs)  # seq_len, N, n_tokens
            # Measure loss against target[1:] — shifted by one step.
            loss = criterion(outputs, target[1:], batch_size)
            losses.update(loss.item(), batch_size)
            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                          i, len(val_loader), loss=losses))
    print(' * Loss {loss.avg:.3f}'.format(loss=losses))
    return losses.avg
def validate(module, epoch, best_iou, num_classes, writer, logger):
    """Run validation on ``module``'s val split and return the best mean IoU.

    Logs the epoch loss to TensorBoard, and saves a checkpoint whenever the
    current mean IoU meets or beats ``best_iou``.
    """
    net = module.model
    dev = module.device
    loader = module.val_loader
    criterion = module.loss_fn
    loss_meter = AverageMeter()
    scorer = RunningScore(num_classes)
    net.eval()
    with torch.no_grad():
        for _, (imgs, targets) in tqdm(enumerate(loader)):
            imgs = imgs.to(dev)
            targets = targets.to(dev)
            preds = net(imgs)
            loss_meter.update(criterion(input=preds, target=targets).data.item())
            # Accumulate the confusion-matrix style score (ground truth first).
            scorer.update(targets.data.cpu().numpy(),
                          preds.data.max(1)[1].cpu().numpy())
    writer.add_scalar("Val Loss", loss_meter.average(), epoch)
    logger.info("Epoch: {} Loss: {:.4f}".format(epoch, loss_meter.average()))
    mean_iou, disp_score = scorer.get_scores()
    logger.info(disp_score)
    if mean_iou >= best_iou:
        # New best — persist the checkpoint alongside the TensorBoard logs.
        best_iou = mean_iou
        path = os.path.join(writer.file_writer.get_logdir(), "best_model.pkl")
        save_model(model=net, optimizer=module.optimizer, epoch=epoch,
                   best_iou=best_iou, path=path)
    return best_iou
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one train epoch"""
    loss_meter = AverageMeter()
    model.train()
    for step, batch in enumerate(train_loader):
        # Unpack and move the batch to GPU.
        answer = batch["answer"].cuda(non_blocking=True)
        candidates = batch["candidates"].cuda(non_blocking=True)
        docs = batch["documents"].cuda()
        docs_mask = batch["documents_mask"].cuda()
        docs_len = batch["documents_len"].cuda()
        query = batch["query"].cuda()
        query_mask = batch["query_mask"].cuda()
        n = docs.size(1)  # batch dimension is dim 1
        # Forward / backward / clipped update.
        probs = model(docs, docs_mask, docs_len, query, query_mask, candidates)
        loss = criterion(probs, answer, candidates)
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        loss_meter.update(loss.item(), n)
        if step % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, step, len(train_loader), loss=loss_meter))
def validate(val_loader, net):
    """Evaluate ``net`` with a data prefetcher; returns (top1.avg, top5.avg)."""
    acc1 = AverageMeter()
    acc5 = AverageMeter()
    net.eval()
    fetcher = DataPrefetcher(val_loader)
    batch, targets = fetcher.next()
    with torch.no_grad():
        # The prefetcher signals exhaustion by returning None.
        while batch is not None:
            batch = batch.float().cuda()
            targets = targets.cuda()
            stu_outputs, _ = net(batch)
            # Score the last (deepest) student output head.
            p1, p5 = accuracy(stu_outputs[-1], targets, topk=(1, 5))
            n = batch.size(0)
            acc1.update(p1.item(), n)
            acc5.update(p5.item(), n)
            batch, targets = fetcher.next()
    return acc1.avg, acc5.avg
def validate(val_loader, model, criterion):
    """Evaluate the reader model; returns the average loss."""
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    model.eval()
    with torch.no_grad():
        for step, batch in enumerate(val_loader):
            # Unpack and move the batch to GPU.
            answer = batch["answer"].cuda(non_blocking=True)
            candidates = batch["candidates"].cuda(non_blocking=True)
            docs = batch["documents"].cuda()
            docs_mask = batch["documents_mask"].cuda()
            docs_len = batch["documents_len"].cuda()
            query = batch["query"].cuda()
            query_mask = batch["query_mask"].cuda()
            n = docs.size(1)  # batch dimension is dim 1
            probs = model(docs, docs_mask, docs_len, query, query_mask, candidates)
            loss = criterion(probs, answer, candidates)
            loss_meter.update(loss.item(), n)
            acc_meter.update(accuracy(probs, answer, candidates), n)
            if step % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          step, len(val_loader), loss=loss_meter, top1=acc_meter))
    print(' * Loss {loss.avg:.3f}\tPrec@1 {top1.avg:.3f}'.format(
        loss=loss_meter, top1=acc_meter))
    return loss_meter.avg
def train_epoch(module, config, writer, logger):
    """Train the model for a single epoch.

    Logs a progress line every ``config.training.disp_iter`` batches.
    """
    batch_time = AverageMeter()
    train_loss = AverageMeter()
    # Unpack the module.
    model = module.model
    device = module.device
    train_loader = module.train_loader
    loss_fn = module.loss_fn
    optimizer = module.optimizer
    model.train()
    # IMPROVED: enumerate(..., start=1) replaces the hand-maintained
    # `idx = 0; idx += 1` counter (same 1-based numbering as before).
    for idx, (images, labels) in enumerate(train_loader, start=1):
        start_tic = time.time()
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(input=outputs, target=labels)
        loss.backward()
        optimizer.step()
        batch_time.update(time.time() - start_tic)
        train_loss.update(loss.data.item())
        if idx % config.training.disp_iter == 0:
            # This is the iteration to display the information.
            print_str = "Iter {:d} Loss: {:.4f} Time/Batch: {:.4f}".format(
                idx, train_loss.average(), batch_time.average())
            print(print_str)
            logger.info(print_str)
def test_network(cfg, network, data_loader, checkpoint, result_set):
    """Load a checkpoint, run it over ``data_loader``, and record JF-Mean.

    The result is written into ``result_set['JF-Mean']`` (0 when every batch
    failed).
    """
    _checkpoint = torch.load(checkpoint)
    # Strip the 'module.' prefix that DataParallel-style saving prepends.
    _checkpoint = {k.replace('module.', ''): v for k, v in _checkpoint['rmnet'].items()}
    network.load_state_dict(_checkpoint)
    network.eval()
    checkpoint = os.path.basename(checkpoint)  # reused below only as a label
    test_metrics = AverageMeter(Metrics.names())
    # Assumes all parameters live on exactly one device; the unpacking
    # raises if the model is sharded across devices.
    device, = list(set(p.device for p in network.parameters()))
    for idx, (video_name, n_objects, frames, masks, optical_flows) in enumerate(
            tqdm(data_loader, leave=False,
                 desc='%s on GPU %d' % (checkpoint, device.index),
                 position=device.index)):
        with torch.no_grad():
            try:
                est_probs = network(frames, masks, optical_flows, n_objects,
                                    cfg.TEST.MEMORIZE_EVERY, device)
                est_probs = est_probs.permute(0, 2, 1, 3, 4)
                masks = torch.argmax(masks, dim=2)
                est_masks = torch.argmax(est_probs, dim=1)
            except Exception as ex:
                # Deliberately best-effort: a failing video is logged and
                # skipped rather than aborting the whole evaluation.
                logging.warning('Error occurred during testing Checkpoint[Name=%s]: %s'
                                % (checkpoint, ex))
                continue
            metrics = Metrics.get(est_masks[0], masks[0])
            test_metrics.update(metrics, torch.max(n_objects[0]).item())
    # avg(2) — presumably index 2 is the JF-Mean slot; verify against Metrics.names().
    jf_mean = test_metrics.avg(2)
    if jf_mean != 0:
        logging.info('Checkpoint[Name=%s] has been tested successfully, JF-Mean = %.4f.'
                     % (checkpoint, jf_mean))
    else:
        logging.warning('Exception occurred during testing Checkpoint[Name=%s]' % checkpoint)
    result_set['JF-Mean'] = jf_mean
def train_local_model(self, train_loader):
    """Train the local model for ``self.args.local_epoch`` epochs.

    ``train_loader`` here is an indexable dataset (sliced manually), not a
    torch DataLoader. Returns {"param": cpu state_dict, "loss": AverageMeter}.
    """
    if self.args.loss_weight:
        self.criterion = nn.CrossEntropyLoss(weight=train_loader.weight)
    for epoch in range(self.args.local_epoch):
        # Train
        self.model.train()
        avg_loss = AverageMeter()
        avg_acc = AverageMeter()
        avg_pos_acc = AverageMeter()
        # t = tqdm(self.train_loader)
        batch_size = self.args.batch_size
        train_num = len(train_loader)
        # NOTE(review): when train_num is divisible by batch_size this yields
        # one extra empty batch, which the `if not instance` guard skips.
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = train_loader[start:end]
            if not instance:
                continue
            label, tokens, pos1, pos2, mask = self.batchify(instance)
            logits = self.model(label, None, tokens, pos1, pos2, mask,
                                bag_size=self.args.bag_size)
            loss = self.criterion(logits, label)
            score, pred = logits.max(-1)  # (B)
            acc = float((pred == label).long().sum()) / label.size(0)
            # "Positive" accuracy: accuracy restricted to non-NA labels (!= 0).
            pos_total = (label != 0).long().sum()
            pos_correct = ((pred == label).long() * (label != 0).long()).sum()
            if pos_total > 0:
                pos_acc = float(pos_correct) / float(pos_total)
            else:
                pos_acc = 0
            # Log
            avg_loss.update(loss.item(), 1)
            avg_acc.update(acc, 1)
            avg_pos_acc.update(pos_acc, 1)
            # t.set_postfix(loss=avg_loss.avg, acc=avg_acc.avg, pos_acc=avg_pos_acc.avg)
            # Optimize
            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()
        gc.collect()
        torch.cuda.empty_cache()
        print(" loss: %.4f; acc: %.4f; pos_acc: %.4f"
              % (avg_loss.avg, avg_acc.avg, avg_pos_acc.avg), flush=True)
    param = self.model.state_dict()
    if self.args.use_gpu:
        # Move every tensor to CPU so the state dict can be shipped/aggregated.
        for k, v in param.items():
            param[k] = v.cpu()
    # NOTE(review): "loss" is the AverageMeter object of the *last* epoch, not
    # a float — presumably callers read .avg; confirm (avg_loss.avg may be intended).
    return {"param": param, "loss": avg_loss}
def val_epoch(n, s, rounds, logger, only_test_time=False):
    """Validate DP results for the given n and s over ``rounds`` trials.

    Args:
        n: int, chain length.
        s: int, farthest distance a node may connect to.
        rounds: int, number of validation rounds.
        logger: output logger.
        only_test_time: bool, True - only time the DP method, skip
            correctness checking against the exhaustive method.
    """
    logger.info('Validation: n={}, s={}, rounds={}'.format(n, s, rounds))
    if only_test_time:
        # Timing-only mode: run the DP solver alone and report timings.
        dp_timer = AverageMeter()
        for i in range(rounds):
            cost = val_once(n, s, logger, '{}/{}'.format(i + 1, rounds), only_test_time)
            dp_timer.update(cost)
        logger.info('DP法 平均损耗时间 {}ms, 最小/最大损耗时间 {}/{}ms.'.format(
            dp_timer.avg, dp_timer.min, dp_timer.max))
        logger.info('End validation epoch.')
        logger.info('-' * 10)
    else:
        # Correctness mode: compare DP against the exhaustive method per round.
        correct = 0
        exh_timer, dp_timer = AverageMeter(), AverageMeter()
        for i in range(rounds):
            cost1, cost2, if_correct = val_once(n, s, logger, '{}/{}'.format(i + 1, rounds))
            exh_timer.update(cost1)
            dp_timer.update(cost2)
            correct += 1 if if_correct else 0
        logger.info('DP 结果验证 正确数/全部数: {}/{}'.format(correct, rounds))
        logger.info('穷举法 平均损耗时间 {}ms, 最小/最大损耗时间 {}/{}ms.'.format(
            exh_timer.avg, exh_timer.min, exh_timer.max))
        logger.info('DP法 平均损耗时间 {}ms, 最小/最大损耗时间 {}/{}ms.'.format(
            dp_timer.avg, dp_timer.min, dp_timer.max))
        #logger.info('DPnew法 平均损耗时间 {}ms, 最小/最大损耗时间 {}/{}ms.'.format(dpnew_timer.avg, dpnew_timer.min, dpnew_timer.max))
        logger.info('End validation epoch.')
        logger.info('-' * 10)
def run_magnet_loss(args):
    """Train a SLOSH embedding network with magnet loss.

    Walks ``args.root_folder`` for .npz samples, builds a stratified
    train/val split, then alternates cluster-driven batch sampling with
    SGD steps, periodically refreshing clusters and saving embeddings.
    """
    # Magnet-loss hyperparameters: m clusters per batch, d samples per
    # cluster, k clusters per class, alpha margin.
    m = 6
    d = 6
    k = 6
    alpha = 1.0
    batch_size = m * d
    root_folder = args.root_folder
    print('ROOT FOLDER: ', root_folder)
    folder_filenames = []
    file_kic = []
    labels = []
    print('Parsing files in folder... ')
    # Collect every .npz file, its numeric KIC id (first integer run in the
    # filename), and its stored 'label'.
    for dirpath, dirnames, filenames in os.walk(root_folder):
        for i, filex in enumerate(filenames):
            if filex.endswith('.npz'):
                folder_filenames.append(os.path.join(dirpath, filex))
                kicx = int(re.search(r'\d+', filex).group())
                file_kic.append(kicx)
                labels.append(np.load(os.path.join(dirpath, filex))['label'])
    file_kic = np.array(file_kic)
    folder_filenames = np.array(folder_filenames)
    labels = np.array(labels)
    print('folder filenames: ')
    # Stratified 85/15 split on filenames; fixed seed for reproducibility.
    train_ids, val_ids, train_labels, val_labels = train_test_split(
        folder_filenames, labels, stratify=labels, test_size=0.15,
        random_state=137)
    # Re-derive labels/filenames via membership so ordering matches
    # folder_filenames (np.in1d returns a boolean mask in original order).
    train_labels = labels[np.in1d(folder_filenames, train_ids)]
    val_labels = labels[np.in1d(folder_filenames, val_ids)]
    train_filenames = folder_filenames[np.in1d(folder_filenames, train_ids)]
    val_filenames = folder_filenames[np.in1d(folder_filenames, val_ids)]
    print('Total Files: ', len(file_kic))
    print('Train Unique IDs: ', len(train_ids))
    print('Setting up generators... \n')
    train_gen = NPZ_Dataset(filenames=train_filenames, labels=train_labels)
    # The sampler's batch_indices are overwritten each step by the
    # ClusterBatchBuilder below.
    train_sampler = SubsetSequentialSampler(range(len(train_gen)), range(m * d))
    train_dataloader = utils.DataLoader(train_gen, num_workers=1,
                                        batch_size=m * d, shuffle=False,
                                        sampler=train_sampler)
    val_gen = NPZ_Dataset(filenames=val_filenames, labels=val_labels)
    val_dataloader = utils.DataLoader(val_gen, num_workers=4)
    trainloader, testloader, trainset, testset = train_dataloader, val_dataloader, train_gen, val_gen
    emb_dim = args.embed_dim
    epoch_steps = len(trainloader)
    n_steps = epoch_steps * 50 * 2
    cluster_refresh_interval = epoch_steps
    model = SLOSH_Embedding(embed_size=emb_dim)
    model.cuda()
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    minibatch_magnet_loss = MagnetLoss()
    # Get initial embedding
    initial_reps = compute_reps(model, train_gen, 400)
    # Create batcher
    batch_builder = ClusterBatchBuilder(train_labels, k, m, d)
    batch_builder.update_clusters(initial_reps)
    batch_losses = []
    batch_example_inds, batch_class_inds = batch_builder.gen_batch()
    trainloader.sampler.batch_indices = batch_example_inds
    model.train()
    losses = AverageMeter()
    for i in tqdm(range(n_steps)):
        # NOTE(review): with SubsetSequentialSampler restricted to m*d
        # indices, this inner loop presumably yields a single batch per
        # pass — confirm against the sampler implementation.
        for batch_idx, (img, target) in enumerate(trainloader):
            img = img.cuda().float()
            optimizer.zero_grad()
            output, _ = model(img)
            batch_loss, batch_example_losses = minibatch_magnet_loss(
                output, batch_class_inds, m, d, alpha)
            batch_loss.backward()
            optimizer.step()
            # Update loss index
            batch_builder.update_losses(batch_example_inds, batch_example_losses)
            batch_losses.append(batch_loss.item())
            if not i % 1000:
                print('Epoch %d, Loss: %.4f' % (i, batch_loss))
            if not i % cluster_refresh_interval:
                print("Refreshing clusters")
                reps = compute_reps(model, trainset, 400)
                batch_builder.update_clusters(reps)
            if not i % 2000:
                # Periodically dump train/val embedding plots and a checkpoint.
                n_plot = 10000
                plot_embedding(X=compute_reps(model, trainset, 400)[:n_plot],
                               y=train_labels[:n_plot],
                               name=str(i) + '_train_embed%d' % emb_dim,
                               save_embed=True, filename=train_filenames,
                               batch_builder=batch_builder)
                plot_embedding(X=compute_reps(model, testset, 400)[:n_plot],
                               y=val_labels[:n_plot],
                               name=str(i) + '__val_embed%d' % emb_dim,
                               save_embed=False, filename=val_filenames)
                torch.save(
                    model.state_dict(),
                    args.model_folder + str(i) + "_train_embed%d.torchmodel" % emb_dim)
            # Draw the next cluster-driven batch and point the sampler at it.
            batch_example_inds, batch_class_inds = batch_builder.gen_batch()
            trainloader.sampler.batch_indices = batch_example_inds
            losses.update(batch_loss, 1)
class OpenPose(object):
    """
    The class for Pose Estimation. Include train, val, test & predict.
    """
    def __init__(self, configer):
        self.configer = configer
        # Running meters for timing and losses.
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.vis = PoseVisualizer(configer)
        self.loss_manager = PoseLossManager(configer)
        self.model_manager = PoseModelManager(configer)
        self.data_loader = PoseDataLoader(configer)
        self.module_utilizer = ModuleUtilizer(configer)
        # Populated by init_model().
        self.pose_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """Build the network, loaders, optimizer and loss for training."""
        self.pose_net = self.model_manager.pose_detector()
        self.iters = 0
        self.pose_net, _ = self.module_utilizer.load_net(self.pose_net)
        self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.pose_net, self.iters)
        if self.configer.get('dataset') == 'coco':
            self.train_loader = self.data_loader.get_trainloader(OPCocoLoader)
            self.val_loader = self.data_loader.get_valloader(OPCocoLoader)
        else:
            Log.error('Dataset: {} is not valid!'.format(self.configer.get('dataset')))
            exit(1)
        self.mse_loss = self.loss_manager.get_pose_loss('mse_loss')

    def __train(self):
        """
        Train function of every epoch during train phase.
        """
        self.pose_net.train()
        start_time = time.time()
        # data_tuple: (inputs, heatmap, maskmap, vecmap)
        for i, data_tuple in enumerate(self.train_loader):
            self.data_time.update(time.time() - start_time)
            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                exit(0)
            # FIX: `async=True` is a SyntaxError on Python 3.7+ (`async` is a
            # keyword); use the modern `non_blocking=True`. The deprecated
            # Variable wrappers are dropped — tensors autograd directly.
            inputs = data_tuple[0].cuda(non_blocking=True)
            heatmap = data_tuple[1].cuda(non_blocking=True)
            maskmap = None
            if len(data_tuple) > 2:
                maskmap = data_tuple[2].cuda(non_blocking=True)
            # Forward pass.
            paf_out, heatmap_out = self.pose_net(inputs)
            self.vis.vis_paf(paf_out, inputs.data.cpu().squeeze().numpy().transpose(1, 2, 0),
                             name='paf_out')
            # Compute the loss of the train batch & backward.
            loss_heatmap = self.mse_loss(heatmap_out, heatmap, maskmap)
            loss = loss_heatmap
            if len(data_tuple) > 3:
                vecmap = data_tuple[3].cuda(non_blocking=True)
                self.vis.vis_paf(vecmap, inputs.data.cpu().squeeze().numpy().transpose(1, 2, 0),
                                 name='paf')
                loss_associate = self.mse_loss(paf_out, vecmap, maskmap)
                loss += loss_associate
            # FIX: loss.data[0] indexes a 0-dim tensor (errors on modern
            # PyTorch); use .item(), consistent with the rest of the file.
            self.train_losses.update(loss.item(), inputs.size(0))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1
            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                Log.info('Train Iteration: {0}\t'
                         'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                         'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n'
                         'Learning rate = {2}\n'
                         'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                             self.iters, self.configer.get('solver', 'display_iter'),
                             self.lr, batch_time=self.batch_time,
                             data_time=self.data_time, loss=self.train_losses))
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()
            # Check to val the current model.
            if self.val_loader is not None and \
                    self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()
            # Adjust the learning rate after every iteration.
            self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.pose_net, self.iters)

    def __val(self):
        """
        Validation function during the train phase.
        """
        self.pose_net.eval()
        start_time = time.time()
        # FIX: the removed `volatile=True` flag is replaced by torch.no_grad().
        with torch.no_grad():
            for j, data_tuple in enumerate(self.val_loader):
                inputs = data_tuple[0].cuda(non_blocking=True)
                heatmap = data_tuple[1].cuda(non_blocking=True)
                maskmap = None
                if len(data_tuple) > 2:
                    maskmap = data_tuple[2].cuda(non_blocking=True)
                # Forward pass.
                paf_out, heatmap_out = self.pose_net(inputs)
                # Compute the loss of the val batch.
                loss_heatmap = self.mse_loss(heatmap_out, heatmap, maskmap)
                loss = loss_heatmap
                if len(data_tuple) > 3:
                    vecmap = data_tuple[3].cuda(non_blocking=True)
                    loss_associate = self.mse_loss(paf_out, vecmap, maskmap)
                    loss = loss_heatmap + loss_associate
                self.val_losses.update(loss.item(), inputs.size(0))
                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()
        self.module_utilizer.save_net(self.pose_net, self.iters)
        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        self.batch_time.reset()
        self.val_losses.reset()
        self.pose_net.train()

    def train(self):
        """Run the training loop until the configured max iteration count."""
        cudnn.benchmark = True
        while self.iters < self.configer.get('solver', 'max_iter'):
            self.__train()
            if self.iters == self.configer.get('solver', 'max_iter'):
                break
def valid_epoch(self, epoch):
    """Validate the detector for one epoch; checkpoint on improved loss."""
    batch_time = AverageMeter()
    total_losses = AverageMeter()
    loc_losses = AverageMeter()
    cls_losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    self.model.eval()
    test_loss = 0
    gt_boxes = []
    gt_labels = []
    pred_boxes = []
    pred_labels = []
    pred_scores = []
    end = time.time()
    log = ''
    for batch_idx, (inputs, loc_targets, cls_targets) in enumerate(self.valid_dataloader):
        with torch.no_grad():
            gt_boxes.append(loc_targets.squeeze(0))
            gt_labels.append(cls_targets.squeeze(0))
            inputs = Variable(inputs)
            loc_targets = Variable(loc_targets)
            cls_targets = Variable(cls_targets)
            if self.opt.num_gpus:
                inputs = inputs.cuda()
                loc_targets = loc_targets.cuda()
                cls_targets = cls_targets.cuda()
            loc_preds, cls_preds = self.model(inputs)
            loss, loc_loss, cls_loss = self.criterion(loc_preds, loc_targets,
                                                      cls_preds, cls_targets)
            # box_preds, label_preds, score_preds = self.box_coder.decode(
            #     loc_preds.cpu().data.squeeze(),
            #     F.softmax(cls_preds.squeeze(), dim=1).cpu().data,
            #     score_thresh=0.6,
            #     nms_thresh=0.45)
            # pred_boxes.append(box_preds)
            # pred_labels.append(label_preds)
            # pred_scores.append(score_preds)
            # FIX: `loss.data[0]` is legacy 0-dim tensor indexing and raises on
            # modern PyTorch; use .item(), consistent with the other trainers.
            total_losses.update(loss.item(), inputs.data.size(0))
            loc_losses.update(loc_loss.item(), inputs.data.size(0))
            cls_losses.update(cls_loss.item(), inputs.data.size(0))
            batch_time.update(time.time() - end)
            end = time.time()
            test_loss += loss.item()
            if batch_idx % self.opt.print_freq_eval == 0:
                print('Validation[%d/%d] Total Loss: %.4f, Loc Loss: %.4f, Cls Loss: %.4f'
                      % (batch_idx, len(self.valid_dataloader), loss.item(),
                         loc_loss.item(), cls_loss.item()))
    # Mean loss per batch (not per sample).
    test_loss /= len(self.valid_dataloader)
    if test_loss < self.best_loss:
        print('Saving..')
        # self.model.module — assumes a DataParallel-wrapped model.
        state = {
            'model': self.model.module.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_loss': test_loss,
            'epoch': epoch,
        }
        torch.save(state, os.path.join(self.opt.expr_dir, 'model_best.pth'))
        self.best_loss = test_loss
    print('[*] Model %s,\tCurrent Loss: %f\tBest Loss: %f'
          % (self.opt.model, test_loss, self.best_loss))
def valid_epoch(self, epoch):
    """Validate the three-output classifier and checkpoint on best loss.

    The model predicts three labels (first / middle / last) per input; the
    total loss is their sum. Saves ``model_best.pth`` when the mean total
    loss improves, then prints per-head accuracies.

    Args:
        epoch: current epoch index, stored in the checkpoint.
    """
    batch_time = AverageMeter()
    total_losses = AverageMeter()
    first_losses = AverageMeter()
    middle_losses = AverageMeter()
    last_losses = AverageMeter()
    correct_f = 0
    correct_m = 0
    correct_l = 0

    self.model.eval()
    test_loss = 0
    end = time.time()

    for batch_idx, (inputs, label_first, label_middle, label_last) in enumerate(self.valid_dataloader):
        with torch.no_grad():
            inputs = inputs.to(self.opt.device)
            label_first = label_first.to(self.opt.device)
            label_middle = label_middle.to(self.opt.device)
            label_last = label_last.to(self.opt.device)

            output_first, output_middle, output_last = self.model(inputs)
            loss_first = self.criterion_first(output_first, label_first)
            loss_middle = self.criterion_middle(output_middle, label_middle)
            loss_last = self.criterion_last(output_last, label_last)
            loss = loss_first + loss_middle + loss_last

            total_losses.update(loss.item(), inputs.size(0))
            first_losses.update(loss_first.item(), inputs.size(0))
            middle_losses.update(loss_middle.item(), inputs.size(0))
            last_losses.update(loss_last.item(), inputs.size(0))

            # argmax over class dimension; .item() keeps the counters plain
            # Python ints instead of accumulating 0-dim tensors.
            pred_f = output_first.data.max(1, keepdim=True)[1].cpu()
            pred_m = output_middle.data.max(1, keepdim=True)[1].cpu()
            pred_l = output_last.data.max(1, keepdim=True)[1].cpu()
            correct_f += pred_f.eq(label_first.cpu().view_as(pred_f)).sum().item()
            correct_m += pred_m.eq(label_middle.cpu().view_as(pred_m)).sum().item()
            correct_l += pred_l.eq(label_last.cpu().view_as(pred_l)).sum().item()

            batch_time.update(time.time() - end)
            end = time.time()

            test_loss += loss.item()
            if batch_idx % self.opt.print_freq_eval == 0:
                print(
                    'Validation[%d/%d] Total Loss: %.4f, First Loss: %.4f, Middle Loss: %.4f, Last Loss: %.4f'
                    % (batch_idx, len(self.valid_dataloader), loss.item(),
                       loss_first.item(), loss_middle.item(), loss_last.item()))

    num_test_data = len(self.valid_dataloader.dataset)
    accuracy_f = 100. * correct_f / num_test_data
    accuracy_m = 100. * correct_m / num_test_data
    accuracy_l = 100. * correct_l / num_test_data
    test_loss /= len(self.valid_dataloader)

    if test_loss < self.best_loss:
        print('Saving..')
        state = {
            'model': self.model.module.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_loss': test_loss,
            'epoch': epoch,
        }
        torch.save(state, os.path.join(self.opt.expr_dir, 'model_best.pth'))
        self.best_loss = test_loss

    print('[*] Model %s,\tCurrent Loss: %f\tBest Loss: %f'
          % (self.opt.model, test_loss, self.best_loss))
    print(
        'Val Accuracy_F: {}/{} ({:.0f}%) | Val Accuracy_M: {}/{} ({:.0f}%) | Val Accuracy_L: {}/{} ({:.0f}%)\n'
        .format(correct_f, num_test_data, accuracy_f, correct_m,
                num_test_data, accuracy_m, correct_l, num_test_data,
                accuracy_l))
def run_magnet_loss():
    '''
    Test function for the magnet loss.

    Trains an embedding network (LeNet for MNIST, VGG for CIFAR-10) with the
    magnet loss for ``n_steps`` iterations, periodically refreshing the
    cluster assignments and plotting the embedding, then saves a smoothed
    loss curve.
    '''
    m = 8          # clusters sampled per batch
    d = 8          # examples sampled per cluster
    k = 8          # clusters per class
    alpha = 1.0    # magnet-loss margin
    batch_size = m * d

    global plotter
    plotter = VisdomLinePlotter(env_name=args.name)

    trainloader, testloader, trainset, testset, n_train = load_dataset(args)

    emb_dim = 2
    n_epochs = 15
    epoch_steps = len(trainloader)
    n_steps = epoch_steps * 15
    cluster_refresh_interval = epoch_steps

    if args.mnist:
        model = torch.nn.DataParallel(LeNet(emb_dim)).cuda()
    if args.cifar10:
        # NOTE(review): unlike the MNIST branch, this model is never moved to
        # the GPU even though inputs are .cuda()'d below — confirm placement.
        model = torch.nn.DataParallel(VGG(depth=16, num_classes=emb_dim))
    print(model)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    minibatch_magnet_loss = MagnetLoss()

    # torchvision <0.4-style dataset attributes — TODO confirm dataset version
    images = getattr(trainset, 'train_data')
    labels = getattr(trainset, 'train_labels')

    # Get initial embedding
    initial_reps = compute_reps(model, trainset, 400)

    if args.cifar10:
        labels = np.array(labels, dtype=np.float32)

    # Create batcher
    batch_builder = ClusterBatchBuilder(labels, k, m, d)
    batch_builder.update_clusters(initial_reps)

    batch_losses = []
    batch_example_inds, batch_class_inds = batch_builder.gen_batch()
    trainloader.sampler.batch_indices = batch_example_inds

    _ = model.train()
    losses = AverageMeter()

    for i in tqdm(range(n_steps)):
        # The sampler yields exactly the batch selected by the batch builder.
        for batch_idx, (img, target) in enumerate(trainloader):
            img = Variable(img).cuda()
            target = Variable(target).cuda()

            optimizer.zero_grad()
            output, features = model(img)
            batch_loss, batch_example_losses = minibatch_magnet_loss(
                output, batch_class_inds, m, d, alpha)
            batch_loss.backward()
            optimizer.step()

        # Update loss index
        batch_builder.update_losses(batch_example_inds, batch_example_losses)

        # .item() replaces the removed ``batch_loss.data[0]`` 0-dim indexing.
        batch_losses.append(batch_loss.item())

        if not i % 1000:
            print(i, batch_loss)

        if not i % cluster_refresh_interval:
            print("Refreshing clusters")
            reps = compute_reps(model, trainset, 400)
            batch_builder.update_clusters(reps)

        if not i % 2000:
            n_plot = 10000
            plot_embedding(compute_reps(model, trainset, 400)[:n_plot],
                           labels[:n_plot], name=i)

        # Select the next batch before the loader is iterated again.
        batch_example_inds, batch_class_inds = batch_builder.gen_batch()
        trainloader.sampler.batch_indices = batch_example_inds

        # Feed the meter a float so losses.avg is a plain number.
        losses.update(batch_loss.item(), 1)

        # Log the training loss
        if args.visdom:
            plotter.plot('loss', 'train', i, losses.avg)

    # Plot loss curve
    plot_smooth(batch_losses, "batch-losses")
def run_epoch(self, phase, epoch, loader, stats_meter, stats_no_meter, results):
    """Run one train/test epoch over `loader` and record per-batch stats.

    `stats_meter` and `stats_no_meter` map stat names to callables that are
    invoked with `locals()` — they read the local variables of this method
    by name. The locals `loss1..loss3`, `baseline1..baseline3` and `ratios`
    therefore look unused here but are presumably consumed by those
    callables; do not rename or remove them without checking the callers.

    Returns `results` (a list of per-epoch stat dicts); in the 'test' phase
    additionally returns the averaged loss and the three per-output losses.
    """
    # keep track of how long data loading and processing takes
    batch_time = AverageMeter()
    data_time = AverageMeter()
    # other meters (top1, top5, loss, etc.)
    meters = {}
    for name, func in stats_meter.items():
        meters[name] = AverageMeter()
    # change model to train or eval mode
    if phase == 'train':
        self.model.train()
    elif phase == 'test':
        self.model.eval()
    else:
        raise Exception('Phase must be train or test!')
    t = time.time()
    # iterate over all batches
    for iter, batch in enumerate(loader, 1):
        data_time.update(time.time() - t)
        # batch input and target output
        # if self.loader == 'basic_meta_data':
        input_text = batch[0]  # tweets
        input_meta = batch[1]  # metadata
        target = batch[2]
        # transfer data to gpu
        input_text = input_text.to(self.device)
        input_meta = input_meta.to(self.device)
        target = target.to(self.device)
        # gradients only in the train phase
        torch.set_grad_enabled(phase == 'train')
        est = self.model(input_text=input_text, input_meta=input_meta)
        loss = 0.0
        three_losses = []
        three_baseline_losses = []
        # one loss per output "flavor"; total loss is their mean
        for output_idx in range(3):
            flavor_est = est[:, output_idx]
            flavor_target = target[:, output_idx]
            flavor_loss = self.criterion(flavor_est, flavor_target)
            three_losses.append(flavor_loss)
            loss += flavor_loss / 3.0
            # Baseline loss — columns 6..8 of the metadata act as a baseline
            # prediction for the same target (TODO confirm column layout)
            baseline_flavor_est = input_meta[:, output_idx + 6]
            baseline_flavor_loss = self.criterion(baseline_flavor_est, flavor_target)
            three_baseline_losses.append(baseline_flavor_loss)
        # named locals consumed via locals() by stats_meter/stats_no_meter
        loss1 = three_losses[0]
        loss2 = three_losses[1]
        loss3 = three_losses[2]
        baseline1 = three_baseline_losses[0]
        baseline2 = three_baseline_losses[1]
        baseline3 = three_baseline_losses[2]
        ratios = []
        for output_idx in range(3):
            ratios.append(three_baseline_losses[output_idx] / three_losses[output_idx])
        # backward pass
        if phase == 'train':
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        # update meters (top1, top5, loss, etc.)
        for name, func in stats_meter.items():
            meters[name].update(func(locals()), input_text.data.shape[0])
        batch_time.update(time.time() - t)
        output = '{}\t' \
            'Network: {}\t' \
            'Epoch: [{}/{}][{}/{}]\t' \
            'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
            'Data: {data_time.val:.3f} ({data_time.avg:.3f})\t' \
            .format(phase.capitalize(), self.net, epoch, self.epochs, iter,
                    len(loader), batch_time=batch_time, data_time=data_time)
        for name, meter in meters.items():
            output = output + '{}: {meter.val:.4f} ({meter.avg:.4f})\t' \
                .format(name, meter=meter)
        print(output)
        sys.stdout.flush()
        if iter == len(loader):
            # save the following into the csv
            stats = {
                'phase': phase,
                'epoch': epoch,
                'iter': iter,
                'iters': len(loader),
                'iter_batch_time': batch_time.val,
                'avg_batch_time': batch_time.avg,
                'iter_data_time': data_time.val,
                'avg_data_time': data_time.avg,
            }
            # meters that will be saved into the csv
            for name, meter in meters.items():
                stats['iter_' + name] = meter.val
                stats['avg_' + name] = meter.avg
                stats['sum_' + name] = meter.sum
                stats['count_' + name] = meter.count
            # stats that have no meters but will be saved into the csv
            for name, func in stats_no_meter.items():
                stats[name] = func(locals())
            # save all fields in "self" into the csv
            # these include (almost) all the fields in train.py
            results.append(dict(self.__getstate__(), **stats))
        t = time.time()
    # `stats` is bound on the last loop iteration (iter == len(loader))
    if phase == 'test':
        return results, stats['avg_loss'], stats['avg_loss1'], stats[
            'avg_loss2'], stats['avg_loss3']
    else:
        return results
def valid_epoch(self, epoch): batch_time = AverageMeter() total_losses = AverageMeter() loc_losses = AverageMeter() cls_losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() """ validate """ self.model.eval() test_loss = 0 gt_boxes = [] gt_labels = [] end = time.time() log = '' for batch_idx, (inputs, loc_targets, cls_targets) in enumerate(self.valid_dataloader): with torch.no_grad(): gt_boxes.append(loc_targets.squeeze(0)) gt_labels.append(cls_targets.squeeze(0)) inputs = Variable(inputs) loc_targets = Variable(loc_targets) cls_targets = Variable(cls_targets) if self.opt.num_gpus: inputs = inputs.cuda() loc_targets = loc_targets.cuda() cls_targets = cls_targets.cuda() loc_preds, cls_preds = self.model(inputs) loss, loc_loss, cls_loss = self.criterion(loc_preds, loc_targets, cls_preds, cls_targets) total_losses.update(loss.data[0], inputs.data.size(0)) loc_losses.update(loc_loss.data[0], inputs.data.size(0)) cls_losses.update(cls_loss.data[0], inputs.data.size(0)) batch_time.update(time.time() - end) end = time.time() test_loss += loss.data[0] if batch_idx % self.opt.print_freq_eval == 0: print('Validation[%d/%d] Total Loss: %.4f, Loc Loss: %.4f, Cls Loss: %.4f' % (batch_idx, len(self.valid_dataloader), loss.data[0], loc_loss.data[0], cls_loss.data[0])) test_loss /= len(self.valid_dataloader) if test_loss < self.best_loss: print('Saving..') state = { 'model': self.model.module.state_dict(), 'optimizer': self.optimizer.state_dict(), 'best_loss': test_loss, 'epoch': epoch, } torch.save(state, os.path.join(self.opt.expr_dir, 'model_best.pth')) self.best_loss = test_loss print('[*] Model %s,\tCurrent Loss: %f\tBest Loss: %f' % (self.opt.model, test_loss, self.best_loss))
def train(dataloader, optimizer, model):
    """Train the tracker for ``cfg.GRAD.EPOCHS`` epochs.

    Each step forwards an examplar/search pair plus a test pair through the
    model, backprops the total loss, logs per-loss TensorBoard summaries,
    and saves a checkpoint at the end of every epoch.

    Args:
        dataloader: training loader; its dataset exposes ``shuffle()``.
        optimizer: optimizer over the model parameters.
        model: network returning a dict of losses from ``forward``.
    """
    step = 0  # global iteration counter (renamed from `iter`: shadowed builtin)
    average_meter = AverageMeter()
    num_per_epoch = len(dataloader.dataset) // cfg.GRAD.BATCH_SIZE
    tb_writer = SummaryWriter(cfg.GRAD.LOG_DIR)

    for epoch in range(cfg.GRAD.EPOCHS):
        dataloader.dataset.shuffle()
        begin_time = time.time()

        for data in dataloader:
            examplar_img = data['examplar_img'].cuda()
            train_search_img = data['train_search_img'].cuda()
            train_gt_cls = data['train_gt_cls'].cuda()
            train_gt_delta = data['train_gt_delta'].cuda()
            train_delta_weight = data['train_delta_weight'].cuda()
            test_search_img = data['test_search_img'].cuda()
            test_gt_cls = data['test_gt_cls'].cuda()
            test_gt_delta = data['test_gt_delta'].cuda()
            test_delta_weight = data['test_delta_weight'].cuda()
            data_time = time.time() - begin_time

            losses = model.forward(examplar_img, train_search_img, train_gt_cls,
                                   train_gt_delta, train_delta_weight,
                                   test_search_img, test_gt_cls, test_gt_delta,
                                   test_delta_weight)
            cls_loss = losses['cls_loss']
            loc_loss = losses['loc_loss']
            loss = losses['total_loss']

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_time = time.time() - begin_time
            batch_info = {'data_time': data_time, 'batch_time': batch_time}
            average_meter.update(**batch_info)

            # add summary writer: histograms for examplar stats, scalars otherwise
            for k, v in losses.items():
                if k.startswith('examplar'):
                    tb_writer.add_histogram(k, v, step)
                else:
                    tb_writer.add_scalar(k, v, step)

            if step % cfg.TRAIN.PRINT_EVERY == 0:
                logger.info(
                    'epoch: {}, iter: {}, init_cls_loss: {}, init_loc_loss: {}, init_loss: {}'
                    .format(epoch + 1, step, losses['init_cls_loss'].item(),
                            losses['init_loc_loss'].item(),
                            losses['init_total_loss'].item()))
                logger.info(
                    'epoch: {}, iter: {}, cls_loss: {}, loc_loss: {}, loss: {}'
                    .format(epoch + 1, step, cls_loss.item(), loc_loss.item(),
                            loss.item()))
                print_speed(step + 1, average_meter.batch_time.avg,
                            cfg.GRAD.EPOCHS * num_per_epoch)

            begin_time = time.time()
            step += 1

        # save train_state once per epoch
        if not os.path.exists(cfg.GRAD.SNAPSHOT_DIR):
            os.makedirs(cfg.GRAD.SNAPSHOT_DIR)
        # put the update to the rpn state
        state = {
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "epoch": epoch,
        }
        save_path = "{}/checkpoint_e{}.pth".format(cfg.GRAD.SNAPSHOT_DIR, epoch)
        logger.info("save state to {}".format(save_path))
        torch.save(state, save_path)
def evaluate_testset(test_data_loader, generator, loss_fn, embed_space_evaluator, args):
    """Evaluate a gesture-generation model on the test set.

    Dispatches on ``args.model`` to run the appropriate forward pass, then
    accumulates the loss, mean absolute joint error and acceleration
    difference; when ``embed_space_evaluator`` is provided it also reports
    FGD / feature distance. Returns a dict of averaged metrics.
    """
    # to evaluation mode
    generator.train(False)

    if embed_space_evaluator:
        embed_space_evaluator.reset()

    losses = AverageMeter('loss')
    joint_mae = AverageMeter('mae_on_joint')
    accel = AverageMeter('accel')
    start = time.time()

    with torch.no_grad():
        for iter_idx, data in enumerate(test_data_loader, 0):
            in_text, text_lengths, in_text_padded, _, target_vec, in_audio, in_spec, aux_info = data
            batch_size = target_vec.size(0)

            in_text = in_text.to(device)
            in_text_padded = in_text_padded.to(device)
            in_audio = in_audio.to(device)
            in_spec = in_spec.to(device)
            target = target_vec.to(device)

            # speaker input: a random speaker id per sample when the generator
            # carries a speaker embedding model
            speaker_model = utils.train_utils.get_speaker_model(generator)
            if speaker_model:
                vid_indices = [
                    random.choice(list(speaker_model.word2index.values()))
                    for _ in range(batch_size)
                ]
                vid_indices = torch.LongTensor(vid_indices).to(device)
            else:
                vid_indices = None

            # seed sequence: first n_pre_poses frames of the target plus an
            # extra trailing channel marking constrained frames
            pre_seq = target.new_zeros(
                (target.shape[0], target.shape[1], target.shape[2] + 1))
            pre_seq[:, 0:args.n_pre_poses, :-1] = target[:, 0:args.n_pre_poses]
            pre_seq[:, 0:args.n_pre_poses, -1] = 1  # indicating bit for constraints
            pre_seq_partial = pre_seq[:, 0:args.n_pre_poses, :-1]

            # model-specific forward pass and loss
            if args.model == 'joint_embedding':
                loss, out_dir_vec = eval_embed(in_text_padded, in_audio,
                                               pre_seq_partial, target,
                                               generator, mode='speech')
            elif args.model == 'gesture_autoencoder':
                loss, _ = eval_embed(in_text_padded, in_audio, pre_seq_partial,
                                     target, generator)
            elif args.model == 'seq2seq':
                out_dir_vec = generator(in_text, text_lengths, target, None)
                loss = loss_fn(out_dir_vec, target)
            elif args.model == 'speech2gesture':
                out_dir_vec = generator(in_spec, pre_seq_partial)
                loss = loss_fn(out_dir_vec, target)
            elif args.model == 'multimodal_context':
                out_dir_vec, *_ = generator(pre_seq, in_text_padded, in_audio,
                                            vid_indices)
                loss = F.l1_loss(out_dir_vec, target)
            else:
                assert False

            losses.update(loss.item(), batch_size)

            # gesture_autoencoder produces no pose output to compare
            if args.model != 'gesture_autoencoder':
                if embed_space_evaluator:
                    embed_space_evaluator.push_samples(in_text_padded, in_audio,
                                                       out_dir_vec, target)

                # calculate MAE of joint coordinates; note out_dir_vec and
                # target_vec are de-normalized in place by adding the mean
                # direction vector before converting to poses
                out_dir_vec = out_dir_vec.cpu().numpy()
                out_dir_vec += np.array(args.mean_dir_vec).squeeze()
                out_joint_poses = convert_dir_vec_to_pose(out_dir_vec)
                target_vec = target_vec.cpu().numpy()
                target_vec += np.array(args.mean_dir_vec).squeeze()
                target_poses = convert_dir_vec_to_pose(target_vec)

                # presumably the output either covers all n_poses frames or
                # only the generated tail — TODO confirm against the models
                if out_joint_poses.shape[1] == args.n_poses:
                    diff = out_joint_poses[:, args.
                                           n_pre_poses:] - target_poses[:, args.
                                                                        n_pre_poses:]
                else:
                    diff = out_joint_poses - target_poses[:, args.n_pre_poses:]
                mae_val = np.mean(np.absolute(diff))
                joint_mae.update(mae_val, batch_size)

                # accel: mean difference of second temporal derivatives
                target_acc = np.diff(target_poses, n=2, axis=1)
                out_acc = np.diff(out_joint_poses, n=2, axis=1)
                accel.update(np.mean(np.abs(target_acc - out_acc)), batch_size)

    # back to training mode
    generator.train(True)

    # print
    ret_dict = {'loss': losses.avg, 'joint_mae': joint_mae.avg}
    elapsed_time = time.time() - start
    if embed_space_evaluator and embed_space_evaluator.get_no_of_samples() > 0:
        frechet_dist, feat_dist = embed_space_evaluator.get_scores()
        logging.info(
            '[VAL] loss: {:.3f}, joint mae: {:.5f}, accel diff: {:.5f}, FGD: {:.3f}, feat_D: {:.3f} / {:.1f}s'
            .format(losses.avg, joint_mae.avg, accel.avg, frechet_dist,
                    feat_dist, elapsed_time))
        ret_dict['frechet'] = frechet_dist
        ret_dict['feat_dist'] = feat_dist
    else:
        logging.info('[VAL] loss: {:.3f}, joint mae: {:.3f} / {:.1f}s'.format(
            losses.avg, joint_mae.avg, elapsed_time))

    return ret_dict
class FCNSegmentor(object):
    """FCN-based semantic segmentation runner: train, validate & predict."""

    def __init__(self, configer):
        self.configer = configer
        # timing / loss meters shared between the train and val loops
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.seg_visualizer = SegVisualizer(configer)
        self.seg_loss_manager = SegLossManager(configer)
        self.module_utilizer = ModuleUtilizer(configer)
        self.seg_model_manager = SegModelManager(configer)
        self.seg_data_loader = SegDataLoader(configer)
        self.seg_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """Build the network, loaders, optimizer and pixel loss."""
        self.seg_net = self.seg_model_manager.seg_net()
        self.iters = 0
        self.seg_net, _ = self.module_utilizer.load_net(self.seg_net)
        self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.seg_net, self.iters)

        if self.configer.get('dataset') == 'cityscape':
            self.train_loader = self.seg_data_loader.get_trainloader(FSCityScapeLoader)
            self.val_loader = self.seg_data_loader.get_valloader(FSCityScapeLoader)
        else:
            Log.error('Dataset: {} is not valid!'.format(self.configer.get('dataset')))
            exit(1)

        self.pixel_loss = self.seg_loss_manager.get_seg_loss('cross_entropy_loss')

    def __train(self):
        """
        Train function of every epoch during train phase.
        """
        self.seg_net.train()
        start_time = time.time()

        # data_tuple: (inputs, heatmap, maskmap, tagmap, num_objects)
        for i, data_tuple in enumerate(self.train_loader):
            self.data_time.update(time.time() - start_time)

            # Change the data type.
            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                exit(0)
            # `cuda(async=True)` is a syntax error on Python >= 3.7 (`async`
            # is reserved); `non_blocking` is the supported spelling. The
            # deprecated Variable wrappers are dropped as well.
            inputs = data_tuple[0].cuda(non_blocking=True)
            targets = data_tuple[1].cuda(non_blocking=True)

            # Forward pass.
            outputs = self.seg_net(inputs)

            # Compute the loss of the train batch & backward.
            loss = self.pixel_loss(outputs, targets)
            # .item() replaces the removed `loss.data[0]` idiom.
            self.train_losses.update(loss.item(), inputs.size(0))

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                Log.info('Train Iteration: {0}\t'
                         'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                         'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n'
                         'Learning rate = {2}\n'
                         'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                             self.iters, self.configer.get('solver', 'display_iter'),
                             self.lr, batch_time=self.batch_time,
                             data_time=self.data_time, loss=self.train_losses))
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and \
                    self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()

            self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.seg_net, self.iters)

    def __val(self):
        """
        Validation function during the train phase.
        """
        self.seg_net.eval()
        start_time = time.time()

        # torch.no_grad() replaces the deprecated volatile=True Variables.
        with torch.no_grad():
            for j, data_tuple in enumerate(self.val_loader):
                inputs = data_tuple[0].cuda(non_blocking=True)
                targets = data_tuple[1].cuda(non_blocking=True)

                # Forward pass & loss of the val batch.
                outputs = self.seg_net(inputs)
                loss = self.pixel_loss(outputs, targets)
                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        self.module_utilizer.save_net(self.seg_net, self.iters)

        # Print the log info & reset the states.
        Log.info('Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
                 'Loss {loss.avg:.8f}\n'.format(
                     batch_time=self.batch_time, loss=self.val_losses))
        self.batch_time.reset()
        self.val_losses.reset()
        self.seg_net.train()

    def train(self):
        """Outer training loop: run epochs until the iteration budget is spent."""
        cudnn.benchmark = True
        while self.iters < self.configer.get('solver', 'max_iter'):
            self.__train()
            if self.iters == self.configer.get('solver', 'max_iter'):
                break
def test_net(cfg, epoch_idx=-1, test_data_loader=None, test_writer=None, model=None):
    """Evaluate a point-cloud completion model on the test set.

    Builds the test loader and/or loads a checkpointed model when they are
    not supplied, then accumulates Chamfer distances (cd1..cd3) and the PMD
    loss overall and per taxonomy, prints a results table, logs to
    TensorBoard when ``test_writer`` is given, and returns the average cd3.

    NOTE: the AverageMeter used here is a project-local, list-valued variant
    (``update(list)``, ``val()``, ``avg(i)``, ``count(i)``, ``.items``), not
    the scalar meter used elsewhere.
    """
    # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use
    torch.backends.cudnn.benchmark = True

    if test_data_loader is None:
        # Set up data loader
        dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[
            cfg.DATASET.TEST_DATASET](cfg)
        test_data_loader = torch.utils.data.DataLoader(
            dataset=dataset_loader.get_dataset(
                utils.data_loaders.DatasetSubset.TEST),
            batch_size=1,
            num_workers=cfg.CONST.NUM_WORKERS,
            collate_fn=utils.data_loaders.collate_fn,
            pin_memory=True,
            shuffle=False)

    # Setup networks and initialize networks
    if model is None:
        model = Model(dataset=cfg.DATASET.TRAIN_DATASET)
        if torch.cuda.is_available():
            model = torch.nn.DataParallel(model).cuda()

        assert 'WEIGHTS' in cfg.CONST and cfg.CONST.WEIGHTS
        logging.info('Recovering from %s ...' % (cfg.CONST.WEIGHTS))
        checkpoint = torch.load(cfg.CONST.WEIGHTS)
        model.load_state_dict(checkpoint['model'])

    # Switch models to evaluation mode
    model.eval()

    n_samples = len(test_data_loader)
    test_losses = AverageMeter(['cd1', 'cd2', 'cd3', 'pmd'])
    test_metrics = AverageMeter(Metrics.names())
    category_metrics = dict()

    # Testing loop
    with tqdm(test_data_loader) as t:
        for model_idx, (taxonomy_id, model_id, data) in enumerate(t):
            # taxonomy_id arrives either as a string or a tensor scalar
            taxonomy_id = taxonomy_id[0] if isinstance(
                taxonomy_id[0], str) else taxonomy_id[0].item()
            model_id = model_id[0]

            with torch.no_grad():
                for k, v in data.items():
                    data[k] = utils.helpers.var_or_cuda(v)

                partial = data['partial_cloud']
                gt = data['gtcloud']

                # replicate each partial cloud 8x and re-subsample
                partial = random_subsample(
                    partial.repeat((1, 8, 1)).reshape(-1, 16384, 3))  # b*8, 2048, 3
                b, n, _ = partial.shape

                pcds, deltas = model(partial.contiguous())

                # Chamfer distances (x1e3) for the three refinement stages
                cd1 = chamfer_sqrt(pcds[0].reshape(-1, 16384, 3).contiguous(),
                                   gt).item() * 1e3
                cd2 = chamfer_sqrt(pcds[1].reshape(-1, 16384, 3).contiguous(),
                                   gt).item() * 1e3
                cd3 = chamfer_sqrt(pcds[2].reshape(-1, 16384, 3).contiguous(),
                                   gt).item() * 1e3

                # pmd loss: mean squared point-moving distance over the stages
                pmd_losses = []
                for delta in deltas:
                    pmd_losses.append(torch.sum(delta**2))
                pmd = torch.sum(torch.stack(pmd_losses)) / 3
                pmd_item = pmd.item()

                _metrics = [pmd_item, cd3]
                test_losses.update([cd1, cd2, cd3, pmd_item])
                test_metrics.update(_metrics)
                if taxonomy_id not in category_metrics:
                    category_metrics[taxonomy_id] = AverageMeter(
                        Metrics.names())
                category_metrics[taxonomy_id].update(_metrics)

                t.set_description(
                    'Test[%d/%d] Taxonomy = %s Sample = %s Losses = %s Metrics = %s'
                    % (model_idx + 1, n_samples, taxonomy_id, model_id, [
                        '%.4f' % l for l in test_losses.val()
                    ], ['%.4f' % m for m in _metrics]))

    # Print testing results
    print(
        '============================ TEST RESULTS ============================'
    )
    print('Taxonomy', end='\t')
    print('#Sample', end='\t')
    for metric in test_metrics.items:
        print(metric, end='\t')
    print()

    for taxonomy_id in category_metrics:
        print(taxonomy_id, end='\t')
        print(category_metrics[taxonomy_id].count(0), end='\t')
        for value in category_metrics[taxonomy_id].avg():
            print('%.4f' % value, end='\t')
        print()

    print('Overall', end='\t\t\t')
    for value in test_metrics.avg():
        print('%.4f' % value, end='\t')
    print('\n')

    # Add testing results to TensorBoard
    if test_writer is not None:
        test_writer.add_scalar('Loss/Epoch/cd1', test_losses.avg(0), epoch_idx)
        test_writer.add_scalar('Loss/Epoch/cd2', test_losses.avg(1), epoch_idx)
        test_writer.add_scalar('Loss/Epoch/cd3', test_losses.avg(2), epoch_idx)
        test_writer.add_scalar('Loss/Epoch/delta', test_losses.avg(3), epoch_idx)
        for i, metric in enumerate(test_metrics.items):
            test_writer.add_scalar('Metric/%s' % metric, test_metrics.avg(i),
                                   epoch_idx)

    return test_losses.avg(2)
def valid_epoch(self, epoch):
    """Validate the alphabet classifier, checkpoint on best loss and print a
    confusion matrix.

    Args:
        epoch: current epoch index; ``epoch + 1`` is stored in the checkpoint.
    """
    batch_time = AverageMeter()
    total_losses = AverageMeter()
    correct = 0

    self.model.eval()
    test_loss = 0
    end = time.time()

    # Collect per-batch predictions/labels; concatenated once after the loop
    # instead of the original O(n^2) incremental np.concatenate.
    pred_chunks = []
    label_chunks = []

    for batch_idx, (inputs, label) in enumerate(self.valid_dataloader):
        with torch.no_grad():
            inputs = inputs.to(self.opt.device)
            label = label.to(self.opt.device)

            output = self.model(inputs)
            loss = self.criterion(output, label)
            total_losses.update(loss.item(), inputs.size(0))

            # argmax over classes, shape (batch, 1)
            pred = output.data.max(1, keepdim=True)[1].cpu()
            pred_chunks.append(pred.numpy())
            label_chunks.append(label.cpu().numpy())

            # .item() keeps the counter a plain int (not a 0-dim tensor)
            correct += pred.eq(label.cpu().view_as(pred)).sum().item()

            batch_time.update(time.time() - end)
            end = time.time()

            test_loss += loss.item()
            if batch_idx % self.opt.print_freq_eval == 0:
                print('Validation[%d/%d] Total Loss: %.4f[%.4f]'
                      % (batch_idx, len(self.valid_dataloader), loss.item(),
                         test_loss / (batch_idx + 1)))

    pred_array = np.concatenate(pred_chunks)
    label_array = np.concatenate(label_chunks)

    num_test_data = len(self.valid_dataloader.dataset)
    accuracy = 100. * correct / num_test_data
    test_loss /= len(self.valid_dataloader)

    if test_loss < self.best_loss:
        print('Saving..')
        state = {
            'model': self.model.module.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_loss': test_loss,
            'epoch': epoch + 1,
        }
        torch.save(state, os.path.join(self.opt.expr_dir, 'model_best.pth'))
        self.best_loss = test_loss

    print('[*] Model %s,\tCurrent Loss: %f\tBest Loss: %f'
          % (self.opt.model, test_loss, self.best_loss))
    print('Val Accuracy: {}/{} ({:.0f}%)'.format(correct, num_test_data, accuracy))

    # Map numeric class ids to their characters for the confusion matrix.
    ALPHABET_list = list(ALPHABET)
    y_true_list = [ALPHABET_list[y_true] for y_true in label_array]
    y_pred_list = [ALPHABET_list[y_pred[0]] for y_pred in pred_array]
    # `labels` is keyword-only in scikit-learn >= 1.1; passing it
    # positionally raises a TypeError there.
    cm = confusion_matrix(y_true_list, y_pred_list, labels=ALPHABET_list)
    print_cm(cm, ALPHABET_list)
class ConvPoseMachine(object): """ The class for Pose Estimation. Include train, val, val & predict. """ def __init__(self, configer): self.configer = configer self.batch_time = AverageMeter() self.data_time = AverageMeter() self.train_losses = AverageMeter() self.val_losses = AverageMeter() self.pose_visualizer = PoseVisualizer(configer) self.loss_manager = PoseLossManager(configer) self.model_manager = PoseModelManager(configer) self.train_utilizer = ModuleUtilizer(configer) self.pose_net = None self.train_loader = None self.val_loader = None self.optimizer = None self.best_model_loss = None self.is_best = None self.lr = None self.iters = None def init_model(self, train_loader=None, val_loader=None): self.pose_net = self.model_manager.pose_detector() self.pose_net, self.iters = self.train_utilizer.load_net(self.pose_net) self.optimizer = self.train_utilizer.update_optimizer(self.pose_net, self.iters) self.train_loader = train_loader self.val_loader = val_loader self.heatmap_loss = self.loss_manager.get_pose_loss('heatmap_loss') def __train(self): """ Train function of every epoch during train phase. """ self.pose_net.train() start_time = time.time() # data_tuple: (inputs, heatmap, maskmap, tagmap, num_objects) for i, data_tuple in enumerate(self.train_loader): self.data_time.update(time.time() - start_time) # Change the data type. if len(data_tuple) < 2: Log.error('Train Loader Error!') exit(0) inputs = Variable(data_tuple[0].cuda(async=True)) heatmap = Variable(data_tuple[1].cuda(async=True)) maskmap = None if len(data_tuple) > 2: maskmap = Variable(data_tuple[2].cuda(async=True)) self.pose_visualizer.vis_tensor(heatmap, name='heatmap') self.pose_visualizer.vis_tensor((inputs*256+128)/255, name='image') # Forward pass. outputs = self.pose_net(inputs) self.pose_visualizer.vis_tensor(outputs, name='output') self.pose_visualizer.vis_peaks(inputs, outputs, name='peak') # Compute the loss of the train batch & backward. 
loss_heatmap = self.heatmap_loss(outputs, heatmap, maskmap) loss = loss_heatmap self.train_losses.update(loss.data[0], inputs.size(0)) self.optimizer.zero_grad() loss.backward() self.optimizer.step() # Update the vars of the train phase. self.batch_time.update(time.time() - start_time) start_time = time.time() self.iters += 1 # Print the log info & reset the states. if self.iters % self.configer.get('solver', 'display_iter') == 0: Log.info('Train Iteration: {0}\t' 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t' 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n' 'Learning rate = {2}\n' 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format( self.iters, self.configer.get('solver', 'display_iter'), self.lr, batch_time=self.batch_time, data_time=self.data_time, loss=self.train_losses)) self.batch_time.reset() self.data_time.reset() self.train_losses.reset() # Check to val the current model. if self.val_loader is not None and \ self.iters % self.configer.get('solver', 'test_interval') == 0: self.__val() self.optimizer = self.train_utilizer.update_optimizer(self.pose_net, self.iters) def __val(self): """ Validation function during the train phase. """ self.pose_net.eval() start_time = time.time() for j, data_tuple in enumerate(self.val_loader): # Change the data type. inputs = Variable(data_tuple[0].cuda(async=True), volatile=True) heatmap = Variable(data_tuple[1].cuda(async=True), volatile=True) maskmap = None if len(data_tuple) > 2: maskmap = Variable(data_tuple[2].cuda(async=True), volatile=True) # Forward pass. outputs = self.pose_net(inputs) self.pose_visualizer.vis_peaks(inputs, outputs, name='peak_val') # Compute the loss of the val batch. loss_heatmap = self.heatmap_loss(outputs, heatmap, maskmap) loss = loss_heatmap self.val_losses.update(loss.data[0], inputs.size(0)) # Update the vars of the val phase. self.batch_time.update(time.time() - start_time) start_time = time.time() # Print the log info & reset the states. 
Log.info( 'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t' 'Loss {loss.avg:.8f}\n'.format( batch_time=self.batch_time, loss=self.val_losses)) self.batch_time.reset() self.val_losses.reset() self.pose_net.train() def train(self): cudnn.benchmark = True while self.iters < self.configer.get('solver', 'max_iter'): self.__train() if self.iters == self.configer.get('solver', 'max_iter'): break def test(self, img_path=None, img_dir=None): if img_path is not None and os.path.exists(img_path): image = Image.open(img_path).convert('RGB')