def train(train_loader, model, optimizer, lr_scheduler, epoch, cfg):
    global tb_index, best_acc, cur_lr, logger
    cur_lr = lr_scheduler.get_cur_lr()
    logger = logging.getLogger('global')
    avg = AverageMeter()
    model.train()
    model = model.cuda()
    end = time.time()

    def is_valid_number(x):
        return not (math.isnan(x) or math.isinf(x) or x > 1e4)

    num_per_epoch = len(train_loader.dataset) // args.epochs // args.batch
    start_epoch = epoch

    for iter, input in enumerate(train_loader):
        if epoch != iter // num_per_epoch + start_epoch:  # next epoch
            epoch = iter // num_per_epoch + start_epoch

            if not os.path.exists(args.save_dir):  # makedir/save model
                os.makedirs(args.save_dir)

            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': model.module.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                    'anchor_cfg': cfg['anchors']
                }, False,
                os.path.join(args.save_dir, 'checkpoint_e%d.pth' % (epoch)),
                os.path.join(args.save_dir, 'best.pth'))

            if epoch == args.epochs:
                return

            if model.module.features.unfix(epoch / args.epochs):
                logger.info('unfix part model.')
                optimizer, lr_scheduler = build_opt_lr(model.module, cfg,
                                                       args, epoch)

            lr_scheduler.step(epoch)
            cur_lr = lr_scheduler.get_cur_lr()
            logger.info('epoch:{}'.format(epoch))

        tb_index = iter
        if iter % num_per_epoch == 0 and iter != 0:
            for idx, pg in enumerate(optimizer.param_groups):
                logger.info("epoch {} lr {}".format(epoch, pg['lr']))
                tb_writer.add_scalar('lr/group%d' % (idx + 1), pg['lr'],
                                     tb_index)

        data_time = time.time() - end
        avg.update(data_time=data_time)

        x = {
            'cfg': cfg,
            'template': torch.autograd.Variable(input[0]).cuda(),
            'search': torch.autograd.Variable(input[1]).cuda(),
            'label_cls': torch.autograd.Variable(input[2]).cuda(),
            'label_loc': torch.autograd.Variable(input[3]).cuda(),
            'label_loc_weight': torch.autograd.Variable(input[4]).cuda(),
            'label_mask': torch.autograd.Variable(input[6]).cuda(),
            'label_mask_weight': torch.autograd.Variable(input[7]).cuda(),
        }

        outputs = model(x)

        rpn_cls_loss, rpn_loc_loss, rpn_mask_loss = \
            torch.mean(outputs['losses'][0]), \
            torch.mean(outputs['losses'][1]), \
            torch.mean(outputs['losses'][2])
        mask_iou_mean, mask_iou_at_5, mask_iou_at_7 = \
            torch.mean(outputs['accuracy'][0]), \
            torch.mean(outputs['accuracy'][1]), \
            torch.mean(outputs['accuracy'][2])

        cls_weight, reg_weight, mask_weight = cfg['loss']['weight']

        loss = rpn_cls_loss * cls_weight + rpn_loc_loss * reg_weight \
            + rpn_mask_loss * mask_weight

        optimizer.zero_grad()
        loss.backward()

        if cfg['clip']['split']:
            torch.nn.utils.clip_grad_norm_(model.module.features.parameters(),
                                           cfg['clip']['feature'])
            torch.nn.utils.clip_grad_norm_(model.module.rpn_model.parameters(),
                                           cfg['clip']['rpn'])
            torch.nn.utils.clip_grad_norm_(model.module.mask_model.parameters(),
                                           cfg['clip']['mask'])
        else:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.clip)  # gradient clip

        if is_valid_number(loss.item()):
            optimizer.step()

        siammask_loss = loss.item()

        batch_time = time.time() - end

        avg.update(batch_time=batch_time,
                   rpn_cls_loss=rpn_cls_loss,
                   rpn_loc_loss=rpn_loc_loss,
                   rpn_mask_loss=rpn_mask_loss,
                   siammask_loss=siammask_loss,
                   mask_iou_mean=mask_iou_mean,
                   mask_iou_at_5=mask_iou_at_5,
                   mask_iou_at_7=mask_iou_at_7)

        tb_writer.add_scalar('loss/cls', rpn_cls_loss, tb_index)
        tb_writer.add_scalar('loss/loc', rpn_loc_loss, tb_index)
        tb_writer.add_scalar('loss/mask', rpn_mask_loss, tb_index)
        tb_writer.add_scalar('mask/mIoU', mask_iou_mean, tb_index)
        tb_writer.add_scalar('mask/AP@.5', mask_iou_at_5, tb_index)
        tb_writer.add_scalar('mask/AP@.7', mask_iou_at_7, tb_index)

        end = time.time()

        if (iter + 1) % args.print_freq == 0:
            logger.info(
                'Epoch: [{0}][{1}/{2}] lr: {lr:.6f}\t{batch_time:s}\t{data_time:s}'
                '\t{rpn_cls_loss:s}\t{rpn_loc_loss:s}\t{rpn_mask_loss:s}\t{siammask_loss:s}'
                '\t{mask_iou_mean:s}\t{mask_iou_at_5:s}\t{mask_iou_at_7:s}'.format(
                    epoch + 1, (iter + 1) % num_per_epoch, num_per_epoch,
                    lr=cur_lr,
                    batch_time=avg.batch_time,
                    data_time=avg.data_time,
                    rpn_cls_loss=avg.rpn_cls_loss,
                    rpn_loc_loss=avg.rpn_loc_loss,
                    rpn_mask_loss=avg.rpn_mask_loss,
                    siammask_loss=avg.siammask_loss,
                    mask_iou_mean=avg.mask_iou_mean,
                    mask_iou_at_5=avg.mask_iou_at_5,
                    mask_iou_at_7=avg.mask_iou_at_7))
            print_speed(iter + 1, avg.batch_time.avg,
                        args.epochs * num_per_epoch)
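
# AverageMeter is used throughout these scripts but not defined in this
# section. The minimal sketch below matches the observed usage (keyword-style
# update(), per-metric attribute access like avg.batch_time, a .avg running
# mean, and str()/format() for log lines). It is an illustration under those
# assumptions, not the repo's actual implementation.
class Meter(object):
    def __init__(self, name):
        self.name = name
        self.val = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val):
        self.val = float(val)  # accepts floats and 0-dim tensors
        self.sum += self.val
        self.count += 1

    @property
    def avg(self):
        return self.sum / max(self.count, 1)

    def __str__(self):
        return '{} {:.4f} ({:.4f})'.format(self.name, self.val, self.avg)

    def __format__(self, spec):
        # lets '{batch_time:s}'.format(batch_time=meter) work in the logs
        return str(self)


class AverageMeter(object):
    def update(self, **kwargs):
        # avg.update(batch_time=0.3, loss=1.2) creates/updates one Meter
        # per keyword and exposes it as an attribute (avg.batch_time, ...)
        for name, val in kwargs.items():
            if not hasattr(self, name):
                setattr(self, name, Meter(name))
            getattr(self, name).update(val)

    def avg(self, name):
        # some loops below also read a running mean via avg.avg('loss')
        return getattr(self, name).avg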
def train(train_loader, model, optimizer, lr_scheduler, epoch, cfg):
    global tb_index, best_acc, cur_lr, logger
    cur_lr = lr_scheduler.get_cur_lr()
    logger = logging.getLogger('global')
    avg = AverageMeter()
    model.train()
    # model.module.features.eval()
    # model.module.rpn_model.eval()
    # model.module.features.apply(BNtoFixed)
    # model.module.rpn_model.apply(BNtoFixed)
    #
    # model.module.mask_model.train()
    # model.module.refine_model.train()
    model = model.cuda()
    end = time.time()

    def is_valid_number(x):
        return not (math.isnan(x) or math.isinf(x) or x > 1e4)

    num_per_epoch = len(train_loader.dataset) // args.epochs // args.batch
    start_epoch = epoch

    # NOTE: this loop must not run under torch.no_grad(): loss.backward()
    # below needs the autograd graph recorded during the forward pass.
    for iter, input in enumerate(train_loader):
        if iter > 100:  # debug cap: only run the first ~100 batches
            break

        if epoch != iter // num_per_epoch + start_epoch:  # next epoch
            epoch = iter // num_per_epoch + start_epoch

            if epoch == args.epochs:
                return

            if model.module.features.unfix(epoch / args.epochs):
                logger.info('unfix part model.')
                optimizer, lr_scheduler = build_opt_lr(model.module, cfg,
                                                       args, epoch)

            lr_scheduler.step(epoch)
            cur_lr = lr_scheduler.get_cur_lr()
            logger.info('epoch:{}'.format(epoch))

        tb_index = iter
        if iter % num_per_epoch == 0 and iter != 0:
            for idx, pg in enumerate(optimizer.param_groups):
                logger.info("epoch {} lr {}".format(epoch, pg['lr']))
                tb_writer.add_scalar('lr/group%d' % (idx + 1), pg['lr'],
                                     tb_index)

        data_time = time.time() - end
        avg.update(data_time=data_time)

        x_rpn = {
            'cfg': cfg,
            'template': torch.autograd.Variable(input[0]).cuda(),
            'search': torch.autograd.Variable(input[1]).cuda(),
            'label_cls': torch.autograd.Variable(input[2]).cuda(),
            'label_loc': torch.autograd.Variable(input[3]).cuda(),
            'label_loc_weight': torch.autograd.Variable(input[4]).cuda(),
            'label_mask': torch.autograd.Variable(input[6]).cuda()
        }
        x_kp = input[7]
        x_kp = {
            k: torch.autograd.Variable(v).cuda()
            for k, v in x_kp.items()
        }
        x_rpn['anchors'] = train_loader.dataset.anchors.all_anchors[0]

        outputs = model(x_rpn, x_kp)

        roi_box = outputs['predict'][-1]
        pred_kp = outputs['predict'][2]['hm_hp']
        batch_img = x_rpn['search'].expand(x_kp['hm_hp'].size(0), -1, -1, -1)
        gt_img, pred_img = save_gt_pred_heatmaps(
            batch_img, x_kp['hm_hp'], pred_kp,
            'test_imgs/test_{}.jpg'.format(iter))

        # rpn_pred_cls, rpn_pred_loc = outputs['predict'][:2]
        # rpn_pred_cls = outputs['predict'][-1]
        # anchors = train_loader.dataset.anchors.all_anchors[0]
        # normalized_boxes = proposal_layer([rpn_pred_cls, rpn_pred_loc], anchors, config=cfg)
        # print('rpn_pred_cls: ', rpn_pred_cls.shape)

        rpn_cls_loss, rpn_loc_loss, kp_losses = \
            torch.mean(outputs['losses'][0]), \
            torch.mean(outputs['losses'][1]), \
            outputs['losses'][3]
        kp_loss = torch.mean(kp_losses['loss'])
        kp_hp_loss = torch.mean(kp_losses['hp_loss'])
        kp_hm_hp_loss = torch.mean(kp_losses['hm_hp_loss'])
        kp_hp_offset_loss = torch.mean(kp_losses['hp_offset_loss'])
        # mask_iou_mean, mask_iou_at_5, mask_iou_at_7 = torch.mean(outputs['accuracy'][0]), \
        #     torch.mean(outputs['accuracy'][1]), torch.mean(outputs['accuracy'][2])

        cls_weight, reg_weight, kp_weight = cfg['loss']['weight']

        loss = rpn_cls_loss * cls_weight + rpn_loc_loss * reg_weight \
            + kp_loss * kp_weight

        optimizer.zero_grad()
        loss.backward()

        if cfg['clip']['split']:
            torch.nn.utils.clip_grad_norm_(model.module.features.parameters(),
                                           cfg['clip']['feature'])
            torch.nn.utils.clip_grad_norm_(model.module.rpn_model.parameters(),
                                           cfg['clip']['rpn'])
            torch.nn.utils.clip_grad_norm_(model.module.mask_model.parameters(),
                                           cfg['clip']['mask'])
        else:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.clip)  # gradient clip

        if is_valid_number(loss.item()):
            optimizer.step()

        siammask_loss = loss.item()
        batch_time = time.time() - end

        avg.update(batch_time=batch_time,
                   rpn_cls_loss=rpn_cls_loss,
                   rpn_loc_loss=rpn_loc_loss,
                   kp_hp_loss=kp_hp_loss,
                   kp_hm_hp_loss=kp_hm_hp_loss,
                   kp_hp_offset_loss=kp_hp_offset_loss,
                   kp_loss=kp_loss,
                   siammask_loss=siammask_loss)
        # mask_iou_mean=mask_iou_mean, mask_iou_at_5=mask_iou_at_5,
        # mask_iou_at_7=mask_iou_at_7)

        tb_writer.add_scalar('loss/cls', rpn_cls_loss, tb_index)
        tb_writer.add_scalar('loss/loc', rpn_loc_loss, tb_index)
        tb_writer.add_scalar('loss/kp_hp_loss', kp_hp_loss, tb_index)
        tb_writer.add_scalar('loss/kp_hm_hp_loss', kp_hm_hp_loss, tb_index)
        tb_writer.add_scalar('loss/kp_hp_offset_loss', kp_hp_offset_loss,
                             tb_index)
        # tb_writer.add_scalar('loss/kp', kp_loss, tb_index)

        end = time.time()

        if (iter + 1) % args.print_freq == 0:
            logger.info(
                'Epoch: [{0}][{1}/{2}] lr: {lr:.6f}\t{batch_time:s}\t{data_time:s}'
                '\t{rpn_cls_loss:s}\t{rpn_loc_loss:s}'
                '\t{kp_hp_loss:s}\t{kp_hm_hp_loss:s}\t{kp_hp_offset_loss:s}'
                '\t{kp_loss:s}\t{siammask_loss:s}'.format(
                    epoch + 1, (iter + 1) % num_per_epoch, num_per_epoch,
                    lr=cur_lr,
                    batch_time=avg.batch_time,
                    data_time=avg.data_time,
                    rpn_cls_loss=avg.rpn_cls_loss,
                    rpn_loc_loss=avg.rpn_loc_loss,
                    kp_hp_loss=avg.kp_hp_loss,
                    kp_hm_hp_loss=avg.kp_hm_hp_loss,
                    kp_hp_offset_loss=avg.kp_hp_offset_loss,
                    kp_loss=avg.kp_loss,
                    siammask_loss=avg.siammask_loss))
            # mask_iou_mean=avg.mask_iou_mean,
            # mask_iou_at_5=avg.mask_iou_at_5, mask_iou_at_7=avg.mask_iou_at_7))
            print_speed(iter + 1, avg.batch_time.avg,
                        args.epochs * num_per_epoch)
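
# The two train() variants above read only a handful of config keys:
# cfg['loss']['weight'] (three scalar loss weights) and cfg['clip']
# (gradient-clipping norms, optionally per submodule). The fragment below
# illustrates that structure; every value is a placeholder, not this
# repo's default.
cfg_example = {
    'loss': {
        # unpacked as: cls_weight, reg_weight, mask_or_kp_weight
        'weight': [1.0, 1.2, 36.0],
    },
    'clip': {
        'split': True,    # True: clip each submodule with its own max norm
        'feature': 10.0,  # backbone (model.module.features)
        'rpn': 10.0,      # RPN head (model.module.rpn_model)
        'mask': 10.0,     # mask / keypoint head (model.module.mask_model)
    },
    'anchors': {},        # anchor settings, saved into checkpoints as 'anchor_cfg'
}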
def validation(epoch, log_interval, test_dataloader, model, loss, writer,
               device):
    """Validate on the test dataset.

    Current validation only covers the loss and the pos/neg distances. In
    the future we will add more metrics such as mAP@5|10|50|100 (maybe in
    another file).

    Args:
        epoch: Current epoch, used as the tensorboard global step.
        log_interval: Log once every `log_interval` batches.
        test_dataloader: A triplet dataloader for the validation data. Must
            not be None.
        model: The model to evaluate.
        loss: Loss metric.
        writer: Tensorboard writer.
        device: Device the model computes on.

    Returns:
        Epoch average values: triplet_loss, pos_dists, neg_dists.
    """
    logger.info(
        "\n------------------------- Start validation -------------------------\n"
    )

    # epoch average meter
    avg_test = AverageMeter()

    # get test batch count
    current_test_batch = 0
    total_test_batch = len(test_dataloader)

    # check dataloader is not None
    assert test_dataloader is not None, "test_dataloader should not be None."

    # Evaluation mode + no_grad(): disables dropout/BN updates and gradient
    # tracking for this block only, without touching the parameters'
    # requires_grad flags (flipping those would persist into later training).
    model.eval()
    with torch.no_grad():
        for batch_idx, batch_sample in enumerate(test_dataloader):
            # Skip the last iteration to avoid tensors of different sizes
            # (pairwise distance computation needs equal-sized tensors).
            if batch_idx + 1 == len(test_dataloader):
                continue

            # start time counting
            batch_start_time_test = time.time()

            # Forward pass - compute embeddings; move inputs to device
            anc_imgs = batch_sample['anchor_img'].to(device)
            pos_imgs = batch_sample['pos_img'].to(device)
            neg_imgs = batch_sample['neg_img'].to(device)
            pos_cls = batch_sample['pos_cls'].to(device)
            neg_cls = batch_sample['neg_cls'].to(device)

            # forward
            output = model.forward_triplet(anc_imgs, pos_imgs, neg_imgs)

            # get output
            anc_emb = output['anchor_map']
            pos_emb = output['pos_map']
            neg_emb = output['neg_map']
            pos_dists = torch.mean(output['dist_pos'])
            neg_dists = torch.mean(output['dist_neg'])

            # loss compute
            loss_value = loss(anc_emb, pos_emb, neg_emb)

            # batch time & batch count
            current_test_batch += 1
            batch_time = time.time() - batch_start_time_test

            # update avg
            avg_test.update(time=batch_time,
                            triplet_loss=loss_value,
                            pos_dists=pos_dists,
                            neg_dists=neg_dists)

            if current_test_batch % log_interval == 0:
                print_speed(current_test_batch, batch_time, total_test_batch,
                            "global")
                logger.info(
                    "\n current global average information:\n batch_time {0:.5f} | "
                    "triplet_loss: {1:.5f} | pos_dists: {2:.5f} | neg_dists: {3:.5f} \n"
                    .format(avg_test.time.avg, avg_test.triplet_loss.avg,
                            avg_test.pos_dists.avg, avg_test.neg_dists.avg))

    # Write the epoch-level averages once per validation run.
    writer.add_scalar("Validate/Loss/train",
                      avg_test.triplet_loss.avg,
                      global_step=epoch)
    writer.add_scalar("Validate/Other/pos_dists",
                      avg_test.pos_dists.avg,
                      global_step=epoch)
    writer.add_scalar("Validate/Other/neg_dists",
                      avg_test.neg_dists.avg,
                      global_step=epoch)

    return (avg_test.triplet_loss.avg, avg_test.pos_dists.avg,
            avg_test.neg_dists.avg)
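
# validation() treats `loss` as a callable taking (anchor, positive,
# negative) embeddings, so PyTorch's stock nn.TripletMarginLoss fits the
# call signature. A minimal usage sketch; margin=0.2 and the embedding
# size are arbitrary examples, not this project's settings.
import torch
import torch.nn as nn

triplet_loss = nn.TripletMarginLoss(margin=0.2, p=2)
anc = torch.randn(8, 128)  # anchor embeddings (batch of 8)
pos = torch.randn(8, 128)  # embeddings of same-identity samples
neg = torch.randn(8, 128)  # embeddings of different-identity samples
value = triplet_loss(anc, pos, neg)  # scalar, averaged over the batch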
                             epoch * train_lenth + step + 1)
            writer.add_video('test_seq/gt_seq', gt_seq_test,
                             epoch * train_lenth + step + 1)
            writer.add_video('test_seq/pred_seq', test_output,
                             epoch * train_lenth + step + 1)
            writer.add_scalars(
                'loss/merge', {
                    "train_loss": train_loss,
                    "test_loss": test_loss,
                    "train_metric": train_metric,
                    "test_metric": test_metric
                }, epoch * train_lenth + step + 1)

        # update the averager (running means)
        avg.update(step_time=step_time,
                   train_loss=train_loss,
                   test_loss=test_loss,
                   train_metric=train_metric)

        # log results
        if (step + 1) % print_freq == 0:
            global_logger.info(
                'Epoch: [{0}][{1}/{2}] {step_time:s}\t{train_loss:s}\t{test_loss:s}\t{train_metric:s}'
                .format(epoch + 1, (step + 1) % train_lenth, train_lenth,
                        step_time=avg.step_time,
                        train_loss=avg.train_loss,
                        test_loss=avg.test_loss,
                        train_metric=avg.train_metric))
            print_speed(epoch * train_lenth + step + 1, avg.step_time.avg,
                        epoches * train_lenth)
def train(train_loader, model, optimizer, lr_scheduler, epoch, cfg):
    global tb_index, best_acc, cur_lr, logger
    cur_lr = lr_scheduler.get_cur_lr()
    logger = logging.getLogger('global')
    avg = AverageMeter()
    model.train()
    model = model.cuda()
    end = time.time()

    def is_valid_number(x):
        return not (math.isnan(x) or math.isinf(x) or x > 1e4)

    num_per_epoch = len(train_loader.dataset) // args.epochs // args.batch
    start_epoch = epoch

    for iter, input in enumerate(train_loader):
        if epoch != iter // num_per_epoch + start_epoch:  # next epoch
            epoch = iter // num_per_epoch + start_epoch

            if not os.path.exists(args.save_dir):  # makedir/save model
                os.makedirs(args.save_dir)

            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': model.module.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                    'anchor_cfg': cfg['anchors']
                }, False,
                os.path.join(args.save_dir, 'checkpoint_e%d.pth' % (epoch)),
                os.path.join(args.save_dir, 'best.pth'))

            if epoch == args.epochs:
                return

            if model.module.features.unfix(epoch / args.epochs):
                logger.info('unfix part model.')
                optimizer, lr_scheduler = build_opt_lr(model.module, cfg,
                                                       args, epoch)

            lr_scheduler.step(epoch)
            cur_lr = lr_scheduler.get_cur_lr()
            logger.info('epoch:{}'.format(epoch))

        tb_index = iter
        if iter % num_per_epoch == 0 and iter != 0:
            for idx, pg in enumerate(optimizer.param_groups):
                logger.info("epoch {} lr {}".format(epoch, pg['lr']))
                tb_writer.add_scalar('lr/group%d' % (idx + 1), pg['lr'],
                                     tb_index)

        data_time = time.time() - end
        avg.update(data_time=data_time)

        x = {
            'cfg': cfg,
            'template': torch.autograd.Variable(input[0]).cuda(),
            'search': torch.autograd.Variable(input[1]).cuda(),
            'label_cls': torch.autograd.Variable(input[2]).cuda(),
            'label_loc': torch.autograd.Variable(input[3]).cuda(),
            'label_loc_weight': torch.autograd.Variable(input[4]).cuda(),
            'label_mask': torch.autograd.Variable(input[6]).cuda(),
            'label_kp_weight': torch.autograd.Variable(input[7]).cuda(),
            'label_mask_weight': torch.autograd.Variable(input[8]).cuda(),
        }

        outputs = model(x)
        # print(x['search'].shape)

        pred_mask = outputs['predict'][2]
        # pred_mask is rpn_pred_mask: (bs, 17, 127, 127)
        pred_mask = select_pred_heatmap(pred_mask, x['label_mask_weight'])
        true_search = select_gt_img(x['search'], x['label_mask_weight'])
        # guard on element count: .shape is truthy for any non-scalar tensor
        if true_search.numel() > 0:
            save_batch_heatmaps(true_search, pred_mask,
                                vis_outpath + '{}.jpg'.format(iter),
                                normalize=True)

        # visualization idea: project all 17 heatmaps onto one black image
        # pred_mask = pred_mask.cpu().detach().numpy()
        # true_search = true_search.cpu().detach().numpy()
        # print("pose_mask", pred_mask.shape)
        # pose_heat = np.transpose(pred_mask[0, :, :, :], (1, 2, 0))  # (127, 127, 17)
        # plt.figure(num='image', figsize=(128, 128))
        # plt.subplot(1, 2, 1)
        # plt.title('origin image')
        # plt.imshow(np.transpose(true_search[0, :, :, :], (1, 2, 0)))
        # plt.subplot(1, 2, 2)
        # plt.title('heatmap')
        # pose_map = np.zeros((127, 127), np.float32)
        # for i in range(pred_mask.shape[1]):
        #     pose_map += pose_heat[:, :, i]
        # plt.imshow(pose_map)
        # plt.axis('off')
        # plt.show()

        rpn_cls_loss, rpn_loc_loss, rpn_mask_loss = \
            torch.mean(outputs['losses'][0]), \
            torch.mean(outputs['losses'][1]), \
            torch.mean(outputs['losses'][2])
        # mask_iou_mean, mask_iou_at_5, mask_iou_at_7 = torch.mean(outputs['accuracy'][0]), \
        #     torch.mean(outputs['accuracy'][1]), torch.mean(outputs['accuracy'][2])

        cls_weight, reg_weight, mask_weight = cfg['loss']['weight']

        loss = rpn_cls_loss * cls_weight + rpn_loc_loss * reg_weight \
            + rpn_mask_loss * mask_weight

        optimizer.zero_grad()
        loss.backward()

        if cfg['clip']['split']:
            torch.nn.utils.clip_grad_norm_(model.module.features.parameters(),
                                           cfg['clip']['feature'])
            torch.nn.utils.clip_grad_norm_(model.module.rpn_model.parameters(),
                                           cfg['clip']['rpn'])
            torch.nn.utils.clip_grad_norm_(model.module.mask_model.parameters(),
                                           cfg['clip']['mask'])
        else:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.clip)  # gradient clip

        if is_valid_number(loss.item()):
            optimizer.step()

        siammask_loss = loss.item()
        batch_time = time.time() - end

        avg.update(batch_time=batch_time,
                   rpn_cls_loss=rpn_cls_loss,
                   rpn_loc_loss=rpn_loc_loss,
                   rpn_mask_loss=rpn_mask_loss * mask_weight,  # logged with weight applied
                   siammask_loss=siammask_loss)
        # mask_iou_mean=mask_iou_mean, mask_iou_at_5=mask_iou_at_5,
        # mask_iou_at_7=mask_iou_at_7)

        tb_writer.add_scalar('loss/cls', rpn_cls_loss, tb_index)
        tb_writer.add_scalar('loss/loc', rpn_loc_loss, tb_index)
        tb_writer.add_scalar('loss/mask', rpn_mask_loss * mask_weight,
                             tb_index)
        # tb_writer.add_scalar('mask/mIoU', mask_iou_mean, tb_index)
        # tb_writer.add_scalar('mask/AP@.5', mask_iou_at_5, tb_index)
        # tb_writer.add_scalar('mask/AP@.7', mask_iou_at_7, tb_index)

        end = time.time()

        if (iter + 1) % args.print_freq == 0:
            logger.info(
                'Epoch: [{0}][{1}/{2}] lr: {lr:.6f}\t{batch_time:s}\t{data_time:s}'
                '\t{rpn_cls_loss:s}\t{rpn_loc_loss:s}\t{rpn_mask_loss:s}\t{siammask_loss:s}'
                .format(epoch + 1, (iter + 1) % num_per_epoch, num_per_epoch,
                        lr=cur_lr,
                        batch_time=avg.batch_time,
                        data_time=avg.data_time,
                        rpn_cls_loss=avg.rpn_cls_loss,
                        rpn_loc_loss=avg.rpn_loc_loss,
                        rpn_mask_loss=avg.rpn_mask_loss,
                        siammask_loss=avg.siammask_loss))
            # mask_iou_mean=avg.mask_iou_mean,
            # mask_iou_at_5=avg.mask_iou_at_5, mask_iou_at_7=avg.mask_iou_at_7))
            print_speed(iter + 1, avg.batch_time.avg,
                        args.epochs * num_per_epoch)
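
# save_checkpoint(state, is_best, checkpoint_path, best_path) is called by
# the train() variants above but not defined in this section. A minimal
# sketch consistent with that 4-argument call site; the repo's actual
# helper may differ.
import shutil

import torch


def save_checkpoint(state, is_best, checkpoint_path, best_path):
    # persist the full training state dict (epoch, weights, optimizer, ...)
    torch.save(state, checkpoint_path)
    # the call sites above pass is_best=False, so the copy is a no-op there
    if is_best:
        shutil.copyfile(checkpoint_path, best_path)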
        pos_dists = torch.mean(output['dist_pos'])
        neg_dists = torch.mean(output['dist_neg'])

        # loss compute
        loss_value = loss(anc_emb, pos_emb, neg_emb)

        # Backward pass
        optimizer_model.zero_grad()
        loss_value.backward()
        optimizer_model.step()

        current_batch += 1
        batch_time = time.time() - batch_start_time

        avg.update(time=batch_time,
                   triplet_loss=loss_value,
                   pos_dists=pos_dists,
                   neg_dists=neg_dists)

        writer.add_scalar("Train_Batch/Loss/train_loss", loss_value,
                          global_step=current_batch)
        writer.add_scalar("Train_Batch/Distance/pos_dists", pos_dists,
                          global_step=current_batch)
        writer.add_scalar("Train_Batch/Distance/neg_dists", neg_dists,
                          global_step=current_batch)
        writer.add_scalar("Train_Batch_Global_AVG/loss", avg.triplet_loss.avg,
                          global_step=current_batch)
        writer.add_scalar("Train_Batch_Global_AVG/pos_dists",
                          avg.pos_dists.avg,
                          global_step=current_batch)
        # run the model for inference
        test_output = model(seq_test, future=input_num)

        # compute the loss
        test_loss = loss(seq_test[:, -input_num:, :, :, :],
                         gt_seq_test[:, -input_num:, :, :, :])

        step_time = time.time() - step_time

        # write the useful information to tensorboard
        if (step + 1) % print_freq == 0:
            writer.add_video('seq/train_seq', seq,
                             epoch * train_lenth + step + 1)
            writer.add_video('seq/gt_seq', seq_target,
                             epoch * train_lenth + step + 1)
            writer.add_video('seq/pred_seq', layer_output,
                             epoch * train_lenth + step + 1)
            writer.add_scalars('loss/merge', {
                "train_loss": loss_,
                "test_loss": test_loss
            }, epoch * train_lenth + step + 1)

        # update the averager (running means)
        avg.update(step_time=step_time, train_loss=loss_, test_loss=test_loss)

        # log results
        if (step + 1) % print_freq == 0:
            global_logger.info(
                'Epoch: [{0}][{1}/{2}] {step_time:s}\t{train_loss:s}\t{test_loss:s}'
                .format(epoch + 1, (step + 1) % train_lenth, train_lenth,
                        step_time=avg.step_time,
                        train_loss=avg.train_loss,
                        test_loss=avg.test_loss))
            print_speed(epoch * train_lenth + step + 1, avg.step_time.avg,
                        epoches * train_lenth)

    # step the learning-rate scheduler (once per epoch)
    scheduler.step()
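
# print_speed(i, i_time, n[, logger_name]) is used by all the loops above
# to report throughput and ETA. The sketch below matches both call-site
# shapes seen here (3 positional args, or 4 with a logger name); it is an
# assumed helper, and the repo's version may format differently.
import logging
import math


def print_speed(i, i_time, n, logger_name='global'):
    logger = logging.getLogger(logger_name)
    remaining_time = (n - i) * i_time  # seconds left at the current speed
    remaining_day = math.floor(remaining_time / 86400)
    remaining_hour = math.floor(remaining_time / 3600 - remaining_day * 24)
    remaining_min = math.floor(remaining_time / 60 - remaining_day * 1440 -
                               remaining_hour * 60)
    logger.info('Progress: %d / %d [%d%%], Speed: %.3f s/iter, '
                'ETA %d:%02d:%02d (D:H:M)\n' %
                (i, n, i / max(n, 1) * 100, i_time, remaining_day,
                 remaining_hour, remaining_min))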
        points_feats = model(points_graph)
        global_feats = torch.cat((visual_feats, points_feats),
                                 dim=0).contiguous()
        labels = torch.cat((visual_graph.y, points_graph.y),
                           dim=0).contiguous()
        iloss, _, _, _, _, _ = global_loss(trip_loss, global_feats, labels)
        batch_loss += iloss

    batch_loss = batch_loss / len(value[0])

    optimizer.zero_grad()
    batch_loss.backward()
    # clip_gradient(model, 10)
    optimizer.step()

    batch_time = time.time() - end_time
    avg.update(batch_time=batch_time, loss=batch_loss.item())

    if (step + 1) % args.disp_interval == 0:
        vis.plot('loss', avg.avg('loss'))
        log_str = '(Train) Epoch: [{0}][{1}/{2}]\t lr: {lr:.6f} \t {batch_time:s} \t {loss:s} \n'.format(
            epoch, step + 1, len(train_dataloader),
            lr=lr,
            batch_time=avg.batch_time,
            loss=avg.loss)
        vis.log(log_str)

if args.trainval:
    # validation
    model.eval()
    valid_avg = AverageMeter()
    valid_disp_interval = int(args.disp_interval /