def main():
    """Entry point: build the configured pose model, load weights, run validation."""
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')
    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn behaviour is driven entirely by the experiment config
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # NOTE(review): eval() on a config-derived string — acceptable for trusted
    # experiment configs only; never feed untrusted input through MODEL.NAME.
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    if cfg.TEST.MODEL_FILE:
        weights_path = cfg.TEST.MODEL_FILE
        logger.info('=> loading model from {}'.format(weights_path))
        # strict=False tolerates missing/extra keys in the checkpoint
        model.load_state_dict(torch.load(weights_path), strict=False)
    else:
        weights_path = os.path.join(final_output_dir, 'final_state.pth')
        logger.info('=> loading model from {}'.format(weights_path))
        model.load_state_dict(torch.load(weights_path))

    if cfg.USE_GPU:
        model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion)
    criterion = JointsMSELoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT)
    if cfg.USE_GPU:
        criterion = criterion.cuda()

    # Data loading: ImageNet mean/std normalisation, as at training time
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    eval_transform = transforms.Compose([transforms.ToTensor(), normalize])
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, eval_transform)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=True)

    # evaluate on validation set
    validate(cfg, valid_loader, valid_dataset, model, criterion,
             final_output_dir, tb_log_dir)
def main():
    """Load a trained single-view pose network and evaluate it on the test set."""
    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'valid')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME + '.get_pose_net')(
        config, is_train=False)

    if config.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(config.TEST.MODEL_FILE))
        state_dict = torch.load(config.TEST.MODEL_FILE)
        # Checkpoints saved from a DataParallel wrapper prefix every key with
        # 'module.'; strip that so the bare model accepts the weights.
        new_state_dict = {
            (key[len('module.'):] if key.startswith('module.') else key): value
            for key, value in state_dict.items()
        }
        model.load_state_dict(new_state_dict)
    else:
        model_state_file = os.path.join(final_output_dir,
                                        'final_state.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    valid_dataset = eval('dataset.' + config.DATASET.DATASET)(
        config, config.DATASET.ROOT, config.DATASET.TEST_SET, False,
        transforms.Compose([transforms.ToTensor(), normalize]))
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    # evaluate on validation set
    validate(config, valid_loader, valid_dataset, model, criterion,
             final_output_dir, tb_log_dir)
def test(model, cfg, final_output_dir, tb_log_dir):
    """Evaluate an already-built `model` on the configured test split.

    Returns the performance indicator produced by validate_select.
    """
    # loss used by the evaluation routine
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # ImageNet statistics, matching training-time preprocessing
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([transforms.ToTensor(), normalize]))
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=True)

    # evaluate on validation set and hand the metric back to the caller
    return validate_select(cfg, valid_loader, valid_dataset, model, criterion,
                           final_output_dir, tb_log_dir)
def main():
    """Evaluate a multiview pose network on the configured test subset."""
    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'valid')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    # the multiview head wraps a single-view backbone
    backbone_model = eval('models.' + config.BACKBONE_MODEL + '.get_pose_net')(
        config, is_train=False)
    model = eval('models.' + config.MODEL + '.get_multiview_pose_net')(
        backbone_model, config)

    # pick the weights file: explicit TEST.MODEL_FILE wins, otherwise the
    # per-epoch snapshot named by TEST.STATE inside the run's output dir
    if config.TEST.MODEL_FILE:
        weights_file = config.TEST.MODEL_FILE
    else:
        # model_path = 'model_best.pth.tar' if config.TEST.STATE == 'best' else 'final_state_ep{}.pth.tar'.format(config.TEST.STATE)
        weights_file = os.path.join(
            final_output_dir,
            'final_state_ep{}.pth.tar'.format(config.TEST.STATE))
    logger.info('=> loading model from {}'.format(weights_file))
    model.load_state_dict(torch.load(weights_file))

    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    valid_dataset = eval('dataset.' + config.DATASET.TEST_DATASET)(
        config, config.DATASET.TEST_SUBSET, False,
        transforms.Compose([transforms.ToTensor(), normalize]))
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    # evaluate on validation set
    perf_indicator = validate(config, valid_loader, valid_dataset, model,
                              criterion, final_output_dir, tb_log_dir)
    logger.info('perf indicator {}'.format(perf_indicator))
def main():
    """Validate a pruned HRNet whose per-layer channel counts come from disk.

    The channel configuration is read from line 2 of a fixed prune file and
    passed to the pruned-model factory alongside the normal experiment config.
    """
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')
    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # Line 2 of the prune file holds a Python-list-like string of channel
    # counts, e.g. "[32, 64, ...]"; parse it into a list of ints.
    # (Replaces the original index-based range(len(...)) conversion loop.)
    # NOTE(review): path and line number are hard-coded — consider moving
    # them into the experiment config.
    raw_cfg = linecache.getline('tools/testmodel/prune36.txt', 2)
    ncfg = [int(tok) for tok in raw_cfg.strip().strip("[]").split(",")]

    from models.purnpose_hrnet import get_pose_net
    model = get_pose_net(cfg, ncfg, is_train=False)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        # strict=False: pruned checkpoints may not cover every module key
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
    else:
        model_state_file = os.path.join(final_output_dir, 'final_state.pth')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=True)

    # evaluate on validation set
    validate(cfg, valid_loader, valid_dataset, model, criterion,
             final_output_dir, tb_log_dir)
def __init__(self, cfg, is_train=True):
    """Build the HRNet backbone and joint-MSE loss from `cfg`.

    `is_train` is accepted for interface compatibility but is not used here;
    the backbone is left with its random initialization (no pretrained load).
    """
    super().__init__()
    self.save_hyperparameters()
    self.cfg = cfg
    # random initialization
    #model.init_weights("")
    self.model = PoseHighResolutionNet(cfg=self.cfg)
    self.criterion = JointsMSELoss(use_target_weight=True)
def main():
    """Run the trained model over the test split and score it with a heatmap MSE."""
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')
    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        # strict=False tolerates key mismatches in externally supplied files
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    heatmapLoss = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT  # true
    ).cuda()

    # Data loading code — the dataset is asked for its 'test' mode, and only
    # ToTensor is applied (no ImageNet normalisation for this pipeline)
    test_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, 'test',
        transforms.Compose([transforms.ToTensor()]))
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=True)

    # evaluate on validation set
    test(cfg, test_loader, test_dataset, model, heatmapLoss, final_output_dir)
def main():
    """Validate a DARTS-genotype pose network on the train-config test set."""
    args = parse_args()
    reset_config(config, args)

    # tensorboard
    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'test', 'valid')

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED
    torch.backends.cudnn.benchmark = True

    model = Network(config, gt.DARTS)

    if config.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(config.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(config.TEST.MODEL_FILE),
                              strict=False)
    else:
        model_state_file = os.path.join(final_output_dir, 'final_state.pth')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    gpus = [int(i) for i in config.GPUS.split(',')]
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).to(device)
    model = nn.DataParallel(model, device_ids=gpus).to(device)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # NOTE(review): the split comes from TRAIN.TEST_SET rather than the TEST
    # namespace — presumably intentional for this search setup; confirm
    # against the config definitions.
    valid_dataset = eval('dataset.' + config.DATASET.DATASET)(
        config, config.DATASET.ROOT, config.TRAIN.TEST_SET, False,
        transforms.Compose([transforms.ToTensor(), normalize]))
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    validate(config, valid_loader, valid_dataset, model, criterion,
             final_output_dir, tb_log_dir)
def main():
    """Adversarial domain-adaptation training for a pose network.

    Phase 1 pretrains a domain classifier (model_d) against detached features
    from the pose network (model_p); phase 2 alternates adversarial training
    (train_adaptive) with validation, checkpointing the best model.

    NOTE(review): SOURCE arrived with collapsed lines — statement grouping
    below (notably the `num += 1` accumulator) was reconstructed and should
    be confirmed against the original file.
    """
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # factory returns (pose network, domain discriminator) as a pair
    model_p, model_d = eval('models.' + cfg.MODEL.NAME +
                            '.get_adaptive_pose_net')(cfg, is_train=True)

    if cfg.TRAIN.CHECKPOINT:
        logger.info('=> loading model from {}'.format(cfg.TRAIN.CHECKPOINT))
        model_p.load_state_dict(torch.load(cfg.TRAIN.CHECKPOINT))
    else:
        model_state_file = os.path.join(final_output_dir, 'checkpoint.pth')
        logger.info('=> loading model from {}'.format(model_state_file))
        model_p.load_state_dict(torch.load(model_state_file))

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)

    # shared tensorboard writer plus per-phase step counters
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'pre_train_global_steps': 0,
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model_p, (dump_input, ), verbose=False)

    logger.info(get_model_summary(model_p, dump_input))

    model_p = torch.nn.DataParallel(model_p, device_ids=cfg.GPUS).cuda()
    model_d = torch.nn.DataParallel(model_d, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer for pose_net
    criterion_p = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()
    optimizer_p = get_optimizer(cfg, model_p)

    # define loss function (criterion) and optimizer for domain
    criterion_d = torch.nn.BCEWithLogitsLoss().cuda()
    optimizer_d = get_optimizer(cfg, model_d)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_pre_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_PRE_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_pre_loader = torch.utils.data.DataLoader(
        train_pre_dataset,
        batch_size=cfg.TRAIN.PRE_BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    # balance synthetic vs real samples within each adversarial batch
    syn_labels = train_dataset._load_syrip_syn_annotations()
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=BalancedBatchSampler(train_dataset, syn_labels),
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    '''
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY
    )
    '''
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model_p.load_state_dict(checkpoint['state_dict'])
        optimizer_p.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    # freeze some layers
    idx = 0
    print('Parametersssssssssssssss')
    for param in model_p.parameters():
        if idx <= 108:  #fix 108 for stage 2 + bottleneck or fix 483 for stage 3 + stage 2+ bottleneck
            param.requires_grad = False
        #print(param.data.shape)
        idx = idx + 1

    lr_scheduler_p = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_p,
        cfg.TRAIN.LR_STEP,
        cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)
    lr_scheduler_d = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_d, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR)

    epoch_D = cfg.TRAIN.PRE_EPOCH
    losses_D_list = []
    acces_D_list = []
    acc_num_total = 0  # correctly classified samples across all batches
    num = 0            # total samples seen by the classifier
    losses_d = AverageMeter()

    # Pretrained Stage
    print('Pretrained Stage:')
    print('Start to train Domain Classifier-------')
    for epoch_d in range(epoch_D):  # epoch
        model_d.train()
        model_p.train()
        for i, (input, target, target_weight,
                meta) in enumerate(train_pre_loader):  # iteration
            # compute output for pose_net
            feature_outputs, outputs = model_p(input)
            #print(feature_outputs.size())
            # compute for domain classifier
            # detach(): only the discriminator is updated in this phase
            domain_logits = model_d(feature_outputs.detach())
            domain_label = (meta['synthetic'].unsqueeze(-1) *
                            1.0).cuda(non_blocking=True)
            # print(domain_label)
            loss_d = criterion_d(domain_logits, domain_label)
            loss_d.backward(retain_graph=True)
            optimizer_d.step()
            # compute accuracy of classifier
            # raw logits: > 0 predicts synthetic (label 1), < 0 real (label 0)
            acc_num = 0
            for j in range(len(domain_label)):
                if (domain_logits[j] > 0 and domain_label[j] == 1.0) or (
                        domain_logits[j] < 0 and domain_label[j] == 0.0):
                    acc_num += 1
                    acc_num_total += 1
                num += 1
            acc_d = acc_num * 1.0 / input.size(0)
            acces_D_list.append(acc_d)
            # gradients are cleared after the step (equivalent to clearing
            # before backward on the next iteration)
            optimizer_d.zero_grad()
            losses_d.update(loss_d.item(), input.size(0))
            if i % cfg.PRINT_FREQ == 0:
                msg = 'Epoch: [{0}][{1}/{2}]\t' \
                      'Accuracy_d: {3} ({4})\t' \
                      'Loss_d: {loss_d.val:.5f} ({loss_d.avg:.5f})'.format(
                          epoch_d, i, len(train_pre_loader), acc_d,
                          acc_num_total * 1.0 / num, loss_d = losses_d)
                logger.info(msg)
                writer = writer_dict['writer']
                pre_global_steps = writer_dict['pre_train_global_steps']
                writer.add_scalar('pre_train_loss_D', losses_d.val,
                                  pre_global_steps)
                writer.add_scalar('pre_train_acc_D', acc_d, pre_global_steps)
                writer_dict['pre_train_global_steps'] = pre_global_steps + 1
                losses_D_list.append(losses_d.val)

    print('Training Stage (Step I and II):')
    losses_P_list = []
    acces_P_list = []
    losses_p = AverageMeter()
    acces_p = AverageMeter()
    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler_p.step()

        # train for one epoch
        losses_P_list, losses_D_list, acces_P_list, acces_D_list = train_adaptive(
            cfg, train_loader, model_p, model_d, criterion_p, criterion_d,
            optimizer_p, optimizer_d, epoch, final_output_dir, tb_log_dir,
            writer_dict, losses_P_list, losses_D_list, acces_P_list,
            acces_D_list, acc_num_total, num, losses_p, acces_p, losses_d)

        # evaluate on validation set
        perf_indicator = validate_adaptive(cfg, valid_loader, valid_dataset,
                                           model_p, criterion_p,
                                           final_output_dir, tb_log_dir,
                                           writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model_p.state_dict(),
                'best_state_dict': model_p.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer_p.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model_p.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()

    # dump training curves for offline plotting
    np.save('./losses_D.npy', np.array(losses_D_list))  # Adversarial-D
    np.save('./losses_P.npy', np.array(losses_P_list))  # P
    np.save('./acces_P.npy', np.array(acces_P_list))  # P
    np.save('./acces_D.npy', np.array(acces_D_list))  # D
def main():
    """Validate a pose model, reloading weights and, when available, the epoch.

    Supports three weight sources: none at all (TEST.MODEL_FILE == 'none'),
    an explicit TEST.MODEL_FILE path, or 'model_best.pth' from the run's
    output directory (which may be a bare state dict or a wrapped checkpoint).
    """
    args = parse_args()
    update_config(cfg, args)

    if args.prevModelDir and args.modelDir:
        # copy pre models for philly
        copy_prev_models(args.prevModelDir, args.modelDir)

    logger, final_output_dir, tb_log_dir = create_logger(cfg,
                                                         args.cfg,
                                                         'valid',
                                                         dry=True)
    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    epoch = 0
    # wrap before loading: saved keys are expected to carry the DataParallel
    # 'module.' prefix
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS)

    if cfg.TEST.MODEL_FILE == 'none':
        t = {}
        logger.info('=> Not reloading any model')
    elif cfg.TEST.MODEL_FILE:
        logger.info('=> loading TEST.MODEL_FILE model from {}'.format(
            cfg.TEST.MODEL_FILE))
        t = torch.load(cfg.TEST.MODEL_FILE)
        model.load_state_dict(t, strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth')
        logger.info('=> loading model from {}'.format(model_state_file))
        t = torch.load(model_state_file)
        # checkpoints may wrap the weights together with training metadata
        if 'state_dict' in t:
            model.load_state_dict(t['state_dict'])
        else:
            model.load_state_dict(t)
        if 'epoch' in t:
            epoch = t['epoch']
            # BUG FIX: logger.info is printf-style; the original passed
            # `epoch` as an extra positional argument with no placeholder,
            # which raised a (swallowed) formatting error and never logged
            # the value. Use a %s placeholder instead.
            logger.info('Reloaded epoch %s', epoch)
        else:
            logger.info('No epoch in model, setting to last epoch')
            epoch = cfg.TRAIN.END_EPOCH

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    # evaluate on validation set
    validate(cfg, valid_loader, valid_dataset, model, criterion,
             final_output_dir, tb_log_dir, epoch)
def main_worker(rank, args, config, num_gpus):
    """Per-process entry point for distributed (DDP/NCCL) multiview training.

    Each spawned process owns GPU `rank`; rank 0 additionally owns logging,
    file copying, tensorboard and checkpoint writing. Builds the base model
    plus any loss-specific discriminators, optionally resumes, then runs the
    train/validate loop synchronised with dist.barrier().

    NOTE(review): SOURCE arrived with collapsed lines — statement grouping
    below was reconstructed and should be confirmed against the original file.
    """
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group(backend='nccl', rank=rank, world_size=num_gpus)
    print('Rank: {} finished initializing, PID: {}'.format(rank, os.getpid()))

    # only rank 0 creates the logger/output dirs; other ranks carry None
    if rank == 0:
        logger, final_output_dir, tb_log_dir = create_logger(
            config, args.cfg, 'train')
        logger.info(pprint.pformat(args))
        logger.info(pprint.pformat(config))
    else:
        final_output_dir = None
        tb_log_dir = None

    # Gracefully kill all subprocesses by command <'kill subprocess 0'>
    signal.signal(signal.SIGTERM, signal_handler)
    if rank == 0:
        logger.info('Rank {} has registerred signal handler'.format(rank))

    # device in current process
    device = torch.device('cuda', rank)

    backbone_model = eval('models.' + config.BACKBONE_MODEL + '.get_pose_net')(
        config, is_train=True)
    base_model = eval('models.' + config.MODEL + '.get_multiview_pose_net')(
        backbone_model, config)
    # all trainable networks keyed by role; losses below look them up by key
    model_dict = OrderedDict()
    model_dict['base_model'] = base_model.to(device)
    if config.LOSS.USE_GLOBAL_MI_LOSS:
        global_discriminator = models.discriminator.GlobalDiscriminator(config)
        model_dict['global_discriminator'] = global_discriminator.to(device)
    if config.LOSS.USE_LOCAL_MI_LOSS:
        local_discriminator = models.discriminator.LocalDiscriminator(config)
        model_dict['local_discriminator'] = local_discriminator.to(device)
    if config.LOSS.USE_DOMAIN_TRANSFER_LOSS:
        domain_discriminator = models.discriminator.DomainDiscriminator(config)
        model_dict['domain_discriminator'] = domain_discriminator.to(device)
    if config.LOSS.USE_VIEW_MI_LOSS:
        view_discriminator = models.discriminator.ViewDiscriminator(config)
        model_dict['view_discriminator'] = view_discriminator.to(device)
    if config.LOSS.USE_JOINTS_MI_LOSS:
        joints_discriminator = models.discriminator.JointsDiscriminator(config)
        model_dict['joints_discriminator'] = joints_discriminator.to(device)
    if config.LOSS.USE_HEATMAP_MI_LOSS:
        heatmap_discriminator = models.discriminator.HeatmapDiscriminator(config)
        model_dict['heatmap_discriminator'] = heatmap_discriminator.to(device)

    # copy model files and print model config
    if rank == 0:
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../../lib/models', config.MODEL + '.py'),
            final_output_dir)
        shutil.copy2(args.cfg, final_output_dir)
        logger.info(pprint.pformat(model_dict['base_model']))
        if config.LOSS.USE_GLOBAL_MI_LOSS:
            logger.info(pprint.pformat(model_dict['global_discriminator']))
        if config.LOSS.USE_LOCAL_MI_LOSS:
            logger.info(pprint.pformat(model_dict['local_discriminator']))
        if config.LOSS.USE_DOMAIN_TRANSFER_LOSS:
            logger.info(pprint.pformat(model_dict['domain_discriminator']))
        if config.LOSS.USE_VIEW_MI_LOSS:
            logger.info(pprint.pformat(model_dict['view_discriminator']))
        if config.LOSS.USE_JOINTS_MI_LOSS:
            logger.info(pprint.pformat(model_dict['joints_discriminator']))
        if config.LOSS.USE_HEATMAP_MI_LOSS:
            logger.info(pprint.pformat(model_dict['heatmap_discriminator']))
        if config.LOSS.USE_GLOBAL_MI_LOSS or config.LOSS.USE_LOCAL_MI_LOSS \
                or config.LOSS.USE_DOMAIN_TRANSFER_LOSS or config.LOSS.USE_VIEW_MI_LOSS \
                or config.LOSS.USE_JOINTS_MI_LOSS or config.LOSS.USE_HEATMAP_MI_LOSS:
            shutil.copy2(
                os.path.join(this_dir, '../../lib/models', 'discriminator.py'),
                final_output_dir)

    # tensorboard writer
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    } if rank == 0 else None

    # dump_input = torch.rand(
    #     (config.TRAIN.BATCH_SIZE, 3,
    #      config.NETWORK.IMAGE_SIZE[1], config.NETWORK.IMAGE_SIZE[0]))
    # writer_dict['writer'].add_graph(model, (dump_input,))

    # first resume, then parallel
    for key in model_dict.keys():
        model_dict[key] = torch.nn.parallel.DistributedDataParallel(
            model_dict[key], device_ids=[rank], output_device=rank)
        # one by one
        dist.barrier()

    # get optimizer
    optimizer_dict = {}
    optimizer_base_model = get_optimizer(config, model_dict['base_model'])
    optimizer_dict['base_model'] = optimizer_base_model
    if config.LOSS.USE_GLOBAL_MI_LOSS:
        optimizer_global = get_optimizer(config,
                                         model_dict['global_discriminator'],
                                         is_discriminator=True)
        optimizer_dict['global_discriminator'] = optimizer_global
    if config.LOSS.USE_LOCAL_MI_LOSS:
        optimizer_local = get_optimizer(config,
                                        model_dict['local_discriminator'],
                                        is_discriminator=True)
        optimizer_dict['local_discriminator'] = optimizer_local
    if config.LOSS.USE_DOMAIN_TRANSFER_LOSS:
        optimizer_domain = get_optimizer(config,
                                         model_dict['domain_discriminator'],
                                         is_discriminator=True)
        optimizer_dict['domain_discriminator'] = optimizer_domain
    if config.LOSS.USE_VIEW_MI_LOSS:
        optimizer_view = get_optimizer(config,
                                       model_dict['view_discriminator'],
                                       is_discriminator=True)
        optimizer_dict['view_discriminator'] = optimizer_view
    if config.LOSS.USE_JOINTS_MI_LOSS:
        optimizer_joints = get_optimizer(config,
                                         model_dict['joints_discriminator'],
                                         is_discriminator=True)
        optimizer_dict['joints_discriminator'] = optimizer_joints
    if config.LOSS.USE_HEATMAP_MI_LOSS:
        optimizer_heatmap = get_optimizer(config,
                                          model_dict['heatmap_discriminator'],
                                          is_discriminator=True)
        optimizer_dict['heatmap_discriminator'] = optimizer_heatmap

    # resume
    if config.TRAIN.RESUME:
        assert config.TRAIN.RESUME_PATH != '', 'You must designate a path for config.TRAIN.RESUME_PATH, rank: {}'.format(rank)
        if rank == 0:
            logger.info('=> loading model from {}'.format(config.TRAIN.RESUME_PATH))
        # !!! map_location must be cpu, otherwise a lot memory will be allocated on gpu:0.
        state_dict = torch.load(config.TRAIN.RESUME_PATH,
                                map_location=torch.device('cpu'))
        if 'state_dict_base_model' in state_dict:
            if rank == 0:
                logger.info('=> new loading mode')
            for key in model_dict.keys():
                # delete params of the aggregation layer
                if key == 'base_model' and not config.NETWORK.AGGRE:
                    for param_key in list(state_dict['state_dict_base_model'].keys()):
                        if 'aggre_layer' in param_key:
                            state_dict['state_dict_base_model'].pop(param_key)
                model_dict[key].module.load_state_dict(state_dict['state_dict_' + key])
        else:
            if rank == 0:
                logger.info('=> old loading mode')
            # delete params of the aggregation layer
            if not config.NETWORK.AGGRE:
                for param_key in list(state_dict.keys()):
                    if 'aggre_layer' in param_key:
                        state_dict.pop(param_key)
            model_dict['base_model'].module.load_state_dict(state_dict)

    # Traing on server cluster, resumed when interrupted
    start_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.ON_SERVER_CLUSTER:
        start_epoch, model_dict, optimizer_dict, loaded_iteration = load_checkpoint(
            model_dict, optimizer_dict, final_output_dir)
        if args.iteration < loaded_iteration:
            # this training process shold be skipped
            if rank == 0:
                logger.info('=> Skipping training iteration #{}'.format(args.iteration))
            return

    # lr schedulers have different starting points yet share same decay strategy.
    lr_scheduler_dict = {}
    for key in optimizer_dict.keys():
        lr_scheduler_dict[key] = torch.optim.lr_scheduler.MultiStepLR(
            optimizer_dict[key], config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

    # torch.set_num_threads(8)
    # criteria keyed by loss name; MI losses also hold references to the
    # corresponding discriminators in model_dict
    criterion_dict = {}
    criterion_dict['mse_weights'] = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).to(device)
    criterion_dict['mse'] = torch.nn.MSELoss(reduction='mean').to(device)
    if config.LOSS.USE_FUNDAMENTAL_LOSS:
        criterion_dict['fundamental'] = FundamentalLoss(config)
    if config.LOSS.USE_GLOBAL_MI_LOSS or config.LOSS.USE_LOCAL_MI_LOSS:
        criterion_dict['mutual_info'] = MILoss(config, model_dict)
    if config.LOSS.USE_DOMAIN_TRANSFER_LOSS:
        criterion_dict['bce'] = torch.nn.BCELoss().to(device)
    if config.LOSS.USE_VIEW_MI_LOSS:
        criterion_dict['view_mi'] = ViewMILoss(config, model_dict)
    if config.LOSS.USE_JOINTS_MI_LOSS:
        criterion_dict['joints_mi'] = JointsMILoss(config, model_dict)
    if config.LOSS.USE_HEATMAP_MI_LOSS:
        criterion_dict['heatmap_mi'] = HeatmapMILoss(config, model_dict)

    # Data loading code
    if rank == 0:
        logger.info('=> loading dataset')
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + config.DATASET.TRAIN_DATASET)(
        config, config.DATASET.TRAIN_SUBSET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]), config.DATASET.PSEUDO_LABEL_PATH, config.DATASET.NO_DISTORTION)
    valid_dataset = eval('dataset.' + config.DATASET.TEST_DATASET)(
        config, config.DATASET.TEST_SUBSET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]), '', config.DATASET.NO_DISTORTION)

    # Debug ##################
    # print('len of mixed dataset:', len(train_dataset))
    # print('len of multiview h36m dataset:', len(valid_dataset))

    train_loader, train_sampler = get_training_loader(train_dataset, config)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE,  # no need to multiply len(gpus)
        shuffle=False,
        num_workers=int(config.WORKERS / num_gpus),
        pin_memory=False)

    best_perf = 0
    best_model = False
    dist.barrier()
    for epoch in range(start_epoch, config.TRAIN.END_EPOCH):
        for lr_scheduler in lr_scheduler_dict.values():
            lr_scheduler.step()
        # reshuffle the distributed sampler deterministically per epoch
        train_sampler.set_epoch(epoch)
        train(config, train_loader, model_dict, criterion_dict, optimizer_dict,
              epoch, final_output_dir, writer_dict, rank)

        perf_indicator = validate(config, valid_loader, valid_dataset,
                                  model_dict, criterion_dict, final_output_dir,
                                  writer_dict, rank)

        # only rank 0 tracks the best metric and writes checkpoints
        if rank == 0:
            if perf_indicator > best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False

            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_dict = {
                'epoch': epoch + 1,
                'model': get_model_name(config),
                'perf': perf_indicator,
                'iteration': args.iteration
            }
            model_state_dict = {}
            optimizer_state_dict = {}
            for key, model in model_dict.items():
                model_state_dict['state_dict_' + key] = model.module.state_dict()
                optimizer_state_dict['optimizer_' + key] = optimizer_dict[key].state_dict()
            save_dict.update(model_state_dict)
            save_dict.update(optimizer_state_dict)
            save_checkpoint(save_dict, best_model, final_output_dir)
        dist.barrier()

    if rank == 0:
        final_model_state_file = os.path.join(final_output_dir,
                                              'final_state.pth.tar')
        logger.info('saving final model state to {}'.format(final_model_state_file))
        torch.save(model_state_dict, final_model_state_file)
        writer_dict['writer'].close()
    print('Rank {} exit'.format(rank))
# GPU settings gpus = tf.config.list_physical_devices("GPU") if gpus: for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) cfg = get_config_params(Config.TRAINING_CONFIG_NAME) hrnet = get_model(cfg) print_model_summary(hrnet) # Dataset coco = CocoDataset(config_params=cfg, dataset_type="train") dataset, dataset_length = coco.generate_dataset() # loss and optimizer loss = JointsMSELoss() optimizer = tf.optimizers.Adam(learning_rate=1e-3) # metircs loss_metric = tf.metrics.Mean() pck = PCK() accuracy_metric = tf.metrics.Mean() def train_step(batch_data): gt = GroundTruth(cfg, batch_data) images, target, target_weight = gt.get_ground_truth() with tf.GradientTape() as tape: y_pred = hrnet(images, training=True) loss_value = loss(y_pred, target, target_weight) gradients = tape.gradient(loss_value, hrnet.trainable_variables) optimizer.apply_gradients(
def main():
    """Train and/or test an SLP pose-estimation model driven by the global ``opts``.

    Resumes from ``opts.model_dir/checkpoint.pth`` when ``opts.start_epoch`` is
    non-zero and that file exists; otherwise trains from scratch.  The whole
    training loop is skipped when ``opts.if_test`` is set.  A final test pass
    always runs at the end and its results are dumped to
    ``opts.rst_dir/<opts.nmTest>.json``.
    """
    # get logger; the file-name suffix distinguishes test-only runs from training runs
    if_test = opts.if_test
    if if_test:
        log_suffix = 'test'
    else:
        log_suffix = 'train'
    logger = Colorlogger(opts.log_dir, '{}_logs.txt'.format(log_suffix))  # avoid overwriting, will append
    opt.set_env(opts)
    opt.print_options(opts, if_sv=True)
    n_jt = SLP_RD.joint_num_ori  # model output channels = number of original SLP joints

    # get model
    model = get_pose_net(in_ch=opts.input_nc, out_ch=n_jt)

    # define loss function (criterion); target weighting is always enabled here
    criterion = JointsMSELoss(
        use_target_weight=True).cuda()

    # dataset adaptors: reader (SLP_RD) wrapped by feeder (SLP_FD)
    SLP_rd_train = SLP_RD(opts, phase='train')
    SLP_fd_train = SLP_FD(SLP_rd_train, opts, phase='train', if_sq_bb=True)
    train_loader = DataLoader(dataset=SLP_fd_train,
                              batch_size=opts.batch_size // len(opts.trainset),
                              shuffle=True, num_workers=opts.n_thread,
                              pin_memory=opts.if_pinMem)
    SLP_rd_test = SLP_RD(opts, phase=opts.test_par)  # which split to test against is controlled in opt
    SLP_fd_test = SLP_FD(SLP_rd_test, opts, phase='test', if_sq_bb=True)
    test_loader = DataLoader(dataset=SLP_fd_test,
                             batch_size=opts.batch_size // len(opts.trainset),
                             shuffle=False, num_workers=opts.n_thread,
                             pin_memory=opts.if_pinMem)

    # visualizer: only plots losses here; a loss log comes with it
    if opts.display_id > 0:
        visualizer = Visualizer(opts)
    else:
        visualizer = None

    # optimizer and (optional) checkpoint resume
    best_perf = 0.0
    last_epoch = -1
    optimizer = Adam(model.parameters(), lr=opts.lr)
    checkpoint_file = os.path.join(opts.model_dir, 'checkpoint.pth')
    if 0 == opts.start_epoch or not path.exists(checkpoint_file):  # from scratch
        begin_epoch = 0  # either explicitly requested or no checkpoint exists: start from 0
        losses = []  # for tracking model performance
        accs = []
    else:  # resume from checkpoint
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        # NOTE(review): weights are loaded before the DataParallel/.cuda() wrap
        # below, so the checkpoint must hold the bare (module) state dict
        model.load_state_dict(checkpoint['state_dict'])
        losses = checkpoint['losses']
        accs = checkpoint['accs']
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))
    milestones = opts.lr_dec_epoch
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones, opts.lr_dec_factor, last_epoch=last_epoch
    )  # scheduler resumes at `last_epoch` when restarting from a checkpoint
    if opts.epoch_step > 0:
        end_epoch = min(opts.end_epoch, opts.start_epoch + opts.epoch_step)
    else:
        end_epoch = opts.end_epoch

    dump_input = torch.rand((1, opts.input_nc, opts.sz_pch[1], opts.sz_pch[0]))
    logger.info(get_model_summary(model, dump_input))
    model = torch.nn.DataParallel(model, device_ids=opts.gpu_ids).cuda()
    n_iter = opts.trainIter  # iteration cap per epoch, only for quick test purposes

    if not if_test:
        for epoch in range(begin_epoch, end_epoch):
            if opts.display_id > 0:
                visualizer.reset()  # clean up the vis
            # train for one epoch
            rst_trn = train(train_loader, SLP_rd_train, model, criterion,
                            optimizer, epoch, n_iter=n_iter, logger=logger,
                            opts=opts, visualizer=visualizer)
            losses += rst_trn['losses']
            accs += rst_trn['accs']
            # evaluate on validation set; saves preds, gt, preds in ori,
            # dist_normed for later recovery
            rst_test = validate(
                test_loader, SLP_rd_test, model, criterion, n_iter=n_iter,
                logger=logger, opts=opts)
            pck_all = rst_test['pck']
            perf_indicator = pck_all[-1][-1]  # the last entry: total PCKh@0.5
            pckh05 = np.array(pck_all)[:, -1]  # last column: per-joint PCKh@0.5
            titles_c = list(
                SLP_rd_test.joints_name[:SLP_rd_test.joint_num_ori]) + ['total']
            ut.prt_rst([pckh05], titles_c, ['pckh0.5'], fn_prt=logger.info)
            lr_scheduler.step()  # new PyTorch convention: step after the epoch's updates
            if perf_indicator >= best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False
            logger.info('=> saving checkpoint to {}'.format(opts.model_dir))
            ckp = {
                'epoch': epoch + 1,  # next epoch to run; after finishing epoch 0 this is 1
                'model': opts.model,
                'state_dict': model.module.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
                'losses': losses,  # kept so a resumed run can keep appending
                'accs': accs,
            }
            # save every epoch; additionally save a copy when it is the best so far
            torch.save(ckp, os.path.join(opts.model_dir, 'checkpoint.pth'))
            if best_model:
                torch.save(ckp, os.path.join(opts.model_dir, 'model_best.pth'))
        # only after the last epoch: save the bare (non-DataParallel) weights
        final_model_state_file = os.path.join(opts.model_dir, 'final_state.pth')
        logger.info(
            '=> saving final model state to {}'.format(final_model_state_file))
        torch.save(model.module.state_dict(), final_model_state_file)

    # single test with the (just trained or loaded) model, save the result
    logger.info('----run final test----')
    rst_test = validate(
        test_loader, SLP_rd_test, model, criterion, n_iter=n_iter,
        logger=logger, opts=opts,
        if_svVis=True)  # saves preds, gt, preds in ori, dist_normed for recovery
    pck_all = rst_test['pck']
    # perf_indicator = pck_all[-1][-1]  # last entry of list
    pckh05 = np.array(pck_all)[:, -1]
    titles_c = list(
        SLP_rd_test.joints_name[:SLP_rd_test.joint_num_ori]) + ['total']
    ut.prt_rst([pckh05], titles_c, ['pckh0.5'], fn_prt=logger.info)
    pth_rst = path.join(opts.rst_dir, opts.nmTest + '.json')
    with open(pth_rst, 'w') as f:
        json.dump(rst_test, f)
def train(model: nn.Module, train_loader: torch.utils.data.DataLoader,
          eval_loader: torch.utils.data.DataLoader) -> None:
    """
    Train a PoseNet model given parameters in config.

    Runs `config.NUM_EPOCHS` epochs; after each epoch the model is evaluated
    on `eval_loader` via `evaluate`.  Intermediate weights are written every
    `config.SAVE_ITER_FREQ` iterations (when that setting is non-zero).

    Arguments:
        model (nn.Module): PoseNet instance
        train_loader (torch.utils.data.DataLoader): Dataloader for training data
        eval_loader (torch.utils.data.DataLoader): Dataloader for validation data
    """
    # optimizer class is looked up by name in torch.optim (e.g. "Adam")
    optimizer = getattr(optim, config.OPTIMIZER)(model.parameters(),
                                                 lr=config.LEARNING_RATE,
                                                 weight_decay=config.WEIGHT_DECAY)
    overall_iter = 0
    JointLoss = JointsMSELoss()
    BoneSymmLoss = BoneSymmMSELoss()

    logger.info("[+] Starting training.")
    for epoch in range(config.NUM_EPOCHS):
        model.train()
        for batch_idx, sample in enumerate(train_loader):
            image, pose3d, pose2d, heatmap2d = sample['image'], sample[
                'pose3d'], sample['pose2d'], sample['heatmap2d']
            if config.USE_GPU:
                image, pose3d, pose2d, heatmap2d = to_cuda(
                    [image, pose3d, pose2d, heatmap2d])

            optimizer.zero_grad()
            output = model(image)
            # four loss terms: 2D heatmap regression, 3D / 2D cycle
            # consistency, and a bone-symmetry prior on the predicted 3D pose
            termwise_loss = {
                'heatmap': JointLoss(output['hrnet_maps'], heatmap2d),
                'cyclic_inward': F.mse_loss(output['cycl_martinez']['pose_3d'],
                                            pose3d),
                'cyclic_outward': F.mse_loss(output['cycl_martinez']['pose_2d'],
                                             pose2d),
                'bone_symm': BoneSymmLoss(output['cycl_martinez']['pose_3d'])
            }
            # total loss is the config-weighted sum of the four terms
            loss = config.posenet.LOSS_COEFF['hrnet_maps'] * termwise_loss['heatmap'] + \
                config.posenet.LOSS_COEFF['cycl_martinez']['pose_3d'] * termwise_loss['cyclic_inward'] + \
                config.posenet.LOSS_COEFF['cycl_martinez']['pose_2d'] * termwise_loss['cyclic_outward'] + \
                config.posenet.LOSS_COEFF['bone_symm'] * termwise_loss['bone_symm']
            loss.backward()
            optimizer.step()

            if batch_idx % config.PRINT_BATCH_FREQ == 0:
                # MPJPE is de-normalized with the dataset std for reporting
                mpjpe = MPJPE_(output['cycl_martinez']['pose_3d'].detach(),
                               pose3d.detach(),
                               train_loader.dataset.std.numpy())
                logger.debug(
                    f'Train Epoch: {epoch} [{batch_idx}]\tTotal Loss: {loss.item():.6f}\tMPJPE: {mpjpe:.6f}'
                )
                logger.debug(print_termwise_loss(termwise_loss))

            overall_iter += 1
            # periodic snapshot, skipped entirely when SAVE_ITER_FREQ is falsy
            if config.SAVE_ITER_FREQ and overall_iter % config.SAVE_ITER_FREQ == 0:
                torch.save(
                    model.state_dict(),
                    os.path.join(config.LOG_PATH,
                                 config.NAME + f"-iter={overall_iter}"))

        # validate once per epoch
        evaluate(model, eval_loader, epoch)

    logger.info("[+] Finished training.")
def main():
    """Run single-image pose inference and write a keypoint visualization.

    Loads network weights (``config.TEST.MODEL_FILE`` or the run's
    ``final_state.pth.tar``), reads one image (``args.input_image`` or a
    hard-coded default path), predicts keypoints for a fixed 320x320 box at
    the image origin, and writes the result to ``output.jpg``.
    """
    args = parse_args()
    reset_config(config, args)
    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'valid')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME + '.get_pose_net')(
        config, is_train=False)

    if config.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(config.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(config.TEST.MODEL_FILE))
    else:
        model_state_file = os.path.join(final_output_dir,
                                        'final_state.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    # Load an image
    if args.input_image:
        image_file = args.input_image
    else:
        image_file = '/home/bh/Downloads/g.jpg'
    data_numpy = cv2.imread(image_file,
                            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    # object detection box, hard-coded to the top-left 320x320 region
    box = [0, 0, 320, 320]
    c, s = _box2cs(box, data_numpy.shape[0], data_numpy.shape[1])

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # renamed from `input` to avoid shadowing the builtin
    input_tensor = transform(data_numpy).unsqueeze(0)

    # BUGFIX: `if args.threshold:` silently ignored an explicit threshold of 0;
    # compare against None so any user-supplied value is honored.
    threshold = args.threshold if args.threshold is not None else 0.5
    print('threshold:{}'.format(threshold))

    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        # compute output heatmap
        output = model(input_tensor)
        # compute coordinates in heatmap space
        preds, maxvals = get_final_preds(config,
                                         output.clone().cpu().numpy(),
                                         np.asarray([c]), np.asarray([s]))
        print('pred: {} maxval: {}'.format(preds, maxvals))

        # plot: draw every keypoint whose confidence exceeds the threshold;
        # heatmaps are at 1/4 input resolution, hence the x4 upscale
        image = data_numpy.copy()
        for i in range(preds[0].shape[0]):
            mat = preds[0][i]
            val = maxvals[0][i]
            x, y = int(mat[0]), int(mat[1])
            if val > threshold:
                cv2.circle(image, (x * 4, y * 4), 2, (255, 0, 0), 2)

        cv2.imwrite('output.jpg', image)
def main_worker(gpu, ngpus_per_node, args, final_output_dir, tb_log_dir):
    """Per-process entry point for multi-GPU distributed training.

    Args:
        gpu: local GPU index this process drives.
        ngpus_per_node: GPUs on this node; used to derive the global rank and
            to select one "chief" process per node for logging/copying.
        args: parsed CLI arguments; ``args.rank`` comes in as the node rank
            and is rewritten to the global process rank here.
        final_output_dir: directory for checkpoints and the copied model file.
        tb_log_dir: directory for TensorBoard event files.
    """
    args.gpu = gpu
    # node rank -> global process rank
    args.rank = args.rank * ngpus_per_node + gpu

    print('Init process group: dist_url: {}, world_size: {}, rank: {}'.format(
        cfg.DIST_URL, args.world_size, args.rank))
    dist.init_process_group(backend=cfg.DIST_BACKEND, init_method=cfg.DIST_URL,
                            world_size=args.world_size, rank=args.rank)

    update_config(cfg, args)

    # setup logger
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=True)
    logger.info(
        get_model_summary(model, torch.zeros(1, 3, *cfg.MODEL.IMAGE_SIZE)))

    # copy model file (chief process of each node only)
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0):
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
            final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0):
        dump_input = torch.rand(
            (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
        writer_dict['writer'].add_graph(model, (dump_input, ))
        # logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.MODEL.SYNC_BN:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

    torch.cuda.set_device(args.gpu)
    model.cuda(args.gpu)
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[args.gpu])

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda(args.gpu)

    # Data loading code
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
    )
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
    )

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=(train_sampler is None),
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
        sampler=train_sampler
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY
    )
    logger.info(train_loader.dataset)

    best_perf = -1
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # BUGFIX: without set_epoch() the DistributedSampler reuses the same
        # shuffling order every epoch (see DistributedSampler documentation).
        train_sampler.set_epoch(epoch)

        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)
        # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`.
        lr_scheduler.step()

        # evaluate on validation set
        perf_indicator = validate(
            args, cfg, valid_loader, valid_dataset, model, criterion,
            final_output_dir, tb_log_dir, writer_dict
        )

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        # only the global chief process writes checkpoints
        if not cfg.MULTIPROCESSING_DISTRIBUTED or (
                cfg.MULTIPROCESSING_DISTRIBUTED and args.rank == 0):
            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint({
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    # each process saves its own final state, disambiguated by GPU index
    final_model_state_file = os.path.join(
        final_output_dir, 'final_state{}.pth.tar'.format(gpu)
    )
    logger.info('saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    """Visualize multi-person pose predictions for every image in the test set.

    Runs the model on person crops produced by ``img_coco.IMGCOCO``, draws the
    predicted keypoints and writes each visualization to ``results/<imname>``.
    Everything after the first ``return`` is the original evaluation path and
    is intentionally unreachable.
    """
    args = parse_args()
    update_config(cfg, args)
    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')
    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False
    )

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        # strict=False tolerates missing/extra keys in the checkpoint
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
    else:
        model_state_file = os.path.join(
            final_output_dir, 'final_state.pth'
        )
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))
    # NOTE(review): the model is never moved to GPU before the early return
    # below, so inference presumably runs on CPU — confirm.
    model.eval()

    # Data loading code
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )
    print(cfg.DATASET.DATASET)
    print(cfg.DATASET.ROOT)
    print(cfg.DATASET.TEST_SET)
    img_sets = img_coco.IMGCOCO(cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET,
                                False,
                                transforms.Compose([transforms.ToTensor(),
                                                    normalize, ]))
    all_imgids = img_sets.image_set
    with torch.no_grad():
        for idx, imid in enumerate(all_imgids):
            # if idx >= 20:
            #     break
            persons, all_bbs, all_scales, ori_img, imname = \
                img_sets.generate_pose_input(imid)
            all_pts = []
            for pid, person in enumerate(persons):
                outputs = model(person)
                # print(outputs.numpy().shape)
                # empty center/scale lists: decode heatmap coordinates directly
                preds, maxvals = get_final_preds(
                    cfg, outputs.clone().cpu().numpy(), [], [])
                kpts = preds[0, :] * 4  # heatmaps are 1/4 input resolution
                all_pts.append(kpts)
                # print(kpts)
                # print(kpts.astype(np.int32))
                # draw_kpts(ori_persons[pid], kpts)
                # cv2.imshow('people', person)
                # cv2.waitKey()
            vis_img = draw_kpts(ori_img, all_bbs, all_pts, all_scales)
            out_path = os.path.join('results', imname)
            cv2.imwrite(out_path, vis_img)
    return

    # ---- unreachable below this point: original validation path kept for reference ----
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([transforms.ToTensor(), normalize, ]))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT
    ).cuda()

    # Data loading code
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )
    return
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=True
    )

    # evaluate on validation set
    validate(cfg, valid_loader, valid_dataset, model, criterion,
             final_output_dir, tb_log_dir)
train_dataset = eval('dataset.' + config.DATASET.DATASET)( config, config.DATASET.ROOT, config.DATASET.TRAIN_SET, True, transforms.Compose([ transforms.ToTensor(), normalize, ])) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config.TRAIN.BATCH_SIZE * len(gpus), shuffle=config.TRAIN.SHUFFLE, num_workers=config.WORKERS, pin_memory=True) ## model inference start criterion = JointsMSELoss(use_target_weight=config.LOSS.USE_TARGET_WEIGHT, use_gain_loss=config.LOSS.USE_GAIN_LOSS) train_loader = iter(train_loader) for i in range(10): input, target, target_weight, meta = next(train_loader) input = input.cuda() x1, x2, x3, x4, x5, x6, x = model(input) mean = (0.485, 0.456, 0.406) std = (0.229, 0.224, 0.225) dtype = input.dtype mean = torch.as_tensor(mean, dtype=dtype, device=input.device) std = torch.as_tensor(std, dtype=dtype, device=input.device) if mean.ndim == 1: mean = mean.view(-1, 1, 1) if std.ndim == 1:
def main():
    """Standard single-machine training entry point for HRNet/ResNet pose models."""
    # parse the command-line arguments
    args = parse_args()
    # update cfg from the parsed arguments
    update_config(cfg, args)

    # create a logger to record the training process
    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')
    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # build the network from the config file; get_pose_net of either
    # models.pose_hrnet or models.pose_resnet returns the network structure
    print('models.' + cfg.MODEL.NAME + '.get_pose_net')
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # copy model file (lib/models/<name>.py) into the output directory
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    # TensorBoard writer for training curves
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # graph visualization of the model
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))
    logger.info(get_model_summary(model, dump_input))

    # enable multi-GPU training
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code: input image normalization
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # build the training and validation datasets
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    # checkpoint loading and optimization-strategy setup
    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    # main training loop
    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # NOTE(review): stepping the scheduler at epoch start follows the
        # pre-1.1 PyTorch convention this codebase was written against
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    # save the final (unwrapped) model state
    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    """Train a multiview pose network (single-view backbone + cross-view fusion)."""
    args = parse_args()
    reset_config(config, args)
    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    # the single-view backbone is built first, then wrapped by the multiview net
    backbone_model = eval('models.' + config.BACKBONE_MODEL + '.get_pose_net')(
        config, is_train=True)
    model = eval('models.' + config.MODEL + '.get_multiview_pose_net')(
        backbone_model, config)
    print(model)

    # copy the model definition and the config file into the output
    # directory for reproducibility
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../../lib/models', config.MODEL + '.py'),
        final_output_dir)
    shutil.copy2(args.cfg, final_output_dir)
    logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    optimizer = get_optimizer(config, model)
    start_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME:
        start_epoch, model, optimizer = load_checkpoint(model, optimizer,
                                                        final_output_dir)

    # NOTE(review): no last_epoch is passed, so when resuming the LR
    # schedule restarts from the beginning — confirm this is intended
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

    # Data loading code
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + config.DATASET.TRAIN_DATASET)(
        config, config.DATASET.TRAIN_SUBSET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + config.DATASET.TEST_DATASET)(
        config, config.DATASET.TEST_SUBSET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    best_perf = 0.0
    best_model = False
    for epoch in range(start_epoch, config.TRAIN.END_EPOCH):
        # pre-1.1 PyTorch convention: scheduler stepped at epoch start
        lr_scheduler.step()

        train(config, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, writer_dict)
        perf_indicator = validate(config, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint({
            'epoch': epoch + 1,
            'model': get_model_name(config),
            'state_dict': model.module.state_dict(),
            'perf': perf_indicator,
            'optimizer': optimizer.state_dict(),
        }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth.tar')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    """Train a pose network end-to-end as configured by the CLI / config file.

    Builds the model, wires up TensorBoard, trains for the configured epoch
    range with per-epoch validation, checkpoints after every epoch, and
    stores the final weights in ``final_output_dir/final_state.pth.tar``.
    """
    args = parse_args()
    reset_config(config, args)
    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, "train")
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval("models." + config.MODEL.NAME + ".get_pose_net")(
        config, is_train=True)

    # copy model file into the output directory for reproducibility
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, "../lib/models", config.MODEL.NAME + ".py"),
        final_output_dir,
    )

    writer_dict = {
        "writer": SummaryWriter(log_dir=tb_log_dir),
        "train_global_steps": 0,
        "valid_global_steps": 0,
    }

    # graph visualization of the model
    dump_input = torch.rand((
        config.TRAIN.BATCH_SIZE,
        3,
        config.MODEL.IMAGE_SIZE[1],
        config.MODEL.IMAGE_SIZE[0],
    ))
    writer_dict["writer"].add_graph(model, (dump_input, ), verbose=False)

    gpus = [int(i) for i in config.GPUS.split(",")]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    optimizer = get_optimizer(config, model)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval("dataset." + config.DATASET.DATASET)(
        config,
        config.DATASET.ROOT,
        config.DATASET.TRAIN_SET,
        True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]),
    )
    valid_dataset = eval("dataset." + config.DATASET.DATASET)(
        config,
        config.DATASET.ROOT,
        config.DATASET.TEST_SET,
        False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]),
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True,
    )

    best_perf = 0.0
    best_model = False
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        # pre-1.1 PyTorch convention: scheduler stepped at epoch start
        lr_scheduler.step()

        # train for one epoch
        train(
            config,
            train_loader,
            model,
            criterion,
            optimizer,
            epoch,
            final_output_dir,
            tb_log_dir,
            writer_dict,
        )

        # evaluate on validation set
        perf_indicator = validate(
            config,
            valid_loader,
            valid_dataset,
            model,
            criterion,
            final_output_dir,
            tb_log_dir,
            writer_dict,
        )

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info("=> saving checkpoint to {}".format(final_output_dir))
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "model": get_model_name(config),
                # BUGFIX: save the unwrapped weights. model.state_dict() on a
                # DataParallel wrapper prefixes every key with "module.",
                # inconsistent with the final save below and forcing loaders
                # to strip the prefix manually.
                "state_dict": model.module.state_dict(),
                "perf": perf_indicator,
                "optimizer": optimizer.state_dict(),
            },
            best_model,
            final_output_dir,
        )

    final_model_state_file = os.path.join(final_output_dir,
                                          "final_state.pth.tar")
    logger.info(
        "saving final model state to {}".format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict["writer"].close()
def main():
    """Evaluate a trained multiview pose network on the test subset.

    Loads either ``config.TEST.MODEL_FILE`` or the run's best/final
    checkpoint — supporting both the new per-sub-model checkpoint layout
    (keys ``state_dict_<name>``) and the old flat state-dict layout — then
    runs ``validate`` once.
    """
    args = parse_args()
    reset_config(config, args)
    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'valid')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    backbone_model = eval('models.' + config.BACKBONE_MODEL + '.get_pose_net')(
        config, is_train=False)
    base_model = eval('models.' + config.MODEL + '.get_multiview_pose_net')(
        backbone_model, config)
    model_dict = {}
    model_dict['base_model'] = base_model

    # the auxiliary losses (and their discriminator sub-models) are
    # force-disabled for evaluation
    config.LOSS.USE_GLOBAL_MI_LOSS = False
    config.LOSS.USE_LOCAL_MI_LOSS = False
    config.LOSS.USE_FUNDAMENTAL_LOSS = False
    # if config.LOSS.USE_GLOBAL_MI_LOSS:
    #     global_discriminator = models.discriminator.GlobalDiscriminator(config)
    #     model_dict['global_discriminator'] = global_discriminator
    # if config.LOSS.USE_LOCAL_MI_LOSS:
    #     local_discriminator = models.discriminator.LocalDiscriminator(config)
    #     model_dict['local_discriminator'] = local_discriminator

    if config.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(config.TEST.MODEL_FILE))
        state_dict = torch.load(config.TEST.MODEL_FILE)
    else:
        model_path = 'model_best.pth.tar' if config.TEST.STATE == 'best' else 'final_state.pth.tar'
        model_state_file = os.path.join(final_output_dir, model_path)
        logger.info('=> loading model from {}'.format(model_state_file))
        state_dict = torch.load(model_state_file)

    if 'state_dict_base_model' in state_dict:
        # new checkpoints hold one state dict per sub-model
        logger.info('=> new loading mode')
        for key, model in model_dict.items():
            # delete params of the aggregation layer when fusion is disabled
            if key == 'base_model' and not config.NETWORK.AGGRE:
                for param_key in list(
                        state_dict['state_dict_base_model'].keys()):
                    if 'aggre_layer' in param_key:
                        state_dict['state_dict_base_model'].pop(param_key)
            model_dict[key].load_state_dict(state_dict['state_dict_' + key])
    else:
        # old checkpoints are a single flat state dict for the base model
        logger.info('=> old loading mode')
        # delete params of the aggregation layer when fusion is disabled
        if not config.NETWORK.AGGRE:
            for param_key in list(state_dict.keys()):
                if 'aggre_layer' in param_key:
                    state_dict.pop(param_key)
        model_dict['base_model'].load_state_dict(state_dict)

    gpus = [int(i) for i in config.GPUS.split(',')]
    for key, model in model_dict.items():
        model_dict[key] = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion_dict = {}
    criterion_dict['mse_weights'] = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()
    criterion_dict['mse'] = torch.nn.MSELoss(reduction='mean').cuda()
    # if config.LOSS.USE_FUNDAMENTAL_LOSS:
    #     criterion_dict['fundamental'] = FundamentalLoss(config)
    # if config.LOSS.USE_GLOBAL_MI_LOSS or config.LOSS.USE_LOCAL_MI_LOSS:
    #     criterion_dict['mutual_info'] = MILoss(config, model_dict)

    # Data loading code
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    valid_dataset = eval('dataset.' + config.DATASET.TEST_DATASET)(
        config,
        config.DATASET.TEST_SUBSET,
        False,  # is_train=False: evaluation split
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]), '', config.DATASET.NO_DISTORTION)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    # evaluate on validation set (single process, hence rank=0, no writer)
    validate(config, valid_loader, valid_dataset, model_dict, criterion_dict,
             final_output_dir, None, rank=0)
def main():
    """Train a pose network, optionally with FPD knowledge distillation.

    When cfg.KD selects the 'FPD' train type, a frozen teacher model is
    built from args.tcfg and fpd_train() distills it into the student;
    otherwise plain train() is used.  Checkpoints, TensorBoard logs and
    the final state dict are written to the experiment output directory.
    """
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # NOTE: both KD values are configured in the *student* config file.
    t_checkpoints = cfg.KD.TEACHER
    train_type = cfg.KD.TRAIN_TYPE
    train_type = get_train_type(train_type, t_checkpoints)
    logger.info('=> train type is {} '.format(train_type))

    if train_type == 'FPD':
        cfg_name = 'student_' + os.path.basename(args.cfg).split('.')[0]
    else:
        cfg_name = os.path.basename(args.cfg).split('.')[0]
    save_yaml_file(cfg_name, cfg, final_output_dir)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # fpd method, default NORMAL
    if train_type == 'FPD':
        # Teacher config = student config overridden by args.tcfg.
        tcfg = cfg.clone()
        tcfg.defrost()
        tcfg.merge_from_file(args.tcfg)
        tcfg.freeze()
        tcfg_name = 'teacher_' + os.path.basename(args.tcfg).split('.')[0]
        save_yaml_file(tcfg_name, tcfg, final_output_dir)

        # teacher model (frozen pretrained weights, evaluation only)
        tmodel = eval('models.' + tcfg.MODEL.NAME + '.get_pose_net')(
            tcfg, is_train=False)
        load_checkpoint(t_checkpoints,
                        tmodel,
                        strict=True,
                        model_info='teacher_' + tcfg.MODEL.NAME)
        tmodel = torch.nn.DataParallel(tmodel, device_ids=cfg.GPUS).cuda()

        # define kd_pose loss function (criterion) and optimizer
        kd_pose_criterion = JointsMSELoss(
            use_target_weight=tcfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # copy model file so the run is reproducible from its output dir
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))
    logger.info(get_model_summary(model, dump_input))

    if cfg.TRAIN.CHECKPOINT:
        load_checkpoint(cfg.TRAIN.CHECKPOINT,
                        model,
                        strict=True,
                        model_info='student_' + cfg.MODEL.NAME)

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # you can choose or replace pose_loss and kd_pose_loss type,
    # including mse, kl, ohkm loss etc.
    # define pose loss function (criterion) and optimizer
    pose_criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        cfg.TRAIN.LR_STEP,
        cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    # evaluate on validation set before training.
    # BUGFIX: tmodel only exists for the FPD train type; the previous
    # unconditional call raised NameError for any other train type.
    if train_type == 'FPD':
        validate(cfg, valid_loader, valid_dataset, tmodel, pose_criterion,
                 final_output_dir, tb_log_dir, writer_dict)
    validate(cfg, valid_loader, valid_dataset, model, pose_criterion,
             final_output_dir, tb_log_dir, writer_dict)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # fpd method, default NORMAL
        if train_type == 'FPD':
            # train for one epoch with distillation from the teacher
            fpd_train(cfg, train_loader, model, tmodel, pose_criterion,
                      kd_pose_criterion, optimizer, epoch, final_output_dir,
                      tb_log_dir, writer_dict)
        else:
            # train for one epoch
            train(cfg, train_loader, model, pose_criterion, optimizer, epoch,
                  final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  pose_criterion, final_output_dir,
                                  tb_log_dir, writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    """Evaluate 2D hand-pose accuracy (per-joint MSE and PCK curves).

    With --is_vis, re-plots previously saved results and exits.
    Otherwise runs the model over the test set, accumulates per-joint
    2D errors and PCK statistics weighted by joint visibility, saves
    them under ./eval2D_results_<EXP_NAME>/ and plots them.
    """
    args = parse_args()
    update_config(cfg, args)
    cfg.defrost()
    cfg.freeze()

    record_prefix = './eval2D_results_'
    if args.is_vis:
        # Visualisation-only mode: load saved metrics and plot.
        result_dir = record_prefix + cfg.EXP_NAME
        mse2d_lst = np.loadtxt(os.path.join(result_dir,
                                            'mse2d_each_joint.txt'))
        PCK2d_lst = np.loadtxt(os.path.join(result_dir, 'PCK2d.txt'))
        plot_performance(PCK2d_lst[1, :], PCK2d_lst[0, :], mse2d_lst)
        exit()

    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_path = args.model_path
    is_vis = args.is_vis

    # FP16 SETTING
    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print(
                "Warning: if --fp16 is not used, static_loss_scale will be ignored."
            )

    model = eval(cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)

    # # calculate GFLOPS
    # dump_input = torch.rand(
    #     (5, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0])
    # )
    # print(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))
    # ops, params = get_model_complexity_info(
    #     model, (3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]),
    #     as_strings=True, print_per_layer_stat=True, verbose=True)
    # input()

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not args.distributed:
        print(
            'Warning: Sync BatchNorm is only supported in distributed training.'
        )

    if args.gpu != -1:
        device = torch.device('cuda:' + str(args.gpu))
        torch.cuda.set_device(args.gpu)
    else:
        device = torch.device('cpu')

    # load model state
    if model_path:
        print("Loading model:", model_path)
        ckpt = torch.load(model_path)  #, map_location='cpu')
        if 'state_dict' not in ckpt.keys():
            state_dict = ckpt
        else:
            state_dict = ckpt['state_dict']
            print('Model epoch {}'.format(ckpt['epoch']))

        # Strip any DataParallel 'module.' prefix from the keys.
        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)
        model.load_state_dict(state_dict, strict=True)

    model.to(device)

    # calculate GFLOPS
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0])).to(device)
    print(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    model.eval()

    # inference_dataset = eval('dataset.{}'.format(cfg.DATASET.TEST_DATASET[0].replace('_kpt','')))(
    #     cfg.DATA_DIR,
    #     cfg.DATASET.TEST_SET,
    #     transform=transform
    # )
    inference_dataset = eval('dataset.{}'.format(
        cfg.DATASET.TEST_DATASET[0].replace('_kpt', '')))(
            cfg.DATA_DIR,
            cfg.DATASET.TEST_SET,
            transforms=build_transforms(cfg, is_train=False))

    batch_size = args.batch_size
    data_loader = torch.utils.data.DataLoader(
        inference_dataset,
        batch_size=batch_size,  #48
        shuffle=False,
        num_workers=min(8, batch_size),  #8
        pin_memory=False)
    print('\nEvaluation loader information:\n' + str(data_loader.dataset))

    n_joints = cfg.DATASET.NUM_JOINTS
    th2d_lst = np.array([i for i in range(1, 50)])  # PCK thresholds (px)
    PCK2d_lst = np.zeros((len(th2d_lst), ))
    mse2d_lst = np.zeros((n_joints, ))
    visibility_lst = np.zeros((n_joints, ))

    print('Start evaluating... [Batch size: {}]\n'.format(
        data_loader.batch_size))
    with torch.no_grad():
        pose2d_mse_loss = JointsMSELoss().to(device)
        infer_time = [0, 0]  # [timed batch count, accumulated seconds]
        start_time = time.time()
        # Progress is reported every ~10% of the loader.
        # BUGFIX: clamp the step to >= 1 — the old
        # `i % (len(data_loader) // period)` raised ZeroDivisionError
        # whenever the loader had fewer than `period` batches.
        period = 10
        progress_step = max(len(data_loader) // period, 1)
        for i, ret in enumerate(data_loader):
            # pose2d_gt: b x 21 x 2 is [u,v] 0<=u<64, 0<=v<64 (heatmap size)
            # visibility: b x 21 vis=0/1
            imgs = ret['imgs']
            pose2d_gt = ret['pose2d']  # b [x v] x 21 x 2
            visibility = ret['visibility']  # b [x v] x 21 x 1

            s1 = time.time()
            if 'CPM' == cfg.MODEL.NAME:
                pose2d_gt = pose2d_gt.view(-1, *pose2d_gt.shape[-2:])
                heatmap_lst = model(
                    imgs.to(device), ret['centermaps'].to(device)
                )  # 6 groups of heatmaps, each of which has size (1,22,32,32)
                heatmaps = heatmap_lst[-1][:, 1:]
                pose2d_pred = data_loader.dataset.get_kpts(heatmaps)
                hm_size = heatmap_lst[-1].shape[-1]  # 32
            else:
                if cfg.MODEL.NAME == 'pose_hrnet_transformer':
                    # imgs: b(1) x (4*seq_len) x 3 x 256 x 256
                    n_batches, seq_len = imgs.shape[0], imgs.shape[1] // 4
                    idx_lst = torch.tensor([4 * i for i in range(seq_len)])
                    imgs = torch.stack([
                        imgs[b, idx_lst + cam_idx] for b in range(n_batches)
                        for cam_idx in range(4)
                    ])  # (b*4) x seq_len x 3 x 256 x 256
                    pose2d_pred, heatmaps_pred, _ = model(
                        imgs.cuda(device))  # (b*4) x 21 x 2
                    # keep only the centre frame of each sequence
                    pose2d_gt = pose2d_gt[:, 4 * (seq_len // 2):4 * (
                        seq_len // 2 + 1)].contiguous().view(
                            -1, *pose2d_pred.shape[-2:])  # (b*4) x 21 x 2
                    visibility = visibility[:, 4 * (seq_len // 2):4 * (
                        seq_len // 2 + 1)].contiguous().view(
                            -1, *visibility.shape[-2:])  # (b*4) x 21
                else:
                    if 'Aggr' in cfg.MODEL.NAME:
                        # imgs: b x (4*5) x 3 x 256 x 256
                        n_batches, seq_len = imgs.shape[0], len(
                            cfg.DATASET.SEQ_IDX)
                        true_batch_size = imgs.shape[1] // seq_len
                        pose2d_gt = torch.cat([
                            pose2d_gt[b, true_batch_size *
                                      (seq_len // 2):true_batch_size *
                                      (seq_len // 2 + 1)]
                            for b in range(n_batches)
                        ], dim=0)
                        visibility = torch.cat([
                            visibility[b, true_batch_size *
                                       (seq_len // 2):true_batch_size *
                                       (seq_len // 2 + 1)]
                            for b in range(n_batches)
                        ], dim=0)
                        imgs = torch.cat([
                            imgs[b, true_batch_size * j:true_batch_size *
                                 (j + 1)] for j in range(seq_len)
                            for b in range(n_batches)
                        ], dim=0)  # (b*4*5) x 3 x 256 x 256
                        heatmaps_pred, _ = model(imgs.to(device))
                    else:
                        pose2d_gt = pose2d_gt.view(-1, *pose2d_gt.shape[-2:])
                        heatmaps_pred, _ = model(
                            imgs.to(device))  # b x 21 x 64 x 64
                    pose2d_pred = get_final_preds(
                        heatmaps_pred, cfg.MODEL.HEATMAP_SOFTMAX)  # b x 21 x 2
                hm_size = heatmaps_pred.shape[-1]  # 64

            if i > 20:  # skip the first batches as GPU warm-up when timing
                infer_time[0] += 1
                infer_time[1] += time.time() - s1

            # rescale to the original image before DLT
            if 'RHD' in cfg.DATASET.TEST_DATASET[0]:
                crop_size, corner = ret['crop_size'], ret['corner']
                crop_size, corner = crop_size.view(-1, 1, 1), corner.unsqueeze(
                    1)  # b x 1 x 1; b x 2 x 1
                pose2d_pred = pose2d_pred.cpu() * crop_size / hm_size + corner
                pose2d_gt = pose2d_gt * crop_size / hm_size + corner
            else:
                orig_width, orig_height = data_loader.dataset.orig_img_size
                pose2d_pred[:, :, 0] *= orig_width / hm_size
                pose2d_pred[:, :, 1] *= orig_height / hm_size
                pose2d_gt[:, :, 0] *= orig_width / hm_size
                pose2d_gt[:, :, 1] *= orig_height / hm_size
            # for k in range(21):
            #     print(pose2d_gt[0,k].tolist(), pose2d_pred[0,k].tolist())
            # input()

            # 2D errors (visibility-masked Euclidean distance per joint)
            pose2d_pred, pose2d_gt, visibility = pose2d_pred.cpu().numpy(
            ), pose2d_gt.numpy(), visibility.squeeze(2).numpy()
            mse_each_joint = np.linalg.norm(pose2d_pred - pose2d_gt,
                                            axis=2) * visibility  # b x 21
            mse2d_lst += mse_each_joint.sum(axis=0)
            visibility_lst += visibility.sum(axis=0)

            for th_idx in range(len(th2d_lst)):
                PCK2d_lst[th_idx] += np.sum(
                    (mse_each_joint < th2d_lst[th_idx]) * visibility)

            if i % progress_step == 0:
                print("[Evaluation]{}% finished.".format(
                    period * i // progress_step))
            #if i == 10:break

        # BUGFIX: only report fps when at least one batch was timed — the
        # old unconditional print divided by zero for loaders <= 21 batches.
        if infer_time[0] and infer_time[1]:
            print('Evaluation spent {:.2f} s\tfps: {:.1f} {:.4f}'.format(
                time.time() - start_time, infer_time[0] / infer_time[1],
                infer_time[1] / infer_time[0]))

        # normalise accumulated errors by per-joint visibility counts
        mse2d_lst /= visibility_lst
        PCK2d_lst /= visibility_lst.sum()

        result_dir = record_prefix + cfg.EXP_NAME
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)

        mse_file, pck_file = os.path.join(
            result_dir,
            'mse2d_each_joint.txt'), os.path.join(result_dir, 'PCK2d.txt')
        print('Saving results to ' + mse_file)
        print('Saving results to ' + pck_file)
        np.savetxt(mse_file, mse2d_lst, fmt='%.4f')
        np.savetxt(pck_file, np.stack((th2d_lst, PCK2d_lst)))

    plot_performance(PCK2d_lst, th2d_lst, mse2d_lst)
def main():
    """Train a single-view pose network and checkpoint the best model.

    Side effects: pins the process to GPU "1" via CUDA_VISIBLE_DEVICES,
    copies the model source file into the output directory, writes
    TensorBoard logs, checkpoints, and a final state dict.
    """
    # Hard-pins the visible device; must happen before CUDA initialises.
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    args = parse_args()
    print('out')
    print(args)
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME + '.get_pose_net')(
        config, is_train=True)

    # copy model file (keeps the run reproducible from its output dir)
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', config.MODEL.NAME + '.py'),
        final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # Trace the graph once with a dummy full-size batch for TensorBoard.
    dump_input = torch.rand(
        (config.TRAIN.BATCH_SIZE, 3, config.MODEL.IMAGE_SIZE[1],
         config.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ), verbose=False)

    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    optimizer = get_optimizer(config, model)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + config.DATASET.DATASET)(
        config, config.DATASET.ROOT, config.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + config.DATASET.DATASET)(
        config, config.DATASET.ROOT, config.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    best_perf = 0.0
    best_model = False
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        lr_scheduler.step()
        #print("model check!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        #for i,p in enumerate(model.parameters()):
        #    print(p.requires_grad)

        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': get_model_name(config),
                'state_dict': model.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth.tar')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    # Save the unwrapped module so the file loads without DataParallel.
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
# NOTE(review): truncated fragment — it begins mid-dict (the tail of a debug
# config enabling GT/pred image and heatmap dumps) and ends mid-dict
# ('h36m_info': {...). The enclosing definition is not visible in this chunk,
# so the code is left byte-identical; reconstruct from the original script
# before editing.
'SAVE_BATCH_IMAGES_GT': True, 'SAVE_BATCH_IMAGES_PRED': True, 'SAVE_HEATMAPS_GT': True, 'SAVE_HEATMAPS_PRED': True } } # cudnn related setting cudnn.benchmark = cfg['CUDNN']['BENCHMARK'] torch.backends.cudnn.deterministic = cfg['CUDNN']['DETERMINISTIC'] torch.backends.cudnn.enabled = cfg['CUDNN']['ENABLED'] model = get_pose_net(cfg, is_train=False).cuda() model.load_state_dict(torch.load(cfg['TEST']['MODEL_FILE']), strict=False) # define loss function (criterion) and optimizer criterion = JointsMSELoss( use_target_weight=cfg['LOSS']['USE_TARGET_WEIGHT']).cuda() test_transforms = transforms.Compose([ transforms.Resize((256, 256)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) # Data loading code files = get_all_files_from_path('/media/filip/HDD1/Images/', "jpg") dataset_json = {"data": []} for file in files: data = { "kp_vis": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "h36m_info": {
def main():
    """Evaluate a 6-fold cross-validation ensemble of pose networks.

    Builds one model per fold, loads that fold's final weights from a
    fixed path, wraps each in DataParallel, and runs validate_cv() over
    the configured test set with all six models.
    """
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # Deduplicated from six copy-pasted build/load/wrap stanzas that
    # differed only in the fold index (cfg768f1 .. cfg768f6).
    fold_models = []
    for fold in range(1, 7):
        fold_model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
            cfg, is_train=False)
        weight_file = (
            'output_768cv/PEdataset/my_hrnet768/cfg768f{}/final_state.pth'.
            format(fold))
        # strict=False mirrors the original per-model loading calls.
        fold_model.load_state_dict(torch.load(weight_file), strict=False)
        fold_models.append(
            torch.nn.DataParallel(fold_model, device_ids=cfg.GPUS).cuda())
    models = fold_models

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=True)

    # evaluate the ensemble on the validation set
    validate_cv(cfg, valid_loader, valid_dataset, models, criterion,
                final_output_dir, tb_log_dir)
def main():
    """Train a multiview pose network and checkpoint the best model.

    Logs the current git state for reproducibility, optionally resumes
    from a checkpoint, and saves a full model state at the end of every
    epoch.
    """
    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')

    # print code version info (for experiment reproducibility)
    repo = Repo('')
    repo_git = repo.git
    working_tree_diff_head = repo_git.diff('HEAD')
    this_commit_hash = repo.commit()
    cur_branches = repo_git.branch('--list')
    logger.info('Current Code Version is {}'.format(this_commit_hash))
    logger.info('Current Branch Info :\n{}'.format(cur_branches))
    logger.info(
        'Working Tree diff with HEAD: \n{}'.format(working_tree_diff_head))

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    backbone_model = eval('models.' + config.BACKBONE_MODEL +
                          '.get_pose_net')(config, is_train=True)
    model = models.multiview_pose_net.get_multiview_pose_net(
        backbone_model, config)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # dump_input = torch.rand(
    #     (config.TRAIN.BATCH_SIZE, 3,
    #      config.NETWORK.NUM_JOINTS,
    #      config.NETWORK.IMAGE_SIZE[1], config.NETWORK.IMAGE_SIZE[0]))
    # writer_dict['writer'].add_graph(model, dump_input)

    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()
    # criterion_fuse = JointsMSELoss(use_target_weight=True).cuda()

    optimizer = get_optimizer(config, model)
    start_epoch = config.TRAIN.BEGIN_EPOCH
    # BUGFIX: ckpt_perf was only assigned inside the RESUME branch, so
    # `best_perf = ckpt_perf` below raised NameError on a fresh run.
    ckpt_perf = 0.0
    if config.TRAIN.RESUME:
        start_epoch, model, optimizer, ckpt_perf = load_checkpoint(
            model, optimizer, final_output_dir)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

    # Data loading
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + config.DATASET.TRAIN_DATASET)(
        config, config.DATASET.TRAIN_SUBSET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + config.DATASET.TEST_DATASET)(
        config, config.DATASET.TEST_SUBSET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        collate_fn=totalcapture_collate,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        collate_fn=totalcapture_collate,
        pin_memory=True)

    best_perf = ckpt_perf
    best_epoch = -1
    best_model = False
    for epoch in range(start_epoch, config.TRAIN.END_EPOCH):
        lr_scheduler.step()

        extra_param = dict()
        # extra_param['loss2'] = criterion_fuse

        train(config, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, writer_dict, **extra_param)

        perf_indicator = validate(config, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, writer_dict,
                                  **extra_param)
        logger.info(
            '=> perf indicator at epoch {} is {}. old best is {} '.format(
                epoch, perf_indicator, best_perf))
        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
            best_epoch = epoch
            logger.info(
                '====> find new best model at end of epoch {}. (start from 0)'.
                format(epoch))
        else:
            best_model = False

        logger.info(
            'epoch of best validation results is {}'.format(best_epoch))
        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': get_model_name(config),
                'state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

        # save final state at every epoch
        final_model_state_file = os.path.join(
            final_output_dir, 'final_state_ep{}.pth.tar'.format(epoch))
        logger.info(
            'saving final model state to {}'.format(final_model_state_file))
        torch.save(model.module.state_dict(), final_model_state_file)

    writer_dict['writer'].close()
def main():
    """Train a pose network with a joint heatmap MSE loss plus a
    coordinate regression loss, checkpointing the best model.
    """
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    # benchmark speeds up fixed-size training but introduces nondeterminism
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=True)  # eval() resolves the dotted factory path

    # copy model file
    this_dir = os.path.dirname(__file__)  # directory of this script
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))  # log model summary

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    #model = torch.nn.DataParallel(model, device_ids=[0]).cuda()  # multi-GPU training

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()
    regress_loss = RegLoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code — normalize with the ImageNet mean/std
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))  # image preprocessing

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    # Resume epoch counter, weights, best perf and optimizer state.
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        cfg.TRAIN.LR_STEP,
        cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, criterion, regress_loss, optimizer,
              epoch, final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, regress_loss, final_output_dir,
                                  tb_log_dir, writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    # Save the unwrapped module so the file loads without DataParallel.
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    """Evaluate a pose network that has an extra second-deconv head.

    Loads the main model from config.TEST.MODEL_FILE (or the saved
    final state), loads the second-deconv head from a hard-coded
    checkpoint, and runs validate() with both modules.
    """
    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME +
                 '.get_pose_net_eca_second_deconv')(config,
                                                    is_train=False).cuda()
    second_deconv = eval('models.' + config.MODEL.NAME +
                         '.get_second_deconv')(config).cuda()
    # NOTE(review): hard-coded checkpoint path for the second-deconv head.
    second_deconv.load_state_dict(
        torch.load(
            'output/coco/pose_resnet_50/256x192_d256x3_adam_lr1e-3/2021-02-15-12-13/model_best.pth.tar'
        ))

    if config.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(config.TEST.MODEL_FILE))
        weight = torch.load(config.TEST.MODEL_FILE)
        weight_keys = weight.keys()
        ## checkpoints trained with DataParallel have keys prefixed 'module.'
        if 'module' in list(weight.keys())[0]:
            new_weight = dict()
            for key in list(weight_keys):
                # strip the 7-character 'module.' prefix
                new_weight[key[7:]] = weight[key]
            model.load_state_dict(new_weight)
        ## otherwise the keys are already unprefixed
        else:
            model.load_state_dict(weight)
    else:
        model_state_file = os.path.join(final_output_dir,
                                        'final_state.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    valid_dataset = eval('dataset.' + config.DATASET.DATASET)(
        config, config.DATASET.ROOT, config.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    # evaluate on validation set
    validate(config, valid_loader, valid_dataset, model, second_deconv,
             criterion, final_output_dir, tb_log_dir)