def main():
    """Evaluate a trained top-down pose network on the configured test split."""
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')
    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    # Log a parameter/FLOP summary with a dummy input of the configured size.
    summary_input = torch.zeros(1, 3, *cfg.MODEL.IMAGE_SIZE).cuda()
    logger.info(get_model_summary(model.cuda(), summary_input))

    if cfg.TEST.MODEL_FILE:
        weights_path = cfg.TEST.MODEL_FILE
        logger.info('=> loading model from {}'.format(weights_path))
        # strict=False: tolerate missing/extra keys in the provided file.
        model.load_state_dict(torch.load(weights_path), strict=False)
    else:
        weights_path = os.path.join(final_output_dir, 'model_best.pth')
        logger.info('=> loading model from {}'.format(weights_path))
        model.load_state_dict(torch.load(weights_path))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # Loss used by the validation routine.
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Evaluation data pipeline (ImageNet normalization).
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    eval_transform = transforms.Compose([transforms.ToTensor(), normalize])
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, eval_transform)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=True)

    # evaluate on validation set
    validate(cfg, valid_loader, valid_dataset, model, criterion,
             final_output_dir, tb_log_dir)
def main():
    """Run multi-scale bottom-up pose inference over several driving datasets
    (cityscapes / kitti / tsinghua) and dump per-image keypoint detections,
    bounding boxes and pose scores as one JSON file per image.
    """
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    transforms_pre = torchvision.transforms.Compose([
        ToNumpy(),
    ])

    # iterate over all datasets
    datasets_root_path = "/media/jld/DATOS_JLD/datasets"
    datasets = ["cityscapes", "kitti", "tsinghua"]
    # testing sets from cityscapes and kitti does not have groundtruth --> processing not required
    datasplits = [["train", "val"], ["train"], ["train", "val", "test"]]
    keypoints_output_root_path = "/media/jld/DATOS_JLD/git-repos/paper-revista-keypoints/results"
    model_name = osp.basename(
        cfg.TEST.MODEL_FILE).split('.')[0]  # Model name + configuration

    # Weight of keypoint scores (like openpifpaf, see
    # https://github.com/vita-epfl/openpifpaf/blob/master/openpifpaf/decoder/annotation.py#L44):
    # the first three keypoints weigh 3x, then normalized to sum to 1.
    # Hoisted out of the split loop (loop-invariant).
    n_keypoints = 17
    # BUGFIX: the array size was hard-coded to 17 instead of using n_keypoints.
    kps_score_weights = numpy.ones((n_keypoints, ))
    kps_score_weights[:3] = 3.0
    # Normalize weights to sum 1
    kps_score_weights /= numpy.sum(kps_score_weights)

    # NOTE: loop variable renamed from `dataset` to avoid shadowing the
    # `dataset` package used by the other entry points in this file.
    for dsid, ds_name in enumerate(datasets):
        dataset_root_path = osp.join(datasets_root_path, ds_name)
        output_root_path = osp.join(keypoints_output_root_path, ds_name)
        for datasplit in datasplits[dsid]:
            # BUGFIX: `loggur` was an undefined name — use the module logger.
            logger.info(f"Processing split {datasplit} of {ds_name}")
            input_img_dir = osp.join(dataset_root_path, datasplit)
            output_kps_json_dir = osp.join(output_root_path, datasplit,
                                           model_name)
            logger.info(f"Input image dir: {input_img_dir}")
            logger.info(f"Output pose JSON dir: {output_kps_json_dir}")
            test_dataset = dsjld.BaseDataset(input_img_dir,
                                             output_kps_json_dir,
                                             transform=transforms_pre)
            test_dataset.generate_io_samples_pairs()

            data_loader = torch.utils.data.DataLoader(test_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      num_workers=0,
                                                      pin_memory=False)
            parser = HeatmapParser(cfg)
            all_preds = []
            all_scores = []
            pbar = tqdm(total=len(test_dataset))
            for i, (img, imgidx) in enumerate(data_loader):
                assert 1 == img.size(0), 'Test batch size should be 1'
                img = img[0].cpu().numpy()
                # size at scale 1.0
                base_size, center, scale = get_multi_scale_size(
                    img, cfg.DATASET.INPUT_SIZE, 1.0,
                    min(cfg.TEST.SCALE_FACTOR))
                with torch.no_grad():
                    final_heatmaps = None
                    tags_list = []
                    # Accumulate heatmaps/tags over all test scales
                    # (largest first).
                    for idx, s in enumerate(
                            sorted(cfg.TEST.SCALE_FACTOR, reverse=True)):
                        input_size = cfg.DATASET.INPUT_SIZE
                        image_resized, center, scale = resize_align_multi_scale(
                            img, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                        image_resized = transforms(image_resized)
                        image_resized = image_resized.unsqueeze(0).cuda()

                        outputs, heatmaps, tags = get_multi_stage_outputs(
                            cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                            cfg.TEST.PROJECT2IMAGE, base_size)
                        final_heatmaps, tags_list = aggregate_results(
                            cfg, s, final_heatmaps, tags_list, heatmaps, tags)

                    final_heatmaps = final_heatmaps / float(
                        len(cfg.TEST.SCALE_FACTOR))
                    tags = torch.cat(tags_list, dim=4)
                    grouped, scores = parser.parse(final_heatmaps, tags,
                                                   cfg.TEST.ADJUST,
                                                   cfg.TEST.REFINE)
                    final_results = get_final_preds(
                        grouped, center, scale,
                        [final_heatmaps.size(3),
                         final_heatmaps.size(2)])
                pbar.update()

                # Save all keypoints of this image in a JSON-serializable dict
                final_json_results = []
                for kps in final_results:
                    kpsdict = {}
                    x = kps[:, 0]
                    y = kps[:, 1]
                    kps_scores = kps[:, 2]
                    kpsdict['keypoints'] = kps[:, 0:3].tolist()
                    # bounding box by means of minmax approach (without zero elements)
                    xmin = numpy.float64(numpy.min(x[numpy.nonzero(x)]))
                    xmax = numpy.float64(numpy.max(x))
                    width = numpy.float64(xmax - xmin)
                    ymin = numpy.float64(numpy.min(y[numpy.nonzero(y)]))
                    ymax = numpy.float64(numpy.max(y))
                    height = numpy.float64(ymax - ymin)
                    kpsdict['bbox'] = [xmin, ymin, width, height]
                    # Calculate pose score as a weighted mean of keypoints scores
                    kpsdict['score'] = numpy.float64(
                        numpy.sum(kps_score_weights *
                                  numpy.sort(kps_scores)[::-1]))
                    final_json_results.append(kpsdict)

                # BUGFIX: the default DataLoader collate wraps the sample
                # index in a tensor, which cannot index a Python list — cast
                # to int (a no-op if the dataset already yields plain ints).
                with open(test_dataset.output_json_files_list[int(imgidx)],
                          "w") as f:
                    json.dump(final_json_results, f)

                all_preds.append(final_results)
                all_scores.append(scores)
            # BUGFIX: pbar is always created and updated above, so always
            # close it (was guarded by cfg.TEST.LOG_PROGRESS).
            pbar.close()
# NOTE(review): this fragment duplicates the setup section of the other
# validation entry points but is not wrapped in a `main()` here — presumably
# a leftover/partial copy; confirm whether it should be removed or completed.
args = parse_args()
update_config(cfg, args)
check_config(cfg)

logger, final_output_dir, tb_log_dir = create_logger(
    cfg, args.cfg, 'valid')

logger.info(pprint.pformat(args))
logger.info(cfg)

# CUDA settings
cudnn.benchmark = cfg.CUDNN.BENCHMARK
cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
cudnn.enabled = cfg.CUDNN.ENABLED

# Build the pose network in inference mode from the configured model module.
model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                           is_train=False)

# Dummy input used only to log a model summary (parameter counts etc.).
rand_input = torch.randn(1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE)
logger.info(get_model_summary(model, rand_input, verbose=cfg.VERBOSE))

# Optional half-precision conversion.
if cfg.FP16.ENABLED:
    model = network_to_half(model)

# Input preprocessing: tensor conversion + ImageNet normalization.
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
])
def main():
    """Validate a bottom-up pose model with multi-scale inference, optional
    rescoring, periodic result images, and final dataset evaluation.
    """
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    data_loader, test_dataset = make_test_dataloader(cfg)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(test_dataset))
    for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'
        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            # Accumulate predictions over all test scales (largest first).
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)
                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)
            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])

        if cfg.RESCORE.USE:
            # BUGFIX: was a bare `except:` (also swallows KeyboardInterrupt /
            # SystemExit) — narrowed to Exception; best-effort behavior kept.
            try:
                scores = rescore_valid(cfg, final_results, scores)
            except Exception:
                print("got one.")

        pbar.update()

        if i % cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            save_valid_image(image,
                             final_results,
                             '{}.jpg'.format(prefix),
                             dataset=test_dataset.name)

        all_preds.append(final_results)
        all_scores.append(scores)

    # BUGFIX: pbar is always created and updated, so always close it
    # (was guarded by cfg.TEST.LOG_PROGRESS).
    pbar.close()

    name_values, _ = test_dataset.evaluate(cfg, all_preds, all_scores,
                                           final_output_dir)

    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(logger, name_value, cfg.MODEL.NAME)
    else:
        _print_name_value(logger, name_values, cfg.MODEL.NAME)
def main():
    """Evaluate 2D hand-pose models: accumulate per-joint pixel error (MSE)
    and PCK over the test set, save metrics to text files, and plot them.
    """
    args = parse_args()
    update_config(cfg, args)
    cfg.defrost()
    cfg.freeze()

    # Prefix of the directory where metric files are written/read.
    record_prefix = './eval2D_results_'
    if args.is_vis:
        # Visualization-only mode: load previously saved metrics and plot.
        result_dir = record_prefix + cfg.EXP_NAME
        mse2d_lst = np.loadtxt(os.path.join(result_dir,
                                            'mse2d_each_joint.txt'))
        PCK2d_lst = np.loadtxt(os.path.join(result_dir, 'PCK2d.txt'))
        # Saved as a stacked (2, T) array: row 0 thresholds, row 1 PCK values.
        plot_performance(PCK2d_lst[1, :], PCK2d_lst[0, :], mse2d_lst)
        exit()

    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_path = args.model_path
    is_vis = args.is_vis

    # FP16 SETTING
    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print(
                "Warning: if --fp16 is not used, static_loss_scale will be ignored."
            )

    model = eval(cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)

    # # calculate GFLOPS
    # dump_input = torch.rand(
    #     (5, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0])
    # )
    # print(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))
    # ops, params = get_model_complexity_info(
    #     model, (3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]),
    #     as_strings=True, print_per_layer_stat=True, verbose=True)
    # input()

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not args.distributed:
        print(
            'Warning: Sync BatchNorm is only supported in distributed training.'
        )

    # Select device: a specific GPU (args.gpu >= 0) or fall back to CPU.
    if args.gpu != -1:
        device = torch.device('cuda:' + str(args.gpu))
        torch.cuda.set_device(args.gpu)
    else:
        device = torch.device('cpu')

    # load model state
    if model_path:
        print("Loading model:", model_path)
        ckpt = torch.load(model_path)  #, map_location='cpu')
        if 'state_dict' not in ckpt.keys():
            state_dict = ckpt
        else:
            state_dict = ckpt['state_dict']
            print('Model epoch {}'.format(ckpt['epoch']))
        # Strip the DataParallel 'module.' prefix so the bare model loads.
        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)
        model.load_state_dict(state_dict, strict=True)

    model.to(device)

    # calculate GFLOPS
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0])).to(device)
    print(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    model.eval()

    # inference_dataset = eval('dataset.{}'.format(cfg.DATASET.TEST_DATASET[0].replace('_kpt','')))(
    #     cfg.DATA_DIR,
    #     cfg.DATASET.TEST_SET,
    #     transform=transform
    # )
    # Resolve the dataset class by name from the config (e.g. 'RHD_kpt' -> 'RHD').
    inference_dataset = eval('dataset.{}'.format(
        cfg.DATASET.TEST_DATASET[0].replace('_kpt', '')))(
            cfg.DATA_DIR,
            cfg.DATASET.TEST_SET,
            transforms=build_transforms(cfg, is_train=False))

    batch_size = args.batch_size
    data_loader = torch.utils.data.DataLoader(
        inference_dataset,
        batch_size=batch_size,  #48
        shuffle=False,
        num_workers=min(8, batch_size),  #8
        pin_memory=False)
    print('\nEvaluation loader information:\n' + str(data_loader.dataset))

    n_joints = cfg.DATASET.NUM_JOINTS
    # PCK thresholds in pixels (1..49).
    th2d_lst = np.array([i for i in range(1, 50)])
    PCK2d_lst = np.zeros((len(th2d_lst), ))
    # Per-joint accumulated error and per-joint visibility counts.
    mse2d_lst = np.zeros((n_joints, ))
    visibility_lst = np.zeros((n_joints, ))

    print('Start evaluating... [Batch size: {}]\n'.format(
        data_loader.batch_size))
    with torch.no_grad():
        pose2d_mse_loss = JointsMSELoss().to(device)  # NOTE(review): unused
        # infer_time = [number of timed batches, accumulated seconds].
        infer_time = [0, 0]
        start_time = time.time()
        for i, ret in enumerate(data_loader):
            # pose2d_gt: b x 21 x 2 is [u,v] 0<=u<64, 0<=v<64 (heatmap size)
            # visibility: b x 21 vis=0/1
            imgs = ret['imgs']
            pose2d_gt = ret['pose2d']  # b [x v] x 21 x 2
            visibility = ret['visibility']  # b [x v] x 21 x 1

            s1 = time.time()
            if 'CPM' == cfg.MODEL.NAME:
                pose2d_gt = pose2d_gt.view(-1, *pose2d_gt.shape[-2:])
                heatmap_lst = model(
                    imgs.to(device), ret['centermaps'].to(device)
                )  # 6 groups of heatmaps, each of which has size (1,22,32,32)
                # Drop the background channel (index 0).
                heatmaps = heatmap_lst[-1][:, 1:]
                pose2d_pred = data_loader.dataset.get_kpts(heatmaps)
                hm_size = heatmap_lst[-1].shape[-1]  # 32
            else:
                if cfg.MODEL.NAME == 'pose_hrnet_transformer':
                    # imgs: b(1) x (4*seq_len) x 3 x 256 x 256
                    n_batches, seq_len = imgs.shape[0], imgs.shape[1] // 4
                    idx_lst = torch.tensor([4 * i for i in range(seq_len)])
                    # Regroup so each camera's frame sequence is one sample.
                    imgs = torch.stack([
                        imgs[b, idx_lst + cam_idx] for b in range(n_batches)
                        for cam_idx in range(4)
                    ])  # (b*4) x seq_len x 3 x 256 x 256
                    pose2d_pred, heatmaps_pred, _ = model(
                        imgs.cuda(device))  # (b*4) x 21 x 2
                    # Keep only the center frame's labels for each camera.
                    pose2d_gt = pose2d_gt[:, 4 * (seq_len // 2):4 * (
                        seq_len // 2 + 1)].contiguous().view(
                            -1, *pose2d_pred.shape[-2:])  # (b*4) x 21 x 2
                    visibility = visibility[:, 4 * (seq_len // 2):4 * (
                        seq_len // 2 + 1)].contiguous().view(
                            -1, *visibility.shape[-2:])  # (b*4) x 21
                else:
                    if 'Aggr' in cfg.MODEL.NAME:
                        # imgs: b x (4*5) x 3 x 256 x 256
                        n_batches, seq_len = imgs.shape[0], len(
                            cfg.DATASET.SEQ_IDX)
                        true_batch_size = imgs.shape[1] // seq_len
                        # Labels of the center frame only.
                        pose2d_gt = torch.cat([
                            pose2d_gt[b, true_batch_size *
                                      (seq_len // 2):true_batch_size *
                                      (seq_len // 2 + 1)]
                            for b in range(n_batches)
                        ],
                                              dim=0)
                        visibility = torch.cat([
                            visibility[b, true_batch_size *
                                       (seq_len // 2):true_batch_size *
                                       (seq_len // 2 + 1)]
                            for b in range(n_batches)
                        ],
                                               dim=0)
                        imgs = torch.cat([
                            imgs[b, true_batch_size * j:true_batch_size *
                                 (j + 1)] for j in range(seq_len)
                            for b in range(n_batches)
                        ],
                                         dim=0)  # (b*4*5) x 3 x 256 x 256
                        heatmaps_pred, _ = model(imgs.to(device))
                    else:
                        pose2d_gt = pose2d_gt.view(-1, *pose2d_gt.shape[-2:])
                        heatmaps_pred, _ = model(
                            imgs.to(device))  # b x 21 x 64 x 64
                    pose2d_pred = get_final_preds(
                        heatmaps_pred, cfg.MODEL.HEATMAP_SOFTMAX)  # b x 21 x 2
                    hm_size = heatmaps_pred.shape[-1]  # 64

            # Skip the first batches when timing (warm-up).
            if i > 20:
                infer_time[0] += 1
                infer_time[1] += time.time() - s1

            # rescale to the original image before DLT
            if 'RHD' in cfg.DATASET.TEST_DATASET[0]:
                crop_size, corner = ret['crop_size'], ret['corner']
                crop_size, corner = crop_size.view(-1, 1,
                                                   1), corner.unsqueeze(
                                                       1)  # b x 1 x 1; b x 2 x 1
                pose2d_pred = pose2d_pred.cpu() * crop_size / hm_size + corner
                pose2d_gt = pose2d_gt * crop_size / hm_size + corner
            else:
                orig_width, orig_height = data_loader.dataset.orig_img_size
                pose2d_pred[:, :, 0] *= orig_width / hm_size
                pose2d_pred[:, :, 1] *= orig_height / hm_size
                pose2d_gt[:, :, 0] *= orig_width / hm_size
                pose2d_gt[:, :, 1] *= orig_height / hm_size
                # for k in range(21):
                #     print(pose2d_gt[0,k].tolist(), pose2d_pred[0,k].tolist())
                # input()

            # 2D errors
            pose2d_pred, pose2d_gt, visibility = pose2d_pred.cpu().numpy(
            ), pose2d_gt.numpy(), visibility.squeeze(2).numpy()

            # import matplotlib.pyplot as plt
            # imgs = cv2.resize(imgs[0].permute(1,2,0).cpu().numpy(), tuple(data_loader.dataset.orig_img_size))
            # for k in range(21):
            #     print(pose2d_gt[0,k],pose2d_pred[0,k],visibility[0,k])
            # for k in range(0,21,5):
            #     fig = plt.figure()
            #     ax1 = fig.add_subplot(131)
            #     ax2 = fig.add_subplot(132)
            #     ax3 = fig.add_subplot(133)
            #     ax1.imshow(cv2.cvtColor(imgs / imgs.max(), cv2.COLOR_BGR2RGB))
            #     plot_hand(ax1, pose2d_gt[0,:,0:2], order='uv')
            #     ax2.imshow(cv2.cvtColor(imgs / imgs.max(), cv2.COLOR_BGR2RGB))
            #     plot_hand(ax2, pose2d_pred[0,:,0:2], order='uv')
            #     ax3.imshow(heatmaps_pred[0,k].cpu().numpy())
            #     plt.show()

            # Per-joint Euclidean error, masked by visibility.
            mse_each_joint = np.linalg.norm(pose2d_pred - pose2d_gt,
                                            axis=2) * visibility  # b x 21
            mse2d_lst += mse_each_joint.sum(axis=0)
            visibility_lst += visibility.sum(axis=0)

            # Count correct (error < threshold) visible joints per threshold.
            for th_idx in range(len(th2d_lst)):
                PCK2d_lst[th_idx] += np.sum(
                    (mse_each_joint < th2d_lst[th_idx]) * visibility)

            period = 10
            # NOTE(review): integer division — raises ZeroDivisionError if
            # len(data_loader) < period; confirm loaders are always larger.
            if i % (len(data_loader) // period) == 0:
                print("[Evaluation]{}% finished.".format(
                    period * i // (len(data_loader) // period)))
            #if i == 10:break
        print('Evaluation spent {:.2f} s\tfps: {:.1f} {:.4f}'.format(
            time.time() - start_time, infer_time[0] / infer_time[1],
            infer_time[1] / infer_time[0]))

        # Average error per joint; overall PCK over all visible joints.
        mse2d_lst /= visibility_lst
        PCK2d_lst /= visibility_lst.sum()

        result_dir = record_prefix + cfg.EXP_NAME
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)

        mse_file, pck_file = os.path.join(
            result_dir,
            'mse2d_each_joint.txt'), os.path.join(result_dir, 'PCK2d.txt')
        print('Saving results to ' + mse_file)
        print('Saving results to ' + pck_file)
        np.savetxt(mse_file, mse2d_lst, fmt='%.4f')
        np.savetxt(pck_file, np.stack((th2d_lst, PCK2d_lst)))

        plot_performance(PCK2d_lst, th2d_lst, mse2d_lst)
def main():
    """Run multi-scale bottom-up pose inference over the HIE dataset and save
    all predictions and scores as JSON (no evaluation performed here).
    """
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    # Dummy input used only to log a model summary.
    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    # HIE test images; batch size must stay 1 (asserted in the loop below).
    test_dataset = HIEDataset(DATA_PATH)
    data_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=False)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(
                # mean=[0.485, 0.456, 0.406],
                # std=[0.229, 0.224, 0.225]
                mean=[0.5, 0.5, 0.5],
                std=[0.5, 0.5, 0.5])
        ])

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(test_dataset)) if cfg.TEST.LOG_PROGRESS else None
    for i, images in enumerate(data_loader):
        # for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'
        image = images[0].cpu().numpy()
        # size at scale 1.0
        if (i % 100 == 0):
            print("Start process images %d" % i)
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))
        # print("Multi-scale end")
        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            # Accumulate heatmaps/tags over all test scales (largest first).
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            # Group heatmap peaks into per-person keypoint sets.
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])

        if cfg.TEST.LOG_PROGRESS:
            pbar.update()

        if i % cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            # logger.info('=> write {}'.format(prefix))
            # save_valid_image(image, final_results, '{}.jpg'.format(prefix), dataset=test_dataset.name)
            # save_valid_image(image, final_results, '{}.jpg'.format(prefix),dataset='HIE20')
            # save_debug_images(cfg, image_resized, None, None, outputs, prefix)

        all_preds.append(final_results)
        all_scores.append(scores)

    if cfg.TEST.LOG_PROGRESS:
        pbar.close()

    # save preds and scores as json
    test_dataset.save_json(all_preds, all_scores)
    print('Save finished!')
def main():
    """Domain-adaptive pose training: first pre-train a domain classifier
    (model_d) against frozen pose features, then alternate adversarial
    training of the pose network (model_p) and the classifier.
    """
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # model_p: pose network; model_d: domain classifier.
    model_p, model_d = eval('models.' + cfg.MODEL.NAME +
                            '.get_adaptive_pose_net')(cfg, is_train=True)

    if cfg.TRAIN.CHECKPOINT:
        logger.info('=> loading model from {}'.format(cfg.TRAIN.CHECKPOINT))
        model_p.load_state_dict(torch.load(cfg.TRAIN.CHECKPOINT))
    else:
        model_state_file = os.path.join(final_output_dir, 'checkpoint.pth')
        logger.info('=> loading model from {}'.format(model_state_file))
        model_p.load_state_dict(torch.load(model_state_file))

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'pre_train_global_steps': 0,
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model_p, (dump_input, ), verbose=False)

    logger.info(get_model_summary(model_p, dump_input))

    model_p = torch.nn.DataParallel(model_p, device_ids=cfg.GPUS).cuda()
    model_d = torch.nn.DataParallel(model_d, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer for pose_net
    criterion_p = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()
    optimizer_p = get_optimizer(cfg, model_p)

    # define loss function (criterion) and optimizer for domain
    criterion_d = torch.nn.BCEWithLogitsLoss().cuda()
    optimizer_d = get_optimizer(cfg, model_d)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_pre_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_PRE_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_pre_loader = torch.utils.data.DataLoader(
        train_pre_dataset,
        batch_size=cfg.TRAIN.PRE_BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    # Synthetic/real labels drive the balanced sampling of the main loader.
    syn_labels = train_dataset._load_syrip_syn_annotations()
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=BalancedBatchSampler(train_dataset, syn_labels),
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    '''
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY
    )
    '''
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model_p.load_state_dict(checkpoint['state_dict'])
        optimizer_p.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    # freeze some layers
    idx = 0
    print('Parametersssssssssssssss')
    for param in model_p.parameters():
        if idx <= 108:  #fix 108 for stage 2 + bottleneck or fix 483 for stage 3 + stage 2+ bottleneck
            param.requires_grad = False
            #print(param.data.shape)
        idx = idx + 1

    lr_scheduler_p = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_p,
        cfg.TRAIN.LR_STEP,
        cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)
    lr_scheduler_d = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_d, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR)

    epoch_D = cfg.TRAIN.PRE_EPOCH
    losses_D_list = []
    acces_D_list = []
    acc_num_total = 0
    num = 0
    losses_d = AverageMeter()

    # Pretrained Stage
    print('Pretrained Stage:')
    print('Start to train Domain Classifier-------')
    for epoch_d in range(epoch_D):  # epoch
        model_d.train()
        model_p.train()
        for i, (input, target, target_weight,
                meta) in enumerate(train_pre_loader):  # iteration
            # compute output for pose_net
            feature_outputs, outputs = model_p(input)
            #print(feature_outputs.size())
            # compute for domain classifier; features are detached so only
            # the classifier receives gradients in this stage.
            domain_logits = model_d(feature_outputs.detach())
            domain_label = (meta['synthetic'].unsqueeze(-1) *
                            1.0).cuda(non_blocking=True)
            # print(domain_label)
            loss_d = criterion_d(domain_logits, domain_label)

            loss_d.backward(retain_graph=True)
            optimizer_d.step()

            # compute accuracy of classifier: logits > 0 predict the
            # positive (synthetic) class under BCEWithLogits.
            acc_num = 0
            for j in range(len(domain_label)):
                if (domain_logits[j] > 0 and domain_label[j] == 1.0) or (
                        domain_logits[j] < 0 and domain_label[j] == 0.0):
                    acc_num += 1
                    acc_num_total += 1
            num += 1
            acc_d = acc_num * 1.0 / input.size(0)
            acces_D_list.append(acc_d)

            optimizer_d.zero_grad()
            losses_d.update(loss_d.item(), input.size(0))

            if i % cfg.PRINT_FREQ == 0:
                msg = 'Epoch: [{0}][{1}/{2}]\t' \
                      'Accuracy_d: {3} ({4})\t' \
                      'Loss_d: {loss_d.val:.5f} ({loss_d.avg:.5f})'.format(
                          epoch_d, i, len(train_pre_loader), acc_d,
                          acc_num_total * 1.0 / num, loss_d=losses_d)
                logger.info(msg)

                writer = writer_dict['writer']
                pre_global_steps = writer_dict['pre_train_global_steps']
                writer.add_scalar('pre_train_loss_D', losses_d.val,
                                  pre_global_steps)
                writer.add_scalar('pre_train_acc_D', acc_d, pre_global_steps)
                writer_dict['pre_train_global_steps'] = pre_global_steps + 1
                losses_D_list.append(losses_d.val)

    print('Training Stage (Step I and II):')
    losses_P_list = []
    acces_P_list = []
    losses_p = AverageMeter()
    acces_p = AverageMeter()
    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler_p.step()

        # train for one epoch
        losses_P_list, losses_D_list, acces_P_list, acces_D_list = train_adaptive(
            cfg, train_loader, model_p, model_d, criterion_p, criterion_d,
            optimizer_p, optimizer_d, epoch, final_output_dir, tb_log_dir,
            writer_dict, losses_P_list, losses_D_list, acces_P_list,
            acces_D_list, acc_num_total, num, losses_p, acces_p, losses_d)

        # evaluate on validation set
        perf_indicator = validate_adaptive(cfg, valid_loader, valid_dataset,
                                           model_p, criterion_p,
                                           final_output_dir, tb_log_dir,
                                           writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model_p.state_dict(),
                'best_state_dict': model_p.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer_p.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model_p.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()

    np.save('./losses_D.npy', np.array(losses_D_list))  # Adversarial-D
    np.save('./losses_P.npy', np.array(losses_P_list))  # P
    np.save('./acces_P.npy', np.array(acces_P_list))  # P
    np.save('./acces_D.npy', np.array(acces_D_list))  # D
# nn.init.constant_(m.bias, 0)


def get_pose_net(cfg, is_train, **kwargs):
    """Build a CPM pose network from the given config.

    Args:
        cfg: experiment config node; ``MODEL.INIT_WEIGHTS`` and
            ``MODEL.PRETRAINED`` control weight initialization.
        is_train: when True (and INIT_WEIGHTS is set), pretrained weights
            are loaded into the model.

    Returns:
        The constructed ``Pose_CPM`` model.
    """
    # BUGFIX: was `Poes_CPM`, an undefined name (typo) — the __main__ block
    # below instantiates the same class as `Pose_CPM`.
    model = Pose_CPM(BasicStage, cfg)
    if is_train and cfg.MODEL.INIT_WEIGHTS:
        model.init_weights(cfg.MODEL.PRETRAINED)
    return model


if __name__ == '__main__':
    # Ad-hoc smoke test: build the model and print a parameter summary.
    from torchsummary import summary
    from utils.utils import get_model_summary
    from utils.receptive_field import receptive_field, receptive_field_for_unit
    from utils.visual import Visualizer
    from utils.ops import *

    x_img = torch.randn((1, 3, 256, 256)).cuda()
    x_center_map = torch.randn((1, 1, 256, 256))
    # print(x_img.size())

    model = Pose_CPM(BasicStage, '').cuda()
    # out = model(x_img)
    # print(model)
    # print(out[0].size())
    # print(summary(model, (3, 256, 256)))
    print(get_model_summary(model, x_img, verbose=True))
    # rec = receptive_field(model, (3, 256, 256))
    # print(rec)
def main():
    """Train a pose network: build model, data loaders, optimizer/scheduler,
    then run the epoch loop with per-epoch validation and checkpointing."""
    args = parse_args()
    # YACS is a lightweight library for defining and managing system
    # configurations, such as those common in software designed for
    # scientific experiments.  These "configs" typically cover concepts like
    # hyperparameters used to train a model or configurable model
    # hyperparameters (e.g. the depth of a convolutional neural network).
    update_config(cfg, args)  # override config entries with CLI arguments

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # eval() executes a string expression and returns its value; here it
    # resolves models.<MODEL_NAME>.get_pose_net dynamically from the config.
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # Dummy input for the TensorBoard graph / model summary.
    # NOTE: IMAGE_SIZE is indexed [1], [0] here — presumably (width, height)
    # in the config, giving an NCHW tensor; confirm against the config docs.
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    # Normalization: input[channel] = (input[channel] - mean[channel]) / std[channel]
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # Load the configured dataset (e.g. MPII) for training and validation.
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    # Resume training state (epoch, best perf, weights, optimizer) if enabled.
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # NOTE(review): scheduler stepped before the epoch, which PyTorch
        # >= 1.1 warns about — kept as-is to preserve the original LR timing.
        lr_scheduler.step()

        # train for one epoch (core.function.train)
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set (core.function.validate)
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main_worker(gpus, ngpus_per_node, args, final_output_dir, tb_log_dir):
    """Per-process training entry point (supports DDP, DataParallel, FP16).

    Builds the pose network, optionally resumes from a checkpoint or loads
    pretrained HRNet weights, wraps the model for (distributed) data
    parallelism, assembles the loss dictionary / optimizer / LR scheduler,
    then runs the train-validate loop, checkpointing from the master process.

    Args:
        gpus: GPU id string for this process (e.g. "0,1"); parsed with eval
            in the non-distributed branch.
        ngpus_per_node: GPUs per node (not used directly in this body).
        args: parsed CLI args (reads .cfg, .rank, .local_rank).
        final_output_dir: directory for checkpoints and copied files.
        tb_log_dir: TensorBoard log directory.
    """
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
    #os.environ['CUDA_VISIBLE_DEVICES']=gpus

    # Parallel setting
    print("Use GPU: {} for training".format(gpus))
    update_config(cfg, args)
    #test(cfg, args)

    # logger setting
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # model initilization
    model = eval(cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=True)

    # load pretrained model before DDP initialization
    checkpoint_file = os.path.join(final_output_dir, 'model_best.pth.tar')
    if cfg.AUTO_RESUME:
        if os.path.exists(checkpoint_file):
            checkpoint = torch.load(checkpoint_file, map_location='cpu')
            state_dict = checkpoint['state_dict']
            # strip the DataParallel "module." prefix so a bare model can load
            for key in list(state_dict.keys()):
                new_key = key.replace("module.", "")
                state_dict[new_key] = state_dict.pop(key)
            model.load_state_dict(state_dict)
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_file, checkpoint['epoch']))
    elif cfg.MODEL.HRNET_PRETRAINED:
        logger.info("=> loading a pretrained model '{}'".format(
            cfg.MODEL.PRETRAINED))
        checkpoint = torch.load(cfg.MODEL.HRNET_PRETRAINED,
                                map_location='cpu')
        state_dict = checkpoint['state_dict']
        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)
        model.load_state_dict(state_dict)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # copy configuration file
    config_dir = args.cfg
    shutil.copy2(os.path.join(args.cfg), final_output_dir)

    # calculate GFLOPS
    # NOTE(review): uses IMAGE_SIZE[0] for both spatial dims — assumes a
    # square input; confirm against the config convention.
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    #ops, params = get_model_complexity_info(
    #    model, (3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]),
    #    as_strings=True, print_per_layer_stat=True, verbose=True)

    # FP16 SETTING
    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print(
                "Warning: if --fp16 is not used, static_loss_scale will be ignored."
            )

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    # BUG FIX: was `cfg.cfg.DISTRIBUTED`, which raises AttributeError the
    # moment SYNC_BN is enabled; every other access in this function uses
    # `cfg.DISTRIBUTED`.
    if cfg.MODEL.SYNC_BN and not cfg.DISTRIBUTED:
        print(
            'Warning: Sync BatchNorm is only supported in distributed training.'
        )

    # Distributed Computing
    master = True
    if cfg.DISTRIBUTED:  # This block is not available
        args.local_rank += int(gpus[0])
        print('This process is using GPU', args.local_rank)
        device = args.local_rank
        master = device == int(gpus[0])
        dist.init_process_group(backend='nccl')
        if cfg.MODEL.SYNC_BN:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if gpus is not None:
            torch.cuda.set_device(device)
            model.cuda(device)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # workers = int(workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[device],
                output_device=device,
                find_unused_parameters=True)
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    else:  # implement this block
        gpu_ids = eval('[' + gpus + ']')
        device = gpu_ids[0]
        print('This process is using GPU', str(device))
        model = torch.nn.DataParallel(model, gpu_ids).cuda(device)

    # Prepare loss functions, keyed by name so train/validate can pick them up.
    criterion = {}
    if cfg.LOSS.WITH_HEATMAP_LOSS:
        criterion['heatmap_loss'] = HeatmapLoss().cuda()
    if cfg.LOSS.WITH_POSE2D_LOSS:
        criterion['pose2d_loss'] = JointsMSELoss().cuda()
    if cfg.LOSS.WITH_BONE_LOSS:
        criterion['bone_loss'] = BoneLengthLoss().cuda()
    if cfg.LOSS.WITH_JOINTANGLE_LOSS:
        criterion['jointangle_loss'] = JointAngleLoss().cuda()

    best_perf = 1e9  # lower-is-better: tracks the best validation loss
    best_model = False
    last_epoch = -1
    # optimizer must be initilized after model initilization
    optimizer = get_optimizer(cfg, model)

    if cfg.FP16.ENABLED:
        optimizer = FP16_Optimizer(
            optimizer,
            static_loss_scale=cfg.FP16.STATIC_LOSS_SCALE,
            dynamic_loss_scale=cfg.FP16.DYNAMIC_LOSS_SCALE,
            verbose=False)

    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    if not cfg.AUTO_RESUME and cfg.MODEL.HRNET_PRETRAINED:
        optimizer.load_state_dict(checkpoint['optimizer'])
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['loss']
        optimizer.load_state_dict(checkpoint['optimizer'])
        # restore TensorBoard step counters if the checkpoint carries them
        if 'train_global_steps' in checkpoint.keys() and \
                'valid_global_steps' in checkpoint.keys():
            writer_dict['train_global_steps'] = checkpoint[
                'train_global_steps']
            writer_dict['valid_global_steps'] = checkpoint[
                'valid_global_steps']

    if cfg.FP16.ENABLED:
        logger.info("=> Using FP16 mode")
        # FP16_Optimizer wraps the real optimizer; the scheduler needs the
        # inner one.
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer.optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=begin_epoch)
    elif cfg.TRAIN.LR_SCHEDULE == 'warmup':
        from utils.utils import get_linear_schedule_with_warmup
        lr_scheduler = get_linear_schedule_with_warmup(
            optimizer=optimizer,
            num_warmup_steps=cfg.TRAIN.WARMUP_EPOCHS,
            num_training_steps=cfg.TRAIN.END_EPOCH - cfg.TRAIN.BEGIN_EPOCH,
            last_epoch=begin_epoch)
    elif cfg.TRAIN.LR_SCHEDULE == 'multi_step':
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=begin_epoch)
    else:
        print('Unknown learning rate schedule!')
        exit()

    # Data loading code
    train_loader_dict = make_dataloader(cfg,
                                        is_train=True,
                                        distributed=cfg.DISTRIBUTED)
    valid_loader_dict = make_dataloader(cfg,
                                        is_train=False,
                                        distributed=cfg.DISTRIBUTED)

    for i, (dataset_name,
            train_loader) in enumerate(train_loader_dict.items()):
        logger.info(
            'Training Loader {}/{}:\n'.format(i + 1, len(train_loader_dict)) +
            str(train_loader.dataset))
    for i, (dataset_name,
            valid_loader) in enumerate(valid_loader_dict.items()):
        logger.info('Validation Loader {}/{}:\n'.format(
            i + 1, len(valid_loader_dict)) + str(valid_loader.dataset))

    #writer_dict['writer'].add_graph(model, (dump_input, ))
    """ Start training """
    start_time = time.time()
    with torch.autograd.set_detect_anomaly(True):
        for epoch in range(begin_epoch + 1, cfg.TRAIN.END_EPOCH + 1):
            epoch_start_time = time.time()
            # shuffle datasets with the sample random seed
            if cfg.DISTRIBUTED:
                for data_loader in train_loader_dict.values():
                    data_loader.sampler.set_epoch(epoch)
            # train for one epoch
            # get_last_lr() returns a list
            logger.info('Start training [{}/{}] lr: {:.4e}'.format(
                epoch, cfg.TRAIN.END_EPOCH - cfg.TRAIN.BEGIN_EPOCH,
                lr_scheduler.get_last_lr()[0]))
            train(cfg,
                  args,
                  master,
                  train_loader_dict,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  final_output_dir,
                  tb_log_dir,
                  writer_dict,
                  logger,
                  fp16=cfg.FP16.ENABLED,
                  device=device)

            # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`.
            lr_scheduler.step()

            # evaluate on validation set
            if not cfg.WITHOUT_EVAL:
                logger.info('Start evaluating [{}/{}]'.format(
                    epoch, cfg.TRAIN.END_EPOCH - 1))
                with torch.no_grad():
                    recorder = validate(cfg,
                                        args,
                                        master,
                                        valid_loader_dict,
                                        model,
                                        criterion,
                                        final_output_dir,
                                        tb_log_dir,
                                        writer_dict,
                                        logger,
                                        device=device)
                val_total_loss = recorder.avg_total_loss
                best_model = False
                if val_total_loss < best_perf:
                    logger.info(
                        'This epoch yielded a better model with total loss {:.4f} < {:.4f}.'
                        .format(val_total_loss, best_perf))
                    best_perf = val_total_loss
                    best_model = True
            else:
                # evaluation disabled: every epoch is checkpointed as "best"
                val_total_loss = 0
                best_model = True

            if master:
                logger.info(
                    '=> saving checkpoint to {}'.format(final_output_dir))
                save_checkpoint(
                    {
                        'epoch': epoch,
                        'model': cfg.EXP_NAME + '.' + cfg.MODEL.NAME,
                        'state_dict': model.state_dict(),
                        'loss': val_total_loss,
                        'optimizer': optimizer.state_dict(),
                        'train_global_steps':
                        writer_dict['train_global_steps'],
                        'valid_global_steps':
                        writer_dict['valid_global_steps']
                    }, best_model, final_output_dir)

            print('\nEpoch {} spent {:.2f} hours\n'.format(
                epoch, (time.time() - epoch_start_time) / 3600))
            #if epoch == 3:break

    if master:
        final_model_state_file = os.path.join(
            final_output_dir, 'final_state{}.pth.tar'.format(gpus))
        logger.info('=> saving final model state to {}'.format(
            final_model_state_file))
        torch.save(model.state_dict(), final_model_state_file)
        writer_dict['writer'].close()
    print('\n[Training Accomplished] {} epochs spent {:.2f} hours\n'.format(
        cfg.TRAIN.END_EPOCH - begin_epoch + 1,
        (time.time() - start_time) / 3600))
def main():
    """Run multi-scale bottom-up pose inference over a video file.

    Loads the model (from TEST.MODEL_FILE or the run's model_best), reads
    frames from args.videoFile at roughly args.inferenceFps, estimates poses
    per frame, draws the joints, and writes both per-frame JPEGs and an
    annotated .avi alongside them.
    """
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)
    pose_dir = prepare_output_dirs(args.outputDir)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir,
                                        'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        # model.load_state_dict(torch.load(model_state_file))
        # The checkpoint stores parameters under a "1." prefix (presumably a
        # Sequential wrapper from FP16 conversion — confirm); copy them in
        # tensor by tensor.
        pretrian_model_state = torch.load(model_state_file)
        for name, param in model.state_dict().items():
            model.state_dict()[name].copy_(pretrian_model_state['1.' + name])

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    # data_loader, test_dataset = make_test_dataloader(cfg)
    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    parser = HeatmapParser(cfg)

    # Loading an video
    vidcap = cv2.VideoCapture(args.videoFile)
    fps = vidcap.get(cv2.CAP_PROP_FPS)
    if fps < args.inferenceFps:
        print('desired inference fps is ' + str(args.inferenceFps) +
              ' but video fps is ' + str(fps))
        exit()
    skip_frame_cnt = round(fps / args.inferenceFps)
    frame_width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    outcap = cv2.VideoWriter(
        '{}/{}_pose.avi'.format(
            args.outputDir,
            os.path.splitext(os.path.basename(args.videoFile))[0]),
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), int(skip_frame_cnt),
        (frame_width, frame_height))

    count = 0
    while vidcap.isOpened():
        total_now = time.time()
        ret, image_bgr = vidcap.read()
        count += 1

        # BUG FIX: was `continue`, which loops forever at end-of-stream —
        # VideoCapture stays "opened" after the last frame while read()
        # keeps returning False.
        if not ret:
            break

        if count % skip_frame_cnt != 0:
            continue

        image_debug = image_bgr.copy()
        now = time.time()
        image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        # image = image_rgb.cpu().numpy()

        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            # run the network at each test scale (largest first) and
            # aggregate the heatmaps/tags
            for idx, s in enumerate(
                    sorted(cfg.TEST.SCALE_FACTOR, reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(
                len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])

        # draw every detected joint on the debug frame
        for person_joints in final_results:
            for joint in person_joints:
                x, y = int(joint[0]), int(joint[1])
                cv2.circle(image_debug, (x, y), 4, (255, 0, 0), 2)

        then = time.time()
        print("Find person pose in: {} sec".format(then - now))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        img_file = os.path.join(pose_dir, 'pose_{:08d}.jpg'.format(count))
        cv2.imwrite(img_file, image_debug)
        outcap.write(image_debug)

    vidcap.release()
    outcap.release()
def main():
    """Train a pruned HRNet pose model: build the pruned network at the
    requested pruning percentage, then run the standard train/validate
    loop with checkpointing."""
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # Fraction of channels/weights to prune; past experiment values kept
    # in the trailing comment for reference.
    percent = args.percent  #0.6133902788734437 #0.8 #0.6133902788734437 #0.8 #0.6133902788734437#0.78
    # NOTE(review): `model.module` is accessed further down but no
    # DataParallel wrap happens in this function — presumably getprunemodel
    # already returns a wrapped (and CUDA-placed) model; confirm, otherwise
    # the save calls below raise AttributeError.
    model = getprunemodel(percent)

    this_dir = os.path.dirname(__file__)
    shutil.copy2(os.path.join(this_dir, '../lib/models/purnpose_hrnet.py'),
                 final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # Dummy NCHW input for the model summary; IMAGE_SIZE indexed as
    # (width, height) -> (H, W) here, matching the sibling training script.
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    # writer_dict['writer'].add_graph(model, (dump_input, ))
    logger.info(get_model_summary(model, dump_input))

    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # ImageNet normalization constants
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    # Resume epoch/weights/optimizer state when enabled.
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    """Active-learning training driver.

    Runs 10 acquisition cycles; each cycle grows the labeled pool by
    `images_per_cycle` samples (loss-prediction-based selection when
    --use_active_learning, random otherwise), retrains the model from
    scratch on the accumulated subset, and records the best validation
    accuracy per cycle.
    """
    # fixed seed so the random-sampling baseline is reproducible
    rand_state = np.random.RandomState(1311)
    args = parse_args()
    update_config(cfg, args)

    # We have 118,287 images total in the training set, so let's choose 5000 images on each cycle,
    # 50000 images total (~1/2 of total)
    images_per_cycle = 5000

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    # pool_idx: still-unlabeled candidates; train_idx: accumulated selection
    pool_idx = list(range(len(train_dataset)))
    train_idx = []
    validation_accuracies = list()
    device = 'cuda'  # if (torch.cuda.is_available()) else 'cpu'
    progress = tqdm.tqdm(range(10))
    for cycle in progress:
        # fresh model every cycle — training restarts from scratch on the
        # enlarged subset
        model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
            cfg, is_train=True)

        writer_dict = {
            'writer': SummaryWriter(log_dir=tb_log_dir),
            'train_global_steps': 0,
            'valid_global_steps': 0,
        }

        dump_input = torch.rand(
            (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
        # writer_dict['writer'].add_graph(model, (dump_input, ))
        logger.info(get_model_summary(model, dump_input))

        if args.use_active_learning:
            # wraps the base model with the loss-prediction head
            model = ActiveLearning(model)
        model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

        # define loss function (criterion) and optimizer
        # Don't use reduction, will will apply 'mean' later.
        criterion = JointsMSELoss(
            use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT,
            reduction='none').cuda()

        # Cycle 0 always samples randomly (no trained loss predictor yet).
        if args.use_active_learning and not cycle == 0:
            indices, losses = choose_indices_loss_prediction_active_learning(
                model,
                cycle,
                rand_state,
                pool_idx,
                train_dataset,
                device,
                count=images_per_cycle,
                subset_factor=5,
                is_human_pose=True)
            train_idx.extend(indices)
            if args.output_superannotate_csv_file is not None:
                write_entropies_csv(train_dataset, indices, losses,
                                    args.output_superannotate_csv_file)
        else:
            train_idx.extend(
                random_indices(pool_idx, rand_state,
                               count=images_per_cycle))

        train_loader = torch.utils.data.DataLoader(
            data.Subset(train_dataset, train_idx),
            batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=cfg.TRAIN.SHUFFLE,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=False,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)

        best_perf = 0.0
        best_accuracy = 0.0
        best_model = False
        last_epoch = -1
        optimizer = get_optimizer(cfg, model)
        begin_epoch = cfg.TRAIN.BEGIN_EPOCH
        checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

        if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
            logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
            checkpoint = torch.load(checkpoint_file)
            begin_epoch = checkpoint['epoch']
            best_perf = checkpoint['perf']
            last_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_file, checkpoint['epoch']))

        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=last_epoch)

        for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
            lr_scheduler.step()

            # train for one epoch
            train(cfg,
                  train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  final_output_dir,
                  tb_log_dir,
                  writer_dict,
                  use_active_learning=args.use_active_learning,
                  active_learning_lamda=0.0001,
                  cycle=cycle)

            valid_model = model
            if args.use_active_learning:
                valid_model = torch.nn.DataParallel(
                    model.module.base_model, device_ids=cfg.GPUS).cuda()
            # evaluate on validation set
            # NOTE(review): `valid_model` is built above but `model` is
            # passed to validate(); either valid_model should be passed or
            # it is dead code — confirm against validate()'s handling of
            # use_active_learning.
            perf_indicator, validation_acc = validate(
                cfg,
                valid_loader,
                valid_dataset,
                model,
                criterion,
                final_output_dir,
                tb_log_dir,
                writer_dict,
                use_active_learning=args.use_active_learning)

            if perf_indicator >= best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False
            if validation_acc >= best_accuracy:
                best_accuracy = validation_acc

            # Martun: Don't save the checkpoints, we don't want to continue the training on the
            # next cycle, we want to start from scratch.
            # logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            #save_checkpoint({
            #    'epoch': epoch + 1,
            #    'model': cfg.MODEL.NAME,
            #    'state_dict': model.state_dict(),
            #    'best_state_dict': model.module.state_dict(),
            #    'perf': perf_indicator,
            #    'optimizer': optimizer.state_dict(),
            #}, best_model, final_output_dir)

        # final_model_state_file = os.path.join(
        #     final_output_dir, 'final_state.pth'
        # )
        # logger.info('=> saving final model state to {}'.format(
        #     final_model_state_file)
        # )
        # torch.save(model.module.state_dict(), final_model_state_file)
        writer_dict['writer'].close()
        validation_accuracies.append(best_accuracy)
        print("{} accuracies: {}".format(
            "Active Learning" if args.use_active_learning else "Random",
            str(validation_accuracies)))
def main():
    """Train a model with a configurable (saliency-style) loss.

    Builds the network named by cfg.MODEL.NAME, selects train/validation
    criteria from cfg.LOSS.NAME, constructs loaders (optionally with a
    RandomIdentitySampler), then runs the epoch loop; the validation metric
    direction (higher/lower is better) is reported by validate() itself.
    """
    args = parse_args()
    update_config(cfg, args)
    setup_seed(cfg.SEED)
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
        [str(x) for x in cfg.GPUS])

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, args.mention, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)
    # print(model)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # NOTE(review): IMAGE_SIZE indexed [0], [1] here, while sibling scripts
    # use [1], [0] — confirm the (width, height) convention for this config.
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]))
    try:
        writer_dict['writer'].add_graph(model, (dump_input, ))
    except Exception as e:
        logger.info(e)
    try:
        logger.info(get_model_summary(model, dump_input))
    # BUG FIX: was a bare `except:`, which also swallows KeyboardInterrupt
    # and SystemExit; the summary is best-effort, so only suppress ordinary
    # exceptions.
    except Exception:
        pass

    # CUDA_VISIBLE_DEVICES already restricts visibility, so device ids are
    # renumbered 0..len(GPUS)-1 here.
    model = torch.nn.DataParallel(model,
                                  device_ids=list(range(len(
                                      cfg.GPUS)))).cuda()

    # define loss function (criterion) and optimizer
    criterion = eval(cfg.LOSS.NAME)(cfg).cuda()
    # The NSS term needs fixation data not available at validation time,
    # presumably — confirm; validation falls back to the KL+CC variant.
    if cfg.LOSS.NAME == 'ModMSE_KL_CC_NSS_Loss':
        criterion_val = ModMSE_KL_CC_Loss(cfg).cuda()
    else:
        criterion_val = criterion

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    logger.info(os.linesep +
                'train_set : {:d} entries'.format(len(train_dataset)))
    logger.info('val_set : {:d} entries'.format(len(valid_dataset)) +
                os.linesep)

    if cfg.DATASET.SAMPLER == "":
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=cfg.TRAIN.SHUFFLE,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=False,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
    elif cfg.DATASET.SAMPLER == "RandomIdentitySampler":
        # identity-balanced batches: NUM_INSTANCES samples per identity
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            sampler=dataset.RandomIdentitySampler(
                train_dataset.images,
                cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
                cfg.DATASET.NUM_INSTANCES),
            batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS) //
            cfg.DATASET.NUM_INSTANCES,
            shuffle=False,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            sampler=dataset.RandomIdentitySampler(
                valid_dataset.images,
                cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
                cfg.DATASET.NUM_INSTANCES),
            batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS) //
            cfg.DATASET.NUM_INSTANCES,
            shuffle=False,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
    else:
        assert False

    # best_perf starts as None: the first validation result always becomes
    # the baseline regardless of metric direction.
    best_perf = None
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    if cfg.TRAIN.WARMUP_EPOCHS > 0:
        lr_scheduler = WarmupMultiStepLR(
            optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            warmup_iters=cfg.TRAIN.WARMUP_EPOCHS,
            last_epoch=last_epoch)
    else:
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()
        # torch.cuda.empty_cache()
        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)
        # torch.cuda.empty_cache()
        # evaluate on validation set
        perf_indicator, is_larger_better = validate(cfg, valid_loader,
                                                    valid_dataset, model,
                                                    criterion_val,
                                                    final_output_dir,
                                                    tb_log_dir, writer_dict)

        if is_larger_better:
            if best_perf is None or perf_indicator >= best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False
        else:
            if best_perf is None or perf_indicator <= best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    """Train a pose-estimation model with 5-fold cross validation on the
    Tiger dataset.

    For every fold a fresh logger/output directory, model, optimizer and
    data loaders are created; checkpoints and the final state dict are
    written to the fold-specific output directory.
    """
    args = parse_args()
    update_config(cfg, args)

    # Add KFold training support
    isKFold = True
    cross_train_set = get_img_Ids(
        'data/tiger/annotations/person_keypoints_train.json')
    # bas_train, base_test = kfold_split_generate(cross_train_set)
    from sklearn.model_selection import KFold
    kFoldNum = 1
    kf = KFold(n_splits=5)  # 5 train/val splits of the Tiger image ids

    for Ftrain, Ftest in kf.split(cross_train_set):
        train_data = [cross_train_set[i] for i in Ftrain]
        val_data = [cross_train_set[i] for i in Ftest]

        # One logger / output dir per fold (fold index appended to the name).
        logger, final_output_dir, tb_log_dir = create_logger(
            cfg, args.cfg, 'train', str(kFoldNum))
        kFoldNum = kFoldNum + 1

        logger.info(pprint.pformat(args))
        logger.info(cfg)

        # cudnn related setting
        cudnn.benchmark = cfg.CUDNN.BENCHMARK
        torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
        torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

        model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
            cfg, is_train=True)

        # copy model file
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
            final_output_dir)
        # logger.info(pprint.pformat(model))

        writer_dict = {
            'writer': SummaryWriter(log_dir=tb_log_dir),
            'train_global_steps': 0,
            'valid_global_steps': 0,
        }

        # IMAGE_SIZE is (width, height); torch tensors are (N, C, H, W).
        dump_input = torch.rand(
            (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
        writer_dict['writer'].add_graph(model, (dump_input, ))

        logger.info(get_model_summary(model, dump_input))

        model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

        # define loss function (criterion) and optimizer
        criterion = JointsMSELoss(
            use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

        # Data loading code
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        print("TRAIN DATA COUNT : " + str(len(train_data)))
        print("VAL DATA COUNT : " + str(len(val_data)))

        train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
            cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ]), isKFold, train_data)
        valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
            cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ]), False, val_data)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=cfg.TRAIN.SHUFFLE,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=False,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)

        best_perf = 0.0
        best_model = False
        last_epoch = -1
        optimizer = get_optimizer(cfg, model)
        begin_epoch = cfg.TRAIN.BEGIN_EPOCH
        checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

        if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
            logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
            checkpoint = torch.load(checkpoint_file)
            begin_epoch = checkpoint['epoch']
            best_perf = checkpoint['perf']
            last_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_file, checkpoint['epoch']))

        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
            last_epoch=last_epoch)

        for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
            # train for one epoch
            train(cfg, train_loader, model, criterion, optimizer, epoch,
                  final_output_dir, tb_log_dir, writer_dict)

            # BUGFIX: in PyTorch >= 1.1 the scheduler must be stepped
            # *after* optimizer.step() (i.e. after training), otherwise the
            # first value of the LR schedule is skipped.
            lr_scheduler.step()

            # evaluate on validation set
            perf_indicator = validate(cfg, valid_loader, valid_dataset,
                                      model, criterion, final_output_dir,
                                      tb_log_dir, writer_dict)

            if perf_indicator >= best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False

            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': cfg.MODEL.NAME,
                    'state_dict': model.state_dict(),
                    'best_state_dict': model.module.state_dict(),
                    'perf': perf_indicator,
                    'optimizer': optimizer.state_dict(),
                }, best_model, final_output_dir)

        final_model_state_file = os.path.join(final_output_dir,
                                              'final_state.pth')
        logger.info('=> saving final model state to {}'.format(
            final_model_state_file))
        torch.save(model.module.state_dict(), final_model_state_file)
        writer_dict['writer'].close()
def main_worker(gpu, ngpus_per_node, args, final_output_dir, tb_log_dir):
    """Per-process entry point for distributed (DDP) pose-net training.

    Spawned once per GPU: joins the process group, builds the model and
    data loaders, then runs the train/validate loop.  Checkpoints are
    written only by rank 0; each process writes its own final state file.

    Args:
        gpu: local GPU index for this process.
        ngpus_per_node: number of GPUs on this node (used to derive rank).
        args: parsed CLI namespace; ``args.rank``/``args.world_size`` must
            already hold the node-level values.
        final_output_dir: directory for checkpoints and model copies.
        tb_log_dir: TensorBoard log directory.
    """
    args.gpu = gpu
    # Global rank = node rank * gpus-per-node + local gpu index.
    args.rank = args.rank * ngpus_per_node + gpu
    print('Init process group: dist_url: {}, world_size: {}, rank: {}'.format(cfg.DIST_URL, args.world_size, args.rank))
    dist.init_process_group(backend=cfg.DIST_BACKEND, init_method=cfg.DIST_URL, world_size=args.world_size, rank=args.rank)

    update_config(cfg, args)

    # setup logger
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(cfg, is_train=True)
    logger.info(get_model_summary(model, torch.zeros(1, 3, *cfg.MODEL.IMAGE_SIZE)))

    # copy model file (only once per node: local rank 0)
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0):
        this_dir = os.path.dirname(__file__)
        shutil.copy2(os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # Log the model graph once per node (local rank 0 only).
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0):
        # IMAGE_SIZE is (width, height); tensor layout is (N, C, H, W).
        dump_input = torch.rand((1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
        writer_dict['writer'].add_graph(model, (dump_input, ))
        # logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.MODEL.SYNC_BN:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

    torch.cuda.set_device(args.gpu)
    model.cuda(args.gpu)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda(args.gpu)

    # Data loading code
    train_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
    )
    valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
        cfg,
        cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
    )

    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    # NOTE(review): train_sampler.set_epoch(epoch) is never called in the
    # epoch loop below, so the shard shuffling order repeats every epoch —
    # confirm whether that is intended.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
        shuffle=(train_sampler is None),
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
        sampler=train_sampler
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY
    )
    logger.info(train_loader.dataset)

    best_perf = -1
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    # Optionally resume epoch counter, best perf, weights and optimizer state.
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`.
        lr_scheduler.step()

        # evaluate on validation set
        perf_indicator = validate(
            args, cfg, valid_loader, valid_dataset, model, criterion,
            final_output_dir, tb_log_dir, writer_dict
        )

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        # Only the global rank-0 process writes checkpoints.
        if not cfg.MULTIPROCESSING_DISTRIBUTED or (
                cfg.MULTIPROCESSING_DISTRIBUTED
                and args.rank == 0
        ):
            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint({
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    # Every process saves its own final state file, suffixed by gpu index.
    final_model_state_file = os.path.join(
        final_output_dir, 'final_state{}.pth.tar'.format(gpu)
    )
    logger.info('saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main(): args = parse_args() update_config(cfg, args) check_config(cfg) # pose_dir = prepare_output_dirs(args.outputDir) logger, final_output_dir, tb_log_dir = create_logger( cfg, args.cfg, 'valid') logger.info(pprint.pformat(args)) logger.info(cfg) # cudnn related setting cudnn.benchmark = cfg.CUDNN.BENCHMARK torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False) dump_input = torch.rand( (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE)) logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE)) if cfg.FP16.ENABLED: model = network_to_half(model) if cfg.TEST.MODEL_FILE: logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True) else: model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar') logger.info('=> loading model from {}'.format(model_state_file)) pretrian_model_state = torch.load(model_state_file) for name, param in model.state_dict().items(): model.state_dict()[name].copy_(pretrian_model_state['1.' + name]) # model.load_state_dict(torch.load(model_state_file)) # model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() model.eval() # Input to the model # batch_size = 1 x = torch.randn(1, 3, 256, 256, requires_grad=True) torch_out = model(x) # Export the model torch.onnx.export( model, # model being run x, # model input (or a tuple for multiple inputs) args. output_onnx, # where to save the model (can be a file or file-like object) export_params= True, # store the trained parameter weights inside the model file opset_version=11, # the ONNX version to export the model to do_constant_folding= True, # whether to execute constant folding for optimization input_names=['input'], # the model's input names output_names=['output1'] # the model's output names )
def main():
    """Train a monocular depth-estimation network with multi-loss rebalancing.

    Builds the backbone/decoder model, then runs an epoch loop in which 78
    individual loss terms are tracked per image.  The loss weights are
    (1) initialized once from the first observed loss magnitudes and
    (2) periodically rebalanced several times per epoch based on how each
    term's value evolves relative to the total loss.  Scores, metrics,
    weights and model snapshots are dumped to CSV/pth files under the
    run's model path.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Arguments
    parser = argparse.ArgumentParser(
        description=
        'Multi-Loss Rebalancing Algorithm for Monocular Depth Estimation')
    parser.add_argument('--backbone',
                        default='PNASNet5Large',
                        type=str,
                        help='DenseNet161 (bs12) / PNASNet5Largea (bs6)')
    parser.add_argument('--decoder_scale',
                        default=1024,
                        type=int,
                        help='valid for PNASNet5Large')
    parser.add_argument('--epochs',
                        default=20,
                        type=int,
                        help='number of total epochs to run')
    parser.add_argument('--lr',
                        '--learning-rate',
                        default=0.0001,
                        type=float,
                        help='initial learning rate')
    parser.add_argument('--bs', default=8, type=int, help='batch size')
    # NOTE(review): type=bool on argparse does not parse "False" as False
    # (any non-empty string is truthy) — confirm callers only rely on the
    # defaults here.
    parser.add_argument('--weight_initialization', default=True, type=bool)
    parser.add_argument('--weight_rebalancing', default=True, type=bool)
    parser.add_argument('--num_weight_rebalancing_per_epoch',
                        default=4,
                        type=int)
    parser.add_argument('--num_save_per_epoch', default=4, type=int)
    parser.add_argument('--lambda_for_adjust_start', default=3, type=float)
    parser.add_argument('--lambda_for_adjust_slope', default=-1.5, type=float)
    parser.add_argument('--lambda_for_adjust_min', default=-3, type=float)
    #parser.add_argument('--train_dataset_path', default='dataset/train_reduced05.zip', type=str)
    #parser.add_argument('--train_dataset_csv_list', default='train_reduced05/train.csv', type=str)
    parser.add_argument('--train_dataset_path',
                        default='dataset/train795.zip',
                        type=str)
    parser.add_argument('--train_dataset_csv_list',
                        default='train795/train.csv',
                        type=str)
    args = parser.parse_args()

    # image size
    original_image_size = [480, 640]
    input_image_size = [288, 384]

    # interpolation function / relu
    interpolate_bicubic_fullsize = nn.Upsample(size=original_image_size,
                                               mode='bicubic')
    relu = nn.ReLU()

    # create model
    model = network_model.create_model(args.backbone, args.decoder_scale)
    print('Summary: All Network')
    print(
        utils_utils.get_model_summary(model,
                                      torch.rand(1, 3, input_image_size[0],
                                                 input_image_size[1]).cuda(),
                                      verbose=True))
    print('Model created.')

    # Training parameters
    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    batch_size = args.bs

    # loading training/testing data
    train_loader, num_train_data = utils_get_data.getTrainingData(
        batch_size, args.train_dataset_path, args.train_dataset_csv_list)

    # Model path
    model_path = utils_utils.make_model_path(args.backbone,
                                             args.decoder_scale, batch_size)

    # train scores
    train_scores = np.zeros((num_train_data, 78))  # 78 scores
    train_metrics = np.zeros((num_train_data, 8))  # 8 metrics

    # loss term
    loss_weights = utils_multi_loss.get_loss_weights()
    loss_initialize_scale = utils_multi_loss.get_loss_initialize_scale()
    # Boolean mask of loss terms that participate (weight > 0 initially).
    loss_valid = np.array(loss_weights) > 0

    # save path
    savePath = model_path + '/weight/loss_weights.csv'
    dataframe = pd.DataFrame(loss_weights)
    dataframe.to_csv(savePath, header=False, index=False)

    # weight rebalancing argument
    weight_initialization = args.weight_initialization
    weight_rebalancing = args.weight_rebalancing
    weight_initialization_done = False
    last_rebalancing_iter = 0
    previous_total_loss = 0
    previous_loss = 0

    # iter/epoch
    iter_per_epoch = len(train_loader)

    # save iteration
    iter_list_save = utils_utils.get_notable_iter(
        iter_per_epoch, num_per_epoch=args.num_save_per_epoch)
    iter_list_rebalancing = utils_utils.get_notable_iter(
        iter_per_epoch, num_per_epoch=args.num_weight_rebalancing_per_epoch)

    # mixed precision + Dataparallel
    if APEX_AVAILABLE == True:
        use_amp = True
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O2",
                                          keep_batchnorm_fp32=True,
                                          loss_scale="dynamic")
    else:
        use_amp = False
    model = nn.DataParallel(model)

    try:
        # try to load epoch1_iter00000
        model_name = "model/epoch01_iter00000.pth"
        model.load_state_dict(torch.load(model_name))
        print('LOAD MODEL ', model_name)
    # NOTE(review): bare except swallows every error (including
    # KeyboardInterrupt); intent here is "best effort resume, else save a
    # fresh epoch-0 snapshot".
    except:
        # save model
        print('THERE IS NO MODEL TO LOAD')
        model_name = model_path + "/model/epoch" + str(0 + 1).zfill(
            2) + '_iter' + str(0).zfill(5) + ".pth"
        print('SAVE MODEL:' + model_path)
        torch.save(model.state_dict(), model_name)

    # Start training...
    for epoch in range(args.epochs):
        print('---------------------------------------------------------')
        print('-------------- TRAINING OF EPOCH ' + str(0 + epoch + 1).zfill(2)
              + 'START ----------------')

        end = time.time()

        # Switch to train mode
        model.train()

        # train parameter: lambda decays linearly with epoch, clamped below.
        current_lambda_for_adjust = max(
            args.lambda_for_adjust_start +
            epoch * args.lambda_for_adjust_slope, args.lambda_for_adjust_min)

        for i, sample_batched in enumerate(train_loader):
            optimizer.zero_grad()

            # Prepare sample and target
            image = torch.autograd.Variable(sample_batched['image'].cuda())
            depth_gt = torch.autograd.Variable(
                sample_batched['depth'].cuda(non_blocking=True))

            # depth gt
            depth_gt_input = depth_gt
            depth_gt_full = interpolate_bicubic_fullsize(depth_gt_input)
            depth_gt_for_loss = depth_gt_input
            depth_gt_for_loss = depth_gt_for_loss.cuda()
            # Metrics are computed on a center crop of the full-size map,
            # clamped to a small positive floor via relu(x - eps) + eps.
            depth_gt_for_metric = (
                relu(depth_gt_full[:, :, 0 + 20:480 - 20, 0 + 24:640 - 24] -
                     0.0001) + 0.0001)

            # Predict
            image_input = image
            depth_pred_for_loss = model(image_input).cuda()
            depth_pred_full = interpolate_bicubic_fullsize(depth_pred_for_loss)
            depth_pred_for_metric = (
                relu(depth_pred_full[:, :, 0 + 20:480 - 20, 0 + 24:640 - 24] -
                     0.0001) + 0.0001)

            # current batch size
            current_batch_size = depth_gt_for_loss.size(0)

            # compute loss
            losses = utils_multi_loss.compute_multi_loss(
                depth_pred_for_loss, depth_gt_for_loss, loss_valid)

            # compute iter loss & train_scores
            loss, l_custom, train_scores = utils_multi_loss.get_loss_1batch(
                batch_size, current_batch_size, i, num_train_data,
                loss_weights, train_scores, losses)
            metrics = utils_multi_loss.compute_multi_metric(
                depth_pred_for_metric, depth_gt_for_metric)
            train_metrics = utils_multi_loss.get_metric_1batch(
                batch_size, current_batch_size, i, num_train_data,
                train_metrics, metrics)

            # Update
            if use_amp == True:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()

            # Measure elapsed time
            end = time.time()

            # Log progress
            if (i + 1) % 100 == 0 or (i + 1) == iter_per_epoch:
                # In epoch 0 only the rows filled so far are meaningful.
                if epoch == 0:
                    train_scores_mean = train_scores[0:(i + 1) *
                                                     batch_size].mean(axis=0)
                    train_metrics_mean = train_metrics[0:(i + 1) *
                                                       batch_size].mean(axis=0)
                else:
                    train_scores_mean = train_scores.mean(axis=0)
                    train_metrics_mean = train_metrics.mean(axis=0)

                # Print to console
                print('Epoch: [{0}][{1}/{2}]\t'.format(epoch, i + 1,
                                                       iter_per_epoch))
                utils_utils.print_metrics(train_metrics_mean)
                utils_utils.print_scores(train_scores_mean)

            if (i + 1) == 1 or (i + 1) % 1000 == 0 or (i +
                                                       1) == iter_per_epoch:
                savePath = model_path + "/current" + ".csv"
                dataframe = pd.DataFrame(train_scores)
                dataframe.to_csv(savePath, header=False, index=False)

            if i in iter_list_save:
                model_name = model_path + "/model/epoch" + str(
                    epoch + 1).zfill(2) + '_iter' + str(i).zfill(5) + ".pth"
                # save model
                print('SAVE MODEL:' + model_path + '/model')
                torch.save(model.state_dict(), model_name)

            if i in iter_list_rebalancing:
                # Mean per-term score over the window since the last
                # rebalancing point.
                temp_train_scores_mean = train_scores[last_rebalancing_iter *
                                                      batch_size:(i + 1) *
                                                      batch_size, :].mean(
                                                          axis=0)
                total_loss = np.sum(temp_train_scores_mean * loss_weights)

                if weight_initialization == True and weight_initialization_done == False:
                    # One-time initialization: scale each valid term so its
                    # weighted contribution matches the prescribed fraction
                    # of the total loss.
                    for index_loss in range(len(loss_valid)):
                        if loss_valid[index_loss] == 1:
                            loss_weights[index_loss] = (
                                total_loss * loss_initialize_scale[index_loss]
                            ) / temp_train_scores_mean[index_loss]
                        else:
                            loss_weights[index_loss] = 0
                    # save previous record
                    weight_initialization_done = True
                    previous_total_loss = np.sum(temp_train_scores_mean *
                                                 loss_weights)
                    previous_loss = temp_train_scores_mean
                elif weight_rebalancing == True and (
                        weight_initialization_done == True
                        or weight_initialization == False):
                    temp_train_scores_mean = train_scores[
                        last_rebalancing_iter * batch_size:(i + 1) *
                        batch_size, :].mean(axis=0)
                    total_loss = np.sum(temp_train_scores_mean * loss_weights)
                    previous_loss_weights = np.array(loss_weights)
                    if previous_total_loss > 0:
                        # Multiplicative adjustment per term, clamped to
                        # [1/2, 2] to keep the weights stable.
                        for index_loss in range(len(loss_valid)):
                            if loss_valid[index_loss] == 1:
                                adjust_term = 1 + current_lambda_for_adjust * (
                                    (total_loss / previous_total_loss) *
                                    (previous_loss[index_loss] /
                                     temp_train_scores_mean[index_loss]) - 1)
                                adjust_term = min(max(adjust_term, 1.0 / 2.0),
                                                  2.0 / 1.0)
                                loss_weights[
                                    index_loss] = previous_loss_weights[
                                        index_loss] * adjust_term
                            else:
                                loss_weights[index_loss] = 0
                    # save previous record
                    previous_total_loss = np.sum(temp_train_scores_mean *
                                                 loss_weights)
                    previous_loss = temp_train_scores_mean

                # save - loss weights
                savePath = model_path + "/weight/weight" + str(
                    epoch + 1).zfill(2) + '_iter' + str(i).zfill(5) + ".csv"
                dataframe = pd.DataFrame(loss_weights)
                dataframe.to_csv(savePath, header=False, index=False)
                last_rebalancing_iter = (i + 1) % iter_per_epoch

        # save - each image train score
        savePath = model_path + "/score/train_epoch" + str(0 + epoch +
                                                           1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_scores)
        dataframe.to_csv(savePath, header=False, index=False)

        # save - train mean score
        savePath = model_path + "/score/train_mean_epoch" + str(
            0 + epoch + 1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_scores_mean)
        dataframe.to_csv(savePath, header=False, index=False)

        # save - each image train score
        savePath = model_path + "/metric/train_epoch" + str(
            0 + epoch + 1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_metrics)
        dataframe.to_csv(savePath, header=False, index=False)

        # save - train mean score
        savePath = model_path + "/metric/train_mean_epoch" + str(
            0 + epoch + 1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_metrics_mean)
        dataframe.to_csv(savePath, header=False, index=False)

        print('-------------- TRAINING OF EPOCH ' + str(0 + epoch + 1).zfill(2)
              + 'FINISH ---------------')
        print('---------------------------------------------------------')
        print(' ')
        print(' ')
        print(' ')
def main():
    """Standard single-machine training entry point for a pose network.

    Builds the model from the config, trains for the configured epoch
    range with MultiStepLR, validates each epoch, and keeps checkpoints
    plus the final state dict in the experiment output directory.
    """
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, "train")

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval("models." + cfg.MODEL.NAME + ".get_pose_net")(cfg,
                                                               is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, "../lib/models", cfg.MODEL.NAME + ".py"),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        "writer": SummaryWriter(log_dir=tb_log_dir),
        "train_global_steps": 0,
        "valid_global_steps": 0,
    }

    # IMAGE_SIZE is (width, height); tensor layout is (N, C, H, W).
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict["writer"].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval("dataset." + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval("dataset." + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, "checkpoint.pth")

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint["epoch"]
        best_perf = checkpoint["perf"]
        last_epoch = checkpoint["epoch"]
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint["epoch"]))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        cfg.TRAIN.LR_STEP,
        cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # BUGFIX: in PyTorch >= 1.1 the scheduler must be stepped *after*
        # optimizer.step() (i.e. after training), otherwise the first
        # value of the LR schedule is skipped.
        lr_scheduler.step()

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info("=> saving checkpoint to {}".format(final_output_dir))
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "model": cfg.MODEL.NAME,
                "state_dict": model.state_dict(),
                "best_state_dict": model.module.state_dict(),
                "perf": perf_indicator,
                "optimizer": optimizer.state_dict(),
            },
            best_model,
            final_output_dir,
        )

    final_model_state_file = os.path.join(final_output_dir,
                                          "final_state.pth")
    logger.info(
        "=> saving final model state to {}".format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict["writer"].close()
class Trainer(object):
    """Trainer for the unipose pose-estimation network.

    Holds the model, optimizer, criterion and data loaders, and exposes a
    ``training(epoch)`` method that runs one training epoch.
    """

    def __init__(self, args):
        """Build model, loaders and optimizer from CLI ``args``.

        Args:
            args: namespace with ``train_dir``, ``val_dir``, ``model_arch``,
                ``dataset`` ("LSP" or "MPII") and optional ``pretrained``
                checkpoint path.
        """
        self.args = args
        self.train_dir = args.train_dir
        self.val_dir = args.val_dir
        self.model_arch = args.model_arch
        self.dataset = args.dataset

        # Fixed training hyper-parameters.
        self.workers = 1
        self.weight_decay = 0.0005
        self.momentum = 0.9
        self.batch_size = 8
        self.lr = 0.0001
        self.gamma = 0.333
        self.step_size = 13275
        self.sigma = 3
        self.stride = 8

        cudnn.benchmark = True

        # Number of keypoint classes depends on the dataset.
        if self.dataset == "LSP":
            self.numClasses = 14
        elif self.dataset == "MPII":
            self.numClasses = 16

        self.train_loader, self.val_loader = getDataloader(
            self.dataset, self.train_dir, self.val_dir, self.sigma,
            self.stride, self.workers, self.batch_size)

        model = unipose(self.dataset, num_classes=self.numClasses,
                        backbone='resnet', output_stride=16, sync_bn=True,
                        freeze_bn=False, stride=self.stride)
        self.model = model.cuda()

        self.criterion = nn.MSELoss().cuda()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)

        # Best (lowest) validation loss seen so far; start impossibly high.
        self.best_model = 12345678.9
        self.iters = 0

        if self.args.pretrained is not None:
            # Load only the weights whose names exist in the current model.
            checkpoint = torch.load(self.args.pretrained)
            p = checkpoint['state_dict']
            state_dict = self.model.state_dict()
            model_dict = {}
            for k, v in p.items():
                if k in state_dict:
                    model_dict[k] = v
            state_dict.update(model_dict)
            self.model.load_state_dict(state_dict)

        self.isBest = 0
        self.bestPCK = 0
        self.bestPCKh = 0

        # Print model summary and metrics.
        # BUGFIX: original had mismatched brackets `torch.rand((1, 3, 368, 368]))`
        # (SyntaxError) and referenced the nonexistent attribute
        # `self.modelmodel` instead of `self.model`.
        dump_input = torch.rand((1, 3, 368, 368))
        print(get_model_summary(self.model, dump_input))

    def training(self, epoch):
        """Run one training epoch over ``self.train_loader``.

        Args:
            epoch: current epoch index (used only for logging).
        """
        train_loss = 0.0
        self.model.train()
        print("Epoch " + str(epoch) + ':')
        tbar = tqdm(self.train_loader)

        for i, (input, heatmap, centermap, img_path) in enumerate(tbar):
            # Step-decay LR schedule driven by the global iteration counter.
            learning_rate = adjust_learning_rate(self.optimizer, self.iters,
                                                 self.lr, policy='step',
                                                 gamma=self.gamma,
                                                 step_size=self.step_size)

            input_var = input.cuda()
            heatmap_var = heatmap.cuda()

            self.optimizer.zero_grad()

            heat = self.model(input_var)
            loss_heat = self.criterion(heat, heatmap_var)

            loss = loss_heat
            train_loss += loss_heat.item()

            loss.backward()
            self.optimizer.step()

            # Running mean loss per sample.
            tbar.set_description('Train loss: %.6f' %
                                 (train_loss / ((i + 1) * self.batch_size)))

            self.iters += 1

            # Hard cap on iterations per epoch.
            if i == 10000:
                break
def main():
    """Train a pose network, optionally with FPD knowledge distillation.

    When ``cfg.KD.TRAIN_TYPE`` resolves to ``'FPD'`` a frozen teacher model
    is loaded from ``cfg.KD.TEACHER`` and ``fpd_train`` distills it into the
    student; otherwise plain ``train`` is used.  Checkpoints and the final
    student state dict go to the experiment output directory.
    """
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    t_checkpoints = cfg.KD.TEACHER      # NOTE: set in the student config file
    train_type = cfg.KD.TRAIN_TYPE      # NOTE: set in the student config file
    train_type = get_train_type(train_type, t_checkpoints)
    logger.info('=> train type is {} '.format(train_type))

    if train_type == 'FPD':
        cfg_name = 'student_' + os.path.basename(args.cfg).split('.')[0]
    else:
        cfg_name = os.path.basename(args.cfg).split('.')[0]
    save_yaml_file(cfg_name, cfg, final_output_dir)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # fpd method, default NORMAL
    if train_type == 'FPD':
        tcfg = cfg.clone()
        tcfg.defrost()
        tcfg.merge_from_file(args.tcfg)
        tcfg.freeze()
        tcfg_name = 'teacher_' + os.path.basename(args.tcfg).split('.')[0]
        save_yaml_file(tcfg_name, tcfg, final_output_dir)

        # teacher model
        tmodel = eval('models.' + tcfg.MODEL.NAME + '.get_pose_net')(
            tcfg, is_train=False)
        load_checkpoint(t_checkpoints,
                        tmodel,
                        strict=True,
                        model_info='teacher_' + tcfg.MODEL.NAME)
        tmodel = torch.nn.DataParallel(tmodel, device_ids=cfg.GPUS).cuda()

        # define kd_pose loss function (criterion) and optimizer
        kd_pose_criterion = JointsMSELoss(
            use_target_weight=tcfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # IMAGE_SIZE is (width, height); tensor layout is (N, C, H, W).
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))

    if cfg.TRAIN.CHECKPOINT:
        load_checkpoint(cfg.TRAIN.CHECKPOINT,
                        model,
                        strict=True,
                        model_info='student_' + cfg.MODEL.NAME)

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # you can choose or replace pose_loss and kd_pose_loss type, including mse,kl,ohkm loss ect
    # define pose loss function (criterion) and optimizer
    pose_criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        cfg.TRAIN.LR_STEP,
        cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    # Baseline evaluation before training.
    # BUGFIX: tmodel only exists in FPD mode — the unconditional call raised
    # NameError for train_type != 'FPD'.
    if train_type == 'FPD':
        validate(cfg, valid_loader, valid_dataset, tmodel, pose_criterion,
                 final_output_dir, tb_log_dir, writer_dict)
    validate(cfg, valid_loader, valid_dataset, model, pose_criterion,
             final_output_dir, tb_log_dir, writer_dict)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # fpd method, default NORMAL
        if train_type == 'FPD':
            # train for one epoch
            fpd_train(cfg, train_loader, model, tmodel, pose_criterion,
                      kd_pose_criterion, optimizer, epoch, final_output_dir,
                      tb_log_dir, writer_dict)
        else:
            # train for one epoch
            train(cfg, train_loader, model, pose_criterion, optimizer, epoch,
                  final_output_dir, tb_log_dir, writer_dict)

        # BUGFIX: in PyTorch >= 1.1 the scheduler must be stepped *after*
        # optimizer.step() (i.e. after training), otherwise the first
        # value of the LR schedule is skipped.
        lr_scheduler.step()

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  pose_criterion, final_output_dir,
                                  tb_log_dir, writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    """Train a 106-point facial-landmark model, evaluating each epoch by NME.

    Flow: parse CLI args -> build model from config -> train one epoch at a
    time -> run a (optionally flip-augmented) forward pass over the validation
    set -> score against a ground-truth file with NME -> append
    ``(epoch, lr, error)`` to ``log_file.csv`` and save a checkpoint.
    """
    args = parse_args()
    update_config(cfg, args)

    if args.prevModelDir and args.modelDir:
        # copy pre models for philly
        copy_prev_models(args.prevModelDir, args.modelDir)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')
    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # resolve e.g. models.pose_hrnet.get_pose_net from the config name
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # copy the model definition next to the run output for reproducibility
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # IMAGE_SIZE is (width, height); torch expects (N, C, H, W)
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))
    logger.info(get_model_summary(model, dump_input))

    # FIX: use the configured GPU list instead of the hard-coded [0, 1];
    # this matches every other training entry point in this file.
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code (ImageNet normalization)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')
    # NOTE(review): auto-resume from `checkpoint_file` was commented out in
    # the original source; restore it here if resuming is needed.

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    # hoisted out of the epoch loop (originally re-imported every epoch)
    import csv
    import numpy as np
    from core.inference import get_final_preds
    from utils.transforms import flip_back

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # ---- evaluate on the validation set ----
        model.eval()

        num_samples = len(valid_dataset)
        # per landmark: (x, y, maxval); 106 landmarks per face
        all_preds = np.zeros((num_samples, 106, 3), dtype=np.float32)
        all_boxes = np.zeros((num_samples, 6))
        image_path = []
        filenames = []
        imgnums = []
        idx = 0
        full_result = []
        with torch.no_grad():
            for i, (input, target, target_weight,
                    meta) in enumerate(valid_loader):
                # compute output
                outputs = model(input)
                output = outputs[-1] if isinstance(outputs, list) else outputs

                if cfg.TEST.FLIP_TEST:
                    # pytorch does not support negative-step indexing,
                    # so flip horizontally via numpy
                    input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                    input_flipped = torch.from_numpy(input_flipped).cuda()
                    outputs_flipped = model(input_flipped)
                    output_flipped = (outputs_flipped[-1]
                                      if isinstance(outputs_flipped, list)
                                      else outputs_flipped)
                    output_flipped = flip_back(output_flipped.cpu().numpy(),
                                               valid_dataset.flip_pairs)
                    output_flipped = torch.from_numpy(
                        output_flipped.copy()).cuda()

                    # feature is not aligned, shift flipped heatmap
                    # for higher accuracy
                    if cfg.TEST.SHIFT_HEATMAP:
                        output_flipped[:, :, :, 1:] = \
                            output_flipped.clone()[:, :, :, 0:-1]
                    output = (output + output_flipped) * 0.5

                target = target.cuda(non_blocking=True)
                target_weight = target_weight.cuda(non_blocking=True)
                loss = criterion(output, target, target_weight)

                num_images = input.size(0)
                # measure accuracy and record loss
                c = meta['center'].numpy()
                s = meta['scale'].numpy()
                score = meta['score'].numpy()

                preds, maxvals = get_final_preds(
                    cfg, output.clone().cpu().numpy(), c, s)

                # flatten predictions to [x0, y0, x1, y1, ...] per image
                for b in range(input.size(0)):
                    result = []
                    for points in range(106):
                        result.append(float(preds[b][points][0]))
                        result.append(float(preds[b][points][1]))
                    full_result.append(result)

                all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
                all_preds[idx:idx + num_images, :, 2:3] = maxvals
                # double check this all_boxes parts
                all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
                all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
                all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
                all_boxes[idx:idx + num_images, 5] = score
                image_path.extend(meta['image'])
                idx += num_images

        # ---- score against ground truth with NME ----
        # NOTE(review): machine-specific path; should come from the config.
        gt = []
        with open("/home/sk49/workspace/cy/jd/val.txt") as f:
            for line in f.readlines():
                rows = list(map(float, line.strip().split(' ')[1:]))
                gt.append(rows)
        error = 0
        for i in range(len(gt)):
            error = NME(full_result[i], gt[i]) + error
        print(error)

        # append (epoch, current lr, accumulated error) to a CSV log
        log_file = []
        log_file.append(
            [epoch, optimizer.state_dict()['param_groups'][0]['lr'], error])
        with open('log_file.csv', 'a', newline='') as f:
            writer1 = csv.writer(f)
            writer1.writerows(log_file)

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                # 'perf': perf_indicator,  # validation perf is disabled above
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main_worker(
        gpu, ngpus_per_node, args, final_output_dir, tb_log_dir
):
    """Per-process training worker (one per GPU) with optional FP16 and DDP.

    Sets up cudnn, (optionally) the distributed process group, builds the
    model, wraps it for DDP/DataParallel, then trains with ``do_train`` and
    checkpoints each epoch. ``perf_indicator`` is simply the epoch number, so
    the "best" checkpoint is always the latest one.
    """
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, \
            "fp16 mode requires cudnn backend to be enabled."
    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print("Warning: if --fp16 is not used, static_loss_scale will be ignored.")

    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if cfg.MULTIPROCESSING_DISTRIBUTED:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        print('Init process group: dist_url: {}, world_size: {}, rank: {}'.
              format(args.dist_url, args.world_size, args.rank))
        dist.init_process_group(
            backend=cfg.DIST_BACKEND,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank
        )

    update_config(cfg, args)

    # setup logger
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=True
    )

    # copy model file (only on one process per node to avoid clobbering)
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
            final_output_dir
        )

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # log a model summary on one process per node
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        dump_input = torch.rand(
            (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE)
        )
        #writer_dict['writer'].add_graph(model, (dump_input, ))
        logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not args.distributed:
        print('Warning: Sync BatchNorm is only supported in distributed training.')

    if args.distributed:
        if cfg.MODEL.SYNC_BN:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu]
            )
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    loss_factory = MultiLossFactory(cfg).cuda()

    # Data loading code
    train_loader = make_dataloader(
        cfg, is_train=True, distributed=args.distributed
    )
    logger.info(train_loader.dataset)

    best_perf = -1
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)

    if cfg.FP16.ENABLED:
        # wrap the optimizer for static or dynamic loss scaling
        optimizer = FP16_Optimizer(
            optimizer,
            static_loss_scale=cfg.FP16.STATIC_LOSS_SCALE,
            dynamic_loss_scale=cfg.FP16.DYNAMIC_LOSS_SCALE
        )

    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(
        final_output_dir, 'checkpoint.pth.tar')
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    # NOTE: the MultiStepLR scheduler (with an FP16-aware variant unwrapping
    # optimizer.optimizer) was commented out in the original source:
    # if cfg.FP16.ENABLED:
    #     lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    #         optimizer.optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
    #         last_epoch=last_epoch
    #     )
    # else:
    #     lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    #         optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
    #         last_epoch=last_epoch
    #     )

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # train one epoch
        do_train(cfg, model, train_loader, loss_factory, optimizer, epoch,
                 final_output_dir, tb_log_dir, writer_dict,
                 fp16=cfg.FP16.ENABLED)

        # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()`
        # after `optimizer.step()`.
        # lr_scheduler.step()

        # no validation here: treat the epoch index as the perf metric,
        # so the latest checkpoint is always "best"
        perf_indicator = epoch
        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        # only rank 0 writes checkpoints
        if not cfg.MULTIPROCESSING_DISTRIBUTED or (
                cfg.MULTIPROCESSING_DISTRIBUTED
                and args.rank == 0
        ):
            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint({
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(
        final_output_dir, 'final_state{}.pth.tar'.format(gpu)
    )
    logger.info('saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main_worker(
        gpu, ngpus_per_node, args, final_output_dir, tb_log_dir
):
    """Per-process (one per GPU) distributed training setup.

    Initializes cudnn and the process group, builds the model, wraps it for
    DDP/DataParallel, and constructs the training dataloader.

    NOTE(review): the visible body ends right after ``make_dataloader`` —
    the training loop appears to be truncated in this copy of the source.
    """
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if cfg.MULTIPROCESSING_DISTRIBUTED:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            # (compute this process's global rank from the node index)
            args.rank = args.rank * ngpus_per_node + gpu
        print('Init process group: dist_url: {}, world_size: {}, rank: {}'.
              format(args.dist_url, args.world_size, args.rank))
        dist.init_process_group(
            backend=cfg.DIST_BACKEND,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank
        )

    update_config(cfg, args)

    # setup logger
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=True
    )

    # copy model file (only on one process per node)
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
            final_output_dir
        )

    # visualize training with tensorboard
    # NOTE(review): `logdir=` (not `log_dir=`) suggests the tensorboardX
    # writer API here — confirm against the file's imports.
    writer_dict = {
        'writer': SummaryWriter(logdir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu]
            )
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = torch.nn.DataParallel(model).cuda()

    # log a model summary on one process per node
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        dump_input = torch.rand(
            (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE)
        ).cuda()
        #writer_dict['writer'].add_graph(model, (dump_input, ))
        logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    # define loss function (criterion) and optimizer
    loss_factory = MultiLossFactory(cfg).cuda()

    # Data loading code
    train_loader = make_dataloader(
        cfg, is_train=True, distributed=args.distributed
    )
def main():
    """Train a pose network with a joint heatmap (MSE) + regression loss.

    Standard HRNet-style training entry point: build model from config,
    set up data loaders, optionally auto-resume from a checkpoint, then
    alternate train/validate per epoch, checkpointing the best model.
    """
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    # benchmark speeds up training, at the cost of benchmark nondeterminism
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # eval() evaluates the dotted-name string and returns the factory function
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)  # directory of this script
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # IMAGE_SIZE is (width, height); torch expects (N, C, H, W)
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))  # log a model summary

    # multi-GPU training
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    #model = torch.nn.DataParallel(model, device_ids=[0]).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()
    regress_loss = RegLoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    # normalize with ImageNet mean and standard deviation
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))  # image preprocessing

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    # auto-resume: restore epoch/perf/model/optimizer state from checkpoint
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, criterion, regress_loss, optimizer,
              epoch, final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, regress_loss, final_output_dir,
                                  tb_log_dir, writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    """Standard HRNet/ResNet pose training loop (heatmap MSE loss only).

    Parse args -> build model -> train/validate each epoch ->
    checkpoint the best-performing model and the final state.
    """
    # parse the command-line arguments
    args = parse_args()
    # update cfg from the parsed arguments
    update_config(cfg, args)

    # create a logger that records training progress
    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # build the network from the config: two families are supported,
    # models.pose_hrnet and models.pose_resnet, each exposing get_pose_net
    print('models.' + cfg.MODEL.NAME + '.get_pose_net')
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # copy model file
    # copy lib/models/<model>.py into the output directory for reproducibility
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    # tensorboard writer for graphical display of training info
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # dummy input used to render the model graph
    # (IMAGE_SIZE is (width, height); torch expects (N, C, H, W))
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))

    # enable multi-GPU training
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    # normalize input images (ImageNet mean/std)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # build the training and validation dataset iterators
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    # model loading and optimization setup
    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    # main training loop
    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    # save the final model state
    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    """Evaluate a (distilled) student HigherHRNet model with multi-scale,
    flip-augmented bottom-up inference.

    Loads the student config and weights, runs batch-size-1 inference over
    the test set, aggregates heatmaps/tags across scales, groups keypoints
    with ``HeatmapParser``, optionally saves visualizations, and finally
    calls the dataset's ``evaluate``.
    """
    args = get_args()
    # get student config
    student_cfg = get_student_cfg(cfg, args.student_file)
    student_cfg.LOG_DIR = args.log
    student_cfg.PRINT_FREQ = int(args.print_freq)
    if args.mode == 'test':
        student_cfg.DATASET.TEST = 'test2017'

    logger, final_output_dir, tb_log_dir = create_logger(
        student_cfg, args.student_file, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(student_cfg)

    # cudnn related setting
    cudnn.benchmark = student_cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = student_cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = student_cfg.CUDNN.ENABLED

    # NOTE(review): `dev` is computed but never used below (inference is
    # unconditionally .cuda()); left as-is to preserve behavior.
    dev = 'cuda' if torch.cuda.is_available() else 'cpu'

    model = PoseHigherResolutionNet(student_cfg)
    model.load_state_dict(torch.load(args.model_file))

    dump_input = torch.rand(
        (1, 3, student_cfg.DATASET.INPUT_SIZE, student_cfg.DATASET.INPUT_SIZE))
    logger.info(
        get_model_summary(model, dump_input, verbose=student_cfg.VERBOSE))

    model = torch.nn.DataParallel(model, device_ids=student_cfg.GPUS).cuda()
    model.eval()

    data_loader, test_dataset = make_test_dataloader(student_cfg)

    # ImageNet normalization applied per resized image
    transforms = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    ])

    parser = HeatmapParser(student_cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(
        total=len(test_dataset)) if student_cfg.TEST.LOG_PROGRESS else None
    for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'
        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, student_cfg.DATASET.INPUT_SIZE, 1.0,
            min(student_cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            # run the model at each test scale, largest first
            for idx, s in enumerate(
                    sorted(student_cfg.TEST.SCALE_FACTOR, reverse=True)):
                input_size = student_cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(student_cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    student_cfg, model, image_resized,
                    student_cfg.TEST.FLIP_TEST,
                    student_cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    student_cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            # average heatmaps over scales; concatenate tags along a new dim
            final_heatmaps = final_heatmaps / float(
                len(student_cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           student_cfg.TEST.ADJUST,
                                           student_cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)])

        if student_cfg.TEST.LOG_PROGRESS:
            pbar.update()

        # periodically dump a visualization of the predictions
        if i % student_cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            # logger.info('=> write {}'.format(prefix))
            save_valid_image(image, final_results, '{}.jpg'.format(prefix),
                             dataset=test_dataset.name)
            # save_debug_images(cfg, image_resized, None, None, outputs, prefix)

        all_preds.append(final_results)
        all_scores.append(scores)

    if student_cfg.TEST.LOG_PROGRESS:
        pbar.close()

    # NOTE(review): `cfg` (the base/teacher config) is used here while the
    # rest of the function uses `student_cfg` — confirm this is intentional.
    name_values, _ = test_dataset.evaluate(cfg, all_preds, all_scores,
                                           final_output_dir)

    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(logger, name_value, cfg.MODEL.NAME)
    else:
        _print_name_value(logger, name_values, cfg.MODEL.NAME)
def main():
    """Train a pose network with a heatmap MSE loss.

    Builds the model from the config, sets up ``'train'``/``'val'``-flagged
    datasets (ToTensor only, no normalization), optionally auto-resumes from
    ``checkpoint.pth``, then alternates train/validate per epoch and
    checkpoints the best model.
    """
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=True
    )

    # copy model file next to the run output for reproducibility
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    # IMAGE_SIZE is (width, height); torch expects (N, C, H, W)
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])
    )
    logger.info(get_model_summary(model, dump_input))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    heatmapLoss = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT  # true
    ).cuda()

    # Data loading code (note: string 'train'/'val' flags, no normalization)
    train_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, 'train',
        transforms.Compose([
            transforms.ToTensor(),
        ])
    )
    valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, 'val',
        transforms.Compose([
            transforms.ToTensor(),
        ])
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=True
    )
    # FIX: validation loader now uses the TEST batch size (was
    # cfg.TRAIN.BATCH_SIZE_PER_GPU), consistent with the other entry points.
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=True
    )

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(
        final_output_dir, 'checkpoint.pth'
    )

    # auto-resume: restore epoch/perf/model/optimizer state
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch
    )

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, heatmapLoss, optimizer, epoch,
              final_output_dir)

        # evaluate on validation set
        perf_indicator = validate(
            cfg, valid_loader, valid_dataset, model, heatmapLoss,
            final_output_dir
        )

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
            # save_checkpoint writes the best model when best_model is True
            best_model_state_file = os.path.join(
                final_output_dir, 'best_model.pth'
            )
            logger.info('=> saving best model state to {}'.format(
                best_model_state_file)
            )
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint({
            'epoch': epoch + 1,
            'model': cfg.MODEL.NAME,
            'state_dict': model.state_dict(),
            'best_state_dict': model.module.state_dict(),
            'perf': perf_indicator,
            'optimizer': optimizer.state_dict(),
        }, best_model, final_output_dir)

    final_model_state_file = os.path.join(
        final_output_dir, 'final_state.pth'
    )
    logger.info('=> saving final model state to {}'.format(
        final_model_state_file)
    )
    torch.save(model.module.state_dict(), final_model_state_file)