Example #1
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

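    # resolve the model builder named in the config, e.g. models.pose_resnet.get_pose_net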
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)
    logger.info(
        get_model_summary(model.cuda(),
                          torch.zeros(1, 3, *cfg.MODEL.IMAGE_SIZE).cuda()))

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # if args.save_trace:
    #     example = torch.rand(1, 3, 256, 192).cuda()
    #     traced_script_module = torch.jit.trace(model.half().module, example)
    #     traced_script_module.save("lpn_resnet50_trace.pt")

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
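    # ImageNet channel statistics, matching the pretrained backbone's training data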
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=True)

    # evaluate on validation set
    validate(cfg, valid_loader, valid_dataset, model, criterion,
             final_output_dir, tb_log_dir)
Example #2
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()
    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])
    transforms_pre = torchvision.transforms.Compose([
        ToNumpy(),
    ])
    # iterate over all datasets
    datasets_root_path = "/media/jld/DATOS_JLD/datasets"
    datasets = ["cityscapes", "kitti", "tsinghua"]
    # the test sets of cityscapes and kitti have no ground truth --> no processing needed
    datasplits = [["train", "val"], ["train"], ["train", "val", "test"]]
    keypoints_output_root_path = "/media/jld/DATOS_JLD/git-repos/paper-revista-keypoints/results"
    model_name = osp.basename(
        cfg.TEST.MODEL_FILE).split('.')[0]  # Model name + configuration
    for dsid, dataset in enumerate(datasets):
        dataset_root_path = osp.join(datasets_root_path, dataset)
        output_root_path = osp.join(keypoints_output_root_path, dataset)
        for datasplit in datasplits[dsid]:
            loggur.info(f"Processing split {datasplit} of {dataset}")
            input_img_dir = osp.join(dataset_root_path, datasplit)
            output_kps_json_dir = osp.join(output_root_path, datasplit,
                                           model_name)
            loggur.info(f"Input image dir: {input_img_dir}")
            loggur.info(f"Output pose JSON dir: {output_kps_json_dir}")
            # test_dataset = torchvision.datasets.ImageFolder("/media/jld/DATOS_JLD/git-repos/paper-revista-keypoints/test_images/", transform=transforms_pre)
            test_dataset = dsjld.BaseDataset(input_img_dir,
                                             output_kps_json_dir,
                                             transform=transforms_pre)
            test_dataset.generate_io_samples_pairs()
            # Set keypoint score weights (as in openpifpaf: https://github.com/vita-epfl/openpifpaf/blob/master/openpifpaf/decoder/annotation.py#L44)
            n_keypoints = 17
            kps_score_weights = numpy.ones((n_keypoints, ))
            kps_score_weights[:3] = 3.0  # triple weight for the three highest-scoring keypoints
            # Normalize weights to sum 1
            kps_score_weights /= numpy.sum(kps_score_weights)
            data_loader = torch.utils.data.DataLoader(test_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      num_workers=0,
                                                      pin_memory=False)
            parser = HeatmapParser(cfg)
            all_preds = []
            all_scores = []

            pbar = tqdm(
                total=len(test_dataset))  # if cfg.TEST.LOG_PROGRESS else None
            for i, (img, imgidx) in enumerate(data_loader):
                assert 1 == img.size(0), 'Test batch size should be 1'

                img = img[0].cpu().numpy()
                # size at scale 1.0
                base_size, center, scale = get_multi_scale_size(
                    img, cfg.DATASET.INPUT_SIZE, 1.0,
                    min(cfg.TEST.SCALE_FACTOR))

                with torch.no_grad():
                    final_heatmaps = None
                    tags_list = []
                    for idx, s in enumerate(
                            sorted(cfg.TEST.SCALE_FACTOR, reverse=True)):
                        input_size = cfg.DATASET.INPUT_SIZE
                        image_resized, center, scale = resize_align_multi_scale(
                            img, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                        image_resized = transforms(image_resized)
                        image_resized = image_resized.unsqueeze(0).cuda()

                        outputs, heatmaps, tags = get_multi_stage_outputs(
                            cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                            cfg.TEST.PROJECT2IMAGE, base_size)

                        final_heatmaps, tags_list = aggregate_results(
                            cfg, s, final_heatmaps, tags_list, heatmaps, tags)

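                    # average the heatmaps accumulated over all test scales; tag maps are concatenated along an extra scale dim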
                    final_heatmaps = final_heatmaps / float(
                        len(cfg.TEST.SCALE_FACTOR))
                    tags = torch.cat(tags_list, dim=4)
                    grouped, scores = parser.parse(final_heatmaps, tags,
                                                   cfg.TEST.ADJUST,
                                                   cfg.TEST.REFINE)

                    final_results = get_final_preds(
                        grouped, center, scale,
                        [final_heatmaps.size(3),
                         final_heatmaps.size(2)])

                # if cfg.TEST.LOG_PROGRESS:
                pbar.update()
                # Save all keypoints in a JSON dict
                final_json_results = []
                for kps in final_results:
                    kpsdict = {}
                    x = kps[:, 0]
                    y = kps[:, 1]
                    kps_scores = kps[:, 2]
                    kpsdict['keypoints'] = kps[:, 0:3].tolist()
                    # bounding box by means of minmax approach (without zero elements)
                    xmin = numpy.float64(numpy.min(x[numpy.nonzero(x)]))
                    xmax = numpy.float64(numpy.max(x))
                    width = numpy.float64(xmax - xmin)
                    ymin = numpy.float64(numpy.min(y[numpy.nonzero(y)]))
                    ymax = numpy.float64(numpy.max(y))
                    height = numpy.float64(ymax - ymin)
                    kpsdict['bbox'] = [xmin, ymin, width, height]
                    # Calculate pose score as a weighted mean of keypoints scores
                    kpsdict['score'] = numpy.float64(
                        numpy.sum(kps_score_weights *
                                  numpy.sort(kps_scores)[::-1]))
                    final_json_results.append(kpsdict)

                with open(test_dataset.output_json_files_list[imgidx],
                          "w") as f:
                    json.dump(final_json_results, f)

                all_preds.append(final_results)
                all_scores.append(scores)

            if cfg.TEST.LOG_PROGRESS:
                pbar.close()
Example #3
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # CUDA settings
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    rand_input = torch.randn(1, 3, cfg.DATASET.INPUT_SIZE,
                             cfg.DATASET.INPUT_SIZE)
    logger.info(get_model_summary(model, rand_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    transforms = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    ])
Example #4
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    data_loader, test_dataset = make_test_dataloader(cfg)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    # pbar = tqdm(total=len(test_dataset)) if cfg.TEST.LOG_PROGRESS else None
    pbar = tqdm(total=len(test_dataset))
    for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])
            if cfg.RESCORE.USE:
                try:
                    scores = rescore_valid(cfg, final_results, scores)
                except Exception:
                    print('rescore_valid failed; keeping the original scores.')
        # if cfg.TEST.LOG_PROGRESS:
        #     pbar.update()
        pbar.update()

        if i % cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            # logger.info('=> write {}'.format(prefix))
            save_valid_image(image,
                             final_results,
                             '{}.jpg'.format(prefix),
                             dataset=test_dataset.name)
            # for scale_idx in range(len(outputs)):
            #     prefix_scale = prefix + '_output_{}'.format(
            #         # cfg.DATASET.OUTPUT_SIZE[scale_idx]
            #         scale_idx
            #     )
            #     save_debug_images(
            #         cfg, images, None, None,
            #         outputs[scale_idx], prefix_scale
            #     )
        all_preds.append(final_results)
        all_scores.append(scores)

    if cfg.TEST.LOG_PROGRESS:
        pbar.close()

    name_values, _ = test_dataset.evaluate(cfg, all_preds, all_scores,
                                           final_output_dir)

    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(logger, name_value, cfg.MODEL.NAME)
    else:
        _print_name_value(logger, name_values, cfg.MODEL.NAME)
Example #5
def main():
    args = parse_args()

    update_config(cfg, args)
    cfg.defrost()
    cfg.freeze()

    record_prefix = './eval2D_results_'
    if args.is_vis:
        result_dir = record_prefix + cfg.EXP_NAME
        mse2d_lst = np.loadtxt(os.path.join(result_dir,
                                            'mse2d_each_joint.txt'))
        PCK2d_lst = np.loadtxt(os.path.join(result_dir, 'PCK2d.txt'))

        plot_performance(PCK2d_lst[1, :], PCK2d_lst[0, :], mse2d_lst)
        exit()

    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_path = args.model_path
    is_vis = args.is_vis

    # FP16 SETTING
    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print(
                "Warning:  if --fp16 is not used, static_loss_scale will be ignored."
            )

    model = eval(cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)

    # # calculate GFLOPS
    # dump_input = torch.rand(
    #     (5, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0])
    # )

    # print(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    # ops, params = get_model_complexity_info(
    #    model, (3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]),
    #    as_strings=True, print_per_layer_stat=True, verbose=True)
    # input()

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not args.distributed:
        print(
            'Warning: Sync BatchNorm is only supported in distributed training.'
        )

    if args.gpu != -1:
        device = torch.device('cuda:' + str(args.gpu))
        torch.cuda.set_device(args.gpu)
    else:
        device = torch.device('cpu')
    # load model state
    if model_path:
        print("Loading model:", model_path)
        ckpt = torch.load(model_path)  #, map_location='cpu')
        if 'state_dict' not in ckpt.keys():
            state_dict = ckpt
        else:
            state_dict = ckpt['state_dict']
            print('Model epoch {}'.format(ckpt['epoch']))

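        # strip the "module." prefix that DataParallel adds to checkpoint keys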
        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)

        model.load_state_dict(state_dict, strict=True)

    model.to(device)

    # calculate GFLOPS
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0])).to(device)

    print(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    model.eval()

    # inference_dataset = eval('dataset.{}'.format(cfg.DATASET.TEST_DATASET[0].replace('_kpt','')))(
    #     cfg.DATA_DIR,
    #     cfg.DATASET.TEST_SET,
    #     transform=transform
    # )
    inference_dataset = eval('dataset.{}'.format(
        cfg.DATASET.TEST_DATASET[0].replace('_kpt', '')))(
            cfg.DATA_DIR,
            cfg.DATASET.TEST_SET,
            transforms=build_transforms(cfg, is_train=False))

    batch_size = args.batch_size
    data_loader = torch.utils.data.DataLoader(
        inference_dataset,
        batch_size=batch_size,  #48
        shuffle=False,
        num_workers=min(8, batch_size),  #8
        pin_memory=False)

    print('\nEvaluation loader information:\n' + str(data_loader.dataset))
    n_joints = cfg.DATASET.NUM_JOINTS
    th2d_lst = np.arange(1, 50)
    PCK2d_lst = np.zeros((len(th2d_lst), ))
    mse2d_lst = np.zeros((n_joints, ))
    visibility_lst = np.zeros((n_joints, ))

    print('Start evaluating... [Batch size: {}]\n'.format(
        data_loader.batch_size))
    with torch.no_grad():
        pose2d_mse_loss = JointsMSELoss().to(device)
        infer_time = [0, 0]
        start_time = time.time()
        for i, ret in enumerate(data_loader):
            # pose2d_gt: b x 21 x 2 is [u,v] 0<=u<64, 0<=v<64 (heatmap size)
            # visibility: b x 21 vis=0/1
            imgs = ret['imgs']
            pose2d_gt = ret['pose2d']  # b [x v] x 21 x 2
            visibility = ret['visibility']  # b [x v] x 21 x 1

            s1 = time.time()
            if 'CPM' == cfg.MODEL.NAME:
                pose2d_gt = pose2d_gt.view(-1, *pose2d_gt.shape[-2:])
                heatmap_lst = model(
                    imgs.to(device), ret['centermaps'].to(device)
                )  # 6 groups of heatmaps, each of which has size (1,22,32,32)
                heatmaps = heatmap_lst[-1][:, 1:]
                pose2d_pred = data_loader.dataset.get_kpts(heatmaps)
                hm_size = heatmap_lst[-1].shape[-1]  # 32
            else:
                if cfg.MODEL.NAME == 'pose_hrnet_transformer':
                    # imgs: b(1) x (4*seq_len) x 3 x 256 x 256
                    n_batches, seq_len = imgs.shape[0], imgs.shape[1] // 4
                    idx_lst = torch.tensor([4 * i for i in range(seq_len)])
                    imgs = torch.stack([
                        imgs[b, idx_lst + cam_idx] for b in range(n_batches)
                        for cam_idx in range(4)
                    ])  # (b*4) x seq_len x 3 x 256 x 256

                    pose2d_pred, heatmaps_pred, _ = model(
                        imgs.cuda(device))  # (b*4) x 21 x 2
                    pose2d_gt = pose2d_gt[:, 4 * (seq_len // 2):4 * (
                        seq_len // 2 + 1)].contiguous().view(
                            -1, *pose2d_pred.shape[-2:])  # (b*4) x 21 x 2
                    visibility = visibility[:, 4 * (seq_len // 2):4 * (
                        seq_len // 2 + 1)].contiguous().view(
                            -1, *visibility.shape[-2:])  # (b*4) x 21

                else:
                    if 'Aggr' in cfg.MODEL.NAME:
                        # imgs: b x (4*5) x 3 x 256 x 256
                        n_batches, seq_len = imgs.shape[0], len(
                            cfg.DATASET.SEQ_IDX)
                        true_batch_size = imgs.shape[1] // seq_len
                        pose2d_gt = torch.cat([
                            pose2d_gt[b, true_batch_size *
                                      (seq_len // 2):true_batch_size *
                                      (seq_len // 2 + 1)]
                            for b in range(n_batches)
                        ],
                                              dim=0)

                        visibility = torch.cat([
                            visibility[b, true_batch_size *
                                       (seq_len // 2):true_batch_size *
                                       (seq_len // 2 + 1)]
                            for b in range(n_batches)
                        ],
                                               dim=0)

                        imgs = torch.cat([
                            imgs[b, true_batch_size * j:true_batch_size *
                                 (j + 1)] for j in range(seq_len)
                            for b in range(n_batches)
                        ],
                                         dim=0)  # (b*4*5) x 3 x 256 x 256

                        heatmaps_pred, _ = model(imgs.to(device))
                    else:
                        pose2d_gt = pose2d_gt.view(-1, *pose2d_gt.shape[-2:])
                        heatmaps_pred, _ = model(
                            imgs.to(device))  # b x 21 x 64 x 64

                    pose2d_pred = get_final_preds(
                        heatmaps_pred, cfg.MODEL.HEATMAP_SOFTMAX)  # b x 21 x 2

                hm_size = heatmaps_pred.shape[-1]  # 64

            if i > 20:
                infer_time[0] += 1
                infer_time[1] += time.time() - s1

            # rescale to the original image before DLT

            if 'RHD' in cfg.DATASET.TEST_DATASET[0]:
                crop_size, corner = ret['crop_size'], ret['corner']
                crop_size = crop_size.view(-1, 1, 1)  # b x 1 x 1
                corner = corner.unsqueeze(1)  # b x 2 x 1
                pose2d_pred = pose2d_pred.cpu() * crop_size / hm_size + corner
                pose2d_gt = pose2d_gt * crop_size / hm_size + corner
            else:
                orig_width, orig_height = data_loader.dataset.orig_img_size
                pose2d_pred[:, :, 0] *= orig_width / hm_size
                pose2d_pred[:, :, 1] *= orig_height / hm_size
                pose2d_gt[:, :, 0] *= orig_width / hm_size
                pose2d_gt[:, :, 1] *= orig_height / hm_size

                # for k in range(21):
                #     print(pose2d_gt[0,k].tolist(), pose2d_pred[0,k].tolist())
                # input()
            # 2D errors
            pose2d_pred = pose2d_pred.cpu().numpy()
            pose2d_gt = pose2d_gt.numpy()
            visibility = visibility.squeeze(2).numpy()

            # import matplotlib.pyplot as plt
            # imgs = cv2.resize(imgs[0].permute(1,2,0).cpu().numpy(), tuple(data_loader.dataset.orig_img_size))
            # for k in range(21):
            #     print(pose2d_gt[0,k],pose2d_pred[0,k],visibility[0,k])
            # for k in range(0,21,5):
            #     fig = plt.figure()
            #     ax1 = fig.add_subplot(131)
            #     ax2 = fig.add_subplot(132)
            #     ax3 = fig.add_subplot(133)
            #     ax1.imshow(cv2.cvtColor(imgs / imgs.max(), cv2.COLOR_BGR2RGB))
            #     plot_hand(ax1, pose2d_gt[0,:,0:2], order='uv')
            #     ax2.imshow(cv2.cvtColor(imgs / imgs.max(), cv2.COLOR_BGR2RGB))
            #     plot_hand(ax2, pose2d_pred[0,:,0:2], order='uv')
            #     ax3.imshow(heatmaps_pred[0,k].cpu().numpy())
            #     plt.show()
            mse_each_joint = np.linalg.norm(pose2d_pred - pose2d_gt,
                                            axis=2) * visibility  # b x 21

            mse2d_lst += mse_each_joint.sum(axis=0)
            visibility_lst += visibility.sum(axis=0)

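            # PCK@t: count visible joints whose 2D error is below each pixel threshold t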
            for th_idx in range(len(th2d_lst)):
                PCK2d_lst[th_idx] += np.sum(
                    (mse_each_joint < th2d_lst[th_idx]) * visibility)

            period = 10
            progress_interval = max(1, len(data_loader) // period)
            if i % progress_interval == 0:
                print("[Evaluation]{}% finished.".format(
                    period * i // progress_interval))
            #if i == 10:break
        print('Evaluation spent {:.2f} s\tfps: {:.1f} {:.4f}'.format(
            time.time() - start_time, infer_time[0] / infer_time[1],
            infer_time[1] / infer_time[0]))

        mse2d_lst /= visibility_lst
        PCK2d_lst /= visibility_lst.sum()

        result_dir = record_prefix + cfg.EXP_NAME
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)

        mse_file, pck_file = os.path.join(
            result_dir,
            'mse2d_each_joint.txt'), os.path.join(result_dir, 'PCK2d.txt')
        print('Saving results to ' + mse_file)
        print('Saving results to ' + pck_file)
        np.savetxt(mse_file, mse2d_lst, fmt='%.4f')
        np.savetxt(pck_file, np.stack((th2d_lst, PCK2d_lst)))

        plot_performance(PCK2d_lst, th2d_lst, mse2d_lst)
Example #6
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    test_dataset = HIEDataset(DATA_PATH)
    data_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=False)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(
                # mean=[0.485, 0.456, 0.406],
                # std=[0.229, 0.224, 0.225]
                mean=[0.5, 0.5, 0.5],
                std=[0.5, 0.5, 0.5])
        ])

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(test_dataset)) if cfg.TEST.LOG_PROGRESS else None
    for i, images in enumerate(data_loader):
        # for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        if i % 100 == 0:
            print("Processing image %d" % i)
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))
        # print("Multi-scale end")

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])

        if cfg.TEST.LOG_PROGRESS:
            pbar.update()

        if i % cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            # logger.info('=> write {}'.format(prefix))
            # save_valid_image(image, final_results, '{}.jpg'.format(prefix),         dataset=test_dataset.name)
            # save_valid_image(image, final_results, '{}.jpg'.format(prefix),dataset='HIE20')
            # save_debug_images(cfg, image_resized, None, None, outputs, prefix)

        all_preds.append(final_results)
        all_scores.append(scores)

    if cfg.TEST.LOG_PROGRESS:
        pbar.close()

    # save preds and scores as json
    test_dataset.save_json(all_preds, all_scores)
    print('Save finished!')
Example #7
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_p, model_d = eval('models.' + cfg.MODEL.NAME +
                            '.get_adaptive_pose_net')(cfg, is_train=True)

    if cfg.TRAIN.CHECKPOINT:
        logger.info('=> loading model from {}'.format(cfg.TRAIN.CHECKPOINT))
        model_p.load_state_dict(torch.load(cfg.TRAIN.CHECKPOINT))
    else:
        model_state_file = os.path.join(final_output_dir, 'checkpoint.pth')
        logger.info('=> loading model from {}'.format(model_state_file))
        model_p.load_state_dict(torch.load(model_state_file))

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'pre_train_global_steps': 0,
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model_p, (dump_input, ), verbose=False)

    logger.info(get_model_summary(model_p, dump_input))

    model_p = torch.nn.DataParallel(model_p, device_ids=cfg.GPUS).cuda()
    model_d = torch.nn.DataParallel(model_d, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer for pose_net
    criterion_p = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    optimizer_p = get_optimizer(cfg, model_p)

    # define loss function (criterion) and optimizer for domain
    criterion_d = torch.nn.BCEWithLogitsLoss().cuda()
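    # binary cross-entropy on raw logits; the domain label is 1.0 for synthetic samples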
    optimizer_d = get_optimizer(cfg, model_d)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_pre_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_PRE_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_pre_loader = torch.utils.data.DataLoader(
        train_pre_dataset,
        batch_size=cfg.TRAIN.PRE_BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    syn_labels = train_dataset._load_syrip_syn_annotations()
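    # BalancedBatchSampler presumably balances real and synthetic (SyRIP) samples within each batch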
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=BalancedBatchSampler(train_dataset, syn_labels),
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    '''
    train_loader = torch.utils.data.DataLoader(   
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY
    )
    '''

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model_p.load_state_dict(checkpoint['state_dict'])

        optimizer_p.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    # freeze early layers
    print('Freezing early layers of model_p')
    for idx, param in enumerate(model_p.parameters()):
        # freeze the first 109 parameter tensors (bottleneck + stage 2);
        # use 483 to also freeze stage 3
        if idx <= 108:
            param.requires_grad = False

    lr_scheduler_p = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_p,
        cfg.TRAIN.LR_STEP,
        cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    lr_scheduler_d = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_d, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR)

    epoch_D = cfg.TRAIN.PRE_EPOCH
    losses_D_list = []
    acces_D_list = []
    acc_num_total = 0
    num = 0
    losses_d = AverageMeter()

    # Pretrained Stage
    print('Pretrained Stage:')
    print('Start to train Domain Classifier-------')
    for epoch_d in range(epoch_D):  # epoch
        model_d.train()
        model_p.train()

        for i, (input, target, target_weight,
                meta) in enumerate(train_pre_loader):  # iteration
            # compute output for pose_net
            feature_outputs, outputs = model_p(input)
            #print(feature_outputs.size())
            # compute the domain classifier output
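            # detach() keeps the domain loss from backpropagating into the pose network here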
            domain_logits = model_d(feature_outputs.detach())
            domain_label = (meta['synthetic'].unsqueeze(-1) *
                            1.0).cuda(non_blocking=True)
            # print(domain_label)

            loss_d = criterion_d(domain_logits, domain_label)
            loss_d.backward(retain_graph=True)
            optimizer_d.step()

            # compute accuracy of classifier
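            # a positive logit means sigmoid > 0.5, i.e. the sample is predicted synthetic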
            acc_num = 0
            for j in range(len(domain_label)):
                if (domain_logits[j] > 0 and domain_label[j] == 1.0) or (
                        domain_logits[j] < 0 and domain_label[j] == 0.0):
                    acc_num += 1
                    acc_num_total += 1
                num += 1
            acc_d = acc_num * 1.0 / input.size(0)
            acces_D_list.append(acc_d)

            optimizer_d.zero_grad()
            losses_d.update(loss_d.item(), input.size(0))

            if i % cfg.PRINT_FREQ == 0:
                msg = 'Epoch: [{0}][{1}/{2}]\t' \
                      'Accuracy_d: {3} ({4})\t' \
                      'Loss_d: {loss_d.val:.5f} ({loss_d.avg:.5f})'.format(
                          epoch_d, i, len(train_pre_loader), acc_d,
                          acc_num_total * 1.0 / num, loss_d=losses_d)
                logger.info(msg)

                writer = writer_dict['writer']
                pre_global_steps = writer_dict['pre_train_global_steps']
                writer.add_scalar('pre_train_loss_D', losses_d.val,
                                  pre_global_steps)
                writer.add_scalar('pre_train_acc_D', acc_d, pre_global_steps)
                writer_dict['pre_train_global_steps'] = pre_global_steps + 1

            losses_D_list.append(losses_d.val)

    print('Training Stage (Step I and II):')
    losses_P_list = []
    acces_P_list = []
    losses_p = AverageMeter()
    acces_p = AverageMeter()
    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
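        # note: since PyTorch 1.1, lr_scheduler.step() is meant to be called after optimizer.step()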
        lr_scheduler_p.step()

        # train for one epoch
        losses_P_list, losses_D_list, acces_P_list, acces_D_list = train_adaptive(
            cfg, train_loader, model_p, model_d, criterion_p, criterion_d,
            optimizer_p, optimizer_d, epoch, final_output_dir, tb_log_dir,
            writer_dict, losses_P_list, losses_D_list, acces_P_list,
            acces_D_list, acc_num_total, num, losses_p, acces_p, losses_d)

        # evaluate on validation set
        perf_indicator = validate_adaptive(cfg, valid_loader, valid_dataset,
                                           model_p, criterion_p,
                                           final_output_dir, tb_log_dir,
                                           writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model_p.state_dict(),
                'best_state_dict': model_p.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer_p.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model_p.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()

    np.save('./losses_D.npy', np.array(losses_D_list))  # Adversarial-D
    np.save('./losses_P.npy', np.array(losses_P_list))  # P
    np.save('./acces_P.npy', np.array(acces_P_list))  # P
    np.save('./acces_D.npy', np.array(acces_D_list))  # D
Example #8
                    # nn.init.constant_(m.bias, 0)


def get_pose_net(cfg, is_train, **kwargs):
    model = Pose_CPM(BasicStage, cfg)

    if is_train and cfg.MODEL.INIT_WEIGHTS:
        model.init_weights(cfg.MODEL.PRETRAINED)

    return model


if __name__ == '__main__':
    from torchsummary import summary
    from utils.utils import get_model_summary
    from utils.receptive_field import receptive_field, receptive_field_for_unit
    from utils.visual import Visualizer
    from utils.ops import *

    x_img = torch.randn((1, 3, 256, 256)).cuda()
    x_center_map = torch.randn((1, 1, 256, 256))
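    # note: x_center_map is constructed but unused by the summary call below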
    # print(x_img.size())
    model = Pose_CPM(BasicStage, '').cuda()
    # out = model(x_img)
    # print(model)
    # print(out[0].size())
    # print(summary(model, (3, 256, 256)))
    print(get_model_summary(model, x_img, verbose=True))
    # rec = receptive_field(model, (3, 256, 256))
    # print(rec)
Example #9
def main():
    args = parse_args()
    # YACS is a lightweight library for defining and managing system configurations,
    # such as those commonly found in software designed for scientific experiments.
    # These "configurations" typically cover concepts such as hyperparameters for
    # training machine learning models or configurable model hyperparameters (such as
    # the depth of a convolutional neural network).
    update_config(cfg, args)  # update the config with the manually supplied arguments

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # eval(): evaluates a string expression and returns the expression's value
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    # normalize the data:
    # ``input[channel] = (input[channel] - mean[channel]) / std[channel]``
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # load the mpii dataset
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # call core.function to train on the training set:
        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # call core.function to evaluate on the validation set:
        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
Example #10
def main_worker(gpus, ngpus_per_node, args, final_output_dir, tb_log_dir):
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    #os.environ['CUDA_VISIBLE_DEVICES']=gpus

    # Parallel setting
    print("Use GPU: {} for training".format(gpus))

    update_config(cfg, args)

    #test(cfg, args)

    # logger setting
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # model initialization
    model = eval(cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=True)

    # load pretrained model before DDP initialization
    checkpoint_file = os.path.join(final_output_dir, 'model_best.pth.tar')

    if cfg.AUTO_RESUME:
        if os.path.exists(checkpoint_file):
            checkpoint = torch.load(checkpoint_file, map_location='cpu')
            state_dict = checkpoint['state_dict']

            for key in list(state_dict.keys()):
                new_key = key.replace("module.", "")
                state_dict[new_key] = state_dict.pop(key)
            model.load_state_dict(state_dict)
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_file, checkpoint['epoch']))

    elif cfg.MODEL.HRNET_PRETRAINED:
        logger.info("=> loading a pretrained model '{}'".format(
            cfg.MODEL.PRETRAINED))
        checkpoint = torch.load(cfg.MODEL.HRNET_PRETRAINED, map_location='cpu')

        state_dict = checkpoint['state_dict']
        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)

        model.load_state_dict(state_dict)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # copy configuration file
    shutil.copy2(args.cfg, final_output_dir)

    # calculate GFLOPS
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]))

    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    #ops, params = get_model_complexity_info(
    #    model, (3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]),
    #    as_strings=True, print_per_layer_stat=True, verbose=True)
    # FP16 SETTING
    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print(
                "Warning:  if --fp16 is not used, static_loss_scale will be ignored."
            )

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not cfg.DISTRIBUTED:
        print(
            'Warning: Sync BatchNorm is only supported in distributed training.'
        )

    # Distributed Computing
    master = True
    if cfg.DISTRIBUTED:  # this branch is not exercised in this setup
        args.local_rank += int(gpus[0])
        print('This process is using GPU', args.local_rank)
        device = args.local_rank
        master = device == int(gpus[0])
        dist.init_process_group(backend='nccl')
        if cfg.MODEL.SYNC_BN:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if gpus is not None:
            torch.cuda.set_device(device)
            model.cuda(device)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # workers = int(workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[device],
                output_device=device,
                find_unused_parameters=True)
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    else:  # this is the branch actually used here
        gpu_ids = eval('[' + gpus + ']')
        device = gpu_ids[0]
        print('This process is using GPU', str(device))
        model = torch.nn.DataParallel(model, gpu_ids).cuda(device)

    # Prepare loss functions
    criterion = {}
    if cfg.LOSS.WITH_HEATMAP_LOSS:
        criterion['heatmap_loss'] = HeatmapLoss().cuda()
    if cfg.LOSS.WITH_POSE2D_LOSS:
        criterion['pose2d_loss'] = JointsMSELoss().cuda()
    if cfg.LOSS.WITH_BONE_LOSS:
        criterion['bone_loss'] = BoneLengthLoss().cuda()
    if cfg.LOSS.WITH_JOINTANGLE_LOSS:
        criterion['jointangle_loss'] = JointAngleLoss().cuda()

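    # best_perf tracks a validation loss here, so lower is better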
    best_perf = 1e9
    best_model = False
    last_epoch = -1

    # optimizer must be initialized after model initialization
    optimizer = get_optimizer(cfg, model)

    if cfg.FP16.ENABLED:
        optimizer = FP16_Optimizer(
            optimizer,
            static_loss_scale=cfg.FP16.STATIC_LOSS_SCALE,
            dynamic_loss_scale=cfg.FP16.DYNAMIC_LOSS_SCALE,
            verbose=False)

    begin_epoch = cfg.TRAIN.BEGIN_EPOCH

    if not cfg.AUTO_RESUME and cfg.MODEL.HRNET_PRETRAINED:
        optimizer.load_state_dict(checkpoint['optimizer'])

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['loss']
        optimizer.load_state_dict(checkpoint['optimizer'])

        if 'train_global_steps' in checkpoint.keys() and \
        'valid_global_steps' in checkpoint.keys():
            writer_dict['train_global_steps'] = checkpoint[
                'train_global_steps']
            writer_dict['valid_global_steps'] = checkpoint[
                'valid_global_steps']

    if cfg.FP16.ENABLED:
        logger.info("=> Using FP16 mode")
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer.optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=begin_epoch)
    elif cfg.TRAIN.LR_SCHEDULE == 'warmup':
        from utils.utils import get_linear_schedule_with_warmup
        lr_scheduler = get_linear_schedule_with_warmup(
            optimizer=optimizer,
            num_warmup_steps=cfg.TRAIN.WARMUP_EPOCHS,
            num_training_steps=cfg.TRAIN.END_EPOCH - cfg.TRAIN.BEGIN_EPOCH,
            last_epoch=begin_epoch)
    elif cfg.TRAIN.LR_SCHEDULE == 'multi_step':
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=begin_epoch)
    else:
        print('Unknown learning rate schedule!')
        exit()

    # Data loading code
    train_loader_dict = make_dataloader(cfg,
                                        is_train=True,
                                        distributed=cfg.DISTRIBUTED)
    valid_loader_dict = make_dataloader(cfg,
                                        is_train=False,
                                        distributed=cfg.DISTRIBUTED)

    for i, (dataset_name,
            train_loader) in enumerate(train_loader_dict.items()):
        logger.info(
            'Training Loader {}/{}:\n'.format(i + 1, len(train_loader_dict)) +
            str(train_loader.dataset))
    for i, (dataset_name,
            valid_loader) in enumerate(valid_loader_dict.items()):
        logger.info('Validation Loader {}/{}:\n'.format(
            i + 1, len(valid_loader_dict)) + str(valid_loader.dataset))

    #writer_dict['writer'].add_graph(model, (dump_input, ))
    """
    Start training
    """
    start_time = time.time()

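    # anomaly detection helps localize NaNs in the backward pass at a noticeable speed cost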
    with torch.autograd.set_detect_anomaly(True):
        for epoch in range(begin_epoch + 1, cfg.TRAIN.END_EPOCH + 1):
            epoch_start_time = time.time()
            # shuffle datasets with the same random seed
            if cfg.DISTRIBUTED:
                for data_loader in train_loader_dict.values():
                    data_loader.sampler.set_epoch(epoch)
            # train for one epoch
            # get_last_lr() returns a list
            logger.info('Start training [{}/{}] lr: {:.4e}'.format(
                epoch, cfg.TRAIN.END_EPOCH - cfg.TRAIN.BEGIN_EPOCH,
                lr_scheduler.get_last_lr()[0]))
            train(cfg,
                  args,
                  master,
                  train_loader_dict,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  final_output_dir,
                  tb_log_dir,
                  writer_dict,
                  logger,
                  fp16=cfg.FP16.ENABLED,
                  device=device)

            # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`.
            lr_scheduler.step()

            # evaluate on validation set

            if not cfg.WITHOUT_EVAL:
                logger.info('Start evaluating [{}/{}]'.format(
                    epoch, cfg.TRAIN.END_EPOCH - cfg.TRAIN.BEGIN_EPOCH))
                with torch.no_grad():
                    recorder = validate(cfg,
                                        args,
                                        master,
                                        valid_loader_dict,
                                        model,
                                        criterion,
                                        final_output_dir,
                                        tb_log_dir,
                                        writer_dict,
                                        logger,
                                        device=device)

                val_total_loss = recorder.avg_total_loss

                best_model = False
                if val_total_loss < best_perf:
                    logger.info(
                        'This epoch yielded a better model with total loss {:.4f} < {:.4f}.'
                        .format(val_total_loss, best_perf))
                    best_perf = val_total_loss
                    best_model = True

            else:
                val_total_loss = 0
                best_model = True

            if master:
                logger.info(
                    '=> saving checkpoint to {}'.format(final_output_dir))
                save_checkpoint(
                    {
                        'epoch': epoch,
                        'model': cfg.EXP_NAME + '.' + cfg.MODEL.NAME,
                        'state_dict': model.state_dict(),
                        'loss': val_total_loss,
                        'optimizer': optimizer.state_dict(),
                        'train_global_steps':
                        writer_dict['train_global_steps'],
                        'valid_global_steps': writer_dict['valid_global_steps']
                    }, best_model, final_output_dir)

            print('\nEpoch {} spent {:.2f} hours\n'.format(
                epoch, (time.time() - epoch_start_time) / 3600))

    if master:
        final_model_state_file = os.path.join(
            final_output_dir, 'final_state{}.pth.tar'.format(gpus))
        logger.info(
            '=> saving final model state to {}'.format(final_model_state_file))
        torch.save(model.state_dict(), final_model_state_file)
        writer_dict['writer'].close()

        print(
            '\n[Training Accomplished] {} epochs spent {:.2f} hours\n'.format(
                cfg.TRAIN.END_EPOCH - begin_epoch,
                (time.time() - start_time) / 3600))
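
Note: the resume path above restores not just the weights and optimizer but also
the TensorBoard step counters, so logged curves continue where they stopped. A
minimal, self-contained sketch of that round-trip, using illustrative names and
a plain (non-FP16) optimizer rather than the repo's exact classes:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# save: epoch counter, weights, optimizer state, TensorBoard step counters
torch.save({
    'epoch': 5,
    'state_dict': model.state_dict(),
    'optimizer': optimizer.state_dict(),
    'train_global_steps': 1000,
    'valid_global_steps': 50,
}, 'checkpoint.pth')

# resume: restore everything so training continues where it stopped
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
begin_epoch = checkpoint['epoch']
writer_dict = {'train_global_steps': checkpoint['train_global_steps'],
               'valid_global_steps': checkpoint['valid_global_steps']}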
Example No. 11
0
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)
    pose_dir = prepare_output_dirs(args.outputDir)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        # model.load_state_dict(torch.load(model_state_file))
        pretrained_model_state = torch.load(model_state_file)

        # checkpoint keys carry a '1.' prefix; copy the parameters over by name
        for name, param in model.state_dict().items():
            param.copy_(pretrained_model_state['1.' + name])

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    # data_loader, test_dataset = make_test_dataloader(cfg)
    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    parser = HeatmapParser(cfg)
    # Load the input video
    vidcap = cv2.VideoCapture(args.videoFile)
    fps = vidcap.get(cv2.CAP_PROP_FPS)
    if fps < args.inferenceFps:
        print('desired inference fps is ' + str(args.inferenceFps) +
              ' but video fps is ' + str(fps))
        exit()
    skip_frame_cnt = round(fps / args.inferenceFps)
    frame_width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    outcap = cv2.VideoWriter(
        '{}/{}_pose.avi'.format(
            args.outputDir,
            os.path.splitext(os.path.basename(args.videoFile))[0]),
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), int(skip_frame_cnt),
        (frame_width, frame_height))

    count = 0
    while vidcap.isOpened():
        total_now = time.time()
        ret, image_bgr = vidcap.read()
        count += 1

        if not ret:
            # end of stream: stop instead of spinning on failed reads
            break

        if count % skip_frame_cnt != 0:
            continue

        image_debug = image_bgr.copy()
        now = time.time()
        image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        # image = image_rgb.cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))
        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])
        for person_joints in final_results:
            for joint in person_joints:
                x, y = int(joint[0]), int(joint[1])
                cv2.circle(image_debug, (x, y), 4, (255, 0, 0), 2)
        then = time.time()
        print("Find person pose in: {} sec".format(then - now))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        img_file = os.path.join(pose_dir, 'pose_{:08d}.jpg'.format(count))
        cv2.imwrite(img_file, image_debug)
        outcap.write(image_debug)

    vidcap.release()
    outcap.release()
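
Note: the loop above reads every frame but only processes one in every
skip_frame_cnt, so the effective inference rate approximates args.inferenceFps.
A standalone sketch of just that pattern, with a hypothetical input path:

import cv2

video_path = 'input.mp4'  # hypothetical input file
target_fps = 5            # desired inference rate

vidcap = cv2.VideoCapture(video_path)
fps = vidcap.get(cv2.CAP_PROP_FPS)
skip_frame_cnt = max(1, round(fps / target_fps))

count = 0
while vidcap.isOpened():
    ret, frame = vidcap.read()
    if not ret:
        break  # end of stream: stop instead of spinning
    count += 1
    if count % skip_frame_cnt != 0:
        continue  # drop frames to approximate the target rate
    # ... run pose inference on `frame` here ...
vidcap.release()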
Example No. 12
0
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    percent = args.percent

    model = getprunemodel(percent)
    this_dir = os.path.dirname(__file__)
    shutil.copy2(os.path.join(this_dir, '../lib/models/purnpose_hrnet.py'),
                 final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    # writer_dict['writer'].add_graph(model, (dump_input, ))
    logger.info(get_model_summary(model, dump_input))
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # in PyTorch 1.1.0+, call lr_scheduler.step() after optimizer.step()
        lr_scheduler.step()

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
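
Note: the MultiStepLR used above multiplies the learning rate by
cfg.TRAIN.LR_FACTOR at each epoch listed in cfg.TRAIN.LR_STEP. A tiny runnable
sketch with illustrative milestones (not this config's actual values):

import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[170, 200], gamma=0.1)

for epoch in range(210):
    optimizer.step()   # training for one epoch would go here
    scheduler.step()   # stepped after optimizer.step() (PyTorch >= 1.1)
    if epoch in (169, 170, 199, 200):
        print(epoch, scheduler.get_last_lr())  # drops to 1e-4, then 1e-5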
Example No. 13
0
def main():
    rand_state = np.random.RandomState(1311)
    args = parse_args()
    update_config(cfg, args)

    # We have 118,287 images total in the training set, so let's choose 5000 images on each cycle,
    # 50000 images total (~1/2 of total)
    images_per_cycle = 5000

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    pool_idx = list(range(len(train_dataset)))
    train_idx = []
    validation_accuracies = list()

    device = 'cuda'  # if (torch.cuda.is_available()) else 'cpu'
    progress = tqdm.tqdm(range(10))
    for cycle in progress:
        model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
            cfg, is_train=True)
        writer_dict = {
            'writer': SummaryWriter(log_dir=tb_log_dir),
            'train_global_steps': 0,
            'valid_global_steps': 0,
        }

        dump_input = torch.rand(
            (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
        # writer_dict['writer'].add_graph(model, (dump_input, ))

        logger.info(get_model_summary(model, dump_input))

        if args.use_active_learning:
            model = ActiveLearning(model)
        model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

        # define loss function (criterion) and optimizer
        # Don't use reduction here; we will apply 'mean' later.
        criterion = JointsMSELoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT,
                                  reduction='none').cuda()

        if args.use_active_learning and not cycle == 0:
            indices, losses = choose_indices_loss_prediction_active_learning(
                model,
                cycle,
                rand_state,
                pool_idx,
                train_dataset,
                device,
                count=images_per_cycle,
                subset_factor=5,
                is_human_pose=True)
            train_idx.extend(indices)
            if args.output_superannotate_csv_file is not None:
                write_entropies_csv(train_dataset, indices, losses,
                                    args.output_superannotate_csv_file)
        else:
            train_idx.extend(
                random_indices(pool_idx, rand_state, count=images_per_cycle))

        train_loader = torch.utils.data.DataLoader(
            data.Subset(train_dataset, train_idx),
            batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=cfg.TRAIN.SHUFFLE,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=False,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)

        best_perf = 0.0
        best_accuracy = 0.0
        best_model = False
        last_epoch = -1
        optimizer = get_optimizer(cfg, model)
        begin_epoch = cfg.TRAIN.BEGIN_EPOCH
        checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

        if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
            logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
            checkpoint = torch.load(checkpoint_file)
            begin_epoch = checkpoint['epoch']
            best_perf = checkpoint['perf']
            last_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])

            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_file, checkpoint['epoch']))

        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=last_epoch)

        for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):

            # train for one epoch
            train(cfg,
                  train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  final_output_dir,
                  tb_log_dir,
                  writer_dict,
                  use_active_learning=args.use_active_learning,
                  active_learning_lamda=0.0001,
                  cycle=cycle)

            # in PyTorch 1.1.0+, call lr_scheduler.step() after optimizer.step()
            lr_scheduler.step()

            valid_model = model
            if args.use_active_learning:
                valid_model = torch.nn.DataParallel(
                    model.module.base_model, device_ids=cfg.GPUS).cuda()

            # evaluate on validation set
            perf_indicator, validation_acc = validate(
                cfg,
                valid_loader,
                valid_dataset,
                model,
                criterion,
                final_output_dir,
                tb_log_dir,
                writer_dict,
                use_active_learning=args.use_active_learning)

            if perf_indicator >= best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False

            if validation_acc >= best_accuracy:
                best_accuracy = validation_acc

            # Martun: Don't save checkpoints here; we don't want to resume
            # training on the next cycle, we want to start from scratch.
            # logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            #save_checkpoint({
            #    'epoch': epoch + 1,
            #    'model': cfg.MODEL.NAME,
            #    'state_dict': model.state_dict(),
            #    'best_state_dict': model.module.state_dict(),
            #    'perf': perf_indicator,
            #    'optimizer': optimizer.state_dict(),
            #}, best_model, final_output_dir)

        # final_model_state_file = os.path.join(
        #     final_output_dir, 'final_state.pth'
        # )
        # logger.info('=> saving final model state to {}'.format(
        #     final_model_state_file)
        # )
        # torch.save(model.module.state_dict(), final_model_state_file)
        writer_dict['writer'].close()
        validation_accuracies.append(best_accuracy)
        print("{} accuracies: {}".format(
            "Active Learning" if args.use_active_learning else "Random",
            str(validation_accuracies)))
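
Note: each cycle above grows train_idx by images_per_cycle indices, chosen
either by the loss-prediction module or at random, then wraps them in
data.Subset. A minimal sketch of the random branch, with a stand-in dataset,
made-up counts, and random_indices approximated inline:

import numpy as np
import torch
from torch.utils import data

dataset = data.TensorDataset(torch.arange(100).float())
rand_state = np.random.RandomState(1311)

pool_idx = list(range(len(dataset)))
train_idx = []
for cycle in range(3):
    # draw fresh indices from the remaining pool, without replacement
    picked = rand_state.choice(pool_idx, size=10, replace=False).tolist()
    train_idx.extend(picked)
    pool_idx = [i for i in pool_idx if i not in set(picked)]
    loader = data.DataLoader(data.Subset(dataset, train_idx), batch_size=4)
    print('cycle', cycle, '->', len(train_idx), 'samples,', len(loader), 'batches')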
Example No. 14
0
def main():
    args = parse_args()
    update_config(cfg, args)

    setup_seed(cfg.SEED)

    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([str(x) for x in cfg.GPUS])

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, args.mention, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)
    # print(model)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]))

    try:
        writer_dict['writer'].add_graph(model, (dump_input, ))
    except Exception as e:
        logger.info(e)

    try:
        logger.info(get_model_summary(model, dump_input))
    except Exception as e:
        logger.info(e)

    model = torch.nn.DataParallel(model, device_ids=list(range(len(
        cfg.GPUS)))).cuda()

    # define loss function (criterion) and optimizer
    criterion = eval(cfg.LOSS.NAME)(cfg).cuda()

    if cfg.LOSS.NAME == 'ModMSE_KL_CC_NSS_Loss':
        criterion_val = ModMSE_KL_CC_Loss(cfg).cuda()
    else:
        criterion_val = criterion

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    logger.info(os.linesep +
                'train_set : {:d} entries'.format(len(train_dataset)))
    logger.info('val_set   : {:d} entries'.format(len(valid_dataset)) +
                os.linesep)

    if cfg.DATASET.SAMPLER == "":
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=cfg.TRAIN.SHUFFLE,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=False,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
    elif cfg.DATASET.SAMPLER == "RandomIdentitySampler":
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            sampler=dataset.RandomIdentitySampler(
                train_dataset.images,
                cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
                cfg.DATASET.NUM_INSTANCES),
            batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS) //
            cfg.DATASET.NUM_INSTANCES,
            shuffle=False,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            sampler=dataset.RandomIdentitySampler(
                valid_dataset.images,
                cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
                cfg.DATASET.NUM_INSTANCES),
            batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS) //
            cfg.DATASET.NUM_INSTANCES,
            shuffle=False,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
    else:
        raise ValueError('Unknown sampler: {}'.format(cfg.DATASET.SAMPLER))

    best_perf = None
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    if cfg.TRAIN.WARMUP_EPOCHS > 0:
        lr_scheduler = WarmupMultiStepLR(optimizer,
                                         cfg.TRAIN.LR_STEP,
                                         cfg.TRAIN.LR_FACTOR,
                                         warmup_iters=cfg.TRAIN.WARMUP_EPOCHS,
                                         last_epoch=last_epoch)
    else:
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):

        # torch.cuda.empty_cache()

        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # in PyTorch 1.1.0+, call lr_scheduler.step() after optimizer.step()
        lr_scheduler.step()

        # torch.cuda.empty_cache()

        # evaluate on validation set
        perf_indicator, is_larger_better = validate(cfg, valid_loader,
                                                    valid_dataset, model,
                                                    criterion_val,
                                                    final_output_dir,
                                                    tb_log_dir, writer_dict)

        if is_larger_better:
            if best_perf is None or perf_indicator >= best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False
        else:
            if best_perf is None or perf_indicator <= best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
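
Note: Example No. 14 tracks the best checkpoint through an is_larger_better
flag returned by validate(), so one loop handles both accuracy-like and
loss-like metrics. The comparison collapses to a small helper, e.g.:

def is_best(perf, best_perf, is_larger_better):
    """True when perf improves on best_perf (None means no best yet)."""
    if best_perf is None:
        return True
    return perf >= best_perf if is_larger_better else perf <= best_perf

print(is_best(0.90, 0.80, True))    # accuracy-style metric: higher wins
print(is_best(0.10, 0.12, False))   # loss-style metric: lower wins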
Example No. 15
0
def main():
    args = parse_args()
    update_config(cfg, args)

    # Add KFold training support
    isKFold = True
    cross_train_set = get_img_Ids(
        'data/tiger/annotations/person_keypoints_train.json')
    # bas_train, base_test = kfold_split_generate(cross_train_set)
    from sklearn.model_selection import KFold
    kFoldNum = 1
    kf = KFold(n_splits=5)  # create 5 splits of the Tiger dataset
    for Ftrain, Ftest in kf.split(cross_train_set):
        train_data = [cross_train_set[i] for i in Ftrain]

        val_data = [cross_train_set[i] for i in Ftest]

        logger, final_output_dir, tb_log_dir = create_logger(
            cfg, args.cfg, 'train', str(kFoldNum))

        kFoldNum = kFoldNum + 1

        logger.info(pprint.pformat(args))
        logger.info(cfg)

        # cudnn related setting
        cudnn.benchmark = cfg.CUDNN.BENCHMARK
        torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
        torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

        model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
            cfg, is_train=True)

        # copy model file
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
            final_output_dir)
        # logger.info(pprint.pformat(model))

        writer_dict = {
            'writer': SummaryWriter(log_dir=tb_log_dir),
            'train_global_steps': 0,
            'valid_global_steps': 0,
        }

        dump_input = torch.rand(
            (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
        writer_dict['writer'].add_graph(model, (dump_input, ))

        logger.info(get_model_summary(model, dump_input))

        model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

        # define loss function (criterion) and optimizer
        criterion = JointsMSELoss(
            use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

        # Data loading code
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        print("TRAIN DATA COUNT : " + str(len(train_data)))
        print("VAL DATA COUNT : " + str(len(val_data)))

        train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
            cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ]), isKFold, train_data)
        valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
            cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ]), False, val_data)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=cfg.TRAIN.SHUFFLE,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
            shuffle=False,
            num_workers=cfg.WORKERS,
            pin_memory=cfg.PIN_MEMORY)

        best_perf = 0.0
        best_model = False
        last_epoch = -1
        optimizer = get_optimizer(cfg, model)
        begin_epoch = cfg.TRAIN.BEGIN_EPOCH
        checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

        if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
            logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
            checkpoint = torch.load(checkpoint_file)
            begin_epoch = checkpoint['epoch']
            best_perf = checkpoint['perf']
            last_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])

            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_file, checkpoint['epoch']))

        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=last_epoch)

        for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
            # train for one epoch
            train(cfg, train_loader, model, criterion, optimizer, epoch,
                  final_output_dir, tb_log_dir, writer_dict)

            # in PyTorch 1.1.0+, call lr_scheduler.step() after optimizer.step()
            lr_scheduler.step()

            # evaluate on validation set
            perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                      criterion, final_output_dir, tb_log_dir,
                                      writer_dict)

            if perf_indicator >= best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False

            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': cfg.MODEL.NAME,
                    'state_dict': model.state_dict(),
                    'best_state_dict': model.module.state_dict(),
                    'perf': perf_indicator,
                    'optimizer': optimizer.state_dict(),
                }, best_model, final_output_dir)

        final_model_state_file = os.path.join(final_output_dir,
                                              'final_state.pth')
        logger.info(
            '=> saving final model state to {}'.format(final_model_state_file))
        torch.save(model.module.state_dict(), final_model_state_file)
        writer_dict['writer'].close()
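
Note: the k-fold wrapper above partitions the image IDs once per fold and
rebuilds the datasets from the resulting ID lists. The split itself is plain
scikit-learn KFold over positions, shown here with stand-in IDs:

from sklearn.model_selection import KFold

image_ids = ['img{:03d}'.format(i) for i in range(10)]  # stand-in for get_img_Ids(...)
kf = KFold(n_splits=5)
for fold, (train_pos, test_pos) in enumerate(kf.split(image_ids), start=1):
    train_data = [image_ids[i] for i in train_pos]
    val_data = [image_ids[i] for i in test_pos]
    print('fold', fold, '-> train:', len(train_data), 'val:', len(val_data))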
Example No. 16
0
def main_worker(gpu, ngpus_per_node, args, final_output_dir, tb_log_dir):

    args.gpu = gpu
    args.rank = args.rank * ngpus_per_node + gpu
    print('Init process group: dist_url: {}, world_size: {}, rank: {}'.format(cfg.DIST_URL, args.world_size, args.rank))
    dist.init_process_group(backend=cfg.DIST_BACKEND, init_method=cfg.DIST_URL, world_size=args.world_size, rank=args.rank)

    update_config(cfg, args)

    # setup logger
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(cfg, is_train=True)
    logger.info(get_model_summary(model, torch.zeros(1, 3, *cfg.MODEL.IMAGE_SIZE)))

    # copy model file
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0):
        this_dir = os.path.dirname(__file__)
        shutil.copy2(os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    if not cfg.MULTIPROCESSING_DISTRIBUTED or (cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0):
        dump_input = torch.rand((1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
        writer_dict['writer'].add_graph(model, (dump_input, ))
        # logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.MODEL.SYNC_BN:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    
    torch.cuda.set_device(args.gpu)
    model.cuda(args.gpu)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda(args.gpu)

    # Data loading code
    train_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
    )
    valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
    )
    
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
        shuffle=(train_sampler is None),
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
        sampler=train_sampler
    )

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY
    )
    logger.info(train_loader.dataset)

    best_perf = -1
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        
        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)
        # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`.
        lr_scheduler.step()

        # evaluate on validation set
        perf_indicator = validate(
            args, cfg, valid_loader, valid_dataset, model, criterion,
            final_output_dir, tb_log_dir, writer_dict
        )

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        if not cfg.MULTIPROCESSING_DISTRIBUTED or (
                cfg.MULTIPROCESSING_DISTRIBUTED
                and args.rank == 0
        ):
            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint({
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(
        final_output_dir, 'final_state{}.pth.tar'.format(gpu)
    )

    logger.info('=> saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
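
Note: one detail worth flagging in Example No. 16: train_sampler.set_epoch(epoch)
is never called, so the DistributedSampler replays the same shuffle every epoch
(compare the distributed branch of the first training loop above). A minimal
sketch of the recommended pattern; num_replicas/rank are passed explicitly so it
runs without init_process_group:

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

dataset = TensorDataset(torch.arange(8))
sampler = DistributedSampler(dataset, num_replicas=1, rank=0, shuffle=True)
loader = DataLoader(dataset, batch_size=4, sampler=sampler)

for epoch in range(2):
    sampler.set_epoch(epoch)  # reseed the shuffle; omit this and every epoch repeats
    order = [int(x) for batch in loader for x in batch[0]]
    print('epoch', epoch, order)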
Example No. 17
0
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)
    # pose_dir = prepare_output_dirs(args.outputDir)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        pretrained_model_state = torch.load(model_state_file)

        # checkpoint keys carry a '1.' prefix; copy the parameters over by name
        for name, param in model.state_dict().items():
            param.copy_(pretrained_model_state['1.' + name])
        # model.load_state_dict(torch.load(model_state_file))

    # model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    # Input to the model
    # batch_size = 1
    x = torch.randn(1, 3, 256, 256, requires_grad=True)
    torch_out = model(x)

    # Export the model
    torch.onnx.export(
        model,                     # model being run
        x,                         # model input (or a tuple for multiple inputs)
        args.output_onnx,          # where to save the model (file or file-like object)
        export_params=True,        # store the trained parameter weights inside the model file
        opset_version=11,          # the ONNX opset version to export the model to
        do_constant_folding=True,  # whether to execute constant folding for optimization
        input_names=['input'],     # the model's input names
        output_names=['output1'])  # the model's output names
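
Note: after exporting, it is common to sanity-check the ONNX graph against the
PyTorch output. A hedged sketch of that check, assuming onnxruntime is installed
and 'output.onnx' stands in for whatever args.output_onnx was:

import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession('output.onnx',
                                       providers=['CPUExecutionProvider'])
x = np.random.randn(1, 3, 256, 256).astype(np.float32)
(onnx_out,) = session.run(['output1'], {'input': x})
print('ONNX output shape:', onnx_out.shape)
# compare with the torch output, e.g.
# np.testing.assert_allclose(torch_out.detach().numpy(), onnx_out,
#                            rtol=1e-3, atol=1e-5)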
Example No. 18
0
def main():
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Arguments
    parser = argparse.ArgumentParser(
        description='Multi-Loss Rebalancing Algorithm for Monocular Depth Estimation')
    parser.add_argument('--backbone',
                        default='PNASNet5Large',
                        type=str,
                        help='DenseNet161 (bs12) / PNASNet5Large (bs6)')
    parser.add_argument('--decoder_scale',
                        default=1024,
                        type=int,
                        help='valid for PNASNet5Large')
    parser.add_argument('--epochs',
                        default=20,
                        type=int,
                        help='number of total epochs to run')
    parser.add_argument('--lr',
                        '--learning-rate',
                        default=0.0001,
                        type=float,
                        help='initial learning rate')
    parser.add_argument('--bs', default=8, type=int, help='batch size')
    parser.add_argument('--weight_initialization', default=True, type=bool)
    parser.add_argument('--weight_rebalancing', default=True, type=bool)
    parser.add_argument('--num_weight_rebalancing_per_epoch',
                        default=4,
                        type=int)
    parser.add_argument('--num_save_per_epoch', default=4, type=int)
    parser.add_argument('--lambda_for_adjust_start', default=3, type=float)
    parser.add_argument('--lambda_for_adjust_slope', default=-1.5, type=float)
    parser.add_argument('--lambda_for_adjust_min', default=-3, type=float)
    #parser.add_argument('--train_dataset_path', default='dataset/train_reduced05.zip', type=str)
    #parser.add_argument('--train_dataset_csv_list', default='train_reduced05/train.csv', type=str)
    parser.add_argument('--train_dataset_path',
                        default='dataset/train795.zip',
                        type=str)
    parser.add_argument('--train_dataset_csv_list',
                        default='train795/train.csv',
                        type=str)
    args = parser.parse_args()

    # image size
    original_image_size = [480, 640]
    input_image_size = [288, 384]
    # interpolation function / relu
    interpolate_bicubic_fullsize = nn.Upsample(size=original_image_size,
                                               mode='bicubic')
    relu = nn.ReLU()

    # create model
    model = network_model.create_model(args.backbone, args.decoder_scale)
    print('Summary: All Network')
    print(
        utils_utils.get_model_summary(model,
                                      torch.rand(1, 3, input_image_size[0],
                                                 input_image_size[1]).cuda(),
                                      verbose=True))
    print('Model created.')

    # Training parameters
    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    batch_size = args.bs

    # loading training/testing data
    train_loader, num_train_data = utils_get_data.getTrainingData(
        batch_size, args.train_dataset_path, args.train_dataset_csv_list)

    # Model path
    model_path = utils_utils.make_model_path(args.backbone, args.decoder_scale,
                                             batch_size)

    # train scores
    train_scores = np.zeros((num_train_data, 78))  # 78 scores
    train_metrics = np.zeros((num_train_data, 8))  # 8 metrics

    # loss term
    loss_weights = utils_multi_loss.get_loss_weights()
    loss_initialize_scale = utils_multi_loss.get_loss_initialize_scale()
    loss_valid = np.array(loss_weights) > 0

    # save path
    savePath = model_path + '/weight/loss_weights.csv'
    dataframe = pd.DataFrame(loss_weights)
    dataframe.to_csv(savePath, header=False, index=False)

    # weight rebalancing argument
    weight_initialization = args.weight_initialization
    weight_rebalancing = args.weight_rebalancing
    weight_initialization_done = False
    last_rebalancing_iter = 0
    previous_total_loss = 0
    previous_loss = 0

    # iter/epoch
    iter_per_epoch = len(train_loader)

    # save iteration
    iter_list_save = utils_utils.get_notable_iter(
        iter_per_epoch, num_per_epoch=args.num_save_per_epoch)
    iter_list_rebalancing = utils_utils.get_notable_iter(
        iter_per_epoch, num_per_epoch=args.num_weight_rebalancing_per_epoch)

    # mixed precision + Dataparallel
    if APEX_AVAILABLE:
        use_amp = True
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O2",
                                          keep_batchnorm_fp32=True,
                                          loss_scale="dynamic")
    else:
        use_amp = False
    model = nn.DataParallel(model)

    try:
        # try to load epoch1_iter00000
        model_name = "model/epoch01_iter00000.pth"
        model.load_state_dict(torch.load(model_name))
        print('LOAD MODEL ', model_name)
    except Exception:
        # save model
        print('THERE IS NO MODEL TO LOAD')
        model_name = model_path + "/model/epoch" + str(0 + 1).zfill(
            2) + '_iter' + str(0).zfill(5) + ".pth"
        print('SAVE MODEL:' + model_path)
        torch.save(model.state_dict(), model_name)

    # Start training...
    for epoch in range(args.epochs):
        print('---------------------------------------------------------')
        print('-------------- TRAINING OF EPOCH ' +
              str(epoch + 1).zfill(2) + ' START ----------------')

        end = time.time()

        # Switch to train mode
        model.train()

        # train parameter
        current_lambda_for_adjust = max(
            args.lambda_for_adjust_start +
            epoch * args.lambda_for_adjust_slope, args.lambda_for_adjust_min)

        for i, sample_batched in enumerate(train_loader):
            optimizer.zero_grad()

            # Prepare sample and target
            image = torch.autograd.Variable(sample_batched['image'].cuda())
            depth_gt = torch.autograd.Variable(
                sample_batched['depth'].cuda(non_blocking=True))

            # depth gt
            depth_gt_input = depth_gt
            depth_gt_full = interpolate_bicubic_fullsize(depth_gt_input)
            depth_gt_for_loss = depth_gt_input
            depth_gt_for_loss = depth_gt_for_loss.cuda()
            depth_gt_for_metric = (
                relu(depth_gt_full[:, :, 0 + 20:480 - 20, 0 + 24:640 - 24] -
                     0.0001) + 0.0001)

            # Predict
            image_input = image
            depth_pred_for_loss = model(image_input).cuda()
            depth_pred_full = interpolate_bicubic_fullsize(depth_pred_for_loss)
            depth_pred_for_metric = (
                relu(depth_pred_full[:, :, 0 + 20:480 - 20, 0 + 24:640 - 24] -
                     0.0001) + 0.0001)

            # current batch size
            current_batch_size = depth_gt_for_loss.size(0)

            # compute loss
            losses = utils_multi_loss.compute_multi_loss(
                depth_pred_for_loss, depth_gt_for_loss, loss_valid)

            # compute iter loss & train_scores
            loss, l_custom, train_scores = utils_multi_loss.get_loss_1batch(
                batch_size, current_batch_size, i, num_train_data,
                loss_weights, train_scores, losses)
            metrics = utils_multi_loss.compute_multi_metric(
                depth_pred_for_metric, depth_gt_for_metric)
            train_metrics = utils_multi_loss.get_metric_1batch(
                batch_size, current_batch_size, i, num_train_data,
                train_metrics, metrics)

            # Update
            if use_amp:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()

            # Measure elapsed time
            end = time.time()

            # Log progress
            if (i + 1) % 100 == 0 or (i + 1) == iter_per_epoch:
                if epoch == 0:
                    train_scores_mean = train_scores[0:(i + 1) *
                                                     batch_size].mean(axis=0)
                    train_metrics_mean = train_metrics[0:(i + 1) *
                                                       batch_size].mean(axis=0)
                else:
                    train_scores_mean = train_scores.mean(axis=0)
                    train_metrics_mean = train_metrics.mean(axis=0)

                # Print to console
                print('Epoch: [{0}][{1}/{2}]\t'.format(epoch, i + 1,
                                                       iter_per_epoch))
                utils_utils.print_metrics(train_metrics_mean)
                utils_utils.print_scores(train_scores_mean)

            if (i + 1) == 1 or (i + 1) % 1000 == 0 or (i + 1) == iter_per_epoch:
                savePath = model_path + "/current" + ".csv"
                dataframe = pd.DataFrame(train_scores)
                dataframe.to_csv(savePath, header=False, index=False)

            if i in iter_list_save:
                model_name = model_path + "/model/epoch" + str(
                    epoch + 1).zfill(2) + '_iter' + str(i).zfill(5) + ".pth"
                # save model
                print('SAVE MODEL:' + model_path + '/model')
                torch.save(model.state_dict(), model_name)

            if i in iter_list_rebalancing:
                temp_train_scores_mean = train_scores[last_rebalancing_iter *
                                                      batch_size:(i + 1) *
                                                      batch_size, :].mean(
                                                          axis=0)
                total_loss = np.sum(temp_train_scores_mean * loss_weights)
                if weight_initialization and not weight_initialization_done:
                    for index_loss in range(len(loss_valid)):
                        if loss_valid[index_loss] == 1:
                            loss_weights[index_loss] = (
                                total_loss * loss_initialize_scale[index_loss]
                            ) / temp_train_scores_mean[index_loss]
                        else:
                            loss_weights[index_loss] = 0

                    # save previous record
                    weight_initialization_done = True
                    previous_total_loss = np.sum(temp_train_scores_mean *
                                                 loss_weights)
                    previous_loss = temp_train_scores_mean

                elif weight_rebalancing and (weight_initialization_done
                                             or not weight_initialization):
                    temp_train_scores_mean = train_scores[
                        last_rebalancing_iter * batch_size:(i + 1) *
                        batch_size, :].mean(axis=0)
                    total_loss = np.sum(temp_train_scores_mean * loss_weights)
                    previous_loss_weights = np.array(loss_weights)
                    if previous_total_loss > 0:
                        for index_loss in range(len(loss_valid)):
                            if loss_valid[index_loss] == 1:
                                adjust_term = 1 + current_lambda_for_adjust * (
                                    (total_loss / previous_total_loss) *
                                    (previous_loss[index_loss] /
                                     temp_train_scores_mean[index_loss]) - 1)
                                adjust_term = min(max(adjust_term, 1.0 / 2.0),
                                                  2.0 / 1.0)
                                loss_weights[
                                    index_loss] = previous_loss_weights[
                                        index_loss] * adjust_term
                            else:
                                loss_weights[index_loss] = 0

                    # save previous record
                    previous_total_loss = np.sum(temp_train_scores_mean *
                                                 loss_weights)
                    previous_loss = temp_train_scores_mean

                # save - loss weights
                savePath = model_path + "/weight/weight" + str(
                    epoch + 1).zfill(2) + '_iter' + str(i).zfill(5) + ".csv"
                dataframe = pd.DataFrame(loss_weights)
                dataframe.to_csv(savePath, header=False, index=False)

                last_rebalancing_iter = (i + 1) % iter_per_epoch

        # save - each image train score
        savePath = model_path + "/score/train_epoch" + str(epoch + 1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_scores)
        dataframe.to_csv(savePath, header=False, index=False)
        # save - train mean score
        savePath = model_path + "/score/train_mean_epoch" + str(epoch + 1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_scores_mean)
        dataframe.to_csv(savePath, header=False, index=False)
        # save - each image train metric
        savePath = model_path + "/metric/train_epoch" + str(epoch + 1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_metrics)
        dataframe.to_csv(savePath, header=False, index=False)
        # save - train mean metric
        savePath = model_path + "/metric/train_mean_epoch" + str(epoch + 1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_metrics_mean)
        dataframe.to_csv(savePath, header=False, index=False)

        print('-------------- TRAINING OF EPOCH ' +
              str(epoch + 1).zfill(2) + ' FINISH ---------------')
        print('---------------------------------------------------------')
        print('   ')
        print('   ')
        print('   ')
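
Note: the rebalancing step above scales each active loss weight by
adjust = 1 + lambda * ((L_total / L_total_prev) * (l_prev / l_now) - 1),
clipped to [0.5, 2.0]. A worked numeric sketch with made-up values:

lam = 1.5                              # current_lambda_for_adjust
total_loss, previous_total_loss = 0.9, 1.0
loss_now, loss_prev = 0.2, 0.3         # one loss term, now vs. last rebalance
weight_prev = 0.5

adjust = 1 + lam * ((total_loss / previous_total_loss)
                    * (loss_prev / loss_now) - 1)
adjust = min(max(adjust, 1.0 / 2.0), 2.0 / 1.0)  # same clipping as the loop
weight_new = weight_prev * adjust
print(adjust, weight_new)  # 1.525, 0.7625

# lambda starts positive and decays (possibly below zero) across epochs, so the
# direction of the reweighting pressure flips as training progresses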
Example No. 19
0
File: train.py  Project: pzxdd/DAEC
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, "train")

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval("models." + cfg.MODEL.NAME + ".get_pose_net")(cfg,
                                                               is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, "../lib/models", cfg.MODEL.NAME + ".py"),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        "writer": SummaryWriter(log_dir=tb_log_dir),
        "train_global_steps": 0,
        "valid_global_steps": 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict["writer"].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval("dataset." + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval("dataset." + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, "checkpoint.pth")

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint["epoch"]
        best_perf = checkpoint["perf"]
        last_epoch = checkpoint["epoch"]
        model.load_state_dict(checkpoint["state_dict"])

        optimizer.load_state_dict(checkpoint["optimizer"])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint["epoch"]))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info("=> saving checkpoint to {}".format(final_output_dir))
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "model": cfg.MODEL.NAME,
                "state_dict": model.state_dict(),
                "best_state_dict": model.module.state_dict(),
                "perf": perf_indicator,
                "optimizer": optimizer.state_dict(),
            },
            best_model,
            final_output_dir,
        )

    final_model_state_file = os.path.join(final_output_dir, "final_state.pth")
    logger.info(
        "=> saving final model state to {}".format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict["writer"].close()
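save_checkpoint is called throughout these examples but never shown; a minimal sketch consistent with how it is used here (the file names and the best-copy behavior are assumptions based on the calls above):

import os
import torch

def save_checkpoint(states, is_best, output_dir, filename='checkpoint.pth'):
    # Always write the rolling checkpoint; when this epoch is the best so
    # far, also keep a standalone copy of the best weights for evaluation.
    torch.save(states, os.path.join(output_dir, filename))
    if is_best and 'best_state_dict' in states:
        torch.save(states['best_state_dict'],
                   os.path.join(output_dir, 'model_best.pth'))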
Example No. 20
0
class Trainer(object):
    def __init__(self, args):
        self.args         = args
        self.train_dir    = args.train_dir
        self.val_dir      = args.val_dir
        self.model_arch   = args.model_arch
        self.dataset      = args.dataset


        self.workers      = 1
        self.weight_decay = 0.0005
        self.momentum     = 0.9
        self.batch_size   = 8
        self.lr           = 0.0001
        self.gamma        = 0.333
        self.step_size    = 13275
        self.sigma        = 3
        self.stride       = 8

        cudnn.benchmark   = True

        if self.dataset   ==  "LSP":
            self.numClasses  = 14
        elif self.dataset == "MPII":
            self.numClasses  = 16

        self.train_loader, self.val_loader = getDataloader(self.dataset, self.train_dir,\
            self.val_dir, self.sigma, self.stride, self.workers, self.batch_size)

        model = unipose(self.dataset, num_classes=self.numClasses, backbone='resnet',
                        output_stride=16, sync_bn=True, freeze_bn=False, stride=self.stride)

        self.model       = model.cuda()

        self.criterion   = nn.MSELoss().cuda()

        self.optimizer   = torch.optim.Adam(self.model.parameters(), lr=self.lr)

        self.best_model  = 12345678.9

        self.iters       = 0

        if self.args.pretrained is not None:
            checkpoint = torch.load(self.args.pretrained)
            p = checkpoint['state_dict']

            state_dict = self.model.state_dict()
            model_dict = {}

            for k,v in p.items():
                if k in state_dict:
                    model_dict[k] = v

            state_dict.update(model_dict)
            self.model.load_state_dict(state_dict)
            
        self.isBest = 0
        self.bestPCK  = 0
        self.bestPCKh = 0

        # print model summary and metrics
        dump_input = torch.rand((1, 3, 368, 368))
        print(get_model_summary(self.model, dump_input))

    def training(self, epoch):
        train_loss = 0.0
        self.model.train()
        print("Epoch " + str(epoch) + ':') 
        tbar = tqdm(self.train_loader)

        for i, (input, heatmap, centermap, img_path) in enumerate(tbar):
            learning_rate = adjust_learning_rate(self.optimizer, self.iters, self.lr, policy='step',
                                                 gamma=self.gamma, step_size=self.step_size)

            input_var     =     input.cuda()
            heatmap_var   =    heatmap.cuda()

            self.optimizer.zero_grad()

            heat = self.model(input_var)

            loss_heat   = self.criterion(heat,  heatmap_var)

            loss = loss_heat

            train_loss += loss_heat.item()

            loss.backward()
            self.optimizer.step()

            tbar.set_description('Train loss: %.6f' % (train_loss / ((i + 1)*self.batch_size)))

            self.iters += 1

            if i == 10000:
                break
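adjust_learning_rate is used above with policy='step' but is not defined in the example; a plausible sketch of a staircase decay consistent with the gamma and step_size fields of this Trainer (assumed semantics, not the original function):

def adjust_learning_rate(optimizer, iters, base_lr, policy='step',
                         gamma=0.333, step_size=13275):
    # staircase decay: multiply the base LR by gamma every step_size iterations
    lr = base_lr * (gamma ** (iters // step_size)) if policy == 'step' else base_lr
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr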
Example No. 21
0
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    t_checkpoints = cfg.KD.TEACHER  # note: set this in the student config file
    train_type = cfg.KD.TRAIN_TYPE  # note: set this in the student config file
    train_type = get_train_type(train_type, t_checkpoints)
    logger.info('=> train type is {} '.format(train_type))

    if train_type == 'FPD':
        cfg_name = 'student_' + os.path.basename(args.cfg).split('.')[0]
    else:
        cfg_name = os.path.basename(args.cfg).split('.')[0]
    save_yaml_file(cfg_name, cfg, final_output_dir)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # fpd method, default NORMAL
    if train_type == 'FPD':
        tcfg = cfg.clone()
        tcfg.defrost()
        tcfg.merge_from_file(args.tcfg)
        tcfg.freeze()
        tcfg_name = 'teacher_' + os.path.basename(args.tcfg).split('.')[0]
        save_yaml_file(tcfg_name, tcfg, final_output_dir)
        # teacher model
        tmodel = eval('models.' + tcfg.MODEL.NAME + '.get_pose_net')(
            tcfg, is_train=False)

        load_checkpoint(t_checkpoints,
                        tmodel,
                        strict=True,
                        model_info='teacher_' + tcfg.MODEL.NAME)

        tmodel = torch.nn.DataParallel(tmodel, device_ids=cfg.GPUS).cuda()
        # define kd_pose loss function (criterion) and optimizer
        kd_pose_criterion = JointsMSELoss(
            use_target_weight=tcfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))

    if cfg.TRAIN.CHECKPOINT:
        load_checkpoint(cfg.TRAIN.CHECKPOINT,
                        model,
                        strict=True,
                        model_info='student_' + cfg.MODEL.NAME)
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # you can choose or replace the pose_loss and kd_pose_loss types, e.g. MSE, KL, or OHKM loss
    # define pose loss function (criterion) and optimizer
    pose_criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    # evaluate on validation set (the teacher model only exists for FPD training)
    if train_type == 'FPD':
        validate(cfg, valid_loader, valid_dataset, tmodel, pose_criterion,
                 final_output_dir, tb_log_dir, writer_dict)
    validate(cfg, valid_loader, valid_dataset, model, pose_criterion,
             final_output_dir, tb_log_dir, writer_dict)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # fpd method, default NORMAL
        if train_type == 'FPD':
            # train for one epoch
            fpd_train(cfg, train_loader, model, tmodel, pose_criterion,
                      kd_pose_criterion, optimizer, epoch, final_output_dir,
                      tb_log_dir, writer_dict)
        else:
            # train for one epoch
            train(cfg, train_loader, model, pose_criterion, optimizer, epoch,
                  final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  pose_criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
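fpd_train is not shown in this example; FPD (fast pose distillation) trains the student against both the ground-truth heatmaps and the frozen teacher's heatmaps. A hedged sketch of a single training step (the call signature, the blend weight alpha, and fpd_step itself are assumptions):

import torch

def fpd_step(input, target, target_weight, model, tmodel,
             pose_criterion, kd_pose_criterion, optimizer, alpha=0.5):
    output = model(input)
    with torch.no_grad():          # the teacher is frozen
        t_output = tmodel(input)
    # ground-truth supervision plus distillation towards the teacher heatmaps
    pose_loss = pose_criterion(output, target, target_weight)
    kd_loss = kd_pose_criterion(output, t_output, target_weight)
    loss = (1 - alpha) * pose_loss + alpha * kd_loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()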
Example No. 22
0
def main():
    args = parse_args()
    update_config(cfg, args)

    if args.prevModelDir and args.modelDir:
        # copy pre models for philly
        copy_prev_models(args.prevModelDir, args.modelDir)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))

    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    # if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
    #     logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
    #     checkpoint = torch.load(checkpoint_file)
    #     begin_epoch = checkpoint['epoch']
    #     best_perf = checkpoint['perf']
    #     last_epoch = checkpoint['epoch']
    #     model.load_state_dict(checkpoint['state_dict'])
    #
    #     optimizer.load_state_dict(checkpoint['optimizer'])
    #     logger.info("=> loaded checkpoint '{}' (epoch {})".format(
    #         checkpoint_file, checkpoint['epoch']))

    # checkpoint = torch.load('output/jd/pose_hrnet/crop_face/checkpoint.pth')
    # model.load_state_dict(checkpoint['state_dict'])

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        # perf_indicator = validate(
        #     cfg, valid_loader, valid_dataset, model, criterion,
        #     final_output_dir, tb_log_dir, writer_dict
        # )
        #
        # if perf_indicator >= best_perf:
        #     best_perf = perf_indicator
        #     best_model = True
        # else:
        #     best_model = False

        # import tqdm
        # import cv2
        # import numpy as np
        # from lib.utils.imutils import im_to_numpy, im_to_torch
        # flip = True
        # full_result = []
        # for i, (inputs,target, target_weight, meta) in enumerate(valid_loader):
        #     with torch.no_grad():
        #         input_var = torch.autograd.Variable(inputs.cuda())
        #         if flip == True:
        #             flip_inputs = inputs.clone()
        #             for i, finp in enumerate(flip_inputs):
        #                 finp = im_to_numpy(finp)
        #                 finp = cv2.flip(finp, 1)
        #                 flip_inputs[i] = im_to_torch(finp)
        #             flip_input_var = torch.autograd.Variable(flip_inputs.cuda())
        #
        #         # compute output
        #         refine_output = model(input_var)
        #         score_map = refine_output.data.cpu()
        #         score_map = score_map.numpy()
        #
        #         if flip == True:
        #             flip_output = model(flip_input_var)
        #             flip_score_map = flip_output.data.cpu()
        #             flip_score_map = flip_score_map.numpy()
        #
        #             for i, fscore in enumerate(flip_score_map):
        #                 fscore = fscore.transpose((1, 2, 0))
        #                 fscore = cv2.flip(fscore, 1)
        #                 fscore = list(fscore.transpose((2, 0, 1)))
        #                 for (q, w) in train_dataset.flip_pairs:
        #                     fscore[q], fscore[w] = fscore[w], fscore[q]
        #                 fscore = np.array(fscore)
        #                 score_map[i] += fscore
        #                 score_map[i] /= 2
        #
        #         # ids = meta['imgID'].numpy()
        #         # det_scores = meta['det_scores']
        #         for b in range(inputs.size(0)):
        #             # details = meta['augmentation_details']
        #             # imgid = meta['imgid'][b]
        #             # print(imgid)
        #             # category = meta['category'][b]
        #             # print(category)
        #             single_result_dict = {}
        #             single_result = []
        #
        #             single_map = score_map[b]
        #             r0 = single_map.copy()
        #             r0 /= 255
        #             r0 += 0.5
        #             v_score = np.zeros(106)
        #             for p in range(106):
        #                 single_map[p] /= np.amax(single_map[p])
        #                 border = 10
        #                 dr = np.zeros((112 + 2 * border, 112 + 2 * border))
        #                 dr[border:-border, border:-border] = single_map[p].copy()
        #                 dr = cv2.GaussianBlur(dr, (7, 7), 0)
        #                 lb = dr.argmax()
        #                 y, x = np.unravel_index(lb, dr.shape)
        #                 dr[y, x] = 0
        #                 lb = dr.argmax()
        #                 py, px = np.unravel_index(lb, dr.shape)
        #                 y -= border
        #                 x -= border
        #                 py -= border + y
        #                 px -= border + x
        #                 ln = (px ** 2 + py ** 2) ** 0.5
        #                 delta = 0.25
        #                 if ln > 1e-3:
        #                     x += delta * px / ln
        #                     y += delta * py / ln
        #                 x = max(0, min(x, 112 - 1))
        #                 y = max(0, min(y, 112 - 1))
        #                 resy = float((4 * y + 2) / 112 * (450))
        #                 resx = float((4 * x + 2) / 112 * (450))
        #                 # resy = float((4 * y + 2) / cfg.data_shape[0] * (450))
        #                 # resx = float((4 * x + 2) / cfg.data_shape[1] * (450))
        #                 v_score[p] = float(r0[p, int(round(y) + 1e-10), int(round(x) + 1e-10)])
        #                 single_result.append(resx)
        #                 single_result.append(resy)
        #             if len(single_result) != 0:
        #                 result = []
        #                 # result.append(imgid)
        #                 j = 0
        #                 while j < len(single_result):
        #                     result.append(float(single_result[j]))
        #                     result.append(float(single_result[j + 1]))
        #                     j += 2
        #                 full_result.append(result)
        model.eval()

        import numpy as np
        from core.inference import get_final_preds
        from utils.transforms import flip_back
        import csv

        num_samples = len(valid_dataset)
        all_preds = np.zeros((num_samples, 106, 3), dtype=np.float32)
        all_boxes = np.zeros((num_samples, 6))
        image_path = []
        filenames = []
        imgnums = []
        idx = 0
        full_result = []
        with torch.no_grad():
            for i, (input, target, target_weight,
                    meta) in enumerate(valid_loader):
                # compute output
                outputs = model(input)
                if isinstance(outputs, list):
                    output = outputs[-1]
                else:
                    output = outputs

                if cfg.TEST.FLIP_TEST:
                    # this part is ugly, because PyTorch does not support negative indexing
                    # input_flipped = model(input[:, :, :, ::-1])
                    input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                    input_flipped = torch.from_numpy(input_flipped).cuda()
                    outputs_flipped = model(input_flipped)

                    if isinstance(outputs_flipped, list):
                        output_flipped = outputs_flipped[-1]
                    else:
                        output_flipped = outputs_flipped

                    output_flipped = flip_back(output_flipped.cpu().numpy(),
                                               valid_dataset.flip_pairs)
                    output_flipped = torch.from_numpy(
                        output_flipped.copy()).cuda()

                    # feature is not aligned, shift flipped heatmap for higher accuracy
                    if cfg.TEST.SHIFT_HEATMAP:
                        output_flipped[:, :, :, 1:] = \
                            output_flipped.clone()[:, :, :, 0:-1]

                    output = (output + output_flipped) * 0.5

                target = target.cuda(non_blocking=True)
                target_weight = target_weight.cuda(non_blocking=True)

                loss = criterion(output, target, target_weight)

                num_images = input.size(0)
                # measure accuracy and record loss

                c = meta['center'].numpy()
                s = meta['scale'].numpy()
                # print(c.shape)
                # print(s.shape)
                # print(c[:3, :])
                # print(s[:3, :])
                score = meta['score'].numpy()

                preds, maxvals = get_final_preds(cfg,
                                                 output.clone().cpu().numpy(),
                                                 c, s)

                # print(preds.shape)
                for b in range(input.size(0)):
                    result = []
                    # pic_name=meta['image'][b].split('/')[-1]
                    # result.append(pic_name)
                    for points in range(106):
                        # result.append(str(int(preds[b][points][0])) + ' ' + str(int(preds[b][points][1])))
                        result.append(float(preds[b][points][0]))
                        result.append(float(preds[b][points][1]))

                    full_result.append(result)

                all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
                all_preds[idx:idx + num_images, :, 2:3] = maxvals
                # double check this all_boxes parts
                all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
                all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
                all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
                all_boxes[idx:idx + num_images, 5] = score
                image_path.extend(meta['image'])

                idx += num_images

        # with open('res.csv', 'w', newline='') as f:
        #     writer = csv.writer(f)
        #     writer.writerows(full_result)
        gt = []
        with open("/home/sk49/workspace/cy/jd/val.txt") as f:
            for line in f.readlines():
                rows = list(map(float, line.strip().split(' ')[1:]))
                gt.append(rows)

        error = 0
        for i in range(len(gt)):
            error = NME(full_result[i], gt[i]) + error
        print(error)

        log_file = []
        log_file.append(
            [epoch,
             optimizer.state_dict()['param_groups'][0]['lr'], error])

        with open('log_file.csv', 'a', newline='') as f:
            writer1 = csv.writer(f)
            writer1.writerows(log_file)
            # logger.close()

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                # 'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            },
            best_model,
            final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
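NME is called above but never defined; for 106-point landmarks it is typically the mean point-to-point Euclidean error divided by a normalizing length. A sketch under that assumption (the 450-pixel normalizer matches the coordinate scaling earlier in this example, but the original normalization term is not shown):

import numpy as np

def NME(pred, gt, norm=450.0):
    # pred and gt are flat [x0, y0, x1, y1, ...] lists of 106 landmarks
    pred = np.asarray(pred, dtype=np.float32).reshape(-1, 2)
    gt = np.asarray(gt, dtype=np.float32).reshape(-1, 2)
    return float(np.linalg.norm(pred - gt, axis=1).mean() / norm)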
Example No. 23
0
def main_worker(
        gpu, ngpus_per_node, args, final_output_dir, tb_log_dir
):
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print("Warning:  if --fp16 is not used, static_loss_scale will be ignored.")

    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if cfg.MULTIPROCESSING_DISTRIBUTED:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        print('Init process group: dist_url: {}, world_size: {}, rank: {}'.
              format(args.dist_url, args.world_size, args.rank))
        dist.init_process_group(
            backend=cfg.DIST_BACKEND,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank
        )

    update_config(cfg, args)

    # setup logger
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=True
    )

    # copy model file
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
            final_output_dir
        )

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        dump_input = torch.rand(
            (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE)
        )
        #writer_dict['writer'].add_graph(model, (dump_input, ))
        logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not args.distributed:
        print('Warning: Sync BatchNorm is only supported in distributed training.')

    if args.distributed:
        if cfg.MODEL.SYNC_BN:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu]
            )
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    loss_factory = MultiLossFactory(cfg).cuda()

    # Data loading code
    train_loader = make_dataloader(
        cfg, is_train=True, distributed=args.distributed
    )
    logger.info(train_loader.dataset)

    best_perf = -1
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)

    if cfg.FP16.ENABLED:
        optimizer = FP16_Optimizer(
            optimizer,
            static_loss_scale=cfg.FP16.STATIC_LOSS_SCALE,
            dynamic_loss_scale=cfg.FP16.DYNAMIC_LOSS_SCALE
        )

    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(
        final_output_dir, 'checkpoint.pth.tar')
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    # if cfg.FP16.ENABLED:
    #     lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    #         optimizer.optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
    #         last_epoch=last_epoch
    #     )
    # else:
    #     lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    #         optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
    #         last_epoch=last_epoch
    #     )

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # train one epoch
        do_train(cfg, model, train_loader, loss_factory, optimizer, epoch,
                 final_output_dir, tb_log_dir, writer_dict, fp16=cfg.FP16.ENABLED)

        # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`.
        # lr_scheduler.step()

        perf_indicator = epoch  # no validation step here, so the latest epoch always counts as best
        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        if not cfg.MULTIPROCESSING_DISTRIBUTED or (
                cfg.MULTIPROCESSING_DISTRIBUTED
                and args.rank == 0
        ):
            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint({
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(
        final_output_dir, 'final_state{}.pth.tar'.format(gpu)
    )

    logger.info('saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
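main_worker takes the GPU index as its first argument, which is the signature torch.multiprocessing.spawn expects; a typical driver (assumed, since the launcher is not part of this example):

import torch
import torch.multiprocessing as mp

def launch(args, final_output_dir, tb_log_dir):
    ngpus_per_node = torch.cuda.device_count()
    if args.distributed:
        # one process per GPU; each receives its gpu index as the first argument
        args.world_size = ngpus_per_node * args.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node,
                 args=(ngpus_per_node, args, final_output_dir, tb_log_dir))
    else:
        main_worker(None, ngpus_per_node, args, final_output_dir, tb_log_dir)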
Example No. 24
0
def main_worker(
        gpu, ngpus_per_node, args, final_output_dir, tb_log_dir
):
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if cfg.MULTIPROCESSING_DISTRIBUTED:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            # compute this process's rank among all processes from the node index
            args.rank = args.rank * ngpus_per_node + gpu
        print('Init process group: dist_url: {}, world_size: {}, rank: {}'.
              format(args.dist_url, args.world_size, args.rank))
        dist.init_process_group(
            backend=cfg.DIST_BACKEND,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank
        )

    update_config(cfg, args)

    # setup logger
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=True
    )

    # copy model file
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
            final_output_dir
        )

    # visualize results with TensorBoard
    writer_dict = {
        'writer': SummaryWriter(logdir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }


    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu]
            )
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = torch.nn.DataParallel(model).cuda()

    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        dump_input = torch.rand(
            (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE)
        ).cuda()
        #writer_dict['writer'].add_graph(model, (dump_input, ))
        logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    # define loss function (criterion) and optimizer
    loss_factory = MultiLossFactory(cfg).cuda()

    # Data loading code
    train_loader = make_dataloader(
        cfg, is_train=True, distributed=args.distributed
    )
Example No. 25
0
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    # speeds up training while avoiding the nondeterminism of benchmark mode
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=True)  # eval() evaluates the string expression and returns its value

    # copy model file
    this_dir = os.path.dirname(__file__)  # directory of the current script
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))  # log the model summary

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    # model = torch.nn.DataParallel(model, device_ids=[0]).cuda()
    # multi-GPU training
    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()
    regress_loss = RegLoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()
    # Data loading code
    normalize = transforms.Normalize(
        # normalize with the ImageNet mean and standard deviation
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))  # image preprocessing

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, criterion, regress_loss, optimizer,
              epoch, final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, regress_loss, final_output_dir,
                                  tb_log_dir, writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
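These loops call lr_scheduler.step() at the top of each epoch; as the comment in Example No. 23 notes, PyTorch 1.1.0+ expects the scheduler to step after optimizer.step(). The epoch-level fix is simply to reorder (a sketch using this example's call signature):

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # train first (optimizer.step() runs inside train) ...
        train(cfg, train_loader, model, criterion, regress_loss, optimizer,
              epoch, final_output_dir, tb_log_dir, writer_dict)
        # ... then advance the learning-rate schedule
        lr_scheduler.step()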
Example No. 26
0
def main():
    # parse the command-line arguments
    args = parse_args()
    # update cfg according to the arguments
    update_config(cfg, args)

    # create a logger to record training progress
    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    # GPU-related configuration
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # build the network according to the config file
    # two models: models.pose_hrnet and models.pose_resnet; the get_pose_net function returns the network structure
    print('models.' + cfg.MODEL.NAME + '.get_pose_net')
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # copy model file
    # copy lib/models/pose_hrnet.py to the output directory
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    # for graphical display of training information
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # for graphical display of the model
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))

    # make the model support multi-GPU training
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    # used to compute the loss
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    # normalize the input image data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # create iterators for the training and test data
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    # model loading and optimization-strategy configuration
    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    # loop over epochs for training
    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    # save the final model
    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
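JointsMSELoss appears in nearly every example above; a minimal sketch of a per-joint MSE over flattened heatmaps with optional target weighting, consistent with how it is constructed and called here (a sketch, not necessarily the original source):

import torch.nn as nn

class JointsMSELoss(nn.Module):
    def __init__(self, use_target_weight):
        super(JointsMSELoss, self).__init__()
        self.criterion = nn.MSELoss()
        self.use_target_weight = use_target_weight

    def forward(self, output, target, target_weight):
        batch_size, num_joints = output.size(0), output.size(1)
        # compare the heatmaps joint by joint on flattened pixels
        heatmaps_pred = output.reshape(batch_size, num_joints, -1).split(1, 1)
        heatmaps_gt = target.reshape(batch_size, num_joints, -1).split(1, 1)
        loss = 0
        for idx in range(num_joints):
            pred = heatmaps_pred[idx].squeeze(1)
            gt = heatmaps_gt[idx].squeeze(1)
            if self.use_target_weight:
                # target_weight zeroes out joints that are not annotated
                loss += self.criterion(pred * target_weight[:, idx],
                                       gt * target_weight[:, idx])
            else:
                loss += self.criterion(pred, gt)
        return loss / num_joints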
Example No. 27
0
def main():
    args = get_args()
    # get student config
    student_cfg = get_student_cfg(cfg, args.student_file)
    student_cfg.LOG_DIR = args.log
    student_cfg.PRINT_FREQ = int(args.print_freq)
    if args.mode == 'test':
        student_cfg.DATASET.TEST = 'test2017'
    logger, final_output_dir, tb_log_dir = create_logger(
        student_cfg, args.student_file, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(student_cfg)

    # cudnn related setting
    cudnn.benchmark = student_cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = student_cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = student_cfg.CUDNN.ENABLED

    dev = 'cuda' if torch.cuda.is_available() else 'cpu'

    model = PoseHigherResolutionNet(student_cfg)
    model.load_state_dict(torch.load(args.model_file))

    dump_input = torch.rand(
        (1, 3, student_cfg.DATASET.INPUT_SIZE, student_cfg.DATASET.INPUT_SIZE))
    logger.info(
        get_model_summary(model, dump_input, verbose=student_cfg.VERBOSE))

    model = torch.nn.DataParallel(model, device_ids=student_cfg.GPUS).cuda()
    model.eval()

    data_loader, test_dataset = make_test_dataloader(student_cfg)

    transforms = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    ])

    parser = HeatmapParser(student_cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(
        total=len(test_dataset)) if student_cfg.TEST.LOG_PROGRESS else None
    for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, student_cfg.DATASET.INPUT_SIZE, 1.0,
            min(student_cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(
                    sorted(student_cfg.TEST.SCALE_FACTOR, reverse=True)):
                input_size = student_cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(student_cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    student_cfg, model, image_resized,
                    student_cfg.TEST.FLIP_TEST, student_cfg.TEST.PROJECT2IMAGE,
                    base_size)

                final_heatmaps, tags_list = aggregate_results(
                    student_cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(
                len(student_cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           student_cfg.TEST.ADJUST,
                                           student_cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])

        if student_cfg.TEST.LOG_PROGRESS:
            pbar.update()

        if i % student_cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            # logger.info('=> write {}'.format(prefix))
            save_valid_image(image,
                             final_results,
                             '{}.jpg'.format(prefix),
                             dataset=test_dataset.name)
            # save_debug_images(cfg, image_resized, None, None, outputs, prefix)

        all_preds.append(final_results)
        all_scores.append(scores)

    if student_cfg.TEST.LOG_PROGRESS:
        pbar.close()

    name_values, _ = test_dataset.evaluate(cfg, all_preds, all_scores,
                                           final_output_dir)

    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(logger, name_value, cfg.MODEL.NAME)
    else:
        _print_name_value(logger, name_values, cfg.MODEL.NAME)
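aggregate_results accumulates heatmaps across the test scales before the division by len(SCALE_FACTOR) above; a sketch of that accumulation (resizing to a common base size is an assumption for when scales differ):

import torch.nn.functional as F

def aggregate_heatmaps(final_heatmaps, heatmaps, base_size=None):
    # resize the current scale's heatmaps to a common resolution, then sum;
    # the caller divides by the number of scales afterwards
    if base_size is not None:
        heatmaps = F.interpolate(heatmaps, size=base_size,
                                 mode='bilinear', align_corners=False)
    return heatmaps if final_heatmaps is None else final_heatmaps + heatmaps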
Example No. 28
0
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=True
    )

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])
    )

    logger.info(get_model_summary(model, dump_input))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    heatmapLoss = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT  # True
    ).cuda()

    # Data loading code
    train_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, 'train',
        transforms.Compose([
            transforms.ToTensor(),
        ])
    )
    valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, 'val',
        transforms.Compose([
            transforms.ToTensor(),
        ])
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=True
    )

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=True
    )

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(
        final_output_dir, 'checkpoint.pth'
    )

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch
    )

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, heatmapLoss, optimizer, epoch,
              final_output_dir)

        # evaluate on validation set
        perf_indicator = validate(
            cfg, valid_loader, valid_dataset, model, heatmapLoss,
            final_output_dir
        )

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
            best_model_state_file = os.path.join(
                final_output_dir, 'best_model.pth'
            )
            logger.info('=> saving best model state to {}'.format(
                best_model_state_file)
            )
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint({
            'epoch': epoch + 1,
            'model': cfg.MODEL.NAME,
            'state_dict': model.state_dict(),
            'best_state_dict': model.module.state_dict(),
            'perf': perf_indicator,
            'optimizer': optimizer.state_dict(),
        }, best_model, final_output_dir)

    final_model_state_file = os.path.join(
        final_output_dir, 'final_state.pth'
    )
    logger.info('=> saving final model state to {}'.format(
        final_model_state_file)
    )
    torch.save(model.module.state_dict(), final_model_state_file)