def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    data_loader, test_dataset = make_test_dataloader(cfg)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(test_dataset))
    for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)])

            if cfg.RESCORE.USE:
                try:
                    scores = rescore_valid(cfg, final_results, scores)
                except Exception:
                    # rescoring can fail on images without valid detections;
                    # keep the original scores in that case
                    print('rescore failed; keeping original scores')

        pbar.update()

        if i % cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            # logger.info('=> write {}'.format(prefix))
            save_valid_image(image, final_results, '{}.jpg'.format(prefix),
                             dataset=test_dataset.name)

        all_preds.append(final_results)
        all_scores.append(scores)

    pbar.close()

    name_values, _ = test_dataset.evaluate(cfg, all_preds, all_scores,
                                           final_output_dir)

    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(logger, name_value, cfg.MODEL.NAME)
    else:
        _print_name_value(logger, name_values, cfg.MODEL.NAME)
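# _print_name_value is not defined in this file. A minimal sketch of the
# helper as it is called above, assuming the markdown-style metrics table
# used by the upstream HigherHRNet tooling (this is a reference sketch, not
# necessarily the exact implementation in this repo):
def _print_name_value(logger, name_value, full_arch_name):
    names = name_value.keys()
    values = name_value.values()
    num_values = len(name_value)
    logger.info('| Arch ' +
                ' '.join(['| {}'.format(name) for name in names]) + ' |')
    logger.info('|---' * (num_values + 1) + '|')
    if len(full_arch_name) > 15:
        full_arch_name = full_arch_name[:8] + '...'
    logger.info('| ' + full_arch_name + ' ' +
                ' '.join(['| {:.3f}'.format(value) for value in values]) +
                ' |')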
def main():
    args = get_args()

    # get student config
    student_cfg = get_student_cfg(cfg, args.student_file)
    student_cfg.LOG_DIR = args.log
    student_cfg.PRINT_FREQ = int(args.print_freq)

    if args.mode == 'test':
        student_cfg.DATASET.TEST = 'test2017'

    logger, final_output_dir, tb_log_dir = create_logger(
        student_cfg, args.student_file, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(student_cfg)

    # cudnn related setting
    cudnn.benchmark = student_cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = student_cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = student_cfg.CUDNN.ENABLED

    model = PoseHigherResolutionNet(student_cfg)
    model.load_state_dict(torch.load(args.model_file))

    dump_input = torch.rand(
        (1, 3, student_cfg.DATASET.INPUT_SIZE, student_cfg.DATASET.INPUT_SIZE))
    logger.info(
        get_model_summary(model, dump_input, verbose=student_cfg.VERBOSE))

    model = torch.nn.DataParallel(model, device_ids=student_cfg.GPUS).cuda()
    model.eval()

    data_loader, test_dataset = make_test_dataloader(student_cfg)

    transforms = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    ])

    parser = HeatmapParser(student_cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(
        total=len(test_dataset)) if student_cfg.TEST.LOG_PROGRESS else None
    for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, student_cfg.DATASET.INPUT_SIZE, 1.0,
            min(student_cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(
                    sorted(student_cfg.TEST.SCALE_FACTOR, reverse=True)):
                input_size = student_cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(student_cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    student_cfg, model, image_resized,
                    student_cfg.TEST.FLIP_TEST,
                    student_cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    student_cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(
                len(student_cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           student_cfg.TEST.ADJUST,
                                           student_cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)])

        if student_cfg.TEST.LOG_PROGRESS:
            pbar.update()

        if i % student_cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            # logger.info('=> write {}'.format(prefix))
            save_valid_image(image, final_results, '{}.jpg'.format(prefix),
                             dataset=test_dataset.name)

        all_preds.append(final_results)
        all_scores.append(scores)

    if student_cfg.TEST.LOG_PROGRESS:
        pbar.close()

    name_values, _ = test_dataset.evaluate(student_cfg, all_preds, all_scores,
                                           final_output_dir)

    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(logger, name_value, student_cfg.MODEL.NAME)
    else:
        _print_name_value(logger, name_values, student_cfg.MODEL.NAME)
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    test_dataset = HIEDataset(DATA_PATH)
    data_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=False)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            # ImageNet statistics (mean=[0.485, 0.456, 0.406],
            # std=[0.229, 0.224, 0.225]) replaced with 0.5-centered values
            torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                             std=[0.5, 0.5, 0.5])
        ])

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(test_dataset)) if cfg.TEST.LOG_PROGRESS else None
    # the HIE test loader yields images only, no annotations
    for i, images in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        if i % 100 == 0:
            print('Start processing image %d' % i)

        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)])

        if cfg.TEST.LOG_PROGRESS:
            pbar.update()

        if i % cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            # save_valid_image(image, final_results, '{}.jpg'.format(prefix),
            #                  dataset='HIE20')

        all_preds.append(final_results)
        all_scores.append(scores)

    if cfg.TEST.LOG_PROGRESS:
        pbar.close()

    # save preds and scores as json
    test_dataset.save_json(all_preds, all_scores)
    print('Save finished!')
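# HIEDataset.save_json is defined elsewhere. A hypothetical minimal version
# (function name, record layout, and output file are assumptions, not the
# actual implementation) would flatten each person's keypoints COCO-style
# and dump one record per detection:
def save_json_sketch(all_preds, all_scores, out_file='hie_predictions.json'):
    results = []
    for img_idx, (preds, scores) in enumerate(zip(all_preds, all_scores)):
        for person, score in zip(preds, scores):
            results.append({
                'image_id': img_idx,
                'keypoints': person[:, :3].reshape(-1).tolist(),
                'score': float(score),
            })
    with open(out_file, 'w') as f:
        json.dump(results, f)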
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'test')

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    HMparser = HeatmapParser(cfg)

    res_folder = os.path.join(args.outdir, 'results')
    if not os.path.exists(res_folder):
        os.makedirs(res_folder)
    video_name = args.video_path.split('/')[-1].split('.')[0]
    res_file = os.path.join(res_folder, '{}.json'.format(video_name))

    # read frames in video
    stream = cv2.VideoCapture(args.video_path)
    assert stream.isOpened(), 'Cannot capture source'

    fps = stream.get(cv2.CAP_PROP_FPS)
    frameSize = (int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                 int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    video_dir = os.path.join(args.outdir, 'video', args.data)
    if not os.path.exists(video_dir):
        os.makedirs(video_dir)
    image_dir = os.path.join(args.outdir, 'images', args.data)
    if not os.path.exists(image_dir):
        os.makedirs(image_dir)

    if args.video_format == 'mp4':
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        video_path = os.path.join(video_dir, '{}.mp4'.format(video_name))
    else:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        video_path = os.path.join(video_dir, '{}.avi'.format(video_name))

    if args.save_video:
        out = cv2.VideoWriter(video_path, fourcc, fps, frameSize)

    num = 0
    annolist = []
    while True:
        ret, image = stream.read()
        print('num:', num)
        if not ret:
            break

        all_preds = []
        all_scores = []

        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = HMparser.parse(final_heatmaps, tags,
                                             cfg.TEST.ADJUST, cfg.TEST.REFINE)

            # joints for all persons in the image
            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)])

        image = draw_image(image, final_results, dataset=args.data)
        all_preds.append(final_results)
        all_scores.append(scores)

        img_id = num
        num += 1
        file_name = '{}.jpg'.format(str(img_id).zfill(6))
        annorect = person_result(all_preds, scores, img_id)
        annolist.append({
            'annorect': annorect,
            'ignore_regions': [],
            'image': [{'name': file_name}]
        })

        if args.save_video:
            out.write(image)
        if args.save_img:
            img_path = os.path.join(image_dir, file_name)
            cv2.imwrite(img_path, image)

    final_results = {'annolist': annolist}
    with open(res_file, 'w') as f:
        json.dump(final_results, f)
    print('=> create test json finished!')

    stream.release()
    if args.save_video:
        out.release()
    cv2.destroyAllWindows()
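# Example invocation of the video-test script above. Flag names are inferred
# from the args.* attributes used in main(); the actual parse_args definition
# is not shown here, so treat the exact flags as assumptions:
#
#   python test_video.py --cfg experiments/coco/higher_hrnet/w32_512.yaml \
#       --video_path demo/input.mp4 --outdir output --data coco \
#       --video_format mp4 --save_video --save_img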
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    transforms_pre = torchvision.transforms.Compose([
        ToNumpy(),
    ])

    # iterate over all datasets
    datasets_root_path = "/media/jld/DATOS_JLD/datasets"
    datasets = ["cityscapes", "kitti", "tsinghua"]
    # the cityscapes and kitti test splits have no ground truth,
    # so they are not processed
    datasplits = [["train", "val"], ["train"], ["train", "val", "test"]]
    keypoints_output_root_path = "/media/jld/DATOS_JLD/git-repos/paper-revista-keypoints/results"
    # model name + configuration
    model_name = osp.basename(cfg.TEST.MODEL_FILE).split('.')[0]

    for dsid, dataset in enumerate(datasets):
        dataset_root_path = osp.join(datasets_root_path, dataset)
        output_root_path = osp.join(keypoints_output_root_path, dataset)
        for datasplit in datasplits[dsid]:
            logger.info(f"Processing split {datasplit} of {dataset}")
            input_img_dir = osp.join(dataset_root_path, datasplit)
            output_kps_json_dir = osp.join(output_root_path, datasplit,
                                           model_name)
            logger.info(f"Input image dir: {input_img_dir}")
            logger.info(f"Output pose JSON dir: {output_kps_json_dir}")

            test_dataset = dsjld.BaseDataset(input_img_dir,
                                             output_kps_json_dir,
                                             transform=transforms_pre)
            test_dataset.generate_io_samples_pairs()

            # Establish keypoint score weights (like openpifpaf in
            # https://github.com/vita-epfl/openpifpaf/blob/master/openpifpaf/decoder/annotation.py#L44)
            n_keypoints = 17
            kps_score_weights = numpy.ones((n_keypoints,))
            kps_score_weights[:3] = 3.0
            # normalize weights to sum to 1
            kps_score_weights /= numpy.sum(kps_score_weights)

            data_loader = torch.utils.data.DataLoader(test_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      num_workers=0,
                                                      pin_memory=False)

            parser = HeatmapParser(cfg)
            all_preds = []
            all_scores = []

            pbar = tqdm(total=len(test_dataset))
            for i, (img, imgidx) in enumerate(data_loader):
                assert 1 == img.size(0), 'Test batch size should be 1'

                img = img[0].cpu().numpy()
                # size at scale 1.0
                base_size, center, scale = get_multi_scale_size(
                    img, cfg.DATASET.INPUT_SIZE, 1.0,
                    min(cfg.TEST.SCALE_FACTOR))

                with torch.no_grad():
                    final_heatmaps = None
                    tags_list = []
                    for idx, s in enumerate(
                            sorted(cfg.TEST.SCALE_FACTOR, reverse=True)):
                        input_size = cfg.DATASET.INPUT_SIZE
                        image_resized, center, scale = resize_align_multi_scale(
                            img, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                        image_resized = transforms(image_resized)
                        image_resized = image_resized.unsqueeze(0).cuda()

                        outputs, heatmaps, tags = get_multi_stage_outputs(
                            cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                            cfg.TEST.PROJECT2IMAGE, base_size)

                        final_heatmaps, tags_list = aggregate_results(
                            cfg, s, final_heatmaps, tags_list, heatmaps, tags)

                    final_heatmaps = final_heatmaps / float(
                        len(cfg.TEST.SCALE_FACTOR))
                    tags = torch.cat(tags_list, dim=4)
                    grouped, scores = parser.parse(final_heatmaps, tags,
                                                   cfg.TEST.ADJUST,
                                                   cfg.TEST.REFINE)

                    final_results = get_final_preds(
                        grouped, center, scale,
                        [final_heatmaps.size(3), final_heatmaps.size(2)])

                pbar.update()

                # save all keypoints of this image in a JSON-serializable dict
                final_json_results = []
                for kps in final_results:
                    kpsdict = {}
                    x = kps[:, 0]
                    y = kps[:, 1]
                    kps_scores = kps[:, 2]
                    kpsdict['keypoints'] = kps[:, 0:3].tolist()
                    # bounding box via min/max of the nonzero coordinates
                    xmin = numpy.float64(numpy.min(x[numpy.nonzero(x)]))
                    xmax = numpy.float64(numpy.max(x))
                    width = numpy.float64(xmax - xmin)
                    ymin = numpy.float64(numpy.min(y[numpy.nonzero(y)]))
                    ymax = numpy.float64(numpy.max(y))
                    height = numpy.float64(ymax - ymin)
                    kpsdict['bbox'] = [xmin, ymin, width, height]
                    # pose score: weighted mean of the sorted keypoint scores
                    kpsdict['score'] = numpy.float64(
                        numpy.sum(kps_score_weights *
                                  numpy.sort(kps_scores)[::-1]))
                    final_json_results.append(kpsdict)

                with open(test_dataset.output_json_files_list[imgidx],
                          'w') as f:
                    json.dump(final_json_results, f)

                all_preds.append(final_results)
                all_scores.append(scores)

            pbar.close()
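# Worked example of the pose-score weighting used above: with 17 keypoints,
# the three largest scores get weight 3/23 each and the remaining fourteen
# get 1/23 each, so high-confidence keypoints dominate the pose score.
# (Standalone illustration with made-up scores, not part of the pipeline.)
import numpy

w = numpy.ones(17)
w[:3] = 3.0
w /= w.sum()                          # [3/23, 3/23, 3/23, 1/23, ...]
s = numpy.linspace(1.0, 0.2, 17)      # example keypoint scores
pose_score = numpy.sum(w * numpy.sort(s)[::-1])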
def validate(config, val_loader, val_dataset, model, output_dir, tb_log_dir,
             writer_dict=None):
    model.eval()

    if config.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    parser = HeatmapParser(config)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(val_dataset)) if config.TEST.LOG_PROGRESS else None
    for i, (images, annos) in enumerate(val_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, config.DATASET.INPUT_SIZE, 1.0,
            min(config.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(
                    sorted(config.TEST.SCALE_FACTOR, reverse=True)):
                input_size = config.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(config.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    config, model, image_resized, config.TEST.FLIP_TEST,
                    config.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    config, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(
                len(config.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           config.TEST.ADJUST,
                                           config.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)])

        if config.TEST.LOG_PROGRESS:
            pbar.update()

        all_preds.append(final_results)
        all_scores.append(scores)

    if config.TEST.LOG_PROGRESS:
        pbar.close()

    results, res_file = val_dataset.evaluate(config, all_preds, all_scores,
                                             output_dir)

    # compute RMSE between ground-truth and detected keypoints via pycocotools
    gt_file = val_dataset._get_anno_file_name()
    coco = COCO(gt_file)
    coco_dt = coco.loadRes(res_file)
    coco_eval = COCOeval(coco, coco_dt, 'keypoints')
    coco_eval._prepare()

    gts_ = coco_eval._gts
    dts_ = coco_eval._dts

    p = coco_eval.params
    p.imgIds = list(np.unique(p.imgIds))
    if p.useCats:
        p.catIds = list(np.unique(p.catIds))
    p.maxDets = sorted(p.maxDets)

    # loop through images, area range, max detection number
    catIds = p.catIds if p.useCats else [-1]

    pcutoff01 = 0.1
    pcutoff06 = 0.6
    mean_rmse_list = []
    mean_rmse_pcutoff01_list = []
    mean_rmse_pcutoff06_list = []
    for catId in catIds:
        for imgId in p.imgIds:
            # dimension here should be Nxm
            gts = gts_[imgId, catId]
            dts = dts_[imgId, catId]
            if len(gts) != 0 and len(dts) != 0:
                # only the first ground-truth/detection pair per image
                # is compared
                npgt = np.array(gts[0]['keypoints'])
                npdt = np.array(dts[0]['keypoints'])
                mask01 = npdt[2::3] >= pcutoff01
                mask06 = npdt[2::3] >= pcutoff06
                RMSE = np.sqrt((npgt[0::3] - npdt[0::3])**2 +
                               (npgt[1::3] - npdt[1::3])**2)
                RMSE_pcutoff01 = RMSE[mask01]
                RMSE_pcutoff06 = RMSE[mask06]
                mean_rmse = np.round(np.nanmean(RMSE.flatten()), 2)
                mean_rmse_pcutoff01 = np.nanmean(RMSE_pcutoff01.flatten())
                mean_rmse_pcutoff06 = np.nanmean(RMSE_pcutoff06.flatten())
                mean_rmse_list.append(mean_rmse)
                mean_rmse_pcutoff01_list.append(mean_rmse_pcutoff01)
                mean_rmse_pcutoff06_list.append(mean_rmse_pcutoff06)

    print(f"Mean RMSE: {np.mean(mean_rmse_list)}")
    print(f"Mean RMSE p-cutoff 0.1: "
          f"{np.round(np.mean(mean_rmse_pcutoff01_list), 2)}")
    print(f"Mean RMSE p-cutoff 0.6: "
          f"{np.round(np.mean(mean_rmse_pcutoff06_list), 2)}")

    if writer_dict is not None:
        global_steps = writer_dict['valid_global_steps']
        writer_dict['writer'].add_scalar('val_rmse',
                                         np.mean(mean_rmse_list),
                                         global_steps)
        writer_dict['writer'].add_scalar('val_rmse_pcutoff_0.1',
                                         np.mean(mean_rmse_pcutoff01_list),
                                         global_steps)
        writer_dict['writer'].add_scalar('val_rmse_pcutoff_0.6',
                                         np.mean(mean_rmse_pcutoff06_list),
                                         global_steps)
        writer_dict['valid_global_steps'] = global_steps + 1

    return np.mean(mean_rmse_list)
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    data_loader, test_dataset = make_test_dataloader(cfg)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    parser = HeatmapParser(cfg)

    vid_file = 0  # or a video file path
    print('Opening Camera ' + str(vid_file))
    cap = cv2.VideoCapture(vid_file)

    while True:
        ret, image = cap.read()
        if not ret:
            break
        a = datetime.datetime.now()

        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)])

        b = datetime.datetime.now()
        inf_time = (b - a).total_seconds() * 1000
        print('Inf time {} ms'.format(inf_time))

        # display the resulting frame
        for person in final_results:
            color = np.random.randint(0, 255, size=3)
            color = [int(i) for i in color]
            add_joints(image, person, color, test_dataset.name,
                       cfg.TEST.DETECTION_THRESHOLD)

        image = cv2.putText(image, '{:.2f} ms / frame'.format(inf_time),
                            (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (0, 255, 0), 2, cv2.LINE_AA)
        cv2.imshow('frame', image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    if cfg.FP16.ENABLED:
        model = network_to_half(model)
    print("Initialized.")

    if cfg.TEST.MODEL_FILE:
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        raise Exception('No weight file. Would you like to test with your hammer?')

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        # used by default
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    parser = HeatmapParser(cfg)
    print("Model loaded successfully.")

    ENABLE_CAMERA = 1
    ENABLE_VIDEO = 1
    VIDEO_ROTATE = 0

    if ENABLE_CAMERA:
        # open the camera stream
        cap = cv2.VideoCapture(-1)
        ret, image = cap.read()
        x, y = image.shape[0:2]
        print((x, y))
        # create the output video file
        fourcc = cv2.VideoWriter_fourcc(*'I420')
        out = cv2.VideoWriter('./result.avi', fourcc, 24, (y, x), True)
        while ret:
            ret, image = cap.read()
            if not ret:
                break
            # scale search is disabled for live video
            base_size, center, scale = get_multi_scale_size(
                image, cfg.DATASET.INPUT_SIZE, 1.0, 1.0)
            with torch.no_grad():
                final_heatmaps = None
                tags_list = []
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, 1.0, 1.0)
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)
                final_heatmaps, tags_list = aggregate_results(
                    cfg, 1.0, final_heatmaps, tags_list, heatmaps, tags)
                # NOTE: only one scale is aggregated here, so this assumes
                # cfg.TEST.SCALE_FACTOR holds a single scale
                final_heatmaps = final_heatmaps / float(
                    len(cfg.TEST.SCALE_FACTOR))
                tags = torch.cat(tags_list, dim=4)
                grouped, scores = parser.parse(final_heatmaps, tags,
                                               cfg.TEST.ADJUST,
                                               cfg.TEST.REFINE)
                final_results = get_final_preds(
                    grouped, center, scale,
                    [final_heatmaps.size(3), final_heatmaps.size(2)])

            detection = save_demo_image(image, final_results, mode=1)
            detection = cv2.cvtColor(detection, cv2.COLOR_BGR2RGB)
            cv2.imshow('Pose Estimation', detection)
            out.write(detection)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cap.release()
        out.release()
        os.system('ffmpeg -i result.avi -c:v libx265 camera.mp4')
        cv2.destroyAllWindows()
    elif ENABLE_VIDEO:
        # open the video stream
        video_name = './videos/test04.mp4'
        cap = cv2.VideoCapture(video_name)
        # create the output video file
        fourcc = cv2.VideoWriter_fourcc(*'I420')
        out = cv2.VideoWriter('./result.avi', fourcc, 24, (704, 576), True)
        while cap.isOpened():
            ret, image = cap.read()
            if not ret:
                break
            if VIDEO_ROTATE:
                # only for the medicine-ball throw footage
                image = cv2.resize(image, (960, 540)).transpose((1, 0, 2))
            # scale search is disabled for video
            base_size, center, scale = get_multi_scale_size(
                image, cfg.DATASET.INPUT_SIZE, 1.0, 1.0)
            with torch.no_grad():
                final_heatmaps = None
                tags_list = []
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, 1.0, 1.0)
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)
                final_heatmaps, tags_list = aggregate_results(
                    cfg, 1.0, final_heatmaps, tags_list, heatmaps, tags)
                final_heatmaps = final_heatmaps / float(
                    len(cfg.TEST.SCALE_FACTOR))
                tags = torch.cat(tags_list, dim=4)
                grouped, scores = parser.parse(final_heatmaps, tags,
                                               cfg.TEST.ADJUST,
                                               cfg.TEST.REFINE)
                final_results = get_final_preds(
                    grouped, center, scale,
                    [final_heatmaps.size(3), final_heatmaps.size(2)])

            detection = save_demo_image(image, final_results, mode=1)
            detection = cv2.cvtColor(detection, cv2.COLOR_BGR2RGB)
            cv2.imshow('Pose Estimation', detection)
            out.write(detection)
            cv2.waitKey(1)
        cap.release()
        out.release()
        os.system('ffmpeg -i result.avi -c:v libx265 det04.mp4')
        cv2.destroyAllWindows()
    else:
        img_name = './test.jpg'
        image = cv2.imread(img_name)
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))
        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            print(cfg.TEST.SCALE_FACTOR)
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)
                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)
            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)])

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        save_demo_image(image, final_results, file_name='./result.jpg')
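# The os.system ffmpeg calls above assume ffmpeg is on PATH and shell-parse a
# fixed command string. An equivalent, slightly safer pattern (a sketch, not
# part of the original script) passes an argument list to subprocess, which
# avoids shell quoting issues and raises on a non-zero exit code:
import subprocess

subprocess.run(
    ['ffmpeg', '-y', '-i', 'result.avi', '-c:v', 'libx265', 'det04.mp4'],
    check=True)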
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    pose_dir = prepare_output_dirs(args.outputDir)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        # the checkpoint stores parameter names with a '1.' prefix (saved from
        # a wrapped model), so copy parameters one by one instead of calling
        # load_state_dict directly
        pretrained_model_state = torch.load(model_state_file)
        for name, param in model.state_dict().items():
            model.state_dict()[name].copy_(pretrained_model_state['1.' + name])

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    parser = HeatmapParser(cfg)

    # load the video
    vidcap = cv2.VideoCapture(args.videoFile)
    fps = vidcap.get(cv2.CAP_PROP_FPS)
    if fps < args.inferenceFps:
        print('desired inference fps is ' + str(args.inferenceFps) +
              ' but video fps is ' + str(fps))
        exit()
    skip_frame_cnt = round(fps / args.inferenceFps)
    frame_width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    outcap = cv2.VideoWriter(
        '{}/{}_pose.avi'.format(
            args.outputDir,
            os.path.splitext(os.path.basename(args.videoFile))[0]),
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), int(skip_frame_cnt),
        (frame_width, frame_height))

    count = 0
    while vidcap.isOpened():
        total_now = time.time()
        ret, image_bgr = vidcap.read()
        count += 1
        if not ret:
            break
        if count % skip_frame_cnt != 0:
            continue

        image_debug = image_bgr.copy()
        now = time.time()
        image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)])

        for person_joints in final_results:
            for joint in person_joints:
                x, y = int(joint[0]), int(joint[1])
                cv2.circle(image_debug, (x, y), 4, (255, 0, 0), 2)

        then = time.time()
        print('Find person pose in: {} sec'.format(then - now))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        img_file = os.path.join(pose_dir, 'pose_{:08d}.jpg'.format(count))
        cv2.imwrite(img_file, image_debug)
        outcap.write(image_debug)

    vidcap.release()
    outcap.release()
def worker(gpu_id, dataset, indices, cfg, logger, final_output_dir,
           pred_queue):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    sub_dataset = torch.utils.data.Subset(dataset, indices)
    data_loader = torch.utils.data.DataLoader(sub_dataset,
                                              sampler=None,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=False)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225]),
        ])

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(sub_dataset)) if cfg.TEST.LOG_PROGRESS else None
    for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)

            # optional visualization of the aggregated heatmaps over the
            # de-normalized input image
            visual = False
            if visual:
                visual_heatmap = torch.max(final_heatmaps[0],
                                           dim=0,
                                           keepdim=True)[0]
                visual_heatmap = (visual_heatmap.cpu().numpy().repeat(
                    3, 0).transpose(1, 2, 0))

                mean = [0.485, 0.456, 0.406]
                std = [0.229, 0.224, 0.225]
                visual_img = (image_resized[0].cpu().numpy().transpose(
                    1, 2, 0).astype(np.float32))
                visual_img = visual_img[:, :, ::-1] * np.array(std).reshape(
                    1, 1, 3) + np.array(mean).reshape(1, 1, 3)
                visual_img = visual_img * 255
                test_data = cv2.addWeighted(
                    visual_img.astype(np.float32),
                    0.0,
                    visual_heatmap.astype(np.float32) * 255,
                    1.0,
                    0,
                )
                cv2.imwrite('test_data/{}.jpg'.format(i), test_data)

            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)])

        if cfg.TEST.LOG_PROGRESS:
            pbar.update()

        data_idx = indices[i]
        img_id = dataset.ids[data_idx]
        file_name = dataset.coco.loadImgs(img_id)[0]['file_name']
        for idx in range(len(final_results)):
            all_preds.append({
                'keypoints':
                final_results[idx][:, :3].reshape(-1).astype(float).tolist(),
                'image_id': int(file_name[-16:-4]),
                'score': float(scores[idx]),
                'category_id': 1
            })

    if cfg.TEST.LOG_PROGRESS:
        pbar.close()

    pred_queue.put_nowait(all_preds)
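# The workers above push their predictions into pred_queue instead of
# returning them, which implies a one-process-per-GPU driver. A minimal
# sketch of such a driver (run_workers and the round-robin sharding are
# assumptions; only the worker/pred_queue contract comes from the code above):
import torch.multiprocessing as mp

def run_workers(dataset, cfg, logger, final_output_dir, num_gpus):
    pred_queue = mp.Queue()
    procs = []
    for gpu_id in range(num_gpus):
        # round-robin shard of dataset indices for this GPU
        indices = list(range(gpu_id, len(dataset), num_gpus))
        proc = mp.Process(target=worker,
                          args=(gpu_id, dataset, indices, cfg, logger,
                                final_output_dir, pred_queue))
        proc.start()
        procs.append(proc)

    all_preds = []
    # drain the queue before join() so large payloads cannot block the children
    for _ in procs:
        all_preds.extend(pred_queue.get())
    for proc in procs:
        proc.join()
    return all_preds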
def worker(gpu_id, img_list, cfg, logger, final_output_dir, save_dir,
           pred_queue):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
        cfg, is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225]),
        ])

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(img_list)) if cfg.TEST.LOG_PROGRESS else None
    for i, img_path in enumerate(img_list):
        image_name = img_path.split('/')[-1].split('.')[0]
        image = cv2.imread(img_path,
                           cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)])

        if cfg.TEST.LOG_PROGRESS:
            pbar.update()

        for idx in range(len(final_results)):
            all_preds.append({
                'keypoints':
                final_results[idx][:, :3].reshape(-1).astype(float).tolist(),
                'image_name': image_name,
                'score': float(scores[idx]),
                'category_id': 1
            })

        skeleton_map = draw_skeleton(image, np.array(final_results))
        cv2.imwrite(os.path.join(save_dir, '{}.jpg'.format(image_name)),
                    skeleton_map)

    if cfg.TEST.LOG_PROGRESS:
        pbar.close()

    pred_queue.put_nowait(all_preds)