def main():
    """Entry point for the 'present' (visualization) run: load a pretrained
    MSPN checkpoint, run inference over the present loader, dump results to
    JSON, and write one visualized image per result."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--local_rank", type=int, default=0)
    # parser.add_argument("--iter", "-i", type=int, default=-1)
    args = arg_parser.parse_args()

    # Distributed setup: WORLD_SIZE is set by the launcher when multi-GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        dist.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    # Only the main process creates the output directory.
    if is_main_process() and not os.path.exists(cfg.PRESENT_DIR):
        os.mkdir(cfg.PRESENT_DIR)
    logger = get_logger(
        cfg.DATASET.NAME, cfg.PRESENT_DIR, args.local_rank, 'present_log.txt')
    # if args.iter == -1:
    #     logger.info("Please designate one iteration.")

    model = MSPN(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(cfg.MODEL.DEVICE)

    # TODO(review): hardcoded absolute checkpoint path — consider moving to cfg.
    model_file = "/home/zqr/codes/MSPN/lib/models/mspn_2xstg_coco.pth"
    if os.path.exists(model_file):
        checkpoint = torch.load(
            model_file, map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['model'])

    data_loader = get_present_loader(cfg, num_gpus, args.local_rank,
                                     cfg.INFO_PATH, is_dist=distributed)
    results = inference(model, data_loader, logger, device)
    synchronize()

    if is_main_process():
        logger.info("Dumping results ...")
        results.sort(
            key=lambda res: (res['image_id'], res['score']), reverse=True)
        results_path = os.path.join(cfg.PRESENT_DIR, 'results.json')
        with open(results_path, 'w') as f:
            json.dump(results, f)
        logger.info("Get all results.")
        # Render every prediction on top of its source image.
        for res in results:
            data_numpy = cv2.imread(
                os.path.join(cfg.IMG_FOLDER, res['image_id']),
                cv2.IMREAD_COLOR)
            img = data_loader.ori_dataset.visualize(
                data_numpy, res['keypoints'], res['score'])
            cv2.imwrite(os.path.join(cfg.PRESENT_DIR, res['image_id']), img)
def main():
    """Entry point for the evaluation run: load the checkpoint saved at a
    designated training iteration, run inference on the 'val' split, dump
    results to JSON, and evaluate them.

    Fix: previously, when --iter was left at its default (-1) the function
    only logged "Please designate one iteration." and then carried on, built
    a non-existent path "iter--1.pth", skipped loading, and silently
    evaluated an uninitialized model. Now it returns early instead.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--iter", "-i", type=int, default=-1)
    args = parser.parse_args()

    # Distributed setup: WORLD_SIZE is set by the launcher when multi-GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        dist.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    if is_main_process() and not os.path.exists(cfg.TEST_DIR):
        os.mkdir(cfg.TEST_DIR)
    logger = get_logger(cfg.DATASET.NAME, cfg.TEST_DIR, args.local_rank,
                        'test_log.txt')
    if args.iter == -1:
        logger.info("Please designate one iteration.")
        return  # bail out: no checkpoint was selected

    model = MSPN(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(cfg.MODEL.DEVICE)

    model_file = os.path.join(cfg.OUTPUT_DIR, "iter-{}.pth".format(args.iter))
    if os.path.exists(model_file):
        state_dict = torch.load(
            model_file, map_location=lambda storage, loc: storage)
        state_dict = state_dict['model']
        model.load_state_dict(state_dict)

    data_loader = get_test_loader(cfg, num_gpus, args.local_rank, 'val',
                                  is_dist=distributed)
    results = inference(model, data_loader, logger, device)
    synchronize()

    if is_main_process():
        logger.info("Dumping results ...")
        results.sort(
            key=lambda res: (res['image_id'], res['score']), reverse=True)
        results_path = os.path.join(cfg.TEST_DIR, 'results.json')
        with open(results_path, 'w') as f:
            json.dump(results, f)
        logger.info("Get all results.")
        data_loader.ori_dataset.evaluate(results_path)
def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu, logger):
    """Gather every worker's prediction list and flatten them on the main
    process; non-main processes return None."""
    if is_main_process():
        logger.info("Accumulating ...")
    gathered = all_gather(predictions_per_gpu)
    if not is_main_process():
        return
    # Flatten the list-of-lists produced by all_gather into one list.
    return [pred for per_gpu in gathered for pred in per_gpu]
def compute_on_dataset(model, data_loader, device):
    """Run the model over the loader and collect raw per-batch keypoint
    predictions (coordinates concatenated with their max heatmap values)."""
    model.eval()
    cpu = torch.device("cpu")
    results = []
    # Show a progress bar only on the main process.
    iterator = tqdm(data_loader) if is_main_process() else data_loader
    for batch in iterator:
        imgs, scores, centers, scales, img_ids = batch
        imgs = imgs.to(device)
        with torch.no_grad():
            outputs = model(imgs).to(cpu).numpy()
            if cfg.TEST.FLIP:
                # Horizontally flip the batch, run it again, un-flip the
                # heatmaps, and average with the original outputs.
                mirrored = np.flip(imgs.to(cpu).numpy(), 3).copy()
                mirrored = torch.from_numpy(mirrored).to(device)
                mirrored_out = model(mirrored).to(cpu).numpy()
                mirrored_out = flip_back(mirrored_out,
                                         cfg.DATASET.KEYPOINT.FLIP_PAIRS)
                outputs = (outputs + mirrored_out) * 0.5
        centers = np.array(centers)
        scales = np.array(scales)
        preds, maxvals = get_results(outputs, centers, scales,
                                     cfg.TEST.GAUSSIAN_KERNEL,
                                     cfg.TEST.SHIFT_RATIOS)
        results.append(np.concatenate((preds, maxvals), axis=2))
    return results
def write_metrics(metrics_dict, storage):
    """Gather per-worker metric dicts, reduce them, and record via `storage`.

    Tensors are detached to plain floats first. "max keys" (e.g. data_time)
    are reduced with max across workers — the real latency is the slowest
    worker — while everything else is averaged. Keys containing 'loss' are
    summed into "total_loss".

    Idiom fixes: filter inside the sum instead of `v if ... else 0`,
    truthiness instead of `len(d) >= 1`, and iterate the dict directly
    instead of `.keys()`.
    """
    metrics_dict = {
        k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
        for k, v in metrics_dict.items()
    }
    # gather metrics among all workers for logging
    all_metrics_dict = gather(metrics_dict)
    if is_main_process():
        max_keys = ("data_time", "best_acc1")
        for m_k in max_keys:
            if m_k in all_metrics_dict[0]:
                # data_time among workers can have high variance. The actual
                # latency caused by data_time is the maximum among workers.
                m_v = np.max([x.pop(m_k) for x in all_metrics_dict])
                storage.put_scalar(m_k, m_v)
        # average the rest metrics
        metrics_dict = {
            k: np.mean([x[k] for x in all_metrics_dict])
            for k in all_metrics_dict[0]
        }
        total_losses_reduced = sum(
            v for k, v in metrics_dict.items() if 'loss' in k)
        storage.put_scalar("total_loss", total_losses_reduced)
        if metrics_dict:
            storage.put_scalars(**metrics_dict)
def logging_rank(sstr, local_logger=None):
    """Log `sstr` on the main process only (falls back to the module-level
    logger when no local one is given). Always returns 0."""
    if is_main_process():
        target = local_logger if local_logger is not None else logger
        target.info(sstr)
    return 0
def main(args):
    """Driver for model analysis + testing: build config, optionally measure
    params/FLOPs/activations on the main process, load weights, build the
    test loader and hooks, then run `test`."""
    cfg = get_cfg()
    cfg.merge_from_file(args.cfg_file)
    cfg.merge_from_list(args.opts)
    cfg = infer_cfg(cfg)
    cfg.freeze()
    # logging_rank(cfg)

    if not os.path.isdir(cfg.CKPT):
        mkdir_p(cfg.CKPT)
    setup_logging(cfg.CKPT)

    # Calculate Params & FLOPs & Activations (main process only; a throwaway
    # model is built for analysis and freed before the real one).
    n_params = conv_flops = model_flops = conv_activs = model_activs = 0
    if is_main_process() and cfg.MODEL_ANALYSE:
        analysis_model = Generalized_CNN(cfg)
        analysis_model.eval()
        analyser = Analyser(cfg, analysis_model, param_details=False)
        n_params = analyser.get_params()[1]
        conv_flops, model_flops = analyser.get_flops_activs(
            cfg.TEST.SCALE[0], cfg.TEST.SCALE[1], mode='flops')
        conv_activs, model_activs = analyser.get_flops_activs(
            cfg.TEST.SCALE[0], cfg.TEST.SCALE[1], mode='activations')
        del analysis_model
    synchronize()

    # Create model
    model = Generalized_CNN(cfg)
    logging_rank(model)

    # Load model
    test_weights = get_weights(cfg.CKPT, cfg.TEST.WEIGHTS)
    load_weights(model, test_weights)
    logging_rank('Params: {} | FLOPs: {:.4f}M / Conv_FLOPs: {:.4f}M | '
                 'ACTIVATIONs: {:.4f}M / Conv_ACTIVATIONs: {:.4f}M'.format(
                     n_params, model_flops, conv_flops, model_activs, conv_activs))
    model.eval()
    model.to(torch.device(cfg.DEVICE))

    # Create testing dataset and loader
    datasets = build_dataset(cfg, is_train=False)
    test_loader = make_test_data_loader(cfg, datasets)
    synchronize()

    # Build hooks
    all_hooks = build_test_hooks(args.cfg_file.split('/')[-1],
                                 log_period=1, num_warmup=0)

    # Build test engine
    test_engine = TestEngine(cfg, model)

    # Test
    test(cfg, test_engine, test_loader, datasets, all_hooks)
def inference(model, data_loader, logger, device="cuda"):
    """Run distributed inference and return the accumulated predictions on
    the main process (None elsewhere)."""
    preds = compute_on_dataset(model, data_loader, device)
    synchronize()
    preds = _accumulate_predictions_from_multiple_gpus(preds, logger)
    if is_main_process():
        return preds
    return
def test(cfg, test_engine, loader, datasets, all_hooks):
    """Run the test engine over the loader, post-process each batch, gather
    results from all workers, and evaluate on the main process.

    Fix: the Evaluation instance was bound to the name `eval`, shadowing the
    builtin; renamed to `evaluator`.
    """
    total_timer = Timer()
    total_timer.tic()
    # Four parallel result lists, one per result category of post_processing.
    all_results = [[] for _ in range(4)]
    evaluator = Evaluation(cfg)
    with torch.no_grad():
        loader = iter(loader)
        for i in range(len(loader)):
            all_hooks.iter_tic()
            all_hooks.data_tic()
            inputs, targets, idx = next(loader)
            all_hooks.data_toc()
            all_hooks.infer_tic()
            result = test_engine(inputs, targets)
            all_hooks.infer_toc()
            all_hooks.post_tic()
            eval_results = evaluator.post_processing(result, targets, idx, datasets)
            # Extend each of the four accumulators with its new chunk.
            all_results = [
                results + eva
                for results, eva in zip(all_results, eval_results)
            ]
            all_hooks.post_toc()
            all_hooks.iter_toc()
            if is_main_process():
                all_hooks.log_stats(i, 0, len(loader), len(datasets))
    # Gather per-worker results, then flatten each category across workers.
    all_results = list(zip(*all_gather(all_results)))
    all_results = [[item for sublist in results for item in sublist]
                   for results in all_results]
    if is_main_process():
        total_timer.toc(average=False)
        logging_rank('Total inference time: {:.3f}s'.format(
            total_timer.average_time))
        evaluator.evaluation(datasets, all_results)
def __init__(self, cfg, model):
    """Store config/model, initialize result holders, and (on the main
    process only) create the 'test' and optional 'vis' output directories."""
    self.cfg = cfg
    self.model = model
    self.result = {}
    self.features = []
    self.extra_fields = get_extra_fields(self.cfg.TEST.DATASETS[0])
    self.preprocess_inputs = PreprocessInputs(self.cfg)
    if is_main_process():
        test_dir = os.path.join(self.cfg.CKPT, 'test')
        if not os.path.isdir(test_dir):
            mkdir_p(test_dir)
        if self.cfg.VIS.ENABLED:
            vis_dir = os.path.join(self.cfg.CKPT, 'vis')
            if not os.path.exists(vis_dir):
                mkdir_p(vis_dir)
def compute_on_dataset(model, data_loader, device):
    """Run the model over the loader and build COCO-style result dicts
    (image_id, category_id, keypoints, score) for every image.

    Fix: `maxvals.squeeze()` also collapsed the batch axis whenever a batch
    contained exactly one image (e.g. the last batch), so the subsequent
    `.mean(axis=1)` raised; squeeze only the trailing singleton axis.
    """
    model.eval()
    results = list()
    cpu_device = torch.device("cpu")
    # Progress bar only on the main process.
    data = tqdm(data_loader) if is_main_process() else data_loader
    for _, batch in enumerate(data):
        imgs, scores, centers, scales, img_ids = batch
        imgs = imgs.to(device)
        with torch.no_grad():
            outputs = model(imgs)
            outputs = outputs.to(cpu_device).numpy()
            if cfg.TEST.FLIP:
                # Flip-test: mirror the batch, run again, un-flip the
                # heatmaps, and average with the original outputs.
                imgs_flipped = np.flip(imgs.to(cpu_device).numpy(), 3).copy()
                imgs_flipped = torch.from_numpy(imgs_flipped).to(device)
                outputs_flipped = model(imgs_flipped)
                outputs_flipped = outputs_flipped.to(cpu_device).numpy()
                outputs_flipped = flip_back(
                    outputs_flipped, cfg.DATASET.KEYPOINT.FLIP_PAIRS)
                outputs = (outputs + outputs_flipped) * 0.5
        centers = np.array(centers)
        scales = np.array(scales)
        outputs = outputs / 255.0
        preds, maxvals = get_max_preds(outputs)
        preds = post(preds, outputs)
        # Map heatmap coordinates back to original image space.
        # NOTE(review): [48, 64] looks like the heatmap (width, height) — confirm.
        for i in range(preds.shape[0]):
            preds[i] = transform_preds(
                preds[i], centers[i], scales[i], [48, 64]
            )
        # maxvals has a trailing singleton axis; squeeze only that one so
        # single-image batches keep their batch dimension.
        kp_scores = maxvals.squeeze(axis=-1).mean(axis=1)
        preds = np.concatenate((preds, maxvals), axis=2)
        for i in range(preds.shape[0]):
            keypoints = preds[i].reshape(-1).tolist()
            score = scores[i] * kp_scores[i]
            image_id = img_ids[i]
            results.append(dict(image_id=image_id,
                                category_id=1,
                                keypoints=keypoints,
                                score=score))
    return results
def build_train_hooks(cfg, optimizer, scheduler, max_iter, warmup_iter,
                      ignore_warmup_time=False, precise_bn_args=None):
    """
    Build a list of default train hooks: timing + LR scheduling, optional
    PreciseBN, and (main process only) periodic metric writers.
    """
    start_iter = scheduler.iteration
    hooks = [
        IterationTimer(max_iter, start_iter, warmup_iter, ignore_warmup_time),
        LRScheduler(optimizer, scheduler),
    ]

    if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(precise_bn_args[1]):
        hooks.append(PreciseBN(precise_bn_args,
                               cfg.TEST.PRECISE_BN.PERIOD,
                               cfg.TEST.PRECISE_BN.NUM_ITER,
                               max_iter))

    if is_main_process():
        writers = [CommonMetricPrinter(cfg.CKPT, max_iter)]
        if cfg.TRAIN.SAVE_AS_JSON:
            writers.append(JSONWriter(os.path.join(cfg.CKPT, "metrics.json")))
        if cfg.TRAIN.USE_TENSORBOARD:
            log_dir = os.path.join(cfg.CKPT, "tensorboard_log")
            # Start from a clean tensorboard directory each run.
            if os.path.exists(log_dir):
                shutil.rmtree(log_dir)
            os.mkdir(log_dir)
            writers.append(TensorboardXWriter(log_dir))
        hooks.append(PeriodicWriter(cfg, writers, max_iter))
    return hooks
def setup_logging(path, local_logger=None, local_plain_formatter=None):
    """Attach a file handler (and, on the main process, a stdout handler) to
    either the module-level logger or a caller-supplied one; returns the
    logger that was configured."""
    if not os.path.isdir(path):
        mkdir_p(path)
    log_file = os.path.join(path, "log.txt")

    file_handler = logging.StreamHandler(cached_log_stream(log_file))
    file_handler.setLevel(logging.DEBUG)
    fmt = plain_formatter if local_plain_formatter is None else local_plain_formatter
    file_handler.setFormatter(fmt)

    # Only the main process also echoes to stdout.
    stdout_handler = None
    if is_main_process():
        stdout_handler = logging.StreamHandler(stream=sys.stdout)
        stdout_handler.setLevel(logging.DEBUG)
        stdout_handler.setFormatter(fmt)

    target = logger if local_logger is None else local_logger
    target.addHandler(file_handler)
    if stdout_handler is not None:
        target.addHandler(stdout_handler)
    return target
def generate_3d_point_pairs(model, refine_model, data_loader, cfg, device,
                            output_dir=''):
    """Run 2D/3D multi-person pose inference over `data_loader`.

    NOTE(review): despite building a `result` accumulator, this function
    `return`s from inside the innermost per-image loop (`return imgs[i]`
    below), so it processes at most one image and `result` is never
    populated or saved — confirm this truncation is intentional.
    """
    os.makedirs(output_dir, exist_ok=True)
    model.eval()
    if refine_model is not None:
        refine_model.eval()

    result = dict()
    result['model_pattern'] = cfg.DATASET.NAME
    result['3d_pairs'] = []
    # 3d_pairs has items like{'pred_2d':[[x,y,detZ,score]...], 'gt_2d':[[x,y,Z,visual_type]...],
    #                        'pred_3d':[[X,Y,Z,score]...], 'gt_3d':[[X,Y,X]...],
    #                        'root_d': (abs depth of root (float value) pred by network),
    #                        'image_path': relative image path}

    kpt_num = cfg.DATASET.KEYPOINT.NUM
    # Progress bar only on the main process.
    data = tqdm(data_loader) if is_main_process() else data_loader
    for idx, batch in enumerate(data):
        imgs, img_path, scales = batch
        # meta_data is always None here, so the ground-truth branch below is
        # dead code in this version — presumably kept from a variant whose
        # loader also yields annotations.
        meta_data = None
        imgs = imgs.to(device)
        with torch.no_grad():
            # Forward pass: 2D heatmaps/PAFs, 3D depth PAFs, root depth map.
            outputs_2d, outputs_3d, outputs_rd = model(imgs)
            outputs_3d = outputs_3d.cpu()
            outputs_rd = outputs_rd.cpu()
            if cfg.DO_FLIP:
                # Flip-test on the 2D outputs only; channels are re-paired
                # via FLIP_ORDER / FLIP_CHANNEL before averaging.
                imgs_flip = torch.flip(imgs, [-1])
                outputs_2d_flip, outputs_3d_flip, outputs_rd_flip = model(
                    imgs_flip)
                outputs_2d_flip = torch.flip(outputs_2d_flip, dims=[-1])
                # outputs_3d_flip = torch.flip(outputs_3d_flip, dims=[-1])
                # outputs_rd_flip = torch.flip(outputs_rd_flip, dims=[-1])
                keypoint_pair = cfg.DATASET.KEYPOINT.FLIP_ORDER
                paf_pair = cfg.DATASET.PAF.FLIP_CHANNEL
                paf_abs_pair = [x + kpt_num for x in paf_pair]
                pair = keypoint_pair + paf_abs_pair
                for i in range(len(pair)):
                    if i >= kpt_num and (i - kpt_num) % 2 == 0:
                        # x-component PAF channels change sign under mirroring.
                        outputs_2d[:, i] += outputs_2d_flip[:, pair[i]] * -1
                    else:
                        outputs_2d[:, i] += outputs_2d_flip[:, pair[i]]
                outputs_2d[:, kpt_num:] *= 0.5

            for i in range(len(imgs)):
                if meta_data is not None:
                    # remove person who was blocked
                    new_gt_bodys = []
                    annotation = meta_data[i].numpy()
                    scale = scales[i]
                    for j in range(len(annotation)):
                        if annotation[j, cfg.DATASET.ROOT_IDX, 3] > 1:
                            new_gt_bodys.append(annotation[j])
                    gt_bodys = np.asarray(new_gt_bodys)
                    if len(gt_bodys) == 0:
                        continue
                    # groundtruth:[person..[keypoints..[x, y, Z, score(0:None, 1:invisible, 2:visible), X, Y, Z,
                    #                                   f_x, f_y, cx, cy]]]
                    if len(gt_bodys[0][0]) < 11:
                        # Short annotation format: single focal length, image
                        # center used as principal point.
                        scale['f_x'] = gt_bodys[0, 0, 7]
                        scale['f_y'] = gt_bodys[0, 0, 7]
                        scale['cx'] = scale['img_width'] / 2
                        scale['cy'] = scale['img_height'] / 2
                    else:
                        scale['f_x'] = gt_bodys[0, 0, 7]
                        scale['f_y'] = gt_bodys[0, 0, 8]
                        scale['cx'] = gt_bodys[0, 0, 9]
                        scale['cy'] = gt_bodys[0, 0, 10]
                else:
                    gt_bodys = None
                    # use default values
                    scale = {k: scales[k][i].numpy() for k in scales}
                    # NOTE(review): focal length defaulted to the image width
                    # — an approximation; verify against the dataset.
                    scale['f_x'] = scale['img_width']
                    scale['f_y'] = scale['img_width']
                    scale['cx'] = scale['img_width'] / 2
                    scale['cy'] = scale['img_height'] / 2

                hmsIn = outputs_2d[i]
                # if the first pair is [1, 0], uncomment the code below
                # hmsIn[cfg.DATASET.KEYPOINT.NUM:cfg.DATASET.KEYPOINT.NUM+2] *= -1
                # outputs_3d[i, 0] *= -1
                # Normalize heatmaps and PAFs to the ranges dapalib expects.
                hmsIn[:cfg.DATASET.KEYPOINT.NUM] /= 255
                hmsIn[cfg.DATASET.KEYPOINT.NUM:] /= 127
                rDepth = outputs_rd[i][0]
                # no batch implementation yet
                pred_bodys_2d = dapalib.connect(hmsIn, rDepth,
                                                cfg.DATASET.ROOT_IDX,
                                                distFlag=True)
                if len(pred_bodys_2d) > 0:
                    # NOTE(review): `cfg.dataset.STRIDE` is lowercase while
                    # every other access uses `cfg.DATASET` — confirm the
                    # config object really exposes both spellings.
                    pred_bodys_2d[:, :, :2] *= cfg.dataset.STRIDE  # resize poses to the input-net shape
                    pred_bodys_2d = pred_bodys_2d.numpy()

                pafs_3d = outputs_3d[i].numpy().transpose(1, 2, 0)
                root_d = outputs_rd[i][0].numpy()

                # Upsample depth maps to the network input resolution.
                paf_3d_upsamp = cv2.resize(
                    pafs_3d, (cfg.INPUT_SHAPE[1], cfg.INPUT_SHAPE[0]),
                    interpolation=cv2.INTER_NEAREST)
                root_d_upsamp = cv2.resize(
                    root_d, (cfg.INPUT_SHAPE[1], cfg.INPUT_SHAPE[0]),
                    interpolation=cv2.INTER_NEAREST)

                # generate 3d prediction bodys
                pred_bodys_2d = register_pred(pred_bodys_2d, gt_bodys)
                if len(pred_bodys_2d) == 0:
                    continue

                pred_rdepths = generate_relZ(pred_bodys_2d, paf_3d_upsamp,
                                             root_d_upsamp, scale)
                pred_bodys_3d = gen_3d_pose(pred_bodys_2d, pred_rdepths, scale)

                if refine_model is not None:
                    # Optional lifting/refinement network on top of the
                    # direct 3D estimate.
                    new_pred_bodys_3d = lift_and_refine_3d_pose(
                        pred_bodys_2d, pred_bodys_3d, refine_model,
                        device=device, root_n=cfg.DATASET.ROOT_IDX)
                else:
                    new_pred_bodys_3d = pred_bodys_3d

                overray_result(new_pred_bodys_3d, imgs[i])
                # Early exit: returns the first image's tensor (see docstring).
                return imgs[i]