def get_rpn_box_proposals(im, args):
    """Generate RPN box proposals for a single image.

    Reconfigures the global ``cfg`` for proposal-only inference from
    ``args.rpn_cfg``, initializes a model from ``args.rpn_pkl`` and runs
    ``rpn_engine.im_proposals`` on ``im`` inside CUDA scope 0.

    Args:
        im: Image forwarded unchanged to ``rpn_engine.im_proposals``.
        args: Object exposing ``rpn_cfg`` (yaml config file) and ``rpn_pkl``
            (value handed to ``model_engine.initialize_model_from_cfg``).

    Returns:
        The ``(boxes, scores)`` pair produced by ``rpn_engine.im_proposals``.
    """
    cfg.immutable(False)
    # Load the yaml config file and merge it into the global config.
    merge_cfg_from_file(args.rpn_cfg)
    # Number of GPUs to use (applies to both training and testing).
    cfg.NUM_GPUS = 1
    # The model's computation terminates with the production of RPN
    # proposals (i.e., it outputs proposals ONLY, no actual detections).
    cfg.MODEL.RPN_ONLY = True
    # Number of top scoring RPN proposals to keep before applying NMS.
    # When FPN is used, this is *per FPN level* (not total).
    cfg.TEST.RPN_PRE_NMS_TOP_N = 10000
    # Number of top scoring RPN proposals to keep after applying NMS.
    # This is the total number of RPN proposals produced (for both FPN and
    # non-FPN cases).
    cfg.TEST.RPN_POST_NMS_TOP_N = 2000
    # Must be called once all cfg values have been set (config file merged,
    # overrides applied, etc.).
    assert_and_infer_cfg()

    # Initialize a model from the global cfg.
    model = model_engine.initialize_model_from_cfg(args.rpn_pkl)
    with c2_utils.NamedCudaScope(0):
        # Generate RPN proposals on the single image.
        boxes, scores = rpn_engine.im_proposals(model, im)
    return boxes, scores
def main(args):
    """Run one or more detection models on a single image and save a PDF.

    ``args.models_to_run`` is read as a flat sequence alternating a weights
    file and a yaml config (``pkl, yml, pkl, yml, ...``). An empty ``pkl``
    string falls back to ``cfg.TEST.WEIGHTS`` from the merged config. Each
    model's non-``None`` outputs (boxes / segms / keypoints) replace those of
    earlier models, and the combined result is visualized.

    Args:
        args: Object exposing ``im_file``, ``output_dir``, ``models_to_run``,
            ``rpn_pkl`` and (when ``rpn_pkl`` is set) ``rpn_cfg``.

    Raises:
        IOError: If ``args.im_file`` cannot be read as an image.
    """
    logger = logging.getLogger(__name__)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    # Snapshot the pristine global config so it can be restored before each
    # model. yaml.load() without an explicit Loader is deprecated and is a
    # TypeError on PyYAML >= 6. The text parsed here is our own dump (trusted),
    # so the full Loader is used to round-trip the config objects.
    cfg_orig = yaml.load(yaml.dump(cfg), Loader=yaml.Loader)
    im = cv2.imread(args.im_file)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Failed to read image: {}'.format(args.im_file))

    if args.rpn_pkl is not None:
        proposal_boxes, _proposal_scores = get_rpn_box_proposals(im, args)
        workspace.ResetWorkspace()
    else:
        proposal_boxes = None

    cls_boxes, cls_segms, cls_keyps = None, None, None
    for i in range(0, len(args.models_to_run), 2):
        pkl = args.models_to_run[i]
        yml = args.models_to_run[i + 1]
        # Start every model from the original config, then apply its yaml.
        cfg.immutable(False)
        merge_cfg_from_cfg(cfg_orig)
        merge_cfg_from_file(yml)
        if len(pkl) > 0:
            weights_file = pkl
        else:
            weights_file = cfg.TEST.WEIGHTS
        cfg.NUM_GPUS = 1
        assert_and_infer_cfg()
        model = model_engine.initialize_model_from_cfg(weights_file)
        with c2_utils.NamedCudaScope(0):
            cls_boxes_, cls_segms_, cls_keyps_ = \
                model_engine.im_detect_all(model, im, proposal_boxes)
        # Keep the latest non-None result of each kind.
        cls_boxes = cls_boxes_ if cls_boxes_ is not None else cls_boxes
        cls_segms = cls_segms_ if cls_segms_ is not None else cls_segms
        cls_keyps = cls_keyps_ if cls_keyps_ is not None else cls_keyps
        workspace.ResetWorkspace()

    out_name = os.path.join(
        args.output_dir, os.path.basename(args.im_file) + '.pdf'
    )
    logger.info('Processing {} -> {}'.format(args.im_file, out_name))

    vis_utils.vis_one_image(
        im[:, :, ::-1],  # reverse the channel axis (cv2 loads images as BGR)
        args.im_file,
        args.output_dir,
        cls_boxes,
        cls_segms,
        cls_keyps,
        dataset=dummy_coco_dataset,
        box_alpha=0.3,
        show_class=True,
        thresh=0.7,
        kp_thresh=2
    )
def get_rpn_box_proposals(im, args):
    """Run the RPN configured by ``args`` on ``im`` and return its proposals.

    The global ``cfg`` is unlocked, merged with ``args.rpn_cfg`` and switched
    to single-GPU, proposal-only settings before the model is initialized
    from ``args.rpn_pkl``.

    Returns:
        The ``(boxes, scores)`` pair from ``rpn_engine.im_proposals``.
    """
    # Unlock the global config and pull in the RPN-specific settings.
    cfg.immutable(False)
    merge_cfg_from_file(args.rpn_cfg)

    # Single GPU, proposals only.
    cfg.NUM_GPUS = 1
    cfg.MODEL.RPN_ONLY = True

    # Proposal budget before / after NMS.
    cfg.TEST.RPN_PRE_NMS_TOP_N = 10000
    cfg.TEST.RPN_POST_NMS_TOP_N = 2000

    assert_and_infer_cfg()

    rpn_model = model_engine.initialize_model_from_cfg(args.rpn_pkl)
    with c2_utils.NamedCudaScope(0):
        proposal_boxes, proposal_scores = rpn_engine.im_proposals(rpn_model, im)
    return proposal_boxes, proposal_scores
def initialize_model_from_cfg(args, roidb=None, gpu_id=0):
    """Initialize a model from the global cfg.

    Builds ``model_builder.Generalized_RCNN``, puts it in evaluation mode,
    configures it from the first roidb entry, loads weights and wraps the
    result in ``mynn.DataParallel``.

    Args:
        args: Object exposing ``cuda``, ``load_ckpt`` and ``load_detectron``.
        roidb: Non-empty sequence whose first entry is a mapping with at least
            the keys ``'source'``, ``'target'`` and ``'relationships'`` (and
            optionally ``'word_embeddings'``). Despite the ``None`` default it
            is required, because the function reads ``roidb[0]``.
        gpu_id: Accepted for interface compatibility; not used in this body.

    Returns:
        The model wrapped in ``mynn.DataParallel``.

    Raises:
        ValueError: If ``roidb`` is ``None`` or empty.

    Side effects:
        Sets ``cfg.TEST.CLASS_SPLIT`` and leaves the global cfg immutable.
        When ``cfg.MODEL.IGNORE_CLASSES == 'all'`` an ``'all'`` key is added
        to ``roidb[0]``.
    """
    if roidb is None or len(roidb) == 0:
        # Fail with an explicit message instead of an opaque
        # "'NoneType' object is not subscriptable" further down.
        raise ValueError(
            'initialize_model_from_cfg requires a non-empty roidb')
    entry = roidb[0]

    model = model_builder.Generalized_RCNN()
    model.eval()

    # Record the source/target class split in the (normally frozen) config.
    cfg.immutable(False)
    cfg.TEST.CLASS_SPLIT = {'source': entry['source'],
                            'target': entry['target']}
    cfg.immutable(True)

    if 'word_embeddings' in entry:
        model.Box_Outs.set_word_embedding(
            torch.tensor(entry['word_embeddings']))

    if cfg.MODEL.IGNORE_CLASSES:
        if cfg.MODEL.IGNORE_CLASSES == 'all':
            entry['all'] = entry['source'] + entry['target']
        model._ignore_classes = entry[cfg.MODEL.IGNORE_CLASSES]
        model.Box_Outs._ignore_classes = entry[cfg.MODEL.IGNORE_CLASSES]

    # Group relationship ids by (subject_id, object_id) pair.
    rel_ids_by_pair = {}
    for rel in entry['relationships']:
        pair = (rel['subject_id'], rel['object_id'])
        rel_ids_by_pair.setdefault(pair, []).append(rel['rel_id'])
    if cfg.MODEL.RELATION_COOCCUR:
        # Only co-occurrence matters: collapse every pair to the single id 1.
        for pair in rel_ids_by_pair:
            rel_ids_by_pair[pair] = [1]
    if cfg.MODEL.NUM_RELATIONS > 0:
        model.Rel_Outs.relationship_dict = rel_ids_by_pair

    if args.cuda:
        model.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        logger.info("loading checkpoint %s", load_name)
        # map_location keeps the loaded tensors on CPU storage.
        checkpoint = torch.load(
            load_name, map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(model, checkpoint['model'])

    if args.load_detectron:
        logger.info("loading detectron weights %s", args.load_detectron)
        load_detectron_weight(model, args.load_detectron)

    model = mynn.DataParallel(
        model, cpu_keywords=['im_info', 'roidb'], minibatch=True)

    return model
def configure_bbox_reg_weights(model, saved_cfg):
    """Compatibility shim for weights saved before MODEL.BBOX_REG_WEIGHTS.

    Old models were trained with bounding box regression mean/std
    normalization instead of fixed weights. When ``saved_cfg`` lacks
    ``MODEL.BBOX_REG_WEIGHTS`` the global config is forced to unit weights,
    and the model is asserted not to be in training mode.
    """
    if 'MODEL' in saved_cfg and 'BBOX_REG_WEIGHTS' in saved_cfg.MODEL:
        # The saved config already carries the key: nothing to patch.
        return

    logger.warning('Model from weights file was trained before config key '
                   'MODEL.BBOX_REG_WEIGHTS was added. Forcing '
                   'MODEL.BBOX_REG_WEIGHTS = (1., 1., 1., 1.) to ensure '
                   'correct **inference** behavior.')

    # Generally we don't allow modifying the config, but this is a one-off
    # hack to support some very old models. The previous mutability state is
    # restored afterwards.
    was_immutable = cfg.is_immutable()
    cfg.immutable(False)
    cfg.MODEL.BBOX_REG_WEIGHTS = (1., 1., 1., 1.)
    cfg.immutable(was_immutable)

    logger.info('New config:')
    logger.info(pprint.pformat(cfg))

    assert not model.train, (
        'This model was trained with an older version of the code that '
        'used bounding box regression mean/std normalization. It can no '
        'longer be used for training. To upgrade it to a trainable model '
        'please use fb/compat/convert_bbox_reg_normalized_model.py.')
# NOTE(review): this loop reads like the tail of a function whose `def` line
# is outside the visible source (presumably test_restore_checkpoint) -- confirm
# its enclosing scope and indentation before relying on this layout.
# Each restored blob must equal the original parameter stored under the same
# name with the scope prefix removed.
for scoped_name, blob in restored_all_params.items():
    unscoped_name = c2_utils.UnscopeName(scoped_name)
    np.testing.assert_array_equal(blob, orig_gpu_0_params[unscoped_name])


if __name__ == '__main__':
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    logger = utils.logging.setup_logging(__name__)
    logger.setLevel(logging.DEBUG)
    # Keep the data loader's logger at INFO while this module logs at DEBUG.
    logging.getLogger('roi_data.loader').setLevel(logging.INFO)
    np.random.seed(cfg.RNG_SEED)
    # Fresh temporary directory used as cfg.OUTPUT_DIR for the test run.
    output_dir = tempfile.mkdtemp()
    # Generate config for test
    cfg.MODEL.TYPE = 'generalized_rcnn'
    cfg.MODEL.CONV_BODY = 'FPN.add_fpn_ResNet50_conv5_body'
    cfg.MODEL.NUM_CLASSES = 81
    cfg.MODEL.FASTER_RCNN = True
    cfg.FPN.FPN_ON = True
    cfg.FPN.MULTILEVEL_ROIS = True
    cfg.FPN.MULTILEVEL_RPN = True
    cfg.FAST_RCNN.ROI_BOX_HEAD = 'fast_rcnn_heads.add_roi_2mlp_head'
    cfg.FAST_RCNN.ROI_XFORM_METHOD = 'RoIAlign'
    cfg.OUTPUT_DIR = output_dir
    cfg.TRAIN.DATASETS = ('coco_2014_minival',)
    cfg.TRAIN.WEIGHTS = b''
    # Run the checkpoint test once per GPU count, from 1 up to the number of
    # CUDA devices reported by the workspace.
    for num_gpu in range(workspace.NumCudaDevices()):
        cfg.immutable(False)
        cfg.NUM_GPUS = num_gpu + 1
        assert_and_infer_cfg()
        test_restore_checkpoint()
def main():
    """Train the relationship-detection model in step mode.

    Parses the command line, configures the global ``cfg`` for the chosen
    dataset, rescales batch size / learning rate / solver steps to the
    available GPUs, builds the roidb, data loader, model and optimizer,
    optionally restores a checkpoint or Detectron weights, then runs the
    training loop with warm-up and step decay. A checkpoint is written
    periodically, at the end of training, and on ``RuntimeError`` /
    ``KeyboardInterrupt``.

    Raises:
        ValueError: For an unknown ``args.dataset``, when neither
            ``args.cuda`` nor ``cfg.NUM_GPUS > 0`` holds, or for an
            unsupported ``cfg.SOLVER.TYPE``.
    """
    args = parse_args()
    print('Called with args:')
    print(args)

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    if args.cuda or cfg.NUM_GPUS > 0:
        cfg.CUDA = True
    else:
        raise ValueError("Need Cuda device to run !")

    if args.dataset == "vrd":
        cfg.TRAIN.DATASETS = ('vrd_train',)
        cfg.MODEL.NUM_CLASSES = 101
        cfg.MODEL.NUM_PRD_CLASSES = 70  # exclude background
    elif args.dataset == "vg_mini":
        cfg.TRAIN.DATASETS = ('vg_train_mini',)
        cfg.MODEL.NUM_CLASSES = 151
        cfg.MODEL.NUM_PRD_CLASSES = 50  # exclude background
    elif args.dataset == "vg":
        cfg.TRAIN.DATASETS = ('vg_train',)
        cfg.MODEL.NUM_CLASSES = 151
        cfg.MODEL.NUM_PRD_CLASSES = 50  # exclude background
    elif args.dataset == "oi_rel":
        cfg.TRAIN.DATASETS = ('oi_rel_train',)
        # cfg.MODEL.NUM_CLASSES = 62
        cfg.MODEL.NUM_CLASSES = 58
        cfg.MODEL.NUM_PRD_CLASSES = 9  # rel, exclude background
    elif args.dataset == "oi_rel_mini":
        cfg.TRAIN.DATASETS = ('oi_rel_train_mini',)
        # cfg.MODEL.NUM_CLASSES = 62
        cfg.MODEL.NUM_CLASSES = 58
        cfg.MODEL.NUM_PRD_CLASSES = 9  # rel, exclude background
    else:
        raise ValueError("Unexpected args.dataset: {}".format(args.dataset))

    cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    ### Adaptively adjust some configs ###
    # These assignments run after the config file / --set overrides were
    # merged, so they take precedence over both.
    cfg.SOLVER.BASE_LR = 0.0033
    cfg.SOLVER.GAMMA = 0.33
    if args.dataset == "vrd":
        cfg.SOLVER.STEPS = [1000,3000,8000,14000]
        cfg.SOLVER.MAX_ITER = 22680
        cfg.MODEL.STAGE_TWO = True
        cfg.TRAIN.FG_REL_SIZE_PER_IM = 128
        cfg.TRAIN.FG_REL_FRACTION = 0.5
    if args.dataset == "vg":
        cfg.SOLVER.STEPS = [0,90000,120000]
        cfg.SOLVER.MAX_ITER = 125446
        cfg.MODEL.STAGE_TWO = True
        cfg.TRAIN.FG_REL_SIZE_PER_IM = 256
        cfg.TRAIN.FG_REL_FRACTION = 0.5
    #cfg.MODEL.FEATLOSS_WEIGHT = 0.05
    cfg.NUM_GPUS = torch.cuda.device_count()

    original_batch_size = cfg.NUM_GPUS * cfg.TRAIN.IMS_PER_BATCH
    original_ims_per_batch = cfg.TRAIN.IMS_PER_BATCH
    original_num_gpus = cfg.NUM_GPUS
    if args.batch_size is None:
        args.batch_size = original_batch_size
    assert (args.batch_size % cfg.NUM_GPUS) == 0, \
        'batch_size: %d, NUM_GPUS: %d' % (args.batch_size, cfg.NUM_GPUS)
    cfg.TRAIN.IMS_PER_BATCH = args.batch_size // cfg.NUM_GPUS
    effective_batch_size = args.iter_size * args.batch_size
    print('effective_batch_size = batch_size * iter_size = %d * %d' % (args.batch_size, args.iter_size))

    print('Adaptive config changes:')
    print(' effective_batch_size: %d --> %d' % (original_batch_size, effective_batch_size))
    print(' NUM_GPUS: %d --> %d' % (original_num_gpus, cfg.NUM_GPUS))
    print(' IMS_PER_BATCH: %d --> %d' % (original_ims_per_batch, cfg.TRAIN.IMS_PER_BATCH))

    ### Adjust learning based on batch size change linearly
    # For iter_size > 1, gradients are `accumulated`, so lr is scaled based
    # on batch_size instead of effective_batch_size
    old_base_lr = cfg.SOLVER.BASE_LR
    cfg.SOLVER.BASE_LR *= args.batch_size / original_batch_size
    print('Adjust BASE_LR linearly according to batch_size change:\n'
          ' BASE_LR: {} --> {}'.format(old_base_lr, cfg.SOLVER.BASE_LR))

    ### Adjust solver steps
    step_scale = original_batch_size / effective_batch_size
    old_solver_steps = cfg.SOLVER.STEPS
    old_max_iter = cfg.SOLVER.MAX_ITER
    cfg.SOLVER.STEPS = list(map(lambda x: int(x * step_scale + 0.5), cfg.SOLVER.STEPS))
    cfg.SOLVER.MAX_ITER = int(cfg.SOLVER.MAX_ITER * step_scale + 0.5)
    print('Adjust SOLVER.STEPS and SOLVER.MAX_ITER linearly based on effective_batch_size change:\n'
          ' SOLVER.STEPS: {} --> {}\n'
          ' SOLVER.MAX_ITER: {} --> {}'.format(old_solver_steps, cfg.SOLVER.STEPS,
                                               old_max_iter, cfg.SOLVER.MAX_ITER))

    # Scale FPN rpn_proposals collect size (post_nms_topN) in `collect` function
    # of `collect_and_distribute_fpn_rpn_proposals.py`
    #
    # post_nms_topN = int(cfg[cfg_key].RPN_POST_NMS_TOP_N * cfg.FPN.RPN_COLLECT_SCALE + 0.5)
    if cfg.FPN.FPN_ON and cfg.MODEL.FASTER_RCNN:
        cfg.FPN.RPN_COLLECT_SCALE = cfg.TRAIN.IMS_PER_BATCH / original_ims_per_batch
        print('Scale FPN rpn_proposals collect size directly propotional to the change of IMS_PER_BATCH:\n'
              ' cfg.FPN.RPN_COLLECT_SCALE: {}'.format(cfg.FPN.RPN_COLLECT_SCALE))

    if args.num_workers is not None:
        cfg.DATA_LOADER.NUM_THREADS = args.num_workers
    print('Number of data loading threads: %d' % cfg.DATA_LOADER.NUM_THREADS)

    ### Overwrite some solver settings from command line arguments
    if args.optimizer is not None:
        cfg.SOLVER.TYPE = args.optimizer
    if args.lr is not None:
        cfg.SOLVER.BASE_LR = args.lr
    if args.lr_decay_gamma is not None:
        cfg.SOLVER.GAMMA = args.lr_decay_gamma
    assert_and_infer_cfg()

    timers = defaultdict(Timer)

    ### Dataset ###
    timers['roidb'].tic()
    roidb, ratio_list, ratio_index = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)
    timers['roidb'].toc()
    roidb_size = len(roidb)
    logger.info('{:d} roidb entries'.format(roidb_size))
    logger.info('Takes %.2f sec(s) to construct roidb', timers['roidb'].average_time)

    # Effective training sample size for one epoch
    train_size = roidb_size // args.batch_size * args.batch_size

    batchSampler = BatchSampler(
        sampler=MinibatchSampler(ratio_list, ratio_index),
        batch_size=args.batch_size,
        drop_last=True
    )
    dataset = RoiDataLoader(
        roidb,
        cfg.MODEL.NUM_CLASSES,
        training=True)
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_sampler=batchSampler,
        num_workers=cfg.DATA_LOADER.NUM_THREADS,
        collate_fn=collate_minibatch)
    dataiterator = iter(dataloader)

    ### Model ###
    maskRCNN = Generalized_RCNN()

    if cfg.CUDA:
        maskRCNN.cuda()

    ### Optimizer ###
    # record backbone params, i.e., conv_body and box_head params
    gn_params = []
    backbone_bias_params = []
    backbone_bias_param_names = []
    prd_branch_bias_params = []
    prd_branch_bias_param_names = []
    backbone_nonbias_params = []
    backbone_nonbias_param_names = []
    prd_branch_nonbias_params = []
    prd_branch_nonbias_param_names = []
    for key, value in dict(maskRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'gn' in key:
                gn_params.append(value)
            elif 'Conv_Body' in key or 'Box_Head' in key or 'Box_Outs' in key or 'RPN' in key:
                if 'bias' in key:
                    backbone_bias_params.append(value)
                    backbone_bias_param_names.append(key)
                else:
                    backbone_nonbias_params.append(value)
                    backbone_nonbias_param_names.append(key)
            else:
                if 'bias' in key:
                    prd_branch_bias_params.append(value)
                    prd_branch_bias_param_names.append(key)
                else:
                    prd_branch_nonbias_params.append(value)
                    prd_branch_nonbias_param_names.append(key)
    # Learning rate of 0 is a dummy value to be set properly at the start of training
    # Group order matters below: index 0 is the backbone (non-bias) group and
    # index 2 the predicate-branch (non-bias) group, which are the two
    # learning rates read back for logging.
    params = [
        {'params': backbone_nonbias_params,
         'lr': 0,
         'weight_decay': cfg.SOLVER.WEIGHT_DECAY},
        {'params': backbone_bias_params,
         'lr': 0 * (cfg.SOLVER.BIAS_DOUBLE_LR + 1),
         'weight_decay': cfg.SOLVER.WEIGHT_DECAY if cfg.SOLVER.BIAS_WEIGHT_DECAY else 0},
        {'params': prd_branch_nonbias_params,
         'lr': 0,
         'weight_decay': cfg.SOLVER.WEIGHT_DECAY},
        {'params': prd_branch_bias_params,
         'lr': 0 * (cfg.SOLVER.BIAS_DOUBLE_LR + 1),
         'weight_decay': cfg.SOLVER.WEIGHT_DECAY if cfg.SOLVER.BIAS_WEIGHT_DECAY else 0},
        {'params': gn_params,
         'lr': 0,
         'weight_decay': cfg.SOLVER.WEIGHT_DECAY_GN}
    ]

    if cfg.SOLVER.TYPE == "SGD":
        optimizer = torch.optim.SGD(params, momentum=cfg.SOLVER.MOMENTUM)
    elif cfg.SOLVER.TYPE == "Adam":
        optimizer = torch.optim.Adam(params)
    else:
        # Without this branch `optimizer` stays unbound and the first use
        # below fails with an unrelated-looking NameError.
        raise ValueError("Unsupported SOLVER.TYPE: {}".format(cfg.SOLVER.TYPE))

    ### Load checkpoint
    if args.load_ckpt:
        load_name = args.load_ckpt
        logging.info("loading checkpoint %s", load_name)
        checkpoint = torch.load(load_name, map_location=lambda storage, loc: storage)
        net_utils_rel.load_ckpt_rel(maskRCNN, checkpoint['model'])
        if args.resume:
            args.start_step = checkpoint['step'] + 1
            if 'train_size' in checkpoint:  # For backward compatibility
                if checkpoint['train_size'] != train_size:
                    print('train_size value: %d different from the one in checkpoint: %d'
                          % (train_size, checkpoint['train_size']))

            # reorder the params in optimizer checkpoint's params_groups if needed
            # misc_utils.ensure_optimizer_ckpt_params_order(param_names, checkpoint)

            # There is a bug in optimizer.load_state_dict on Pytorch 0.3.1.
            # However it's fixed on master.
            # optimizer.load_state_dict(checkpoint['optimizer'])
            misc_utils.load_optimizer_state_dict(optimizer, checkpoint['optimizer'])
        del checkpoint
        torch.cuda.empty_cache()

    if args.load_detectron:  #TODO resume for detectron weights (load sgd momentum values)
        logging.info("loading Detectron weights %s", args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    # lr = optimizer.param_groups[0]['lr']  # lr of non-bias parameters, for commmand line outputs.
    lr = optimizer.param_groups[2]['lr']  # lr of non-backbone parameters, for commmand line outputs.
    backbone_lr = optimizer.param_groups[0]['lr']  # lr of backbone parameters, for commmand line outputs.

    maskRCNN = mynn.DataParallel(maskRCNN, cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True)

    ### Training Setups ###
    args.run_name = misc_utils.get_run_name() + '_step_with_prd_cls_v' + str(cfg.MODEL.SUBTYPE)
    output_dir = misc_utils.get_output_dir(args, args.run_name)
    args.cfg_filename = os.path.basename(args.cfg_file)

    if not args.no_save:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        blob = {'cfg': yaml.dump(cfg), 'args': args}
        with open(os.path.join(output_dir, 'config_and_args.pkl'), 'wb') as f:
            pickle.dump(blob, f, pickle.HIGHEST_PROTOCOL)

        if args.use_tfboard:
            from tensorboardX import SummaryWriter
            # Set the Tensorboard logger
            tblogger = SummaryWriter(output_dir)

    ### Training Loop ###
    maskRCNN.train()

    # CHECKPOINT_PERIOD = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS)
    #CHECKPOINT_PERIOD = cfg.SOLVER.MAX_ITER / cfg.TRAIN.SNAPSHOT_FREQ
    # Integer period: with true division the period is a float, and
    # `(step + 1) % period == 0` then only fires when MAX_ITER happens to be
    # a multiple of 8 (otherwise almost never). max(1, ...) guards very
    # short runs against a zero period.
    CHECKPOINT_PERIOD = max(1, cfg.SOLVER.MAX_ITER // 8)

    # Set index for decay steps
    # NOTE(review): the search starts at index 1, so SOLVER.STEPS[0] is never
    # used as a decay step -- presumably intentional; confirm.
    decay_steps_ind = None
    for i in range(1, len(cfg.SOLVER.STEPS)):
        if cfg.SOLVER.STEPS[i] >= args.start_step:
            decay_steps_ind = i
            break
    if decay_steps_ind is None:
        decay_steps_ind = len(cfg.SOLVER.STEPS)

    training_stats = TrainingStats(
        args,
        args.disp_interval,
        tblogger if args.use_tfboard and not args.no_save else None)
    try:
        logger.info('Training starts !')
        step = args.start_step
        superview = []
        for step in range(args.start_step, cfg.SOLVER.MAX_ITER):

            # Warm up
            if step < cfg.SOLVER.WARM_UP_ITERS:
                method = cfg.SOLVER.WARM_UP_METHOD
                if method == 'constant':
                    warmup_factor = cfg.SOLVER.WARM_UP_FACTOR
                elif method == 'linear':
                    alpha = step / cfg.SOLVER.WARM_UP_ITERS
                    warmup_factor = cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha
                else:
                    raise KeyError('Unknown SOLVER.WARM_UP_METHOD: {}'.format(method))
                lr_new = cfg.SOLVER.BASE_LR * warmup_factor
                net_utils_rel.update_learning_rate_rel(optimizer, lr, lr_new)
                # lr = optimizer.param_groups[0]['lr']
                lr = optimizer.param_groups[2]['lr']
                backbone_lr = optimizer.param_groups[0]['lr']
                assert lr == lr_new
            elif step == cfg.SOLVER.WARM_UP_ITERS:
                net_utils_rel.update_learning_rate_rel(optimizer, lr, cfg.SOLVER.BASE_LR)
                # lr = optimizer.param_groups[0]['lr']
                lr = optimizer.param_groups[2]['lr']
                backbone_lr = optimizer.param_groups[0]['lr']
                assert lr == cfg.SOLVER.BASE_LR

            # Learning rate decay
            if decay_steps_ind < len(cfg.SOLVER.STEPS) and \
                    step == cfg.SOLVER.STEPS[decay_steps_ind]:
                logger.info('Decay the learning on step %d', step)
                lr_new = lr * cfg.SOLVER.GAMMA
                net_utils_rel.update_learning_rate_rel(optimizer, lr, lr_new)
                # lr = optimizer.param_groups[0]['lr']
                lr = optimizer.param_groups[2]['lr']
                backbone_lr = optimizer.param_groups[0]['lr']
                assert lr == lr_new
                decay_steps_ind += 1

            training_stats.IterTic()
            for inner_iter in range(args.iter_size):
                try:
                    input_data = next(dataiterator)
                except StopIteration:
                    # Epoch exhausted: restart the loader.
                    dataiterator = iter(dataloader)
                    input_data = next(dataiterator)

                for key in input_data:
                    if key != 'roidb':  # roidb is a list of ndarrays with inconsistent length
                        input_data[key] = list(map(Variable, input_data[key]))
                net_outputs = maskRCNN(**input_data)
                # NOTE(review): gradients are cleared on every inner iteration,
                # after the forward pass. With iter_size > 1 only the last
                # micro-batch contributes to optimizer.step(), which
                # contradicts the "accumulated" comment above. Left as-is in
                # case the forward pass relies on it -- confirm and, if not,
                # move zero_grad() before this loop.
                optimizer.zero_grad()
                training_stats.UpdateIterStats(net_outputs, step, inner_iter)
                loss = net_outputs['total_loss']
                loss.backward()

            # Scheduled switches of the memory-related config flags.
            if step == 20000:
                cfg.immutable(False)
                cfg.MODEL.MEMORY_SAVE_UPDATE = True
                cfg.immutable(True)
            if step == 40000:
                cfg.immutable(False)
                cfg.MODEL.MEMORY_TRAIN_UPDATE = True
                cfg.immutable(True)
                # Initialise the trainable memory with memory_save divided by
                # the per-class counts (zero counts are replaced by 1 to
                # avoid dividing by zero).
                class_count = maskRCNN.module.RelDN.class_count.clone()
                class_count[class_count==0] = 1
                memory_ini = maskRCNN.module.RelDN.memory_save / class_count.float().unsqueeze(1)
                maskRCNN.module.RelDN.memory_train.data = memory_ini.clone()
                np.save('memory_ini.npy',memory_ini.cpu().numpy())
                # Copy the linear classifier's weights into the hallucinator.
                maskRCNN.module.RelDN.mix_scores.fc_hallucinator.load_state_dict(maskRCNN.module.RelDN.mix_scores.linear_classifier.state_dict())

            optimizer.step()
            training_stats.IterToc()

            training_stats.LogIterStats(step, lr, backbone_lr)

            if (step+1) % CHECKPOINT_PERIOD == 0:
                save_ckpt(output_dir, args, step, train_size, maskRCNN, optimizer)

        # ---- Training ends ----
        # Save last checkpoint
        save_ckpt(output_dir, args, step, train_size, maskRCNN, optimizer)
        np.save('result{}.npy'.format(step),superview)

    except (RuntimeError, KeyboardInterrupt):
        # Best-effort save so an interrupted or crashed run can be resumed;
        # the traceback is printed rather than re-raised.
        del dataiterator
        logger.info('Save ckpt on exception ...')
        np.save('result{}.npy'.format(step),superview)
        save_ckpt(output_dir, args, step, train_size, maskRCNN, optimizer)
        logger.info('Save ckpt done.')
        stack_trace = traceback.format_exc()
        print(stack_trace)

    finally:
        if args.use_tfboard and not args.no_save:
            tblogger.close()
def main(args):
    """Run detection on every frame of a video and display the result live.

    Each frame is resized to 1280x720, passed through the optional RPN and
    every (weights, yaml) pair in ``args.models_to_run``, annotated with the
    detections and an FPS counter, and shown in an OpenCV window. Pressing
    ``q`` or reaching the end of the video stops the loop.

    Note that the models are re-initialized and the Caffe2 workspace is
    reset for every frame.

    Args:
        args: Object exposing ``video_name``, ``models_to_run``, ``rpn_pkl``
            and (when ``rpn_pkl`` is set) ``rpn_cfg``.
    """
    # A dummy COCO dataset that includes only the 'classes' field.
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    # Snapshot the initial Detectron config so it can be restored per model.
    # yaml.load() without an explicit Loader is deprecated and is a TypeError
    # on PyYAML >= 6. The text parsed here is our own dump (trusted), so the
    # full Loader is used to round-trip the config objects.
    cfg_orig = yaml.load(yaml.dump(cfg), Loader=yaml.Loader)
    print("video is :",args.video_name)
    cap = cv2.VideoCapture(args.video_name)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            t1 = time.time()
            frame = cv2.resize(frame,dsize=(1280,720))
            if args.rpn_pkl is not None:
                proposal_boxes, _proposal_scores = get_rpn_box_proposals(frame, args)
                workspace.ResetWorkspace()
            else:
                proposal_boxes = None

            cls_boxes, cls_segms, cls_keyps = None, None, None
            for i in range(0, len(args.models_to_run), 2):
                pkl = args.models_to_run[i]
                yml = args.models_to_run[i + 1]
                cfg.immutable(False)
                # Restore the initial global Detectron config.
                merge_cfg_from_cfg(cfg_orig)
                # Load the yaml config file and merge it into the global config.
                merge_cfg_from_file(yml)
                if len(pkl) > 0:
                    weights_file = pkl
                else:
                    weights_file = cfg.TEST.WEIGHTS
                # Number of GPUs to use.
                cfg.NUM_GPUS = 1
                assert_and_infer_cfg()
                # Initialize a model from the global cfg.
                model = model_engine.initialize_model_from_cfg(weights_file)
                with c2_utils.NamedCudaScope(0):
                    # Run the full detection inference on the frame.
                    cls_boxes_, cls_segms_, cls_keyps_ = model_engine.im_detect_all(model, frame, proposal_boxes)
                # Keep the latest non-None result of each kind.
                cls_boxes = cls_boxes_ if cls_boxes_ is not None else cls_boxes
                cls_segms = cls_segms_ if cls_segms_ is not None else cls_segms
                cls_keyps = cls_keyps_ if cls_keyps_ is not None else cls_keyps
                workspace.ResetWorkspace()

            # Build the frame with the detections drawn on it.
            frame = vis_utils.vis_one_image_opencv(
                frame, cls_boxes, segms=cls_segms, keypoints=cls_keyps,
                thresh=0.8, kp_thresh=2, show_box=True,
                dataset=dummy_coco_dataset, show_class=True)
            t2 = time.time()
            durr = float(t2-t1)
            fps = 1.0 / durr
            cv2.putText(frame,"fps:%.3f"%fps,(20,20),4, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
            cv2.imshow('Detection', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if inference raises.
        cap.release()
        cv2.destroyAllWindows()
def detect_pose(img_output_list, ckpt, cfg_dict, anchor_poses, njts, gpuid=-1):
    """Detect poses in a list of images.

    Args:
        img_output_list: List of couples ``(path_to_image,
            path_to_outputfile)``. When the output path is ``None`` nothing
            is written for that image.
        ckpt: Model weights, handed to ``net_utils.load_ckpt``.
        cfg_dict: Configuration merged into the global ``cfg``.
        anchor_poses: The anchor poses; used through ``.shape`` and
            ``.reshape``, so an array is expected.
            NOTE(review): the previous docstring also allowed a file name,
            but nothing in this function loads one -- confirm with callers.
        njts: Number of joints in the model.
        gpuid: ``-1`` for CPU mode; any value ``>= 0`` enables CUDA (the
            model is then placed with ``model.cuda()`` and
            ``device_ids=[0]``, whatever the exact value).

    Returns:
        A list with one dict per image holding ``'pose2d'``, ``'pose3d'``,
        ``'rois'`` and ``'score'``.
    """
    if gpuid >= 0:
        assert torch.cuda.is_available(), "You should launch the script on cpu if cuda is not available"

    # load config and network
    print('loading the model')
    cfg.immutable(False)
    _merge_a_into_b(cfg_dict, cfg)
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False
    cfg.CUDA = gpuid>=0
    assert_and_infer_cfg()
    model = LCRNet(njts)
    if cfg.CUDA:
        model.cuda()
    net_utils.load_ckpt(model, ckpt)
    model = mynn.DataParallel(model, cpu_keywords=['im_info', 'roidb'], minibatch=True, device_ids=[0])
    model.eval()

    output = []
    # iterate over image
    for imgname, outputname in img_output_list:
        print('processing '+imgname)
        # load the images and prepare the blob
        im = cv2.imread( imgname )
        inputs, im_scale = _get_blobs(im, None, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)  # prepare blobs

        # forward
        if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
            _add_multilevel_rois_for_test(inputs, 'rois')  # Add multi-level rois for FPN
        if cfg.PYTORCH_VERSION_LESS_THAN_040:  # forward
            inputs['data'] = [Variable(torch.from_numpy(inputs['data']), volatile=True)]
            inputs['im_info'] = [Variable(torch.from_numpy(inputs['im_info']), volatile=True)]
            return_dict = model(**inputs)
        else:
            inputs['data'] = [torch.from_numpy(inputs['data'])]
            inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]
            with torch.no_grad():
                return_dict = model(**inputs)
        # get boxes (column 0 of each roi is skipped; coordinates are mapped
        # back to the original image scale)
        rois = return_dict['rois'].data.cpu().numpy()
        boxes = rois[:, 1:5] / im_scale
        # get scores
        scores = return_dict['cls_score'].data.cpu().numpy().squeeze()
        scores = scores.reshape([-1, scores.shape[-1]])  # In case there is 1 proposal
        # get pose_deltas
        pose_deltas = return_dict['pose_pred'].data.cpu().numpy().squeeze()
        # squeeze() drops the proposal axis when there is a single proposal,
        # which would break the 2-D indexing below; restore it as for scores.
        pose_deltas = pose_deltas.reshape([-1, pose_deltas.shape[-1]])
        # project poses on boxes
        boxes_size = boxes[:,2:4]-boxes[:,0:2]
        offset = np.concatenate( ( boxes[:,:2], np.zeros((boxes.shape[0],3),dtype=np.float32)), axis=1)  # x,y top-left corner for each box
        scale = np.concatenate( ( boxes_size[:,:2], np.ones((boxes.shape[0],3),dtype=np.float32)), axis=1)  # width, height for each box
        # NT is a module-level constant defined outside this function
        # (presumably 5, matching the 5 offset/scale columns -- TODO confirm).
        offset_poses = np.tile( np.concatenate( [np.tile( offset[:,k:k+1], (1,njts)) for k in range(NT)], axis=1), (1,anchor_poses.shape[0]))  # x,y top-left corner for each pose
        scale_poses = np.tile( np.concatenate( [np.tile( scale[:,k:k+1], (1,njts)) for k in range(NT)], axis=1), (1,anchor_poses.shape[0]))  # x- y- scale for each pose
        pred_poses = offset_poses + np.tile( anchor_poses.reshape(1,-1), (boxes.shape[0],1) ) * scale_poses  # put anchor poses into the boxes
        pred_poses += scale_poses * pose_deltas[:,njts*NT:]  # apply regression (do not consider the one for the background class)

        # Split every pose into its first 2 (pose2d) and remaining (pose3d)
        # coordinates per joint.
        pred_poses = pred_poses.reshape( pred_poses.shape[0], anchor_poses.shape[0], 5, njts).transpose( (0,1,3,2) )
        tosave = {'pose2d': pred_poses[:, :, :, :2],
                  'pose3d': pred_poses[:, :, :, 2:],
                  'rois': boxes,
                  'score': scores
                  }
        output.append( tosave )
        if outputname is not None:
            outputdir = os.path.dirname(outputname)
            if len(outputdir)>0 and not os.path.isdir(outputdir):
                # Create the directory tree directly instead of shelling out
                # to `mkdir -p`, which breaks on paths with spaces or shell
                # metacharacters and is not portable.
                os.makedirs(outputdir)
            savemat(outputname, tosave, do_compression=True)

    return output