def __init__(self, score_thresh=0.05, nms=0.5, detections_per_img=100,
             box_coder=None, cls_agnostic_bbox_reg=False, bbox_aug_enabled=False):
    """
    Arguments:
        score_thresh (float)
        nms (float)
        detections_per_img (int)
        box_coder (BoxCoder)
    """
    super(PostProcessor, self).__init__()
    self.score_thresh = score_thresh
    self.nms = nms
    self.detections_per_img = detections_per_img
    if box_coder is None:
        box_coder = BoxCoder(weights=(10., 10., 5., 5.))
    self.box_coder = box_coder
    self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg
    self.bbox_aug_enabled = bbox_aug_enabled
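# Usage sketch (hypothetical, not from this repo): constructing the
# post-processor above with an explicit coder. The keyword values shown are
# simply the defaults defined in the __init__ above.
box_coder = BoxCoder(weights=(10., 10., 5., 5.))
post_processor = PostProcessor(
    score_thresh=0.05,        # drop detections below this classification score
    nms=0.5,                  # IoU threshold used by per-class NMS
    detections_per_img=100,   # keep at most this many boxes per image
    box_coder=box_coder,
)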
def __init__(self, cfg, crop_size, mode, post_branch, size_divisible=0):
    self.size_divisible = size_divisible
    self.mode = mode
    self.crop_size = crop_size
    self.special_deal = cfg.SEARCH.PREFIX_ANCHOR
    self.post_branch = post_branch
    if self.mode == 0:
        if self.post_branch == "retina":
            self.anchor_generator = make_anchor_generator_retinanet(cfg)
            self.box_coder = BoxCoder(weights=(10., 10., 5., 5.))
            self.matcher = Matcher(
                cfg.MODEL.RETINANET.FG_IOU_THRESHOLD,
                cfg.MODEL.RETINANET.BG_IOU_THRESHOLD,
                allow_low_quality_matches=True,
            )
            self.loss_evaluator = RetinaNetLossComputation(
                cfg, self.matcher, self.box_coder
            )
        elif self.post_branch == "densebox":
            self.loss_evaluator = DenseBoxLossComputation(cfg)
        else:
            raise ValueError(
                "Post branch {} is not supported yet".format(self.post_branch))
def __init__(
    self,
    pre_nms_top_n,
    post_nms_top_n,
    nms_thresh,
    nms_method,
    nms_sigma,
    nms_min_score,
    min_size,
    box_coder=None,
    fpn_post_nms_top_n=None,
):
    """
    Arguments:
        pre_nms_top_n (int)
        post_nms_top_n (int)
        nms_thresh (float)
        nms_method (str)
        nms_sigma (float)
        nms_min_score (float)
        min_size (int)
        box_coder (BoxCoder)
        fpn_post_nms_top_n (int)
    """
    super(RPNPostProcessor, self).__init__()
    self.pre_nms_top_n = pre_nms_top_n
    self.post_nms_top_n = post_nms_top_n
    self.nms_thresh = nms_thresh
    self.min_size = min_size
    self.nms_method = nms_method
    self.nms_sigma = nms_sigma
    self.nms_min_score = nms_min_score
    if box_coder is None:
        box_coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))
    self.box_coder = box_coder
    if fpn_post_nms_top_n is None:
        fpn_post_nms_top_n = post_nms_top_n
    self.fpn_post_nms_top_n = fpn_post_nms_top_n
def __init__(self, cfg):
    super(RetinaNetModule, self).__init__()
    self.cfg = cfg.clone()
    anchor_generator = make_anchor_generator_retinanet(cfg)
    head = RetinaNetHead(cfg)
    box_coder = BoxCoder(weights=(10., 10., 5., 5.))
    # NOTE: the original condition tested MODEL.SPARSE_MASK_ON twice;
    # reduced here to a single check.
    if self.cfg.MODEL.SPARSE_MASK_ON:
        raise NotImplementedError
    else:
        box_selector_test = make_retinanet_postprocessor(cfg, 100, box_coder)
    box_selector_train = None
    loss_evaluator = make_free_anchor_loss_evaluator(cfg, box_coder) \
        if cfg.FREEANCHOR.FREEANCHOR_ON \
        else make_retinanet_loss_evaluator(cfg, box_coder)
    self.anchor_generator = anchor_generator
    self.head = head
    self.box_selector_test = box_selector_test
    self.box_selector_train = box_selector_train
    self.loss_evaluator = loss_evaluator
def make_roi_box_loss_evaluator(cfg):
    matcher = Matcher(
        cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
        cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )
    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)
    fg_bg_sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
        cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
    )
    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    if cfg.MODEL.ROI_BOX_HEAD.USE_FOCAL:
        print("USING FOCAL LOSS FOR BOX HEAD")
        # NOTE: focal loss for the box head is not implemented yet; the raise
        # below makes the import and cls_loss assignment unreachable.
        raise NotImplementedError()
        from maskrcnn_benchmark.modeling.rpn.retinanet.loss import SigmoidFocalLoss
        cls_loss = SigmoidFocalLoss(
            cfg.MODEL.ROI_BOX_HEAD.FOCAL.LOSS_GAMMA,
            cfg.MODEL.ROI_BOX_HEAD.FOCAL.LOSS_ALPHA
        )
    else:
        cls_loss = F.cross_entropy
    loss_evaluator = FastRCNNLossComputation(
        matcher,
        fg_bg_sampler,
        box_coder,
        cls_loss,
        cls_agnostic_bbox_reg
    )
    return loss_evaluator
def make_roi_box_loss_evaluator(cfg):
    matcher = Matcher(
        cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
        cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )
    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)
    fg_bg_sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
        cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
    )
    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    loss_evaluator = FastRCNNLossComputation(
        matcher, fg_bg_sampler, box_coder, cls_agnostic_bbox_reg
    )
    return loss_evaluator
def __init__(self, pre_nms_thresh, pre_nms_top_n, nms_thresh,
             fpn_post_nms_top_n, min_size, num_classes,
             box_coder=None, scheme="fl"):
    """
    Arguments:
        pre_nms_thresh (float)
        pre_nms_top_n (int)
        nms_thresh (float)
        fpn_post_nms_top_n (int)
        min_size (int)
        num_classes (int)
        box_coder (BoxCoder)
    """
    super(RetinaNetPostProcessor, self).__init__(
        pre_nms_thresh, 0, nms_thresh, min_size)
    self.pre_nms_thresh = pre_nms_thresh
    self.pre_nms_top_n = pre_nms_top_n
    self.nms_thresh = nms_thresh
    self.fpn_post_nms_top_n = fpn_post_nms_top_n
    self.min_size = min_size
    self.num_classes = num_classes
    if box_coder is None:
        box_coder = BoxCoder(weights=(10., 10., 5., 5.))
    self.box_coder = box_coder
    # Select the per-level forward depending on the scoring scheme:
    # "obj" uses the objectness variant, anything else the focal-loss variant.
    if scheme == "obj":
        self.forward_for_single_feature_map = \
            self.forward_for_single_feature_map_obj
    else:
        self.forward_for_single_feature_map = \
            self.forward_for_single_feature_map_fl
def __init__(self, cfg, in_channels):
    super(RPNModule, self).__init__()
    self.cfg = cfg.clone()
    anchor_generator = make_anchor_generator(cfg)
    rpn_head = registry.RPN_HEADS[cfg.MODEL.RPN.RPN_HEAD]
    head = rpn_head(
        cfg, in_channels, anchor_generator.num_anchors_per_location()[0]
    )
    rpn_box_coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))
    box_selector_train = make_rpn_postprocessor(cfg, rpn_box_coder, is_train=True)
    box_selector_test = make_rpn_postprocessor(cfg, rpn_box_coder, is_train=False)
    loss_evaluator = make_rpn_loss_evaluator(cfg, rpn_box_coder)
    self.anchor_generator = anchor_generator
    self.head = head
    self.box_selector_train = box_selector_train
    self.box_selector_test = box_selector_test
    self.loss_evaluator = loss_evaluator
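# For context, a minimal sketch of how the pieces built above are usually
# wired together at run time (assumed from the standard maskrcnn_benchmark
# RPNModule.forward flow; the actual forward is not part of this excerpt):
def rpn_forward_sketch(rpn, images, features, targets=None):
    # head predicts objectness and box deltas per feature level
    objectness, rpn_box_regression = rpn.head(features)
    anchors = rpn.anchor_generator(images, features)
    if rpn.training:
        boxes = rpn.box_selector_train(
            anchors, objectness, rpn_box_regression, targets)
        loss_objectness, loss_rpn_box_reg = rpn.loss_evaluator(
            anchors, objectness, rpn_box_regression, targets)
        return boxes, {"loss_objectness": loss_objectness,
                       "loss_rpn_box_reg": loss_rpn_box_reg}
    boxes = rpn.box_selector_test(anchors, objectness, rpn_box_regression)
    return boxes, {}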
def make_roi_box_post_processor(cfg):
    use_fpn = cfg.MODEL.ROI_HEADS.USE_FPN
    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)
    score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH
    nms_thresh = cfg.MODEL.ROI_HEADS.NMS
    detections_per_img = cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG
    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    evaluation_flags = cfg.TEST.EVALUATION_FLAGS
    # Return one post-processor per enabled head mode:
    #   0 : conv cls + conv reg
    #   1 : fc cls + fc reg
    #   2 : fc cls + conv reg
    #   3 : fc cls + conv reg (double-head-ext)
    # evaluation_flags, e.g. (1, 1, 1, 1), selects which modes are built.
    postprocessor = []
    for i, value in enumerate(evaluation_flags):
        print(i, value)
        if value == 1:
            postprocessor_ = PostProcessor(
                score_thresh,
                nms_thresh,
                detections_per_img,
                box_coder,
                cls_agnostic_bbox_reg,
                mode=i,
            )
            postprocessor.append(postprocessor_)
    assert len(postprocessor) > 0
    return postprocessor
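# Hypothetical illustration of the factory above: with
# cfg.TEST.EVALUATION_FLAGS = (1, 0, 1, 0), the returned list holds two
# post-processors, built with mode=0 and mode=2 respectively.
processors = make_roi_box_post_processor(cfg)
assert len(processors) == sum(cfg.TEST.EVALUATION_FLAGS)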
def make_roi_box_post_processor(cfg):
    use_fpn = cfg.MODEL.ROI_HEADS.USE_FPN
    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)
    score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH
    nms_thresh = cfg.MODEL.ROI_HEADS.NMS
    detections_per_img = cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG
    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    classification_activate = cfg.MODEL.ROI_BOX_HEAD.CLASSIFICATION_ACTIVATE
    nms_policy = cfg.MODEL.ROI_HEADS.NMS_POLICY
    postprocessor = PostProcessor(
        score_thresh,
        nms_thresh,
        detections_per_img,
        box_coder,
        cls_agnostic_bbox_reg,
        classification_activate=classification_activate,
        nms_policy=nms_policy,
        cfg=cfg,
    )
    return postprocessor
def make_roi_box_post_processor(cfg):
    use_fpn = cfg.MODEL.ROI_HEADS.USE_FPN
    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)
    score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH
    nms_thresh = cfg.MODEL.ROI_HEADS.NMS
    detections_per_img = cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG
    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    bbox_aug_enabled = cfg.TEST.BBOX_AUG.ENABLED
    amodal_inference = cfg.INPUT.AMODAL
    postprocessor = PostProcessor(
        score_thresh,
        nms_thresh,
        detections_per_img,
        box_coder,
        cls_agnostic_bbox_reg,
        bbox_aug_enabled,
        amodal_inference
    )
    return postprocessor
def __init__(self, cfg, in_channels):
    super(RetinaNetModule, self).__init__()
    self.cfg = cfg.clone()
    anchor_generator = make_anchor_generator_retinanet(cfg)
    head = RetinaNetHead(cfg, in_channels)
    box_coder = BoxCoder(weights=(10., 10., 5., 5.))
    box_selector_train = make_retinanet_postprocessor(cfg, box_coder, is_train=True)
    box_selector_test = make_retinanet_postprocessor(cfg, box_coder, is_train=False)
    loss_evaluator = make_retinanet_loss_evaluator(cfg, box_coder)
    self.anchor_generator = anchor_generator
    self.head = head
    self.box_selector_train = box_selector_train
    self.box_selector_test = box_selector_test
    self.loss_evaluator = loss_evaluator
    self.box_subsample = make_retinanet_box_subsample(cfg)
def make_roi_box_loss_evaluator(cfg):
    matcher = Matcher(
        cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
        cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )
    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)
    fg_bg_sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
        cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
    )
    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    class_loss_func = cfg.MODEL.ROI_BOX_HEAD.CLASS_LOSS_FUNC
    class_loss_extra_argv = {}
    if class_loss_func == "CrossEntropyLoss":
        class_loss_extra_argv["smooth_eps"] = cfg.MODEL.ROI_BOX_HEAD.CLASS_LOSS_SMOOTH_EPS
    elif class_loss_func == "SoftmaxFocalLoss":
        class_loss_extra_argv["alpha"] = cfg.MODEL.ROI_BOX_HEAD.CLASS_LOSS_ALPHA
        class_loss_extra_argv["gamma"] = cfg.MODEL.ROI_BOX_HEAD.CLASS_LOSS_GAMMA
        class_loss_extra_argv["smooth"] = cfg.MODEL.ROI_BOX_HEAD.CLASS_LOSS_SMOOTH
        class_loss_extra_argv["num_classes"] = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    loss_evaluator = FastRCNNLossComputation(
        matcher,
        fg_bg_sampler,
        box_coder,
        cls_agnostic_bbox_reg,
        class_loss_func=class_loss_func,
        class_loss_extra_argv=class_loss_extra_argv,
    )
    return loss_evaluator
def test_box_decoder(self):
    """
    Match unit test UtilsBoxesTest.TestBboxTransformRandom in
    caffe2/operators/generate_proposals_op_util_boxes_test.cc
    """
    box_coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))
    bbox = torch.from_numpy(
        np.array([
            175.62031555, 20.91103172, 253.352005, 155.0145874,
            169.24636841, 4.85241556, 228.8605957, 105.02092743,
            181.77426147, 199.82876587, 192.88427734, 214.0255127,
            174.36262512, 186.75761414, 296.19091797, 231.27906799,
            22.73153877, 92.02596283, 135.5695343, 208.80291748,
        ]).astype(np.float32).reshape(-1, 4))
    deltas = torch.from_numpy(
        np.array([
            0.47861834, 0.13992102, 0.14961673, 0.71495209,
            0.29915856, -0.35664671, 0.89018666, 0.70815367,
            -0.03852064, 0.44466892, 0.49492538, 0.71409376,
            0.28052918, 0.02184832, 0.65289006, 1.05060139,
            -0.38172557, -0.08533806, -0.60335309, 0.79052375,
        ]).astype(np.float32).reshape(-1, 4))
    gt_bbox = np.array([
        206.949539, -30.715202, 297.387665, 244.448486,
        143.871216, -83.342888, 290.502289, 121.053398,
        177.430283, 198.666245, 196.295273, 228.703079,
        152.251892, 145.431564, 387.215454, 274.594238,
        5.062420, 11.040955, 66.328903, 269.686218,
    ]).astype(np.float32).reshape(-1, 4)
    results = box_coder.decode(deltas, bbox)
    np.testing.assert_allclose(results.detach().numpy(), gt_bbox, atol=1e-4)
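# For reference, the transform exercised by the test above is the standard
# Faster R-CNN box decoding, with the legacy "+1" box-size convention used by
# the caffe2 test it mirrors. A self-contained NumPy sketch (unit weights
# reproduce the expected values above; the function name is ours):
import numpy as np

def decode_boxes(deltas, boxes, weights=(1.0, 1.0, 1.0, 1.0)):
    """Apply (dx, dy, dw, dh) deltas to xyxy reference boxes."""
    wx, wy, ww, wh = weights
    widths = boxes[:, 2] - boxes[:, 0] + 1.0   # legacy "+1" size convention
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy = deltas[:, 0] / wx, deltas[:, 1] / wy
    dw, dh = deltas[:, 2] / ww, deltas[:, 3] / wh
    pred_ctr_x = dx * widths + ctr_x           # shift the center...
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths               # ...and rescale the size
    pred_h = np.exp(dh) * heights
    return np.stack([
        pred_ctr_x - 0.5 * pred_w,
        pred_ctr_y - 0.5 * pred_h,
        pred_ctr_x + 0.5 * pred_w - 1.0,
        pred_ctr_y + 0.5 * pred_h - 1.0,
    ], axis=1)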
def do_train(
    model,
    model_ema,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    local_rank,
    checkpoint_period,
    cfg_arg,
    arguments,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    meters_ema = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    ema_decay = arguments["ema_decay"]
    loss_semi = arguments['loss_semi']
    temporal_save_path = cfg_arg["temporal_save_path"]
    model.train()
    model_ema.train()
    box_coder = BoxCoder(weights=(10., 10., 5., 5.))
    temporal_ens = {}
    start_training_time = time.time()
    end = time.time()
    labeled_database = arguments["HYPER_PARAMETERS"]['LABELED_DATABASE']
    temporal_supervised_losses = []
    for iteration, (images, targets_with_trans_info, idx) in enumerate(
            data_loader, start_iter):
        targets = [_iter[0] for _iter in targets_with_trans_info]
        trans_info = [_iter[1] for _iter in targets_with_trans_info]
        try:
            db_idx, img_idx, idx_name, bboxes_batch = map_to_img(
                data_loader, idx)
            temporal_ens_bboxes = [
                ensemble_bboxes(_boxes, _im_sz, arguments["ANCHOR_STRIDES"],
                                arguments["HYPER_PARAMETERS"]['ENS_THRE'],
                                device)
                for _boxes, _im_sz in zip(bboxes_batch, images.image_sizes)
            ]
            img_size = [(_sz[1], _sz[0]) for _sz in images.image_sizes]
            pred_trans_info = copy.deepcopy(trans_info)
            temporal_ens_pred = []
            for i, _sz in enumerate(img_size):
                pred_trans_info[i][1] = _sz
                temporal_ens_per = [
                    trans_reverse(_temporal_ens, pred_trans_info[i]).to(device)
                    for _temporal_ens in temporal_ens_bboxes[i]
                ]
                temporal_ens_pred.append(temporal_ens_per)
            db_w = []
            for i, _db in enumerate(db_idx):
                if _db not in labeled_database:
                    # Unlabeled image: replace its target with a dummy box and
                    # give the supervised loss zero weight.
                    _bbox = BoxList(
                        torch.zeros([1, 4]),
                        (images.image_sizes[i][1], images.image_sizes[i][0]),
                        mode="xyxy")
                    _bbox.add_field('labels', torch.ones([1]))
                    targets[i] = _bbox
                    db_w.append(0.)
                else:
                    db_w.append(1.)
            if any(len(target) < 1 for target in targets):
                logger.error(
                    f"Iteration={iteration + 1} || Image Ids used for "
                    f"training {idx} || targets Length="
                    f"{[len(target) for target in targets]}")
                continue
            data_time = time.time() - end
            iteration = iteration + 1
            arguments["iteration"] = iteration
            images = images.to(device)
            targets = [target.to(device) for target in targets]
            update_ema_variables(model, model_ema, ema_decay, iteration)
            _loss_dict, result = model(images, targets)
            # Mask the losses so that only labeled-database images contribute
            # to the supervised terms.
            with torch.no_grad():
                _loss_dict_ema, result_ema = model_ema(images, targets)
            is_labeled_db_weight = torch.tensor(
                db_w, dtype=torch.float32).to(device)
            loss_dict = {}
            loss_dict_ema = {}
            for _key in _loss_dict.keys():
                loss_dict[_key] = torch.sum(
                    torch.stack(_loss_dict[_key], dim=0) * is_labeled_db_weight)
                loss_dict_ema[_key] = torch.sum(
                    torch.stack(_loss_dict_ema[_key], dim=0) * is_labeled_db_weight)
            # result_origin = [trans_reverse(_res, _info)
            #                  for _res, _info in zip(result_ema, trans_info)]
            # result_origin = predict_collect_postprocess(
            #     arguments['postprocess'], result_ema, trans_info)
            result_origin = predict_retina_postprocess(
                arguments['postprocess'], box_coder, result_ema, trans_info,
                images.image_sizes)
            # Balance supervised and semi-supervised losses with a running
            # mean of the supervised loss over the last 100 iterations.
            with torch.no_grad():
                supervised_loss = (loss_dict['loss_retina_cls'] +
                                   loss_dict['loss_retina_reg']) / (
                                       np.sum(db_w) + 0.1)
                temporal_supervised_losses.append(supervised_loss)
                temporal_supervised_losses = temporal_supervised_losses[-100:]
                sup_loss = torch.stack(temporal_supervised_losses).mean()
                meters.update(sup_loss=sup_loss)
                if get_world_size() > 1:
                    # NOTE: the reduced mean is not reassigned, so sup_loss
                    # stays local to this rank.
                    torch.distributed.all_reduce(
                        torch.stack(temporal_supervised_losses).mean(),
                        op=torch.distributed.ReduceOp.SUM)
                # Saturates at 1 once the running supervised loss drops
                # below 0.28.
                balance_weight = min(1. / (sup_loss / 0.28)**12, 1.)
            semi_loss = semi_loss_fn(
                result,
                result_ema,
                temporal_ens_pred,
                images.image_sizes,
                box_coder,
                n_cls=arguments["HYPER_PARAMETERS"]['NCLS'],
                reg_cons_w=arguments["HYPER_PARAMETERS"]['REG_CONSIST_WEIGHT'])
            semi_loss_weight = semi_weight_by_epoch(
                iteration,
                start_iter=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM'] *
                arguments["HYPER_PARAMETERS"]['START_ITER'],
                rampup_length=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM'] *
                arguments["HYPER_PARAMETERS"]['RAMPUP_LENGTH'],
                consistence_weight=arguments["HYPER_PARAMETERS"]
                ['CONSISTENCE_WEIGHT'],
                consistence_trunc=arguments["HYPER_PARAMETERS"]
                ['MAX_CONSISTENT_LOSS'])
            for _key in semi_loss.keys():
                # loss_dict[_key] = torch.sum(semi_loss[_key] *
                #     (1 - is_labeled_db_weight)) * semi_loss_weight * balance_weight
                loss_dict[_key] = torch.sum(semi_loss[_key]) * semi_loss_weight
            for i, (_id, _labeled) in enumerate(zip(idx_name, db_w)):
                result_dict = {
                    'iteration': iteration,
                    'result': result_origin[i]
                }
                if _id in temporal_ens.keys():
                    temporal_ens[_id].append(result_dict)
                else:
                    temporal_ens[_id] = [result_dict]
            losses = sum(loss for loss in loss_dict.values())
            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(loss=losses_reduced, **loss_dict_reduced)
            loss_dict_reduced_ema = reduce_loss_dict(loss_dict_ema)
            losses_reduced_ema = sum(
                loss for loss in loss_dict_reduced_ema.values())
            meters_ema.update(loss=losses_reduced_ema, **loss_dict_reduced_ema)
            optimizer.zero_grad()
            # Note: If mixed precision is not used, this ends up doing nothing
            # Otherwise apply loss scaling for mixed-precision recipe
            with amp.scale_loss(losses, optimizer) as scaled_losses:
                scaled_losses.backward()
            warmup_iters = (arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM'] *
                            arguments["HYPER_PARAMETERS"]['START_ITER'])
            if iteration >= warmup_iters:
                optimizer.step()
            # scheduler.step()
            batch_time = time.time() - end
            end = time.time()
            meters.update(time=batch_time, data=data_time)
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            if iteration % 20 == 0 or iteration == max_iter:
                logger.info(
                    meters.delimiter.join([
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "{meters_ema}",
                        "lr: {lr:.6f}",
                        "semi_w: {semi_w:2.3f}",
                        "supervised loss: {sup_loss:2.3f}",
                        "balance_weight: {balance_weight:2.3f}",
                        "max mem: {memory:.0f}",
                    ]).format(
                        eta=eta_string,
                        iter=iteration,
                        meters=str(meters),
                        meters_ema=str(meters_ema),
                        lr=optimizer.param_groups[0]["lr"],
                        semi_w=semi_loss_weight,
                        sup_loss=sup_loss,
                        balance_weight=balance_weight,
                        memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                    ))
            if (iteration - 50) % 100 == 0:
                # Flush the accumulated per-image predictions to disk.
                for _key in temporal_ens.keys():
                    for _iter in temporal_ens[_key]:
                        str_folder = os.path.join(temporal_save_path, _key)
                        str_file = '{}/{}_loc{}_iter_x{:07d}.pt'.format(
                            str_folder, _key, local_rank, _iter['iteration'])
                        if not os.path.exists(str_folder):
                            os.makedirs(str_folder)
                        torch.save(_iter['result'], str_file)
                        del _iter['result']
                del temporal_ens
                temporal_ens = {}
            if iteration % checkpoint_period == 0:
                save_time = time.time()
                checkpointer.save("model_{:07d}".format(iteration), **arguments)
            if iteration == max_iter:
                checkpointer.save("model_final", **arguments)
        except Exception as e:
            print('error in file ', idx_name, img_idx)
            raise e
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
def __init__(self, cfg, in_channels):
    super(NewROIBoxHead, self).__init__(cfg, in_channels)
    self.bbox_dict = dict(bbox=None, target=None)
    self.box_coder = BoxCoder(weights=cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS)
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help="The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)
    postprocessor = make_retinanet_postprocessor(
        cfg, BoxCoder(weights=(10.0, 10.0, 5.0, 5.0)), False)
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=distributed)
    semi_loss = make_semi_box_loss_evaluator(cfg)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            postprocessor,
            semi_loss,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            anchor_strides=cfg.MODEL.RETINANET.ANCHOR_STRIDES,
        )
        synchronize()
def make_roi_box_loss_evaluator(cfg):
    matcher = Matcher(
        cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
        cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )
    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)
    fg_bg_sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
        cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION)
    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    if cfg.MODEL.ROI_BOX_HEAD.USE_FOCAL_LOSS:
        focal_loss = SigmoidFocalLoss(cfg.MODEL.ROI_BOX_HEAD.FOCAL_LOSS.GAMMA,
                                      cfg.MODEL.ROI_BOX_HEAD.FOCAL_LOSS.ALPHA)
        # focal_loss = SoftmaxFocalLoss(
        #     class_num=cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES - 1,
        #     gamma=cfg.MODEL.RPN.FOCAL_LOSS.GAMMA,
        #     alpha=cfg.MODEL.RPN.FOCAL_LOSS.ALPHA,
        # )
    else:
        focal_loss = None
    if cfg.MODEL.ROI_BOX_HEAD.USE_CLASS_BALANCE_LOSS and \
            os.path.isfile(cfg.MODEL.ROI_BOX_HEAD.CLASS_BALANCE_LOSS.WEIGHT_FILE):
        num_class_list = ClassBalanceLoss.load_class_samples(
            filename=cfg.MODEL.ROI_BOX_HEAD.CLASS_BALANCE_LOSS.WEIGHT_FILE,
            category_type='category')
        class_balance_weight = ClassBalanceLoss(
            device=torch.device(cfg.MODEL.DEVICE),
            num_class_list=num_class_list,
            alpha=cfg.MODEL.ROI_BOX_HEAD.CLASS_BALANCE_LOSS.ALPHA,
            beta=cfg.MODEL.ROI_BOX_HEAD.CLASS_BALANCE_LOSS.BETA)
    else:
        class_balance_weight = None
    if cfg.MODEL.ROI_BOX_HEAD.USE_WING_LOSS:
        wing_loss = WingLoss(
            width=cfg.MODEL.ROI_BOX_HEAD.WING_LOSS.WIDTH,
            curvature=cfg.MODEL.ROI_BOX_HEAD.WING_LOSS.SIGMA,
        )
    else:
        wing_loss = None
    if cfg.MODEL.ROI_BOX_HEAD.USE_SELF_ADJUST_SMOOTH_L1_LOSS:
        adjust_smooth_l1_loss = AdjustSmoothL1Loss(
            4,
            beta=cfg.MODEL.ROI_BOX_HEAD.SELF_ADJUST_SMOOTH_L1_LOSS.BBOX_REG_BETA)
    else:
        adjust_smooth_l1_loss = None
    if cfg.MODEL.ROI_BOX_HEAD.USE_BALANCE_L1_LOSS:
        balance_l1_loss = BalancedL1Loss(
            alpha=cfg.MODEL.ROI_BOX_HEAD.BALANCE_L1_LOSS.ALPHA,
            beta=cfg.MODEL.ROI_BOX_HEAD.BALANCE_L1_LOSS.BETA,
            gamma=cfg.MODEL.ROI_BOX_HEAD.BALANCE_L1_LOSS.GAMMA)
    else:
        balance_l1_loss = None
    loss_evaluator = FastRCNNLossComputation(
        matcher,
        fg_bg_sampler,
        box_coder,
        cls_agnostic_bbox_reg,
        focal_loss=focal_loss,
        class_balance_weight=class_balance_weight,
        wing_loss=wing_loss,
        adjust_smooth_l1_loss=adjust_smooth_l1_loss,
        balance_l1_loss=balance_l1_loss,
    )
    return loss_evaluator
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help="The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    str_folder = './tempor_pred_coco_bn8/'
    str_img = '/JPEGImages/'
    str_output = 'output/'
    args = parser.parse_args()
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    # postprocessor = PostProcessor_retina(
    #     score_thresh=0.05,
    #     nms=0.5,
    #     detections_per_img=100,
    #     box_coder=BoxCoder(weights=(10.0, 10.0, 5.0, 5.0)),
    #     cls_agnostic_bbox_reg=False,
    #     bbox_aug_enabled=False,
    # )
    postprocessor = make_retinanet_postprocessor(
        cfg, BoxCoder(weights=(10.0, 10.0, 5.0, 5.0)), False)
    temporal_ens_list = glob.glob(str_folder + '*')
    # Keep only images belonging to the validation split.
    json_file = './MS_COCO/annotations/instances_valminusminival2014.json'
    with open(json_file, 'r') as f:
        json_info = json.load(f)
    img_name_list = [
        _img['file_name'].replace('.jpg', '') for _img in json_info['images']
    ]
    temporal_ens = []
    for _path in temporal_ens_list:
        _img_id = os.path.basename(_path)
        if _img_id in img_name_list:
            temporal_ens.append(_path)
    print('initialization done ----------------------')
    predict_dict = {}
    # single-process version
    for _iter in tqdm(temporal_ens):
        pts = glob.glob(os.path.join(_iter, '*.pt'))
        img_id = os.path.basename(_iter)
        pts_iter = [int(_id.split('_x')[-1].replace('.pt', '')) for _id in pts]
        idx_sorted = np.argsort(pts_iter)
        pts_sorted = np.array(pts)[idx_sorted]
        # bbox = one_pt_scores(pts_sorted, postprocessor)
        # bbox = multi_pt_scores(pts_sorted, postprocessor)
        # bbox = checks(pts_sorted, postprocessor)
        bbox = multi_align_ens(pts_sorted, postprocessor)
        predict_dict[img_id] = bbox[0].to('cpu')
    # parallel version
    # pool = mp.Pool(mp.cpu_count())
    # predict_list = [pool.apply(multi_process, args=(_iter, postprocessor))
    #                 for _iter in tqdm(temporal_ens)]
    # for _item in predict_list:
    #     predict_dict[_item[0]] = _item[1][0]
    torch.save(predict_dict, 'tmp.pt')
    predict_dict = torch.load('tmp.pt')
    mAp_scores = mAP(cfg, predict_dict)
    print('mAP is', mAp_scores)
    print('process end')
def train(cfg, local_rank, distributed):
    model = create_model(cfg)
    model_ema = create_model(cfg, ema=True)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    model_ema.to(device)
    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)
    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model, optimizer, opt_level=amp_opt_level)
    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )
        model_ema = DistributedDataParallel(model_ema)
    arguments = {}
    cfg_arg = {}
    arguments["iteration"] = 0
    arguments["semi_weight"] = cfg.SEMI.SEMI_WEIGHT
    cfg_arg["temporal_save_path"] = cfg.SEMI.TEMPORAL_SAVE_PATH
    arguments['loss_semi'] = make_semi_box_loss_evaluator(cfg)
    output_dir = cfg.OUTPUT_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)
    data_loader = make_data_loader_semi(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    update_ema_variables(model, model_ema)
    arguments["ema_decay"] = cfg.SEMI.EMA_DECAY
    arguments["ANCHOR_STRIDES"] = cfg.MODEL.RETINANET.ANCHOR_STRIDES
    arguments["HYPER_PARAMETERS"] = cfg.SEMI.HYPER_PARAMETERS
    arguments['postprocess'] = make_retinanet_semi_postprocessor(
        cfg, BoxCoder(weights=(10., 10., 5., 5.)), True)
    # NOTE: the learning rate is overridden here rather than taken from the
    # scheduler / config.
    for g in optimizer.param_groups:
        g['lr'] = 0.0005
    do_train(
        model,
        model_ema,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        local_rank,
        checkpoint_period,
        cfg_arg,
        arguments,
    )
    return model
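# update_ema_variables is not shown in this excerpt. The conventional
# mean-teacher update it implements looks like the following hypothetical
# sketch, which matches both call sites seen above:
# update_ema_variables(model, model_ema) and
# update_ema_variables(model, model_ema, decay, iteration).
import torch

def update_ema_variables_sketch(model, model_ema, decay=0.999, global_step=None):
    # Optionally warm up the decay so early steps track the student closely.
    if global_step is not None:
        decay = min(1.0 - 1.0 / (global_step + 1), decay)
    with torch.no_grad():
        for ema_p, p in zip(model_ema.parameters(), model.parameters()):
            # ema_p <- decay * ema_p + (1 - decay) * p
            ema_p.mul_(decay).add_(p, alpha=1.0 - decay)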
def forward(self, features, proposals, targets=None, proposals_sampled=None):
    """
    Arguments:
        features (list[Tensor]): feature-maps from possibly several levels
        proposals (list[BoxList]): proposal boxes
        targets (list[BoxList], optional): the ground-truth targets.

    Returns:
        x (Tensor): the result of the feature extractor
        proposals (list[BoxList]): during training, the subsampled proposals
            are returned. During testing, the predicted boxlists are returned
        losses (dict[Tensor]): During training, returns the losses for the
            head. During testing, returns an empty dict.
    """
    if self.training:
        # Faster R-CNN subsamples during training the proposals with a fixed
        # positive / negative ratio
        if proposals_sampled is None:
            with torch.no_grad():
                proposals_sampled = self.loss_evaluator.subsample(
                    proposals, targets)
        proposals = proposals_sampled
    # extract features that will be fed to the final classifier. The
    # feature_extractor generally corresponds to the pooler + heads
    x = self.feature_extractor(features, proposals)
    # final classifier that converts the features into predictions
    class_logits, box_regression = self.predictor(x)
    if not self.training:
        result = self.post_processor((class_logits, box_regression), proposals)
        return x, result, {}
    # TODO: loss is not needed for mean teacher when MT_ON
    if not self.cfg.MODEL.ROI_BOX_HEAD.FREEZE_WEIGHT:
        loss_classifier, loss_box_reg = self.loss_evaluator(
            [class_logits], [box_regression], proposals)
    if self.cfg.MODEL.ROI_BOX_HEAD.OUTPUT_DECODED_PROPOSAL:
        bbox_reg_weights = self.cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
        box_coder = BoxCoder(weights=bbox_reg_weights)
        boxes_per_image = [len(box) for box in proposals]
        concat_boxes = torch.cat([a.bbox for a in proposals], dim=0)
        decoded_proposals = box_coder.decode(
            box_regression.view(sum(boxes_per_image), -1), concat_boxes)
        decoded_proposals = decoded_proposals.split(boxes_per_image, dim=0)
        # decoded_proposals = self.post_processor((class_logits, box_regression), proposals)
        # make sure there are valid proposals
        for i, boxes in enumerate(decoded_proposals):
            if len(boxes) > 0:
                proposals[i].bbox = boxes.reshape(-1, 4)
    loss_dict = dict()
    if self.cfg.MODEL.MT_ON:
        loss_dict.update(class_logits=class_logits, box_logits=box_regression)
    if not self.is_mt and not self.cfg.MODEL.ROI_BOX_HEAD.FREEZE_WEIGHT:
        loss_dict.update(
            dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg))
    return x, proposals, loss_dict
def make_roi_box_loss_evaluator(cfg):
    matcher = Matcher(
        cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
        cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )
    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)
    fg_bg_sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
        cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
    )
    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    cls_loss_fn_type = cfg.MODEL.ROI_HEADS.CLASSIFICATION_LOSS_FN
    cls_loss = {}
    if cls_loss_fn_type == "CE":
        cls_loss['fn'] = _cross_entropy_with_kwargs
        cls_loss['avg'] = False
    elif cls_loss_fn_type == "Focal":
        cls_loss['fn'] = SigmoidFocalLoss(
            cfg.MODEL.ROI_HEADS.FOCAL_LOSS_GAMMA,
            cfg.MODEL.ROI_HEADS.FOCAL_LOSS_ALPHA,
        )
        cls_loss['avg'] = True
    elif cls_loss_fn_type == "ReducedFocal":
        cls_loss['fn'] = SigmoidReducedFocalLoss(
            cfg.MODEL.ROI_HEADS.FOCAL_LOSS_GAMMA,
            cfg.MODEL.ROI_HEADS.FOCAL_LOSS_ALPHA,
            cfg.MODEL.ROI_HEADS.REDUCED_FOCAL_LOSS_CUTOFF,
            cfg.MODEL.ROI_HEADS.CLASSIFICATION_LOSS_NORM,
        )
        cls_loss['avg'] = True
    elif cls_loss_fn_type == "Class":
        # A hard-coded per-class sample-count table used to live here.
        raise ValueError("deprecated class loss")
    elif cls_loss_fn_type == "AreaFocal":
        cls_loss['fn'] = SigmoidAreaReducedFocalLoss(
            cfg.MODEL.ROI_HEADS.FOCAL_LOSS_GAMMA,
            cfg.MODEL.ROI_HEADS.FOCAL_LOSS_ALPHA,
            cfg.MODEL.ROI_HEADS.AREA_LOSS_BETA,
            cfg.MODEL.ROI_HEADS.REDUCED_FOCAL_LOSS_CUTOFF,
            cfg.MODEL.ROI_HEADS.AREA_LOSS_THRESHOLD,
            cfg.MODEL.ROI_HEADS.CLASSIFICATION_LOSS_NORM,
        )
        cls_loss['avg'] = True
    elif cls_loss_fn_type == "Area":
        cls_loss['fn'] = AreaLoss(
            cfg.MODEL.ROI_HEADS.AREA_LOSS_BETA,
            cfg.MODEL.ROI_HEADS.AREA_LOSS_THRESHOLD,
        )
        cls_loss['avg'] = True
    else:
        raise ValueError(
            "invalid classification loss type: {}".format(cls_loss_fn_type))
    loss_evaluator = FastRCNNLossComputation(
        matcher,
        fg_bg_sampler,
        box_coder,
        cls_loss,
        cfg.MODEL.ROI_HEADS.CLS_LOSS_WT,
        cfg.MODEL.ROI_HEADS.BBOX_LOSS_WT,
        cls_agnostic_bbox_reg,
    )
    return loss_evaluator
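# For reference, the SigmoidFocalLoss selected by the "Focal" branch follows
# Lin et al., "Focal Loss for Dense Object Detection" (2017). A minimal
# PyTorch sketch (an assumption, not the repo's exact implementation; label 0
# is treated as background, foreground labels start at 1):
import torch
import torch.nn.functional as F

def sigmoid_focal_loss_sketch(logits, targets, gamma=2.0, alpha=0.25):
    num_classes = logits.shape[1]
    # one-hot over foreground classes; label 0 maps to the all-zero row
    t = F.one_hot(targets.clamp(min=0).long(), num_classes + 1)[:, 1:].float()
    p = torch.sigmoid(logits)
    pt = p * t + (1 - p) * (1 - t)            # prob assigned to the true side
    w = alpha * t + (1 - alpha) * (1 - t)     # class-balance weight
    return (-w * (1 - pt) ** gamma * torch.log(pt.clamp(min=1e-8))).sum()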
# NOTE: the enclosing signature was missing in this excerpt; `draw_boxes`
# is a reconstructed (hypothetical) name for the drawing helper.
def draw_boxes(image, boxes):
    for box in boxes:
        box = box.to(torch.int64)
        top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
        print(top_left, bottom_right)
        image = cv2.rectangle(image, tuple(top_left), tuple(bottom_right),
                              (0, 0, 255), 1)
    return image


temporal_ens = glob.glob(str_folder + '**/*.pt', recursive=True)
postprocessor = PostProcessor(score_thresh=0.05,
                              nms=0.5,
                              detections_per_img=100,
                              box_coder=BoxCoder(weights=(10.0, 10.0, 5.0, 5.0)),
                              cls_agnostic_bbox_reg=False,
                              bbox_aug_enabled=False)
for _iter in temporal_ens:
    str_file = os.path.basename(_iter).split('_x')[0]
    _id = str_file.replace('_iter', '')
    boxes = torch.load(_iter)
    boxes_nms = postprocessor.filter_results(boxes, 21)
    str_id = _id + '.jpg'
    print(str_img + str_id)
    image = cv2.imread(str_img + str_id)
def __init__(self, cfg, det_roi_head_feature_extractor: torch.nn.Module):
    super(DetProposalVGHead, self).__init__()
    self.cfg = cfg
    self.det_roi_head_feature_extractor = det_roi_head_feature_extractor
    self.obj_embed_dim = self.det_roi_head_feature_extractor.out_channels  # 1024
    self.phrase_embed_dim = 1024
    self.phrase_embed = PhraseEmbeddingSent(
        cfg, phrase_embed_dim=self.phrase_embed_dim, bidirectional=True)
    self.recognition_dim = 1024
    if cfg.MODEL.VG.SPATIAL_FEAT:
        self.obj_embed_dim = self.obj_embed_dim + 256
    self.visual_embedding = nn.Sequential(
        nn.Linear(self.obj_embed_dim, self.recognition_dim),
        nn.LeakyReLU(),
        nn.Linear(self.recognition_dim, self.recognition_dim))
    self.visual_embedding_topN = nn.Sequential(
        nn.Linear(self.obj_embed_dim, self.recognition_dim),
        nn.LeakyReLU(),
        nn.Linear(self.recognition_dim, self.recognition_dim))
    self.similarity_input_dim = self.recognition_dim + self.phrase_embed_dim * 3
    self.similarity = nn.Sequential(
        nn.Linear(self.similarity_input_dim, 256),
        nn.LeakyReLU(),
        nn.Linear(256, 1))
    self.similarity_topN = nn.Sequential(
        nn.Linear(self.similarity_input_dim, 256),
        nn.LeakyReLU(),
        nn.Linear(256, 1))
    self.box_reg = nn.Sequential(
        nn.Linear(self.similarity_input_dim, 256),
        nn.LeakyReLU(),
        nn.Linear(256, 4))
    self.box_reg_topN = nn.Sequential(
        nn.Linear(self.similarity_input_dim, 256),
        nn.LeakyReLU(),
        nn.Linear(256, 4))
    if cfg.MODEL.RELATION_ON:
        if cfg.MODEL.RELATION.INTRA_LAN:
            # self.phrase_mps = WordPhraseGraph(cfg, hidden_dim=self.phrase_embed_dim)
            self.phrase_mps = WordPhraseGraphV1(
                cfg, hidden_dim=self.phrase_embed_dim)
        if cfg.MODEL.RELATION.VISUAL_GRAPH:
            self.visual_graph = StructureGraphMessagePassingInNodesV3Update(
                self.phrase_embed_dim)
        if cfg.MODEL.RELATION.RELATION_FEATURES:
            self.relation_pair_wise_spatial_embedding_linear = nn.Sequential(
                nn.Linear(64 * 64 * 2, 1024),
                nn.LeakyReLU(),
                nn.Linear(1024, 256))
            self.relation_visual_embedding = nn.Sequential(
                nn.Linear(self.obj_embed_dim + 256, self.recognition_dim),
                nn.LeakyReLU(),
                nn.Linear(self.recognition_dim, self.recognition_dim))
            self.relation_union_embedding = nn.Sequential(
                nn.Linear(self.recognition_dim * 3, self.recognition_dim),
                nn.LeakyReLU(),
                nn.Linear(self.recognition_dim, self.recognition_dim))
            self.relation_similarity = nn.Sequential(
                nn.Linear(self.similarity_input_dim, 256),
                nn.LeakyReLU(),
                nn.Linear(256, 1))
    self.box_coder = BoxCoder(weights=cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS)
    self.VGLoss = VGLossComputeTwoStageSep(cfg)