Example #1
0
 def __init__(self,
              score_thresh=0.05,
              nms=0.5,
              detections_per_img=100,
              box_coder=None,
              cls_agnostic_bbox_reg=False,
              bbox_aug_enabled=False):
     """
     Store post-processing settings for box predictions.

     Arguments:
         score_thresh (float): score cutoff (default 0.05)
         nms (float): NMS threshold (default 0.5)
         detections_per_img (int): per-image detection cap (default 100)
         box_coder (BoxCoder): delta codec; when None a coder with
             weights (10, 10, 5, 5) is built
         cls_agnostic_bbox_reg (bool): class-agnostic regression flag
         bbox_aug_enabled (bool): bbox test-time augmentation flag
     """
     super(PostProcessor, self).__init__()
     # Default to the standard (10, 10, 5, 5) regression weights.
     self.box_coder = (box_coder if box_coder is not None
                       else BoxCoder(weights=(10., 10., 5., 5.)))
     self.score_thresh = score_thresh
     self.nms = nms
     self.detections_per_img = detections_per_img
     self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg
     self.bbox_aug_enabled = bbox_aug_enabled
Example #2
0
 def __init__(self, cfg, crop_size, mode, post_branch, size_divisible=0):
     """
     Configure the selected post-branch; loss machinery is built only
     when mode == 0.

     Arguments:
         cfg: config node (reads SEARCH.PREFIX_ANCHOR and, for the
             retina branch, MODEL.RETINANET IoU thresholds)
         crop_size: stored crop size
         mode (int): 0 triggers loss construction, anything else skips it
         post_branch (str): "retina" or "densebox"
         size_divisible (int): stored padding granularity
     """
     self.size_divisible = size_divisible
     self.mode = mode
     self.crop_size = crop_size
     self.special_deal = cfg.SEARCH.PREFIX_ANCHOR
     self.post_branch = post_branch

     # Only mode 0 needs the matcher / loss evaluator set up.
     if self.mode != 0:
         return

     if self.post_branch == "retina":
         self.anchor_generator = make_anchor_generator_retinanet(cfg)
         self.box_coder = BoxCoder(weights=(10., 10., 5., 5.))
         self.matcher = Matcher(
             cfg.MODEL.RETINANET.FG_IOU_THRESHOLD,
             cfg.MODEL.RETINANET.BG_IOU_THRESHOLD,
             allow_low_quality_matches=True,
         )
         self.loss_evaluator = RetinaNetLossComputation(
             cfg, self.matcher, self.box_coder
         )
     elif self.post_branch == "densebox":
         self.loss_evaluator = DenseBoxLossComputation(cfg)
     else:
         raise ValueError("Post {} do not support now".format(self.post_branch))
Example #3
0
    def __init__(
        self,
        pre_nms_top_n,
        post_nms_top_n,
        nms_thresh,
        nms_method,
        nms_sigma,
        nms_min_score,
        min_size,
        box_coder=None,
        fpn_post_nms_top_n=None,
    ):
        """
        Store RPN proposal post-processing settings.

        Arguments:
            pre_nms_top_n (int)
            post_nms_top_n (int)
            nms_thresh (float)
            nms_method: NMS variant selector
            nms_sigma: parameter consumed by the selected NMS method
            nms_min_score: score floor consumed by the selected NMS method
            min_size (int)
            box_coder (BoxCoder): defaults to identity weights (1,1,1,1)
            fpn_post_nms_top_n (int): defaults to post_nms_top_n
        """
        super(RPNPostProcessor, self).__init__()
        self.pre_nms_top_n = pre_nms_top_n
        self.post_nms_top_n = post_nms_top_n
        self.nms_thresh = nms_thresh
        self.min_size = min_size
        self.nms_method = nms_method
        self.nms_sigma = nms_sigma
        self.nms_min_score = nms_min_score

        # Identity weights by default: RPN deltas are used unscaled.
        self.box_coder = (box_coder if box_coder is not None
                          else BoxCoder(weights=(1.0, 1.0, 1.0, 1.0)))

        # Without an explicit FPN-wide cap, reuse the per-image cap.
        self.fpn_post_nms_top_n = (post_nms_top_n if fpn_post_nms_top_n is None
                                   else fpn_post_nms_top_n)
Example #4
0
    def __init__(self, cfg):
        """
        Wire up RetinaNet: anchor generator, head, box codec, test-time
        box selector and the loss evaluator.

        Arguments:
            cfg: config node; cloned so later mutations don't leak in

        Raises:
            NotImplementedError: if MODEL.SPARSE_MASK_ON is set.
        """
        super(RetinaNetModule, self).__init__()

        self.cfg = cfg.clone()

        anchor_generator = make_anchor_generator_retinanet(cfg)
        head = RetinaNetHead(cfg)
        box_coder = BoxCoder(weights=(10., 10., 5., 5.))

        # BUGFIX: the original tested `SPARSE_MASK_ON or SPARSE_MASK_ON`
        # (the same flag twice).  Collapsed to a single check; the second
        # operand was presumably meant to be a different flag —
        # NOTE(review): confirm against the config schema.
        if self.cfg.MODEL.SPARSE_MASK_ON:
            raise NotImplementedError
        box_selector_test = make_retinanet_postprocessor(
            cfg, 100, box_coder)
        box_selector_train = None

        # FreeAnchor replaces the standard RetinaNet loss when enabled.
        loss_evaluator = make_free_anchor_loss_evaluator(cfg, box_coder) if cfg.FREEANCHOR.FREEANCHOR_ON \
            else make_retinanet_loss_evaluator(cfg, box_coder)

        self.anchor_generator = anchor_generator
        self.head = head
        self.box_selector_test = box_selector_test
        self.box_selector_train = box_selector_train
        self.loss_evaluator = loss_evaluator
Example #5
0
def make_roi_box_loss_evaluator(cfg):
    """
    Build the Fast R-CNN box-head loss evaluator from config.

    Arguments:
        cfg: config node providing ROI_HEADS matching/sampling settings
             and ROI_BOX_HEAD loss options

    Returns:
        FastRCNNLossComputation

    Raises:
        NotImplementedError: when MODEL.ROI_BOX_HEAD.USE_FOCAL is set —
            the focal-loss classification path is not implemented.
    """
    matcher = Matcher(
        cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
        cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )

    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)

    fg_bg_sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE, cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
    )

    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG

    if cfg.MODEL.ROI_BOX_HEAD.USE_FOCAL:
        print("USING FOCAL LOSS FOR BOX HEAD")
        # BUGFIX: the original raised here but still carried an
        # unreachable SigmoidFocalLoss import/construction below the
        # raise; that dead code has been removed.
        raise NotImplementedError()
    else:
        cls_loss = F.cross_entropy

    loss_evaluator = FastRCNNLossComputation(
        matcher,
        fg_bg_sampler,
        box_coder,
        cls_loss,
        cls_agnostic_bbox_reg
    )

    return loss_evaluator
Example #6
0
def make_roi_box_loss_evaluator(cfg):
    """Assemble the Fast R-CNN loss evaluator from the ROI-heads config."""
    # Proposal-to-ground-truth matching thresholds from the config.
    matcher = Matcher(
        cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
        cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )

    box_coder = BoxCoder(weights=cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS)

    # Keeps the fg/bg ratio of sampled proposals per image under control.
    fg_bg_sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
        cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION,
    )

    return FastRCNNLossComputation(
        matcher,
        fg_bg_sampler,
        box_coder,
        cfg.MODEL.CLS_AGNOSTIC_BBOX_REG,
    )
Example #7
0
    def __init__(self,
                 pre_nms_thresh,
                 pre_nms_top_n,
                 nms_thresh,
                 fpn_post_nms_top_n,
                 min_size,
                 num_classes,
                 box_coder=None,
                 scheme="fl"):
        """
        Store RetinaNet post-processing settings and select the per-level
        decoding implementation.

        Arguments:
            pre_nms_thresh (float)
            pre_nms_top_n (int)
            nms_thresh (float)
            fpn_post_nms_top_n (int)
            min_size (int)
            num_classes (int)
            box_coder (BoxCoder): defaults to weights (10, 10, 5, 5)
            scheme (str): "obj" binds the *_obj per-level forward,
                any other value binds the *_fl (focal-loss) one
        """
        super(RetinaNetPostProcessor, self).__init__(pre_nms_thresh, 0,
                                                     nms_thresh, min_size)
        self.pre_nms_thresh = pre_nms_thresh
        self.pre_nms_top_n = pre_nms_top_n
        self.nms_thresh = nms_thresh
        self.fpn_post_nms_top_n = fpn_post_nms_top_n
        self.min_size = min_size
        self.num_classes = num_classes

        self.box_coder = (box_coder if box_coder is not None
                          else BoxCoder(weights=(10., 10., 5., 5.)))

        # Bind the per-feature-map forward to the chosen scheme.
        scheme_impl = (self.forward_for_single_feature_map_obj
                       if scheme == "obj"
                       else self.forward_for_single_feature_map_fl)
        self.forward_for_single_feature_map = scheme_impl
    def __init__(self, cfg, in_channels):
        """
        Assemble the RPN: anchors, head, box codec, train/test proposal
        selectors and the loss evaluator.

        Arguments:
            cfg: config node (cloned into self.cfg)
            in_channels (int): channels of the incoming feature maps
        """
        super(RPNModule, self).__init__()

        self.cfg = cfg.clone()

        anchor_gen = make_anchor_generator(cfg)

        # The head class is resolved through the registry so configs can
        # swap implementations.
        head_cls = registry.RPN_HEADS[cfg.MODEL.RPN.RPN_HEAD]
        rpn_head_inst = head_cls(
            cfg, in_channels, anchor_gen.num_anchors_per_location()[0])

        # Identity weights: RPN regression deltas are used unscaled.
        coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

        selector_train = make_rpn_postprocessor(cfg, coder, is_train=True)
        selector_test = make_rpn_postprocessor(cfg, coder, is_train=False)
        evaluator = make_rpn_loss_evaluator(cfg, coder)

        self.anchor_generator = anchor_gen
        self.head = rpn_head_inst
        self.box_selector_train = selector_train
        self.box_selector_test = selector_test
        self.loss_evaluator = evaluator
Example #9
0
def make_roi_box_post_processor(cfg):
    """
    Build one PostProcessor per enabled evaluation head.

    Mode index i maps into cfg.TEST.EVALUATION_FLAGS:
        0: conv cls + conv reg
        1: fc cls + fc cls
        2: fc cls + conv reg
        3: fc cls + conv reg (double-head-ext)

    Arguments:
        cfg: config node with ROI_HEADS inference thresholds and
             TEST.EVALUATION_FLAGS

    Returns:
        list[PostProcessor]: one per flag equal to 1; asserted non-empty
    """
    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)

    score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH
    nms_thresh = cfg.MODEL.ROI_HEADS.NMS
    detections_per_img = cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG
    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG

    evaluation_flags = cfg.TEST.EVALUATION_FLAGS

    # CLEANUP: removed the leftover debug `print(i, value)` and the
    # unused `use_fpn` local from the original.
    postprocessor = []
    for i, value in enumerate(evaluation_flags):
        if value == 1:
            postprocessor.append(PostProcessor(score_thresh,
                                               nms_thresh,
                                               detections_per_img,
                                               box_coder,
                                               cls_agnostic_bbox_reg,
                                               mode=i))

    # At least one evaluation head must be enabled.
    assert (len(postprocessor) > 0)

    return postprocessor
Example #10
0
def make_roi_box_post_processor(cfg):
    """
    Build the box-head PostProcessor from config.

    Arguments:
        cfg: config node supplying ROI_HEADS inference thresholds, the
             classification activation and the NMS policy

    Returns:
        PostProcessor
    """
    # CLEANUP: removed the unused `use_fpn` local from the original.
    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)

    score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH
    nms_thresh = cfg.MODEL.ROI_HEADS.NMS
    detections_per_img = cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG
    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    classification_activate = cfg.MODEL.ROI_BOX_HEAD.CLASSIFICATION_ACTIVATE
    nms_policy = cfg.MODEL.ROI_HEADS.NMS_POLICY

    postprocessor = PostProcessor(
        score_thresh,
        nms_thresh,
        detections_per_img,
        box_coder,
        cls_agnostic_bbox_reg,
        classification_activate=classification_activate,
        nms_policy=nms_policy,
        cfg=cfg,
    )
    return postprocessor
Example #11
0
def make_roi_box_post_processor(cfg):
    """
    Build the box-head PostProcessor (amodal-inference variant).

    Arguments:
        cfg: config node supplying ROI_HEADS inference thresholds,
             TEST.BBOX_AUG.ENABLED and INPUT.AMODAL

    Returns:
        PostProcessor
    """
    # CLEANUP: removed the unused `use_fpn` local from the original.
    bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
    box_coder = BoxCoder(weights=bbox_reg_weights)

    score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH
    nms_thresh = cfg.MODEL.ROI_HEADS.NMS
    detections_per_img = cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG
    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    bbox_aug_enabled = cfg.TEST.BBOX_AUG.ENABLED

    amodal_inference = cfg.INPUT.AMODAL

    postprocessor = PostProcessor(
        score_thresh,
        nms_thresh,
        detections_per_img,
        box_coder,
        cls_agnostic_bbox_reg,
        bbox_aug_enabled,
        amodal_inference
    )
    return postprocessor
Example #12
0
    def __init__(self, cfg, in_channels):
        """
        Assemble RetinaNet: anchors, head, box codec, train/test box
        selectors, loss evaluator and box sub-sampler.

        Arguments:
            cfg: config node (cloned into self.cfg)
            in_channels (int): channels of the incoming feature maps
        """
        super(RetinaNetModule, self).__init__()

        self.cfg = cfg.clone()

        anchors = make_anchor_generator_retinanet(cfg)
        retina_head = RetinaNetHead(cfg, in_channels)
        coder = BoxCoder(weights=(10., 10., 5., 5.))

        # Train and test selectors are configured separately.
        selector_train = make_retinanet_postprocessor(cfg, coder,
                                                      is_train=True)
        selector_test = make_retinanet_postprocessor(cfg, coder,
                                                     is_train=False)

        evaluator = make_retinanet_loss_evaluator(cfg, coder)

        self.anchor_generator = anchors
        self.head = retina_head
        self.box_selector_train = selector_train
        self.box_selector_test = selector_test
        self.loss_evaluator = evaluator
        self.box_subsumple = make_retinanet_box_subsample(cfg)
Example #13
0
def make_roi_box_loss_evaluator(cfg):
    """
    Build the box-head loss evaluator, forwarding the extra keyword
    arguments required by the configured classification loss.
    """
    matcher = Matcher(
        cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
        cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )

    box_coder = BoxCoder(weights=cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS)

    fg_bg_sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
        cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION,
    )

    head_cfg = cfg.MODEL.ROI_BOX_HEAD
    class_loss_func = head_cfg.CLASS_LOSS_FUNC

    # Each loss flavour consumes its own extra constructor arguments.
    class_loss_extra_argv = {}
    if class_loss_func == "CrossEntropyLoss":
        class_loss_extra_argv = {
            "smooth_eps": head_cfg.CLASS_LOSS_SMOOTH_EPS,
        }
    elif class_loss_func == "SoftmaxFocalLoss":
        class_loss_extra_argv = {
            "alpha": head_cfg.CLASS_LOSS_ALPHA,
            "gamma": head_cfg.CLASS_LOSS_GAMMA,
            "smooth": head_cfg.CLASS_LOSS_SMOOTH,
            "num_classes": head_cfg.NUM_CLASSES,
        }

    return FastRCNNLossComputation(
        matcher,
        fg_bg_sampler,
        box_coder,
        cfg.MODEL.CLS_AGNOSTIC_BBOX_REG,
        class_loss_func=class_loss_func,
        class_loss_extra_argv=class_loss_extra_argv,
    )
    def test_box_decoder(self):
        """ Match unit test UtilsBoxesTest.TestBboxTransformRandom in
            caffe2/operators/generate_proposals_op_util_boxes_test.cc
        """
        # Identity weights so the decode applies the raw deltas.
        box_coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))
        # Five input boxes, reshaped to (-1, 4); values copied from the
        # caffe2 reference test.
        bbox = torch.from_numpy(
            np.array([
                175.62031555,
                20.91103172,
                253.352005,
                155.0145874,
                169.24636841,
                4.85241556,
                228.8605957,
                105.02092743,
                181.77426147,
                199.82876587,
                192.88427734,
                214.0255127,
                174.36262512,
                186.75761414,
                296.19091797,
                231.27906799,
                22.73153877,
                92.02596283,
                135.5695343,
                208.80291748,
            ]).astype(np.float32).reshape(-1, 4))

        # Regression deltas, one row of 4 per box above.
        deltas = torch.from_numpy(
            np.array([
                0.47861834,
                0.13992102,
                0.14961673,
                0.71495209,
                0.29915856,
                -0.35664671,
                0.89018666,
                0.70815367,
                -0.03852064,
                0.44466892,
                0.49492538,
                0.71409376,
                0.28052918,
                0.02184832,
                0.65289006,
                1.05060139,
                -0.38172557,
                -0.08533806,
                -0.60335309,
                0.79052375,
            ]).astype(np.float32).reshape(-1, 4))

        # Expected decoded boxes from the caffe2 reference test.
        gt_bbox = (np.array([
            206.949539,
            -30.715202,
            297.387665,
            244.448486,
            143.871216,
            -83.342888,
            290.502289,
            121.053398,
            177.430283,
            198.666245,
            196.295273,
            228.703079,
            152.251892,
            145.431564,
            387.215454,
            274.594238,
            5.062420,
            11.040955,
            66.328903,
            269.686218,
        ]).astype(np.float32).reshape(-1, 4))

        results = box_coder.decode(deltas, bbox)

        # Reference values are float32; compare with a small absolute
        # tolerance.
        np.testing.assert_allclose(results.detach().numpy(),
                                   gt_bbox,
                                   atol=1e-4)
Example #15
0
def do_train(
    model,
    model_ema,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    local_rank,
    checkpoint_period,
    cfg_arg,
    arguments,
):
    """
    Semi-supervised (mean-teacher style) training loop.

    Runs the student ``model`` and the EMA teacher ``model_ema`` on each
    batch, masks the supervised losses so that only images from labeled
    databases contribute, adds a consistency ("semi") loss between student
    and teacher predictions, and periodically flushes teacher predictions
    (a temporal ensemble) to disk and saves checkpoints.

    Arguments:
        model: student detector being optimized
        model_ema: teacher detector updated via EMA from the student
        data_loader: yields (images, targets_with_trans_info, idx)
        optimizer: stepped only after the warm-up phase has passed
        scheduler: currently unused (its step call is commented out)
        checkpointer: saves "model_{iter}" and "model_final"
        device: target torch device
        local_rank: rank id embedded in temporal-ensemble file names
        checkpoint_period (int): iterations between checkpoint saves
        cfg_arg: mapping providing "temporal_save_path"
        arguments: mutable run state; reads/writes "iteration" and reads
            "ema_decay", "loss_semi", "HYPER_PARAMETERS",
            "ANCHOR_STRIDES", "postprocess"
    """
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    meters_ema = MetricLogger(delimiter="  ")

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    ema_decay = arguments["ema_decay"]
    loss_semi = arguments['loss_semi']
    temporal_save_path = cfg_arg["temporal_save_path"]
    model.train()
    model_ema.train()
    box_coder = BoxCoder(weights=(10., 10., 5., 5.))
    temporal_ens = {}
    start_training_time = time.time()
    end = time.time()
    labeled_database = arguments["HYPER_PARAMETERS"]['LABELED_DATABASE']
    temporal_supervised_losses = []

    for iteration, (images, targets_with_trans_info,
                    idx) in enumerate(data_loader, start_iter):
        targets = [_iter[0] for _iter in targets_with_trans_info]
        trans_info = [_iter[1] for _iter in targets_with_trans_info]

        try:
            db_idx, img_idx, idx_name, bboxes_batch = map_to_img(
                data_loader, idx)
            # Ensemble previously-saved teacher boxes for each image.
            temporal_ens_bboxes = [
                ensemble_bboxes(_boxes, _im_sz, arguments["ANCHOR_STRIDES"],
                                arguments["HYPER_PARAMETERS"]['ENS_THRE'],
                                device)
                for _boxes, _im_sz in zip(bboxes_batch, images.image_sizes)
            ]

            img_size = [(_sz[1], _sz[0]) for _sz in images.image_sizes]
            pred_trans_info = copy.deepcopy(trans_info)
            temporal_ens_pred = []

            # Map the ensembled boxes into the current augmented frame.
            for i, _sz in enumerate(img_size):
                pred_trans_info[i][1] = _sz
                temporal_ens_per = [
                    trans_reverse(_temporal_ens, pred_trans_info[i]).to(device)
                    for _temporal_ens in temporal_ens_bboxes[i]
                ]
                temporal_ens_pred.append(temporal_ens_per)

            # Per-image weight: 1 for labeled databases, 0 for unlabeled.
            # Unlabeled images get a dummy one-box target so the model
            # can still run on them.
            db_w = []
            for i, _db in enumerate(db_idx):
                if _db not in labeled_database:
                    _bbox = BoxList(
                        torch.zeros([1, 4]),
                        (images.image_sizes[i][1], images.image_sizes[i][0]),
                        mode="xyxy")
                    _bbox.add_field('labels', torch.ones([1]))
                    targets[i] = _bbox
                    db_w.append(0.)
                else:
                    db_w.append(1.)

            if any(len(target) < 1 for target in targets):
                # BUGFIX: the original interpolated the undefined name `_`
                # here, raising NameError whenever this path was hit.
                logger.error(
                    f"Iteration={iteration + 1} || Image Ids used for training {idx_name} || targets Length={[len(target) for target in targets]}"
                )
                continue
            data_time = time.time() - end
            iteration = iteration + 1
            arguments["iteration"] = iteration

            images = images.to(device)
            targets = [target.to(device) for target in targets]
            update_ema_variables(model, model_ema, ema_decay, iteration)

            _loss_dict, result = model(images, targets)
            # Teacher forward runs without gradients.
            with torch.no_grad():
                _loss_dict_ema, result_ema = model_ema(images, targets)
                is_labeled_db_weight = torch.tensor(
                    db_w, dtype=torch.float32).to(device)

            # Mask per-image losses so unlabeled images contribute zero.
            loss_dict = {}
            loss_dict_ema = {}
            for _key in _loss_dict.keys():
                loss_dict[_key] = torch.sum(
                    torch.stack(_loss_dict[_key], dim=0) *
                    is_labeled_db_weight)
                loss_dict_ema[_key] = torch.sum(
                    torch.stack(_loss_dict_ema[_key], dim=0) *
                    is_labeled_db_weight)

            # Teacher predictions mapped back to the original image frame.
            result_origin = predict_retina_postprocess(
                arguments['postprocess'], box_coder, result_ema, trans_info,
                images.image_sizes)

            # Running mean of the supervised loss over the last 100
            # iterations drives the consistency balance weight.
            with torch.no_grad():
                supervised_loss = (loss_dict['loss_retina_cls'] +
                                   loss_dict['loss_retina_reg']) / (
                                       np.sum(db_w) + 0.1)
            temporal_supervised_losses.append(supervised_loss)
            temporal_supervised_losses = temporal_supervised_losses[-100:]
            sup_loss = torch.stack(temporal_supervised_losses).mean()
            meters.update(sup_loss=sup_loss)

            if get_world_size() > 1:
                # NOTE(review): the reduced tensor is a temporary whose
                # result is discarded, so this all_reduce does not change
                # `sup_loss` — confirm whether an in-place cross-worker
                # reduction was intended.
                torch.distributed.all_reduce(
                    torch.stack(temporal_supervised_losses).mean(),
                    op=torch.distributed.ReduceOp.SUM)
            balance_weight = min(1. / (sup_loss / 0.28)**12, 1.)

            semi_loss = semi_loss_fn(
                result,
                result_ema,
                temporal_ens_pred,
                images.image_sizes,
                box_coder,
                n_cls=arguments["HYPER_PARAMETERS"]['NCLS'],
                reg_cons_w=arguments["HYPER_PARAMETERS"]['REG_CONSIST_WEIGHT'])
            # Ramp the consistency weight up over the configured schedule.
            semi_loss_weight = semi_weight_by_epoch(
                iteration,
                start_iter=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM'] *
                arguments["HYPER_PARAMETERS"]['START_ITER'],
                rampup_length=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM']
                * arguments["HYPER_PARAMETERS"]['RAMPUP_LENGTH'],
                consistence_weight=arguments["HYPER_PARAMETERS"]
                ['CONSISTENCE_WEIGHT'],
                consistence_trunc=arguments["HYPER_PARAMETERS"]
                ['MAX_CONSISTENT_LOSS'])
            for _key in semi_loss.keys():
                loss_dict[_key] = torch.sum(semi_loss[_key]) * semi_loss_weight

            # Accumulate teacher predictions into the temporal ensemble.
            for i, (_id, _labeled) in enumerate(zip(idx_name, db_w)):
                result_dict = {
                    'iteration': iteration,
                    'result': result_origin[i]
                }
                if _id in temporal_ens.keys():
                    temporal_ens[_id].append(result_dict)
                else:
                    temporal_ens[_id] = [result_dict]

            losses = sum(loss for loss in loss_dict.values())

            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict)

            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(loss=losses_reduced, **loss_dict_reduced)

            loss_dict_reduced_ema = reduce_loss_dict(loss_dict_ema)
            losses_reduced_ema = sum(
                loss for loss in loss_dict_reduced_ema.values())
            meters_ema.update(loss=losses_reduced_ema, **loss_dict_reduced_ema)

            optimizer.zero_grad()
            # Note: If mixed precision is not used, this ends up doing nothing
            # Otherwise apply loss scaling for mixed-precision recipe
            with amp.scale_loss(losses, optimizer) as scaled_losses:
                scaled_losses.backward()

            # Skip optimizer steps during the warm-up phase.
            if not iteration < arguments["HYPER_PARAMETERS"][
                    'EPOCH_BATCH_NUM'] * arguments["HYPER_PARAMETERS"][
                        'START_ITER']:
                optimizer.step()
            #scheduler.step()

            batch_time = time.time() - end
            end = time.time()
            meters.update(time=batch_time, data=data_time)

            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

            if iteration % 20 == 0 or iteration == max_iter:
                logger.info(
                    meters.delimiter.join([
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "{meters_ema}",
                        "lr: {lr:.6f}",
                        "semi_w:{semi_w:2.3f}",
                        "supervised loss{sup_loss:2.3f},"
                        "balance_weight{balance_weight:2.3f},"
                        "max mem: {memory:.0f}",
                    ]).format(
                        eta=eta_string,
                        iter=iteration,
                        meters=str(meters),
                        meters_ema=str(meters_ema),
                        lr=optimizer.param_groups[0]["lr"],
                        semi_w=semi_loss_weight,
                        sup_loss=sup_loss,
                        balance_weight=balance_weight,
                        memory=torch.cuda.max_memory_allocated() / 1024.0 /
                        1024.0,
                    ))

            # Periodically flush the temporal ensemble to disk and reset.
            if (iteration - 50) % 100 == 0:
                for _key in temporal_ens.keys():
                    for _iter in temporal_ens[_key]:
                        str_folder = os.path.join(
                            temporal_save_path,
                            _key)
                        str_file = '{}/{}_loc{}_iter_x{:07d}.pt'.format(
                            str_folder, _key, local_rank, _iter['iteration'])
                        if not os.path.exists(str_folder):
                            os.makedirs(str_folder)
                        torch.save(_iter['result'], str_file)
                        del _iter['result']

                del temporal_ens
                temporal_ens = {}

            if iteration % checkpoint_period == 0:
                save_time = time.time()
                checkpointer.save("model_{:07d}".format(iteration),
                                  **arguments)

            if iteration == max_iter:
                checkpointer.save("model_final", **arguments)

        except Exception as e:
            print('error in file ', idx_name, img_idx)
            raise e

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
Example #16
0
    def __init__(self, cfg, in_channels):
        """Box head that additionally caches the last (bbox, target) pair."""
        super(NewROIBoxHead, self).__init__(cfg, in_channels)
        # Cache slots; populated elsewhere (both start empty).
        self.bbox_dict = {"bbox": None, "target": None}

        self.box_coder = BoxCoder(weights=cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS)
Example #17
0
def main():
    """Distributed-aware inference entry point.

    Parses CLI args, optionally initializes torch.distributed, builds the
    detection model from the merged config, loads a checkpoint, then runs
    inference over every configured test dataset.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    # WORLD_SIZE is set by the torch.distributed launcher.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # Apply file config first, then command-line overrides, then freeze.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # Initialize mixed-precision if necessary
    # amp.init is called for its global side effects; the handle itself
    # is not used afterwards.
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)
    postprocessor = make_retinanet_postprocessor(
        cfg, BoxCoder(weights=(10.0, 10.0, 5.0, 5.0)), False)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    # Evaluate segm/keypoints IoU types only when those heads are on.
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    semi_loss = make_semi_box_loss_evaluator(cfg)
    # Run inference per test dataset, synchronizing ranks after each.
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            postprocessor,
            semi_loss,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            anchor_strides=cfg.MODEL.RETINANET.ANCHOR_STRIDES,
        )
        synchronize()
Example #18
0
def make_roi_box_loss_evaluator(cfg):
    """Assemble the ROI box-head loss computation from the config.

    Wires together the proposal matcher, the balanced fg/bg sampler and the
    box coder, then attaches whichever optional losses the config enables:
    sigmoid focal, class-balance re-weighting, wing, self-adjusting
    smooth-L1 and balanced-L1.

    Returns:
        FastRCNNLossComputation: fully configured loss evaluator.
    """
    # Proposal-to-ground-truth matcher; ROI heads discard low-quality matches.
    proposal_matcher = Matcher(
        cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
        cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )

    coder = BoxCoder(weights=cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS)

    sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
        cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION)

    agnostic_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG

    # Optional sigmoid focal loss for classification.
    focal = None
    if cfg.MODEL.ROI_BOX_HEAD.USE_FOCAL_LOSS:
        focal = SigmoidFocalLoss(cfg.MODEL.ROI_BOX_HEAD.FOCAL_LOSS.GAMMA,
                                 cfg.MODEL.ROI_BOX_HEAD.FOCAL_LOSS.ALPHA)

    # Optional class-balance re-weighting, active only when the per-class
    # sample-count file exists on disk.
    balance_weight = None
    if cfg.MODEL.ROI_BOX_HEAD.USE_CLASS_BALANCE_LOSS and \
            os.path.isfile(cfg.MODEL.ROI_BOX_HEAD.CLASS_BALANCE_LOSS.WEIGHT_FILE):
        samples_per_class = ClassBalanceLoss.load_class_samples(
            filename=cfg.MODEL.ROI_BOX_HEAD.CLASS_BALANCE_LOSS.WEIGHT_FILE,
            category_type='category')
        balance_weight = ClassBalanceLoss(
            device=torch.device(cfg.MODEL.DEVICE),
            num_class_list=samples_per_class,
            alpha=cfg.MODEL.ROI_BOX_HEAD.CLASS_BALANCE_LOSS.ALPHA,
            beta=cfg.MODEL.ROI_BOX_HEAD.CLASS_BALANCE_LOSS.BETA)

    # Optional wing loss for box regression.
    wing = None
    if cfg.MODEL.ROI_BOX_HEAD.USE_WING_LOSS:
        wing = WingLoss(
            width=cfg.MODEL.ROI_BOX_HEAD.WING_LOSS.WIDTH,
            curvature=cfg.MODEL.ROI_BOX_HEAD.WING_LOSS.SIGMA,
        )

    # Optional self-adjusting smooth-L1; 4 = number of box regression targets.
    adjust_l1 = None
    if cfg.MODEL.ROI_BOX_HEAD.USE_SELF_ADJUST_SMOOTH_L1_LOSS:
        adjust_l1 = AdjustSmoothL1Loss(
            4,
            beta=cfg.MODEL.ROI_BOX_HEAD.SELF_ADJUST_SMOOTH_L1_LOSS.BBOX_REG_BETA)

    # Optional balanced-L1 box regression loss.
    balanced_l1 = None
    if cfg.MODEL.ROI_BOX_HEAD.USE_BALANCE_L1_LOSS:
        balanced_l1 = BalancedL1Loss(
            alpha=cfg.MODEL.ROI_BOX_HEAD.BALANCE_L1_LOSS.ALPHA,
            beta=cfg.MODEL.ROI_BOX_HEAD.BALANCE_L1_LOSS.BETA,
            gamma=cfg.MODEL.ROI_BOX_HEAD.BALANCE_L1_LOSS.GAMMA)

    return FastRCNNLossComputation(
        proposal_matcher,
        sampler,
        coder,
        agnostic_reg,
        focal_loss=focal,
        class_balance_weight=balance_weight,
        wing_loss=wing,
        adjust_smooth_l1_loss=adjust_l1,
        balance_l1_loss=balanced_l1,
    )
Beispiel #19
0
def main():
    """Ensemble temporally-saved detection predictions and report mAP.

    Parses the command line, builds the RetinaNet post-processor, gathers
    per-image prediction dumps under ``str_folder``, restricts them to
    images listed in the valminusminival2014 annotations, runs the
    multi-snapshot alignment/ensembling, and finally evaluates COCO mAP.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help="The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    str_folder = './tempor_pred_coco_bn8/'
    str_img = '/JPEGImages/'
    str_output = 'output/'

    args = parser.parse_args()

    # Distributed flag derived from the launcher-provided environment.
    num_gpus = int(os.environ.get("WORLD_SIZE", 1))
    args.distributed = num_gpus > 1

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Standard RetinaNet decode/NMS post-processor.
    postprocessor = make_retinanet_postprocessor(
        cfg, BoxCoder(weights=(10.0, 10.0, 5.0, 5.0)), False)

    # Keep only prediction folders whose image id appears in the
    # valminusminival2014 annotation set.
    json_file = './MS_COCO/annotations/instances_valminusminival2014.json'
    with open(json_file, 'r') as f:
        json_info = json.load(f)
    valid_ids = {_img['file_name'].replace('.jpg', '')
                 for _img in json_info['images']}
    temporal_ens = [path for path in glob.glob(str_folder + '*')
                    if os.path.basename(path) in valid_ids]
    print('initial end ----------------------')

    predictions = {}
    for folder in tqdm(temporal_ens):
        snapshot_paths = glob.glob(os.path.join(folder, '*.pt'))
        img_id = os.path.basename(folder)
        # Order the snapshots by the integer suffix following '_x' in the
        # filename so the ensemble sees them in temporal order.
        suffixes = [int(p.split('_x')[-1].replace('.pt', ''))
                    for p in snapshot_paths]
        ordered = np.array(snapshot_paths)[np.argsort(suffixes)]

        bbox = multi_align_ens(ordered, postprocessor)
        predictions[img_id] = bbox[0].to('cpu')

    # Persist the ensembled predictions, then reload to evaluate.
    torch.save(predictions, 'tmp.pt')
    predictions = torch.load('tmp.pt')
    mAp_scores = mAP(cfg, predictions)
    print('mAp is ', mAp_scores)
    print('process_end')
Beispiel #20
0
def train(cfg, local_rank, distributed):
    """Set up and run semi-supervised training with a student + EMA teacher.

    Creates the student model and its EMA copy, configures optimizer,
    scheduler, mixed precision and (optionally) DDP, restores a checkpoint,
    then hands everything off to ``do_train``.

    Arguments:
        cfg: frozen project config node.
        local_rank (int): GPU index of this process under distributed launch.
        distributed (bool): whether multi-process (DDP) training is active.

    Returns:
        the trained student model.
    """
    model = create_model(cfg)
    # EMA copy serves as the teacher; its weights come from
    # update_ema_variables rather than the optimizer.
    model_ema = create_model(cfg, ema=True)

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    model_ema.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )
        # NOTE(review): the teacher is wrapped with a bare
        # DistributedDataParallel (no device_ids), unlike the student —
        # presumably a project-local wrapper; confirm this is intentional.
        model_ema = DistributedDataParallel(model_ema)

    arguments = {}
    cfg_arg = {}
    arguments["iteration"] = 0
    arguments["semi_weight"] = cfg.SEMI.SEMI_WEIGHT
    cfg_arg["temporal_save_path"] = cfg.SEMI.TEMPORAL_SAVE_PATH
    # Loss evaluator for the semi-supervised (unlabeled) branch.
    arguments['loss_semi'] = make_semi_box_loss_evaluator(cfg)

    output_dir = cfg.OUTPUT_DIR

    # Only rank 0 writes checkpoints to disk.
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    # Merge bookkeeping saved with the checkpoint (e.g. "iteration").
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader_semi(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    # Align the teacher with the (possibly checkpoint-restored) student once
    # before training starts — presumably an EMA sync; confirm semantics.
    update_ema_variables(model, model_ema)

    # arguments["iteration"] = 0
    # optimizer = make_optimizer(cfg, model)
    # scheduler = make_lr_scheduler(cfg, optimizer)
    arguments["ema_decay"] = cfg.SEMI.EMA_DECAY
    arguments["ANCHOR_STRIDES"] = cfg.MODEL.RETINANET.ANCHOR_STRIDES
    arguments["HYPER_PARAMETERS"] = cfg.SEMI.HYPER_PARAMETERS
    arguments['postprocess'] = make_retinanet_semi_postprocessor(
        cfg, BoxCoder(weights=(10., 10., 5., 5.)), True)

    # HACK: hard-coded learning-rate override; bypasses the configured base
    # LR and any scheduler warmup. Move into the config when possible.
    for g in optimizer.param_groups:
        g['lr'] = 0.0005

    do_train(
        model,
        model_ema,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        local_rank,
        checkpoint_period,
        cfg_arg,
        arguments,
    )

    return model
    def forward(self,
                features,
                proposals,
                targets=None,
                proposals_sampled=None):
        """
        Arguments:
            features (list[Tensor]): feature-maps from possibly several levels
            proposals (list[BoxList]): proposal boxes
            targets (list[BoxList], optional): the ground-truth targets.
            proposals_sampled (list[BoxList], optional): pre-subsampled
                proposals; when provided during training, the internal
                subsampling step is skipped.

        Returns:
            x (Tensor): the result of the feature extractor
            proposals (list[BoxList]): during training, the subsampled proposals
                are returned. During testing, the predicted boxlists are returned
            losses (dict[Tensor]): During training, returns the losses for the
                head. During testing, returns an empty dict.
        """

        if self.training:
            # Faster R-CNN subsamples during training the proposals with a fixed
            # positive / negative ratio
            if proposals_sampled is None:
                with torch.no_grad():
                    proposals_sampled = self.loss_evaluator.subsample(
                        proposals, targets)
            proposals = proposals_sampled

        # extract features that will be fed to the final classifier. The
        # feature_extractor generally corresponds to the pooler + heads
        x = self.feature_extractor(features, proposals)
        # final classifier that converts the features into predictions
        class_logits, box_regression = self.predictor(x)

        if not self.training:
            # Inference path: decode + filter via the post-processor and
            # return early with no losses.
            result = self.post_processor((class_logits, box_regression),
                                         proposals)
            return x, result, {}

        # TODO: loss is not needed for mean teacher when MT_ON
        # Skip the (costly) loss when the head's weights are frozen; the
        # guard further below uses the same flag, so loss_classifier /
        # loss_box_reg are only read when they were computed.
        if not self.cfg.MODEL.ROI_BOX_HEAD.FREEZE_WEIGHT:
            loss_classifier, loss_box_reg = self.loss_evaluator(
                [class_logits], [box_regression], proposals)

        if self.cfg.MODEL.ROI_BOX_HEAD.OUTPUT_DECODED_PROPOSAL:
            # Decode the predicted regression deltas back into absolute box
            # coordinates and overwrite each proposal's bbox in place.
            bbox_reg_weights = self.cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
            box_coder = BoxCoder(weights=bbox_reg_weights)
            boxes_per_image = [len(box) for box in proposals]
            concat_boxes = torch.cat([a.bbox for a in proposals], dim=0)
            decoded_proposals = box_coder.decode(
                box_regression.view(sum(boxes_per_image), -1), concat_boxes)
            decoded_proposals = decoded_proposals.split(boxes_per_image, dim=0)
            # decoded_proposals = self.post_processor((class_logits, box_regression), proposals)
            # make sure there are valid proposals
            for i, boxes in enumerate(decoded_proposals):
                if len(boxes) > 0:
                    # NOTE(review): reshape(-1, 4) turns per-class regression
                    # outputs into extra rows when the head is not
                    # class-agnostic — confirm downstream consumers expect that.
                    proposals[i].bbox = boxes.reshape(-1, 4)

        loss_dict = dict()

        if self.cfg.MODEL.MT_ON:
            # Mean-teacher mode: expose the raw logits so the caller can
            # build a consistency loss between student and teacher.
            loss_dict.update(class_logits=class_logits,
                             box_logits=box_regression)
            # loss_dict.update(class_logits=x, box_logits=x)
            # proposals_sampled.add_field('class_logits', class_logits)
            # proposals_sampled.add_field('box_logits', box_regression)

        if not self.is_mt and not self.cfg.MODEL.ROI_BOX_HEAD.FREEZE_WEIGHT:
            loss_dict.update(
                dict(loss_classifier=loss_classifier,
                     loss_box_reg=loss_box_reg))

        return x, proposals, loss_dict
def make_roi_box_loss_evaluator(cfg):
    """Construct the Fast R-CNN loss evaluator for the ROI box head.

    Selects the classification loss named by
    ``cfg.MODEL.ROI_HEADS.CLASSIFICATION_LOSS_FN`` and combines it with the
    standard matcher / sampler / box-coder components.

    Raises:
        ValueError: for the deprecated "Class" loss or an unknown loss name.
    """
    proposal_matcher = Matcher(
        cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
        cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )

    coder = BoxCoder(weights=cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS)

    sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
        cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION)

    agnostic_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG

    # Classification loss dispatch: 'fn' is the callable and 'avg' is a flag
    # consumed by FastRCNNLossComputation.
    loss_name = cfg.MODEL.ROI_HEADS.CLASSIFICATION_LOSS_FN
    if loss_name == "CE":
        classification = {'fn': _cross_entropy_with_kwargs, 'avg': False}
    elif loss_name == "Focal":
        classification = {
            'fn': SigmoidFocalLoss(
                cfg.MODEL.ROI_HEADS.FOCAL_LOSS_GAMMA,
                cfg.MODEL.ROI_HEADS.FOCAL_LOSS_ALPHA,
            ),
            'avg': True,
        }
    elif loss_name == "ReducedFocal":
        classification = {
            'fn': SigmoidReducedFocalLoss(
                cfg.MODEL.ROI_HEADS.FOCAL_LOSS_GAMMA,
                cfg.MODEL.ROI_HEADS.FOCAL_LOSS_ALPHA,
                cfg.MODEL.ROI_HEADS.REDUCED_FOCAL_LOSS_CUTOFF,
                cfg.MODEL.ROI_HEADS.CLASSIFICATION_LOSS_NORM,
            ),
            'avg': True,
        }
    elif loss_name == "Class":
        # The sample-count based class loss was removed.
        raise ValueError("deprecated class loss")
    elif loss_name == "AreaFocal":
        classification = {
            'fn': SigmoidAreaReducedFocalLoss(
                cfg.MODEL.ROI_HEADS.FOCAL_LOSS_GAMMA,
                cfg.MODEL.ROI_HEADS.FOCAL_LOSS_ALPHA,
                cfg.MODEL.ROI_HEADS.AREA_LOSS_BETA,
                cfg.MODEL.ROI_HEADS.REDUCED_FOCAL_LOSS_CUTOFF,
                cfg.MODEL.ROI_HEADS.AREA_LOSS_THRESHOLD,
                cfg.MODEL.ROI_HEADS.CLASSIFICATION_LOSS_NORM,
            ),
            'avg': True,
        }
    elif loss_name == "Area":
        classification = {
            'fn': AreaLoss(
                cfg.MODEL.ROI_HEADS.AREA_LOSS_BETA,
                cfg.MODEL.ROI_HEADS.AREA_LOSS_THRESHOLD,
            ),
            'avg': True,
        }
    else:
        raise ValueError(
            "invalid classification loss type: {}".format(loss_name))

    return FastRCNNLossComputation(
        proposal_matcher,
        sampler,
        coder,
        classification,
        cfg.MODEL.ROI_HEADS.CLS_LOSS_WT,
        cfg.MODEL.ROI_HEADS.BBOX_LOSS_WT,
        agnostic_reg,
    )
Beispiel #23
0
    for box in boxes:
        box = box.to(torch.int64)
        top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
        print(top_left, bottom_right)
        image = cv2.rectangle(image, tuple(top_left), tuple(bottom_right),
                              (0, 0, 255), 1)

    return image


# Gather all saved prediction tensors (*.pt) under str_folder, recursively.
# NOTE(review): str_folder / str_img are expected to be defined earlier in
# this script — confirm they are in scope here.
temporal_ens = glob.glob(str_folder + '**/*.pt', recursive=True)

# Post-processor that score-thresholds, NMS-filters and caps detections.
postprocessor = PostProcessor(score_thresh=0.05,
                              nms=0.5,
                              detections_per_img=100,
                              box_coder=BoxCoder(weights=(10.0, 10.0, 5.0,
                                                          5.0)),
                              cls_agnostic_bbox_reg=False,
                              bbox_aug_enabled=False)

for _iter in temporal_ens:
    # File names look like '<id>_iter..._x<k>.pt'; recover the image id.
    str_file = os.path.basename(_iter).split('_x')[0]
    _id = str_file.replace('_iter', '')
    boxes = torch.load(_iter)

    # Per-class NMS over 21 classes — presumably PASCAL VOC (20 + background);
    # TODO confirm.
    boxes_nms = postprocessor.filter_results(boxes, 21)

    str_id = _id + '.jpg'
    print(str_img + str_id)
    # if(str_id.find('009726')) < 0:
    #     continue
    image = cv2.imread(str_img + str_id)
Beispiel #24
0
    def __init__(self, cfg, det_roi_head_feature_extractor: torch.nn.Module):
        """Visual-grounding head over detection proposals.

        Builds phrase/visual embedding branches, phrase-proposal similarity
        scorers and box regressors (each with a separate top-N variant), and
        optional relation-reasoning modules.

        Arguments:
            cfg: project config node.
            det_roi_head_feature_extractor (torch.nn.Module): ROI feature
                extractor providing ``out_channels``-dim object features.
        """
        super(DetProposalVGHead, self).__init__()
        self.cfg = cfg
        self.det_roi_head_feature_extractor = det_roi_head_feature_extractor
        self.obj_embed_dim = self.det_roi_head_feature_extractor.out_channels  # 1024
        self.phrase_embed_dim = 1024

        # Sentence-level phrase encoder (bidirectional).
        self.phrase_embed = PhraseEmbeddingSent(
            cfg, phrase_embed_dim=self.phrase_embed_dim, bidirectional=True)
        self.recognition_dim = 1024

        # A 256-dim spatial feature is concatenated to the object embedding.
        if cfg.MODEL.VG.SPATIAL_FEAT:
            self.obj_embed_dim = self.obj_embed_dim + 256

        # Projects object features into the joint recognition space.
        self.visual_embedding = nn.Sequential(
            nn.Linear(self.obj_embed_dim, self.recognition_dim),
            nn.LeakyReLU(),
            nn.Linear(self.recognition_dim, self.recognition_dim))

        # Separate projection used by the top-N proposal branch.
        self.visual_embedding_topN = nn.Sequential(
            nn.Linear(self.obj_embed_dim, self.recognition_dim),
            nn.LeakyReLU(),
            nn.Linear(self.recognition_dim, self.recognition_dim))

        # Similarity input = visual embedding + 3x phrase embedding
        # (presumably three phrase-derived features — confirm in forward).
        self.similarity_input_dim = self.recognition_dim + self.phrase_embed_dim * 3

        # Phrase-proposal matching score heads (plus top-N variant).
        self.similarity = nn.Sequential(
            nn.Linear(self.similarity_input_dim, 256), nn.LeakyReLU(),
            nn.Linear(256, 1))

        self.similarity_topN = nn.Sequential(
            nn.Linear(self.similarity_input_dim, 256), nn.LeakyReLU(),
            nn.Linear(256, 1))

        # Box refinement heads predicting 4 regression values.
        self.box_reg = nn.Sequential(nn.Linear(self.similarity_input_dim, 256),
                                     nn.LeakyReLU(), nn.Linear(256, 4))

        self.box_reg_topN = nn.Sequential(
            nn.Linear(self.similarity_input_dim, 256), nn.LeakyReLU(),
            nn.Linear(256, 4))

        if cfg.MODEL.RELATION_ON:

            # Language-side message passing between words and phrases.
            if cfg.MODEL.RELATION.INTRA_LAN:
                # self.phrase_mps = WordPhraseGraph(cfg, hidden_dim=self.phrase_embed_dim)
                self.phrase_mps = WordPhraseGraphV1(
                    cfg, hidden_dim=self.phrase_embed_dim)

            # Visual-side structured graph message passing.
            if cfg.MODEL.RELATION.VISUAL_GRAPH:
                self.visual_graph = StructureGraphMessagePassingInNodesV3Update(
                    self.phrase_embed_dim)

            if cfg.MODEL.RELATION.RELATION_FEATURES:

                # Input is 64*64*2 — presumably two stacked 64x64 spatial
                # maps per relation pair; confirm against the forward pass.
                self.relation_pair_wise_spatial_embedding_linear = nn.Sequential(
                    nn.Linear(64 * 64 * 2, 1024), nn.LeakyReLU(),
                    nn.Linear(1024, 256))

                self.relation_visual_embedding = nn.Sequential(
                    nn.Linear(self.obj_embed_dim + 256, self.recognition_dim),
                    nn.LeakyReLU(),
                    nn.Linear(self.recognition_dim, self.recognition_dim))

                # Fuses three recognition_dim features into one embedding.
                self.relation_union_embedding = nn.Sequential(
                    nn.Linear(self.recognition_dim * 3, self.recognition_dim),
                    nn.LeakyReLU(),
                    nn.Linear(self.recognition_dim, self.recognition_dim))

                self.relation_similarity = nn.Sequential(
                    nn.Linear(self.similarity_input_dim, 256), nn.LeakyReLU(),
                    nn.Linear(256, 1))
        self.box_coder = BoxCoder(weights=cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS)
        self.VGLoss = VGLossComputeTwoStageSep(cfg)