Example #1
 def from_config(cls, cfg):
     backbone = build_backbone(cfg)
     backbone_shape = backbone.output_shape()
     try:
         feature_shapes = [
             backbone_shape[f] for f in cfg.MODEL.FCOS.IN_FEATURES
         ]
     except KeyError:
         raise KeyError(
             f"Available keys: {backbone_shape.keys()}.  Requested keys: {cfg.MODEL.FCOS.IN_FEATURES}"
         )
     head = FCOSHead(
         input_shape=feature_shapes,
         num_classes=cfg.MODEL.FCOS.NUM_CLASSES,
         conv_dims=[feature_shapes[0].channels] * cfg.MODEL.FCOS.NUM_CONVS,
         norm=cfg.MODEL.FCOS.HEAD_NORM,
     )
     return {
         "backbone": backbone,
         "head": head,
         "pixel_mean": cfg.MODEL.PIXEL_MEAN,
         "pixel_std": cfg.MODEL.PIXEL_STD,
         "num_classes": cfg.MODEL.FCOS.NUM_CLASSES,
         "head_in_features": cfg.MODEL.FCOS.IN_FEATURES,
         # Loss parameters:
         "focal_loss_alpha": cfg.MODEL.FCOS.FOCAL_LOSS_ALPHA,
         "focal_loss_gamma": cfg.MODEL.FCOS.FOCAL_LOSS_GAMMA,
         # Inference parameters:
         "test_score_thresh": cfg.MODEL.FCOS.SCORE_THRESH_TEST,
         "test_topk_candidates": cfg.MODEL.FCOS.TOPK_CANDIDATES_TEST,
         "test_nms_thresh": cfg.MODEL.FCOS.NMS_THRESH_TEST,
         "max_detections_per_image": cfg.TEST.DETECTIONS_PER_IMAGE,
     }
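Examples like this one follow detectron2's `@configurable` convention: `from_config` is a `@classmethod` that turns a cfg into the keyword arguments of `__init__`, so the model can also be built without a cfg. A minimal sketch of that pairing, assuming detectron2 is available (the class name `MyFCOS` and its reduced argument list are illustrative, not taken from the example above):

# Sketch only: detectron2's @configurable / from_config pattern.
import torch
import torch.nn as nn
from detectron2.config import configurable
from detectron2.modeling import build_backbone

class MyFCOS(nn.Module):  # hypothetical class name
    @configurable
    def __init__(self, *, backbone, pixel_mean, pixel_std):
        super().__init__()
        self.backbone = backbone
        self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1))

    @classmethod
    def from_config(cls, cfg):
        # Returns the keyword arguments for __init__, as in Example #1.
        return {
            "backbone": build_backbone(cfg),
            "pixel_mean": cfg.MODEL.PIXEL_MEAN,
            "pixel_std": cfg.MODEL.PIXEL_STD,
        }

# model = MyFCOS(cfg)                                   # cfg is routed through from_config
# model = MyFCOS(backbone=..., pixel_mean=..., pixel_std=...)  # or explicit arguments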
Example #2
 def from_config(cls, cfg):
     backbone = build_backbone(cfg)
     return {
         "backbone": backbone,
         # "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()),
         "proposal_generator": None,
         "load_proposals": cfg.MODEL.LOAD_PROPOSALS,
         "roi_heads": build_roi_heads(cfg, backbone.output_shape()),
         "input_format": cfg.INPUT.FORMAT,
         "vis_period": cfg.VIS_PERIOD,
         "pixel_mean": cfg.MODEL.PIXEL_MEAN,
         "pixel_std": cfg.MODEL.PIXEL_STD,
         "cpg": "CSC" in cfg.MODEL.ROI_HEADS.NAME
         or "WSJDS" in cfg.MODEL.ROI_HEADS.NAME
         or "XROIHeads" in cfg.MODEL.ROI_HEADS.NAME,
     }
Example #3
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.cfg = cfg

        # fmt: off
        self.num_classes = cfg.MODEL.CENTERNET.NUM_CLASSES
        # Loss parameters:
        # Inference parameters:
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on
        self.backbone = build_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
        self.upsample = CenternetDeconv(cfg)
        self.head = CenternetHead(cfg)

        self.mean, self.std = cfg.MODEL.PIXEL_MEAN, cfg.MODEL.PIXEL_STD
        pixel_mean = (torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1))
        pixel_std = (torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1))
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std

        self.to(self.device)
Example #4
 def from_config(cls, cfg):
     backbone = build_backbone(cfg)
     return {
         "backbone": backbone,
         "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()),
         "roi_heads": build_roi_heads(cfg, backbone.output_shape()),
         "input_format": cfg.INPUT.FORMAT,
         "vis_period": cfg.VIS_PERIOD,
         "pixel_mean": cfg.MODEL.PIXEL_MEAN,
         "pixel_std": cfg.MODEL.PIXEL_STD,
         "kd_args": cfg.KD,
         "teacher": build_teacher(cfg),
         "teacher_input_format": cfg.TEACHER.INPUT.FORMAT,
         "teacher_pixel_mean": cfg.TEACHER.MODEL.PIXEL_MEAN,
         "teacher_pixel_std": cfg.TEACHER.MODEL.PIXEL_STD,
     }
Example #5
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        if cfg.MODEL.CONDINST.MASK_HEAD.USE_MULTI:
            from .dynamic_mask_head_multi import build_dynamic_mask_head
            self.mask_head = build_dynamic_mask_head(cfg)
        else:
            from .dynamic_mask_head_old import build_dynamic_mask_head
            self.mask_head = build_dynamic_mask_head(cfg)
        self.mask_branch = build_mask_branch(cfg, self.backbone.output_shape())
        self.mask_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE
        self.max_proposals = cfg.MODEL.CONDINST.MAX_PROPOSALS

        # build top module
        in_channels = self.proposal_generator.in_channels_to_top_module

        self.controller = nn.Conv2d(in_channels,
                                    self.mask_head.num_gen_params,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        torch.nn.init.normal_(self.controller.weight, std=0.01)
        torch.nn.init.constant_(self.controller.bias, 0)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #6
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.mask_branch = build_mask_branch(cfg, self.backbone.output_shape())
        self.mask_pred = build_mask_pred(cfg)

        self.mask_out_stride = cfg.MODEL.EMBEDMASK.MASK_OUT_STRIDE

        self.max_proposals = cfg.MODEL.EMBEDMASK.MAX_PROPOSALS
        self.topk_proposals_per_im = cfg.MODEL.EMBEDMASK.TOPK_PROPOSALS_PER_IM

        self.mask_th = cfg.MODEL.EMBEDMASK.MASK_TH

        # build proposal head
        in_channels = self.proposal_generator.in_channels_to_top_module

        self.proposal_head = ProposalHead(cfg, in_channels)

        # build pixel head
        self.pixel_head = EmbedHead(
            cfg, cfg.MODEL.EMBEDMASK.MASK_BRANCH.OUT_CHANNELS)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #7
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())

        self.refinement_head = build_edge_det_head(
            cfg, self.backbone.output_shape())

        self.mask_result_src = cfg.MODEL.DANCE.MASK_IN

        self.semantic_filter = cfg.MODEL.DANCE.SEMANTIC_FILTER
        self.semantic_filter_th = cfg.MODEL.DANCE.SEMANTIC_FILTER_TH

        self.need_concave_hull = cfg.MODEL.SNAKE_HEAD.LOSS_TYPE == "chamfer"

        self.roi_size = cfg.MODEL.DANCE.ROI_SIZE

        self.re_compute_box = cfg.MODEL.DANCE.RE_COMP_BOX

        self.visualize_path = cfg.MODEL.SNAKE_HEAD.VIS_PATH

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            -1, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            -1, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #8
    def __init__(self, cfg):
        super().__init__()
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())

        self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
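Example #8 registers `pixel_mean`/`pixel_std` as buffers instead of capturing them in a `normalizer` lambda as most of the other examples do; buffers move with `model.to(device)` and are saved in the state dict. A minimal sketch of how such buffers are typically applied before the backbone, assuming `from detectron2.structures import ImageList` (the method name `preprocess_image` is illustrative, not taken from the example):

    def preprocess_image(self, batched_inputs):
        # Normalize each image with the registered buffers, then pad into a batch.
        images = [x["image"].to(self.pixel_mean.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        return ImageList.from_tensors(images, self.backbone.size_divisibility)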
Example #9
def setup(file):
    # get cfg
    cfg = get_cfg()
    cfg.merge_from_file(file)
    cfg.SOLVER.IMS_PER_BATCH = 2

    # get data loader iter
    data_loader = build_detection_train_loader(cfg)
    data_loader_iter = iter(data_loader)
    batched_inputs = next(data_loader_iter)

    # build anchors
    backbone = build_backbone(cfg).to(device)
    images = [x["image"].to(device) for x in batched_inputs]
    images = ImageList.from_tensors(images, backbone.size_divisibility)
    features = backbone(images.tensor.float())

    input_shape = backbone.output_shape()
    in_features = cfg.MODEL.RPN.IN_FEATURES
    anchor_generator = build_anchor_generator(
        cfg, [input_shape[f] for f in in_features])
    anchors = anchor_generator([features[f] for f in in_features])
    anchors = Boxes.cat(anchors).to(device)

    # build matcher
    raw_matcher = Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                          cfg.MODEL.RPN.IOU_LABELS,
                          allow_low_quality_matches=True)
    matcher = TopKMatcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                          cfg.MODEL.RPN.IOU_LABELS, 9)

    return cfg, data_loader_iter, anchors, matcher, raw_matcher
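This `setup` helper relies on a module-level `device` and the usual detectron2 builders being imported; `TopKMatcher` is project-specific. A hedged usage sketch (the config path below is a placeholder, not taken from the example):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    cfg, data_loader_iter, anchors, matcher, raw_matcher = setup("path/to/rpn_config.yaml")
    print(len(anchors), "anchors built for features", cfg.MODEL.RPN.IN_FEATURES)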
Example #10
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # loss weight
        self.instance_loss_weight = cfg.MODEL.SOGNET.INSTANCE_LOSS_WEIGHT

        # options when combining instance & semantic outputs
        # TODO: build inference
        self.stuff_area_limit = cfg.MODEL.SOGNET.POSTPROCESS.STUFF_AREA_LIMIT
        self.stuff_num_classes = (cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES -
                                  cfg.MODEL.ROI_HEADS.NUM_CLASSES)

        self.combine_on = cfg.MODEL.SOGNET.COMBINE.ENABLED
        if self.combine_on:
            self.combine_overlap_threshold = cfg.MODEL.SOGNET.COMBINE.OVERLAP_THRESH
            self.combine_stuff_area_limit = cfg.MODEL.SOGNET.COMBINE.STUFF_AREA_LIMIT
            self.combine_instances_confidence_threshold = (
                cfg.MODEL.SOGNET.COMBINE.INSTANCES_CONFIDENCE_THRESH)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.sem_seg_head = build_sem_seg_head(cfg,
                                               self.backbone.output_shape())
        self.panoptic_head = build_panoptic_head(cfg)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #11
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.cfg = cfg

        # fmt: off
        self.num_classes = cfg.MODEL.CENTERNET.NUM_CLASSES
        # Loss parameters:
        # Inference parameters:
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on
        self.backbone = build_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
        self.upsample = build_upsample_layers(cfg)
        self.head = build_head(cfg)
        # self.cls_head = cfg.build_cls_head(cfg)
        # self.wh_head = cfg.build_width_height_head(cfg)
        # self.reg_head = cfg.build_center_reg_head(cfg)

        # backbone_shape = self.backbone.output_shape()
        # feature_shapes = [backbone_shape[f] for f in self.in_features]

        self.mean, self.std = cfg.MODEL.PIXEL_MEAN, cfg.MODEL.PIXEL_STD
        # pixel_mean = torch.Tensor(self.mean).to(self.device).view(3, 1, 1)
        # pixel_std = torch.Tensor(self.std).to(self.device).view(3, 1, 1)
        # self.normalizer = lambda x: (x - pixel_mean) / pixel_std

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std

        self.to(self.device)
Example #12
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = (torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            num_channels, 1, 1))
        pixel_std = (torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1))
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)

        if cfg.MODEL.BACKBONE.FREEZE:
            for p in self.backbone.parameters():
                p.requires_grad = False
            print("froze backbone parameters")

        if cfg.MODEL.PROPOSAL_GENERATOR.FREEZE:
            for p in self.proposal_generator.parameters():
                p.requires_grad = False
            print("froze proposal generator parameters")

        if cfg.MODEL.ROI_HEADS.FREEZE_FEAT:
            for p in self.roi_heads.box_head.parameters():
                p.requires_grad = False
            print("froze roi_box_head parameters")
Example #13
    def __init__(self, cfg):
        super().__init__(cfg)
        self.in_features = cfg.MODEL.FCOS.IN_FEATURES

        # Loss parameters:
        # defined by method<get_ground_truth>
        self.num_points_per_level = None
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
        self.center_sampling_radius = cfg.MODEL.FCOS.CENTER_SAMPLING_RADIUS
        self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS

        self.focal_loss_alpha = cfg.MODEL.FCOS.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.FCOS.FOCAL_LOSS_GAMMA
        self.iou_loss_type = cfg.MODEL.FCOS.IOU_LOSS_TYPE

        # Inference parameters:
        self.score_thresh = 0.3
        self.pre_nms_thresh = cfg.MODEL.FCOS.INFERENCE_TH
        self.pre_nms_top_n = cfg.MODEL.FCOS.PRE_NMS_TOP_N
        self.nms_thresh = cfg.MODEL.FCOS.NMS_TH
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        self.min_size = 0
        self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = FCOSRetinaNetHead(cfg, feature_shapes)

        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
Example #14
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
        self.mask_head = build_dynamic_mask_head(cfg)
        self.mask_branch = build_mask_branch(cfg, self.backbone.output_shape())
        self.iuv_head = build_iuv_head(cfg)
        self.iuv_fea_dim = cfg.MODEL.CONDINST.IUVHead.CHANNELS
        self.s_ins_fea_dim = cfg.MODEL.CONDINST.MASK_HEAD.CHANNELS
        assert self.iuv_fea_dim+self.s_ins_fea_dim == cfg.MODEL.CONDINST.MASK_BRANCH.OUT_CHANNELS
        self.mask_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE
        self.max_proposals = cfg.MODEL.CONDINST.MAX_PROPOSALS

        # build top module
        in_channels = self.proposal_generator.in_channels_to_top_module

        self.controller = nn.Conv2d(
            in_channels, self.mask_head.num_gen_params,
            kernel_size=3, stride=1, padding=1
        )
        torch.nn.init.normal_(self.controller.weight, std=0.01)
        torch.nn.init.constant_(self.controller.bias, 0)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std

        self._init_densepose_head(cfg)

        self.to(self.device)
Example #15
    def __init__(self, cfg):
        super().__init__()

        self.image_size = cfg.MODEL.SSD.IMAGE_SIZE
        self.num_classes = cfg.MODEL.SSD.NUM_CLASSES
        self.in_features = cfg.MODEL.SSD.IN_FEATURES
        self.extra_layer_arch = cfg.MODEL.SSD.EXTRA_LAYER_ARCH["SIZE{}".format(
            self.image_size)]
        self.l2norm_scale = cfg.MODEL.SSD.L2NORM_SCALE
        # Loss parameters:
        self.loss_alpha = cfg.MODEL.SSD.LOSS_ALPHA
        self.smooth_l1_loss_beta = cfg.MODEL.SSD.SMOOTH_L1_LOSS_BETA
        self.negative_positive_ratio = cfg.MODEL.SSD.NEGATIVE_POSITIVE_RATIO
        # Inference parameters:
        self.score_threshold = cfg.MODEL.SSD.SCORE_THRESH_TEST
        self.nms_threshold = cfg.MODEL.SSD.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # Vis parameters
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]

        # Build extra layers
        self.extra_layers = self._make_extra_layers(
            feature_shapes[-1].channels, self.extra_layer_arch)
        extra_layer_channels = [
            c for c in self.extra_layer_arch if isinstance(c, int)
        ]
        feature_shapes += [
            ShapeSpec(channels=c) for c in extra_layer_channels[1::2]
        ]

        # Head
        self.head = SSDHead(cfg, feature_shapes)
        self.l2norm = L2Norm(backbone_shape[self.in_features[0]].channels,
                             self.l2norm_scale)
        self.default_box_generator = DefaultBox(cfg)
        self.default_boxes = self.default_box_generator()

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.SSD.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.SSD.IOU_THRESHOLDS,
            cfg.MODEL.SSD.IOU_LABELS,
            allow_low_quality_matches=False,
        )

        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))

        # Initialization
        self._init_weights()
Example #16
 def from_config(cls, cfg):
     backbone = build_backbone(cfg)
     backbone_shape = backbone.output_shape()
     backbone_level = cfg.MODEL.YOLOF.ENCODER.BACKBONE_LEVEL
     feature_shapes = [backbone_shape[backbone_level]]
     encoder = DilatedEncoder(cfg, backbone_shape)
     decoder = Decoder(cfg)
     anchor_generator = build_anchor_generator(cfg, feature_shapes)
     return {
         "backbone": backbone,
         "encoder": encoder,
         "decoder": decoder,
         "anchor_generator": anchor_generator,
         "box2box_transform": YOLOFBox2BoxTransform(
             weights=cfg.MODEL.YOLOF.BOX_TRANSFORM.BBOX_REG_WEIGHTS,
             add_ctr_clamp=cfg.MODEL.YOLOF.BOX_TRANSFORM.ADD_CTR_CLAMP,
             ctr_clamp=cfg.MODEL.YOLOF.BOX_TRANSFORM.CTR_CLAMP),
         "anchor_matcher": UniformMatcher(cfg.MODEL.YOLOF.MATCHER.TOPK),
         "pixel_mean": cfg.MODEL.PIXEL_MEAN,
         "pixel_std": cfg.MODEL.PIXEL_STD,
         "num_classes": cfg.MODEL.YOLOF.DECODER.NUM_CLASSES,
         "backbone_level": backbone_level,
         # Ignore thresholds:
         "pos_ignore_thresh": cfg.MODEL.YOLOF.POS_IGNORE_THRESHOLD,
         "neg_ignore_thresh": cfg.MODEL.YOLOF.NEG_IGNORE_THRESHOLD,
         # Loss parameters:
         "focal_loss_alpha": cfg.MODEL.YOLOF.LOSSES.FOCAL_LOSS_ALPHA,
         "focal_loss_gamma": cfg.MODEL.YOLOF.LOSSES.FOCAL_LOSS_GAMMA,
         "box_reg_loss_type": cfg.MODEL.YOLOF.LOSSES.BBOX_REG_LOSS_TYPE,
         # Inference parameters:
         "test_score_thresh": cfg.MODEL.YOLOF.SCORE_THRESH_TEST,
         "test_topk_candidates": cfg.MODEL.YOLOF.TOPK_CANDIDATES_TEST,
         "test_nms_thresh": cfg.MODEL.YOLOF.NMS_THRESH_TEST,
         "max_detections_per_image": cfg.MODEL.YOLOF.DETECTIONS_PER_IMAGE,
         # Vis parameters
         "vis_period": cfg.VIS_PERIOD,
         "input_format": cfg.INPUT.FORMAT,
     }
Example #17
    def test_rpn(self):
        torch.manual_seed(121)
        cfg = get_cfg()
        backbone = build_backbone(cfg)
        proposal_generator = RPN(cfg, backbone.output_shape())
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        image_shape = (15, 15)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
        gt_instances = Instances(image_shape)
        gt_instances.gt_boxes = Boxes(gt_boxes)
        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, [gt_instances[0], gt_instances[1]]
            )

        expected_losses = {
            "loss_rpn_cls": torch.tensor(0.0804563984),
            "loss_rpn_loc": torch.tensor(0.0990132466),
        }
        for name in expected_losses.keys():
            err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
                name, proposal_losses[name], expected_losses[name]
            )
            self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg)

        expected_proposal_boxes = [
            Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])),
            Boxes(
                torch.tensor(
                    [
                        [0, 0, 30, 20],
                        [0, 0, 16.7862777710, 13.1362524033],
                        [0, 0, 30, 13.3173446655],
                        [0, 0, 10.8602609634, 20],
                        [7.7165775299, 0, 27.3875980377, 20],
                    ]
                )
            ),
        ]

        expected_objectness_logits = [
            torch.tensor([0.1225359365, -0.0133192837]),
            torch.tensor([0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783, -0.0428492837]),
        ]

        for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip(
            proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits
        ):
            self.assertEqual(len(proposal), len(expected_proposal_box))
            self.assertEqual(proposal.image_size, im_size)
            self.assertTrue(
                torch.allclose(proposal.proposal_boxes.tensor, expected_proposal_box.tensor)
            )
            self.assertTrue(torch.allclose(proposal.objectness_logits, expected_objectness_logit))
Example #18
    def test_rpn(self):
        torch.manual_seed(121)
        cfg = get_cfg()
        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RPN"
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator"
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1)
        backbone = build_backbone(cfg)
        proposal_generator = build_proposal_generator(cfg,
                                                      backbone.output_shape())
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        image_shape = (15, 15)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]],
                                dtype=torch.float32)
        gt_instances = Instances(image_shape)
        gt_instances.gt_boxes = Boxes(gt_boxes)
        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, gt_instances)

        expected_losses = {
            "loss_rpn_cls": torch.tensor(0.0804563984),
            "loss_rpn_loc": torch.tensor(0.0990132466),
        }
        for name in expected_losses.keys():
            assert torch.allclose(proposal_losses[name], expected_losses[name])

        expected_proposal_boxes = [
            Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])),
            Boxes(
                torch.tensor([
                    [0, 0, 30, 20],
                    [0, 0, 16.7862777710, 13.1362524033],
                    [0, 0, 30, 13.3173446655],
                    [0, 0, 10.8602609634, 20],
                    [7.7165775299, 0, 27.3875980377, 20],
                ])),
        ]

        expected_objectness_logits = [
            torch.tensor([0.1225359365, -0.0133192837]),
            torch.tensor([
                0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783,
                -0.0428492837
            ]),
        ]

        for i in range(len(image_sizes)):
            assert len(proposals[i]) == len(expected_proposal_boxes[i])
            assert proposals[i].image_size == (image_sizes[i][0],
                                               image_sizes[i][1])
            assert torch.allclose(proposals[i].proposal_boxes.tensor,
                                  expected_proposal_boxes[i].tensor)
            assert torch.allclose(proposals[i].objectness_logits,
                                  expected_objectness_logits[i])
Example #19
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        self.scale_ranges = cfg.MODEL.SOLOV2.FPN_SCALE_RANGES
        self.strides = cfg.MODEL.SOLOV2.FPN_INSTANCE_STRIDES
        self.sigma = cfg.MODEL.SOLOV2.SIGMA

        # Instance parameters.
        self.num_classes = cfg.MODEL.SOLOV2.NUM_CLASSES
        self.num_kernels = cfg.MODEL.SOLOV2.NUM_KERNELS
        self.num_grids = cfg.MODEL.SOLOV2.NUM_GRIDS

        self.instance_in_features = cfg.MODEL.SOLOV2.INSTANCE_IN_FEATURES
        self.instance_strides = cfg.MODEL.SOLOV2.FPN_INSTANCE_STRIDES
        self.instance_in_channels = cfg.MODEL.SOLOV2.INSTANCE_IN_CHANNELS  # = fpn.
        self.instance_channels = cfg.MODEL.SOLOV2.INSTANCE_CHANNELS

        # Mask parameters.
        self.mask_on = cfg.MODEL.MASK_ON
        self.mask_in_features = cfg.MODEL.SOLOV2.MASK_IN_FEATURES
        self.mask_in_channels = cfg.MODEL.SOLOV2.MASK_IN_CHANNELS
        self.mask_channels = cfg.MODEL.SOLOV2.MASK_CHANNELS
        self.num_masks = cfg.MODEL.SOLOV2.NUM_MASKS

        # Inference parameters.
        self.max_before_nms = cfg.MODEL.SOLOV2.NMS_PRE
        self.score_threshold = cfg.MODEL.SOLOV2.SCORE_THR
        self.update_threshold = cfg.MODEL.SOLOV2.UPDATE_THR
        self.mask_threshold = cfg.MODEL.SOLOV2.MASK_THR
        self.max_per_img = cfg.MODEL.SOLOV2.MAX_PER_IMG
        self.nms_kernel = cfg.MODEL.SOLOV2.NMS_KERNEL
        self.nms_sigma = cfg.MODEL.SOLOV2.NMS_SIGMA
        self.nms_type = cfg.MODEL.SOLOV2.NMS_TYPE

        # build the backbone.
        self.backbone = build_backbone(cfg)
        backbone_shape = self.backbone.output_shape()

        # build the ins head.
        instance_shapes = [
            backbone_shape[f] for f in self.instance_in_features
        ]
        self.ins_head = SOLOv2InsHead(cfg, instance_shapes)

        # build the mask head.
        mask_shapes = [backbone_shape[f] for f in self.mask_in_features]
        self.mask_head = SOLOv2MaskHead(cfg, mask_shapes)

        # image transform
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #20
 def verify_rpn(self, conv_dims, expected_conv_dims):
     torch.manual_seed(121)
     cfg = get_cfg()
     cfg.MODEL.RPN.CONV_DIMS = conv_dims
     backbone = build_backbone(cfg)
     proposal_generator = RPN(cfg, backbone.output_shape())
     for k, conv in enumerate(proposal_generator.rpn_head.conv):
         self.assertEqual(expected_conv_dims[k], conv.out_channels)
     return proposal_generator
Example #21
    def __init__(self, cfg):
        super().__init__()

        # get the device of the model
        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.TENSOR_MASK.NUM_CLASSES
        self.in_features = cfg.MODEL.TENSOR_MASK.IN_FEATURES
        self.anchor_sizes = cfg.MODEL.ANCHOR_GENERATOR.SIZES
        self.num_levels = len(cfg.MODEL.ANCHOR_GENERATOR.SIZES)
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_GAMMA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.TENSOR_MASK.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.TENSOR_MASK.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.TENSOR_MASK.NMS_THRESH_TEST
        self.detections_im = cfg.TEST.DETECTIONS_PER_IMAGE
        # Mask parameters:
        self.mask_on = cfg.MODEL.MASK_ON
        self.mask_loss_weight = cfg.MODEL.TENSOR_MASK.MASK_LOSS_WEIGHT
        self.mask_pos_weight = torch.tensor(
            cfg.MODEL.TENSOR_MASK.POSITIVE_WEIGHT,
            dtype=torch.float32,
            device=self.device)
        self.bipyramid_on = cfg.MODEL.TENSOR_MASK.BIPYRAMID_ON
        # fmt: on

        # build the backbone
        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        feature_strides = [x.stride for x in feature_shapes]
        # build anchors
        self.anchor_generator = TensorMaskAnchorGenerator(cfg, feature_shapes)
        self.num_anchors = self.anchor_generator.num_cell_anchors[0]
        anchors_min_level = cfg.MODEL.ANCHOR_GENERATOR.SIZES[0]
        self.mask_sizes = [
            size // feature_strides[0] for size in anchors_min_level
        ]
        self.min_anchor_size = min(anchors_min_level) - feature_strides[0]

        # head of the TensorMask
        self.head = TensorMaskHead(cfg, self.num_levels, self.num_anchors,
                                   self.mask_sizes, feature_shapes)
        # box transform
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.TENSOR_MASK.BBOX_REG_WEIGHTS)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #22
    def __init__(self, cfg):
        super(RetinaNet, self).__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # Vis parameters
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        # fmt: on

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = RetinaNetHead(cfg, feature_shapes)
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
        """
    In Detectron1, loss is normalized by number of foreground samples in the batch.
    When batch size is 1 per GPU, #foreground has a large variance and
    using it lead to lower performance. Here we maintain an EMA of #foreground to
    stabilize the normalizer.
    """
        self.loss_normalizer = 100  # initialize with any reasonable #fg that's not too small
        self.loss_normalizer_momentum = 0.9
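The docstring above explains why an EMA of the foreground count is kept. The corresponding update, applied once per training step before the losses are normalized, is roughly the following (a sketch of the usual detectron2 RetinaNet pattern; `num_foreground` stands for the number of anchors matched to a ground-truth box in the current batch):

        # Sketch only: EMA update of the loss normalizer described in the docstring.
        self.loss_normalizer = (
            self.loss_normalizer_momentum * self.loss_normalizer
            + (1 - self.loss_normalizer_momentum) * num_foreground
        )
        # the classification / box regression losses are then divided by self.loss_normalizer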
Example #23
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #24
    def __init__(self, cfg):
        super().__init__()

        self.num_classes = cfg.MODEL.RETINAFACE.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINAFACE.IN_FEATURES
        # loss parameters
        self.focal_loss_alpha = cfg.MODEL.RETINAFACE.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINAFACE.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINAFACE.SMOOTH_L1_LOSS_BETA
        self.loc_weight = cfg.MODEL.RETINAFACE.LOC_WEIGHT
        # inference parameters
        self.score_threshold = cfg.MODEL.RETINAFACE.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINAFACE.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINAFACE.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # visualize parameters
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        self.backbone = build_backbone(cfg)
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = RetinaFaceHead(cfg, feature_shapes)
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.RETINAFACE.BBOX_REG_WEIGHTS
        )
        self.landmark2landmark_transform = Landmark2LandmarkTransform(
            weights=cfg.MODEL.RETINAFACE.LANDMARK_REG_WEIGHTS
        )
        self.matcher = Matcher(
            cfg.MODEL.RETINAFACE.IOU_THRESHOLDS,
            cfg.MODEL.RETINAFACE.IOU_LABELS,
            allow_low_quality_matches=True
        )
        self.register_buffer(
            "pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)
        )
        self.register_buffer(
            "pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)
        )

        """
        In Detectron1, loss is normalized by number of foreground samples in the 
        batch. When batch size is 1 per GPU, #foreground has a large variance and
        using it leads to lower performance. Here we maintain an EMA of #foreground
        to stabilize the normalizer.
        """
        # initialize with any reasonable #fg that's not too small
        self.loss_normalizer = 100
        self.loss_normalizer_momentum = 0.9
Example #25
    def __init__(self, cfg):
        super().__init__()

        self.num_classes = cfg.MODEL.CLSNET.NUM_CLASSES
        self.in_features = cfg.MODEL.CLSNET.IN_FEATURES
        self.bottom_up = build_backbone(cfg)
        self.criterion = nn.CrossEntropyLoss()

        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
Example #26
    def __init__(self, cfg):
        super().__init__()

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
Example #27
    def test_rroi_heads(self):
        torch.manual_seed(121)
        cfg = get_cfg()
        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
        cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
        backbone = build_backbone(cfg)
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}

        image_shape = (15, 15)
        gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]],
                                 dtype=torch.float32)
        gt_instance0 = Instances(image_shape)
        gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
        gt_instance0.gt_classes = torch.tensor([2, 1])
        gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]],
                                 dtype=torch.float32)
        gt_instance1 = Instances(image_shape)
        gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
        gt_instance1.gt_classes = torch.tensor([1, 2])
        gt_instances = [gt_instance0, gt_instance1]

        proposal_generator = build_proposal_generator(cfg,
                                                      backbone.output_shape())
        roi_heads = build_roi_heads(cfg, backbone.output_shape())

        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, gt_instances)
            _, detector_losses = roi_heads(images, features, proposals,
                                           gt_instances)

        expected_losses = {
            "loss_cls": torch.tensor(4.381618499755859),
            "loss_box_reg": torch.tensor(0.0011829272843897343),
        }
        for name in expected_losses.keys():
            err_msg = "detector_losses[{}] = {}, expected losses = {}".format(
                name, detector_losses[name], expected_losses[name])
            self.assertTrue(
                torch.allclose(detector_losses[name], expected_losses[name]),
                err_msg)
Example #28
    def __init__(self, cfg):
        super().__init__()

        self.mask_on = cfg.MODEL.MASK_ON

        self.backbone = build_backbone(cfg)
        self.position_embedding = build_position_encoding(cfg)

        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
Example #29
    def __init__(self, cfg):
        super().__init__()
        # fmt: off
        self.device                   = torch.device(cfg.MODEL.DEVICE)
        self.num_classes              = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features              = cfg.MODEL.RETINANET.IN_FEATURES
        # Mask parameters:
        self.discard_mask_area        = cfg.MODEL.YOLACT.DISCARD_MASK_AREA
        self.num_masks                = cfg.MODEL.YOLACT.NUM_MASKS
        # Loss parameters:
        self.sem_seg_alpha            = cfg.MODEL.YOLACT.SEM_SEG_ALPHA
        self.mask_alpha               = cfg.MODEL.YOLACT.MASK_ALPHA
        self.mask_reweight            = cfg.MODEL.YOLACT.MASK_REWEIGHT
        self.maskiou_alpha            = cfg.MODEL.YOLACT.MASKIOU_ALPHA
        self.focal_loss_alpha         = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma         = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta      = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold          = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates          = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold            = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on

        # retinanet_resnet_fpn_backbone
        self.backbone = build_backbone(cfg)
        # dict[str->ShapeSpec]
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        # base RetinaNet head with an added mask coefficient branch
        self.head = YolactHead(cfg, feature_shapes)
        # which backbone output feeds the protonet; see the official YOLACT's cfg.proto_src.
        # The official default is `res2`, but here it is `res3`.
        self.protonet = ProtoNet(cfg, feature_shapes[0])
        # mask scoring (MaskIoU) head
        self.maskiou_net = MaskIouNet(cfg)
        # auxiliary semantic segmentation branch to help training
        self.semantic_seg_conv = nn.Conv2d(feature_shapes[0].channels, self.num_classes, 1)
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #30
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        self.current_video = None
        self.frame_idx = 0

        if cfg.MODEL.SPATIOTEMPORAL.FREEZE_BACKBONE:
            self.freeze_component(self.backbone)

        if cfg.MODEL.SPATIOTEMPORAL.FREEZE_PROPOSAL_GENERATOR:
            self.freeze_component(self.proposal_generator)

        self.long_term = cfg.MODEL.SPATIOTEMPORAL.LONG_TERM
        self.temporal_dropout = cfg.MODEL.SPATIOTEMPORAL.TEMPORAL_DROPOUT
        self.num_frames = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES
        self.num_keyframes = cfg.MODEL.SPATIOTEMPORAL.NUM_KEYFRAMES
        self.keyframe_interval = cfg.MODEL.SPATIOTEMPORAL.KEYFRAME_INTERVAL
        self.reference_frame_idx = -1

        if cfg.MODEL.SPATIOTEMPORAL.FORWARD_AGGREGATION:
            # (f_{t-NUM_FRAMES}, ..., f_{t-1}, f_t, f_{t+1}, ..., f_{t+NUM_FRAMES})
            self.num_frames = (2 * self.num_frames) + 1
            self.reference_frame_idx = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES

        if self.temporal_dropout:
            assert cfg.MODEL.SPATIOTEMPORAL.FORWARD_AGGREGATION, "Temporal dropout requires forward aggregation."
        
        if self.temporal_dropout:
            self.reference_frame_idx = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES
            self.train_reference_frame_idx = 1
        else:
            self.train_reference_frame_idx = self.reference_frame_idx

        self.short_term_feature_buffer = deque(maxlen=self.num_frames)
        self.long_term_feature_buffer = deque(maxlen=self.num_keyframes)
        self.long_term_roi_buffer = deque(maxlen=self.num_keyframes)
        # RPN buffers
        self.predict_proposals = None
        self.predict_objectness_logits = None

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
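For concreteness on the frame-window arithmetic in the `FORWARD_AGGREGATION` branch above: with `cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES = 2`, the buffer holds five frames and the reference frame sits in the middle (a worked example, not from the source):

    # Sketch only: window of (f_{t-2}, f_{t-1}, f_t, f_{t+1}, f_{t+2})
    num_frames = (2 * 2) + 1      # -> 5 frames in the short-term buffer
    reference_frame_idx = 2       # index of f_t, i.e. cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES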