def test_dump_IR_tracing(self):
        cfg = get_cfg()
        cfg.MODEL.RESNETS.DEPTH = 18
        cfg.MODEL.RESNETS.RES2_OUT_CHANNELS = 64

        class Mod(nn.Module):
            def forward(self, x):
                # the backbone returns a dict of feature maps; tracing needs
                # tensor/tuple outputs, so convert the dict to a tuple
                return tuple(self.m(x).values())

        model = Mod()
        model.m = build_backbone(cfg)
        model.eval()

        with torch.no_grad():
            ts_model = torch.jit.trace(model, (torch.rand(2, 3, 224, 224), ))

        with tempfile.TemporaryDirectory(prefix="detectron2_test") as d:
            dump_torchscript_IR(ts_model, d)
            # check that the files are created
            for name in [
                    "model_ts_code", "model_ts_IR", "model_ts_IR_inlined",
                    "model"
            ]:
                fname = os.path.join(d, name + ".txt")
                self.assertTrue(os.stat(fname).st_size > 0, fname)
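The same helper works on any ScriptModule, not just traced backbones; a minimal standalone sketch, assuming detectron2 is installed (dump_torchscript_IR is re-exported from detectron2.export):

import os
import tempfile

import torch
from detectron2.export import dump_torchscript_IR

scripted = torch.jit.script(torch.nn.Linear(4, 2))  # any ScriptModule will do
with tempfile.TemporaryDirectory() as d:
    dump_torchscript_IR(scripted, d)
    print(sorted(os.listdir(d)))  # the model*.txt dumps checked in the test above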
Example #2
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.mask_on = cfg.MODEL.MASK_ON

        self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        self.num_classes = cfg.MODEL.MSBCNet.NUM_CLASSES
        self.num_proposals = cfg.MODEL.MSBCNet.NUM_PROPOSALS
        self.hidden_dim = cfg.MODEL.MSBCNet.HIDDEN_DIM
        self.num_heads = cfg.MODEL.MSBCNet.NUM_HEADS

        # Build Backbone.
        self.backbone = build_backbone(cfg)
        self.size_divisibility = self.backbone.size_divisibility

        # Build Proposals.
        self.init_proposal_features = nn.Embedding(self.num_proposals, self.hidden_dim)
        self.init_proposal_boxes = nn.Embedding(self.num_proposals, 4)
        # boxes are stored as normalized (cx, cy, w, h); initialize every
        # proposal to a box covering the whole image
        nn.init.constant_(self.init_proposal_boxes.weight[:, :2], 0.5)
        nn.init.constant_(self.init_proposal_boxes.weight[:, 2:], 1.0)

        # Build Dynamic Head.
        self.head = DynamicHead(cfg=cfg, roi_input_shape=self.backbone.output_shape())

        # Loss parameters:
        class_weight = cfg.MODEL.MSBCNet.CLASS_WEIGHT
        giou_weight = cfg.MODEL.MSBCNet.GIOU_WEIGHT
        l1_weight = cfg.MODEL.MSBCNet.L1_WEIGHT
        no_object_weight = cfg.MODEL.MSBCNet.NO_OBJECT_WEIGHT
        self.deep_supervision = cfg.MODEL.MSBCNet.DEEP_SUPERVISION
        self.use_focal = cfg.MODEL.MSBCNet.USE_FOCAL

        # Build Criterion.
        matcher = HungarianMatcher(cfg=cfg,
                                   cost_class=class_weight,
                                   cost_bbox=l1_weight,
                                   cost_giou=giou_weight,
                                   use_focal=self.use_focal)
        weight_dict = {"loss_ce": class_weight, "loss_bbox": l1_weight, "loss_giou": giou_weight}
        if self.deep_supervision:
            # replicate the loss weights for every intermediate head, producing
            # keys like "loss_ce_0" ... "loss_ce_{num_heads-2}"
            aux_weight_dict = {}
            for i in range(self.num_heads - 1):
                aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()})
            weight_dict.update(aux_weight_dict)

        losses = ["labels", "boxes"]

        self.criterion = SetCriterion(cfg=cfg,
                                      num_classes=self.num_classes,
                                      matcher=matcher,
                                      weight_dict=weight_dict,
                                      eos_coef=no_object_weight,
                                      losses=losses,
                                      use_focal=self.use_focal)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
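Note the two normalization styles across these examples: the closure-based lambda above bakes the device placement in at construction time, while Example #14 below registers the statistics as buffers so they follow the module through .to(device) and are saved in state_dict. A minimal sketch of the buffer-based variant (the Normalizer class name is illustrative, not from any of the snippets):

import torch
from torch import nn

class Normalizer(nn.Module):
    def __init__(self, mean, std):
        super().__init__()
        # buffers move with .to(device)/.cuda() and appear in state_dict,
        # unlike tensors captured in a lambda's closure
        self.register_buffer("pixel_mean", torch.tensor(mean).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.tensor(std).view(-1, 1, 1))

    def forward(self, x):
        # x: (C, H, W) or (N, C, H, W); broadcasting handles both
        return (x - self.pixel_mean) / self.pixel_std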
Example #3
    def __init__(self, cfg):
        nn.Module.__init__(self)
        self.backbone = build_backbone(cfg)
        self.out_features = ["out"]
        assert cfg.MODEL.BACKBONE.SIMPLE is True
        self.feature_strides = [cfg.MODEL.BACKBONE.STRIDE]
        self.num_channels = [cfg.MODEL.BACKBONE.CHANNEL]
        self.strides = [cfg.MODEL.BACKBONE.STRIDE]
Example #4
    def __init__(self, cfg):
        super().__init__()
        self.backbone = build_backbone(cfg)
        backbone_shape = self.backbone.output_shape()
        self.feature_strides = [
            backbone_shape[f].stride for f in backbone_shape.keys()
        ]
        self.num_channels = backbone_shape[list(backbone_shape.keys())[-1]].channels
Example #5
    def __init__(self, cfg):
        super(MaskedBackbone, self).__init__()
        self.backbone = build_backbone(cfg)
        backbone_shape = self.backbone.output_shape()
        self.strides = [backbone_shape[key].stride for key in backbone_shape.keys()]
        self.num_channels = backbone_shape[cfg.MODEL.RESNETS.OUT_FEATURES[-1]].channels
Example #6
    def __init__(self, cfg):
        super().__init__()
        self.mean, self.std = cfg.MODEL.PIXEL_MEAN, cfg.MODEL.PIXEL_STD
        self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())  # TODO: change to a more precise name
Example #7
    def __init__(self, cfg):
        super().__init__()
        self.backbone = build_backbone(cfg)
        backbone_shape = self.backbone.output_shape()
        self.feature_strides = [
            backbone_shape[f].stride for f in backbone_shape.keys()
        ]
        self.num_channels = backbone_shape[
            list(backbone_shape.keys())[cfg.MODEL.DETR.INDEX_FEEDFORWARD]].channels
Example #8
    def __init__(self, cfg=None):
        super(ClassificationBackbone, self).__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.num_classes = 1000

        self.backbone = build_backbone(cfg)

        self.to(self.device)
Example #9
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.mask_threshold = cfg.MODEL.ROI_MASK_HEAD.MASK_THRESHOLD
        self.nms = cfg.MODEL.ROI_MASK_HEAD.NMS
        self.depth_head_on = cfg.MODEL.DEPTH_ON
        if self.depth_head_on:
            self.depth_head = build_depth_head(cfg)
        self.camera_on = cfg.MODEL.CAMERA_ON
        if self.camera_on:
            self.camera_head = build_camera_head(cfg)
        self.input_format = cfg.INPUT.FORMAT

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = (torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            num_channels, 1, 1))
        pixel_std = (torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1))
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.embedding_on = cfg.MODEL.EMBEDDING_ON
        if self.embedding_on:
            self._asnet_on = (
                cfg.MODEL.ROI_EMBEDDING_HEAD.NAME == "EmbeddingRCNNASNetHead")
            self.embedding_loss_weight = cfg.MODEL.ROI_EMBEDDING_HEAD.LOSS_WEIGHT
            if cfg.MODEL.ROI_EMBEDDING_HEAD.LOSS_TYPE == "TripletLoss":
                if not self._asnet_on:
                    self.embedding_loss = OnlineTripletLoss(
                        cfg.MODEL.ROI_EMBEDDING_HEAD.MARGIN,
                        cfg.MODEL.DEVICE,
                        selector_type=cfg.MODEL.ROI_EMBEDDING_HEAD.TRIPLET_SELECTOR_TYPE,
                    )
                else:
                    self.embedding_loss = CooperativeTripletLoss(
                        cfg.MODEL.ROI_EMBEDDING_HEAD.MARGIN,
                        cfg.MODEL.DEVICE,
                        selector_type=cfg.MODEL.ROI_EMBEDDING_HEAD.TRIPLET_SELECTOR_TYPE,
                    )
            else:
                raise NotImplementedError
        self._eval_gt_box = cfg.TEST.EVAL_GT_BOX
        self.to(self.device)
        self._freeze = cfg.MODEL.FREEZE
        # freeze the requested submodules by walking each dotted attribute path
        for name in self._freeze:
            module = self
            for part in name.split("."):
                module = getattr(module, part)
            for param in module.parameters():
                param.requires_grad = False
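The freeze loop walks dotted attribute paths relative to the model, so the entries in cfg.MODEL.FREEZE name submodules directly. A hedged usage sketch (the config value and the optimizer filter are illustrative, not from the snippet):

cfg.MODEL.FREEZE = ["backbone"]
model = Model(cfg)  # "Model" stands in for the class defined above
# every parameter under model.backbone now has requires_grad == False
assert all(not p.requires_grad for p in model.backbone.parameters())
# a typical optimizer setup then skips the frozen parameters
optim = torch.optim.SGD((p for p in model.parameters() if p.requires_grad), lr=0.01)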
Example #10
    def __init__(self, cfg):
        nn.Module.__init__(self)
        self.backbone = build_backbone(cfg)
        self.out_features = cfg.MODEL.FBNET_V2.OUT_FEATURES
        # note: these read the backbone's private attributes; output_shape()
        # is the public accessor used in the other examples
        self.feature_strides = list(self.backbone._out_feature_strides.values())
        self.num_channels = [
            self.backbone._out_feature_channels[k] for k in self.out_features
        ]
        self.strides = [
            self.backbone._out_feature_strides[k] for k in self.out_features
        ]
Example #11
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        
        self.nms = cfg.MODEL.OneNet.NMS  # whether to apply non-maximum suppression; defaults to False
        # [res2, res3, res4, res5]
        self.in_features = cfg.MODEL.OneNet.IN_FEATURES
        # num_classes:80
        self.num_classes = cfg.MODEL.OneNet.NUM_CLASSES
        # 100 is the limit for coco
        self.num_boxes = cfg.TEST.DETECTIONS_PER_IMAGE

        # Build Backbone.
        # use resnet50 as backbone
        self.backbone = build_backbone(cfg)
        # default is 0
        self.size_divisibility = self.backbone.size_divisibility
        
        # Build Head.
        # return a class_logits and pred_boxes
        # backbone_shape: dict['res{k}': ShapeSpec(channels, ..., stride)]
        # describing each feature map's channels and current stride
        self.head = Head(cfg=cfg, backbone_shape=self.backbone.output_shape())

        # Loss parameters:
        # 2.0
        class_weight = cfg.MODEL.OneNet.CLASS_WEIGHT
        # 2.0
        giou_weight = cfg.MODEL.OneNet.GIOU_WEIGHT
        # 5.0 distance between center point
        l1_weight = cfg.MODEL.OneNet.L1_WEIGHT

        # Build Criterion.
        matcher = MinCostMatcher(cfg=cfg,
                                 cost_class=class_weight,
                                 cost_bbox=l1_weight,
                                 cost_giou=giou_weight)
        weight_dict = {"loss_ce": class_weight, "loss_bbox": l1_weight, "loss_giou": giou_weight}

        losses = ["labels", "boxes"]

        self.criterion = SetCriterion(cfg=cfg,
                                      num_classes=self.num_classes,
                                      matcher=matcher,
                                      weight_dict=weight_dict,
                                      losses=losses)
        # pixel_mean: list of 3 values
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
        # x: (C, H, W); normalizes the pixel values
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #12
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        
        self.nms = cfg.MODEL.OneNet.NMS
        self.in_features = cfg.MODEL.OneNet.IN_FEATURES
        self.num_classes = cfg.MODEL.OneNet.NUM_CLASSES
        self.num_boxes = cfg.TEST.DETECTIONS_PER_IMAGE
        self.head_type = cfg.MODEL.OneNet.HEAD
        
        # Build Backbone.
        self.backbone = build_backbone(cfg)
        self.size_divisibility = self.backbone.size_divisibility
        
        # Build Head.
        if self.head_type == "CenterNet":
            self.head = Head(cfg=cfg, backbone_shape=self.backbone.output_shape())
        elif self.head_type == 'RetinaNet':
            backbone_shape = self.backbone.output_shape()
            feature_shapes = [backbone_shape[f] for f in cfg.MODEL.OneNet.IN_FEATURES]
            self.head = RetinaHead(cfg=cfg, feature_shapes=feature_shapes)
        elif self.head_type == "FCOS":
            self.head = FCOSHead(cfg=cfg)
        else:
            raise NotImplementedError        
        
        # Build Criterion.
        matcher = MinCostMatcher(cfg=cfg,
                                 cost_class=cfg.MODEL.OneNet.CLASS_COST, 
                                 cost_bbox=cfg.MODEL.OneNet.L1_COST, 
                                 cost_giou=cfg.MODEL.OneNet.GIOU_COST)
        
        # Loss parameters:
        class_weight = cfg.MODEL.OneNet.CLASS_WEIGHT
        giou_weight = cfg.MODEL.OneNet.GIOU_WEIGHT
        l1_weight = cfg.MODEL.OneNet.L1_WEIGHT
        weight_dict = {"loss_ce": class_weight, "loss_bbox": l1_weight, "loss_giou": giou_weight}

        losses = ["labels", "boxes"]

        self.criterion = SetCriterion(cfg=cfg,
                                      num_classes=self.num_classes,
                                      matcher=matcher,
                                      weight_dict=weight_dict,
                                      losses=losses)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #13
    def __init__(self, cfg=None):
        super(Yolov3, self).__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.num_classes = cfg.MODEL.YOLOV3.NUM_CLASSES
        self.norm = cfg.MODEL.YOLOV3.NORM
        self.in_features = cfg.MODEL.YOLOV3.IN_FEATURES
        self.anchors = cfg.MODEL.ANCHOR_GENERATOR.SIZES

        # Inference parameters (reused from the RETINANET test-time settings):
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE

        self.backbone = build_backbone(cfg)

        self.head = Yolov3Head(in_features=self.in_features,
                               in_channels=[
                                   self.backbone._out_feature_channels[f]
                                   for f in self.in_features
                               ],
                               out_channels=cfg.MODEL.YOLOV3.HEAD.OUT_CHANNELS,
                               num_classes=self.num_classes,
                               num_anchors_per_cell=3,
                               norm=self.norm)
        backbone_shape = self.backbone.output_shape()
        self.feature_strides = [
            backbone_shape[f].stride for f in self.in_features
        ]
        self.feature_shapes = [backbone_shape[f] for f in self.in_features]

        self.anchor_generator = build_anchor_generator(cfg,
                                                       self.feature_shapes)
        self.grid_generator = build_grid_generator(cfg, self.feature_shapes)
        self.stride_generator = build_stride_generator(cfg,
                                                       self.feature_shapes)

        # self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        self.normalizer = lambda x: x / 255.0  # scale pixels to [0, 1] instead of mean/std normalization
        self.to(self.device)
        self.get_conv_bn_modules()

        self.bce_loss = nn.BCELoss()
        self.sigmoid = nn.Sigmoid()
Example #14
    def __init__(self, cfg):
        super().__init__()

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
Example #15
    def test_build_rpn_heads_with_rotated_anchor_generator(self):
        """ Make sure rpn heads work with rotated anchor generator"""

        self.assertGreater(len(rpn.RPN_HEAD_REGISTRY._obj_map), 0)

        for name, builder in rpn.RPN_HEAD_REGISTRY._obj_map.items():
            logger.info("Testing {}...".format(name))
            cfg = GeneralizedRCNNRunner().get_default_cfg()
            if name in RPN_CFGS:
                cfg.merge_from_file(RPN_CFGS[name])

            cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"

            backbone = build_backbone(cfg)
            backbone_shape = backbone.output_shape()
            rpn_input_shape = [
                backbone_shape[x] for x in cfg.MODEL.RPN.IN_FEATURES
            ]
            rpn_head = builder(cfg, rpn_input_shape)

            in_channels = list(backbone_shape.values())[0].channels
            anchor_generator = build_anchor_generator(cfg, rpn_input_shape)
            num_anchors = anchor_generator.num_cell_anchors[0]
            box_dim = anchor_generator.box_dim

            N, C_in, H, W = 2, in_channels, 24, 32
            input = torch.rand([N, C_in, H, W], dtype=torch.float32)
            LAYERS = len(cfg.MODEL.RPN.IN_FEATURES)
            out = rpn_head([input] * LAYERS)
            self.assertEqual(len(out), 2)
            logits, bbox_reg = out
            for idx in range(LAYERS):
                self.assertEqual(
                    logits[idx].shape,
                    torch.Size([
                        input.shape[0], num_anchors, input.shape[2],
                        input.shape[3]
                    ]),
                )
                self.assertEqual(
                    bbox_reg[idx].shape,
                    torch.Size([
                        logits[idx].shape[0],
                        num_anchors * box_dim,
                        logits[idx].shape[2],
                        logits[idx].shape[3],
                    ]),
                )
Example #16
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        self.nms = cfg.MODEL.OneNet.NMS
        self.in_features = cfg.MODEL.OneNet.IN_FEATURES
        self.num_classes = cfg.MODEL.OneNet.NUM_CLASSES
        self.num_boxes = cfg.TEST.DETECTIONS_PER_IMAGE

        # Build Backbone.
        self.backbone = build_backbone(cfg)
        self.size_divisibility = self.backbone.size_divisibility

        # Build Head.
        self.head = Head(cfg=cfg, backbone_shape=self.backbone.output_shape())

        # Loss parameters:
        class_weight = cfg.MODEL.OneNet.CLASS_WEIGHT
        giou_weight = cfg.MODEL.OneNet.GIOU_WEIGHT
        l1_weight = cfg.MODEL.OneNet.L1_WEIGHT

        # Build Criterion.
        matcher = MinCostMatcher(cfg=cfg,
                                 cost_class=class_weight,
                                 cost_bbox=l1_weight,
                                 cost_giou=giou_weight)
        weight_dict = {
            "loss_ce": class_weight,
            "loss_bbox": l1_weight,
            "loss_giou": giou_weight
        }

        losses = ["labels", "boxes"]

        self.criterion = SetCriterion(cfg=cfg,
                                      num_classes=self.num_classes,
                                      matcher=matcher,
                                      weight_dict=weight_dict,
                                      losses=losses)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #17
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            -1, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            -1, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)

        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
Example #18
    def __init__(self, cfg):
        super().__init__()
        self.backbone = build_backbone(cfg)
        self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape())
        self.ins_embed_head = build_ins_embed_branch(cfg, self.backbone.output_shape())
        self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
        self.meta = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
        self.stuff_area = cfg.MODEL.PANOPTIC_DEEPLAB.STUFF_AREA
        self.threshold = cfg.MODEL.PANOPTIC_DEEPLAB.CENTER_THRESHOLD
        self.nms_kernel = cfg.MODEL.PANOPTIC_DEEPLAB.NMS_KERNEL
        self.top_k = cfg.MODEL.PANOPTIC_DEEPLAB.TOP_K_INSTANCE
        self.predict_instances = cfg.MODEL.PANOPTIC_DEEPLAB.PREDICT_INSTANCES
Example #19
    def __init__(self, cfg):
        super().__init__()
        self.backbone = build_backbone(cfg)
        backbone_shape = self.backbone.output_shape()
        if cfg.MODEL.DETR.NUM_FEATURE_LEVELS > 1:
            self.strides = [8, 16, 32]
        else:
            self.strides = [32]

        if cfg.MODEL.RESNETS.RES5_DILATION == 2:
            # fix dilation from d2
            self.backbone.stages[-1][0].conv2.dilation = (1, 1)
            self.backbone.stages[-1][0].conv2.padding = (1, 1)
            self.strides[-1] = self.strides[-1] // 2

        self.feature_strides = [backbone_shape[f].stride for f in backbone_shape.keys()]
        self.num_channels = [backbone_shape[k].channels for k in backbone_shape.keys()]
Example #20
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #21
    def __init__(self, args, cfg, device, num_max_regions):
        super(RFGenerator, self).__init__()
        self.device = device
        self.cfg = cfg
        self.backbone = build_backbone(self.cfg)
        self.pooler_resolution = 14
        self.canonical_level = 4
        self.canonical_scale_factor = 2 ** self.canonical_level
        self.pooler_scales = (1 / self.canonical_scale_factor,)
        self.sampling_ratio = 0
        self.proposal_generator = build_proposal_generator(self.cfg, self.backbone.output_shape())
        self.roi_pooler = ROIPooler(
            output_size=self.pooler_resolution,
            scales=self.pooler_scales,
            sampling_ratio=self.sampling_ratio,
            pooler_type="ROIPool"
        )
        self.num_max_regions = num_max_regions
        self.args = args
Example #22
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # build feature extraction backbone
        self.backbone = build_backbone(cfg)

        # build classification model
        if cfg.MODEL.MULTI_TASK.CLASSIFICATION_ON:
            self.classifier_in_features = cfg.MODEL.MULTI_TASK.CLASSIFICATION_IN_FEATURES
            self.classifier = build_multilabel_classifier(cfg)
        else:
            self.classifier = None

        # build segmentation model
        if cfg.MODEL.MULTI_TASK.SEGMENTATION_ON:
            self.metal_segmentation_in_features = cfg.MODEL.MULTI_TASK.SEGMENTATION_IN_FEATURES
            self.metal_segmentation = build_metal_segmentation_model(cfg, self.backbone.out_feature_strides)
        else:
            self.metal_segmentation = None

        # build object detection model
        if cfg.MODEL.MULTI_TASK.DETECTION_ON:
            self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
            self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        else:
            self.proposal_generator = None
            self.roi_heads = None

        # TODO: build multi-task layer
        self.multi_loss_layer = build_multitask_loss_layer(cfg)

        # other setting
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)    # move everything to the configured device
Example #23
    def __init__(self, cfg):
        super().__init__()
        self.backbone = build_backbone(cfg)
        self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape())
        self.ins_embed_head = build_ins_embed_branch(cfg, self.backbone.output_shape())
        self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
        self.meta = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
        self.stuff_area = cfg.MODEL.PANOPTIC_DEEPLAB.STUFF_AREA
        self.threshold = cfg.MODEL.PANOPTIC_DEEPLAB.CENTER_THRESHOLD
        self.nms_kernel = cfg.MODEL.PANOPTIC_DEEPLAB.NMS_KERNEL
        self.top_k = cfg.MODEL.PANOPTIC_DEEPLAB.TOP_K_INSTANCE
        self.predict_instances = cfg.MODEL.PANOPTIC_DEEPLAB.PREDICT_INSTANCES
        self.input_format = cfg.INPUT.FORMAT
        self.use_depthwise_separable_conv = cfg.MODEL.PANOPTIC_DEEPLAB.USE_DEPTHWISE_SEPARABLE_CONV
        assert (
            cfg.MODEL.SEM_SEG_HEAD.USE_DEPTHWISE_SEPARABLE_CONV
            == cfg.MODEL.PANOPTIC_DEEPLAB.USE_DEPTHWISE_SEPARABLE_CONV
        )
        self.size_divisibility = cfg.MODEL.PANOPTIC_DEEPLAB.SIZE_DIVISIBILITY
        self.benchmark_network_speed = cfg.MODEL.PANOPTIC_DEEPLAB.BENCHMARK_NETWORK_SPEED
Example #24
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone_level = cfg.MODEL.YOLOF.ENCODER.BACKBONE_LEVEL
        self.backbone = build_backbone(cfg)
        self.nums_classes = cfg.MODEL.YOLOF.DECODER.NUM_CLASSES

        # build anchor generator
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[self.backbone_level]]
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # build encoder and decoder
        self.encoder = DilatedEncoder(cfg, backbone_shape)
        self.decoder = Decoder(cfg)

        # prepare ground truth
        self.box2box_transform = YOLOFBox2BoxTransform(
            weights=cfg.MODEL.YOLOF.BOX_TRANSFORM.BBOX_REG_WEIGHTS,
            add_ctr_clamp=cfg.MODEL.YOLOF.BOX_TRANSFORM.ADD_CTR_CLAMP,
            ctr_clamp=cfg.MODEL.YOLOF.BOX_TRANSFORM.CTR_CLAMP)
        self.anchor_matcher = UniformMatcher(cfg.MODEL.YOLOF.MATCHER.TOPK)
        self.test_score_thresh = 0.05
        self.test_nms_thresh = 0.6
        self.test_topk_candidates = 1000
        self.max_detections_per_image = 100

        # build loss
        self.losses = Losses(cfg)

        # get normalizer
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #25
    def __init__(self, cfg):
        super().__init__()

        in_channels = cfg.MODEL.AVOD.IN_CHANNELS
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.head = build_avod_head(cfg, in_channels)
        self.box_selector_test = build_avod_postprocessor(cfg)
        self.loss_evaluator = build_avod_loss_evaluator(cfg)
        self.fpn_strides = cfg.MODEL.AVOD.FPN_STRIDES
        self.in_features = cfg.MODEL.AVOD.IN_FEATURES

        self.backbone = build_backbone(cfg)

        #        backbone_shape = self.backbone.output_shape()
        #        feature_shapes = [backbone_shape[f] for f in self.in_features]

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #26
from detectron2.engine import DefaultPredictor
from detectron2.modeling import build_backbone
from detectron2.config import get_cfg

confidence_threshold = 0.5
config_file = "../detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml"
model_weights = "../detectron2/demo/faster_rcnn_R_101_C4_3x.pkl"

cfg = get_cfg()
cfg.merge_from_file(config_file)
cfg.MODEL.WEIGHTS = model_weights
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = confidence_threshold
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = confidence_threshold
cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = confidence_threshold
cfg.freeze()
backbone = build_backbone(cfg)
predictor = DefaultPredictor(cfg)
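# Note: the standalone `backbone` built above is freshly initialized;
# build_backbone(cfg) does not load cfg.MODEL.WEIGHTS. DefaultPredictor does
# load them, so getFeature below uses predictor.model.backbone instead.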

def getFeature(img, raw_boxes):
    """
    The input is the image and the bounding boxes;
    the output is a list containing one feature per bounding box.
    """
    raw_height, raw_width = img.shape[:2]
    image = predictor.transform_gen.get_transform(img).apply_image(img)
    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
    inputs = [{"image": image, "height": raw_height, "width": raw_width}]
    images = predictor.model.preprocess_image(inputs)
    features = predictor.model.backbone(images.tensor)
    new_height, new_width = image.shape[1:3]  # image is (C, H, W) after the transpose above
    scale_x = 1. * new_width / raw_width
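    # The snippet above is truncated; what follows is a hedged sketch of a
    # typical tail, not the original code. It maps raw_boxes into the resized
    # image and pools one feature per box through the C4 head
    # (detectron2.structures.Boxes and Res5ROIHeads._shared_roi_transform are
    # assumed to be available).
    scale_y = 1. * new_height / raw_height
    boxes = Boxes(torch.as_tensor(raw_boxes, dtype=torch.float32))
    boxes.scale(scale_x, scale_y)
    box_features = predictor.model.roi_heads._shared_roi_transform(
        [features[f] for f in predictor.model.roi_heads.in_features], [boxes]
    )
    return box_features.mean(dim=[2, 3])  # one (channels,) vector per box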
Example #27
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        self.nms = cfg.MODEL.OneNet.NMS
        self.in_features = cfg.MODEL.OneNet.IN_FEATURES
        self.num_classes = cfg.MODEL.OneNet.NUM_CLASSES
        self.num_boxes = cfg.TEST.DETECTIONS_PER_IMAGE

        # Build Backbone.
        self.backbone = build_backbone(cfg)
        self.size_divisibility = self.backbone.size_divisibility

        # Build Head.
        self.head = FCOSHead(cfg)
        self.mask_branch = build_mask_branch(cfg, self.backbone.output_shape())
        self.mask_head = build_dynamic_mask_head(cfg)

        # build top module: the controller predicts the dynamic mask head's
        # convolution parameters at each location (CondInst-style)
        in_channels = self.backbone.output_shape()[
            self.in_features[0]].channels
        self.mask_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE
        self.controller = nn.Conv2d(in_channels,
                                    self.mask_head.num_gen_params,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)

        # Loss parameters:
        class_weight = cfg.MODEL.OneNet.CLASS_WEIGHT
        giou_weight = cfg.MODEL.OneNet.GIOU_WEIGHT
        l1_weight = cfg.MODEL.OneNet.L1_WEIGHT
        mask_weight = 2

        # Build Criterion.
        matcher = MinCostMatcher(cfg=cfg,
                                 cost_class=class_weight,
                                 cost_bbox=l1_weight,
                                 cost_giou=giou_weight)
        weight_dict = {
            "loss_ce": class_weight,
            "loss_bbox": l1_weight,
            "loss_giou": giou_weight,
            "loss_mask": mask_weight
        }

        losses = ["labels", "boxes"]

        self.criterion = SetCriterion(cfg=cfg,
                                      num_classes=self.num_classes,
                                      matcher=matcher,
                                      weight_dict=weight_dict,
                                      losses=losses)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #28
    def __init__(self, cfg):
        super().__init__()

        self.cfg = cfg

        self.device = torch.device(cfg.MODEL.DEVICE)

        self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        self.num_classes = cfg.MODEL.ISTR.NUM_CLASSES
        self.num_proposals = cfg.MODEL.ISTR.NUM_PROPOSALS
        self.hidden_dim = cfg.MODEL.ISTR.HIDDEN_DIM
        self.num_heads = cfg.MODEL.ISTR.NUM_HEADS

        # Build Backbone.
        self.backbone = build_backbone(cfg)
        self.size_divisibility = self.backbone.size_divisibility

        # Build Proposals.
        self.pos_embeddings = nn.Embedding(self.num_proposals, self.hidden_dim)
        self.init_proposal_boxes = nn.Embedding(self.num_proposals, 4)
        nn.init.constant_(self.init_proposal_boxes.weight[:, :2], 0.5)
        nn.init.constant_(self.init_proposal_boxes.weight[:, 2:], 1.0)

        # --------
        self.IFE = ImgFeatExtractor(cfg)
        self.mask_encoding = PCAMaskEncoding(cfg)
        # encoding parameters.
        components_path = cfg.MODEL.ISTR.PATH_COMPONENTS
        # update parameters.
        parameters = np.load(components_path)
        components = nn.Parameter(torch.from_numpy(
            parameters['components_c'][0]).float().to(self.device),
                                  requires_grad=False)
        explained_variances = nn.Parameter(torch.from_numpy(
            parameters['explained_variance_c'][0]).float().to(self.device),
                                           requires_grad=False)
        means = nn.Parameter(torch.from_numpy(
            parameters['mean_c'][0]).float().to(self.device),
                             requires_grad=False)
        self.mask_encoding.components = components
        self.mask_encoding.explained_variances = explained_variances
        self.mask_encoding.means = means

        # Build Dynamic Head.
        self.head = DynamicHead(cfg=cfg,
                                roi_input_shape=self.backbone.output_shape())

        # Loss parameters:
        class_weight = cfg.MODEL.ISTR.CLASS_WEIGHT
        giou_weight = cfg.MODEL.ISTR.GIOU_WEIGHT
        l1_weight = cfg.MODEL.ISTR.L1_WEIGHT
        no_object_weight = cfg.MODEL.ISTR.NO_OBJECT_WEIGHT
        mask_weight = cfg.MODEL.ISTR.MASK_WEIGHT

        self.deep_supervision = cfg.MODEL.ISTR.DEEP_SUPERVISION

        # Build Criterion.
        matcher = HungarianMatcher(cfg=cfg,
                                   cost_class=class_weight,
                                   cost_bbox=l1_weight,
                                   cost_giou=giou_weight,
                                   cost_mask=mask_weight)
        weight_dict = {
            "loss_ce": class_weight,
            "loss_bbox": l1_weight,
            "loss_giou": giou_weight,
            "loss_feat": mask_weight,
            "loss_dice": mask_weight
        }
        if self.deep_supervision:
            aux_weight_dict = {}
            for i in range(self.num_heads - 1):
                aux_weight_dict.update(
                    {k + f"_{i}": v
                     for k, v in weight_dict.items()})
            weight_dict.update(aux_weight_dict)

        losses = ["labels", "boxes", "masks"]

        self.criterion = SetCriterion(cfg=cfg,
                                      num_classes=self.num_classes,
                                      matcher=matcher,
                                      weight_dict=weight_dict,
                                      eos_coef=no_object_weight,
                                      losses=losses)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #29
import torch
from detectron2.modeling import build_backbone, build_proposal_generator, build_roi_heads

from backbone import cfg
from backbone import build_shufflenetv2_fpn_backbone  # noqa: F401 -- imported for its side effect of registering the custom backbone

backbone_model = build_backbone(cfg)
print(backbone_model)
torch.save(backbone_model.state_dict(), '1.backbone.pth')

proposal_model = build_proposal_generator(cfg, backbone_model.output_shape())
print(proposal_model)
torch.save(proposal_model.state_dict(), '2.rpn.pth')

roi_model = build_roi_heads(cfg, backbone_model.output_shape())
print(roi_model)
torch.save(roi_model.state_dict(), '3.roi.pth')
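Loading the saved pieces back is symmetric; a minimal sketch, assuming the same cfg (and therefore the same architectures) used when saving:

backbone_model.load_state_dict(torch.load('1.backbone.pth'))
proposal_model.load_state_dict(torch.load('2.rpn.pth'))
roi_model.load_state_dict(torch.load('3.roi.pth'))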