Example #1
0
 def from_config(cls, cfg):
     """Map a config to the constructor kwargs of a knowledge-distillation R-CNN.

     Builds the student detector components plus a separate teacher model
     (with its own input format / pixel statistics read from ``cfg.TEACHER``).
     """
     backbone = build_backbone(cfg)
     shape = backbone.output_shape()
     return {
         "backbone": backbone,
         "proposal_generator": build_proposal_generator(cfg, shape),
         "roi_heads": build_roi_heads(cfg, shape),
         "input_format": cfg.INPUT.FORMAT,
         "vis_period": cfg.VIS_PERIOD,
         "pixel_mean": cfg.MODEL.PIXEL_MEAN,
         "pixel_std": cfg.MODEL.PIXEL_STD,
         "kd_args": cfg.KD,
         "teacher": build_teacher(cfg),
         "teacher_input_format": cfg.TEACHER.INPUT.FORMAT,
         "teacher_pixel_mean": cfg.TEACHER.MODEL.PIXEL_MEAN,
         "teacher_pixel_std": cfg.TEACHER.MODEL.PIXEL_STD,
     }
    def __init__(self, cfg):
        """Generalized R-CNN with an auxiliary proposal-generator/ROI-heads pair
        built on the same backbone features as the primary pair."""
        super().__init__()

        self.backbone = build_backbone(cfg)
        shape = self.backbone.output_shape()
        self.proposal_generator = build_proposal_generator(cfg, shape)
        self.roi_heads = build_roi_heads(cfg, shape)
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        # Auxiliary branch: same builders, same backbone output shape.
        self.auxiliary_proposal_generator = build_proposal_generator(cfg, shape)
        self.auxiliary_roi_heads = build_roi_heads(cfg, shape)

        mean, std = cfg.MODEL.PIXEL_MEAN, cfg.MODEL.PIXEL_STD
        assert len(mean) == len(std)
        # Buffers (not parameters): move with .to()/state_dict but aren't trained.
        self.register_buffer("pixel_mean", torch.Tensor(mean).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.Tensor(std).view(-1, 1, 1))
Example #3
0
 def from_config(cls, cfg):
     """Map a config to constructor kwargs for a proposal-loading detector.

     No proposal generator is built; ``load_proposals`` signals that
     pre-computed proposals are used instead.  ``cpg`` is enabled for the
     ROI-head variants named below (presumably the weakly-supervised heads
     that need class-propagation maps — confirm against the head code).
     """
     backbone = build_backbone(cfg)
     return {
         "backbone": backbone,
         "proposal_generator": None,
         "load_proposals": cfg.MODEL.LOAD_PROPOSALS,
         "roi_heads": build_roi_heads(cfg, backbone.output_shape()),
         "input_format": cfg.INPUT.FORMAT,
         "vis_period": cfg.VIS_PERIOD,
         "pixel_mean": cfg.MODEL.PIXEL_MEAN,
         "pixel_std": cfg.MODEL.PIXEL_STD,
         # `True if ... else False` was redundant; `any()` already yields a bool.
         "cpg": any(
             key in cfg.MODEL.ROI_HEADS.NAME
             for key in ("CSC", "WSJDS", "XROIHeads")
         ),
     }
Example #4
0
    def __init__(self, cfg):
        """SOGNet-style panoptic model: instance branch, semantic-seg head,
        and a panoptic fusion head sharing one backbone."""
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # loss weight
        self.instance_loss_weight = cfg.MODEL.SOGNET.INSTANCE_LOSS_WEIGHT

        # options when combining instance & semantic outputs
        # TODO: build inference
        self.stuff_area_limit = cfg.MODEL.SOGNET.POSTPROCESS.STUFF_AREA_LIMIT
        # "Stuff" classes = semantic classes that are not "thing" (ROI) classes.
        self.stuff_num_classes = (cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES -
                                  cfg.MODEL.ROI_HEADS.NUM_CLASSES)

        self.combine_on = cfg.MODEL.SOGNET.COMBINE.ENABLED
        if self.combine_on:
            self.combine_overlap_threshold = cfg.MODEL.SOGNET.COMBINE.OVERLAP_THRESH
            self.combine_stuff_area_limit = cfg.MODEL.SOGNET.COMBINE.STUFF_AREA_LIMIT
            self.combine_instances_confidence_threshold = (
                cfg.MODEL.SOGNET.COMBINE.INSTANCES_CONFIDENCE_THRESH)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.sem_seg_head = build_sem_seg_head(cfg,
                                               self.backbone.output_shape())
        self.panoptic_head = build_panoptic_head(cfg)

        # Channel count was hard-coded to 3; derive it from the config instead
        # (with the same length assert the sibling constructors in this file use)
        # so non-RGB inputs are supported.
        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #5
0
    def test_rroi_heads(self):
        """Regression test: RRPN + RROIHeads training losses on fixed inputs.

        The expected losses below are golden values tied to the RNG seed and
        to the exact order of random draws (each ``torch.rand`` call); do not
        reorder statements in this test.
        """
        torch.manual_seed(121)
        cfg = get_cfg()
        # Configure the rotated variants of the proposal generator / ROI heads.
        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
        cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        # Five regression weights: rotated boxes carry an extra angle term.
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        # Tiny synthetic "res4" feature map stands in for a real backbone output.
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

        image_shape = (15, 15)
        # 5-element rotated boxes (presumably (cx, cy, w, h, angle) —
        # detectron2's RotatedBoxes convention; confirm if porting).
        gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]],
                                 dtype=torch.float32)
        gt_instance0 = Instances(image_shape)
        gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
        gt_instance0.gt_classes = torch.tensor([2, 1])
        gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]],
                                 dtype=torch.float32)
        gt_instance1 = Instances(image_shape)
        gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
        gt_instance1.gt_classes = torch.tensor([1, 2])
        gt_instances = [gt_instance0, gt_instance1]

        proposal_generator = build_proposal_generator(cfg, feature_shape)
        roi_heads = build_roi_heads(cfg, feature_shape)

        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, gt_instances)
            _, detector_losses = roi_heads(images, features, proposals,
                                           gt_instances)

        # Compare the merged RPN + detector losses against golden values.
        detector_losses.update(proposal_losses)
        expected_losses = {
            "loss_cls": 4.365657806396484,
            "loss_box_reg": 0.0015851043863222003,
            "loss_rpn_cls": 0.2427729219198227,
            "loss_rpn_loc": 0.3646621108055115,
        }
        # Any loss key missing from expected_losses is compared against 0.0.
        succ = all(
            torch.allclose(detector_losses[name],
                           torch.tensor(expected_losses.get(name, 0.0)))
            for name in detector_losses.keys())
        self.assertTrue(
            succ,
            "Losses has changed! New losses: {}".format(
                {k: v.item()
                 for k, v in detector_losses.items()}),
        )
    def __init__(self, cfg):
        """Wrap detectron2's RPN and ROI heads behind a fabricated shape spec."""
        super().__init__()
        # Detectron2 expects a dict of ShapeSpec objects as input_shape;
        # synthesize one with 256 channels at strides 4/8/16/32.
        input_shape = {
            name: ShapeSpec(channels=256, stride=stride)
            for name, stride in zip(cfg.MODEL.RPN.IN_FEATURES, (4, 8, 16, 32))
        }

        self.rpn = build_proposal_generator(cfg, input_shape=input_shape)
        self.roi_heads = build_roi_heads(cfg, input_shape)
    def test_rroi_heads(self):
        """Regression test: RRPN + RROIHeads box-head losses with a real backbone.

        Golden loss values depend on the RNG seed and the exact order of
        random draws (backbone weight init, then each ``torch.rand`` call);
        do not reorder statements in this test.
        """
        torch.manual_seed(121)
        cfg = get_cfg()
        # Configure the rotated variants of the proposal generator / ROI heads.
        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
        cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        # Five regression weights: rotated boxes carry an extra angle term.
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
        backbone = build_backbone(cfg)
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        # Tiny synthetic "res4" feature map; the backbone itself is not run.
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}

        image_shape = (15, 15)
        # 5-element rotated boxes (presumably (cx, cy, w, h, angle) —
        # detectron2's RotatedBoxes convention; confirm if porting).
        gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]],
                                 dtype=torch.float32)
        gt_instance0 = Instances(image_shape)
        gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
        gt_instance0.gt_classes = torch.tensor([2, 1])
        gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]],
                                 dtype=torch.float32)
        gt_instance1 = Instances(image_shape)
        gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
        gt_instance1.gt_classes = torch.tensor([1, 2])
        gt_instances = [gt_instance0, gt_instance1]

        proposal_generator = build_proposal_generator(cfg,
                                                      backbone.output_shape())
        roi_heads = build_roi_heads(cfg, backbone.output_shape())

        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, gt_instances)
            _, detector_losses = roi_heads(images, features, proposals,
                                           gt_instances)

        # Only the box-head losses are pinned here (RPN losses are ignored).
        expected_losses = {
            "loss_cls": torch.tensor(4.381618499755859),
            "loss_box_reg": torch.tensor(0.0011829272843897343),
        }
        for name in expected_losses.keys():
            err_msg = "detector_losses[{}] = {}, expected losses = {}".format(
                name, detector_losses[name], expected_losses[name])
            self.assertTrue(
                torch.allclose(detector_losses[name], expected_losses[name]),
                err_msg)
Example #8
0
    def __init__(self, cfg):
        """Spatiotemporal R-CNN: a per-frame detector plus bounded buffers that
        aggregate features / ROIs across frames and keyframes of a video."""
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        # Video bookkeeping; reset when a new video starts.
        self.current_video = None
        self.frame_idx = 0

        if cfg.MODEL.SPATIOTEMPORAL.FREEZE_BACKBONE:
            self.freeze_component(self.backbone)

        if cfg.MODEL.SPATIOTEMPORAL.FREEZE_PROPOSAL_GENERATOR:
            self.freeze_component(self.proposal_generator)

        self.long_term = cfg.MODEL.SPATIOTEMPORAL.LONG_TERM
        self.temporal_dropout = cfg.MODEL.SPATIOTEMPORAL.TEMPORAL_DROPOUT
        self.num_frames = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES
        self.num_keyframes = cfg.MODEL.SPATIOTEMPORAL.NUM_KEYFRAMES
        self.keyframe_interval = cfg.MODEL.SPATIOTEMPORAL.KEYFRAME_INTERVAL
        self.reference_frame_idx = -1

        if cfg.MODEL.SPATIOTEMPORAL.FORWARD_AGGREGATION:
            # (f_{t-NUM_FRAMES}, ..., f_{t-1}, f_t, f_{t+1}, ..., f_{t+NUM_FRAMES})
            self.num_frames = (2 * self.num_frames) + 1
            self.reference_frame_idx = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES

        # Merged the two back-to-back `if self.temporal_dropout:` blocks the
        # original had into a single branch; behavior is unchanged.
        if self.temporal_dropout:
            assert cfg.MODEL.SPATIOTEMPORAL.FORWARD_AGGREGATION, \
                "Temporal dropout without forward aggregation."
            self.reference_frame_idx = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES
            self.train_reference_frame_idx = 1
        else:
            self.train_reference_frame_idx = self.reference_frame_idx

        # Bounded ring buffers: old entries fall off automatically.
        self.short_term_feature_buffer = deque(maxlen=self.num_frames)
        self.long_term_feature_buffer = deque(maxlen=self.num_keyframes)
        self.long_term_roi_buffer = deque(maxlen=self.num_keyframes)
        # RPN buffers
        self.predict_proposals = None
        self.predict_objectness_logits = None

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #9
0
 def from_config(cls, cfg):
     """Map a config to the constructor kwargs, including the extra
     unsupervised head built on the backbone's output shape."""
     backbone = build_backbone(cfg)
     shape = backbone.output_shape()
     kwargs = dict(
         backbone=backbone,
         proposal_generator=build_proposal_generator(cfg, shape),
         roi_heads=build_roi_heads(cfg, shape),
         unsupervised_head=build_unsupervised_head(cfg, shape),
         input_format=cfg.INPUT.FORMAT,
         vis_period=cfg.VIS_PERIOD,
         pixel_mean=cfg.MODEL.PIXEL_MEAN,
         pixel_std=cfg.MODEL.PIXEL_STD,
     )
     return kwargs
Example #10
0
def build_teacher(cfg):
    """Build a frozen teacher model from ``cfg.TEACHER``.

    'Retina'-style (single-stage) teachers get neither a proposal generator
    nor ROI heads; other (two-stage) teachers get both.  Every parameter is
    frozen so the teacher never receives gradient updates.
    """
    teacher_cfg = cfg.TEACHER
    backbone = build_backbone(teacher_cfg)
    # Idiom fix: `x not in y` instead of `not x in y`.
    if 'Retina' not in teacher_cfg.MODEL.META_ARCHITECTURE:
        proposal_generator = build_proposal_generator(teacher_cfg,
                                                      backbone.output_shape())
        roi_heads = build_roi_heads(teacher_cfg, backbone.output_shape())
    else:
        proposal_generator = None
        roi_heads = None
    teacher = Teacher(backbone, proposal_generator, roi_heads)
    for param in teacher.parameters():
        param.requires_grad = False
    return teacher
    def __init__(self, cfg):
        """Bottom-up-attention detector: backbone, RPN, and ROI heads, with
        flags controlling Caffe-compat mode and feature extraction."""
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.bua_caffe = cfg.MODEL.BUA.CAFFE
        self.backbone = build_backbone(cfg)
        shape = self.backbone.output_shape()
        self.proposal_generator = build_proposal_generator(cfg, shape)
        self.roi_heads = build_roi_heads(cfg, shape)

        # Sanity check: per-channel statistics must agree in length.
        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        # Feature-extraction mode flags.
        self.extract_on = cfg.MODEL.BUA.EXTRACT_FEATS
        self.extractor = cfg.MODEL.BUA.EXTRACTOR
        self.to(self.device)
Example #12
0
    def __init__(self, cfg):
        """Standard generalized R-CNN whose input normalization is a closure
        over per-channel mean/std tensors."""
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        shape = self.backbone.output_shape()
        self.proposal_generator = build_proposal_generator(cfg, shape)
        self.roi_heads = build_roi_heads(cfg, shape)
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        mean_cfg, std_cfg = cfg.MODEL.PIXEL_MEAN, cfg.MODEL.PIXEL_STD
        assert len(mean_cfg) == len(std_cfg)
        channels = len(mean_cfg)
        mean = torch.Tensor(mean_cfg).to(self.device).view(channels, 1, 1)
        std = torch.Tensor(std_cfg).to(self.device).view(channels, 1, 1)
        # Closure captures the tensors; they are not module buffers.
        self.normalizer = lambda x: (x - mean) / std
        self.to(self.device)
    def test_roi_heads(self):
        """Regression test: StandardROIHeads box-head losses on fixed inputs.

        Golden loss values depend on the RNG seed and the exact order of
        random draws (backbone weight init, then each ``torch.rand`` call);
        do not reorder statements in this test.
        """
        torch.manual_seed(121)
        cfg = get_cfg()
        cfg.MODEL.ROI_HEADS.NAME = "StandardROIHeads"
        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
        backbone = build_backbone(cfg)
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        # Tiny synthetic "res4" feature map; the backbone itself is not run.
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}

        image_shape = (15, 15)
        # Axis-aligned boxes (presumably (x1, y1, x2, y2) — detectron2's
        # Boxes convention; confirm if porting).
        gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]],
                                 dtype=torch.float32)
        gt_instance0 = Instances(image_shape)
        gt_instance0.gt_boxes = Boxes(gt_boxes0)
        gt_instance0.gt_classes = torch.tensor([2, 1])
        gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]],
                                 dtype=torch.float32)
        gt_instance1 = Instances(image_shape)
        gt_instance1.gt_boxes = Boxes(gt_boxes1)
        gt_instance1.gt_classes = torch.tensor([1, 2])
        gt_instances = [gt_instance0, gt_instance1]

        proposal_generator = build_proposal_generator(cfg,
                                                      backbone.output_shape())
        roi_heads = build_roi_heads(cfg, backbone.output_shape())

        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, gt_instances)
            _, detector_losses = roi_heads(images, features, proposals,
                                           gt_instances)

        # Only the box-head losses are pinned here (RPN losses are ignored).
        expected_losses = {
            "loss_cls": torch.tensor(4.4236516953),
            "loss_box_reg": torch.tensor(0.0091214813),
        }
        for name in expected_losses.keys():
            self.assertTrue(
                torch.allclose(detector_losses[name], expected_losses[name]))
Example #14
0
    def __init__(self, cfg):
        """R-CNN variant with an attention module (optional MSE supervision)
        and a feature-transformation branch that shadows the ROI box head."""
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.attention = build_attention(cfg)
        self.mse_loss = nn.MSELoss(
            reduction="sum") if cfg.MODEL.ATTENTION_LOSS else None
        self.mse_weight = cfg.MODEL.ATTENTION_LOSS_WEIGHT
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        # NOTE(review): `tmp` is never used in this constructor — confirm it
        # is needed elsewhere before removing.
        self.tmp = nn.Linear(10, 10)

        # Fix: the original `pickle.load(open(...))` never closed the file.
        # SECURITY: pickle can execute arbitrary code — only load trusted files.
        with open(cfg.MODEL.TRANSFORM_CENTER, 'rb') as f:
            trans_center = pickle.load(f)
        trans_center['pos_center'] = torch.FloatTensor(
            trans_center['pos_center']).to(self.device)
        trans_center['neg_center'] = torch.FloatTensor(
            trans_center['neg_center']).to(self.device)
        self.trans_center = trans_center
        self.transformation = build_transformation()
        # Deep copies: these heads get their own weights, independent of the
        # ones inside self.roi_heads.
        self.box_head = deepcopy(self.roi_heads.box_head)
        self.box_predictor = deepcopy(self.roi_heads.box_predictor)
        self.sl1_loss = nn.SmoothL1Loss(
            reduction="none") if cfg.MODEL.TRANSFORM_LOSS else None
        self.sl1_weight = cfg.MODEL.TRANSFORM_LOSS_WEIGHT
        self.reg_loss = cfg.MODEL.REG_LOSS

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #15
0
    def __init__(self, cfg):
        """Panoptic FPN: instance branch and semantic-seg head on one backbone,
        with options for fusing the two kinds of predictions."""
        super().__init__()

        self.instance_loss_weight = cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT

        # Options used when combining instance & semantic outputs.
        combine_cfg = cfg.MODEL.PANOPTIC_FPN.COMBINE
        self.combine_on = combine_cfg.ENABLED
        self.combine_overlap_threshold = combine_cfg.OVERLAP_THRESH
        self.combine_stuff_area_limit = combine_cfg.STUFF_AREA_LIMIT
        self.combine_instances_confidence_threshold = (
            combine_cfg.INSTANCES_CONFIDENCE_THRESH)

        self.backbone = build_backbone(cfg)
        shape = self.backbone.output_shape()
        self.proposal_generator = build_proposal_generator(cfg, shape)
        self.roi_heads = build_roi_heads(cfg, shape)
        self.sem_seg_head = build_sem_seg_head(cfg, shape)

        # Buffers (not parameters): saved in state_dict but never trained.
        mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)
        std = torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)
        self.register_buffer("pixel_mean", mean)
        self.register_buffer("pixel_std", std)
Example #16
0
    def __init__(self, cfg):
        """R-CNN variant with an extra attention module and optional summed-MSE
        attention supervision."""
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.attention = build_attention(cfg)
        # Attention supervision is disabled entirely when the flag is off.
        if cfg.MODEL.ATTENTION_LOSS:
            self.mse_loss = nn.MSELoss(reduction="sum")
        else:
            self.mse_loss = None
        self.mse_weight = cfg.MODEL.ATTENTION_LOSS_WEIGHT
        shape = self.backbone.output_shape()
        self.proposal_generator = build_proposal_generator(cfg, shape)
        self.roi_heads = build_roi_heads(cfg, shape)
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        # NOTE(review): `tmp` is unused in this constructor — confirm callers need it.
        self.tmp = nn.Linear(10, 10)

        mean_cfg, std_cfg = cfg.MODEL.PIXEL_MEAN, cfg.MODEL.PIXEL_STD
        assert len(mean_cfg) == len(std_cfg)
        channels = len(mean_cfg)
        mean = torch.Tensor(mean_cfg).to(self.device).view(channels, 1, 1)
        std = torch.Tensor(std_cfg).to(self.device).view(channels, 1, 1)
        self.normalizer = lambda x: (x - mean) / std
        self.to(self.device)