def __init__(self, cfg):
        super().__init__()

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        # self.auxiliary_proposal_generator = build_aux_proposal_generator(cfg, self.backbone.output_shape())
        self.auxiliary_proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
        self.auxiliary_roi_heads = build_roi_heads(cfg, self.backbone.output_shape())

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
Example #2
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
        self.mask_head = build_dynamic_mask_head(cfg)
        self.mask_branch = build_mask_branch(cfg, self.backbone.output_shape())
        self.iuv_head = build_iuv_head(cfg)
        self.iuv_fea_dim = cfg.MODEL.CONDINST.IUVHead.CHANNELS
        self.s_ins_fea_dim = cfg.MODEL.CONDINST.MASK_HEAD.CHANNELS
        assert self.iuv_fea_dim + self.s_ins_fea_dim == cfg.MODEL.CONDINST.MASK_BRANCH.OUT_CHANNELS
        self.mask_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE
        self.max_proposals = cfg.MODEL.CONDINST.MAX_PROPOSALS

        # build top module
        in_channels = self.proposal_generator.in_channels_to_top_module

        self.controller = nn.Conv2d(
            in_channels, self.mask_head.num_gen_params,
            kernel_size=3, stride=1, padding=1
        )
        torch.nn.init.normal_(self.controller.weight, std=0.01)
        torch.nn.init.constant_(self.controller.bias, 0)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std

        self._init_densepose_head(cfg)

        self.to(self.device)
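Note: the controller's out_channels (self.mask_head.num_gen_params) is the number of dynamic filter parameters generated per instance. A worked example under commonly used CondInst settings (assumed here, not read from the snippet: 8 mask-branch channels plus 2 relative-coordinate channels in, three dynamic 1x1 convs with 8 hidden channels):

# per-instance parameters of the three dynamic convs (weights + biases):
#   conv1: 10 * 8 + 8 = 88
#   conv2:  8 * 8 + 8 = 72
#   conv3:  8 * 1 + 1 =  9
num_gen_params = 88 + 72 + 9  # = 169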
Example #3
 def from_config(cls, cfg):
     backbone = build_backbone(cfg)
     return {
         "backbone": backbone,
         "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()),
         "roi_heads": build_roi_heads(cfg, backbone.output_shape()),
         "input_format": cfg.INPUT.FORMAT,
         "vis_period": cfg.VIS_PERIOD,
         "pixel_mean": cfg.MODEL.PIXEL_MEAN,
         "pixel_std": cfg.MODEL.PIXEL_STD,
         "kd_args": cfg.KD,
         "teacher": build_teacher(cfg),
         "teacher_input_format": cfg.TEACHER.INPUT.FORMAT,
         "teacher_pixel_mean": cfg.TEACHER.MODEL.PIXEL_MEAN,
         "teacher_pixel_std": cfg.TEACHER.MODEL.PIXEL_STD,
     }
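Note: a from_config like this is consumed by detectron2's @configurable decorator, which calls it on cfg and maps each returned key onto an __init__ keyword argument. A minimal sketch of the receiving side (class name and signature are illustrative, not the repo's actual ones):

import torch.nn as nn
from detectron2.config import configurable
from detectron2.modeling import build_backbone, build_proposal_generator, build_roi_heads

class DistilledRCNN(nn.Module):  # hypothetical class name
    @configurable
    def __init__(self, *, backbone, proposal_generator, roi_heads, teacher=None, **kwargs):
        super().__init__()
        self.backbone = backbone
        self.proposal_generator = proposal_generator
        self.roi_heads = roi_heads
        self.teacher = teacher

    @classmethod
    def from_config(cls, cfg):
        # trimmed version of the dict shown above
        backbone = build_backbone(cfg)
        return {
            "backbone": backbone,
            "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()),
            "roi_heads": build_roi_heads(cfg, backbone.output_shape()),
        }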
Example #4
    def __init__(self, cfg):
        super().__init__()
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())

        self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
Example #5
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        if cfg.MODEL.CONDINST.MASK_HEAD.USE_MULTI:
            from .dynamic_mask_head_multi import build_dynamic_mask_head
        else:
            from .dynamic_mask_head_old import build_dynamic_mask_head
        self.mask_head = build_dynamic_mask_head(cfg)
        self.mask_branch = build_mask_branch(cfg, self.backbone.output_shape())
        self.mask_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE
        self.max_proposals = cfg.MODEL.CONDINST.MAX_PROPOSALS

        # build top module
        in_channels = self.proposal_generator.in_channels_to_top_module

        self.controller = nn.Conv2d(in_channels,
                                    self.mask_head.num_gen_params,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        torch.nn.init.normal_(self.controller.weight, std=0.01)
        torch.nn.init.constant_(self.controller.bias, 0)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #6
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = (torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            num_channels, 1, 1))
        pixel_std = (torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1))
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)

        if cfg.MODEL.BACKBONE.FREEZE:
            for p in self.backbone.parameters():
                p.requires_grad = False
            print("froze backbone parameters")

        if cfg.MODEL.PROPOSAL_GENERATOR.FREEZE:
            for p in self.proposal_generator.parameters():
                p.requires_grad = False
            print("froze proposal generator parameters")

        if cfg.MODEL.ROI_HEADS.FREEZE_FEAT:
            for p in self.roi_heads.box_head.parameters():
                p.requires_grad = False
            print("froze roi_box_head parameters")
Example #7
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())

        self.refinement_head = build_edge_det_head(
            cfg, self.backbone.output_shape())

        self.mask_result_src = cfg.MODEL.DANCE.MASK_IN

        self.semantic_filter = cfg.MODEL.DANCE.SEMANTIC_FILTER
        self.semantic_filter_th = cfg.MODEL.DANCE.SEMANTIC_FILTER_TH

        self.need_concave_hull = cfg.MODEL.SNAKE_HEAD.LOSS_TYPE == "chamfer"

        self.roi_size = cfg.MODEL.DANCE.ROI_SIZE

        self.re_compute_box = cfg.MODEL.DANCE.RE_COMP_BOX

        self.visualize_path = cfg.MODEL.SNAKE_HEAD.VIS_PATH

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            -1, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            -1, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #8
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # loss weight
        self.instance_loss_weight = cfg.MODEL.SOGNET.INSTANCE_LOSS_WEIGHT

        # options when combining instance & semantic outputs
        # TODO: build inference
        self.stuff_area_limit = cfg.MODEL.SOGNET.POSTPROCESS.STUFF_AREA_LIMIT
        self.stuff_num_classes = (cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES -
                                  cfg.MODEL.ROI_HEADS.NUM_CLASSES)

        self.combine_on = cfg.MODEL.SOGNET.COMBINE.ENABLED
        if self.combine_on:
            self.combine_overlap_threshold = cfg.MODEL.SOGNET.COMBINE.OVERLAP_THRESH
            self.combine_stuff_area_limit = cfg.MODEL.SOGNET.COMBINE.STUFF_AREA_LIMIT
            self.combine_instances_confidence_threshold = (
                cfg.MODEL.SOGNET.COMBINE.INSTANCES_CONFIDENCE_THRESH)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.sem_seg_head = build_sem_seg_head(cfg,
                                               self.backbone.output_shape())
        self.panoptic_head = build_panoptic_head(cfg)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #9
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.mask_branch = build_mask_branch(cfg, self.backbone.output_shape())
        self.mask_pred = build_mask_pred(cfg)

        self.mask_out_stride = cfg.MODEL.EMBEDMASK.MASK_OUT_STRIDE

        self.max_proposals = cfg.MODEL.EMBEDMASK.MAX_PROPOSALS
        self.topk_proposals_per_im = cfg.MODEL.EMBEDMASK.TOPK_PROPOSALS_PER_IM

        self.mask_th = cfg.MODEL.EMBEDMASK.MASK_TH

        # build proposal head
        in_channels = self.proposal_generator.in_channels_to_top_module

        self.proposal_head = ProposalHead(cfg, in_channels)

        # build pixel head
        self.pixel_head = EmbedHead(
            cfg, cfg.MODEL.EMBEDMASK.MASK_BRANCH.OUT_CHANNELS)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #10
    def __init__(self, cfg):
        super().__init__()
        # Detectron2 expects a dict of ShapeSpec objects as input_shape
        input_shape = dict()
        for name, shape in zip(cfg.MODEL.RPN.IN_FEATURES, [4, 8, 16, 32]):
            input_shape[name] = ShapeSpec(channels=256, stride=shape)

        self.rpn = build_proposal_generator(cfg, input_shape=input_shape)

        self.roi_heads = build_roi_heads(cfg, input_shape)
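Note: hand-building input_shape this way decouples the RPN and ROI heads from any concrete backbone; the strides [4, 8, 16, 32] match typical FPN levels (p2-p5), and 256 channels is the usual FPN output width. An equivalent dict-comprehension sketch (assuming the same imports as above):

from detectron2.layers import ShapeSpec

input_shape = {
    name: ShapeSpec(channels=256, stride=stride)
    for name, stride in zip(cfg.MODEL.RPN.IN_FEATURES, [4, 8, 16, 32])
}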
Example #11
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #12
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        self.current_video = None
        self.frame_idx = 0

        if cfg.MODEL.SPATIOTEMPORAL.FREEZE_BACKBONE:
            self.freeze_component(self.backbone)

        if cfg.MODEL.SPATIOTEMPORAL.FREEZE_PROPOSAL_GENERATOR:
            self.freeze_component(self.proposal_generator)

        self.long_term = cfg.MODEL.SPATIOTEMPORAL.LONG_TERM
        self.temporal_dropout = cfg.MODEL.SPATIOTEMPORAL.TEMPORAL_DROPOUT
        self.num_frames = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES
        self.num_keyframes = cfg.MODEL.SPATIOTEMPORAL.NUM_KEYFRAMES
        self.keyframe_interval = cfg.MODEL.SPATIOTEMPORAL.KEYFRAME_INTERVAL
        self.reference_frame_idx = -1

        if cfg.MODEL.SPATIOTEMPORAL.FORWARD_AGGREGATION:
            # (f_{t-NUM_FRAMES}, ..., f_{t-1}, f_t, f_{t+1}, ..., f_{t+NUM_FRAMES})
            self.num_frames = (2 * self.num_frames) + 1
            self.reference_frame_idx = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES

        if self.temporal_dropout:
            assert cfg.MODEL.SPATIOTEMPORAL.FORWARD_AGGREGATION, "Temporal dropout requires forward aggregation."
            self.reference_frame_idx = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES
            self.train_reference_frame_idx = 1
        else:
            self.train_reference_frame_idx = self.reference_frame_idx

        self.short_term_feature_buffer = deque(maxlen=self.num_frames)
        self.long_term_feature_buffer = deque(maxlen=self.num_keyframes)
        self.long_term_roi_buffer = deque(maxlen=self.num_keyframes)
        # RPN buffers
        self.predict_proposals = None
        self.predict_objectness_logits = None

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
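Note: the deque(maxlen=...) buffers above implement fixed-size sliding windows over frames: appending beyond maxlen silently evicts the oldest entry, so no manual trimming is needed. A minimal illustration:

from collections import deque

buf = deque(maxlen=3)
for t in range(5):
    buf.append(f"feat_{t}")
print(list(buf))  # ['feat_2', 'feat_3', 'feat_4']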
Example #13
 def from_config(cls, cfg):
     backbone = build_backbone(cfg)
     out_shape = backbone.output_shape()
     return {
         "backbone": backbone,
         "proposal_generator": build_proposal_generator(cfg, out_shape),
         "roi_heads": build_roi_heads(cfg, out_shape),
         "unsupervised_head": build_unsupervised_head(cfg, out_shape),
         "input_format": cfg.INPUT.FORMAT,
         "vis_period": cfg.VIS_PERIOD,
         "pixel_mean": cfg.MODEL.PIXEL_MEAN,
         "pixel_std": cfg.MODEL.PIXEL_STD,
     }
Example #14
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
        self.refinement_head = SnakeFPNHead(cfg, self.backbone.output_shape())
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)

        self.gt_input = cfg.TEST.GT_IN.WHAT if cfg.TEST.GT_IN.ON else (None,)
Example #15
def build_teacher(cfg):
    teacher_cfg = cfg.TEACHER
    backbone = build_backbone(teacher_cfg)
    if 'Retina' not in teacher_cfg.MODEL.META_ARCHITECTURE:
        proposal_generator = build_proposal_generator(teacher_cfg,
                                                      backbone.output_shape())
        roi_heads = build_roi_heads(teacher_cfg, backbone.output_shape())
    else:
        proposal_generator = None
        roi_heads = None
    teacher = Teacher(backbone, proposal_generator, roi_heads)
    for param in teacher.parameters():
        param.requires_grad = False
    return teacher
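Note: requires_grad = False stops parameter updates, but a distillation teacher is usually also put into inference mode so BatchNorm and Dropout behave deterministically. A hedged companion sketch (not part of the snippet above; the call is illustrative):

import torch

teacher = build_teacher(cfg)
teacher.eval()  # fix BN running stats, disable dropout
with torch.no_grad():  # skip building the autograd graph for teacher passes
    teacher_features = teacher.backbone(images)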
Example #16
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.bua_caffe = cfg.MODEL.BUA.CAFFE
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        self.extract_on = cfg.MODEL.BUA.EXTRACT_FEATS
        self.extractor = cfg.MODEL.BUA.EXTRACTOR
        self.to(self.device)
Example #17
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #18
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.refinement_head = build_edge_det_head(
            cfg, self.backbone.output_shape())

        self.visualize_path = cfg.MODEL.DANCE.VIS_PATH

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            -1, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            -1, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #19
    def __init__(self, cfg):
        super().__init__()
        # pylint: disable=no-member
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape()
        )
        self.from_config(cfg)
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.ss_head = build_ss_head(
            cfg, self.backbone.bottom_up.output_shape()
        )

        for i, head in enumerate(self.ss_head):
            setattr(self, "ss_head_{}".format(i), head)

        self.to(self.device)
Example #20
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.instance_loss_weight = cfg.MODEL.BLENDMASK.INSTANCE_LOSS_WEIGHT

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.blender = build_blender(cfg)
        self.basis_module = build_basis_module(cfg,
                                               self.backbone.output_shape())

        # options when combining instance & semantic outputs
        self.combine_on = cfg.MODEL.PANOPTIC_FPN.COMBINE.ENABLED
        if self.combine_on:
            self.panoptic_module = build_sem_seg_head(
                cfg, self.backbone.output_shape())
            self.combine_overlap_threshold = cfg.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH
            self.combine_stuff_area_limit = cfg.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT
            self.combine_instances_confidence_threshold = (
                cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH)

        # build top module
        in_channels = cfg.MODEL.FPN.OUT_CHANNELS
        num_bases = cfg.MODEL.BASIS_MODULE.NUM_BASES
        attn_size = cfg.MODEL.BLENDMASK.ATTN_SIZE
        attn_len = num_bases * attn_size * attn_size
        self.top_layer = nn.Conv2d(in_channels,
                                   attn_len,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        torch.nn.init.normal_(self.top_layer.weight, std=0.01)
        torch.nn.init.constant_(self.top_layer.bias, 0)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
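Note: attn_len grows quadratically with the attention size. Under commonly used BlendMask settings (assumed here, not read from the config) of NUM_BASES = 4 and ATTN_SIZE = 14, the top layer predicts:

attn_len = 4 * 14 * 14  # = 784 attention coefficients per spatial location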
Example #21
    def __init__(self, cfg):
        super().__init__()

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))

        self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES

        self.support_way = cfg.INPUT.FS.SUPPORT_WAY
        self.support_shot = cfg.INPUT.FS.SUPPORT_SHOT
Example #22
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.mask_head = build_dynamic_mask_head(cfg)
        self.mask_branch = build_mask_branch(cfg, self.backbone.output_shape())

        self.mask_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE

        self.max_proposals = cfg.MODEL.CONDINST.MAX_PROPOSALS
        self.topk_proposals_per_im = cfg.MODEL.CONDINST.TOPK_PROPOSALS_PER_IM

        # boxinst configs
        self.boxinst_enabled = cfg.MODEL.BOXINST.ENABLED
        self.bottom_pixels_removed = cfg.MODEL.BOXINST.BOTTOM_PIXELS_REMOVED
        self.pairwise_size = cfg.MODEL.BOXINST.PAIRWISE.SIZE
        self.pairwise_dilation = cfg.MODEL.BOXINST.PAIRWISE.DILATION
        self.pairwise_color_thresh = cfg.MODEL.BOXINST.PAIRWISE.COLOR_THRESH

        # build top module
        in_channels = self.proposal_generator.in_channels_to_top_module

        self.controller = nn.Conv2d(in_channels,
                                    self.mask_head.num_gen_params,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        torch.nn.init.normal_(self.controller.weight, std=0.01)
        torch.nn.init.constant_(self.controller.bias, 0)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #23
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.attention = build_attention(cfg)
        self.mse_loss = nn.MSELoss(
            reduction="sum") if cfg.MODEL.ATTENTION_LOSS else None
        self.mse_weight = cfg.MODEL.ATTENTION_LOSS_WEIGHT
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        self.tmp = nn.Linear(10, 10)

        with open(cfg.MODEL.TRANSFORM_CENTER, 'rb') as f:
            trans_center = pickle.load(f)
        trans_center['pos_center'] = torch.FloatTensor(
            trans_center['pos_center']).to(self.device)
        trans_center['neg_center'] = torch.FloatTensor(
            trans_center['neg_center']).to(self.device)
        self.trans_center = trans_center
        self.transformation = build_transformation()
        self.box_head = deepcopy(self.roi_heads.box_head)
        self.box_predictor = deepcopy(self.roi_heads.box_predictor)
        self.sl1_loss = nn.SmoothL1Loss(
            reduction="none") if cfg.MODEL.TRANSFORM_LOSS else None
        self.sl1_weight = cfg.MODEL.TRANSFORM_LOSS_WEIGHT
        self.reg_loss = cfg.MODEL.REG_LOSS

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #24
    def __init__(self, cfg):
        super().__init__()

        self.instance_loss_weight = cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT

        # options when combining instance & semantic outputs
        self.combine_on = cfg.MODEL.PANOPTIC_FPN.COMBINE.ENABLED
        self.combine_overlap_threshold = cfg.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH
        self.combine_stuff_area_limit = cfg.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT
        self.combine_instances_confidence_threshold = (
            cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH)

        self.backbone = build_backbone(cfg)
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.sem_seg_head = build_sem_seg_head(cfg,
                                               self.backbone.output_shape())

        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
Example #25
 def from_config(cls, cfg):
     backbone = build_backbone(cfg)
     return {
         "backbone": backbone,
         "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()),
         "roi_heads": build_roi_heads(cfg, backbone.output_shape()),
         "input_format": cfg.INPUT.FORMAT,
         "vis_period": cfg.VIS_PERIOD,
         "pixel_mean": cfg.MODEL.PIXEL_MEAN,
         "pixel_std": cfg.MODEL.PIXEL_STD,
         "has_cpg": "CSC" in cfg.MODEL.ROI_HEADS.NAME
         or "WSJDS" in cfg.MODEL.ROI_HEADS.NAME,
         # or "UWSODROIHeads" in cfg.MODEL.ROI_HEADS.NAME
     }
Example #26
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone = build_backbone(cfg)
        self.attention = build_attention(cfg)
        self.mse_loss = nn.MSELoss(
            reduction="sum") if cfg.MODEL.ATTENTION_LOSS else None
        self.mse_weight = cfg.MODEL.ATTENTION_LOSS_WEIGHT
        self.proposal_generator = build_proposal_generator(
            cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        self.tmp = nn.Linear(10, 10)

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #27
    def test_rrpn(self):
        torch.manual_seed(121)
        cfg = get_cfg()
        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
        cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
        cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]]
        cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]]
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
        backbone = build_backbone(cfg)
        proposal_generator = build_proposal_generator(cfg,
                                                      backbone.output_shape())
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        image_shape = (15, 15)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]],
                                dtype=torch.float32)
        gt_instances = Instances(image_shape)
        gt_instances.gt_boxes = RotatedBoxes(gt_boxes)
        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, [gt_instances[0], gt_instances[1]])

        expected_losses = {
            "loss_rpn_cls": torch.tensor(0.04291602224),
            "loss_rpn_loc": torch.tensor(0.145077362),
        }
        for name in expected_losses.keys():
            err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
                name, proposal_losses[name], expected_losses[name])
            self.assertTrue(
                torch.allclose(proposal_losses[name], expected_losses[name]),
                err_msg)

        expected_proposal_box = torch.tensor([
            [-1.77999556, 0.78155339, 68.04367828, 14.78156471, 60.59333801],
            [13.82740974, -1.50282836, 34.67269897, 29.19676590, -3.81942749],
            [8.10392570, -0.99071521, 145.39100647, 32.13126373, 3.67242432],
            [5.00000000, 4.57370186, 10.00000000, 9.14740372, 0.89196777],
        ])

        expected_objectness_logit = torch.tensor(
            [0.10924313, 0.09881870, 0.07649877, 0.05858029])

        torch.set_printoptions(precision=8, sci_mode=False)

        self.assertEqual(len(proposals), len(image_sizes))

        proposal = proposals[0]
        # There seems to be some randomness in the result across machines:
        # this test can be run 100 times on one machine with exactly the same
        # result, yet a different machine may produce slightly different
        # numbers, hence the atol here.
        err_msg = "computed proposal boxes = {}, expected {}".format(
            proposal.proposal_boxes.tensor, expected_proposal_box)
        self.assertTrue(
            torch.allclose(proposal.proposal_boxes.tensor[:4],
                           expected_proposal_box,
                           atol=1e-5),
            err_msg,
        )

        err_msg = "computed objectness logits = {}, expected {}".format(
            proposal.objectness_logits, expected_objectness_logit)
        self.assertTrue(
            torch.allclose(proposal.objectness_logits[:4],
                           expected_objectness_logit,
                           atol=1e-5),
            err_msg,
        )
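Note: the atol in these assertions absorbs cross-machine nondeterminism; torch.allclose(a, b, rtol, atol) tests |a - b| <= atol + rtol * |b| elementwise (rtol defaults to 1e-5). A tiny illustration:

import torch

a = torch.tensor([1.000004])
b = torch.tensor([1.000000])
print(torch.allclose(a, b, atol=1e-5))  # True: the 4e-6 gap is within tolerance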
Example #28
 def __init__(self, cfg, writer=None):
     super(SingleObjectDetector, self).__init__()
     self.cfg = cfg
     self.writer = writer
     self.alexnet = AlexNetExtractor()
     self.rpn = build_proposal_generator(cfg, self.alexnet.output_shape())
Example #29
    def test_rrpn(self):
        torch.manual_seed(121)
        cfg = get_cfg()
        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
        cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
        cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]]
        cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]]
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
        backbone = build_backbone(cfg)
        proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        image_shape = (15, 15)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32)
        gt_instances = Instances(image_shape)
        gt_instances.gt_boxes = RotatedBoxes(gt_boxes)
        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, [gt_instances[0], gt_instances[1]]
            )

        expected_losses = {
            "loss_rpn_cls": torch.tensor(0.043263837695121765),
            "loss_rpn_loc": torch.tensor(0.14432406425476074),
        }
        for name in expected_losses.keys():
            err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
                name, proposal_losses[name], expected_losses[name]
            )
            self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg)

        expected_proposal_boxes = [
            RotatedBoxes(
                torch.tensor(
                    [
                        [0.60189795, 1.24095452, 61.98131943, 18.03621292, -4.07244873],
                        [15.64940453, 1.69624567, 59.59749603, 16.34339333, 2.62692475],
                        [-3.02982378, -2.69752932, 67.90952301, 59.62455750, 59.97010040],
                        [16.71863365, 1.98309708, 35.61507797, 32.81484985, 62.92267227],
                        [0.49432933, -7.92979717, 67.77606201, 62.93098450, -1.85656738],
                        [8.00880814, 1.36017394, 121.81007385, 32.74150467, 50.44297409],
                        [16.44299889, -4.82221127, 63.39775848, 61.22503662, 54.12270737],
                        [5.00000000, 5.00000000, 10.00000000, 10.00000000, -0.76943970],
                        [17.64130402, -0.98095351, 61.40377808, 16.28918839, 55.53118134],
                        [0.13016054, 4.60568953, 35.80157471, 32.30180359, 62.52872086],
                        [-4.26460743, 0.39604485, 124.30079651, 31.84611320, -1.58203125],
                        [7.52815342, -0.91636634, 62.39784622, 15.45565224, 60.79549789],
                    ]
                )
            ),
            RotatedBoxes(
                torch.tensor(
                    [
                        [0.07734215, 0.81635046, 65.33510590, 17.34688377, -1.51821899],
                        [-3.41833067, -3.11320257, 64.17595673, 60.55617905, 58.27033234],
                        [20.67383385, -6.16561556, 63.60531998, 62.52315903, 54.85546494],
                        [15.00000000, 10.00000000, 30.00000000, 20.00000000, -0.18218994],
                        [9.22646523, -6.84775209, 62.09895706, 65.46472931, -2.74307251],
                        [15.00000000, 4.93451595, 30.00000000, 9.86903191, -0.60272217],
                        [8.88342094, 2.65560246, 120.95362854, 32.45022202, 55.75970078],
                        [16.39088631, 2.33887148, 34.78761292, 35.61492920, 60.81977463],
                        [9.78298569, 10.00000000, 19.56597137, 20.00000000, -0.86660767],
                        [1.28576660, 5.49873352, 34.93610382, 33.22600174, 60.51599884],
                        [17.58912468, -1.63270092, 62.96052551, 16.45713997, 52.91245270],
                        [5.64749718, -1.90428460, 62.37649155, 16.19474792, 61.09543991],
                        [0.82255805, 2.34931135, 118.83985901, 32.83671188, 56.50753784],
                        [-5.33874989, 1.64404404, 125.28501892, 33.35424042, -2.80731201],
                    ]
                )
            ),
        ]

        expected_objectness_logits = [
            torch.tensor(
                [
                    0.10111768,
                    0.09112845,
                    0.08466332,
                    0.07589971,
                    0.06650183,
                    0.06350251,
                    0.04299347,
                    0.01864817,
                    0.00986163,
                    0.00078543,
                    -0.04573630,
                    -0.04799230,
                ]
            ),
            torch.tensor(
                [
                    0.11373727,
                    0.09377633,
                    0.05281663,
                    0.05143715,
                    0.04040275,
                    0.03250912,
                    0.01307789,
                    0.01177734,
                    0.00038105,
                    -0.00540255,
                    -0.01194804,
                    -0.01461012,
                    -0.03061717,
                    -0.03599222,
                ]
            ),
        ]

        torch.set_printoptions(precision=8, sci_mode=False)

        for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip(
            proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits
        ):
            self.assertEqual(len(proposal), len(expected_proposal_box))
            self.assertEqual(proposal.image_size, im_size)
            # There seems to be some randomness in the result across machines:
            # this test can be run 100 times on one machine with exactly the
            # same result, yet a different machine may produce slightly
            # different numbers, hence the atol here.
            err_msg = "computed proposal boxes = {}, expected {}".format(
                proposal.proposal_boxes.tensor, expected_proposal_box.tensor
            )
            self.assertTrue(
                torch.allclose(
                    proposal.proposal_boxes.tensor, expected_proposal_box.tensor, atol=1e-5
                ),
                err_msg,
            )

            err_msg = "computed objectness logits = {}, expected {}".format(
                proposal.objectness_logits, expected_objectness_logit
            )
            self.assertTrue(
                torch.allclose(proposal.objectness_logits, expected_objectness_logit, atol=1e-5),
                err_msg,
            )