Example 1
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        """
        Args:
            in_channels (int): number of channels of the input feature
        """
        super(TextHead, self).__init__()
        # fmt: off
        pooler_resolution = cfg.MODEL.BATEXT.POOLER_RESOLUTION
        pooler_scales = cfg.MODEL.BATEXT.POOLER_SCALES
        sampling_ratio = cfg.MODEL.BATEXT.SAMPLING_RATIO
        conv_dim = cfg.MODEL.BATEXT.CONV_DIM
        num_conv = cfg.MODEL.BATEXT.NUM_CONV
        canonical_size = cfg.MODEL.BATEXT.CANONICAL_SIZE
        self.in_features = cfg.MODEL.BATEXT.IN_FEATURES
        self.voc_size = cfg.MODEL.BATEXT.VOC_SIZE
        recognizer = cfg.MODEL.BATEXT.RECOGNIZER
        self.top_size = cfg.MODEL.TOP_MODULE.DIM
        # fmt: on

        self.pooler = TopPooler(output_size=pooler_resolution,
                                scales=pooler_scales,
                                sampling_ratio=sampling_ratio,
                                pooler_type="BezierAlign",
                                canonical_box_size=canonical_size,
                                canonical_level=3,
                                assign_crit="bezier")

        conv_block = conv_with_kaiming_uniform(norm="BN", activation=True)
        tower = []
        for i in range(num_conv):
            tower.append(conv_block(conv_dim, conv_dim, 3, 1))
        self.tower = nn.Sequential(*tower)

        self.recognizer = build_recognizer(cfg, recognizer)
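
All of these snippets build their conv towers through the conv_with_kaiming_uniform factory. Below is a minimal sketch of that pattern, assuming the usual conv -> optional norm -> optional ReLU layout with kaiming_uniform_ weight initialisation; the name conv_with_kaiming_uniform_sketch and the norm handling are illustrative assumptions, not the library's implementation.

import torch
import torch.nn as nn


def conv_with_kaiming_uniform_sketch(norm=None, activation=False):
    """Illustrative stand-in for the conv_with_kaiming_uniform factory (sketch only)."""
    def make_conv(in_channels, out_channels, kernel_size, stride=1):
        conv = nn.Conv2d(
            in_channels, out_channels, kernel_size,
            stride=stride, padding=kernel_size // 2,
            bias=(norm is None),
        )
        # Kaiming-uniform initialisation, as the factory name suggests
        nn.init.kaiming_uniform_(conv.weight, a=1)
        if conv.bias is not None:
            nn.init.constant_(conv.bias, 0)
        layers = [conv]
        # only two norm types handled here; the real factory supports more
        if norm == "BN":
            layers.append(nn.BatchNorm2d(out_channels))
        elif norm == "GN":
            layers.append(nn.GroupNorm(32, out_channels))
        if activation:
            layers.append(nn.ReLU(inplace=True))
        return nn.Sequential(*layers)
    return make_conv


# usage mirroring the towers above: four 3x3 conv blocks at a fixed width
conv_block = conv_with_kaiming_uniform_sketch(norm="BN", activation=True)
tower = nn.Sequential(*[conv_block(256, 256, 3, 1) for _ in range(4)])
out = tower(torch.zeros(1, 256, 32, 32))  # -> torch.Size([1, 256, 32, 32])
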
Example 2
    def __init__(self, cfg, in_channels):
        super(CRNN, self).__init__()
        conv_func = conv_with_kaiming_uniform(norm="GN", activation=True)
        convs = []
        for i in range(2):
            convs.append(conv_func(in_channels, in_channels, 3, stride=(2, 1)))
        self.convs = nn.Sequential(*convs)
        self.rnn = BidirectionalLSTM(in_channels, in_channels, in_channels)
Example 3
    def __init__(self, cfg):
        super(MaskHead, self).__init__()

        conv_dim = cfg.MODEL.BATEXT.CONV_DIM

        conv_block = conv_with_kaiming_uniform(norm="BN", activation=True)
        convs = []
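        # first conv maps the 258-channel input down to conv_dim; the rest keep conv_dim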
        convs.append(conv_block(258, conv_dim, 3, 1))
        for i in range(3):
            convs.append(conv_block(conv_dim, conv_dim, 3, 1))
        self.mask_convs = nn.Sequential(*convs)
Example 4
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        """
        TODO: support deconv and variable channel width
        """
        # official protonet has a relu after each conv
        super().__init__()
        # fmt: off
        mask_dim = cfg.MODEL.BASIS_MODULE.NUM_BASES
        planes = cfg.MODEL.BASIS_MODULE.CONVS_DIM
        self.in_features = cfg.MODEL.BASIS_MODULE.IN_FEATURES
        self.loss_on = cfg.MODEL.BASIS_MODULE.LOSS_ON
        norm = cfg.MODEL.BASIS_MODULE.NORM
        num_convs = cfg.MODEL.BASIS_MODULE.NUM_CONVS
        self.visualize = cfg.MODEL.BLENDMASK.VISUALIZE
        # fmt: on

        feature_channels = {k: v.channels for k, v in input_shape.items()}

        conv_block = conv_with_kaiming_uniform(norm, True)  # conv -> norm -> relu
        self.refine = nn.ModuleList()
        for in_feature in self.in_features:
            self.refine.append(
                conv_block(feature_channels[in_feature], planes, 3, 1))
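        # tower: num_convs conv blocks, a 2x bilinear upsample, one more conv block,
        # and a final 1x1 conv projecting to mask_dim basis maps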
        tower = []
        for i in range(num_convs):
            tower.append(conv_block(planes, planes, 3, 1))
        tower.append(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
        tower.append(conv_block(planes, planes, 3, 1))
        tower.append(nn.Conv2d(planes, mask_dim, 1))
        self.add_module('tower', nn.Sequential(*tower))

        if self.loss_on:
            # fmt: off
            self.common_stride = cfg.MODEL.BASIS_MODULE.COMMON_STRIDE
            num_classes = cfg.MODEL.BASIS_MODULE.NUM_CLASSES + 1
            self.sem_loss_weight = cfg.MODEL.BASIS_MODULE.LOSS_WEIGHT
            # fmt: on

            inplanes = feature_channels[self.in_features[0]]
            self.seg_head = nn.Sequential(
                nn.Conv2d(inplanes, planes, kernel_size=3,
                          stride=1, padding=1, bias=False),
                nn.BatchNorm2d(planes),
                nn.ReLU(),
                nn.Conv2d(planes, planes, kernel_size=3,
                          stride=1, padding=1, bias=False),
                nn.BatchNorm2d(planes),
                nn.ReLU(),
                nn.Conv2d(planes, num_classes, kernel_size=1, stride=1))
Example 5
    def __init__(self, conv_dim, roi_size):
        super().__init__()

        height = roi_size[0]
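        # number of stride-(2, 1) convs needed to shrink the RoI height from `height` down to 4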
        downsample_level = math.log2(height) - 2
        assert math.isclose(downsample_level, int(downsample_level))
        downsample_level = int(downsample_level)

        conv_block = conv_with_kaiming_uniform(norm="BN", activation=True)
        convs = []
        for i in range(downsample_level):
            convs.append(conv_block(conv_dim, conv_dim, 3, stride=(2, 1)))
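        # a final (4, 1) conv with no padding collapses the remaining height of 4 to 1,
        # producing a width-wise feature sequence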
        convs.append(
            nn.Conv2d(conv_dim, conv_dim, kernel_size=(4, 1), bias=False))
        self.convs = nn.Sequential(*convs)
Example 6
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super().__init__()
        self.in_features = cfg.MODEL.CONDINST.MASK_BRANCH.IN_FEATURES
        self.sem_loss_on = cfg.MODEL.CONDINST.MASK_BRANCH.SEMANTIC_LOSS_ON
        self.num_outputs = cfg.MODEL.CONDINST.MASK_BRANCH.OUT_CHANNELS
        norm = cfg.MODEL.CONDINST.MASK_BRANCH.NORM
        num_convs = cfg.MODEL.CONDINST.MASK_BRANCH.NUM_CONVS
        channels = cfg.MODEL.CONDINST.MASK_BRANCH.CHANNELS
        self.out_stride = input_shape[self.in_features[0]].stride

        feature_channels = {k: v.channels for k, v in input_shape.items()}

        conv_block = conv_with_kaiming_uniform(norm, activation=True)

        self.refine = nn.ModuleList()
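        # one 3x3 conv block per input FPN level, projecting each to the common channel width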
        for in_feature in self.in_features:
            self.refine.append(conv_block(
                feature_channels[in_feature],
                channels, 3, 1
            ))

        tower = []
        for i in range(num_convs):
            tower.append(conv_block(
                channels, channels, 3, 1
            ))
        tower.append(nn.Conv2d(
            channels, max(self.num_outputs, 1), 1
        ))
        self.add_module('tower', nn.Sequential(*tower))

        if self.sem_loss_on:
            num_classes = cfg.MODEL.FCOS.NUM_CLASSES
            self.focal_loss_alpha = cfg.MODEL.FCOS.LOSS_ALPHA
            self.focal_loss_gamma = cfg.MODEL.FCOS.LOSS_GAMMA

            in_channels = feature_channels[self.in_features[0]]
            self.seg_head = nn.Sequential(
                conv_block(in_channels, channels, kernel_size=3, stride=1),
                conv_block(channels, channels, kernel_size=3, stride=1)
            )

            self.logits = nn.Conv2d(channels, num_classes, kernel_size=1, stride=1)

            prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
            bias_value = -math.log((1 - prior_prob) / prior_prob)
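            # focal-loss prior initialisation (as in RetinaNet): with PRIOR_PROB = 0.01
            # the bias is about -4.6, so initial class probabilities start near 0.01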
            torch.nn.init.constant_(self.logits.bias, bias_value)
Example 7
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        """
        TODO: support deconv and variable channel width
        """
        # official protonet has a relu after each conv
        super().__init__()
        # fmt: off
        mask_dim          = cfg.MODEL.FCPOSE.BASIS_MODULE.NUM_BASES
        planes            = cfg.MODEL.FCPOSE.BASIS_MODULE.CONVS_DIM
        self.device       = torch.device(cfg.MODEL.DEVICE)
        self.in_features  = ["p3", "p4", "p5"]
        self.loss_on      = True
        norm              = cfg.MODEL.FCPOSE.BASIS_MODULE.BN_TYPE #"SyncBN"
        num_convs         = 3
        self.visualize    = False
        # fmt: on

        feature_channels = {k: v.channels for k, v in input_shape.items()}

        conv_block = conv_with_kaiming_uniform(norm, True)  # conv -> norm -> relu
        self.refine = nn.ModuleList()
        for in_feature in self.in_features:
            self.refine.append(conv_block(
                feature_channels[in_feature], planes, 3, 1))
        tower = []
        for i in range(num_convs):
            tower.append(
                conv_block(planes, planes, 3, 1))
        tower.append(
            conv_block(planes, planes, 3, 1))
        tower.append(
            nn.Conv2d(planes, mask_dim+(2*17), 1))
        self.add_module('tower', nn.Sequential(*tower))

        if self.loss_on:
            # fmt: off
            self.common_stride       = cfg.MODEL.FCPOSE.BASIS_MODULE.COMMON_STRIDE
            self.num_classes         = cfg.MODEL.FCPOSE.BASIS_MODULE.NUM_CLASSES
            self.heatmap_loss_weight = cfg.MODEL.FCPOSE.BASIS_MODULE.LOSS_WEIGHT
            # self.focal_loss_alpha = cfg.MODEL.FCPOSE.BASIS_MODULE.FOCAL_LOSS_ALPHA
            # self.focal_loss_gamma = cfg.MODEL.FCPOSE.BASIS_MODULE.FOCAL_LOSS_GAMMA

            # fmt: on

            inplanes = feature_channels[self.in_features[0]]
            self.seg_head = nn.Sequential(
                conv_block(planes, planes, 3, 1),
                conv_block(planes, planes, 3, 1),
            )
            self.p3_logits = nn.Conv2d(planes, self.num_classes,
                                       kernel_size=1, stride=1)
            self.upsampler = nn.Sequential(
                ConvTranspose2d(planes + self.num_classes, planes, 8,
                                stride=4, padding=6 // 2 - 1),
                # get_norm(norm, planes),
                nn.ReLU(),
                # conv_block(planes, planes, 3, 1),
            )
            self.p1_logits = nn.Conv2d(planes, self.num_classes,
                                       kernel_size=3, stride=1, padding=1)

            prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
            bias_value = -math.log((1 - prior_prob) / prior_prob)
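            # note: bias_value is computed but not used below; the heatmap logits
            # are zero-initialised (with small normal weights) instead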
            torch.nn.init.constant_(self.p3_logits.bias, 0.0)
            torch.nn.init.normal_(self.p3_logits.weight, std=0.0001)
            torch.nn.init.constant_(self.p1_logits.bias, 0.0)
            torch.nn.init.normal_(self.p1_logits.weight, std=0.0001)