Exemple #1
0
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()
        in_channels = input_shape[0].channels
        num_classes = cfg.MODEL.EFFICIENTDET.NUM_CLASSES
        norm = cfg.MODEL.EFFICIENTDET.HEAD.NORM
        bn_momentum = cfg.MODEL.EFFICIENTDET.HEAD.BN_MOMENTUM
        bn_eps = cfg.MODEL.EFFICIENTDET.HEAD.BN_EPS
        prior_prob = cfg.MODEL.EFFICIENTDET.HEAD.PRIOR_PROB
        memory_efficient = cfg.MODEL.EFFICIENTDET.HEAD.MEMORY_EFFICIENT_SWISH
        num_conv_layers = cfg.MODEL.EFFICIENTDET.HEAD.NUM_CONV
        num_anchors = cfg.build_anchor_generator(cfg,
                                                 input_shape).num_cell_anchors

        self.bn_momentum = bn_momentum
        self.bn_eps = bn_eps
        self.prior_prob = prior_prob

        assert (
            len(set(num_anchors)) == 1
        ), "Using different number of anchors between levels is not currently supported!"

        num_anchors = num_anchors[0]
        self.cls_subnet = nn.ModuleList([])
        self.bbox_subnet = nn.ModuleList([])
        for _ in range(num_conv_layers):
            self.cls_subnet.append(
                SeparableConvBlock(in_channels,
                                   in_channels,
                                   kernel_size=3,
                                   padding="SAME"))
            self.bbox_subnet.append(
                SeparableConvBlock(in_channels,
                                   in_channels,
                                   kernel_size=3,
                                   padding="SAME"))

        num_levels = len(input_shape)
        self.bn_cls_subnet = nn.ModuleList()
        self.bn_bbox_subnet = nn.ModuleList()
        for _ in range(num_levels):
            self.bn_cls_subnet.append(
                nn.ModuleList([
                    get_norm(norm, in_channels) for _ in range(num_conv_layers)
                ]))
            self.bn_bbox_subnet.append(
                nn.ModuleList([
                    get_norm(norm, in_channels) for _ in range(num_conv_layers)
                ]))

        self.cls_score = SeparableConvBlock(in_channels,
                                            num_anchors * num_classes,
                                            kernel_size=3,
                                            padding="SAME")
        self.bbox_pred = SeparableConvBlock(in_channels,
                                            num_anchors * 4,
                                            kernel_size=3,
                                            padding="SAME")
        self.act = MemoryEfficientSwish() if memory_efficient else Swish()
        self._init_weights()
Exemple #2
0
    def __init__(self, block_args, global_params):
        """
        Args:
            block_args (EasyDict): block args, see: class: `EfficientNet`.
            global_params (EasyDict): global args, see: class: `EfficientNet`.
        """
        super().__init__()
        self._block_args = block_args
        self.has_se = (block_args.se_ratio
                       is not None) and (0 < block_args.se_ratio <= 1)
        self.id_skip = block_args.id_skip

        # Expansion phase
        # number of input channels
        inp = block_args.in_channels
        # number of output channels
        oup = block_args.in_channels * block_args.expand_ratio
        if block_args.expand_ratio != 1:
            self._expand_conv = Conv2d(in_channels=inp,
                                       out_channels=oup,
                                       kernel_size=1,
                                       padding=0,
                                       bias=False)
            self._bn0 = get_norm(global_params.norm, out_channels=oup)

        # Depthwise convolution phase
        k = block_args.kernel_size
        s = block_args.stride
        self._depthwise_conv = Conv2d(in_channels=oup,
                                      out_channels=oup,
                                      groups=oup,
                                      kernel_size=k,
                                      stride=s,
                                      padding="SAME",
                                      bias=False)
        self._bn1 = get_norm(global_params.norm, out_channels=oup)

        # Squeeze and Excitation layer, if desired
        if self.has_se:
            num_squeezed_channels = max(
                1, int(block_args.in_channels * block_args.se_ratio))
            self._se_reduce = Conv2d(in_channels=oup,
                                     out_channels=num_squeezed_channels,
                                     kernel_size=1,
                                     padding=0)
            self._se_expand = Conv2d(in_channels=num_squeezed_channels,
                                     out_channels=oup,
                                     kernel_size=1,
                                     padding=0)

        # Output phase
        final_oup = block_args.out_channels
        self._project_conv = Conv2d(in_channels=oup,
                                    out_channels=final_oup,
                                    kernel_size=1,
                                    padding=0,
                                    bias=False)
        self._bn2 = get_norm(global_params.norm, final_oup)
        self._swish = MemoryEfficientSwish()
Exemple #3
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 *,
                 stride=1,
                 norm="BN",
                 activation=None,
                 **kwargs):
        """
        The standard block type for ResNet18 and ResNet34.
        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            stride (int): Stride for the first conv.
            norm (str or callable): A callable that takes the number of
                channels and returns a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
        """
        super().__init__(in_channels, out_channels, stride)

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        self.activation = get_activation(activation)

        self.conv1 = Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        self.conv2 = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
Exemple #4
0
 def __init__(self,
              in_channels,
              channels,
              kernel_size,
              stride=(1, 1),
              padding=(0, 0),
              dilation=(1, 1),
              groups=1,
              bias=True,
              radix=2,
              reduction_factor=4,
              rectify=False,
              rectify_avg=False,
              norm=None,
              dropblock_prob=0.0,
              **kwargs):
     super(SplAtConv2d, self).__init__()
     padding = _pair(padding)
     self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
     self.rectify_avg = rectify_avg
     inter_channels = max(in_channels * radix // reduction_factor, 32)
     self.radix = radix
     self.cardinality = groups
     self.channels = channels
     self.dropblock_prob = dropblock_prob
     if self.rectify:
         self.conv = RFConv2d(in_channels,
                              channels * radix,
                              kernel_size,
                              stride,
                              padding,
                              dilation,
                              groups=groups * radix,
                              bias=bias,
                              average_mode=rectify_avg,
                              **kwargs)
     else:
         self.conv = Conv2d(in_channels,
                            channels * radix,
                            kernel_size,
                            stride,
                            padding,
                            dilation,
                            groups=groups * radix,
                            bias=bias,
                            **kwargs)
     self.use_bn = norm is not None
     self.bn0 = get_norm(norm, channels * radix)
     self.relu = ReLU(inplace=True)
     self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality)
     self.bn1 = get_norm(norm, inter_channels)
     self.fc2 = Conv2d(inter_channels,
                       channels * radix,
                       1,
                       groups=self.cardinality)
     if dropblock_prob > 0.0:
         self.dropblock = DropBlock2D(dropblock_prob, 3)
Exemple #5
0
 def __init__(self,
              C_in,
              C_out,
              kernel_size,
              stride,
              padding,
              norm_layer,
              affine=True,
              input_size=None):
     super(SepConv, self).__init__()
     self.op = nn.Sequential(
         # depth wise
         Conv2d(C_in,
                C_in,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                groups=C_in,
                bias=False),
         # point wise
         Conv2d(C_in,
                C_in,
                kernel_size=1,
                padding=0,
                bias=False,
                norm=get_norm(norm_layer, C_in),
                activation=nn.ReLU()),
         # stack 2 separate depthwise-conv.
         Conv2d(C_in,
                C_in,
                kernel_size=kernel_size,
                stride=1,
                padding=padding,
                groups=C_in,
                bias=False),
         Conv2d(C_in,
                C_out,
                kernel_size=1,
                padding=0,
                bias=False,
                norm=get_norm(norm_layer, C_out)))
     self.flops = self.get_flop([kernel_size, kernel_size], stride, C_in,
                                C_out, affine, input_size[0], input_size[1])
     # using Kaiming init
     for m in self.op.modules():
         if isinstance(m, nn.Conv2d):
             weight_init.kaiming_init(m, mode='fan_in')
         elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
             if m.weight is not None:
                 nn.init.constant_(m.weight, 1)
             if m.bias is not None:
                 nn.init.constant_(m.bias, 0)
Exemple #6
0
    def __init__(self,
                 input_channels,
                 output_channels,
                 stride,
                 expand_ratio,
                 norm,
                 activation,
                 use_shortcut=True):
        super(InvertedResBlock, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        mid_channels = int(round(input_channels * expand_ratio))
        self.use_shortcut = use_shortcut

        if self.use_shortcut:
            assert stride == 1
            assert input_channels == output_channels

        conv_kwargs = {
            "norm": get_norm(norm, mid_channels),
            "activation": get_activation(activation)
        }

        layers = []
        if expand_ratio > 1:
            layers.append(
                Conv2d(
                    input_channels,
                    mid_channels,
                    1,
                    bias=False,  # Pixel-wise non-linear
                    **deepcopy(conv_kwargs)))

        layers += [
            Conv2d(
                mid_channels,
                mid_channels,
                3,
                padding=1,
                bias=False,  # Depth-wise 3x3
                stride=stride,
                groups=mid_channels,
                **deepcopy(conv_kwargs)),
            Conv2d(
                mid_channels,
                output_channels,
                1,
                bias=False,  # Pixel-wise linear
                norm=get_norm(norm, output_channels))
        ]
        self.conv = nn.Sequential(*layers)
Exemple #7
0
 def __init__(self,
              C_in,
              C_out,
              kernel_size,
              stride,
              padding,
              norm_layer,
              expansion=4,
              affine=True,
              input_size=None):
     super(MBConv, self).__init__()
     self.hidden_dim = expansion * C_in
     self.op = nn.Sequential(
         # pw
         Conv2d(C_in,
                self.hidden_dim,
                1,
                1,
                0,
                bias=False,
                norm=get_norm(norm_layer, self.hidden_dim),
                activation=nn.ReLU()),
         # dw
         Conv2d(self.hidden_dim,
                self.hidden_dim,
                kernel_size,
                stride,
                padding,
                groups=self.hidden_dim,
                bias=False,
                norm=get_norm(norm_layer, self.hidden_dim),
                activation=nn.ReLU()),
         # pw-linear without ReLU!
         Conv2d(self.hidden_dim,
                C_out,
                1,
                1,
                0,
                bias=False,
                norm=get_norm(norm_layer, C_out)))
     self.flops = self.get_flop([kernel_size, kernel_size], stride, C_in,
                                C_out, affine, input_size[0], input_size[1])
     # using Kaiming init
     for m in self.op.modules():
         if isinstance(m, nn.Conv2d):
             weight_init.kaiming_init(m, mode='fan_in')
         elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
             if m.weight is not None:
                 nn.init.constant_(m.weight, 1)
             if m.bias is not None:
                 nn.init.constant_(m.bias, 0)
Exemple #8
0
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super(MaskBranch, self).__init__()
        # fmt: off
        norm = cfg.MODEL.CONDINST.MASK_BRANCH.NORM
        in_features = cfg.MODEL.CONDINST.MASK_BRANCH.IN_FEATURES
        num_convs = cfg.MODEL.CONDINST.MASK_BRANCH.NUM_CONVS
        channels = cfg.MODEL.CONDINST.MASK_BRANCH.BRANCH_CHANNELS
        fpn_out_features = cfg.MODEL.FCOS.IN_FEATURES
        self.num_outputs = cfg.MODEL.CONDINST.MASK_BRANCH.OUT_CHANNELS
        # fmt: on

        self.in_features_inds = [
            fpn_out_features.index(in_fea_key) for in_fea_key in in_features
        ]
        self.out_stride = input_shape[in_features[0]].stride

        self.refine = nn.ModuleList()
        for in_feature in in_features:
            refine_i = [
                nn.Conv2d(input_shape[in_feature].channels,
                          channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=(norm is None)),
                get_norm(norm, channels),
                nn.ReLU(inplace=True)
            ]
            self.refine.append(nn.Sequential(*refine_i))

        mask_subnet = []
        for _ in range(num_convs):
            mask_subnet.append(
                nn.Conv2d(channels,
                          channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=(norm is None)))
            mask_subnet.append(get_norm(norm, channels))
            mask_subnet.append(nn.ReLU(inplace=True))

        mask_subnet.append(nn.Conv2d(channels, max(self.num_outputs, 1), 1))
        self.mask_subnet = nn.Sequential(*mask_subnet)

        # Initialization
        for modules in [self.refine, self.mask_subnet]:
            for layer in modules.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.kaiming_uniform_(layer.weight, a=1)
Exemple #9
0
    def _init_layers(self):
        ins_convs = []
        cate_convs = []
        for i in range(self.stacked_convs):
            # Mask branch
            chn = self.in_channels + 2 if i == 0 else self.seg_feat_channels
            ins_convs.append(
                nn.Conv2d(chn,
                          self.seg_feat_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=False if self.norm else True)
            )
            if self.norm:
                ins_convs.append(get_norm(self.norm, self.seg_feat_channels))
            ins_convs.append(nn.ReLU(inplace=True))

            # Category branch
            chn = self.in_channels if i == 0 else self.seg_feat_channels
            cate_convs.append(
                nn.Conv2d(chn,
                          self.seg_feat_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=False if self.norm else True)
            )
            if self.norm:
                cate_convs.append(get_norm(self.norm, self.seg_feat_channels))
            cate_convs.append(nn.ReLU(inplace=True))

        self.ins_convs = nn.Sequential(*ins_convs)
        self.cate_convs = nn.Sequential(*cate_convs)

        self.solo_ins_list = nn.ModuleList()
        for seg_num_grid in self.seg_num_grids:
            self.solo_ins_list.append(
                nn.Conv2d(
                    self.seg_feat_channels,
                    seg_num_grid ** 2,
                    kernel_size=1)
            )
        self.solo_cate = nn.Conv2d(
            self.seg_feat_channels,
            self.num_classes,
            kernel_size=3,
            stride=1,
            padding=1,
        )
Exemple #10
0
 def __init__(self,
              C_in,
              C_out,
              kernel_size,
              stride,
              padding,
              norm_layer,
              affine=True,
              input_size=None):
     super(BasicResBlock, self).__init__()
     self.op = Conv2d(C_in,
                      C_out,
                      kernel_size,
                      stride=stride,
                      padding=padding,
                      bias=False,
                      norm=get_norm(norm_layer, C_out))
     self.flops = self.get_flop([kernel_size, kernel_size], stride, C_in,
                                C_out, affine, input_size[0], input_size[1])
     # using Kaiming init
     for m in self.op.modules():
         if isinstance(m, nn.Conv2d):
             weight_init.kaiming_init(m, mode='fan_in')
         elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
             if m.weight is not None:
                 nn.init.constant_(m.weight, 1)
             if m.bias is not None:
                 nn.init.constant_(m.bias, 0)
Exemple #11
0
    def __init__(self,
                 in_channels=3,
                 out_channels=64,
                 norm="BN",
                 activation=None):
        """
        Args:
            norm (str or callable): a callable that takes the number of
                channels and return a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
        """
        super().__init__()
        self.conv1 = Conv2d(
            in_channels,
            out_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
        weight_init.c2_msra_fill(self.conv1)

        self.activation = get_activation(activation)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
Exemple #12
0
    def __init__(self, cfg):
        super(Classification, self).__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        self.network = cfg.build_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))

        self.network.stem = nn.Sequential(
            Conv2d(3,
                   64,
                   kernel_size=3,
                   stride=1,
                   padding=1,
                   bias=False,
                   norm=get_norm("BN", 64)),
            nn.ReLU(),
        )

        self.loss_evaluator = nn.CrossEntropyLoss()

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std

        self.to(self.device)
Exemple #13
0
    def _make_layers(self, num_blocks, **kwargs):
        """
        Create a vgg-net stage by creating many blocks(conv layers).

        Args:
            num_blocks (int): the number of conv layer in the stage.
            kwargs: other arguments, see: method:`__init__`.

        Returns:
            list[nn.Module]: a list of block module.
        """
        blocks = list()
        for _ in range(num_blocks):
            conv2d = nn.Conv2d(kwargs["in_channels"],
                               kwargs["out_channels"],
                               kernel_size=3,
                               padding=1)
            if kwargs["norm"]:
                blocks += [
                    conv2d,
                    get_norm(kwargs["norm"], kwargs["out_channels"]),
                    nn.ReLU(inplace=True)
                ]
            else:
                blocks += [conv2d, nn.ReLU(inplace=True)]
            kwargs["in_channels"] = kwargs["out_channels"]
        pool = nn.MaxPool2d(kernel_size=kwargs["pool_args"][0],
                            stride=kwargs["pool_args"][1],
                            padding=kwargs["pool_args"][2],
                            ceil_mode=kwargs["pool_args"][3])
        blocks.append(pool)
        return blocks
Exemple #14
0
    def __init__(self, cfg):
        super(Classification, self).__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        self.network = cfg.build_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
        self.network.stem = nn.Sequential(
            Conv2d(3,
                   64,
                   kernel_size=3,
                   stride=1,
                   padding=1,
                   bias=False,
                   norm=get_norm(cfg.MODEL.RESNETS.NORM, 64)),
            nn.ReLU(),
        )

        self.freeze()
        self.network.eval()

        # init the fc layer
        self.network.linear.weight.data.normal_(mean=0.0, std=0.01)
        self.network.linear.bias.data.zero_()

        self.loss_evaluator = nn.CrossEntropyLoss()

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            1, 3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            1, 3, 1, 1)
        self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std

        self.to(self.device)
Exemple #15
0
    def __init__(self, input_channels, output_channels, stride, expand_ratio,
                 norm, activation, use_shortcut=True):
        """
        Args:
            input_channels (int): the input channel number.
            output_channels (int): the output channel number.
            stride (int): the stride of the current block.
            expand_ratio(int): the channel expansion ratio for `mid_channels` in InvertedResBlock.
            norm (str or callable): a callable that takes the number of
                channels and return a `nn.Module`, or a pre-defined string
                (See cvpods.layer.get_norm for more details).
            activation (str): a pre-defined string
                (See cvpods.layer.get_activation for more details).
            use_shortcut (bool): whether to use the residual path.
        """
        super(InvertedResBlock, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        mid_channels = int(round(input_channels * expand_ratio))
        self.use_shortcut = use_shortcut

        if self.use_shortcut:
            assert stride == 1
            assert input_channels == output_channels

        conv_kwargs = {
            "norm": get_norm(norm, mid_channels),
            "activation": get_activation(activation)
        }

        layers = []
        if expand_ratio > 1:
            layers.append(
                Conv2d(input_channels, mid_channels, 1, bias=False,  # Pixel-wise non-linear
                       **deepcopy(conv_kwargs))
            )

        layers += [
            Conv2d(mid_channels, mid_channels, 3, padding=1, bias=False,  # Depth-wise 3x3
                   stride=stride, groups=mid_channels, **deepcopy(conv_kwargs)),
            Conv2d(mid_channels, output_channels, 1, bias=False,  # Pixel-wise linear
                   norm=get_norm(norm, output_channels))
        ]
        self.conv = nn.Sequential(*layers)
Exemple #16
0
    def __init__(self, cfg):
        super(SimSiam, self).__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        self.proj_dim = cfg.MODEL.BYOL.PROJ_DIM
        self.pred_dim = cfg.MODEL.BYOL.PRED_DIM
        self.out_dim = cfg.MODEL.BYOL.OUT_DIM

        self.total_steps = cfg.SOLVER.LR_SCHEDULER.MAX_ITER * cfg.SOLVER.BATCH_SUBDIVISIONS

        # create the encoders
        # num_classes is the output fc dimension
        cfg.MODEL.RESNETS.NUM_CLASSES = self.out_dim

        self.encoder = cfg.build_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
        self.encoder.stem = nn.Sequential(
            Conv2d(3,
                   64,
                   kernel_size=3,
                   stride=1,
                   padding=1,
                   bias=False,
                   norm=get_norm(cfg.MODEL.RESNETS.NORM, 64)),
            nn.ReLU(),
        )

        self.size_divisibility = self.encoder.size_divisibility

        dim_mlp = self.encoder.linear.weight.shape[1]

        # Projection Head
        self.encoder.linear = nn.Sequential(
            nn.Linear(dim_mlp, self.proj_dim),
            nn.SyncBatchNorm(self.proj_dim),
            nn.ReLU(),
            nn.Linear(self.proj_dim, self.proj_dim),
            nn.SyncBatchNorm(self.proj_dim),
        )

        # Predictor
        self.predictor = nn.Sequential(
            nn.Linear(self.proj_dim, self.pred_dim),
            nn.SyncBatchNorm(self.pred_dim),
            nn.ReLU(),
            nn.Linear(self.pred_dim, self.out_dim),
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            1, 3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            1, 3, 1, 1)
        self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std

        self.to(self.device)
Exemple #17
0
    def __init__(
        self,
        in_channels=3,
        out_channels=64,
        norm="BN",
        activation=None,
        deep_stem=False,
        stem_width=32,
    ):
        super().__init__()
        self.conv1_1 = Conv2d(
            3,
            stem_width,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width),
        )
        self.conv1_2 = Conv2d(
            stem_width,
            stem_width,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width),
        )
        self.conv1_3 = Conv2d(
            stem_width,
            stem_width * 2,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width * 2),
        )
        for layer in [self.conv1_1, self.conv1_2, self.conv1_3]:
            if layer is not None:
                weight_init.c2_msra_fill(layer)

        self.activation = get_activation(activation)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 norm="BN",
                 activation=None):
        super().__init__()

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        self.activation = get_activation(activation)

        self.conv1 = Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        self.conv2 = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
Exemple #19
0
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            num_conv: the number of conv layers
            conv_dim: the dimension of the conv layers
            norm: normalization for the conv layers
        """
        super(MaskRCNNConvUpsampleHead, self).__init__()

        # fmt: off
        num_classes       = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        conv_dims         = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM
        self.norm         = cfg.MODEL.ROI_MASK_HEAD.NORM
        num_conv          = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV
        input_channels    = input_shape.channels
        cls_agnostic_mask = cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK
        # fmt: on

        self.conv_norm_relus = []

        for k in range(num_conv):
            conv = Conv2d(
                input_channels if k == 0 else conv_dims,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("mask_fcn{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)

        self.deconv = ConvTranspose2d(
            conv_dims if num_conv > 0 else input_channels,
            conv_dims,
            kernel_size=2,
            stride=2,
            padding=0,
        )

        num_mask_classes = 1 if cls_agnostic_mask else num_classes
        self.predictor = Conv2d(conv_dims, num_mask_classes, kernel_size=1, stride=1, padding=0)

        for layer in self.conv_norm_relus + [self.deconv]:
            weight_init.c2_msra_fill(layer)
        # use normal distribution initialization for mask prediction layer
        nn.init.normal_(self.predictor.weight, std=0.001)
        if self.predictor.bias is not None:
            nn.init.constant_(self.predictor.bias, 0)
Exemple #20
0
    def __init__(self, input_channels, output_channels, norm, activation):
        super().__init__()
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = 2

        self.conv = Conv2d(input_channels,
                           output_channels,
                           3,
                           stride=2,
                           padding=1,
                           bias=False,
                           norm=get_norm(norm, output_channels),
                           activation=get_activation(activation))
Exemple #21
0
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            num_conv, num_fc: the number of conv/fc layers
            conv_dim/fc_dim: the dimension of the conv/fc layers
            norm: normalization for the conv layers
        """
        super().__init__()

        # fmt: off
        num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV
        conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM
        num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC
        fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM
        norm = cfg.MODEL.ROI_BOX_HEAD.NORM
        # fmt: on
        assert num_conv + num_fc > 0

        self._output_size = (input_shape.channels, input_shape.height,
                             input_shape.width)

        self.conv_norm_relus = []
        for k in range(num_conv):
            conv = Conv2d(
                self._output_size[0],
                conv_dim,
                kernel_size=3,
                padding=1,
                bias=not norm,
                norm=get_norm(norm, conv_dim),
                activation=F.relu,
            )
            self.add_module("conv{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
            self._output_size = (conv_dim, self._output_size[1],
                                 self._output_size[2])

        self.fcs = []
        for k in range(num_fc):
            fc = nn.Linear(np.prod(self._output_size), fc_dim)
            self.add_module("fc{}".format(k + 1), fc)
            self.fcs.append(fc)
            self._output_size = fc_dim

        for layer in self.conv_norm_relus:
            weight_init.c2_msra_fill(layer)
        for layer in self.fcs:
            weight_init.c2_xavier_fill(layer)
Exemple #22
0
 def __init__(self, in_channels, out_channels, norm="BN"):
     """
     Args:
         in_channels (int): the number of input tensor channels.
         out_channels (int): the number of output tensor channels.
         norm (str): the normalization to use.
     """
     super().__init__()
     self.num_levels = 2
     self.in_feature = "stage8"
     self.p6_conv = Conv2d(in_channels,
                           out_channels,
                           kernel_size=1,
                           stride=1,
                           padding=0,
                           norm=get_norm(norm, out_channels),
                           activation=None)
     self.down_sampling = MaxPool2d(kernel_size=3, stride=2, padding="SAME")
 def __init__(self,
              in_channels: int,
              out_channels: int,
              num_convs: int,
              kernel_size: int = 1,
              padding: int = 0,
              stride: int = 1,
              num_groups: int = 1,
              norm: str = "GN",
              gate_activation: str = "ReTanH",
              gate_activation_kargs: dict = None,
              depthwise: bool = False):
     super(DynamicConv2D, self).__init__()
     if depthwise:
         assert in_channels == out_channels
     self.num_groups = num_groups
     self.norm = norm
     self.in_channels = in_channels
     self.out_channels = out_channels
     self.kernel_size = kernel_size
     self.depthwise = depthwise
     convs = []
     for _ in range(num_convs):
         convs += [
             nn.Conv2d(in_channels,
                       out_channels,
                       kernel_size,
                       stride=stride,
                       padding=padding,
                       groups=in_channels if depthwise else 1),
             get_norm(norm, in_channels)
         ]
         in_channels = out_channels
     self.convs = nn.Sequential(*convs)
     self.gate = SpatialGate(in_channels,
                             num_groups=num_groups,
                             kernel_size=kernel_size,
                             padding=padding,
                             stride=stride,
                             gate_activation=gate_activation,
                             gate_activation_kargs=gate_activation_kargs,
                             get_running_cost=self.get_running_cost)
     self.init_parameters()
Exemple #24
0
    def __init__(self, input_channels, output_channels, norm, activation):
        """
        Args:
            input_channels (int): the input channel number.
            output_channels (int): the output channel number.
            norm (str or callable): a callable that takes the number of
                channels and return a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
            activation (str): a pre-defined string
                (See cvpods.layer.get_activation for more details).
        """
        super().__init__()
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = 2

        self.conv = Conv2d(input_channels, output_channels, 3, stride=2, padding=1, bias=False,
                           norm=get_norm(norm, output_channels),
                           activation=get_activation(activation))
Exemple #25
0
 def __init__(self, C_in, C_out, norm_layer, affine=True, input_size=None):
     super(Identity, self).__init__()
     if C_in == C_out:
         self.change = False
         self.flops = 0.0
     else:
         self.change = True
         self.op = Conv2d(C_in,
                          C_out,
                          kernel_size=1,
                          padding=0,
                          bias=False,
                          norm=get_norm(norm_layer, C_out))
         self.flops = self.get_flop([1, 1], 1, C_in, C_out, affine,
                                    input_size[0], input_size[1])
         # using Kaiming init
         for m in self.op.modules():
             if isinstance(m, nn.Conv2d):
                 weight_init.kaiming_init(m, mode='fan_in')
             elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
                 if m.weight is not None:
                     nn.init.constant_(m.weight, 1)
                 if m.bias is not None:
                     nn.init.constant_(m.bias, 0)
Exemple #26
0
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super().__init__()
        # fmt: off
        self.in_features = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES
        feature_strides = {k: v.stride
                           for k, v in input_shape.items()}  # noqa:F841
        feature_channels = {k: v.channels for k, v in input_shape.items()}
        feature_resolution = {
            k: np.array([v.height, v.width])
            for k, v in input_shape.items()
        }
        self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE
        num_classes = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES
        norm = cfg.MODEL.SEM_SEG_HEAD.NORM
        self.loss_weight = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT
        self.cal_flops = cfg.MODEL.CAL_FLOPS
        self.real_flops = 0.0
        # fmt: on

        self.layer_decoder_list = nn.ModuleList()
        # set affine in BatchNorm
        if 'Sync' in norm:
            affine = True
        else:
            affine = False
        # use simple decoder
        for _feat in self.in_features:
            res_size = feature_resolution[_feat]
            in_channel = feature_channels[_feat]
            if _feat == 'layer_0':
                out_channel = in_channel
            else:
                out_channel = in_channel // 2
            conv_1x1 = Conv2d(in_channel,
                              out_channel,
                              kernel_size=1,
                              stride=1,
                              padding=0,
                              bias=False,
                              norm=get_norm(norm, out_channel),
                              activation=nn.ReLU())
            self.real_flops += cal_op_flops.count_ConvBNReLU_flop(
                res_size[0],
                res_size[1],
                in_channel,
                out_channel, [1, 1],
                is_affine=affine)
            self.layer_decoder_list.append(conv_1x1)
        # using Kaiming init
        for layer in self.layer_decoder_list:
            for m in layer.modules():
                if isinstance(m, nn.Conv2d):
                    weight_init.kaiming_init(m, mode='fan_in')
                elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
                    if m.weight is not None:
                        nn.init.constant_(m.weight, 1)
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
        in_channel = feature_channels['layer_0']
        # the output layer
        self.predictor = Conv2d(in_channels=in_channel,
                                out_channels=num_classes,
                                kernel_size=3,
                                stride=1,
                                padding=1)
        self.real_flops += cal_op_flops.count_Conv_flop(
            feature_resolution['layer_0'][0], feature_resolution['layer_0'][1],
            in_channel, num_classes, [3, 3])
        # using Kaiming init
        for m in self.predictor.modules():
            if isinstance(m, nn.Conv2d):
                weight_init.kaiming_init(m, mode='fan_in')
            elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
Exemple #27
0
    def __init__(
        self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum"
    ):
        """
        Args:
            bottom_up (Backbone): module representing the bottom up subnetwork.
                Must be a subclass of :class:`Backbone`. The multi-scale feature
                maps generated by the bottom up network, and listed in `in_features`,
                are used to generate FPN levels.
            in_features (list[str]): names of the input feature maps coming
                from the backbone to which FPN is attached. For example, if the
                backbone produces ["res2", "res3", "res4"], any *contiguous* sublist
                of these may be used; order must be from high to low resolution.
            out_channels (int): number of channels in the output feature maps.
            norm (str): the normalization to use.
            top_block (nn.Module or None): if provided, an extra operation will
                be performed on the output of the last (smallest resolution)
                FPN output, and the result will extend the result list. The top_block
                further downsamples the feature map. It must have an attribute
                "num_levels", meaning the number of extra FPN levels added by
                this block, and "in_feature", which is a string representing
                its input feature (e.g., p5).
            fuse_type (str): types for fusing the top down features and the lateral
                ones. It can be "sum" (default), which sums up element-wise; or "avg",
                which takes the element-wise mean of the two.
        """
        super(FPN, self).__init__()
        assert isinstance(bottom_up, Backbone)

        # Feature map strides and channels from the bottom up network (e.g. ResNet)
        input_shapes = bottom_up.output_shape()
        in_strides = [input_shapes[f].stride for f in in_features]
        in_channels = [input_shapes[f].channels for f in in_features]

        _assert_strides_are_log2_contiguous(in_strides)
        lateral_convs = []
        output_convs = []

        use_bias = norm == ""
        for idx, in_channels in enumerate(in_channels):
            lateral_norm = get_norm(norm, out_channels)
            output_norm = get_norm(norm, out_channels)

            lateral_conv = Conv2d(
                in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm
            )
            output_conv = Conv2d(
                out_channels,
                out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=use_bias,
                norm=output_norm,
            )
            weight_init.c2_xavier_fill(lateral_conv)
            weight_init.c2_xavier_fill(output_conv)
            stage = int(math.log2(in_strides[idx]))
            self.add_module("fpn_lateral{}".format(stage), lateral_conv)
            self.add_module("fpn_output{}".format(stage), output_conv)

            lateral_convs.append(lateral_conv)
            output_convs.append(output_conv)
        # Place convs into top-down order (from low to high resolution)
        # to make the top-down computation in forward clearer.
        self.lateral_convs = lateral_convs[::-1]
        self.output_convs = output_convs[::-1]
        self.top_block = top_block
        self.in_features = in_features
        self.bottom_up = bottom_up
        # Return feature names are "p<stage>", like ["p2", "p3", ..., "p6"]
        self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in in_strides}
        # top block output feature maps.
        if self.top_block is not None:
            for s in range(stage, stage + self.top_block.num_levels):
                self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1)

        self._out_features = list(self._out_feature_strides.keys())
        self._out_feature_channels = {k: out_channels for k in self._out_features}
        self._size_divisibility = in_strides[-1]
        assert fuse_type in {"avg", "sum"}
        self._fuse_type = fuse_type
Exemple #28
0
    def __init__(self,
                 in_channels,
                 channels,
                 num_classes=None,
                 dropout=False,
                 out_features=None,
                 norm="BN"):
        """
        See: https://arxiv.org/pdf/1903.11752.pdf

        Args:
            num_blocks (int): the number of blocks in this stage.
            in_channels (int): the input channel number.
            channels (int): output channel numbers for stem and every stages.
            num_classes (None or int): if None, will not perform classification.
            dropout (bool): whether to use dropout.
            out_features (list[str]): name of the layers whose outputs should
                be returned in forward. Can be anything in "stem", "linear", or "snet3" ...
                If None, will return the output of the last layer.
            norm (str or callable): a callable that takes the number of
                channels and return a `nn.Module`, or a pre-defined string
                (See cvpods.layer.get_norm for more details).
        """
        super(ShuffleNetV2, self).__init__()
        self.stage_out_channels = channels
        self.num_classes = num_classes

        # ---------------- Stem ---------------------- #
        input_channels = self.stage_out_channels[0]
        self.stem = nn.Sequential(*[
            Conv2d(
                in_channels,
                input_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=False,
                norm=get_norm(norm, input_channels),
                activation=nn.ReLU(inplace=True),
            ),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ])

        # TODO: use a stem class and property stride
        current_stride = 4
        self._out_feature_strides = {"stem": current_stride}
        self._out_feature_channels = {"stem": input_channels}

        # ---------------- Stages --------------------- #
        self.stage_num_blocks = [4, 8, 4]
        self.stages_and_names = []
        for i in range(len(self.stage_num_blocks)):
            num_blocks = self.stage_num_blocks[i]
            output_channels = self.stage_out_channels[i + 1]
            name = "snet" + str(i + 3)
            block_list = make_stage(num_blocks, input_channels,
                                    output_channels, norm)
            current_stride = current_stride * np.prod(
                [block.stride for block in block_list])
            stages = nn.Sequential(*block_list)

            self._out_feature_strides[name] = current_stride
            self._out_feature_channels[name] = output_channels
            self.add_module(name, stages)
            self.stages_and_names.append((stages, name))
            input_channels = output_channels

        if len(self.stage_out_channels) == len(self.stage_num_blocks) + 2:
            name = "snet" + str(len(self.stage_num_blocks) + 2) + "-last"
            last_output_channels = self.stage_out_channels[-1]
            last_conv = Conv2d(output_channels,
                               last_output_channels,
                               kernel_size=1,
                               bias=False,
                               norm=get_norm(norm, last_output_channels),
                               activation=nn.ReLU(inplace=True))
            self._out_feature_strides[name] = current_stride
            self._out_feature_channels[name] = last_output_channels
            self.add_module(name, last_conv)
            self.stages_and_names.append((last_conv, name))
        # ---------------- Classifer ------------------- #
        if num_classes is not None:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.dropout = dropout
            if dropout:
                self.dropout = nn.Dropout(0.2)
            self.classifier = nn.Linear(self.stage_out_channels[-1],
                                        num_classes,
                                        bias=False)
            name = "linear"

        self._out_features = [name] if out_features is None else out_features

        self._initialize_weights()
Exemple #29
0
    def __init__(self,
                 in_channels=3,
                 out_channels=64,
                 norm="BN",
                 activation=None,
                 deep_stem=False,
                 stem_width=32):
        """
        Args:
            norm (str or callable): a callable that takes the number of
                channels and return a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
        """
        super().__init__()
        self.deep_stem = deep_stem

        if self.deep_stem:
            self.conv1_1 = Conv2d(
                3,
                stem_width,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=False,
                norm=get_norm(norm, stem_width),
            )
            self.conv1_2 = Conv2d(
                stem_width,
                stem_width,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
                norm=get_norm(norm, stem_width),
            )
            self.conv1_3 = Conv2d(
                stem_width,
                stem_width * 2,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
                norm=get_norm(norm, stem_width * 2),
            )
            for layer in [self.conv1_1, self.conv1_2, self.conv1_3]:
                if layer is not None:
                    weight_init.c2_msra_fill(layer)
        else:
            self.conv1 = Conv2d(
                in_channels,
                out_channels,
                kernel_size=7,
                stride=2,
                padding=3,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
            weight_init.c2_msra_fill(self.conv1)

        self.activation = get_activation(activation)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
Exemple #30
0
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        activation=None,
        stride_in_1x1=False,
        dilation=1,
        deform_modulated=False,
        deform_num_groups=1,
    ):
        """
        Similar to :class:`BottleneckBlock`, but with deformable conv in the 3x3 convolution.
        """
        super().__init__(in_channels, out_channels, stride)
        self.deform_modulated = deform_modulated

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.activation = get_activation(activation)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        if deform_modulated:
            deform_conv_op = ModulatedDeformConv
            # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size
            offset_channels = 27
        else:
            deform_conv_op = DeformConv
            offset_channels = 18

        self.conv2_offset = Conv2d(
            bottleneck_channels,
            offset_channels * deform_num_groups,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            dilation=dilation,
        )
        self.conv2 = deform_conv_op(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            deformable_groups=deform_num_groups,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)

        nn.init.constant_(self.conv2_offset.weight, 0)
        nn.init.constant_(self.conv2_offset.bias, 0)