Esempio n. 1
0
    def __init__(self, C_in, C_out, norm_layer, affine=True, input_size=None):
        """Create two stride-2 1x1 conv branches, a norm layer and the FLOP count.

        Args:
            C_in: input channel count of both conv branches.
            C_out: total output channel count; asserted to be even because
                each branch emits ``C_out // 2`` channels.
            norm_layer: callable invoked as ``norm_layer(C_out, affine=affine)``.
            affine: forwarded to ``norm_layer`` and to ``self.get_flop``.
            input_size: indexable; items 0 and 1 are forwarded to
                ``self.get_flop``. The ``None`` default is subscripted
                unconditionally, so callers have to pass a real value.
        """
        super(FactorizedReduce, self).__init__()
        assert C_out % 2 == 0
        branch_channels = C_out // 2
        self.conv_1 = Conv2d(
            C_in, branch_channels, 1, stride=2, padding=0, bias=False)
        self.conv_2 = Conv2d(
            C_in, branch_channels, 1, stride=2, padding=0, bias=False)
        self.bn = norm_layer(C_out, affine=affine)

        self.flops = self.get_flop(
            [1, 1], 2, C_in, C_out, affine, input_size[0], input_size[1])

        # using Kaiming init
        for branch in (self.conv_1, self.conv_2):
            for sub in branch.modules():
                if isinstance(sub, nn.Conv2d):
                    weight_init.kaiming_init(sub, mode='fan_in')
                    continue
                if not isinstance(sub, (nn.BatchNorm2d, nn.SyncBatchNorm)):
                    continue
                # Norm layers: unit scale, zero shift (when those params exist).
                if sub.weight is not None:
                    nn.init.constant_(sub.weight, 1)
                if sub.bias is not None:
                    nn.init.constant_(sub.bias, 0)
Esempio n. 2
0
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            conv_dim: the output dimension of the conv layers
            fc_dim: the feature dimension of the FC layers
            num_fc: the number of FC layers
            output_side_resolution: side resolution of the output square mask prediction
        """
        super(CoarseMaskHead, self).__init__()

        heads_cfg = cfg.MODEL.ROI_HEADS
        mask_cfg = cfg.MODEL.ROI_MASK_HEAD
        self.num_classes = heads_cfg.NUM_CLASSES
        conv_dim = mask_cfg.CONV_DIM
        self.fc_dim = mask_cfg.FC_DIM
        num_fc = mask_cfg.NUM_FC
        self.output_side_resolution = mask_cfg.OUTPUT_SIDE_RESOLUTION
        self.input_channels = input_shape.channels
        self.input_h = input_shape.height
        self.input_w = input_shape.width

        self.conv_layers = []
        if self.input_channels > conv_dim:
            # 1x1 conv that brings the channel count down to conv_dim;
            # only created when the input has more channels than conv_dim.
            self.reduce_channel_dim_conv = Conv2d(
                self.input_channels,
                conv_dim,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=True,
                activation=F.relu,
            )
            self.conv_layers.append(self.reduce_channel_dim_conv)

        # 2x2 stride-2 conv: halves each spatial side.
        self.reduce_spatial_dim_conv = Conv2d(
            conv_dim,
            conv_dim,
            kernel_size=2,
            stride=2,
            padding=0,
            bias=True,
            activation=F.relu,
        )
        self.conv_layers.append(self.reduce_spatial_dim_conv)

        # Flattened feature size after the stride-2 conv (hence the // 4).
        fc_in_dim = (conv_dim * self.input_h * self.input_w) // 4

        self.fcs = []
        for fc_number in range(1, num_fc + 1):
            fc = nn.Linear(fc_in_dim, self.fc_dim)
            self.add_module("coarse_mask_fc{}".format(fc_number), fc)
            self.fcs.append(fc)
            fc_in_dim = self.fc_dim

        side = self.output_side_resolution
        output_dim = self.num_classes * side * side

        self.prediction = nn.Linear(self.fc_dim, output_dim)
        # use normal distribution initialization for mask prediction layer
        nn.init.normal_(self.prediction.weight, std=0.001)
        nn.init.constant_(self.prediction.bias, 0)

        for conv in self.conv_layers:
            weight_init.c2_msra_fill(conv)
        for fc in self.fcs:
            weight_init.c2_xavier_fill(fc)
Esempio n. 3
0
    def __init__(self, block_args, global_params):
        """
        Build the expansion, depthwise, optional squeeze-and-excitation and
        projection stages of the block.

        Args:
            block_args (EasyDict): block args, see: class: `EfficientNet`.
            global_params (EasyDict): global args, see: class: `EfficientNet`.
        """
        super().__init__()
        self._block_args = block_args
        se_ratio = block_args.se_ratio
        # SE is enabled only for a ratio in (0, 1].
        self.has_se = (se_ratio is not None) and (0 < se_ratio <= 1)
        self.id_skip = block_args.id_skip

        norm_type = global_params.norm
        in_ch = block_args.in_channels
        expanded_ch = in_ch * block_args.expand_ratio

        # Expansion phase (skipped when the expand ratio is exactly 1)
        if block_args.expand_ratio != 1:
            self._expand_conv = Conv2d(in_channels=in_ch,
                                       out_channels=expanded_ch,
                                       kernel_size=1,
                                       padding=0,
                                       bias=False)
            self._bn0 = get_norm(norm_type, out_channels=expanded_ch)

        # Depthwise convolution phase: one group per channel
        self._depthwise_conv = Conv2d(in_channels=expanded_ch,
                                      out_channels=expanded_ch,
                                      groups=expanded_ch,
                                      kernel_size=block_args.kernel_size,
                                      stride=block_args.stride,
                                      padding="SAME",
                                      bias=False)
        self._bn1 = get_norm(norm_type, out_channels=expanded_ch)

        # Squeeze and Excitation layer, if desired
        if self.has_se:
            # The squeezed width is derived from the block's input channels.
            squeezed_ch = max(1, int(in_ch * se_ratio))
            self._se_reduce = Conv2d(in_channels=expanded_ch,
                                     out_channels=squeezed_ch,
                                     kernel_size=1,
                                     padding=0)
            self._se_expand = Conv2d(in_channels=squeezed_ch,
                                     out_channels=expanded_ch,
                                     kernel_size=1,
                                     padding=0)

        # Output phase
        projected_ch = block_args.out_channels
        self._project_conv = Conv2d(in_channels=expanded_ch,
                                    out_channels=projected_ch,
                                    kernel_size=1,
                                    padding=0,
                                    bias=False)
        self._bn2 = get_norm(norm_type, projected_ch)
        self._swish = MemoryEfficientSwish()
Esempio n. 4
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 *,
                 stride=1,
                 norm="BN",
                 activation=None,
                 **kwargs):
        """
        The standard block type for ResNet18 and ResNet34.
        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            stride (int): Stride for the first conv.
            norm (str or callable): A callable that takes the number of
                channels and returns a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
            activation: forwarded to `get_activation`.
            **kwargs: accepted but not read by this constructor.
        """
        super().__init__(in_channels, out_channels, stride)

        # A 1x1 projection shortcut is only built when the channel count changes.
        self.shortcut = None if in_channels == out_channels else Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        self.activation = get_activation(activation)

        # Options shared by both 3x3 convs; each conv gets its own norm module.
        conv3x3_options = dict(kernel_size=3, padding=1, bias=False)

        self.conv1 = Conv2d(in_channels,
                            out_channels,
                            stride=stride,
                            norm=get_norm(norm, out_channels),
                            **conv3x3_options)

        self.conv2 = Conv2d(out_channels,
                            out_channels,
                            stride=1,
                            norm=get_norm(norm, out_channels),
                            **conv3x3_options)

        for conv in (self.conv1, self.conv2, self.shortcut):
            if conv is None:  # shortcut can be None
                continue
            weight_init.c2_msra_fill(conv)
Esempio n. 5
0
 def __init__(self,
              in_channels,
              channels,
              kernel_size,
              stride=(1, 1),
              padding=(0, 0),
              dilation=(1, 1),
              groups=1,
              bias=True,
              radix=2,
              reduction_factor=4,
              rectify=False,
              rectify_avg=False,
              norm=None,
              dropblock_prob=0.0,
              **kwargs):
     """Build the grouped main conv, its norm/ReLU, and the two 1x1 "fc" convs.

     The main conv is an ``RFConv2d`` when ``rectify`` is set and some padding
     is positive, otherwise a ``Conv2d``. It emits ``channels * radix``
     channels in ``groups * radix`` groups. Extra ``**kwargs`` are forwarded
     to whichever conv class is chosen. A ``DropBlock2D`` is only created
     when ``dropblock_prob`` is positive.
     """
     super(SplAtConv2d, self).__init__()
     padding = _pair(padding)
     self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
     self.rectify_avg = rectify_avg
     # Width of the bottleneck between fc1 and fc2, never below 32.
     inter_channels = max(in_channels * radix // reduction_factor, 32)
     self.radix = radix
     self.cardinality = groups
     self.channels = channels
     self.dropblock_prob = dropblock_prob

     split_channels = channels * radix
     conv_positional = (in_channels, split_channels, kernel_size, stride,
                        padding, dilation)
     if self.rectify:
         self.conv = RFConv2d(*conv_positional,
                              groups=groups * radix,
                              bias=bias,
                              average_mode=rectify_avg,
                              **kwargs)
     else:
         self.conv = Conv2d(*conv_positional,
                            groups=groups * radix,
                            bias=bias,
                            **kwargs)

     self.use_bn = norm is not None
     self.bn0 = get_norm(norm, split_channels)
     self.relu = ReLU(inplace=True)
     self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality)
     self.bn1 = get_norm(norm, inter_channels)
     self.fc2 = Conv2d(inter_channels,
                       split_channels,
                       1,
                       groups=self.cardinality)
     if dropblock_prob > 0.0:
         self.dropblock = DropBlock2D(dropblock_prob, 3)
Esempio n. 6
0
 def __init__(self,
              C_in,
              C_out,
              kernel_size,
              stride,
              padding,
              norm_layer,
              affine=True,
              input_size=None):
     """Stack two depthwise + pointwise conv pairs and record the FLOP count.

     Args:
         C_in: input channels; also the width of every conv except the last.
         C_out: output channels of the final pointwise conv.
         kernel_size: kernel size of both depthwise convs.
         stride: stride of the first depthwise conv (the second uses 1).
         padding: padding of both depthwise convs.
         norm_layer: forwarded to ``get_norm`` for the pointwise convs.
         affine: only forwarded to ``self.get_flop`` here.
         input_size: indexable; items 0 and 1 are forwarded to
             ``self.get_flop``. The ``None`` default is subscripted
             unconditionally, so callers have to pass a real value.
     """
     super(SepConv, self).__init__()

     depthwise_options = dict(kernel_size=kernel_size,
                              padding=padding,
                              groups=C_in,
                              bias=False)
     pointwise_options = dict(kernel_size=1, padding=0, bias=False)

     # depth wise
     first_depthwise = Conv2d(C_in, C_in, stride=stride, **depthwise_options)
     # point wise
     first_pointwise = Conv2d(C_in,
                              C_in,
                              norm=get_norm(norm_layer, C_in),
                              activation=nn.ReLU(),
                              **pointwise_options)
     # stack 2 separate depthwise-conv.
     second_depthwise = Conv2d(C_in, C_in, stride=1, **depthwise_options)
     second_pointwise = Conv2d(C_in,
                               C_out,
                               norm=get_norm(norm_layer, C_out),
                               **pointwise_options)
     self.op = nn.Sequential(first_depthwise, first_pointwise,
                             second_depthwise, second_pointwise)

     self.flops = self.get_flop([kernel_size, kernel_size], stride, C_in,
                                C_out, affine, input_size[0], input_size[1])

     # using Kaiming init
     for sub in self.op.modules():
         if isinstance(sub, nn.Conv2d):
             weight_init.kaiming_init(sub, mode='fan_in')
             continue
         if not isinstance(sub, (nn.BatchNorm2d, nn.SyncBatchNorm)):
             continue
         # Norm layers: unit scale, zero shift (when those params exist).
         if sub.weight is not None:
             nn.init.constant_(sub.weight, 1)
         if sub.bias is not None:
             nn.init.constant_(sub.bias, 0)
Esempio n. 7
0
    def __init__(self,
                 input_channels,
                 output_channels,
                 stride,
                 expand_ratio,
                 norm,
                 activation,
                 use_shortcut=True):
        """Assemble the optional expansion conv, the depth-wise conv and the projection conv.

        Args:
            input_channels: channel count of the block input.
            output_channels: channel count of the block output.
            stride: stride of the depth-wise conv; asserted to be 1 or 2.
            expand_ratio: multiplier giving the intermediate width; the 1x1
                expansion conv is only added when it is greater than 1.
            norm: forwarded to ``get_norm``.
            activation: forwarded to ``get_activation``.
            use_shortcut: stored on the instance; when true, ``stride`` must
                be 1 and input/output channel counts must match.
        """
        super(InvertedResBlock, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        mid_channels = int(round(input_channels * expand_ratio))
        self.use_shortcut = use_shortcut

        if use_shortcut:
            assert stride == 1
            assert input_channels == output_channels

        # Template norm/activation; each conv using it receives its own deep copy.
        shared_options = dict(norm=get_norm(norm, mid_channels),
                              activation=get_activation(activation))

        expansion = []
        if expand_ratio > 1:
            # Pixel-wise non-linear
            expansion.append(
                Conv2d(input_channels,
                       mid_channels,
                       1,
                       bias=False,
                       **deepcopy(shared_options)))

        # Depth-wise 3x3
        depthwise = Conv2d(mid_channels,
                           mid_channels,
                           3,
                           padding=1,
                           bias=False,
                           stride=stride,
                           groups=mid_channels,
                           **deepcopy(shared_options))
        # Pixel-wise linear
        projection = Conv2d(mid_channels,
                            output_channels,
                            1,
                            bias=False,
                            norm=get_norm(norm, output_channels))

        self.conv = nn.Sequential(*expansion, depthwise, projection)
Esempio n. 8
0
 def __init__(self,
              C_in,
              C_out,
              kernel_size,
              stride,
              padding,
              norm_layer,
              expansion=4,
              affine=True,
              input_size=None):
     """Build the pointwise -> depthwise -> pointwise-linear stack and its FLOP count.

     Args:
         C_in: input channels.
         C_out: output channels of the final pointwise conv.
         kernel_size: kernel size of the depthwise conv.
         stride: stride of the depthwise conv.
         padding: padding of the depthwise conv.
         norm_layer: forwarded to ``get_norm`` for every conv.
         expansion: multiplier giving ``self.hidden_dim = expansion * C_in``.
         affine: only forwarded to ``self.get_flop`` here.
         input_size: indexable; items 0 and 1 are forwarded to
             ``self.get_flop``. The ``None`` default is subscripted
             unconditionally, so callers have to pass a real value.
     """
     super(MBConv, self).__init__()
     self.hidden_dim = expansion * C_in
     hidden = self.hidden_dim

     # pw
     expand_conv = Conv2d(C_in, hidden, 1, 1, 0,
                          bias=False,
                          norm=get_norm(norm_layer, hidden),
                          activation=nn.ReLU())
     # dw
     depthwise_conv = Conv2d(hidden, hidden, kernel_size, stride, padding,
                             groups=hidden,
                             bias=False,
                             norm=get_norm(norm_layer, hidden),
                             activation=nn.ReLU())
     # pw-linear without ReLU!
     project_conv = Conv2d(hidden, C_out, 1, 1, 0,
                           bias=False,
                           norm=get_norm(norm_layer, C_out))
     self.op = nn.Sequential(expand_conv, depthwise_conv, project_conv)

     self.flops = self.get_flop([kernel_size, kernel_size], stride, C_in,
                                C_out, affine, input_size[0], input_size[1])

     # using Kaiming init
     for sub in self.op.modules():
         if isinstance(sub, nn.Conv2d):
             weight_init.kaiming_init(sub, mode='fan_in')
             continue
         if not isinstance(sub, (nn.BatchNorm2d, nn.SyncBatchNorm)):
             continue
         # Norm layers: unit scale, zero shift (when those params exist).
         if sub.weight is not None:
             nn.init.constant_(sub.weight, 1)
         if sub.bias is not None:
             nn.init.constant_(sub.bias, 0)
Esempio n. 9
0
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            num_conv: the number of conv layers
            conv_dim: the dimension of the conv layers
            norm: normalization for the conv layers
        """
        super(MaskRCNNConvUpsampleHead, self).__init__()

        mask_cfg = cfg.MODEL.ROI_MASK_HEAD
        num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        conv_dims = mask_cfg.CONV_DIM
        self.norm = mask_cfg.NORM
        num_conv = mask_cfg.NUM_CONV
        cls_agnostic_mask = mask_cfg.CLS_AGNOSTIC_MASK

        self.conv_norm_relus = []

        # Channel count feeding the next layer; starts at the input's and
        # becomes conv_dims once the first conv has been added.
        next_in_channels = input_shape.channels
        for conv_number in range(1, num_conv + 1):
            conv = Conv2d(
                next_in_channels,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("mask_fcn{}".format(conv_number), conv)
            self.conv_norm_relus.append(conv)
            next_in_channels = conv_dims

        self.deconv = ConvTranspose2d(
            next_in_channels,
            conv_dims,
            kernel_size=2,
            stride=2,
            padding=0,
        )

        # A class-agnostic head predicts a single mask channel.
        if cls_agnostic_mask:
            num_mask_classes = 1
        else:
            num_mask_classes = num_classes
        self.predictor = Conv2d(
            conv_dims, num_mask_classes, kernel_size=1, stride=1, padding=0)

        for layer in (*self.conv_norm_relus, self.deconv):
            weight_init.c2_msra_fill(layer)
        # use normal distribution initialization for mask prediction layer
        nn.init.normal_(self.predictor.weight, std=0.001)
        if self.predictor.bias is not None:
            nn.init.constant_(self.predictor.bias, 0)
Esempio n. 10
0
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        """Build one conv(+upsample) head per input feature and a 1x1 predictor.

        Args:
            cfg: config node; the ``MODEL.SEM_SEG_HEAD`` entries are read here.
            input_shape: maps feature names to specs exposing ``stride`` and
                ``channels``.
        """
        super().__init__()

        head_cfg = cfg.MODEL.SEM_SEG_HEAD
        self.in_features = head_cfg.IN_FEATURES
        feature_strides = {name: spec.stride for name, spec in input_shape.items()}
        feature_channels = {name: spec.channels for name, spec in input_shape.items()}
        self.ignore_value = head_cfg.IGNORE_VALUE
        num_classes = head_cfg.NUM_CLASSES
        conv_dims = head_cfg.CONVS_DIM
        self.common_stride = head_cfg.COMMON_STRIDE
        norm = head_cfg.NORM
        self.loss_weight = head_cfg.LOSS_WEIGHT

        self.scale_heads = []
        for in_feature in self.in_features:
            stride = feature_strides[in_feature]
            # Number of conv stages: log2 ratio to the common stride, at least 1.
            num_stages = max(
                1, int(np.log2(stride) - np.log2(self.common_stride)))
            needs_upsample = stride != self.common_stride

            stage_ops = []
            stage_in_channels = feature_channels[in_feature]
            for _ in range(num_stages):
                if norm == "GN":
                    norm_module = nn.GroupNorm(32, conv_dims)
                else:
                    norm_module = None
                conv = Conv2d(
                    stage_in_channels,
                    conv_dims,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=not norm,
                    norm=norm_module,
                    activation=F.relu,
                )
                weight_init.c2_msra_fill(conv)
                stage_ops.append(conv)
                if needs_upsample:
                    stage_ops.append(
                        nn.Upsample(scale_factor=2,
                                    mode="bilinear",
                                    align_corners=False))
                stage_in_channels = conv_dims

            head = nn.Sequential(*stage_ops)
            self.scale_heads.append(head)
            self.add_module(in_feature, head)

        self.predictor = Conv2d(
            conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
        weight_init.c2_msra_fill(self.predictor)
Esempio n. 11
0
 def __init__(self,
              C_in,
              C_out,
              kernel_size,
              stride,
              padding,
              norm_layer,
              affine=True,
              input_size=None):
     """Create a single bias-free conv with a norm layer and record its FLOP count.

     Args:
         C_in: input channels.
         C_out: output channels.
         kernel_size: conv kernel size.
         stride: conv stride.
         padding: conv padding.
         norm_layer: forwarded to ``get_norm``.
         affine: only forwarded to ``self.get_flop`` here.
         input_size: indexable; items 0 and 1 are forwarded to
             ``self.get_flop``. The ``None`` default is subscripted
             unconditionally, so callers have to pass a real value.
     """
     super(BasicResBlock, self).__init__()
     conv_options = dict(stride=stride,
                         padding=padding,
                         bias=False,
                         norm=get_norm(norm_layer, C_out))
     self.op = Conv2d(C_in, C_out, kernel_size, **conv_options)
     self.flops = self.get_flop([kernel_size, kernel_size], stride, C_in,
                                C_out, affine, input_size[0], input_size[1])

     # using Kaiming init
     for sub in self.op.modules():
         if isinstance(sub, nn.Conv2d):
             weight_init.kaiming_init(sub, mode='fan_in')
             continue
         if not isinstance(sub, (nn.BatchNorm2d, nn.SyncBatchNorm)):
             continue
         # Norm layers: unit scale, zero shift (when those params exist).
         if sub.weight is not None:
             nn.init.constant_(sub.weight, 1)
         if sub.bias is not None:
             nn.init.constant_(sub.bias, 0)
Esempio n. 12
0
    def __init__(self, cfg):
        """Build the backbone, swap its stem for a 3x3 stride-1 conv, and set up
        the loss and the input normalizer.

        Args:
            cfg: config object; provides ``build_backbone`` and the
                ``MODEL.DEVICE`` / ``MODEL.PIXEL_MEAN`` / ``MODEL.PIXEL_STD``
                entries read below.
        """
        super(Classification, self).__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        backbone_input = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
        self.network = cfg.build_backbone(cfg, input_shape=backbone_input)

        # Replace whatever stem the backbone came with.
        stem_conv = Conv2d(3,
                           64,
                           kernel_size=3,
                           stride=1,
                           padding=1,
                           bias=False,
                           norm=get_norm("BN", 64))
        self.network.stem = nn.Sequential(stem_conv, nn.ReLU())

        self.loss_evaluator = nn.CrossEntropyLoss()

        # Per-channel statistics reshaped to (3, 1, 1).
        stat_shape = (3, 1, 1)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(*stat_shape)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(*stat_shape)

        def _normalize(x):
            return (x - pixel_mean) / pixel_std

        self.normalizer = _normalize

        self.to(self.device)
Esempio n. 13
0
    def __init__(self,
                 in_channels=3,
                 out_channels=64,
                 norm="BN",
                 activation=None):
        """
        Build a 7x7 stride-2 conv stem followed by an activation and a
        3x3 stride-2 max pooling layer.

        Args:
            in_channels (int): input channels of the stem conv.
            out_channels (int): output channels of the stem conv.
            norm (str or callable): a callable that takes the number of
                channels and return a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
            activation: forwarded to `get_activation`.
        """
        super().__init__()
        stem_options = dict(kernel_size=7,
                            stride=2,
                            padding=3,
                            bias=False,
                            norm=get_norm(norm, out_channels))
        self.conv1 = Conv2d(in_channels, out_channels, **stem_options)
        weight_init.c2_msra_fill(self.conv1)

        self.activation = get_activation(activation)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
Esempio n. 14
0
    def __init__(self, cfg):
        """Build the backbone with a replaced stem, call ``self.freeze()``, put
        the backbone in eval mode, and re-initialise its ``linear`` layer.

        Args:
            cfg: config object; provides ``build_backbone`` and the
                ``MODEL.*`` entries read below.
        """
        super(Classification, self).__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        backbone_input = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
        self.network = cfg.build_backbone(cfg, input_shape=backbone_input)

        # Replace whatever stem the backbone came with.
        stem_conv = Conv2d(3,
                           64,
                           kernel_size=3,
                           stride=1,
                           padding=1,
                           bias=False,
                           norm=get_norm(cfg.MODEL.RESNETS.NORM, 64))
        self.network.stem = nn.Sequential(stem_conv, nn.ReLU())

        self.freeze()
        self.network.eval()

        # init the fc layer
        classifier = self.network.linear
        classifier.weight.data.normal_(mean=0.0, std=0.01)
        classifier.bias.data.zero_()

        self.loss_evaluator = nn.CrossEntropyLoss()

        # Per-channel statistics reshaped to (1, 3, 1, 1).
        stat_shape = (1, 3, 1, 1)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(*stat_shape)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(*stat_shape)

        def _normalize(x):
            # Input is divided by 255 before mean/std normalisation.
            return (x / 255.0 - pixel_mean) / pixel_std

        self.normalizer = _normalize

        self.to(self.device)
Esempio n. 15
0
    def __init__(self, input_channels, output_channels, stride, expand_ratio,
                 norm, activation, use_shortcut=True):
        """
        Args:
            input_channels (int): the input channel number.
            output_channels (int): the output channel number.
            stride (int): the stride of the current block.
            expand_ratio(int): the channel expansion ratio for `mid_channels` in InvertedResBlock.
            norm (str or callable): a callable that takes the number of
                channels and return a `nn.Module`, or a pre-defined string
                (See cvpods.layer.get_norm for more details).
            activation (str): a pre-defined string
                (See cvpods.layer.get_activation for more details).
            use_shortcut (bool): whether to use the residual path.
        """
        super(InvertedResBlock, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        mid_channels = int(round(input_channels * expand_ratio))
        self.use_shortcut = use_shortcut

        # The residual path requires matching shapes on both ends.
        if self.use_shortcut:
            assert stride == 1
            assert input_channels == output_channels

        mid_norm = get_norm(norm, mid_channels)
        mid_activation = get_activation(activation)
        conv_kwargs = {"norm": mid_norm, "activation": mid_activation}

        def own_copy():
            # Each conv must own its norm/activation instead of sharing one.
            return deepcopy(conv_kwargs)

        stages = []
        if expand_ratio > 1:
            # Pixel-wise non-linear
            stages.append(
                Conv2d(input_channels, mid_channels, 1, bias=False, **own_copy())
            )
        # Depth-wise 3x3
        stages.append(
            Conv2d(mid_channels, mid_channels, 3, padding=1, bias=False,
                   stride=stride, groups=mid_channels, **own_copy())
        )
        # Pixel-wise linear
        stages.append(
            Conv2d(mid_channels, output_channels, 1, bias=False,
                   norm=get_norm(norm, output_channels))
        )
        self.conv = nn.Sequential(*stages)
Esempio n. 16
0
    def __init__(self, cfg):
        """Build the encoder (with replaced stem and projection head), the
        predictor MLP and the input normalizer.

        Note that this writes ``cfg.MODEL.RESNETS.NUM_CLASSES`` before the
        backbone is built.

        Args:
            cfg: config object; provides ``build_backbone`` and the
                ``MODEL.*`` / ``SOLVER.*`` entries read below.
        """
        super(SimSiam, self).__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        byol_cfg = cfg.MODEL.BYOL
        self.proj_dim = byol_cfg.PROJ_DIM
        self.pred_dim = byol_cfg.PRED_DIM
        self.out_dim = byol_cfg.OUT_DIM

        self.total_steps = cfg.SOLVER.LR_SCHEDULER.MAX_ITER * cfg.SOLVER.BATCH_SUBDIVISIONS

        # create the encoders
        # num_classes is the output fc dimension
        cfg.MODEL.RESNETS.NUM_CLASSES = self.out_dim

        backbone_input = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
        self.encoder = cfg.build_backbone(cfg, input_shape=backbone_input)

        # Replace whatever stem the backbone came with.
        stem_conv = Conv2d(3,
                           64,
                           kernel_size=3,
                           stride=1,
                           padding=1,
                           bias=False,
                           norm=get_norm(cfg.MODEL.RESNETS.NORM, 64))
        self.encoder.stem = nn.Sequential(stem_conv, nn.ReLU())

        self.size_divisibility = self.encoder.size_divisibility

        # Input width of the backbone's original linear layer.
        dim_mlp = self.encoder.linear.weight.shape[1]

        # Projection Head
        projection_layers = [
            nn.Linear(dim_mlp, self.proj_dim),
            nn.SyncBatchNorm(self.proj_dim),
            nn.ReLU(),
            nn.Linear(self.proj_dim, self.proj_dim),
            nn.SyncBatchNorm(self.proj_dim),
        ]
        self.encoder.linear = nn.Sequential(*projection_layers)

        # Predictor
        predictor_layers = [
            nn.Linear(self.proj_dim, self.pred_dim),
            nn.SyncBatchNorm(self.pred_dim),
            nn.ReLU(),
            nn.Linear(self.pred_dim, self.out_dim),
        ]
        self.predictor = nn.Sequential(*predictor_layers)

        # Per-channel statistics reshaped to (1, 3, 1, 1).
        stat_shape = (1, 3, 1, 1)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(*stat_shape)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(*stat_shape)

        def _normalize(x):
            # Input is divided by 255 before mean/std normalisation.
            return (x / 255.0 - pixel_mean) / pixel_std

        self.normalizer = _normalize

        self.to(self.device)
Esempio n. 17
0
    def __init__(
        self,
        in_channels=3,
        out_channels=64,
        norm="BN",
        activation=None,
        deep_stem=False,
        stem_width=32,
    ):
        """Build a stem of three 3x3 convs followed by an activation and max pooling.

        Args:
            in_channels (int): channel count of the input fed to the first
                conv. Previously ignored in favour of a hard-coded 3; the
                default keeps that behaviour.
            out_channels (int): accepted for interface compatibility but not
                read here; the last conv always emits ``stem_width * 2``
                channels.
            norm (str or callable): forwarded to ``get_norm`` for every conv.
            activation: forwarded to ``get_activation``.
            deep_stem (bool): accepted for interface compatibility but not
                read here; the three-conv stem is always built.
            stem_width (int): width of the first two convs; the third conv
                doubles it.
        """
        super().__init__()
        # Stride-2 entry conv; honours `in_channels` instead of assuming RGB.
        self.conv1_1 = Conv2d(
            in_channels,
            stem_width,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width),
        )
        self.conv1_2 = Conv2d(
            stem_width,
            stem_width,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width),
        )
        self.conv1_3 = Conv2d(
            stem_width,
            stem_width * 2,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width * 2),
        )
        # All three convs always exist, so no None check is needed.
        for layer in (self.conv1_1, self.conv1_2, self.conv1_3):
            weight_init.c2_msra_fill(layer)

        self.activation = get_activation(activation)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 norm="BN",
                 activation=None):
        """Build two 3x3 convs and, when channel counts differ, a 1x1 shortcut conv.

        Args:
            in_channels: input channels of the first conv and of the shortcut.
            out_channels: output channels of both convs and of the shortcut.
            stride: stride of the first conv and of the shortcut.
            norm: forwarded to ``get_norm`` for every conv.
            activation: forwarded to ``get_activation``.
        """
        super().__init__()

        # A 1x1 projection shortcut is only built when the channel count changes.
        self.shortcut = None if in_channels == out_channels else Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        self.activation = get_activation(activation)

        # Options shared by both 3x3 convs; each conv gets its own norm module.
        conv3x3_options = dict(kernel_size=3, padding=1, bias=False)

        self.conv1 = Conv2d(in_channels,
                            out_channels,
                            stride=stride,
                            norm=get_norm(norm, out_channels),
                            **conv3x3_options)

        self.conv2 = Conv2d(out_channels,
                            out_channels,
                            stride=1,
                            norm=get_norm(norm, out_channels),
                            **conv3x3_options)
Esempio n. 19
0
    def __init__(self, input_channels, output_channels, norm, activation):
        """Create a 3x3 stride-2 conv with a norm layer and an activation.

        Args:
            input_channels: input channels of the conv; stored on the instance.
            output_channels: output channels of the conv; stored on the instance.
            norm: forwarded to ``get_norm``.
            activation: forwarded to ``get_activation``.
        """
        super().__init__()
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = 2

        conv_options = dict(stride=2,
                            padding=1,
                            bias=False,
                            norm=get_norm(norm, output_channels),
                            activation=get_activation(activation))
        self.conv = Conv2d(input_channels, output_channels, 3, **conv_options)
Esempio n. 20
0
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            num_conv, num_fc: the number of conv/fc layers
            conv_dim/fc_dim: the dimension of the conv/fc layers
            norm: normalization for the conv layers
        """
        super().__init__()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        num_conv = box_cfg.NUM_CONV
        conv_dim = box_cfg.CONV_DIM
        num_fc = box_cfg.NUM_FC
        fc_dim = box_cfg.FC_DIM
        norm = box_cfg.NORM
        assert num_conv + num_fc > 0

        # Running output size: (channels, height, width) while convs are
        # stacked, then a plain int once the first fc layer is added.
        self._output_size = (input_shape.channels, input_shape.height,
                             input_shape.width)

        self.conv_norm_relus = []
        for conv_number in range(1, num_conv + 1):
            channels, height, width = self._output_size
            conv = Conv2d(
                channels,
                conv_dim,
                kernel_size=3,
                padding=1,
                bias=not norm,
                norm=get_norm(norm, conv_dim),
                activation=F.relu,
            )
            self.add_module("conv{}".format(conv_number), conv)
            self.conv_norm_relus.append(conv)
            self._output_size = (conv_dim, height, width)

        self.fcs = []
        for fc_number in range(1, num_fc + 1):
            # np.prod flattens the (C, H, W) tuple; on an int it is a no-op.
            fc = nn.Linear(np.prod(self._output_size), fc_dim)
            self.add_module("fc{}".format(fc_number), fc)
            self.fcs.append(fc)
            self._output_size = fc_dim

        for conv in self.conv_norm_relus:
            weight_init.c2_msra_fill(conv)
        for fc in self.fcs:
            weight_init.c2_xavier_fill(fc)
Esempio n. 21
0
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        Build the 3x3 conv stack and the transposed-conv keypoint scorer.

        The following attributes are parsed from config:
            conv_dims: an iterable of output channel counts for each conv in the head
                         e.g. (512, 512, 512) for three convs outputting 512 channels.
            num_keypoints: number of keypoint heatmaps to predict; it is the number of
                           output channels of the transposed conv.
        """
        super(KRCNNConvDeconvUpsampleHead, self).__init__()

        keypoint_cfg = cfg.MODEL.ROI_KEYPOINT_HEAD
        conv_dims = keypoint_cfg.CONV_DIMS
        num_keypoints = keypoint_cfg.NUM_KEYPOINTS
        # Channel count feeding the next layer; starts at the input channels.
        prev_channels = input_shape.channels

        self.blocks = []
        for block_idx, block_channels in enumerate(conv_dims, 1):
            conv_block = Conv2d(prev_channels,
                                block_channels,
                                3,
                                stride=1,
                                padding=1)
            self.add_module("conv_fcn{}".format(block_idx), conv_block)
            self.blocks.append(conv_block)
            prev_channels = block_channels

        deconv_kernel = 4
        deconv_padding = deconv_kernel // 2 - 1
        self.score_lowres = ConvTranspose2d(prev_channels,
                                            num_keypoints,
                                            deconv_kernel,
                                            stride=2,
                                            padding=deconv_padding)
        # default up_scale to 2 (this can eventually be moved to config)
        self.up_scale = 2

        for param_name, param in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(param, 0)
                continue
            if "weight" in param_name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param,
                                        mode="fan_out",
                                        nonlinearity="relu")
Esempio n. 22
0
 def __init__(self, in_channels, out_channels, norm="BN"):
     """
     Build a 1x1 conv (with norm, no activation) and a stride-2 max pool.

     Args:
         in_channels (int): the number of input tensor channels.
         out_channels (int): the number of output tensor channels.
         norm (str): the normalization to use.
     """
     super().__init__()
     p6_norm = get_norm(norm, out_channels)
     p6_conv = Conv2d(in_channels,
                      out_channels,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      norm=p6_norm,
                      activation=None)
     pooling = MaxPool2d(kernel_size=3, stride=2, padding="SAME")

     self.num_levels = 2
     self.in_feature = "stage8"
     # Register the conv before the pooling layer to keep the module order.
     self.p6_conv = p6_conv
     self.down_sampling = pooling
Esempio n. 23
0
    def __init__(self, input_channels, output_channels, norm, activation):
        """
        Build a single bias-free 3x3, stride-2 conv with norm and activation.

        Args:
            input_channels (int): the input channel number.
            output_channels (int): the output channel number.
            norm (str or callable): a callable that takes the number of
                channels and return a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
            activation (str): a pre-defined string
                (See cvpods.layer.get_activation for more details).
        """
        super().__init__()
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = 2

        conv_norm = get_norm(norm, output_channels)
        conv_activation = get_activation(activation)
        self.conv = Conv2d(
            input_channels,
            output_channels,
            3,
            stride=2,
            padding=1,
            bias=False,
            norm=conv_norm,
            activation=conv_activation,
        )
Esempio n. 24
0
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        """
        Build one 1x1 score conv and one transposed-conv upsampler per input feature.

        Args:
            cfg: config node; only ``cfg.MODEL.SEM_SEG_HEAD`` is read here.
            input_shape (Dict[str, ShapeSpec]): per-feature shapes. The strides
                are consumed in the dict's iteration order, and the number of
                entries must equal the number of configured input features.
        """
        super().__init__()

        head_cfg = cfg.MODEL.SEM_SEG_HEAD
        self.in_features = head_cfg.IN_FEATURES
        strides_in_order = [spec.stride for spec in input_shape.values()]
        channels_by_name = {name: spec.channels for name, spec in input_shape.items()}
        self.ignore_value = head_cfg.IGNORE_VALUE
        num_classes = head_cfg.NUM_CLASSES
        self.loss_weight = head_cfg.LOSS_WEIGHT

        # First entry is the stride of the first feature; the remaining entries
        # are ratios between consecutive strides, walking the list in reverse.
        reversed_strides = strides_in_order[::-1]
        upsampling_strides = [strides_in_order[0]]
        upsampling_strides.extend(
            coarse // fine
            for coarse, fine in zip(reversed_strides, reversed_strides[1:]))
        assert len(upsampling_strides) == len(self.in_features)

        self.score_convs = nn.ModuleList()
        self.upsampling_convs = nn.ModuleList()
        for idx, feature_name in enumerate(self.in_features):
            self.score_convs.append(
                Conv2d(channels_by_name[feature_name], num_classes, kernel_size=1))
            up_stride = upsampling_strides[idx]
            self.upsampling_convs.append(
                ConvTranspose2d(
                    num_classes,
                    num_classes,
                    kernel_size=up_stride * 2,
                    stride=up_stride,
                    padding=1,
                    bias=False,
                ))
        self._initialize_weights()
Esempio n. 25
0
 def __init__(self, C_in, C_out, norm_layer, affine=True, input_size=None):
     """
     Pass-through op that adds a 1x1 conv only when the channel count changes.

     Args:
         C_in (int): number of input channels.
         C_out (int): number of output channels.
         norm_layer: normalization spec forwarded to `get_norm`.
         affine (bool): forwarded to `self.get_flop` for the FLOP estimate.
         input_size: indexed as `input_size[0]` and `input_size[1]` for the
             FLOP estimate; only read when `C_in != C_out`.
     """
     super(Identity, self).__init__()
     self.change = C_in != C_out
     if not self.change:
         # Same channel count: nothing to build and no FLOPs to count.
         self.flops = 0.0
         return

     self.op = Conv2d(C_in,
                      C_out,
                      kernel_size=1,
                      padding=0,
                      bias=False,
                      norm=get_norm(norm_layer, C_out))
     self.flops = self.get_flop([1, 1], 1, C_in, C_out, affine,
                                input_size[0], input_size[1])
     # using Kaiming init
     for sub_module in self.op.modules():
         if isinstance(sub_module, nn.Conv2d):
             weight_init.kaiming_init(sub_module, mode='fan_in')
             continue
         if not isinstance(sub_module, (nn.BatchNorm2d, nn.SyncBatchNorm)):
             continue
         if sub_module.weight is not None:
             nn.init.constant_(sub_module.weight, 1)
         if sub_module.bias is not None:
             nn.init.constant_(sub_module.bias, 0)
Esempio n. 26
0
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        """
        Build a 1x1 Conv-norm-ReLU decoder per input feature and a 3x3 predictor.

        The FLOP estimate of every layer built here is accumulated into
        ``self.real_flops``.

        Args:
            cfg: config node; reads ``cfg.MODEL.SEM_SEG_HEAD`` and
                ``cfg.MODEL.CAL_FLOPS``.
            input_shape (Dict[str, ShapeSpec]): per-feature shapes. Must
                contain the key ``'layer_0'``, which feeds the predictor.
        """
        super().__init__()

        def _init_convs_and_norms(root):
            # Kaiming init for convs; unit weight and zero bias for batch norms.
            for sub_module in root.modules():
                if isinstance(sub_module, nn.Conv2d):
                    weight_init.kaiming_init(sub_module, mode='fan_in')
                    continue
                if not isinstance(sub_module,
                                  (nn.BatchNorm2d, nn.SyncBatchNorm)):
                    continue
                if sub_module.weight is not None:
                    nn.init.constant_(sub_module.weight, 1)
                if sub_module.bias is not None:
                    nn.init.constant_(sub_module.bias, 0)

        head_cfg = cfg.MODEL.SEM_SEG_HEAD
        self.in_features = head_cfg.IN_FEATURES
        feature_strides = {name: spec.stride
                           for name, spec in input_shape.items()}  # noqa:F841
        feature_channels = {
            name: spec.channels
            for name, spec in input_shape.items()
        }
        feature_resolution = {
            name: np.array([spec.height, spec.width])
            for name, spec in input_shape.items()
        }
        self.ignore_value = head_cfg.IGNORE_VALUE
        num_classes = head_cfg.NUM_CLASSES
        norm = head_cfg.NORM
        self.loss_weight = head_cfg.LOSS_WEIGHT
        self.cal_flops = cfg.MODEL.CAL_FLOPS
        self.real_flops = 0.0

        self.layer_decoder_list = nn.ModuleList()
        # set affine in BatchNorm
        affine = 'Sync' in norm
        # use simple decoder
        for feat_name in self.in_features:
            feat_height, feat_width = feature_resolution[feat_name][:2]
            feat_channels = feature_channels[feat_name]
            # 'layer_0' keeps its channel count; every other feature is halved.
            if feat_name == 'layer_0':
                decoded_channels = feat_channels
            else:
                decoded_channels = feat_channels // 2
            decoder = Conv2d(feat_channels,
                             decoded_channels,
                             kernel_size=1,
                             stride=1,
                             padding=0,
                             bias=False,
                             norm=get_norm(norm, decoded_channels),
                             activation=nn.ReLU())
            self.real_flops += cal_op_flops.count_ConvBNReLU_flop(
                feat_height,
                feat_width,
                feat_channels,
                decoded_channels, [1, 1],
                is_affine=affine)
            self.layer_decoder_list.append(decoder)
        # using Kaiming init
        for decoder in self.layer_decoder_list:
            _init_convs_and_norms(decoder)

        # the output layer
        predictor_in = feature_channels['layer_0']
        predictor_res = feature_resolution['layer_0']
        self.predictor = Conv2d(in_channels=predictor_in,
                                out_channels=num_classes,
                                kernel_size=3,
                                stride=1,
                                padding=1)
        self.real_flops += cal_op_flops.count_Conv_flop(
            predictor_res[0], predictor_res[1], predictor_in, num_classes,
            [3, 3])
        # using Kaiming init
        _init_convs_and_norms(self.predictor)
Esempio n. 27
0
    def __init__(
        self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum"
    ):
        """
        Build the lateral (1x1) and output (3x3) convs of a feature pyramid.

        Args:
            bottom_up (Backbone): module representing the bottom up subnetwork.
                Must be a subclass of :class:`Backbone`. The multi-scale feature
                maps it produces, and that are listed in `in_features`, are
                used to generate FPN levels.
            in_features (list[str]): names of the input feature maps coming
                from the backbone to which FPN is attached. For example, if the
                backbone produces ["res2", "res3", "res4"], any *contiguous* sublist
                of these may be used; order must be from high to low resolution.
            out_channels (int): number of channels in the output feature maps.
            norm (str): the normalization to use. The convs carry a bias only
                when this is the empty string.
            top_block (nn.Module or None): if provided, an extra operation will
                be performed on the output of the last (smallest resolution)
                FPN output, and the result will extend the result list. The top_block
                further downsamples the feature map. It must have an attribute
                "num_levels", meaning the number of extra FPN levels added by
                this block, and "in_feature", which is a string representing
                its input feature (e.g., p5).
            fuse_type (str): types for fusing the top down features and the lateral
                ones. It can be "sum" (default), which sums up element-wise; or "avg",
                which takes the element-wise mean of the two.
        """
        super(FPN, self).__init__()
        assert isinstance(bottom_up, Backbone)

        # Feature map strides and channels from the bottom up network (e.g. ResNet)
        backbone_shapes = bottom_up.output_shape()
        in_strides = [backbone_shapes[f].stride for f in in_features]
        channels_per_level = [backbone_shapes[f].channels for f in in_features]

        _assert_strides_are_log2_contiguous(in_strides)

        use_bias = norm == ""
        lateral_convs, output_convs = [], []
        for level_stride, level_channels in zip(in_strides, channels_per_level):
            lateral_norm = get_norm(norm, out_channels)
            output_norm = get_norm(norm, out_channels)

            lateral_conv = Conv2d(
                level_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm
            )
            output_conv = Conv2d(
                out_channels,
                out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=use_bias,
                norm=output_norm,
            )
            weight_init.c2_xavier_fill(lateral_conv)
            weight_init.c2_xavier_fill(output_conv)

            # Modules are named after the log2 of their stride (e.g. stride 4 -> 2).
            stage = int(math.log2(level_stride))
            self.add_module("fpn_lateral{}".format(stage), lateral_conv)
            self.add_module("fpn_output{}".format(stage), output_conv)

            lateral_convs.append(lateral_conv)
            output_convs.append(output_conv)

        # Place convs into top-down order (from low to high resolution)
        # to make the top-down computation in forward clearer.
        self.lateral_convs = lateral_convs[::-1]
        self.output_convs = output_convs[::-1]
        self.top_block = top_block
        self.in_features = in_features
        self.bottom_up = bottom_up

        # Return feature names are "p<stage>", like ["p2", "p3", ..., "p6"]
        out_strides = {"p{}".format(int(math.log2(s))): s for s in in_strides}
        self._out_feature_strides = out_strides
        # top block output feature maps: continue numbering after the last stage.
        if self.top_block is not None:
            first_extra = stage + 1
            for level in range(first_extra, first_extra + self.top_block.num_levels):
                out_strides["p{}".format(level)] = 2 ** level

        self._out_features = list(out_strides.keys())
        self._out_feature_channels = {k: out_channels for k in self._out_features}
        self._size_divisibility = in_strides[-1]
        assert fuse_type in {"avg", "sum"}
        self._fuse_type = fuse_type
Esempio n. 28
0
    def __init__(self,
                 in_channels,
                 channels,
                 num_classes=None,
                 dropout=False,
                 out_features=None,
                 norm="BN"):
        """
        See: https://arxiv.org/pdf/1903.11752.pdf

        Args:
            in_channels (int): the input channel number.
            channels (list[int]): output channel numbers. ``channels[0]`` is
                the stem, ``channels[i + 1]`` is stage ``i`` (three stages),
                and an optional fifth entry adds a trailing 1x1 conv
                named "snet5-last".
            num_classes (None or int): if None, will not perform classification.
            dropout (bool): whether to use dropout.
            out_features (list[str]): name of the layers whose outputs should
                be returned in forward. Can be anything in "stem", "linear", or "snet3" ...
                If None, will return the output of the last layer.
            norm (str or callable): a callable that takes the number of
                channels and return a `nn.Module`, or a pre-defined string
                (See cvpods.layer.get_norm for more details).
        """
        super(ShuffleNetV2, self).__init__()
        self.stage_out_channels = channels
        self.num_classes = num_classes

        # ---------------- Stem ---------------------- #
        input_channels = self.stage_out_channels[0]
        self.stem = nn.Sequential(*[
            Conv2d(
                in_channels,
                input_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=False,
                norm=get_norm(norm, input_channels),
                activation=nn.ReLU(inplace=True),
            ),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ])

        # TODO: use a stem class and property stride
        # Stride-2 conv followed by stride-2 max pool gives a total stride of 4.
        current_stride = 4
        self._out_feature_strides = {"stem": current_stride}
        self._out_feature_channels = {"stem": input_channels}

        # ---------------- Stages --------------------- #
        self.stage_num_blocks = [4, 8, 4]
        self.stages_and_names = []
        for stage_idx, num_blocks in enumerate(self.stage_num_blocks):
            output_channels = self.stage_out_channels[stage_idx + 1]
            name = "snet" + str(stage_idx + 3)
            block_list = make_stage(num_blocks, input_channels,
                                    output_channels, norm)
            # np.prod returns a NumPy scalar; cast so the recorded strides
            # stay plain Python ints instead of numpy.int64.
            current_stride = int(current_stride * np.prod(
                [block.stride for block in block_list]))
            stages = nn.Sequential(*block_list)

            self._out_feature_strides[name] = current_stride
            self._out_feature_channels[name] = output_channels
            self.add_module(name, stages)
            self.stages_and_names.append((stages, name))
            input_channels = output_channels

        # An extra channel entry beyond stem + stages requests a final 1x1 conv.
        if len(self.stage_out_channels) == len(self.stage_num_blocks) + 2:
            name = "snet" + str(len(self.stage_num_blocks) + 2) + "-last"
            last_output_channels = self.stage_out_channels[-1]
            last_conv = Conv2d(output_channels,
                               last_output_channels,
                               kernel_size=1,
                               bias=False,
                               norm=get_norm(norm, last_output_channels),
                               activation=nn.ReLU(inplace=True))
            self._out_feature_strides[name] = current_stride
            self._out_feature_channels[name] = last_output_channels
            self.add_module(name, last_conv)
            self.stages_and_names.append((last_conv, name))

        # ---------------- Classifier ------------------- #
        if num_classes is not None:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            # Holds the Dropout module when enabled, otherwise the falsy flag.
            self.dropout = nn.Dropout(0.2) if dropout else dropout
            self.classifier = nn.Linear(self.stage_out_channels[-1],
                                        num_classes,
                                        bias=False)
            name = "linear"

        # `name` is the last layer built above: the classifier if present,
        # otherwise the trailing conv or the final stage.
        self._out_features = [name] if out_features is None else out_features

        self._initialize_weights()
Esempio n. 29
0
    def __init__(self,
                 in_channels=3,
                 out_channels=64,
                 norm="BN",
                 activation=None,
                 deep_stem=False,
                 stem_width=32):
        """
        Build the stem: either one 7x7 conv or three stacked 3x3 convs,
        followed by an activation and a stride-2 max pool.

        Args:
            in_channels (int): number of channels of the input tensor.
            out_channels (int): output channels of the 7x7 conv. Only used
                when `deep_stem` is False.
            norm (str or callable): a callable that takes the number of
                channels and return a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
            activation: activation spec forwarded to `get_activation`.
            deep_stem (bool): if True, build three 3x3 convs instead of the
                single 7x7 conv.
            stem_width (int): output channels of the first two deep-stem
                convs; the third one outputs `stem_width * 2`.
        """
        super().__init__()
        self.deep_stem = deep_stem

        if self.deep_stem:
            self.conv1_1 = Conv2d(
                # Honour the caller's channel count instead of assuming RGB.
                in_channels,
                stem_width,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=False,
                norm=get_norm(norm, stem_width),
            )
            self.conv1_2 = Conv2d(
                stem_width,
                stem_width,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
                norm=get_norm(norm, stem_width),
            )
            self.conv1_3 = Conv2d(
                stem_width,
                stem_width * 2,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
                norm=get_norm(norm, stem_width * 2),
            )
            for layer in (self.conv1_1, self.conv1_2, self.conv1_3):
                weight_init.c2_msra_fill(layer)
        else:
            self.conv1 = Conv2d(
                in_channels,
                out_channels,
                kernel_size=7,
                stride=2,
                padding=3,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
            weight_init.c2_msra_fill(self.conv1)

        self.activation = get_activation(activation)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
Esempio n. 30
0
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        activation=None,
        stride_in_1x1=False,
        dilation=1,
        deform_modulated=False,
        deform_num_groups=1,
    ):
        """
        Similar to :class:`BottleneckBlock`, but with deformable conv in the 3x3 convolution.

        A plain 3x3 conv (`conv2_offset`) is built alongside the deformable
        conv; its weight and bias are initialised to zero.
        """
        super().__init__(in_channels, out_channels, stride)
        self.deform_modulated = deform_modulated

        # A projection shortcut is only needed when the channel count changes.
        if in_channels == out_channels:
            self.shortcut = None
        else:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )

        # The stride is applied in either the 1x1 or the 3x3 conv, never both.
        if stride_in_1x1:
            stride_1x1, stride_3x3 = stride, 1
        else:
            stride_1x1, stride_3x3 = 1, stride

        self.activation = get_activation(activation)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size
        deform_conv_op, offset_channels = (
            (ModulatedDeformConv, 27) if deform_modulated else (DeformConv, 18)
        )
        padding_3x3 = 1 * dilation

        self.conv2_offset = Conv2d(
            bottleneck_channels,
            offset_channels * deform_num_groups,
            kernel_size=3,
            stride=stride_3x3,
            padding=padding_3x3,
            dilation=dilation,
        )
        self.conv2 = deform_conv_op(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=padding_3x3,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            deformable_groups=deform_num_groups,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in (self.conv1, self.conv2, self.conv3, self.shortcut):
            if layer is None:  # shortcut can be None
                continue
            weight_init.c2_msra_fill(layer)

        # Zero the offset conv's parameters.
        for offset_param in (self.conv2_offset.weight, self.conv2_offset.bias):
            nn.init.constant_(offset_param, 0)