Example #1
    def __init__(self,
                 in_channels=3,
                 out_channels=64,
                 norm="BN",
                 activation=None):
        """
        Args:
            norm (str or callable): a callable that takes the number of
                channels and returns a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
        """
        super().__init__()
        self.conv1 = Conv2d(
            in_channels,
            out_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
        weight_init.c2_msra_fill(self.conv1)

        self.activation = get_activation(activation)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
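A minimal shape check for this stem (a plain-PyTorch sketch; the Conv2d above is a cvpods-style wrapper that fuses the norm, but the geometry is identical): the stride-2 7x7 conv plus the stride-2 3x3 max pool downsample by 4x in total.

    import torch
    import torch.nn as nn

    x = torch.randn(1, 3, 224, 224)
    conv = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    print(pool(conv(x)).shape)  # torch.Size([1, 64, 56, 56])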
Example #2
    def __init__(self,
                 in_channels,
                 out_channels,
                 *,
                 stride=1,
                 norm="BN",
                 activation=None,
                 **kwargs):
        """
        The standard block type for ResNet18 and ResNet34.
        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            stride (int): Stride for the first conv.
            norm (str or callable): A callable that takes the number of
                channels and returns a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
        """
        super().__init__(in_channels, out_channels, stride)

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        self.activation = get_activation(activation)

        self.conv1 = Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        self.conv2 = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
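The snippet shows only the constructor. A hedged sketch of the forward pass a basic residual block of this shape conventionally implements (an assumption, not part of the source):

    def forward(self, x):
        out = self.activation(self.conv1(x))
        out = self.conv2(out)
        shortcut = self.shortcut(x) if self.shortcut is not None else x
        return self.activation(out + shortcut)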
Example #3
    def __init__(self,
                 input_channels,
                 output_channels,
                 stride,
                 expand_ratio,
                 norm,
                 activation,
                 use_shortcut=True):
        super(InvertedResBlock, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        mid_channels = int(round(input_channels * expand_ratio))
        self.use_shortcut = use_shortcut

        if self.use_shortcut:
            assert stride == 1
            assert input_channels == output_channels

        conv_kwargs = {
            "norm": get_norm(norm, mid_channels),
            "activation": get_activation(activation)
        }

        layers = []
        if expand_ratio > 1:
            layers.append(
                Conv2d(
                    input_channels,
                    mid_channels,
                    1,
                    bias=False,  # Pixel-wise non-linear
                    **deepcopy(conv_kwargs)))

        layers += [
            Conv2d(
                mid_channels,
                mid_channels,
                3,
                padding=1,
                bias=False,  # Depth-wise 3x3
                stride=stride,
                groups=mid_channels,
                **deepcopy(conv_kwargs)),
            Conv2d(
                mid_channels,
                output_channels,
                1,
                bias=False,  # Pixel-wise linear
                norm=get_norm(norm, output_channels))
        ]
        self.conv = nn.Sequential(*layers)
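The constructor assembles self.conv as the full expand -> depthwise -> project stack, so a hedged sketch of the matching forward pass is just the MobileNetV2 residual rule (an assumption, not shown in the source):

    def forward(self, x):
        if self.use_shortcut:  # only when stride == 1 and channels match
            return x + self.conv(x)
        return self.conv(x)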
Example #4
    def __init__(self, input_channels, output_channels, norm, activation):
        super().__init__()
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = 2

        self.conv = Conv2d(input_channels,
                           output_channels,
                           3,
                           stride=2,
                           padding=1,
                           bias=False,
                           norm=get_norm(norm, output_channels),
                           activation=get_activation(activation))
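The stride-2 3x3 conv halves the spatial resolution, matching self.stride = 2 above. A plain-PyTorch shape check (a sketch; the Conv2d above is a cvpods-style wrapper that also applies norm and activation):

    import torch
    import torch.nn as nn

    conv = nn.Conv2d(3, 32, 3, stride=2, padding=1, bias=False)
    print(conv(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 32, 112, 112])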
Example #5
    def __init__(self, input_channels, output_channels, stride, expand_ratio,
                 norm, activation, use_shortcut=True):
        """
        Args:
            input_channels (int): the input channel number.
            output_channels (int): the output channel number.
            stride (int): the stride of the current block.
            expand_ratio (int): the channel expansion ratio for `mid_channels` in InvertedResBlock.
            norm (str or callable): a callable that takes the number of
                channels and returns a `nn.Module`, or a pre-defined string
                (See cvpods.layers.get_norm for more details).
            activation (str): a pre-defined string
                (See cvpods.layers.get_activation for more details).
            use_shortcut (bool): whether to use the residual path.
        """
        super(InvertedResBlock, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        mid_channels = int(round(input_channels * expand_ratio))
        self.use_shortcut = use_shortcut

        if self.use_shortcut:
            assert stride == 1
            assert input_channels == output_channels

        conv_kwargs = {
            "norm": get_norm(norm, mid_channels),
            "activation": get_activation(activation)
        }

        layers = []
        if expand_ratio > 1:
            layers.append(
                Conv2d(input_channels, mid_channels, 1, bias=False,  # Pixel-wise non-linear
                       **deepcopy(conv_kwargs))
            )

        layers += [
            Conv2d(mid_channels, mid_channels, 3, padding=1, bias=False,  # Depth-wise 3x3
                   stride=stride, groups=mid_channels, **deepcopy(conv_kwargs)),
            Conv2d(mid_channels, output_channels, 1, bias=False,  # Pixel-wise linear
                   norm=get_norm(norm, output_channels))
        ]
        self.conv = nn.Sequential(*layers)
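A quick check of the channel arithmetic above (illustrative numbers): with input_channels=24 and expand_ratio=6, the depthwise 3x3 runs on 144 channels before the linear 1x1 projects back down.

    mid_channels = int(round(24 * 6))  # 144 channels for the depthwise conv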
Example #6
    def __init__(
        self,
        in_channels=3,
        out_channels=64,
        norm="BN",
        activation=None,
        deep_stem=False,
        stem_width=32,
    ):
        super().__init__()
        self.conv1_1 = Conv2d(
            in_channels,
            stem_width,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width),
        )
        self.conv1_2 = Conv2d(
            stem_width,
            stem_width,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width),
        )
        self.conv1_3 = Conv2d(
            stem_width,
            stem_width * 2,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width * 2),
        )
        for layer in [self.conv1_1, self.conv1_2, self.conv1_3]:
            if layer is not None:
                weight_init.c2_msra_fill(layer)

        self.activation = get_activation(activation)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
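The three stacked 3x3 convs keep the same total stride as the single 7x7 stem in Example #1 (2 from the first conv, 2 from the pool); a plain-PyTorch shape sketch:

    import torch
    import torch.nn as nn

    stem = nn.Sequential(
        nn.Conv2d(3, 32, 3, stride=2, padding=1, bias=False),
        nn.Conv2d(32, 32, 3, stride=1, padding=1, bias=False),
        nn.Conv2d(32, 64, 3, stride=1, padding=1, bias=False),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    print(stem(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 64, 56, 56])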
Example #7
    def __init__(self, input_channels, output_channels, norm, activation):
        """
        Args:
            input_channels (int): the input channel number.
            output_channels (int): the output channel number.
            norm (str or callable): a callable that takes the number of
                channels and returns a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
            activation (str): a pre-defined string
                (See cvpods.layers.get_activation for more details).
        """
        super().__init__()
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = 2

        self.conv = Conv2d(input_channels, output_channels, 3, stride=2, padding=1, bias=False,
                           norm=get_norm(norm, output_channels),
                           activation=get_activation(activation))
Example #8
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 norm="BN",
                 activation=None):
        super().__init__()

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        self.activation = get_activation(activation)

        self.conv1 = Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        self.conv2 = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
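A hedged usage sketch (the class name BasicBlock and the accepted activation strings are assumptions; see get_activation): a ResNet-18-style stage stacks two such blocks, downsampling in the first.

    # assumes: import torch.nn as nn
    stage = nn.Sequential(
        BasicBlock(64, 128, stride=2, norm="BN", activation="ReLU"),
        BasicBlock(128, 128, stride=1, norm="BN", activation="ReLU"),
    )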
Example #9
    def __init__(self,
                 in_channels=3,
                 out_channels=64,
                 norm="BN",
                 activation=None,
                 deep_stem=False,
                 stem_width=32):
        """
        Args:
            norm (str or callable): a callable that takes the number of
                channels and returns a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
        """
        super().__init__()
        self.deep_stem = deep_stem

        if self.deep_stem:
            self.conv1_1 = Conv2d(
                in_channels,
                stem_width,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=False,
                norm=get_norm(norm, stem_width),
            )
            self.conv1_2 = Conv2d(
                stem_width,
                stem_width,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
                norm=get_norm(norm, stem_width),
            )
            self.conv1_3 = Conv2d(
                stem_width,
                stem_width * 2,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
                norm=get_norm(norm, stem_width * 2),
            )
            for layer in [self.conv1_1, self.conv1_2, self.conv1_3]:
                if layer is not None:
                    weight_init.c2_msra_fill(layer)
        else:
            self.conv1 = Conv2d(
                in_channels,
                out_channels,
                kernel_size=7,
                stride=2,
                padding=3,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
            weight_init.c2_msra_fill(self.conv1)

        self.activation = get_activation(activation)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
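A hedged usage sketch (the class name BasicStem is an assumption): deep_stem=True swaps the single 7x7 conv for the three-3x3 stem. Note that in that branch the output width is stem_width * 2 and out_channels is ignored.

    stem = BasicStem(in_channels=3, out_channels=64, norm="BN",
                     deep_stem=True, stem_width=32)  # output width: 32 * 2 = 64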
Example #10
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        activation=None,
        stride_in_1x1=False,
        dilation=1,
        deform_modulated=False,
        deform_num_groups=1,
    ):
        """
        Similar to :class:`BottleneckBlock`, but with deformable conv in the 3x3 convolution.
        """
        super().__init__(in_channels, out_channels, stride)
        self.deform_modulated = deform_modulated

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.activation = get_activation(activation)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        if deform_modulated:
            deform_conv_op = ModulatedDeformConv
            # offset channels per deformable group: 3 * k * k when modulated
            # (x/y offsets plus a mask), else 2 * k * k; with k=3: 27 vs. 18
            offset_channels = 27
        else:
            deform_conv_op = DeformConv
            offset_channels = 18

        self.conv2_offset = Conv2d(
            bottleneck_channels,
            offset_channels * deform_num_groups,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            dilation=dilation,
        )
        self.conv2 = deform_conv_op(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            deformable_groups=deform_num_groups,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)

        nn.init.constant_(self.conv2_offset.weight, 0)
        nn.init.constant_(self.conv2_offset.bias, 0)
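Only the constructor is shown; a hedged sketch of the forward pass this kind of deformable bottleneck typically implements, where conv2_offset predicts the offsets (and masks, when modulated) that conv2 consumes (an assumption, not part of the source):

    # assumes: import torch
    def forward(self, x):
        out = self.activation(self.conv1(x))
        if self.deform_modulated:
            offset_mask = self.conv2_offset(out)
            offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1)
            offset = torch.cat((offset_x, offset_y), dim=1)
            out = self.conv2(out, offset, mask.sigmoid())
        else:
            out = self.conv2(out, self.conv2_offset(out))
        out = self.activation(out)
        out = self.conv3(out)
        shortcut = self.shortcut(x) if self.shortcut is not None else x
        return self.activation(out + shortcut)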
Example #11
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        activation=None,
        stride_in_1x1=False,
        dilation=1,
    ):
        """
        Args:
            norm (str or callable): a callable that takes the number of
                channels and returns a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
            stride_in_1x1 (bool): when stride==2, whether to put stride in the
                first 1x1 convolution or the bottleneck 3x3 convolution.
        """
        super().__init__(in_channels, out_channels, stride)

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.activation = get_activation(activation)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
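A hedged sketch of the standard bottleneck forward pass (1x1 reduce -> 3x3 -> 1x1 expand, plus the shortcut; an assumption, since the snippet shows only the constructor):

    def forward(self, x):
        out = self.activation(self.conv1(x))
        out = self.activation(self.conv2(out))
        out = self.conv3(out)
        shortcut = self.shortcut(x) if self.shortcut is not None else x
        return self.activation(out + shortcut)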
Example #12
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        activation=None,
        stride_in_1x1=False,
        dilation=1,
        avd=False,
        avg_down=False,
        radix=1,
        bottleneck_width=64,
    ):
        """
        Args:
            norm (str or callable): a callable that takes the number of
                channels and returns a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
            stride_in_1x1 (bool): when stride==2, whether to put stride in the
                first 1x1 convolution or the bottleneck 3x3 convolution.
        """
        super().__init__(in_channels, out_channels, stride)

        self.avd = avd and (stride > 1)
        self.avg_down = avg_down
        self.radix = radix

        cardinality = num_groups
        group_width = int(bottleneck_channels *
                          (bottleneck_width / 64.)) * cardinality

        if in_channels != out_channels:
            if self.avg_down:
                self.shortcut_avgpool = nn.AvgPool2d(kernel_size=stride,
                                                     stride=stride,
                                                     ceil_mode=True,
                                                     count_include_pad=False)
                self.shortcut = Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=1,
                    bias=False,
                    norm=get_norm(norm, out_channels),
                )
            else:
                self.shortcut = Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                    norm=get_norm(norm, out_channels),
                )
        else:
            self.shortcut = None

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.activation = get_activation(activation)

        self.conv1 = Conv2d(
            in_channels,
            group_width,  # conv2 below consumes group_width channels
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, group_width),
        )

        if self.radix > 1:
            from .splat import SplAtConv2d
            self.conv2 = SplAtConv2d(
                group_width,
                group_width,
                kernel_size=3,
                stride=1 if self.avd else stride_3x3,
                padding=dilation,
                dilation=dilation,
                groups=cardinality,
                bias=False,
                radix=self.radix,
                norm=norm,
            )
        else:
            self.conv2 = Conv2d(
                group_width,
                group_width,
                kernel_size=3,
                stride=stride_3x3,
                padding=1 * dilation,
                bias=False,
                groups=num_groups,
                dilation=dilation,
                norm=get_norm(norm, group_width),
            )

        if self.avd:
            self.avd_layer = nn.AvgPool2d(3, stride, padding=1)

        self.conv3 = Conv2d(
            group_width,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        # SplAtConv2d (radix > 1) is a composite module, so it is not
        # MSRA-filled here.
        init_layers = [self.conv1, self.conv3, self.shortcut]
        if self.radix == 1:
            init_layers.append(self.conv2)
        for layer in init_layers:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
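A hedged sketch of how the pieces above typically compose in forward (based on the ResNeSt reference implementation with avd applied after conv2; an assumption, not shown in the source):

    def forward(self, x):
        out = self.activation(self.conv1(x))
        out = self.conv2(out)            # SplAtConv2d applies its own norm/activation
        if self.radix == 1:
            out = self.activation(out)
        if self.avd:
            out = self.avd_layer(out)    # average pooling in place of a strided 3x3
        out = self.conv3(out)
        if self.shortcut is not None:
            shortcut_in = self.shortcut_avgpool(x) if self.avg_down else x
            shortcut = self.shortcut(shortcut_in)
        else:
            shortcut = x
        return self.activation(out + shortcut)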
Example #13
    def __init__(
            self,
            in_channels,
            out_channels,
            *,
            bottleneck_channels,
            stride=1,
            num_groups=1,
            norm="BN",
            activation=None,
            stride_in_1x1=False,
            num_branch=3,
            dilations=(1, 2, 3),
            concat_output=False,
            test_branch_idx=-1,
    ):
        """
        Args:
            num_branch (int): the number of branches in TridentNet.
            dilations (tuple): the dilations of multiple branches in TridentNet.
            concat_output (bool): whether to concatenate the outputs of the
                multiple branches in TridentNet. Use 'True' for the last trident block.
        """
        super().__init__(in_channels, out_channels, stride)

        assert num_branch == len(dilations)

        self.num_branch = num_branch
        self.concat_output = concat_output
        self.test_branch_idx = test_branch_idx

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.activation = get_activation(activation)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv2 = TridentConv(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            paddings=dilations,
            bias=False,
            groups=num_groups,
            dilations=dilations,
            num_branch=num_branch,
            test_branch_idx=test_branch_idx,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
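Trident blocks run one branch per dilation with shared weights, so the forward pass is list-in/list-out. A hedged sketch mirroring the TridentNet reference code (an assumption, not part of the source):

    # assumes: import torch
    def forward(self, x):
        num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1
        if not isinstance(x, list):
            x = [x] * num_branch
        out = [self.activation(self.conv1(b)) for b in x]
        out = [self.activation(b) for b in self.conv2(out)]  # TridentConv is list-in/list-out
        out = [self.conv3(b) for b in out]
        shortcut = [self.shortcut(b) for b in x] if self.shortcut is not None else x
        out = [self.activation(o + s) for o, s in zip(out, shortcut)]
        return torch.cat(out) if self.concat_output else out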
Example #14
    def __init__(
        self,
        stem,
        inverted_residual_setting,
        norm,
        activation,
        num_classes=None,
        out_features=None,
    ):
        """
        See: https://arxiv.org/pdf/1801.04381.pdf

        Args:
            stem (nn.Module): a stem module
            inverted_residual_setting (list of list): Network structure.
                (See https://arxiv.org/pdf/1801.04381.pdf Table 2)
            norm (str or callable): a callable that takes the number of
                channels and returns a `nn.Module`, or a pre-defined string
                (See cvpods.layers.get_norm for more details).
            activation (str): a pre-defined string
                (See cvpods.layers.get_activation for more details).
            num_classes (None or int): if None, will not perform classification.
            out_features (list[str]): name of the layers whose outputs should
                be returned in forward. Can be "stem", "linear", or a stage
                name such as "mobile3". If None, will return the output of
                the last layer.
        """
        super(MobileNetV2, self).__init__()

        self.num_classes = num_classes

        # only check the first element, assuming user knows t,c,n,s are required
        if len(inverted_residual_setting[0]) != 4:
            raise ValueError(
                "each element of inverted_residual_setting should be a "
                "4-element list, got {}".format(inverted_residual_setting))

        self.stem = stem
        self.last_channel = 1280

        input_channels = stem.output_channels

        current_stride = stem.stride
        self._out_feature_strides = {"stem": current_stride}
        self._out_feature_channels = {"stem": input_channels}

        # ---------------- Stages --------------------- #
        ext = 0
        self.stages_and_names = []
        for i, (t, c, n, s) in enumerate(inverted_residual_setting):
            # t: expand ratio
            # c: output channels
            # n: block number
            # s: stride
            # See https://arxiv.org/pdf/1801.04381.pdf Table 2 for more details
            if s == 1 and i > 0:
                ext += 1
            else:
                ext = 0

            current_stride *= s
            assert int(np.log2(current_stride)) == np.log2(current_stride)

            name = "mobile" + str(int(np.log2(current_stride)))
            if ext != 0:
                name += "-{}".format(ext + 1)

            stage = nn.Sequential(
                *make_stage(n, input_channels, c, s, t, norm, activation))

            self._out_feature_strides[name] = current_stride
            self._out_feature_channels[name] = c

            self.add_module(name, stage)
            self.stages_and_names.append((stage, name))
            input_channels = c

        name = "mobile" + str(int(np.log2(current_stride))) + "-last"
        stage = Conv2d(input_channels,
                       self.last_channel,
                       kernel_size=1,
                       bias=False,
                       norm=get_norm("BN", self.last_channel),
                       activation=get_activation(activation))
        self.stages_and_names.append((stage, name))
        self.add_module(name, stage)

        self._out_feature_strides[name] = current_stride
        self._out_feature_channels[name] = self.last_channel

        # ---------------- Classifier ------------------- #
        if num_classes is not None:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.dropout = nn.Dropout(0.2)
            self.classifier = nn.Linear(self.last_channel, num_classes)
            name = "linear"

        self._out_features = [name] if out_features is None else out_features

        self._initialize_weights()
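For reference, the standard configuration from Table 2 of the MobileNetV2 paper, in the (t, c, n, s) layout the constructor expects:

    inverted_residual_setting = [
        # t (expand ratio), c (output channels), n (repeats), s (stride)
        [1, 16, 1, 1],
        [6, 24, 2, 2],
        [6, 32, 3, 2],
        [6, 64, 4, 2],
        [6, 96, 3, 1],
        [6, 160, 3, 2],
        [6, 320, 1, 1],
    ]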
Example #15
    def __init__(self,
                 stem,
                 inverted_residual_setting,
                 norm,
                 activation,
                 num_classes=None,
                 out_features=None,
                 init_model=False):
        """
        MobileNet V2 main class.

        Args:
            num_classes (None or int): number of classes; if None, no
                classification head is built.
            inverted_residual_setting (list of list): network structure
                (see https://arxiv.org/pdf/1801.04381.pdf Table 2).
            init_model (bool): whether to run weight initialization at the end.
        """
        super(MobileNetV2, self).__init__()

        self.num_classes = num_classes

        # only check the first element, assuming user knows t,c,n,s are required
        if len(inverted_residual_setting[0]) != 4:
            raise ValueError(
                "each element of inverted_residual_setting should be a "
                "4-element list, got {}".format(inverted_residual_setting))

        self.stem = stem
        self.last_channel = 1280

        input_channels = stem.output_channels

        current_stride = stem.stride
        self._out_feature_strides = {"stem": current_stride}
        self._out_feature_channels = {"stem": input_channels}

        # ---------------- Stages --------------------- #
        ext = 0
        self.stages_and_names = []
        for i, (t, c, n, s) in enumerate(inverted_residual_setting):
            # t: expand ratio
            # c: output channels
            # n: block number
            # s: stride
            # See https://arxiv.org/pdf/1801.04381.pdf Table 2 for more details
            if s == 1 and i > 0:
                ext += 1
            else:
                ext = 0

            current_stride *= s
            assert int(np.log2(current_stride)) == np.log2(current_stride)

            name = "mobile" + str(int(np.log2(current_stride)))
            if ext != 0:
                name += "-{}".format(ext + 1)

            stage = nn.Sequential(
                *make_stage(n, input_channels, c, s, t, norm, activation))

            self._out_feature_strides[name] = current_stride
            self._out_feature_channels[name] = c

            self.add_module(name, stage)
            self.stages_and_names.append((stage, name))
            input_channels = c

        name = "mobile" + str(int(np.log2(current_stride))) + "-last"
        stage = Conv2d(input_channels,
                       self.last_channel,
                       kernel_size=1,
                       bias=False,
                       norm=get_norm("BN", self.last_channel),
                       activation=get_activation(activation))
        self.stages_and_names.append((stage, name))
        self.add_module(name, stage)

        self._out_feature_strides[name] = current_stride
        self._out_feature_channels[name] = self.last_channel

        # ---------------- Classifier ------------------- #
        if num_classes is not None:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.dropout = nn.Dropout(0.2)
            self.classifier = nn.Linear(self.last_channel, num_classes)
            name = "linear"

        self._out_features = [name] if out_features is None else out_features

        if init_model:
            self._initialize_weights()
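A hedged illustration of the stage-naming scheme in both MobileNetV2 examples: names encode log2 of the accumulated stride, and consecutive stride-1 stages get "-2", "-3", ... suffixes. With the standard Table-2 setting and a stride-2 stem:

    import numpy as np

    setting = [[1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2],
               [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1]]
    current_stride, ext, names = 2, 0, []  # stem stride assumed to be 2
    for i, (t, c, n, s) in enumerate(setting):
        ext = ext + 1 if (s == 1 and i > 0) else 0
        current_stride *= s
        name = "mobile" + str(int(np.log2(current_stride)))
        names.append(name if ext == 0 else "{}-{}".format(name, ext + 1))
    print(names)
    # ['mobile1', 'mobile2', 'mobile3', 'mobile4', 'mobile4-2', 'mobile5', 'mobile5-2']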