Example #1
    def __init__(
        self,
        in_channels,
        out_channels,
        dw_kernel_size=3,
        stride=1,
        dilation=1,
        use_se=False,
        expand_ratio=1.0,  # expansion ratio for the first pointwise conv
        keep_prob=1,  # keep probability for drop connect
        noskip=False,
        norm_layer=ABN,
        norm_act="relu",
    ):
        super().__init__()
        mid_chs = make_divisible(in_channels * expand_ratio)
        self.has_residual = (in_channels == out_channels and stride == 1) and not noskip
        self.has_expansion = expand_ratio != 1
        if self.has_expansion:
            self.conv_pw = conv1x1(in_channels, mid_chs)
            self.bn1 = norm_layer(mid_chs, activation=norm_act)

        self.conv_dw = nn.Conv2d(
            mid_chs,
            mid_chs,
            dw_kernel_size,
            stride=stride,
            groups=mid_chs,
            dilation=dilation,
            bias=False,
            padding=dilation * (dw_kernel_size - 1) // 2,
        )
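        # the padding above is "same"-style for stride 1: e.g. kernel 3, dilation 2 -> effective kernel 5 -> padding 2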
        self.bn2 = norm_layer(mid_chs, activation=norm_act)
        # some models (e.g. MobileNet) base the SE reduction on mid_chs instead of in_channels; in_channels // 4 is used here
        self.se = SEModule(mid_chs, in_channels // 4, norm_act) if use_se else nn.Identity()
        self.conv_pw1 = conv1x1(mid_chs, out_channels)
        self.bn3 = norm_layer(out_channels, activation="identity")
        self.drop_connect = DropConnect(keep_prob) if keep_prob < 1 else nn.Identity()
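The example above only shows the constructor; the forward pass is not included. A rough sketch of how these layers are typically chained (an assumption, not part of the original snippet): expansion, depthwise conv, SE, projection, then an optional drop-connected residual add.

    # hypothetical forward() for the block above; the original example does not show it
    def forward(self, x):
        residual = x
        if self.has_expansion:
            x = self.bn1(self.conv_pw(x))  # 1x1 expansion (norm + act fused in ABN)
        x = self.bn2(self.conv_dw(x))      # depthwise conv (norm + act)
        x = self.se(x)                     # squeeze-and-excitation or identity
        x = self.bn3(self.conv_pw1(x))     # 1x1 projection, linear (identity activation)
        if self.has_residual:
            x = self.drop_connect(x) + residual  # drop connect only on the residual branch
        return x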
Example #2
    def __init__(
        self,
        blocks_args=None,
        width_multiplier=None,
        depth_multiplier=None,
        pretrained=None,  # not used; kept for a consistent signature
        num_classes=1000,
        in_channels=3,
        output_stride=32,
        encoder=False,
        drop_rate=0,
        drop_connect_rate=0,
        stem_size=32,
        norm_layer="abn",
        norm_act="swish",
        match_tf_same_padding=False,
    ):
        super().__init__()
        norm_layer = bn_from_name(norm_layer)
        self.norm_layer = norm_layer
        self.norm_act = norm_act
        self.width_multiplier = width_multiplier
        self.depth_multiplier = depth_multiplier
        stem_size = make_divisible(stem_size * width_multiplier)
        self.conv_stem = conv3x3(in_channels, stem_size, stride=2)
        self.bn1 = norm_layer(stem_size, activation=norm_act)
        in_channels = stem_size
        self.blocks = nn.ModuleList([])
        # modify block args to account for output_stride strategy
        blocks_args = _patch_block_args(blocks_args, output_stride)
        for block_idx, block_arg in enumerate(blocks_args):
            block = []
            block_arg["in_channels"] = make_divisible(block_arg["in_channels"] * self.width_multiplier)
            block_arg["out_channels"] = make_divisible(block_arg["out_channels"] * self.width_multiplier)
            block_arg["norm_layer"] = norm_layer
            block_arg["norm_act"] = norm_act
            # linearly scale keep prob
            block_arg["keep_prob"] = 1 - drop_connect_rate * block_idx / len(blocks_args)
            repeats = block_arg.pop("num_repeat")
            repeats = int(math.ceil(repeats * self.depth_multiplier))
            # when dilating a conv with stride 2 we want the first (strided) layer to use dilation // 2;
            # this prevents checkerboard artifacts with OS=16 and OS=8
            dilation = block_arg.get("dilation", 1)  # remember the block's dilation to restore for the repeats below
            if block_arg.pop("no_first_dilation", False):
                block_arg["dilation"] = max(1, dilation // 2)
            block.append(InvertedResidual(**block_arg))
            # only first layer in block is strided
            block_arg["stride"] = 1
            block_arg["dilation"] = dilation
            block_arg["in_channels"] = block_arg["out_channels"]
            for _ in range(repeats - 1):
                block.append(InvertedResidual(**block_arg))

            self.blocks.append(nn.Sequential(*block))

        # Head

        if encoder:
            self.forward = self.encoder_features
        else:
            out_channels = block_arg["out_channels"]
            num_features = make_divisible(1280 * width_multiplier)
            self.conv_head = conv1x1(out_channels, num_features)
            self.bn2 = norm_layer(num_features, activation=norm_act)
            self.global_pool = nn.AdaptiveAvgPool2d(1)
            self.dropout = nn.Dropout(drop_rate, inplace=True)
            self.classifier = nn.Linear(num_features, num_classes)

        patch_bn(self)  # adjust epsilon
        initialize(self)
        if match_tf_same_padding:
            conv_to_same_conv(self)
            maxpool_to_same_maxpool(self)
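A minimal usage sketch, assuming the surrounding class is named EfficientNet (the class statement is not shown) and inferring the blocks_args layout from the keys consumed in __init__; the values below are illustrative, not a real EfficientNet configuration:

import torch

# hypothetical usage; class name and blocks_args layout are inferred from the snippet above
blocks_args = [
    dict(in_channels=32, out_channels=16, num_repeat=1, stride=1,
         dw_kernel_size=3, expand_ratio=1, use_se=True),
    dict(in_channels=16, out_channels=24, num_repeat=2, stride=2,
         dw_kernel_size=3, expand_ratio=6, use_se=True),
    # ... remaining stages
]
model = EfficientNet(
    blocks_args=blocks_args,
    width_multiplier=1.0,
    depth_multiplier=1.0,
    drop_rate=0.2,
    drop_connect_rate=0.2,
)
logits = model(torch.randn(1, 3, 224, 224))  # -> shape (1, 1000)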