Example #1
    def __init__(self,
                 prev_channels,
                 current_channels,
                 norm_layer=ABN,
                 norm_act="relu"):
        super().__init__()
        transition_layers = []
        for prev_ch, curr_ch in zip(prev_channels, current_channels):
            if prev_ch != curr_ch:
                # this case only happens between 1st and 2nd stage
                layers = [
                    conv3x3(prev_ch, curr_ch),
                    norm_layer(curr_ch, activation=norm_act)
                ]
                transition_layers.append(nn.Sequential(*layers))
            else:
                transition_layers.append(nn.Identity())

        if len(current_channels) > len(prev_channels):  # only works for ONE extra branch
            layers = [
                conv3x3(prev_channels[-1], current_channels[-1], 2),
                norm_layer(current_channels[-1], activation=norm_act)
            ]
            transition_layers.append(nn.Sequential(*layers))
        self.trans_layers = nn.ModuleList(transition_layers)
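
The forward pass of this block is not shown. A plausible sketch (an assumption about how the `trans_layers` built above are consumed, mirroring HRNet's transition semantics):

def transition_forward(trans_layers, feats):
    # feats: list of per-branch feature maps from the previous stage.
    # Each existing branch goes through its own transition layer; the extra branch
    # (when current_channels has one more entry) is built from the last feature map.
    return [layer(feats[min(i, len(feats) - 1)]) for i, layer in enumerate(trans_layers)]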
Example #2
 def _make_stem(self, stem_type, stem_width, in_channels, norm_layer,
                norm_act):
     assert stem_type in {"", "deep", "space2depth"}, f"Stem type {stem_type} is not supported"
     if stem_type == "space2depth":
         # in the paper they use conv1x1 but in code conv3x3 (which seems better)
         self.conv1 = nn.Sequential(SpaceToDepth(),
                                    conv3x3(in_channels * 16, stem_width))
         self.bn1 = norm_layer(stem_width, activation=norm_act)
         self.maxpool = nn.Identity()  # not used but needed for code compatibility
     else:
         if stem_type == "deep":
             self.conv1 = nn.Sequential(
                 conv3x3(in_channels, stem_width // 2, 2),
                 norm_layer(stem_width // 2, activation=norm_act),
                 conv3x3(stem_width // 2, stem_width // 2),
                 norm_layer(stem_width // 2, activation=norm_act),
                 conv3x3(stem_width // 2, stem_width),
             )
         else:
             self.conv1 = nn.Conv2d(in_channels,
                                    stem_width,
                                    kernel_size=7,
                                    stride=2,
                                    padding=3,
                                    bias=False)
         self.bn1 = norm_layer(stem_width, activation=norm_act)
         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
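
The space2depth branch relies on a SpaceToDepth module whose definition is not included here. A minimal, self-contained sketch of such a rearrangement (the class name and exact channel ordering are assumptions, inferred from the `in_channels * 16` factor above):

import torch
import torch.nn as nn

class SpaceToDepthSketch(nn.Module):
    """Rearranges spatial blocks into channels: (N, C, H, W) -> (N, C*bs*bs, H/bs, W/bs)."""

    def __init__(self, block_size=4):
        super().__init__()
        self.bs = block_size

    def forward(self, x):
        n, c, h, w = x.shape
        bs = self.bs
        x = x.view(n, c, h // bs, bs, w // bs, bs)    # split H and W into bs-sized blocks
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # move the block dims next to channels
        return x.view(n, c * bs * bs, h // bs, w // bs)

x = torch.randn(2, 3, 224, 224)
assert SpaceToDepthSketch(block_size=4)(x).shape == (2, 3 * 16, 56, 56)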
Example #3
    def __init__(
        self,
        pretrained="coco",  # not used; here for proper signature
        encoder_name="resnet50",
        encoder_weights="imagenet",
        pyramid_channels=256,
        num_classes=80,
        # drop_connect_rate=0, # TODO: add
        encoder_norm_layer="abn",
        encoder_norm_act="relu",
        decoder_norm_layer="none",  # None by default to match detectron & mmdet versions
        decoder_norm_act="relu",
        **encoder_params,
    ):
        super().__init__()
        self.encoder = get_encoder(
            encoder_name,
            norm_layer=encoder_norm_layer,
            norm_act=encoder_norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )
        norm_layer = bn_from_name(decoder_norm_layer)
        self.pyramid6 = nn.Sequential(
            conv3x3(self.encoder.out_shapes[0], pyramid_channels, 2,
                    bias=True),
            norm_layer(pyramid_channels, activation="identity"),
        )
        self.pyramid7 = nn.Sequential(
            conv3x3(pyramid_channels, pyramid_channels, 2, bias=True),
            norm_layer(pyramid_channels, activation="identity"),
        )
        self.fpn = FPN(self.encoder.out_shapes[:-2],
                       pyramid_channels=pyramid_channels)

        def make_final_convs():
            layers = []
            for _ in range(4):
                layers += [
                    conv3x3(pyramid_channels, pyramid_channels, bias=True)
                ]
                # Putting the norm here is fine for GroupNorm, but for BN it should be done
                # the other way (shared convs with per-level BNs); see EffDet for an example.
                # This implementation may need to change to align with EffDet.
                layers += [
                    norm_layer(pyramid_channels, activation=decoder_norm_act)
                ]
            return nn.Sequential(*layers)

        anchors_per_location = 9
        self.cls_convs = make_final_convs()
        self.cls_head_conv = conv3x3(pyramid_channels,
                                     num_classes * anchors_per_location,
                                     bias=True)
        self.box_convs = make_final_convs()
        self.box_head_conv = conv3x3(pyramid_channels,
                                     4 * anchors_per_location,
                                     bias=True)
        self.num_classes = num_classes
        self._initialize_weights()
Example #4
    def __init__(self, pre_channels, norm_layer=ABN, norm_act="relu"):
        super().__init__()
        head_block = Bottleneck
        head_channels = [32, 64, 128, 256]
        # Increasing the #channels on each resolution
        # from C, 2C, 4C, 8C to 128, 256, 512, 1024
        incre_modules = []
        for (pre_c, head_c) in zip(pre_channels, head_channels):
            incre_modules.append(
                make_layer(pre_c, head_c, 1, norm_layer, norm_act))
        self.incre_modules = nn.ModuleList(incre_modules)

        # downsampling modules
        downsamp_modules = []
        for i in range(len(pre_channels) - 1):
            in_ch = head_channels[i] * head_block.expansion
            out_ch = head_channels[i + 1] * head_block.expansion
            downsamp_module = nn.Sequential(
                conv3x3(in_ch, out_ch, 2, bias=True),
                norm_layer(out_ch, activation=norm_act))
            downsamp_modules.append(downsamp_module)
        self.downsamp_modules = nn.ModuleList(downsamp_modules)

        self.final_layer = nn.Sequential(
            conv1x1(head_channels[3] * head_block.expansion, 2048, bias=True),
            norm_layer(2048, activation=norm_act),
        )
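
A hypothetical forward for this head (an assumption based on the official HRNet classification head; the real forward is not shown): each resolution is widened by its incre module, then the running feature is repeatedly downsampled and added to the next branch before `final_layer`.

def hr_head_forward(head, feats):
    # feats: list of 4 feature maps ordered from highest to lowest resolution
    y = head.incre_modules[0](feats[0])
    for i, down in enumerate(head.downsamp_modules):
        y = head.incre_modules[i + 1](feats[i + 1]) + down(y)
    return head.final_layer(y)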
Example #5
    def __init__(
        self,
        encoder_name="resnet34",
        encoder_weights="imagenet",
        pyramid_channels=256,
        num_classes=80,
        norm_layer="abn",
        norm_act="relu",
        **encoder_params,
    ):
        super().__init__()
        self.encoder = get_encoder(
            encoder_name,
            norm_layer=norm_layer,
            norm_act=norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )
        norm_layer = bn_from_name(norm_layer)
        self.pyramid6 = conv3x3(256, 256, 2, bias=True)
        self.pyramid7 = conv3x3(256, 256, 2, bias=True)
        self.fpn = FPN(
            self.encoder.out_shapes[:-2],
            pyramid_channels=pyramid_channels,
        )

        def make_head(out_size):
            layers = []
            for _ in range(4):
                # some implementations don't use BN here but I think it's needed
                # TODO: test how it affects results
                layers += [
                    nn.Conv2d(256, 256, 3, padding=1),
                    norm_layer(256, activation=norm_act)
                ]
                # layers += [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU()]

            layers += [nn.Conv2d(256, out_size, 3, padding=1)]
            return nn.Sequential(*layers)

        self.ratios = [1.0, 2.0, 0.5]
        self.scales = [4 * 2**(i / 3) for i in range(3)]
        anchors = len(self.ratios) * len(self.scales)  # 9

        self.cls_head = make_head(num_classes * anchors)
        self.box_head = make_head(4 * anchors)
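
The heads emit `[B, anchors * out_size, H, W]` per FPN level. A minimal post-processing sketch (an assumption, not this repo's code) of how such an output is usually flattened before computing the loss or decoding boxes:

import torch

def flatten_cls_output(cls_out, num_classes):
    b, _, h, w = cls_out.shape
    # [B, anchors * num_classes, H, W] -> [B, H * W * anchors, num_classes]
    return cls_out.permute(0, 2, 3, 1).reshape(b, -1, num_classes)

logits = flatten_cls_output(torch.randn(2, 80 * 9, 32, 32), num_classes=80)
assert logits.shape == (2, 32 * 32 * 9, 80)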
Example #6
    def _make_fuse_layers(self, norm_layer, norm_act):
        if self.num_branches == 1:
            return None

        num_branches = self.num_branches
        num_inchannels = self.num_inchannels
        fuse_layers = []
        for i in range(num_branches):
            fuse_layer = []
            for j in range(num_branches):
                if j > i:
                    fuse_layer.append(
                        nn.Sequential(
                            conv1x1(num_inchannels[j], num_inchannels[i]),
                            norm_layer(num_inchannels[i],
                                       activation="identity"),
                            nn.Upsample(scale_factor=2**(j - i),
                                        mode='nearest')))
                elif j == i:
                    fuse_layer.append(nn.Identity())
                else:
                    conv3x3s = []
                    for k in range(i - j):
                        if k == i - j - 1:
                            num_outchannels_conv3x3 = num_inchannels[i]
                            conv3x3s.append(
                                nn.Sequential(
                                    conv3x3(num_inchannels[j],
                                            num_outchannels_conv3x3, 2),
                                    norm_layer(num_outchannels_conv3x3,
                                               activation="identity")))
                        else:
                            num_outchannels_conv3x3 = num_inchannels[j]
                            conv3x3s.append(
                                nn.Sequential(
                                    conv3x3(num_inchannels[j],
                                            num_outchannels_conv3x3, 2),
                                    norm_layer(num_outchannels_conv3x3,
                                               activation=norm_act)))
                    fuse_layer.append(nn.Sequential(*conv3x3s))
            fuse_layers.append(nn.ModuleList(fuse_layer))

        return nn.ModuleList(fuse_layers)
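
The module's forward is not shown; a hypothetical way to apply the fuse layers built above (an assumption): branch i's output is the sum over j of `fuse_layers[i][j](x[j])`, followed by an activation.

import torch.nn.functional as F

def fuse_branches(fuse_layers, feats):
    # feats: list of per-branch feature maps, one per resolution
    outputs = []
    for per_branch in fuse_layers:
        fused = sum(per_branch[j](feats[j]) for j in range(len(feats)))
        outputs.append(F.relu(fused, inplace=True))
    return outputs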
Example #7
 def make_final_convs():
     layers = []
     for _ in range(4):
         layers += [
             conv3x3(pyramid_channels, pyramid_channels, bias=True)
         ]
         # Putting the norm here is fine for GroupNorm, but for BN it should be done
         # the other way (shared convs with per-level BNs); see EffDet for an example.
         # This implementation may need to change to align with EffDet.
         layers += [
             norm_layer(pyramid_channels, activation=decoder_norm_act)
         ]
     return nn.Sequential(*layers)
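
The comment above refers to the EffDet-style alternative: convolutions shared across pyramid levels while each level keeps its own BatchNorm, since BN statistics differ per level. A self-contained sketch of that layout (an assumption of the intended design, not code from this repo or from EffDet):

import torch.nn as nn

class SharedConvPerLevelBN(nn.Module):
    def __init__(self, channels=256, num_levels=5, num_repeats=4):
        super().__init__()
        # one set of convs shared by every level
        self.convs = nn.ModuleList(
            nn.Conv2d(channels, channels, 3, padding=1, bias=True) for _ in range(num_repeats)
        )
        # a separate BN per (level, repeat) pair
        self.bns = nn.ModuleList(
            nn.ModuleList(nn.BatchNorm2d(channels) for _ in range(num_repeats))
            for _ in range(num_levels)
        )
        self.act = nn.ReLU(inplace=True)

    def forward(self, feats):  # feats: list of per-level feature maps
        outs = []
        for level, x in enumerate(feats):
            for conv, bn in zip(self.convs, self.bns[level]):
                x = self.act(bn(conv(x)))
            outs.append(x)
        return outs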
Example #8
 def _make_stem(self, stem_type, stem_width, in_channels, norm_layer, norm_act):
     supported_stems = {"", "deep", "space2depth", "space2depth_2"}
     assert stem_type in supported_stems, f"Stem type {stem_type} is not supported"
     if stem_type == "space2depth":
         # in the paper they use conv1x1 but in code conv3x3 (which seems better)
         self.conv1 = nn.Sequential(SpaceToDepth(block_size=4), conv3x3(in_channels * 16, stem_width))
         self.bn1 = norm_layer(stem_width, activation=norm_act)
         self.maxpool = nn.Identity()  # not used but needed for code compatibility
     elif stem_type == "space2depth_2":
         # the original S2D stem is ~4% faster than the default; this version is only ~2% faster but can be used as an encoder
         self.conv1 = nn.Sequential(
             SpaceToDepth(block_size=2),
             conv3x3(in_channels * 4, stem_width // 4),
             norm_layer(stem_width // 4, activation=norm_act),
         )
         self.bn1 = nn.Identity()
         # the name is confusing, but it's kept for compatibility
         self.maxpool = nn.Sequential(
             SpaceToDepth(block_size=2),
             conv3x3(stem_width, stem_width),
             norm_layer(stem_width, activation=norm_act),
         )
     else:
         if stem_type == "deep":
             self.conv1 = nn.Sequential(
                 conv3x3(in_channels, stem_width // 2, 2),
                 norm_layer(stem_width // 2, activation=norm_act),
                 conv3x3(stem_width // 2, stem_width // 2),
                 norm_layer(stem_width // 2, activation=norm_act),
                 conv3x3(stem_width // 2, stem_width),
             )
         else:
             self.conv1 = nn.Conv2d(in_channels, stem_width, kernel_size=7, stride=2, padding=3, bias=False)
         self.bn1 = norm_layer(stem_width, activation=norm_act)
         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
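
A quick shape check for the space2depth_2 stem (an illustration using nn.PixelUnshuffle as a stand-in for SpaceToDepth): two block_size=2 rearrangements give the same overall stride-4 reduction as the default 7x7/s2 conv plus maxpool.

import torch
import torch.nn as nn

stem_width = 64
x = torch.randn(1, 3, 224, 224)
conv1 = nn.Sequential(nn.PixelUnshuffle(2), nn.Conv2d(3 * 4, stem_width // 4, 3, padding=1))
maxpool = nn.Sequential(nn.PixelUnshuffle(2), nn.Conv2d(stem_width, stem_width, 3, padding=1))
print(maxpool(conv1(x)).shape)  # torch.Size([1, 64, 56, 56])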
Example #9
    def __init__(self,
                 in_planes,
                 growth_rate,
                 drop_rate=0.0,
                 memory_efficient=False,
                 norm_layer=ABN,
                 norm_act='relu'):
        super(_DenseLayer, self).__init__()

        width = growth_rate * self.expansion
        self.norm1 = norm_layer(in_planes, activation=norm_act)
        self.conv1 = conv1x1(in_planes, width)
        self.norm2 = norm_layer(width, activation=norm_act)
        self.conv2 = conv3x3(width, growth_rate)
        self.drop_rate = drop_rate
        self.memory_efficient = memory_efficient
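
A hypothetical forward for this layer (an assumption following the DenseNet-BC pattern; the real forward with memory-efficient checkpointing is not shown): features from all previous layers are concatenated, squeezed by the 1x1 bottleneck, and the 3x3 conv produces `growth_rate` new channels.

import torch
import torch.nn.functional as F

def dense_layer_forward(layer, *prev_features):
    x = torch.cat(prev_features, dim=1)  # in_planes channels in total
    out = layer.conv2(layer.norm2(layer.conv1(layer.norm1(x))))  # ABN = norm + activation
    if layer.drop_rate > 0:
        out = F.dropout(out, p=layer.drop_rate, training=layer.training)
    return out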
Example #10
    def __init__(
        self,
        block=None,
        layers=None,
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        use_se=False,
        groups=1,
        base_width=64,
        deep_stem=False,
        output_stride=32,
        norm_layer="abn",
        norm_act="relu",
        antialias=False,
        encoder=False,
        drop_rate=0.0,
        drop_connect_rate=0.0,
        global_pool="avg",
        init_bn0=True,
    ):

        stem_width = 64
        norm_layer = bn_from_name(norm_layer)
        self.inplanes = stem_width
        self.num_classes = num_classes
        self.groups = groups
        self.base_width = base_width
        self.block = block
        self.expansion = block.expansion
        self.norm_act = norm_act
        self.block_idx = 0
        self.num_blocks = sum(layers)
        self.drop_connect_rate = drop_connect_rate
        super(ResNet, self).__init__()

        if deep_stem:
            self.conv1 = nn.Sequential(
                conv3x3(in_channels, stem_width // 2, 2),
                norm_layer(stem_width // 2, activation=norm_act),
                conv3x3(stem_width // 2, stem_width // 2),
                norm_layer(stem_width // 2, activation=norm_act),
                conv3x3(stem_width // 2, stem_width),
            )
        else:
            self.conv1 = nn.Conv2d(in_channels,
                                   stem_width,
                                   kernel_size=7,
                                   stride=2,
                                   padding=3,
                                   bias=False)
        self.bn1 = norm_layer(stem_width, activation=norm_act)
        self.maxpool = nn.MaxPool2d(
            kernel_size=3,
            stride=2,
            padding=0 if use_se else 1,
            ceil_mode=True if use_se else False,
        )
        if output_stride not in [8, 16, 32]:
            raise ValueError("Output stride should be in [8, 16, 32]")
        if output_stride == 8:
            stride_3, stride_4, dilation_3, dilation_4 = 1, 1, 2, 4
        elif output_stride == 16:
            stride_3, stride_4, dilation_3, dilation_4 = 2, 1, 1, 2
        elif output_stride == 32:
            stride_3, stride_4, dilation_3, dilation_4 = 2, 2, 1, 1
        largs = dict(use_se=use_se,
                     norm_layer=norm_layer,
                     norm_act=norm_act,
                     antialias=antialias)
        self.layer1 = self._make_layer(64, layers[0], stride=1, **largs)
        self.layer2 = self._make_layer(128, layers[1], stride=2, **largs)
        self.layer3 = self._make_layer(256,
                                       layers[2],
                                       stride=stride_3,
                                       dilation=dilation_3,
                                       **largs)
        self.layer4 = self._make_layer(512,
                                       layers[3],
                                       stride=stride_4,
                                       dilation=dilation_4,
                                       **largs)
        self.global_pool = GlobalPool2d(global_pool)
        self.num_features = 512 * self.expansion
        self.encoder = encoder
        if not encoder:
            self.dropout = nn.Dropout(p=drop_rate, inplace=True)
            self.last_linear = nn.Linear(
                self.num_features * self.global_pool.feat_mult(), num_classes)
        else:
            self.forward = self.encoder_features

        self._initialize_weights(init_bn0)
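
A quick illustration (not part of the model) of why stride and dilation are swapped when output_stride < 32: a dilated 3x3 conv keeps the spatial size a stride-2 conv would have halved, while still enlarging the receptive field.

import torch
import torch.nn as nn

x = torch.randn(1, 64, 56, 56)
strided = nn.Conv2d(64, 64, 3, stride=2, padding=1)(x)              # -> [1, 64, 28, 28]
dilated = nn.Conv2d(64, 64, 3, stride=1, padding=2, dilation=2)(x)  # -> [1, 64, 56, 56]
print(strided.shape, dilated.shape)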
Example #11
    def __init__(
        self,
        width=18,
        small=False,
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        norm_layer="abn",
        norm_act="relu",
        encoder=False,
    ):
        super(HighResolutionNet, self).__init__()
        stem_width = 64
        norm_layer = bn_from_name(norm_layer)
        self.bn_args = bn_args = {
            "norm_layer": norm_layer,
            "norm_act": norm_act
        }
        self.conv1 = conv3x3(in_channels, stem_width, stride=2)
        self.bn1 = norm_layer(stem_width, activation=norm_act)

        self.conv2 = conv3x3(stem_width, stem_width, stride=2)
        self.bn2 = norm_layer(stem_width, activation=norm_act)

        channels = [width, width * 2, width * 4, width * 8]
        n_blocks = [2 if small else 4] * 4

        self.layer1 = make_layer(stem_width, stem_width, n_blocks[0],
                                 **bn_args)

        self.transition1 = TransitionBlock([stem_width * Bottleneck.expansion],
                                           channels[:2], **bn_args)
        self.stage2 = self._make_stage(n_modules=1,
                                       n_branches=2,
                                       n_blocks=n_blocks[:2],
                                       n_chnls=channels[:2])

        self.transition2 = TransitionBlock(channels[:2], channels[:3],
                                           **bn_args)
        self.stage3 = self._make_stage(  # 3 if small else 4
            n_modules=(4, 3)[small],
            n_branches=3,
            n_blocks=n_blocks[:3],
            n_chnls=channels[:3])

        self.transition3 = TransitionBlock(channels[:3], channels, **bn_args)
        self.stage4 = self._make_stage(  # 2 if small else 3
            n_modules=(3, 2)[small],
            n_branches=4,
            n_blocks=n_blocks,
            n_chnls=channels,
        )

        self.encoder = encoder
        if encoder:
            self.forward = self.encoder_features
        else:
            # Classification Head
            self.cls_head = HRClassificationHead(channels, **bn_args)
            self.global_pool = nn.AdaptiveAvgPool2d(1)
            self.last_linear = nn.Linear(2048, num_classes)
        # initialize weights
        initialize(self)
Example #12
    def __init__(
            self,
            block=None,
            layers=None,
            pretrained=None,  # not used. here for proper signature
            num_classes=1000,
            in_channels=3,
            use_se=False,
            groups=1,
            base_width=64,
            deep_stem=False,
            dilated=False,
            norm_layer='abn',
            norm_act='relu',
            antialias=False,
            encoder=False,
            drop_rate=0.0,
            global_pool='avg',
            init_bn0=True):

        stem_width = 64
        if norm_layer.lower() == 'abn':
            norm_act = 'relu'

        norm_layer = bn_from_name(norm_layer)
        self.inplanes = stem_width
        self.num_classes = num_classes
        self.groups = groups
        self.base_width = base_width
        self.drop_rate = drop_rate
        self.block = block
        self.expansion = block.expansion
        self.dilated = dilated
        self.norm_act = norm_act
        super(ResNet, self).__init__()

        if deep_stem:
            self.conv1 = nn.Sequential(
                conv3x3(in_channels, stem_width // 2, 2),
                norm_layer(stem_width // 2, activation=norm_act),
                conv3x3(stem_width // 2, stem_width // 2, 2),
                norm_layer(stem_width // 2, activation=norm_act),
                conv3x3(stem_width // 2, stem_width))
        else:
            self.conv1 = nn.Conv2d(in_channels,
                                   stem_width,
                                   kernel_size=7,
                                   stride=2,
                                   padding=3,
                                   bias=False)
        self.bn1 = norm_layer(stem_width, activation=norm_act)
        if deep_stem:
            self.maxpool = nn.Sequential()  # don't need it
        elif antialias:
            self.maxpool = nn.Sequential(
                nn.MaxPool2d(kernel_size=3, stride=1, padding=1), BlurPool())
        else:
            # for SE-ResNets the first maxpool is slightly different
            self.maxpool = nn.MaxPool2d(kernel_size=3,
                                        stride=2,
                                        padding=0 if use_se else 1,
                                        ceil_mode=True if use_se else False)
        # Output stride is 8 with dilated and 32 without
        stride_3_4 = 1 if self.dilated else 2
        dilation_3 = 2 if self.dilated else 1
        dilation_4 = 4 if self.dilated else 1
        largs = dict(use_se=use_se,
                     norm_layer=norm_layer,
                     norm_act=norm_act,
                     antialias=antialias)
        self.layer1 = self._make_layer(64, layers[0], stride=1, **largs)
        self.layer2 = self._make_layer(128, layers[1], stride=2, **largs)
        self.layer3 = self._make_layer(256,
                                       layers[2],
                                       stride=stride_3_4,
                                       dilation=dilation_3,
                                       **largs)
        self.layer4 = self._make_layer(512,
                                       layers[3],
                                       stride=stride_3_4,
                                       dilation=dilation_4,
                                       **largs)
        self.global_pool = GlobalPool2d(global_pool)
        self.num_features = 512 * self.expansion
        self.encoder = encoder
        if not encoder:
            self.last_linear = nn.Linear(
                self.num_features * self.global_pool.feat_mult(), num_classes)
        else:
            self.forward = self.encoder_features

        self._initialize_weights(init_bn0)
Example #13
    def __init__(
        self,
        encoder_name="hrnet_w18",
        encoder_weights="imagenet",
        pretrained=None,  # not used 
        num_classes=1,
        last_upsample=True,
        OCR=False,
        drop_rate=0,
        norm_layer="inplace_abn",  # use memory efficient by default
        norm_act="leaky_relu",
        **encoder_params,
    ):

        super().__init__()
        self.encoder = get_encoder(
            encoder_name,
            encoder_weights=encoder_weights,
            norm_layer=norm_layer,
            norm_act=norm_act,
            **encoder_params,
        )
        norm_layer = bn_from_name(norm_layer)
        final_channels = sum(self.encoder.out_shapes[:4])

        self.OCR = OCR
        if OCR:
            self.conv3x3 = nn.Sequential(
                conv3x3(final_channels, 512, bias=True),
                norm_layer(512, activation=norm_act),
            )
            self.ocr_gather_head = SpatialOCR_Gather()
            self.ocr_distri_head = SpatialOCR(in_channels=512,
                                              key_channels=256,
                                              out_channels=512,
                                              norm_layer=norm_layer,
                                              norm_act=norm_act)
            self.head = conv1x1(512, num_classes, bias=True)
            self.aux_head = nn.Sequential(  # in OCR first conv is 3x3
                conv3x3(final_channels, final_channels, bias=True),
                norm_layer(final_channels, activation=norm_act),
                conv1x1(final_channels, num_classes, bias=True),
            )
        else:
            self.head = nn.Sequential(
                conv1x1(final_channels, final_channels, bias=True),
                norm_layer(final_channels, activation=norm_act),
                conv1x1(final_channels, num_classes, bias=True),
            )

        up_kwargs = dict(mode="bilinear", align_corners=True)
        self.up_x2 = nn.Upsample(scale_factor=2, **up_kwargs)
        self.up_x4 = nn.Upsample(scale_factor=4, **up_kwargs)
        self.up_x8 = nn.Upsample(scale_factor=8, **up_kwargs)
        self.last_upsample = nn.Upsample(scale_factor=4, **up_kwargs) if last_upsample else nn.Identity()
        self.dropout = nn.Dropout2d(drop_rate)  # can't use inplace, it would raise a backprop error
        self.name = f"segm-{encoder_name}"
        # use lower momentum
        patch_bn_mom(self)
        self._init_weights()
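
A hypothetical use of the upsampling modules above (an assumption; the forward is not shown): the four HRNet branch outputs are brought to the highest resolution and concatenated, which is why the head expects `final_channels = sum(self.encoder.out_shapes[:4])` input channels.

import torch

def merge_branches(model, feats):
    # feats: four feature maps ordered from highest to lowest resolution
    return torch.cat(
        [feats[0], model.up_x2(feats[1]), model.up_x4(feats[2]), model.up_x8(feats[3])], dim=1
    )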
Example #14
    def __init__(
        self,
        blocks_args=None,
        width_multiplier=None,
        depth_multiplier=None,
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        output_stride=32,
        encoder=False,
        drop_rate=0,
        drop_connect_rate=0,
        stem_size=32,
        norm_layer="abn",
        norm_act="swish",
        match_tf_same_padding=False,
    ):
        super().__init__()
        norm_layer = bn_from_name(norm_layer)
        self.norm_layer = norm_layer
        self.norm_act = norm_act
        self.width_multiplier = width_multiplier
        self.depth_multiplier = depth_multiplier
        stem_size = make_divisible(stem_size * width_multiplier)
        self.conv_stem = conv3x3(in_channels, stem_size, stride=2)
        self.bn1 = norm_layer(stem_size, activation=norm_act)
        in_channels = stem_size
        self.blocks = nn.ModuleList([])
        # modify block args to account for output_stride strategy
        blocks_args = _patch_block_args(blocks_args, output_stride)
        for block_idx, block_arg in enumerate(blocks_args):
            block = []
            block_arg["in_channels"] = make_divisible(block_arg["in_channels"] * self.width_multiplier)
            block_arg["out_channels"] = make_divisible(block_arg["out_channels"] * self.width_multiplier)
            block_arg["norm_layer"] = norm_layer
            block_arg["norm_act"] = norm_act
            # linearly scale keep prob
            block_arg["keep_prob"] = 1 - drop_connect_rate * block_idx / len(blocks_args)
            repeats = block_arg.pop("num_repeat")
            repeats = int(math.ceil(repeats * self.depth_multiplier))
            # when dilating conv with stride 2 we want it to have dilation // 2
            # it prevents checkerboard artifacts with OS=16 and OS=8
            dilation = block_arg.get("dilation", 1)  # save block values
            if block_arg.pop("no_first_dilation", False):
                block_arg["dilation"] = max(1, block_arg["dilation"] // 2)
            block.append(InvertedResidual(**block_arg))
            # only first layer in block is strided
            block_arg["stride"] = 1
            block_arg["dilation"] = dilation
            block_arg["in_channels"] = block_arg["out_channels"]
            for _ in range(repeats - 1):
                block.append(InvertedResidual(**block_arg))

            self.blocks.append(nn.Sequential(*block))

        # Head

        if encoder:
            self.forward = self.encoder_features
        else:
            out_channels = block_arg["out_channels"]
            num_features = make_divisible(1280 * width_multiplier)
            self.conv_head = conv1x1(out_channels, num_features)
            self.bn2 = norm_layer(num_features, activation=norm_act)
            self.global_pool = nn.AdaptiveAvgPool2d(1)
            self.dropout = nn.Dropout(drop_rate, inplace=True)
            self.classifier = nn.Linear(num_features, num_classes)

        patch_bn(self)  # adjust epsilon
        initialize(self)
        if match_tf_same_padding:
            conv_to_same_conv(self)
            maxpool_to_same_maxpool(self)
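
A minimal sketch of how the per-block `keep_prob` computed above is typically consumed inside a residual block (stochastic depth, a.k.a. drop connect); an illustration, not necessarily InvertedResidual's exact implementation:

import torch

def drop_connect(x, keep_prob, training):
    if not training or keep_prob >= 1.0:
        return x
    # drop the whole residual branch per sample, scale to keep the expected value unchanged
    mask = torch.rand(x.shape[0], 1, 1, 1, device=x.device, dtype=x.dtype) < keep_prob
    return x / keep_prob * mask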
Example #15
    def __init__(
            self,
            growth_rate=None,
            block_config=None,
            pretrained=None,  # not used. here for proper signature
            num_classes=1000,
            drop_rate=0.0,
            in_channels=3,
            norm_layer='abn',
            norm_act='relu',
            deep_stem=False,
            stem_width=64,
            encoder=False,
            global_pool='avg',
            memory_efficient=True):

        super(DenseNet, self).__init__()
        norm_layer = bn_from_name(norm_layer)
        self.num_classes = num_classes
        if deep_stem:
            self.conv0 = nn.Sequential(
                conv3x3(in_channels, stem_width // 2, 2),
                norm_layer(stem_width // 2, activation=norm_act),
                conv3x3(stem_width // 2, stem_width // 2),
                norm_layer(stem_width // 2, activation=norm_act),
                conv3x3(stem_width // 2, stem_width, 2))
        else:
            self.conv0 = nn.Conv2d(in_channels,
                                   stem_width,
                                   kernel_size=7,
                                   stride=2,
                                   padding=3,
                                   bias=False)

        self.norm0 = norm_layer(stem_width, activation=norm_act)
        self.pool0 = nn.MaxPool2d(kernel_size=3,
                                  stride=2,
                                  padding=1,
                                  ceil_mode=False)

        largs = dict(growth_rate=growth_rate,
                     drop_rate=drop_rate,
                     memory_efficient=memory_efficient,
                     norm_layer=norm_layer,
                     norm_act=norm_act)
        in_planes = stem_width
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(num_layers, in_planes, **largs)
            setattr(self, 'denseblock{}'.format(i + 1), block)
            in_planes += num_layers * growth_rate
            if i != len(block_config) - 1:
                trans = _Transition(in_planes=in_planes,
                                    out_planes=in_planes // 2)
                setattr(self, 'transition{}'.format(i + 1), trans)
                in_planes //= 2

        # Final normalization
        self.norm5 = nn.BatchNorm2d(in_planes)

        # Linear layer
        self.encoder = encoder
        if not encoder:
            self.global_pool = GlobalPool2d(global_pool)
            self.classifier = nn.Linear(in_planes, num_classes)
        else:
            assert len(block_config) == 4, 'Need 4 blocks to use as encoder'
            self.forward = self.encoder_features
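
`_Transition` is not defined in this snippet. A minimal sketch of what it typically does in DenseNet (an assumption; the class name here is a stand-in): halve the channels with a 1x1 conv and halve the resolution with average pooling.

import torch.nn as nn

class TransitionSketch(nn.Sequential):
    def __init__(self, in_planes, out_planes):
        super().__init__(
            nn.BatchNorm2d(in_planes),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False),
            nn.AvgPool2d(kernel_size=2, stride=2),
        )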
Example #16
    def __init__(
        self,
        layers=None,
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        width_factor=1.0,
        output_stride=32,
        norm_layer="inplaceabn",
        norm_act="leaky_relu",
        encoder=False,
        drop_rate=0.0,
        drop_connect_rate=0.0,
    ):
        nn.Module.__init__(self)
        stem_width = int(64 * width_factor)
        norm_layer = bn_from_name(norm_layer)
        self.inplanes = stem_width
        self.num_classes = num_classes
        self.groups = 1  # not really used but needed inside _make_layer
        self.base_width = 64  # used inside _make_layer
        self.norm_act = norm_act
        self.block_idx = 0
        self.num_blocks = sum(layers)
        self.drop_connect_rate = drop_connect_rate

        # in the paper they use conv1x1 but in code conv3x3 (which seems better)
        self.conv1 = nn.Sequential(SpaceToDepth(),
                                   conv3x3(in_channels * 16, stem_width))
        self.bn1 = norm_layer(stem_width, activation=norm_act)
        self.maxpool = nn.Identity()  # not used but needed for code compatibility

        if output_stride not in [8, 16, 32]:
            raise ValueError("Output stride should be in [8, 16, 32]")
        # TODO add OS later
        # if output_stride == 8:
        # stride_3, stride_4, dilation_3, dilation_4 = 1, 1, 2, 4
        # elif output_stride == 16:
        # stride_3, stride_4, dilation_3, dilation_4 = 2, 1, 1, 2
        # elif output_stride == 32:
        stride_3, stride_4, dilation_3, dilation_4 = 2, 2, 1, 1

        largs = dict(use_se=True,
                     norm_layer=norm_layer,
                     norm_act=norm_act,
                     antialias=True)
        self.block = TBasicBlock
        self.expansion = TBasicBlock.expansion
        self.layer1 = self._make_layer(stem_width,
                                       layers[0],
                                       stride=1,
                                       **largs)
        self.layer2 = self._make_layer(stem_width * 2,
                                       layers[1],
                                       stride=2,
                                       **largs)

        self.block = TBottleneck  # first 2 - Basic, last 2 - Bottleneck
        self.expansion = TBottleneck.expansion
        self.layer3 = self._make_layer(stem_width * 4,
                                       layers[2],
                                       stride=stride_3,
                                       dilation=dilation_3,
                                       **largs)
        largs.update(use_se=False)  # no se in last layer
        self.layer4 = self._make_layer(stem_width * 8,
                                       layers[3],
                                       stride=stride_4,
                                       dilation=dilation_4,
                                       **largs)
        self.global_pool = FastGlobalAvgPool2d(flatten=True)
        self.num_features = stem_width * 8 * self.expansion
        self.encoder = encoder
        if not encoder:
            self.dropout = nn.Dropout(p=drop_rate, inplace=True)
            self.last_linear = nn.Linear(self.num_features, num_classes)
        else:
            self.forward = self.encoder_features

        self._initialize_weights(init_bn0=True)
Example #17
    def __init__(
            self,
            stage_fns=None,  # list of nn.Module
            block_fns=None,  # list of nn.Module
            stage_args=None,  # list of dicts
            layers=None,  # num layers in each block
            channels=None,  # it's actually output channels. 256, 512, 1024, 2048 for R50
            # pretrained=None,  # not used. here for proper signature
            num_classes=1000,
            in_channels=3,
            norm_layer="abn",
            norm_act="leaky_relu",
            head_norm_act="leaky_relu",  # activation in head
            stem_type="default",
            # antialias=False,
            # encoder=False,
            # drop_rate=0.0,
            drop_connect_rate=0.0,
            head_width=2048,
            stem_width=64,
            head_type="default",  # type of head
    ):
        norm_layer = bn_from_name(norm_layer)
        self.num_classes = num_classes
        self.norm_act = norm_act
        self.block_idx = 0  # for drop connect
        self.drop_connect_rate = drop_connect_rate
        super().__init__()

        first_norm = nn.Identity() if block_fns[0].startswith("Pre") else norm_layer(stem_width, activation=norm_act)
        if stem_type == "default":
            self.stem_conv1 = nn.Sequential(
                conv3x3(in_channels, stem_width, stride=2), first_norm)
        elif stem_type == "s2d":
            # instead of the default stem, use Space2Depth followed by a conv. No norm here
            # because there is one at the beginning of DarkStage (update: the non-PreAct version does have a norm)
            self.stem_conv1 = nn.Sequential(
                SpaceToDepth(block_size=2),
                conv3x3(in_channels * 4, stem_width),
                first_norm,
            )
        else:
            raise ValueError(f"Stem type `{stem_type}` is not supported")

        bn_args = dict(norm_layer=norm_layer, norm_act=norm_act)
        block_name_to_module = {
            "XX": SimpleBasicBlock,
            "Pre_XX": SimplePreActBasicBlock,
            "Pre_XX_Res2": SimplePreActRes2BasicBlock,
            "Btl": SimpleBottleneck,
            "Pre_Btl": SimplePreActBottleneck,
            "IR": SimpleInvertedResidual,
            "Pre_IR": SimplePreActInvertedResidual,
            "Sep2": SimpleSeparable_2,
            "Pre_Sep2": SimplePreActSeparable_2,
            "Sep3": SimpleSeparable_3,
            "Pre_Custom_2": PreBlock_2,
        }
        stage_name_to_module = {"simpl": SimpleStage}
        # set stride=2 for all blocks
        # using **{**bn_args, **stage_args} to allow updating norm layer for particular stage
        self.layer1 = stage_name_to_module[stage_fns[0]](
            block_fn=block_name_to_module[block_fns[0]],
            in_chs=stem_width,
            out_chs=channels[0],
            num_blocks=layers[0],
            stride=2,
            **{
                **bn_args,
                **stage_args[0]
            },
        )
        self.layer2 = stage_name_to_module[stage_fns[1]](
            block_fn=block_name_to_module[block_fns[1]],
            in_chs=channels[0],
            out_chs=channels[1],
            num_blocks=layers[1],
            stride=2,
            **{
                **bn_args,
                **stage_args[1]
            },
        )
        self.layer3 = stage_name_to_module[stage_fns[2]](
            block_fn=block_name_to_module[block_fns[2]],
            in_chs=channels[1],
            out_chs=channels[2],
            num_blocks=layers[2],
            stride=2,
            **{
                **bn_args,
                **stage_args[2]
            },
        )
        extra_stage3_filters = stage_args[2].get("filter_steps", 0) * (layers[2] - 1)
        self.layer4 = stage_name_to_module[stage_fns[3]](
            block_fn=block_name_to_module[block_fns[3]],
            in_chs=channels[2] + extra_stage3_filters,
            out_chs=channels[3],
            num_blocks=layers[3],
            stride=2,
            **{
                **bn_args,
                **stage_args[3]
            },
        )
        extra_stage4_filters = stage_args[3].get("filter_steps", 0) * (layers[3] - 1)
        channels[3] += extra_stage4_filters  # TODO rewrite it cleaner instead of doing inplace
        last_norm = norm_layer(channels[3], activation=norm_act) if block_fns[0].startswith("Pre") else nn.Identity()
        if head_type == "mobilenetv3":
            self.head = nn.Sequential(  # Mbln v3 head. GAP first, then expand convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mobilenetv3_norm":  # mobilenet with last norm
            self.head = nn.Sequential(  # Mbln v3 head. GAP first, then expand convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width),
                nn.BatchNorm1d(head_width),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "default":
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], head_width),
                norm_layer(head_width, activation=head_norm_act),
                FastGlobalAvgPool2d(flatten=True),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "default_nonorm":  # if used in angular losses don't want norm
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], head_width,
                        bias=True),  # need bias because not followed by norm
                FastGlobalAvgPool2d(flatten=True),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mlp_bn_fc_bn":
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], channels[3]),
                FastGlobalAvgPool2d(flatten=True),
                nn.BatchNorm1d(channels[3]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(channels[3], head_width, bias=False),
                nn.BatchNorm1d(head_width, affine=False),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mlp_bn_fc":  # same as above but without last BN
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], channels[3]),
                FastGlobalAvgPool2d(flatten=True),
                nn.BatchNorm1d(channels[3]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(channels[3], head_width, bias=False),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mlp_2":
            assert isinstance(head_width, (tuple, list)), head_width
            self.head = nn.Sequential(  # like Mbln v3 head. GAP first, then MLP convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width[0]),
                nn.BatchNorm1d(head_width[0]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(head_width[0], head_width[1]),
                nn.BatchNorm1d(head_width[1]),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width[1], num_classes)
        elif head_type == "mlp_3":
            assert isinstance(head_width, (tuple, list)), head_width
            self.head = nn.Sequential(  # like Mbln v3 head. GAP first, then MLP convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width[0]),
                nn.BatchNorm1d(head_width[0]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(head_width[0], head_width[1]),
                nn.BatchNorm1d(head_width[1]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(head_width[1], head_width[2]),
                nn.BatchNorm1d(head_width[2]),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width[2], num_classes)
        else:
            raise ValueError(f"Head type: {head_type} is not supported!")
        initialize(self)
Example #18
    def __init__(
        self,
        stage_fn=None,
        block_fn=None,
        layers=None,  # num layers in each block
        channels=None,  # it's actually output channels. 256, 512, 1024, 2048 for R50
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        attn_type=None,
        # base_width=64,
        stem_type="default",
        norm_layer="abn",
        norm_act="leaky_relu",
        antialias=False,
        # encoder=False,
        bottle_ratio=0.25,  # how much to shrink channels in bottleneck layer
        no_first_csp=False,  # make first stage a Simple Stage
        drop_rate=0.0,
        drop_connect_rate=0.0,
        expand_before_head=True,  # add an additional conv from 512 -> 2048 to avoid a representational bottleneck
        mobilenetv3_head=False,  # put GAP first, then expand convs
        **block_kwargs,
    ):

        stem_width = 64
        norm_layer = bn_from_name(norm_layer)
        self.num_classes = num_classes
        self.norm_act = norm_act
        self.block_idx = 0  # for drop connect
        self.drop_connect_rate = drop_connect_rate
        super().__init__()

        if block_fn != SimplePreActBottleneck:
            stem_norm = norm_layer(stem_width, activation=norm_act)
        else:
            stem_norm = nn.Identity()
        if stem_type == "default":
            self.stem_conv1 = nn.Sequential(
                nn.Conv2d(3,
                          stem_width,
                          kernel_size=7,
                          stride=2,
                          padding=3,
                          bias=False),
                stem_norm,
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            )
            first_stride = 1
        elif stem_type == "s2d":
            # instead of the default stem, use Space2Depth followed by a conv. No norm here
            # because there is one at the beginning of DarkStage (update: the non-PreAct version does have a norm)
            self.stem_conv1 = nn.Sequential(
                SpaceToDepth(block_size=2),
                conv3x3(in_channels * 4, stem_width),
                stem_norm,
                # nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            )
            first_stride = 2

        # blocks
        largs = dict(
            stride=2,
            bottle_ratio=bottle_ratio,
            block_fn=block_fn,
            attn_type=attn_type,
            norm_layer=norm_layer,
            norm_act=norm_act,
            # antialias=antialias,
            **block_kwargs,
        )
        first_stage_fn = SimpleStage if no_first_csp else stage_fn
        # fmt: off
        self.layer1 = first_stage_fn(
            in_chs=stem_width,
            out_chs=channels[0],
            num_blocks=layers[0],
            keep_prob=self.keep_prob,
            **{
                **largs, "stride": first_stride
            },  # overwrite default stride
        )
        # **{**largs, "antialias": False} # antialias in first stage is too expensive
        self.layer2 = stage_fn(in_chs=channels[0],
                               out_chs=channels[1],
                               num_blocks=layers[1],
                               keep_prob=self.keep_prob,
                               **largs)
        self.layer3 = stage_fn(in_chs=channels[1],
                               out_chs=channels[2],
                               num_blocks=layers[2],
                               keep_prob=self.keep_prob,
                               **largs)
        self.layer4 = stage_fn(in_chs=channels[2],
                               out_chs=channels[3],
                               num_blocks=layers[3],
                               keep_prob=self.keep_prob,
                               **largs)
        # fmt: on

        # self.global_pool = FastGlobalAvgPool2d(flatten=True)
        # self.dropout = nn.Dropout(p=drop_rate, inplace=True)
        head_layers = []
        # this branching is messy, but it's left as-is for now
        if mobilenetv3_head:
            head_layers.append(FastGlobalAvgPool2d(flatten=True))
            if channels[3] < 2048 and expand_before_head:
                head_layers.append(
                    nn.Linear(channels[3], 2048)
                )  # no norm here, as in the original MobileNetV3 from Google
                head_layers.append(
                    pt.modules.activations.activation_from_name(norm_act))
            head_layers.append(
                nn.Linear(2048 if expand_before_head else channels[3],
                          num_classes))
        else:
            if channels[3] < 2048 and expand_before_head:
                if block_fn == SimplePreActBottleneck:  # for PreAct add additional BN here
                    head_layers.append(
                        norm_layer(channels[3], activation=norm_act))
                head_layers.extend([
                    conv1x1(channels[3], 2048),
                    norm_layer(2048, activation=norm_act)
                ])
            head_layers.extend([
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(2048 if expand_before_head else channels[3],
                          num_classes)
            ])
        # self.head = nn.Sequential(
        #     conv1x1(channels[3], 2048),
        #     norm_layer(activation=norm_act),
        #     # norm_layer(1024, activation=norm_act),
        #     FastGlobalAvgPool2d(flatten=True),
        #     nn.Linear(2048, num_classes),
        # )
        self.head = nn.Sequential(*head_layers)
        initialize(self)
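
The stages above read `self.keep_prob`, which is not defined in this snippet. A plausible definition (an assumption, mirroring the linear schedule from Example #14; it would also require `self.num_blocks = sum(layers)` as in Examples #10 and #16):

@property
def keep_prob(self):
    # hypothetical: decay keep_prob linearly with block index and advance the counter
    keep_prob = 1 - self.drop_connect_rate * self.block_idx / self.num_blocks
    self.block_idx += 1
    return keep_prob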