Example #1
    def _make_fuse_layers(self, norm_layer, norm_act):
        if self.num_branches == 1:
            return None

        num_branches = self.num_branches
        num_inchannels = self.num_inchannels
        fuse_layers = []
        for i in range(num_branches):
            fuse_layer = []
            for j in range(num_branches):
                if j > i:
                    fuse_layer.append(
                        nn.Sequential(
                            conv1x1(num_inchannels[j], num_inchannels[i]),
                            norm_layer(num_inchannels[i],
                                       activation="identity"),
                            nn.Upsample(scale_factor=2**(j - i),
                                        mode='nearest')))
                elif j == i:
                    fuse_layer.append(nn.Identity())
                else:
                    conv3x3s = []
                    for k in range(i - j):
                        if k == i - j - 1:
                            num_outchannels_conv3x3 = num_inchannels[i]
                            conv3x3s.append(
                                nn.Sequential(
                                    conv3x3(num_inchannels[j],
                                            num_outchannels_conv3x3, 2),
                                    norm_layer(num_outchannels_conv3x3,
                                               activation="identity")))
                        else:
                            num_outchannels_conv3x3 = num_inchannels[j]
                            conv3x3s.append(
                                nn.Sequential(
                                    conv3x3(num_inchannels[j],
                                            num_outchannels_conv3x3, 2),
                                    norm_layer(num_outchannels_conv3x3,
                                               activation=norm_act)))
                    fuse_layer.append(nn.Sequential(*conv3x3s))
            fuse_layers.append(nn.ModuleList(fuse_layer))

        return nn.ModuleList(fuse_layers)
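For context, a minimal sketch (an assumption, not part of the excerpt) of how the returned `nn.ModuleList` is typically consumed in a forward pass, HRNet-style; `self.relu` and the per-branch list `x` are hypothetical names:

    def _fuse(self, x):
        # x: list of per-branch feature maps, one per resolution
        if self.fuse_layers is None:
            return x
        out = []
        for i, fuse_layer in enumerate(self.fuse_layers):
            # sum the (resampled) contributions of every branch j into branch i
            y = sum(fuse_layer[j](x[j]) for j in range(len(x)))
            out.append(self.relu(y))
        return out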
Example #2
    def __init__(self,
                 encoder_channels,
                 prefinal_channels=32,
                 final_channels=1,
                 **bn_params):  # norm layer, norm_act
        super().__init__()

        in_channels = encoder_channels
        self.layer1 = LinknetDecoderBlock(in_channels[0], in_channels[1],
                                          **bn_params)
        self.layer2 = LinknetDecoderBlock(in_channels[1], in_channels[2],
                                          **bn_params)
        self.layer3 = LinknetDecoderBlock(in_channels[2], in_channels[3],
                                          **bn_params)
        self.layer4 = LinknetDecoderBlock(in_channels[3], in_channels[4],
                                          **bn_params)
        self.layer5 = LinknetDecoderBlock(in_channels[4], prefinal_channels,
                                          **bn_params)
        self.final_conv = conv1x1(prefinal_channels, final_channels)

        initialize(self)
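A plausible companion forward pass for this decoder (an assumption based on the LinkNet design, not shown in the excerpt: each decoder block output is summed with the matching encoder skip):

    def forward(self, features):
        # deepest encoder feature first, then progressively shallower skips
        encoder_head, *skips = features
        x = self.layer1(encoder_head) + skips[0]
        x = self.layer2(x) + skips[1]
        x = self.layer3(x) + skips[2]
        x = self.layer4(x) + skips[3]
        x = self.layer5(x)
        return self.final_conv(x)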
Example #3
    def __init__(
            self,
            encoder_channels,
            decoder_channels=(256, 128, 64, 32, 16),
            final_channels=1,
            center=False,
            drop_rate=0,
            output_stride=32,
            attn_type=None,
            **bn_params,  # norm layer, norm_act
    ):

        super().__init__()
        if center:
            channels = encoder_channels[0]
            self.center = UnetCenterBlock(channels, channels)
        else:
            self.center = None

        in_chs = self.compute_channels(encoder_channels, decoder_channels)
        kwargs = {**bn_params, "attn_type": attn_type}
        self.layer1 = UnetDecoderBlock(in_chs[0],
                                       decoder_channels[0],
                                       upsample=output_stride == 32,
                                       **kwargs)
        self.layer2 = UnetDecoderBlock(in_chs[1],
                                       decoder_channels[1],
                                       upsample=output_stride != 8,
                                       **kwargs)
        self.layer3 = UnetDecoderBlock(in_chs[2], decoder_channels[2],
                                       **kwargs)
        self.layer4 = UnetDecoderBlock(in_chs[3], decoder_channels[3],
                                       **kwargs)
        self.layer5 = UnetDecoderBlock(in_chs[4], decoder_channels[4],
                                       **kwargs)
        self.dropout = nn.Dropout2d(
            drop_rate, inplace=False)  # inplace=True raises a backprop error
        self.final_conv = conv1x1(decoder_channels[4],
                                  final_channels,
                                  bias=True)
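The `compute_channels` helper is referenced but not shown; a sketch of what it plausibly returns, assuming each decoder block sees the previous decoder output concatenated with the matching encoder skip:

    def compute_channels(self, encoder_channels, decoder_channels):
        # channel counts entering layer1..layer5 after concatenation (assumption)
        return [
            encoder_channels[0] + encoder_channels[1],
            decoder_channels[0] + encoder_channels[2],
            decoder_channels[1] + encoder_channels[3],
            decoder_channels[2] + encoder_channels[4],
            decoder_channels[3],
        ]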
Example #4
    def _make_layer(self,
                    planes,
                    blocks,
                    stride=1,
                    dilation=1,
                    use_se=None,
                    norm_layer=None,
                    norm_act=None,
                    antialias=None):
        downsample = None

        if stride != 1 or self.inplanes != planes * self.expansion:
            downsample_layers = []
            if antialias and stride == 2:  # using OrderedDict to preserve ordering and allow loading
                downsample_layers += [('blur', BlurPool())]
            downsample_layers += [('0',
                                   conv1x1(self.inplanes,
                                           planes * self.expansion,
                                           stride=1 if antialias else stride)),
                                  ('1',
                                   norm_layer(planes * self.expansion,
                                              activation='identity'))]
            downsample = nn.Sequential(OrderedDict(downsample_layers))

        layers = [
            self.block(self.inplanes, planes, stride, downsample, self.groups,
                       self.base_width, use_se, dilation, norm_layer, norm_act,
                       antialias)
        ]

        self.inplanes = planes * self.expansion
        for _ in range(1, blocks):
            layers.append(
                self.block(self.inplanes, planes, 1, None, self.groups,
                           self.base_width, use_se, dilation, norm_layer,
                           norm_act, antialias))
        return nn.Sequential(*layers)
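A hedged usage sketch (argument values are illustrative only): the helper is meant to be called once per ResNet stage inside `__init__`, with stride-2 downsampling from the second stage on:

        # hypothetical stage construction
        self.layer1 = self._make_layer(64, blocks=3, stride=1,
                                       norm_layer=norm_layer, norm_act=norm_act,
                                       antialias=antialias)
        self.layer2 = self._make_layer(128, blocks=4, stride=2,
                                       norm_layer=norm_layer, norm_act=norm_act,
                                       antialias=antialias)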
Example #5
    def __init__(
            self,
            encoder_channels,
            prefinal_channels=32,
            final_channels=1,
            drop_rate=0,
            attn_type=None,
            **bn_params,  # norm layer, norm_act
    ):
        super().__init__()
        extra_params = {**bn_params, "attn_type": attn_type}
        in_channels = encoder_channels
        self.layer1 = LinknetDecoderBlock(in_channels[0], in_channels[1],
                                          **extra_params)
        self.layer2 = LinknetDecoderBlock(in_channels[1], in_channels[2],
                                          **extra_params)
        self.layer3 = LinknetDecoderBlock(in_channels[2], in_channels[3],
                                          **extra_params)
        self.layer4 = LinknetDecoderBlock(in_channels[3], in_channels[4],
                                          **extra_params)
        self.layer5 = LinknetDecoderBlock(in_channels[4], prefinal_channels,
                                          **extra_params)
        self.dropout = nn.Dropout2d(drop_rate, inplace=True)
        self.final_conv = conv1x1(prefinal_channels, final_channels)
Example #6
    def __init__(
        self,
        in_channels,
        key_channels,
        out_channels,
        norm_layer=ABN,
        norm_act="relu",
    ):
        super().__init__()

        self.in_channels = in_channels
        self.key_channels = key_channels

        self.f_pixel = nn.Sequential(
            conv1x1(in_channels, key_channels, bias=True),
            norm_layer(key_channels, activation=norm_act), 
            conv1x1(key_channels, key_channels, bias=True),
            norm_layer(key_channels, activation=norm_act),
        )
        self.f_object = nn.Sequential(
            conv1x1(in_channels, key_channels, bias=True),
            norm_layer(key_channels, activation=norm_act), 
            conv1x1(key_channels, key_channels, bias=True),
            norm_layer(key_channels, activation=norm_act),
        )
        self.f_down = nn.Sequential(
            conv1x1(in_channels, key_channels, bias=True),
            norm_layer(key_channels, activation=norm_act), 
        )
        self.f_up = nn.Sequential(
            conv1x1(key_channels, in_channels, bias=True),
            norm_layer(in_channels, activation=norm_act), 
        )

        self.conv_bn = nn.Sequential(
            conv1x1(2 * in_channels, out_channels, bias=True),
            norm_layer(out_channels, activation=norm_act),
        )
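A sketch of how these submodules are usually combined, following the HRNet-OCR object-attention formulation (the exact forward in the source may differ; `torch` and `torch.nn.functional as F` are assumed to be imported):

    def forward(self, x, proxy):
        # x: pixel features, proxy: object-region features
        batch_size, _, h, w = x.size()
        query = self.f_pixel(x).view(batch_size, self.key_channels, -1).permute(0, 2, 1)
        key = self.f_object(proxy).view(batch_size, self.key_channels, -1)
        value = self.f_down(proxy).view(batch_size, self.key_channels, -1).permute(0, 2, 1)
        sim_map = torch.matmul(query, key) * self.key_channels ** -0.5
        sim_map = F.softmax(sim_map, dim=-1)
        context = torch.matmul(sim_map, value).permute(0, 2, 1).contiguous()
        context = context.view(batch_size, self.key_channels, h, w)
        context = self.f_up(context)
        # fuse the attended context with the original pixel features
        return self.conv_bn(torch.cat([context, x], dim=1))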
Example #7
    def __init__(
        self,
        encoder_name="hrnet_w18",
        encoder_weights="imagenet",
        pretrained=None,  # not used 
        num_classes=1,
        last_upsample=True,
        OCR=False,
        drop_rate=0,
        norm_layer="inplace_abn",  # use memory efficient by default
        norm_act="leaky_relu",
        **encoder_params,
    ):

        super().__init__()
        self.encoder = get_encoder(
            encoder_name,
            encoder_weights=encoder_weights,
            norm_layer=norm_layer,
            norm_act=norm_act,
            **encoder_params,
        )
        norm_layer = bn_from_name(norm_layer)
        final_channels = sum(self.encoder.out_shapes[:4])

        self.OCR = OCR
        if OCR:
            self.conv3x3 = nn.Sequential(
                conv3x3(final_channels, 512, bias=True),
                norm_layer(512, activation=norm_act),
            )
            self.ocr_gather_head = SpatialOCR_Gather()
            self.ocr_distri_head = SpatialOCR(in_channels=512,
                                              key_channels=256,
                                              out_channels=512,
                                              norm_layer=norm_layer,
                                              norm_act=norm_act)
            self.head = conv1x1(512, num_classes, bias=True)
            self.aux_head = nn.Sequential(  # in OCR first conv is 3x3
                conv3x3(final_channels, final_channels, bias=True),
                norm_layer(final_channels, activation=norm_act),
                conv1x1(final_channels, num_classes, bias=True),
            )
        else:
            self.head = nn.Sequential(
                conv1x1(final_channels, final_channels, bias=True),
                norm_layer(final_channels, activation=norm_act),
                conv1x1(final_channels, num_classes, bias=True),
            )

        up_kwargs = dict(mode="bilinear", align_corners=True)
        self.up_x2 = nn.Upsample(scale_factor=2, **up_kwargs)
        self.up_x4 = nn.Upsample(scale_factor=4, **up_kwargs)
        self.up_x8 = nn.Upsample(scale_factor=8, **up_kwargs)
        self.last_upsample = nn.Upsample(
            scale_factor=4, **up_kwargs) if last_upsample else nn.Identity()
        self.dropout = nn.Dropout2d(
            drop_rate)  # can't use inplace. it would raise a backprop error
        self.name = f"segm-{encoder_name}"
        # use lower momentum
        patch_bn_mom(self)
        self._init_weights()
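A hedged sketch of a matching forward pass (an assumption: the encoder is taken to return the four branch outputs ordered from highest to lowest resolution, which are upsampled, concatenated, and passed to the plain or OCR head; `torch` is assumed imported):

    def forward(self, x):
        x0, x1, x2, x3 = self.encoder(x)[:4]
        x = torch.cat([x0, self.up_x2(x1), self.up_x4(x2), self.up_x8(x3)], dim=1)
        x = self.dropout(x)
        if self.OCR:
            aux_out = self.aux_head(x)  # coarse per-class map used for gathering
            feats = self.conv3x3(x)
            context = self.ocr_gather_head(feats, aux_out)
            feats = self.ocr_distri_head(feats, context)
            out = self.head(feats)
        else:
            out = self.head(x)
        return self.last_upsample(out)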
Example #8
    def __init__(
        self,
        blocks_args=None,
        width_multiplier=None,
        depth_multiplier=None,
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        output_stride=32,
        encoder=False,
        drop_rate=0,
        drop_connect_rate=0,
        stem_size=32,
        norm_layer="abn",
        norm_act="swish",
        match_tf_same_padding=False,
    ):
        super().__init__()
        norm_layer = bn_from_name(norm_layer)
        self.norm_layer = norm_layer
        self.norm_act = norm_act
        self.width_multiplier = width_multiplier
        self.depth_multiplier = depth_multiplier
        stem_size = make_divisible(stem_size * width_multiplier)
        self.conv_stem = conv3x3(in_channels, stem_size, stride=2)
        self.bn1 = norm_layer(stem_size, activation=norm_act)
        in_channels = stem_size
        self.blocks = nn.ModuleList([])
        # modify block args to account for output_stride strategy
        blocks_args = _patch_block_args(blocks_args, output_stride)
        for block_idx, block_arg in enumerate(blocks_args):
            block = []
            block_arg["in_channels"] = make_divisible(block_arg["in_channels"] * self.width_multiplier)
            block_arg["out_channels"] = make_divisible(block_arg["out_channels"] * self.width_multiplier)
            block_arg["norm_layer"] = norm_layer
            block_arg["norm_act"] = norm_act
            # linearly scale keep prob
            block_arg["keep_prob"] = 1 - drop_connect_rate * block_idx / len(blocks_args)
            repeats = block_arg.pop("num_repeat")
            repeats = int(math.ceil(repeats * self.depth_multiplier))
            # when dilating conv with stride 2 we want it to have dilation // 2
            # it prevents checkerboard artifacts with OS=16 and OS=8
            dilation = block_arg.get("dilation", 1)  # remember the original dilation
            if block_arg.pop("no_first_dilation", False):
                block_arg["dilation"] = max(1, block_arg["dilation"] // 2)
            block.append(InvertedResidual(**block_arg))
            # only first layer in block is strided
            block_arg["stride"] = 1
            block_arg["dilation"] = dilation
            block_arg["in_channels"] = block_arg["out_channels"]
            for _ in range(repeats - 1):
                block.append(InvertedResidual(**block_arg))

            self.blocks.append(nn.Sequential(*block))

        # Head

        if encoder:
            self.forward = self.encoder_features
        else:
            out_channels = block_arg["out_channels"]
            num_features = make_divisible(1280 * width_multiplier)
            self.conv_head = conv1x1(out_channels, num_features)
            self.bn2 = norm_layer(num_features, activation=norm_act)
            self.global_pool = nn.AdaptiveAvgPool2d(1)
            self.dropout = nn.Dropout(drop_rate, inplace=True)
            self.classifier = nn.Linear(num_features, num_classes)

        patch_bn(self)  # adjust epsilon
        initialize(self)
        if match_tf_same_padding:
            conv_to_same_conv(self)
            maxpool_to_same_maxpool(self)
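The width scaling above relies on a `make_divisible` helper that is not part of the excerpt; a minimal sketch, assuming the usual MobileNet/EfficientNet rounding rule with a divisor of 8:

def make_divisible(value, divisor=8):
    # round to the nearest multiple of `divisor`, never going below the divisor itself
    new_value = max(divisor, int(value + divisor / 2) // divisor * divisor)
    if new_value < 0.9 * value:  # never round down by more than 10%
        new_value += divisor
    return new_value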
Example #9
    def __init__(self, in_planes, out_planes, norm_layer=ABN, norm_act='relu'):
        super(_Transition, self).__init__()
        self.norm = norm_layer(in_planes, activation=norm_act)
        self.conv = conv1x1(in_planes, out_planes)
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
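A likely companion forward for this DenseNet-style transition (an assumption: norm with built-in activation, 1x1 conv, then average pooling):

    def forward(self, x):
        return self.pool(self.conv(self.norm(x)))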
Example #10
    def __init__(
        self,
        stage_fn=None,
        block_fn=None,
        layers=None,  # num layers in each block
        channels=None,  # per-stage output channels, e.g. 256, 512, 1024, 2048 for R50
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        attn_type=None,
        # base_width=64,
        stem_type="default",
        norm_layer="abn",
        norm_act="leaky_relu",
        antialias=False,
        # encoder=False,
        bottle_ratio=0.25,  # how much to shrink channels in bottleneck layer
        no_first_csp=False,  # make first stage a Simple Stage
        drop_rate=0.0,
        drop_connect_rate=0.0,
        expand_before_head=True,  # add an additional conv from 512 -> 2048 to avoid a representational bottleneck
        mobilenetv3_head=False,  # put GAP first, then expand convs
        **block_kwargs,
    ):

        stem_width = 64
        norm_layer = bn_from_name(norm_layer)
        self.num_classes = num_classes
        self.norm_act = norm_act
        self.block_idx = 0  # for drop connect
        self.drop_connect_rate = drop_connect_rate
        super().__init__()

        if block_fn != SimplePreActBottleneck:
            stem_norm = norm_layer(stem_width, activation=norm_act)
        else:
            stem_norm = nn.Identity()
        if stem_type == "default":
            self.stem_conv1 = nn.Sequential(
                nn.Conv2d(3,
                          stem_width,
                          kernel_size=7,
                          stride=2,
                          padding=3,
                          bias=False),
                stem_norm,
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            )
            first_stride = 1
        elif stem_type == "s2d":
            # Instead of the default stem, use Space2Depth followed by a conv. No norm here
            # because DarkStage starts with one (update: the non-PreAct version does include a norm).
            self.stem_conv1 = nn.Sequential(
                SpaceToDepth(block_size=2),
                conv3x3(in_channels * 4, stem_width),
                stem_norm,
                # nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            )
            first_stride = 2
        else:
            raise ValueError(f"Stem type `{stem_type}` is not supported")

        # blocks
        largs = dict(
            stride=2,
            bottle_ratio=bottle_ratio,
            block_fn=block_fn,
            attn_type=attn_type,
            norm_layer=norm_layer,
            norm_act=norm_act,
            # antialias=antialias,
            **block_kwargs,
        )
        first_stage_fn = SimpleStage if no_first_csp else stage_fn
        # fmt: off
        self.layer1 = first_stage_fn(
            in_chs=stem_width,
            out_chs=channels[0],
            num_blocks=layers[0],
            keep_prob=self.keep_prob,
            **{
                **largs, "stride": first_stride
            },  # overwrite default stride
        )
        # **{**largs, "antialias": False} # antialias in first stage is too expensive
        self.layer2 = stage_fn(in_chs=channels[0],
                               out_chs=channels[1],
                               num_blocks=layers[1],
                               keep_prob=self.keep_prob,
                               **largs)
        self.layer3 = stage_fn(in_chs=channels[1],
                               out_chs=channels[2],
                               num_blocks=layers[2],
                               keep_prob=self.keep_prob,
                               **largs)
        self.layer4 = stage_fn(in_chs=channels[2],
                               out_chs=channels[3],
                               num_blocks=layers[3],
                               keep_prob=self.keep_prob,
                               **largs)
        # fmt: on

        # self.global_pool = FastGlobalAvgPool2d(flatten=True)
        # self.dropout = nn.Dropout(p=drop_rate, inplace=True)
        head_layers = []
        # NOTE: this branching is messy; kept as-is for now
        if mobilenetv3_head:
            head_layers.append(FastGlobalAvgPool2d(flatten=True))
            if channels[3] < 2048 and expand_before_head:
                head_layers.append(
                    nn.Linear(channels[3], 2048)
                )  # no norm here, as in the original MobileNetV3 from Google
                head_layers.append(
                    pt.modules.activations.activation_from_name(norm_act))
            head_layers.append(
                nn.Linear(2048 if expand_before_head else channels[3],
                          num_classes))
        else:
            if channels[3] < 2048 and expand_before_head:
                if block_fn == SimplePreActBottleneck:  # for PreAct add additional BN here
                    head_layers.append(
                        norm_layer(channels[3], activation=norm_act))
                head_layers.extend([
                    conv1x1(channels[3], 2048),
                    norm_layer(2048, activation=norm_act)
                ])
            head_layers.extend([
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(2048 if expand_before_head else channels[3],
                          num_classes)
            ])
        # self.head = nn.Sequential(
        #     conv1x1(channels[3], 2048),
        #     norm_layer(activation=norm_act),
        #     # norm_layer(1024, activation=norm_act),
        #     FastGlobalAvgPool2d(flatten=True),
        #     nn.Linear(2048, num_classes),
        # )
        self.head = nn.Sequential(*head_layers)
        initialize(self)
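`self.keep_prob` is used above but not defined in the excerpt; a hedged sketch of the kind of property it could be, mirroring the linear drop-connect schedule from Example #8 (`self.num_blocks` is a hypothetical attribute):

    @property
    def keep_prob(self):
        # linearly decay the keep probability with block depth (assumption)
        keep_prob = 1 - self.drop_connect_rate * self.block_idx / self.num_blocks
        self.block_idx += 1
        return keep_prob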
Example #11
    def __init__(
            self,
            stage_fns=None,  # list of nn.Module
            block_fns=None,  # list of nn.Module
            stage_args=None,  # list of dicts
            layers=None,  # num layers in each block
            channels=None,  # per-stage output channels, e.g. 256, 512, 1024, 2048 for R50
            # pretrained=None,  # not used. here for proper signature
            num_classes=1000,
            in_channels=3,
            norm_layer="abn",
            norm_act="leaky_relu",
            head_norm_act="leaky_relu",  # activation in head
            stem_type="default",
            # antialias=False,
            # encoder=False,
            # drop_rate=0.0,
            drop_connect_rate=0.0,
            head_width=2048,
            stem_width=64,
            head_type="default",  # type of head
    ):
        norm_layer = bn_from_name(norm_layer)
        self.num_classes = num_classes
        self.norm_act = norm_act
        self.block_idx = 0  # for drop connect
        self.drop_connect_rate = drop_connect_rate
        super().__init__()

        first_norm = nn.Identity() if block_fns[0].startswith(
            "Pre") else norm_layer(stem_width, activation=norm_act)
        if stem_type == "default":
            self.stem_conv1 = nn.Sequential(
                conv3x3(in_channels, stem_width, stride=2), first_norm)
        elif stem_type == "s2d":
            # Instead of the default stem, use Space2Depth followed by a conv. No norm here
            # because DarkStage starts with one (update: the non-PreAct version does include a norm).
            self.stem_conv1 = nn.Sequential(
                SpaceToDepth(block_size=2),
                conv3x3(in_channels * 4, stem_width),
                first_norm,
            )
        else:
            raise ValueError(f"Stem type `{stem_type}` is not supported")

        bn_args = dict(norm_layer=norm_layer, norm_act=norm_act)
        block_name_to_module = {
            "XX": SimpleBasicBlock,
            "Pre_XX": SimplePreActBasicBlock,
            "Pre_XX_Res2": SimplePreActRes2BasicBlock,
            "Btl": SimpleBottleneck,
            "Pre_Btl": SimplePreActBottleneck,
            "IR": SimpleInvertedResidual,
            "Pre_IR": SimplePreActInvertedResidual,
            "Sep2": SimpleSeparable_2,
            "Pre_Sep2": SimplePreActSeparable_2,
            "Sep3": SimpleSeparable_3,
            "Pre_Custom_2": PreBlock_2,
        }
        stage_name_to_module = {"simpl": SimpleStage}
        # set stride=2 for all blocks
        # using **{**bn_args, **stage_args} to allow updating norm layer for particular stage
        self.layer1 = stage_name_to_module[stage_fns[0]](
            block_fn=block_name_to_module[block_fns[0]],
            in_chs=stem_width,
            out_chs=channels[0],
            num_blocks=layers[0],
            stride=2,
            **{
                **bn_args,
                **stage_args[0]
            },
        )
        self.layer2 = stage_name_to_module[stage_fns[1]](
            block_fn=block_name_to_module[block_fns[1]],
            in_chs=channels[0],
            out_chs=channels[1],
            num_blocks=layers[1],
            stride=2,
            **{
                **bn_args,
                **stage_args[1]
            },
        )
        self.layer3 = stage_name_to_module[stage_fns[2]](
            block_fn=block_name_to_module[block_fns[2]],
            in_chs=channels[1],
            out_chs=channels[2],
            num_blocks=layers[2],
            stride=2,
            **{
                **bn_args,
                **stage_args[2]
            },
        )
        extra_stage3_filters = stage_args[2].get("filter_steps",
                                                 0) * (layers[2] - 1)
        self.layer4 = stage_name_to_module[stage_fns[3]](
            block_fn=block_name_to_module[block_fns[3]],
            in_chs=channels[2] + extra_stage3_filters,
            out_chs=channels[3],
            num_blocks=layers[3],
            stride=2,
            **{
                **bn_args,
                **stage_args[3]
            },
        )
        extra_stage4_filters = stage_args[3].get("filter_steps",
                                                 0) * (layers[3] - 1)
        # TODO: rewrite this more cleanly instead of mutating `channels` in place
        channels[3] += extra_stage4_filters
        last_norm = norm_layer(channels[3],
                               activation=norm_act) if block_fns[0].startswith(
                                   "Pre") else nn.Identity()
        if head_type == "mobilenetv3":
            self.head = nn.Sequential(  # Mbln v3 head. GAP first, then expand convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mobilenetv3_norm":  # mobilenet with last norm
            self.head = nn.Sequential(  # Mbln v3 head. GAP first, then expand convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width),
                nn.BatchNorm1d(head_width),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "default":
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], head_width),
                norm_layer(head_width, activation=head_norm_act),
                FastGlobalAvgPool2d(flatten=True),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "default_nonorm":  # if used in angular losses don't want norm
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], head_width,
                        bias=True),  # need bias because not followed by norm
                FastGlobalAvgPool2d(flatten=True),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mlp_bn_fc_bn":
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], channels[3]),
                FastGlobalAvgPool2d(flatten=True),
                nn.BatchNorm1d(channels[3]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(channels[3], head_width, bias=False),
                nn.BatchNorm1d(head_width, affine=False),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mlp_bn_fc":  # same as above but without last BN
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], channels[3]),
                FastGlobalAvgPool2d(flatten=True),
                nn.BatchNorm1d(channels[3]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(channels[3], head_width, bias=False),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mlp_2":
            assert isinstance(head_width, (tuple, list)), head_width
            self.head = nn.Sequential(  # like Mbln v3 head. GAP first, then MLP convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width[0]),
                nn.BatchNorm1d(head_width[0]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(head_width[0], head_width[1]),
                nn.BatchNorm1d(head_width[1]),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width[1], num_classes)
        elif head_type == "mlp_3":
            assert isinstance(head_width, (tuple, list)), head_width
            self.head = nn.Sequential(  # like Mbln v3 head. GAP first, then MLP convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width[0]),
                nn.BatchNorm1d(head_width[0]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(head_width[0], head_width[1]),
                nn.BatchNorm1d(head_width[1]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(head_width[1], head_width[2]),
                nn.BatchNorm1d(head_width[2]),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width[2], num_classes)
        else:
            raise ValueError(f"Head type: {head_type} is not supported!")
        initialize(self)
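For completeness, a hedged sketch of the forward pass these attributes suggest (not shown in the excerpt): stem, four stages, head, then the final linear layer:

    def forward(self, x):
        x = self.stem_conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.head(x)  # ends in global pooling / MLP depending on head_type
        return self.last_linear(x)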
Example #12
    def __init__(
        self,
        pretrained="coco",  # Not used. here for proper signature
        encoder_name="efficientnet_b0",
        encoder_weights="imagenet",
        pyramid_channels=64,
        num_fpn_layers=3,
        num_head_repeats=3,
        num_classes=90,
        encoder_norm_layer="frozenabn",
        encoder_norm_act="swish",
        decoder_norm_layer="abn",
        decoder_norm_act="swish",
        match_tf_same_padding=False,
        anchors_per_location=9,
        **encoder_params,
    ):
        super().__init__()
        self.encoder = get_encoder(
            encoder_name,
            norm_layer=encoder_norm_layer,
            norm_act=encoder_norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )
        norm_layer = bn_from_name(decoder_norm_layer)
        bn_args = dict(norm_layer=norm_layer, norm_act=decoder_norm_act)
        self.pyramid6 = nn.Sequential(
            conv1x1(self.encoder.out_shapes[0], pyramid_channels, bias=True),
            norm_layer(pyramid_channels, activation="identity"),
            nn.MaxPool2d(3, stride=2, padding=1),
        )
        self.pyramid7 = nn.MaxPool2d(
            3, stride=2, padding=1)  # in EffDet it's a simple maxpool

        self.bifpn = BiFPN(
            encoder_channels=(pyramid_channels, ) * 2 +
            self.encoder.out_shapes[:-2],
            pyramid_channels=pyramid_channels,
            num_layers=num_fpn_layers,
            **bn_args,
        )

        def make_head(out_size):
            layers = []
            for _ in range(num_head_repeats):
                layers += [
                    DepthwiseSeparableConv(pyramid_channels,
                                           pyramid_channels,
                                           use_norm=False)
                ]
            return nn.ModuleList(layers)

        # The convolution layers in the head are shared among all levels, but
        # each level has its batch normalization to capture the statistical
        # difference among different levels.
        def make_head_norm():
            return nn.ModuleList([
                nn.ModuleList([
                    norm_layer(pyramid_channels, activation=decoder_norm_act)
                    for _ in range(num_head_repeats)
                ]) for _ in range(5)
            ])

        self.cls_convs = make_head(num_classes * anchors_per_location)
        self.cls_head_conv = DepthwiseSeparableConv(pyramid_channels,
                                                    num_classes *
                                                    anchors_per_location,
                                                    use_norm=False)
        self.cls_norms = make_head_norm()

        self.box_convs = make_head(4 * anchors_per_location)
        self.box_head_conv = DepthwiseSeparableConv(pyramid_channels,
                                                    4 * anchors_per_location,
                                                    use_norm=False)
        self.box_norms = make_head_norm()

        self.num_classes = num_classes
        self.num_head_repeats = num_head_repeats

        patch_bn_tf(self)
        self._initialize_weights()
        if match_tf_same_padding:
            conv_to_same_conv(self)
            maxpool_to_same_maxpool(self)
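A hedged sketch of how the shared head convolutions and the per-level norms described above are typically combined for a single pyramid level (method name and call order are assumptions):

    def _apply_head(self, feature, convs, norms, head_conv, level_idx):
        # shared convs across levels, but per-level norms (with built-in activation)
        for conv, norm in zip(convs, norms[level_idx]):
            feature = norm(conv(feature))
        return head_conv(feature)

    # e.g. class logits for pyramid level i:
    # cls_out_i = self._apply_head(p_i, self.cls_convs, self.cls_norms, self.cls_head_conv, i)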
Example #13
    def _make_layer(
        self,
        planes,
        blocks,
        stride=1,
        dilation=1,
        attn_type=None,
        norm_layer=None,
        norm_act=None,
        antialias=None,
    ):
        downsample = None

        if stride != 1 or self.inplanes != planes * self.expansion:
            downsample_layers = []
            if antialias and stride == 2:  # using OrderedDict to preserve ordering and allow loading
                downsample_layers += [("blur", nn.AvgPool2d(2, 2))]
            downsample_layers += [
                ("0",
                 conv1x1(self.inplanes,
                         planes * self.expansion,
                         stride=1 if antialias else stride)),
                ("1", norm_layer(planes * self.expansion,
                                 activation="identity")),
            ]
            downsample = nn.Sequential(OrderedDict(downsample_layers))
        # removes first dilation to avoid checkerboard artifacts
        first_dilation = max(1, dilation // 2)
        layers = [
            self.block(
                inplanes=self.inplanes,
                planes=planes,
                stride=stride,
                downsample=downsample,
                groups=self.groups,
                base_width=self.base_width,
                attn_type=attn_type,
                dilation=first_dilation,
                norm_layer=norm_layer,
                norm_act=norm_act,
                antialias=antialias,
                keep_prob=self.keep_prob,
            )
        ]

        self.inplanes = planes * self.expansion
        for _ in range(1, blocks):
            layers.append(
                self.block(
                    inplanes=self.inplanes,
                    planes=planes,
                    groups=self.groups,
                    base_width=self.base_width,
                    attn_type=attn_type,
                    dilation=first_dilation,
                    norm_layer=norm_layer,
                    norm_act=norm_act,
                    antialias=antialias,
                    keep_prob=self.keep_prob,
                ))
        return nn.Sequential(*layers)
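A hedged usage sketch (illustrative values only): when a smaller output stride is wanted, later stages are built with stride 1 and increasing dilation, which is exactly where the `first_dilation` halving above prevents checkerboard artifacts:

        # hypothetical stage construction for output_stride == 8
        self.layer3 = self._make_layer(256, blocks=6, stride=1, dilation=2,
                                       norm_layer=norm_layer, norm_act=norm_act)
        self.layer4 = self._make_layer(512, blocks=3, stride=1, dilation=4,
                                       norm_layer=norm_layer, norm_act=norm_act)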