def test_abn_repr():
    """Checks that activation param is present in repr"""
    l = modules.bn_from_name("frozen_abn")(10)
    expected = "ABN(10, eps=1e-05, momentum=0.1, affine=True, activation=ACT.LEAKY_RELU[0.01], frozen=True)"
    assert repr(l) == expected

    l2 = modules.bn_from_name("estimated_abn")(10, activation="relu")
    expected2 = "ABN(10, eps=1e-05, momentum=0.1, affine=True, activation=ACT.RELU, estimated_stats=True)"
    assert repr(l2) == expected2
def __init__( self, encoder_name="resnet34", encoder_weights="imagenet", num_classes=1, drop_rate=0, decoder_attention_type=None, encoder_norm_layer="abn", encoder_norm_act="relu", decoder_norm_layer="abn", decoder_norm_act="relu", **encoder_params, ): encoder = get_encoder( encoder_name, norm_layer=encoder_norm_layer, norm_act=encoder_norm_act, encoder_weights=encoder_weights, **encoder_params, ) decoder = LinknetDecoder( encoder_channels=encoder.out_shapes, prefinal_channels=32, final_channels=num_classes, drop_rate=drop_rate, attn_type=decoder_attention_type, norm_layer=bn_from_name(decoder_norm_layer), norm_act=decoder_norm_act, ) super().__init__(encoder, decoder) self.name = f"link-{encoder_name}"
def __init__(
    self,
    layers,
    pretrained=None,  # not used. here for proper signature.
    num_classes=1000,
    in_channels=3,
    encoder=False,
    antialias=False,
    norm_layer="abn",
    norm_act="relu",
):
    super(VGG, self).__init__()
    self.in_channels = in_channels
    self.norm_act = norm_act
    self.norm_layer = bn_from_name(norm_layer)
    self.encoder = encoder
    self.antialias = antialias
    self.features = self._make_layers(layers)
    self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
    if not encoder:
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
    else:
        self.forward = self.encoder_features
    initialize(self)
def __init__( self, encoder_name="resnet34", encoder_weights="imagenet", decoder_channels=(256, 128, 64, 32, 16), num_classes=1, center=False, # usefull for VGG models drop_rate=0, norm_layer="abn", norm_act="relu", **encoder_params, ): encoder = get_encoder( encoder_name, norm_layer=norm_layer, norm_act=norm_act, encoder_weights=encoder_weights, **encoder_params, ) decoder = UnetDecoder( encoder_channels=encoder.out_shapes, decoder_channels=decoder_channels, final_channels=num_classes, center=center, drop_rate=drop_rate, norm_layer=bn_from_name(norm_layer), norm_act=norm_act, ) super().__init__(encoder, decoder) self.name = f"u-{encoder_name}"
def __init__( self, encoder_name="resnet34", encoder_weights="imagenet", num_classes=1, norm_layer="abn", norm_act="relu", **encoder_params, ): encoder = get_encoder( encoder_name, norm_layer=norm_layer, norm_act=norm_act, encoder_weights=encoder_weights, **encoder_params, ) decoder = LinknetDecoder( encoder_channels=encoder.out_shapes, prefinal_channels=32, final_channels=num_classes, norm_layer=bn_from_name(norm_layer), norm_act=norm_act, ) super().__init__(encoder, decoder) self.name = f"link-{encoder_name}"
def __init__( self, pretrained="coco", # not used here for proper signature encoder_name="resnet50", encoder_weights="imagenet", pyramid_channels=256, num_classes=80, # drop_connect_rate=0, # TODO: add encoder_norm_layer="abn", encoder_norm_act="relu", decoder_norm_layer="none", # None by default to match detectron & mmdet versions decoder_norm_act="relu", **encoder_params, ): super().__init__() self.encoder = get_encoder( encoder_name, norm_layer=encoder_norm_layer, norm_act=encoder_norm_act, encoder_weights=encoder_weights, **encoder_params, ) norm_layer = bn_from_name(decoder_norm_layer) self.pyramid6 = nn.Sequential( conv3x3(self.encoder.out_shapes[0], pyramid_channels, 2, bias=True), norm_layer(pyramid_channels, activation="identity"), ) self.pyramid7 = nn.Sequential( conv3x3(pyramid_channels, pyramid_channels, 2, bias=True), norm_layer(pyramid_channels, activation="identity"), ) self.fpn = FPN(self.encoder.out_shapes[:-2], pyramid_channels=pyramid_channels) def make_final_convs(): layers = [] for _ in range(4): layers += [ conv3x3(pyramid_channels, pyramid_channels, bias=True) ] # Norm here is fine for GroupNorm but for BN it should be implemented the other way # see EffDet for example. Maybe need to change this implementation to align with EffDet layers += [ norm_layer(pyramid_channels, activation=decoder_norm_act) ] return nn.Sequential(*layers) anchors_per_location = 9 self.cls_convs = make_final_convs() self.cls_head_conv = conv3x3(pyramid_channels, num_classes * anchors_per_location, bias=True) self.box_convs = make_final_convs() self.box_head_conv = conv3x3(pyramid_channels, 4 * anchors_per_location, bias=True) self.num_classes = num_classes self._initialize_weights()
def __init__( self, encoder_name="resnet34", encoder_weights="imagenet", pyramid_channels=256, num_fpn_layers=1, segmentation_channels=128, num_classes=1, merge_policy="add", last_upsample=True, output_stride=32, drop_rate=0, norm_layer="abn", norm_act="relu", **encoder_params, ): super().__init__() if output_stride != 32: encoder_params["output_stride"] = output_stride self.encoder = get_encoder( encoder_name, norm_layer=norm_layer, norm_act=norm_act, encoder_weights=encoder_weights, **encoder_params, ) bn_args = { "norm_layer": bn_from_name(norm_layer), "norm_act": norm_act } self.fpn = self.__class__.FEATURE_PYRAMID( self.encoder. out_shapes[:-1], # only want features from 1/4 to 1/32 pyramid_channels=pyramid_channels, num_layers=num_fpn_layers, output_stride=output_stride, **bn_args, ) self.decoder = PanopticDecoder( pyramid_channels=pyramid_channels, segmentation_channels=segmentation_channels, merge_policy=merge_policy, upsamples=[2, 2, 1, 0] if output_stride == 16 else [3, 2, 1, 0], **bn_args, ) if merge_policy == "cat": segmentation_channels *= 4 self.dropout = nn.Dropout2d(drop_rate, inplace=True) self.segm_head = conv1x1(segmentation_channels, num_classes) self.upsample = nn.Upsample( scale_factor=4, mode="bilinear") if last_upsample else nn.Identity() self.name = f"segm-fpn-{encoder_name}"
def test_frozen_abn(): l = modules.bn_from_name("frozen_abn")(10) assert list(l.parameters()) == [] l = modules.ABN(10, frozen=True) assert list(l.parameters()) == [] # check that passing tensor through frozen ABN won't update stats running_mean_original = l.running_mean.clone() running_var_original = l.running_var.clone() l(INP) assert torch.allclose(running_mean_original, l.running_mean) assert torch.allclose(running_var_original, l.running_var)
def __init__( self, encoder_name="efficientnet_b0", encoder_weights="imagenet", pyramid_channels=128, head_channels=256, num_classes=1, last_upsample=True, encoder_norm_layer="abn", encoder_norm_act="swish", decoder_norm_layer="abn", decoder_norm_act="swish", **encoder_params, ): super().__init__() self.encoder = get_encoder( encoder_name, norm_layer=encoder_norm_layer, norm_act=encoder_norm_act, encoder_weights=encoder_weights, **encoder_params, ) norm_layer = bn_from_name(decoder_norm_layer) bn_args = dict(norm_layer=norm_layer, norm_act=decoder_norm_act) self.bifpn = BiFPN( # pass P2-P5 encoder_channels=self.encoder.out_shapes[:-1], pyramid_channels=pyramid_channels, num_layers=3, # hardcode num_fpn_layers=3 **bn_args, ) self.cls_head_conv = nn.Sequential( DepthwiseSeparableConv(pyramid_channels, head_channels, **bn_args), DepthwiseSeparableConv(head_channels, head_channels, **bn_args), DepthwiseSeparableConv(head_channels, num_classes, use_norm=False), ) self.upsample = nn.Upsample( scale_factor=4, mode="bilinear") if last_upsample else nn.Identity() self.num_classes = num_classes patch_bn_mom(self, 0.01) # set last layer bias for better convergence with sigmoid loss # -4.59 = -np.log((1 - 0.01) / 0.01) nn.init.constant_(self.cls_head_conv[-1][1].bias, -4.59)
def __init__( self, encoder_name="resnet34", encoder_weights="imagenet", pyramid_channels=256, num_classes=80, norm_layer="abn", norm_act="relu", **encoder_params, ): super().__init__() self.encoder = get_encoder( encoder_name, norm_layer=norm_layer, norm_act=norm_act, encoder_weights=encoder_weights, **encoder_params, ) norm_layer = bn_from_name(norm_layer) self.pyramid6 = conv3x3(256, 256, 2, bias=True) self.pyramid7 = conv3x3(256, 256, 2, bias=True) self.fpn = FPN( self.encoder.out_shapes[:-2], pyramid_channels=pyramid_channels, ) def make_head(out_size): layers = [] for _ in range(4): # some implementations don't use BN here but I think it's needed # TODO: test how it affects results layers += [ nn.Conv2d(256, 256, 3, padding=1), norm_layer(256, activation=norm_act) ] # layers += [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU()] layers += [nn.Conv2d(256, out_size, 3, padding=1)] return nn.Sequential(*layers) self.ratios = [1.0, 2.0, 0.5] self.scales = [4 * 2**(i / 3) for i in range(3)] anchors = len(self.ratios) * len(self.scales) # 9 self.cls_head = make_head(num_classes * anchors) self.box_head = make_head(4 * anchors)
def __init__( self, encoder_name="resnet34", encoder_weights="imagenet", decoder_channels=(256, 128, 64, 32, 16), num_classes=1, center=False, # usefull for VGG models output_stride=32, drop_rate=0, decoder_attention_type=None, encoder_norm_layer="abn", encoder_norm_act="relu", decoder_norm_layer="abn", decoder_norm_act="relu", sigmoid_init=True, **encoder_params, ): if output_stride != 32: encoder_params["output_stride"] = output_stride encoder = get_encoder( encoder_name, norm_layer=encoder_norm_layer, norm_act=encoder_norm_act, encoder_weights=encoder_weights, **encoder_params, ) decoder = UnetDecoder( encoder_channels=encoder.out_shapes, decoder_channels=decoder_channels, final_channels=num_classes, center=center, drop_rate=drop_rate, output_stride=output_stride, attn_type=decoder_attention_type, norm_layer=bn_from_name(decoder_norm_layer), norm_act=decoder_norm_act, ) super().__init__(encoder, decoder) self.name = f"u-{encoder_name}" if sigmoid_init: # set last layer bias for better convergence with sigmoid loss # -4.59 = -np.log((1 - 0.01) / 0.01) nn.init.constant_(self.decoder.final_conv.bias, -4.59)
def __init__( self, encoder_name="resnet34", encoder_weights="imagenet", num_classes=1, last_upsample=True, aspp_dilation_rates=[6, 12, 18], output_stride=16, drop_rate=0, encoder_norm_layer="abn", encoder_norm_act="relu", decoder_norm_layer="abn", decoder_norm_act="relu", **encoder_params, ): encoder = get_encoder( encoder_name, encoder_weights=encoder_weights, output_stride=output_stride, norm_layer=encoder_norm_layer, norm_act=encoder_norm_act, **encoder_params, ) decoder = DeepLabHead( encoder_channels=encoder.out_shapes, num_classes=num_classes, dilation_rates=aspp_dilation_rates, output_stride=output_stride, drop_rate=drop_rate, norm_layer=bn_from_name(decoder_norm_layer), norm_act=decoder_norm_act, ) super().__init__(encoder, decoder) self.upsample = nn.Upsample( scale_factor=4, mode="bilinear") if last_upsample else nn.Identity() self.name = f"deeplabv3plus-{encoder_name}"
def __init__(
    self,
    block=None,
    layers=None,
    pretrained=None,  # not used. here for proper signature
    num_classes=1000,
    in_channels=3,
    use_se=False,
    groups=1,
    base_width=64,
    deep_stem=False,
    output_stride=32,
    norm_layer="abn",
    norm_act="relu",
    antialias=False,
    encoder=False,
    drop_rate=0.0,
    drop_connect_rate=0.0,
    global_pool="avg",
    init_bn0=True,
):
    stem_width = 64
    norm_layer = bn_from_name(norm_layer)
    self.inplanes = stem_width
    self.num_classes = num_classes
    self.groups = groups
    self.base_width = base_width
    self.block = block
    self.expansion = block.expansion
    self.norm_act = norm_act
    self.block_idx = 0
    self.num_blocks = sum(layers)
    self.drop_connect_rate = drop_connect_rate
    super(ResNet, self).__init__()

    if deep_stem:
        self.conv1 = nn.Sequential(
            conv3x3(in_channels, stem_width // 2, 2),
            norm_layer(stem_width // 2, activation=norm_act),
            conv3x3(stem_width // 2, stem_width // 2),
            norm_layer(stem_width // 2, activation=norm_act),
            conv3x3(stem_width // 2, stem_width),
        )
    else:
        self.conv1 = nn.Conv2d(in_channels, stem_width, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = norm_layer(stem_width, activation=norm_act)
    self.maxpool = nn.MaxPool2d(
        kernel_size=3,
        stride=2,
        padding=0 if use_se else 1,
        ceil_mode=True if use_se else False,
    )
    if output_stride not in [8, 16, 32]:
        raise ValueError("Output stride should be in [8, 16, 32]")
    if output_stride == 8:
        stride_3, stride_4, dilation_3, dilation_4 = 1, 1, 2, 4
    elif output_stride == 16:
        stride_3, stride_4, dilation_3, dilation_4 = 2, 1, 1, 2
    elif output_stride == 32:
        stride_3, stride_4, dilation_3, dilation_4 = 2, 2, 1, 1
    largs = dict(use_se=use_se, norm_layer=norm_layer, norm_act=norm_act, antialias=antialias)
    self.layer1 = self._make_layer(64, layers[0], stride=1, **largs)
    self.layer2 = self._make_layer(128, layers[1], stride=2, **largs)
    self.layer3 = self._make_layer(256, layers[2], stride=stride_3, dilation=dilation_3, **largs)
    self.layer4 = self._make_layer(512, layers[3], stride=stride_4, dilation=dilation_4, **largs)
    self.global_pool = GlobalPool2d(global_pool)
    self.num_features = 512 * self.expansion
    self.encoder = encoder
    if not encoder:
        self.dropout = nn.Dropout(p=drop_rate, inplace=True)
        self.last_linear = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
    else:
        self.forward = self.encoder_features
    self._initialize_weights(init_bn0)
def __init__( self, encoder_name="hrnet_w18", encoder_weights="imagenet", pretrained=None, # not used num_classes=1, last_upsample=True, OCR=False, drop_rate=0, norm_layer="inplace_abn", # use memory efficient by default norm_act="leaky_relu", **encoder_params, ): super().__init__() self.encoder = get_encoder( encoder_name, encoder_weights=encoder_weights, norm_layer=norm_layer, norm_act=norm_act, **encoder_params, ) norm_layer = bn_from_name(norm_layer) final_channels = sum(self.encoder.out_shapes[:4]) self.OCR = OCR if OCR: self.conv3x3 = nn.Sequential( conv3x3(final_channels, 512, bias=True), norm_layer(512, activation=norm_act), ) self.ocr_gather_head = SpatialOCR_Gather() self.ocr_distri_head = SpatialOCR(in_channels=512, key_channels=256, out_channels=512, norm_layer=norm_layer, norm_act=norm_act) self.head = conv1x1(512, num_classes, bias=True) self.aux_head = nn.Sequential( # in OCR first conv is 3x3 conv3x3(final_channels, final_channels, bias=True), norm_layer(final_channels, activation=norm_act), conv1x1(final_channels, num_classes, bias=True), ) else: self.head = nn.Sequential( conv1x1(final_channels, final_channels, bias=True), norm_layer(final_channels, activation=norm_act), conv1x1(final_channels, num_classes, bias=True), ) up_kwargs = dict(mode="bilinear", align_corners=True) self.up_x2 = nn.Upsample(scale_factor=2, **up_kwargs) self.up_x4 = nn.Upsample(scale_factor=4, **up_kwargs) self.up_x8 = nn.Upsample(scale_factor=8, **up_kwargs) self.last_upsample = nn.Upsample( scale_factor=4, **up_kwargs) if last_upsample else nn.Identity() self.dropout = nn.Dropout2d( drop_rate) # can't use inplace. it would raise a backprop error self.name = f"segm-{encoder_name}" # use lower momemntum patch_bn_mom(self) self._init_weights()
def test_agn_repr():
    """Checks that repr for AGN includes number of groups"""
    l = modules.bn_from_name("agn")(10, num_groups=2, activation="leaky_relu")
    expected = "AGN(10, num_groups=2, eps=1e-05, affine=True, activation=ACT.LEAKY_RELU[0.01])"
    assert repr(l) == expected
def __init__(
    self,
    block=None,
    layers=None,
    pretrained=None,  # not used. here for proper signature
    num_classes=1000,
    in_channels=3,
    attn_type=None,
    groups=1,
    base_width=64,
    stem_type="",
    output_stride=32,
    norm_layer="abn",
    norm_act="relu",
    antialias=False,
    encoder=False,
    drop_rate=0.0,
    drop_connect_rate=0.0,
    init_bn0=True,
):
    stem_width = 64
    norm_layer = bn_from_name(norm_layer)
    self.inplanes = stem_width
    self.num_classes = num_classes
    self.groups = groups
    self.base_width = base_width
    self.block = block
    self.expansion = block.expansion
    self.norm_act = norm_act
    self.block_idx = 0
    self.num_blocks = sum(layers)
    self.drop_connect_rate = drop_connect_rate
    super(ResNet, self).__init__()

    # move stem creation in separate function for simplicity
    self._make_stem(stem_type, stem_width, in_channels, norm_layer, norm_act)

    if output_stride not in [8, 16, 32]:
        raise ValueError("Output stride should be in [8, 16, 32]")
    if output_stride == 8:
        stride_3, stride_4, dilation_3, dilation_4 = 1, 1, 2, 4
    elif output_stride == 16:
        stride_3, stride_4, dilation_3, dilation_4 = 2, 1, 1, 2
    elif output_stride == 32:
        stride_3, stride_4, dilation_3, dilation_4 = 2, 2, 1, 1

    largs = dict(attn_type=attn_type, norm_layer=norm_layer, norm_act=norm_act, antialias=antialias)
    self.layer1 = self._make_layer(64, layers[0], stride=1, **largs)
    self.layer2 = self._make_layer(128, layers[1], stride=2, **largs)
    self.layer3 = self._make_layer(256, layers[2], stride=stride_3, dilation=dilation_3, **largs)
    self.layer4 = self._make_layer(512, layers[3], stride=stride_4, dilation=dilation_4, **largs)

    self.global_pool = FastGlobalAvgPool2d()
    self.num_features = 512 * self.expansion
    self.encoder = encoder
    if not encoder:
        self.dropout = nn.Dropout(p=drop_rate, inplace=True)
        self.last_linear = nn.Linear(self.num_features, num_classes)
    else:
        self.forward = self.encoder_features
    self._initialize_weights(init_bn0)
def test_estimated_abn():
    """Checks that init works and output is the same in eval mode"""
    est_bn = modules.bn_from_name("estimated_abn")(10).eval()
    bn = modules.bn_from_name("estimated_abn")(10).eval()
    est_bn.load_state_dict(bn.state_dict())
    assert torch.allclose(est_bn(INP), bn(INP))
def __init__(
    self,
    layers=None,
    pretrained=None,  # not used. here for proper signature
    num_classes=1000,
    in_channels=3,
    width_factor=1.0,
    output_stride=32,
    norm_layer="inplaceabn",
    norm_act="leaky_relu",
    encoder=False,
    drop_rate=0.0,
    drop_connect_rate=0.0,
):
    nn.Module.__init__(self)
    stem_width = int(64 * width_factor)
    norm_layer = bn_from_name(norm_layer)
    self.inplanes = stem_width
    self.num_classes = num_classes
    self.groups = 1  # not really used but needed inside _make_layer
    self.base_width = 64  # used inside _make_layer
    self.norm_act = norm_act
    self.block_idx = 0
    self.num_blocks = sum(layers)
    self.drop_connect_rate = drop_connect_rate
    self._make_stem("space2depth", stem_width, in_channels, norm_layer, norm_act)

    if output_stride not in [8, 16, 32]:
        raise ValueError("Output stride should be in [8, 16, 32]")
    # TODO add OS later
    # if output_stride == 8:
    #     stride_3, stride_4, dilation_3, dilation_4 = 1, 1, 2, 4
    # elif output_stride == 16:
    #     stride_3, stride_4, dilation_3, dilation_4 = 2, 1, 1, 2
    # elif output_stride == 32:
    stride_3, stride_4, dilation_3, dilation_4 = 2, 2, 1, 1

    largs = dict(attn_type="se", norm_layer=norm_layer, norm_act=norm_act, antialias=True)
    self.block = TBasicBlock
    self.expansion = TBasicBlock.expansion
    self.layer1 = self._make_layer(stem_width, layers[0], stride=1, **largs)
    self.layer2 = self._make_layer(stem_width * 2, layers[1], stride=2, **largs)

    self.block = TBottleneck  # first 2 - Basic, last 2 - Bottleneck
    self.expansion = TBottleneck.expansion
    self.layer3 = self._make_layer(stem_width * 4, layers[2], stride=stride_3, dilation=dilation_3, **largs)
    largs.update(attn_type=None)  # no se in last layer
    self.layer4 = self._make_layer(stem_width * 8, layers[3], stride=stride_4, dilation=dilation_4, **largs)

    self.global_pool = FastGlobalAvgPool2d(flatten=True)
    self.num_features = stem_width * 8 * self.expansion
    self.encoder = encoder
    if not encoder:
        self.dropout = nn.Dropout(p=drop_rate, inplace=True)
        self.last_linear = nn.Linear(self.num_features, num_classes)
    else:
        self.forward = self.encoder_features
    self._initialize_weights(init_bn0=True)
def test_frozen_abn(): l = modules.bn_from_name("frozen_abn")(10) assert list(l.parameters()) == [] l = modules.ABN(10, frozen=True) assert list(l.parameters()) == []
def test_abcn():
    """Check that abcn init and forward works"""
    l = modules.bn_from_name("abcn")(10, num_groups=2)
def __init__( self, pretrained="coco", # Not used. here for proper signature encoder_name="efficientnet_b0", encoder_weights="imagenet", pyramid_channels=64, num_fpn_layers=3, num_head_repeats=3, num_classes=90, encoder_norm_layer="frozenabn", encoder_norm_act="swish", decoder_norm_layer="abn", decoder_norm_act="swish", match_tf_same_padding=False, anchors_per_location=9, **encoder_params, ): super().__init__() self.encoder = get_encoder( encoder_name, norm_layer=encoder_norm_layer, norm_act=encoder_norm_act, encoder_weights=encoder_weights, **encoder_params, ) norm_layer = bn_from_name(decoder_norm_layer) bn_args = dict(norm_layer=norm_layer, norm_act=decoder_norm_act) self.pyramid6 = nn.Sequential( conv1x1(self.encoder.out_shapes[0], pyramid_channels, bias=True), norm_layer(pyramid_channels, activation="identity"), nn.MaxPool2d(3, stride=2, padding=1), ) self.pyramid7 = nn.MaxPool2d( 3, stride=2, padding=1) # in EffDet it's a simple maxpool self.bifpn = BiFPN( encoder_channels=(pyramid_channels, ) * 2 + self.encoder.out_shapes[:-2], pyramid_channels=pyramid_channels, num_layers=num_fpn_layers, **bn_args, ) def make_head(out_size): layers = [] for _ in range(num_head_repeats): layers += [ DepthwiseSeparableConv(pyramid_channels, pyramid_channels, use_norm=False) ] return nn.ModuleList(layers) # The convolution layers in the head are shared among all levels, but # each level has its batch normalization to capture the statistical # difference among different levels. def make_head_norm(): return nn.ModuleList([ nn.ModuleList([ norm_layer(pyramid_channels, activation=decoder_norm_act) for _ in range(num_head_repeats) ]) for _ in range(5) ]) self.cls_convs = make_head(num_classes * anchors_per_location) self.cls_head_conv = DepthwiseSeparableConv(pyramid_channels, num_classes * anchors_per_location, use_norm=False) self.cls_norms = make_head_norm() self.box_convs = make_head(4 * anchors_per_location) self.box_head_conv = DepthwiseSeparableConv(pyramid_channels, 4 * anchors_per_location, use_norm=False) self.box_norms = make_head_norm() self.num_classes = num_classes self.num_head_repeats = num_head_repeats patch_bn_tf(self) self._initialize_weights() if match_tf_same_padding: conv_to_same_conv(self) maxpool_to_same_maxpool(self)
def __init__(
    self,
    growth_rate=None,
    block_config=None,
    pretrained=None,  # not used. here for proper signature
    num_classes=1000,
    drop_rate=0.0,
    in_channels=3,
    norm_layer="abn",
    norm_act="relu",
    deep_stem=False,
    stem_width=64,
    encoder=False,
    global_pool="avg",
    memory_efficient=True,
    output_stride=32,  # not used! only here to allow using as encoder
):
    super(DenseNet, self).__init__()
    norm_layer = bn_from_name(norm_layer)
    self.num_classes = num_classes
    if deep_stem:
        self.conv0 = nn.Sequential(
            conv3x3(in_channels, stem_width // 2, 2),
            norm_layer(stem_width // 2, activation=norm_act),
            conv3x3(stem_width // 2, stem_width // 2),
            norm_layer(stem_width // 2, activation=norm_act),
            conv3x3(stem_width // 2, stem_width, 2),
        )
    else:
        self.conv0 = nn.Conv2d(in_channels, stem_width, kernel_size=7, stride=2, padding=3, bias=False)
    self.norm0 = norm_layer(stem_width, activation=norm_act)
    self.pool0 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)

    largs = dict(
        growth_rate=growth_rate,
        drop_rate=drop_rate,
        memory_efficient=memory_efficient,
        norm_layer=norm_layer,
        norm_act=norm_act,
    )
    in_planes = stem_width
    for i, num_layers in enumerate(block_config):
        block = _DenseBlock(num_layers, in_planes, **largs)
        setattr(self, f"denseblock{i+1}", block)
        in_planes += num_layers * growth_rate
        if i != len(block_config) - 1:
            trans = _Transition(
                in_planes=in_planes,
                out_planes=in_planes // 2,
                norm_layer=norm_layer,
                norm_act=norm_act,
            )
            setattr(self, f"transition{i+1}", trans)
            in_planes //= 2

    # Final normalization
    self.norm5 = nn.BatchNorm2d(in_planes)

    # Linear layer
    self.encoder = encoder
    if not encoder:
        self.global_pool = GlobalPool2d(global_pool)
        self.classifier = nn.Linear(in_planes, num_classes)
    else:
        assert len(block_config) == 4, "Need 4 blocks to use as encoder"
        self.forward = self.encoder_features
def __init__(
    self,
    stage_fns=None,  # list of nn.Module
    block_fns=None,  # list of nn.Module
    stage_args=None,  # list of dicts
    layers=None,  # num layers in each block
    channels=None,  # it's actually output channels. 256, 512, 1024, 2048 for R50
    # pretrained=None,  # not used. here for proper signature
    num_classes=1000,
    in_channels=3,
    norm_layer="abn",
    norm_act="leaky_relu",
    head_norm_act="leaky_relu",  # activation in head
    stem_type="default",
    # antialias=False,
    # encoder=False,
    # drop_rate=0.0,
    drop_connect_rate=0.0,
    head_width=2048,
    stem_width=64,
    head_type="default",  # type of head
):
    norm_layer = bn_from_name(norm_layer)
    self.num_classes = num_classes
    self.norm_act = norm_act
    self.block_idx = 0  # for drop connect
    self.drop_connect_rate = drop_connect_rate
    super().__init__()

    first_norm = nn.Identity() if block_fns[0].startswith("Pre") else norm_layer(stem_width, activation=norm_act)
    if stem_type == "default":
        self.stem_conv1 = nn.Sequential(conv3x3(in_channels, stem_width, stride=2), first_norm)
    elif stem_type == "s2d":
        # instead of default stem I'm using Space2Depth followed by conv. no norm because there is one at the beginning
        # of DarkStage. upd. there is norm in not PreAct version
        self.stem_conv1 = nn.Sequential(
            SpaceToDepth(block_size=2),
            conv3x3(in_channels * 4, stem_width),
            first_norm,
        )
    else:
        raise ValueError(f"Stem type `{stem_type}` is not supported")

    bn_args = dict(norm_layer=norm_layer, norm_act=norm_act)
    block_name_to_module = {
        "XX": SimpleBasicBlock,
        "Pre_XX": SimplePreActBasicBlock,
        "Pre_XX_Res2": SimplePreActRes2BasicBlock,
        "Btl": SimpleBottleneck,
        "Pre_Btl": SimplePreActBottleneck,
        "IR": SimpleInvertedResidual,
        "Pre_IR": SimplePreActInvertedResidual,
        "Sep2": SimpleSeparable_2,
        "Pre_Sep2": SimplePreActSeparable_2,
        "Sep3": SimpleSeparable_3,
        "Pre_Custom_2": PreBlock_2,
    }
    stage_name_to_module = {"simpl": SimpleStage}
    # set stride=2 for all blocks
    # using **{**bn_args, **stage_args} to allow updating norm layer for particular stage
    self.layer1 = stage_name_to_module[stage_fns[0]](
        block_fn=block_name_to_module[block_fns[0]],
        in_chs=stem_width,
        out_chs=channels[0],
        num_blocks=layers[0],
        stride=2,
        **{**bn_args, **stage_args[0]},
    )
    self.layer2 = stage_name_to_module[stage_fns[1]](
        block_fn=block_name_to_module[block_fns[1]],
        in_chs=channels[0],
        out_chs=channels[1],
        num_blocks=layers[1],
        stride=2,
        **{**bn_args, **stage_args[1]},
    )
    self.layer3 = stage_name_to_module[stage_fns[2]](
        block_fn=block_name_to_module[block_fns[2]],
        in_chs=channels[1],
        out_chs=channels[2],
        num_blocks=layers[2],
        stride=2,
        **{**bn_args, **stage_args[2]},
    )
    extra_stage3_filters = stage_args[2].get("filter_steps", 0) * (layers[2] - 1)
    self.layer4 = stage_name_to_module[stage_fns[3]](
        block_fn=block_name_to_module[block_fns[3]],
        in_chs=channels[2] + extra_stage3_filters,
        out_chs=channels[3],
        num_blocks=layers[3],
        stride=2,
        **{**bn_args, **stage_args[3]},
    )
    extra_stage4_filters = stage_args[3].get("filter_steps", 0) * (layers[3] - 1)
    channels[3] += extra_stage4_filters  # TODO rewrite it cleaner instead of doing inplace

    last_norm = norm_layer(channels[3], activation=norm_act) if block_fns[0].startswith("Pre") else nn.Identity()
    if head_type == "mobilenetv3":
        self.head = nn.Sequential(  # Mbln v3 head. GAP first, then expand convs
            last_norm,
            FastGlobalAvgPool2d(flatten=True),
            nn.Linear(channels[3], head_width),
            pt.modules.activations.activation_from_name(head_norm_act),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "mobilenetv3_norm":  # mobilenet with last norm
        self.head = nn.Sequential(  # Mbln v3 head. GAP first, then expand convs
            last_norm,
            FastGlobalAvgPool2d(flatten=True),
            nn.Linear(channels[3], head_width),
            nn.BatchNorm1d(head_width),
            pt.modules.activations.activation_from_name(head_norm_act),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "default":
        self.head = nn.Sequential(
            last_norm,
            conv1x1(channels[3], head_width),
            norm_layer(head_width, activation=head_norm_act),
            FastGlobalAvgPool2d(flatten=True),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "default_nonorm":  # if used in angular losses don't want norm
        self.head = nn.Sequential(
            last_norm,
            conv1x1(channels[3], head_width, bias=True),  # need bias because not followed by norm
            FastGlobalAvgPool2d(flatten=True),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "mlp_bn_fc_bn":
        self.head = nn.Sequential(
            last_norm,
            conv1x1(channels[3], channels[3]),
            FastGlobalAvgPool2d(flatten=True),
            nn.BatchNorm1d(channels[3]),
            pt.modules.activations.activation_from_name(head_norm_act),
            nn.Linear(channels[3], head_width, bias=False),
            nn.BatchNorm1d(head_width, affine=False),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "mlp_bn_fc":  # same as above but without last BN
        self.head = nn.Sequential(
            last_norm,
            conv1x1(channels[3], channels[3]),
            FastGlobalAvgPool2d(flatten=True),
            nn.BatchNorm1d(channels[3]),
            pt.modules.activations.activation_from_name(head_norm_act),
            nn.Linear(channels[3], head_width, bias=False),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "mlp_2":
        assert isinstance(head_width, (tuple, list)), head_width
        self.head = nn.Sequential(  # like Mbln v3 head. GAP first, then MLP convs
            last_norm,
            FastGlobalAvgPool2d(flatten=True),
            nn.Linear(channels[3], head_width[0]),
            nn.BatchNorm1d(head_width[0]),
            pt.modules.activations.activation_from_name(head_norm_act),
            nn.Linear(head_width[0], head_width[1]),
            nn.BatchNorm1d(head_width[1]),
            pt.modules.activations.activation_from_name(head_norm_act),
        )
        self.last_linear = nn.Linear(head_width[1], num_classes)
    elif head_type == "mlp_3":
        assert isinstance(head_width, (tuple, list)), head_width
        self.head = nn.Sequential(  # like Mbln v3 head. GAP first, then MLP convs
            last_norm,
            FastGlobalAvgPool2d(flatten=True),
            nn.Linear(channels[3], head_width[0]),
            nn.BatchNorm1d(head_width[0]),
            pt.modules.activations.activation_from_name(head_norm_act),
            nn.Linear(head_width[0], head_width[1]),
            nn.BatchNorm1d(head_width[1]),
            pt.modules.activations.activation_from_name(head_norm_act),
            nn.Linear(head_width[1], head_width[2]),
            nn.BatchNorm1d(head_width[2]),
            pt.modules.activations.activation_from_name(head_norm_act),
        )
        self.last_linear = nn.Linear(head_width[2], num_classes)
    else:
        raise ValueError(f"Head type: {head_type} is not supported!")

    initialize(self)
def __init__(
    self,
    blocks_args=None,
    width_multiplier=None,
    depth_multiplier=None,
    pretrained=None,  # not used. here for proper signature
    num_classes=1000,
    in_channels=3,
    output_stride=32,
    encoder=False,
    drop_rate=0,
    drop_connect_rate=0,
    stem_size=32,
    norm_layer="abn",
    norm_act="swish",
    match_tf_same_padding=False,
):
    super().__init__()
    norm_layer = bn_from_name(norm_layer)
    self.norm_layer = norm_layer
    self.norm_act = norm_act
    self.width_multiplier = width_multiplier
    self.depth_multiplier = depth_multiplier
    stem_size = make_divisible(stem_size * width_multiplier)
    self.conv_stem = conv3x3(in_channels, stem_size, stride=2)
    self.bn1 = norm_layer(stem_size, activation=norm_act)
    in_channels = stem_size
    self.blocks = nn.ModuleList([])
    # modify block args to account for output_stride strategy
    blocks_args = _patch_block_args(blocks_args, output_stride)
    for block_idx, block_arg in enumerate(blocks_args):
        block = []
        block_arg["in_channels"] = make_divisible(block_arg["in_channels"] * self.width_multiplier)
        block_arg["out_channels"] = make_divisible(block_arg["out_channels"] * self.width_multiplier)
        block_arg["norm_layer"] = norm_layer
        block_arg["norm_act"] = norm_act
        # linearly scale keep prob
        block_arg["keep_prob"] = 1 - drop_connect_rate * block_idx / len(blocks_args)
        repeats = block_arg.pop("num_repeat")
        repeats = int(math.ceil(repeats * self.depth_multiplier))
        # when dilating conv with stride 2 we want it to have dilation // 2
        # it prevents checkerboard artifacts with OS=16 and OS=8
        dilation = block_arg.get("dilation", 1)  # save block values
        if block_arg.pop("no_first_dilation", False):
            block_arg["dilation"] = max(1, block_arg["dilation"] // 2)
        block.append(InvertedResidual(**block_arg))
        # only first layer in block is strided
        block_arg["stride"] = 1
        block_arg["dilation"] = dilation
        block_arg["in_channels"] = block_arg["out_channels"]
        for _ in range(repeats - 1):
            block.append(InvertedResidual(**block_arg))
        self.blocks.append(nn.Sequential(*block))

    # Head
    if encoder:
        self.forward = self.encoder_features
    else:
        out_channels = block_arg["out_channels"]
        num_features = make_divisible(1280 * width_multiplier)
        self.conv_head = conv1x1(out_channels, num_features)
        self.bn2 = norm_layer(num_features, activation=norm_act)
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(drop_rate, inplace=True)
        self.classifier = nn.Linear(num_features, num_classes)
    patch_bn(self)  # adjust epsilon
    initialize(self)
    if match_tf_same_padding:
        conv_to_same_conv(self)
        maxpool_to_same_maxpool(self)
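# The `keep_prob` line above linearly decays the drop-connect survival probability with block
# index, so early blocks are almost never dropped while the last block is dropped with
# probability close to `drop_connect_rate`. Illustration with assumed values
# (drop_connect_rate=0.2, 7 block groups as in EfficientNet-B0), not library defaults.
drop_connect_rate = 0.2
num_block_groups = 7
keep_probs = [1 - drop_connect_rate * idx / num_block_groups for idx in range(num_block_groups)]
print([round(p, 3) for p in keep_probs])
# [1.0, 0.971, 0.943, 0.914, 0.886, 0.857, 0.829]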
def __init__(
    self,
    stage_fn=None,
    block_fn=None,
    layers=None,  # num layers in each block
    channels=None,  # it's actually output channels. 256, 512, 1024, 2048 for R50
    pretrained=None,  # not used. here for proper signature
    num_classes=1000,
    in_channels=3,
    attn_type=None,
    # base_width=64,
    stem_type="default",
    norm_layer="abn",
    norm_act="leaky_relu",
    antialias=False,
    # encoder=False,
    bottle_ratio=0.25,  # how much to shrink channels in bottleneck layer
    no_first_csp=False,  # make first stage a Simple Stage
    drop_rate=0.0,
    drop_connect_rate=0.0,
    expand_before_head=True,  # add additional conv from 512 -> 2048 to avoid representational bottleneck
    mobilenetv3_head=False,  # put GAP first, then expand convs
    **block_kwargs,
):
    stem_width = 64
    norm_layer = bn_from_name(norm_layer)
    self.num_classes = num_classes
    self.norm_act = norm_act
    self.block_idx = 0  # for drop connect
    self.drop_connect_rate = drop_connect_rate
    super().__init__()

    if block_fn != SimplePreActBottleneck:
        stem_norm = norm_layer(stem_width, activation=norm_act)
    else:
        stem_norm = nn.Identity()
    if stem_type == "default":
        self.stem_conv1 = nn.Sequential(
            nn.Conv2d(3, stem_width, kernel_size=7, stride=2, padding=3, bias=False),
            stem_norm,
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        first_stride = 1
    elif stem_type == "s2d":
        # instead of default stem I'm using Space2Depth followed by conv. no norm because there is one at the beginning
        # of DarkStage. upd. there is norm in not PreAct version
        self.stem_conv1 = nn.Sequential(
            SpaceToDepth(block_size=2),
            conv3x3(in_channels * 4, stem_width),
            stem_norm,
            # nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        first_stride = 2

    # blocks
    largs = dict(
        stride=2,
        bottle_ratio=bottle_ratio,
        block_fn=block_fn,
        attn_type=attn_type,
        norm_layer=norm_layer,
        norm_act=norm_act,
        # antialias=antialias,
        **block_kwargs,
    )
    first_stage_fn = SimpleStage if no_first_csp else stage_fn
    # fmt: off
    self.layer1 = first_stage_fn(
        in_chs=stem_width,
        out_chs=channels[0],
        num_blocks=layers[0],
        keep_prob=self.keep_prob,
        **{**largs, "stride": first_stride},  # overwrite default stride
    )
    # **{**largs, "antialias": False}  # antialias in first stage is too expensive
    self.layer2 = stage_fn(in_chs=channels[0], out_chs=channels[1], num_blocks=layers[1], keep_prob=self.keep_prob, **largs)
    self.layer3 = stage_fn(in_chs=channels[1], out_chs=channels[2], num_blocks=layers[2], keep_prob=self.keep_prob, **largs)
    self.layer4 = stage_fn(in_chs=channels[2], out_chs=channels[3], num_blocks=layers[3], keep_prob=self.keep_prob, **largs)
    # fmt: on
    # self.global_pool = FastGlobalAvgPool2d(flatten=True)
    # self.dropout = nn.Dropout(p=drop_rate, inplace=True)
    head_layers = []
    # this is a very dirty if but i don't care for now
    if mobilenetv3_head:
        head_layers.append(FastGlobalAvgPool2d(flatten=True))
        if channels[3] < 2048 and expand_before_head:
            head_layers.append(nn.Linear(channels[3], 2048))  # no norm here as in original MobileNetV3 from google
            head_layers.append(pt.modules.activations.activation_from_name(norm_act))
        head_layers.append(nn.Linear(2048 if expand_before_head else channels[3], num_classes))
    else:
        if channels[3] < 2048 and expand_before_head:
            if block_fn == SimplePreActBottleneck:  # for PreAct add additional BN here
                head_layers.append(norm_layer(channels[3], activation=norm_act))
            head_layers.extend([conv1x1(channels[3], 2048), norm_layer(2048, activation=norm_act)])
        head_layers.extend([
            FastGlobalAvgPool2d(flatten=True),
            nn.Linear(2048 if expand_before_head else channels[3], num_classes),
        ])

    # self.head = nn.Sequential(
    #     conv1x1(channels[3], 2048),
    #     norm_layer(activation=norm_act),
    #     # norm_layer(1024, activation=norm_act),
    #     FastGlobalAvgPool2d(flatten=True),
    #     nn.Linear(2048, num_classes),
    # )
    self.head = nn.Sequential(*head_layers)
    initialize(self)
def __init__(
    self,
    width=18,
    small=False,
    pretrained=None,  # not used. here for proper signature
    num_classes=1000,
    in_channels=3,
    norm_layer="abn",
    norm_act="relu",
    encoder=False,
):
    super(HighResolutionNet, self).__init__()
    stem_width = 64
    norm_layer = bn_from_name(norm_layer)
    self.bn_args = bn_args = {"norm_layer": norm_layer, "norm_act": norm_act}

    self.conv1 = conv3x3(in_channels, stem_width, stride=2)
    self.bn1 = norm_layer(stem_width, activation=norm_act)
    self.conv2 = conv3x3(stem_width, stem_width, stride=2)
    self.bn2 = norm_layer(stem_width, activation=norm_act)

    channels = [width, width * 2, width * 4, width * 8]
    n_blocks = [2 if small else 4] * 4

    self.layer1 = make_layer(stem_width, stem_width, n_blocks[0], **bn_args)
    self.transition1 = TransitionBlock([stem_width * Bottleneck.expansion], channels[:2], **bn_args)
    self.stage2 = self._make_stage(n_modules=1, n_branches=2, n_blocks=n_blocks[:2], n_chnls=channels[:2])
    self.transition2 = TransitionBlock(channels[:2], channels[:3], **bn_args)
    self.stage3 = self._make_stage(
        # 3 if small else 4
        n_modules=(4, 3)[small],
        n_branches=3,
        n_blocks=n_blocks[:3],
        n_chnls=channels[:3],
    )
    self.transition3 = TransitionBlock(channels[:3], channels, **bn_args)
    self.stage4 = self._make_stage(
        # 2 if small else 3
        n_modules=(3, 2)[small],
        n_branches=4,
        n_blocks=n_blocks,
        n_chnls=channels,
    )

    self.encoder = encoder
    if encoder:
        self.forward = self.encoder_features
    else:
        # Classification Head
        self.cls_head = HRClassificationHead(channels, **bn_args)
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.last_linear = nn.Linear(2048, num_classes)

    # initialize weights
    initialize(self)
def test_no_norm():
    """Check that it can be initialized without params"""
    modules.bn_from_name("none")(10)
    modules.bn_from_name("none")()