def _make_fuse_layers(self, norm_layer, norm_act):
    if self.num_branches == 1:
        return None
    num_branches = self.num_branches
    num_inchannels = self.num_inchannels
    fuse_layers = []
    for i in range(num_branches):
        fuse_layer = []
        for j in range(num_branches):
            if j > i:
                # branch j is lower resolution than branch i: project channels, then upsample by 2**(j - i)
                fuse_layer.append(
                    nn.Sequential(
                        conv1x1(num_inchannels[j], num_inchannels[i]),
                        norm_layer(num_inchannels[i], activation="identity"),
                        nn.Upsample(scale_factor=2**(j - i), mode='nearest'),
                    ))
            elif j == i:
                fuse_layer.append(nn.Identity())
            else:
                # branch j is higher resolution than branch i: downsample with a chain of strided 3x3 convs
                conv3x3s = []
                for k in range(i - j):
                    if k == i - j - 1:
                        num_outchannels_conv3x3 = num_inchannels[i]
                        conv3x3s.append(
                            nn.Sequential(
                                conv3x3(num_inchannels[j], num_outchannels_conv3x3, 2),
                                norm_layer(num_outchannels_conv3x3, activation="identity"),
                            ))
                    else:
                        num_outchannels_conv3x3 = num_inchannels[j]
                        conv3x3s.append(
                            nn.Sequential(
                                conv3x3(num_inchannels[j], num_outchannels_conv3x3, 2),
                                norm_layer(num_outchannels_conv3x3, activation=norm_act),
                            ))
                fuse_layer.append(nn.Sequential(*conv3x3s))
        fuse_layers.append(nn.ModuleList(fuse_layer))
    return nn.ModuleList(fuse_layers)
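# NOTE (illustrative sketch, not part of this module): fuse layers like the ones built above are
# typically consumed by mapping every branch to a common resolution, summing, and applying an
# activation. The commented helper below only demonstrates that pattern; the repo's actual
# forward pass may differ.
#
# import torch.nn.functional as F
#
# def _fuse_sketch(fuse_layers, xs):
#     # xs: list of per-branch feature maps; fuse_layers: the nn.ModuleList built above
#     return [F.relu(sum(row[j](xs[j]) for j in range(len(xs)))) for row in fuse_layers]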
def __init__(self, encoder_channels, prefinal_channels=32, final_channels=1, **bn_params):  # norm layer, norm_act
    super().__init__()
    in_channels = encoder_channels
    self.layer1 = LinknetDecoderBlock(in_channels[0], in_channels[1], **bn_params)
    self.layer2 = LinknetDecoderBlock(in_channels[1], in_channels[2], **bn_params)
    self.layer3 = LinknetDecoderBlock(in_channels[2], in_channels[3], **bn_params)
    self.layer4 = LinknetDecoderBlock(in_channels[3], in_channels[4], **bn_params)
    self.layer5 = LinknetDecoderBlock(in_channels[4], prefinal_channels, **bn_params)
    self.final_conv = conv1x1(prefinal_channels, final_channels)
    initialize(self)
def __init__(
    self,
    encoder_channels,
    decoder_channels=(256, 128, 64, 32, 16),
    final_channels=1,
    center=False,
    drop_rate=0,
    output_stride=32,
    attn_type=None,
    **bn_params,  # norm layer, norm_act
):
    super().__init__()
    if center:
        channels = encoder_channels[0]
        self.center = UnetCenterBlock(channels, channels)
    else:
        self.center = None
    in_chs = self.compute_channels(encoder_channels, decoder_channels)
    kwargs = {**bn_params, "attn_type": attn_type}
    self.layer1 = UnetDecoderBlock(in_chs[0], decoder_channels[0], upsample=output_stride == 32, **kwargs)
    self.layer2 = UnetDecoderBlock(in_chs[1], decoder_channels[1], upsample=output_stride != 8, **kwargs)
    self.layer3 = UnetDecoderBlock(in_chs[2], decoder_channels[2], **kwargs)
    self.layer4 = UnetDecoderBlock(in_chs[3], decoder_channels[3], **kwargs)
    self.layer5 = UnetDecoderBlock(in_chs[4], decoder_channels[4], **kwargs)
    self.dropout = nn.Dropout2d(drop_rate, inplace=False)  # inplace=True raises a backprop error
    self.final_conv = conv1x1(decoder_channels[4], final_channels, bias=True)
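# NOTE (illustrative, follows from the flags above): the first two decoder blocks upsample
# conditionally so the decoder mirrors the encoder's output_stride. With the default OS=32 all
# five blocks upsample; with OS=16 layer1 skips its upsample; with OS=8 both layer1 and layer2
# skip it, since the encoder features are already at higher resolution.
# A hypothetical instantiation (class name and channel tuple are illustrative, not the repo API):
#
# decoder = UnetDecoder(
#     encoder_channels=(2048, 1024, 512, 256, 64),  # deepest -> shallowest
#     decoder_channels=(256, 128, 64, 32, 16),
#     final_channels=1,
#     output_stride=32,
# )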
def _make_layer(self, planes, blocks, stride=1, dilation=1, use_se=None, norm_layer=None, norm_act=None, antialias=None):
    downsample = None
    if stride != 1 or self.inplanes != planes * self.expansion:
        downsample_layers = []
        if antialias and stride == 2:
            # using OrderedDict to preserve ordering and allow loading
            downsample_layers += [('blur', BlurPool())]
        downsample_layers += [
            ('0', conv1x1(self.inplanes, planes * self.expansion, stride=1 if antialias else stride)),
            ('1', norm_layer(planes * self.expansion, activation='identity')),
        ]
        downsample = nn.Sequential(OrderedDict(downsample_layers))
    layers = [
        self.block(self.inplanes, planes, stride, downsample, self.groups, self.base_width,
                   use_se, dilation, norm_layer, norm_act, antialias)
    ]
    self.inplanes = planes * self.expansion
    for _ in range(1, blocks):
        layers.append(
            self.block(self.inplanes, planes, 1, None, self.groups, self.base_width,
                       use_se, dilation, norm_layer, norm_act, antialias))
    return nn.Sequential(*layers)
def __init__(
    self,
    encoder_channels,
    prefinal_channels=32,
    final_channels=1,
    drop_rate=0,
    attn_type=None,
    **bn_params,  # norm layer, norm_act
):
    super().__init__()
    extra_params = {**bn_params, "attn_type": attn_type}
    in_channels = encoder_channels
    self.layer1 = LinknetDecoderBlock(in_channels[0], in_channels[1], **extra_params)
    self.layer2 = LinknetDecoderBlock(in_channels[1], in_channels[2], **extra_params)
    self.layer3 = LinknetDecoderBlock(in_channels[2], in_channels[3], **extra_params)
    self.layer4 = LinknetDecoderBlock(in_channels[3], in_channels[4], **extra_params)
    self.layer5 = LinknetDecoderBlock(in_channels[4], prefinal_channels, **extra_params)
    self.dropout = nn.Dropout2d(drop_rate, inplace=True)
    self.final_conv = conv1x1(prefinal_channels, final_channels)
def __init__(self, in_channels, key_channels, out_channels, norm_layer=ABN, norm_act="relu"):
    super().__init__()
    self.in_channels = in_channels
    self.key_channels = key_channels
    # query / key / value-style projections for object-contextual attention (as in OCRNet)
    self.f_pixel = nn.Sequential(
        conv1x1(in_channels, key_channels, bias=True),
        norm_layer(key_channels, activation=norm_act),
        conv1x1(key_channels, key_channels, bias=True),
        norm_layer(key_channels, activation=norm_act),
    )
    self.f_object = nn.Sequential(
        conv1x1(in_channels, key_channels, bias=True),
        norm_layer(key_channels, activation=norm_act),
        conv1x1(key_channels, key_channels, bias=True),
        norm_layer(key_channels, activation=norm_act),
    )
    self.f_down = nn.Sequential(
        conv1x1(in_channels, key_channels, bias=True),
        norm_layer(key_channels, activation=norm_act),
    )
    self.f_up = nn.Sequential(
        conv1x1(key_channels, in_channels, bias=True),
        norm_layer(in_channels, activation=norm_act),
    )
    self.conv_bn = nn.Sequential(
        conv1x1(2 * in_channels, out_channels, bias=True),
        norm_layer(out_channels, activation=norm_act),
    )
def __init__(
    self,
    encoder_name="hrnet_w18",
    encoder_weights="imagenet",
    pretrained=None,  # not used
    num_classes=1,
    last_upsample=True,
    OCR=False,
    drop_rate=0,
    norm_layer="inplace_abn",  # use memory efficient by default
    norm_act="leaky_relu",
    **encoder_params,
):
    super().__init__()
    self.encoder = get_encoder(
        encoder_name,
        encoder_weights=encoder_weights,
        norm_layer=norm_layer,
        norm_act=norm_act,
        **encoder_params,
    )
    norm_layer = bn_from_name(norm_layer)
    final_channels = sum(self.encoder.out_shapes[:4])
    self.OCR = OCR
    if OCR:
        self.conv3x3 = nn.Sequential(
            conv3x3(final_channels, 512, bias=True),
            norm_layer(512, activation=norm_act),
        )
        self.ocr_gather_head = SpatialOCR_Gather()
        self.ocr_distri_head = SpatialOCR(
            in_channels=512,
            key_channels=256,
            out_channels=512,
            norm_layer=norm_layer,
            norm_act=norm_act,
        )
        self.head = conv1x1(512, num_classes, bias=True)
        self.aux_head = nn.Sequential(  # in OCR first conv is 3x3
            conv3x3(final_channels, final_channels, bias=True),
            norm_layer(final_channels, activation=norm_act),
            conv1x1(final_channels, num_classes, bias=True),
        )
    else:
        self.head = nn.Sequential(
            conv1x1(final_channels, final_channels, bias=True),
            norm_layer(final_channels, activation=norm_act),
            conv1x1(final_channels, num_classes, bias=True),
        )
    up_kwargs = dict(mode="bilinear", align_corners=True)
    self.up_x2 = nn.Upsample(scale_factor=2, **up_kwargs)
    self.up_x4 = nn.Upsample(scale_factor=4, **up_kwargs)
    self.up_x8 = nn.Upsample(scale_factor=8, **up_kwargs)
    self.last_upsample = nn.Upsample(scale_factor=4, **up_kwargs) if last_upsample else nn.Identity()
    self.dropout = nn.Dropout2d(drop_rate)  # can't use inplace. it would raise a backprop error
    self.name = f"segm-{encoder_name}"
    # use lower momentum
    patch_bn_mom(self)
    self._init_weights()
def __init__(
    self,
    blocks_args=None,
    width_multiplier=None,
    depth_multiplier=None,
    pretrained=None,  # not used. here for proper signature
    num_classes=1000,
    in_channels=3,
    output_stride=32,
    encoder=False,
    drop_rate=0,
    drop_connect_rate=0,
    stem_size=32,
    norm_layer="abn",
    norm_act="swish",
    match_tf_same_padding=False,
):
    super().__init__()
    norm_layer = bn_from_name(norm_layer)
    self.norm_layer = norm_layer
    self.norm_act = norm_act
    self.width_multiplier = width_multiplier
    self.depth_multiplier = depth_multiplier
    stem_size = make_divisible(stem_size * width_multiplier)
    self.conv_stem = conv3x3(in_channels, stem_size, stride=2)
    self.bn1 = norm_layer(stem_size, activation=norm_act)
    in_channels = stem_size
    self.blocks = nn.ModuleList([])
    # modify block args to account for output_stride strategy
    blocks_args = _patch_block_args(blocks_args, output_stride)
    for block_idx, block_arg in enumerate(blocks_args):
        block = []
        block_arg["in_channels"] = make_divisible(block_arg["in_channels"] * self.width_multiplier)
        block_arg["out_channels"] = make_divisible(block_arg["out_channels"] * self.width_multiplier)
        block_arg["norm_layer"] = norm_layer
        block_arg["norm_act"] = norm_act
        # linearly scale keep prob
        block_arg["keep_prob"] = 1 - drop_connect_rate * block_idx / len(blocks_args)
        repeats = block_arg.pop("num_repeat")
        repeats = int(math.ceil(repeats * self.depth_multiplier))
        # when dilating conv with stride 2 we want it to have dilation // 2
        # it prevents checkerboard artifacts with OS=16 and OS=8
        dilation = block_arg.get("dilation", 1)  # save block values
        if block_arg.pop("no_first_dilation", False):
            block_arg["dilation"] = max(1, block_arg["dilation"] // 2)
        block.append(InvertedResidual(**block_arg))
        # only first layer in block is strided
        block_arg["stride"] = 1
        block_arg["dilation"] = dilation
        block_arg["in_channels"] = block_arg["out_channels"]
        for _ in range(repeats - 1):
            block.append(InvertedResidual(**block_arg))
        self.blocks.append(nn.Sequential(*block))
    # Head
    if encoder:
        self.forward = self.encoder_features
    else:
        out_channels = block_arg["out_channels"]
        num_features = make_divisible(1280 * width_multiplier)
        self.conv_head = conv1x1(out_channels, num_features)
        self.bn2 = norm_layer(num_features, activation=norm_act)
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(drop_rate, inplace=True)
        self.classifier = nn.Linear(num_features, num_classes)
    patch_bn(self)  # adjust epsilon
    initialize(self)
    if match_tf_same_padding:
        conv_to_same_conv(self)
        maxpool_to_same_maxpool(self)
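# NOTE (assumption): `make_divisible` is not shown in this excerpt. A common EfficientNet-style
# implementation rounds scaled channel counts to a multiple of 8 while never dropping more than
# ~10% below the requested value; the repo's version may differ in details.
#
# def make_divisible(value, divisor=8):
#     new_value = max(divisor, int(value + divisor / 2) // divisor * divisor)
#     if new_value < 0.9 * value:  # don't round down by more than 10%
#         new_value += divisor
#     return new_value
#
# e.g. width_multiplier=1.2 gives a stem of make_divisible(32 * 1.2) = 40 channels.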
def __init__(self, in_planes, out_planes, norm_layer=ABN, norm_act='relu'):
    super(_Transition, self).__init__()
    self.norm = norm_layer(in_planes, activation=norm_act)
    self.conv = conv1x1(in_planes, out_planes)
    self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
def __init__(
    self,
    stage_fn=None,
    block_fn=None,
    layers=None,  # num layers in each block
    channels=None,  # it's actually output channels. 256, 512, 1024, 2048 for R50
    pretrained=None,  # not used. here for proper signature
    num_classes=1000,
    in_channels=3,
    attn_type=None,
    # base_width=64,
    stem_type="default",
    norm_layer="abn",
    norm_act="leaky_relu",
    antialias=False,
    # encoder=False,
    bottle_ratio=0.25,  # how much to shrink channels in bottleneck layer
    no_first_csp=False,  # make first stage a Simple Stage
    drop_rate=0.0,
    drop_connect_rate=0.0,
    expand_before_head=True,  # add additional conv from 512 -> 2048 to avoid representational bottleneck
    mobilenetv3_head=False,  # put GAP first, then expand convs
    **block_kwargs,
):
    stem_width = 64
    norm_layer = bn_from_name(norm_layer)
    self.num_classes = num_classes
    self.norm_act = norm_act
    self.block_idx = 0  # for drop connect
    self.drop_connect_rate = drop_connect_rate
    super().__init__()
    if block_fn != SimplePreActBottleneck:
        stem_norm = norm_layer(stem_width, activation=norm_act)
    else:
        stem_norm = nn.Identity()
    if stem_type == "default":
        self.stem_conv1 = nn.Sequential(
            nn.Conv2d(3, stem_width, kernel_size=7, stride=2, padding=3, bias=False),
            stem_norm,
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        first_stride = 1
    elif stem_type == "s2d":
        # instead of default stem I'm using Space2Depth followed by conv. no norm because there is one
        # at the beginning of DarkStage. upd. there is norm in not PreAct version
        self.stem_conv1 = nn.Sequential(
            SpaceToDepth(block_size=2),
            conv3x3(in_channels * 4, stem_width),
            stem_norm,
            # nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        first_stride = 2
    # blocks
    largs = dict(
        stride=2,
        bottle_ratio=bottle_ratio,
        block_fn=block_fn,
        attn_type=attn_type,
        norm_layer=norm_layer,
        norm_act=norm_act,
        # antialias=antialias,
        **block_kwargs,
    )
    first_stage_fn = SimpleStage if no_first_csp else stage_fn
    # fmt: off
    self.layer1 = first_stage_fn(
        in_chs=stem_width,
        out_chs=channels[0],
        num_blocks=layers[0],
        keep_prob=self.keep_prob,
        **{**largs, "stride": first_stride},  # overwrite default stride
        # **{**largs, "antialias": False}  # antialias in first stage is too expensive
    )
    self.layer2 = stage_fn(in_chs=channels[0], out_chs=channels[1], num_blocks=layers[1], keep_prob=self.keep_prob, **largs)
    self.layer3 = stage_fn(in_chs=channels[1], out_chs=channels[2], num_blocks=layers[2], keep_prob=self.keep_prob, **largs)
    self.layer4 = stage_fn(in_chs=channels[2], out_chs=channels[3], num_blocks=layers[3], keep_prob=self.keep_prob, **largs)
    # fmt: on
    # self.global_pool = FastGlobalAvgPool2d(flatten=True)
    # self.dropout = nn.Dropout(p=drop_rate, inplace=True)
    head_layers = []
    # this is a very dirty if but i don't care for now
    if mobilenetv3_head:
        head_layers.append(FastGlobalAvgPool2d(flatten=True))
        if channels[3] < 2048 and expand_before_head:
            head_layers.append(nn.Linear(channels[3], 2048))  # no norm here as in original MobilenetV3 from google
            head_layers.append(pt.modules.activations.activation_from_name(norm_act))
        head_layers.append(nn.Linear(2048 if expand_before_head else channels[3], num_classes))
    else:
        if channels[3] < 2048 and expand_before_head:
            if block_fn == SimplePreActBottleneck:
                # for PreAct add additional BN here
                head_layers.append(norm_layer(channels[3], activation=norm_act))
            head_layers.extend([conv1x1(channels[3], 2048), norm_layer(2048, activation=norm_act)])
        head_layers.extend([
            FastGlobalAvgPool2d(flatten=True),
            nn.Linear(2048 if expand_before_head else channels[3], num_classes),
        ])
    # self.head = nn.Sequential(
    #     conv1x1(channels[3], 2048),
    #     norm_layer(activation=norm_act),
    #     # norm_layer(1024, activation=norm_act),
    #     FastGlobalAvgPool2d(flatten=True),
    #     nn.Linear(2048, num_classes),
    # )
    self.head = nn.Sequential(*head_layers)
    initialize(self)
def __init__(
    self,
    stage_fns=None,  # list of nn.Module
    block_fns=None,  # list of nn.Module
    stage_args=None,  # list of dicts
    layers=None,  # num layers in each block
    channels=None,  # it's actually output channels. 256, 512, 1024, 2048 for R50
    # pretrained=None,  # not used. here for proper signature
    num_classes=1000,
    in_channels=3,
    norm_layer="abn",
    norm_act="leaky_relu",
    head_norm_act="leaky_relu",  # activation in head
    stem_type="default",
    # antialias=False,
    # encoder=False,
    # drop_rate=0.0,
    drop_connect_rate=0.0,
    head_width=2048,
    stem_width=64,
    head_type="default",  # type of head
):
    norm_layer = bn_from_name(norm_layer)
    self.num_classes = num_classes
    self.norm_act = norm_act
    self.block_idx = 0  # for drop connect
    self.drop_connect_rate = drop_connect_rate
    super().__init__()
    first_norm = nn.Identity() if block_fns[0].startswith("Pre") else norm_layer(stem_width, activation=norm_act)
    if stem_type == "default":
        self.stem_conv1 = nn.Sequential(conv3x3(in_channels, stem_width, stride=2), first_norm)
    elif stem_type == "s2d":
        # instead of default stem I'm using Space2Depth followed by conv. no norm because there is one
        # at the beginning of DarkStage. upd. there is norm in not PreAct version
        self.stem_conv1 = nn.Sequential(
            SpaceToDepth(block_size=2),
            conv3x3(in_channels * 4, stem_width),
            first_norm,
        )
    else:
        raise ValueError(f"Stem type `{stem_type}` is not supported")
    bn_args = dict(norm_layer=norm_layer, norm_act=norm_act)
    block_name_to_module = {
        "XX": SimpleBasicBlock,
        "Pre_XX": SimplePreActBasicBlock,
        "Pre_XX_Res2": SimplePreActRes2BasicBlock,
        "Btl": SimpleBottleneck,
        "Pre_Btl": SimplePreActBottleneck,
        "IR": SimpleInvertedResidual,
        "Pre_IR": SimplePreActInvertedResidual,
        "Sep2": SimpleSeparable_2,
        "Pre_Sep2": SimplePreActSeparable_2,
        "Sep3": SimpleSeparable_3,
        "Pre_Custom_2": PreBlock_2,
    }
    stage_name_to_module = {"simpl": SimpleStage}
    # set stride=2 for all blocks
    # using **{**bn_args, **stage_args} to allow updating norm layer for particular stage
    self.layer1 = stage_name_to_module[stage_fns[0]](
        block_fn=block_name_to_module[block_fns[0]],
        in_chs=stem_width,
        out_chs=channels[0],
        num_blocks=layers[0],
        stride=2,
        **{**bn_args, **stage_args[0]},
    )
    self.layer2 = stage_name_to_module[stage_fns[1]](
        block_fn=block_name_to_module[block_fns[1]],
        in_chs=channels[0],
        out_chs=channels[1],
        num_blocks=layers[1],
        stride=2,
        **{**bn_args, **stage_args[1]},
    )
    self.layer3 = stage_name_to_module[stage_fns[2]](
        block_fn=block_name_to_module[block_fns[2]],
        in_chs=channels[1],
        out_chs=channels[2],
        num_blocks=layers[2],
        stride=2,
        **{**bn_args, **stage_args[2]},
    )
    extra_stage3_filters = stage_args[2].get("filter_steps", 0) * (layers[2] - 1)
    self.layer4 = stage_name_to_module[stage_fns[3]](
        block_fn=block_name_to_module[block_fns[3]],
        in_chs=channels[2] + extra_stage3_filters,
        out_chs=channels[3],
        num_blocks=layers[3],
        stride=2,
        **{**bn_args, **stage_args[3]},
    )
    extra_stage4_filters = stage_args[3].get("filter_steps", 0) * (layers[3] - 1)
    channels[3] += extra_stage4_filters  # TODO: rewrite it cleaner instead of doing inplace
    last_norm = norm_layer(channels[3], activation=norm_act) if block_fns[0].startswith("Pre") else nn.Identity()
    if head_type == "mobilenetv3":
        # Mbln v3 head. GAP first, then expand convs
        self.head = nn.Sequential(
            last_norm,
            FastGlobalAvgPool2d(flatten=True),
            nn.Linear(channels[3], head_width),
            pt.modules.activations.activation_from_name(head_norm_act),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "mobilenetv3_norm":
        # mobilenet with last norm. Mbln v3 head: GAP first, then expand convs
        self.head = nn.Sequential(
            last_norm,
            FastGlobalAvgPool2d(flatten=True),
            nn.Linear(channels[3], head_width),
            nn.BatchNorm1d(head_width),
            pt.modules.activations.activation_from_name(head_norm_act),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "default":
        self.head = nn.Sequential(
            last_norm,
            conv1x1(channels[3], head_width),
            norm_layer(head_width, activation=head_norm_act),
            FastGlobalAvgPool2d(flatten=True),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "default_nonorm":
        # if used in angular losses don't want norm
        self.head = nn.Sequential(
            last_norm,
            conv1x1(channels[3], head_width, bias=True),  # need bias because not followed by norm
            FastGlobalAvgPool2d(flatten=True),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "mlp_bn_fc_bn":
        self.head = nn.Sequential(
            last_norm,
            conv1x1(channels[3], channels[3]),
            FastGlobalAvgPool2d(flatten=True),
            nn.BatchNorm1d(channels[3]),
            pt.modules.activations.activation_from_name(head_norm_act),
            nn.Linear(channels[3], head_width, bias=False),
            nn.BatchNorm1d(head_width, affine=False),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "mlp_bn_fc":
        # same as above but without last BN
        self.head = nn.Sequential(
            last_norm,
            conv1x1(channels[3], channels[3]),
            FastGlobalAvgPool2d(flatten=True),
            nn.BatchNorm1d(channels[3]),
            pt.modules.activations.activation_from_name(head_norm_act),
            nn.Linear(channels[3], head_width, bias=False),
        )
        self.last_linear = nn.Linear(head_width, num_classes)
    elif head_type == "mlp_2":
        assert isinstance(head_width, (tuple, list)), head_width
        # like Mbln v3 head. GAP first, then MLP convs
        self.head = nn.Sequential(
            last_norm,
            FastGlobalAvgPool2d(flatten=True),
            nn.Linear(channels[3], head_width[0]),
            nn.BatchNorm1d(head_width[0]),
            pt.modules.activations.activation_from_name(head_norm_act),
            nn.Linear(head_width[0], head_width[1]),
            nn.BatchNorm1d(head_width[1]),
            pt.modules.activations.activation_from_name(head_norm_act),
        )
        self.last_linear = nn.Linear(head_width[1], num_classes)
    elif head_type == "mlp_3":
        assert isinstance(head_width, (tuple, list)), head_width
        # like Mbln v3 head. GAP first, then MLP convs
        self.head = nn.Sequential(
            last_norm,
            FastGlobalAvgPool2d(flatten=True),
            nn.Linear(channels[3], head_width[0]),
            nn.BatchNorm1d(head_width[0]),
            pt.modules.activations.activation_from_name(head_norm_act),
            nn.Linear(head_width[0], head_width[1]),
            nn.BatchNorm1d(head_width[1]),
            pt.modules.activations.activation_from_name(head_norm_act),
            nn.Linear(head_width[1], head_width[2]),
            nn.BatchNorm1d(head_width[2]),
            pt.modules.activations.activation_from_name(head_norm_act),
        )
        self.last_linear = nn.Linear(head_width[2], num_classes)
    else:
        raise ValueError(f"Head type: {head_type} is not supported!")
    initialize(self)
def __init__(
    self,
    pretrained="coco",  # Not used. here for proper signature
    encoder_name="efficientnet_b0",
    encoder_weights="imagenet",
    pyramid_channels=64,
    num_fpn_layers=3,
    num_head_repeats=3,
    num_classes=90,
    encoder_norm_layer="frozenabn",
    encoder_norm_act="swish",
    decoder_norm_layer="abn",
    decoder_norm_act="swish",
    match_tf_same_padding=False,
    anchors_per_location=9,
    **encoder_params,
):
    super().__init__()
    self.encoder = get_encoder(
        encoder_name,
        norm_layer=encoder_norm_layer,
        norm_act=encoder_norm_act,
        encoder_weights=encoder_weights,
        **encoder_params,
    )
    norm_layer = bn_from_name(decoder_norm_layer)
    bn_args = dict(norm_layer=norm_layer, norm_act=decoder_norm_act)
    self.pyramid6 = nn.Sequential(
        conv1x1(self.encoder.out_shapes[0], pyramid_channels, bias=True),
        norm_layer(pyramid_channels, activation="identity"),
        nn.MaxPool2d(3, stride=2, padding=1),
    )
    self.pyramid7 = nn.MaxPool2d(3, stride=2, padding=1)  # in EffDet it's a simple maxpool
    self.bifpn = BiFPN(
        encoder_channels=(pyramid_channels,) * 2 + self.encoder.out_shapes[:-2],
        pyramid_channels=pyramid_channels,
        num_layers=num_fpn_layers,
        **bn_args,
    )

    def make_head(out_size):
        layers = []
        for _ in range(num_head_repeats):
            layers += [DepthwiseSeparableConv(pyramid_channels, pyramid_channels, use_norm=False)]
        return nn.ModuleList(layers)

    # The convolution layers in the head are shared among all levels, but
    # each level has its batch normalization to capture the statistical
    # difference among different levels.
    def make_head_norm():
        return nn.ModuleList([
            nn.ModuleList([
                norm_layer(pyramid_channels, activation=decoder_norm_act)
                for _ in range(num_head_repeats)
            ])
            for _ in range(5)
        ])

    self.cls_convs = make_head(num_classes * anchors_per_location)
    self.cls_head_conv = DepthwiseSeparableConv(pyramid_channels, num_classes * anchors_per_location, use_norm=False)
    self.cls_norms = make_head_norm()
    self.box_convs = make_head(4 * anchors_per_location)
    self.box_head_conv = DepthwiseSeparableConv(pyramid_channels, 4 * anchors_per_location, use_norm=False)
    self.box_norms = make_head_norm()
    self.num_classes = num_classes
    self.num_head_repeats = num_head_repeats

    patch_bn_tf(self)
    self._initialize_weights()
    if match_tf_same_padding:
        conv_to_same_conv(self)
        maxpool_to_same_maxpool(self)
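# NOTE (illustrative sketch, not the repo's actual forward): the head convs built by `make_head`
# are shared across all 5 pyramid levels, while `make_head_norm` gives every level its own norm.
# Application typically looks roughly like this (loop and variable names are hypothetical):
#
# for level_idx, feat in enumerate(pyramid_features):          # 5 levels from BiFPN
#     for conv, norm in zip(self.cls_convs, self.cls_norms[level_idx]):
#         feat = norm(conv(feat))                               # shared conv, level-specific norm
#     cls_outputs.append(self.cls_head_conv(feat))              # per-anchor class logits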
def _make_layer(
    self,
    planes,
    blocks,
    stride=1,
    dilation=1,
    attn_type=None,
    norm_layer=None,
    norm_act=None,
    antialias=None,
):
    downsample = None
    if stride != 1 or self.inplanes != planes * self.expansion:
        downsample_layers = []
        if antialias and stride == 2:
            # using OrderedDict to preserve ordering and allow loading
            downsample_layers += [("blur", nn.AvgPool2d(2, 2))]
        downsample_layers += [
            ("0", conv1x1(self.inplanes, planes * self.expansion, stride=1 if antialias else stride)),
            ("1", norm_layer(planes * self.expansion, activation="identity")),
        ]
        downsample = nn.Sequential(OrderedDict(downsample_layers))
    # removes first dilation to avoid checkerboard artifacts
    first_dilation = max(1, dilation // 2)
    layers = [
        self.block(
            inplanes=self.inplanes,
            planes=planes,
            stride=stride,
            downsample=downsample,
            groups=self.groups,
            base_width=self.base_width,
            attn_type=attn_type,
            dilation=first_dilation,
            norm_layer=norm_layer,
            norm_act=norm_act,
            antialias=antialias,
            keep_prob=self.keep_prob,
        )
    ]
    self.inplanes = planes * self.expansion
    for _ in range(1, blocks):
        layers.append(
            self.block(
                inplanes=self.inplanes,
                planes=planes,
                groups=self.groups,
                base_width=self.base_width,
                attn_type=attn_type,
                dilation=first_dilation,
                norm_layer=norm_layer,
                norm_act=norm_act,
                antialias=antialias,
                keep_prob=self.keep_prob,
            ))
    return nn.Sequential(*layers)