def _make_fuse_layers(self):
    """Build the modules that map every branch onto every output branch.

    Returns:
        ``None`` when ``self.num_branches == 1``. Otherwise an
        ``nn.ModuleList`` holding one inner ``nn.ModuleList`` per output
        branch ``i`` (all branches when ``self.multi_scale_output`` is truthy,
        otherwise only branch 0). Entry ``j`` of an inner list is:

        * ``j > i``: a 1x1 convolution from ``num_inchannels[j]`` to
          ``num_inchannels[i]`` followed by ``ABN`` (``momentum=BN_MOMENTUM``);
        * ``j == i``: ``None``;
        * ``j < i``: a chain of ``i - j`` stride-2 3x3 convolutions, each
          followed by ``ABN``; only the last one switches to
          ``num_inchannels[i]`` output channels.
    """
    if self.num_branches == 1:
        return None

    branch_count = self.num_branches
    channels = self.num_inchannels
    output_count = branch_count if self.multi_scale_output else 1

    fuse_layers = []
    for i in range(output_count):
        row = []
        for j in range(branch_count):
            if j == i:
                # A branch is never fused with itself.
                row.append(None)
            elif j > i:
                row.append(
                    nn.Sequential(
                        nn.Conv2d(channels[j], channels[i], 1, 1, 0,
                                  bias=False),
                        ABN(channels[i], momentum=BN_MOMENTUM)))
            else:
                steps = i - j
                stages = []
                for k in range(steps):
                    # Intermediate steps keep the source width; the final
                    # step produces the target branch width.
                    out_ch = channels[i] if k == steps - 1 else channels[j]
                    stages.append(
                        nn.Sequential(
                            nn.Conv2d(channels[j], out_ch, 3, 2, 1,
                                      bias=False),
                            ABN(out_ch)))
                row.append(nn.Sequential(*stages))
        fuse_layers.append(nn.ModuleList(row))

    return nn.ModuleList(fuse_layers)
def InplacABN_to_ABN(module: nn.Module) -> nn.Module:
    """Recursively replace every ``InPlaceABN`` layer with an equivalent ``ABN``.

    Args:
        module: Root module to convert. Container modules are modified in
            place (their ``_modules`` entries are rebound).

    Returns:
        A new ``ABN`` when ``module`` itself is an ``InPlaceABN``; otherwise
        ``module`` with all nested ``InPlaceABN`` children replaced.

    Notes:
        * ``eps``, ``momentum`` and ``affine`` are copied from the source
          layer so that layers built with non-default values convert
          faithfully (the weight adjustment below depends on ``eps``).
        * The replacement weight is set to ``abs(weight) + eps``; this is
          presumably how ``InPlaceABN`` applies its weight internally --
          confirm against the installed ``inplace_abn`` version.
        * ``None`` children (e.g. placeholder entries in a ``ModuleList``)
          are skipped instead of raising ``AttributeError``.
    """
    if isinstance(module, InPlaceABN):
        module_new = ABN(module.num_features,
                         eps=module.eps,
                         momentum=module.momentum,
                         affine=module.affine,
                         activation=module.activation,
                         activation_param=module.activation_param)
        # state_dict() tensors share storage with the module, so copying
        # into them updates the new layer's parameters and buffers.
        source_state = module.state_dict()
        target_state = module_new.state_dict()
        for key, value in source_state.items():
            target_state[key].copy_(value)
        module_new.training = module.training
        if module_new.weight is not None:  # weight is None when affine=False
            module_new.weight.data = module_new.weight.abs() + module_new.eps
        return module_new

    # Snapshot the items so rebinding entries cannot disturb the iteration.
    for name, child in reversed(list(module._modules.items())):
        if child is None:
            continue
        new_child = InplacABN_to_ABN(child)
        if new_child is not child:
            module._modules[name] = new_child
    return module
def __init__(self, inplanes, planes, stride=1, downsample=None, act=Mish()):
    """Set up the two conv3x3 + ABN stages of a basic residual block.

    Args:
        inplanes: Input channel count of the first convolution.
        planes: Output channel count of both convolutions.
        stride: Stride handed to the first ``conv3x3``; also stored on the
            instance.
        downsample: Optional module stored as ``self.downsample``.
        act: Accepted for interface compatibility; this constructor does not
            use it. NOTE: the default ``Mish()`` is created once, when the
            function is defined.
    """
    super(BasicBlock, self).__init__()

    # First stage: may change resolution through ``stride``.
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = ABN(planes)

    # Second stage: keeps the channel count.
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = ABN(planes)

    self.downsample = downsample
    self.stride = stride
def __init__(self, in_channels, key_channels, value_channels,
             out_channels=None, scale=1):
    """Create the projections of a 2D self-attention block.

    Args:
        in_channels: Channel count of the input feature map.
        key_channels: Output channels of the shared key/query projection.
        value_channels: Output channels of the value projection.
        out_channels: Output channels of the final projection ``W``;
            falls back to ``in_channels`` when ``None``.
        scale: Kernel size of the max-pooling layer stored as ``self.pool``.
    """
    super().__init__()
    self.scale = scale
    self.in_channels = in_channels
    self.out_channels = in_channels if out_channels is None else out_channels
    self.key_channels = key_channels
    self.value_channels = value_channels

    self.pool = nn.MaxPool2d(kernel_size=(scale, scale))

    key_projection = nn.Sequential(
        nn.Conv2d(in_channels=self.in_channels,
                  out_channels=self.key_channels,
                  kernel_size=1, stride=1, padding=0),
        ABN(self.key_channels))
    self.f_key = key_projection
    # Query and key deliberately share one module (and thus one weight set).
    self.f_query = key_projection

    self.f_value = nn.Conv2d(in_channels=self.in_channels,
                             out_channels=self.value_channels,
                             kernel_size=1, stride=1, padding=0)
    self.W = nn.Conv2d(in_channels=self.value_channels,
                       out_channels=self.out_channels,
                       kernel_size=1, stride=1, padding=0)

    # The output projection starts at zero.
    for tensor in (self.W.weight, self.W.bias):
        nn.init.constant_(tensor, 0)
def __init__(self, in_channels, middle_channels, out_channels,
             use_self_attention=True):
    """Assemble the decoder stage as a single ``nn.Sequential`` in ``self.block``.

    The sequence is: 3x3 convolution -> ``ABN`` -> ``BaseOC`` context module
    -> transposed convolution (kernel 4, stride 2, padding 1).

    Args:
        in_channels: Input channels of the 3x3 convolution.
        middle_channels: Channels used between the convolution and the
            transposed convolution; key/value channels of ``BaseOC`` are
            ``middle_channels // 2``.
        out_channels: Output channels of the transposed convolution.
        use_self_attention: Forwarded to ``BaseOC``.
    """
    super().__init__()

    reduce_conv = nn.Conv2d(in_channels, middle_channels,
                            kernel_size=3, padding=1)
    norm = ABN(middle_channels)
    context = BaseOC(in_channels=middle_channels,
                     out_channels=middle_channels,
                     key_channels=middle_channels // 2,
                     value_channels=middle_channels // 2,
                     dropout=0.2,
                     use_self_attention=use_self_attention)
    # Parameters were chosen to avoid artifacts, suggested by
    # https://distill.pub/2016/deconv-checkerboard/
    upsample = nn.ConvTranspose2d(middle_channels, out_channels,
                                  kernel_size=4, stride=2, padding=1)

    self.block = nn.Sequential(reduce_conv, norm, context, upsample)
def _make_one_branch(self, branch_index, block, num_blocks, num_channels,
                     stride=1):
    """Stack the residual blocks of one branch into an ``nn.Sequential``.

    Args:
        branch_index: Index of the branch being built.
        block: Block class; must expose an ``expansion`` attribute.
        num_blocks: Per-branch number of blocks.
        num_channels: Per-branch base channel count.
        stride: Stride of the first block (and of the shortcut projection).

    Side effects:
        ``self.num_inchannels[branch_index]`` is updated to
        ``num_channels[branch_index] * block.expansion``.
    """
    width = num_channels[branch_index]
    expanded = width * block.expansion
    incoming = self.num_inchannels[branch_index]

    # A projection shortcut is needed whenever shape or channel count changes.
    shortcut = None
    if stride != 1 or incoming != expanded:
        shortcut = nn.Sequential(
            nn.Conv2d(incoming, expanded,
                      kernel_size=1, stride=stride, bias=False),
            ABN(expanded, momentum=BN_MOMENTUM),
        )

    layers = [block(incoming, width, stride, shortcut)]
    self.num_inchannels[branch_index] = expanded
    for _ in range(1, num_blocks[branch_index]):
        layers.append(block(self.num_inchannels[branch_index], width))

    return nn.Sequential(*layers)
def replace_bn(bn, act=None):
    """Build an ``ABN`` layer equivalent to ``bn`` fused with ``act``.

    Args:
        bn: Batch-norm layer providing ``num_features``, ``eps``,
            ``momentum``, ``affine``, ``track_running_stats`` and the state
            dict that is loaded into the new layer.
        act: Activation module that follows ``bn``. ``nn.ReLU`` is
            approximated by a leaky ReLU with slope 0.01, ``nn.LeakyReLU``
            keeps its own ``negative_slope``, ``nn.ELU`` maps to ``'elu'``;
            anything else (including ``None``) maps to ``'none'``.

    Returns:
        The new ``ABN`` layer with ``bn``'s parameters and buffers loaded.
    """
    # Default slope; also used to approximate a plain ReLU.
    slope = 0.01
    if isinstance(act, nn.ReLU):
        activation = 'leaky_relu'  # approximate relu
    elif isinstance(act, nn.LeakyReLU):
        activation = 'leaky_relu'
        # Previously assigned to a differently spelled variable, so the
        # layer's real negative slope never reached ABN.
        slope = act.negative_slope
    elif isinstance(act, nn.ELU):
        activation = 'elu'
    else:
        activation = 'none'

    abn = ABN(num_features=bn.num_features,
              eps=bn.eps,
              momentum=bn.momentum,
              affine=bn.affine,
              track_running_stats=bn.track_running_stats,
              activation=activation,
              slope=slope)
    abn.load_state_dict(bn.state_dict())
    return abn
def __init__(self, inplanes, planes, stride=1, downsample=None, act=Mish()):
    """Set up the 1x1 -> 3x3 -> 1x1 convolution stack of a bottleneck block.

    Args:
        inplanes: Input channel count of the first 1x1 convolution.
        planes: Channel count of the inner 3x3 convolution; the last 1x1
            convolution outputs ``planes * self.expansion`` channels.
        stride: Stride of the 3x3 convolution; also stored on the instance.
        downsample: Optional module stored as ``self.downsample``.
        act: Accepted for interface compatibility; this constructor does not
            use it. NOTE: the default ``Mish()`` is created once, when the
            function is defined.
    """
    super(Bottleneck, self).__init__()

    # 1x1 reduction.
    self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
    self.bn1 = ABN(planes)

    # 3x3 convolution carrying the stride.
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                           padding=1, bias=False)
    self.bn2 = ABN(planes)

    # 1x1 expansion.
    self.conv3 = nn.Conv2d(planes, planes * self.expansion,
                           kernel_size=1, bias=False)
    self.bn3 = ABN(planes * self.expansion)

    self.downsample = downsample
    self.stride = stride
def _make_transition_layer(self, num_channels_pre_layer,
                           num_channels_cur_layer):
    """Build the per-branch adapters between two consecutive stages.

    Args:
        num_channels_pre_layer: Channel counts of the previous stage's
            branches.
        num_channels_cur_layer: Channel counts of the current stage's
            branches.

    Returns:
        ``nn.ModuleList`` with one entry per current branch ``i``:

        * existing branch, same width: ``None``;
        * existing branch, different width: stride-1 3x3 convolution + ``ABN``;
        * new branch: a chain of ``i + 1 - len(pre)`` stride-2 3x3
          convolutions + ``ABN``, each taking ``num_channels_pre_layer[-1]``
          input channels; only the last one outputs
          ``num_channels_cur_layer[i]`` channels.
    """
    pre = num_channels_pre_layer
    cur = num_channels_cur_layer
    pre_count = len(pre)

    transition_layers = []
    for i in range(len(cur)):
        if i >= pre_count:
            # Branch that did not exist in the previous stage.
            last_step = i - pre_count
            chain = []
            for j in range(last_step + 1):
                inchannels = pre[-1]
                outchannels = cur[i] if j == last_step else inchannels
                chain.append(
                    nn.Sequential(
                        nn.Conv2d(inchannels, outchannels, 3, 2, 1,
                                  bias=False),
                        ABN(outchannels)))
            transition_layers.append(nn.Sequential(*chain))
        elif cur[i] != pre[i]:
            transition_layers.append(
                nn.Sequential(
                    nn.Conv2d(pre[i], cur[i], 3, 1, 1, bias=False),
                    ABN(cur[i])))
        else:
            transition_layers.append(None)

    return nn.ModuleList(transition_layers)
def _make_layer(self, block, inplanes, planes, blocks, stride=1):
    """Stack ``blocks`` residual blocks into an ``nn.Sequential``.

    Args:
        block: Block class; must expose ``expansion`` and accept an ``act``
            keyword argument.
        inplanes: Input channel count of the first block.
        planes: Base channel count of every block.
        blocks: Number of blocks to stack.
        stride: Stride of the first block (and of the shortcut projection).

    Returns:
        The stacked blocks. Every block receives ``act=self.relu``;
        previously only the first block did, so the remaining blocks
        silently fell back to the block class's default activation.
    """
    out_planes = planes * block.expansion

    # A projection shortcut is needed whenever shape or channel count changes.
    downsample = None
    if stride != 1 or inplanes != out_planes:
        downsample = nn.Sequential(
            nn.Conv2d(inplanes, out_planes,
                      kernel_size=1, stride=stride, bias=False),
            ABN(out_planes),
        )

    layers = [block(inplanes, planes, stride, downsample, act=self.relu)]
    for _ in range(1, blocks):
        layers.append(block(out_planes, planes, act=self.relu))

    return nn.Sequential(*layers)
def __init__(self, in_channels, out_channels, key_channels, value_channels,
             dropout, use_self_attention, sizes=(1, )):
    """Create the optional attention stages and the output projection.

    Args:
        in_channels: Channel count of the input feature map.
        out_channels: Output channels of the attention blocks and of the
            final 1x1 convolution.
        key_channels: Forwarded to each ``SelfAttentionBlock2D``.
        value_channels: Forwarded to each ``SelfAttentionBlock2D``.
        dropout: Probability of the trailing ``nn.Dropout2d``.
        use_self_attention: When truthy, one ``SelfAttentionBlock2D`` per
            entry of ``sizes`` is created in ``self.stages`` and the final
            convolution expects ``2 * in_channels`` input channels;
            otherwise ``self.stages`` is not created and it expects
            ``in_channels``.
        sizes: Scale argument for each attention block.
    """
    super().__init__()
    self.use_self_attention = use_self_attention

    if use_self_attention:
        attention_blocks = []
        for size in sizes:
            attention_blocks.append(
                SelfAttentionBlock2D(in_channels, key_channels,
                                     value_channels, out_channels, size))
        self.stages = nn.ModuleList(attention_blocks)
        channels = 2 * in_channels
    else:
        channels = in_channels

    self.conv_bn_dropout = nn.Sequential(
        nn.Conv2d(channels, out_channels, kernel_size=1, padding=0),
        ABN(out_channels),
        nn.Dropout2d(dropout))
def __init__(self, basenet='vgg11', num_filters=16, pretrained='imagenet'):
    """Assemble the encoder, context module, decoders and logit heads.

    Args:
        basenet: Backbone name passed to ``create_basenet``. Names starting
            with ``'vgg'`` use the first backbone stage directly as
            ``encoder1``; any other backbone gets a 2x bilinear upsample in
            front of it and a different ``decoder2``/``decoder1`` layout.
        num_filters: Base width used to size decoders and logit heads.
        pretrained: Passed to ``create_basenet``.

    NOTE(review): ``self.logit`` expects 128 input channels and
    ``self.fuse_image`` 512 input features regardless of ``num_filters``.
    """
    super().__init__()
    # Only the stage list is used; the two other return values are ignored.
    net, _bn, _n_pretrained = create_basenet(basenet, pretrained)
    is_vgg = basenet.startswith('vgg')

    def attention_decoder(in_ch, mid_ch, out_ch):
        # Every decoder stage in this network enables self-attention.
        return Decoder(in_ch, mid_ch, out_ch, use_self_attention=True)

    def logit_head(in_ch):
        # Dropout -> 3x3 conv -> ABN -> 1x1 conv down to a single channel.
        return nn.Sequential(
            nn.Dropout2d(p=0.5, inplace=True),
            nn.Conv2d(in_ch, num_filters, kernel_size=3, padding=1),
            ABN(num_filters),
            nn.Conv2d(num_filters, 1, kernel_size=1),
        )

    # ---- encoder -------------------------------------------------------
    if is_vgg:
        self.encoder1 = net[0]  # 64
    else:
        # add upsample
        self.encoder1 = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear',
                        align_corners=False),
            net[0])
        self.encoder1.out_channels = net[0].out_channels
    self.encoder2 = net[1]  # 64
    self.encoder3 = net[2]  # 128
    self.encoder4 = net[3]  # 256

    context_channels = num_filters * 8 * 4
    self.encoder5 = nn.Sequential(
        net[4],
        nn.Conv2d(net[4].out_channels, context_channels,
                  kernel_size=3, stride=1, padding=1),
        ABN(context_channels, activation='relu'),
        BaseOC(in_channels=context_channels,
               out_channels=context_channels,
               key_channels=context_channels // 2,
               value_channels=context_channels // 2,
               dropout=0.05,
               use_self_attention=True))
    self.encoder5.out_channels = context_channels

    # ---- image-level branch -------------------------------------------
    self.fuse_image = nn.Sequential(nn.Linear(512, 64),
                                    nn.ReLU(inplace=True))
    self.logit_image = nn.Sequential(nn.Linear(64, 1))

    # ---- decoder -------------------------------------------------------
    self.pool = nn.MaxPool2d(2, 2)
    self.center = attention_decoder(self.encoder5.out_channels,
                                    num_filters * 8 * 2,
                                    num_filters * 8)
    self.decoder5 = attention_decoder(
        self.encoder5.out_channels + num_filters * 8,
        num_filters * 8 * 2,
        num_filters * 8)
    self.decoder4 = attention_decoder(
        self.encoder4.out_channels + num_filters * 8,
        num_filters * 8 * 2,
        num_filters * 4)
    self.decoder3 = attention_decoder(
        self.encoder3.out_channels + num_filters * 4,
        num_filters * 4 * 2,
        num_filters * 2)

    if is_vgg:
        self.decoder2 = attention_decoder(
            self.encoder2.out_channels + num_filters * 2,
            num_filters * 2 * 2,
            num_filters)
        self.decoder1 = nn.Sequential(
            nn.Conv2d(self.encoder1.out_channels + num_filters, num_filters,
                      kernel_size=3, padding=1),
            nn.ReLU(inplace=True))
    else:
        self.decoder2 = nn.Sequential(
            nn.Conv2d(self.encoder2.out_channels + num_filters * 2,
                      num_filters * 2 * 2,
                      kernel_size=3, padding=1),
            ABN(num_filters * 2 * 2),
            nn.Conv2d(num_filters * 2 * 2, num_filters,
                      kernel_size=3, padding=1),
            ABN(num_filters))
        self.decoder1 = attention_decoder(
            self.encoder1.out_channels + num_filters,
            num_filters * 2,
            num_filters)

    # ---- output heads --------------------------------------------------
    self.logit = logit_head(128)
    self.fuse_pixel = nn.Sequential(
        nn.Dropout2d(p=0.5, inplace=True),
        nn.Conv2d(num_filters * (8 + 4 + 2 + 1 + 1), 64,
                  kernel_size=1, padding=0))
    self.logit_pixel5 = logit_head(num_filters * 8)
    self.logit_pixel4 = logit_head(num_filters * 4)
    self.logit_pixel3 = logit_head(num_filters * 2)
    self.logit_pixel2 = logit_head(num_filters)
    self.logit_pixel1 = logit_head(num_filters)
def __init__(self, n_class, act=Mish(), **kwargs):
    """Build the stem, ``layer1`` and stages 2-4 of the network.

    Args:
        n_class: Not used by the code visible in this constructor.
        act: Activation module stored as ``self.relu``. NOTE: the default
            ``Mish()`` is created once, when the function is defined.
        **kwargs: Accepted and ignored by the code visible here.
    """
    super(HRNetV2, self).__init__()

    # Fixed stage configuration.
    extra = {
        'STAGE2': {
            'NUM_MODULES': 1,
            'NUM_BRANCHES': 2,
            'BLOCK': 'BASIC',
            'NUM_BLOCKS': (4, 4),
            'NUM_CHANNELS': (48, 96),
            'FUSE_METHOD': 'SUM'
        },
        'STAGE3': {
            'NUM_MODULES': 4,
            'NUM_BRANCHES': 3,
            'BLOCK': 'BASIC',
            'NUM_BLOCKS': (4, 4, 4),
            'NUM_CHANNELS': (48, 96, 192),
            'FUSE_METHOD': 'SUM'
        },
        'STAGE4': {
            'NUM_MODULES': 3,
            'NUM_BRANCHES': 4,
            'BLOCK': 'BASIC',
            'NUM_BLOCKS': (4, 4, 4, 4),
            'NUM_CHANNELS': (48, 96, 192, 384),
            'FUSE_METHOD': 'SUM'
        },
        'FINAL_CONV_KERNEL': 1
    }

    def expanded_channels(stage_cfg):
        # Scale the configured widths by the stage's block expansion factor.
        stage_block = blocks_dict[stage_cfg['BLOCK']]
        return [width * stage_block.expansion
                for width in stage_cfg['NUM_CHANNELS']]

    # stem net
    self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1,
                           bias=False)
    self.bn1 = ABN(64, momentum=BN_MOMENTUM)
    self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1,
                           bias=False)
    self.bn2 = ABN(64, momentum=BN_MOMENTUM)
    self.relu = act

    self.layer1 = self._make_layer(Bottleneck, 64, 64, 4)

    # Stage 2: fed from layer1, whose output width is passed as [256].
    self.stage2_cfg = extra['STAGE2']
    num_channels = expanded_channels(self.stage2_cfg)
    self.transition1 = self._make_transition_layer([256], num_channels)
    self.stage2, pre_stage_channels = self._make_stage(
        self.stage2_cfg, num_channels)

    # Stage 3.
    self.stage3_cfg = extra['STAGE3']
    num_channels = expanded_channels(self.stage3_cfg)
    self.transition2 = self._make_transition_layer(pre_stage_channels,
                                                   num_channels)
    self.stage3, pre_stage_channels = self._make_stage(
        self.stage3_cfg, num_channels)

    # Stage 4: keeps every resolution as output.
    self.stage4_cfg = extra['STAGE4']
    num_channels = expanded_channels(self.stage4_cfg)
    self.transition3 = self._make_transition_layer(pre_stage_channels,
                                                   num_channels)
    self.stage4, pre_stage_channels = self._make_stage(
        self.stage4_cfg, num_channels, multi_scale_output=True)
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0,
             dilation=1, groups=1, bias=True, padding_mode='zeros',
             norm='BN', groups_size=16, conv_last=False):
    """Build a convolution combined with the requested normalisation.

    Args:
        in_channels, out_channels, kernel_size, stride, padding, dilation,
        groups, bias, padding_mode: Forwarded positionally to the selected
            convolution class.
        norm: One of ``None``, ``'BN'``, ``'ABN'``, ``'IN'``, ``'GN'``,
            ``'LN'``, ``'WN'``, ``'SN'``, ``'MWN'``, ``'MSN'``,
            ``'MSNTReLU'``, ``'MWNTReLU'``. The weight/spectral variants use
            a dedicated convolution class and add no separate norm layer.
        groups_size: Passed as the first argument (``num_groups``) of
            ``nn.GroupNorm`` when ``norm == 'GN'``.
        conv_last: When true and a separate norm layer exists, the order is
            norm -> conv instead of conv -> norm, and the norm layer is sized
            for ``in_channels``.

    Raises:
        ValueError: If ``norm`` is not one of the supported values.
        ImportError: If ``norm == 'ABN'`` and ``inplace_abn`` is missing.
    """
    super(ConvNorm, self).__init__()
    if norm not in [None, 'BN', 'ABN', 'IN', 'GN', 'LN', 'WN', 'SN', 'MWN',
                    'MSN', 'MSNTReLU', 'MWNTReLU']:
        raise ValueError("Undefined norm value. Must be one of "
                         "[None,'BN', 'ABN','IN', 'GN', 'LN', 'WN', 'SN','MWN', 'MSN', 'MSNTReLU', 'MWNTReLU']")

    conv_args = (in_channels, out_channels, kernel_size, stride, padding,
                 dilation, groups, bias, padding_mode)

    # conv_last changes the order from Conv2D + BN to BN + Conv2D, which is
    # frequently used in DenseNet & ResNet architectures. The norm layer
    # then sees the convolution's input, so it is sized for in_channels.
    # Building it with the right size up front replaces the previous
    # re-invocation of __init__, which raised TypeError for GroupNorm.
    norm_channels = in_channels if conv_last else out_channels

    if norm in ['MSN', 'MSNTReLU']:
        layers = [MeanSpectralNormConv2d(*conv_args)]
    elif norm == 'SN':
        layers = [SpectralNormConv2d(*conv_args)]
    elif norm == 'WN':
        layers = [WeightNormConv2d(*conv_args)]
    elif norm in ['MWN', 'MWNTReLU']:
        layers = [MeanWeightNormConv2d(*conv_args)]
    elif norm is None:
        layers = [nn.Conv2d(*conv_args)]
    else:
        if norm == 'ABN':
            # Import lazily so the dependency is only needed for this mode.
            try:
                from inplace_abn import ABN
            except ImportError as err:
                raise ImportError('Unable to import inplace_abn') from err

        conv2d = nn.Conv2d(*conv_args)
        if norm == 'IN':
            norm_layer = nn.InstanceNorm2d(norm_channels)
        elif norm == 'GN':
            norm_layer = nn.GroupNorm(groups_size, norm_channels)
        elif norm == 'LN':
            norm_layer = nn.LayerNorm(norm_channels)
        elif norm == 'BN':
            norm_layer = nn.BatchNorm2d(norm_channels)
        else:  # 'ABN'
            norm_layer = ABN(norm_channels)

        layers = [norm_layer, conv2d] if conv_last else [conv2d, norm_layer]

    self.layers = nn.Sequential(*layers)