def __init__(self, in_chan, out_chan):
    """Create the sub-layers of the attention refinement module.

    Args:
        in_chan: Number of input channels of the 3x3 ``ConvBNReLU`` block.
        out_chan: Number of channels produced by the 3x3 block; also the
            input and output width of the 1x1 attention convolution and
            the width of its batch norm.
    """
    super(AttentionRefinementModule, self).__init__()

    # 3x3 feature block with stride 1 and padding 1.
    feature_block = layers.ConvBNReLU(
        in_chan, out_chan, kernel_size=3, stride=1, padding=1)
    self.conv = feature_block

    # Layers of the attention path: 1x1 convolution, batch norm, sigmoid.
    # NOTE(review): bias_attr=None is passed explicitly; confirm that the
    # framework default bias (rather than bias_attr=False) is intended.
    self.conv_atten = nn.Conv2D(
        out_chan, out_chan, kernel_size=1, bias_attr=None)
    self.bn_atten = nn.BatchNorm2D(out_chan)
    self.sigmoid_atten = nn.Sigmoid()
def make_layers(self, block: dict, no_relu_layers: list):
    """Assemble a named ``nn.Sequential`` from a layer specification.

    Args:
        block: Mapping from layer name to an indexable parameter list.
            For names containing ``'pool'`` the entries are read as
            ``kernel_size, stride, padding`` of a max pool; for every other
            name they are read as ``in_channels, out_channels, kernel_size,
            stride, padding`` of a convolution.
        no_relu_layers: Names of convolution layers that are NOT followed
            by a ReLU.

    Returns:
        ``nn.Sequential`` built from ``(name, layer)`` pairs in the
        iteration order of ``block``; each ReLU is named
        ``'relu_' + <conv name>``.
    """
    named = []
    for name, spec in block.items():
        if 'pool' in name:
            pool = nn.MaxPool2D(
                kernel_size=spec[0], stride=spec[1], padding=spec[2])
            named.append((name, pool))
            continue

        conv = nn.Conv2D(
            in_channels=spec[0],
            out_channels=spec[1],
            kernel_size=spec[2],
            stride=spec[3],
            padding=spec[4])
        named.append((name, conv))
        if name not in no_relu_layers:
            named.append(('relu_' + name, nn.ReLU()))

    return nn.Sequential(*tuple(named))
def __init__(self, in_channels, out_channels, sizes=(1,), ds=1):
    """Create one stage per entry of ``sizes`` plus a 1x1 fusion conv.

    Args:
        in_channels: Channel count handed to every stage; the fusion
            convolution consumes ``in_channels * len(sizes)`` channels.
        out_channels: Output channels of the fusion convolution. Also
            stored as ``value_channels``; ``key_channels`` is
            ``out_channels // 8``.
        sizes: Iterable of per-stage size values forwarded to
            ``_make_stage``. The default is an immutable tuple so the
            default object can never be shared-and-mutated between calls.
        ds: Value stored as ``self.ds`` and forwarded to every stage.
    """
    super(PAM, self).__init__()
    self.group = len(sizes)
    self.ds = ds  # output stride
    self.value_channels = out_channels
    self.key_channels = out_channels // 8

    # Assigned exactly once: the former placeholder ``self.stages = []``
    # was overwritten immediately and has been dropped.
    self.stages = nn.LayerList([
        self._make_stage(in_channels, self.key_channels,
                         self.value_channels, size, self.ds)
        for size in sizes
    ])
    self.conv_bn = nn.Sequential(
        nn.Conv2D(in_channels * self.group, out_channels,
                  kernel_size=1, padding=0),
        # nn.BatchNorm2D(out_channels),
    )
def __init__(self, npoint, radius, nsample, in_channel, mlp, group_all):
    """Store the grouping settings and build the 1x1 conv / batch-norm stack.

    Args:
        npoint: Stored as ``self.npoint``.
        radius: Stored as ``self.radius``.
        nsample: Stored as ``self.nsample``.
        in_channel: Input channel count of the first 1x1 convolution.
        mlp: Iterable of output channel counts; one ``Conv2D`` (kernel 1)
            and one ``BatchNorm2D`` are created per entry, each conv
            consuming the previous entry's output width.
        group_all: Stored as ``self.group_all``.
    """
    super(PointNetSetAbstraction, self).__init__()
    self.npoint = npoint
    self.radius = radius
    self.nsample = nsample

    # Use LayerList rather than plain Python lists: layers held in a plain
    # list are not registered as sub-layers, so their parameters would be
    # missing from parameters()/state_dict() and never reach the optimizer.
    self.mlp_convs = nn.LayerList()
    self.mlp_bns = nn.LayerList()

    last_channel = in_channel
    for out_channel in mlp:
        self.mlp_convs.append(nn.Conv2D(last_channel, out_channel, 1))
        self.mlp_bns.append(nn.BatchNorm2D(out_channel))
        last_channel = out_channel

    self.group_all = group_all
def rep(self):
    """Fill ``rbr_reparam`` with the result of ``get_equivalent_kernel_bias``.

    The ``rbr_reparam`` convolution is created on first use from the
    hyper-parameters stored on ``self``; afterwards its weight and bias are
    overwritten with the kernel and bias returned by
    ``get_equivalent_kernel_bias``.
    """
    if not hasattr(self, 'rbr_reparam'):
        conv_kwargs = dict(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups,
            padding_mode=self.padding_mode,
        )
        self.rbr_reparam = nn.Conv2D(**conv_kwargs)

    # NOTE(review): assumes the weight copy runs on every call, not only
    # when the layer was just created -- confirm against the original file.
    equivalent = self.get_equivalent_kernel_bias()
    kernel, bias = equivalent
    target = self.rbr_reparam
    target.weight.set_value(kernel)
    target.bias.set_value(bias)
def __init__(self, inplanes, act_layer=nn.ReLU, groups=1,
             norm_layer=partial(nn.BatchNorm2D, epsilon=1e-6),
             drop_block=None, drop_path=None):
    """Create a 1x1 -> 3x3 -> 1x1 convolution stack that keeps ``inplanes``.

    Args:
        inplanes: Input channels; also the output channels of the last
            1x1 convolution. The two inner convolutions use
            ``inplanes // 4`` channels.
        act_layer: Zero-argument factory called three times to create the
            activations.
        groups: ``groups`` argument of the 3x3 convolution.
        norm_layer: Factory called with a channel count to create each of
            the three normalisation layers.
        drop_block: Stored unchanged as ``self.drop_block``.
        drop_path: Stored unchanged as ``self.drop_path``.
    """
    super(Med_ConvBlock, self).__init__()

    reduced = inplanes // 4  # channel reduction factor of 4

    # 1x1 reduction
    self.conv1 = nn.Conv2D(
        inplanes, reduced,
        kernel_size=1, stride=1, padding=0, bias_attr=False)
    self.bn1 = norm_layer(reduced)
    self.act1 = act_layer()

    # 3x3 (optionally grouped) convolution at the reduced width
    self.conv2 = nn.Conv2D(
        reduced, reduced,
        kernel_size=3, stride=1, groups=groups, padding=1, bias_attr=False)
    self.bn2 = norm_layer(reduced)
    self.act2 = act_layer()

    # 1x1 expansion back to the input width
    self.conv3 = nn.Conv2D(
        reduced, inplanes,
        kernel_size=1, stride=1, padding=0, bias_attr=False)
    self.bn3 = norm_layer(inplanes)
    self.act3 = act_layer()

    self.drop_block = drop_block
    self.drop_path = drop_path
def __init__(self, inplanes, out_channels, dilation_series, padding_series,
             num_classes):
    """Create the layers of the edge branch.

    Args:
        inplanes: Indexable; entries 0 and 1 are the input channel counts
            of ``conv_x1`` and ``conv_x4``.
        out_channels: Width of ``conv0``/``conv1`` and the input width of
            every classification convolution.
        dilation_series: Dilation of each convolution in ``conv2d_list``.
        padding_series: Padding of each convolution in ``conv2d_list``;
            zipped with ``dilation_series``.
        num_classes: Output channels of the classification convolutions.
    """
    super(edge_branch, self).__init__()

    # Two 3x3 convolutions (no padding given), each producing 512 channels.
    self.conv_x1 = nn.Conv2D(inplanes[0], 512, kernel_size=3)
    self.conv_x4 = nn.Conv2D(inplanes[1], 512, kernel_size=3)

    # conv0 takes 512 * 2 channels; conv1 has no activation of its own.
    self.conv0 = resnet_vd.ConvBNLayer(
        in_channels=512 * 2,
        out_channels=out_channels,
        kernel_size=3,
        act='relu')
    self.conv1 = resnet_vd.ConvBNLayer(
        in_channels=out_channels,
        out_channels=out_channels,
        kernel_size=3,
        act=None)

    self.add = layers.Add()
    self.relu = layers.Activation(act="relu")

    # One dilated 3x3 classifier per (dilation, padding) pair. Weights use
    # Normal(std=0.01), biases start at 0, both with learning-rate factor 10.
    self.conv2d_list = nn.LayerList()
    for rate, pad in zip(dilation_series, padding_series):
        w_attr = paddle.ParamAttr(
            initializer=nn.initializer.Normal(std=0.01), learning_rate=10.0)
        b_attr = paddle.ParamAttr(
            initializer=nn.initializer.Constant(value=0.0),
            learning_rate=10.0)
        dilated_conv = nn.Conv2D(
            out_channels,
            num_classes,
            kernel_size=3,
            stride=1,
            padding=pad,
            dilation=rate,
            weight_attr=w_attr,
            bias_attr=b_attr)
        self.conv2d_list.append(dilated_conv)

    self.classifier = nn.Conv2D(
        out_channels, num_classes, kernel_size=3, stride=1)
def __init__(self, in_feature, out_feature, stride, padding_mode="zeros"):
    """Create the strided grouped convolution and the linear projection.

    Args:
        in_feature: Input channels of the convolution, its ``groups``
            value, and the input size of the linear layer.
        out_feature: Output channels of the convolution and output size of
            the linear layer.
        stride: Convolution stride; the kernel size is ``stride + 1`` and
            the padding ``stride // 2``.
        padding_mode: Forwarded to ``nn.Conv2D``.
    """
    super(conv_head_pooling, self).__init__()

    kernel = stride + 1
    pad = stride // 2
    self.conv = nn.Conv2D(
        in_feature,
        out_feature,
        kernel_size=kernel,
        padding=pad,
        stride=stride,
        padding_mode=padding_mode,
        groups=in_feature,  # one group per input channel
    )
    self.fc = nn.Linear(in_feature, out_feature)
def __init__(self, in_channels, block, layers, num_classes=2):
    """Build the encoder, the two decoder paths and initialise the weights.

    Args:
        in_channels: Input channels of the first 7x7 convolution.
        block: Block class forwarded to ``_make_layer``.
        layers: Indexable with four entries; entry ``i`` is the block count
            passed to ``_make_layer`` for encoder stage ``i + 1``.
        num_classes: Output channels of both final convolutions.
    """
    super(CDNet, self).__init__()
    filters = [64, 128, 256, 512]
    self.in_planes = 64

    # stem
    self.firstconv = nn.Conv2D(in_channels, 64, kernel_size=7, stride=2,
                               padding=3)
    self.firstbn = nn.BatchNorm(64)
    self.firstmaxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

    # encode
    self.encoder1 = self._make_layer(block, 64, layers[0])
    self.encoder2 = self._make_layer(block, 128, layers[1], stride=2)
    self.encoder3 = self._make_layer(block, 256, layers[2], stride=2)
    self.encoder4 = self._make_layer(block, 512, layers[3], stride=2)

    # decode
    self.decoder4 = DecoderBlock(filters[3], filters[2])
    self.decoder3 = DecoderBlock(filters[2], filters[1])
    self.decoder2 = DecoderBlock(filters[1], filters[0])
    self.decoder1 = DecoderBlock(filters[0], filters[0])

    # second ("master") decoder path and the two Dblocks
    self.dblock_master = Dblock(512)
    self.dblock = Dblock(512)
    self.decoder4_master = DecoderBlock(filters[3], filters[2])
    self.decoder3_master = DecoderBlock(filters[2], filters[1])
    self.decoder2_master = DecoderBlock(filters[1], filters[0])
    self.decoder1_master = DecoderBlock(filters[0], filters[0])

    # final
    self.finaldeconv1_master = nn.Conv2DTranspose(filters[0], 32, 4, 2, 1)
    self.finalconv2_master = nn.Conv2D(32, 32, 3, padding=1)
    self.finalconv3_master = nn.Conv2D(32, num_classes, 3, padding=1)
    self.finaldeconv1 = nn.Conv2DTranspose(filters[0], 32, 4, 2, 1)
    self.finalconv2 = nn.Conv2D(32, 32, 3, padding=1)
    self.finalconv3 = nn.Conv2D(32, num_classes, 3, padding=1)

    # init
    for sublayer in self.sublayers():
        if isinstance(sublayer, nn.Conv2D):
            # Normal init with variance 2 / (kernel_h * kernel_w * out_ch).
            n = (sublayer._kernel_size[0] * sublayer._kernel_size[1] *
                 sublayer._out_channels)
            normal_init(sublayer.weight, mean=0, std=math.sqrt(2. / n))
        elif isinstance(sublayer, nn.BatchNorm):
            # Batch norm starts with scale 1 and shift 0. The previous
            # values (scale 0, shift 1) were swapped: a zero scale makes
            # every BN output a constant and blocks the gradient to the
            # preceding convolutions at the start of training.
            constant_init(sublayer.weight, value=1)
            constant_init(sublayer.bias, value=0)
def _make_layer(self, block, planes, blocks, stride=1):
    """Stack ``blocks`` instances of ``block`` into one ``nn.Sequential``.

    Args:
        block: Block class; must expose an ``expansion`` attribute and be
            callable as ``block(in_planes, planes[, stride, downsample])``.
        planes: Base width handed to every block.
        blocks: Total number of blocks in the stage.
        stride: Stride of the first block (and of the shortcut conv).

    Returns:
        ``nn.Sequential`` of the created blocks. ``self.in_planes`` is
        updated to ``planes * block.expansion`` as a side effect.
    """
    out_planes = planes * block.expansion

    # A 1x1 conv + batch-norm shortcut is only needed when the first block
    # changes the resolution or the channel count.
    shortcut = None
    if not (stride == 1 and self.in_planes == out_planes):
        shortcut = nn.Sequential(
            nn.Conv2D(self.in_planes, out_planes, kernel_size=1,
                      stride=stride),
            nn.BatchNorm(out_planes))

    stage = [block(self.in_planes, planes, stride, shortcut)]
    self.in_planes = out_planes
    stage.extend(block(self.in_planes, planes) for _ in range(1, blocks))
    return nn.Sequential(*stage)
def __init__(self, stride=1, padding=0, dilation=1, groups=1,
             padding_mode="zeros"):
    """Create a 3 -> 6 channel 3x3 convolution and a softmax layer.

    Args:
        stride: Forwarded to ``nn.Conv2D``.
        padding: Forwarded to ``nn.Conv2D``.
        dilation: Forwarded to ``nn.Conv2D``.
        groups: Forwarded to ``nn.Conv2D``.
        padding_mode: Forwarded to ``nn.Conv2D``.
    """
    super(Conv2D1, self).__init__()
    conv_options = dict(
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        padding_mode=padding_mode,
    )
    # Fixed shape: 3 input channels, 6 output channels, kernel size 3.
    self.conv = nn.Conv2D(3, 6, 3, **conv_options)
    self.softmax = nn.Softmax()
def __init__(self, channels, cond_channels, kernel_size, dilations):
    """Create the three weight-normalised convolutions of the block.

    Args:
        channels: Input channels of the main convolution and of the output
            projection; all three convolutions emit ``2 * channels``.
        cond_channels: Input channels of the 1x1 condition projection.
        kernel_size: Two-element (height, width) kernel size of the main
            convolution.
        dilations: Two-element (height, width) dilation of the main
            convolution.
    """
    super(ResidualBlock, self).__init__()

    # input conv
    # Uniform bound is sqrt(1 / fan_in) with fan_in = channels * kh * kw.
    # The previous expression ``1 / channels * np.prod(kernel_size)``
    # multiplied by the kernel area because of operator precedence, which
    # is inconsistent with the 1x1 projections below (sqrt(1 / in_ch)).
    std = math.sqrt(1 / (channels * np.prod(kernel_size)))
    init = I.Uniform(-std, std)
    receptive_field = [
        1 + (k - 1) * d for (k, d) in zip(kernel_size, dilations)
    ]
    rh, rw = receptive_field
    paddings = [rh - 1, 0, rw // 2, (rw - 1) // 2]  # causal & same
    conv = nn.Conv2D(
        channels,
        2 * channels,
        kernel_size,
        padding=paddings,
        dilation=dilations,
        weight_attr=init,
        bias_attr=init)
    self.conv = nn.utils.weight_norm(conv)
    self.rh = rh
    self.rw = rw
    self.dilations = dilations

    # condition projection
    std = math.sqrt(1 / cond_channels)
    init = I.Uniform(-std, std)
    condition_proj = nn.Conv2D(
        cond_channels, 2 * channels, (1, 1),
        weight_attr=init, bias_attr=init)
    self.condition_proj = nn.utils.weight_norm(condition_proj)

    # parametric residual & skip connection
    std = math.sqrt(1 / channels)
    init = I.Uniform(-std, std)
    out_proj = nn.Conv2D(
        channels, 2 * channels, (1, 1),
        weight_attr=init, bias_attr=init)
    self.out_proj = nn.utils.weight_norm(out_proj)
def __init__(self, cin, cout, kernel_size, stride, padding, residual=False,
             *args, **kwargs):
    """Create a single convolution wrapped in ``nn.Sequential`` plus LeakyReLU.

    Args:
        cin: Input channels of the convolution.
        cout: Output channels of the convolution.
        kernel_size: Kernel size of the convolution.
        stride: Stride of the convolution.
        padding: Padding of the convolution.
        residual: Accepted but not used in this constructor.
        *args: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor.
    """
    super().__init__(*args, **kwargs)
    conv = nn.Conv2D(cin, cout, kernel_size, stride, padding)
    self.conv_block = nn.Sequential(conv)
    # LeakyReLU with negative slope 0.01.
    self.act = nn.LeakyReLU(0.01)
def __init__(self, block_expansion, num_kp, num_channels, max_features,
             num_blocks, temperature, estimate_jacobian=False,
             scale_factor=1, single_jacobian_map=False, pad=0):
    """Create the hourglass predictor and the key-point / jacobian heads.

    Args:
        block_expansion: First positional argument of ``Hourglass``.
        num_kp: Output channels of the key-point convolution.
        num_channels: ``in_features`` of the hourglass and channel count of
            the optional anti-alias down-sampler.
        max_features: Forwarded to ``Hourglass``.
        num_blocks: Forwarded to ``Hourglass``.
        temperature: Stored as ``self.temperature``.
        estimate_jacobian: When true a jacobian head is created, otherwise
            ``self.jacobian`` is ``None``.
        scale_factor: Stored; when it differs from 1 an
            ``AntiAliasInterpolation2d`` is created as ``self.down``.
        single_jacobian_map: When true the jacobian head predicts a single
            map instead of one per key point.
        pad: Padding of both 7x7 head convolutions.
    """
    super(KPDetector, self).__init__()

    self.predictor = Hourglass(
        block_expansion,
        in_features=num_channels,
        max_features=max_features,
        num_blocks=num_blocks)
    head_in = self.predictor.out_filters

    self.kp = nn.Conv2D(
        in_channels=head_in,
        out_channels=num_kp,
        kernel_size=(7, 7),
        padding=pad)

    if not estimate_jacobian:
        self.jacobian = None
    else:
        self.num_jacobian_maps = 1 if single_jacobian_map else num_kp
        # Four output channels per jacobian map.
        self.jacobian = nn.Conv2D(
            in_channels=head_in,
            out_channels=4 * self.num_jacobian_maps,
            kernel_size=(7, 7),
            padding=pad)
        # NOTE: the initialisation below is disabled in this file.
        # self.jacobian.weight.data.zero_()
        # self.jacobian.bias.data.copy_(paddle.tensor([1, 0, 0, 1] * self.num_jacobian_maps, dtype='float32'))

    self.temperature = temperature
    self.scale_factor = scale_factor
    if self.scale_factor != 1:
        self.down = AntiAliasInterpolation2d(num_channels,
                                             self.scale_factor)
def __init__(self, inplanes, planes, stride=1, downsample=None):
    """Create the BN/conv pairs of the bottleneck.

    Args:
        inplanes: Input channels (width of ``bn1`` and input of ``conv1``).
        planes: Width of the 1x1 and 3x3 convolutions; the last 1x1
            convolution emits ``planes * Bottleneck.outchannel_ratio``.
        stride: Stride of the 3x3 convolution; also stored on ``self``.
        downsample: Stored unchanged as ``self.downsample``.
    """
    super(Bottleneck, self).__init__()

    mid = planes * 1
    out_planes = planes * Bottleneck.outchannel_ratio

    # Each convolution is declared after a batch norm of its input width.
    self.bn1 = nn.BatchNorm2D(inplanes)
    self.conv1 = nn.Conv2D(inplanes, planes, kernel_size=1, bias_attr=False)

    self.bn2 = nn.BatchNorm2D(planes)
    self.conv2 = nn.Conv2D(
        planes, mid, kernel_size=3, stride=stride, padding=1,
        bias_attr=False)

    self.bn3 = nn.BatchNorm2D(mid)
    self.conv3 = nn.Conv2D(mid, out_planes, kernel_size=1, bias_attr=False)

    self.bn4 = nn.BatchNorm2D(out_planes)
    self.relu = nn.ReLU()

    self.downsample = downsample
    self.stride = stride
def __init__(self, in_features, out_features, kernel_size=3, padding=1,
             groups=1):
    """Create the convolution and batch norm of the block.

    Args:
        in_features: Input channels of the convolution.
        out_features: Output channels of the convolution and channel count
            of the batch norm.
        kernel_size: Kernel size of the convolution.
        padding: Padding of the convolution.
        groups: Groups of the convolution.
    """
    super(UpBlock2d, self).__init__()
    conv_options = dict(kernel_size=kernel_size, padding=padding,
                        groups=groups)
    self.conv = nn.Conv2D(in_features, out_features, **conv_options)
    self.norm = nn.BatchNorm(num_channels=out_features)
def __init__(self,
             in_channels: int,
             out_channels: int,
             kernel_size: int,
             padding: str = 'same',
             **kwargs):
    """Create a convolution followed by a ``SyncBatchNorm`` layer.

    Args:
        in_channels: Input channels of the convolution.
        out_channels: Output channels of the convolution and width of the
            batch norm.
        kernel_size: Kernel size of the convolution.
        padding: Padding argument of the convolution.
        **kwargs: Extra keyword arguments forwarded to ``nn.Conv2D``.
    """
    super().__init__()
    conv = nn.Conv2D(
        in_channels, out_channels, kernel_size, padding=padding, **kwargs)
    self._conv = conv
    self._batch_norm = SyncBatchNorm(out_channels)
def __init__(self, conv_dim=64, repeat_num=3):
    """Build ``self.main``: a stem, two stride-2 stages and residual blocks.

    Args:
        conv_dim: Output channels of the 7x7 stem convolution; each of the
            two stride-2 stages doubles the width.
        repeat_num: Number of ``ResidualBlock`` layers appended at the end.
    """
    super(TNetDown, self).__init__()

    # Stem: 7x7 conv from 3 channels, instance norm without affine
    # parameters, ReLU.
    stack = [
        nn.Conv2D(3, conv_dim, kernel_size=7, stride=1, padding=3,
                  bias_attr=False),
        nn.InstanceNorm2D(conv_dim, weight_attr=False, bias_attr=False),
        nn.ReLU(),
    ]

    # Down-Sampling
    width = conv_dim
    for _ in range(2):
        doubled = width * 2
        stack.append(
            nn.Conv2D(width, doubled, kernel_size=4, stride=2, padding=1,
                      bias_attr=False))
        stack.append(
            nn.InstanceNorm2D(doubled, weight_attr=False, bias_attr=False))
        stack.append(nn.ReLU())
        width = doubled

    # Bottleneck
    for _ in range(repeat_num):
        stack.append(ResidualBlock(dim_in=width, dim_out=width, mode='t'))

    self.main = nn.Sequential(*stack)
def __init__(self, in_channel_left, in_channel_right):
    """Create five convolution / batch-norm pairs, all 192 channels wide.

    Args:
        in_channel_left: Input channels of ``conv0`` (3x3).
        in_channel_right: Input channels of ``conv1`` (1x1).
    """
    super(MFR, self).__init__()

    width = 192  # common channel count of every layer in this module

    self.conv0 = nn.Conv2D(in_channel_left, width, 3, 1, 1)
    self.bn0 = nn.BatchNorm2D(width)

    self.conv1 = nn.Conv2D(in_channel_right, width, 1)
    self.bn1 = nn.BatchNorm2D(width)

    self.conv2 = nn.Conv2D(width, width, kernel_size=3, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2D(width)

    # Asymmetric 1x3 and 3x1 convolutions, padded along the long side.
    self.conv13 = nn.Conv2D(
        width, width, kernel_size=(1, 3), stride=1, padding=(0, 1))
    self.bn13 = nn.BatchNorm2D(width)

    self.conv31 = nn.Conv2D(
        width, width, kernel_size=(3, 1), stride=1, padding=(1, 0))
    self.bn31 = nn.BatchNorm2D(width)
def __init__(self, in_channel=256, out_channel=256, num_convs=4,
             norm_type=None):
    """Build ``self.upsample``: ``num_convs`` 3x3 convs, then a 2x2 deconv.

    Args:
        in_channel: Input channels of the first layer.
        out_channel: Output channels of every layer.
        num_convs: Number of 3x3 convolutions placed before the transposed
            convolution.
        norm_type: When equal to ``'gn'`` each 3x3 convolution is a
            ``ConvNormLayer`` with that norm type; otherwise a plain
            ``nn.Conv2D`` is used.
    """
    super(MaskFeat, self).__init__()
    self.num_convs = num_convs
    self.in_channel = in_channel
    self.out_channel = out_channel
    self.norm_type = norm_type

    fan_conv = out_channel * 3 * 3
    fan_deconv = out_channel * 2 * 2

    mask_conv = nn.Sequential()
    for i in range(self.num_convs):
        conv_name = 'mask_inter_feat_{}'.format(i + 1)
        # Only the first convolution sees the raw input width.
        ch_in = in_channel if i == 0 else out_channel
        if norm_type == 'gn':
            conv = ConvNormLayer(
                ch_in=ch_in,
                ch_out=out_channel,
                filter_size=3,
                stride=1,
                norm_type=self.norm_type,
                initializer=KaimingNormal(fan_in=fan_conv),
                skip_quant=True)
        else:
            conv = nn.Conv2D(
                in_channels=ch_in,
                out_channels=out_channel,
                kernel_size=3,
                padding=1,
                weight_attr=paddle.ParamAttr(
                    initializer=KaimingNormal(fan_in=fan_conv)))
            conv.skip_quant = True
        mask_conv.add_sublayer(conv_name, conv)
        mask_conv.add_sublayer(conv_name + 'act', nn.ReLU())

    # The transposed convolution follows the 3x3 stack, so its input width
    # is out_channel whenever at least one convolution precedes it. Using
    # in_channel unconditionally (as before) produced a channel mismatch
    # when in_channel != out_channel.
    deconv_in = self.out_channel if self.num_convs > 0 else self.in_channel
    mask_conv.add_sublayer(
        'conv5_mask',
        nn.Conv2DTranspose(
            in_channels=deconv_in,
            out_channels=self.out_channel,
            kernel_size=2,
            stride=2,
            weight_attr=paddle.ParamAttr(
                initializer=KaimingNormal(fan_in=fan_deconv))))
    mask_conv.add_sublayer('conv5_mask' + 'act', nn.ReLU())
    self.upsample = mask_conv
def __init__(self, in_c, out_c, filter_size, stride, padding, num_groups=1,
             act=None, lr_mult=1., conv_decay=0., norm_type='bn',
             norm_decay=0., freeze_norm=False, name=""):
    """Create a bias-free convolution followed by a batch-norm layer.

    Args:
        in_c: Input channels of the convolution.
        out_c: Output channels of the convolution and width of the norm.
        filter_size: Kernel size of the convolution.
        stride: Stride of the convolution.
        padding: Padding of the convolution.
        num_groups: Groups of the convolution.
        act: Stored as ``self.act``; not applied inside this constructor.
        lr_mult: Learning-rate factor of the convolution weight and, unless
            the norm is frozen, of the norm parameters.
        conv_decay: L2 decay coefficient of the convolution weight.
        norm_type: ``'sync_bn'`` selects ``nn.SyncBatchNorm``; any other
            value selects ``nn.BatchNorm``.
        norm_decay: L2 decay coefficient of the norm parameters.
        freeze_norm: When true the norm parameters get learning rate 0,
            are non-trainable and have ``stop_gradient`` set; the
            ``nn.BatchNorm`` variant additionally uses global statistics.
        name: Accepted but not used in this constructor.
    """
    super(ConvBNLayer, self).__init__()
    self.act = act

    conv_weight_attr = ParamAttr(
        learning_rate=lr_mult, regularizer=L2Decay(conv_decay))
    self.conv = nn.Conv2D(
        in_channels=in_c,
        out_channels=out_c,
        kernel_size=filter_size,
        stride=stride,
        padding=padding,
        groups=num_groups,
        weight_attr=conv_weight_attr,
        bias_attr=False)

    # Scale and shift share the same settings but need separate ParamAttr
    # objects.
    norm_lr = 0. if freeze_norm else lr_mult
    is_trainable = not freeze_norm
    scale_attr = ParamAttr(
        learning_rate=norm_lr,
        regularizer=L2Decay(norm_decay),
        trainable=is_trainable)
    shift_attr = ParamAttr(
        learning_rate=norm_lr,
        regularizer=L2Decay(norm_decay),
        trainable=is_trainable)

    if norm_type == 'sync_bn':
        self.bn = nn.SyncBatchNorm(
            out_c, weight_attr=scale_attr, bias_attr=shift_attr)
    else:
        self.bn = nn.BatchNorm(
            out_c,
            act=None,
            param_attr=scale_attr,
            bias_attr=shift_attr,
            use_global_stats=bool(freeze_norm))

    if freeze_norm:
        for param in self.bn.parameters():
            param.stop_gradient = True
def __init__(self, num_classes, stem_channels=(16, 24, 32, 48),
             ch_list=(64, 96, 160, 224, 320), grmul=1.7,
             gr=(10, 16, 18, 24, 32), n_layers=(4, 4, 8, 8, 8),
             align_corners=False, pretrained=None):
    """Build the stem, encoder, decoder and 1x1 classification head.

    Args:
        num_classes: Output channels of the classification head.
        stem_channels: Four output widths of the stem ``ConvBNReLU``
            layers; the last one is the encoder input width.
        ch_list: Forwarded to ``Encoder``.
        grmul: Forwarded to ``Encoder`` and ``Decoder``.
        gr: Forwarded to ``Encoder`` and ``Decoder``.
        n_layers: Forwarded to ``Encoder`` and ``Decoder``; its length is
            the number of encoder blocks, the decoder has one fewer.
        align_corners: Stored and forwarded to ``Decoder``.
        pretrained: Stored as ``self.pretrained``.
    """
    super().__init__()
    self.align_corners = align_corners
    self.pretrained = pretrained

    num_encoder_blocks = len(n_layers)
    num_decoder_blocks = num_encoder_blocks - 1
    c0, c1, c2, c3 = (stem_channels[0], stem_channels[1],
                      stem_channels[2], stem_channels[3])

    # Four 3x3 ConvBNReLU layers; only the third one has stride 2.
    self.stem = nn.Sequential(
        layers.ConvBNReLU(3, c0, kernel_size=3, bias_attr=False),
        layers.ConvBNReLU(c0, c1, kernel_size=3, bias_attr=False),
        layers.ConvBNReLU(c1, c2, kernel_size=3, stride=2, bias_attr=False),
        layers.ConvBNReLU(c2, c3, kernel_size=3, bias_attr=False))

    self.encoder = Encoder(num_encoder_blocks, c3, ch_list, gr, grmul,
                           n_layers)

    # The decoder is sized from what the encoder reports.
    skip_channels = self.encoder.get_skip_channels()
    encoder_out = self.encoder.get_out_channels()
    self.decoder = Decoder(num_decoder_blocks, encoder_out, skip_channels,
                           gr, grmul, n_layers, align_corners)

    self.cls_head = nn.Conv2D(
        in_channels=self.decoder.get_out_channels(),
        out_channels=num_classes,
        kernel_size=1)

    self.init_weight()
def __init__(self):
    """Build ``self.models`` from three groups converted by ``supernet``.

    Each group of conv / batch-norm / ReLU layers is passed through
    ``ofa_super.convert`` under a different ``supernet`` configuration
    (``expand_ratio``, ``channel``, ``kernel_size``); the converted lists
    are concatenated and wrapped in a single ``paddle.nn.Sequential``.
    """
    super(ModelConv2, self).__init__()
    # Group 1: converted with expand_ratio=(1, 2, 4).
    with supernet(expand_ratio=(1, 2, 4)) as ofa_super:
        models = []
        models += [nn.Conv2DTranspose(4, 4, 3)]
        models += [nn.BatchNorm2D(4)]
        models += [ReLU()]
        models += [nn.Conv2D(4, 4, 3)]
        models += [nn.BatchNorm2D(4)]
        models += [ReLU()]
        models = ofa_super.convert(models)
    # Group 2: converted with channel=((4, 6, 8), (4, 6, 8)).
    with supernet(channel=((4, 6, 8), (4, 6, 8))) as ofa_super:
        models1 = []
        models1 += [nn.Conv2DTranspose(4, 4, 3)]
        models1 += [nn.BatchNorm2D(4)]
        models1 += [ReLU()]
        models1 += [nn.Conv2DTranspose(4, 4, 3)]
        models1 += [nn.BatchNorm2D(4)]
        models1 += [ReLU()]
        models1 = ofa_super.convert(models1)
    models += models1
    # Group 3: converted with kernel_size=(3, 5, 7).
    with supernet(kernel_size=(3, 5, 7)) as ofa_super:
        models2 = []
        models2 += [nn.Conv2D(4, 4, 3)]
        models2 += [nn.BatchNorm2D(4)]
        models2 += [ReLU()]
        models2 += [nn.Conv2DTranspose(4, 4, 3)]
        models2 += [nn.BatchNorm2D(4)]
        models2 += [ReLU()]
        models2 += [nn.Conv2D(4, 4, 3)]
        models2 += [nn.BatchNorm2D(4)]
        models2 += [ReLU()]
        models2 = ofa_super.convert(models2)
    models += models2
    # All converted layers run back to back.
    self.models = paddle.nn.Sequential(*models)
def __init__(self, img_size=256, style_dim=64, max_conv_dim=512, w_hpf=1):
    """Build the mirrored encoder / decoder block lists of the generator.

    Args:
        img_size: Stored on ``self``; sets the initial width
            (``2**14 // img_size``) and the number of resampling stages.
        style_dim: Style dimension handed to every ``AdainResBlk``.
        max_conv_dim: Upper bound for the channel width.
        w_hpf: Forwarded to every ``AdainResBlk``; when positive one extra
            resampling stage is added and ``self.hpf`` is created.
    """
    super().__init__()
    dim_in = 2**14 // img_size
    self.img_size = img_size

    self.from_rgb = nn.Conv2D(3, dim_in, 3, 1, 1)
    self.encode = nn.LayerList()
    self.decode = nn.LayerList()
    self.to_rgb = nn.Sequential(
        nn.InstanceNorm2D(dim_in, weight_attr=True, bias_attr=True),
        nn.LeakyReLU(0.2),
        nn.Conv2D(dim_in, 3, 1, 1, 0))

    # down/up-sampling blocks
    num_stages = int(np.log2(img_size)) - 4
    if w_hpf > 0:
        num_stages += 1

    for _ in range(num_stages):
        dim_out = min(dim_in * 2, max_conv_dim)
        self.encode.append(
            ResBlk(dim_in, dim_out, normalize=True, downsample=True))
        up_block = AdainResBlk(
            dim_out, dim_in, style_dim, w_hpf=w_hpf, upsample=True)
        # Decoder blocks are pushed to the front (stack-like); the very
        # first one is appended while the list is still empty.
        if len(self.decode) == 0:
            self.decode.append(up_block)
        else:
            self.decode.insert(0, up_block)
        dim_in = dim_out

    # bottleneck blocks
    for _ in range(2):
        self.encode.append(ResBlk(dim_out, dim_out, normalize=True))
        self.decode.insert(
            0, AdainResBlk(dim_out, dim_out, style_dim, w_hpf=w_hpf))

    if w_hpf > 0:
        self.hpf = HighPass(w_hpf)
def __init__(self, in_channels, out_channels=None, kernel_size=3,
             norm_type='bn', norm_groups=32, act='swish'):
    """Create a depthwise + pointwise convolution with optional norm / act.

    Args:
        in_channels: Input channels; the depthwise convolution keeps this
            width and uses one group per channel.
        out_channels: Output channels of the pointwise convolution;
            defaults to ``in_channels`` when ``None``.
        kernel_size: Kernel size of the depthwise convolution (padding is
            ``kernel_size // 2``).
        norm_type: One of ``'bn'``, ``'sync_bn'``, ``'gn'`` or ``None``;
            no ``self.norm`` is created for ``None``.
        norm_groups: Number of groups when ``norm_type`` is ``'gn'``.
        act: One of ``'swish'``, ``'relu'`` or ``None``; no ``self.act``
            is created for ``None``.
    """
    super(SeparableConvLayer, self).__init__()
    assert norm_type in ['bn', 'sync_bn', 'gn', None]
    assert act in ['swish', 'relu', None]

    self.in_channels = in_channels
    # Always define out_channels. Previously it was only assigned when the
    # argument was None, so passing an explicit value raised AttributeError
    # on the first use of self.out_channels below.
    self.out_channels = in_channels if out_channels is None else out_channels
    self.norm_type = norm_type
    self.norm_groups = norm_groups

    self.depthwise_conv = nn.Conv2D(
        in_channels,
        in_channels,
        kernel_size,
        padding=kernel_size // 2,
        groups=in_channels,
        bias_attr=False)
    self.pointwise_conv = nn.Conv2D(in_channels, self.out_channels, 1)

    # norm type
    if self.norm_type == 'bn':
        self.norm = nn.BatchNorm2D(self.out_channels)
    elif self.norm_type == 'sync_bn':
        self.norm = nn.SyncBatchNorm(self.out_channels)
    elif self.norm_type == 'gn':
        self.norm = nn.GroupNorm(
            num_groups=self.norm_groups, num_channels=self.out_channels)

    # activation
    if act == 'swish':
        self.act = nn.Swish()
    elif act == 'relu':
        self.act = nn.ReLU()
def __init__(self, inplanes, planes, stride=1):
    """Create a bottleneck whose convolutions all have stride 1.

    Spatial reduction is done by an average pool after the second
    convolution when ``stride > 1``.

    Args:
        inplanes: Input channels.
        planes: Width of the first two convolutions; the third emits
            ``planes * self.expansion`` channels.
        stride: Kernel size given to the average pools; also stored on
            ``self``.
    """
    super().__init__()

    out_planes = planes * self.expansion

    self.conv1 = nn.Conv2D(inplanes, planes, 1, bias_attr=False)
    self.bn1 = nn.BatchNorm2D(planes)

    self.conv2 = nn.Conv2D(planes, planes, 3, padding=1, bias_attr=False)
    self.bn2 = nn.BatchNorm2D(planes)

    # Pool only when the block has to reduce resolution.
    if stride > 1:
        self.avgpool = nn.AvgPool2D(stride)
    else:
        self.avgpool = Identity()

    self.conv3 = nn.Conv2D(planes, out_planes, 1, bias_attr=False)
    self.bn3 = nn.BatchNorm2D(out_planes)

    self.relu = nn.ReLU()
    self.downsample = None
    self.stride = stride

    needs_projection = (stride > 1 or
                        inplanes != planes * Bottleneck.expansion)
    if needs_projection:
        # The shortcut is an average pool followed by a stride-1 1x1
        # convolution and a batch norm, registered under the names
        # "-1", "0" and "1".
        self.downsample = nn.Sequential(
            ("-1", nn.AvgPool2D(stride)),
            ("0", nn.Conv2D(inplanes, out_planes, 1, stride=1,
                            bias_attr=False)),
            ("1", nn.BatchNorm2D(out_planes)))
def make_layers(cfg, batch_norm=False):
    """Translate a configuration list into an ``nn.Sequential`` stack.

    Args:
        cfg: Iterable whose entries are either ``'M'`` (a 2x2 max pool with
            stride 2) or an output channel count for a 3x3 convolution with
            padding 1. The first convolution takes 3 input channels.
        batch_norm: When true a ``BatchNorm2D`` is inserted between every
            convolution and its ReLU.

    Returns:
        ``nn.Sequential`` containing the layers in configuration order.
    """
    modules = []
    prev_channels = 3
    for entry in cfg:
        if entry == 'M':
            modules.append(nn.MaxPool2D(kernel_size=2, stride=2))
            continue

        modules.append(
            nn.Conv2D(prev_channels, entry, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2D(entry))
        modules.append(nn.ReLU())
        prev_channels = entry

    return nn.Sequential(*modules)
def __init__(self, num_classes, in_channels):
    """Create the position / channel attention paths and four 1x1 heads.

    Args:
        num_classes: Output channels of every head.
        in_channels: Sequence of channel counts; only the last entry is
            used. The attention paths work at a quarter of that width.
    """
    super().__init__()

    feat_channels = in_channels[-1]
    inter_channels = feat_channels // 4

    def _head(channels):
        # Dropout2D(0.1) followed by a 1x1 classification convolution.
        return nn.Sequential(
            nn.Dropout2D(0.1), nn.Conv2D(channels, num_classes, 1))

    self.channel_conv = layers.ConvBNReLU(feat_channels, inter_channels, 3)
    self.position_conv = layers.ConvBNReLU(feat_channels, inter_channels, 3)
    self.pam = PAM(inter_channels)
    self.cam = CAM()
    self.conv1 = layers.ConvBNReLU(inter_channels, inter_channels, 3)
    self.conv2 = layers.ConvBNReLU(inter_channels, inter_channels, 3)

    # aux_head consumes the full-width features; the others the reduced ones.
    self.aux_head = _head(feat_channels)
    self.aux_head_pam = _head(inter_channels)
    self.aux_head_cam = _head(inter_channels)
    self.cls_head = _head(inter_channels)
def __init__(self, num_queries=100, position_embed_type='sine',
             return_intermediate_dec=True, backbone_num_channels=2048,
             hidden_dim=256, nhead=8, num_encoder_layers=6,
             num_decoder_layers=6, dim_feedforward=2048, dropout=0.1,
             activation="relu", attn_dropout=None, act_dropout=None,
             normalize_before=False):
    """Create the encoder, decoder, input projection and embeddings.

    Args:
        num_queries: Number of rows of the query position embedding.
        position_embed_type: ``'sine'`` or ``'learned'``.
        return_intermediate_dec: Forwarded to ``TransformerDecoder`` as
            ``return_intermediate``.
        backbone_num_channels: Input channels of the 1x1 input projection.
        hidden_dim: Model width.
        nhead: Number of attention heads.
        num_encoder_layers: Number of encoder layers.
        num_decoder_layers: Number of decoder layers.
        dim_feedforward: Forwarded to the encoder / decoder layers.
        dropout: Forwarded to the encoder / decoder layers.
        activation: Forwarded to the encoder / decoder layers.
        attn_dropout: Forwarded to the encoder / decoder layers.
        act_dropout: Forwarded to the encoder / decoder layers.
        normalize_before: Forwarded to the layers; the encoder only gets a
            final ``LayerNorm`` when this is true.
    """
    super(DETRTransformer, self).__init__()
    assert position_embed_type in ['sine', 'learned'],\
        f'ValueError: position_embed_type not supported {position_embed_type}!'
    self.hidden_dim = hidden_dim
    self.nhead = nhead

    # Encoder and decoder layers take the same positional arguments.
    layer_args = (hidden_dim, nhead, dim_feedforward, dropout, activation,
                  attn_dropout, act_dropout, normalize_before)

    encoder_layer = TransformerEncoderLayer(*layer_args)
    if normalize_before:
        encoder_norm = nn.LayerNorm(hidden_dim)
    else:
        encoder_norm = None
    self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers,
                                      encoder_norm)

    decoder_layer = TransformerDecoderLayer(*layer_args)
    decoder_norm = nn.LayerNorm(hidden_dim)
    self.decoder = TransformerDecoder(
        decoder_layer,
        num_decoder_layers,
        decoder_norm,
        return_intermediate=return_intermediate_dec)

    self.input_proj = nn.Conv2D(
        backbone_num_channels, hidden_dim, kernel_size=1)
    self.query_pos_embed = nn.Embedding(num_queries, hidden_dim)
    # Normalisation is enabled only for the sine embedding.
    self.position_embedding = PositionEmbedding(
        hidden_dim // 2,
        normalize=(position_embed_type == 'sine'),
        embed_type=position_embed_type)

    self._reset_parameters()
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
    """Create the patch projection convolution.

    Args:
        img_size: Image size; converted with ``to_2tuple``.
        patch_size: Patch size; converted with ``to_2tuple`` and used as
            both kernel size and stride of the projection.
        in_chans: Input channels of the projection.
        embed_dim: Output channels of the projection.
    """
    super().__init__()

    image_hw = to_2tuple(img_size)
    patch_hw = to_2tuple(patch_size)

    # Whole patches that fit along each axis (floor division).
    patches_axis1 = image_hw[1] // patch_hw[1]
    patches_axis0 = image_hw[0] // patch_hw[0]

    self.img_size = image_hw
    self.patch_size = patch_hw
    self.num_patches = patches_axis1 * patches_axis0

    self.proj = nn.Conv2D(
        in_chans, embed_dim, kernel_size=patch_hw, stride=patch_hw)