def __init__(self,
             in_channels,
             c_wise_channels,
             out_channels,
             init_cfg=[dict(type='Kaiming', layer='Conv', bias=0)]):
    """Create the global pooling layer and the three attention branches.

    Args:
        in_channels (int): Input channels of the channel-wise branch and
            of the attention-wise conv.
        c_wise_channels (int): Hidden channels inside the channel-wise
            branch.
        out_channels (int): Output channels of the attention-wise conv.
        init_cfg (dict | list[dict]): Forwarded unchanged to the parent
            constructor.
    """
    super().__init__(init_cfg=init_cfg)

    # Keyword arguments shared by every ConvModule built below.
    shared = dict(bias=False, conv_cfg=None, norm_cfg=None, inplace=False)

    self.avg_pool = nn.AdaptiveAvgPool2d(1)

    # Channel-wise branch: in -> c_wise (ReLU) -> in (Sigmoid), 1x1 convs.
    squeeze = ConvModule(
        in_channels, c_wise_channels, 1, act_cfg=dict(type='ReLU'), **shared)
    excite = ConvModule(
        c_wise_channels, in_channels, 1, act_cfg=dict(type='Sigmoid'),
        **shared)
    self.channel_wise = Sequential(squeeze, excite)

    # Spatial-wise branch: single-channel 3x3 (ReLU) then 1x1 (Sigmoid).
    spatial_3x3 = ConvModule(
        1, 1, 3, padding=1, act_cfg=dict(type='ReLU'), **shared)
    spatial_1x1 = ConvModule(
        1, 1, 1, act_cfg=dict(type='Sigmoid'), **shared)
    self.spatial_wise = Sequential(spatial_3x3, spatial_1x1)

    # Attention-wise projection: 1x1 conv followed by Sigmoid.
    self.attention_wise = ConvModule(
        in_channels, out_channels, 1, act_cfg=dict(type='Sigmoid'), **shared)
def _init_layers(self):
    """Build ``self.layers`` as a named ``Sequential``.

    Without a hidden dimension the stack is a single ``head`` linear layer;
    otherwise it is ``pre_logits`` -> ``act`` -> ``head``.
    """
    named = OrderedDict()
    if self.hidden_dim is not None:
        named['pre_logits'] = nn.Linear(self.in_channels, self.hidden_dim)
        named['act'] = build_activation_layer(self.act_cfg)
        head_in = self.hidden_dim
    else:
        head_in = self.in_channels
    named['head'] = nn.Linear(head_in, self.num_classes)
    self.layers = Sequential(named)
def test_sequential_model_weight_init():
    """Check that ``Sequential.init_weights`` applies each layer's init_cfg.

    The second half repeats the check with an outer ``init_cfg`` on the
    container and expects the per-layer (inner) values to still win.
    """
    seq_model_cfg = [
        dict(
            type='FooConv1d',
            init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)),
        dict(
            type='FooConv2d',
            init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)),
    ]

    def build_layers():
        return [build_from_cfg(cfg, COMPONENTS) for cfg in seq_model_cfg]

    def assert_inner_constants(model):
        # (parameter, constant every element must equal)
        expectations = (
            (model[0].conv1d.weight, 0.),
            (model[0].conv1d.bias, 1.),
            (model[1].conv2d.weight, 2.),
            (model[1].conv2d.bias, 3.),
        )
        for param, value in expectations:
            assert torch.equal(param, torch.full(param.shape, value))

    # Only the per-layer init_cfg is present.
    seq_model = Sequential(*build_layers())
    seq_model.init_weights()
    assert_inner_constants(seq_model)

    # inner init_cfg has higher priority
    outer_cfg = dict(
        type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)
    seq_model = Sequential(*build_layers(), init_cfg=outer_cfg)
    seq_model.init_weights()
    assert_inner_constants(seq_model)
def _make_extra_layers(self, outplanes):
    """Build the extra conv layers described by ``outplanes``.

    Entries of ``outplanes`` are either an output channel count or the
    marker ``'S'``. A marker means "the next entry is produced by a
    stride-2 conv"; the entry following the marker is consumed together
    with it. Kernel sizes alternate 1, 3, 1, 3, ... over the convs that
    are actually created. ``self.inplanes`` is updated as layers are added.

    Args:
        outplanes (Sequence): Channel counts interleaved with ``'S'``.

    Returns:
        Sequential: The created conv layers, plus one trailing 4x4 conv
        when ``self.input_size == 512``.
    """
    kernel_sizes = (1, 3)
    convs = []
    pending = None  # output width of the most recent stride-2 conv
    for pos, spec in enumerate(outplanes):
        if self.inplanes == 'S':
            # Previous entry was a marker: this entry was already consumed.
            self.inplanes = pending
            continue
        kernel = kernel_sizes[len(convs) % 2]
        if spec == 'S':
            pending = outplanes[pos + 1]
            conv = nn.Conv2d(
                self.inplanes, pending, kernel, stride=2, padding=1)
        else:
            pending = spec
            conv = nn.Conv2d(
                self.inplanes, pending, kernel, stride=1, padding=0)
        convs.append(conv)
        # May temporarily hold 'S'; resolved at the top of the next pass.
        self.inplanes = spec
    if self.input_size == 512:
        convs.append(nn.Conv2d(self.inplanes, 256, 4, padding=1))
    return Sequential(*convs)
def _make_layer(self, block_cfgs, inplanes, planes, blocks, stride):
    """Stack ``blocks`` residual blocks into one ``Sequential`` stage.

    Args:
        block_cfgs (dict): Block config. ``block_cfgs['type']`` selects the
            block class; the remaining items are passed to every block as
            keyword arguments. The dict is copied, not modified.
        inplanes (int): Input channels of the first block.
        planes (int): Output channels of every block.
        blocks (int): Number of blocks in the stage.
        stride (int | tuple[int, int]): Stride of the first block.

    Returns:
        Sequential: The stacked blocks.

    Raises:
        ValueError: If ``block_cfgs['type']`` is not ``'BasicBlock'``.
    """
    block_kwargs = block_cfgs.copy()
    block_type = block_kwargs.pop('type')
    # Validate the block type before building anything. The previous code
    # formatted the message from the still-unbound local ``block`` and so
    # raised UnboundLocalError instead of the intended ValueError.
    if block_type != 'BasicBlock':
        raise ValueError('{} not implement yet'.format(block_type))
    block = BasicBlock

    if isinstance(stride, int):
        stride = (stride, stride)

    # A 1x1 projection is needed whenever the first block changes the
    # spatial size or the channel count.
    downsample = None
    if stride[0] != 1 or stride[1] != 1 or inplanes != planes:
        downsample = ConvModule(
            inplanes,
            planes,
            1,
            stride,
            norm_cfg=dict(type='BN'),
            act_cfg=None)

    layers = [
        block(
            inplanes,
            planes,
            stride=stride,
            downsample=downsample,
            **block_kwargs)
    ]
    inplanes = planes
    for _ in range(1, blocks):
        layers.append(block(inplanes, planes, **block_kwargs))
    return Sequential(*layers)
def _make_stage(self, in_channels, out_channels, num_blocks, stride,
                dilation, next_create_block_idx, init_cfg):
    """Create one stage of ``num_blocks`` RepVGG blocks.

    Only the first block uses ``stride``; the rest use stride 1. Every
    block uses ``dilation`` for both padding and dilation. The group count
    of each block is looked up by its global block index in
    ``self.arch['group_layer_map']`` (1 when the map is ``None`` or has no
    entry for that index).

    Returns:
        tuple: ``(Sequential of blocks, next global block index)``.
    """
    group_map = self.arch['group_layer_map']
    stage_blocks = []
    for offset in range(num_blocks):
        if group_map is None:
            groups = 1
        else:
            groups = group_map.get(next_create_block_idx, 1)
        stage_blocks.append(
            RepVGGBlock(
                in_channels,
                out_channels,
                stride=stride if offset == 0 else 1,
                padding=dilation,
                dilation=dilation,
                groups=groups,
                se_cfg=self.arch['se_cfg'],
                with_cp=self.with_cp,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg,
                deploy=self.deploy,
                init_cfg=init_cfg))
        # Blocks after the first take the stage's output width as input.
        in_channels = out_channels
        next_create_block_idx += 1

    return Sequential(*stage_blocks), next_create_block_idx
def __init__(self,
             leaky_relu=True,
             input_channels=3,
             init_cfg=[
                 dict(type='Xavier', layer='Conv2d'),
                 dict(type='Uniform', layer='BatchNorm2d')
             ]):
    """Assemble a seven-conv feature extractor with four max-pool layers.

    Args:
        leaky_relu (bool): Use ``LeakyReLU(0.2)`` after every conv when
            True, plain ``ReLU`` otherwise.
        input_channels (int): Channels of the input image.
        init_cfg (dict | list[dict]): Forwarded to the parent constructor.
    """
    super().__init__(init_cfg=init_cfg)

    kernels = [3, 3, 3, 3, 3, 3, 2]
    paddings = [1, 1, 1, 1, 1, 1, 0]
    strides = [1, 1, 1, 1, 1, 1, 1]
    widths = [64, 128, 256, 256, 512, 512, 512]
    self.channels = widths

    cnn = Sequential()

    def add_conv_block(idx, with_bn=False):
        """Append conv (+ optional batch norm) + activation number idx."""
        in_width = widths[idx - 1] if idx != 0 else input_channels
        out_width = widths[idx]
        cnn.add_module(
            'conv{0}'.format(idx),
            nn.Conv2d(in_width, out_width, kernels[idx], strides[idx],
                      paddings[idx]))
        if with_bn:
            cnn.add_module('batchnorm{0}'.format(idx),
                           nn.BatchNorm2d(out_width))
        activation = (
            nn.LeakyReLU(0.2, inplace=True) if leaky_relu else nn.ReLU(True))
        cnn.add_module('relu{0}'.format(idx), activation)

    def add_pool(idx, *pool_args):
        """Append max-pool layer number idx."""
        cnn.add_module('pooling{0}'.format(idx), nn.MaxPool2d(*pool_args))

    # The trailing shape notes are the original author's; they presume a
    # particular input size that is not fixed anywhere in this method.
    add_conv_block(0)
    add_pool(0, 2, 2)  # 64x16x64
    add_conv_block(1)
    add_pool(1, 2, 2)  # 128x8x32
    add_conv_block(2, True)
    add_conv_block(3)
    add_pool(2, (2, 2), (2, 1), (0, 1))  # 256x4x16
    add_conv_block(4, True)
    add_conv_block(5)
    add_pool(3, (2, 2), (2, 1), (0, 1))  # 512x2x16
    add_conv_block(6, True)  # 512x1x16

    self.cnn = cnn
def make_layer(self):
    """Build the middle stages and append them to ``self.layers``.

    The first and last entries of ``self.layer_setting`` are skipped.
    Each remaining entry is a list of block configs of the form
    ``(kernel_size, out_channels, se_ratio, stride, expand_ratio,
    block_type)``. ``self.in_channels`` is advanced after every block.
    """
    # Without the first and the final conv block.
    inner_settings = self.layer_setting[1:-1]
    total_num_blocks = sum(len(stage) for stage in inner_settings)
    # stochastic depth decay rule
    dpr = [
        rate.item()
        for rate in torch.linspace(0, self.drop_path_rate, total_num_blocks)
    ]

    global_idx = 0
    for stage_cfg in inner_settings:
        stage_blocks = []
        for local_idx, block_cfg in enumerate(stage_cfg):
            (kernel_size, out_channels, se_ratio, stride, expand_ratio,
             block_type) = block_cfg
            is_edge = block_type == 1  # edge tpu

            # Non-leading edge blocks with expand ratio 3 drop the
            # residual connection and are widened to ratio 4.
            with_residual = True
            if is_edge and local_idx > 0 and expand_ratio == 3:
                with_residual = False
                expand_ratio = 4

            mid_channels = int(self.in_channels * expand_ratio)
            out_channels = make_divisible(out_channels, 8)

            se_cfg = None
            if se_ratio > 0:
                se_cfg = dict(
                    channels=mid_channels,
                    ratio=se_ratio * expand_ratio,
                    divisor=1,
                    act_cfg=(self.act_cfg, dict(type='Sigmoid')))

            if is_edge:
                block = partial(EdgeResidual, with_residual=with_residual)
            else:
                block = InvertedResidual

            stage_blocks.append(
                block(
                    in_channels=self.in_channels,
                    out_channels=out_channels,
                    mid_channels=mid_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    se_cfg=se_cfg,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=self.act_cfg,
                    drop_path_rate=dpr[global_idx],
                    with_cp=self.with_cp))
            self.in_channels = out_channels
            global_idx += 1
        self.layers.append(Sequential(*stage_blocks))
def __init__(self,
             arch,
             stem_fn,
             in_channels=3,
             out_indices=-1,
             frozen_stages=-1,
             drop_path_rate=0.,
             conv_cfg=None,
             norm_cfg=dict(type='BN', eps=1e-5),
             act_cfg=dict(type='LeakyReLU', inplace=True),
             norm_eval=False,
             init_cfg=dict(type='Kaiming', layer='Conv2d')):
    """Build the stem and the CSP stages and normalise ``out_indices``.

    Args:
        arch (dict): Architecture description; expanded with
            ``self.expand_arch`` into per-stage lists.
        stem_fn (callable): Called with ``in_channels``; its result is
            stored as ``self.stem``.
        in_channels (int): Channels passed to ``stem_fn``.
        out_indices (int | Sequence[int]): Stage indices to output.
            Negative values count from the end.
        frozen_stages (int): Must lie in ``range(-1, num_stages)``.
        drop_path_rate (float): Upper end of the linearly spaced
            per-block drop-path rates.
        conv_cfg, norm_cfg, act_cfg: Forwarded to every ``CSPStage``.
        norm_eval (bool): Stored on the instance.
        init_cfg (dict): Forwarded to the parent and to every stage.

    Raises:
        ValueError: If ``frozen_stages`` is out of range.
    """
    super().__init__(init_cfg=init_cfg)
    self.arch = self.expand_arch(arch)
    self.num_stages = len(self.arch['in_channels'])

    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.norm_eval = norm_eval

    if frozen_stages not in range(-1, self.num_stages):
        raise ValueError('frozen_stages must be in range(-1, '
                         f'{self.num_stages}). But received '
                         f'{frozen_stages}')
    self.frozen_stages = frozen_stages

    self.stem = stem_fn(in_channels)

    # One chunk of linearly increasing drop-path rates per stage.
    depths = self.arch['num_blocks']
    dpr = torch.linspace(0, drop_path_rate, sum(depths)).split(depths)

    stages = [
        CSPStage(
            **{key: values[idx] for key, values in self.arch.items()},
            block_dpr=dpr[idx].tolist(),
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            init_cfg=init_cfg) for idx in range(self.num_stages)
    ]
    self.stages = Sequential(*stages)

    if isinstance(out_indices, int):
        out_indices = [out_indices]
    assert isinstance(out_indices, Sequence), \
        f'"out_indices" must by a sequence or int, ' \
        f'get {type(out_indices)} instead.'

    num_built = len(self.stages)
    resolved = []
    for index in out_indices:
        position = num_built + index if index < 0 else index
        # NOTE(review): the upper bound is inclusive, so ``num_built``
        # itself passes this check -- confirm that is intended.
        assert 0 <= position <= num_built, \
            f'Invalid out_indices {index}.'
        resolved.append(position)
    self.out_indices = resolved
def _make_layer(self, input_channels, output_channels, blocks):
    """Chain ``blocks`` BasicBlocks going to ``output_channels`` channels.

    A block whose input width differs from ``output_channels`` gets a
    1x1 conv + batch norm passed as its ``downsample`` argument; once the
    width matches, later blocks receive ``downsample=None``.
    """
    stage = []
    current = input_channels
    for _ in range(blocks):
        if current != output_channels:
            shortcut = Sequential(
                nn.Conv2d(
                    current,
                    output_channels,
                    kernel_size=1,
                    stride=1,
                    bias=False),
                nn.BatchNorm2d(output_channels),
            )
        else:
            shortcut = None
        stage.append(
            BasicBlock(current, output_channels, downsample=shortcut))
        current = output_channels
    return Sequential(*stage)
def _init_thr(self, inner_channels, bias=False):
    """Build the threshold branch.

    Layout: 3x3 conv to a quarter of the channels, BN, ReLU, a 2x2
    stride-2 transposed conv, BN, ReLU, a second 2x2 stride-2 transposed
    conv down to one channel, and a Sigmoid.

    Args:
        inner_channels (int): Input channels of the branch.
        bias (bool): Whether the first conv has a bias term.

    Returns:
        Sequential: The assembled branch.
    """
    quarter = inner_channels // 4
    stack = [
        nn.Conv2d(inner_channels, quarter, 3, padding=1, bias=bias),
        nn.BatchNorm2d(quarter),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(quarter, quarter, 2, 2),
        nn.BatchNorm2d(quarter),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(quarter, 1, 2, 2),
        nn.Sigmoid(),
    ]
    return Sequential(*stack)
def __init__(self,
             in_channels,
             with_bias=False,
             decoding_type='db',
             text_repr_type='poly',
             downsample_ratio=1.0,
             loss=dict(type='DBLoss'),
             train_cfg=None,
             test_cfg=None,
             init_cfg=[
                 dict(type='Kaiming', layer='Conv'),
                 dict(type='Constant', layer='BatchNorm', val=1., bias=1e-4)
             ]):
    """Initialization.

    Args:
        in_channels (int): The number of input channels of the db head.
        with_bias (bool): Whether the first conv of the binarize branch
            has a bias term.
        decoding_type (str): The type of decoder for dbnet.
        text_repr_type (str): Boundary encoding type 'poly' or 'quad'.
        downsample_ratio (float): The downsample ratio of ground truths.
        loss (dict): The type of loss for dbnet.
        train_cfg: Stored on the instance unchanged.
        test_cfg: Stored on the instance unchanged.
        init_cfg (dict | list[dict]): Forwarded to the parent constructor.
    """
    super().__init__(init_cfg=init_cfg)

    assert isinstance(in_channels, int)

    self.in_channels = in_channels
    self.text_repr_type = text_repr_type
    self.loss_module = build_loss(loss)
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.downsample_ratio = downsample_ratio
    self.decoding_type = decoding_type

    # Binarize branch: reduce to a quarter of the channels, then apply
    # two 2x2 stride-2 transposed convs down to a single channel.
    quarter = in_channels // 4
    binarize_layers = [
        nn.Conv2d(in_channels, quarter, 3, bias=with_bias, padding=1),
        nn.BatchNorm2d(quarter),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(quarter, quarter, 2, 2),
        nn.BatchNorm2d(quarter),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(quarter, 1, 2, 2),
        nn.Sigmoid(),
    ]
    self.binarize = Sequential(*binarize_layers)

    self.threshold = self._init_thr(in_channels)
def __init__(self,
             in_channels,
             with_bias=False,
             downsample_ratio=1.0,
             loss=dict(type='DBLoss'),
             postprocessor=dict(type='DBPostprocessor',
                                text_repr_type='quad'),
             init_cfg=[
                 dict(type='Kaiming', layer='Conv'),
                 dict(type='Constant', layer='BatchNorm', val=1., bias=1e-4)
             ],
             train_cfg=None,
             test_cfg=None,
             **kwargs):
    """Initialise the head, its loss/postprocessor and both branches.

    Args:
        in_channels (int): The number of input channels of the head.
        with_bias (bool): Whether the first conv of the binarize branch
            has a bias term.
        downsample_ratio (float): Stored on the instance unchanged.
        loss (dict): Loss config handed to ``HeadMixin.__init__``.
        postprocessor (dict): Postprocessor config handed to
            ``HeadMixin.__init__``. It is never modified in place.
        init_cfg (dict | list[dict]): Forwarded to ``BaseModule.__init__``.
        train_cfg: Stored on the instance unchanged.
        test_cfg: Stored on the instance unchanged.
        **kwargs: Only the deprecated keys ``text_repr_type`` and
            ``decoding_type`` are read; truthy values are copied into the
            postprocessor config and a ``UserWarning`` is emitted.
    """
    old_keys = ['text_repr_type', 'decoding_type']
    copied = False
    for key in old_keys:
        if kwargs.get(key, None):
            if not copied:
                # Work on a private copy. Writing into the argument
                # itself would alter the shared default dict (or the
                # caller's dict) for every later instantiation.
                postprocessor = postprocessor.copy()
                copied = True
            postprocessor[key] = kwargs.get(key)
            warnings.warn(
                f'{key} is deprecated, please specify '
                'it in postprocessor config dict. See '
                'https://github.com/open-mmlab/mmocr/pull/640'
                ' for details.', UserWarning)

    BaseModule.__init__(self, init_cfg=init_cfg)
    HeadMixin.__init__(self, loss, postprocessor)

    assert isinstance(in_channels, int)

    self.in_channels = in_channels
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.downsample_ratio = downsample_ratio

    # Binarize branch: reduce to a quarter of the channels, then apply
    # two 2x2 stride-2 transposed convs down to a single channel.
    self.binarize = Sequential(
        nn.Conv2d(
            in_channels, in_channels // 4, 3, bias=with_bias, padding=1),
        nn.BatchNorm2d(in_channels // 4),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),
        nn.BatchNorm2d(in_channels // 4),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(in_channels // 4, 1, 2, 2),
        nn.Sigmoid())

    self.threshold = self._init_thr(in_channels)
def _make_layer(self, block, inplanes, planes, blocks, stride=1):
    """Make each layer.

    Builds ``blocks`` instances of ``block``. Only the first one receives
    ``stride`` and, when the stride or the channel count changes, a
    1x1 conv + norm ``downsample`` module.

    Returns:
        Sequential: The stacked blocks.
    """
    expanded = planes * block.expansion

    downsample = None
    if stride != 1 or inplanes != expanded:
        projection = build_conv_layer(
            self.conv_cfg,
            inplanes,
            expanded,
            kernel_size=1,
            stride=stride,
            bias=False)
        projection_norm = build_norm_layer(self.norm_cfg, expanded)[1]
        downsample = nn.Sequential(projection, projection_norm)

    # Zero-initialise the last norm of each block only when no pretrained
    # weights are given, the instance has no ``init_cfg`` attribute and
    # ``zero_init_residual`` is set.
    block_init_cfg = None
    if (self.pretrained is None and not hasattr(self, 'init_cfg')
            and self.zero_init_residual):
        if block is BasicBlock:
            block_init_cfg = dict(
                type='Constant', val=0, override=dict(name='norm2'))
        elif block is Bottleneck:
            block_init_cfg = dict(
                type='Constant', val=0, override=dict(name='norm3'))

    shared = dict(
        with_cp=self.with_cp,
        norm_cfg=self.norm_cfg,
        conv_cfg=self.conv_cfg,
        init_cfg=block_init_cfg)

    layers = [block(inplanes, planes, stride, downsample=downsample, **shared)]
    inplanes = expanded
    for _ in range(1, blocks):
        layers.append(block(inplanes, planes, **shared))

    return Sequential(*layers)
def __init__(self,
             in_channels=None,
             num_classes=None,
             rnn_flag=False,
             init_cfg=dict(type='Xavier', layer='Conv2d'),
             **kwargs):
    """Create the decoder: two stacked BiLSTMs or a single 1x1 conv.

    Args:
        in_channels (int): Input feature channels.
        num_classes (int): Number of output classes.
        rnn_flag (bool): Use the recurrent decoder when truthy, the 1x1
            convolution otherwise.
        init_cfg (dict): Forwarded to the parent constructor.
        **kwargs: Accepted but not used by this constructor.
    """
    super().__init__(init_cfg=init_cfg)
    self.num_classes = num_classes
    self.rnn_flag = rnn_flag

    if not rnn_flag:
        self.decoder = nn.Conv2d(
            in_channels, num_classes, kernel_size=1, stride=1)
        return

    first_lstm = BidirectionalLSTM(in_channels, 256, 256)
    second_lstm = BidirectionalLSTM(256, 256, num_classes)
    self.decoder = Sequential(first_lstm, second_lstm)
def _make_stem_layer(self, in_channels, stem_channels):
    """Create the stem as a chain of 3x3 conv + BN + ReLU modules.

    Args:
        in_channels (int): Channels entering the first stem conv.
        stem_channels (int | Sequence[int]): Output width of each stem
            conv; a bare int means a single conv.

    Side effects:
        Sets ``self.stem_layers`` and sets ``self.inplanes`` to the last
        stem width.
    """
    if isinstance(stem_channels, int):
        stem_channels = [stem_channels]

    # Consecutive (input, output) widths for every stem conv.
    widths = [in_channels, *stem_channels]
    stem = [
        ConvModule(
            width_in,
            width_out,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='ReLU'))
        for width_in, width_out in zip(widths[:-1], widths[1:])
    ]
    self.stem_layers = Sequential(*stem)
    self.inplanes = stem_channels[-1]
def _make_one_branch(self,
                     branch_index,
                     block,
                     num_blocks,
                     num_channels,
                     stride=1):
    """Build one branch.

    Creates ``num_blocks[branch_index]`` blocks for the given branch and
    updates ``self.in_channels[branch_index]`` to the branch's expanded
    output width.

    Returns:
        Sequential: The blocks of this branch.
    """
    branch_planes = num_channels[branch_index]
    branch_in = self.in_channels[branch_index]
    expanded = branch_planes * block.expansion

    # A 1x1 conv + norm projection is needed when the first block changes
    # the stride or the channel count.
    downsample = None
    if stride != 1 or branch_in != expanded:
        downsample = nn.Sequential(
            build_conv_layer(
                self.conv_cfg,
                branch_in,
                expanded,
                kernel_size=1,
                stride=stride,
                bias=False),
            build_norm_layer(self.norm_cfg, expanded)[1])

    shared = dict(
        with_cp=self.with_cp,
        norm_cfg=self.norm_cfg,
        conv_cfg=self.conv_cfg,
        init_cfg=self.block_init_cfg)

    layers = [
        block(branch_in, branch_planes, stride, downsample=downsample,
              **shared)
    ]
    self.in_channels[branch_index] = expanded
    for _ in range(1, num_blocks[branch_index]):
        layers.append(
            block(self.in_channels[branch_index], branch_planes, **shared))

    return Sequential(*layers)
def __init__(self,
             embed_dims,
             feedforward_channels,
             act_cfg=dict(type='GELU'),
             ffn_drop=0.,
             dropout_layer=None,
             use_conv=False,
             init_cfg=None):
    """Build the feed-forward stack out of 1x1 convolutions.

    Args:
        embed_dims (int): Input and output channels of the block.
        feedforward_channels (int): Hidden channels.
        act_cfg (dict): Activation config.
        ffn_drop (float): Dropout probability; the same dropout module is
            placed after the activation and after the second 1x1 conv.
        dropout_layer (dict | None): Config for ``self.dropout_layer``; an
            identity module is used when falsy.
        use_conv (bool): Insert a 3x3 depth-wise conv right after the
            first 1x1 conv.
        init_cfg (dict | None): Forwarded to the parent constructor.
    """
    super(MixFFN, self).__init__(init_cfg=init_cfg)

    self.embed_dims = embed_dims
    self.feedforward_channels = feedforward_channels
    self.act_cfg = act_cfg
    activate = build_activation_layer(act_cfg)

    stack = [
        Conv2d(
            in_channels=embed_dims,
            out_channels=feedforward_channels,
            kernel_size=1,
            stride=1,
            bias=True)
    ]
    if use_conv:
        # 3x3 depth wise conv to provide positional encode information
        dw_kernel = 3
        stack.append(
            Conv2d(
                in_channels=feedforward_channels,
                out_channels=feedforward_channels,
                kernel_size=dw_kernel,
                stride=1,
                padding=(dw_kernel - 1) // 2,
                bias=True,
                groups=feedforward_channels))
    project_out = Conv2d(
        in_channels=feedforward_channels,
        out_channels=embed_dims,
        kernel_size=1,
        stride=1,
        bias=True)
    drop = nn.Dropout(ffn_drop)
    stack.extend([activate, drop, project_out, drop])
    self.layers = Sequential(*stack)

    if dropout_layer:
        self.dropout_layer = build_dropout(dropout_layer)
    else:
        self.dropout_layer = torch.nn.Identity()
def _make_layer(self, block, inplanes, planes, blocks, stride=1):
    """Stack ``blocks`` instances of ``block`` with ``use_conv1x1=True``.

    Only the first block receives ``stride`` and, when the stride or the
    channel count changes, a 1x1 conv + batch norm ``downsample`` module.

    Returns:
        Sequential: The stacked blocks.
    """
    needs_projection = stride != 1 or inplanes != planes
    if needs_projection:
        downsample = nn.Sequential(
            nn.Conv2d(inplanes, planes, 1, stride, bias=False),
            nn.BatchNorm2d(planes),
        )
    else:
        downsample = None

    stage = [
        block(
            inplanes,
            planes,
            use_conv1x1=True,
            stride=stride,
            downsample=downsample)
    ]
    # Every following block keeps the channel count at ``planes``.
    for _ in range(1, blocks):
        stage.append(block(planes, planes, use_conv1x1=True))

    return Sequential(*stage)
def _make_stage(self, layer_config, in_channels, multiscale_output=True):
    """Make each stage.

    Args:
        layer_config (dict): Needs the keys ``num_modules``,
            ``num_branches``, ``num_blocks``, ``num_channels`` and
            ``block`` (a key into ``self.blocks_dict``).
        in_channels: Passed to every ``HRModule`` and returned as given.
        multiscale_output (bool): Only affects the last module of the
            stage; every earlier module is built with multiscale output.

    Returns:
        tuple: ``(Sequential of HRModules, in_channels)``.
    """
    num_modules = layer_config['num_modules']
    num_branches = layer_config['num_branches']
    num_blocks = layer_config['num_blocks']
    num_channels = layer_config['num_channels']
    block = self.blocks_dict[layer_config['block']]

    # Zero-initialise the last norm of each block only when no pretrained
    # weights are given, the instance has no ``init_cfg`` attribute and
    # ``zero_init_residual`` is set.
    block_init_cfg = None
    if (self.pretrained is None and not hasattr(self, 'init_cfg')
            and self.zero_init_residual):
        if block is BasicBlock:
            block_init_cfg = dict(
                type='Constant', val=0, override=dict(name='norm2'))
        elif block is Bottleneck:
            block_init_cfg = dict(
                type='Constant', val=0, override=dict(name='norm3'))

    hr_modules = []
    last_idx = num_modules - 1
    for module_idx in range(num_modules):
        # multi_scale_output is only used for the last module
        is_last = module_idx == last_idx
        reset_multiscale_output = not (is_last and not multiscale_output)
        hr_modules.append(
            HRModule(
                num_branches,
                block,
                num_blocks,
                in_channels,
                num_channels,
                reset_multiscale_output,
                with_cp=self.with_cp,
                norm_cfg=self.norm_cfg,
                conv_cfg=self.conv_cfg,
                block_init_cfg=block_init_cfg))

    return Sequential(*hr_modules), in_channels
def __init__(self,
             in_channels,
             with_bias=False,
             decoding_type='db',
             text_repr_type='poly',
             downsample_ratio=1.0,
             loss=dict(type='DBLoss'),
             train_cfg=None,
             test_cfg=None,
             init_cfg=[
                 dict(type='Kaiming', layer='Conv'),
                 dict(type='Constant', layer='BatchNorm', val=1., bias=1e-4)
             ]):
    """Initialise the head: loss module, binarize and threshold branches.

    Args:
        in_channels (int): The number of input channels of the head.
        with_bias (bool): Whether the first conv of the binarize branch
            has a bias term.
        decoding_type (str): Stored on the instance unchanged.
        text_repr_type (str): Stored on the instance unchanged.
        downsample_ratio (float): Stored on the instance unchanged.
        loss (dict): Config passed to ``build_loss``.
        train_cfg: Stored on the instance unchanged.
        test_cfg: Stored on the instance unchanged.
        init_cfg (dict | list[dict]): Forwarded to the parent constructor.
    """
    super().__init__(init_cfg=init_cfg)

    assert isinstance(in_channels, int)

    self.in_channels = in_channels
    self.text_repr_type = text_repr_type
    self.loss_module = build_loss(loss)
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.downsample_ratio = downsample_ratio
    self.decoding_type = decoding_type

    # Binarize branch: reduce to a quarter of the channels, then apply
    # two 2x2 stride-2 transposed convs down to a single channel.
    reduced = in_channels // 4
    first_conv = nn.Conv2d(
        in_channels, reduced, 3, bias=with_bias, padding=1)
    self.binarize = Sequential(
        first_conv,
        nn.BatchNorm2d(reduced),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(reduced, reduced, 2, 2),
        nn.BatchNorm2d(reduced),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(reduced, 1, 2, 2),
        nn.Sigmoid())

    self.threshold = self._init_thr(in_channels)
def create_conv_bn(self, kernel_size, dilation=1, padding=0):
    """Return a ``Sequential`` holding a bias-free conv and a norm layer.

    The two children are registered under the names ``'conv'`` and
    ``'norm'``. Channel counts, stride and groups come from the instance.

    Args:
        kernel_size: Kernel size of the conv.
        dilation: Dilation of the conv.
        padding: Padding of the conv.
    """
    conv_bn = Sequential()
    conv = build_conv_layer(
        self.conv_cfg,
        in_channels=self.in_channels,
        out_channels=self.out_channels,
        kernel_size=kernel_size,
        stride=self.stride,
        dilation=dilation,
        padding=padding,
        groups=self.groups,
        bias=False)
    conv_bn.add_module('conv', conv)
    # build_norm_layer returns a pair; element 1 is used as the layer.
    norm = build_norm_layer(self.norm_cfg, num_features=self.out_channels)[1]
    conv_bn.add_module('norm', norm)
    return conv_bn
def __init__(self,
             block_fn,
             in_channels,
             out_channels,
             has_downsampler=True,
             down_growth=False,
             expand_ratio=0.5,
             bottle_ratio=2,
             num_blocks=1,
             block_dpr=0,
             block_args={},
             conv_cfg=None,
             norm_cfg=dict(type='BN', eps=1e-5),
             act_cfg=dict(type='LeakyReLU', inplace=True),
             init_cfg=None):
    """Build one stage: downsample, expand, blocks and the two tail convs.

    Args:
        block_fn (callable): Block constructor, called once per block.
        in_channels (int): Input channels of the stage.
        out_channels (int): Output channels of the stage.
        has_downsampler (bool): Use a stride-2 3x3 conv at the start;
            otherwise an identity module.
        down_growth (bool): Let the downsample conv already grow the
            channels to ``out_channels``.
        expand_ratio (float): Ratio applied to the downsampled channels to
            get the expanded channel count (which must be even).
        bottle_ratio: Passed to each block as ``expansion``.
        num_blocks (int): Number of blocks.
        block_dpr: Drop-path rate(s); expanded with
            ``to_ntuple(num_blocks)``.
        block_args (dict): Extra keyword arguments for every block.
        conv_cfg, norm_cfg, act_cfg: Layer configs.
        init_cfg: Forwarded to the parent constructor.
    """
    super().__init__(init_cfg)

    # grow downsample channels to output channels
    if down_growth:
        down_channels = out_channels
    else:
        down_channels = in_channels
    block_dpr = to_ntuple(num_blocks)(block_dpr)

    if not has_downsampler:
        self.downsample_conv = nn.Identity()
    else:
        self.downsample_conv = ConvModule(
            in_channels=in_channels,
            out_channels=down_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            groups=32 if block_fn is ResNeXtBottleneck else 1,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

    exp_channels = int(down_channels * expand_ratio)
    self.expand_conv = ConvModule(
        in_channels=down_channels,
        out_channels=exp_channels,
        kernel_size=1,
        norm_cfg=norm_cfg,
        act_cfg=act_cfg if block_fn is DarknetBottleneck else None)

    assert exp_channels % 2 == 0, \
        'The channel number before blocks must be divisible by 2.'

    block_channels = exp_channels // 2
    blocks = [
        block_fn(
            in_channels=block_channels,
            out_channels=block_channels,
            expansion=bottle_ratio,
            drop_path_rate=block_dpr[idx],
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            **block_args) for idx in range(num_blocks)
    ]
    self.blocks = Sequential(*blocks)

    # The attribute spelling below is kept as-is: renaming it would change
    # the module's public attribute name.
    self.atfer_blocks_conv = ConvModule(
        block_channels, block_channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg)

    self.final_conv = ConvModule(
        2 * block_channels, out_channels, 1, norm_cfg=norm_cfg,
        act_cfg=act_cfg)
def __init__(self,
             arch,
             img_size=224,
             in_channels=3,
             patch_size=4,
             out_indices=(3, ),
             reparam_conv_kernels=(3, ),
             globalperceptron_ratio=4,
             conv_cfg=None,
             norm_cfg=dict(type='BN', requires_grad=True),
             patch_cfg=dict(),
             final_norm=True,
             deploy=False,
             init_cfg=None):
    """Build patch embedding, per-stage units and downsample layers.

    Args:
        arch (str | dict): A key of ``self.arch_zoo`` (case-insensitive)
            or a dict with exactly the keys ``channels``, ``depths`` and
            ``sharesets_nums``, each a list with one entry per stage.
        img_size (int | tuple): Input size, normalised with ``to_2tuple``.
        in_channels (int): Input channels of the patch embedding.
        patch_size (int | tuple): Kernel size and stride of the patch
            embedding, normalised with ``to_2tuple``.
        out_indices: Stored unchanged as ``self.out_indice``.
        reparam_conv_kernels: Passed to every ``RepMLPNetUnit``.
        globalperceptron_ratio: Passed to every ``RepMLPNetUnit``.
        conv_cfg, norm_cfg: Layer configs.
        patch_cfg (dict): Overrides for the patch-embedding config.
        final_norm (bool): Register a norm layer as ``final_norm``;
            otherwise an identity module is registered.
        deploy (bool): Passed to every ``RepMLPNetUnit``.
        init_cfg: Forwarded to the parent constructor.
    """
    super(RepMLPNet, self).__init__(init_cfg=init_cfg)

    if not isinstance(arch, str):
        essential_keys = {'channels', 'depths', 'sharesets_nums'}
        assert isinstance(arch, dict) and set(arch) == essential_keys, \
            f'Custom arch needs a dict with keys {essential_keys}.'
        self.arch_settings = arch
    else:
        arch = arch.lower()
        assert arch in set(self.arch_zoo), \
            f'Arch {arch} is not in default archs {set(self.arch_zoo)}'
        self.arch_settings = self.arch_zoo[arch]

    self.img_size = to_2tuple(img_size)
    self.patch_size = to_2tuple(patch_size)
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg

    self.num_stage = len(self.arch_settings['channels'])
    for value in self.arch_settings.values():
        assert isinstance(value, list) and len(value) == self.num_stage, (
            'Length of setting item in arch dict must be type of list and'
            ' have the same length.')

    self.channels = self.arch_settings['channels']
    self.depths = self.arch_settings['depths']
    self.sharesets_nums = self.arch_settings['sharesets_nums']

    # Defaults for the patch embedding; ``patch_cfg`` may override them.
    _patch_cfg = dict(
        in_channels=in_channels,
        input_size=self.img_size,
        embed_dims=self.channels[0],
        conv_type='Conv2d',
        kernel_size=self.patch_size,
        stride=self.patch_size,
        norm_cfg=self.norm_cfg,
        bias=False)
    _patch_cfg.update(patch_cfg)
    self.patch_embed = PatchEmbed(**_patch_cfg)
    self.patch_resolution = self.patch_embed.init_out_size

    # Patch grid height/width per stage: halved from one stage to the next.
    base_h, base_w = self.patch_resolution[0], self.patch_resolution[1]
    self.patch_hs = [base_h // 2**i for i in range(self.num_stage)]
    self.patch_ws = [base_w // 2**i for i in range(self.num_stage)]

    self.stages = ModuleList()
    self.downsample_layers = ModuleList()
    last_stage = self.num_stage - 1
    for stage_idx in range(self.num_stage):
        # make stage layers
        _stage_cfg = dict(
            channels=self.channels[stage_idx],
            path_h=self.patch_hs[stage_idx],
            path_w=self.patch_ws[stage_idx],
            reparam_conv_kernels=reparam_conv_kernels,
            globalperceptron_ratio=globalperceptron_ratio,
            norm_cfg=self.norm_cfg,
            ffn_expand=4,
            num_sharesets=self.sharesets_nums[stage_idx],
            deploy=deploy)
        units = []
        for _ in range(self.depths[stage_idx]):
            units.append(RepMLPNetUnit(**_stage_cfg))
        self.stages.append(Sequential(*units))

        # make downsample layers
        if stage_idx >= last_stage:
            continue
        self.downsample_layers.append(
            ConvModule(
                in_channels=self.channels[stage_idx],
                out_channels=self.channels[stage_idx + 1],
                kernel_size=2,
                stride=2,
                padding=0,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg,
                inplace=True))

    self.out_indice = out_indices

    if final_norm:
        norm_layer = build_norm_layer(norm_cfg, self.channels[-1])[1]
    else:
        norm_layer = nn.Identity()
    self.add_module('final_norm', norm_layer)
def make_layer(self):
    """Build the middle stages and append them to ``self.layers``.

    The first and last entries of ``self.layer_setting`` are skipped, and
    building stops at the first stage whose index exceeds
    ``max(self.out_indices) - 1``. Each stage entry is a list of block
    configs of the form ``(kernel_size, out_channels, se_ratio, stride,
    expand_ratio, block_type)``. ``self.in_channels`` is advanced after
    every block.
    """
    # Without the first and the final conv block.
    inner_settings = self.layer_setting[1:-1]
    total_num_blocks = sum(len(stage) for stage in inner_settings)
    # stochastic depth decay rule
    dpr = [
        rate.item()
        for rate in torch.linspace(0, self.drop_path_rate, total_num_blocks)
    ]

    global_idx = 0
    for stage_idx, stage_cfg in enumerate(inner_settings):
        # Avoid building unused layers in mmdetection.
        if stage_idx > max(self.out_indices) - 1:
            break

        stage_blocks = []
        for local_idx, block_cfg in enumerate(stage_cfg):
            (kernel_size, out_channels, se_ratio, stride, expand_ratio,
             block_type) = block_cfg
            is_edge = block_type == 1  # edge tpu

            # Non-leading edge blocks with expand ratio 3 drop the
            # residual connection and are widened to ratio 4.
            with_residual = True
            if is_edge and local_idx > 0 and expand_ratio == 3:
                with_residual = False
                expand_ratio = 4

            mid_channels = int(self.in_channels * expand_ratio)
            out_channels = make_divisible(out_channels, 8)

            se_cfg = None
            if se_ratio > 0:
                # In mmdetection, the `divisor` is deleted to align
                # the logic of SELayer with mmcls.
                se_cfg = dict(
                    channels=mid_channels,
                    ratio=se_ratio * expand_ratio,
                    act_cfg=(self.act_cfg, dict(type='Sigmoid')))

            if is_edge:
                block = partial(EdgeResidual, with_residual=with_residual)
            else:
                block = InvertedResidual

            stage_blocks.append(
                block(
                    in_channels=self.in_channels,
                    out_channels=out_channels,
                    mid_channels=mid_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    se_cfg=se_cfg,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=self.act_cfg,
                    drop_path_rate=dpr[global_idx],
                    with_cp=self.with_cp,
                    # In mmdetection, `with_expand_conv` is set to align
                    # the logic of InvertedResidual with mmcls.
                    with_expand_conv=(mid_channels != self.in_channels)))
            self.in_channels = out_channels
            global_idx += 1
        self.layers.append(Sequential(*stage_blocks))
class VisionTransformerClsHead(ClsHead):
    """Vision Transformer classifier head.

    Args:
        num_classes (int): Number of categories excluding the background
            category.
        in_channels (int): Number of channels in the input feature map.
        hidden_dim (int): Number of the dimensions for hidden layer.
            Defaults to None, which means no extra hidden layer.
        act_cfg (dict): The activation config. Only used when
            ``hidden_dim`` is set. Defaults to ``dict(type='Tanh')``.
        init_cfg (dict): The extra initialization configs. Defaults to
            ``dict(type='Constant', layer='Linear', val=0)``.
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 hidden_dim=None,
                 act_cfg=dict(type='Tanh'),
                 init_cfg=dict(type='Constant', layer='Linear', val=0),
                 *args,
                 **kwargs):
        super(VisionTransformerClsHead, self).__init__(
            init_cfg=init_cfg, *args, **kwargs)
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.hidden_dim = hidden_dim
        self.act_cfg = act_cfg

        if self.num_classes <= 0:
            raise ValueError(
                f'num_classes={num_classes} must be a positive integer')

        self._init_layers()

    def _init_layers(self):
        """Create ``self.layers``: ``head`` alone, or with a hidden layer."""
        named = OrderedDict()
        if self.hidden_dim is not None:
            named['pre_logits'] = nn.Linear(self.in_channels, self.hidden_dim)
            named['act'] = build_activation_layer(self.act_cfg)
            head_in = self.hidden_dim
        else:
            head_in = self.in_channels
        named['head'] = nn.Linear(head_in, self.num_classes)
        self.layers = Sequential(named)

    def init_weights(self):
        """Run the parent init, then re-initialise the hidden layer."""
        super(VisionTransformerClsHead, self).init_weights()
        # Modified from ClassyVision
        if not hasattr(self.layers, 'pre_logits'):
            return
        hidden = self.layers.pre_logits
        # Lecun norm
        trunc_normal_(hidden.weight, std=math.sqrt(1 / hidden.in_features))
        nn.init.zeros_(hidden.bias)

    def pre_logits(self, x):
        """Take the cls token of the last stage and apply the hidden layer.

        Args:
            x (tuple): Either a ``(patch_token, cls_token)`` pair or a
                tuple whose last item is unpacked as such a pair.

        Returns:
            Tensor: The cls token, passed through ``pre_logits`` and
            ``act`` when a hidden layer is configured.
        """
        if isinstance(x, tuple):
            x = x[-1]
        _, cls_token = x
        if self.hidden_dim is None:
            return cls_token
        hidden = self.layers.pre_logits(cls_token)
        return self.layers.act(hidden)

    def simple_test(self, x, softmax=True, post_process=True):
        """Inference without augmentation.

        Args:
            x (tuple[tuple[tensor, tensor]]): The input features.
                Multi-stage inputs are acceptable but only the last stage
                will be used to classify. Every item should be a tuple
                which includes patch token and cls token. The cls token
                will be used to classify and the shape of it should be
                ``(num_samples, in_channels)``.
            softmax (bool): Whether to softmax the classification score.
            post_process (bool): Whether to do post processing the
                inference results. It will convert the output to a list.

        Returns:
            Tensor | list: The inference results.

                - If no post processing, the output is a tensor with shape
                  ``(num_samples, num_classes)``.
                - If post processing, the output is a multi-dimensional
                  list of float and the dimensions are
                  ``(num_samples, num_classes)``.
        """
        features = self.pre_logits(x)
        cls_score = self.layers.head(features)

        pred = cls_score
        if softmax and cls_score is not None:
            pred = F.softmax(cls_score, dim=1)

        if not post_process:
            return pred
        return self.post_process(pred)

    def forward_train(self, x, gt_label, **kwargs):
        """Compute the training losses from the classification scores."""
        features = self.pre_logits(x)
        cls_score = self.layers.head(features)
        return self.loss(cls_score, gt_label, **kwargs)