Example #1
File: fpn_cat.py  Project: open-mmlab/mmocr
 def __init__(self,
              in_channels,
              c_wise_channels,
              out_channels,
              init_cfg=[dict(type='Kaiming', layer='Conv', bias=0)]):
     super().__init__(init_cfg=init_cfg)
     self.avg_pool = nn.AdaptiveAvgPool2d(1)
     # Channel Wise
     self.channel_wise = Sequential(
         ConvModule(
             in_channels,
             c_wise_channels,
             1,
             bias=False,
             conv_cfg=None,
             norm_cfg=None,
             act_cfg=dict(type='ReLU'),
             inplace=False),
         ConvModule(
             c_wise_channels,
             in_channels,
             1,
             bias=False,
             conv_cfg=None,
             norm_cfg=None,
             act_cfg=dict(type='Sigmoid'),
             inplace=False))
     # Spatial Wise
     self.spatial_wise = Sequential(
         ConvModule(
             1,
             1,
             3,
             padding=1,
             bias=False,
             conv_cfg=None,
             norm_cfg=None,
             act_cfg=dict(type='ReLU'),
             inplace=False),
         ConvModule(
             1,
             1,
             1,
             bias=False,
             conv_cfg=None,
             norm_cfg=None,
             act_cfg=dict(type='Sigmoid'),
             inplace=False))
     # Attention Wise
     self.attention_wise = ConvModule(
         in_channels,
         out_channels,
         1,
         bias=False,
         conv_cfg=None,
         norm_cfg=None,
         act_cfg=dict(type='Sigmoid'),
         inplace=False)
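How these branches combine depends on the module's forward, which is not shown here. The following is a minimal, self-contained sketch with plain torch.nn of how channel-wise, spatial-wise and attention-wise gates of this kind are typically chained; the class name TinyScaleAttention and the exact composition are assumptions for illustration, not the actual forward of fpn_cat.py.

import torch
import torch.nn as nn

class TinyScaleAttention(nn.Module):
    """Toy stand-in for the attention module above (names are made up)."""

    def __init__(self, in_channels=8, c_wise_channels=2, out_channels=4):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.channel_wise = nn.Sequential(
            nn.Conv2d(in_channels, c_wise_channels, 1, bias=False), nn.ReLU(),
            nn.Conv2d(c_wise_channels, in_channels, 1, bias=False),
            nn.Sigmoid())
        self.spatial_wise = nn.Sequential(
            nn.Conv2d(1, 1, 3, padding=1, bias=False), nn.ReLU(),
            nn.Conv2d(1, 1, 1, bias=False), nn.Sigmoid())
        self.attention_wise = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 1, bias=False), nn.Sigmoid())

    def forward(self, x):
        out = self.channel_wise(self.avg_pool(x)) + x   # channel gate
        spatial = torch.mean(out, dim=1, keepdim=True)  # collapse channels
        out = self.spatial_wise(spatial) + out          # spatial gate
        return self.attention_wise(out)                 # per-channel weights

x = torch.randn(1, 8, 16, 16)
print(TinyScaleAttention()(x).shape)  # torch.Size([1, 4, 16, 16])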
Example #2
 def _init_layers(self):
     if self.hidden_dim is None:
         layers = [('head', nn.Linear(self.in_channels, self.num_classes))]
     else:
         layers = [
             ('pre_logits', nn.Linear(self.in_channels, self.hidden_dim)),
             ('act', build_activation_layer(self.act_cfg)),
             ('head', nn.Linear(self.hidden_dim, self.num_classes)),
         ]
     self.layers = Sequential(OrderedDict(layers))
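Building Sequential from an OrderedDict, as _init_layers does above, registers each submodule under its name, so the head can later be reached as self.layers.head as well as by index. A quick self-contained check with plain nn.Sequential (mmcv's Sequential subclasses it, so named access works the same way):

from collections import OrderedDict

import torch
import torch.nn as nn

layers = nn.Sequential(OrderedDict([
    ('pre_logits', nn.Linear(16, 8)),
    ('act', nn.Tanh()),
    ('head', nn.Linear(8, 4)),
]))
assert layers.head is layers[2]          # named access == positional access
print(layers(torch.randn(2, 16)).shape)  # torch.Size([2, 4])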
Example #3
def test_sequential_model_weight_init():
    seq_model_cfg = [
        dict(
            type='FooConv1d',
            init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)),
        dict(
            type='FooConv2d',
            init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)),
    ]
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in seq_model_cfg]
    seq_model = Sequential(*layers)
    seq_model.init_weights()
    assert torch.equal(seq_model[0].conv1d.weight,
                       torch.full(seq_model[0].conv1d.weight.shape, 0.))
    assert torch.equal(seq_model[0].conv1d.bias,
                       torch.full(seq_model[0].conv1d.bias.shape, 1.))
    assert torch.equal(seq_model[1].conv2d.weight,
                       torch.full(seq_model[1].conv2d.weight.shape, 2.))
    assert torch.equal(seq_model[1].conv2d.bias,
                       torch.full(seq_model[1].conv2d.bias.shape, 3.))
    # inner init_cfg has higher priority
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in seq_model_cfg]
    seq_model = Sequential(
        *layers,
        init_cfg=dict(
            type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.))
    seq_model.init_weights()
    assert torch.equal(seq_model[0].conv1d.weight,
                       torch.full(seq_model[0].conv1d.weight.shape, 0.))
    assert torch.equal(seq_model[0].conv1d.bias,
                       torch.full(seq_model[0].conv1d.bias.shape, 1.))
    assert torch.equal(seq_model[1].conv2d.weight,
                       torch.full(seq_model[1].conv2d.weight.shape, 2.))
    assert torch.equal(seq_model[1].conv2d.bias,
                       torch.full(seq_model[1].conv2d.bias.shape, 3.))
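The test above relies on registry fixtures (FooConv1d, FooConv2d) that only exist in mmcv's test suite. A smaller sketch of the outer-level mechanism with plain torch layers, assuming mmcv.runner.Sequential is importable: an init_cfg passed to Sequential itself is applied to every child that does not carry its own init_cfg.

import torch
import torch.nn as nn
from mmcv.runner import Sequential

seq = Sequential(
    nn.Conv1d(3, 3, 1),
    nn.Conv2d(3, 3, 1),
    init_cfg=dict(
        type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.))
seq.init_weights()
assert torch.equal(seq[0].weight, torch.full_like(seq[0].weight, 4.))
assert torch.equal(seq[1].bias, torch.full_like(seq[1].bias, 5.))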
Example #4
    def _make_extra_layers(self, outplanes):
        layers = []
        kernel_sizes = (1, 3)
        num_layers = 0
        outplane = None
        for i in range(len(outplanes)):
            if self.inplanes == 'S':
                self.inplanes = outplane
                continue
            k = kernel_sizes[num_layers % 2]
            if outplanes[i] == 'S':
                outplane = outplanes[i + 1]
                conv = nn.Conv2d(self.inplanes,
                                 outplane,
                                 k,
                                 stride=2,
                                 padding=1)
            else:
                outplane = outplanes[i]
                conv = nn.Conv2d(self.inplanes,
                                 outplane,
                                 k,
                                 stride=1,
                                 padding=0)
            layers.append(conv)
            self.inplanes = outplanes[i]
            num_layers += 1
        if self.input_size == 512:
            layers.append(nn.Conv2d(self.inplanes, 256, 4, padding=1))

        return Sequential(*layers)
Example #5
File: resnet.py  Project: HqWei/mmocr
    def _make_layer(self, block_cfgs, inplanes, planes, blocks, stride):
        layers = []
        downsample = None
        block_cfgs_ = block_cfgs.copy()
        if isinstance(stride, int):
            stride = (stride, stride)

        if stride[0] != 1 or stride[1] != 1 or inplanes != planes:
            downsample = ConvModule(inplanes,
                                    planes,
                                    1,
                                    stride,
                                    norm_cfg=dict(type='BN'),
                                    act_cfg=None)

        if block_cfgs_['type'] == 'BasicBlock':
            block = BasicBlock
            block_cfgs_.pop('type')
        else:
            raise ValueError('{} not implemented yet'.format(
                block_cfgs_['type']))

        layers.append(
            block(inplanes,
                  planes,
                  stride=stride,
                  downsample=downsample,
                  **block_cfgs_))
        inplanes = planes
        for _ in range(1, blocks):
            layers.append(block(inplanes, planes, **block_cfgs_))

        return Sequential(*layers)
Example #6
    def _make_stage(self, in_channels, out_channels, num_blocks, stride,
                    dilation, next_create_block_idx, init_cfg):
        strides = [stride] + [1] * (num_blocks - 1)
        dilations = [dilation] * num_blocks

        blocks = []
        for i in range(num_blocks):
            groups = self.arch['group_layer_map'].get(
                next_create_block_idx,
                1) if self.arch['group_layer_map'] is not None else 1
            blocks.append(
                RepVGGBlock(in_channels,
                            out_channels,
                            stride=strides[i],
                            padding=dilations[i],
                            dilation=dilations[i],
                            groups=groups,
                            se_cfg=self.arch['se_cfg'],
                            with_cp=self.with_cp,
                            conv_cfg=self.conv_cfg,
                            norm_cfg=self.norm_cfg,
                            act_cfg=self.act_cfg,
                            deploy=self.deploy,
                            init_cfg=init_cfg))
            in_channels = out_channels
            next_create_block_idx += 1

        return Sequential(*blocks), next_create_block_idx
Example #7
    def __init__(self,
                 leaky_relu=True,
                 input_channels=3,
                 init_cfg=[
                     dict(type='Xavier', layer='Conv2d'),
                     dict(type='Uniform', layer='BatchNorm2d')
                 ]):
        super().__init__(init_cfg=init_cfg)

        ks = [3, 3, 3, 3, 3, 3, 2]
        ps = [1, 1, 1, 1, 1, 1, 0]
        ss = [1, 1, 1, 1, 1, 1, 1]
        nm = [64, 128, 256, 256, 512, 512, 512]

        self.channels = nm

        # cnn = nn.Sequential()
        cnn = Sequential()

        def conv_relu(i, batch_normalization=False):
            n_in = input_channels if i == 0 else nm[i - 1]
            n_out = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(n_in, n_out, ks[i], ss[i], ps[i]))
            if batch_normalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(n_out))
            if leaky_relu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        conv_relu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        conv_relu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        conv_relu(2, True)
        conv_relu(3)
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x16
        conv_relu(4, True)
        conv_relu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x16
        conv_relu(6, True)  # 512x1x16

        self.cnn = cnn
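The pooling schedule above is what makes this backbone suitable for text recognition: the asymmetric MaxPool2d layers halve the height while roughly preserving the width, so the feature map ends up 1 pixel high but still wide enough to slice into character frames. A standalone shape check of one such pooling layer:

import torch
import torch.nn as nn

pool = nn.MaxPool2d((2, 2), (2, 1), (0, 1))  # kernel, stride, padding
x = torch.randn(1, 256, 8, 25)
print(pool(x).shape)  # torch.Size([1, 256, 4, 26]) - height halved, width kept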
Example #8
    def make_layer(self):
        # Without the first and the final conv block.
        layer_setting = self.layer_setting[1:-1]

        total_num_blocks = sum([len(x) for x in layer_setting])
        block_idx = 0
        dpr = [
            x.item()
            for x in torch.linspace(0, self.drop_path_rate, total_num_blocks)
        ]  # stochastic depth decay rule

        for layer_cfg in layer_setting:
            layer = []
            for i, block_cfg in enumerate(layer_cfg):
                (kernel_size, out_channels, se_ratio, stride, expand_ratio,
                 block_type) = block_cfg

                mid_channels = int(self.in_channels * expand_ratio)
                out_channels = make_divisible(out_channels, 8)
                if se_ratio <= 0:
                    se_cfg = None
                else:
                    se_cfg = dict(channels=mid_channels,
                                  ratio=expand_ratio * se_ratio,
                                  divisor=1,
                                  act_cfg=(self.act_cfg, dict(type='Sigmoid')))
                if block_type == 1:  # edge tpu
                    if i > 0 and expand_ratio == 3:
                        with_residual = False
                        expand_ratio = 4
                    else:
                        with_residual = True
                    mid_channels = int(self.in_channels * expand_ratio)
                    if se_cfg is not None:
                        se_cfg = dict(channels=mid_channels,
                                      ratio=se_ratio * expand_ratio,
                                      divisor=1,
                                      act_cfg=(self.act_cfg,
                                               dict(type='Sigmoid')))
                    block = partial(EdgeResidual, with_residual=with_residual)
                else:
                    block = InvertedResidual
                layer.append(
                    block(in_channels=self.in_channels,
                          out_channels=out_channels,
                          mid_channels=mid_channels,
                          kernel_size=kernel_size,
                          stride=stride,
                          se_cfg=se_cfg,
                          conv_cfg=self.conv_cfg,
                          norm_cfg=self.norm_cfg,
                          act_cfg=self.act_cfg,
                          drop_path_rate=dpr[block_idx],
                          with_cp=self.with_cp))
                self.in_channels = out_channels
                block_idx += 1
            self.layers.append(Sequential(*layer))
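The "stochastic depth decay rule" above simply spreads drop_path_rate linearly over all blocks, so deeper blocks get a higher drop probability. A quick look at the values it produces, assuming a drop_path_rate of 0.2 and 5 blocks:

import torch

dpr = [x.item() for x in torch.linspace(0, 0.2, 5)]
print(dpr)  # approximately [0.0, 0.05, 0.1, 0.15, 0.2]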
Example #9
    def __init__(self,
                 arch,
                 stem_fn,
                 in_channels=3,
                 out_indices=-1,
                 frozen_stages=-1,
                 drop_path_rate=0.,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', eps=1e-5),
                 act_cfg=dict(type='LeakyReLU', inplace=True),
                 norm_eval=False,
                 init_cfg=dict(type='Kaiming', layer='Conv2d')):
        super().__init__(init_cfg=init_cfg)
        self.arch = self.expand_arch(arch)
        self.num_stages = len(self.arch['in_channels'])
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.norm_eval = norm_eval
        if frozen_stages not in range(-1, self.num_stages):
            raise ValueError('frozen_stages must be in range(-1, '
                             f'{self.num_stages}). But received '
                             f'{frozen_stages}')
        self.frozen_stages = frozen_stages

        self.stem = stem_fn(in_channels)

        stages = []
        depths = self.arch['num_blocks']
        dpr = torch.linspace(0, drop_path_rate, sum(depths)).split(depths)

        for i in range(self.num_stages):
            stage_cfg = {k: v[i] for k, v in self.arch.items()}
            csp_stage = CSPStage(**stage_cfg,
                                 block_dpr=dpr[i].tolist(),
                                 conv_cfg=conv_cfg,
                                 norm_cfg=norm_cfg,
                                 act_cfg=act_cfg,
                                 init_cfg=init_cfg)
            stages.append(csp_stage)
        self.stages = Sequential(*stages)

        if isinstance(out_indices, int):
            out_indices = [out_indices]
        assert isinstance(out_indices, Sequence), \
            f'"out_indices" must be a sequence or int, ' \
            f'got {type(out_indices)} instead.'
        out_indices = list(out_indices)
        for i, index in enumerate(out_indices):
            if index < 0:
                out_indices[i] = len(self.stages) + index
            assert 0 <= out_indices[i] <= len(self.stages), \
                f'Invalid out_indices {index}.'
        self.out_indices = out_indices
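The block_dpr passed to each CSPStage above comes from a single linspace over all blocks that is then split by the per-stage depths. A small standalone check with made-up depths shows what dpr[i] contains:

import torch

depths = [1, 2, 3]
dpr = torch.linspace(0, 0.5, sum(depths)).split(depths)
print([d.tolist() for d in dpr])
# roughly [[0.0], [0.1, 0.2], [0.3, 0.4, 0.5]] (up to float rounding)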
Example #10
    def _make_layer(self, input_channels, output_channels, blocks):
        layers = []
        for _ in range(blocks):
            downsample = None
            if input_channels != output_channels:
                downsample = Sequential(
                    nn.Conv2d(
                        input_channels,
                        output_channels,
                        kernel_size=1,
                        stride=1,
                        bias=False),
                    nn.BatchNorm2d(output_channels),
                )
            layers.append(
                BasicBlock(
                    input_channels, output_channels, downsample=downsample))
            input_channels = output_channels

        return Sequential(*layers)
Example #11
 def _init_thr(self, inner_channels, bias=False):
     in_channels = inner_channels
     seq = Sequential(
         nn.Conv2d(in_channels,
                   inner_channels // 4,
                   3,
                   padding=1,
                   bias=bias), nn.BatchNorm2d(inner_channels // 4),
         nn.ReLU(inplace=True),
         nn.ConvTranspose2d(inner_channels // 4, inner_channels // 4, 2, 2),
         nn.BatchNorm2d(inner_channels // 4), nn.ReLU(inplace=True),
         nn.ConvTranspose2d(inner_channels // 4, 1, 2, 2), nn.Sigmoid())
     return seq
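A standalone shape check of the branch built above, using plain nn.Sequential with the same layers: the two stride-2 ConvTranspose2d layers upsample by 4x overall, and the final Sigmoid leaves a single-channel threshold map in [0, 1].

import torch
import torch.nn as nn

inner_channels = 16
thr = nn.Sequential(
    nn.Conv2d(inner_channels, inner_channels // 4, 3, padding=1, bias=False),
    nn.BatchNorm2d(inner_channels // 4), nn.ReLU(inplace=True),
    nn.ConvTranspose2d(inner_channels // 4, inner_channels // 4, 2, 2),
    nn.BatchNorm2d(inner_channels // 4), nn.ReLU(inplace=True),
    nn.ConvTranspose2d(inner_channels // 4, 1, 2, 2), nn.Sigmoid())
print(thr(torch.randn(1, inner_channels, 40, 40)).shape)
# torch.Size([1, 1, 160, 160])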
Example #12
File: db_head.py  Project: xyzhu8/mmocr
    def __init__(self,
                 in_channels,
                 with_bias=False,
                 decoding_type='db',
                 text_repr_type='poly',
                 downsample_ratio=1.0,
                 loss=dict(type='DBLoss'),
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=[
                     dict(type='Kaiming', layer='Conv'),
                     dict(type='Constant',
                          layer='BatchNorm',
                          val=1.,
                          bias=1e-4)
                 ]):
        """Initialization.

        Args:
            in_channels (int): The number of input channels of the db head.
            decoding_type (str): The type of decoder for dbnet.
            text_repr_type (str): Boundary encoding type 'poly' or 'quad'.
            downsample_ratio (float): The downsample ratio of ground truths.
            loss (dict): The type of loss for dbnet.
        """
        super().__init__(init_cfg=init_cfg)

        assert isinstance(in_channels, int)

        self.in_channels = in_channels
        self.text_repr_type = text_repr_type
        self.loss_module = build_loss(loss)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.downsample_ratio = downsample_ratio
        self.decoding_type = decoding_type

        self.binarize = Sequential(
            nn.Conv2d(in_channels,
                      in_channels // 4,
                      3,
                      bias=with_bias,
                      padding=1), nn.BatchNorm2d(in_channels // 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),
            nn.BatchNorm2d(in_channels // 4), nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels // 4, 1, 2, 2), nn.Sigmoid())

        self.threshold = self._init_thr(in_channels)
Example #13
File: db_head.py  Project: open-mmlab/mmocr
    def __init__(self,
                 in_channels,
                 with_bias=False,
                 downsample_ratio=1.0,
                 loss=dict(type='DBLoss'),
                 postprocessor=dict(type='DBPostprocessor',
                                    text_repr_type='quad'),
                 init_cfg=[
                     dict(type='Kaiming', layer='Conv'),
                     dict(type='Constant',
                          layer='BatchNorm',
                          val=1.,
                          bias=1e-4)
                 ],
                 train_cfg=None,
                 test_cfg=None,
                 **kwargs):
        old_keys = ['text_repr_type', 'decoding_type']
        for key in old_keys:
            if kwargs.get(key, None):
                postprocessor[key] = kwargs.get(key)
                warnings.warn(
                    f'{key} is deprecated, please specify '
                    'it in postprocessor config dict. See '
                    'https://github.com/open-mmlab/mmocr/pull/640'
                    ' for details.', UserWarning)
        BaseModule.__init__(self, init_cfg=init_cfg)
        HeadMixin.__init__(self, loss, postprocessor)

        assert isinstance(in_channels, int)

        self.in_channels = in_channels
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.downsample_ratio = downsample_ratio

        self.binarize = Sequential(
            nn.Conv2d(in_channels,
                      in_channels // 4,
                      3,
                      bias=with_bias,
                      padding=1), nn.BatchNorm2d(in_channels // 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),
            nn.BatchNorm2d(in_channels // 4), nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels // 4, 1, 2, 2), nn.Sigmoid())

        self.threshold = self._init_thr(in_channels)
Example #14
    def _make_layer(self, block, inplanes, planes, blocks, stride=1):
        """Make each layer."""
        downsample = None
        if stride != 1 or inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                build_conv_layer(
                    self.conv_cfg,
                    inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False),
                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])

        layers = []
        block_init_cfg = None
        if self.pretrained is None and not hasattr(
                self, 'init_cfg') and self.zero_init_residual:
            if block is BasicBlock:
                block_init_cfg = dict(
                    type='Constant', val=0, override=dict(name='norm2'))
            elif block is Bottleneck:
                block_init_cfg = dict(
                    type='Constant', val=0, override=dict(name='norm3'))

        layers.append(
            block(
                inplanes,
                planes,
                stride,
                downsample=downsample,
                with_cp=self.with_cp,
                norm_cfg=self.norm_cfg,
                conv_cfg=self.conv_cfg,
                init_cfg=block_init_cfg))
        inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(
                block(
                    inplanes,
                    planes,
                    with_cp=self.with_cp,
                    norm_cfg=self.norm_cfg,
                    conv_cfg=self.conv_cfg,
                    init_cfg=block_init_cfg))

        return Sequential(*layers)
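The override=dict(name='norm2') / dict(name='norm3') pattern above is how the last norm layer of each residual block is zero-initialized while the rest of the block keeps its default init. A minimal sketch of that mechanism, assuming mmcv's initialize helper is importable from mmcv.cnn; the module and child names here are made up:

import torch.nn as nn
from mmcv.cnn import initialize

block = nn.Sequential()
block.add_module('conv1', nn.Conv2d(4, 4, 3, padding=1))
block.add_module('norm2', nn.BatchNorm2d(4))

# Same shape of config as block_init_cfg above: the outer cfg carries the
# initializer, `override` restricts it to the child registered as 'norm2'.
initialize(block, dict(type='Constant', val=0, override=dict(name='norm2')))
print(block.norm2.weight.unique())          # tensor([0.])
print(block.conv1.weight.abs().sum() > 0)   # conv1 untouched -> tensor(True)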
Example #15
    def __init__(self,
                 in_channels=None,
                 num_classes=None,
                 rnn_flag=False,
                 init_cfg=dict(type='Xavier', layer='Conv2d'),
                 **kwargs):
        super().__init__(init_cfg=init_cfg)
        self.num_classes = num_classes
        self.rnn_flag = rnn_flag

        if rnn_flag:
            self.decoder = Sequential(
                BidirectionalLSTM(in_channels, 256, 256),
                BidirectionalLSTM(256, 256, num_classes))
        else:
            self.decoder = nn.Conv2d(
                in_channels, num_classes, kernel_size=1, stride=1)
Example #16
File: resnet.py  Project: HqWei/mmocr
 def _make_stem_layer(self, in_channels, stem_channels):
     if isinstance(stem_channels, int):
         stem_channels = [stem_channels]
     stem_layers = []
     for _, channels in enumerate(stem_channels):
         stem_layer = ConvModule(in_channels,
                                 channels,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1,
                                 bias=False,
                                 norm_cfg=dict(type='BN'),
                                 act_cfg=dict(type='ReLU'))
         in_channels = channels
         stem_layers.append(stem_layer)
     self.stem_layers = Sequential(*stem_layers)
     self.inplanes = stem_channels[-1]
Example #17
    def _make_one_branch(self,
                         branch_index,
                         block,
                         num_blocks,
                         num_channels,
                         stride=1):
        """Build one branch."""
        downsample = None
        if stride != 1 or \
                self.in_channels[branch_index] != \
                num_channels[branch_index] * block.expansion:
            downsample = nn.Sequential(
                build_conv_layer(
                    self.conv_cfg,
                    self.in_channels[branch_index],
                    num_channels[branch_index] * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False),
                build_norm_layer(self.norm_cfg, num_channels[branch_index] *
                                 block.expansion)[1])

        layers = []
        layers.append(
            block(
                self.in_channels[branch_index],
                num_channels[branch_index],
                stride,
                downsample=downsample,
                with_cp=self.with_cp,
                norm_cfg=self.norm_cfg,
                conv_cfg=self.conv_cfg,
                init_cfg=self.block_init_cfg))
        self.in_channels[branch_index] = \
            num_channels[branch_index] * block.expansion
        for i in range(1, num_blocks[branch_index]):
            layers.append(
                block(
                    self.in_channels[branch_index],
                    num_channels[branch_index],
                    with_cp=self.with_cp,
                    norm_cfg=self.norm_cfg,
                    conv_cfg=self.conv_cfg,
                    init_cfg=self.block_init_cfg))

        return Sequential(*layers)
Example #18
    def __init__(self,
                 embed_dims,
                 feedforward_channels,
                 act_cfg=dict(type='GELU'),
                 ffn_drop=0.,
                 dropout_layer=None,
                 use_conv=False,
                 init_cfg=None):
        super(MixFFN, self).__init__(init_cfg=init_cfg)

        self.embed_dims = embed_dims
        self.feedforward_channels = feedforward_channels
        self.act_cfg = act_cfg
        activate = build_activation_layer(act_cfg)

        in_channels = embed_dims
        fc1 = Conv2d(
            in_channels=in_channels,
            out_channels=feedforward_channels,
            kernel_size=1,
            stride=1,
            bias=True)
        if use_conv:
            # 3x3 depth-wise conv to provide positional encoding information
            dw_conv = Conv2d(
                in_channels=feedforward_channels,
                out_channels=feedforward_channels,
                kernel_size=3,
                stride=1,
                padding=(3 - 1) // 2,
                bias=True,
                groups=feedforward_channels)
        fc2 = Conv2d(
            in_channels=feedforward_channels,
            out_channels=in_channels,
            kernel_size=1,
            stride=1,
            bias=True)
        drop = nn.Dropout(ffn_drop)
        layers = [fc1, activate, drop, fc2, drop]
        if use_conv:
            layers.insert(1, dw_conv)
        self.layers = Sequential(*layers)
        self.dropout_layer = build_dropout(
            dropout_layer) if dropout_layer else torch.nn.Identity()
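MixFFN is built entirely from 1x1 (and optionally depth-wise 3x3) Conv2d layers, so its forward has to reshape the transformer tokens (N, L, C) into an (N, C, H, W) map before self.layers and back afterwards. The helpers below are written out only to illustrate that round trip; they mirror what mmcv's nlc_to_nchw / nchw_to_nlc utilities do and are not copied from the MixFFN forward:

import torch

def nlc_to_nchw(x, hw_shape):
    """(N, L, C) tokens -> (N, C, H, W) feature map."""
    H, W = hw_shape
    N, L, C = x.shape
    assert L == H * W, 'sequence length must equal H * W'
    return x.transpose(1, 2).reshape(N, C, H, W)

def nchw_to_nlc(x):
    """(N, C, H, W) feature map -> (N, L, C) tokens."""
    return x.flatten(2).transpose(1, 2)

tokens = torch.randn(2, 7 * 5, 32)                   # (N, L, C)
restored = nchw_to_nlc(nlc_to_nchw(tokens, (7, 5)))  # round trip
assert torch.equal(tokens, restored)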
Example #19
    def _make_layer(self, block, inplanes, planes, blocks, stride=1):
        layers = []
        downsample = None
        if stride != 1 or inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(inplanes, planes, 1, stride, bias=False),
                nn.BatchNorm2d(planes),
            )
        layers.append(
            block(inplanes,
                  planes,
                  use_conv1x1=True,
                  stride=stride,
                  downsample=downsample))
        inplanes = planes
        for _ in range(1, blocks):
            layers.append(block(inplanes, planes, use_conv1x1=True))

        return Sequential(*layers)
Example #20
    def _make_stage(self, layer_config, in_channels, multiscale_output=True):
        """Make each stage."""
        num_modules = layer_config['num_modules']
        num_branches = layer_config['num_branches']
        num_blocks = layer_config['num_blocks']
        num_channels = layer_config['num_channels']
        block = self.blocks_dict[layer_config['block']]

        hr_modules = []
        block_init_cfg = None
        if self.pretrained is None and not hasattr(
                self, 'init_cfg') and self.zero_init_residual:
            if block is BasicBlock:
                block_init_cfg = dict(type='Constant',
                                      val=0,
                                      override=dict(name='norm2'))
            elif block is Bottleneck:
                block_init_cfg = dict(type='Constant',
                                      val=0,
                                      override=dict(name='norm3'))

        for i in range(num_modules):
            # multi_scale_output is only used for the last module
            if not multiscale_output and i == num_modules - 1:
                reset_multiscale_output = False
            else:
                reset_multiscale_output = True

            hr_modules.append(
                HRModule(num_branches,
                         block,
                         num_blocks,
                         in_channels,
                         num_channels,
                         reset_multiscale_output,
                         with_cp=self.with_cp,
                         norm_cfg=self.norm_cfg,
                         conv_cfg=self.conv_cfg,
                         block_init_cfg=block_init_cfg))

        return Sequential(*hr_modules), in_channels
Example #21
    def __init__(self,
                 in_channels,
                 with_bias=False,
                 decoding_type='db',
                 text_repr_type='poly',
                 downsample_ratio=1.0,
                 loss=dict(type='DBLoss'),
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=[
                     dict(type='Kaiming', layer='Conv'),
                     dict(type='Constant',
                          layer='BatchNorm',
                          val=1.,
                          bias=1e-4)
                 ]):
        super().__init__(init_cfg=init_cfg)

        assert isinstance(in_channels, int)

        self.in_channels = in_channels
        self.text_repr_type = text_repr_type
        self.loss_module = build_loss(loss)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.downsample_ratio = downsample_ratio
        self.decoding_type = decoding_type

        self.binarize = Sequential(
            nn.Conv2d(in_channels,
                      in_channels // 4,
                      3,
                      bias=with_bias,
                      padding=1), nn.BatchNorm2d(in_channels // 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),
            nn.BatchNorm2d(in_channels // 4), nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels // 4, 1, 2, 2), nn.Sigmoid())

        self.threshold = self._init_thr(in_channels)
Example #22
    def create_conv_bn(self, kernel_size, dilation=1, padding=0):
        conv_bn = Sequential()
        conv_bn.add_module(
            'conv',
            build_conv_layer(self.conv_cfg,
                             in_channels=self.in_channels,
                             out_channels=self.out_channels,
                             kernel_size=kernel_size,
                             stride=self.stride,
                             dilation=dilation,
                             padding=padding,
                             groups=self.groups,
                             bias=False))
        conv_bn.add_module(
            'norm',
            build_norm_layer(self.norm_cfg, num_features=self.out_channels)[1])

        return conv_bn
Example #23
    def __init__(self,
                 block_fn,
                 in_channels,
                 out_channels,
                 has_downsampler=True,
                 down_growth=False,
                 expand_ratio=0.5,
                 bottle_ratio=2,
                 num_blocks=1,
                 block_dpr=0,
                 block_args={},
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', eps=1e-5),
                 act_cfg=dict(type='LeakyReLU', inplace=True),
                 init_cfg=None):
        super().__init__(init_cfg)
        # grow downsample channels to output channels
        down_channels = out_channels if down_growth else in_channels
        block_dpr = to_ntuple(num_blocks)(block_dpr)

        if has_downsampler:
            self.downsample_conv = ConvModule(
                in_channels=in_channels,
                out_channels=down_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                groups=32 if block_fn is ResNeXtBottleneck else 1,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg)
        else:
            self.downsample_conv = nn.Identity()

        exp_channels = int(down_channels * expand_ratio)
        self.expand_conv = ConvModule(
            in_channels=down_channels,
            out_channels=exp_channels,
            kernel_size=1,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg if block_fn is DarknetBottleneck else None)

        assert exp_channels % 2 == 0, \
            'The channel number before blocks must be divisible by 2.'
        block_channels = exp_channels // 2
        blocks = []
        for i in range(num_blocks):
            block_cfg = dict(in_channels=block_channels,
                             out_channels=block_channels,
                             expansion=bottle_ratio,
                             drop_path_rate=block_dpr[i],
                             conv_cfg=conv_cfg,
                             norm_cfg=norm_cfg,
                             act_cfg=act_cfg,
                             **block_args)
            blocks.append(block_fn(**block_cfg))
        self.blocks = Sequential(*blocks)
        self.atfer_blocks_conv = ConvModule(block_channels,
                                            block_channels,
                                            1,
                                            norm_cfg=norm_cfg,
                                            act_cfg=act_cfg)

        self.final_conv = ConvModule(2 * block_channels,
                                     out_channels,
                                     1,
                                     norm_cfg=norm_cfg,
                                     act_cfg=act_cfg)
Example #24
    def __init__(self,
                 arch,
                 img_size=224,
                 in_channels=3,
                 patch_size=4,
                 out_indices=(3, ),
                 reparam_conv_kernels=(3, ),
                 globalperceptron_ratio=4,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 patch_cfg=dict(),
                 final_norm=True,
                 deploy=False,
                 init_cfg=None):
        super(RepMLPNet, self).__init__(init_cfg=init_cfg)
        if isinstance(arch, str):
            arch = arch.lower()
            assert arch in set(self.arch_zoo), \
                f'Arch {arch} is not in default archs {set(self.arch_zoo)}'
            self.arch_settings = self.arch_zoo[arch]
        else:
            essential_keys = {'channels', 'depths', 'sharesets_nums'}
            assert isinstance(arch, dict) and set(arch) == essential_keys, \
                f'Custom arch needs a dict with keys {essential_keys}.'
            self.arch_settings = arch

        self.img_size = to_2tuple(img_size)
        self.patch_size = to_2tuple(patch_size)
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        self.num_stage = len(self.arch_settings['channels'])
        for value in self.arch_settings.values():
            assert isinstance(value, list) and len(value) == self.num_stage, (
                'Each setting item in the arch dict must be a list whose '
                'length equals the number of stages.')

        self.channels = self.arch_settings['channels']
        self.depths = self.arch_settings['depths']
        self.sharesets_nums = self.arch_settings['sharesets_nums']

        _patch_cfg = dict(in_channels=in_channels,
                          input_size=self.img_size,
                          embed_dims=self.channels[0],
                          conv_type='Conv2d',
                          kernel_size=self.patch_size,
                          stride=self.patch_size,
                          norm_cfg=self.norm_cfg,
                          bias=False)
        _patch_cfg.update(patch_cfg)
        self.patch_embed = PatchEmbed(**_patch_cfg)
        self.patch_resolution = self.patch_embed.init_out_size

        self.patch_hs = [
            self.patch_resolution[0] // 2**i for i in range(self.num_stage)
        ]
        self.patch_ws = [
            self.patch_resolution[1] // 2**i for i in range(self.num_stage)
        ]

        self.stages = ModuleList()
        self.downsample_layers = ModuleList()
        for stage_idx in range(self.num_stage):
            # make stage layers
            _stage_cfg = dict(channels=self.channels[stage_idx],
                              path_h=self.patch_hs[stage_idx],
                              path_w=self.patch_ws[stage_idx],
                              reparam_conv_kernels=reparam_conv_kernels,
                              globalperceptron_ratio=globalperceptron_ratio,
                              norm_cfg=self.norm_cfg,
                              ffn_expand=4,
                              num_sharesets=self.sharesets_nums[stage_idx],
                              deploy=deploy)
            stage_blocks = [
                RepMLPNetUnit(**_stage_cfg)
                for _ in range(self.depths[stage_idx])
            ]
            self.stages.append(Sequential(*stage_blocks))

            # make downsample layers
            if stage_idx < self.num_stage - 1:
                self.downsample_layers.append(
                    ConvModule(in_channels=self.channels[stage_idx],
                               out_channels=self.channels[stage_idx + 1],
                               kernel_size=2,
                               stride=2,
                               padding=0,
                               conv_cfg=self.conv_cfg,
                               norm_cfg=self.norm_cfg,
                               inplace=True))

        self.out_indice = out_indices

        if final_norm:
            norm_layer = build_norm_layer(norm_cfg, self.channels[-1])[1]
        else:
            norm_layer = nn.Identity()
        self.add_module('final_norm', norm_layer)
Example #25
    def make_layer(self):
        # Without the first and the final conv block.
        layer_setting = self.layer_setting[1:-1]

        total_num_blocks = sum([len(x) for x in layer_setting])
        block_idx = 0
        dpr = [
            x.item()
            for x in torch.linspace(0, self.drop_path_rate, total_num_blocks)
        ]  # stochastic depth decay rule

        for i, layer_cfg in enumerate(layer_setting):
            # Avoid building unused layers in mmdetection.
            if i > max(self.out_indices) - 1:
                break
            layer = []
            for i, block_cfg in enumerate(layer_cfg):
                (kernel_size, out_channels, se_ratio, stride, expand_ratio,
                 block_type) = block_cfg

                mid_channels = int(self.in_channels * expand_ratio)
                out_channels = make_divisible(out_channels, 8)
                if se_ratio <= 0:
                    se_cfg = None
                else:
                    # In mmdetection, the `divisor` is deleted to align
                    # the logic of SELayer with mmcls.
                    se_cfg = dict(channels=mid_channels,
                                  ratio=expand_ratio * se_ratio,
                                  act_cfg=(self.act_cfg, dict(type='Sigmoid')))
                if block_type == 1:  # edge tpu
                    if i > 0 and expand_ratio == 3:
                        with_residual = False
                        expand_ratio = 4
                    else:
                        with_residual = True
                    mid_channels = int(self.in_channels * expand_ratio)
                    if se_cfg is not None:
                        # In mmdetection, the `divisor` is deleted to align
                        # the logic of SELayer with mmcls.
                        se_cfg = dict(channels=mid_channels,
                                      ratio=se_ratio * expand_ratio,
                                      act_cfg=(self.act_cfg,
                                               dict(type='Sigmoid')))
                    block = partial(EdgeResidual, with_residual=with_residual)
                else:
                    block = InvertedResidual
                layer.append(
                    block(
                        in_channels=self.in_channels,
                        out_channels=out_channels,
                        mid_channels=mid_channels,
                        kernel_size=kernel_size,
                        stride=stride,
                        se_cfg=se_cfg,
                        conv_cfg=self.conv_cfg,
                        norm_cfg=self.norm_cfg,
                        act_cfg=self.act_cfg,
                        drop_path_rate=dpr[block_idx],
                        with_cp=self.with_cp,
                        # In mmdetection, `with_expand_conv` is set to align
                        # the logic of InvertedResidual with mmcls.
                        with_expand_conv=(mid_channels != self.in_channels)))
                self.in_channels = out_channels
                block_idx += 1
            self.layers.append(Sequential(*layer))
Example #26
class VisionTransformerClsHead(ClsHead):
    """Vision Transformer classifier head.

    Args:
        num_classes (int): Number of categories excluding the background
            category.
        in_channels (int): Number of channels in the input feature map.
        hidden_dim (int): Number of the dimensions for hidden layer.
            Defaults to None, which means no extra hidden layer.
        act_cfg (dict): The activation config. Only available during
            pre-training. Defaults to ``dict(type='Tanh')``.
        init_cfg (dict): The extra initialization configs. Defaults to
            ``dict(type='Constant', layer='Linear', val=0)``.
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 hidden_dim=None,
                 act_cfg=dict(type='Tanh'),
                 init_cfg=dict(type='Constant', layer='Linear', val=0),
                 *args,
                 **kwargs):
        super(VisionTransformerClsHead, self).__init__(
            init_cfg=init_cfg, *args, **kwargs)
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.hidden_dim = hidden_dim
        self.act_cfg = act_cfg

        if self.num_classes <= 0:
            raise ValueError(
                f'num_classes={num_classes} must be a positive integer')

        self._init_layers()

    def _init_layers(self):
        if self.hidden_dim is None:
            layers = [('head', nn.Linear(self.in_channels, self.num_classes))]
        else:
            layers = [
                ('pre_logits', nn.Linear(self.in_channels, self.hidden_dim)),
                ('act', build_activation_layer(self.act_cfg)),
                ('head', nn.Linear(self.hidden_dim, self.num_classes)),
            ]
        self.layers = Sequential(OrderedDict(layers))

    def init_weights(self):
        super(VisionTransformerClsHead, self).init_weights()
        # Modified from ClassyVision
        if hasattr(self.layers, 'pre_logits'):
            # Lecun norm
            trunc_normal_(
                self.layers.pre_logits.weight,
                std=math.sqrt(1 / self.layers.pre_logits.in_features))
            nn.init.zeros_(self.layers.pre_logits.bias)

    def pre_logits(self, x):
        if isinstance(x, tuple):
            x = x[-1]
        _, cls_token = x
        if self.hidden_dim is None:
            return cls_token
        else:
            x = self.layers.pre_logits(cls_token)
            return self.layers.act(x)

    def simple_test(self, x, softmax=True, post_process=True):
        """Inference without augmentation.

        Args:
            x (tuple[tuple[tensor, tensor]]): The input features.
                Multi-stage inputs are acceptable but only the last stage will
                be used to classify. Every item should be a tuple which
                includes patch token and cls token. The cls token will be used
                to classify and the shape of it should be
                ``(num_samples, in_channels)``.
            softmax (bool): Whether to softmax the classification score.
            post_process (bool): Whether to post-process the inference
                results. If True, the output is converted to a list.

        Returns:
            Tensor | list: The inference results.

                - If no post processing, the output is a tensor with shape
                  ``(num_samples, num_classes)``.
                - If post processing, the output is a multi-dimensional list
                  of floats with dimensions ``(num_samples, num_classes)``.
        """
        x = self.pre_logits(x)
        cls_score = self.layers.head(x)

        if softmax:
            pred = (
                F.softmax(cls_score, dim=1) if cls_score is not None else None)
        else:
            pred = cls_score

        if post_process:
            return self.post_process(pred)
        else:
            return pred

    def forward_train(self, x, gt_label, **kwargs):
        x = self.pre_logits(x)
        cls_score = self.layers.head(x)
        losses = self.loss(cls_score, gt_label, **kwargs)
        return losses
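A usage sketch for the head above, assuming mmcls is installed and the class is exported as mmcls.models.VisionTransformerClsHead: the backbone passes a (patch_token, cls_token) pair per stage, and only the cls token of the last stage reaches the classifier.

import torch
from mmcls.models import VisionTransformerClsHead

num_samples, in_channels, num_classes = 2, 768, 10
head = VisionTransformerClsHead(num_classes, in_channels, hidden_dim=128)

patch_token = torch.randn(num_samples, in_channels, 14, 14)
cls_token = torch.randn(num_samples, in_channels)
scores = head.simple_test(((patch_token, cls_token), ), post_process=False)
print(scores.shape)  # torch.Size([2, 10])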