Example No. 1
 def __init__(self,
              in_channels,
              conv_out=128,
              fpem_repeat=2,
              align_corners=False,
              init_cfg=dict(type='Xavier',
                            layer='Conv2d',
                            distribution='uniform')):
     super().__init__(init_cfg=init_cfg)
     # reduce layers
     self.reduce_conv_c2 = nn.Sequential(
         nn.Conv2d(in_channels=in_channels[0],
                   out_channels=conv_out,
                   kernel_size=1), nn.BatchNorm2d(conv_out), nn.ReLU())
     self.reduce_conv_c3 = nn.Sequential(
         nn.Conv2d(in_channels=in_channels[1],
                   out_channels=conv_out,
                   kernel_size=1), nn.BatchNorm2d(conv_out), nn.ReLU())
     self.reduce_conv_c4 = nn.Sequential(
         nn.Conv2d(in_channels=in_channels[2],
                   out_channels=conv_out,
                   kernel_size=1), nn.BatchNorm2d(conv_out), nn.ReLU())
     self.reduce_conv_c5 = nn.Sequential(
         nn.Conv2d(in_channels=in_channels[3],
                   out_channels=conv_out,
                   kernel_size=1), nn.BatchNorm2d(conv_out), nn.ReLU())
     self.align_corners = align_corners
     self.fpems = ModuleList()
     for _ in range(fpem_repeat):
         self.fpems.append(FPEM(conv_out))
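A minimal, self-contained sketch of the pattern this example illustrates: repeated blocks are collected in an mmcv ModuleList inside a loop and then applied in order in forward(). ToyBlock and ToyNeck below are illustrative stand-ins, not part of the original FPEM-based neck.

import torch
import torch.nn as nn
from mmcv.runner import BaseModule, ModuleList

class ToyBlock(nn.Module):
    """Stand-in for FPEM: any module mapping a tensor to a same-shaped tensor."""

    def __init__(self, channels):
        super().__init__()
        self.conv = nn.Conv2d(channels, channels, 3, padding=1)

    def forward(self, x):
        return torch.relu(self.conv(x))

class ToyNeck(BaseModule):
    """Mirrors the fpem_repeat loop above: build `repeat` blocks, run them in sequence."""

    def __init__(self, channels=128, repeat=2, init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.blocks = ModuleList()
        for _ in range(repeat):
            self.blocks.append(ToyBlock(channels))

    def forward(self, x):
        for block in self.blocks:
            x = block(x)
        return x

out = ToyNeck()(torch.rand(1, 128, 32, 32))  # output shape: (1, 128, 32, 32)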
Example No. 2
    def init_mask_head(self, mask_roi_extractor, mask_head):
        """Initialize mask head and mask roi extractor.

        Args:
            mask_roi_extractor (dict): Config of mask roi extractor.
            mask_head (dict): Config of mask head.
        """
        self.mask_head = nn.ModuleList()
        if not isinstance(mask_head, list):
            mask_head = [mask_head for _ in range(self.num_stages)]
        assert len(mask_head) == self.num_stages
        for head in mask_head:
            self.mask_head.append(build_head(head))
        if mask_roi_extractor is not None:
            self.share_roi_extractor = False
            self.mask_roi_extractor = ModuleList()
            if not isinstance(mask_roi_extractor, list):
                mask_roi_extractor = [
                    mask_roi_extractor for _ in range(self.num_stages)
                ]
            assert len(mask_roi_extractor) == self.num_stages
            for roi_extractor in mask_roi_extractor:
                self.mask_roi_extractor.append(
                    build_roi_extractor(roi_extractor))
        else:
            self.share_roi_extractor = True
            self.mask_roi_extractor = self.bbox_roi_extractor
Example No. 3
    def __init__(self,
                 blocks,
                 dims,
                 mlp_ratios,
                 in_channels=3,
                 stem_channels=64,
                 num_conv_blocks=3,
                 out_indices=(0, 1, 2, 3),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)

        if out_indices != (0, 1, 2, 3):
            raise NotImplementedError
        assert len(blocks) == len(dims) == len(mlp_ratios), \
            'blocks, dims and mlp_ratios must agree in size, ' \
            f'{len(blocks)}, {len(dims)} and {len(mlp_ratios)} passed.'

        self.tokenizer = ConvTokenizer(
            in_dim=in_channels, embed_dim=stem_channels)
        self.conv_stages = ConvStage(
            num_conv_blocks,
            embed_dim_in=stem_channels,
            hidden_dim=dims[0],
            embed_dim_out=dims[0])
        self.stages = ModuleList()
        for i in range(0, len(blocks)):
            is_last_stage = i == len(blocks) - 1
            stage = ConvMLPStage(
                num_blocks=blocks[i],
                embed_dims=dims[i:i + 2],
                mlp_ratio=mlp_ratios[i],
                drop_path_rate=0.1,
                downsample=(not is_last_stage))
            self.stages.append(stage)
Example No. 4
 def _init_layers(self):
     """Initialize layers of the head."""
     self.relu = nn.ReLU(inplace=True)
     self.head_convs = ModuleList()
     for i in range(self.num_head_convs):
         chn = self.in_channels if i == 0 else self.feat_channels
         self.head_convs.append(
             ConvModule(chn,
                        self.feat_channels,
                        3,
                        stride=1,
                        padding=1,
                        conv_cfg=self.conv_cfg,
                        norm_cfg=self.norm_cfg))
     self.conv_cls = nn.Conv2d(self.feat_channels,
                               self.num_anchors * self.cls_out_channels,
                               3,
                               padding=1)
     self.conv_reg = nn.Conv2d(self.feat_channels,
                               self.num_anchors * 4,
                               3,
                               padding=1)
     self.conv_coeff = nn.Conv2d(self.feat_channels,
                                 self.num_anchors * self.num_protos,
                                 3,
                                 padding=1)
Example No. 5
 def __init__(self,
              rfp_steps,
              rfp_backbone,
              aspp_out_channels,
              aspp_dilations=(1, 3, 6, 1),
              init_cfg=None,
              **kwargs):
     assert init_cfg is None, 'To prevent abnormal initialization ' \
                              'behavior, init_cfg is not allowed to be set'
     super().__init__(init_cfg=init_cfg, **kwargs)
     self.rfp_steps = rfp_steps
      # Be careful! Pretrained weights cannot be loaded when using
     # nn.ModuleList
     self.rfp_modules = ModuleList()
     for rfp_idx in range(1, rfp_steps):
         rfp_module = build_backbone(rfp_backbone)
         self.rfp_modules.append(rfp_module)
     self.rfp_aspp = ASPP(self.out_channels, aspp_out_channels,
                          aspp_dilations)
     self.rfp_weight = nn.Conv2d(self.out_channels,
                                 1,
                                 kernel_size=1,
                                 stride=1,
                                 padding=0,
                                 bias=True)
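A hedged illustration of the "Be careful!" comment above (Child is a toy stand-in, not an RFP internal): BaseModule.init_weights() only recurses into children that themselves expose init_weights(), which a plain nn.ModuleList does not, so BaseModules stored in one would never have their init_cfg applied (for example, a Pretrained config on rfp_backbone).

import torch.nn as nn
from mmcv.runner import BaseModule, ModuleList

class Child(BaseModule):
    """Toy stand-in for a backbone that relies on init_cfg for its weights."""

    def __init__(self):
        super().__init__(init_cfg=dict(type='Constant', layer='Conv2d', val=1.))
        self.conv = nn.Conv2d(3, 3, 1)

good = ModuleList([Child()])    # mmcv container: init_weights() reaches Child
good.init_weights()             # Child.conv.weight is now all ones

bad = nn.ModuleList([Child()])  # plain container: it has no init_weights(), so a
                                # parent BaseModule would never propagate into it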
Example No. 6
 def __init__(self,
              in_channels,
              inner_channels,
              num_layers=1,
              num_upsample=None,
              conv_cfg=None,
              norm_cfg=None,
              init_cfg=None,
              **kwargs):
     super(ConvUpsample, self).__init__(init_cfg)
     if num_upsample is None:
         num_upsample = num_layers
     assert num_upsample <= num_layers, \
          f'num_upsample({num_upsample}) must be no more than ' \
         f'num_layers({num_layers})'
     self.num_layers = num_layers
     self.num_upsample = num_upsample
     self.conv = ModuleList()
     for i in range(num_layers):
         self.conv.append(
             ConvModule(in_channels,
                        inner_channels,
                        3,
                        padding=1,
                        stride=1,
                        conv_cfg=conv_cfg,
                        norm_cfg=norm_cfg,
                        **kwargs))
         in_channels = inner_channels
Example No. 7
 def _add_fc_branch(self):
     """Add the fc branch which consists of a sequential of fc layers."""
     branch_fcs = ModuleList()
     for i in range(self.num_fcs):
         fc_in_channels = (self.in_channels * self.roi_feat_area
                           if i == 0 else self.fc_out_channels)
         branch_fcs.append(nn.Linear(fc_in_channels, self.fc_out_channels))
     return branch_fcs
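A hedged sketch of how a branch like the one returned above is typically consumed: flatten the RoI features once, then pass them through each fc with an activation. The shapes and the relu placement here are illustrative assumptions, not taken from the surrounding head.

import torch
import torch.nn as nn
from mmcv.runner import ModuleList

in_channels, roi_feat_area, fc_out_channels, num_fcs = 256, 7 * 7, 1024, 2

branch_fcs = ModuleList()
for i in range(num_fcs):
    fc_in = in_channels * roi_feat_area if i == 0 else fc_out_channels
    branch_fcs.append(nn.Linear(fc_in, fc_out_channels))

x = torch.rand(8, in_channels, 7, 7).flatten(1)  # 8 RoIs, flattened features
for fc in branch_fcs:
    x = torch.relu(fc(x))                        # x: (8, 1024)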
Example No. 8
    def __init__(self,
                 in_channels=[256, 512, 1024, 2048],
                 out_channels=256,
                 fusion_type='concat',
                 upsample_ratio=1,
                 init_cfg=dict(type='Xavier',
                               layer='Conv2d',
                               distribution='uniform')):
        super().__init__(init_cfg=init_cfg)
        conv_cfg = None
        norm_cfg = dict(type='BN')
        act_cfg = dict(type='ReLU')

        self.in_channels = in_channels
        self.out_channels = out_channels

        self.lateral_convs = ModuleList()
        self.fpn_convs = ModuleList()
        self.backbone_end_level = len(in_channels)
        for i in range(self.backbone_end_level):
            l_conv = ConvModule(in_channels[i],
                                out_channels,
                                1,
                                conv_cfg=conv_cfg,
                                norm_cfg=norm_cfg,
                                act_cfg=act_cfg,
                                inplace=False)
            self.lateral_convs.append(l_conv)

            if i < self.backbone_end_level - 1:
                fpn_conv = ConvModule(out_channels,
                                      out_channels,
                                      3,
                                      padding=1,
                                      conv_cfg=conv_cfg,
                                      norm_cfg=norm_cfg,
                                      act_cfg=act_cfg,
                                      inplace=False)
                self.fpn_convs.append(fpn_conv)

        self.fusion_type = fusion_type

        if self.fusion_type == 'concat':
            feature_channels = 1024
        elif self.fusion_type == 'add':
            feature_channels = 256
        else:
            raise NotImplementedError

        self.output_convs = ConvModule(feature_channels,
                                       out_channels,
                                       3,
                                       padding=1,
                                       conv_cfg=None,
                                       norm_cfg=norm_cfg,
                                       act_cfg=act_cfg,
                                       inplace=False)
        self.upsample_ratio = upsample_ratio
Example No. 9
 def _add_conv_branch(self):
     """Add the fc branch which consists of a sequential of conv layers."""
     branch_convs = ModuleList()
     for i in range(self.num_convs):
         branch_convs.append(
             Bottleneck(inplanes=self.conv_out_channels,
                        planes=self.conv_out_channels // 4,
                        conv_cfg=self.conv_cfg,
                        norm_cfg=self.norm_cfg))
     return branch_convs
Example No. 10
    def __init__(self,
                 num_things_classes=80,
                 num_stuff_classes=53,
                 num_classes=None,
                 in_channels=256,
                 inner_channels=128,
                 start_level=0,
                 end_level=4,
                 fg_range=None,
                 bg_range=None,
                 conv_cfg=None,
                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
                 init_cfg=None,
                 loss_seg=dict(type='CrossEntropyLoss',
                               ignore_index=-1,
                               loss_weight=1.0)):
        if num_classes is not None:
            warnings.warn(
                '`num_classes` is deprecated now, please set '
                '`num_stuff_classes` directly, the `num_classes` will be '
                'set to `num_stuff_classes + 1`')
            # num_classes = num_stuff_classes + 1 for PanopticFPN.
            assert num_classes == num_stuff_classes + 1
        super(PanopticFPNHead, self).__init__(num_stuff_classes + 1, init_cfg,
                                              loss_seg)
        self.num_things_classes = num_things_classes
        self.num_stuff_classes = num_stuff_classes
        if fg_range is not None and bg_range is not None:
            self.fg_range = fg_range
            self.bg_range = bg_range
            self.num_things_classes = fg_range[1] - fg_range[0] + 1
            self.num_stuff_classes = bg_range[1] - bg_range[0] + 1
            warnings.warn(
                '`fg_range` and `bg_range` are deprecated now, '
                f'please use `num_things_classes`={self.num_things_classes} '
                f'and `num_stuff_classes`={self.num_stuff_classes} instead.')

        # Used feature layers are [start_level, end_level)
        self.start_level = start_level
        self.end_level = end_level
        self.num_stages = end_level - start_level
        self.inner_channels = inner_channels

        self.conv_upsample_layers = ModuleList()
        for i in range(start_level, end_level):
            self.conv_upsample_layers.append(
                ConvUpsample(
                    in_channels,
                    inner_channels,
                    num_layers=i if i > 0 else 1,
                    num_upsample=i if i > 0 else 0,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                ))
        self.conv_logits = nn.Conv2d(inner_channels, self.num_classes, 1)
Example No. 11
    def __init__(self,
                 num_convs=0,
                 num_fcs=2,
                 fc_out_channels=1024,
                 downsample_factor=2,
                 init_cfg=dict(type='Xavier',
                               override=[
                                   dict(name='fcs'),
                                   dict(type='Constant',
                                        val=0.001,
                                        name='fc_logits')
                               ]),
                 *arg,
                 **kwarg):
        super(CoarseMaskHead, self).__init__(*arg,
                                             num_convs=num_convs,
                                             upsample_cfg=dict(type=None),
                                             init_cfg=None,
                                             **kwarg)
        self.init_cfg = init_cfg
        self.num_fcs = num_fcs
        assert self.num_fcs > 0
        self.fc_out_channels = fc_out_channels
        self.downsample_factor = downsample_factor
        assert self.downsample_factor >= 1
        # remove conv_logit
        delattr(self, 'conv_logits')

        if downsample_factor > 1:
            downsample_in_channels = (self.conv_out_channels if
                                      self.num_convs > 0 else self.in_channels)
            self.downsample_conv = ConvModule(downsample_in_channels,
                                              self.conv_out_channels,
                                              kernel_size=downsample_factor,
                                              stride=downsample_factor,
                                              padding=0,
                                              conv_cfg=self.conv_cfg,
                                              norm_cfg=self.norm_cfg)
        else:
            self.downsample_conv = None

        self.output_size = (self.roi_feat_size[0] // downsample_factor,
                            self.roi_feat_size[1] // downsample_factor)
        self.output_area = self.output_size[0] * self.output_size[1]

        last_layer_dim = self.conv_out_channels * self.output_area

        self.fcs = ModuleList()
        for i in range(num_fcs):
            fc_in_channels = (last_layer_dim
                              if i == 0 else self.fc_out_channels)
            self.fcs.append(Linear(fc_in_channels, self.fc_out_channels))
        last_layer_dim = self.fc_out_channels
        output_channels = self.num_classes * self.output_area
        self.fc_logits = Linear(last_layer_dim, output_channels)
Example No. 12
    def __init__(
            self,
            in_channels,
            num_layers,
            num_heads,
            embed_dims,
            mlp_ratio=4,
            drop_path_rate=0.1,
            drop_rate=0.0,
            attn_drop_rate=0.0,
            num_fcs=2,
            qkv_bias=True,
            act_cfg=dict(type='GELU'),
            norm_cfg=dict(type='LN'),
            init_std=0.02,
            **kwargs,
    ):
        super(SegmenterMaskTransformerHead, self).__init__(
            in_channels=in_channels, **kwargs)

        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, num_layers)]
        self.layers = ModuleList()
        for i in range(num_layers):
            self.layers.append(
                TransformerEncoderLayer(
                    embed_dims=embed_dims,
                    num_heads=num_heads,
                    feedforward_channels=mlp_ratio * embed_dims,
                    attn_drop_rate=attn_drop_rate,
                    drop_rate=drop_rate,
                    drop_path_rate=dpr[i],
                    num_fcs=num_fcs,
                    qkv_bias=qkv_bias,
                    act_cfg=act_cfg,
                    norm_cfg=norm_cfg,
                    batch_first=True,
                ))

        self.dec_proj = nn.Linear(in_channels, embed_dims)

        self.cls_emb = nn.Parameter(
            torch.randn(1, self.num_classes, embed_dims))
        self.patch_proj = nn.Linear(embed_dims, embed_dims, bias=False)
        self.classes_proj = nn.Linear(embed_dims, embed_dims, bias=False)

        self.decoder_norm = build_norm_layer(
            norm_cfg, embed_dims, postfix=1)[1]
        self.mask_norm = build_norm_layer(
            norm_cfg, self.num_classes, postfix=2)[1]

        self.init_std = init_std

        delattr(self, 'conv_seg')
Example No. 13
 def __init__(self, num_stages, stages, train_cfg, test_cfg, init_cfg=None):
     super(CascadeRPNHead, self).__init__(init_cfg)
     assert num_stages == len(stages)
     self.num_stages = num_stages
      # Be careful! Pretrained weights cannot be loaded when using
     # nn.ModuleList
     self.stages = ModuleList()
     for i in range(len(stages)):
         train_cfg_i = train_cfg[i] if train_cfg is not None else None
         stages[i].update(train_cfg=train_cfg_i)
         stages[i].update(test_cfg=test_cfg)
         self.stages.append(build_head(stages[i]))
     self.train_cfg = train_cfg
     self.test_cfg = test_cfg
Example No. 14
def test_modulelist_weight_init():
    models_cfg = [
        dict(
            type='FooConv1d',
            init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)),
        dict(
            type='FooConv2d',
            init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)),
    ]
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in models_cfg]
    modellist = ModuleList(layers)
    modellist.init_weights()
    assert torch.equal(modellist[0].conv1d.weight,
                       torch.full(modellist[0].conv1d.weight.shape, 0.))
    assert torch.equal(modellist[0].conv1d.bias,
                       torch.full(modellist[0].conv1d.bias.shape, 1.))
    assert torch.equal(modellist[1].conv2d.weight,
                       torch.full(modellist[1].conv2d.weight.shape, 2.))
    assert torch.equal(modellist[1].conv2d.bias,
                       torch.full(modellist[1].conv2d.bias.shape, 3.))
    # inner init_cfg has higher priority
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in models_cfg]
    modellist = ModuleList(
        layers,
        init_cfg=dict(
            type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.))
    modellist.init_weights()
    assert torch.equal(modellist[0].conv1d.weight,
                       torch.full(modellist[0].conv1d.weight.shape, 0.))
    assert torch.equal(modellist[0].conv1d.bias,
                       torch.full(modellist[0].conv1d.bias.shape, 1.))
    assert torch.equal(modellist[1].conv2d.weight,
                       torch.full(modellist[1].conv2d.weight.shape, 2.))
    assert torch.equal(modellist[1].conv2d.bias,
                       torch.full(modellist[1].conv2d.bias.shape, 3.))
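The complementary case, as a hedged sketch: when the child modules define no init_cfg of their own, the init_cfg passed to the ModuleList itself is what init_weights() applies. TinyConv is an illustrative stand-in, not one of the registered test components.

import torch
import torch.nn as nn
from mmcv.runner import BaseModule, ModuleList

class TinyConv(BaseModule):
    """Child without its own init_cfg, so the outer init_cfg takes effect."""

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 3, 1)

layers = ModuleList([TinyConv() for _ in range(2)],
                    init_cfg=dict(type='Constant', layer='Conv2d',
                                  val=4., bias=5.))
layers.init_weights()
assert torch.equal(layers[0].conv.weight,
                   torch.full_like(layers[0].conv.weight, 4.))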
Example No. 15
    def __init__(self,
                 in_channels=256,
                 out_channels=256,
                 kernel_size=[3, 3, 3],
                 dilation=[1, 1, 1],
                 groups=[1, 1, 1],
                 ibn=False,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 norm_eval=True,
                 part_deform=False,
                 init_cfg=None):
        assert init_cfg is None, 'To prevent abnormal initialization ' \
                                 'behavior, init_cfg is not allowed to be set'
        super(PConvModule, self).__init__(init_cfg=init_cfg)

        self.ibn = ibn
        self.norm_cfg = norm_cfg
        self.norm_eval = norm_eval
        self.pconv = ModuleList()
        self.pconv.append(
            SEPCConv(in_channels,
                     out_channels,
                     kernel_size=kernel_size[0],
                     dilation=dilation[0],
                     groups=groups[0],
                     padding=(kernel_size[0] + (dilation[0] - 1) * 2) // 2,
                     part_deform=part_deform))
        self.pconv.append(
            SEPCConv(in_channels,
                     out_channels,
                     kernel_size=kernel_size[1],
                     dilation=dilation[1],
                     groups=groups[1],
                     padding=(kernel_size[1] + (dilation[1] - 1) * 2) // 2,
                     part_deform=part_deform))
        self.pconv.append(
            SEPCConv(in_channels,
                     out_channels,
                     kernel_size=kernel_size[2],
                     dilation=dilation[2],
                     groups=groups[2],
                     padding=(kernel_size[2] + (dilation[2] - 1) * 2) // 2,
                     stride=2,
                     part_deform=part_deform))

        if self.ibn:
            self.pnorm_name, pnorm = build_norm_layer(self.norm_cfg, 256)
            self.add_module(self.pnorm_name, pnorm)

        self.relu = nn.ReLU()
Example No. 16
    def __init__(self,
                 n_layers=2,
                 n_head=8,
                 d_model=512,
                 d_inner=2048,
                 dropout=0.1,
                 max_len=8 * 32,
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)

        assert d_model % n_head == 0, 'd_model must be divisible by n_head'

        self.pos_encoder = PositionalEncoding(d_model, n_position=max_len)
        encoder_layer = BaseTransformerLayer(
            operation_order=('self_attn', 'norm', 'ffn', 'norm'),
            attn_cfgs=dict(
                type='MultiheadAttention',
                embed_dims=d_model,
                num_heads=n_head,
                attn_drop=dropout,
                dropout_layer=dict(type='Dropout', drop_prob=dropout),
            ),
            ffn_cfgs=dict(
                type='FFN',
                embed_dims=d_model,
                feedforward_channels=d_inner,
                ffn_drop=dropout,
            ),
            norm_cfg=dict(type='LN'),
        )
        self.transformer = ModuleList(
            [copy.deepcopy(encoder_layer) for _ in range(n_layers)])
Example No. 17
 def __init__(self,
              n_layers=12,
              n_head=8,
              d_k=64,
              d_v=64,
              d_model=512,
              n_position=100,
              d_inner=256,
              dropout=0.1,
              init_cfg=None,
              **kwargs):
     super().__init__(init_cfg=init_cfg)
     self.d_model = d_model
     self.position_enc = Adaptive2DPositionalEncoding(d_hid=d_model,
                                                      n_height=n_position,
                                                      n_width=n_position,
                                                      dropout=dropout)
     self.layer_stack = ModuleList([
         SatrnEncoderLayer(d_model,
                           d_inner,
                           n_head,
                           d_k,
                           d_v,
                           dropout=dropout) for _ in range(n_layers)
     ])
     self.layer_norm = nn.LayerNorm(d_model)
Example No. 18
    def _make_branches(self, num_branches, block, num_blocks, num_channels):
        branches = []

        for i in range(num_branches):
            branches.append(
                self._make_one_branch(i, block, num_blocks, num_channels))

        return ModuleList(branches)
Example No. 19
    def _init_layers(self):
        self.layers = ModuleList()
        in_channels = self.in_channels
        for hidden_channels in self.mid_channels:
            self.layers.append(
                LinearBlock(in_channels,
                            hidden_channels,
                            dropout_rate=self.dropout_rate,
                            norm_cfg=self.norm_cfg,
                            act_cfg=self.act_cfg))
            in_channels = hidden_channels

        self.layers.append(
            LinearBlock(self.mid_channels[-1],
                        self.num_classes,
                        dropout_rate=0.,
                        norm_cfg=None,
                        act_cfg=None))
Example No. 20
    def __init__(self,
                 in_channels,
                 feat_channels,
                 out_channels,
                 norm_cfg=dict(type='GN', num_groups=32),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.in_channels = in_channels
        self.num_inputs = len(in_channels)
        self.lateral_convs = ModuleList()
        self.output_convs = ModuleList()
        self.use_bias = norm_cfg is None
        for i in range(0, self.num_inputs - 1):
            lateral_conv = ConvModule(in_channels[i],
                                      feat_channels,
                                      kernel_size=1,
                                      bias=self.use_bias,
                                      norm_cfg=norm_cfg,
                                      act_cfg=None)
            output_conv = ConvModule(feat_channels,
                                     feat_channels,
                                     kernel_size=3,
                                     stride=1,
                                     padding=1,
                                     bias=self.use_bias,
                                     norm_cfg=norm_cfg,
                                     act_cfg=act_cfg)
            self.lateral_convs.append(lateral_conv)
            self.output_convs.append(output_conv)

        self.last_feat_conv = ConvModule(in_channels[-1],
                                         feat_channels,
                                         kernel_size=3,
                                         padding=1,
                                         stride=1,
                                         bias=self.use_bias,
                                         norm_cfg=norm_cfg,
                                         act_cfg=act_cfg)
        self.mask_feature = Conv2d(feat_channels,
                                   out_channels,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
Example No. 21
    def __init__(self,
                 embed_dims,
                 num_heads,
                 feedforward_channels,
                 depth,
                 window_size=7,
                 qkv_bias=True,
                 qk_scale=None,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 downsample=None,
                 act_cfg=dict(type='GELU'),
                 norm_cfg=dict(type='LN'),
                 with_cp=False,
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)

        if isinstance(drop_path_rate, list):
            drop_path_rates = drop_path_rate
            assert len(drop_path_rates) == depth
        else:
            drop_path_rates = [deepcopy(drop_path_rate) for _ in range(depth)]

        self.blocks = ModuleList()
        for i in range(depth):
            block = SwinBlock(
                embed_dims=embed_dims,
                num_heads=num_heads,
                feedforward_channels=feedforward_channels,
                window_size=window_size,
                shift=False if i % 2 == 0 else True,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop_rate=drop_rate,
                attn_drop_rate=attn_drop_rate,
                drop_path_rate=drop_path_rates[i],
                act_cfg=act_cfg,
                norm_cfg=norm_cfg,
                with_cp=with_cp,
                init_cfg=None)
            self.blocks.append(block)

        self.downsample = downsample
Example No. 22
    def init_bbox_head(self, bbox_roi_extractor, bbox_head):
        """Initialize box head and box roi extractor.

        Args:
            bbox_roi_extractor (dict): Config of box roi extractor.
            bbox_head (dict): Config of box head.
        """
        self.bbox_roi_extractor = ModuleList()
        self.bbox_head = ModuleList()
        if not isinstance(bbox_roi_extractor, list):
            bbox_roi_extractor = [
                bbox_roi_extractor for _ in range(self.num_stages)
            ]
        if not isinstance(bbox_head, list):
            bbox_head = [bbox_head for _ in range(self.num_stages)]
        assert len(bbox_roi_extractor) == len(bbox_head) == self.num_stages
        for roi_extractor, head in zip(bbox_roi_extractor, bbox_head):
            self.bbox_roi_extractor.append(build_roi_extractor(roi_extractor))
            self.bbox_head.append(build_head(head))
Example No. 23
def test_basetransformerlayer_cuda():
    # To test if the BaseTransformerLayer's behaviour remains
    # consistent after being deepcopied
    operation_order = ('self_attn', 'ffn')
    baselayer = BaseTransformerLayer(
        operation_order=operation_order,
        batch_first=True,
        attn_cfgs=dict(
            type='MultiheadAttention',
            embed_dims=256,
            num_heads=8,
        ),
    )
    baselayers = ModuleList([copy.deepcopy(baselayer) for _ in range(2)])
    baselayers.to('cuda')
    x = torch.rand(2, 10, 256).cuda()
    for m in baselayers:
        x = m(x)
        assert x.shape == torch.Size([2, 10, 256])
Example No. 24
    def _init_layers(self):
        self.layers = ModuleList(init_cfg=dict(
            type='Normal', layer='Linear', mean=0., std=0.01, bias=0.))
        in_channels = self.in_channels
        for hidden_channels in self.mid_channels:
            self.layers.append(
                LinearBlock(in_channels,
                            hidden_channels,
                            dropout_rate=self.dropout_rate,
                            norm_cfg=self.norm_cfg,
                            act_cfg=self.act_cfg))
            in_channels = hidden_channels

        self.layers.append(
            LinearBlock(self.mid_channels[-1],
                        self.num_classes,
                        dropout_rate=0.,
                        norm_cfg=None,
                        act_cfg=None))
Example No. 25
 def _build_layers(self):
     dpr = [
         x.item()
         for x in torch.linspace(0, self.drop_path_rate, self.num_layers)
     ]
     self.layers = ModuleList()
     for i in range(self.num_layers):
         self.layers.append(
             MAETransformerEncoderLayer(
                 embed_dims=self.embed_dims,
                 num_heads=self.num_heads,
                 feedforward_channels=self.mlp_ratio * self.embed_dims,
                 attn_drop_rate=self.attn_drop_rate,
                 drop_path_rate=dpr[i],
                 num_fcs=self.num_fcs,
                 bias=True,
                 act_cfg=self.act_cfg,
                 norm_cfg=self.norm_cfg,
                 window_size=self.patch_shape,
                 init_values=self.init_values))
Example No. 26
    def __init__(self,
                 d_model=512,
                 n_head=8,
                 d_inner=2048,
                 n_layers=4,
                 max_seq_len=40,
                 dropout=0.1,
                 detach_tokens=True,
                 num_chars=90,
                 use_self_attn=False,
                 pad_idx=0,
                 init_cfg=None,
                 **kwargs):
        super().__init__(init_cfg=init_cfg)
        self.detach_tokens = detach_tokens

        self.d_model = d_model
        self.max_seq_len = max_seq_len

        self.proj = nn.Linear(num_chars, d_model, False)
        self.token_encoder = PositionalEncoding(d_model,
                                                n_position=self.max_seq_len,
                                                dropout=0.1)
        self.pos_encoder = PositionalEncoding(d_model,
                                              n_position=self.max_seq_len)
        self.pad_idx = pad_idx

        if use_self_attn:
            operation_order = ('self_attn', 'norm', 'cross_attn', 'norm',
                               'ffn', 'norm')
        else:
            operation_order = ('cross_attn', 'norm', 'ffn', 'norm')

        decoder_layer = BaseTransformerLayer(
            operation_order=operation_order,
            attn_cfgs=dict(
                type='MultiheadAttention',
                embed_dims=d_model,
                num_heads=n_head,
                attn_drop=dropout,
                dropout_layer=dict(type='Dropout', drop_prob=dropout),
            ),
            ffn_cfgs=dict(
                type='FFN',
                embed_dims=d_model,
                feedforward_channels=d_inner,
                ffn_drop=dropout,
            ),
            norm_cfg=dict(type='LN'),
        )
        self.decoder_layers = ModuleList(
            [copy.deepcopy(decoder_layer) for _ in range(n_layers)])

        self.cls = nn.Linear(d_model, num_chars)
Example No. 27
 def _init_layers(self):
     """A helper function to take a config setting and turn it into a
     network."""
     # Possible patterns:
     # ( 256, 3) -> conv
     # ( 256,-2) -> deconv
     # (None,-2) -> bilinear interpolate
     in_channels = self.in_channels
     protonets = ModuleList()
     for num_channels, kernel_size in zip(self.proto_channels,
                                          self.proto_kernel_sizes):
         if kernel_size > 0:
             layer = nn.Conv2d(in_channels,
                               num_channels,
                               kernel_size,
                               padding=kernel_size // 2)
         else:
             if num_channels is None:
                 layer = InterpolateModule(scale_factor=-kernel_size,
                                           mode='bilinear',
                                           align_corners=False)
             else:
                 layer = nn.ConvTranspose2d(in_channels,
                                            num_channels,
                                            -kernel_size,
                                            padding=kernel_size // 2)
         protonets.append(layer)
         protonets.append(nn.ReLU(inplace=True))
         in_channels = num_channels if num_channels is not None \
             else in_channels
     if not self.include_last_relu:
         protonets = protonets[:-1]
     return nn.Sequential(*protonets)
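An illustrative walk-through of the pattern comment above (these channel/kernel values are assumptions for illustration, not the library's defaults): each (num_channels, kernel_size) pair selects one of the three layer types before a ReLU is appended.

# Illustrative config and the layers the loop above would build from it:
proto_channels = (256, 256, None, 256, 32)
proto_kernel_sizes = (3, 3, -2, 3, 1)
# (256,  3) -> nn.Conv2d(in_channels, 256, 3, padding=1)
# (256,  3) -> nn.Conv2d(256, 256, 3, padding=1)
# (None, -2) -> InterpolateModule(scale_factor=2, mode='bilinear')  # channels unchanged
# (256,  3) -> nn.Conv2d(256, 256, 3, padding=1)
# (32,   1) -> nn.Conv2d(256, 32, 1, padding=0)
# ...each followed by ReLU; the trailing ReLU is dropped when include_last_relu is False.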
Example No. 28
    def _build_layers(self):
        """Build transformer encoding layers."""

        dpr = [
            x.item()
            for x in torch.linspace(0, self.drop_path_rate, self.num_layers)
        ]
        self.layers = ModuleList()
        for i in range(self.num_layers):
            self.layers.append(
                BEiTTransformerEncoderLayer(
                    embed_dims=self.embed_dims,
                    num_heads=self.num_heads,
                    feedforward_channels=self.mlp_ratio * self.embed_dims,
                    attn_drop_rate=self.attn_drop_rate,
                    drop_path_rate=dpr[i],
                    num_fcs=self.num_fcs,
                    bias='qv_bias' if self.qv_bias else False,
                    act_cfg=self.act_cfg,
                    norm_cfg=self.norm_cfg,
                    window_size=self.window_size,
                    init_values=self.init_values))
Example No. 29
    def _make_branches(self, num_branches, block, num_blocks, num_channels):
        branches = []

        for i in range(num_branches):
            out_channels = num_channels[i] * get_expansion(block)
            branches.append(
                ResLayer(
                    block=block,
                    num_blocks=num_blocks[i],
                    in_channels=self.in_channels[i],
                    out_channels=out_channels,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    with_cp=self.with_cp,
                    init_cfg=self.block_init_cfg,
                ))

        return ModuleList(branches)
Example No. 30
    def __init__(self,
                 n_layers=6,
                 d_embedding=512,
                 n_head=8,
                 d_k=64,
                 d_v=64,
                 d_model=512,
                 d_inner=256,
                 n_position=200,
                 dropout=0.1,
                 num_classes=93,
                 max_seq_len=40,
                 start_idx=1,
                 padding_idx=92,
                 init_cfg=None,
                 **kwargs):
        super().__init__(init_cfg=init_cfg)

        self.padding_idx = padding_idx
        self.start_idx = start_idx
        self.max_seq_len = max_seq_len

        self.trg_word_emb = nn.Embedding(num_classes,
                                         d_embedding,
                                         padding_idx=padding_idx)

        self.position_enc = PositionalEncoding(d_embedding,
                                               n_position=n_position)
        self.dropout = nn.Dropout(p=dropout)

        self.layer_stack = ModuleList([
            TFDecoderLayer(d_model,
                           d_inner,
                           n_head,
                           d_k,
                           d_v,
                           dropout=dropout,
                           **kwargs) for _ in range(n_layers)
        ])
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

        pred_num_class = num_classes - 1  # ignore padding_idx
        self.classifier = nn.Linear(d_model, pred_num_class)