def test_modulelist_weight_init():
    models_cfg = [
        dict(
            type='FooConv1d',
            init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)),
        dict(
            type='FooConv2d',
            init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)),
    ]
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in models_cfg]
    modellist = ModuleList(layers)
    modellist.init_weights()
    assert torch.equal(modellist[0].conv1d.weight,
                       torch.full(modellist[0].conv1d.weight.shape, 0.))
    assert torch.equal(modellist[0].conv1d.bias,
                       torch.full(modellist[0].conv1d.bias.shape, 1.))
    assert torch.equal(modellist[1].conv2d.weight,
                       torch.full(modellist[1].conv2d.weight.shape, 2.))
    assert torch.equal(modellist[1].conv2d.bias,
                       torch.full(modellist[1].conv2d.bias.shape, 3.))

    # inner init_cfg has higher priority
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in models_cfg]
    modellist = ModuleList(
        layers,
        init_cfg=dict(
            type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.))
    modellist.init_weights()
    assert torch.equal(modellist[0].conv1d.weight,
                       torch.full(modellist[0].conv1d.weight.shape, 0.))
    assert torch.equal(modellist[0].conv1d.bias,
                       torch.full(modellist[0].conv1d.bias.shape, 1.))
    assert torch.equal(modellist[1].conv2d.weight,
                       torch.full(modellist[1].conv2d.weight.shape, 2.))
    assert torch.equal(modellist[1].conv2d.bias,
                       torch.full(modellist[1].conv2d.bias.shape, 3.))
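# A minimal sketch of the priority rule exercised by the test above, assuming
# mmcv.runner's BaseModule/ModuleList and the registered 'Constant'
# initializer. `TinyConvList` is a hypothetical module used only for
# illustration: its children carry no init_cfg of their own, so the
# list-level init_cfg is what fills them in; a per-child init_cfg, as in the
# test, would take precedence instead.
import torch.nn as nn
from mmcv.runner import BaseModule, ModuleList


class TinyConvList(BaseModule):  # hypothetical, for illustration only

    def __init__(self):
        super().__init__()
        # No per-child init_cfg here, so the list-level Constant init below
        # is what initializes both convolutions once init_weights() is
        # called, e.g. TinyConvList().init_weights() fills every conv
        # weight with 4. and every bias with 5.
        self.convs = ModuleList(
            [nn.Conv2d(8, 8, 3, padding=1) for _ in range(2)],
            init_cfg=dict(type='Constant', layer='Conv2d', val=4., bias=5.))

    def forward(self, x):
        for conv in self.convs:
            x = conv(x)
        return x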
def __init__(self,
             in_channels=[256, 512, 1024, 2048],
             out_channels=256,
             fusion_type='concat',
             upsample_ratio=1,
             init_cfg=dict(
                 type='Xavier', layer='Conv2d', distribution='uniform')):
    super().__init__(init_cfg=init_cfg)
    conv_cfg = None
    norm_cfg = dict(type='BN')
    act_cfg = dict(type='ReLU')

    self.in_channels = in_channels
    self.out_channels = out_channels

    self.lateral_convs = ModuleList()
    self.fpn_convs = ModuleList()
    self.backbone_end_level = len(in_channels)
    for i in range(self.backbone_end_level):
        l_conv = ConvModule(
            in_channels[i],
            out_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            inplace=False)
        self.lateral_convs.append(l_conv)
        if i < self.backbone_end_level - 1:
            fpn_conv = ConvModule(
                out_channels,
                out_channels,
                3,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                inplace=False)
            self.fpn_convs.append(fpn_conv)

    self.fusion_type = fusion_type
    if self.fusion_type == 'concat':
        feature_channels = 1024
    elif self.fusion_type == 'add':
        feature_channels = 256
    else:
        raise NotImplementedError

    self.output_convs = ConvModule(
        feature_channels,
        out_channels,
        3,
        padding=1,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=act_cfg,
        inplace=False)
    self.upsample_ratio = upsample_ratio
def __init__(self,
             n_layers=12,
             n_head=8,
             d_k=64,
             d_v=64,
             d_model=512,
             n_position=100,
             d_inner=256,
             dropout=0.1,
             init_cfg=None,
             **kwargs):
    super().__init__(init_cfg=init_cfg)
    self.d_model = d_model
    self.position_enc = Adaptive2DPositionalEncoding(
        d_hid=d_model,
        n_height=n_position,
        n_width=n_position,
        dropout=dropout)
    self.layer_stack = ModuleList([
        SatrnEncoderLayer(
            d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model)
def __init__(self,
             blocks,
             dims,
             mlp_ratios,
             in_channels=3,
             stem_channels=64,
             num_conv_blocks=3,
             out_indices=(0, 1, 2, 3),
             init_cfg=None):
    super().__init__(init_cfg=init_cfg)
    if out_indices != (0, 1, 2, 3):
        raise NotImplementedError
    assert len(blocks) == len(dims) == len(mlp_ratios), \
        'blocks, dims and mlp_ratios must agree in size, ' \
        f'{len(blocks)}, {len(dims)} and {len(mlp_ratios)} passed.'

    self.tokenizer = ConvTokenizer(
        in_dim=in_channels, embed_dim=stem_channels)
    self.conv_stages = ConvStage(
        num_conv_blocks,
        embed_dim_in=stem_channels,
        hidden_dim=dims[0],
        embed_dim_out=dims[0])

    self.stages = ModuleList()
    for i in range(0, len(blocks)):
        is_last_stage = i == len(blocks) - 1
        stage = ConvMLPStage(
            num_blocks=blocks[i],
            embed_dims=dims[i:i + 2],
            mlp_ratio=mlp_ratios[i],
            drop_path_rate=0.1,
            downsample=(not is_last_stage))
        self.stages.append(stage)
def __init__(self,
             in_channels,
             conv_out=128,
             fpem_repeat=2,
             align_corners=False,
             init_cfg=dict(
                 type='Xavier', layer='Conv2d', distribution='uniform')):
    super().__init__(init_cfg=init_cfg)
    # reduce layers
    self.reduce_conv_c2 = nn.Sequential(
        nn.Conv2d(
            in_channels=in_channels[0],
            out_channels=conv_out,
            kernel_size=1), nn.BatchNorm2d(conv_out), nn.ReLU())
    self.reduce_conv_c3 = nn.Sequential(
        nn.Conv2d(
            in_channels=in_channels[1],
            out_channels=conv_out,
            kernel_size=1), nn.BatchNorm2d(conv_out), nn.ReLU())
    self.reduce_conv_c4 = nn.Sequential(
        nn.Conv2d(
            in_channels=in_channels[2],
            out_channels=conv_out,
            kernel_size=1), nn.BatchNorm2d(conv_out), nn.ReLU())
    self.reduce_conv_c5 = nn.Sequential(
        nn.Conv2d(
            in_channels=in_channels[3],
            out_channels=conv_out,
            kernel_size=1), nn.BatchNorm2d(conv_out), nn.ReLU())
    self.align_corners = align_corners
    self.fpems = ModuleList()
    for _ in range(fpem_repeat):
        self.fpems.append(FPEM(conv_out))
def __init__(self,
             n_layers=2,
             n_head=8,
             d_model=512,
             d_inner=2048,
             dropout=0.1,
             max_len=8 * 32,
             init_cfg=None):
    super().__init__(init_cfg=init_cfg)
    assert d_model % n_head == 0, 'd_model must be divisible by n_head'

    self.pos_encoder = PositionalEncoding(d_model, n_position=max_len)
    encoder_layer = BaseTransformerLayer(
        operation_order=('self_attn', 'norm', 'ffn', 'norm'),
        attn_cfgs=dict(
            type='MultiheadAttention',
            embed_dims=d_model,
            num_heads=n_head,
            attn_drop=dropout,
            dropout_layer=dict(type='Dropout', drop_prob=dropout),
        ),
        ffn_cfgs=dict(
            type='FFN',
            embed_dims=d_model,
            feedforward_channels=d_inner,
            ffn_drop=dropout,
        ),
        norm_cfg=dict(type='LN'),
    )
    self.transformer = ModuleList(
        [copy.deepcopy(encoder_layer) for _ in range(n_layers)])
def __init__(self,
             in_channels,
             inner_channels,
             num_layers=1,
             num_upsample=None,
             conv_cfg=None,
             norm_cfg=None,
             init_cfg=None,
             **kwargs):
    super(ConvUpsample, self).__init__(init_cfg)
    if num_upsample is None:
        num_upsample = num_layers
    assert num_upsample <= num_layers, \
        f'num_upsample({num_upsample}) must be no more than ' \
        f'num_layers({num_layers})'
    self.num_layers = num_layers
    self.num_upsample = num_upsample
    self.conv = ModuleList()
    for i in range(num_layers):
        self.conv.append(
            ConvModule(
                in_channels,
                inner_channels,
                3,
                padding=1,
                stride=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                **kwargs))
        in_channels = inner_channels
def __init__(self,
             rfp_steps,
             rfp_backbone,
             aspp_out_channels,
             aspp_dilations=(1, 3, 6, 1),
             init_cfg=None,
             **kwargs):
    assert init_cfg is None, 'To prevent abnormal initialization ' \
        'behavior, init_cfg is not allowed to be set'
    super().__init__(init_cfg=init_cfg, **kwargs)
    self.rfp_steps = rfp_steps
    # Be careful! Pretrained weights cannot be loaded when
    # nn.ModuleList is used.
    self.rfp_modules = ModuleList()
    for rfp_idx in range(1, rfp_steps):
        rfp_module = build_backbone(rfp_backbone)
        self.rfp_modules.append(rfp_module)
    self.rfp_aspp = ASPP(self.out_channels, aspp_out_channels,
                         aspp_dilations)
    self.rfp_weight = nn.Conv2d(
        self.out_channels,
        1,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=True)
def _init_layers(self):
    """Initialize layers of the head."""
    self.relu = nn.ReLU(inplace=True)
    self.head_convs = ModuleList()
    for i in range(self.num_head_convs):
        chn = self.in_channels if i == 0 else self.feat_channels
        self.head_convs.append(
            ConvModule(
                chn,
                self.feat_channels,
                3,
                stride=1,
                padding=1,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg))
    self.conv_cls = nn.Conv2d(
        self.feat_channels,
        self.num_base_priors * self.cls_out_channels,
        3,
        padding=1)
    self.conv_reg = nn.Conv2d(
        self.feat_channels, self.num_base_priors * 4, 3, padding=1)
    self.conv_coeff = nn.Conv2d(
        self.feat_channels,
        self.num_base_priors * self.num_protos,
        3,
        padding=1)
def init_mask_head(self, mask_roi_extractor, mask_head):
    """Initialize mask head and mask roi extractor.

    Args:
        mask_roi_extractor (dict): Config of mask roi extractor.
        mask_head (dict): Config of mask head.
    """
    self.mask_head = nn.ModuleList()
    if not isinstance(mask_head, list):
        mask_head = [mask_head for _ in range(self.num_stages)]
    assert len(mask_head) == self.num_stages
    for head in mask_head:
        self.mask_head.append(build_head(head))
    if mask_roi_extractor is not None:
        self.share_roi_extractor = False
        self.mask_roi_extractor = ModuleList()
        if not isinstance(mask_roi_extractor, list):
            mask_roi_extractor = [
                mask_roi_extractor for _ in range(self.num_stages)
            ]
        assert len(mask_roi_extractor) == self.num_stages
        for roi_extractor in mask_roi_extractor:
            self.mask_roi_extractor.append(
                build_roi_extractor(roi_extractor))
    else:
        self.share_roi_extractor = True
        self.mask_roi_extractor = self.bbox_roi_extractor
def _init_layers(self):
    """A helper function to take a config setting and turn it into a
    network."""
    # Possible patterns:
    # ( 256, 3) -> conv
    # ( 256,-2) -> deconv
    # (None,-2) -> bilinear interpolate
    in_channels = self.in_channels
    protonets = ModuleList()
    for num_channels, kernel_size in zip(self.proto_channels,
                                         self.proto_kernel_sizes):
        if kernel_size > 0:
            layer = nn.Conv2d(
                in_channels,
                num_channels,
                kernel_size,
                padding=kernel_size // 2)
        else:
            if num_channels is None:
                layer = InterpolateModule(
                    scale_factor=-kernel_size,
                    mode='bilinear',
                    align_corners=False)
            else:
                layer = nn.ConvTranspose2d(
                    in_channels,
                    num_channels,
                    -kernel_size,
                    padding=kernel_size // 2)
        protonets.append(layer)
        protonets.append(nn.ReLU(inplace=True))
        in_channels = num_channels if num_channels is not None \
            else in_channels
    if not self.include_last_relu:
        protonets = protonets[:-1]
    return nn.Sequential(*protonets)
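# A hypothetical trace of the pattern table in `_init_layers` above (values
# chosen for clarity, not necessarily the repo's defaults):
#   proto_channels     = (256, 256, None, 32)
#   proto_kernel_sizes = (3,   3,   -2,   1)
# builds Conv2d(k=3) + ReLU, Conv2d(k=3) + ReLU, a x2 bilinear
# InterpolateModule + ReLU, then Conv2d(k=1) + ReLU, with the trailing ReLU
# dropped when include_last_relu is False.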
def _add_fc_branch(self):
    """Add the fc branch, which consists of a sequence of fc layers."""
    branch_fcs = ModuleList()
    for i in range(self.num_fcs):
        fc_in_channels = (
            self.in_channels * self.roi_feat_area
            if i == 0 else self.fc_out_channels)
        branch_fcs.append(nn.Linear(fc_in_channels, self.fc_out_channels))
    return branch_fcs
def _make_branches(self, num_branches, block, num_blocks, num_channels):
    branches = []
    for i in range(num_branches):
        branches.append(
            self._make_one_branch(i, block, num_blocks, num_channels))
    return ModuleList(branches)
def __init__(self,
             in_channels,
             feat_channels,
             out_channels,
             norm_cfg=dict(type='GN', num_groups=32),
             act_cfg=dict(type='ReLU'),
             init_cfg=None):
    super().__init__(init_cfg=init_cfg)
    self.in_channels = in_channels
    self.num_inputs = len(in_channels)
    self.lateral_convs = ModuleList()
    self.output_convs = ModuleList()
    self.use_bias = norm_cfg is None
    for i in range(0, self.num_inputs - 1):
        lateral_conv = ConvModule(
            in_channels[i],
            feat_channels,
            kernel_size=1,
            bias=self.use_bias,
            norm_cfg=norm_cfg,
            act_cfg=None)
        output_conv = ConvModule(
            feat_channels,
            feat_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=self.use_bias,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.lateral_convs.append(lateral_conv)
        self.output_convs.append(output_conv)

    self.last_feat_conv = ConvModule(
        in_channels[-1],
        feat_channels,
        kernel_size=3,
        padding=1,
        stride=1,
        bias=self.use_bias,
        norm_cfg=norm_cfg,
        act_cfg=act_cfg)
    self.mask_feature = Conv2d(
        feat_channels, out_channels, kernel_size=3, stride=1, padding=1)
def _add_conv_branch(self):
    """Add the conv branch, which consists of a sequence of conv layers."""
    branch_convs = ModuleList()
    for i in range(self.num_convs):
        branch_convs.append(
            Bottleneck(
                inplanes=self.conv_out_channels,
                planes=self.conv_out_channels // 4,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg))
    return branch_convs
def init_bbox_head(self, bbox_roi_extractor, bbox_head):
    """Initialize box head and box roi extractor.

    Args:
        bbox_roi_extractor (dict): Config of box roi extractor.
        bbox_head (dict): Config of box head.
    """
    self.bbox_roi_extractor = ModuleList()
    self.bbox_head = ModuleList()
    if not isinstance(bbox_roi_extractor, list):
        bbox_roi_extractor = [
            bbox_roi_extractor for _ in range(self.num_stages)
        ]
    if not isinstance(bbox_head, list):
        bbox_head = [bbox_head for _ in range(self.num_stages)]
    assert len(bbox_roi_extractor) == len(bbox_head) == self.num_stages
    for roi_extractor, head in zip(bbox_roi_extractor, bbox_head):
        self.bbox_roi_extractor.append(build_roi_extractor(roi_extractor))
        self.bbox_head.append(build_head(head))
def __init__(self,
             num_things_classes=80,
             num_stuff_classes=53,
             num_classes=None,
             in_channels=256,
             inner_channels=128,
             start_level=0,
             end_level=4,
             fg_range=None,
             bg_range=None,
             conv_cfg=None,
             norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
             init_cfg=None,
             loss_seg=dict(
                 type='CrossEntropyLoss', ignore_index=-1, loss_weight=1.0)):
    if num_classes is not None:
        warnings.warn(
            '`num_classes` is deprecated now, please set '
            '`num_stuff_classes` directly, the `num_classes` will be '
            'set to `num_stuff_classes + 1`')
        # num_classes = num_stuff_classes + 1 for PanopticFPN.
        assert num_classes == num_stuff_classes + 1
    super(PanopticFPNHead, self).__init__(num_stuff_classes + 1, init_cfg,
                                          loss_seg)
    self.num_things_classes = num_things_classes
    self.num_stuff_classes = num_stuff_classes
    if fg_range is not None and bg_range is not None:
        self.fg_range = fg_range
        self.bg_range = bg_range
        self.num_things_classes = fg_range[1] - fg_range[0] + 1
        self.num_stuff_classes = bg_range[1] - bg_range[0] + 1
        warnings.warn(
            '`fg_range` and `bg_range` are deprecated now, '
            f'please use `num_things_classes`={self.num_things_classes} '
            f'and `num_stuff_classes`={self.num_stuff_classes} instead.')

    # Used feature layers are [start_level, end_level)
    self.start_level = start_level
    self.end_level = end_level
    self.num_stages = end_level - start_level
    self.inner_channels = inner_channels

    self.conv_upsample_layers = ModuleList()
    for i in range(start_level, end_level):
        self.conv_upsample_layers.append(
            ConvUpsample(
                in_channels,
                inner_channels,
                num_layers=i if i > 0 else 1,
                num_upsample=i if i > 0 else 0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
            ))
    self.conv_logits = nn.Conv2d(inner_channels, self.num_classes, 1)
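# Illustrative reading of the loop above, not an addition to the code: with
# the defaults start_level=0 and end_level=4, four ConvUpsample modules are
# built; stage i applies i conv layers and i upsampling steps (stage 0 uses a
# single conv and no upsampling). Assuming each upsample step in ConvUpsample
# doubles the spatial size, the deeper, lower-resolution inputs are brought
# back to the resolution of the first level before conv_logits.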
def __init__(self,
             num_convs=0,
             num_fcs=2,
             fc_out_channels=1024,
             downsample_factor=2,
             init_cfg=dict(
                 type='Xavier',
                 override=[
                     dict(name='fcs'),
                     dict(type='Constant', val=0.001, name='fc_logits')
                 ]),
             *arg,
             **kwarg):
    super(CoarseMaskHead, self).__init__(
        *arg,
        num_convs=num_convs,
        upsample_cfg=dict(type=None),
        init_cfg=None,
        **kwarg)
    self.init_cfg = init_cfg
    self.num_fcs = num_fcs
    assert self.num_fcs > 0
    self.fc_out_channels = fc_out_channels
    self.downsample_factor = downsample_factor
    assert self.downsample_factor >= 1
    # remove conv_logit
    delattr(self, 'conv_logits')

    if downsample_factor > 1:
        downsample_in_channels = (
            self.conv_out_channels
            if self.num_convs > 0 else self.in_channels)
        self.downsample_conv = ConvModule(
            downsample_in_channels,
            self.conv_out_channels,
            kernel_size=downsample_factor,
            stride=downsample_factor,
            padding=0,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg)
    else:
        self.downsample_conv = None

    self.output_size = (self.roi_feat_size[0] // downsample_factor,
                        self.roi_feat_size[1] // downsample_factor)
    self.output_area = self.output_size[0] * self.output_size[1]

    last_layer_dim = self.conv_out_channels * self.output_area

    self.fcs = ModuleList()
    for i in range(num_fcs):
        fc_in_channels = (
            last_layer_dim if i == 0 else self.fc_out_channels)
        self.fcs.append(Linear(fc_in_channels, self.fc_out_channels))
        last_layer_dim = self.fc_out_channels

    output_channels = self.num_classes * self.output_area
    self.fc_logits = Linear(last_layer_dim, output_channels)
def __init__(self,
             d_model=512,
             n_head=8,
             d_inner=2048,
             n_layers=4,
             max_seq_len=40,
             dropout=0.1,
             detach_tokens=True,
             num_chars=90,
             use_self_attn=False,
             pad_idx=0,
             init_cfg=None,
             **kwargs):
    super().__init__(init_cfg=init_cfg)
    self.detach_tokens = detach_tokens
    self.d_model = d_model
    self.max_seq_len = max_seq_len

    self.proj = nn.Linear(num_chars, d_model, False)
    self.token_encoder = PositionalEncoding(
        d_model, n_position=self.max_seq_len, dropout=0.1)
    self.pos_encoder = PositionalEncoding(
        d_model, n_position=self.max_seq_len)
    self.pad_idx = pad_idx

    if use_self_attn:
        operation_order = ('self_attn', 'norm', 'cross_attn', 'norm',
                           'ffn', 'norm')
    else:
        operation_order = ('cross_attn', 'norm', 'ffn', 'norm')

    decoder_layer = BaseTransformerLayer(
        operation_order=operation_order,
        attn_cfgs=dict(
            type='MultiheadAttention',
            embed_dims=d_model,
            num_heads=n_head,
            attn_drop=dropout,
            dropout_layer=dict(type='Dropout', drop_prob=dropout),
        ),
        ffn_cfgs=dict(
            type='FFN',
            embed_dims=d_model,
            feedforward_channels=d_inner,
            ffn_drop=dropout,
        ),
        norm_cfg=dict(type='LN'),
    )
    self.decoder_layers = ModuleList(
        [copy.deepcopy(decoder_layer) for _ in range(n_layers)])

    self.cls = nn.Linear(d_model, num_chars)
def __init__(
    self,
    in_channels,
    num_layers,
    num_heads,
    embed_dims,
    mlp_ratio=4,
    drop_path_rate=0.1,
    drop_rate=0.0,
    attn_drop_rate=0.0,
    num_fcs=2,
    qkv_bias=True,
    act_cfg=dict(type='GELU'),
    norm_cfg=dict(type='LN'),
    init_std=0.02,
    **kwargs,
):
    super(SegmenterMaskTransformerHead, self).__init__(
        in_channels=in_channels, **kwargs)

    dpr = [x.item() for x in torch.linspace(0, drop_path_rate, num_layers)]
    self.layers = ModuleList()
    for i in range(num_layers):
        self.layers.append(
            TransformerEncoderLayer(
                embed_dims=embed_dims,
                num_heads=num_heads,
                feedforward_channels=mlp_ratio * embed_dims,
                attn_drop_rate=attn_drop_rate,
                drop_rate=drop_rate,
                drop_path_rate=dpr[i],
                num_fcs=num_fcs,
                qkv_bias=qkv_bias,
                act_cfg=act_cfg,
                norm_cfg=norm_cfg,
                batch_first=True,
            ))

    self.dec_proj = nn.Linear(in_channels, embed_dims)

    self.cls_emb = nn.Parameter(
        torch.randn(1, self.num_classes, embed_dims))
    self.patch_proj = nn.Linear(embed_dims, embed_dims, bias=False)
    self.classes_proj = nn.Linear(embed_dims, embed_dims, bias=False)

    self.decoder_norm = build_norm_layer(
        norm_cfg, embed_dims, postfix=1)[1]
    self.mask_norm = build_norm_layer(
        norm_cfg, self.num_classes, postfix=2)[1]

    self.init_std = init_std

    delattr(self, 'conv_seg')
def __init__(self,
             in_channels=256,
             out_channels=256,
             kernel_size=[3, 3, 3],
             dilation=[1, 1, 1],
             groups=[1, 1, 1],
             ibn=False,
             norm_cfg=dict(type='BN', requires_grad=True),
             norm_eval=True,
             part_deform=False,
             init_cfg=None):
    assert init_cfg is None, 'To prevent abnormal initialization ' \
        'behavior, init_cfg is not allowed to be set'
    super(PConvModule, self).__init__(init_cfg=init_cfg)

    self.ibn = ibn
    self.norm_cfg = norm_cfg
    self.norm_eval = norm_eval

    self.pconv = ModuleList()
    self.pconv.append(
        SEPCConv(
            in_channels,
            out_channels,
            kernel_size=kernel_size[0],
            dilation=dilation[0],
            groups=groups[0],
            padding=(kernel_size[0] + (dilation[0] - 1) * 2) // 2,
            part_deform=part_deform))
    self.pconv.append(
        SEPCConv(
            in_channels,
            out_channels,
            kernel_size=kernel_size[1],
            dilation=dilation[1],
            groups=groups[1],
            padding=(kernel_size[1] + (dilation[1] - 1) * 2) // 2,
            part_deform=part_deform))
    self.pconv.append(
        SEPCConv(
            in_channels,
            out_channels,
            kernel_size=kernel_size[2],
            dilation=dilation[2],
            groups=groups[2],
            padding=(kernel_size[2] + (dilation[2] - 1) * 2) // 2,
            stride=2,
            part_deform=part_deform))

    if self.ibn:
        self.pnorm_name, pnorm = build_norm_layer(self.norm_cfg, 256)
        self.add_module(self.pnorm_name, pnorm)

    self.relu = nn.ReLU()
def __init__(self, num_stages, stages, train_cfg, test_cfg, init_cfg=None):
    super(CascadeRPNHead, self).__init__(init_cfg)
    assert num_stages == len(stages)
    self.num_stages = num_stages
    # Be careful! Pretrained weights cannot be loaded when
    # nn.ModuleList is used.
    self.stages = ModuleList()
    for i in range(len(stages)):
        train_cfg_i = train_cfg[i] if train_cfg is not None else None
        stages[i].update(train_cfg=train_cfg_i)
        stages[i].update(test_cfg=test_cfg)
        self.stages.append(build_head(stages[i]))
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
def __init__(self,
             in_channels,
             out_channels,
             last_stage_only=True,
             init_cfg=None):
    super().__init__(init_cfg=init_cfg)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.num_ins = len(in_channels)
    self.last_stage_only = last_stage_only

    self.lateral_convs = ModuleList()
    self.smooth_convs_1x1 = ModuleList()
    self.smooth_convs_3x3 = ModuleList()

    for i in range(self.num_ins):
        l_conv = ConvModule(
            in_channels[i], out_channels, 1, norm_cfg=dict(type='BN'))
        self.lateral_convs.append(l_conv)

    for i in range(self.num_ins - 1):
        s_conv_1x1 = ConvModule(
            out_channels * 2, out_channels, 1, norm_cfg=dict(type='BN'))
        s_conv_3x3 = ConvModule(
            out_channels,
            out_channels,
            3,
            padding=1,
            norm_cfg=dict(type='BN'))
        self.smooth_convs_1x1.append(s_conv_1x1)
        self.smooth_convs_3x3.append(s_conv_3x3)
def _make_branches(self, num_branches, block, num_blocks, num_channels):
    branches = []
    for i in range(num_branches):
        out_channels = num_channels[i] * get_expansion(block)
        branches.append(
            ResLayer(
                block=block,
                num_blocks=num_blocks[i],
                in_channels=self.in_channels[i],
                out_channels=out_channels,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg,
                with_cp=self.with_cp,
                init_cfg=self.block_init_cfg,
            ))
    return ModuleList(branches)
def _init_layers(self):
    self.layers = ModuleList()
    in_channels = self.in_channels
    for hidden_channels in self.mid_channels:
        self.layers.append(
            LinearBlock(
                in_channels,
                hidden_channels,
                dropout_rate=self.dropout_rate,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg))
        in_channels = hidden_channels

    self.layers.append(
        LinearBlock(
            self.mid_channels[-1],
            self.num_classes,
            dropout_rate=0.,
            norm_cfg=None,
            act_cfg=None))
def __init__(self,
             embed_dims,
             num_heads,
             feedforward_channels,
             depth,
             window_size=7,
             qkv_bias=True,
             qk_scale=None,
             drop_rate=0.,
             attn_drop_rate=0.,
             drop_path_rate=0.,
             downsample=None,
             act_cfg=dict(type='GELU'),
             norm_cfg=dict(type='LN'),
             with_cp=False,
             init_cfg=None):
    super().__init__(init_cfg=init_cfg)

    if isinstance(drop_path_rate, list):
        drop_path_rates = drop_path_rate
        assert len(drop_path_rates) == depth
    else:
        drop_path_rates = [deepcopy(drop_path_rate) for _ in range(depth)]

    self.blocks = ModuleList()
    for i in range(depth):
        block = SwinBlock(
            embed_dims=embed_dims,
            num_heads=num_heads,
            feedforward_channels=feedforward_channels,
            window_size=window_size,
            shift=False if i % 2 == 0 else True,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            drop_rate=drop_rate,
            attn_drop_rate=attn_drop_rate,
            drop_path_rate=drop_path_rates[i],
            act_cfg=act_cfg,
            norm_cfg=norm_cfg,
            with_cp=with_cp,
            init_cfg=None)
        self.blocks.append(block)

    self.downsample = downsample
def __init__(self,
             n_layers=6,
             d_embedding=512,
             n_head=8,
             d_k=64,
             d_v=64,
             d_model=512,
             d_inner=256,
             n_position=200,
             dropout=0.1,
             num_classes=93,
             max_seq_len=40,
             start_idx=1,
             padding_idx=92,
             init_cfg=None,
             **kwargs):
    super().__init__(init_cfg=init_cfg)

    self.padding_idx = padding_idx
    self.start_idx = start_idx
    self.max_seq_len = max_seq_len

    self.trg_word_emb = nn.Embedding(
        num_classes, d_embedding, padding_idx=padding_idx)

    self.position_enc = PositionalEncoding(
        d_embedding, n_position=n_position)
    self.dropout = nn.Dropout(p=dropout)

    self.layer_stack = ModuleList([
        TFDecoderLayer(
            d_model, d_inner, n_head, d_k, d_v, dropout=dropout, **kwargs)
        for _ in range(n_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

    pred_num_class = num_classes - 1  # ignore padding_idx
    self.classifier = nn.Linear(d_model, pred_num_class)
def test_basetransformerlayer_cuda():
    # To test if the BaseTransformerLayer's behaviour remains
    # consistent after being deepcopied
    operation_order = ('self_attn', 'ffn')
    baselayer = BaseTransformerLayer(
        operation_order=operation_order,
        batch_first=True,
        attn_cfgs=dict(
            type='MultiheadAttention',
            embed_dims=256,
            num_heads=8,
        ),
    )
    baselayers = ModuleList([copy.deepcopy(baselayer) for _ in range(2)])
    baselayers.to('cuda')
    x = torch.rand(2, 10, 256).cuda()
    for m in baselayers:
        x = m(x)
    assert x.shape == torch.Size([2, 10, 256])
def _init_layers(self):
    self.layers = ModuleList(
        init_cfg=dict(
            type='Normal', layer='Linear', mean=0., std=0.01, bias=0.))
    in_channels = self.in_channels
    for hidden_channels in self.mid_channels:
        self.layers.append(
            LinearBlock(
                in_channels,
                hidden_channels,
                dropout_rate=self.dropout_rate,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg))
        in_channels = hidden_channels

    self.layers.append(
        LinearBlock(
            self.mid_channels[-1],
            self.num_classes,
            dropout_rate=0.,
            norm_cfg=None,
            act_cfg=None))
def _build_layers(self):
    dpr = [
        x.item()
        for x in torch.linspace(0, self.drop_path_rate, self.num_layers)
    ]
    self.layers = ModuleList()
    for i in range(self.num_layers):
        self.layers.append(
            MAETransformerEncoderLayer(
                embed_dims=self.embed_dims,
                num_heads=self.num_heads,
                feedforward_channels=self.mlp_ratio * self.embed_dims,
                attn_drop_rate=self.attn_drop_rate,
                drop_path_rate=dpr[i],
                num_fcs=self.num_fcs,
                bias=True,
                act_cfg=self.act_cfg,
                norm_cfg=self.norm_cfg,
                window_size=self.patch_shape,
                init_values=self.init_values))