def __init__(self,
             embed_dims=256,
             feedforward_channels=1024,
             num_fcs=2,
             act_cfg=dict(type='ReLU', inplace=True),
             ffn_drop=0.,
             dropout_layer=None,
             add_identity=True,
             init_cfg=None,
             **kwargs):
    super(FFN, self).__init__(init_cfg)
    assert num_fcs >= 2, ('num_fcs should be no less than 2, '
                          f'but got {num_fcs}.')
    self.embed_dims = embed_dims
    self.feedforward_channels = feedforward_channels
    self.num_fcs = num_fcs
    self.act_cfg = act_cfg
    self.activate = build_activation_layer(act_cfg)

    layers = []
    in_channels = embed_dims
    # hidden blocks: Linear -> activation -> dropout
    for _ in range(num_fcs - 1):
        layers.append(
            Sequential(
                Linear(in_channels, feedforward_channels), self.activate,
                nn.Dropout(ffn_drop)))
        in_channels = feedforward_channels
    # final projection back to the embedding dimension
    layers.append(Linear(feedforward_channels, embed_dims))
    layers.append(nn.Dropout(ffn_drop))
    self.layers = Sequential(*layers)
    self.dropout_layer = build_dropout(
        dropout_layer) if dropout_layer else torch.nn.Identity()
    self.add_identity = add_identity

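# A minimal, self-contained sketch (plain PyTorch only, not the mmcv class)
# of the forward pass this constructor sets up; the standalone class name and
# sizes here are illustrative, the behaviour mirrors the layers built above.
import torch
import torch.nn as nn


class TinyFFN(nn.Module):

    def __init__(self, embed_dims=256, feedforward_channels=1024,
                 ffn_drop=0.1):
        super().__init__()
        # num_fcs=2 case: expand -> activate -> drop -> project back -> drop
        self.layers = nn.Sequential(
            nn.Linear(embed_dims, feedforward_channels),
            nn.ReLU(inplace=True), nn.Dropout(ffn_drop),
            nn.Linear(feedforward_channels, embed_dims), nn.Dropout(ffn_drop))
        self.dropout_layer = nn.Identity()  # stands in for build_dropout(...)
        self.add_identity = True

    def forward(self, x, identity=None):
        out = self.layers(x)
        if not self.add_identity:
            return self.dropout_layer(out)
        if identity is None:
            identity = x
        return identity + self.dropout_layer(out)


x = torch.randn(2, 100, 256)          # (batch, num_tokens, embed_dims)
assert TinyFFN()(x).shape == x.shape  # the FFN preserves the token shape
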
def __init__(self,
             embed_dims,
             feedforward_channels,
             num_fcs=2,
             act_cfg=dict(type='ReLU', inplace=True),
             dropout=0.,
             add_residual=True,
             init_cfg=None):
    super(FFN, self).__init__(init_cfg)
    assert num_fcs >= 2, ('num_fcs should be no less than 2, '
                          f'but got {num_fcs}.')
    self.embed_dims = embed_dims
    self.feedforward_channels = feedforward_channels
    self.num_fcs = num_fcs
    self.act_cfg = act_cfg
    self.activate = build_activation_layer(act_cfg)

    layers = []
    in_channels = embed_dims
    for _ in range(num_fcs - 1):
        layers.append(
            nn.Sequential(
                Linear(in_channels, feedforward_channels), self.activate,
                nn.Dropout(dropout)))
        in_channels = feedforward_channels
    layers.append(Linear(feedforward_channels, embed_dims))
    self.layers = nn.Sequential(*layers)
    # keep the module, not the raw float: the original also did
    # `self.dropout = dropout` earlier, a dead store immediately
    # overwritten by this assignment
    self.dropout = nn.Dropout(dropout)
    self.add_residual = add_residual

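# Hedged sketch of the forward this legacy signature implies: unlike the
# newer FFN above, dropout is applied once, after the whole stack, and the
# identity flag is called `add_residual`. A standalone approximation, not
# the actual mmdet method.
def legacy_ffn_forward(self, x, residual=None):
    out = self.layers(x)
    if not self.add_residual:
        return self.dropout(out)
    if residual is None:
        residual = x
    return residual + self.dropout(out)
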
def _init_layers(self):
    """Initialize classification branch and regression branch of head."""
    fc_cls = Linear(self.embed_dims, self.cls_out_channels)
    reg_branch = []
    for _ in range(self.num_reg_fcs):
        reg_branch.append(Linear(self.embed_dims, self.embed_dims))
        reg_branch.append(nn.ReLU())
    reg_branch.append(Linear(self.embed_dims, 4))
    reg_branch = nn.Sequential(*reg_branch)

    def _get_clones(module, N):
        return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

    # The last reg_branch is used to generate proposals from the encoder's
    # feature map when as_two_stage is True.
    num_pred = (self.transformer.decoder.num_layers + 1) if \
        self.as_two_stage else self.transformer.decoder.num_layers

    if self.with_box_refine:
        # box refinement needs per-layer heads with independent weights
        self.cls_branches = _get_clones(fc_cls, num_pred)
        self.reg_branches = _get_clones(reg_branch, num_pred)
    else:
        # without refinement, one head (shared weights) is reused per layer
        self.cls_branches = nn.ModuleList(
            [fc_cls for _ in range(num_pred)])
        self.reg_branches = nn.ModuleList(
            [reg_branch for _ in range(num_pred)])

    if not self.as_two_stage:
        self.query_embedding = nn.Embedding(self.num_query,
                                            self.embed_dims * 2)

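# Why the deepcopy-based clones matter: repeating one module inside a
# ModuleList shares its parameters across all entries, while deepcopy gives
# each entry its own weights. A quick self-contained check:
import copy
import torch.nn as nn

head = nn.Linear(256, 4)
shared = nn.ModuleList([head for _ in range(3)])
cloned = nn.ModuleList([copy.deepcopy(head) for _ in range(3)])

assert shared[0].weight is shared[1].weight      # one tensor, shared
assert cloned[0].weight is not cloned[1].weight  # independent tensors
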
def __init__(self,
             num_convs=0,
             num_fcs=2,
             fc_out_channels=1024,
             downsample_factor=2,
             init_cfg=dict(
                 type='Xavier',
                 override=[
                     dict(name='fcs'),
                     dict(type='Constant', val=0.001, name='fc_logits')
                 ]),
             *args,
             **kwargs):
    super(CoarseMaskHead, self).__init__(
        *args,
        num_convs=num_convs,
        upsample_cfg=dict(type=None),
        init_cfg=None,
        **kwargs)
    self.init_cfg = init_cfg
    self.num_fcs = num_fcs
    assert self.num_fcs > 0
    self.fc_out_channels = fc_out_channels
    self.downsample_factor = downsample_factor
    assert self.downsample_factor >= 1
    # remove the conv_logits layer built by the parent class; this head
    # predicts masks through its fully-connected fc_logits instead
    delattr(self, 'conv_logits')

    if downsample_factor > 1:
        downsample_in_channels = (
            self.conv_out_channels
            if self.num_convs > 0 else self.in_channels)
        self.downsample_conv = ConvModule(
            downsample_in_channels,
            self.conv_out_channels,
            kernel_size=downsample_factor,
            stride=downsample_factor,
            padding=0,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg)
    else:
        self.downsample_conv = None

    self.output_size = (self.roi_feat_size[0] // downsample_factor,
                        self.roi_feat_size[1] // downsample_factor)
    self.output_area = self.output_size[0] * self.output_size[1]

    last_layer_dim = self.conv_out_channels * self.output_area

    self.fcs = ModuleList()
    for i in range(num_fcs):
        fc_in_channels = (
            last_layer_dim if i == 0 else self.fc_out_channels)
        self.fcs.append(Linear(fc_in_channels, self.fc_out_channels))
        last_layer_dim = self.fc_out_channels

    output_channels = self.num_classes * self.output_area
    self.fc_logits = Linear(last_layer_dim, output_channels)

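# Sanity sketch of the downsampling used above: a conv with
# kernel_size == stride == downsample_factor and no padding shrinks the RoI
# feature map by exactly that factor (14 -> 7 for factor 2). Plain PyTorch;
# the channel counts are illustrative.
import torch
import torch.nn as nn

downsample_factor = 2
conv = nn.Conv2d(256, 256, kernel_size=downsample_factor,
                 stride=downsample_factor, padding=0)
x = torch.randn(1, 256, 14, 14)      # RoI features
assert conv(x).shape[-2:] == (7, 7)  # 14 // 2 along each spatial dim
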
def __init__(self,
             num_convs=4,
             num_fcs=2,
             roi_feat_size=14,
             in_channels=256,
             conv_out_channels=256,
             fc_out_channels=1024,
             num_classes=80,
             loss_iou=dict(type='MSELoss', loss_weight=0.5),
             init_cfg=[
                 dict(type='Kaiming', override=dict(name='convs')),
                 dict(type='Caffe2Xavier', override=dict(name='fcs')),
                 dict(
                     type='Normal',
                     std=0.01,
                     override=dict(name='fc_mask_iou'))
             ]):
    super(MaskIoUHead, self).__init__(init_cfg)
    self.in_channels = in_channels
    self.conv_out_channels = conv_out_channels
    self.fc_out_channels = fc_out_channels
    self.num_classes = num_classes
    self.fp16_enabled = False

    self.convs = nn.ModuleList()
    for i in range(num_convs):
        if i == 0:
            # concatenation of mask feature and mask prediction
            in_channels = self.in_channels + 1
        else:
            in_channels = self.conv_out_channels
        stride = 2 if i == num_convs - 1 else 1
        self.convs.append(
            Conv2d(
                in_channels,
                self.conv_out_channels,
                3,
                stride=stride,
                padding=1))

    roi_feat_size = _pair(roi_feat_size)
    pooled_area = (roi_feat_size[0] // 2) * (roi_feat_size[1] // 2)
    self.fcs = nn.ModuleList()
    for i in range(num_fcs):
        in_channels = (
            self.conv_out_channels *
            pooled_area if i == 0 else self.fc_out_channels)
        self.fcs.append(Linear(in_channels, self.fc_out_channels))

    self.fc_mask_iou = Linear(self.fc_out_channels, self.num_classes)
    self.relu = nn.ReLU()
    self.max_pool = MaxPool2d(2, 2)
    self.loss_iou = build_loss(loss_iou)

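# Shape trace for the head above with the defaults: the final conv has
# stride 2 and a 2x2 max pool precedes flattening, which is exactly what
# `pooled_area` accounts for. A hedged, plain-PyTorch check of those sizes
# (assuming the stride-2 conv has already reduced the map to roi_feat_size,
# i.e. 14x14):
import torch
import torch.nn as nn

convs_out = torch.randn(1, 256, 14, 14)  # after the stride-2 final conv
pooled = nn.MaxPool2d(2, 2)(convs_out)   # 14 -> 7 on each side
flat = pooled.flatten(1)                 # (1, 256 * 7 * 7)
# matches conv_out_channels * pooled_area, the first fc's input width
assert flat.shape[1] == 256 * (14 // 2) * (14 // 2)
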
def __init__(self,
             in_features,
             hidden_features=None,
             out_features=None,
             act_cfg=dict(type='GELU'),
             drop=0.):
    super(Mlp, self).__init__()
    out_features = out_features or in_features
    hidden_features = hidden_features or in_features
    self.fc1 = Linear(in_features, hidden_features)
    self.act = build_activation_layer(act_cfg)
    self.fc2 = Linear(hidden_features, out_features)
    self.drop = nn.Dropout(drop)

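# The constructor above only stores the pieces; the usual timm-style forward
# (a sketch, since the method itself is not shown here) chains them as
# fc1 -> act -> drop -> fc2 -> drop:
def mlp_forward(self, x):
    x = self.drop(self.act(self.fc1(x)))
    return self.drop(self.fc2(x))
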
def _init_layers(self):
    """Initialize layers of the transformer head."""
    self.input_proj = Conv2d(
        self.in_channels, self.embed_dims, kernel_size=1)
    self.fc_cls = Linear(self.embed_dims, self.cls_out_channels)
    self.reg_ffn = FFN(
        self.embed_dims,
        self.embed_dims,
        self.num_fcs,
        self.act_cfg,
        dropout=0.0,
        add_residual=False)
    self.fc_reg = Linear(self.embed_dims, 4)
    self.query_embedding = nn.Embedding(self.num_query, self.embed_dims)

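# Hedged sketch of how DETR-style heads consume `query_embedding`: the
# embedding's weight matrix itself is the set of learned object queries, one
# row per query, broadcast across the batch. Plain PyTorch; the sizes are
# illustrative.
import torch
import torch.nn as nn

num_query, embed_dims, batch_size = 100, 256, 2
query_embedding = nn.Embedding(num_query, embed_dims)
queries = query_embedding.weight.unsqueeze(0).repeat(batch_size, 1, 1)
assert queries.shape == (batch_size, num_query, embed_dims)
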
def __init__(self, out_channels, norm_cfg=dict(type='BN')):
    # Protect mutable default arguments
    norm_cfg = cp.deepcopy(norm_cfg)
    super().__init__()
    self.out_channels = out_channels
    self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))
    self.middle_path = nn.Sequential(
        Linear(self.out_channels, self.out_channels),
        build_norm_layer(dict(type='BN1d'), out_channels)[1],
        build_activation_layer(dict(type='ReLU')),
        Linear(self.out_channels, self.out_channels),
        build_norm_layer(dict(type='BN1d'), out_channels)[1],
        build_activation_layer(dict(type='ReLU')),
        build_activation_layer(dict(type='Sigmoid')))
    self.bottom_path = nn.Sequential(
        ConvModule(
            self.out_channels,
            self.out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            norm_cfg=norm_cfg,
            inplace=False),
        DepthwiseSeparableConvModule(
            self.out_channels,
            1,
            kernel_size=9,
            stride=1,
            padding=4,
            norm_cfg=norm_cfg,
            inplace=False),
        build_activation_layer(dict(type='Sigmoid')))
    self.conv_bn_relu_prm_1 = ConvModule(
        self.out_channels,
        self.out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        norm_cfg=norm_cfg,
        inplace=False)

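# Shape sketch for `middle_path` above: global average pooling collapses the
# spatial dims, the tensor is flattened to (N, C) so the Linear/BN1d layers
# apply, and the sigmoid output re-weights channels. A hedged, standalone
# approximation of that path (one Linear block here instead of two):
import torch
import torch.nn as nn

out_channels = 64
pool = nn.AdaptiveAvgPool2d((1, 1))
mlp = nn.Sequential(
    nn.Linear(out_channels, out_channels), nn.BatchNorm1d(out_channels),
    nn.ReLU(), nn.Sigmoid())
x = torch.randn(2, out_channels, 32, 32)
scale = mlp(pool(x).flatten(1)).view(-1, out_channels, 1, 1)
out = x * scale  # channel-wise re-weighting
assert out.shape == x.shape
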
def __init__(self,
             in_channels=768,
             out_channels=[96, 192, 384, 768],
             readout_type='ignore',
             patch_size=16,
             init_cfg=None):
    super(ReassembleBlocks, self).__init__(init_cfg)

    assert readout_type in ['ignore', 'add', 'project']
    self.readout_type = readout_type
    self.patch_size = patch_size

    self.projects = nn.ModuleList([
        ConvModule(
            in_channels=in_channels,
            out_channels=out_channel,
            kernel_size=1,
            act_cfg=None,
        ) for out_channel in out_channels
    ])

    self.resize_layers = nn.ModuleList([
        nn.ConvTranspose2d(
            in_channels=out_channels[0],
            out_channels=out_channels[0],
            kernel_size=4,
            stride=4,
            padding=0),
        nn.ConvTranspose2d(
            in_channels=out_channels[1],
            out_channels=out_channels[1],
            kernel_size=2,
            stride=2,
            padding=0),
        nn.Identity(),
        nn.Conv2d(
            in_channels=out_channels[3],
            out_channels=out_channels[3],
            kernel_size=3,
            stride=2,
            padding=1)
    ])
    if self.readout_type == 'project':
        self.readout_projects = nn.ModuleList()
        for _ in range(len(self.projects)):
            self.readout_projects.append(
                nn.Sequential(
                    Linear(2 * in_channels, in_channels),
                    build_activation_layer(dict(type='GELU'))))

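# What the four `resize_layers` do to a ViT feature map: with patch_size=16,
# tokens form an H/16 x W/16 grid, and the transposed convs / strided conv
# re-scale it toward strides 4, 8, 16 and 32. A hedged, self-contained check
# of the spatial sizes (channel counts are illustrative):
import torch
import torch.nn as nn

feat = torch.randn(1, 96, 32, 32)  # the H/16 token grid
up4 = nn.ConvTranspose2d(96, 96, kernel_size=4, stride=4)
assert up4(feat).shape[-2:] == (128, 128)  # 4x upsample -> stride 4
down2 = nn.Conv2d(96, 96, kernel_size=3, stride=2, padding=1)
assert down2(feat).shape[-2:] == (16, 16)  # 2x downsample -> stride 32
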
def __init__(self,
             embed_dim,
             num_heads,
             dropout=0.0,
             bias=True,
             add_bias_kv=False,
             add_zero_attn=False,
             kdim=None,
             vdim=None,
             app_relation=True):
    super(MultiheadAttention, self).__init__()
    self.embed_dim = embed_dim
    self.kdim = kdim if kdim is not None else embed_dim
    self.vdim = vdim if vdim is not None else embed_dim
    self._qkv_same_embed_dim = (self.kdim == embed_dim) and \
        (self.vdim == embed_dim)

    self.num_heads = num_heads
    self.dropout = dropout
    self.head_dim = embed_dim // num_heads
    assert (self.head_dim * num_heads == self.embed_dim
            ), 'embed_dim must be divisible by num_heads'

    self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim))

    if not self._qkv_same_embed_dim:
        self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim))
        self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim))
        self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim))

    if bias:
        self.in_proj_bias = Parameter(torch.empty(3 * embed_dim))
    else:
        self.register_parameter('in_proj_bias', None)
    self.out_proj = Linear(embed_dim, embed_dim, bias=bias)

    if add_bias_kv:
        self.bias_k = Parameter(torch.empty(1, 1, embed_dim))
        self.bias_v = Parameter(torch.empty(1, 1, embed_dim))
    else:
        self.bias_k = self.bias_v = None

    self.add_zero_attn = add_zero_attn
    self.app_relation = app_relation

    self._reset_parameters()

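# The constructor ends by calling `_reset_parameters`, which is not shown
# here. A hedged sketch of what that method conventionally does, mirroring
# torch.nn.MultiheadAttention's initialization:
import torch.nn as nn


def _reset_parameters(self):
    if self._qkv_same_embed_dim:
        nn.init.xavier_uniform_(self.in_proj_weight)
    else:
        nn.init.xavier_uniform_(self.q_proj_weight)
        nn.init.xavier_uniform_(self.k_proj_weight)
        nn.init.xavier_uniform_(self.v_proj_weight)
    if self.in_proj_bias is not None:
        nn.init.constant_(self.in_proj_bias, 0.)
        nn.init.constant_(self.out_proj.bias, 0.)
    if self.bias_k is not None:
        nn.init.xavier_normal_(self.bias_k)
    if self.bias_v is not None:
        nn.init.xavier_normal_(self.bias_v)
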
def __init__(self,
             dim,
             num_heads=8,
             qkv_bias=False,
             qk_scale=None,
             attn_drop=0.,
             proj_drop=0.):
    super(Attention, self).__init__()
    self.num_heads = num_heads
    head_dim = dim // num_heads
    self.scale = qk_scale or head_dim**-0.5

    self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
    self.attn_drop = nn.Dropout(attn_drop)
    self.proj = Linear(dim, dim)
    self.proj_drop = nn.Dropout(proj_drop)

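# Hedged sketch of the forward pass this constructor supports, following the
# standard ViT attention pattern (the actual method is not shown above):
def attention_forward(self, x):
    B, N, C = x.shape
    # (B, N, 3C) -> (3, B, num_heads, N, head_dim)
    qkv = self.qkv(x).reshape(B, N, 3, self.num_heads,
                              C // self.num_heads).permute(2, 0, 3, 1, 4)
    q, k, v = qkv[0], qkv[1], qkv[2]
    attn = (q @ k.transpose(-2, -1)) * self.scale  # scaled dot product
    attn = self.attn_drop(attn.softmax(dim=-1))
    x = (attn @ v).transpose(1, 2).reshape(B, N, C)  # merge heads
    return self.proj_drop(self.proj(x))
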