def __init__(self,
             in_features,
             out_features,
             bias=True,
             act_cfg=dict(type='ReLU'),
             inplace=True,
             with_spectral_norm=False,
             order=('linear', 'act')):
    super().__init__()
    assert act_cfg is None or isinstance(act_cfg, dict)
    self.act_cfg = act_cfg
    self.inplace = inplace
    self.with_spectral_norm = with_spectral_norm
    self.order = order
    assert isinstance(self.order, tuple) and len(self.order) == 2
    assert set(order) == set(['linear', 'act'])

    self.with_activation = act_cfg is not None
    self.with_bias = bias

    # build linear layer
    self.linear = nn.Linear(in_features, out_features, bias=bias)
    # export the attributes of self.linear to a higher level for
    # convenience
    self.in_features = self.linear.in_features
    self.out_features = self.linear.out_features

    if self.with_spectral_norm:
        self.linear = nn.utils.spectral_norm(self.linear)

    # build activation layer
    if self.with_activation:
        act_cfg_ = act_cfg.copy()
        act_cfg_.setdefault('inplace', inplace)
        self.activate = build_activation_layer(act_cfg_)

    # Use msra init by default
    self.init_weights()
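# Hedged usage sketch for the linear wrapper above. The owning class is not
# named in this snippet, so `LinearModule` below is an illustrative
# placeholder; the forward order ('linear', 'act') is taken from the default
# `order` argument.
# fc = LinearModule(in_features=256, out_features=128,
#                   act_cfg=dict(type='LeakyReLU'),
#                   with_spectral_norm=True)
# fc.linear    # spectral-norm wrapped nn.Linear(256, 128)
# fc.activate  # nn.LeakyReLU(inplace=True), applied after the linear layer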
def __init__(self,
             embed_dims,
             feedforward_channels,
             act_cfg=dict(type='GELU'),
             ffn_drop=0.,
             dropout_layer=None,
             init_cfg=None):
    super(MixFFN, self).__init__(init_cfg)

    self.embed_dims = embed_dims
    self.feedforward_channels = feedforward_channels
    self.act_cfg = act_cfg
    self.activate = build_activation_layer(act_cfg)

    in_channels = embed_dims
    fc1 = Conv2d(
        in_channels=in_channels,
        out_channels=feedforward_channels,
        kernel_size=1,
        stride=1,
        bias=True)
    # 3x3 depthwise conv to provide positional encoding information
    pe_conv = Conv2d(
        in_channels=feedforward_channels,
        out_channels=feedforward_channels,
        kernel_size=3,
        stride=1,
        padding=(3 - 1) // 2,
        bias=True,
        groups=feedforward_channels)
    fc2 = Conv2d(
        in_channels=feedforward_channels,
        out_channels=in_channels,
        kernel_size=1,
        stride=1,
        bias=True)
    drop = nn.Dropout(ffn_drop)
    layers = [fc1, pe_conv, self.activate, drop, fc2, drop]
    self.layers = Sequential(*layers)
    self.dropout_layer = build_dropout(
        dropout_layer) if dropout_layer else torch.nn.Identity()
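# Hedged instantiation sketch for MixFFN. Only the constructor is shown above;
# the assumption that forward() takes a (B, N, C) token tensor plus a spatial
# shape (so the 1x1 convs and the 3x3 depthwise conv can run on a 2D feature
# map) comes from how SegFormer-style FFNs are typically used, not from this
# snippet.
# ffn = MixFFN(embed_dims=64, feedforward_channels=256, ffn_drop=0.1,
#              dropout_layer=dict(type='DropPath', drop_prob=0.1))
# x = torch.randn(2, 32 * 32, 64)   # (batch, num_tokens, embed_dims)
# out = ffn(x, hw_shape=(32, 32))   # assumed to keep the input shape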
def __init__(self,
             inplanes,
             planes,
             stride=1,
             dilation=1,
             downsample=None,
             style='pytorch',
             with_cp=False,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU', inplace=True)):
    super(BasicBlock, self).__init__()
    self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)
    self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)

    self.conv1 = build_conv_layer(
        conv_cfg,
        inplanes,
        planes,
        3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        bias=False)
    self.add_module(self.norm1_name, norm1)
    self.conv2 = build_conv_layer(
        conv_cfg, planes, planes, 3, padding=1, bias=False)
    self.add_module(self.norm2_name, norm2)

    self.relu = build_activation_layer(act_cfg)
    self.downsample = downsample
    self.stride = stride
    self.dilation = dilation
    assert not with_cp
def __init__(self,
             input_dim,
             output_dim,
             kernel_size,
             stride=1,
             with_se=False,
             se_reduction=16,
             se_bias=False,
             dilation=1,
             batch_norm=True,
             pad=True,
             nonlinearity="LeakyReLU"):
    super(TDNN_pad, self).__init__()
    self.context_size = kernel_size
    self.stride = stride
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.dilation = dilation
    self.pad = pad
    if self.pad:
        self.pad_length = (kernel_size - 1) * dilation // 2
    self.with_se = with_se
    if self.with_se:
        self.se = SELayer(
            self.output_dim,
            reduction=se_reduction,
            nonlinearity=nonlinearity,
            bias=se_bias)
    self.kernel = nn.Conv1d(
        self.input_dim,
        self.output_dim,
        self.context_size,
        dilation=self.dilation,
        stride=self.stride)
    self.nonlinearity = build_activation_layer(
        dict(type=nonlinearity, inplace=True))
    self.batch_norm = batch_norm
    if batch_norm:
        self.bn = nn.BatchNorm1d(output_dim)
def __init__(self,
             in_channels,
             out_channels,
             zero_init_offset=True,
             act_cfg=dict(type='HSigmoid', bias=3.0, divisor=6.0)):
    super().__init__()
    self.zero_init_offset = zero_init_offset
    # (offset_x, offset_y, mask) * kernel_size_y * kernel_size_x
    self.offset_and_mask_dim = 3 * 3 * 3
    self.offset_dim = 2 * 3 * 3

    self.spatial_conv_high = DyDCNv2(in_channels, out_channels)
    self.spatial_conv_mid = DyDCNv2(in_channels, out_channels)
    self.spatial_conv_low = DyDCNv2(in_channels, out_channels, stride=2)
    self.spatial_conv_offset = nn.Conv2d(
        in_channels, self.offset_and_mask_dim, 3, padding=1)
    self.scale_attn_module = nn.Sequential(
        nn.AdaptiveAvgPool2d(1), nn.Conv2d(out_channels, 1, 1),
        nn.ReLU(inplace=True), build_activation_layer(act_cfg))
    self.task_attn_module = DyReLU(out_channels)
    self._init_weights()
def __init__(self,
             in_channels,
             hidden_channels=None,
             out_channels=None,
             norm_cfg=dict(type='BN', requires_grad=True),
             act_cfg=dict(type='GELU')):
    super().__init__()
    out_features = out_channels or in_channels
    hidden_features = hidden_channels or in_channels
    self.ffn_fc1 = ConvModule(
        in_channels=in_channels,
        out_channels=hidden_features,
        kernel_size=1,
        stride=1,
        padding=0,
        norm_cfg=norm_cfg,
        act_cfg=None)
    self.ffn_fc2 = ConvModule(
        in_channels=hidden_features,
        out_channels=out_features,
        kernel_size=1,
        stride=1,
        padding=0,
        norm_cfg=norm_cfg,
        act_cfg=None)
    self.act = build_activation_layer(act_cfg)
def __init__(self,
             inplanes,
             planes,
             spatial_stride=1,
             temporal_stride=1,
             dilation=1,
             downsample=None,
             style='pytorch',
             inflate=True,
             inflate_style='3x1x1',
             non_local=False,
             non_local_cfg=dict(),
             conv_cfg=dict(type='Conv3d'),
             norm_cfg=dict(type='BN3d'),
             act_cfg=dict(type='ReLU'),
             with_cp=False):
    super().__init__()
    assert style in ['pytorch', 'caffe']
    assert inflate_style in ['3x1x1', '3x3x3']

    self.inplanes = inplanes
    self.planes = planes
    self.spatial_stride = spatial_stride
    self.temporal_stride = temporal_stride
    self.dilation = dilation
    self.style = style
    self.inflate = inflate
    self.inflate_style = inflate_style
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.with_cp = with_cp
    self.non_local = non_local
    self.non_local_cfg = non_local_cfg

    self.conv1_stride_s = spatial_stride
    self.conv2_stride_s = 1
    self.conv1_stride_t = temporal_stride
    self.conv2_stride_t = 1

    if self.inflate:
        conv1_kernel_size = (3, 3, 3)
        conv1_padding = (1, dilation, dilation)
        conv2_kernel_size = (3, 3, 3)
        conv2_padding = (1, 1, 1)
    else:
        conv1_kernel_size = (1, 3, 3)
        conv1_padding = (0, dilation, dilation)
        conv2_kernel_size = (1, 3, 3)
        conv2_padding = (0, 1, 1)

    self.conv1 = ConvModule(
        inplanes,
        planes,
        conv1_kernel_size,
        stride=(self.conv1_stride_t, self.conv1_stride_s,
                self.conv1_stride_s),
        padding=conv1_padding,
        dilation=(1, dilation, dilation),
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=self.act_cfg)

    self.conv2 = ConvModule(
        planes,
        planes * self.expansion,
        conv2_kernel_size,
        stride=(self.conv2_stride_t, self.conv2_stride_s,
                self.conv2_stride_s),
        padding=conv2_padding,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=None)

    self.downsample = downsample
    self.relu = build_activation_layer(self.act_cfg)

    if self.non_local:
        self.non_local_block = NonLocal3d(self.conv2.norm.num_features,
                                          **self.non_local_cfg)
def __init__(self,
             inplanes,
             planes,
             spatial_stride=1,
             temporal_stride=1,
             dilation=1,
             downsample=None,
             style='pytorch',
             inflate=True,
             inflate_style='3x1x1',
             conv_cfg=dict(type='Conv3d'),
             norm_cfg=dict(type='BN3d'),
             act_cfg=dict(type='ReLU'),
             with_cp=False):
    super().__init__()
    assert style in ['pytorch', 'caffe']
    assert inflate_style in ['3x1x1', '3x3x3']

    self.inplanes = inplanes
    self.planes = planes
    self.spatial_stride = spatial_stride
    self.temporal_stride = temporal_stride
    self.dilation = dilation
    self.style = style
    self.inflate = inflate
    self.inflate_style = inflate_style
    self.norm_cfg = norm_cfg
    self.conv_cfg = conv_cfg
    self.act_cfg = act_cfg
    self.with_cp = with_cp

    if self.style == 'pytorch':
        self.conv1_stride_s = 1
        self.conv2_stride_s = spatial_stride
        self.conv1_stride_t = 1
        self.conv2_stride_t = temporal_stride
    else:
        self.conv1_stride_s = spatial_stride
        self.conv2_stride_s = 1
        self.conv1_stride_t = temporal_stride
        self.conv2_stride_t = 1

    if self.inflate:
        if inflate_style == '3x1x1':
            conv1_kernel_size = (3, 1, 1)
            conv1_padding = (1, 0, 0)
            conv2_kernel_size = (1, 3, 3)
            conv2_padding = (0, dilation, dilation)
        else:
            conv1_kernel_size = (1, 1, 1)
            conv1_padding = (0, 0, 0)
            conv2_kernel_size = (3, 3, 3)
            conv2_padding = (1, dilation, dilation)
    else:
        conv1_kernel_size = (1, 1, 1)
        conv1_padding = (0, 0, 0)
        conv2_kernel_size = (1, 3, 3)
        conv2_padding = (0, dilation, dilation)

    self.conv1 = ConvModule(
        inplanes,
        planes,
        conv1_kernel_size,
        stride=(self.conv1_stride_t, self.conv1_stride_s,
                self.conv1_stride_s),
        padding=conv1_padding,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=self.act_cfg)

    self.conv2 = ConvModule(
        planes,
        planes,
        conv2_kernel_size,
        stride=(self.conv2_stride_t, self.conv2_stride_s,
                self.conv2_stride_s),
        padding=conv2_padding,
        dilation=(1, dilation, dilation),
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=self.act_cfg)

    self.conv3 = ConvModule(
        planes,
        planes * self.expansion,
        1,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        # No activation in the third ConvModule for bottleneck
        act_cfg=None)

    self.downsample = downsample
    self.relu = build_activation_layer(self.act_cfg)
def __init__(self,
             num_classes,
             in_channels,
             num_query=100,
             num_reg_fcs=2,
             transformer=None,
             sync_cls_avg_factor=False,
             positional_encoding=dict(
                 type='SinePositionalEncoding',
                 num_feats=128,
                 normalize=True),
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 bg_cls_weight=0.1,
                 use_sigmoid=False,
                 loss_weight=1.0,
                 class_weight=1.0),
             loss_bbox=dict(type='L1Loss', loss_weight=5.0),
             loss_iou=dict(type='GIoULoss', loss_weight=2.0),
             train_cfg=dict(
                 assigner=dict(
                     type='HungarianAssigner',
                     cls_cost=dict(type='ClassificationCost', weight=1.),
                     reg_cost=dict(type='BBoxL1Cost', weight=5.0),
                     iou_cost=dict(
                         type='IoUCost', iou_mode='giou', weight=2.0))),
             test_cfg=dict(max_per_img=100),
             init_cfg=None,
             **kwargs):
    # NOTE here use `AnchorFreeHead` instead of `TransformerHead`,
    # since it brings inconvenience when the initialization of
    # `AnchorFreeHead` is called.
    super(AnchorFreeHead, self).__init__(init_cfg)
    self.bg_cls_weight = 0
    self.sync_cls_avg_factor = sync_cls_avg_factor
    class_weight = loss_cls.get('class_weight', None)
    if class_weight is not None and (self.__class__ is DETRHead):
        assert isinstance(class_weight, float), 'Expected ' \
            'class_weight to have type float. Found ' \
            f'{type(class_weight)}.'
        # NOTE following the official DETR repo, bg_cls_weight means
        # relative classification weight of the no-object class.
        bg_cls_weight = loss_cls.get('bg_cls_weight', class_weight)
        assert isinstance(bg_cls_weight, float), 'Expected ' \
            'bg_cls_weight to have type float. Found ' \
            f'{type(bg_cls_weight)}.'
        class_weight = torch.ones(num_classes + 1) * class_weight
        # set background class as the last index
        class_weight[num_classes] = bg_cls_weight
        loss_cls.update({'class_weight': class_weight})
        if 'bg_cls_weight' in loss_cls:
            loss_cls.pop('bg_cls_weight')
        self.bg_cls_weight = bg_cls_weight

    if train_cfg:
        assert 'assigner' in train_cfg, 'assigner should be provided ' \
            'when train_cfg is set.'
        assigner = train_cfg['assigner']
        assert loss_cls['loss_weight'] == assigner['cls_cost']['weight'], \
            'The classification weight for loss and matcher should be ' \
            'exactly the same.'
        assert loss_bbox['loss_weight'] == assigner['reg_cost'][
            'weight'], 'The regression L1 weight for loss and matcher ' \
            'should be exactly the same.'
        assert loss_iou['loss_weight'] == assigner['iou_cost']['weight'], \
            'The regression iou weight for loss and matcher should be ' \
            'exactly the same.'
        self.assigner = build_assigner(assigner)
        # DETR sampling=False, so use PseudoSampler
        sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)

    self.num_query = num_query
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.num_reg_fcs = num_reg_fcs
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.fp16_enabled = False
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.loss_iou = build_loss(loss_iou)

    if self.loss_cls.use_sigmoid:
        self.cls_out_channels = num_classes
    else:
        self.cls_out_channels = num_classes + 1
    self.act_cfg = transformer.get('act_cfg',
                                   dict(type='ReLU', inplace=True))
    self.activate = build_activation_layer(self.act_cfg)
    self.positional_encoding = build_positional_encoding(
        positional_encoding)
    self.transformer = build_transformer(transformer)
    self.embed_dims = self.transformer.embed_dims

    assert 'num_feats' in positional_encoding
    num_feats = positional_encoding['num_feats']
    assert num_feats * 2 == self.embed_dims, 'embed_dims should' \
        f' be exactly 2 times of num_feats. Found {self.embed_dims}' \
        f' and {num_feats}.'
    self._init_layers()
def __init__(self,
             in_channels,
             out_channels,
             groups=3,
             first_block=True,
             combine='add',
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU'),
             with_cp=False):
    # Protect mutable default arguments
    norm_cfg = copy.deepcopy(norm_cfg)
    act_cfg = copy.deepcopy(act_cfg)
    super().__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.first_block = first_block
    self.combine = combine
    self.groups = groups
    self.bottleneck_channels = self.out_channels // 4
    self.with_cp = with_cp

    if self.combine == 'add':
        self.depthwise_stride = 1
        self._combine_func = self._add
        assert in_channels == out_channels, (
            'in_channels must be equal to out_channels when combine '
            'is add')
    elif self.combine == 'concat':
        self.depthwise_stride = 2
        self._combine_func = self._concat
        self.out_channels -= self.in_channels
        self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
    else:
        raise ValueError(f'Cannot combine tensors with {self.combine}. '
                         'Only "add" and "concat" are supported')

    self.first_1x1_groups = 1 if first_block else self.groups
    self.g_conv_1x1_compress = ConvModule(
        in_channels=self.in_channels,
        out_channels=self.bottleneck_channels,
        kernel_size=1,
        groups=self.first_1x1_groups,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        act_cfg=act_cfg)

    self.depthwise_conv3x3_bn = ConvModule(
        in_channels=self.bottleneck_channels,
        out_channels=self.bottleneck_channels,
        kernel_size=3,
        stride=self.depthwise_stride,
        padding=1,
        groups=self.bottleneck_channels,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        act_cfg=None)

    self.g_conv_1x1_expand = ConvModule(
        in_channels=self.bottleneck_channels,
        out_channels=self.out_channels,
        kernel_size=1,
        groups=self.groups,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        act_cfg=None)

    self.act = build_activation_layer(act_cfg)
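# Channel bookkeeping for the two combine modes above, derived from the code:
# - combine='add': stride-1 depthwise conv, and in_channels must equal
#   out_channels so the residual addition is valid.
# - combine='concat' with in_channels=24, out_channels=240: the expand conv is
#   built with 240 - 24 = 216 output channels, so concatenating it with the
#   3x3/stride-2 average-pooled shortcut (24 channels, presumably in forward())
#   restores the configured 240 channels; bottleneck_channels = 240 // 4 = 60.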
def __init__(self,
             num_classes=150,
             num_ffn_fcs=2,
             num_heads=8,
             num_mask_fcs=3,
             feedforward_channels=2048,
             in_channels=256,
             out_channels=256,
             dropout=0.0,
             act_cfg=dict(type='ReLU', inplace=True),
             ffn_act_cfg=dict(type='ReLU', inplace=True),
             conv_kernel_size=1,
             feat_transform_cfg=None,
             kernel_init=False,
             with_ffn=True,
             feat_gather_stride=1,
             mask_transform_stride=1,
             kernel_updator_cfg=dict(
                 type='DynamicConv',
                 in_channels=256,
                 feat_channels=64,
                 out_channels=256,
                 act_cfg=dict(type='ReLU', inplace=True),
                 norm_cfg=dict(type='LN'))):
    super(KernelUpdateHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.fp16_enabled = False
    self.dropout = dropout
    self.num_heads = num_heads
    self.kernel_init = kernel_init
    self.with_ffn = with_ffn
    self.conv_kernel_size = conv_kernel_size
    self.feat_gather_stride = feat_gather_stride
    self.mask_transform_stride = mask_transform_stride

    self.attention = MultiheadAttention(in_channels * conv_kernel_size**2,
                                        num_heads, dropout)
    self.attention_norm = build_norm_layer(
        dict(type='LN'), in_channels * conv_kernel_size**2)[1]
    self.kernel_update_conv = build_transformer_layer(kernel_updator_cfg)

    if feat_transform_cfg is not None:
        kernel_size = feat_transform_cfg.pop('kernel_size', 1)
        transform_channels = in_channels
        self.feat_transform = ConvModule(
            transform_channels,
            in_channels,
            kernel_size,
            stride=feat_gather_stride,
            padding=int(feat_gather_stride // 2),
            **feat_transform_cfg)
    else:
        self.feat_transform = None

    if self.with_ffn:
        self.ffn = FFN(
            in_channels,
            feedforward_channels,
            num_ffn_fcs,
            act_cfg=ffn_act_cfg,
            dropout=dropout)
        self.ffn_norm = build_norm_layer(dict(type='LN'), in_channels)[1]

    self.mask_fcs = nn.ModuleList()
    for _ in range(num_mask_fcs):
        self.mask_fcs.append(
            nn.Linear(in_channels, in_channels, bias=False))
        self.mask_fcs.append(
            build_norm_layer(dict(type='LN'), in_channels)[1])
        self.mask_fcs.append(build_activation_layer(act_cfg))

    self.fc_mask = nn.Linear(in_channels, out_channels)
def __init__(self,
             in_channels,
             out_channels,
             num_kernels,
             norm_cfg=dict(type='BN2d', momentum=0.1),
             act_cfg=dict(type='ReLU', inplace=True),
             scorenet_input='w_neighbor_dist',
             weight_bank_init='kaiming',
             kernel_input='w_neighbor',
             scorenet_cfg=dict(
                 mlp_channels=[8, 16, 16],
                 score_norm='softmax',
                 temp_factor=1.0,
                 last_bn=False)):
    super(PAConv, self).__init__()

    # determine weight kernel size according to used features
    if kernel_input == 'identity':
        # only use grouped_features
        self.kernel_mul = 1
    elif kernel_input == 'w_neighbor':
        # concat of (grouped_features - center_features, grouped_features)
        self.kernel_mul = 2
    else:
        raise NotImplementedError(
            f'unsupported kernel_input {kernel_input}')
    self.kernel_input = kernel_input

    # determine mlp channels in ScoreNet according to used xyz features
    if scorenet_input == 'identity':
        # only use relative position (grouped_xyz - center_xyz)
        self.scorenet_in_channels = 3
    elif scorenet_input == 'w_neighbor':
        # (grouped_xyz - center_xyz, grouped_xyz)
        self.scorenet_in_channels = 6
    elif scorenet_input == 'w_neighbor_dist':
        # (center_xyz, grouped_xyz - center_xyz, Euclidean distance)
        self.scorenet_in_channels = 7
    else:
        raise NotImplementedError(
            f'unsupported scorenet_input {scorenet_input}')
    self.scorenet_input = scorenet_input

    # construct weight kernels in weight bank
    # self.weight_bank is of shape [C, num_kernels * out_c]
    # where C can be in_c or (2 * in_c)
    if weight_bank_init == 'kaiming':
        weight_init = nn.init.kaiming_normal_
    elif weight_bank_init == 'xavier':
        weight_init = nn.init.xavier_normal_
    else:
        raise NotImplementedError(
            f'unsupported weight bank init method {weight_bank_init}')

    self.m = num_kernels
    weight_bank = weight_init(
        torch.empty(self.m, in_channels * self.kernel_mul, out_channels))
    weight_bank = weight_bank.permute(1, 0, 2).reshape(
        in_channels * self.kernel_mul, self.m * out_channels).contiguous()
    self.weight_bank = nn.Parameter(weight_bank, requires_grad=True)

    # construct ScoreNet
    scorenet_cfg_ = copy.deepcopy(scorenet_cfg)
    scorenet_cfg_['mlp_channels'].insert(0, self.scorenet_in_channels)
    scorenet_cfg_['mlp_channels'].append(self.m)
    self.scorenet = ScoreNet(**scorenet_cfg_)

    self.bn = build_norm_layer(norm_cfg, out_channels)[1] if \
        norm_cfg is not None else None
    self.activate = build_activation_layer(act_cfg) if \
        act_cfg is not None else None

    self.init_weights()
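# Standalone shape check that mirrors the weight-bank construction above
# (a sketch of the tensor bookkeeping only, not the PAConv module itself).
import torch

in_channels, out_channels, num_kernels, kernel_mul = 64, 128, 8, 2
# [m, C * kernel_mul, out_c] -> permute/reshape -> [C * kernel_mul, m * out_c]
bank = torch.nn.init.kaiming_normal_(
    torch.empty(num_kernels, in_channels * kernel_mul, out_channels))
bank = bank.permute(1, 0, 2).reshape(
    in_channels * kernel_mul, num_kernels * out_channels).contiguous()
assert bank.shape == (128, 1024)  # rows: 64 * 2, columns: 8 * 128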
def __init__(self,
             input_scale,
             num_classes=0,
             base_channels=128,
             input_channels=3,
             attention_cfg=dict(type='SelfAttentionBlock'),
             attention_after_nth_block=-1,
             channels_cfg=None,
             downsample_cfg=None,
             from_rgb_cfg=dict(type='SNGANDiscHeadResBlock'),
             blocks_cfg=dict(type='SNGANDiscResBlock'),
             act_cfg=dict(type='ReLU'),
             with_spectral_norm=True,
             sn_eps=1e-12,
             init_cfg=dict(type='BigGAN'),
             pretrained=None):
    super().__init__()

    self.init_type = init_cfg.get('type', None)

    # add SN options and activation function options to cfg
    self.from_rgb_cfg = deepcopy(from_rgb_cfg)
    self.from_rgb_cfg.setdefault('act_cfg', act_cfg)
    self.from_rgb_cfg.setdefault('with_spectral_norm', with_spectral_norm)
    self.from_rgb_cfg.setdefault('init_cfg', init_cfg)

    # add SN options and activation function options to cfg
    self.blocks_cfg = deepcopy(blocks_cfg)
    self.blocks_cfg.setdefault('act_cfg', act_cfg)
    self.blocks_cfg.setdefault('with_spectral_norm', with_spectral_norm)
    self.blocks_cfg.setdefault('sn_eps', sn_eps)
    self.blocks_cfg.setdefault('init_cfg', init_cfg)

    channels_cfg = deepcopy(self._defualt_channels_cfg) \
        if channels_cfg is None else deepcopy(channels_cfg)
    if isinstance(channels_cfg, dict):
        if input_scale not in channels_cfg:
            raise KeyError(f'`input_scale={input_scale}` is not found in '
                           '`channels_cfg`, only support configs for '
                           f'{[chn for chn in channels_cfg.keys()]}')
        self.channel_factor_list = channels_cfg[input_scale]
    elif isinstance(channels_cfg, list):
        self.channel_factor_list = channels_cfg
    else:
        raise ValueError('Only support list or dict for `channels_cfg`, '
                         f'receive {type(channels_cfg)}')

    downsample_cfg = deepcopy(self._defualt_downsample_cfg) \
        if downsample_cfg is None else deepcopy(downsample_cfg)
    if isinstance(downsample_cfg, dict):
        if input_scale not in downsample_cfg:
            raise KeyError(f'`input_scale={input_scale}` is not found in '
                           '`downsample_cfg`, only support configs for '
                           f'{[chn for chn in downsample_cfg.keys()]}')
        self.downsample_list = downsample_cfg[input_scale]
    elif isinstance(downsample_cfg, list):
        self.downsample_list = downsample_cfg
    else:
        raise ValueError('Only support list or dict for `downsample_cfg`, '
                         f'receive {type(downsample_cfg)}')

    if len(self.downsample_list) != len(self.channel_factor_list):
        raise ValueError('`downsample_cfg` should have same length with '
                         '`channels_cfg`, but receive '
                         f'{len(self.downsample_list)} and '
                         f'{len(self.channel_factor_list)}.')

    # check `attention_after_nth_block`
    if not isinstance(attention_after_nth_block, list):
        attention_after_nth_block = [attention_after_nth_block]
    if not all([isinstance(idx, int)
                for idx in attention_after_nth_block]):
        raise ValueError('`attention_after_nth_block` only support int or '
                         'a list of int. Please check your input type.')

    self.from_rgb = build_module(
        self.from_rgb_cfg,
        dict(in_channels=input_channels, out_channels=base_channels))

    self.conv_blocks = nn.ModuleList()
    # add self-attention block after the first block
    if 1 in attention_after_nth_block:
        attn_cfg_ = deepcopy(attention_cfg)
        attn_cfg_['in_channels'] = base_channels
        self.conv_blocks.append(build_module(attn_cfg_))

    for idx in range(len(self.downsample_list)):
        factor_input = 1 if idx == 0 else self.channel_factor_list[idx - 1]
        factor_output = self.channel_factor_list[idx]

        # get block-specific config
        block_cfg_ = deepcopy(self.blocks_cfg)
        block_cfg_['downsample'] = self.downsample_list[idx]
        block_cfg_['in_channels'] = factor_input * base_channels
        block_cfg_['out_channels'] = factor_output * base_channels
        self.conv_blocks.append(build_module(block_cfg_))

        # build self-attention block
        # the first ConvBlock is the `from_rgb` block,
        # add 2 to get the index of the ConvBlocks
        if idx + 2 in attention_after_nth_block:
            attn_cfg_ = deepcopy(attention_cfg)
            attn_cfg_['in_channels'] = factor_output * base_channels
            self.conv_blocks.append(build_module(attn_cfg_))

    self.decision = nn.Linear(factor_output * base_channels, 1)
    if with_spectral_norm:
        self.decision = spectral_norm(self.decision)

    self.num_classes = num_classes

    # In this case, the discriminator is designed for conditional synthesis.
    if num_classes > 0:
        self.proj_y = nn.Embedding(num_classes,
                                   factor_output * base_channels)
        if with_spectral_norm:
            self.proj_y = spectral_norm(self.proj_y)

    self.activate = build_activation_layer(act_cfg)
    self.init_weights(pretrained)
def __init__(self,
             in_channels,
             out_channels,
             stride=1,
             padding=1,
             dilation=1,
             groups=1,
             padding_mode='zeros',
             se_cfg=None,
             with_cp=False,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU'),
             deploy=False,
             init_cfg=None):
    super(RepVGGBlock, self).__init__(init_cfg)

    assert se_cfg is None or isinstance(se_cfg, dict)

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.groups = groups
    self.se_cfg = se_cfg
    self.with_cp = with_cp
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.deploy = deploy

    if deploy:
        self.branch_reparam = build_conv_layer(
            conv_cfg,
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=True,
            padding_mode=padding_mode)
    else:
        # judge if input shape and output shape are the same.
        # If true, add a normalized identity shortcut.
        if out_channels == in_channels and stride == 1 and \
                padding == dilation:
            self.branch_norm = build_norm_layer(norm_cfg, in_channels)[1]
        else:
            self.branch_norm = None

        self.branch_3x3 = self.create_conv_bn(
            kernel_size=3,
            dilation=dilation,
            padding=padding,
        )
        self.branch_1x1 = self.create_conv_bn(kernel_size=1)

    if se_cfg is not None:
        self.se_layer = SELayer(channels=out_channels, **se_cfg)
    else:
        self.se_layer = None

    self.act = build_activation_layer(act_cfg)
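# Hedged usage sketch for RepVGGBlock. The training-time block keeps parallel
# branches; with deploy=True only the fused 3x3 conv is built (the
# re-parameterisation that produces its weights is done elsewhere and is not
# part of this snippet).
# block = RepVGGBlock(in_channels=64, out_channels=64, stride=1)
# block.branch_3x3    # 3x3 conv + BN (create_conv_bn)
# block.branch_1x1    # 1x1 conv + BN
# block.branch_norm   # BN identity shortcut, built only when shapes match
# deployed = RepVGGBlock(in_channels=64, out_channels=64, deploy=True)
# deployed.branch_reparam  # single 3x3 conv with bias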
def __init__(self,
             in_channels,
             out_channels,
             hidden_channels=None,
             num_classes=0,
             use_cbn=True,
             use_norm_affine=False,
             act_cfg=dict(type='ReLU'),
             norm_cfg=dict(type='BN'),
             upsample_cfg=dict(type='nearest', scale_factor=2),
             upsample=True,
             auto_sync_bn=True,
             conv_cfg=None,
             with_spectral_norm=False,
             with_embedding_spectral_norm=None,
             sn_style='torch',
             norm_eps=1e-4,
             sn_eps=1e-12,
             init_cfg=dict(type='BigGAN')):
    super().__init__()
    self.learnable_sc = in_channels != out_channels or upsample
    self.with_upsample = upsample
    self.init_type = init_cfg.get('type', None)

    self.activate = build_activation_layer(act_cfg)
    hidden_channels = out_channels if hidden_channels is None \
        else hidden_channels

    if self.with_upsample:
        self.upsample = build_upsample_layer(upsample_cfg)

    self.conv_cfg = deepcopy(self._default_conv_cfg)
    if conv_cfg is not None:
        self.conv_cfg.update(conv_cfg)

    # set `norm_spectral_norm` as `with_spectral_norm` if not defined
    with_embedding_spectral_norm = with_embedding_spectral_norm \
        if with_embedding_spectral_norm is not None else with_spectral_norm

    sn_cfg = dict(eps=sn_eps, sn_style=sn_style)
    self.conv_1 = SNConvModule(
        in_channels,
        hidden_channels,
        with_spectral_norm=with_spectral_norm,
        spectral_norm_cfg=sn_cfg,
        **self.conv_cfg)
    self.conv_2 = SNConvModule(
        hidden_channels,
        out_channels,
        with_spectral_norm=with_spectral_norm,
        spectral_norm_cfg=sn_cfg,
        **self.conv_cfg)

    self.norm_1 = SNConditionNorm(in_channels, num_classes, use_cbn,
                                  norm_cfg, use_norm_affine, auto_sync_bn,
                                  with_embedding_spectral_norm, sn_style,
                                  norm_eps, sn_eps, init_cfg)
    self.norm_2 = SNConditionNorm(hidden_channels, num_classes, use_cbn,
                                  norm_cfg, use_norm_affine, auto_sync_bn,
                                  with_embedding_spectral_norm, sn_style,
                                  norm_eps, sn_eps, init_cfg)

    if self.learnable_sc:
        # use hyperparameters-fixed shortcut here
        self.shortcut = SNConvModule(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            act_cfg=None,
            with_spectral_norm=with_spectral_norm,
            spectral_norm_cfg=sn_cfg)

    self.init_weights()
def __init__(self,
             output_scale,
             num_classes=0,
             base_channels=64,
             out_channels=3,
             input_scale=4,
             noise_size=128,
             attention_cfg=dict(type='SelfAttentionBlock'),
             attention_after_nth_block=0,
             channels_cfg=None,
             blocks_cfg=dict(type='SNGANGenResBlock'),
             act_cfg=dict(type='ReLU'),
             use_cbn=True,
             auto_sync_bn=True,
             with_spectral_norm=False,
             with_embedding_spectral_norm=None,
             norm_eps=1e-4,
             sn_eps=1e-12,
             init_cfg=dict(type='BigGAN'),
             pretrained=None):
    super().__init__()

    self.input_scale = input_scale
    self.output_scale = output_scale
    self.noise_size = noise_size
    self.num_classes = num_classes
    self.init_type = init_cfg.get('type', None)

    self.blocks_cfg = deepcopy(blocks_cfg)
    self.blocks_cfg.setdefault('num_classes', num_classes)
    self.blocks_cfg.setdefault('act_cfg', act_cfg)
    self.blocks_cfg.setdefault('use_cbn', use_cbn)
    self.blocks_cfg.setdefault('auto_sync_bn', auto_sync_bn)
    self.blocks_cfg.setdefault('with_spectral_norm', with_spectral_norm)

    # set `norm_spectral_norm` as `with_spectral_norm` if not defined
    with_embedding_spectral_norm = with_embedding_spectral_norm \
        if with_embedding_spectral_norm is not None else with_spectral_norm
    self.blocks_cfg.setdefault('with_embedding_spectral_norm',
                               with_embedding_spectral_norm)
    self.blocks_cfg.setdefault('init_cfg', init_cfg)
    self.blocks_cfg.setdefault('norm_eps', norm_eps)
    self.blocks_cfg.setdefault('sn_eps', sn_eps)

    channels_cfg = deepcopy(self._default_channels_cfg) \
        if channels_cfg is None else deepcopy(channels_cfg)
    if isinstance(channels_cfg, dict):
        if output_scale not in channels_cfg:
            raise KeyError(f'`output_scale={output_scale}` is not found in '
                           '`channels_cfg`, only support configs for '
                           f'{[chn for chn in channels_cfg.keys()]}')
        self.channel_factor_list = channels_cfg[output_scale]
    elif isinstance(channels_cfg, list):
        self.channel_factor_list = channels_cfg
    else:
        raise ValueError('Only support list or dict for `channels_cfg`, '
                         f'receive {type(channels_cfg)}')

    self.noise2feat = nn.Linear(
        noise_size,
        input_scale**2 * base_channels * self.channel_factor_list[0])
    if with_spectral_norm:
        self.noise2feat = spectral_norm(self.noise2feat)

    # check `attention_after_nth_block`
    if not isinstance(attention_after_nth_block, list):
        attention_after_nth_block = [attention_after_nth_block]
    if not is_list_of(attention_after_nth_block, int):
        raise ValueError('`attention_after_nth_block` only support int or '
                         'a list of int. Please check your input type.')

    self.conv_blocks = nn.ModuleList()
    self.attention_block_idx = []
    for idx in range(len(self.channel_factor_list)):
        factor_input = self.channel_factor_list[idx]
        factor_output = self.channel_factor_list[idx + 1] \
            if idx < len(self.channel_factor_list) - 1 else 1

        # get block-specific config
        block_cfg_ = deepcopy(self.blocks_cfg)
        block_cfg_['in_channels'] = factor_input * base_channels
        block_cfg_['out_channels'] = factor_output * base_channels
        self.conv_blocks.append(build_module(block_cfg_))

        # build self-attention block
        # `idx` starts from 0, add 1 to get the index
        if idx + 1 in attention_after_nth_block:
            self.attention_block_idx.append(len(self.conv_blocks))
            attn_cfg_ = deepcopy(attention_cfg)
            attn_cfg_['in_channels'] = factor_output * base_channels
            self.conv_blocks.append(build_module(attn_cfg_))

    to_rgb_norm_cfg = dict(type='BN', eps=norm_eps)
    if check_dist_init() and auto_sync_bn:
        to_rgb_norm_cfg['type'] = 'SyncBN'

    self.to_rgb = ConvModule(
        factor_output * base_channels,
        out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=True,
        norm_cfg=to_rgb_norm_cfg,
        act_cfg=act_cfg,
        order=('norm', 'act', 'conv'),
        with_spectral_norm=with_spectral_norm)
    self.final_act = build_activation_layer(dict(type='Tanh'))

    self.init_weights(pretrained)
def __init__(self,
             num_classes,
             in_channels,
             num_fcs=2,
             transformer=dict(
                 type='Transformer',
                 embed_dims=256,
                 num_heads=8,
                 num_encoder_layers=6,
                 num_decoder_layers=6,
                 feedforward_channels=2048,
                 dropout=0.1,
                 act_cfg=dict(type='ReLU', inplace=True),
                 norm_cfg=dict(type='LN'),
                 num_fcs=2,
                 pre_norm=False,
                 return_intermediate_dec=True),
             positional_encoding=dict(
                 type='SinePositionalEncoding',
                 num_feats=128,
                 normalize=True),
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 bg_cls_weight=0.1,
                 use_sigmoid=False,
                 loss_weight=1.0,
                 class_weight=1.0),
             loss_bbox=dict(type='L1Loss', loss_weight=5.0),
             loss_iou=dict(type='GIoULoss', loss_weight=2.0),
             train_cfg=dict(
                 assigner=dict(
                     type='HungarianAssigner',
                     cls_weight=1.,
                     bbox_weight=5.,
                     iou_weight=2.,
                     iou_calculator=dict(type='BboxOverlaps2D'),
                     iou_mode='giou')),
             test_cfg=dict(max_per_img=100),
             **kwargs):
    # NOTE here use `AnchorFreeHead` instead of `TransformerHead`,
    # since it brings inconvenience when the initialization of
    # `AnchorFreeHead` is called.
    super(AnchorFreeHead, self).__init__()
    use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    assert not use_sigmoid_cls, 'setting use_sigmoid_cls as True is ' \
        'not supported in DETR, since background is needed for the ' \
        'matching process.'
    assert 'embed_dims' in transformer \
        and 'num_feats' in positional_encoding
    num_feats = positional_encoding['num_feats']
    embed_dims = transformer['embed_dims']
    assert num_feats * 2 == embed_dims, 'embed_dims should' \
        f' be exactly 2 times of num_feats. Found {embed_dims}' \
        f' and {num_feats}.'
    assert test_cfg is not None and 'max_per_img' in test_cfg

    class_weight = loss_cls.get('class_weight', None)
    if class_weight is not None:
        assert isinstance(class_weight, float), 'Expected ' \
            'class_weight to have type float. Found ' \
            f'{type(class_weight)}.'
        # NOTE following the official DETR repo, bg_cls_weight means
        # relative classification weight of the no-object class.
        bg_cls_weight = loss_cls.get('bg_cls_weight', class_weight)
        assert isinstance(bg_cls_weight, float), 'Expected ' \
            'bg_cls_weight to have type float. Found ' \
            f'{type(bg_cls_weight)}.'
        class_weight = torch.ones(num_classes + 1) * class_weight
        # set background class as the last index
        class_weight[num_classes] = bg_cls_weight
        loss_cls.update({'class_weight': class_weight})
        if 'bg_cls_weight' in loss_cls:
            loss_cls.pop('bg_cls_weight')
        self.bg_cls_weight = bg_cls_weight

    if train_cfg:
        assert 'assigner' in train_cfg, 'assigner should be provided ' \
            'when train_cfg is set.'
        assigner = train_cfg['assigner']
        assert loss_cls['loss_weight'] == assigner['cls_weight'], \
            'The classification weight for loss and matcher should be ' \
            'exactly the same.'
        assert loss_bbox['loss_weight'] == assigner['bbox_weight'], \
            'The regression L1 weight for loss and matcher should be ' \
            'exactly the same.'
        assert loss_iou['loss_weight'] == assigner['iou_weight'], \
            'The regression iou weight for loss and matcher should be ' \
            'exactly the same.'
        self.assigner = build_assigner(assigner)
        # DETR sampling=False, so use PseudoSampler
        sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)

    self.num_classes = num_classes
    self.cls_out_channels = num_classes + 1
    self.in_channels = in_channels
    self.num_fcs = num_fcs
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.use_sigmoid_cls = use_sigmoid_cls
    self.embed_dims = embed_dims
    self.num_query = test_cfg['max_per_img']
    self.fp16_enabled = False
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.loss_iou = build_loss(loss_iou)
    self.act_cfg = transformer.get('act_cfg',
                                   dict(type='ReLU', inplace=True))
    self.activate = build_activation_layer(self.act_cfg)
    self.positional_encoding = build_positional_encoding(
        positional_encoding)
    self.transformer = build_transformer(transformer)
    self._init_layers()
def __init__(self,
             in_channels,
             out_channels,
             expansion=4,
             stride=1,
             dilation=1,
             downsample=None,
             style='pytorch',
             with_cp=False,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU', inplace=True),
             drop_path_rate=0.0,
             init_cfg=None):
    super(Bottleneck, self).__init__(init_cfg=init_cfg)
    assert style in ['pytorch', 'caffe']

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.expansion = expansion
    assert out_channels % expansion == 0
    self.mid_channels = out_channels // expansion
    self.stride = stride
    self.dilation = dilation
    self.style = style
    self.with_cp = with_cp
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg

    if self.style == 'pytorch':
        self.conv1_stride = 1
        self.conv2_stride = stride
    else:
        self.conv1_stride = stride
        self.conv2_stride = 1

    self.norm1_name, norm1 = build_norm_layer(
        norm_cfg, self.mid_channels, postfix=1)
    self.norm2_name, norm2 = build_norm_layer(
        norm_cfg, self.mid_channels, postfix=2)
    self.norm3_name, norm3 = build_norm_layer(
        norm_cfg, out_channels, postfix=3)

    self.conv1 = build_conv_layer(
        conv_cfg,
        in_channels,
        self.mid_channels,
        kernel_size=1,
        stride=self.conv1_stride,
        bias=False)
    self.add_module(self.norm1_name, norm1)
    self.conv2 = build_conv_layer(
        conv_cfg,
        self.mid_channels,
        self.mid_channels,
        kernel_size=3,
        stride=self.conv2_stride,
        padding=dilation,
        dilation=dilation,
        bias=False)
    self.add_module(self.norm2_name, norm2)
    self.conv3 = build_conv_layer(
        conv_cfg,
        self.mid_channels,
        out_channels,
        kernel_size=1,
        bias=False)
    self.add_module(self.norm3_name, norm3)

    self.relu = build_activation_layer(act_cfg)
    self.downsample = downsample
    self.drop_path = DropPath(
        drop_prob=drop_path_rate) if drop_path_rate > eps else nn.Identity()
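# Channel arithmetic for the bottleneck above, derived from the code: with
# out_channels=256 and expansion=4, mid_channels = 256 // 4 = 64, giving
# 1x1 (in -> 64) -> 3x3 (64 -> 64, carries the stride in 'pytorch' style)
# -> 1x1 (64 -> 256), with DropPath applied only when drop_path_rate > eps
# (`eps` is a small module-level constant defined outside this snippet).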
def __init__(self,
             inplanes,
             planes,
             outplanes,
             spatial_stride=1,
             downsample=None,
             se_ratio=None,
             use_swish=True,
             conv_cfg=dict(type='Conv3d'),
             norm_cfg=dict(type='BN3d'),
             act_cfg=dict(type='ReLU'),
             with_cp=False):
    super().__init__()

    self.inplanes = inplanes
    self.planes = planes
    self.outplanes = outplanes
    self.spatial_stride = spatial_stride
    self.downsample = downsample
    self.se_ratio = se_ratio
    self.use_swish = use_swish
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.act_cfg_swish = dict(type='Swish')
    self.with_cp = with_cp

    self.conv1 = ConvModule(
        in_channels=inplanes,
        out_channels=planes,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=self.act_cfg)
    # Here we use the channel-wise conv
    self.conv2 = ConvModule(
        in_channels=planes,
        out_channels=planes,
        kernel_size=3,
        stride=(1, self.spatial_stride, self.spatial_stride),
        padding=1,
        groups=planes,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=None)

    self.swish = Swish()

    self.conv3 = ConvModule(
        in_channels=planes,
        out_channels=outplanes,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=None)

    if self.se_ratio is not None:
        self.se_module = SEModule(planes, self.se_ratio)

    self.relu = build_activation_layer(self.act_cfg)
def __init__(self,
             in_channels,
             out_channels,
             exp_ratio=6,
             stride=1,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU'),
             init_cfg=None):
    super(GELayer, self).__init__(init_cfg=init_cfg)
    mid_channel = in_channels * exp_ratio
    self.conv1 = ConvModule(
        in_channels=in_channels,
        out_channels=in_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        act_cfg=act_cfg)
    if stride == 1:
        self.dwconv = nn.Sequential(
            # ReLU in ConvModule not shown in paper
            ConvModule(
                in_channels=in_channels,
                out_channels=mid_channel,
                kernel_size=3,
                stride=stride,
                padding=1,
                groups=in_channels,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg))
        self.shortcut = None
    else:
        self.dwconv = nn.Sequential(
            ConvModule(
                in_channels=in_channels,
                out_channels=mid_channel,
                kernel_size=3,
                stride=stride,
                padding=1,
                groups=in_channels,
                bias=False,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None),
            # ReLU in ConvModule not shown in paper
            ConvModule(
                in_channels=mid_channel,
                out_channels=mid_channel,
                kernel_size=3,
                stride=1,
                padding=1,
                groups=mid_channel,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg),
        )
        self.shortcut = nn.Sequential(
            DepthwiseSeparableConvModule(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                stride=stride,
                padding=1,
                dw_norm_cfg=norm_cfg,
                dw_act_cfg=None,
                pw_norm_cfg=norm_cfg,
                pw_act_cfg=None,
            ))

    self.conv2 = nn.Sequential(
        ConvModule(
            in_channels=mid_channel,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None,
        ))

    self.act = build_activation_layer(act_cfg)
def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, style='pytorch', with_cp=False, conv_cfg=None, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU', inplace=True)): """Bottleneck block for ResNet. If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is "caffe", the stride-two layer is the first 1x1 conv layer. """ super(Bottleneck, self).__init__() assert style in ['pytorch', 'caffe'] self.inplanes = inplanes self.planes = planes self.stride = stride self.dilation = dilation self.style = style self.with_cp = with_cp self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg if self.style == 'pytorch': self.conv1_stride = 1 self.conv2_stride = stride else: self.conv1_stride = stride self.conv2_stride = 1 self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) self.norm3_name, norm3 = build_norm_layer(norm_cfg, planes * self.expansion, postfix=3) self.conv1 = build_conv_layer(conv_cfg, inplanes, planes, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) self.conv2 = build_conv_layer(conv_cfg, planes, planes, kernel_size=3, stride=self.conv2_stride, padding=dilation, dilation=dilation, bias=False) self.add_module(self.norm2_name, norm2) self.conv3 = build_conv_layer(conv_cfg, planes, planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3) self.relu = build_activation_layer(act_cfg) self.downsample = downsample
def __init__(self,
             num_classes=80,
             num_ffn_fcs=2,
             num_heads=8,
             num_cls_fcs=1,
             num_reg_fcs=3,
             feedforward_channels=2048,
             in_channels=256,
             dropout=0.0,
             ffn_act_cfg=dict(type='ReLU', inplace=True),
             dynamic_conv_cfg=dict(
                 type='DynamicConv',
                 in_channels=256,
                 feat_channels=64,
                 out_channels=256,
                 input_feat_shape=7,
                 act_cfg=dict(type='ReLU', inplace=True),
                 norm_cfg=dict(type='LN')),
             loss_iou=dict(type='GIoULoss', loss_weight=2.0),
             init_cfg=None,
             **kwargs):
    assert init_cfg is None, 'To prevent abnormal initialization ' \
        'behavior, init_cfg is not allowed to be set'
    super(DIIHead, self).__init__(
        num_classes=num_classes,
        reg_decoded_bbox=True,
        reg_class_agnostic=True,
        init_cfg=init_cfg,
        **kwargs)
    self.loss_iou = build_loss(loss_iou)
    self.in_channels = in_channels
    self.fp16_enabled = False
    self.attention = MultiheadAttention(in_channels, num_heads, dropout)
    self.attention_norm = build_norm_layer(dict(type='LN'), in_channels)[1]

    self.instance_interactive_conv = build_transformer(dynamic_conv_cfg)
    self.instance_interactive_conv_dropout = nn.Dropout(dropout)
    self.instance_interactive_conv_norm = build_norm_layer(
        dict(type='LN'), in_channels)[1]

    self.ffn = FFN(
        in_channels,
        feedforward_channels,
        num_ffn_fcs,
        act_cfg=ffn_act_cfg,
        dropout=dropout)
    self.ffn_norm = build_norm_layer(dict(type='LN'), in_channels)[1]

    self.cls_fcs = nn.ModuleList()
    for _ in range(num_cls_fcs):
        self.cls_fcs.append(
            nn.Linear(in_channels, in_channels, bias=False))
        self.cls_fcs.append(
            build_norm_layer(dict(type='LN'), in_channels)[1])
        self.cls_fcs.append(
            build_activation_layer(dict(type='ReLU', inplace=True)))

    # overload the self.fc_cls in BBoxHead
    if self.loss_cls.use_sigmoid:
        self.fc_cls = nn.Linear(in_channels, self.num_classes)
    else:
        self.fc_cls = nn.Linear(in_channels, self.num_classes + 1)

    self.reg_fcs = nn.ModuleList()
    for _ in range(num_reg_fcs):
        self.reg_fcs.append(
            nn.Linear(in_channels, in_channels, bias=False))
        self.reg_fcs.append(
            build_norm_layer(dict(type='LN'), in_channels)[1])
        self.reg_fcs.append(
            build_activation_layer(dict(type='ReLU', inplace=True)))
    # overload the self.fc_reg in BBoxHead
    self.fc_reg = nn.Linear(in_channels, 4)

    assert self.reg_class_agnostic, 'DIIHead only ' \
        'supports `reg_class_agnostic=True`'
    assert self.reg_decoded_bbox, 'DIIHead only ' \
        'supports `reg_decoded_bbox=True`'
def _extra_norm_ac(self, norm_cfg, num_features):
    return nn.Sequential(
        build_norm_layer(norm_cfg, num_features)[1],
        build_activation_layer(self.act_cfg))
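# Example of what the helper above builds (assuming mmcv's build_norm_layer
# and build_activation_layer, with self.act_cfg = dict(type='ReLU')):
# self._extra_norm_ac(dict(type='BN'), 256)
#     -> nn.Sequential(nn.BatchNorm2d(256), nn.ReLU())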