def __init__(self,
             outer_channels,
             inner_channels,
             in_channels=None,
             submodule=None,
             is_outermost=False,
             is_innermost=False,
             norm_cfg=dict(type='BN'),
             use_dropout=False):
    super().__init__()
    # cannot be both outermost and innermost
    assert not (is_outermost and is_innermost), (
        "'is_outermost' and 'is_innermost' cannot be True "
        'at the same time.')
    self.is_outermost = is_outermost
    assert isinstance(norm_cfg, dict), ("'norm_cfg' should be dict, but "
                                        f'got {type(norm_cfg)}')
    assert 'type' in norm_cfg, "'norm_cfg' must have key 'type'"
    # We use norm layers in the unet skip connection block.
    # Only for IN, use bias since it does not have affine parameters.
    use_bias = norm_cfg['type'] == 'IN'

    kernel_size = 4
    stride = 2
    padding = 1
    if in_channels is None:
        in_channels = outer_channels
    down_conv_cfg = dict(type='Conv2d')
    down_norm_cfg = norm_cfg
    down_act_cfg = dict(type='LeakyReLU', negative_slope=0.2)
    up_conv_cfg = dict(type='Deconv')
    up_norm_cfg = norm_cfg
    up_act_cfg = dict(type='ReLU')
    up_in_channels = inner_channels * 2
    up_bias = use_bias
    middle = [submodule]
    upper = []

    if is_outermost:
        down_act_cfg = None
        down_norm_cfg = None
        up_bias = True
        up_norm_cfg = None
        upper = [nn.Tanh()]
    elif is_innermost:
        down_norm_cfg = None
        up_in_channels = inner_channels
        middle = []
    else:
        upper = [nn.Dropout(0.5)] if use_dropout else []

    down = [
        ConvModule(
            in_channels=in_channels,
            out_channels=inner_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=use_bias,
            conv_cfg=down_conv_cfg,
            norm_cfg=down_norm_cfg,
            act_cfg=down_act_cfg,
            order=('act', 'conv', 'norm'))
    ]
    up = [
        ConvModule(
            in_channels=up_in_channels,
            out_channels=outer_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=up_bias,
            conv_cfg=up_conv_cfg,
            norm_cfg=up_norm_cfg,
            act_cfg=up_act_cfg,
            order=('act', 'conv', 'norm'))
    ]

    model = down + middle + up + upper
    self.model = nn.Sequential(*model)
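# Usage sketch (the class name `UnetSkipConnectionBlock` is inferred from the
# constructor above; the nesting mirrors the pix2pix-style UNet generator and
# the channel numbers are illustrative assumptions). Blocks are built from
# the innermost level outward, each level wrapping the previous one via the
# recursive `submodule` argument:
def build_unet_sketch(in_channels=3, out_channels=3, base_channels=64):
    # innermost block: no submodule, no skip concat on the up path
    block = UnetSkipConnectionBlock(
        base_channels * 8, base_channels * 8, is_innermost=True)
    # intermediate blocks at the deepest resolution
    for _ in range(3):
        block = UnetSkipConnectionBlock(
            base_channels * 8, base_channels * 8, submodule=block,
            use_dropout=True)
    # decreasing channel counts towards the input resolution
    block = UnetSkipConnectionBlock(
        base_channels * 4, base_channels * 8, submodule=block)
    block = UnetSkipConnectionBlock(
        base_channels * 2, base_channels * 4, submodule=block)
    block = UnetSkipConnectionBlock(
        base_channels, base_channels * 2, submodule=block)
    # outermost block maps the image channels and ends with Tanh
    return UnetSkipConnectionBlock(
        out_channels, base_channels, in_channels=in_channels,
        submodule=block, is_outermost=True)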
def __init__(self,
             widen_factor=1.,
             out_indices=(1, 2, 4, 7),
             frozen_stages=-1,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU6'),
             norm_eval=False,
             with_cp=False,
             pretrained=None,
             init_cfg=None):
    super(MobileNetV2, self).__init__(init_cfg)

    self.pretrained = pretrained
    assert not (init_cfg and pretrained), \
        'init_cfg and pretrained cannot be set at the same time'
    if isinstance(pretrained, str):
        warnings.warn('DeprecationWarning: pretrained is deprecated, '
                      'please use "init_cfg" instead')
        self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
    elif pretrained is None:
        if init_cfg is None:
            self.init_cfg = [
                dict(type='Kaiming', layer='Conv2d'),
                dict(type='Constant', val=1,
                     layer=['_BatchNorm', 'GroupNorm'])
            ]
    else:
        raise TypeError('pretrained must be a str or None')

    self.widen_factor = widen_factor
    if not set(out_indices).issubset(set(range(0, 8))):
        raise ValueError('out_indices must be a subset of range(0, 8). '
                         f'But received {out_indices}')

    if frozen_stages not in range(-1, 8):
        raise ValueError('frozen_stages must be in range(-1, 8). '
                         f'But received {frozen_stages}')
    self.out_indices = out_indices
    self.frozen_stages = frozen_stages
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.norm_eval = norm_eval
    self.with_cp = with_cp

    self.in_channels = make_divisible(32 * widen_factor, 8)

    self.conv1 = ConvModule(
        in_channels=3,
        out_channels=self.in_channels,
        kernel_size=3,
        stride=2,
        padding=1,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=self.act_cfg)

    self.layers = []

    for i, layer_cfg in enumerate(self.arch_settings):
        expand_ratio, channel, num_blocks, stride = layer_cfg
        out_channels = make_divisible(channel * widen_factor, 8)
        inverted_res_layer = self.make_layer(
            out_channels=out_channels,
            num_blocks=num_blocks,
            stride=stride,
            expand_ratio=expand_ratio)
        layer_name = f'layer{i + 1}'
        self.add_module(layer_name, inverted_res_layer)
        self.layers.append(layer_name)

    if widen_factor > 1.0:
        self.out_channel = int(1280 * widen_factor)
    else:
        self.out_channel = 1280

    layer = ConvModule(
        in_channels=self.in_channels,
        out_channels=self.out_channel,
        kernel_size=1,
        stride=1,
        padding=0,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=self.act_cfg)
    self.add_module('conv2', layer)
    self.layers.append('conv2')
def make_res_layer(self,
                   block,
                   layer_inplanes,
                   inplanes,
                   planes,
                   blocks,
                   spatial_stride=1,
                   se_style='half',
                   se_ratio=None,
                   use_swish=True,
                   norm_cfg=None,
                   act_cfg=None,
                   conv_cfg=None,
                   with_cp=False,
                   **kwargs):
    """Build residual layer for ResNet3D.

    Args:
        block (nn.Module): Residual module to be built.
        layer_inplanes (int): Number of channels for the input feature
            of the res layer.
        inplanes (int): Number of channels for the input feature in each
            block, which equals to base_channels * gamma_w.
        planes (int): Number of channels for the output feature in each
            block, which equals to base_channels * gamma_w * gamma_b.
        blocks (int): Number of residual blocks.
        spatial_stride (int): Spatial stride in the residual and conv
            layers. Default: 1.
        se_style (str): The style of inserting SE modules into BlockX3D,
            'half' denotes insert into half of the blocks, while 'all'
            denotes insert into all blocks. Default: 'half'.
        se_ratio (float | None): The reduction ratio of the squeeze and
            excitation unit. If set as None, it means not using SE unit.
            Default: None.
        use_swish (bool): Whether to use swish as the activation function
            before and after the 3x3x3 conv. Default: True.
        conv_cfg (dict | None): Config for conv layers. Default: None.
        norm_cfg (dict | None): Config for norm layers. Default: None.
        act_cfg (dict | None): Config for activation layers.
            Default: None.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save
            some memory while slowing down the training speed.
            Default: False.

    Returns:
        nn.Module: A residual layer for the given config.
    """
    downsample = None
    if spatial_stride != 1 or layer_inplanes != inplanes:
        downsample = ConvModule(
            layer_inplanes,
            inplanes,
            kernel_size=1,
            stride=(1, spatial_stride, spatial_stride),
            padding=0,
            bias=False,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

    if self.se_style == 'all':
        use_se = [True] * blocks
    elif self.se_style == 'half':
        use_se = [i % 2 == 0 for i in range(blocks)]
    else:
        raise NotImplementedError

    layers = []
    layers.append(
        block(
            layer_inplanes,
            planes,
            inplanes,
            spatial_stride=spatial_stride,
            downsample=downsample,
            se_ratio=se_ratio if use_se[0] else None,
            use_swish=use_swish,
            norm_cfg=norm_cfg,
            conv_cfg=conv_cfg,
            act_cfg=act_cfg,
            with_cp=with_cp,
            **kwargs))

    for i in range(1, blocks):
        layers.append(
            block(
                inplanes,
                planes,
                inplanes,
                spatial_stride=1,
                se_ratio=se_ratio if use_se[i] else None,
                use_swish=use_swish,
                norm_cfg=norm_cfg,
                conv_cfg=conv_cfg,
                act_cfg=act_cfg,
                with_cp=with_cp,
                **kwargs))

    return nn.Sequential(*layers)
def _init_layers(self):
    self.cls_convs = nn.ModuleList()
    self.reg_convs = nn.ModuleList()
    self.mask_convs = nn.ModuleList()
    for i in range(self.stacked_convs):
        chn = self.in_channels if i == 0 else self.feat_channels
        if not self.use_dcn:
            self.cls_convs.append(
                ConvModule(chn, self.feat_channels, 3,
                           stride=1, padding=1,
                           conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg,
                           bias=self.norm_cfg is None))
            self.reg_convs.append(
                ConvModule(chn, self.feat_channels, 3,
                           stride=1, padding=1,
                           conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg,
                           bias=self.norm_cfg is None))
            self.mask_convs.append(
                ConvModule(chn, self.feat_channels, 3,
                           stride=1, padding=1,
                           conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg,
                           bias=self.norm_cfg is None))
        else:
            self.cls_convs.append(
                ModulatedDeformConv2dPack(chn, self.feat_channels, 3,
                                          stride=1, padding=1, dilation=1,
                                          deformable_groups=1))
            if self.norm_cfg:
                self.cls_convs.append(
                    build_norm_layer(self.norm_cfg, self.feat_channels)[1])
            self.cls_convs.append(nn.ReLU(inplace=True))

            self.reg_convs.append(
                ModulatedDeformConv2dPack(chn, self.feat_channels, 3,
                                          stride=1, padding=1, dilation=1,
                                          deformable_groups=1))
            if self.norm_cfg:
                self.reg_convs.append(
                    build_norm_layer(self.norm_cfg, self.feat_channels)[1])
            self.reg_convs.append(nn.ReLU(inplace=True))

            self.mask_convs.append(
                ModulatedDeformConv2dPack(chn, self.feat_channels, 3,
                                          stride=1, padding=1, dilation=1,
                                          deformable_groups=1))
            if self.norm_cfg:
                self.mask_convs.append(
                    build_norm_layer(self.norm_cfg, self.feat_channels)[1])
            self.mask_convs.append(nn.ReLU(inplace=True))

    self.polar_cls = nn.Conv2d(
        self.feat_channels, self.cls_out_channels, 3, padding=1)
    self.polar_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)
    self.polar_mask = nn.Conv2d(self.feat_channels, 36, 3, padding=1)
    self.polar_centerness = nn.Conv2d(self.feat_channels, 1, 3, padding=1)

    self.scales_bbox = nn.ModuleList([Scale(1.0) for _ in self.strides])
    self.scales_mask = nn.ModuleList([Scale(1.0) for _ in self.strides])
def __init__(self,
             in_channels,
             out_channels,
             num_outs,
             start_level=0,
             end_level=-1,
             add_extra_convs=False,
             relu_before_extra_convs=False,
             no_norm_on_lateral=False,
             conv_cfg=None,
             norm_cfg=None,
             act_cfg=None,
             upsample_cfg=dict(mode='nearest'),
             init_cfg=dict(
                 type='Xavier', layer='Conv2d', distribution='uniform')):
    super(FPN, self).__init__(init_cfg)
    assert isinstance(in_channels, list)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.num_ins = len(in_channels)
    self.num_outs = num_outs
    self.relu_before_extra_convs = relu_before_extra_convs
    self.no_norm_on_lateral = no_norm_on_lateral
    self.fp16_enabled = False
    self.upsample_cfg = upsample_cfg.copy()

    if end_level == -1:
        self.backbone_end_level = self.num_ins
        assert num_outs >= self.num_ins - start_level
    else:
        # if end_level < inputs, no extra level is allowed
        self.backbone_end_level = end_level
        assert end_level <= len(in_channels)
        assert num_outs == end_level - start_level
    self.start_level = start_level
    self.end_level = end_level
    self.add_extra_convs = add_extra_convs
    assert isinstance(add_extra_convs, (str, bool))
    if isinstance(add_extra_convs, str):
        # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
        assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
    elif add_extra_convs:  # True
        self.add_extra_convs = 'on_input'

    self.lateral_convs = nn.ModuleList()
    self.fpn_convs = nn.ModuleList()

    for i in range(self.start_level, self.backbone_end_level):
        l_conv = ConvModule(
            in_channels[i],
            out_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
            act_cfg=act_cfg,
            inplace=False)
        fpn_conv = ConvModule(
            out_channels,
            out_channels,
            3,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            inplace=False)

        self.lateral_convs.append(l_conv)
        self.fpn_convs.append(fpn_conv)

    # add extra conv layers (e.g., RetinaNet)
    extra_levels = num_outs - self.backbone_end_level + self.start_level
    if self.add_extra_convs and extra_levels >= 1:
        for i in range(extra_levels):
            if i == 0 and self.add_extra_convs == 'on_input':
                in_channels = self.in_channels[self.backbone_end_level - 1]
            else:
                in_channels = out_channels
            extra_fpn_conv = ConvModule(
                in_channels,
                out_channels,
                3,
                stride=2,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                inplace=False)
            self.fpn_convs.append(extra_fpn_conv)
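# Minimal usage sketch for the FPN above (assumes the standard mmdet-style
# forward that takes one feature map per backbone stage; the ResNet-50
# channel counts and spatial sizes below are illustrative assumptions):
import torch

in_channels = [256, 512, 1024, 2048]
sizes = [56, 28, 14, 7]
feats = [torch.rand(1, c, s, s) for c, s in zip(in_channels, sizes)]
fpn = FPN(in_channels, out_channels=256, num_outs=5,
          add_extra_convs='on_input')
# outs = fpn(feats)  # expected: 5 levels, each with 256 channels, at
#                    # sizes 56, 28, 14, 7, plus an extra stride-2 4x4 level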
def __init__(self,
             in_channels,
             out_channels,
             num_outs,
             start_level=0,
             end_level=-1,
             add_extra_convs=False,
             extra_convs_on_inputs=True,
             relu_before_extra_convs=False,
             no_norm_on_lateral=False,
             conv_cfg=None,
             norm_cfg=None,
             act_cfg=None,
             upsample_cfg=dict(mode='nearest')):
    super(FPNDcnLconv3Dcn1, self).__init__()
    assert isinstance(in_channels, list)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.num_ins = len(in_channels)
    self.num_outs = num_outs
    self.relu_before_extra_convs = relu_before_extra_convs
    self.no_norm_on_lateral = no_norm_on_lateral
    self.fp16_enabled = False
    self.upsample_cfg = upsample_cfg.copy()

    if end_level == -1:
        self.backbone_end_level = self.num_ins
        assert num_outs >= self.num_ins - start_level
    else:
        # if end_level < inputs, no extra level is allowed
        self.backbone_end_level = end_level
        assert end_level <= len(in_channels)
        assert num_outs == end_level - start_level
    self.start_level = start_level
    self.end_level = end_level
    self.add_extra_convs = add_extra_convs
    assert isinstance(add_extra_convs, (str, bool))
    if isinstance(add_extra_convs, str):
        # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
        assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
    elif add_extra_convs:  # True
        if extra_convs_on_inputs:
            # For compatibility with previous release
            # TODO: deprecate `extra_convs_on_inputs`
            self.add_extra_convs = 'on_input'
        else:
            self.add_extra_convs = 'on_output'

    self.lateral_convs = nn.ModuleList()
    self.fpn_convs = nn.ModuleList()

    for i in range(self.start_level, self.backbone_end_level):
        # The plain ConvModule lateral conv is replaced by a deformable
        # conv; the original version is kept here for reference:
        # l_conv = ConvModule(
        #     in_channels[i], out_channels, 3, padding=1,
        #     conv_cfg=conv_cfg,
        #     norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
        #     act_cfg=act_cfg, inplace=False)
        l_conv = DeformConv2dPack(
            in_channels[i], out_channels, 3, padding=1)
        fpn_conv = DeformConv2dPack(
            (self.backbone_end_level - i) * out_channels,
            out_channels, 3, padding=1)

        self.lateral_convs.append(l_conv)
        self.fpn_convs.append(fpn_conv)

    # add extra conv layers (e.g., RetinaNet)
    extra_levels = num_outs - self.backbone_end_level + self.start_level
    if self.add_extra_convs and extra_levels >= 1:
        for i in range(extra_levels):
            if i == 0 and self.add_extra_convs == 'on_input':
                in_channels = self.in_channels[self.backbone_end_level - 1]
            else:
                in_channels = out_channels
            extra_fpn_conv = ConvModule(
                in_channels,
                out_channels,
                3,
                stride=2,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                inplace=False)
            self.fpn_convs.append(extra_fpn_conv)
def make_res_layer(self,
                   block,
                   inplanes,
                   planes,
                   blocks,
                   spatial_stride=1,
                   temporal_stride=1,
                   dilation=1,
                   style='pytorch',
                   inflate=1,
                   inflate_style='3x1x1',
                   non_local=0,
                   non_local_cfg=dict(),
                   conv_cfg=None,
                   norm_cfg=None,
                   act_cfg=None,
                   with_cp=False):
    """Build residual layer for SlowFast.

    Args:
        block (nn.Module): Residual module to be built.
        inplanes (int): Number of channels for the input feature in each
            block.
        planes (int): Number of channels for the output feature in each
            block.
        blocks (int): Number of residual blocks.
        spatial_stride (int | Sequence[int]): Spatial strides in residual
            and conv layers. Default: 1.
        temporal_stride (int | Sequence[int]): Temporal strides in
            residual and conv layers. Default: 1.
        dilation (int): Spacing between kernel elements. Default: 1.
        style (str): ``pytorch`` or ``caffe``. If set to ``pytorch``, the
            stride-two layer is the 3x3 conv layer, otherwise the
            stride-two layer is the first 1x1 conv layer.
            Default: ``pytorch``.
        inflate (int | Sequence[int]): Determine whether to inflate for
            each block. Default: 1.
        inflate_style (str): ``3x1x1`` or ``1x1x1``, which determines the
            kernel sizes and padding strides for conv1 and conv2 in each
            block. Default: ``3x1x1``.
        non_local (int | Sequence[int]): Determine whether to apply a
            non-local module in the corresponding block of each stage.
            Default: 0.
        non_local_cfg (dict): Config for non-local module.
            Default: ``dict()``.
        conv_cfg (dict | None): Config for conv layers. Default: None.
        norm_cfg (dict | None): Config for norm layers. Default: None.
        act_cfg (dict | None): Config for activation layers.
            Default: None.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save
            some memory while slowing down the training speed.
            Default: False.

    Returns:
        nn.Module: A residual layer for the given config.
    """
    inflate = inflate if not isinstance(inflate,
                                        int) else (inflate, ) * blocks
    non_local = non_local if not isinstance(
        non_local, int) else (non_local, ) * blocks
    assert len(inflate) == blocks and len(non_local) == blocks

    if self.lateral:
        lateral_inplanes = inplanes * 2 // self.channel_ratio
    else:
        lateral_inplanes = 0

    if (spatial_stride != 1
            or (inplanes + lateral_inplanes) != planes * block.expansion):
        downsample = ConvModule(
            inplanes + lateral_inplanes,
            planes * block.expansion,
            kernel_size=1,
            stride=(temporal_stride, spatial_stride, spatial_stride),
            bias=False,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)
    else:
        downsample = None

    layers = []
    layers.append(
        block(
            inplanes + lateral_inplanes,
            planes,
            spatial_stride,
            temporal_stride,
            dilation,
            downsample,
            style=style,
            inflate=(inflate[0] == 1),
            inflate_style=inflate_style,
            non_local=(non_local[0] == 1),
            non_local_cfg=non_local_cfg,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            with_cp=with_cp))
    inplanes = planes * block.expansion

    for i in range(1, blocks):
        layers.append(
            block(
                inplanes,
                planes,
                1,
                1,
                dilation,
                style=style,
                inflate=(inflate[i] == 1),
                inflate_style=inflate_style,
                non_local=(non_local[i] == 1),
                non_local_cfg=non_local_cfg,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                with_cp=with_cp))

    return nn.Sequential(*layers)
def __init__(self,
             inplanes,
             planes,
             spatial_stride=1,
             temporal_stride=1,
             dilation=1,
             downsample=None,
             style='pytorch',
             inflate=True,
             non_local=False,
             non_local_cfg=dict(),
             conv_cfg=dict(type='Conv3d'),
             norm_cfg=dict(type='BN3d'),
             act_cfg=dict(type='ReLU'),
             with_cp=False,
             **kwargs):
    super().__init__()
    assert style in ['pytorch', 'caffe']
    # make sure that only ``inflate_style`` is passed into kwargs
    assert set(kwargs).issubset(['inflate_style'])

    self.inplanes = inplanes
    self.planes = planes
    self.spatial_stride = spatial_stride
    self.temporal_stride = temporal_stride
    self.dilation = dilation
    self.style = style
    self.inflate = inflate
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.with_cp = with_cp
    self.non_local = non_local
    self.non_local_cfg = non_local_cfg

    self.conv1_stride_s = spatial_stride
    self.conv2_stride_s = 1
    self.conv1_stride_t = temporal_stride
    self.conv2_stride_t = 1

    if self.inflate:
        conv1_kernel_size = (3, 3, 3)
        conv1_padding = (1, dilation, dilation)
        conv2_kernel_size = (3, 3, 3)
        conv2_padding = (1, 1, 1)
    else:
        conv1_kernel_size = (1, 3, 3)
        conv1_padding = (0, dilation, dilation)
        conv2_kernel_size = (1, 3, 3)
        conv2_padding = (0, 1, 1)

    self.conv1 = ConvModule(
        inplanes,
        planes,
        conv1_kernel_size,
        stride=(self.conv1_stride_t, self.conv1_stride_s,
                self.conv1_stride_s),
        padding=conv1_padding,
        dilation=(1, dilation, dilation),
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=self.act_cfg)

    self.conv2 = ConvModule(
        planes,
        planes * self.expansion,
        conv2_kernel_size,
        stride=(self.conv2_stride_t, self.conv2_stride_s,
                self.conv2_stride_s),
        padding=conv2_padding,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=None)

    self.downsample = downsample
    self.relu = build_activation_layer(self.act_cfg)

    if self.non_local:
        self.non_local_block = NonLocal3d(self.conv2.norm.num_features,
                                          **self.non_local_cfg)
def __init__(self,
             widen_factor=1.0,
             out_indices=(3, ),
             frozen_stages=-1,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU'),
             norm_eval=False,
             with_cp=False):
    super().__init__()
    self.stage_blocks = [4, 8, 4]
    for index in out_indices:
        if index not in range(0, 4):
            raise ValueError('the item in out_indices must be in '
                             f'range(0, 4). But received {index}')

    if frozen_stages not in range(-1, 4):
        raise ValueError('frozen_stages must be in range(-1, 4). '
                         f'But received {frozen_stages}')
    self.out_indices = out_indices
    self.frozen_stages = frozen_stages
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.norm_eval = norm_eval
    self.with_cp = with_cp

    if widen_factor == 0.5:
        channels = [48, 96, 192, 1024]
    elif widen_factor == 1.0:
        channels = [116, 232, 464, 1024]
    elif widen_factor == 1.5:
        channels = [176, 352, 704, 1024]
    elif widen_factor == 2.0:
        channels = [244, 488, 976, 2048]
    else:
        raise ValueError('widen_factor must be in [0.5, 1.0, 1.5, 2.0]. '
                         f'But received {widen_factor}')

    self.in_channels = 24
    self.conv1 = ConvModule(
        in_channels=3,
        out_channels=self.in_channels,
        kernel_size=3,
        stride=2,
        padding=1,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        act_cfg=act_cfg)

    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.layers = nn.ModuleList()
    for i, num_blocks in enumerate(self.stage_blocks):
        layer = self._make_layer(channels[i], num_blocks)
        self.layers.append(layer)

    output_channels = channels[-1]
    self.layers.append(
        ConvModule(
            in_channels=self.in_channels,
            out_channels=output_channels,
            kernel_size=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg))
def __init__(self,
             in_channels,
             out_channels,
             groups=3,
             first_block=True,
             combine='add',
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU'),
             with_cp=False):
    # Protect mutable default arguments
    norm_cfg = copy.deepcopy(norm_cfg)
    act_cfg = copy.deepcopy(act_cfg)
    super().__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.first_block = first_block
    self.combine = combine
    self.groups = groups
    self.bottleneck_channels = self.out_channels // 4
    self.with_cp = with_cp

    if self.combine == 'add':
        self.depthwise_stride = 1
        self._combine_func = self._add
        assert in_channels == out_channels, (
            'in_channels must be equal to out_channels when combine '
            'is add')
    elif self.combine == 'concat':
        self.depthwise_stride = 2
        self._combine_func = self._concat
        self.out_channels -= self.in_channels
        self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
    else:
        raise ValueError(f'Cannot combine tensors with {self.combine}. '
                         'Only "add" and "concat" are supported')

    self.first_1x1_groups = 1 if first_block else self.groups
    self.g_conv_1x1_compress = ConvModule(
        in_channels=self.in_channels,
        out_channels=self.bottleneck_channels,
        kernel_size=1,
        groups=self.first_1x1_groups,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        act_cfg=act_cfg)

    self.depthwise_conv3x3_bn = ConvModule(
        in_channels=self.bottleneck_channels,
        out_channels=self.bottleneck_channels,
        kernel_size=3,
        stride=self.depthwise_stride,
        padding=1,
        groups=self.bottleneck_channels,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        act_cfg=None)

    self.g_conv_1x1_expand = ConvModule(
        in_channels=self.bottleneck_channels,
        out_channels=self.out_channels,
        kernel_size=1,
        groups=self.groups,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        act_cfg=None)

    self.act = build_activation_layer(act_cfg)
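# Note on the 'concat' branch above (a reading of the code, not upstream
# documentation): the residual path is average-pooled with stride 2 and
# concatenated with the conv branch, so the conv branch only needs to
# produce `out_channels - in_channels` features for the concatenation to
# total `out_channels`. A toy check of the arithmetic:
in_channels, out_channels = 24, 240
branch_channels = out_channels - in_channels   # conv branch output: 216
assert branch_channels + in_channels == 240    # concat restores out_channels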
def __init__(self,
             inplanes,
             planes,
             spatial_stride=1,
             temporal_stride=1,
             dilation=1,
             downsample=None,
             style='pytorch',
             inflate=True,
             inflate_style='3x1x1',
             non_local=False,
             non_local_cfg=dict(),
             conv_cfg=dict(type='Conv3d'),
             norm_cfg=dict(type='BN3d'),
             act_cfg=dict(type='ReLU'),
             with_cp=False):
    super().__init__()
    assert style in ['pytorch', 'caffe']
    assert inflate_style in ['3x1x1', '3x3x3']

    self.inplanes = inplanes
    self.planes = planes
    self.spatial_stride = spatial_stride
    self.temporal_stride = temporal_stride
    self.dilation = dilation
    self.style = style
    self.inflate = inflate
    self.inflate_style = inflate_style
    self.norm_cfg = norm_cfg
    self.conv_cfg = conv_cfg
    self.act_cfg = act_cfg
    self.with_cp = with_cp
    self.non_local = non_local
    self.non_local_cfg = non_local_cfg

    if self.style == 'pytorch':
        self.conv1_stride_s = 1
        self.conv2_stride_s = spatial_stride
        self.conv1_stride_t = 1
        self.conv2_stride_t = temporal_stride
    else:
        self.conv1_stride_s = spatial_stride
        self.conv2_stride_s = 1
        self.conv1_stride_t = temporal_stride
        self.conv2_stride_t = 1

    if self.inflate:
        if inflate_style == '3x1x1':
            conv1_kernel_size = (3, 1, 1)
            conv1_padding = (1, 0, 0)
            conv2_kernel_size = (1, 3, 3)
            conv2_padding = (0, dilation, dilation)
        else:
            conv1_kernel_size = (1, 1, 1)
            conv1_padding = (0, 0, 0)
            conv2_kernel_size = (3, 3, 3)
            conv2_padding = (1, dilation, dilation)
    else:
        conv1_kernel_size = (1, 1, 1)
        conv1_padding = (0, 0, 0)
        conv2_kernel_size = (1, 3, 3)
        conv2_padding = (0, dilation, dilation)

    self.conv1 = ConvModule(
        inplanes,
        planes,
        conv1_kernel_size,
        stride=(self.conv1_stride_t, self.conv1_stride_s,
                self.conv1_stride_s),
        padding=conv1_padding,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=self.act_cfg)

    self.conv2 = ConvModule(
        planes,
        planes,
        conv2_kernel_size,
        stride=(self.conv2_stride_t, self.conv2_stride_s,
                self.conv2_stride_s),
        padding=conv2_padding,
        dilation=(1, dilation, dilation),
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=self.act_cfg)

    self.conv3 = ConvModule(
        planes,
        planes * self.expansion,
        1,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        # No activation in the third ConvModule for bottleneck
        act_cfg=None)

    self.downsample = downsample
    self.relu = build_activation_layer(self.act_cfg)

    if self.non_local:
        self.non_local_block = NonLocal3d(self.conv3.norm.num_features,
                                          **self.non_local_cfg)
def __init__(self,
             groups=3,
             widen_factor=1.0,
             out_indices=(2, ),
             frozen_stages=-1,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU'),
             norm_eval=False,
             with_cp=False):
    # Protect mutable default arguments
    norm_cfg = copy.deepcopy(norm_cfg)
    act_cfg = copy.deepcopy(act_cfg)
    super().__init__()
    self.stage_blocks = [4, 8, 4]
    self.groups = groups

    for index in out_indices:
        if index not in range(0, 3):
            raise ValueError('the item in out_indices must be in '
                             f'range(0, 3). But received {index}')

    if frozen_stages not in range(-1, 3):
        raise ValueError('frozen_stages must be in range(-1, 3). '
                         f'But received {frozen_stages}')
    self.out_indices = out_indices
    self.frozen_stages = frozen_stages
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.norm_eval = norm_eval
    self.with_cp = with_cp

    if groups == 1:
        channels = (144, 288, 576)
    elif groups == 2:
        channels = (200, 400, 800)
    elif groups == 3:
        channels = (240, 480, 960)
    elif groups == 4:
        channels = (272, 544, 1088)
    elif groups == 8:
        channels = (384, 768, 1536)
    else:
        raise ValueError(f'{groups} groups is not supported for 1x1 '
                         'Grouped Convolutions')

    channels = [make_divisible(ch * widen_factor, 8) for ch in channels]

    self.in_channels = int(24 * widen_factor)

    self.conv1 = ConvModule(
        in_channels=3,
        out_channels=self.in_channels,
        kernel_size=3,
        stride=2,
        padding=1,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        act_cfg=act_cfg)

    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.layers = nn.ModuleList()
    for i, num_blocks in enumerate(self.stage_blocks):
        first_block = (i == 0)
        layer = self.make_layer(channels[i], num_blocks, first_block)
        self.layers.append(layer)
def make_res_layer(self,
                   block,
                   inplanes,
                   planes,
                   blocks,
                   stride=1,
                   dilation=1,
                   factorize=1,
                   norm_cfg=None,
                   with_cp=False):
    """Build residual layer for ResNetAudio.

    Args:
        block (nn.Module): Residual module to be built.
        inplanes (int): Number of channels for the input feature in each
            block.
        planes (int): Number of channels for the output feature in each
            block.
        blocks (int): Number of residual blocks.
        stride (int): Stride in the conv layer. Default: 1.
        dilation (int): Spacing between kernel elements. Default: 1.
        factorize (int | Sequence[int]): Determine whether to factorize
            for each block. Default: 1.
        norm_cfg (dict): Config for norm layers. Required keys are `type`
            and `requires_grad`. Default: None.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save
            some memory while slowing down the training speed.
            Default: False.

    Returns:
        nn.Module: A residual layer for the given config.
    """
    factorize = factorize if not isinstance(
        factorize, int) else (factorize, ) * blocks
    assert len(factorize) == blocks

    downsample = None
    if stride != 1 or inplanes != planes * block.expansion:
        downsample = ConvModule(
            inplanes,
            planes * block.expansion,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm_cfg=norm_cfg,
            act_cfg=None)

    layers = []
    layers.append(
        block(
            inplanes,
            planes,
            stride,
            dilation,
            downsample,
            factorize=(factorize[0] == 1),
            norm_cfg=norm_cfg,
            with_cp=with_cp))
    inplanes = planes * block.expansion

    for i in range(1, blocks):
        layers.append(
            block(
                inplanes,
                planes,
                1,
                dilation,
                factorize=(factorize[i] == 1),
                norm_cfg=norm_cfg,
                with_cp=with_cp))

    return nn.Sequential(*layers)
def __init__(self,
             num_convs=4,
             roi_feat_size=14,
             in_channels=256,
             conv_kernel_size=3,
             conv_out_channels=256,
             num_classes=80,
             class_agnostic=False,
             upsample_cfg=dict(type='deconv', scale_factor=2),
             conv_cfg=None,
             norm_cfg=None,
             predictor_cfg=dict(type='Conv'),
             loss_mask=dict(
                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0),
             init_cfg=None):
    assert init_cfg is None, 'To prevent abnormal initialization ' \
                             'behavior, init_cfg is not allowed to be set'
    super(FCNMaskHead, self).__init__(init_cfg)
    self.upsample_cfg = upsample_cfg.copy()
    if self.upsample_cfg['type'] not in [
            None, 'deconv', 'nearest', 'bilinear', 'carafe'
    ]:
        raise ValueError(
            f'Invalid upsample method {self.upsample_cfg["type"]}, '
            'accepted methods are "deconv", "nearest", "bilinear", '
            '"carafe"')
    self.num_convs = num_convs
    # WARN: roi_feat_size is reserved and not used
    self.roi_feat_size = _pair(roi_feat_size)
    self.in_channels = in_channels
    self.conv_kernel_size = conv_kernel_size
    self.conv_out_channels = conv_out_channels
    self.upsample_method = self.upsample_cfg.get('type')
    self.scale_factor = self.upsample_cfg.pop('scale_factor', None)
    self.num_classes = num_classes
    self.class_agnostic = class_agnostic
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.predictor_cfg = predictor_cfg
    self.fp16_enabled = False
    self.loss_mask = build_loss(loss_mask)

    self.convs = ModuleList()
    for i in range(self.num_convs):
        in_channels = (
            self.in_channels if i == 0 else self.conv_out_channels)
        padding = (self.conv_kernel_size - 1) // 2
        self.convs.append(
            ConvModule(
                in_channels,
                self.conv_out_channels,
                self.conv_kernel_size,
                padding=padding,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg))
    upsample_in_channels = (
        self.conv_out_channels if self.num_convs > 0 else in_channels)
    upsample_cfg_ = self.upsample_cfg.copy()
    if self.upsample_method is None:
        self.upsample = None
    elif self.upsample_method == 'deconv':
        upsample_cfg_.update(
            in_channels=upsample_in_channels,
            out_channels=self.conv_out_channels,
            kernel_size=self.scale_factor,
            stride=self.scale_factor)
        self.upsample = build_upsample_layer(upsample_cfg_)
    elif self.upsample_method == 'carafe':
        upsample_cfg_.update(
            channels=upsample_in_channels, scale_factor=self.scale_factor)
        self.upsample = build_upsample_layer(upsample_cfg_)
    else:
        # suppress warnings
        align_corners = (None
                         if self.upsample_method == 'nearest' else False)
        upsample_cfg_.update(
            scale_factor=self.scale_factor,
            mode=self.upsample_method,
            align_corners=align_corners)
        self.upsample = build_upsample_layer(upsample_cfg_)

    out_channels = 1 if self.class_agnostic else self.num_classes
    logits_in_channel = (
        self.conv_out_channels
        if self.upsample_method == 'deconv' else upsample_in_channels)
    self.conv_logits = build_conv_layer(self.predictor_cfg,
                                        logits_in_channel, out_channels, 1)
    self.relu = nn.ReLU(inplace=True)
    self.debug_imgs = None
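# Illustrative upsample_cfg variants accepted by the validation above (these
# dicts are assumptions that mirror the recognized `type` values; note the
# constructor later reuses scale_factor as both kernel_size and stride for
# 'deconv'):
# upsample_cfg=dict(type='deconv', scale_factor=2)    # ConvTranspose2d
# upsample_cfg=dict(type='bilinear', scale_factor=2)  # nn.Upsample
# upsample_cfg=dict(type='carafe', scale_factor=2)    # CARAFE op
# upsample_cfg=dict(type=None)                        # no upsampling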
def __init__(self,
             in_channels,
             out_channels,
             num_outs,
             start_level=0,
             end_level=-1,
             add_extra_convs=False,
             extra_convs_on_inputs=True,
             relu_before_extra_convs=False,
             no_norm_on_lateral=False,
             conv_cfg=None,
             norm_cfg=None,
             act_cfg=None,
             order=('conv', 'norm', 'act'),
             upsample_cfg=dict(
                 type='carafe',
                 up_kernel=5,
                 up_group=1,
                 encoder_kernel=3,
                 encoder_dilation=1)):
    super(PAN_CARAFE, self).__init__()
    assert isinstance(in_channels, list)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.num_ins = len(in_channels)
    self.num_outs = num_outs
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.with_bias = norm_cfg is None
    self.upsample_cfg = upsample_cfg.copy()
    self.upsample = self.upsample_cfg.get('type')
    self.relu = nn.ReLU(inplace=False)

    self.order = order
    assert order in [('conv', 'norm', 'act'), ('act', 'conv', 'norm')]

    assert self.upsample in [
        'nearest', 'bilinear', 'deconv', 'pixel_shuffle', 'carafe', None
    ]
    if self.upsample in ['deconv', 'pixel_shuffle']:
        assert hasattr(
            self.upsample_cfg,
            'upsample_kernel') and self.upsample_cfg.upsample_kernel > 0
        self.upsample_kernel = self.upsample_cfg.pop('upsample_kernel')

    if end_level == -1:
        self.backbone_end_level = self.num_ins
        assert num_outs >= self.num_ins - start_level
    else:
        # if end_level < inputs, no extra level is allowed
        self.backbone_end_level = end_level
        assert end_level <= len(in_channels)
        assert num_outs == end_level - start_level
    self.start_level = start_level
    self.end_level = end_level

    self.lateral_convs = nn.ModuleList()
    self.fpn_convs = nn.ModuleList()
    self.upsample_modules = nn.ModuleList()

    for i in range(self.start_level, self.backbone_end_level):
        l_conv = ConvModule(
            in_channels[i],
            out_channels,
            1,
            norm_cfg=norm_cfg,
            bias=self.with_bias,
            act_cfg=act_cfg,
            inplace=False,
            order=self.order)
        fpn_conv = ConvModule(
            out_channels,
            out_channels,
            3,
            padding=1,
            norm_cfg=self.norm_cfg,
            bias=self.with_bias,
            act_cfg=act_cfg,
            inplace=False,
            order=self.order)
        if i != self.backbone_end_level - 1:
            upsample_cfg_ = self.upsample_cfg.copy()
            if self.upsample == 'deconv':
                upsample_cfg_.update(
                    in_channels=out_channels,
                    out_channels=out_channels,
                    kernel_size=self.upsample_kernel,
                    stride=2,
                    padding=(self.upsample_kernel - 1) // 2,
                    output_padding=(self.upsample_kernel - 1) // 2)
            elif self.upsample == 'pixel_shuffle':
                upsample_cfg_.update(
                    in_channels=out_channels,
                    out_channels=out_channels,
                    scale_factor=2,
                    upsample_kernel=self.upsample_kernel)
            elif self.upsample == 'carafe':
                upsample_cfg_.update(channels=out_channels, scale_factor=2)
            else:
                # suppress warnings
                align_corners = (None
                                 if self.upsample == 'nearest' else False)
                upsample_cfg_.update(
                    scale_factor=2,
                    mode=self.upsample,
                    align_corners=align_corners)
            upsample_module = build_upsample_layer(upsample_cfg_)
            self.upsample_modules.append(upsample_module)
        self.lateral_convs.append(l_conv)
        self.fpn_convs.append(fpn_conv)

    # add extra bottom up pathway
    self.downsample_convs = nn.ModuleList()
    self.pafpn_convs = nn.ModuleList()
    for i in range(self.start_level, self.backbone_end_level):
        d_conv = ConvModule(
            out_channels,
            out_channels,
            3,
            stride=2,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            inplace=False)
        pafpn_conv = ConvModule(
            out_channels,
            out_channels,
            3,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            inplace=False)
        self.downsample_convs.append(d_conv)
        self.pafpn_convs.append(pafpn_conv)

    # add extra conv layers (e.g., RetinaNet)
    extra_out_levels = (
        num_outs - self.backbone_end_level + self.start_level)
    if extra_out_levels >= 1:
        for i in range(extra_out_levels):
            in_channels = (
                self.in_channels[self.backbone_end_level - 1]
                if i == 0 else out_channels)
            extra_l_conv = ConvModule(
                in_channels,
                out_channels,
                3,
                stride=2,
                padding=1,
                norm_cfg=norm_cfg,
                bias=self.with_bias,
                act_cfg=act_cfg,
                inplace=False,
                order=self.order)
            if self.upsample == 'deconv':
                upsampler_cfg_ = dict(
                    in_channels=out_channels,
                    out_channels=out_channels,
                    kernel_size=self.upsample_kernel,
                    stride=2,
                    padding=(self.upsample_kernel - 1) // 2,
                    output_padding=(self.upsample_kernel - 1) // 2)
            elif self.upsample == 'pixel_shuffle':
                upsampler_cfg_ = dict(
                    in_channels=out_channels,
                    out_channels=out_channels,
                    scale_factor=2,
                    upsample_kernel=self.upsample_kernel)
            elif self.upsample == 'carafe':
                upsampler_cfg_ = dict(
                    channels=out_channels,
                    scale_factor=2,
                    **self.upsample_cfg)
            else:
                # suppress warnings
                align_corners = (None
                                 if self.upsample == 'nearest' else False)
                upsampler_cfg_ = dict(
                    scale_factor=2,
                    mode=self.upsample,
                    align_corners=align_corners)
            upsampler_cfg_['type'] = self.upsample
            upsample_module = build_upsample_layer(upsampler_cfg_)
            extra_fpn_conv = ConvModule(
                out_channels,
                out_channels,
                3,
                padding=1,
                norm_cfg=self.norm_cfg,
                bias=self.with_bias,
                act_cfg=act_cfg,
                inplace=False,
                order=self.order)
            self.upsample_modules.append(upsample_module)
            self.fpn_convs.append(extra_fpn_conv)
            self.lateral_convs.append(extra_l_conv)
def __init__(self,
             in_channels,
             out_channels,
             stride=1,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU'),
             with_cp=False):
    super().__init__()
    self.stride = stride
    self.with_cp = with_cp

    branch_features = out_channels // 2
    if self.stride == 1:
        assert in_channels == branch_features * 2, (
            f'in_channels ({in_channels}) should equal to '
            f'branch_features * 2 ({branch_features * 2}) '
            'when stride is 1')

    if in_channels != branch_features * 2:
        assert self.stride != 1, (
            f'stride ({self.stride}) should not equal 1 when '
            f'in_channels != branch_features * 2')

    if self.stride > 1:
        self.branch1 = nn.Sequential(
            ConvModule(
                in_channels,
                in_channels,
                kernel_size=3,
                stride=self.stride,
                padding=1,
                groups=in_channels,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None),
            ConvModule(
                in_channels,
                branch_features,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg),
        )

    self.branch2 = nn.Sequential(
        ConvModule(
            in_channels if (self.stride > 1) else branch_features,
            branch_features,
            kernel_size=1,
            stride=1,
            padding=0,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg),
        ConvModule(
            branch_features,
            branch_features,
            kernel_size=3,
            stride=self.stride,
            padding=1,
            groups=branch_features,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None),
        ConvModule(
            branch_features,
            branch_features,
            kernel_size=1,
            stride=1,
            padding=0,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg))
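# Usage sketch for the unit above (the class name `InvertedResidual` is an
# assumption; this snippet only shows the constructor, and the expected
# shapes follow the ShuffleNetV2 design: stride 1 keeps shapes via channel
# split/concat, stride 2 halves the spatial size while the two branches
# together supply `out_channels`):
import torch

unit = InvertedResidual(in_channels=116, out_channels=116, stride=1)
x = torch.rand(1, 116, 28, 28)
# out = unit(x)     # expected: (1, 116, 28, 28)
down = InvertedResidual(in_channels=116, out_channels=232, stride=2)
# out = down(x)     # expected: (1, 232, 14, 14)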
def _init_layers(self):
    """Initialize layers of the head."""
    self.relu = nn.ReLU(inplace=True)
    self.topk_conv = nn.ModuleList()
    self.mlvl_cls = nn.ModuleList()
    self.mlvl_reg = nn.ModuleList()
    self.mlvl_gfl_cls = nn.ModuleList()
    self.mlvl_gfl_reg = nn.ModuleList()
    self.mlvl_conf_vector = nn.ModuleList()
    for level in range(self.num_out):
        cls_convs = nn.ModuleList()
        reg_convs = nn.ModuleList()
        for i in range(self.stacked_convs):
            chn = self.in_channels if i == 0 else self.feat_channels
            cls_convs.append(
                ConvModule(chn, self.feat_channels, 3,
                           stride=1, padding=1,
                           conv_cfg=self.conv_cfg,
                           norm_cfg=self.norm_cfg))
            reg_convs.append(
                ConvModule(chn, self.feat_channels, 3,
                           stride=1, padding=1,
                           conv_cfg=self.conv_cfg,
                           norm_cfg=self.norm_cfg))
        gfl_cls = nn.Conv2d(
            self.feat_channels,
            self.num_anchors * self.cls_out_channels,
            1,
            padding=0)
        gfl_reg = nn.Conv2d(
            self.feat_channels, 4 * (self.reg_max + 1), 1, padding=0)
        topk = topk_conv(self.reg_max, self.total_dim)
        conf_vector = [nn.Conv2d(4 * self.total_dim, self.reg_channels, 1)]
        conf_vector += [self.relu]
        conf_vector += [nn.Conv2d(self.reg_channels, 1, 1), nn.Sigmoid()]

        self.topk_conv.append(topk)
        self.mlvl_cls.append(cls_convs)
        self.mlvl_reg.append(reg_convs)
        self.mlvl_gfl_cls.append(gfl_cls)
        self.mlvl_gfl_reg.append(gfl_reg)
        self.mlvl_conf_vector.append(nn.Sequential(*conf_vector))

    if self.scales:
        self.scales = nn.ModuleList([
            Scale(float(self.scales))
            for _ in self.anchor_generator.strides
        ])
    else:
        self.scales = nn.ModuleList(
            [nn.Identity() for _ in self.anchor_generator.strides])
def __init__(self,
             in_channels,
             out_stride=32,
             width_mult=1,
             index_mode='m2o',
             aspp=True,
             norm_cfg=dict(type='BN'),
             freeze_bn=False,
             use_nonlinear=True,
             use_context=True):
    super(IndexNetEncoder, self).__init__()
    if out_stride not in [16, 32]:
        raise ValueError(f'out_stride must be 16 or 32, got {out_stride}')

    self.out_stride = out_stride
    self.width_mult = width_mult

    # the index network described in the paper is named index_block here
    if index_mode == 'holistic':
        index_block = HolisticIndexBlock
    elif index_mode in ('o2o', 'm2o'):
        index_block = partial(DepthwiseIndexBlock, mode=index_mode)
    else:
        raise NameError(f'Unknown index block mode {index_mode}')

    # default setting
    initial_channels = 32
    inverted_residual_setting = [
        # expand_ratio, input_chn, output_chn, num_blocks, stride, dilation
        [1, initial_channels, 16, 1, 1, 1],
        [6, 16, 24, 2, 2, 1],
        [6, 24, 32, 3, 2, 1],
        [6, 32, 64, 4, 2, 1],
        [6, 64, 96, 3, 1, 1],
        [6, 96, 160, 3, 2, 1],
        [6, 160, 320, 1, 1, 1],
    ]

    # update layer setting according to width_mult
    initial_channels = int(initial_channels * width_mult)
    for layer_setting in inverted_residual_setting:
        # update in_channels and out_channels
        layer_setting[1] = int(layer_setting[1] * self.width_mult)
        layer_setting[2] = int(layer_setting[2] * self.width_mult)

    if out_stride == 32:
        # It should be noted that layer 0 is not an InvertedResidual layer
        # but a ConvModule. Thus, the index of InvertedResidual layers in
        # downsampled_layers starts at 1.
        self.downsampled_layers = [0, 2, 3, 4, 6]
    else:  # out_stride is 16
        self.downsampled_layers = [0, 2, 3, 4]
        # if out_stride is 16, increase the dilation of the last two
        # InvertedResidual layers to enlarge the receptive field
        inverted_residual_setting[5][5] = 2
        inverted_residual_setting[6][5] = 2

    # build the first layer
    self.layers = nn.ModuleList([
        ConvModule(
            in_channels,
            initial_channels,
            3,
            padding=1,
            norm_cfg=norm_cfg,
            act_cfg=dict(type='ReLU6'))
    ])
    # build bottleneck layers
    for layer_setting in inverted_residual_setting:
        self.layers.append(self._make_layer(layer_setting, norm_cfg))

    # freeze encoder batch norm layers
    if freeze_bn:
        self.freeze_bn()

    # build index blocks
    self.index_layers = nn.ModuleList()
    for layer in self.downsampled_layers:
        # inverted_residual_setting begins at layer1; the in_channels of
        # layer1 is the out_channels of layer0
        self.index_layers.append(
            index_block(inverted_residual_setting[layer][1], norm_cfg,
                        use_context, use_nonlinear))
    self.avg_pool = nn.AvgPool2d(2, stride=2)

    if aspp:
        dilation = (2, 4, 8) if out_stride == 32 else (6, 12, 18)
        self.dconv = ASPP(
            320 * self.width_mult,
            160,
            mid_channels=int(256 * self.width_mult),
            dilations=dilation,
            norm_cfg=norm_cfg,
            act_cfg=dict(type='ReLU6'),
            separable_conv=True)
    else:
        self.dconv = ConvModule(
            320 * self.width_mult,
            160,
            1,
            norm_cfg=norm_cfg,
            act_cfg=dict(type='ReLU6'))

    self.out_channels = 160
def __init__(self,
             in_channels,
             out_channels,
             feat_channels=128,
             middle_channels=32,
             num_stages=6,
             norm_cfg=dict(type='BN', requires_grad=True)):
    super().__init__()

    assert in_channels == 3

    self.num_stages = num_stages
    assert self.num_stages >= 1

    self.stem = nn.Sequential(
        ConvModule(in_channels, 128, 9, padding=4, norm_cfg=norm_cfg),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ConvModule(128, 32, 5, padding=2, norm_cfg=norm_cfg),
        ConvModule(32, 512, 9, padding=4, norm_cfg=norm_cfg),
        ConvModule(512, 512, 1, padding=0, norm_cfg=norm_cfg),
        ConvModule(512, out_channels, 1, padding=0, act_cfg=None))

    self.middle = nn.Sequential(
        ConvModule(in_channels, 128, 9, padding=4, norm_cfg=norm_cfg),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ConvModule(128, 128, 9, padding=4, norm_cfg=norm_cfg),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

    self.cpm_stages = nn.ModuleList([
        CpmBlock(middle_channels + out_channels, feat_channels, norm_cfg)
        for _ in range(num_stages - 1)
    ])

    self.middle_conv = nn.ModuleList([
        nn.Sequential(
            ConvModule(128, middle_channels, 5, padding=2,
                       norm_cfg=norm_cfg)) for _ in range(num_stages - 1)
    ])

    self.out_convs = nn.ModuleList([
        nn.Sequential(
            ConvModule(feat_channels, feat_channels, 1, padding=0,
                       norm_cfg=norm_cfg),
            ConvModule(feat_channels, out_channels, 1, act_cfg=None))
        for _ in range(num_stages - 1)
    ])
def __init__(self,
             in_channels,
             out_channels,
             base_channels=64,
             norm_cfg=dict(type='IN'),
             use_dropout=False,
             num_blocks=9,
             padding_mode='reflect',
             init_cfg=dict(type='normal', gain=0.02)):
    super().__init__()
    assert num_blocks >= 0, ('Number of residual blocks must be '
                             f'non-negative, but got {num_blocks}.')
    assert isinstance(norm_cfg, dict), ("'norm_cfg' should be dict, but "
                                        f'got {type(norm_cfg)}')
    assert 'type' in norm_cfg, "'norm_cfg' must have key 'type'"
    # We use norm layers in the resnet generator.
    # Only for IN, use bias to follow cyclegan's original implementation.
    use_bias = norm_cfg['type'] == 'IN'

    model = []
    model += [
        ConvModule(
            in_channels=in_channels,
            out_channels=base_channels,
            kernel_size=7,
            padding=3,
            bias=use_bias,
            norm_cfg=norm_cfg,
            padding_mode=padding_mode)
    ]

    num_down = 2
    # add downsampling layers
    for i in range(num_down):
        multiple = 2**i
        model += [
            ConvModule(
                in_channels=base_channels * multiple,
                out_channels=base_channels * multiple * 2,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=use_bias,
                norm_cfg=norm_cfg)
        ]

    # add residual blocks
    multiple = 2**num_down
    for i in range(num_blocks):
        model += [
            ResidualBlockWithDropout(
                base_channels * multiple,
                padding_mode=padding_mode,
                norm_cfg=norm_cfg,
                use_dropout=use_dropout)
        ]

    # add upsampling layers
    for i in range(num_down):
        multiple = 2**(num_down - i)
        model += [
            ConvModule(
                in_channels=base_channels * multiple,
                out_channels=base_channels * multiple // 2,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=use_bias,
                conv_cfg=dict(type='deconv', output_padding=1),
                norm_cfg=norm_cfg)
        ]

    model += [
        ConvModule(
            in_channels=base_channels,
            out_channels=out_channels,
            kernel_size=7,
            padding=3,
            bias=True,
            norm_cfg=None,
            act_cfg=dict(type='Tanh'),
            padding_mode=padding_mode)
    ]

    self.model = nn.Sequential(*model)
    self.init_type = ('normal' if init_cfg is None else
                      init_cfg.get('type', 'normal'))
    self.init_gain = (0.02 if init_cfg is None else
                      init_cfg.get('gain', 0.02))
def __init__(self,
             downsample_times=5,
             num_stacks=2,
             stage_channels=(256, 256, 384, 384, 384, 512),
             stage_blocks=(2, 2, 2, 2, 2, 4),
             feat_channel=256,
             norm_cfg=dict(type='BN', requires_grad=True),
             pretrained=None,
             init_cfg=None):
    assert init_cfg is None, 'To prevent abnormal initialization ' \
                             'behavior, init_cfg is not allowed to be set'
    super(HourglassNet, self).__init__(init_cfg)

    self.num_stacks = num_stacks
    assert self.num_stacks >= 1
    assert len(stage_channels) == len(stage_blocks)
    assert len(stage_channels) > downsample_times

    cur_channel = stage_channels[0]

    self.stem = nn.Sequential(
        ConvModule(
            3, cur_channel // 2, 7, padding=3, stride=2,
            norm_cfg=norm_cfg),
        ResLayer(
            BasicBlock,
            cur_channel // 2,
            cur_channel,
            1,
            stride=2,
            norm_cfg=norm_cfg))

    self.hourglass_modules = nn.ModuleList([
        HourglassModule(downsample_times, stage_channels, stage_blocks)
        for _ in range(num_stacks)
    ])

    self.inters = ResLayer(
        BasicBlock,
        cur_channel,
        cur_channel,
        num_stacks - 1,
        norm_cfg=norm_cfg)

    self.conv1x1s = nn.ModuleList([
        ConvModule(
            cur_channel, cur_channel, 1, norm_cfg=norm_cfg, act_cfg=None)
        for _ in range(num_stacks - 1)
    ])

    self.out_convs = nn.ModuleList([
        ConvModule(
            cur_channel, feat_channel, 3, padding=1, norm_cfg=norm_cfg)
        for _ in range(num_stacks)
    ])

    self.remap_convs = nn.ModuleList([
        ConvModule(
            feat_channel, cur_channel, 1, norm_cfg=norm_cfg, act_cfg=None)
        for _ in range(num_stacks - 1)
    ])

    self.relu = nn.ReLU(inplace=True)
def __init__(self,
             num_classes,
             cls_in_channels=256,
             reg_in_channels=256,
             roi_feat_size=7,
             reg_feat_up_ratio=2,
             reg_pre_kernel=3,
             reg_post_kernel=3,
             reg_pre_num=2,
             reg_post_num=1,
             cls_out_channels=1024,
             reg_offset_out_channels=256,
             reg_cls_out_channels=256,
             num_cls_fcs=1,
             num_reg_fcs=0,
             reg_class_agnostic=True,
             norm_cfg=None,
             bbox_coder=dict(
                 type='BucketingBBoxCoder',
                 num_buckets=14,
                 scale_factor=1.7),
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0),
             loss_bbox_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=True,
                 loss_weight=1.0),
             loss_bbox_reg=dict(
                 type='SmoothL1Loss', beta=0.1, loss_weight=1.0)):
    super(SABLHead, self).__init__()
    self.cls_in_channels = cls_in_channels
    self.reg_in_channels = reg_in_channels
    self.roi_feat_size = roi_feat_size
    self.reg_feat_up_ratio = int(reg_feat_up_ratio)
    self.num_buckets = bbox_coder['num_buckets']
    assert self.reg_feat_up_ratio // 2 >= 1
    self.up_reg_feat_size = roi_feat_size * self.reg_feat_up_ratio
    assert self.up_reg_feat_size == bbox_coder['num_buckets']
    self.reg_pre_kernel = reg_pre_kernel
    self.reg_post_kernel = reg_post_kernel
    self.reg_pre_num = reg_pre_num
    self.reg_post_num = reg_post_num
    self.num_classes = num_classes
    self.cls_out_channels = cls_out_channels
    self.reg_offset_out_channels = reg_offset_out_channels
    self.reg_cls_out_channels = reg_cls_out_channels
    self.num_cls_fcs = num_cls_fcs
    self.num_reg_fcs = num_reg_fcs
    self.reg_class_agnostic = reg_class_agnostic
    assert self.reg_class_agnostic
    self.norm_cfg = norm_cfg

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox_cls = build_loss(loss_bbox_cls)
    self.loss_bbox_reg = build_loss(loss_bbox_reg)

    self.cls_fcs = self._add_fc_branch(self.num_cls_fcs,
                                       self.cls_in_channels,
                                       self.roi_feat_size,
                                       self.cls_out_channels)

    self.side_num = int(np.ceil(self.num_buckets / 2))

    if self.reg_feat_up_ratio > 1:
        self.upsample_x = nn.ConvTranspose1d(
            reg_in_channels,
            reg_in_channels,
            self.reg_feat_up_ratio,
            stride=self.reg_feat_up_ratio)
        self.upsample_y = nn.ConvTranspose1d(
            reg_in_channels,
            reg_in_channels,
            self.reg_feat_up_ratio,
            stride=self.reg_feat_up_ratio)

    self.reg_pre_convs = nn.ModuleList()
    for i in range(self.reg_pre_num):
        reg_pre_conv = ConvModule(
            reg_in_channels,
            reg_in_channels,
            kernel_size=reg_pre_kernel,
            padding=reg_pre_kernel // 2,
            norm_cfg=norm_cfg,
            act_cfg=dict(type='ReLU'))
        self.reg_pre_convs.append(reg_pre_conv)

    self.reg_post_conv_xs = nn.ModuleList()
    for i in range(self.reg_post_num):
        reg_post_conv_x = ConvModule(
            reg_in_channels,
            reg_in_channels,
            kernel_size=(1, reg_post_kernel),
            padding=(0, reg_post_kernel // 2),
            norm_cfg=norm_cfg,
            act_cfg=dict(type='ReLU'))
        self.reg_post_conv_xs.append(reg_post_conv_x)

    self.reg_post_conv_ys = nn.ModuleList()
    for i in range(self.reg_post_num):
        reg_post_conv_y = ConvModule(
            reg_in_channels,
            reg_in_channels,
            kernel_size=(reg_post_kernel, 1),
            padding=(reg_post_kernel // 2, 0),
            norm_cfg=norm_cfg,
            act_cfg=dict(type='ReLU'))
        self.reg_post_conv_ys.append(reg_post_conv_y)

    self.reg_conv_att_x = nn.Conv2d(reg_in_channels, 1, 1)
    self.reg_conv_att_y = nn.Conv2d(reg_in_channels, 1, 1)

    self.fc_cls = nn.Linear(self.cls_out_channels, self.num_classes + 1)
    self.relu = nn.ReLU(inplace=True)

    self.reg_cls_fcs = self._add_fc_branch(self.num_reg_fcs,
                                           self.reg_in_channels, 1,
                                           self.reg_cls_out_channels)
    self.reg_offset_fcs = self._add_fc_branch(self.num_reg_fcs,
                                              self.reg_in_channels, 1,
                                              self.reg_offset_out_channels)

    self.fc_reg_cls = nn.Linear(self.reg_cls_out_channels, 1)
    self.fc_reg_offset = nn.Linear(self.reg_offset_out_channels, 1)
def make_res_layer(block,
                   inplanes,
                   planes,
                   blocks,
                   stride=1,
                   dilation=1,
                   style='pytorch',
                   conv_cfg=None,
                   norm_cfg=None,
                   act_cfg=None,
                   with_cp=False):
    """Build residual layer for ResNet.

    Args:
        block (nn.Module): Residual module to be built.
        inplanes (int): Number of channels for the input feature in each
            block.
        planes (int): Number of channels for the output feature in each
            block.
        blocks (int): Number of residual blocks.
        stride (int): Stride in the conv layer. Default: 1.
        dilation (int): Spacing between kernel elements. Default: 1.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the
            stride-two layer is the 3x3 conv layer, otherwise the
            stride-two layer is the first 1x1 conv layer.
            Default: 'pytorch'.
        conv_cfg (dict | None): Config for conv layers. Default: None.
        norm_cfg (dict | None): Config for norm layers. Default: None.
        act_cfg (dict | None): Config for activation layers.
            Default: None.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save
            some memory while slowing down the training speed.
            Default: False.

    Returns:
        nn.Module: A residual layer for the given config.
    """
    downsample = None
    if stride != 1 or inplanes != planes * block.expansion:
        downsample = ConvModule(
            inplanes,
            planes * block.expansion,
            kernel_size=1,
            stride=stride,
            bias=False,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

    layers = []
    layers.append(
        block(
            inplanes,
            planes,
            stride,
            dilation,
            downsample,
            style=style,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            with_cp=with_cp))
    inplanes = planes * block.expansion
    for _ in range(1, blocks):
        layers.append(
            block(
                inplanes,
                planes,
                1,
                dilation,
                style=style,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                with_cp=with_cp))

    return nn.Sequential(*layers)
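# Usage sketch for make_res_layer above (`Bottleneck` with `expansion = 4`
# is an assumed mmcv-style block; the numbers follow a typical ResNet-50
# third stage and are illustrative):
layer = make_res_layer(
    Bottleneck,
    inplanes=512,   # output channels of the previous stage
    planes=256,     # bottleneck width; the stage outputs planes * 4 = 1024
    blocks=6,
    stride=2,       # only the first block downsamples
    norm_cfg=dict(type='BN'))
# The 1x1 `downsample` ConvModule matches both the stride and the channel
# change of the first block, keeping the residual addition shape-consistent.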
def __init__(self,
             in_channels,
             lateral_channels=256,
             out_channels=64,
             bias_on_lateral=False,
             bn_re_on_lateral=False,
             bias_on_smooth=False,
             bn_re_on_smooth=False,
             conv_after_concat=False):
    super(FPNC, self).__init__()
    assert isinstance(in_channels, list)
    self.in_channels = in_channels
    self.lateral_channels = lateral_channels
    self.out_channels = out_channels
    self.num_ins = len(in_channels)
    self.bn_re_on_lateral = bn_re_on_lateral
    self.bn_re_on_smooth = bn_re_on_smooth
    self.conv_after_concat = conv_after_concat
    self.lateral_convs = nn.ModuleList()
    self.smooth_convs = nn.ModuleList()
    self.num_outs = self.num_ins

    for i in range(self.num_ins):
        norm_cfg = None
        act_cfg = None
        if self.bn_re_on_lateral:
            norm_cfg = dict(type='BN')
            act_cfg = dict(type='ReLU')
        l_conv = ConvModule(
            in_channels[i],
            lateral_channels,
            1,
            bias=bias_on_lateral,
            conv_cfg=None,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            inplace=False)
        norm_cfg = None
        act_cfg = None
        if self.bn_re_on_smooth:
            norm_cfg = dict(type='BN')
            act_cfg = dict(type='ReLU')
        smooth_conv = ConvModule(
            lateral_channels,
            out_channels,
            3,
            bias=bias_on_smooth,
            padding=1,
            conv_cfg=None,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            inplace=False)

        self.lateral_convs.append(l_conv)
        self.smooth_convs.append(smooth_conv)

    if self.conv_after_concat:
        norm_cfg = dict(type='BN')
        act_cfg = dict(type='ReLU')
        self.out_conv = ConvModule(
            out_channels * self.num_outs,
            out_channels * self.num_outs,
            3,
            padding=1,
            conv_cfg=None,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            inplace=False)
def __init__(self,
             in_channels,
             out_channels,
             spatial_modulation_cfg=None,
             temporal_modulation_cfg=None,
             upsample_cfg=None,
             downsample_cfg=None,
             level_fusion_cfg=None,
             aux_head_cfg=None,
             flow_type='cascade'):
    super().__init__()
    assert isinstance(in_channels, tuple)
    assert isinstance(out_channels, int)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.num_tpn_stages = len(in_channels)

    assert spatial_modulation_cfg is None or isinstance(
        spatial_modulation_cfg, dict)
    assert temporal_modulation_cfg is None or isinstance(
        temporal_modulation_cfg, dict)
    assert upsample_cfg is None or isinstance(upsample_cfg, dict)
    assert downsample_cfg is None or isinstance(downsample_cfg, dict)
    assert aux_head_cfg is None or isinstance(aux_head_cfg, dict)
    assert level_fusion_cfg is None or isinstance(level_fusion_cfg, dict)

    if flow_type not in ['cascade', 'parallel']:
        raise ValueError(
            f"flow type in TPN should be 'cascade' or 'parallel', "
            f'but got {flow_type} instead.')
    self.flow_type = flow_type

    self.temporal_modulation_ops = nn.ModuleList()
    self.upsample_ops = nn.ModuleList()
    self.downsample_ops = nn.ModuleList()

    self.level_fusion_1 = LevelFusion(**level_fusion_cfg)
    self.spatial_modulation = SpatialModulation(**spatial_modulation_cfg)

    for i in range(self.num_tpn_stages):
        if temporal_modulation_cfg is not None:
            downsample_scale = temporal_modulation_cfg[
                'downsample_scales'][i]
            temporal_modulation = TemporalModulation(
                in_channels[-1], out_channels, downsample_scale)
            self.temporal_modulation_ops.append(temporal_modulation)

        if i < self.num_tpn_stages - 1:
            if upsample_cfg is not None:
                upsample = nn.Upsample(**upsample_cfg)
                self.upsample_ops.append(upsample)

            if downsample_cfg is not None:
                downsample = DownSample(out_channels, out_channels,
                                        **downsample_cfg)
                self.downsample_ops.append(downsample)

    out_dims = level_fusion_cfg['out_channels']

    # two pyramids
    self.level_fusion_2 = LevelFusion(**level_fusion_cfg)

    self.pyramid_fusion = ConvModule(
        out_dims * 2,
        2048,
        1,
        stride=1,
        padding=0,
        bias=False,
        conv_cfg=dict(type='Conv3d'),
        norm_cfg=dict(type='BN3d', requires_grad=True))

    if aux_head_cfg is not None:
        self.aux_head = AuxHead(self.in_channels[-2], **aux_head_cfg)
    else:
        self.aux_head = None
    self.init_weights()
def __init__(self,
             num_classes,
             bbox_coder,
             train_cfg=None,
             test_cfg=None,
             vote_module_cfg=None,
             vote_aggregation_cfg=None,
             feat_channels=(128, 128),
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             objectness_loss=None,
             center_loss=None,
             dir_class_loss=None,
             dir_res_loss=None,
             size_class_loss=None,
             size_res_loss=None,
             semantic_loss=None):
    super(VoteHead, self).__init__()
    self.num_classes = num_classes
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.gt_per_seed = vote_module_cfg['gt_per_seed']
    self.num_proposal = vote_aggregation_cfg['num_point']

    self.objectness_loss = build_loss(objectness_loss)
    self.center_loss = build_loss(center_loss)
    self.dir_class_loss = build_loss(dir_class_loss)
    self.dir_res_loss = build_loss(dir_res_loss)
    self.size_class_loss = build_loss(size_class_loss)
    self.size_res_loss = build_loss(size_res_loss)
    self.semantic_loss = build_loss(semantic_loss)

    assert vote_aggregation_cfg['mlp_channels'][0] == vote_module_cfg[
        'in_channels']

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.num_sizes = self.bbox_coder.num_sizes
    self.num_dir_bins = self.bbox_coder.num_dir_bins

    self.vote_module = VoteModule(**vote_module_cfg)
    self.vote_aggregation = PointSAModule(**vote_aggregation_cfg)

    prev_channel = vote_aggregation_cfg['mlp_channels'][-1]
    conv_pred_list = list()
    for k in range(len(feat_channels)):
        conv_pred_list.append(
            ConvModule(
                prev_channel,
                feat_channels[k],
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                bias=True,
                inplace=True))
        prev_channel = feat_channels[k]
    self.conv_pred = nn.Sequential(*conv_pred_list)

    # Objectness scores (2), center residual (3),
    # heading class+residual (num_dir_bins*2),
    # size class+residual (num_sizes*4)
    conv_out_channel = (2 + 3 + self.num_dir_bins * 2 +
                        self.num_sizes * 4 + num_classes)
    self.conv_pred.add_module('conv_out',
                              nn.Conv1d(prev_channel, conv_out_channel, 1))
def __init__(self,
             gamma_w=1.0,
             gamma_b=1.0,
             gamma_d=1.0,
             pretrained=None,
             pretrained2d=False,
             in_channels=3,
             num_stages=4,
             spatial_strides=(2, 2, 2, 2),
             frozen_stages=-1,
             se_style='half',
             se_ratio=1 / 16,
             use_swish=True,
             conv_cfg=dict(type='Conv3d'),
             norm_cfg=dict(type='BN3d', requires_grad=True),
             act_cfg=dict(type='ReLU', inplace=True),
             norm_eval=False,
             with_cp=False,
             zero_init_residual=True,
             **kwargs):
    super().__init__()
    self.gamma_w = gamma_w
    self.gamma_b = gamma_b
    self.gamma_d = gamma_d

    self.pretrained = pretrained
    self.pretrained2d = pretrained2d
    self.in_channels = in_channels
    # Hard coded, can be changed by gamma_w
    self.base_channels = 24
    self.stage_blocks = [1, 2, 5, 3]

    # apply parameters gamma_w and gamma_d
    self.base_channels = self._round_width(self.base_channels,
                                           self.gamma_w)
    self.stage_blocks = [
        self._round_repeats(x, self.gamma_d) for x in self.stage_blocks
    ]

    self.num_stages = num_stages
    assert 1 <= num_stages <= 4
    self.spatial_strides = spatial_strides
    assert len(spatial_strides) == num_stages
    self.frozen_stages = frozen_stages

    self.se_style = se_style
    assert self.se_style in ['all', 'half']
    self.se_ratio = se_ratio
    assert (self.se_ratio is None) or (self.se_ratio > 0)
    self.use_swish = use_swish

    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.norm_eval = norm_eval
    self.with_cp = with_cp
    self.zero_init_residual = zero_init_residual

    self.block = BlockX3D
    self.stage_blocks = self.stage_blocks[:num_stages]
    self.layer_inplanes = self.base_channels
    self._make_stem_layer()

    self.res_layers = []
    for i, num_blocks in enumerate(self.stage_blocks):
        spatial_stride = spatial_strides[i]
        inplanes = self.base_channels * 2**i
        planes = int(inplanes * self.gamma_b)

        res_layer = self.make_res_layer(
            self.block,
            self.layer_inplanes,
            inplanes,
            planes,
            num_blocks,
            spatial_stride=spatial_stride,
            se_style=self.se_style,
            se_ratio=self.se_ratio,
            use_swish=self.use_swish,
            norm_cfg=self.norm_cfg,
            conv_cfg=self.conv_cfg,
            act_cfg=self.act_cfg,
            with_cp=with_cp,
            **kwargs)
        self.layer_inplanes = inplanes
        layer_name = f'layer{i + 1}'
        self.add_module(layer_name, res_layer)
        self.res_layers.append(layer_name)

    self.feat_dim = self.base_channels * 2**(len(self.stage_blocks) - 1)
    self.conv5 = ConvModule(
        self.feat_dim,
        int(self.feat_dim * self.gamma_b),
        kernel_size=1,
        stride=1,
        padding=0,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=self.act_cfg)
    self.feat_dim = int(self.feat_dim * self.gamma_b)
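# Illustrative depth scaling (a sketch, not part of the original file). The
# helper below mirrors the usual EfficientNet-style ceil rounding and is an
# assumption about what self._round_repeats does; gamma_d=2.2 is an
# X3D-M style value.
import math

def _round_repeats_sketch(repeats, multiplier):
    # Scale the block count and round up to the nearest integer.
    return int(math.ceil(multiplier * repeats))

# The base stage depths [1, 2, 5, 3] become [3, 5, 11, 7] under gamma_d=2.2.
assert [_round_repeats_sketch(x, 2.2) for x in [1, 2, 5, 3]] == [3, 5, 11, 7]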
def __init__(self,
             in_channels,
             stem_channels=1024,
             num_blocks=2,
             kernel_sizes=(3, 3, 3),
             dropout=0.25,
             causal=False,
             residual=True,
             use_stride_conv=False,
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             max_norm=None):
    # Protect mutable default arguments
    conv_cfg = copy.deepcopy(conv_cfg)
    norm_cfg = copy.deepcopy(norm_cfg)
    super().__init__()
    self.in_channels = in_channels
    self.stem_channels = stem_channels
    self.num_blocks = num_blocks
    self.kernel_sizes = kernel_sizes
    self.dropout = dropout
    self.causal = causal
    self.residual = residual
    self.use_stride_conv = use_stride_conv
    self.max_norm = max_norm

    assert num_blocks == len(kernel_sizes) - 1
    for ks in kernel_sizes:
        assert ks % 2 == 1, 'Only odd filter widths are supported.'

    self.expand_conv = ConvModule(
        in_channels,
        stem_channels,
        kernel_size=kernel_sizes[0],
        stride=kernel_sizes[0] if use_stride_conv else 1,
        bias='auto',
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg)

    dilation = kernel_sizes[0]
    self.tcn_blocks = nn.ModuleList()
    for i in range(1, num_blocks + 1):
        self.tcn_blocks.append(
            BasicTemporalBlock(
                in_channels=stem_channels,
                out_channels=stem_channels,
                mid_channels=stem_channels,
                kernel_size=kernel_sizes[i],
                dilation=dilation,
                dropout=dropout,
                causal=causal,
                residual=residual,
                use_stride_conv=use_stride_conv,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg))
        dilation *= kernel_sizes[i]

    if self.max_norm is not None:
        # Apply weight norm clip to conv layers
        weight_clip = WeightNormClipHook(self.max_norm)
        for module in self.modules():
            if isinstance(module, nn.modules.conv._ConvNd):
                weight_clip.register(module)

    self.dropout = nn.Dropout(dropout) if dropout > 0 else None
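# Illustrative receptive-field arithmetic (a sketch; the class name TCN and
# the 17-joint input layout are assumptions). With kernel_sizes=(3, 3, 3),
# the stem conv uses kernel 3 and the two residual blocks use dilations 3
# and 9, so the temporal receptive field is 3 * 3 * 3 = 27 frames.
tcn = TCN(
    in_channels=2 * 17,  # 17 keypoints x (x, y), flattened into channels
    stem_channels=1024,
    num_blocks=2,
    kernel_sizes=(3, 3, 3),
    dropout=0.25)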
def __init__(self,
             inplanes,
             planes,
             outplanes,
             spatial_stride=1,
             downsample=None,
             se_ratio=None,
             use_swish=True,
             conv_cfg=dict(type='Conv3d'),
             norm_cfg=dict(type='BN3d'),
             act_cfg=dict(type='ReLU'),
             with_cp=False):
    super().__init__()
    self.inplanes = inplanes
    self.planes = planes
    self.outplanes = outplanes
    self.spatial_stride = spatial_stride
    self.downsample = downsample
    self.se_ratio = se_ratio
    self.use_swish = use_swish
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.act_cfg_swish = dict(type='Swish')
    self.with_cp = with_cp

    self.conv1 = ConvModule(
        in_channels=inplanes,
        out_channels=planes,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=self.act_cfg)
    # Here we use the channel-wise conv
    self.conv2 = ConvModule(
        in_channels=planes,
        out_channels=planes,
        kernel_size=3,
        stride=(1, self.spatial_stride, self.spatial_stride),
        padding=1,
        groups=planes,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=None)

    self.swish = Swish()

    self.conv3 = ConvModule(
        in_channels=planes,
        out_channels=outplanes,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=False,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        act_cfg=None)

    if self.se_ratio is not None:
        self.se_module = SEModule(planes, self.se_ratio)

    self.relu = build_activation_layer(self.act_cfg)
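# Illustrative instantiation (a sketch; the concrete numbers are assumptions
# consistent with the X3D constructor above, e.g. base_channels=24 with
# gamma_b=2.25 giving planes = int(24 * 2.25) = 54). Channel flow: 1x1x1
# conv (inplanes -> planes), depthwise 3x3x3 conv with groups=planes,
# optional SE plus Swish, then 1x1x1 conv (planes -> outplanes).
block = BlockX3D(
    inplanes=24,
    planes=54,
    outplanes=24,
    spatial_stride=1,
    se_ratio=1 / 16,
    use_swish=True)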
def _init_layers(self):
    """Initialize layers of the head."""
    self.relu = nn.ReLU(inplace=True)
    self.mlvl_cls_convs = nn.ModuleList()
    self.mlvl_reg_convs = nn.ModuleList()
    self.mlvl_vfl_cls_convs = nn.ModuleList()
    self.mlvl_vfl_reg_convs = nn.ModuleList()
    self.mlvl_vfl_reg = nn.ModuleList()
    self.mlvl_vfl_refine_convs = nn.ModuleList()
    self.mlvl_scale = nn.ModuleList()
    self.mlvl_refine_scale = nn.ModuleList()
    if self.reg_cls_branch:
        self.mlvl_vfl_reg_cls = nn.ModuleList()

    for level in range(self.num_out):
        cls_convs = nn.ModuleList()
        reg_convs = nn.ModuleList()
        for i in range(self.stacked_convs):
            chn = self.in_channels if i == 0 else self.feat_channels
            cls_convs.append(
                ConvModule(
                    chn,
                    self.feat_channels,
                    3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg))
            reg_convs.append(
                ConvModule(
                    chn,
                    self.feat_channels,
                    3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg))
        vfl_cls_convs = nn.Conv2d(
            self.feat_channels, self.cls_out_channels, 1, padding=0)
        vfl_reg_convs = ConvModule(
            self.feat_channels,
            self.feat_channels,
            3,
            stride=1,
            padding=1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            bias=self.conv_bias)
        vfl_refine_convs = nn.Conv2d(self.feat_channels, 4, 1, padding=0)
        vfl_reg = nn.Conv2d(self.feat_channels, 4, 1, padding=0)
        vfl_reg_cls = nn.Conv2d(
            self.feat_channels, self.cls_out_channels, 1, padding=0)
        scale = Scale(1.0)
        scale_refine = Scale(1.0)

        self.mlvl_cls_convs.append(cls_convs)
        self.mlvl_reg_convs.append(reg_convs)
        self.mlvl_vfl_cls_convs.append(vfl_cls_convs)
        self.mlvl_vfl_reg_convs.append(vfl_reg_convs)
        self.mlvl_vfl_refine_convs.append(vfl_refine_convs)
        self.mlvl_vfl_reg.append(vfl_reg)
        if self.reg_cls_branch:
            self.mlvl_vfl_reg_cls.append(vfl_reg_cls)
        self.mlvl_scale.append(scale)
        self.mlvl_refine_scale.append(scale_refine)
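# Hedged sketch of how one pyramid level's modules are typically wired.
# This is NOT the head's actual forward; the wiring order is an assumption
# inferred from the module names built in _init_layers above.
def _forward_single_level_sketch(self, x, lvl):
    """Hypothetical per-level pass: stacked convs, then classification,
    box regression, and box refinement branches."""
    cls_feat, reg_feat = x, x
    for cls_conv, reg_conv in zip(self.mlvl_cls_convs[lvl],
                                  self.mlvl_reg_convs[lvl]):
        cls_feat = cls_conv(cls_feat)
        reg_feat = reg_conv(reg_feat)
    cls_score = self.mlvl_vfl_cls_convs[lvl](cls_feat)
    reg_feat = self.mlvl_vfl_reg_convs[lvl](reg_feat)
    bbox_pred = self.mlvl_scale[lvl](self.mlvl_vfl_reg[lvl](reg_feat))
    bbox_refine = self.mlvl_refine_scale[lvl](
        self.mlvl_vfl_refine_convs[lvl](reg_feat))
    return cls_score, bbox_pred, bbox_refine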