def __init__(self, scale, num_classes=1000, dropout=0.2, bn=None): r""" Arguments: - scale (:obj:`float`): scale rate of channels - num_classes (:obj:`int`): number of classification classes - dropout (:obj:`float`): dropout rate - bn (:obj:`dict`): definition of batchnorm """ super(MNASNet, self).__init__() global BN BN = get_bn(bn) assert scale > 0.0 self.scale = scale self.num_classes = num_classes depths = _get_depths(scale) layers = [ # First layer: regular conv. nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False), BN(depths[0]), nn.ReLU(inplace=True), # Depthwise separable, no skip. nn.Conv2d(depths[0], depths[0], 3, padding=1, stride=1, groups=depths[0], bias=False), BN(depths[0]), nn.ReLU(inplace=True), nn.Conv2d(depths[0], depths[1], 1, padding=0, stride=1, bias=False), BN(depths[1]), # MNASNet blocks: stacks of inverted residuals. _stack(depths[1], depths[2], 3, 2, 3, 3), _stack(depths[2], depths[3], 5, 2, 3, 3), _stack(depths[3], depths[4], 5, 2, 6, 3), _stack(depths[4], depths[5], 3, 1, 6, 2), _stack(depths[5], depths[6], 5, 2, 6, 4), _stack(depths[6], depths[7], 3, 1, 6, 1), # Final mapping to classifier input. nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False), BN(1280), nn.ReLU(inplace=True), ] self.layers = nn.Sequential(*layers) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True), nn.Linear(1280, num_classes)) self._initialize_weights()
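# Usage sketch (illustrative, assuming MNASNet is importable from its defining
# module and that forward() follows the usual layers -> avgpool -> classifier
# flow built above; neither is shown in this snippet).
import torch

model = MNASNet(scale=1.0, num_classes=1000, dropout=0.2)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))  # expected shape: (1, 1000)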
def __init__(self, cfg, num_classes=1000, scale=1.0, bn=None): # Generate RegNet ws per block b_ws, num_s, _, _ = generate_regnet( cfg['WA'], cfg['W0'], cfg['WM'], cfg['DEPTH'] ) # Convert to per stage format # ws: channel list for stages, ds: number of blocks list ws, ds = get_stages_from_blocks(b_ws, b_ws) # scale-up/down channels ws = [int(_w * scale) for _w in ws] # Generate group widths and bot muls gws = [cfg['GROUP_W'] for _ in range(num_s)] bms = [1 for _ in range(num_s)] # Adjust the compatibility of ws and gws ws, gws = adjust_ws_gs_comp(ws, bms, gws) # Use the same stride for each stage, stride set to 2 ss = [2 for _ in range(num_s)] # Use SE for RegNetY se_r = 0.25 if cfg['SE_ON'] else None # Construct the model STEM_W = int(32 * scale) global BN BN = get_bn(bn) kwargs = { "stem_w": STEM_W, "ss": ss, "ds": ds, "ws": ws, "bms": bms, "gws": gws, "se_r": se_r, "nc": num_classes, } super(RegNet, self).__init__(**kwargs)
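# Illustrative cfg dict: only the keys are those read by __init__ above; the
# values are placeholders in the style of pycls RegNetX configs and should be
# replaced with a real configuration.
cfg = {
    'WA': 36.44,     # slope of the linear width schedule
    'W0': 24,        # initial width
    'WM': 2.49,      # width multiplier between stages
    'DEPTH': 13,     # total number of blocks
    'GROUP_W': 8,    # group width of the grouped convolutions
    'SE_ON': False,  # True for RegNetY (SE ratio 0.25, see above)
}
model = RegNet(cfg, num_classes=1000, scale=1.0)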
def __init__(self, block, layers, num_classes=1000, ibn_ratio=0.5, bn=None): scale = 64 self.inplanes = scale self.ibn_ratio = ibn_ratio super(ResNetIBN, self).__init__() global BN BN = get_bn(bn) self.conv1 = nn.Conv2d(3, scale, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = BN(scale) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, scale, layers[0]) self.layer2 = self._make_layer(block, scale * 2, layers[1], stride=2) self.layer3 = self._make_layer(block, scale * 4, layers[2], stride=2) self.layer4 = self._make_layer(block, scale * 8, layers[3], stride=2) self.avgpool = nn.AvgPool2d(7) self.fc = nn.Linear(scale * 8 * block.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif isinstance(m, (nn.BatchNorm2d, SyncBatchNorm2d)): m.weight.data.fill_(1) m.bias.data.zero_() elif isinstance(m, nn.InstanceNorm2d): m.weight.data.fill_(1) m.bias.data.zero_()
def __init__(self, blocks_args=None, global_params=None, use_fc_bn=False, fc_bn_init_scale=1.0, bn=None): super(EfficientNet, self).__init__() global BN BN = get_bn(bn) if not isinstance(blocks_args, list): raise ValueError('blocks_args should be a list.') self.logger = get_logger(__name__) self._global_params = global_params self._blocks_args = blocks_args self.use_fc_bn = use_fc_bn self.fc_bn_init_scale = fc_bn_init_scale self._build()
def __init__(self, block, layers, num_classes=1000, deep_stem=False,
             avg_down=False, bypass_last_bn=False, bn=None):
    r"""
    Arguments:
        - block (:obj:`nn.Module`): block type
        - layers (:obj:`list` of 4 ints): how many layers in each stage
        - num_classes (:obj:`int`): number of classification classes
        - deep_stem (:obj:`bool`): whether to use deep_stem as the first conv
        - avg_down (:obj:`bool`): whether to use avg_down when spatial downsample
        - bypass_last_bn (:obj:`bool`): whether to zero-initialize the last BN weight
          in each BottleneckBlock (collected in bypass_bn_weight_list)
        - bn (:obj:`dict`): definition of batchnorm
    """
    super(PreactResNet, self).__init__()

    logger = get_logger(__name__)

    global BN, bypass_bn_weight_list

    BN = get_bn(bn)
    bypass_bn_weight_list = []

    self.inplanes = 64
    self.deep_stem = deep_stem
    self.avg_down = avg_down
    self.logger = get_logger(__name__)

    if self.deep_stem:
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False),
            BN(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, bias=False),
            BN(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
        )
    else:
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
    self.bn1 = BN(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
    self.final_bn = BN(512 * block.expansion)
    self.final_relu = nn.ReLU(inplace=True)
    self.avgpool = nn.AvgPool2d(7, stride=1)
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif (isinstance(m, SyncBatchNorm2d) or isinstance(m, nn.BatchNorm2d)):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            n = m.weight.size(1)
            m.weight.data.normal_(0, 1.0 / float(n))
            m.bias.data.zero_()

    if bypass_last_bn:
        for param in bypass_bn_weight_list:
            param.data.zero_()
        logger.info('bypass {} bn.weight in BottleneckBlocks'.format(
            len(bypass_bn_weight_list)))
def __init__(self, block, layers, groups, reduction, dropout_p=0.2, inplanes=128, input_3x3=True, downsample_kernel_size=3, downsample_padding=1, num_classes=1000, bn=None): """ Arguments: block (:obj:`nn.Module`): Bottleneck class. - For SENet154: SEBottleneck - For SE-ResNet models: SEResNetBottleneck - For SE-ResNeXt models: SEResNeXtBottleneck layers (:obj:`list` of :obj:`ints`): Number of residual blocks for 4 layers of the - network (layer1...layer4). groups (:obj:`int`): Number of groups for the 3x3 convolution in each bottleneck block. - For SENet154: 64 - For SE-ResNet models: 1 - For SE-ResNeXt models: 32 reduction (:obj:`int`): Reduction ratio for Squeeze-and-Excitation modules. - For all models: 16 dropout_p (:obj:`float` or :obj:`None`): Drop probability for the Dropout layer. If `None` the Dropout layer is not used. - For SENet154: 0.2 - For SE-ResNet models: None - For SE-ResNeXt models: None inplanes (:obj:`int`): Number of input channels for layer1. - For SENet154: 128 - For SE-ResNet models: 64 - For SE-ResNeXt models: 64 input_3x3 (:obj:`bool`): If `True`, use three 3x3 convolutions instead of a single 7x7 convolution in layer0. - For SENet154: True - For SE-ResNet models: False - For SE-ResNeXt models: False downsample_kernel_size (:obj:`int`): Kernel size for downsampling convolutions in layer2, layer3 and layer4. - For SENet154: 3 - For SE-ResNet models: 1 - For SE-ResNeXt models: 1 downsample_padding (:obj:`int`): Padding for downsampling convolutions in layer2, layer3 and layer4. - For SENet154: 1 - For SE-ResNet models: 0 - For SE-ResNeXt models: 0 num_classes (:obj:`int`): Number of outputs in `last_linear` layer. - For all models: 1000 """ super(SENet, self).__init__() self.inplanes = inplanes global BN BN = get_bn(bn) if input_3x3: layer0_modules = [ ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False)), ('bn1', BN(64)), ('relu1', nn.ReLU(inplace=True)), ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)), ('bn2', BN(64)), ('relu2', nn.ReLU(inplace=True)), ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1, bias=False)), ('bn3', BN(inplanes)), ('relu3', nn.ReLU(inplace=True)), ] else: layer0_modules = [ ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, padding=3, bias=False)), ('bn1', BN(inplanes)), ('relu1', nn.ReLU(inplace=True)), ] # To preserve compatibility with Caffe weights `ceil_mode=True` # is used instead of `padding=1`. 
layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True))) self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) self.layer1 = self._make_layer(block, planes=64, blocks=layers[0], groups=groups, reduction=reduction, downsample_kernel_size=1, downsample_padding=0) self.layer2 = self._make_layer( block, planes=128, blocks=layers[1], stride=2, groups=groups, reduction=reduction, downsample_kernel_size=downsample_kernel_size, downsample_padding=downsample_padding) self.layer3 = self._make_layer( block, planes=256, blocks=layers[2], stride=2, groups=groups, reduction=reduction, downsample_kernel_size=downsample_kernel_size, downsample_padding=downsample_padding) self.layer4 = self._make_layer( block, planes=512, blocks=layers[3], stride=2, groups=groups, reduction=reduction, downsample_kernel_size=downsample_kernel_size, downsample_padding=downsample_padding) self.avg_pool = nn.AvgPool2d(7, stride=1) self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None self.last_linear = nn.Linear(512 * block.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif isinstance(m, SyncBatchNorm2d) or isinstance( m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_()
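# Usage sketch (illustrative): the SENet154 configuration documented in the
# docstring above, assuming SEBottleneck is defined alongside this class and
# the reference SENet154 depth of layers=[3, 8, 36, 3].
model = SENet(SEBottleneck, [3, 8, 36, 3], groups=64, reduction=16,
              dropout_p=0.2, inplanes=128, input_3x3=True,
              downsample_kernel_size=3, downsample_padding=1,
              num_classes=1000)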
def __init__(self, num_classes=1000, scale=1., bn=None): super(GhostNet, self).__init__() global BN BN = get_bn(bn) # setting of inverted residual blocks self.cfgs = [ # k, t, c, SE, s [3, 16, 16, 0, 1], [3, 48, 24, 0, 2], [3, 72, 24, 0, 1], [5, 72, 40, 1, 2], [5, 120, 40, 1, 1], [3, 240, 80, 0, 2], [3, 200, 80, 0, 1], [3, 184, 80, 0, 1], [3, 184, 80, 0, 1], [3, 480, 112, 1, 1], [3, 672, 112, 1, 1], [5, 672, 160, 1, 2], [5, 960, 160, 0, 1], [5, 960, 160, 1, 1], [5, 960, 160, 0, 1], [5, 960, 160, 1, 1] ] # building first layer output_channel = _make_divisible(16 * scale, 4) layers = [ nn.Sequential(nn.Conv2d(3, output_channel, 3, 2, 1, bias=False), BN(output_channel), nn.ReLU(inplace=True)) ] input_channel = output_channel # building inverted residual blocks block = GhostBottleneck for k, exp_size, c, use_se, s in self.cfgs: output_channel = _make_divisible(c * scale, 4) hidden_channel = _make_divisible(exp_size * scale, 4) layers.append( block(input_channel, hidden_channel, output_channel, k, s, use_se)) input_channel = output_channel self.features = nn.Sequential(*layers) # building last several layers output_channel = _make_divisible(exp_size * scale, 4) self.squeeze = nn.Sequential( nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=False), BN(output_channel), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d((1, 1)), ) input_channel = output_channel output_channel = 1280 self.classifier = nn.Sequential( nn.Linear(input_channel, output_channel, bias=False), nn.BatchNorm1d(output_channel), nn.ReLU(inplace=True), nn.Dropout(0.2), nn.Linear(output_channel, num_classes), ) self._initialize_weights()
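# _make_divisible is not defined in this snippet. A common implementation
# (in the style of the torchvision/MobileNet reference code) is sketched below;
# the actual helper in this repository may differ.
def _make_divisible(v, divisor, min_value=None):
    """Round v to the nearest multiple of divisor, dropping at most ~10%."""
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure rounding down does not remove more than 10% of the channels.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v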
def __init__(self, block, layers, num_classes=1000, bn=None, channel_config=None, nnie_type=True): r""" Arguments: - block (:obj:`nn.Module`): block type - layers (:obj:`list` of 4 ints): how many layers in each stage - num_classes (:obj:`int`): number of classification classes - bn (:obj:`dict`): definition of batchnorm - channel_config (:obj:`dict`): configurations of the pruned channels - nnie_type (:obj:`bool`): if ``True``, the first maxpool is set with ceil_mode=True """ super(Adaptive_ResNet, self).__init__() global BN BN = get_bn(bn) self.inplanes = 64 conv1_out_ch = channel_config['conv1'] self.conv1 = nn.Conv2d(3, conv1_out_ch, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = BN(conv1_out_ch) self.relu = nn.ReLU(inplace=True) if nnie_type: self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) else: self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer( block, 64, layers[0], bottleneck_settings=channel_config['layer1']) self.layer2 = self._make_layer( block, 128, layers[1], stride=2, bottleneck_settings=channel_config['layer2']) self.layer3 = self._make_layer( block, 256, layers[2], stride=2, bottleneck_settings=channel_config['layer3']) self.layer4 = self._make_layer( block, 512, layers[3], stride=2, bottleneck_settings=channel_config['layer4']) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.fc = nn.Linear(channel_config['fc'], num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif (isinstance(m, SyncBatchNorm2d) or isinstance(m, nn.BatchNorm2d)): m.weight.data.fill_(1) m.bias.data.zero_()
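# Skeleton of the channel_config dict as read by __init__ above (illustrative):
# the keys come from the code, the values are placeholders, and each 'layerN'
# entry must match whatever _make_layer expects as bottleneck_settings, which
# is not shown in this snippet.
channel_config = {
    'conv1': 64,    # output channels of the stem conv
    'layer1': ...,  # pruned-channel settings for stage 1
    'layer2': ...,
    'layer3': ...,
    'layer4': ...,
    'fc': 2048,     # input features of the final linear layer
}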
def __init__(self, num_classes=1000, scale=1.0, inverted_residual_setting=None, round_nearest=8, block=InvertedResidual, dropout=0.2, bn=None): r""" Arguments: - num_classes (:obj:`int`): Number of classes - scale (:obj:`float`): Width multiplier, adjusts number of channels in each layer by this amount - inverted_residual_setting: Network structure - round_nearest (:obj:`int`): Round the number of channels in each layer to be a multiple of this number Set to 1 to turn off rounding - block: Module specifying inverted residual building block for mobilenet - bn (:obj:`dict`): definition of batchnorm """ super(MobileNetV2, self).__init__() global BN BN = get_bn(bn) if block is None: block = InvertedResidual input_channel = 32 last_channel = 1280 if inverted_residual_setting is None: inverted_residual_setting = [ # t, c, n, s [1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2], [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1], ] # only check the first element, assuming user knows t,c,n,s are required if len(inverted_residual_setting) == 0 or len( inverted_residual_setting[0]) != 4: raise ValueError("inverted_residual_setting should be non-empty " "or a 4-element list, got {}".format( inverted_residual_setting)) # building first layer input_channel = _make_divisible(input_channel * scale, round_nearest) self.last_channel = _make_divisible(last_channel * max(1.0, scale), round_nearest) features = [ConvBNReLU(3, input_channel, stride=2)] # building inverted residual blocks for t, c, n, s in inverted_residual_setting: output_channel = _make_divisible(c * scale, round_nearest) for i in range(n): stride = s if i == 0 else 1 features.append( block(input_channel, output_channel, stride, expand_ratio=t)) input_channel = output_channel # building last several layers features.append( ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) # make it nn.Sequential self.features = nn.Sequential(*features) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # building classifier self.classifier = nn.Sequential( nn.Dropout(dropout), nn.Linear(self.last_channel, num_classes), ) self.init_params()
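# Usage sketch (illustrative): a width-multiplied variant; scale rescales every
# channel count through _make_divisible, and the default
# inverted_residual_setting above is used.
model = MobileNetV2(num_classes=1000, scale=0.5, dropout=0.2)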
def __init__(self, num_classes=1000, width=[8, 8, 16, 48, 224],
             depth=[1, 2, 2, 1, 2], stride_stages=[2, 2, 2, 2, 2],
             kernel_size=[7, 3, 3, 3, 3, 3, 3, 3],
             expand_ratio=[0, 1, 1, 1, 1, 0.5, 0.5, 0.5],
             act_stages=['relu', 'relu', 'relu', 'relu', 'relu'],
             dropout_rate=0., bn=None):
    r"""
    Arguments:
        - num_classes (:obj:`int`): number of classification classes
        - width (:obj:`list` of 5 (stages+1) ints): channel list
        - depth (:obj:`list` of 5 (stages+1) ints): depth list for stages
        - stride_stages (:obj:`list` of 5 (stages+1) ints): stride list for stages
        - kernel_size (:obj:`list` of 8 (blocks+1) ints): kernel size list for blocks
        - expand_ratio (:obj:`list` of 8 (blocks+1) floats): expand ratio list for blocks
        - act_stages (:obj:`list` of 5 (stages+1) strs): activation list for stages
        - dropout_rate (:obj:`float`): dropout rate
        - bn (:obj:`dict`): definition of batchnorm
    """
    super(BigNAS_ResNet_Basic, self).__init__()

    global BN
    BN = get_bn(bn)

    self.depth = depth
    self.width = width
    self.kernel_size = get_same_length(kernel_size, self.depth)
    self.expand_ratio = get_same_length(expand_ratio, self.depth)
    self.dropout_rate = dropout_rate

    # first conv layer
    self.first_conv = ConvBlock(
        in_channel=3, out_channel=self.width[0],
        kernel_size=self.kernel_size[0], stride=stride_stages[0],
        act_func=act_stages[0])

    blocks = []
    _block_index = 0
    input_channel = self.width[0]

    stage_num = 1
    for s, act_func, n_block, output_channel in zip(stride_stages[1:],
                                                    act_stages[1:],
                                                    self.depth[1:],
                                                    self.width[1:]):
        _block_index += n_block
        kernel_size = self.kernel_size[_block_index]
        expand_ratio = self.expand_ratio[_block_index]
        stage_num += 1
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            basic_block = BasicBlock(
                in_channel=input_channel, out_channel=output_channel,
                kernel_size=kernel_size, expand_ratio=expand_ratio,
                stride=stride, act_func=act_func)
            blocks.append(basic_block)
            input_channel = output_channel

    self.blocks = nn.ModuleList(blocks)
    self.avg_pool = nn.AdaptiveAvgPool2d(output_size=1)
    self.classifier = LinearBlock(
        in_features=self.width[-1], out_features=num_classes, bias=True,
        dropout_rate=dropout_rate)
    self.init_model()
def __init__(self, block, layers, inplanes=64, num_classes=1000, zero_init_residual=False, groups=1, width_per_group=64, replace_stride_with_dilation=None, norm_layer=None, deep_stem=False, avg_down=False, freeze_layer=False, bn=None): super(ResNet, self).__init__() global BN self.logger = get_logger(__name__) if norm_layer is None: BN = get_bn(bn) norm_layer = BN else: norm_layer = get_norm_layer(norm_layer) self._norm_layer = norm_layer self.inplanes = inplanes self.dilation = 1 self.deep_stem = deep_stem self.avg_down = avg_down self.num_classes = num_classes self.freeze_layer = freeze_layer if replace_stride_with_dilation is None: # each element in the tuple indicates if we should replace # the 2x2 stride with a dilated convolution instead replace_stride_with_dilation = [False, False, False] if len(replace_stride_with_dilation) != 3: raise ValueError("replace_stride_with_dilation should be None " "or a 3-element tuple, got {}".format( replace_stride_with_dilation)) self.groups = groups self.base_width = width_per_group if self.deep_stem: self.conv1 = nn.Sequential( nn.Conv2d(3, inplanes // 2, kernel_size=3, stride=2, padding=1, bias=False), norm_layer(inplanes // 2), nn.ReLU(inplace=True), nn.Conv2d(inplanes // 2, inplanes // 2, kernel_size=3, stride=1, padding=1, bias=False), norm_layer(inplanes // 2), nn.ReLU(inplace=True), nn.Conv2d(inplanes // 2, inplanes, kernel_size=3, stride=1, padding=1, bias=False), ) else: self.conv1 = nn.Conv2d(3, inplanes, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = norm_layer(self.inplanes) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0]) self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1]) self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2]) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.fc = nn.Linear(512 * block.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm, SyncBatchNorm2d)): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) # Zero-initialize the last BN in each residual branch, # so that the residual branch starts with zeros, and each residual block behaves like an identity. # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 if zero_init_residual: for m in self.modules(): if isinstance(m, Bottleneck): nn.init.constant_(m.bn3.weight, 0) elif isinstance(m, BasicBlock): nn.init.constant_(m.bn2.weight, 0)
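# Usage sketch (illustrative): the usual ResNet-50 configuration, assuming the
# standard Bottleneck block referenced above; factory helpers such as
# resnet50() typically wrap this call.
model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=1000)

# ResNet-D style stem and downsampling via the extra flags:
model_d = ResNet(Bottleneck, [3, 4, 6, 3], deep_stem=True, avg_down=True)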
def __init__(self, block, layers, radix=1, groups=1, bottleneck_width=64,
             num_classes=1000, dilated=False, dilation=1,
             deep_stem=False, stem_width=64, avg_down=False,
             rectified_conv=False, rectify_avg=False,
             avd=False, avd_first=False,
             final_drop=0.0, dropblock_prob=0,
             last_gamma=False, norm_layer=None, bn=None):
    """
    Arguments:
        - block (:obj:`Block`): Class for the residual block.
          Options are BasicBlockV1, BottleneckV1
        - layers (:obj:`list` of :obj:`int`): Numbers of layers in each block
        - num_classes (:obj:`int`, default 1000): Number of classification classes
        - dilated (:obj:`bool`, default False): Applying dilation strategy to
          pretrained ResNet yielding a stride-8 model, typically used in
          Semantic Segmentation
        - norm_layer (:obj:`object`): Normalization layer used in backbone network
          (default: :class:`mxnet.gluon.nn.BatchNorm`;
          for Synchronized Cross-GPU BatchNormalization)
    """
    self.cardinality = groups
    self.bottleneck_width = bottleneck_width
    # ResNet-D params
    self.inplanes = stem_width * 2 if deep_stem else 64
    self.avg_down = avg_down
    self.last_gamma = last_gamma
    # ResNeSt params
    self.radix = radix
    self.avd = avd
    self.avd_first = avd_first

    super(ResNeSt, self).__init__()
    self.rectified_conv = rectified_conv
    self.rectify_avg = rectify_avg

    global BN
    if norm_layer is None:
        BN = get_bn(bn)
        norm_layer = BN

    if rectified_conv:
        from rfconv import RFConv2d
        conv_layer = RFConv2d
    else:
        conv_layer = nn.Conv2d
    conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {}
    if deep_stem:
        self.conv1 = nn.Sequential(
            conv_layer(3, stem_width, kernel_size=3, stride=2, padding=1,
                       bias=False, **conv_kwargs),
            norm_layer(stem_width),
            nn.ReLU(inplace=True),
            conv_layer(stem_width, stem_width, kernel_size=3, stride=1,
                       padding=1, bias=False, **conv_kwargs),
            norm_layer(stem_width),
            nn.ReLU(inplace=True),
            conv_layer(stem_width, stem_width * 2, kernel_size=3, stride=1,
                       padding=1, bias=False, **conv_kwargs),
        )
    else:
        self.conv1 = conv_layer(3, 64, kernel_size=7, stride=2, padding=3,
                                bias=False, **conv_kwargs)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(
        block, 64, layers[0], norm_layer=norm_layer, is_first=False)
    self.layer2 = self._make_layer(
        block, 128, layers[1], stride=2, norm_layer=norm_layer)
    if dilated or dilation == 4:
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
                                       dilation=2, norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
                                       dilation=4, norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
    elif dilation == 2:
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilation=1, norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
                                       dilation=2, norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
    else:
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
    self.avgpool = GlobalAvgPool2d()
    self.drop = nn.Dropout(final_drop) if final_drop > 0.0 else None
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, SyncBatchNorm2d) or isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
def __init__(self, input_size=224, num_classes=1000, model_size="1.5x", bn=None): super(ShuffleNetV2, self).__init__() self.stage_repeats = [4, 8, 4] self.model_size = model_size r"""The number of channels are slightly reduced to make WeightNet's FLOPs comparable to shufflenet baselines. """ if model_size == "0.5x": self.stage_out_channels = [-1, 24, 48, 96, 192, 1024] elif model_size == "1.0x": self.stage_out_channels = [-1, 24, 112, 224, 448, 1024] elif model_size == "1.5x": self.stage_out_channels = [-1, 24, 176, 352, 704, 1024] elif model_size == "2.0x": self.stage_out_channels = [-1, 24, 248, 496, 992, 1024] else: raise NotImplementedError global BN BN = get_bn(bn) # building first layer input_channel = self.stage_out_channels[1] self.first_conv = nn.Sequential( nn.Conv2d(3, input_channel, 3, 2, 1, bias=True), BN(input_channel), nn.ReLU(), ) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.features = [] for idxstage in range(len(self.stage_repeats)): numrepeat = self.stage_repeats[idxstage] output_channel = self.stage_out_channels[idxstage + 2] for i in range(numrepeat): if i == 0: self.features.append( ShuffleV2Block( input_channel, output_channel, mid_channels=output_channel // 2, ksize=3, stride=2, )) else: self.features.append( ShuffleV2Block( input_channel // 2, output_channel, mid_channels=output_channel // 2, ksize=3, stride=1, )) input_channel = output_channel self.features = nn.Sequential(*self.features) self.conv_last = nn.Sequential( nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=True), BN(self.stage_out_channels[-1]), nn.ReLU(), ) self.globalpool = nn.AvgPool2d(7) if self.model_size == "2.0x": self.dropout = nn.Dropout(0.2) self.classifier = nn.Sequential( nn.Linear(self.stage_out_channels[-1], num_classes, bias=True)) self._initialize_weights()
def __init__(self, num_classes=1000, scale=1.0, identity_tensor_multiplier=1.0, sand_glass_setting=None, round_nearest=8, block=None, dropout=0.0, bn=None): """ MobileNeXt main class Args: num_classes (int): Number of classes scale (float): Width multiplier - adjusts number of channels in each layer by this amount identity_tensor_multiplier(float): Identity tensor multiplier - reduce the number of element-wise additions in each block sand_glass_setting: Network structure round_nearest (int): Round the number of channels in each layer to be a multiple of this number Set to 1 to turn off rounding block: Module specifying inverted residual building block for mobilenet bn: Module specifying the normalization layer to use """ super(MobileNeXt, self).__init__() global BN BN = get_bn(bn) if block is None: block = SandGlass input_channel = 32 last_channel = 1280 # building first layer input_channel = _make_divisible(input_channel * scale, round_nearest) self.last_channel = _make_divisible(last_channel * max(1.0, scale), round_nearest) features = [ConvBNReLU(3, input_channel, stride=2)] if sand_glass_setting is None: sand_glass_setting = [ # t, c, b, s [2, 96, 1, 2], [6, 144, 1, 1], [6, 192, 3, 2], [6, 288, 3, 2], [6, 384, 4, 1], [6, 576, 4, 2], [6, 960, 2, 1], [6, self.last_channel / scale, 1, 1], ] # only check the first element, assuming user knows t,c,n,s are required if len(sand_glass_setting) == 0 or len(sand_glass_setting[0]) != 4: raise ValueError( "sand_glass_setting should be non-empty " "or a 4-element list, got {}".format(sand_glass_setting)) # building sand glass blocks for t, c, b, s in sand_glass_setting: output_channel = _make_divisible(c * scale, round_nearest) for i in range(b): stride = s if i == 0 else 1 features.append( block( input_channel, output_channel, stride, expand_ratio=t, identity_tensor_multiplier=identity_tensor_multiplier)) input_channel = output_channel # building last several layers # features.append(ConvBNReLU(nput_channel, self.last_channel, kernel_size=1)) # make it nn.Sequential self.features = nn.Sequential(*features) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # building classifier self.classifier = nn.Sequential( nn.Dropout(dropout), nn.Linear(self.last_channel, num_classes), ) self.init_params()
def __init__(self, num_classes=1000, scale=1.0, dropout=0.8,
             round_nearest=8, mode='small', bn=None):
    r"""
    Arguments:
        - num_classes (:obj:`int`): Number of classes
        - scale (:obj:`float`): Width multiplier, adjusts number of channels
          in each layer by this amount
        - dropout (:obj:`float`): Dropout rate
        - round_nearest (:obj:`int`): Round the number of channels in each layer
          to be a multiple of this number. Set to 1 to turn off rounding
        - mode (:obj:`string`): model type, 'small' or 'large'
        - bn (:obj:`dict`): definition of batchnorm
    """
    super(MobileNetV3, self).__init__()

    global BN
    BN = get_bn(bn)

    input_channel = 16
    last_channel = 1280
    if mode == 'large':
        mobile_setting = [
            # k, exp, c, se, nl, s
            [3, 16, 16, False, 'RE', 1],
            [3, 64, 24, False, 'RE', 2],
            [3, 72, 24, False, 'RE', 1],
            [5, 72, 40, True, 'RE', 2],
            [5, 120, 40, True, 'RE', 1],
            [5, 120, 40, True, 'RE', 1],
            [3, 240, 80, False, 'HS', 2],
            [3, 200, 80, False, 'HS', 1],
            [3, 184, 80, False, 'HS', 1],
            [3, 184, 80, False, 'HS', 1],
            [3, 480, 112, True, 'HS', 1],
            [3, 672, 112, True, 'HS', 1],
            [5, 672, 160, True, 'HS', 2],
            [5, 960, 160, True, 'HS', 1],
            [5, 960, 160, True, 'HS', 1],
        ]
    elif mode == 'small':
        mobile_setting = [
            # k, exp, c, se, nl, s
            [3, 16, 16, True, 'RE', 2],
            [3, 72, 24, False, 'RE', 2],
            [3, 88, 24, False, 'RE', 1],
            [5, 96, 40, True, 'HS', 2],
            [5, 240, 40, True, 'HS', 1],
            [5, 240, 40, True, 'HS', 1],
            [5, 120, 48, True, 'HS', 1],
            [5, 144, 48, True, 'HS', 1],
            [5, 288, 96, True, 'HS', 2],
            [5, 576, 96, True, 'HS', 1],
            [5, 576, 96, True, 'HS', 1],
        ]
    else:
        raise NotImplementedError

    # building first layer
    last_channel = _make_divisible(
        last_channel * scale, round_nearest) if scale > 1.0 else last_channel
    self.features = [conv_bn(3, input_channel, 2, activation=Hswish)]
    self.classifier = []

    # building mobile blocks
    for k, exp, c, se, nl, s in mobile_setting:
        output_channel = _make_divisible(c * scale, round_nearest)
        exp_channel = _make_divisible(exp * scale, round_nearest)
        self.features.append(
            InvertedResidual(input_channel, output_channel, k, s,
                             exp_channel, se, nl))
        input_channel = output_channel

    # building last several layers
    if mode == 'large':
        last_conv = _make_divisible(960 * scale, round_nearest)
        self.features.append(
            conv_1x1_bn(input_channel, last_conv, activation=Hswish))
        self.features.append(nn.AdaptiveAvgPool2d(1))
        self.features.append(nn.Conv2d(last_conv, last_channel, 1, 1, 0))
        self.features.append(Hswish(inplace=True))
    elif mode == 'small':
        last_conv = _make_divisible(576 * scale, round_nearest)
        self.features.append(
            conv_1x1_bn(input_channel, last_conv, activation=Hswish))
        self.features.append(nn.AdaptiveAvgPool2d(1))
        self.features.append(nn.Conv2d(last_conv, last_channel, 1, 1, 0))
        self.features.append(Hswish(inplace=True))
    else:
        raise NotImplementedError

    self.features = nn.Sequential(*self.features)
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.classifier = nn.Sequential(
        nn.Dropout(p=dropout),
        nn.Linear(last_channel, num_classes),
    )
    self.init_params()
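# Usage sketch (illustrative): 'small' and 'large' select the two
# mobile_setting tables above.
model_small = MobileNetV3(num_classes=1000, scale=1.0, mode='small')
model_large = MobileNetV3(num_classes=1000, scale=1.0, mode='large')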
def __init__(self, stages_repeats, stages_out_channels, num_classes=1000, bn=None): r""" - stages_repeats (:obj:`list` of 3 ints): how many layers in each stage - stages_out_channels (:obj:`list` of 5 ints): output channels - num_classes (:obj:`int`): number of classification classes - bn (:obj:`dict`): definition of batchnorm """ super(ShuffleNetV2, self).__init__() if len(stages_repeats) != 3: raise ValueError( 'expected stages_repeats as list of 3 positive ints') if len(stages_out_channels) != 5: raise ValueError( 'expected stages_out_channels as list of 5 positive ints') self._stage_out_channels = stages_out_channels global BN BN = get_bn(bn) input_channels = 3 output_channels = self._stage_out_channels[0] self.conv1 = nn.Sequential( nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False), BN(output_channels), nn.ReLU(inplace=True), ) input_channels = output_channels self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] for name, repeats, output_channels in zip( stage_names, stages_repeats, self._stage_out_channels[1:]): seq = [InvertedResidual(input_channels, output_channels, 2)] for i in range(repeats - 1): seq.append( InvertedResidual(output_channels, output_channels, 1)) setattr(self, name, nn.Sequential(*seq)) input_channels = output_channels output_channels = self._stage_out_channels[-1] self.conv5 = nn.Sequential( nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False), BN(output_channels), nn.ReLU(inplace=True), ) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.fc = nn.Linear(output_channels, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif (isinstance(m, SyncBatchNorm2d) or isinstance(m, nn.BatchNorm2d)): m.weight.data.fill_(1) m.bias.data.zero_() elif isinstance(m, nn.Linear): n = m.weight.size(1) m.weight.data.normal_(0, 1.0 / float(n)) m.bias.data.zero_()
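# Usage sketch (illustrative): the widely used 1.0x configuration (as in
# torchvision's shufflenet_v2_x1_0); any 3 repeats / 5 channel values that
# satisfy the checks above will work.
model = ShuffleNetV2(stages_repeats=[4, 8, 4],
                     stages_out_channels=[24, 116, 232, 464, 1024],
                     num_classes=1000)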
def __init__(self, stages, bn=None): super(HighResolutionNet, self).__init__() global BN BN = get_bn(bn) self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False) self.bn1 = BN(64) self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False) self.bn2 = BN(64) self.relu = nn.ReLU(inplace=True) self.stage1_cfg = stages['STAGE1'] num_channels = self.stage1_cfg['NUM_CHANNELS'][0] block = blocks_dict[self.stage1_cfg['BLOCK']] num_blocks = self.stage1_cfg['NUM_BLOCKS'][0] self.layer1 = self._make_layer(block, 64, num_channels, num_blocks) stage1_out_channel = block.expansion*num_channels self.stage2_cfg = stages['STAGE2'] num_channels = self.stage2_cfg['NUM_CHANNELS'] block = blocks_dict[self.stage2_cfg['BLOCK']] num_channels = [ num_channels[i] * block.expansion for i in range(len(num_channels))] self.transition1 = self._make_transition_layer( [stage1_out_channel], num_channels) self.stage2, pre_stage_channels = self._make_stage( self.stage2_cfg, num_channels) self.stage3_cfg = stages['STAGE3'] num_channels = self.stage3_cfg['NUM_CHANNELS'] block = blocks_dict[self.stage3_cfg['BLOCK']] num_channels = [ num_channels[i] * block.expansion for i in range(len(num_channels))] self.transition2 = self._make_transition_layer( pre_stage_channels, num_channels) self.stage3, pre_stage_channels = self._make_stage( self.stage3_cfg, num_channels) self.stage4_cfg = stages['STAGE4'] num_channels = self.stage4_cfg['NUM_CHANNELS'] block = blocks_dict[self.stage4_cfg['BLOCK']] num_channels = [ num_channels[i] * block.expansion for i in range(len(num_channels))] self.transition3 = self._make_transition_layer( pre_stage_channels, num_channels) self.stage4, pre_stage_channels = self._make_stage( self.stage4_cfg, num_channels, multi_scale_output=True) # Classification Head self.incre_modules, self.downsamp_modules, \ self.final_layer = self._make_head(pre_stage_channels) self.classifier = nn.Linear(2048, 1000) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif (isinstance(m, SyncBatchNorm2d) or isinstance(m, nn.BatchNorm2d)): m.weight.data.fill_(1) m.bias.data.zero_() elif isinstance(m, nn.Linear): n = m.weight.size(1) m.weight.data.normal_(0, 1.0/float(n)) m.bias.data.zero_()
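# Illustrative stages dict: values follow the common upstream HRNet-W18
# settings, not this file. Only NUM_CHANNELS, BLOCK and NUM_BLOCKS are read
# directly above; _make_stage may require further keys (e.g. NUM_MODULES,
# NUM_BRANCHES, FUSE_METHOD in the upstream HRNet format).
stages = {
    'STAGE1': {'NUM_CHANNELS': [64], 'BLOCK': 'BOTTLENECK', 'NUM_BLOCKS': [4]},
    'STAGE2': {'NUM_CHANNELS': [18, 36], 'BLOCK': 'BASIC',
               'NUM_BLOCKS': [4, 4]},
    'STAGE3': {'NUM_CHANNELS': [18, 36, 72], 'BLOCK': 'BASIC',
               'NUM_BLOCKS': [4, 4, 4]},
    'STAGE4': {'NUM_CHANNELS': [18, 36, 72, 144], 'BLOCK': 'BASIC',
               'NUM_BLOCKS': [4, 4, 4, 4]},
}
model = HighResolutionNet(stages)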
def __init__(self, num_classes=1000, scale=1.0, inverted_residual_setting=None, round_nearest=8, block=InvertedResidual, dropout=0.2, bn=None, num_experts=1, final_condconv=False, fc_condconv=False, combine_kernel=False): super(MobileNetV2CondConv, self).__init__() global BN BN = get_bn(bn) self.logger = get_logger(__name__) self.fc_condconv = fc_condconv self.logger.info('Number of experts is {}'.format(num_experts)) self.logger.info( 'Replace finalconv with CondConv: {}'.format(final_condconv)) self.logger.info('Replace fc with CondConv: {}'.format(fc_condconv)) self.logger.info( 'Combine kernels to implement CondConv: {}'.format(combine_kernel)) if block is None: block = InvertedResidual input_channel = 32 last_channel = 1280 if inverted_residual_setting is None: inverted_residual_setting = [ # t, c, n, s [1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2], [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1], ] # only check the first element, assuming user knows t,c,n,s are required if len(inverted_residual_setting) == 0 or len( inverted_residual_setting[0]) != 4: raise ValueError("inverted_residual_setting should be non-empty " "or a 4-element list, got {}".format( inverted_residual_setting)) # building first layer input_channel = _make_divisible(input_channel * scale, round_nearest) self.last_channel = _make_divisible(last_channel * max(1.0, scale), round_nearest) features = [ConvBNReLU(3, input_channel, stride=2)] # building inverted residual blocks for t, c, n, s in inverted_residual_setting: output_channel = _make_divisible(c * scale, round_nearest) for i in range(n): stride = s if i == 0 else 1 features.append( block(input_channel, output_channel, stride, expand_ratio=t, num_experts=num_experts, combine_kernel=combine_kernel)) input_channel = output_channel # building last several layers if final_condconv: features.append( CondConvBNReLU(input_channel, self.last_channel, kernel_size=1, num_experts=num_experts, combine_kernel=combine_kernel)) else: features.append( ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) # make it nn.Sequential self.features = nn.Sequential(*features) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # building classifier if fc_condconv: # change kernel_size to the size of feature maps self.dropout = nn.Dropout(0.2) self.classifier = CondConv2d(self.last_channel, num_classes, kernel_size=1, bias=False, num_experts=num_experts, combine_kernel=combine_kernel) self.classifier_router = BasicRouter(self.last_channel, num_experts) else: self.classifier = nn.Sequential( nn.Dropout(0.2), nn.Linear(self.last_channel, num_classes), ) # weight initialization for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out') if m.bias is not None: nn.init.zeros_(m.bias) elif isinstance(m, nn.BatchNorm2d) or isinstance( m, link.nn.SyncBatchNorm2d): nn.init.ones_(m.weight) nn.init.zeros_(m.bias) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) nn.init.zeros_(m.bias)