def __init__(self,
             in_channels,
             out_channels,
             kernel_size=1,
             stride=1,
             padding=0,
             dilation=1,
             groups=1,
             padding_mode='zeros',
             weight_attr=None,
             bias_attr=None,
             data_format='NCHW'):
    super().__init__()
    self.conv = nn.Conv2D(in_channels=in_channels,
                          out_channels=out_channels,
                          kernel_size=kernel_size,
                          stride=stride,
                          padding=padding,
                          dilation=dilation,
                          groups=groups,
                          padding_mode=padding_mode,
                          weight_attr=weight_attr,
                          bias_attr=bias_attr,  # pass the bias attribute through, not weight_attr
                          data_format=data_format)
    self.flatten = nn.Flatten()
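# Hedged forward sketch for the block above (the forward method is not
# part of the snippet; this assumes the block is used as a pointwise-conv
# head followed by flattening everything after the batch axis):
def forward(self, x):
    x = self.conv(x)      # [N, C_in, H, W] -> [N, C_out, H', W']
    x = self.flatten(x)   # -> [N, C_out * H' * W']
    return x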
def __init__(self,
             config,
             stop_grad_layers=0,
             class_num=1000,
             return_patterns=None):
    super().__init__()
    self.stop_grad_layers = stop_grad_layers

    self.conv_block_1 = ConvBlock(3, 64, config[0])
    self.conv_block_2 = ConvBlock(64, 128, config[1])
    self.conv_block_3 = ConvBlock(128, 256, config[2])
    self.conv_block_4 = ConvBlock(256, 512, config[3])
    self.conv_block_5 = ConvBlock(512, 512, config[4])

    self.relu = nn.ReLU()
    self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)

    for idx, block in enumerate([
            self.conv_block_1, self.conv_block_2, self.conv_block_3,
            self.conv_block_4, self.conv_block_5
    ]):
        if self.stop_grad_layers >= idx + 1:
            for param in block.parameters():
                param.trainable = False

    self.drop = Dropout(p=0.5, mode="downscale_in_infer")
    self.fc1 = Linear(7 * 7 * 512, 4096)
    self.fc2 = Linear(4096, 4096)
    self.fc3 = Linear(4096, class_num)

    if return_patterns is not None:
        self.update_res(return_patterns)
        self.register_forward_post_hook(self._return_dict_hook)
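# Hedged forward sketch for this VGG-style net (not shown above; the
# fc1 -> relu -> drop ordering is an assumption, and 7 * 7 * 512 implies
# each ConvBlock ends in a stride-2 pool over a 224x224 input):
def forward(self, x):
    x = self.conv_block_1(x)
    x = self.conv_block_2(x)
    x = self.conv_block_3(x)
    x = self.conv_block_4(x)
    x = self.conv_block_5(x)
    x = self.flatten(x)                    # -> [N, 7 * 7 * 512]
    x = self.drop(self.relu(self.fc1(x)))
    x = self.drop(self.relu(self.fc2(x)))
    return self.fc3(x)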
def __init__(self):
    super(RNet, self).__init__(name_scope='RNet')
    weight_attr = paddle.ParamAttr(
        regularizer=paddle.regularizer.L2Decay(0.0005))
    self.conv1 = nn.Conv2D(in_channels=3,
                           out_channels=28,
                           kernel_size=3,
                           padding='valid',
                           weight_attr=weight_attr)
    self.prelu1 = nn.PReLU()
    self.pool1 = nn.MaxPool2D(kernel_size=3, stride=2, padding='same')
    self.conv2 = nn.Conv2D(in_channels=28,
                           out_channels=48,
                           kernel_size=3,
                           padding='valid',
                           weight_attr=weight_attr)
    self.prelu2 = nn.PReLU()
    self.pool2 = nn.MaxPool2D(kernel_size=3, stride=2)
    self.conv3 = nn.Conv2D(in_channels=48,
                           out_channels=64,
                           kernel_size=2,
                           padding='valid',
                           weight_attr=weight_attr)
    self.prelu3 = nn.PReLU()
    self.flatten = nn.Flatten()
    self.fc = nn.Linear(in_features=576, out_features=128)
    self.class_fc = nn.Linear(in_features=128, out_features=2)
    self.bbox_fc = nn.Linear(in_features=128, out_features=4)
    self.landmark_fc = nn.Linear(in_features=128, out_features=10)
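# Hedged forward sketch (not part of the snippet): conv/PReLU/pool trunk
# feeding the three MTCNN RNet heads. A 24x24 input is an assumption from
# the standard MTCNN pipeline; it matches fc's 576 = 64 * 3 * 3 input width.
def forward(self, x):
    x = self.pool1(self.prelu1(self.conv1(x)))
    x = self.pool2(self.prelu2(self.conv2(x)))
    x = self.prelu3(self.conv3(x))
    x = self.fc(self.flatten(x))  # -> [N, 128]
    return self.class_fc(x), self.bbox_fc(x), self.landmark_fc(x)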
def __init__(self):
    super(ONet, self).__init__()
    # paddle.nn.Sequential takes (name, layer) tuples directly rather
    # than a torch-style OrderedDict.
    self.features = nn.Sequential(
        ('conv1', nn.Conv2D(3, 32, 3, 1)),
        ('prelu1', nn.PReLU(32)),
        ('pool1', nn.MaxPool2D(3, 2, ceil_mode=True)),
        ('conv2', nn.Conv2D(32, 64, 3, 1)),
        ('prelu2', nn.PReLU(64)),
        ('pool2', nn.MaxPool2D(3, 2, ceil_mode=True)),
        ('conv3', nn.Conv2D(64, 64, 3, 1)),
        ('prelu3', nn.PReLU(64)),
        ('pool3', nn.MaxPool2D(2, 2, ceil_mode=True)),
        ('conv4', nn.Conv2D(64, 128, 2, 1)),
        ('prelu4', nn.PReLU(128)),
        ('flatten', nn.Flatten()),
        ('conv5', nn.Linear(1152, 256)),
        ('drop5', nn.Dropout(0.25)),
        ('prelu5', nn.PReLU(256)))
    self.conv6_1 = nn.Linear(256, 2)   # face classification
    self.conv6_2 = nn.Linear(256, 4)   # bounding-box regression
    self.conv6_3 = nn.Linear(256, 10)  # landmark regression

    # Load weights converted from the original torch model. Paddle
    # parameters are updated with set_value; assigning to .data is a
    # torch idiom and does not update paddle parameters.
    weights = np.load("./onet.npy", allow_pickle=True)[()]
    for n, p in self.named_parameters():
        p.set_value(weights[n])
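# Hedged forward sketch in the spirit of the original torch ONet (the
# return order and the softmax over the classification head are
# assumptions; assumes `import paddle.nn.functional as F` at module top):
def forward(self, x):
    x = self.features(x)                # [N, 256]
    a = self.conv6_1(x)                 # face classification logits
    b = self.conv6_2(x)                 # bounding-box offsets
    c = self.conv6_3(x)                 # landmark coordinates
    return b, c, F.softmax(a, axis=1)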
def __init__(self, model_name='ResNet50', last_stride=1):
    super(ResNetEmbedding, self).__init__()
    assert model_name in ['ResNet50', 'ResNet101'], \
        "Unsupported ReID arch: {}".format(model_name)
    self.in_planes = 2048  # output channels of the last ResNet50/101 stage
    self.base = eval(model_name)(last_conv_stride=last_stride)
    self.gap = nn.AdaptiveAvgPool2D(output_size=1)
    self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
    self.bn = nn.BatchNorm1D(self.in_planes, bias_attr=False)
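# Hedged forward sketch of the embedding head (not shown above):
# backbone features are globally pooled, flattened, and batch-normalized
# into the ReID embedding.
def forward(self, x):
    feat = self.base(x)                  # [N, 2048, H, W]
    feat = self.flatten(self.gap(feat))  # [N, 2048]
    return self.bn(feat)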
def __init__(self, num_inputs, num_actions):
    super(Model, self).__init__()
    self.conv1 = nn.Conv2D(num_inputs, 32, 3, stride=3)
    self.conv2 = nn.Conv2D(32, 32, 3, stride=3)
    self.conv3 = nn.Conv2D(32, 64, 3, stride=1)
    self.linear = nn.Linear(64 * 1 * 8, 256)
    self.flatten = nn.Flatten()
    self.fc = nn.Linear(256, num_actions)
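# Hedged forward sketch (the activations are an assumption; ReLU is
# typical for this DQN-style trunk; assumes `import paddle.nn.functional as F`):
def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = F.relu(self.conv3(x))
    x = F.relu(self.linear(self.flatten(x)))  # -> [N, 256]
    return self.fc(x)                          # [N, num_actions]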
def __init__(self, num_inputs, num_actions):
    super(Model, self).__init__()
    self.conv1 = nn.Conv2D(num_inputs, 32, 3, stride=2, padding=1)
    self.conv2 = nn.Conv2D(32, 64, 3, stride=2, padding=1)
    self.conv3 = nn.Conv2D(64, 64, 3, stride=2, padding=1)
    self.conv4 = nn.Conv2D(64, 64, 3, stride=2, padding=1)
    self.linear = nn.Linear(64 * 6 * 6, 512)
    self.flatten = nn.Flatten()
    self.critic_linear = nn.Linear(512, 1)
    self.actor_linear = nn.Linear(512, num_actions)
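# Hedged actor-critic forward sketch (activations assumed; a 96x96 input
# is inferred from 64 * 6 * 6 after four stride-2 convs; assumes
# `import paddle.nn.functional as F`):
def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = F.relu(self.conv3(x))
    x = F.relu(self.conv4(x))
    x = F.relu(self.linear(self.flatten(x)))
    # Policy logits and state-value estimate.
    return self.actor_linear(x), self.critic_linear(x)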
def __init__(self, num_inputs, num_actions, atoms=51):
    super(Model, self).__init__()
    self.num_actions = num_actions
    self.atoms = atoms
    self.conv1 = nn.Conv2D(num_inputs, 32, 3, stride=3)
    self.conv2 = nn.Conv2D(32, 32, 3, stride=3)
    self.conv3 = nn.Conv2D(32, 64, 3, stride=1)
    self.flatten = nn.Flatten()
    self.fc1 = nn.Linear(64 * 3 * 2, 256)
    self.fc2 = nn.Linear(256, num_actions * atoms)
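# Hedged forward sketch for this C51-style distributional head: the final
# layer is reshaped into one atoms-long distribution per action, and the
# softmax over atoms is the standard C51 step (assumed here; assumes
# `import paddle.nn.functional as F`):
def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = F.relu(self.conv3(x))
    x = F.relu(self.fc1(self.flatten(x)))
    x = self.fc2(x).reshape([-1, self.num_actions, self.atoms])
    return F.softmax(x, axis=-1)  # per-action probability mass over atoms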
def __init__(self,
             cfg,
             in_chans=3,
             class_num=1000,
             output_stride=32,
             global_pool='avg',
             drop_rate=0.,
             act_layer=nn.LeakyReLU,
             norm_layer=nn.BatchNorm2D,
             zero_init_last_bn=True,
             stage_fn=CrossStage,
             block_fn=DarkBlock):
    super().__init__()
    self.class_num = class_num
    self.drop_rate = drop_rate
    assert output_stride in (8, 16, 32)
    layer_args = dict(act_layer=act_layer, norm_layer=norm_layer)

    # Construct the stem
    self.stem, stem_feat_info = create_stem(in_chans, **cfg['stem'],
                                            **layer_args)
    self.feature_info = [stem_feat_info]
    prev_chs = stem_feat_info['num_chs']
    # reduction does not include pool
    curr_stride = stem_feat_info['reduction']
    if cfg['stem']['pool']:
        curr_stride *= 2

    # Construct the stages
    per_stage_args = _cfg_to_stage_args(cfg['stage'],
                                        curr_stride=curr_stride,
                                        output_stride=output_stride)
    self.stages = nn.LayerList()
    for i, sa in enumerate(per_stage_args):
        self.stages.add_sublayer(
            str(i), stage_fn(prev_chs, **sa, **layer_args, block_fn=block_fn))
        prev_chs = sa['out_chs']
        curr_stride *= sa['stride']
        self.feature_info += [
            dict(num_chs=prev_chs,
                 reduction=curr_stride,
                 module=f'stages.{i}')
        ]

    # Construct the head
    self.num_features = prev_chs
    self.pool = nn.AdaptiveAvgPool2D(1)
    self.flatten = nn.Flatten(1)
    self.fc = nn.Linear(prev_chs,
                        class_num,
                        weight_attr=ParamAttr(),
                        bias_attr=ParamAttr())
def __init__(self,
             class_num=1000,
             scale=1.0,
             dropout_prob=0.2,
             class_expand=1280):
    super().__init__()
    self.scale = scale
    self.class_num = class_num
    self.class_expand = class_expand
    stage_repeats = [3, 7, 3]
    stage_out_channels = [
        -1, 24, make_divisible(116 * scale), make_divisible(232 * scale),
        make_divisible(464 * scale), 1024
    ]

    self.conv1 = ConvBNLayer(in_channels=3,
                             out_channels=stage_out_channels[1],
                             kernel_size=3,
                             stride=2)
    self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)

    block_list = []
    for stage_id, num_repeat in enumerate(stage_repeats):
        for i in range(num_repeat):
            if i == 0:
                block = ESBlock2(
                    in_channels=stage_out_channels[stage_id + 1],
                    out_channels=stage_out_channels[stage_id + 2])
            else:
                block = ESBlock1(
                    in_channels=stage_out_channels[stage_id + 2],
                    out_channels=stage_out_channels[stage_id + 2])
            block_list.append(block)
    self.blocks = nn.Sequential(*block_list)

    self.conv2 = ConvBNLayer(in_channels=stage_out_channels[-2],
                             out_channels=stage_out_channels[-1],
                             kernel_size=1)
    self.avg_pool = AdaptiveAvgPool2D(1)
    self.last_conv = Conv2D(in_channels=stage_out_channels[-1],
                            out_channels=self.class_expand,
                            kernel_size=1,
                            stride=1,
                            padding=0,
                            bias_attr=False)
    self.hardswish = nn.Hardswish()
    self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
    self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
    self.fc = Linear(self.class_expand, self.class_num)
def __init__(self, num_inputs, num_actions):
    super(Model, self).__init__()
    self.conv1 = nn.Conv2D(num_inputs, 32, 3, stride=2, padding=1)
    self.bn1 = nn.BatchNorm2D(32)
    self.conv2 = nn.Conv2D(32, 64, 3, stride=2, padding=1)
    self.bn2 = nn.BatchNorm2D(64)
    self.conv3 = nn.Conv2D(64, 64, 3, stride=2, padding=1)
    self.bn3 = nn.BatchNorm2D(64)
    self.conv4 = nn.Conv2D(64, 128, 3, stride=2, padding=1)
    self.bn4 = nn.BatchNorm2D(128)
    self.linear = nn.Linear(128 * 6 * 6, 1024)
    self.flatten = nn.Flatten()
    self.critic_linear = nn.Linear(1024, 1)
    self.actor_linear = nn.Linear(1024, num_actions)
def __init__(self, block, layers, use_se=True):
    super(ResNetFace, self).__init__()
    self.inplanes = 64
    self.use_se = use_se
    self.conv1 = nn.Conv2D(3, 64, kernel_size=3, padding=1)
    self.bn1 = nn.BatchNorm2D(64)
    self.prelu = nn.PReLU()
    self.maxpool = nn.MaxPool2D(kernel_size=2, stride=2)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
    self.bn4 = nn.BatchNorm2D(512)
    self.dropout = nn.Dropout()
    self.flatten = nn.Flatten()
    self.fc5 = nn.Linear(512 * 7 * 7, 512)
    self.bn5 = nn.BatchNorm1D(512)
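# Hedged forward sketch for this face-embedding ResNet (layer order is
# implied by the attribute names; a 112x112 input yielding 7x7 feature
# maps is an assumption from typical face-recognition setups):
def forward(self, x):
    x = self.maxpool(self.prelu(self.bn1(self.conv1(x))))
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)
    x = self.dropout(self.bn4(x))
    x = self.fc5(self.flatten(x))  # -> [N, 512]
    return self.bn5(x)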
def train():
    device = paddle.set_device('cpu')  # or 'gpu'

    net = nn.Sequential(nn.Flatten(1), nn.Linear(784, 200), nn.Tanh(),
                        nn.Linear(200, 10))

    # Inputs and labels are not required for dynamic graph.
    input = InputSpec([None, 784], 'float32', 'x')
    label = InputSpec([None, 1], 'int64', 'label')

    model = paddle.Model(net, input, label)
    optim = paddle.optimizer.SGD(learning_rate=1e-3,
                                 parameters=model.parameters())
    model.prepare(optim, paddle.nn.CrossEntropyLoss(),
                  paddle.metric.Accuracy())

    data = paddle.vision.datasets.MNIST(mode='train')
    model.fit(data, epochs=2, batch_size=32, verbose=1)
def __init__(self, num_classes=1):
    super(MyNet, self).__init__()
    self.conv1 = nn.Conv2D(in_channels=3, out_channels=32, kernel_size=(3, 3))
    self.pool1 = nn.MaxPool2D(kernel_size=2, stride=2)
    self.conv2 = nn.Conv2D(in_channels=32, out_channels=64, kernel_size=(3, 3))
    self.pool2 = nn.MaxPool2D(kernel_size=2, stride=2)
    self.conv3 = nn.Conv2D(in_channels=64, out_channels=64, kernel_size=(3, 3))
    self.flatten = nn.Flatten()
    self.linear1 = nn.Linear(in_features=1024, out_features=64)
    self.linear2 = nn.Linear(in_features=64, out_features=num_classes)
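# Hedged forward sketch (ReLU activations are an assumption; the
# 1024 = 64 * 4 * 4 flatten width corresponds to a 32x32 input; assumes
# `import paddle.nn.functional as F`):
def forward(self, x):
    x = self.pool1(F.relu(self.conv1(x)))
    x = self.pool2(F.relu(self.conv2(x)))
    x = F.relu(self.conv3(x))
    x = F.relu(self.linear1(self.flatten(x)))
    return self.linear2(x)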
def __init__(self):
    super(RNet, self).__init__()
    # paddle.nn.Sequential takes (name, layer) tuples directly rather
    # than a torch-style OrderedDict.
    self.features = nn.Sequential(
        ('conv1', nn.Conv2D(3, 28, 3, 1)),
        ('prelu1', nn.PReLU(28)),
        ('pool1', nn.MaxPool2D(3, 2, ceil_mode=True)),
        ('conv2', nn.Conv2D(28, 48, 3, 1)),
        ('prelu2', nn.PReLU(48)),
        ('pool2', nn.MaxPool2D(3, 2, ceil_mode=True)),
        ('conv3', nn.Conv2D(48, 64, 2, 1)),
        ('prelu3', nn.PReLU(64)),
        ('flatten', nn.Flatten()),
        ('conv4', nn.Linear(576, 128)),
        ('prelu4', nn.PReLU(128)))
    self.conv5_1 = nn.Linear(128, 2)  # face classification
    self.conv5_2 = nn.Linear(128, 4)  # bounding-box regression

    # Load weights converted from the original torch model. Paddle
    # parameters are updated with set_value; assigning to .data is a
    # torch idiom and does not update paddle parameters.
    weights = np.load("./rnet.npy", allow_pickle=True)[()]
    for n, p in self.named_parameters():
        p.set_value(weights[n])
def __init__(self,
             config,
             stages_pattern,
             scale=1.0,
             class_num=1000,
             inplanes=STEM_CONV_NUMBER,
             class_squeeze=LAST_SECOND_CONV_LARGE,
             class_expand=LAST_CONV,
             dropout_prob=0.2,
             return_patterns=None,
             return_stages=None):
    super().__init__()

    self.cfg = config
    self.scale = scale
    self.inplanes = inplanes
    self.class_squeeze = class_squeeze
    self.class_expand = class_expand
    self.class_num = class_num

    self.conv = ConvBNLayer(
        in_c=3,
        out_c=_make_divisible(self.inplanes * self.scale),
        filter_size=3,
        stride=2,
        padding=1,
        num_groups=1,
        if_act=True,
        act="hardswish")

    self.blocks = nn.Sequential(*[
        ResidualUnit(
            in_c=_make_divisible(self.inplanes * self.scale if i == 0 else
                                 self.cfg[i - 1][2] * self.scale),
            mid_c=_make_divisible(self.scale * exp),
            out_c=_make_divisible(self.scale * c),
            filter_size=k,
            stride=s,
            use_se=se,
            act=act) for i, (k, exp, c, se, act, s) in enumerate(self.cfg)
    ])

    self.last_second_conv = ConvBNLayer(
        in_c=_make_divisible(self.cfg[-1][2] * self.scale),
        out_c=_make_divisible(self.scale * self.class_squeeze),
        filter_size=1,
        stride=1,
        padding=0,
        num_groups=1,
        if_act=True,
        act="hardswish")

    self.avg_pool = AdaptiveAvgPool2D(1)

    self.last_conv = Conv2D(
        in_channels=_make_divisible(self.scale * self.class_squeeze),
        out_channels=self.class_expand,
        kernel_size=1,
        stride=1,
        padding=0,
        bias_attr=False)

    self.hardswish = nn.Hardswish()
    if dropout_prob is not None:
        self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
    else:
        self.dropout = None
    self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)

    self.fc = Linear(self.class_expand, class_num)

    super().init_res(stages_pattern,
                     return_patterns=return_patterns,
                     return_stages=return_stages)
def __init__(self, depth_wise=False, arch=85, class_num=1000,
             with_pool=True):
    super().__init__()
    first_ch = [32, 64]
    second_kernel = 3
    max_pool = True
    grmul = 1.7
    drop_rate = 0.1

    # HarDNet68
    ch_list = [128, 256, 320, 640, 1024]
    gr = [14, 16, 20, 40, 160]
    n_layers = [8, 16, 16, 16, 4]
    downSamp = [1, 0, 1, 1, 0]

    if arch == 85:
        # HarDNet85
        first_ch = [48, 96]
        ch_list = [192, 256, 320, 480, 720, 1280]
        gr = [24, 24, 28, 36, 48, 256]
        n_layers = [8, 16, 16, 16, 16, 4]
        downSamp = [1, 0, 1, 0, 1, 0]
        drop_rate = 0.2
    elif arch == 39:
        # HarDNet39
        first_ch = [24, 48]
        ch_list = [96, 320, 640, 1024]
        grmul = 1.6
        gr = [16, 20, 64, 160]
        n_layers = [4, 16, 8, 4]
        downSamp = [1, 1, 1, 0]

    if depth_wise:
        second_kernel = 1
        max_pool = False
        drop_rate = 0.05

    blks = len(n_layers)
    self.base = nn.LayerList([])

    # First layer: standard conv3x3, stride=2
    self.base.append(
        ConvLayer(in_channels=3,
                  out_channels=first_ch[0],
                  kernel_size=3,
                  stride=2,
                  bias_attr=False))

    # Second layer
    self.base.append(
        ConvLayer(first_ch[0], first_ch[1], kernel_size=second_kernel))

    # Max-pooling or DWConv3x3 downsampling
    if max_pool:
        self.base.append(nn.MaxPool2D(kernel_size=3, stride=2, padding=1))
    else:
        self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))

    # Build all HarDNet blocks
    ch = first_ch[1]
    for i in range(blks):
        blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
        ch = blk.out_channels
        self.base.append(blk)
        if i == blks - 1 and arch == 85:
            self.base.append(nn.Dropout(0.1))
        self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
        ch = ch_list[i]
        if downSamp[i] == 1:
            if max_pool:
                self.base.append(nn.MaxPool2D(kernel_size=2, stride=2))
            else:
                self.base.append(DWConvLayer(ch, ch, stride=2))

    ch = ch_list[blks - 1]
    layers = []
    if with_pool:
        layers.append(nn.AdaptiveAvgPool2D((1, 1)))
    if class_num > 0:
        layers.append(nn.Flatten())
        layers.append(nn.Dropout(drop_rate))
        layers.append(nn.Linear(ch, class_num))
    self.base.append(nn.Sequential(*layers))
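# Hedged forward sketch: since every stage, including the pooled
# classifier head, is appended to self.base, the forward pass is
# presumably a plain sequential loop over that LayerList.
def forward(self, x):
    for layer in self.base:
        x = layer(x)
    return x  # [N, class_num] when with_pool and class_num > 0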
def __init__(self, scale=1.0, class_expand=1280):
    super(PPLCNet, self).__init__()
    self.scale = scale
    self.class_expand = class_expand

    self.conv1 = ConvBNLayer(num_channels=3,
                             filter_size=3,
                             num_filters=make_divisible(16 * scale),
                             stride=2)

    self.blocks2 = nn.Sequential(*[
        DepthwiseSeparable(num_channels=make_divisible(in_c * scale),
                           num_filters=make_divisible(out_c * scale),
                           dw_size=k,
                           stride=s,
                           use_se=se)
        for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
    ])

    self.blocks3 = nn.Sequential(*[
        DepthwiseSeparable(num_channels=make_divisible(in_c * scale),
                           num_filters=make_divisible(out_c * scale),
                           dw_size=k,
                           stride=s,
                           use_se=se)
        for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
    ])

    self.blocks4 = nn.Sequential(*[
        DepthwiseSeparable(num_channels=make_divisible(in_c * scale),
                           num_filters=make_divisible(out_c * scale),
                           dw_size=k,
                           stride=s,
                           use_se=se)
        for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
    ])

    self.blocks5 = nn.Sequential(*[
        DepthwiseSeparable(num_channels=make_divisible(in_c * scale),
                           num_filters=make_divisible(out_c * scale),
                           dw_size=k,
                           stride=s,
                           use_se=se)
        for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
    ])

    self.blocks6 = nn.Sequential(*[
        DepthwiseSeparable(num_channels=make_divisible(in_c * scale),
                           num_filters=make_divisible(out_c * scale),
                           dw_size=k,
                           stride=s,
                           use_se=se)
        for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
    ])

    self.avg_pool = AdaptiveAvgPool2D(1)

    self.last_conv = Conv2D(
        in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale),
        out_channels=self.class_expand,
        kernel_size=1,
        stride=1,
        padding=0,
        bias_attr=False)

    self.hardswish = nn.Hardswish()
    self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
def __init__(self, config, version="vb", class_num=1000, lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0], data_format="NCHW", input_image_channel=3, return_patterns=None): super().__init__() self.cfg = config self.lr_mult_list = lr_mult_list self.is_vd_mode = version == "vd" self.class_num = class_num self.num_filters = [64, 128, 256, 512] self.block_depth = self.cfg["block_depth"] self.block_type = self.cfg["block_type"] self.num_channels = self.cfg["num_channels"] self.channels_mult = 1 if self.num_channels[-1] == 256 else 4 assert isinstance(self.lr_mult_list, ( list, tuple )), "lr_mult_list should be in (list, tuple) but got {}".format( type(self.lr_mult_list)) assert len(self.lr_mult_list ) == 5, "lr_mult_list length should be 5 but got {}".format( len(self.lr_mult_list)) self.stem_cfg = { #num_channels, num_filters, filter_size, stride "vb": [[input_image_channel, 64, 7, 2]], "vd": [[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]] } self.stem = nn.Sequential(* [ ConvBNLayer( num_channels=in_c, num_filters=out_c, filter_size=k, stride=s, act="relu", lr_mult=self.lr_mult_list[0], data_format=data_format) for in_c, out_c, k, s in self.stem_cfg[version] ]) self.max_pool = MaxPool2D( kernel_size=3, stride=2, padding=1, data_format=data_format) block_list = [] for block_idx in range(len(self.block_depth)): shortcut = False for i in range(self.block_depth[block_idx]): block_list.append(globals()[self.block_type]( num_channels=self.num_channels[block_idx] if i == 0 else self.num_filters[block_idx] * self.channels_mult, num_filters=self.num_filters[block_idx], stride=2 if i == 0 and block_idx != 0 else 1, shortcut=shortcut, if_first=block_idx == i == 0 if version == "vd" else True, lr_mult=self.lr_mult_list[block_idx + 1], data_format=data_format)) shortcut = True self.blocks = nn.Sequential(*block_list) self.avg_pool = AdaptiveAvgPool2D(1, data_format=data_format) self.flatten = nn.Flatten() self.avg_pool_channels = self.num_channels[-1] * 2 stdv = 1.0 / math.sqrt(self.avg_pool_channels * 1.0) self.fc = Linear( self.avg_pool_channels, self.class_num, weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv))) self.data_format = data_format if return_patterns is not None: self.update_res(return_patterns) self.register_forward_post_hook(self._return_dict_hook)
def __init__(self,
             stages_pattern,
             scale=1.0,
             class_num=1000,
             dropout_prob=0.2,
             class_expand=1280,
             return_patterns=None,
             return_stages=None):
    super().__init__()
    self.scale = scale
    self.class_expand = class_expand

    self.conv1 = ConvBNLayer(num_channels=3,
                             filter_size=3,
                             num_filters=make_divisible(16 * scale),
                             stride=2)

    self.blocks2 = nn.Sequential(*[
        DepthwiseSeparable(num_channels=make_divisible(in_c * scale),
                           num_filters=make_divisible(out_c * scale),
                           dw_size=k,
                           stride=s,
                           use_se=se)
        for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
    ])

    self.blocks3 = nn.Sequential(*[
        DepthwiseSeparable(num_channels=make_divisible(in_c * scale),
                           num_filters=make_divisible(out_c * scale),
                           dw_size=k,
                           stride=s,
                           use_se=se)
        for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
    ])

    self.blocks4 = nn.Sequential(*[
        DepthwiseSeparable(num_channels=make_divisible(in_c * scale),
                           num_filters=make_divisible(out_c * scale),
                           dw_size=k,
                           stride=s,
                           use_se=se)
        for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
    ])

    self.blocks5 = nn.Sequential(*[
        DepthwiseSeparable(num_channels=make_divisible(in_c * scale),
                           num_filters=make_divisible(out_c * scale),
                           dw_size=k,
                           stride=s,
                           use_se=se)
        for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
    ])

    self.blocks6 = nn.Sequential(*[
        DepthwiseSeparable(num_channels=make_divisible(in_c * scale),
                           num_filters=make_divisible(out_c * scale),
                           dw_size=k,
                           stride=s,
                           use_se=se)
        for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
    ])

    self.avg_pool = AdaptiveAvgPool2D(1)

    self.last_conv = Conv2D(
        in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale),
        out_channels=self.class_expand,
        kernel_size=1,
        stride=1,
        padding=0,
        bias_attr=False)

    self.hardswish = nn.Hardswish()
    self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
    self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
    self.fc = Linear(self.class_expand, class_num)

    super().init_res(stages_pattern,
                     return_patterns=return_patterns,
                     return_stages=return_stages)
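# Hedged forward sketch for the PP-LCNet head above (the actual forward
# is not shown; this follows the usual conv1 -> blocks2..6 -> head order):
def forward(self, x):
    x = self.conv1(x)
    x = self.blocks2(x)
    x = self.blocks3(x)
    x = self.blocks4(x)
    x = self.blocks5(x)
    x = self.blocks6(x)
    x = self.avg_pool(x)
    x = self.last_conv(x)  # 1x1 conv expands to class_expand channels
    x = self.dropout(self.hardswish(x))
    x = self.flatten(x)    # -> [N, class_expand]
    return self.fc(x)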