def __init__(self, cfgs, num_classes=1000, width=1.0, dropout=0.2): super(GhostNet, self).__init__() # setting of inverted residual blocks self.cfgs = cfgs self.dropout = dropout # building first layer output_channel = _make_divisible(16 * width, 4) self.conv_stem = nn.Conv2D(output_channel, in_channels=3, kernel_size=3, strides=2, padding=1, use_bias=False) self.bn1 = nn.BatchNorm(in_channels=output_channel, momentum=0.1) self.act1 = nn.Activation('relu') input_channel = output_channel # building inverted residual blocks stages = [] block = GhostBottleneck for cfg in self.cfgs: layers = [] for k, exp_size, c, se_ratio, s in cfg: output_channel = _make_divisible(c * width, 4) hidden_channel = _make_divisible(exp_size * width, 4) layers.append( block(input_channel, hidden_channel, output_channel, k, s, se_ratio=se_ratio)) input_channel = output_channel with self.name_scope(): stage = nn.HybridSequential() for i in range(len(layers)): stage.add(layers[i]) stages.append(stage) output_channel = _make_divisible(exp_size * width, 4) with self.name_scope(): convbnrelu = nn.HybridSequential() convbnrelu.add(ConvBnAct(input_channel, output_channel, 1)) stages.append(convbnrelu) input_channel = output_channel with self.name_scope(): self.blocks = nn.HybridSequential() for i in range(len(stages)): self.blocks.add(stages[i]) # building last several layers output_channel = 1280 self.global_pool = nn.GlobalAvgPool2D() self.conv_head = nn.Conv2D(output_channel, in_channels=input_channel, kernel_size=1, strides=1, padding=0, use_bias=True) self.act2 = nn.Activation('relu') self.classifier = nn.Dense(num_classes, in_units=output_channel)
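The GhostNet constructor above relies on a `_make_divisible` helper that is not part of the snippet. A minimal sketch, assuming the conventional MobileNet/GhostNet-style rounding helper (this exact definition is an assumption, not taken from the snippet):

def _make_divisible(v, divisor, min_value=None):
    # Round a channel count to the nearest multiple of `divisor`,
    # never reducing it by more than ~10%.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v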
def __init__(self, block, layers, classes=1000, dilated=False, norm_layer=BatchNorm, activation_type='relu', norm_kwargs=None, last_gamma=False, deep_stem=False, stem_width=32, avg_down=False, final_drop=0.0, use_global_stats=False, name_prefix='', **kwargs): self.inplanes = stem_width*2 if deep_stem else 64 super(ResNetV1b, self).__init__(prefix=name_prefix) norm_kwargs = norm_kwargs if norm_kwargs is not None else {} if use_global_stats: norm_kwargs['use_global_stats'] = True self.norm_kwargs = norm_kwargs with self.name_scope(): if not deep_stem: self.conv1 = nn.Conv2D(channels=64, kernel_size=7, strides=2, padding=3, use_bias=False) else: self.conv1 = nn.HybridSequential(prefix='conv1') self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3, strides=2, padding=1, use_bias=False)) self.conv1.add(norm_layer(**({} if norm_kwargs is None else norm_kwargs))) if activation_type == 'prelu': self.conv1.add(nn.PReLU()) else: self.conv1.add(nn.Activation(activation_type)) self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3, strides=1, padding=1, use_bias=False)) self.conv1.add(norm_layer(**({} if norm_kwargs is None else norm_kwargs))) if activation_type == 'prelu': self.conv1.add(nn.PReLU()) else: self.conv1.add(nn.Activation(activation_type)) self.conv1.add(nn.Conv2D(channels=stem_width*2, kernel_size=3, strides=1, padding=1, use_bias=False)) self.bn1 = norm_layer(**({} if norm_kwargs is None else norm_kwargs)) if activation_type == 'prelu': self.relu = nn.PReLU() else: self.relu = nn.Activation(activation_type) self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1) self.layer1 = self._make_layer(1, block, 64, layers[0], avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) self.layer2 = self._make_layer(2, block, 128, layers[1], strides=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) if dilated: self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1, dilation=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1, dilation=4, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) else: self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) self.avgpool = nn.GlobalAvgPool2D() self.flat = nn.Flatten() self.drop = None if final_drop > 0.0: self.drop = nn.Dropout(final_drop) self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
def __init__(self, nbase, **kwargs):
    super(make_style, self).__init__(**kwargs)
    with self.name_scope():
        self.pool_all = nn.GlobalAvgPool2D()
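The `make_style` block only defines a global average pool; its forward pass is not shown. A plausible sketch, assuming the style vector is the pooled feature map flattened and then L2-normalized (the normalization step is an assumption):

def hybrid_forward(self, F, x):
    # pool each channel to a single value, then flatten to one vector per sample
    style = self.pool_all(x)
    style = F.Flatten(style)
    # assumption: scale the style vector to unit L2 norm
    style = F.broadcast_div(
        style, F.sqrt(F.sum(F.square(style), axis=1, keepdims=True)))
    return style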
def __init__(self, block, layers, channels, classes=1000, thumbnail=True, norm_layer=BatchNorm, norm_kwargs=None, **kwargs): super(MobileFace_AttributeV1, self).__init__(**kwargs) assert len(layers) == len(channels) - 1 with self.name_scope(): self.features = nn.HybridSequential(prefix='') self.features.add( norm_layer(scale=False, center=False, **({} if norm_kwargs is None else norm_kwargs))) if thumbnail: self.features.add(_conv3x3(channels[0], 1, 0)) else: self.features.add( nn.Conv2D(channels[0], 7, 2, 3, use_bias=False)) self.features.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.features.add(nn.Activation('relu')) self.features.add(nn.MaxPool2D(3, 2, 1)) in_channels = channels[0] for i, num_layer in enumerate(layers): # stride = 1 if i == 0 else 2 stride = 2 self.features.add( self._make_layer(block, num_layer, channels[i + 1], stride, i + 1, in_channels=in_channels, norm_layer=norm_layer, norm_kwargs=norm_kwargs)) in_channels = channels[i + 1] self.features.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.features.add(nn.Activation('relu')) self.branch1 = nn.HybridSequential(prefix='') self.branch1.add(nn.Conv2D(64, 1, 1, 0, use_bias=False)) self.branch1.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.branch1.add(nn.Activation('relu')) self.branch1.add(nn.Conv2D(128, 3, 1, 1, use_bias=False)) self.branch1.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.branch1.add(nn.Activation('relu')) self.branch1.add(nn.Conv2D(64, 1, 1, 0, use_bias=False)) self.branch1.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.branch1.add(nn.Activation('relu')) self.branch1.add(nn.Conv2D(128, 3, 1, 1, use_bias=False)) self.branch1.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.branch1.add(nn.Activation('relu')) self.branch1.add(nn.GlobalAvgPool2D()) self.branch1.add(nn.Flatten()) self.output1 = nn.Dense(2, in_units=128) self.branch2 = nn.HybridSequential(prefix='') self.branch2.add(nn.Conv2D(128, 1, 1, 0, use_bias=False)) self.branch2.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.branch2.add(nn.Activation('relu')) self.branch2.add(nn.Conv2D(256, 3, 1, 1, use_bias=False)) self.branch2.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.branch2.add(nn.Activation('relu')) self.branch2.add(nn.Conv2D(128, 1, 1, 0, use_bias=False)) self.branch2.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.branch2.add(nn.Activation('relu')) self.branch2.add(nn.Conv2D(256, 3, 1, 1, use_bias=False)) self.branch2.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.branch2.add(nn.Activation('relu')) self.branch2.add(nn.GlobalAvgPool2D()) self.branch2.add(nn.Flatten()) self.output2 = nn.Dense(6, in_units=256) self.branch3 = nn.HybridSequential(prefix='') self.branch3.add(nn.Conv2D(128, 1, 1, 0, use_bias=False)) self.branch3.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.branch3.add(nn.Activation('relu')) self.branch3.add(nn.Conv2D(256, 3, 1, 1, use_bias=False)) self.branch3.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.branch3.add(nn.Activation('relu')) self.branch3.add(nn.Conv2D(128, 1, 1, 0, use_bias=False)) self.branch3.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.branch3.add(nn.Activation('relu')) self.branch3.add(nn.Conv2D(256, 3, 1, 1, use_bias=False)) self.branch3.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) 
        self.branch3.add(nn.Activation('relu'))
        self.branch3.add(nn.GlobalAvgPool2D())
        self.branch3.add(nn.Flatten())
        self.output3 = nn.Dense(8, in_units=256)
       nn.MaxPool2D(pool_size=3, strides=2, padding=1))

b3 = nn.Sequential()
b3.add(Inception(64, (96, 128), (16, 32), 32),
       Inception(128, (128, 192), (32, 96), 64),
       nn.MaxPool2D(pool_size=3, strides=2, padding=1))

b4 = nn.Sequential()
b4.add(Inception(192, (96, 208), (16, 48), 64),
       Inception(160, (112, 224), (24, 64), 64),
       Inception(128, (128, 256), (24, 64), 64),
       Inception(112, (144, 288), (32, 64), 64),
       Inception(256, (160, 320), (32, 128), 128),
       nn.MaxPool2D(pool_size=3, strides=2, padding=1))

b5 = nn.Sequential()
b5.add(Inception(256, (160, 320), (32, 128), 128),
       Inception(384, (192, 384), (48, 128), 128),
       nn.GlobalAvgPool2D())

net = nn.Sequential()
net.add(b1, b2, b3, b4, b5, nn.Dense(10))

X = nd.random.uniform(shape=(1, 1, 96, 96))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

# 5.9.3 Load the data and train the model
print('train...')
lr, num_epochs, batch_size, ctx = 0.1, 5, 50, d2l.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
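The blocks above use the `Inception` module from the d2l GoogLeNet chapter, which is not reproduced here. A sketch of that four-branch block, assuming the standard d2l signature `Inception(c1, c2, c3, c4)`:

from mxnet import nd
from mxnet.gluon import nn

class Inception(nn.Block):
    # c1-c4 are the output channel counts of the four parallel branches
    def __init__(self, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)
        self.p1_1 = nn.Conv2D(c1, kernel_size=1, activation='relu')
        self.p2_1 = nn.Conv2D(c2[0], kernel_size=1, activation='relu')
        self.p2_2 = nn.Conv2D(c2[1], kernel_size=3, padding=1, activation='relu')
        self.p3_1 = nn.Conv2D(c3[0], kernel_size=1, activation='relu')
        self.p3_2 = nn.Conv2D(c3[1], kernel_size=5, padding=2, activation='relu')
        self.p4_1 = nn.MaxPool2D(pool_size=3, strides=1, padding=1)
        self.p4_2 = nn.Conv2D(c4, kernel_size=1, activation='relu')

    def forward(self, x):
        p1 = self.p1_1(x)
        p2 = self.p2_2(self.p2_1(x))
        p3 = self.p3_2(self.p3_1(x))
        p4 = self.p4_2(self.p4_1(x))
        return nd.concat(p1, p2, p3, p4, dim=1)  # concatenate along the channel axis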
def __init__(self, channels, init_block_channels, final_block_channels, kernel_sizes, strides_per_stage, expansion_factors, dropout_rate=0.2, tf_mode=False, bn_epsilon=1e-5, bn_use_global_stats=False, in_channels=3, in_size=(224, 224), classes=1000, **kwargs): super(EfficientNet, self).__init__(**kwargs) self.in_size = in_size self.classes = classes activation = "swish" with self.name_scope(): self.features = nn.HybridSequential(prefix="") self.features.add( EffiInitBlock(in_channels=in_channels, out_channels=init_block_channels, bn_epsilon=bn_epsilon, bn_use_global_stats=bn_use_global_stats, activation=activation, tf_mode=tf_mode)) in_channels = init_block_channels for i, channels_per_stage in enumerate(channels): kernel_sizes_per_stage = kernel_sizes[i] expansion_factors_per_stage = expansion_factors[i] stage = nn.HybridSequential(prefix="stage{}_".format(i + 1)) with stage.name_scope(): for j, out_channels in enumerate(channels_per_stage): kernel_size = kernel_sizes_per_stage[j] expansion_factor = expansion_factors_per_stage[j] strides = strides_per_stage[i] if (j == 0) else 1 if i == 0: stage.add( EffiDwsConvUnit( in_channels=in_channels, out_channels=out_channels, strides=strides, bn_epsilon=bn_epsilon, bn_use_global_stats=bn_use_global_stats, activation=activation, tf_mode=tf_mode)) else: stage.add( EffiInvResUnit( in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, strides=strides, exp_factor=expansion_factor, se_factor=4, bn_epsilon=bn_epsilon, bn_use_global_stats=bn_use_global_stats, activation=activation, tf_mode=tf_mode)) in_channels = out_channels self.features.add(stage) self.features.add( conv1x1_block(in_channels=in_channels, out_channels=final_block_channels, bn_epsilon=bn_epsilon, bn_use_global_stats=bn_use_global_stats, activation=activation)) in_channels = final_block_channels self.features.add(nn.GlobalAvgPool2D()) self.output = nn.HybridSequential(prefix="") self.output.add(nn.Flatten()) if dropout_rate > 0.0: self.output.add(nn.Dropout(rate=dropout_rate)) self.output.add(nn.Dense(units=classes, in_units=in_channels))
def __init__(self, num_classes=1000, width_mult=1.0, mode='small', **kwargs): super(MobilenetV3, self).__init__(**kwargs) input_channel = 16 last_channel = 1280 if mode == 'large': # refer to Table 1 in paper mobile_setting = [ # k, exp, c, se, nl, s, [3, 16, 16, False, 'RE', 1], [3, 64, 24, False, 'RE', 2], [3, 72, 24, False, 'RE', 1], [5, 72, 40, True, 'RE', 2], [5, 120, 40, True, 'RE', 1], [5, 120, 40, True, 'RE', 1], [3, 240, 80, False, 'HS', 2], [3, 200, 80, False, 'HS', 1], [3, 184, 80, False, 'HS', 1], [3, 184, 80, False, 'HS', 1], [3, 480, 112, True, 'HS', 1], [3, 672, 112, True, 'HS', 1], [5, 672, 112, True, 'HS', 1], # c = 112, paper set it to 160 by error [5, 672, 160, True, 'HS', 2], [5, 960, 160, True, 'HS', 1], ] elif mode == 'small': # refer to Table 2 in paper mobile_setting = [ # k, exp, c, se, nl, s, [3, 16, 16, True, 'RE', 2], [3, 72, 24, False, 'RE', 2], [3, 88, 24, False, 'RE', 1], [5, 96, 40, True, 'HS', 2], # stride = 2, paper set it to 1 by error [5, 240, 40, True, 'HS', 1], [5, 240, 40, True, 'HS', 1], [5, 120, 48, True, 'HS', 1], [5, 144, 48, True, 'HS', 1], [5, 288, 96, True, 'HS', 2], [5, 576, 96, True, 'HS', 1], [5, 576, 96, True, 'HS', 1], ] else: raise NotImplementedError # building first layer self.last_channel = make_divisible( last_channel * width_mult) if width_mult > 1.0 else last_channel with self.name_scope(): self.features = nn.HybridSequential() with self.features.name_scope(): self.features.add( ConvBlock(input_channel, 3, 1, nlin_layer=HSwish())) # building mobile blocks for k, exp, c, se, nl, s in mobile_setting: output_channel = make_divisible(c * width_mult) exp_channel = make_divisible(exp * width_mult) self.features.add( MobileBottleneck(input_channel, output_channel, k, s, exp_channel, se, nl)) input_channel = output_channel if mode == 'large': last_conv = make_divisible(960 * width_mult) self.features.add( ConvBlock(last_conv, 1, 1, nlin_layer=HSwish())) elif mode == 'small': last_conv = make_divisible(576 * width_mult) self.features.add( ConvBlock(last_conv, 1, 1, nlin_layer=HSwish())) self.features.add( SEModule(last_conv)) # refer to paper Table2 else: raise NotImplementedError self.output = nn.HybridSequential() with self.output.name_scope(): # building last several layers if mode == 'large': self.output.add( nn.GlobalAvgPool2D(), HSwish(), nn.Conv2D(last_channel, 1, padding=0, use_bias=False), HSwish(), nn.Conv2D(num_classes, 1, padding=0, use_bias=False)) elif mode == 'small': self.output.add( nn.GlobalAvgPool2D(), HSwish(), ConvBlock(last_channel, 1, 1, nlin_layer=HSwish()), ConvBlock(num_classes, 1, 1, nlin_layer=HSwish())) else: raise NotImplementedError
def __init__(self, input_size=224, n_class=1000, architecture=None, channel_scales=None, use_all_blocks=False, bn=nn.BatchNorm, use_se=False, last_conv_after_pooling=False): """ scale_cand_ids = [6, 5, 3, 5, 2, 6, 3, 4, 2, 5, 7, 5, 4, 6, 7, 4, 4, 5, 4, 3] scale_candidate_list = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0] stage_repeats = [4, 4, 8, 4] len(scale_cand_ids) == sum(stage_repeats) == # feature blocks == 20 """ super(ShuffleNasOneShot, self).__init__() # Predefined self.stage_repeats = [4, 4, 8, 4] self.stage_out_channels = [64, 160, 320, 640] self.candidate_scales = [ 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0 ] self.use_all_blocks = use_all_blocks self.use_se = use_se first_conv_out_channel = 16 last_conv_out_channel = 1024 self.last_conv_after_pooling = last_conv_after_pooling if architecture is None and channel_scales is None: fix_arch = False elif architecture is not None and channel_scales is not None: fix_arch = True assert len(architecture) == len(channel_scales) else: raise ValueError( "architecture and scale_ids should be both None or not None.") self.fix_arch = fix_arch assert input_size % 32 == 0 assert len(self.stage_repeats) == len(self.stage_out_channels) with self.name_scope(): self.features = nn.HybridSequential( ) if fix_arch else NasHybridSequential(prefix='features_') with self.features.name_scope(): # first conv self.features.add( nn.Conv2D(first_conv_out_channel, in_channels=3, kernel_size=3, strides=2, padding=1, use_bias=False, prefix='first_conv_'), bn(momentum=0.1), Activation('hard_swish' if self.use_se else 'relu')) # features input_channel = 16 block_id = 0 for stage_id in range(len(self.stage_repeats)): numrepeat = self.stage_repeats[stage_id] output_channel = self.stage_out_channels[stage_id] if self.use_se: act_name = 'hard_swish' if stage_id >= 1 else 'relu' block_use_se = True if stage_id >= 2 else False else: act_name = 'relu' block_use_se = False # create repeated blocks for current stage for i in range(numrepeat): stride = 2 if i == 0 else 1 # TODO: update SE and Activation in ShuffleNetBlock and ShuffleNasBlock if fix_arch: block_choice = architecture[block_id] mid_channel = int(output_channel // 2 * channel_scales[block_id]) # print("Mid channel: {}".format(mid_channel)) block_id += 1 if block_choice == 0: self.features.add( ShuffleNetBlock(input_channel, output_channel, mid_channel, bn=bn, block_mode='ShuffleNetV2', ksize=3, stride=stride, use_se=block_use_se, act_name=act_name)) elif block_choice == 1: self.features.add( ShuffleNetBlock(input_channel, output_channel, mid_channel, bn=bn, block_mode='ShuffleNetV2', ksize=5, stride=stride, use_se=block_use_se, act_name=act_name)) elif block_choice == 2: self.features.add( ShuffleNetBlock(input_channel, output_channel, mid_channel, bn=bn, block_mode='ShuffleNetV2', ksize=7, stride=stride, use_se=block_use_se, act_name=act_name)) elif block_choice == 3: self.features.add( ShuffleNetBlock( input_channel, output_channel, mid_channel, bn=bn, block_mode='ShuffleXception', ksize=3, stride=stride, use_se=block_use_se, act_name=act_name)) else: raise NotImplementedError else: block_id += 1 self.features.add( ShuffleNasBlock( input_channel, output_channel, stride=stride, bn=bn, max_channel_scale=self. 
candidate_scales[-1], use_all_blocks=self.use_all_blocks, use_se=block_use_se, act_name=act_name)) # update input_channel for next block input_channel = output_channel assert block_id == sum(self.stage_repeats) # last conv if self.last_conv_after_pooling: # MobileNet V3 approach self.features.add( nn.GlobalAvgPool2D(), # no last SE for MobileNet V3 style nn.Conv2D(last_conv_out_channel, in_channels=input_channel, kernel_size=1, strides=1, padding=0, use_bias=True, prefix='conv_fc_'), # No bn for the conv after pooling Activation('hard_swish' if self.use_se else 'relu')) else: if self.use_se: # ShuffleNetV2+ approach self.features.add( nn.Conv2D(last_conv_out_channel, in_channels=input_channel, kernel_size=1, strides=1, padding=0, use_bias=False, prefix='last_conv_'), bn(momentum=0.1), Activation( 'hard_swish' if self.use_se else 'relu'), nn.GlobalAvgPool2D(), SE(last_conv_out_channel), nn.Conv2D(last_conv_out_channel, in_channels=last_conv_out_channel, kernel_size=1, strides=1, padding=0, use_bias=True, prefix='conv_fc_'), # No bn for the conv after pooling Activation('hard_swish' if self.use_se else 'relu' )) else: # original Oneshot Nas approach self.features.add( nn.Conv2D(last_conv_out_channel, in_channels=input_channel, kernel_size=1, strides=1, padding=0, use_bias=False, prefix='last_conv_'), bn(momentum=0.1), Activation( 'hard_swish' if self.use_se else 'relu'), nn.GlobalAvgPool2D()) # Dropout ratio follows ShuffleNetV2+ for se self.features.add(nn.Dropout(0.2 if self.use_se else 0.1)) self.output = nn.HybridSequential(prefix='output_') with self.output.name_scope(): self.output.add( nn.Conv2D(n_class, in_channels=last_conv_out_channel, kernel_size=1, strides=1, padding=0, use_bias=True), nn.Flatten())
def __init__(self, block, layers, cardinality=1, bottleneck_width=64, classes=1000, dilated=False, dilation=1, norm_layer=BatchNorm, norm_kwargs=None, last_gamma=False, deep_stem=False, stem_width=32, avg_down=False, final_drop=0.0, use_global_stats=False, name_prefix='', dropblock_prob=0, input_size=224, use_splat=False, radix=2, avd=False, avd_first=False, split_drop_ratio=0, **kwargs): self.cardinality = cardinality self.bottleneck_width = bottleneck_width self.inplanes = stem_width * 2 if deep_stem else 64 self.radix = radix self.split_drop_ratio = split_drop_ratio self.avd_first = avd_first super(ResNet, self).__init__(prefix=name_prefix) norm_kwargs = norm_kwargs if norm_kwargs is not None else {} if use_global_stats: norm_kwargs['use_global_stats'] = True self.norm_kwargs = norm_kwargs self.face_recog = kwargs.get('face_recog', False) first_stride = 2 if (input_size == 224) else 1 self.act_type = kwargs.get('act_type', 'prelu') with self.name_scope(): if not deep_stem: self.conv1 = nn.Conv2D(channels=64, kernel_size=7, strides=first_stride, padding=3, use_bias=False, in_channels=3) else: self.conv1 = nn.HybridSequential(prefix='conv1') self.conv1.add( nn.Conv2D(channels=stem_width, kernel_size=3, strides=first_stride, padding=1, use_bias=False, in_channels=3)) self.conv1.add( norm_layer(in_channels=stem_width, **norm_kwargs)) self.conv1.add( nn.Activation(self.act_type) if self.act_type != 'prelu' else nn.PReLU()) self.conv1.add( nn.Conv2D(channels=stem_width, kernel_size=3, strides=1, padding=1, use_bias=False, in_channels=stem_width)) self.conv1.add( norm_layer(in_channels=stem_width, **norm_kwargs)) self.conv1.add( nn.Activation(self.act_type) if self.act_type != 'prelu' else nn.PReLU()) self.conv1.add( nn.Conv2D(channels=stem_width * 2, kernel_size=3, strides=1, padding=1, use_bias=False, in_channels=stem_width)) input_size = _update_input_size(input_size, first_stride) self.bn1 = norm_layer( in_channels=64 if not deep_stem else stem_width * 2, **norm_kwargs) if self.act_type == 'prelu': self.relu = nn.PReLU() else: self.relu = nn.Activation(self.act_type) self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1) input_size = _update_input_size(input_size, 2) self.layer1 = self._make_layer(1, block, 64, layers[0], avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma, use_splat=use_splat, avd=avd) self.layer2 = self._make_layer(2, block, 128, layers[1], strides=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma, use_splat=use_splat, avd=avd) input_size = _update_input_size(input_size, 2) if dilated or dilation == 4: self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1, dilation=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma, dropblock_prob=dropblock_prob, input_size=input_size, use_splat=use_splat, avd=avd) self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1, dilation=4, pre_dilation=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma, dropblock_prob=dropblock_prob, input_size=input_size, use_splat=use_splat, avd=avd) elif dilation == 3: # special self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1, dilation=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma, dropblock_prob=dropblock_prob, input_size=input_size, use_splat=use_splat, avd=avd) self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2, dilation=2, pre_dilation=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma, dropblock_prob=dropblock_prob, input_size=input_size, 
use_splat=use_splat, avd=avd) elif dilation == 2: self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma, dropblock_prob=dropblock_prob, input_size=input_size, use_splat=use_splat, avd=avd) self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1, dilation=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma, dropblock_prob=dropblock_prob, input_size=input_size, use_splat=use_splat, avd=avd) else: self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma, dropblock_prob=dropblock_prob, input_size=input_size, use_splat=use_splat, avd=avd) input_size = _update_input_size(input_size, 2) self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma, dropblock_prob=dropblock_prob, input_size=input_size, use_splat=use_splat, avd=avd) input_size = _update_input_size(input_size, 2) self.avgpool = nn.GlobalAvgPool2D() self.flat = nn.Flatten() self.drop = None if final_drop > 0.0: self.drop = nn.Dropout(final_drop) if not self.face_recog: self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
def __init__(self, repeat=6, penultimate_filters=4032, stem_filters=96, filters_multiplier=2, classes=1000, use_aux=True, norm_layer=BatchNorm, norm_kwargs=None): super(NASNetALarge, self).__init__() filters = penultimate_filters // 24 self.conv0 = nn.HybridSequential(prefix='') self.conv0.add(nn.Conv2D(stem_filters, 3, padding=0, strides=2, use_bias=False)) self.conv0.add(norm_layer(momentum=0.1, epsilon=0.001, **({} if norm_kwargs is None else norm_kwargs))) self.cell_stem_0 = CellStem0(stem_filters, norm_layer, norm_kwargs, num_filters=filters // (filters_multiplier ** 2)) self.cell_stem_1 = CellStem1(filters // filters_multiplier, norm_layer, norm_kwargs) self.norm_1 = nn.HybridSequential(prefix='') self.norm_1.add(FirstCell(out_channels_left=filters//2, out_channels_right=filters, norm_layer=norm_layer, norm_kwargs=norm_kwargs)) for _ in range(repeat - 1): self.norm_1.add(NormalCell(out_channels_left=filters, out_channels_right=filters, norm_layer=norm_layer, norm_kwargs=norm_kwargs)) self.reduction_cell_0 = ReductionCell0(out_channels_left=2*filters, out_channels_right=2*filters, norm_layer=norm_layer, norm_kwargs=norm_kwargs) self.norm_2 = nn.HybridSequential(prefix='') self.norm_2.add(FirstCell(out_channels_left=filters, out_channels_right=2*filters, norm_layer=norm_layer, norm_kwargs=norm_kwargs)) for _ in range(repeat - 1): self.norm_2.add(NormalCell(out_channels_left=2*filters, out_channels_right=2*filters, norm_layer=norm_layer, norm_kwargs=norm_kwargs)) if use_aux: self.out_aux = nn.HybridSequential(prefix='') self.out_aux.add(nn.Conv2D(filters // 3, kernel_size=1, use_bias=False)) self.out_aux.add(norm_layer(epsilon=0.001, **({} if norm_kwargs is None else norm_kwargs))) self.out_aux.add(nn.Activation('relu')) self.out_aux.add(nn.Conv2D(2*filters, kernel_size=5, use_bias=False)) self.out_aux.add(norm_layer(epsilon=0.001, **({} if norm_kwargs is None else norm_kwargs))) self.out_aux.add(nn.Activation('relu')) self.out_aux.add(nn.Dense(classes)) else: self.out_aux = None self.reduction_cell_1 = ReductionCell1(out_channels_left=4*filters, out_channels_right=4*filters, norm_layer=norm_layer, norm_kwargs=norm_kwargs) self.norm_3 = nn.HybridSequential(prefix='') self.norm_3.add(FirstCell(out_channels_left=2*filters, out_channels_right=4*filters, norm_layer=norm_layer, norm_kwargs=norm_kwargs)) for _ in range(repeat - 1): self.norm_3.add(NormalCell(out_channels_left=4*filters, out_channels_right=4*filters, norm_layer=norm_layer, norm_kwargs=norm_kwargs)) self.out = nn.HybridSequential(prefix='') self.out.add(nn.Activation('relu')) self.out.add(nn.GlobalAvgPool2D()) self.out.add(nn.Dropout(0.5)) self.out.add(nn.Dense(classes))
                  padding=0, activation='relu'),
        nn.Conv2D(channels=channels, kernel_size=1, strides=strides,
                  padding=0, activation='relu'),
    )
    if max_pooling:
        out.add(nn.MaxPool2D(pool_size=3, strides=2))
    return out


net = nn.Sequential()
with net.name_scope():
    net.add(mlpconv(channels=96, kernel_size=11, padding=0, strides=4),
            mlpconv(channels=256, kernel_size=5, padding=2),
            mlpconv(channels=384, kernel_size=3, padding=1),
            nn.Dropout(.5),
            mlpconv(10, 3, 1, max_pooling=False),
            nn.GlobalAvgPool2D(),
            nn.Flatten())

import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import gluon
from mxnet import init

train_data, test_data = gb.load_data_fashion_mnist(batch_size=64, resize=224)
ctx = gb.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier())
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
gb.train(train_data, test_data, net, loss, trainer, ctx, 5)
def resnet_block(num_channels, num_residuals, first_block=False):
    blk = nn.Sequential()
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.add(Residual(num_channels, use_1x1conv=True, strides=2))
        else:
            blk.add(Residual(num_channels))
    return blk


net.add(resnet_block(64, 2, first_block=True),
        resnet_block(128, 2),
        resnet_block(256, 2),
        resnet_block(512, 2))
net.add(nn.GlobalAvgPool2D(), nn.Dense(10))

X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

# One epoch took nearly 6259.2 s, close to two hours
lr, num_epochs, batch_size, ctx = 0.05, 5, 256, d2l.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
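`resnet_block` builds on a `Residual` class that the snippet does not include. A sketch of the usual d2l-style residual unit, under the assumption that `use_1x1conv` adds a strided 1x1 projection on the shortcut:

from mxnet import nd
from mxnet.gluon import nn

class Residual(nn.Block):
    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1, strides=strides)
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
        # optional 1x1 projection so the shortcut matches the shape when downsampling
        self.conv3 = nn.Conv2D(num_channels, kernel_size=1,
                               strides=strides) if use_1x1conv else None
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()

    def forward(self, X):
        Y = nd.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return nd.relu(Y + X)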
def _make_layers(self, cfg, use_bias, use_bn, do_topdown, do_countpath): layers = [] layers_drm = [] layers_drm_cp = [] in_channels = 3 for i, x in enumerate(cfg): if x == 'M': layers += [ nn.MaxPool2D(pool_size=2, strides=2), nn.Dropout(0.5) ] if do_topdown: if use_bn: layers_drm += [ UpsampleLayer(size=2, scale=1.), nn.Dropout(0.5), nn.BatchNorm() ] else: layers_drm += [ UpsampleLayer(size=2, scale=1.), nn.Dropout(0.5) ] if do_countpath: if use_bn: layers_drm_cp += [ UpsampleLayer(size=2, scale=1.), nn.Dropout(0.5), nn.BatchNorm() ] else: layers_drm_cp += [ UpsampleLayer(size=2, scale=1.), nn.Dropout(0.5) ] elif x == 'A': layers += [nn.GlobalAvgPool2D(prefix='avg_')] if self.vgg_name == 'VGG16long': upsize = 7 elif self.vgg_name == 'VGG16': upsize = 7 elif self.vgg_name == 'AllConvImgNet': upsize = 5 else: upsize = 6 if do_topdown: if use_bn: layers_drm += [ UpsampleLayer(size=upsize, scale=1. / (upsize**2), prefix='avg_'), nn.BatchNorm() ] else: layers_drm += [ UpsampleLayer(size=upsize, scale=1. / (upsize**2), prefix='avg_') ] if do_countpath: if use_bn: layers_drm_cp += [ UpsampleLayer(size=upsize, scale=1. / (upsize**2), prefix='avg_'), nn.BatchNorm() ] else: layers_drm_cp += [ UpsampleLayer(size=upsize, scale=1. / (upsize**2), prefix='avg_') ] else: if self.vgg_name == 'AllConv13': padding_fw = (0, 0) if x == 512 else (1, 1) padding_bw = (0, 0) if x == 512 else (1, 1) elif self.vgg_name == 'AllConvImgNet': padding_fw = (0, 0) if (x == 1024 or x == 1000) else (1, 1) padding_bw = (0, 0) if (x == 1024 or x == 1000) else (1, 1) else: padding_fw = (1, 1) padding_bw = (1, 1) if use_bn: if self.vgg_name == 'AllConvImgNet' and ( x == 1024 or x == 1000) and cfg[i - 1] != 'M': conv_layer = nn.Conv2D(in_channels=in_channels, channels=x, kernel_size=(1, 1), padding=padding_fw, use_bias=False) else: conv_layer = nn.Conv2D(in_channels=in_channels, channels=x, kernel_size=(3, 3), padding=padding_fw, use_bias=False) if use_bias: layers += [ conv_layer, nn.BatchNorm(), BiasAdder(channels=x), nn.LeakyReLU(alpha=0.1) ] else: layers += [ conv_layer, nn.BatchNorm(), nn.LeakyReLU(alpha=0.1) ] if do_topdown: if (cfg[i - 1] == 'M' or cfg[i - 1] == 'A') and not i == 0: if self.vgg_name == 'AllConvImgNet' and ( x == 1024 or x == 1000) and cfg[i - 1] != 'M': layers_drm += [ nn.Conv2DTranspose( channels=in_channels, in_channels=x, kernel_size=1, strides=(1, 1), padding=padding_bw, use_bias=False, params=conv_layer.params) ] else: layers_drm += [ nn.Conv2DTranspose( channels=in_channels, in_channels=x, kernel_size=3, strides=(1, 1), padding=padding_bw, use_bias=False, params=conv_layer.params) ] else: if self.vgg_name == 'AllConvImgNet' and ( x == 1024 or x == 1000) and cfg[i - 1] != 'M': layers_drm += [ nn.BatchNorm(), nn.Conv2DTranspose( channels=in_channels, in_channels=x, kernel_size=1, strides=(1, 1), padding=padding_bw, use_bias=False, params=conv_layer.params) ] else: layers_drm += [ nn.BatchNorm(), nn.Conv2DTranspose( channels=in_channels, in_channels=x, kernel_size=3, strides=(1, 1), padding=padding_bw, use_bias=False, params=conv_layer.params) ] if do_countpath: if cfg[i - 1] == 'M' or cfg[i - 1] == 'A': if self.vgg_name == 'AllConvImgNet' and ( x == 1024 or x == 1000) and cfg[i - 1] != 'M': layers_drm_cp += [ nn.Conv2DTranspose(channels=in_channels, in_channels=x, kernel_size=1, strides=(1, 1), padding=padding_bw, use_bias=False) ] else: layers_drm_cp += [ nn.Conv2DTranspose(channels=in_channels, in_channels=x, kernel_size=3, strides=(1, 1), padding=padding_bw, use_bias=False) ] else: if 
self.vgg_name == 'AllConvImgNet' and ( x == 1024 or x == 1000) and cfg[i - 1] != 'M': layers_drm_cp += [ nn.BatchNorm(), nn.Conv2DTranspose(channels=in_channels, in_channels=x, kernel_size=1, strides=(1, 1), padding=padding_bw, use_bias=False) ] else: layers_drm_cp += [ nn.BatchNorm(), nn.Conv2DTranspose(channels=in_channels, in_channels=x, kernel_size=3, strides=(1, 1), padding=padding_bw, use_bias=False) ] elif use_bias: if self.vgg_name == 'AllConvImgNet' and ( x == 1024 or x == 1000) and cfg[i - 1] != 'M': conv_layer = nn.Conv2D(in_channels=in_channels, channels=x, kernel_size=(1, 1), padding=padding_fw, use_bias=True) else: conv_layer = nn.Conv2D(in_channels=in_channels, channels=x, kernel_size=(3, 3), padding=padding_fw, use_bias=True) layers += [conv_layer, nn.LeakyReLU(alpha=0.1)] if do_topdown: layers_drm += [ nn.Conv2DTranspose(channels=in_channels, in_channels=x, kernel_size=3, strides=(1, 1), padding=padding_bw, use_bias=False, params=conv_layer.params) ] if do_countpath: layers_drm_cp += [ nn.Conv2DTranspose(channels=in_channels, in_channels=x, kernel_size=3, strides=(1, 1), padding=padding_bw, use_bias=False) ] else: if self.vgg_name == 'AllConvImgNet' and ( x == 1024 or x == 1000) and cfg[i - 1] != 'M': conv_layer = nn.Conv2D(in_channels=in_channels, channels=x, kernel_size=(1, 1), padding=padding_fw, use_bias=False) else: conv_layer = nn.Conv2D(in_channels=in_channels, channels=x, kernel_size=(3, 3), padding=padding_fw, use_bias=False) layers += [conv_layer, nn.LeakyReLU(alpha=0.1)] if do_topdown: layers_drm += [ nn.Conv2DTranspose(channels=in_channels, in_channels=x, kernel_size=3, strides=(1, 1), padding=padding_bw, use_bias=False, params=conv_layer.params) ] if do_countpath: layers_drm_cp += [ nn.Conv2DTranspose(channels=in_channels, in_channels=x, kernel_size=3, strides=(1, 1), padding=padding_bw, use_bias=False) ] in_channels = x with self.name_scope(): model = nn.HybridSequential(prefix='features_') for block in layers: model.add(block) return model, layers_drm, layers_drm_cp
# ResNet, 18-layer
net = nn.Sequential()
# 1 + 17 = 18 layers
net.add(nn.Conv2D(64, kernel_size=7, padding=3, strides=2),
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3, padding=1, strides=2))
# (2 + 2 + 2 + 2) * 2 + 1 = 17 layers
# 18-layer: 2 2 2 2
# 34-layer: 3 4 6 3 (did not work very well here)
net.add(
    resnet_block(64, 2, first_block=True),
    resnet_block(128, 2),
    resnet_block(256, 2),
    resnet_block(512, 2),
    nn.GlobalAvgPool2D(),  # global average pooling followed by the fully connected layer
    nn.Dense(10))

# Test
X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

# Load the data
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)

# Re-initialize the model
ctx = d2l.try_gpu()
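The script stops right after the data loaders and the context are set up. Assuming the same d2l training utilities used in the neighbouring snippets, training would typically continue as sketched below (the learning rate and epoch count are placeholders, not taken from the original):

# assumed continuation, mirroring the ResNet/DenseNet training snippets above
lr, num_epochs = 0.05, 5
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)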
def __init__(self, nkernel, **kwargs):
    super(Times3, self).__init__(**kwargs)
    with self.name_scope():
        self.conv = nn.Conv2D(nkernel, kernel_size=(3, 3), padding=(1, 1))
        self.dense = nn.Dense(nkernel)
        self.pool = nn.GlobalAvgPool2D()
def __init__(self, **kwargs):
    super(GlobalAvgMaxPool2D, self).__init__(**kwargs)
    with self.name_scope():
        self.avg_pool = nn.GlobalAvgPool2D()
        self.max_pool = nn.GlobalMaxPool2D()
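Only the constructor is shown; how the two poolings are combined is left implicit. In DPN-style test-time pooling the two global poolings are usually averaged, so a plausible forward pass (the 0.5 weighting is an assumption) is:

def hybrid_forward(self, F, x):
    # assumption: average the global-average and global-max pooled maps,
    # as is common for DPN test-time pooling
    return 0.5 * (self.avg_pool(x) + self.max_pool(x))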
def __init__(self, num_classes, **kwargs):
    super(Output_Block, self).__init__(**kwargs)
    self.net = nn.HybridSequential()
    self.act = nn.Activation('relu')
    self.net.add(nn.BatchNorm(),
                 self.act,
                 nn.GlobalAvgPool2D(),
                 nn.Dense(num_classes))
def __init__(self, channels, init_block_channels, init_block_kernel_size, init_block_padding, rs, bws, incs, groups, b_case, for_training, test_time_pool, in_channels=3, in_size=(224, 224), classes=1000, **kwargs): super(DPN, self).__init__(**kwargs) self.in_size = in_size self.classes = classes with self.name_scope(): self.features = DualPathSequential(return_two=False, first_ordinals=1, last_ordinals=0, prefix='') self.features.add( DPNInitBlock(in_channels=in_channels, out_channels=init_block_channels, kernel_size=init_block_kernel_size, padding=init_block_padding)) in_channels = init_block_channels for i, channels_per_stage in enumerate(channels): stage = DualPathSequential(prefix='stage{}_'.format(i + 1)) r = rs[i] bw = bws[i] inc = incs[i] with stage.name_scope(): for j, out_channels in enumerate(channels_per_stage): has_proj = (j == 0) key_strides = 2 if (j == 0) and (i != 0) else 1 stage.add( DPNUnit(in_channels=in_channels, mid_channels=r, bw=bw, inc=inc, groups=groups, has_proj=has_proj, key_strides=key_strides, b_case=b_case)) in_channels = out_channels self.features.add(stage) self.features.add(DPNFinalBlock(channels=in_channels)) self.output = nn.HybridSequential(prefix='') if for_training or not test_time_pool: self.output.add(nn.GlobalAvgPool2D()) self.output.add( conv1x1(in_channels=in_channels, out_channels=classes, use_bias=True)) self.output.add(nn.Flatten()) else: self.output.add(nn.AvgPool2D(pool_size=7, strides=1)) self.output.add( conv1x1(in_channels=in_channels, out_channels=classes, use_bias=True)) self.output.add(GlobalAvgMaxPool2D()) self.output.add(nn.Flatten())
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn

net = nn.Sequential()
net.add(
    nn.Conv2D(channels=6, kernel_size=4, strides=2, padding=1, activation='relu'),
    nn.BatchNorm(),
    nn.Conv2D(channels=16, kernel_size=4, strides=2, padding=1, activation='relu'),
    nn.BatchNorm(),
    nn.Conv2D(channels=32, kernel_size=4, strides=2, padding=1, activation='relu'),
    nn.BatchNorm(),
    nn.GlobalAvgPool2D(),
    nn.Dense(7, activation='relu'),
    nn.Dense(1))
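A quick way to sanity-check this small regression-style network, in the same spirit as the shape printouts used in the other snippets (the batch size, channel count, and input resolution below are arbitrary assumptions):

# hypothetical smoke test: run one random batch through the network
X = nd.random.uniform(shape=(4, 3, 64, 64))  # assumed 3-channel input
net.initialize()
print(net(X).shape)  # expected: (4, 1)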
def __init__(self, channels, stride=1, downsample=False, **kwargs):
    super(PreActBottleneckCMPESEBlock3x3, self).__init__(**kwargs)
    self.channels = channels
    self.expansion = 4
    self.downsample = downsample
    self.bn1 = nn.BatchNorm()
    self.conv1 = nn.Conv2D(channels=channels, kernel_size=1, use_bias=False,
                           weight_initializer=init.Normal(math.sqrt(2. / (1. * channels))))
    self.bn2 = nn.BatchNorm()
    self.conv2 = nn.Conv2D(channels=channels, kernel_size=3, strides=stride, padding=1,
                           use_bias=False,
                           weight_initializer=init.Normal(math.sqrt(2. / (9. * channels))))
    self.bn3 = nn.BatchNorm()
    self.conv3 = nn.Conv2D(channels=self.expansion * channels, kernel_size=1, use_bias=False,
                           weight_initializer=init.Normal(
                               math.sqrt(2. / (1. * self.expansion * channels))))
    if downsample:
        self.shortcut = nn.HybridSequential()
        self.shortcut.add(
            nn.Conv2D(channels=self.expansion * channels, kernel_size=1, strides=stride,
                      use_bias=False,
                      weight_initializer=init.Normal(
                          math.sqrt(2. / (1. * self.expansion * channels)))))
    self.net_Global_skipx = nn.HybridSequential()
    self.net_Global_skipx.add(nn.GlobalAvgPool2D())
    self.net_Global_conv = nn.HybridSequential()
    self.net_Global_conv.add(nn.GlobalAvgPool2D())
    # reimage_k, cmpe_se_ratio and net_se_ratio are expected to be defined at module level
    self.net_reimage_layer = nn.HybridSequential()
    self.net_reimage_layer.add(ResFoldReimageLayer(group_ratio=reimage_k))
    self.Multi_Map = nn.HybridSequential()
    # integer division: channel counts must be ints in Python 3
    self.Multi_Map.add(
        nn.Conv2D(channels=channels // cmpe_se_ratio, kernel_size=(3, 3), use_bias=False),
        nn.BatchNorm())
    self.net_SE = nn.HybridSequential()
    self.net_SE.add(
        nn.Flatten(),
        nn.Dense(self.expansion * channels // net_se_ratio,
                 activation='relu', use_bias=False),
        nn.Dense(self.expansion * channels, activation='sigmoid', use_bias=False),
    )
def __init__(self, version, num_classes=1000, prefix=None, params=None):
    super(MobileNetV3, self).__init__(prefix=prefix, params=params)
    self.version = version
    assert self.version in ('large', 'small'), \
        "version must be one of ('large', 'small')"
    self.num_classes = num_classes
    with self.name_scope():
        self.first = nn.HybridSequential()
        self.first.add(nn.Conv2D(channels=16, kernel_size=3, strides=2,
                                 padding=1, use_bias=False))
        self.first.add(nn.BatchNorm())
        self.first.add(HardSwish())

        self.bnecks = nn.HybridSequential()
        for kernel_size, exp_size, out_size, se, nl, strides in model_config[version]:
            se = SEBlock(exp_size) if se else None
            if nl == 're':
                nl = nn.Activation('relu')
            elif nl == 'hs':
                nl = HardSwish()
            else:
                # raising a bare string is invalid in Python 3; raise a proper exception
                raise ValueError("cannot use {} activation function".format(nl))
            self.bnecks.add(
                BneckBlock(kernel_size=kernel_size,
                           expand_size=exp_size,
                           out_size=out_size,
                           nolinear=nl,
                           seblock=se,
                           strides=strides))

        self.last = nn.HybridSequential()
        if self.version == 'small':
            self.last.add(nn.Conv2D(576, kernel_size=1, strides=1, use_bias=False))
            self.last.add(SEBlock(576))
            self.last.add(nn.GlobalAvgPool2D())
            self.last.add(nn.Conv2D(1280, kernel_size=1, strides=1, use_bias=False))
            self.last.add(nn.BatchNorm())
            self.last.add(HardSwish())
            self.last.add(nn.Conv2D(self.num_classes, kernel_size=1))
            self.last.add(nn.BatchNorm())
            self.last.add(HardSwish())
        else:
            self.last.add(nn.Conv2D(960, kernel_size=1, strides=1, use_bias=False))
            self.last.add(nn.BatchNorm())
            self.last.add(HardSwish())
            self.last.add(nn.GlobalAvgPool2D())
            self.last.add(nn.Conv2D(1280, kernel_size=1, strides=1, use_bias=False))
            self.last.add(HardSwish())
            self.last.add(nn.Conv2D(self.num_classes, kernel_size=1))
        self.last.add(nn.Flatten())
blk = transition_block(10)
blk.initialize()
blk(Y).shape

net = nn.Sequential()
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3, strides=2, padding=1))

num_channels, growth_rate = 64, 32  # num_channels is the current number of channels
num_convs_in_dense_blocks = [4, 4, 4, 4]
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    net.add(DenseBlock(num_convs, growth_rate))
    # output channels of the previous dense block
    num_channels += num_convs * growth_rate
    # insert a transition layer that halves the channel count between dense blocks
    if i != len(num_convs_in_dense_blocks) - 1:
        num_channels //= 2
        net.add(transition_block(num_channels))

net.add(nn.BatchNorm(), nn.Activation('relu'), nn.GlobalAvgPool2D(), nn.Dense(10))

lr, num_epochs, batch_size, ctx = 0.1, 5, 256, d2l.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
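`DenseBlock` and `transition_block` come from the d2l DenseNet chapter and are not repeated in the snippet. A compact sketch of both, assuming the standard BN-ReLU-Conv ordering used there:

from mxnet import nd
from mxnet.gluon import nn

def conv_block(num_channels):
    blk = nn.Sequential()
    blk.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(num_channels, kernel_size=3, padding=1))
    return blk

class DenseBlock(nn.Block):
    def __init__(self, num_convs, num_channels, **kwargs):
        super(DenseBlock, self).__init__(**kwargs)
        self.net = nn.Sequential()
        for _ in range(num_convs):
            self.net.add(conv_block(num_channels))

    def forward(self, X):
        for blk in self.net:
            Y = blk(X)
            X = nd.concat(X, Y, dim=1)  # concatenate input and output along channels
        return X

def transition_block(num_channels):
    # 1x1 convolution to shrink channels, average pooling to halve height and width
    blk = nn.Sequential()
    blk.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(num_channels, kernel_size=1),
            nn.AvgPool2D(pool_size=2, strides=2))
    return blk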
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("please enter the number of training epochs")
        raise SystemExit(1)

    batch_size = 100
    train_data_batched, test_data_batched = load_data_fashion_mnist(batch_size=batch_size)

    nin_net = nn.Sequential()
    nin_net.add(nin_block(24, kernel_size=5, strides=2, padding=0),
                nn.MaxPool2D(pool_size=3, strides=2),
                nin_block(64, kernel_size=3, strides=1, padding=1),
                nn.MaxPool2D(pool_size=3, strides=2),
                nin_block(96, kernel_size=3, strides=1, padding=1),
                nn.MaxPool2D(pool_size=2, strides=1),
                nn.Dropout(0.5),
                nin_block(10, kernel_size=3, strides=1, padding=1),
                nn.GlobalAvgPool2D(),
                nn.Flatten())
    '''
    X = nd.random.uniform(shape=(100, 1, 28, 28))
    nin_net.initialize()
    for blk in nin_net:
        X = blk(X)
        print(blk.name, 'output shape:\t', X.shape)
    '''
    lr = 0.05
    num_epochs = int(sys.argv[1])
    nin_net.initialize(force_reinit=True, init=init.Xavier(), ctx=ctx)
    trainer = gluon.Trainer(nin_net.collect_params(), 'sgd', {'learning_rate': lr})
    test_acc_list = do_train(net=nin_net,
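`nin_block` is used in this script but not defined. A sketch of the usual network-in-network block (one spatial convolution followed by two 1x1 convolutions, all ReLU), assuming the d2l-style definition:

from mxnet.gluon import nn

def nin_block(num_channels, kernel_size, strides, padding):
    blk = nn.Sequential()
    blk.add(nn.Conv2D(num_channels, kernel_size, strides, padding, activation='relu'),
            nn.Conv2D(num_channels, kernel_size=1, activation='relu'),
            nn.Conv2D(num_channels, kernel_size=1, activation='relu'))
    return blk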
def __init__(self, units, num_stage, filter_list, ratio_list, num_class, num_group, data_type, drop_out, bn_mom=0.9, **kwargs): super(resnext, self).__init__(**kwargs) num_unit = len(units) assert (num_unit == num_stage) self.num_class = num_class # fw self.conv0 = nn.Conv2D(in_channels=3, channels=filter_list[0], kernel_size=(7, 7), strides=(2, 2), padding=(3, 3), use_bias=False, prefix='conv0_') self.bn0 = nn.BatchNorm(in_channels=filter_list[0], epsilon=2e-5, momentum=bn_mom, prefix='batchnorm0_') self.in0 = nn.BatchNorm(in_channels=filter_list[0], epsilon=2e-5, momentum=bn_mom, prefix='insnorm0_') self.bias0 = BiasAdder(channels=filter_list[0], prefix='bias0_') self.relu0 = nn.Activation(activation='relu', prefix='relu0_') self.relu0min = NReLu(prefix='relu0min_') self.pool0 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding=(0, 0), prefix='pool0_') # td self.upsample0 = UpsampleLayer(size=2, scale=1., prefix='up0_') self.bntd0 = nn.BatchNorm(in_channels=filter_list[0], epsilon=2e-5, momentum=bn_mom, prefix='td_batchnorm0_') self.intd0 = nn.BatchNorm(in_channels=filter_list[0], epsilon=2e-5, momentum=bn_mom, prefix='td_insnorm0_') self.bntd0min = nn.BatchNorm(in_channels=filter_list[0], epsilon=2e-5, momentum=bn_mom, prefix='td_batchnorm0min_') self.intd0min = nn.BatchNorm(in_channels=filter_list[0], epsilon=2e-5, momentum=bn_mom, prefix='td_insnorm0min_') self.tdconv0 = nn.Conv2DTranspose(channels=3, in_channels=filter_list[0], kernel_size=(7, 7), strides=(2, 2), padding=(3, 3), output_padding=1, use_bias=False, params=self.conv0.params, prefix='td_conv0_') self.residual_stages = nn.HybridSequential(prefix='residual_') topdown_list = [] for i in range(num_stage): self.residual_stages.add( residual_unit(in_channels=filter_list[i], num_filter=filter_list[i + 1], ratio=ratio_list[2], strides=(1 if i == 0 else 2, 1 if i == 0 else 2), dim_match=False, name='stage%d_unit%d' % (i + 1, 1), num_group=num_group, bn_mom=bn_mom, prefix='stage%d_unit%d_' % (i + 1, 1))) topdown_list.append( topdown_residual_unit(fwblock=self.residual_stages[-1], name='stage%d_td_unit%d' % (i + 1, 1), prefix='stage%d_td_unit%d_' % (i + 1, 1))) for j in range(units[i] - 1): self.residual_stages.add( residual_unit(in_channels=filter_list[i + 1], num_filter=filter_list[i + 1], ratio=ratio_list[2], strides=(1, 1), dim_match=True, name='stage%d_unit%d' % (i + 1, j + 2), num_group=num_group, bn_mom=bn_mom, prefix='stage%d_unit%d_' % (i + 1, j + 2))) topdown_list.append( topdown_residual_unit( fwblock=self.residual_stages[-1], name='stage%d_td_unit%d' % (i + 1, j + 2), prefix='stage%d_td_unit%d_' % (i + 1, j + 2))) with self.name_scope(): self.topdown_stages = nn.HybridSequential(prefix='td_residual_') for block in topdown_list[::-1]: self.topdown_stages.add(block) # fw classifier self.pool1 = nn.GlobalAvgPool2D(prefix='pool1_') self.drop1 = nn.Dropout(rate=drop_out, prefix='dp1_') self.fc = nn.Conv2D(in_channels=filter_list[-1], channels=num_class, kernel_size=(1, 1), use_bias=True, prefix='dense_') self.flatten1 = nn.Flatten(prefix='flatten1_') # bw classifier self.reshape = Reshape(shape=(num_class, 1, 1), prefix='reshape_') self.td_drop1 = nn.Dropout(rate=drop_out, prefix='td_dp1_') self.td_fc = nn.Conv2DTranspose(channels=filter_list[-1], in_channels=num_class, kernel_size=(1, 1), strides=(1, 1), use_bias=False, params=self.fc.params, prefix='td_dense_') self.upsample1 = UpsampleLayer(size=7, scale=1. / (7**2), prefix='up1_')
def resnet_block(num_channels, num_residuals, first_block=False):
    blk = nn.Sequential()
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.add(Residual(num_channels, use_1x1conv=True, strides=2))
        else:
            blk.add(Residual(num_channels))
    return blk


net.add(resnet_block(64, 2, first_block=True),
        resnet_block(128, 2),
        resnet_block(256, 2),
        resnet_block(512, 2),
        nn.GlobalAvgPool2D(), nn.Dense(10))

# X = nd.random.uniform(shape=(1, 64, 56, 56))
# net.initialize()
# for layer in net:
#     X = layer(X)
#     print(layer.name, 'output shape:', X.shape)

lr = 0.05
num_epochs = 5
batch_size = 256
ctx = mx.gpu(2)
net.initialize(ctx=ctx, init=init.Xavier())
def __init__(self, block, layers, channels, alpha=2, beta=4, classes=1000, thumbnail=False, last_gamma=False, use_se=False, norm_layer=BatchNorm, norm_kwargs=None, **kwargs): super(BLResNetV1, self).__init__(**kwargs) with self.name_scope(): self.features = nn.HybridSequential(prefix='') with self.features.name_scope(): self.features.add( nn.Conv2D(channels[0], 7, 2, 3, use_bias=False, in_channels=3)) self.features.add(norm_layer(in_channels=channels[0])) self.features.add(nn.Activation('relu')) self.features.add(BLModule_0(channels[0], alpha, norm_layer)) self.features.add( BLModule(block, channels[0], channels[0] * block.expansion, layers[0], alpha, beta, stride=2, hw=56)) self.features.add( BLModule(block, channels[0] * block.expansion, channels[1] * block.expansion, layers[1], alpha, beta, stride=2, hw=28)) self.features.add( BLModule(block, channels[1] * block.expansion, channels[2] * block.expansion, layers[2], alpha, beta, stride=1, hw=14)) self.features.add( BLModule_4(block, channels[2] * block.expansion, channels[3] * block.expansion, layers[3], stride=2)) self.features.add(nn.GlobalAvgPool2D()) self.features.add(nn.Flatten()) self.fc = nn.Dense(classes, in_units=channels[-1] * block.expansion)
def __init__(self, input_size=224, n_class=1000, architecture=None, channels_idx=None, act_type='relu', search=False): super(ShuffleNetV2_OneShot, self).__init__() assert input_size % 32 == 0 assert architecture is not None and channels_idx is not None self.stage_repeats = [4, 4, 8, 4] self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024] self.candidate_scales = [ 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0 ] #self.stage_out_channels = [-1, 16, 48, 128, 256, 512, 1024] input_channel = self.stage_out_channels[1] self.search = search self.first_conv = nn.HybridSequential(prefix='first_') self.first_conv.add( nn.Conv2D(input_channel, in_channels=3, kernel_size=3, strides=2, padding=1, use_bias=False)) self.first_conv.add( nn.BatchNorm(in_channels=input_channel, momentum=0.1)) self.first_conv.add(Activation(act_type)) self.features = nn.HybridSequential(prefix='features_') archIndex = 0 for idxstage in range(len(self.stage_repeats)): numrepeat = self.stage_repeats[idxstage] output_channel = self.stage_out_channels[idxstage + 2] for i in range(numrepeat): if i == 0: inp, outp, stride = input_channel, output_channel, 2 else: inp, outp, stride = input_channel, output_channel, 1 blockIndex = architecture[archIndex] base_mid_channels = outp // 2 mid_channels = int( base_mid_channels * self.candidate_scales[channels_idx[archIndex]]) archIndex += 1 self.features.add(nn.HybridSequential(prefix='')) if blockIndex == 0: #print('Shuffle3x3') self.features[-1].add( Shufflenet(inp, outp, mid_channels=mid_channels, ksize=3, stride=stride, act_type='relu', BatchNorm=nn.BatchNorm, search=self.search)) elif blockIndex == 1: #print('Shuffle5x5') self.features[-1].add( Shufflenet(inp, outp, mid_channels=mid_channels, ksize=5, stride=stride, act_type='relu', BatchNorm=nn.BatchNorm, search=self.search)) elif blockIndex == 2: #print('Shuffle7x7') self.features[-1].add( Shufflenet(inp, outp, mid_channels=mid_channels, ksize=7, stride=stride, act_type='relu', BatchNorm=nn.BatchNorm, search=self.search)) elif blockIndex == 3: #print('Xception') self.features[-1].add( Shuffle_Xception(inp, outp, mid_channels=mid_channels, stride=stride, act_type='relu', BatchNorm=nn.BatchNorm, search=self.search)) else: raise NotImplementedError input_channel = output_channel assert archIndex == len(architecture) self.conv_last = nn.HybridSequential(prefix='last_') self.conv_last.add( nn.Conv2D(self.stage_out_channels[-1], in_channels=input_channel, kernel_size=1, strides=1, padding=0, use_bias=False)) self.conv_last.add( nn.BatchNorm(in_channels=self.stage_out_channels[-1], momentum=0.1)) self.conv_last.add(Activation(act_type)) self.globalpool = nn.GlobalAvgPool2D() self.output = nn.HybridSequential(prefix='output_') with self.output.name_scope(): self.output.add( nn.Dropout(0.1), nn.Dense(units=n_class, in_units=self.stage_out_channels[-1], use_bias=False))
def __init__(self, alpha=1.0, beta=1.0, lite=False, dropout_rate=0.0, classes=1000, **kwargs): super(EfficientNet, self).__init__(**kwargs) with self.name_scope(): self.features = nn.HybridSequential(prefix='features_') with self.features.name_scope(): # stem conv channels = 32 if lite else int(32 * beta) _add_conv(self.features, channels, kernel=3, stride=2, pad=1, active=True, lite=lite) # base model settings repeats = [1, 2, 2, 3, 3, 4, 1] channels_num = [16, 24, 40, 80, 112, 192, 320] kernels_num = [3, 3, 5, 3, 5, 5, 3] t_num = [1, 6, 6, 6, 6, 6, 6] strides_first = [1, 2, 2, 1, 2, 2, 1] # determine params of MBConv layers in_channels_group = [] for rep, ch_num in zip([1] + repeats[:-1], [32] + channels_num[:-1]): in_channels_group += [int(ch_num * beta)] * int( ceil(alpha * rep)) channels_group, kernels, ts, strides = [], [], [], [] for rep, ch, kernel, t, s in zip(repeats, channels_num, kernels_num, t_num, strides_first): rep = int(ceil(alpha * rep)) channels_group += [int(ch * beta)] * rep kernels += [kernel] * rep ts += [t] * rep strides += [s] + [1] * (rep - 1) # add MBConv layers for in_c, c, t, k, s in zip(in_channels_group, channels_group, ts, kernels, strides): self.features.add( MBConv(in_channels=in_c, channels=c, t=t, kernel=k, stride=s, lite=lite)) # head layers last_channels = int(1280 * beta) if not lite and beta > 1.0 else 1280 _add_conv(self.features, last_channels, active=True, lite=lite) self.features.add(nn.GlobalAvgPool2D()) # features dropout self.dropout = nn.Dropout( dropout_rate) if dropout_rate > 0.0 else None # output layer self.output = nn.HybridSequential(prefix='output_') with self.output.name_scope(): self.output.add( nn.Conv2D(classes, 1, use_bias=False, prefix='pred_'), nn.Flatten())
def __init__(self, block, layers, channels, classes=1000, embed_size=512, thumbnail=False, use_dropout=False, use_norm=False, use_angular=False, **kwargs): super(ResNetV2, self).__init__(**kwargs) assert len(layers) == len(channels) - 1 with self.name_scope(): self.use_norm = use_norm self.use_angular = use_angular self.features = nn.HybridSequential(prefix='') self.features.add(nn.BatchNorm(scale=False, center=False)) if thumbnail: self.features.add(_conv3x3(channels[0], 1, 0)) else: self.features.add( nn.Conv2D(channels[0], 7, 2, 3, use_bias=False)) self.features.add(nn.BatchNorm()) self.features.add(nn.Activation('relu')) self.features.add(nn.MaxPool2D(3, 2, 1)) in_channels = channels[0] for i, num_layer in enumerate(layers): stride = 1 if i == 0 else 2 self.features.add( self._make_layer(block, num_layer, channels[i + 1], stride, i + 1, in_channels=in_channels)) in_channels = channels[i + 1] self.features.add(nn.BatchNorm()) self.features.add(nn.Activation('relu')) self.features.add(nn.GlobalAvgPool2D()) self.features.add(nn.Flatten()) self.embeds = nn.HybridSequential(prefix='') self.embeds.add( nn.Dense(4096, activation='relu', weight_initializer='normal', bias_initializer='zeros')) if use_dropout: self.embeds.add(nn.Dropout(rate=0.5)) self.embeds.add( nn.Dense(embed_size, activation='relu', weight_initializer='normal', bias_initializer='zeros')) if use_dropout: self.embeds.add(nn.Dropout(rate=0.5)) if self.use_norm: self.embeds.add(L2Normalization(mode='instance')) if self.use_angular: self.output = AngularLinear(classes, in_uints=embed_size) else: self.output = nn.Dense(classes, in_units=embed_size)
def __init__(self, nclass, block, layers, shortcut_type='B', block_design=('A', 'B', 'C'), dropout_ratio=0.5, num_segments=1, num_crop=1, feat_ext=False, init_std=0.001, ctx=None, partial_bn=False, norm_layer=BatchNorm, norm_kwargs=None, **kwargs): super(P3D, self).__init__() self.shortcut_type = shortcut_type self.block_design = block_design self.partial_bn = partial_bn self.dropout_ratio = dropout_ratio self.init_std = init_std self.num_segments = num_segments self.num_crop = num_crop self.feat_ext = feat_ext self.inplanes = 64 self.feat_dim = 512 * block.expansion with self.name_scope(): self.conv1 = nn.Conv3D(in_channels=3, channels=64, kernel_size=(1, 7, 7), strides=(1, 2, 2), padding=(0, 3, 3), use_bias=False) self.bn1 = norm_layer( in_channels=64, **({} if norm_kwargs is None else norm_kwargs)) self.relu = nn.Activation('relu') self.pool = nn.MaxPool3D(pool_size=(2, 3, 3), strides=2, padding=(0, 1, 1)) self.pool2 = nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1), padding=0) if self.partial_bn: if norm_kwargs is not None: norm_kwargs['use_global_stats'] = True else: norm_kwargs = {} norm_kwargs['use_global_stats'] = True # 3D layers are only for (layers1, layers2 and layers3), layers4 is C2D self.depth_3d = sum(layers[:3]) self.layer_cnt = 0 self.layer1 = self._make_res_layer(block=block, planes=64, blocks=layers[0], layer_name='layer1_') self.layer2 = self._make_res_layer(block=block, planes=128, blocks=layers[1], spatial_stride=2, layer_name='layer2_') self.layer3 = self._make_res_layer(block=block, planes=256, blocks=layers[2], spatial_stride=2, layer_name='layer3_') self.layer4 = self._make_res_layer(block=block, planes=512, blocks=layers[3], spatial_stride=2, layer_name='layer4_') self.avgpool = nn.GlobalAvgPool2D() self.dropout = nn.Dropout(rate=self.dropout_ratio) self.fc = nn.Dense( in_units=self.feat_dim, units=nclass, weight_initializer=init.Normal(sigma=self.init_std))