def __init__(self, num_classes, depth, pretrained=None, pretrained_base=True, feat_ext=False, num_segments=1, num_crop=1, num_stages=4, spatial_strides=(1, 2, 2, 2), temporal_strides=(1, 1, 1, 1), dilations=(1, 1, 1, 1), out_indices=(0, 1, 2, 3), conv1_kernel_t=1, conv1_stride_t=1, pool1_kernel_t=1, pool1_stride_t=1, frozen_stages=-1, inflate_freq=(0, 0, 1, 1), inflate_stride=(1, 1, 1, 1), inflate_style='3x1x1', nonlocal_stages=(-1, ), nonlocal_freq=(0, 0, 0, 0), nonlocal_cfg=None, bn_eval=False, bn_frozen=False, partial_bn=False, dropout_ratio=0.5, init_std=0.01, norm_layer=BatchNorm, norm_kwargs=None, ctx=None, **kwargs): super(ResNet_SlowFast, self).__init__() if depth not in self.arch_settings: raise KeyError('invalid depth {} for resnet'.format(depth)) self.num_classes = num_classes self.depth = depth self.pretrained = pretrained self.pretrained_base = pretrained_base self.feat_ext = feat_ext self.num_segments = num_segments self.num_crop = num_crop self.num_stages = num_stages assert 1 <= num_stages <= 4 self.spatial_strides = spatial_strides self.temporal_strides = temporal_strides self.dilations = dilations assert len(spatial_strides) == len(temporal_strides) == len( dilations) == num_stages self.out_indices = out_indices assert max(out_indices) < num_stages self.frozen_stages = frozen_stages self.inflate_freqs = inflate_freq if not isinstance( inflate_freq, int) else (inflate_freq, ) * num_stages self.inflate_style = inflate_style self.nonlocal_stages = nonlocal_stages self.nonlocal_freqs = nonlocal_freq if not isinstance( nonlocal_freq, int) else (nonlocal_freq, ) * num_stages self.nonlocal_cfg = nonlocal_cfg self.bn_eval = bn_eval self.bn_frozen = bn_frozen self.partial_bn = partial_bn self.feat_ext = feat_ext self.dropout_ratio = dropout_ratio self.init_std = init_std self.block, stage_blocks = self.arch_settings[depth] self.stage_blocks = stage_blocks[:num_stages] self.inplanes = 64 self.first_stage = nn.HybridSequential(prefix='') self.first_stage.add( nn.Conv3D(in_channels=3, channels=64, kernel_size=(conv1_kernel_t, 7, 7), strides=(conv1_stride_t, 2, 2), padding=((conv1_kernel_t - 1) // 2, 3, 3), use_bias=False)) self.first_stage.add( norm_layer(in_channels=64, **({} if norm_kwargs is None else norm_kwargs))) self.first_stage.add(nn.Activation('relu')) self.first_stage.add( nn.MaxPool3D(pool_size=(pool1_kernel_t, 3, 3), strides=(pool1_stride_t, 2, 2), padding=(pool1_kernel_t // 2, 1, 1))) self.res_layers = nn.HybridSequential(prefix='') for i, num_blocks in enumerate(self.stage_blocks): spatial_stride = spatial_strides[i] temporal_stride = temporal_strides[i] dilation = dilations[i] planes = 64 * 2**i layer_name = 'layer{}_'.format(i + 1) res_layer = make_res_layer(self.block, self.inplanes, planes, num_blocks, spatial_stride=spatial_stride, temporal_stride=temporal_stride, dilation=dilation, inflate_freq=self.inflate_freqs[i], inflate_style=self.inflate_style, nonlocal_freq=self.nonlocal_freqs[i], nonlocal_cfg=self.nonlocal_cfg if i in self.nonlocal_stages else None, norm_layer=norm_layer, norm_kwargs=norm_kwargs, layer_name=layer_name) self.inplanes = planes * self.block.expansion self.res_layers.add(res_layer) self.feat_dim = self.block.expansion * 64 * 2**( len(self.stage_blocks) - 1) self.st_avg = nn.GlobalAvgPool3D() self.head = nn.HybridSequential(prefix='') self.head.add(nn.Dropout(rate=self.dropout_ratio)) self.fc = nn.Dense(in_units=self.feat_dim, units=num_classes, weight_initializer=init.Normal(sigma=self.init_std)) self.head.add(self.fc) self.init_weights(ctx)
# Save the updated moving_mean and moving_var Y, self.moving_mean, self.moving_var = batch_norm(X, self.gamma.data(), self.beta.data(), self.moving_mean, self.moving_var, eps=1e-5, momentum=0.9) return Y net = nn.Sequential() net.add(nn.Conv2D(6, kernel_size=5), BatchNorm(6, num_dims=4), nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2), nn.Conv2D(16, kernel_size=5), BatchNorm(16, num_dims=4), nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2), nn.Dense(120), BatchNorm(120, num_dims=2), nn.Activation('sigmoid'), nn.Dense(84), BatchNorm(84, num_dims=2), nn.Activation('sigmoid'), nn.Dense(10)) lr, num_epochs, batch_size, ctx = 1.0, 5, 256, d2l.try_gpu() net.initialize(ctx=ctx, init=init.Xavier()) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr}) train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size) d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
def conv2d_block(num_filters, kernel_size): block = nn.HybridSequential() block.add(nn.Conv2D(num_filters, (kernel_size, kernel_size))) block.add(nn.BatchNorm()) block.add(nn.Activation(activation='relu')) return block
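A minimal usage sketch for conv2d_block (the filter count and input shape below are illustrative assumptions): because Conv2D and BatchNorm are created without in_channels, shapes are inferred on the first forward pass.

import mxnet as mx

blk = conv2d_block(num_filters=32, kernel_size=3)
blk.initialize()
x = mx.nd.random.uniform(shape=(1, 3, 64, 64))
print(blk(x).shape)  # (1, 32, 62, 62): no padding, so each spatial dim shrinks by kernel_size - 1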
def __init__(self, channels, init_block_channels, stem_blocks_channels, in_channels=3, classes=1000, **kwargs): super(NASNet, self).__init__(**kwargs) with self.name_scope(): self.features = nasnet_dual_path_sequential( return_two=False, first_ordinals=1, last_ordinals=2, prefix='') self.features.add(NASNetInitBlock( in_channels=in_channels, out_channels=init_block_channels)) in_channels = init_block_channels out_channels = stem_blocks_channels[0] self.features.add(Stem1Unit( in_channels=in_channels, out_channels=out_channels)) prev_in_channels = in_channels in_channels = out_channels out_channels = stem_blocks_channels[1] self.features.add(Stem2Unit( in_channels=in_channels, prev_in_channels=prev_in_channels, out_channels=out_channels)) prev_in_channels = in_channels in_channels = out_channels for i, channels_per_stage in enumerate(channels): stage = nasnet_dual_path_sequential(prefix='stage{}_'.format(i + 1)) with stage.name_scope(): for j, out_channels in enumerate(channels_per_stage): if (j == 0) and (i != 0): unit = ReductionUnit elif ((i == 0) and (j == 0)) or ((i != 0) and (j == 1)): unit = FirstUnit else: unit = NormalUnit stage.add(unit( in_channels=in_channels, prev_in_channels=prev_in_channels, out_channels=out_channels)) prev_in_channels = in_channels in_channels = out_channels self.features.add(stage) self.features.add(nn.Activation('relu')) self.features.add(nn.AvgPool2D( pool_size=7, strides=1)) self.output = nn.HybridSequential(prefix='') self.output.add(nn.Flatten()) self.output.add(nn.Dropout(rate=0.5)) self.output.add(nn.Dense( units=classes, in_units=in_channels))
def conv_block(num_channels): blk = nn.Sequential() blk.add(nn.BatchNorm(), nn.Activation('relu'), nn.Conv2D(num_channels, kernel_size=3, padding=1)) return blk
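A short usage sketch for conv_block (dummy shapes assumed), showing that in_channels is inferred lazily and that the 3x3 convolution with padding 1 preserves the spatial size.

import mxnet as mx

blk = conv_block(10)
blk.initialize()
X = mx.nd.random.uniform(shape=(4, 3, 8, 8))
print(blk(X).shape)  # (4, 10, 8, 8)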
def Act(): if config.net_act == 'prelu': return nn.PReLU() else: return nn.Activation(config.net_act)
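Act() reads the activation name from a module-level config object that is not shown in this excerpt; a hedged usage sketch with a stand-in config (hypothetical, for illustration only, assuming config lives in the same module as Act):

from types import SimpleNamespace
from mxnet.gluon import nn

config = SimpleNamespace(net_act='prelu')  # stand-in for the real config object
print(Act())   # nn.PReLU() when net_act == 'prelu'

config.net_act = 'relu'
print(Act())   # falls back to nn.Activation('relu')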
netD.add(nn.BatchNorm()) netD.add(nn.LeakyReLU(.2)) netD.add(nn.Conv2D(channels=512, kernel_size=6, strides=2, padding=2)) netD.add(nn.BatchNorm()) netD.add(nn.LeakyReLU(.2)) netD.add(nn.Conv2D(channels=1024, kernel_size=6, strides=2, padding=2)) netD.add(nn.BatchNorm()) netD.add(nn.LeakyReLU(.2)) netD.add(nn.Conv2D(channels=1, kernel_size=6)) #===============generator================== netG = nn.Sequential() with netG.name_scope(): netG.add(nn.Conv2DTranspose(channels=1024, kernel_size=6)) netG.add(nn.BatchNorm()) netG.add(nn.Activation(activation='relu')) netG.add( nn.Conv2DTranspose(channels=512, kernel_size=6, strides=2, padding=2)) netG.add(nn.BatchNorm()) netG.add(nn.Activation(activation='relu')) netG.add( nn.Conv2DTranspose(channels=256, kernel_size=6, strides=2, padding=2)) netG.add(nn.BatchNorm()) netG.add(nn.Activation(activation='relu')) netG.add( nn.Conv2DTranspose(channels=128, kernel_size=6, strides=2, padding=2)) netG.add(nn.BatchNorm()) netG.add(nn.Activation(activation='relu')) netG.add( nn.Conv2DTranspose(channels=3, kernel_size=6, strides=2, padding=2)) netG.add(nn.BatchNorm())
def __init__(self, levels, channels, classes=1000, block=BasicBlock, momentum=0.9, norm_layer=BatchNorm, norm_kwargs=None, residual_root=False, linear_root=False, use_feature=False, **kwargs): super(DLA, self).__init__(**kwargs) if norm_kwargs is None: norm_kwargs = {} norm_kwargs['momentum'] = momentum self._use_feature = use_feature self.channels = channels self.base_layer = nn.HybridSequential('base') self.base_layer.add( nn.Conv2D(in_channels=3, channels=channels[0], kernel_size=7, strides=1, padding=3, use_bias=False)) self.base_layer.add(norm_layer(in_channels=channels[0], **norm_kwargs)) self.base_layer.add(nn.Activation('relu')) self.level0 = self._make_conv_level(channels[0], channels[0], levels[0], norm_layer, norm_kwargs) self.level1 = self._make_conv_level(channels[0], channels[1], levels[1], norm_layer, norm_kwargs, stride=2) self.level2 = Tree(levels[2], block, channels[1], channels[2], 2, level_root=False, root_residual=residual_root, norm_layer=norm_layer, norm_kwargs=norm_kwargs, prefix='level2_') self.level3 = Tree(levels[3], block, channels[2], channels[3], 2, level_root=True, root_residual=residual_root, norm_layer=norm_layer, norm_kwargs=norm_kwargs, prefix='level3_') self.level4 = Tree(levels[4], block, channels[3], channels[4], 2, level_root=True, root_residual=residual_root, norm_layer=norm_layer, norm_kwargs=norm_kwargs, prefix='level4_') self.level5 = Tree(levels[5], block, channels[4], channels[5], 2, level_root=True, root_residual=residual_root, norm_layer=norm_layer, norm_kwargs=norm_kwargs, prefix='level5_') if not self._use_feature: self.global_avg_pool = nn.GlobalAvgPool2D() self.fc = nn.Dense(units=classes)
def conv_block(channels): out = nn.Sequential() out.add(nn.BatchNorm(), nn.Activation('relu'), nn.Conv2D(channels, kernel_size=3, padding=1)) return out
def conv_1x1_bn(channels, activation=nn.Activation('relu')): out = nn.HybridSequential() out.add(nn.Conv2D(channels, 1, 1, 0, use_bias=False), nn.BatchNorm(scale=True), activation) return out
def __init__(self, in_channels, bn_use_global_stats=False, **kwargs): super(PreActivation, self).__init__(**kwargs) with self.name_scope(): self.bn = nn.BatchNorm(in_channels=in_channels, use_global_stats=bn_use_global_stats) self.activ = nn.Activation("relu")
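Only the constructor of PreActivation is shown above; an assumed hybrid_forward consistent with the pre-activation pattern (batch norm followed by ReLU), as a sketch rather than the verbatim original:

    def hybrid_forward(self, F, x):
        # pre-activation: normalize first, then apply the non-linearity
        x = self.bn(x)
        x = self.activ(x)
        return x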
def conv_bn(channels, filter_size, stride, activation=nn.Activation('relu')): out = nn.HybridSequential() out.add(nn.Conv2D(channels, 3, stride, 1, use_bias=False), nn.BatchNorm(scale=True), activation) return out
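A usage sketch combining conv_bn and conv_1x1_bn into a small stem (shapes are illustrative). Note that both helpers use a single default nn.Activation('relu') instance created at definition time; that is harmless for a parameter-free activation, but a parametric block used as the default would end up shared across calls.

import mxnet as mx
from mxnet.gluon import nn

stem = nn.HybridSequential()
stem.add(conv_bn(32, 3, 2),     # 3x3 conv, stride 2, BN, ReLU
         conv_1x1_bn(64))       # 1x1 conv, BN, ReLU
stem.initialize()
x = mx.nd.random.uniform(shape=(1, 3, 224, 224))
print(stem(x).shape)  # (1, 64, 112, 112)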
def __init__(self, features, top_features, classes, box_features=None, short=600, max_size=1000, min_stage=4, max_stage=4, train_patterns=None, nms_thresh=0.3, nms_topk=400, post_nms=100, roi_mode='align', roi_size=(14, 14), strides=16, clip=None, rpn_channel=1024, base_size=16, scales=(8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7, rpn_train_pre_nms=12000, rpn_train_post_nms=2000, rpn_test_pre_nms=6000, rpn_test_post_nms=300, rpn_min_size=16, per_device_batch_size=1, num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=300, additional_output=False, force_nms=False, minimal_opset=False, **kwargs): super(DoubleHeadRCNN, self).__init__( features=features, top_features=top_features, classes=classes, box_features=box_features, short=short, max_size=max_size, train_patterns=train_patterns, nms_thresh=nms_thresh, nms_topk=nms_topk, post_nms=post_nms, roi_mode=roi_mode, roi_size=roi_size, strides=strides, clip=clip, force_nms=force_nms, minimal_opset=minimal_opset, **kwargs) if max_stage - min_stage > 1 and isinstance(strides, (int, float)): raise ValueError('Multi level detected but strides is of a single number:', strides) if rpn_train_post_nms > rpn_train_pre_nms: rpn_train_post_nms = rpn_train_pre_nms if rpn_test_post_nms > rpn_test_pre_nms: rpn_test_post_nms = rpn_test_pre_nms self.ashape = alloc_size[0] self._min_stage = min_stage self._max_stage = max_stage self.num_stages = max_stage - min_stage + 1 if self.num_stages > 1: assert len(scales) == len(strides) == self.num_stages, \ "The num_stages (%d) must match number of scales (%d) and strides (%d)" \ % (self.num_stages, len(scales), len(strides)) self._batch_size = per_device_batch_size self._num_sample = num_sample self._rpn_test_post_nms = rpn_test_post_nms if minimal_opset: self._target_generator = None else: self._target_generator = lambda: RCNNTargetGenerator(self.num_class, int(num_sample * pos_ratio), self._batch_size) self._additional_output = additional_output with self.name_scope(): self.rpn = RPN( channels=rpn_channel, strides=strides, base_size=base_size, scales=scales, ratios=ratios, alloc_size=alloc_size, clip=clip, nms_thresh=rpn_nms_thresh, train_pre_nms=rpn_train_pre_nms, train_post_nms=rpn_train_post_nms, test_pre_nms=rpn_test_pre_nms, test_post_nms=rpn_test_post_nms, min_size=rpn_min_size, multi_level=self.num_stages > 1, per_level_nms=False, minimal_opset=minimal_opset) self.sampler = RCNNTargetSampler(num_image=self._batch_size, num_proposal=rpn_train_post_nms, num_sample=num_sample, pos_iou_thresh=pos_iou_thresh, pos_ratio=pos_ratio, max_num_gt=max_num_gt) # double head branch with class and box self.class_features = nn.HybridSequential(prefix='double_fc_') with self.class_features.name_scope(): for _ in range(2): self.class_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01))) self.class_features.add(nn.Activation('relu')) self.newbox_features = nn.HybridSequential(prefix='double_') with self.newbox_features.name_scope(): for _ in range(2): self.newbox_features.add(BottleneckV1(channels=1024, stride=1))
def __init__(self, inplanes, planes, spatial_stride=1, temporal_stride=1, dilation=1, downsample=None, if_inflate=True, inflate_style='3x1x1', if_nonlocal=True, nonlocal_cfg=None, norm_layer=BatchNorm, norm_kwargs=None, layer_name='', **kwargs): super(Bottleneck, self).__init__() assert inflate_style in ['3x1x1', '3x3x3'] self.inplanes = inplanes self.planes = planes self.conv1_stride = 1 self.conv2_stride = spatial_stride self.conv1_stride_t = 1 self.conv2_stride_t = temporal_stride self.layer_name = layer_name if if_inflate: if inflate_style == '3x1x1': self.conv1 = nn.Conv3D(in_channels=inplanes, channels=planes, kernel_size=(3, 1, 1), strides=(self.conv1_stride_t, self.conv1_stride, self.conv1_stride), padding=(1, 0, 0), use_bias=False) self.conv2 = nn.Conv3D(in_channels=planes, channels=planes, kernel_size=(1, 3, 3), strides=(self.conv2_stride_t, self.conv2_stride, self.conv2_stride), padding=(0, dilation, dilation), dilation=(1, dilation, dilation), use_bias=False) else: self.conv1 = nn.Conv3D(in_channels=inplanes, channels=planes, kernel_size=1, strides=(self.conv1_stride_t, self.conv1_stride, self.conv1_stride), use_bias=False) self.conv2 = nn.Conv3D(in_channels=planes, channels=planes, kernel_size=3, strides=(self.conv2_stride_t, self.conv2_stride, self.conv2_stride), padding=(1, dilation, dilation), dilation=(1, dilation, dilation), use_bias=False) else: self.conv1 = nn.Conv3D(in_channels=inplanes, channels=planes, kernel_size=1, strides=(1, self.conv1_stride, self.conv1_stride), use_bias=False) self.conv2 = nn.Conv3D(in_channels=planes, channels=planes, kernel_size=(1, 3, 3), strides=(1, self.conv2_stride, self.conv2_stride), padding=(0, dilation, dilation), dilation=(1, dilation, dilation), use_bias=False) self.bn1 = norm_layer(in_channels=planes, **({} if norm_kwargs is None else norm_kwargs)) self.bn2 = norm_layer(in_channels=planes, **({} if norm_kwargs is None else norm_kwargs)) self.conv3 = nn.Conv3D(in_channels=planes, channels=planes * self.expansion, kernel_size=1, use_bias=False) self.bn3 = norm_layer(in_channels=planes * self.expansion, **({} if norm_kwargs is None else norm_kwargs)) self.relu = nn.Activation('relu') self.downsample = downsample self.spatial_stride = spatial_stride self.temporal_stride = temporal_stride self.dilation = dilation if if_nonlocal and nonlocal_cfg is not None: nonlocal_cfg_ = nonlocal_cfg.copy() nonlocal_cfg_['in_channels'] = planes * self.expansion self.nonlocal_block = build_nonlocal_block(nonlocal_cfg_) else: self.nonlocal_block = None
use_gpu = True ctx = mx.gpu() if use_gpu else mx.cpu() #%% # build the generator nc = 3 # number of channels ngf = 64 # base number of generator filters; the final image is 64 x 64 netG = nn.Sequential() # sequential model # simple generator: use any model, but it should upscale the latent variable (random vectors) to a 64 x 64 x 3 channel image with netG.name_scope(): # input is random_z (batchsize X 150 X 1), going into a transposed convolution netG.add(nn.Conv2DTranspose(ngf * 8, 4, 1, 0)) netG.add(nn.BatchNorm()) netG.add(nn.Activation('relu')) # output size. (ngf*8) x 4 x 4 netG.add(nn.Conv2DTranspose(ngf * 4, 4, 2, 1)) netG.add(nn.BatchNorm()) netG.add(nn.Activation('relu')) # output size. (ngf*4) x 8 x 8 netG.add(nn.Conv2DTranspose(ngf * 2, 4, 2, 1)) netG.add(nn.BatchNorm()) netG.add(nn.Activation('relu')) # output size. (ngf*2) x 16 x 16 netG.add(nn.Conv2DTranspose(ngf, 4, 2, 1)) netG.add(nn.BatchNorm()) netG.add(nn.Activation('relu')) # output size. (ngf) x 32 x 32 netG.add(nn.Conv2DTranspose(nc, 4, 2, 1)) netG.add(
def __init__(self, block, layers, classes=1000, dilated=False, norm_layer=BatchNorm, norm_kwargs=None, last_gamma=False, deep_stem=False, stem_width=32, avg_down=False, final_drop=0.0, use_global_stats=False, name_prefix='', **kwargs): self.inplanes = stem_width * 2 if deep_stem else 64 super(ResNetV1b, self).__init__(prefix=name_prefix) norm_kwargs = norm_kwargs if norm_kwargs is not None else {} if use_global_stats: norm_kwargs['use_global_stats'] = True self.norm_kwargs = norm_kwargs with self.name_scope(): if not deep_stem: self.conv1 = nn.Conv2D(channels=64, kernel_size=7, strides=2, padding=3, use_bias=False) else: self.conv1 = nn.HybridSequential(prefix='conv1') self.conv1.add( nn.Conv2D(channels=stem_width, kernel_size=3, strides=2, padding=1, use_bias=False)) self.conv1.add( norm_layer(in_channels=stem_width, **norm_kwargs)) self.conv1.add(nn.Activation('relu')) self.conv1.add( nn.Conv2D(channels=stem_width, kernel_size=3, strides=1, padding=1, use_bias=False)) self.conv1.add( norm_layer(in_channels=stem_width, **norm_kwargs)) self.conv1.add(nn.Activation('relu')) self.conv1.add( nn.Conv2D(channels=stem_width * 2, kernel_size=3, strides=1, padding=1, use_bias=False)) self.bn1 = norm_layer( in_channels=64 if not deep_stem else stem_width * 2, **norm_kwargs) self.relu = nn.Activation('relu') self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1) self.layer1 = self._make_layer(1, block, 64, layers[0], avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) self.layer2 = self._make_layer(2, block, 128, layers[1], strides=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) if dilated: self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1, dilation=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1, dilation=4, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) else: self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2, avg_down=avg_down, norm_layer=norm_layer, last_gamma=last_gamma) self.avgpool = nn.GlobalAvgPool2D() self.flat = nn.Flatten() self.drop = None if final_drop > 0.0: self.drop = nn.Dropout(final_drop)
def __init__(self, bits, bits_a, num_init_features, growth_rate, block_config, reduction, bn_size, modifier=[], thumbnail=False, dropout=0, classes=1000, **kwargs): assert len(modifier) == 0 super(DenseNetX, self).__init__(**kwargs) with self.name_scope(): self.fp_features = nn.HybridSequential(prefix='') if thumbnail: self.fp_features.add( nn.Conv2D(num_init_features, kernel_size=3, strides=1, padding=1, in_channels=0, use_bias=False)) else: self.fp_features.add( nn.Conv2D(num_init_features, kernel_size=7, strides=2, padding=3, use_bias=False)) self.fp_features.add(nn.BatchNorm()) self.fp_features.add(nn.Activation('relu')) self.fp_features.add( nn.MaxPool2D(pool_size=3, strides=2, padding=1)) # Add dense blocks num_features = num_init_features self.features1 = nn.HybridSequential(prefix='') self.features2 = nn.HybridSequential(prefix='') add_to = self.features1 for i, num_layers in enumerate(block_config): add_to.add( _make_dense_block(bits, bits_a, num_layers, bn_size, growth_rate, dropout, i + 1)) num_features = num_features + num_layers * growth_rate if i != len(block_config) - 1: features_after_transition = num_features // reduction[i] # make it to be multiples of 32 features_after_transition = int( round(features_after_transition / 32)) * 32 if i == 0: add_to.add(nn.BatchNorm()) add_to.add(nn.QActivation(bits=bits_a)) add_to.add( nn.QConv2D(features_after_transition, bits=bits, kernel_size=1)) add_to = self.features2 add_to.add(nn.AvgPool2D(pool_size=2, strides=2)) else: add_to.add(nn.BatchNorm()) add_to.add(nn.QActivation(bits=bits_a)) add_to.add( nn.QConv2D(features_after_transition, bits=bits, kernel_size=1)) add_to.add(nn.AvgPool2D(pool_size=2, strides=2)) num_features = features_after_transition add_to.add(nn.BatchNorm()) add_to.add(nn.Activation('relu')) add_to.add(nn.AvgPool2D(pool_size=4 if thumbnail else 7)) add_to.add(nn.Flatten()) self.output = nn.Dense(classes)
def __init__(self, units, act=nn.Activation('tanh'), normalized=False, dropout=0.0, weight_initializer=None, bias_initializer='zeros', prefix=None, params=None): # Define a temporary class to implement the normalized version # TODO(sxjscience) Find a better solution class _NormalizedScoreProj(HybridBlock): def __init__(self, in_units, weight_initializer=None, prefix=None, params=None): super(_NormalizedScoreProj, self).__init__(prefix=prefix, params=params) self.g = self.params.get('g', shape=(1, ), init=mx.init.Constant( 1.0 / math.sqrt(in_units)), allow_deferred_init=True) self.v = self.params.get('v', shape=(1, in_units), init=weight_initializer, allow_deferred_init=True) def hybrid_forward(self, F, x, g, v): # pylint: disable=arguments-differ v = F.broadcast_div(v, F.sqrt(F.dot(v, v, transpose_b=True))) weight = F.broadcast_mul(g, v) out = F.FullyConnected(x, weight, None, no_bias=True, num_hidden=1, flatten=False, name='fwd') return out super(MLPAttentionCell, self).__init__(prefix=prefix, params=params) self._units = units self._act = act self._normalized = normalized self._dropout = dropout with self.name_scope(): self._dropout_layer = nn.Dropout(dropout) self._query_mid_layer = nn.Dense( units=self._units, flatten=False, use_bias=True, weight_initializer=weight_initializer, bias_initializer=bias_initializer, prefix='query_') self._key_mid_layer = nn.Dense( units=self._units, flatten=False, use_bias=False, weight_initializer=weight_initializer, prefix='key_') if self._normalized: self._attention_score = \ _NormalizedScoreProj(in_units=units, weight_initializer=weight_initializer, prefix='score_') else: self._attention_score = nn.Dense( units=1, in_units=self._units, flatten=False, use_bias=False, weight_initializer=weight_initializer, prefix='score_')
def _make_basic_conv(in_channels, channels, norm_layer=BatchNorm, norm_kwargs=None, **kwargs): out = nn.HybridSequential(prefix='') out.add(nn.Conv3D(in_channels=in_channels, channels=channels, use_bias=False, **kwargs)) out.add(norm_layer(in_channels=channels, epsilon=0.001, **({} if norm_kwargs is None else norm_kwargs))) out.add(nn.Activation('relu')) return out
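A usage sketch for _make_basic_conv (assuming the definition above and its BatchNorm import are in scope; shapes are illustrative): the extra keyword arguments are forwarded to nn.Conv3D.

import mxnet as mx

blk = _make_basic_conv(in_channels=64, channels=128,
                       kernel_size=(1, 3, 3), padding=(0, 1, 1))
blk.initialize()
x = mx.nd.random.uniform(shape=(2, 64, 8, 28, 28))  # (N, C, T, H, W)
print(blk(x).shape)  # (2, 128, 8, 28, 28)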
def __init__(self, scale, m, classes=10, norm_layer=BatchNorm, norm_kwargs=None, **kwargs): super(cifar_ResidualAttentionModel, self).__init__(**kwargs) assert len(scale) == 3 and len(m) == 3 m1, m2, m3 = m with self.name_scope(): self.conv1 = nn.HybridSequential() with self.conv1.name_scope(): self.conv1.add( nn.Conv2D(32, kernel_size=3, strides=1, padding=1, use_bias=False)) self.conv1.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.conv1.add(nn.Activation('relu')) # 32 x 32 # self.mpool1 = nn.MaxPool2D(pool_size=2, strides=2, padding=0) self.residual_block1 = ResidualBlock(128, in_channels=32) self.attention_module1 = nn.HybridSequential() _add_block(self.attention_module1, AttentionModule_stage2, m1, 128, size1=32, size2=16, scale=scale, norm_layer=norm_layer, norm_kwargs=norm_kwargs) self.residual_block2 = ResidualBlock(256, in_channels=128, stride=2) self.attention_module2 = nn.HybridSequential() _add_block(self.attention_module2, AttentionModule_stage3, m2, 256, size1=16, scale=scale, norm_layer=norm_layer, norm_kwargs=norm_kwargs) self.residual_block3 = ResidualBlock(512, in_channels=256, stride=2) self.attention_module3 = nn.HybridSequential() _add_block(self.attention_module3, AttentionModule_stage4, m3, 512, scale=scale, norm_layer=norm_layer, norm_kwargs=norm_kwargs) self.residual_block4 = ResidualBlock(1024, in_channels=512) self.residual_block5 = ResidualBlock(1024) self.residual_block6 = ResidualBlock(1024) self.mpool2 = nn.HybridSequential() with self.mpool2.name_scope(): self.mpool2.add( norm_layer(**({} if norm_kwargs is None else norm_kwargs))) self.mpool2.add(nn.Activation('relu')) self.mpool2.add(nn.AvgPool2D(pool_size=8, strides=1)) self.fc = nn.Conv2D(classes, kernel_size=1)
def __init__(self, **kwargs): super(GRP_DSOD320, self).__init__(**kwargs) growth_rate = 48 dropout = 0 nchannels = 128 with self.name_scope(): self.net0 = nn.HybridSequential() self.net0.add( nn.Conv2D(64, 3, strides=2, padding=1, use_bias=False, weight_initializer='xavier'), nn.BatchNorm(epsilon=1e-4), nn.Activation('relu'), nn.Conv2D(64, 3, strides=1, padding=1, use_bias=False, weight_initializer='xavier'), nn.BatchNorm(epsilon=1e-4), nn.Activation('relu'), nn.Conv2D(128, 3, strides=1, padding=1, use_bias=False, weight_initializer='xavier'), nn.BatchNorm(epsilon=1e-4), nn.Activation('relu')) self.net1 = nn.HybridSequential() self.net1.add( nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2), ceil_mode=True)) times = 1 for i in range(6): self.net1.add(bl_layer_block(growth_rate, dropout, 4)) nchannels += growth_rate nchannels = int(nchannels / times) self.net1.add(transition_w_o_block(nchannels, dropout)) self.net2 = nn.HybridSequential() self.net2.add( nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2), ceil_mode=True)) for i in range(8): self.net2.add(bl_layer_block(growth_rate, dropout, 4)) nchannels += growth_rate nchannels = int(nchannels / times) self.net2.add(transition_w_o_block(nchannels, dropout)) self.extra0 = nn.HybridSequential() self.extra0.add( nn.MaxPool2D(pool_size=(4, 4), strides=(4, 4), ceil_mode=True), conv_block(kernel_size=1, channels=128, stride=1, pad=0, dropout=dropout)) self.extra1 = nn.HybridSequential() self.extra1.add( nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2), ceil_mode=True), conv_block(kernel_size=1, channels=128, stride=1, pad=0, dropout=dropout)) self.net3 = nn.HybridSequential() self.net3.add( nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2), ceil_mode=True)) for i in range(8): self.net3.add(bl_layer_block(growth_rate, dropout, 4)) nchannels += growth_rate nchannels = int(nchannels / times) self.net3.add(transition_w_o_block(nchannels, dropout)) for i in range(8): self.net3.add(bl_layer_block(growth_rate, dropout, 4)) nchannels += growth_rate self.net3.add(transition_w_o_block(171, dropout)) self.first = nn.HybridSequential() self.first.add( nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2), ceil_mode=True), conv_block(kernel_size=1, channels=171, stride=1, pad=0, dropout=dropout)) self.net4 = nn.HybridSequential() self.net4.add(bl_layer_block2(86, dropout, 1)) self.net5 = nn.HybridSequential() self.net5.add(bl_layer_block2(86, dropout, 1)) self.net6 = nn.HybridSequential() self.net6.add(bl_layer_block2(86, dropout, 1)) self.net7 = nn.HybridSequential() self.net7.add(bl_layer_block2(128, dropout, 1)) self.Recurrent1 = nn.HybridSequential() self.Recurrent1.add(recurrent_layer(2, 128, dropout)) self.Recurrent2 = nn.HybridSequential() self.Recurrent2.add(recurrent_layer(2, 171, dropout)) self.Recurrent3 = nn.HybridSequential() self.Recurrent3.add(recurrent_layer(2, 86, dropout)) self.Recurrent4 = nn.HybridSequential() self.Recurrent4.add(recurrent_layer(1, 86, dropout)) self.Recurrent5 = nn.HybridSequential() self.Recurrent5.add(recurrent_layer(1, 86, dropout))
def __init__( self, bin_values: mx.nd.NDArray, n_residue: int, n_skip: int, dilation_depth: int, n_stacks: int, act_type: str, cardinality: List[int], embedding_dimension: int, pred_length: int, **kwargs, ): super().__init__(**kwargs) self.dilation_depth = dilation_depth self.pred_length = pred_length self.mu = len(bin_values) self.dilations = WaveNet._get_dilations( dilation_depth=dilation_depth, n_stacks=n_stacks ) self.receptive_field = WaveNet.get_receptive_field( dilation_depth=dilation_depth, n_stacks=n_stacks ) self.trim_lengths = [ sum(self.dilations) - sum(self.dilations[: i + 1]) for i, _ in enumerate(self.dilations) ] with self.name_scope(): self.feature_embedder = FeatureEmbedder( cardinalities=cardinality, embedding_dims=[embedding_dimension for _ in cardinality], ) self.post_transform = LookupValues(bin_values) self.target_embed = nn.Embedding( input_dim=self.mu, output_dim=n_residue ) self.residuals = nn.HybridSequential() for i, d in enumerate(self.dilations): is_not_last = i + 1 < len(self.dilations) self.residuals.add( CausalDilatedResidue( n_residue=n_residue, n_skip=n_skip, dilation=d, return_dense_out=is_not_last, kernel_size=2, ) ) # heuristic assuming ~5 features std = 1.0 / math.sqrt(n_residue + 5) self.conv_project = nn.Conv1D( channels=n_residue, kernel_size=1, use_bias=True, weight_initializer=mx.init.Uniform(std), bias_initializer="zero", ) self.conv1 = conv1d( in_channels=n_skip, channels=n_skip, kernel_size=1 ) self.conv2 = conv1d( in_channels=n_skip, channels=self.mu, kernel_size=1 ) self.output_act = ( nn.ELU() if act_type == "elu" else nn.Activation(act_type=act_type) ) self.cross_entropy_loss = gluon.loss.SoftmaxCrossEntropyLoss()
return X # Transition block to reduce model complexity def transition_block(num_channels): blk = nn.Sequential() blk.add(nn.BatchNorm(), nn.Activation('relu'), nn.Conv2D(num_channels, kernel_size=1), nn.AvgPool2D(pool_size=2, strides=2)) return blk # Build DenseNet net = nn.Sequential() net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3), nn.BatchNorm(), nn.Activation('relu'), nn.MaxPool2D(pool_size=3, strides=2, padding=1)) num_channels, growth_rate = 64, 32 # num_channels: the current number of channels num_convs_in_dense_blocks = [4, 4, 4, 4] for i, num_convs in enumerate(num_convs_in_dense_blocks): net.add(DenseBlock(num_convs, growth_rate)) # Record the number of output channels of the previous dense block num_channels += num_convs * growth_rate # Add a transition block that halves the channels between dense blocks if i != len(num_convs_in_dense_blocks) - 1: num_channels //= 2 net.add(transition_block(num_channels)) net.add(nn.BatchNorm(), nn.Activation('relu'), nn.GlobalAvgPool2D(), nn.Dense(10))
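The snippet above begins with the tail of DenseBlock.forward, but the class itself is not included. A minimal sketch that matches how it is used here (following the common d2l-style definition, and assuming nd, nn, and the conv_block helper shown earlier are in scope):

class DenseBlock(nn.Block):
    def __init__(self, num_convs, num_channels, **kwargs):
        super(DenseBlock, self).__init__(**kwargs)
        self.net = nn.Sequential()
        for _ in range(num_convs):
            self.net.add(conv_block(num_channels))

    def forward(self, X):
        for blk in self.net:
            Y = blk(X)
            # concatenate input and output along the channel axis
            X = nd.concat(X, Y, dim=1)
        return X

With num_convs_in_dense_blocks = [4, 4, 4, 4] and growth_rate = 32, the channel count evolves as 64 -> 192 -> 96 -> 224 -> 112 -> 240 -> 120 -> 248 before the final BN/ReLU/global-pool head.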
def __init__(self, repeat=6, penultimate_filters=4032, stem_filters=96, filters_multiplier=2, classes=1000, use_aux=True): super(NASNetALarge, self).__init__() filters = penultimate_filters // 24 self.conv0 = nn.HybridSequential(prefix='') self.conv0.add( nn.Conv2D(stem_filters, 3, padding=0, strides=2, use_bias=False)) self.conv0.add(nn.BatchNorm(momentum=0.1, epsilon=0.001)) self.cell_stem_0 = CellStem0(stem_filters, num_filters=filters // (filters_multiplier**2)) self.cell_stem_1 = CellStem1(num_filters=filters // filters_multiplier) self.norm_1 = nn.HybridSequential(prefix='') self.norm_1.add( FirstCell(out_channels_left=filters // 2, out_channels_right=filters)) for _ in range(repeat - 1): self.norm_1.add( NormalCell(out_channels_left=filters, out_channels_right=filters)) self.reduction_cell_0 = ReductionCell0(out_channels_left=2 * filters, out_channels_right=2 * filters) self.norm_2 = nn.HybridSequential(prefix='') self.norm_2.add( FirstCell(out_channels_left=filters, out_channels_right=2 * filters)) for _ in range(repeat - 1): self.norm_2.add( NormalCell(out_channels_left=2 * filters, out_channels_right=2 * filters)) if use_aux: self.out_aux = nn.HybridSequential(prefix='') self.out_aux.add( nn.Conv2D(filters // 3, kernel_size=1, use_bias=False)) self.out_aux.add(nn.BatchNorm(epsilon=0.001)) self.out_aux.add(nn.Activation('relu')) self.out_aux.add( nn.Conv2D(2 * filters, kernel_size=5, use_bias=False)) self.out_aux.add(nn.BatchNorm(epsilon=0.001)) self.out_aux.add(nn.Activation('relu')) self.out_aux.add(nn.Dense(classes)) else: self.out_aux = None self.reduction_cell_1 = ReductionCell1(out_channels_left=4 * filters, out_channels_right=4 * filters) self.norm_3 = nn.HybridSequential(prefix='') self.norm_3.add( FirstCell(out_channels_left=2 * filters, out_channels_right=4 * filters)) for _ in range(repeat - 1): self.norm_3.add( NormalCell(out_channels_left=4 * filters, out_channels_right=4 * filters)) self.out = nn.HybridSequential(prefix='') self.out.add(nn.Activation('relu')) self.out.add(nn.GlobalAvgPool2D()) self.out.add(nn.Dropout(0.5)) self.out.add(nn.Dense(classes))
def transition_block(num_channels): blk = nn.Sequential() blk.add(nn.BatchNorm(), nn.Activation('relu'), nn.Conv2D(num_channels, kernel_size=1), nn.AvgPool2D(pool_size=2, strides=2)) return blk
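A quick shape check for transition_block (dummy shapes assumed): the 1x1 convolution sets the channel count and the average pooling halves the spatial resolution.

import mxnet as mx

blk = transition_block(10)
blk.initialize()
X = mx.nd.random.uniform(shape=(4, 23, 8, 8))
print(blk(X).shape)  # (4, 10, 4, 4)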
def __init__(self, channels, **kwargs): super(PreActivation, self).__init__(**kwargs) with self.name_scope(): self.bn = dpn_batch_norm(channels=channels) self.activ = nn.Activation('relu')
def forward(self, X): # If X is not in main memory, copy moving_mean and moving_var to the device memory where X lives if self.moving_mean.context != X.context: self.moving_mean = self.moving_mean.copyto(X.context) self.moving_var = self.moving_var.copyto(X.context) # Save the updated moving_mean and moving_var Y, self.moving_mean, self.moving_var = batch_norm(X, self.gamma.data(), self.beta.data(), self.moving_mean, self.moving_var, eps=1e-5, momentum=0.9) return Y net = nn.Sequential() net.add(nn.Conv2D(6, kernel_size=5), BatchNorm(6, num_dims=4), nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2), nn.Conv2D(16, kernel_size=5), BatchNorm(16, num_dims=4), nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2), nn.Dense(120), BatchNorm(120, num_dims=2), nn.Activation('sigmoid'), nn.Dense(84), BatchNorm(84, num_dims=2), nn.Activation('sigmoid'), nn.Dense(10)) lr, num_epochs, batch_size, ctx = 1.0, 5, 256, d2l.try_gpu() net.initialize(ctx=ctx, init=init.Xavier()) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr}) train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size) d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
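The forward method above relies on a standalone batch_norm helper that is not included in this excerpt. A sketch consistent with the d2l-style implementation it appears to follow (an assumption, not the verbatim original):

from mxnet import autograd, nd

def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    if not autograd.is_training():
        # In prediction mode, use the running statistics.
        X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Dense layer: normalize over the batch dimension.
            mean = X.mean(axis=0)
            var = ((X - mean) ** 2).mean(axis=0)
        else:
            # Conv layer: normalize over batch and spatial dims, per channel.
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            var = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
        X_hat = (X - mean) / nd.sqrt(var + eps)
        # Update the running statistics with the batch statistics.
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean, moving_var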
def _make_basic_conv(**kwargs): out = nn.HybridSequential(prefix='') out.add(nn.QConv2D(use_bias=False, **kwargs)) out.add(nn.BatchNorm(epsilon=0.001)) out.add(nn.Activation('relu')) return out
def __init__(self, num_filter, stride, dim_match, isBin=False, prefix='', **kwargs): super(ResidualUnit, self).__init__(**kwargs) self.dim_match = dim_match self.features = nn.HybridSequential() self.bn1 = nn.BatchNorm(use_global_stats=use_global_stats, prefix=prefix + '_nb1_') self.act1 = nn.Activation('relu') self.scale = nn.Conv2D(channels=num_filter, kernel_size=(1, 1), strides=stride, use_bias=False, prefix=prefix + '_sc_') if isBin: self.features.add( nn.QConv2D(channels=int(num_filter * 0.25), kernel_size=(1, 1), strides=(1, 1), padding=(0, 0), use_bias=False, apply_scaling=True, prefix=prefix + '_conv1_')) self.features.add( nn.BatchNorm(use_global_stats=use_global_stats, prefix=prefix + '_nb2_')) self.features.add(nn.Activation('relu')) self.features.add( nn.QConv2D(channels=int(num_filter * 0.25), kernel_size=(3, 3), strides=stride, padding=(1, 1), use_bias=False, apply_scaling=True, prefix=prefix + '_conv2_')) self.features.add( nn.BatchNorm(use_global_stats=use_global_stats, prefix=prefix + '_nb3_')) self.features.add(nn.Activation('relu')) self.features.add( nn.QConv2D(channels=num_filter, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0), use_bias=False, apply_scaling=True, prefix=prefix + '_conv3_')) else: self.features.add( nn.Conv2D(channels=int(num_filter * 0.25), kernel_size=(1, 1), strides=(1, 1), padding=(0, 0), use_bias=False, prefix=prefix + '_conv1_')) self.features.add( nn.BatchNorm(use_global_stats=use_global_stats, prefix=prefix + '_nb2_')) self.features.add(nn.Activation('relu')) self.features.add( nn.Conv2D(channels=int(num_filter * 0.25), kernel_size=(3, 3), strides=stride, padding=(1, 1), use_bias=False, prefix=prefix + '_conv2_')) self.features.add( nn.BatchNorm(use_global_stats=use_global_stats, prefix=prefix + '_nb3_')) self.features.add(nn.Activation('relu')) self.features.add( nn.Conv2D(channels=num_filter, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0), use_bias=False, prefix=prefix + '_conv3_'))
def __init__( self, num_scenes, num_actions, model_depth, final_spatial_kernel=7, final_temporal_kernel=2, with_bias=False, ): super(R2Plus2D_MT, self).__init__() self.comp_count = 0 self.base = nn.Sequential(prefix='base_') with self.base.name_scope(): self.base.add( nn.Conv3D(channels=45, kernel_size=(1, 7, 7), strides=(1, 2, 2), padding=(0, 3, 3), use_bias=with_bias), nn.BatchNorm(), nn.Activation(activation='relu'), nn.Conv3D(channels=64, kernel_size=(3, 1, 1), strides=(1, 1, 1), padding=(1, 0, 0), use_bias=with_bias), nn.BatchNorm(), nn.Activation(activation='relu')) self.base_name = self.set_base_name() (n2, n3, n4, n5) = BLOCK_CONFIG[model_depth] self.conv2_name = [] self.conv2 = nn.Sequential(prefix='conv2_') with self.conv2.name_scope(): for _ in range(n2): self.conv2_name.extend( self.add_comp_count_index(change_channels=False, comp_index=self.comp_count, prefix=self.conv2.prefix)) self.conv2.add( R3DBlock(input_filter=64, num_filter=64, comp_index=self.comp_count, use_bias=with_bias)) self.comp_count += 1 #self.conv3 self.conv3_name = [] self.conv3 = nn.Sequential(prefix='conv3_') with self.conv3.name_scope(): print("this in conv3 comp_count is ", self.comp_count) self.conv3_name.extend( self.add_comp_count_index(change_channels=True, downsampling=True, comp_index=self.comp_count)) self.conv3.add( R3DBlock(input_filter=64, num_filter=128, comp_index=self.comp_count, downsampling=True, use_bias=with_bias)) self.comp_count += 1 for _ in range(n3 - 1): self.conv3_name.extend( self.add_comp_count_index(change_channels=False, downsampling=False, comp_index=self.comp_count)) self.conv3.add( R3DBlock(input_filter=128, num_filter=128, comp_index=self.comp_count, use_bias=with_bias)) self.comp_count += 1 # self.conv4 self.conv4_name = [] self.conv4 = nn.Sequential(prefix='conv4_') with self.conv4.name_scope(): self.conv4_name.extend( self.add_comp_count_index(change_channels=True, downsampling=True, comp_index=self.comp_count)) self.conv4.add( R3DBlock(128, 256, comp_index=self.comp_count, downsampling=True, use_bias=with_bias)) self.comp_count += 1 for _ in range(n4 - 1): self.conv4_name.extend( self.add_comp_count_index(change_channels=False, downsampling=False, comp_index=self.comp_count)) self.conv4.add( R3DBlock(256, 256, comp_index=self.comp_count, use_bias=with_bias)) self.comp_count += 1 #conv5 self.conv5_name = [] self.conv5 = nn.Sequential(prefix='conv5_') with self.conv5.name_scope(): self.conv5_name.extend( self.add_comp_count_index(change_channels=True, downsampling=True, comp_index=self.comp_count)) self.conv5.add( R3DBlock(256, 512, comp_index=self.comp_count, downsampling=True, use_bias=with_bias)) self.comp_count += 1 for _ in range(n5 - 1): self.conv5_name.extend( self.add_comp_count_index(comp_index=self.comp_count)) self.conv5.add( R3DBlock(512, 512, self.comp_count, use_bias=with_bias)) self.comp_count += 1 # final output of conv5 is [512,t/8,7,7] #512x1x7x7 # for static scene tagging self.scene_conv = nn.Sequential() self.scene_conv.add( nn.Conv3D(256, kernel_size=(1, 3, 3), strides=(1, 2, 2)), nn.BatchNorm(), nn.Activation('relu')) # shape 256*1*2*2 # reshape(1024) self.scene_drop = nn.Dropout(rate=0.3) self.scene_output = nn.Dense(num_scenes) # for action classification self.action_conv = nn.Sequential() self.action_conv.add( nn.Conv3D(512, kernel_size=(1, 3, 3), strides=(1, 1, 1), padding=(0, 1, 1)), nn.BatchNorm(), nn.Activation('relu')) self.action_avg = nn.AvgPool3D(pool_size=(final_temporal_kernel, final_spatial_kernel, final_spatial_kernel), strides=(1, 1, 
1), padding=(0, 0, 0)) self.action_output = nn.Dense(units=num_actions) self.dense0_name = ['final_fc_weight', 'final_fc_bias']