data_iter = gdata.DataLoader(dataset, batch_size=batch_size, shuffle=True)
for X, y in data_iter:
    print(X, y)
    print("####")

# define model
from mxnet.gluon import nn

net = nn.Sequential()
net.add(nn.Dense(1))

# init model param
from mxnet import init

net.initialize(init.Normal(0.1))

# define loss
from mxnet.gluon import loss as gloss

loss = gloss.L2Loss()

# define optimizer
from mxnet import gluon

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

# train
from mxnet import autograd

num_epochs = 10
for epoch in range(num_epochs):
    for X, y in data_iter:
        # the original snippet breaks off here; the standard Gluon loop body,
        # mirroring the other examples in this collection, would be:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        trainer.step(batch_size)
def __init__(self, nclass, depth, num_stages=4, pretrained=False,
             pretrained_base=True, num_segments=1,
             spatial_strides=(1, 2, 2, 2), temporal_strides=(1, 1, 1, 1),
             dilations=(1, 1, 1, 1), out_indices=(0, 1, 2, 3),
             conv1_kernel_t=5, conv1_stride_t=2, pool1_kernel_t=1,
             pool1_stride_t=2, inflate_freq=(1, 1, 1, 1),
             inflate_stride=(1, 1, 1, 1), inflate_style='3x1x1',
             nonlocal_stages=(-1,), nonlocal_freq=(0, 1, 1, 0),
             nonlocal_cfg=None, bn_eval=True, bn_frozen=False,
             partial_bn=False, frozen_stages=-1, dropout_ratio=0.5,
             init_std=0.01, norm_layer=BatchNorm, norm_kwargs=None,
             ctx=None, **kwargs):
    super(I3D_ResNetV1, self).__init__()

    if depth not in self.arch_settings:
        raise KeyError('invalid depth {} for resnet'.format(depth))

    self.nclass = nclass
    self.depth = depth
    self.num_stages = num_stages
    self.pretrained = pretrained
    self.pretrained_base = pretrained_base
    self.num_segments = num_segments
    self.spatial_strides = spatial_strides
    self.temporal_strides = temporal_strides
    self.dilations = dilations
    assert len(spatial_strides) == len(temporal_strides) == len(dilations) == num_stages
    self.out_indices = out_indices
    assert max(out_indices) < num_stages
    self.inflate_freqs = inflate_freq if not isinstance(inflate_freq, int) else (inflate_freq,) * num_stages
    self.inflate_style = inflate_style
    self.nonlocal_stages = nonlocal_stages
    self.nonlocal_freqs = nonlocal_freq if not isinstance(nonlocal_freq, int) else (nonlocal_freq,) * num_stages
    self.nonlocal_cfg = nonlocal_cfg
    self.bn_eval = bn_eval
    self.bn_frozen = bn_frozen
    self.partial_bn = partial_bn
    self.frozen_stages = frozen_stages
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std

    self.block, stage_blocks = self.arch_settings[depth]
    self.stage_blocks = stage_blocks[:num_stages]
    self.inplanes = 64

    self.first_stage = nn.HybridSequential(prefix='')
    self.first_stage.add(nn.Conv3D(in_channels=3, channels=64,
                                   kernel_size=(conv1_kernel_t, 7, 7),
                                   strides=(conv1_stride_t, 2, 2),
                                   padding=((conv1_kernel_t - 1) // 2, 3, 3),
                                   use_bias=False))
    self.first_stage.add(norm_layer(in_channels=64,
                                    **({} if norm_kwargs is None else norm_kwargs)))
    self.first_stage.add(nn.Activation('relu'))
    self.first_stage.add(nn.MaxPool3D(pool_size=(pool1_kernel_t, 3, 3),
                                      strides=(pool1_stride_t, 2, 2),
                                      padding=(pool1_kernel_t // 2, 1, 1)))

    self.pool2 = nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1), padding=(0, 0, 0))

    self.res_layers = nn.HybridSequential(prefix='')
    for i, num_blocks in enumerate(self.stage_blocks):
        spatial_stride = spatial_strides[i]
        temporal_stride = temporal_strides[i]
        dilation = dilations[i]
        planes = 64 * 2**i
        layer_name = 'layer{}_'.format(i + 1)
        res_layer = make_res_layer(self.block, self.inplanes, planes, num_blocks,
                                   spatial_stride=spatial_stride,
                                   temporal_stride=temporal_stride,
                                   dilation=dilation,
                                   inflate_freq=self.inflate_freqs[i],
                                   inflate_style=self.inflate_style,
                                   nonlocal_freq=self.nonlocal_freqs[i],
                                   nonlocal_cfg=self.nonlocal_cfg if i in self.nonlocal_stages else None,
                                   norm_layer=norm_layer,
                                   norm_kwargs=norm_kwargs,
                                   layer_name=layer_name)
        self.inplanes = planes * self.block.expansion
        self.res_layers.add(res_layer)

    self.feat_dim = self.block.expansion * 64 * 2**(len(self.stage_blocks) - 1)

    # We use ``GlobalAvgPool3D`` here for simplicity. Otherwise the input size must be fixed.
    # You can also use ``AvgPool3D`` and specify the arguments on your own, e.g.
    # self.st_avg = nn.AvgPool3D(pool_size=(4, 7, 7), strides=1, padding=0)
    # ``AvgPool3D`` is 10% faster, but ``GlobalAvgPool3D`` makes the code cleaner.
    self.st_avg = nn.GlobalAvgPool3D()

    self.head = nn.HybridSequential(prefix='')
    self.head.add(nn.Dropout(rate=self.dropout_ratio))
    self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                       weight_initializer=init.Normal(sigma=self.init_std))
    self.head.add(self.fc)

    self.init_weights(ctx)
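# A minimal standalone sketch (separate from the model above, shapes are
# illustrative assumptions) of the trade-off mentioned in the comment:
# ``GlobalAvgPool3D`` averages over whatever extent it receives, while
# ``AvgPool3D`` must match the feature-map size exactly.
from mxnet import nd
from mxnet.gluon import nn

x = nd.random.uniform(shape=(1, 2048, 4, 7, 7))  # (N, C, T, H, W)
g_pool = nn.GlobalAvgPool3D()
a_pool = nn.AvgPool3D(pool_size=(4, 7, 7), strides=1, padding=0)
print(g_pool(x).shape)  # (1, 2048, 1, 1, 1) for any input extent
print(a_pool(x).shape)  # (1, 2048, 1, 1, 1), but only for this exact input size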
    net.add(block1())
    return net

rgnet = nn.Sequential()
rgnet.add(block2())
rgnet.add(nn.Dense(10))
rgnet.initialize()
rgnet(x)
print(rgnet.collect_params)
print(rgnet.collect_params())

# force_reinit ensures that the variables are initialized again, regardless of
# whether they were already initialized previously
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
print(net[0].weight.data()[0])

net.initialize(init=init.Constant(1), force_reinit=True)
print(net[0].weight.data()[0])

net = nn.Sequential()
# We need to give the shared layer a name such that we can reference its
# parameters
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'),
        shared,
        nn.Dense(8, activation='relu', params=shared.params),
        nn.Dense(10))
net.initialize()
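# A quick check, in the spirit of the d2l chapter this snippet follows, that
# the second and third hidden layers really share one parameter set: mutating
# a weight through one layer is visible through the other. The input shape
# (2, 20) is an assumption matching a Dense(8) first layer.
x = nd.random.uniform(shape=(2, 20))
net(x)
print(net[1].weight.data()[0] == net[2].weight.data()[0])  # all True
net[1].weight.data()[0, 0] = 100
print(net[1].weight.data()[0] == net[2].weight.data()[0])  # still all True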
def __init__(self, depth, ctx, pretrained=True, num_features=0, num_classes=0):
    super(ResNet, self).__init__()
    self.pretrained = pretrained
    self.num_classes = num_classes

    with self.name_scope():
        model1 = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features[:-1]
        model1[-1][0].body[0]._kwargs['stride'] = (1, 1)
        model1[-1][0].downsample[0]._kwargs['stride'] = (1, 1)
        model2 = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features[:-1]
        model2[-1][0].body[0]._kwargs['stride'] = (1, 1)
        model2[-1][0].downsample[0]._kwargs['stride'] = (1, 1)
        model3 = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features[:-1]
        model3[-1][0].body[0]._kwargs['stride'] = (1, 1)
        model3[-1][0].downsample[0]._kwargs['stride'] = (1, 1)

        # backbone
        self.base = nn.HybridSequential()
        for m in model1[:-2]:
            self.base.add(m)
        self.base.add(model1[-2][0])

        # branch 1
        self.branch1 = nn.HybridSequential()
        for m in model1[-2][1:]:
            self.branch1.add(m)
        for m in model1[-1]:
            self.branch1.add(m)

        # branch 2
        self.branch2 = nn.HybridSequential()
        for m in model2[-2][1:]:
            self.branch2.add(m)
        for m in model2[-1]:
            self.branch2.add(m)

        # branch 3
        self.branch3 = nn.HybridSequential()
        for m in model3[-2][1:]:
            self.branch3.add(m)
        for m in model3[-1]:
            self.branch3.add(m)

        # local
        self.feat = nn.HybridSequential()
        self.classify = nn.HybridSequential()
        for _ in range(5):
            tmp = nn.HybridSequential()
            tmp.add(nn.GlobalMaxPool2D())
            feat = nn.Conv2D(channels=num_features, kernel_size=1, use_bias=False)
            feat.initialize(init=init.MSRAPrelu('in', 0), ctx=ctx)
            tmp.add(feat)
            bn = nn.BatchNorm()
            bn.initialize(init=init.Zero(), ctx=ctx)
            tmp.add(bn)
            tmp.add(nn.Flatten())
            self.feat.add(tmp)
            classifier = nn.Dense(num_classes, use_bias=False)
            classifier.weight.initialize(init=init.Normal(0.001), ctx=ctx)
            self.classify.add(classifier)

        # global
        self.g_feat = nn.HybridSequential()
        self.g_classify = nn.HybridSequential()
        for _ in range(3):
            tmp = nn.HybridSequential()
            tmp.add(nn.GlobalAvgPool2D())
            feat = nn.Conv2D(channels=num_features, kernel_size=1, use_bias=False)
            feat.initialize(init=init.MSRAPrelu('in', 0), ctx=ctx)
            tmp.add(feat)
            bn = nn.BatchNorm(center=False, scale=True)
            bn.initialize(init=init.Zero(), ctx=ctx)
            tmp.add(bn)
            tmp.add(nn.Flatten())
            self.g_feat.add(tmp)
            classifier = nn.Dense(num_classes, use_bias=False)
            classifier.initialize(init=init.Normal(0.001), ctx=ctx)
            self.g_classify.add(classifier)
def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(model.collect_params(), "sgd",
                            {"learning_rate": lr, "momentum": 0, "wd": 0})

    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = d2l.data_iter_consecutive(corpus_indices, batch_size,
                                              num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            for s in state:
                s.detach()
            with autograd.record():
                (output, state) = model(X, state)
                y = Y.T.reshape((-1,))
                l = loss(output, y).mean()
            l.backward()
            # gradient clipping
            params = [p.data() for p in model.collect_params().values()]
            d2l.grad_clipping(params, clipping_theta, ctx)
            # the loss is already averaged, so no need to average gradients again
            trainer.step(1)
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            print("epoch %d, perplexity %f, time %.2f sec"
                  % (epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(" -", predict_rnn_gluon(prefix, pred_len, model,
                                              vocab_size, ctx, idx_to_char,
                                              char_to_idx))
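# For reference, a minimal sketch of the clipping step ``d2l.grad_clipping``
# performs in the book this snippet follows: rescale all gradients so their
# global L2 norm does not exceed ``theta``. Treat this as an approximation of
# the library function, not its authoritative source.
def grad_clipping(params, theta, ctx):
    norm = nd.array([0], ctx)
    for param in params:
        norm += (param.grad ** 2).sum()
    norm = norm.sqrt().asscalar()
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm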
def __init__(self, nclass, base_model='resnet18_v1b', pretrained_base=True,
             num_segments=8, num_temporal=1, ifTSN=True, input_channel=3,
             batch_normal=True, dropout_ratio=0.8, init_std=0.001, **kwargs):
    super(ECO, self).__init__()
    self.nclass = nclass
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.num_segments = num_segments
    self.ifTSN = ifTSN
    self.input_shape = 224
    self.base_model = base_model
    # ['resnet18_v1b','resnet18_v2','resnet18_v1b_kinetics400','resnet18_v1b_k400_ucf101'][1]

    # for resnet50/101/152, self.expansion == 4
    # self.expansion = 4 if ('resnet50_v1b' in self.base_model) or ('resnet101_v1b' in self.base_model) or ('resnet152_v1b' in self.base_model) else 1
    if 'resnet18_v1b' in self.base_model:
        self.expansion = 1
    elif 'resnet34_v1b' in self.base_model:
        self.expansion = 1
    elif 'resnet50_v1b' in self.base_model:
        self.expansion = 4
    elif 'resnet101_v1b' in self.base_model:
        self.expansion = 4
    elif 'resnet152_v1b' in self.base_model:
        self.expansion = 4
    else:
        self.expansion = 1

    # output dimension of the 2D convolutional stem
    self.feat_dim_2d = 128 * self.expansion

    # num_temporal defaults to 1; following the paper, the temporal dimension
    # is not reduced at the beginning
    if self.num_segments in (8, 16, 32):
        self.num_temporal = num_temporal
    else:
        self.num_temporal = 1

    # input dimension of the fc layer
    if self.ifTSN:
        self.feat_dim_3d = 512
    else:  # Flatten
        tmppara = self.num_segments // 4
        tmppara = tmppara // (self.num_temporal if tmppara > 1 else 1)
        self.feat_dim_3d = 512 * tmppara

    pretrained_model = get_model(self.base_model, pretrained=pretrained_base)

    with self.name_scope():
        # x = nd.zeros(shape=(7x8, 3, 224, 224))
        # 2D feature
        if self.base_model == 'resnet18_v2':
            self.feature2d = pretrained_model.features
        else:  # 'resnet18_v1b' in self.base_model
            self.conv1 = pretrained_model.conv1
            self.bn1 = pretrained_model.bn1
            self.relu = pretrained_model.relu
            self.maxpool = pretrained_model.maxpool
            self.layer1 = pretrained_model.layer1
            self.layer2 = pretrained_model.layer2

        # 3D feature
        self.features_3d = nn.HybridSequential(prefix='')
        # conv3_x
        self.features_3d.add(BasicBlock(in_channel=self.feat_dim_2d, out_channel=128,
                                        spatial_stride=1,
                                        temporal_stride=self.num_temporal))
        self.features_3d.add(BasicBlock(in_channel=128, out_channel=128,
                                        spatial_stride=1, temporal_stride=1))
        # conv4_x
        self.features_3d.add(BasicBlock(in_channel=128, out_channel=256,
                                        spatial_stride=2, temporal_stride=2))
        self.features_3d.add(BasicBlock(in_channel=256, out_channel=256,
                                        spatial_stride=1, temporal_stride=1))
        # conv5_x
        self.features_3d.add(BasicBlock(in_channel=256, out_channel=512,
                                        spatial_stride=2, temporal_stride=2))
        self.features_3d.add(BasicBlock(in_channel=512, out_channel=512,
                                        spatial_stride=1, temporal_stride=1))
        self.features_3d.add(nn.AvgPool3D(pool_size=(1, 7, 7)))

        self.dropout = nn.Dropout(rate=self.dropout_ratio)

        self.output = nn.HybridSequential(prefix='')
        if self.ifTSN:
            self.output.add(
                nn.Dense(units=self.nclass, in_units=512,
                         weight_initializer=init.Normal(sigma=self.init_std)))
        else:
            self.output.add(
                nn.Dense(units=512, in_units=self.feat_dim_3d,
                         weight_initializer=init.Normal(sigma=self.init_std)),
                nn.Dense(units=self.nclass, in_units=512,
                         weight_initializer=init.Normal(sigma=self.init_std)))

        # init
        if pretrained_base:
            self.features_3d.initialize(init.MSRAPrelu())
            self.output.initialize(init.MSRAPrelu())
def main() -> None:
    """
    Main execution of the module.
    """
    # Set up the same initial constraints as our previous linear regression model.
    true_weights = np.array([2, -3.4])
    true_bias = 4.2
    features, targets = d2l.synthetic_data(true_weights, true_bias, 1000)

    batch_size = 10
    data_iterator = load_array((features, targets), batch_size, True)

    # Create a sequential neural network with one output layer. Gluon will infer
    # the input shape the first time data is passed through it.
    net = nn.Sequential()
    net.add(nn.Dense(1))

    # Initialize the weights with a random sample from a normal distribution
    # with a mean of 0 and a standard deviation of 0.01. The bias is initialized
    # to zero by default. The initialization is deferred until the first attempt
    # to pass data through the network.
    net.initialize(init.Normal(sigma=0.01))

    # The squared loss is also known as the L2 norm loss.
    l2_loss = loss.L2Loss()

    # Set up our SGD optimizer through the Trainer class.
    trainer = gluon.Trainer(net.collect_params(), "sgd", {"learning_rate": 0.03})

    num_epochs = 3
    # Training loop
    for epoch in range(1, num_epochs + 1):
        for feature_batch, target_batch in data_iterator:
            with autograd.record():
                predicted_targets = net(feature_batch)
                batch_loss = l2_loss(predicted_targets, target_batch)
            # Compute the gradients for all of our weights and biases. The
            # trainer initialized the parameters for us already, so we don't
            # need to attach gradients manually.
            batch_loss.backward()
            # Stepping with the batch size normalizes the summed gradients to
            # an average over the batch before updating the model.
            trainer.step(batch_size)
        # Compute the overall loss for the epoch.
        epoch_loss = l2_loss(net(features), targets)
        print(f"epoch {epoch}, loss: {epoch_loss.mean().asnumpy()}")

    # Obtain the weights and bias from the first (and only) layer of our model.
    first_layer_weights = net[0].weight.data()
    first_layer_bias = net[0].bias.data()
    print(
        f"Error in estimating the weights: "
        f"{true_weights.reshape(first_layer_weights.shape) - first_layer_weights}"
    )
    print(f"Error in estimating the bias: {true_bias - first_layer_bias}")
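# The snippet defines ``main`` but never calls it; presumably the module ends
# with the usual entry-point guard:
if __name__ == "__main__":
    main()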
# check the gpus
ctx = [mx.gpu(0), mx.gpu(1), mx.gpu(2), mx.gpu(3)]
print(ctx)

# initialize the network
mx.random.seed(SEED)
input_dim = scale * (len(vocabulary) + 1)
net1 = {c: Embedding(input_dim, embedding_dim // len(ctx)) for c in ctx}
net2 = Siamese(embedding_dim)
subembeddings = [
    mx.nd.array(x) for x in np.split(embeddings, len(net1), axis=1)
]
for i, (k, v) in enumerate(net1.items()):
    v.initialize(init=EmbeddingInit(subembeddings[i]), ctx=k)
net2.initialize(init=init.Normal(sigma=0.01), ctx=ctx)

# %% [markdown]
# # Train

# %%
trainer1 = {
    k: gluon.Trainer(v.collect_params(), 'adagrad', {'clip_gradient': 1.25})
    for (k, v) in net1.items()
}
trainer2 = gluon.Trainer(net2.collect_params(), 'adagrad',
                         {'clip_gradient': 1.25})
loss = gluon.loss.L2Loss()

# %%
profiler.set_config(profile_all=True,
dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

# for X, y in data_iter:
#     print(X, y)
#     break

# define model
from mxnet.gluon import nn

net = nn.Sequential()
net.add(nn.Dense(1))  # in gluon, a fully connected layer is a Dense instance

# initialize model params
from mxnet import init

net.initialize(init.Normal(sigma=0.01))  # weights initialized from a normal
                                         # distribution; bias initialized to zero

# define loss function
from mxnet.gluon import loss as gloss

loss = gloss.L2Loss()

# define optimization algorithm
from mxnet import gluon

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

# train the model
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        with autograd.record():
            # the snippet breaks off at the record block; the standard
            # continuation, as in the neighboring examples, is:
            l = loss(net(X), y)
        l.backward()
        trainer.step(batch_size)
from mxnet import npx
from mxnet import gluon
from mxnet import init
from tqdm import tqdm
import mxnet as mx
import numpy as np
from mxnet.optimizer import Adam
from mxnet.gluon.data import DataLoader
from mxnet.gluon.loss import SigmoidBCELoss

from engine import train_generator
from engine import train_discriminator

device = npx.gpu() if npx.num_gpus() > 0 else npx.cpu()

gen = Generator()
gen.collect_params().initialize(init=init.Normal(sigma=0.02),
                                force_reinit=True, ctx=device)
# noise = random.randn(1, 100, 1, 1)
# output = gen(noise)
# print(output.shape)

dis = Discriminator()
dis.collect_params().initialize(init=init.Normal(sigma=0.02),
                                force_reinit=True, ctx=device)
# noise = random.randn(1, 3, 64, 64)
# output = dis(noise)
# print(output.shape)

loss_fn = SigmoidBCELoss()
def __init__(self, nclass, num_segments, fusion_method='avg', num_crop=1,
             input_channel=3, dropout_ratio=0.9, init_std=0.001,
             feat_dim=4096, **kwargs):
    super(DualNet, self).__init__(**kwargs)
    self.nclass = nclass
    self.num_segments = num_segments
    self.feat_dim = feat_dim
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.num_crop = num_crop
    self.fusion_method = fusion_method

    pretrained_model_bgs = vgg16(pretrained=True)
    pretrained_model_fgs = vgg16(pretrained=True)
    vgg16_feature_bgs = pretrained_model_bgs.features
    vgg16_feature_fgs = pretrained_model_fgs.features

    if input_channel == 3:
        self.feature_bgs = vgg16_feature_bgs
        self.feature_fgs = vgg16_feature_fgs
    else:
        raise ValueError('input_channel other than 3 is not supported')

    # change the input channel of the first conv layer
    # self.feature = nn.HybridSequential()
    # with pretrained_model.name_scope():
    #     self.feature.add(nn.Conv2D(in_channels=input_channel, channels=64,
    #                                kernel_size=3, strides=(1, 1), padding=(1, 1)))
    #     self.feature[0].initialize()
    #     for layer in vgg16_feature[1:]:
    #         self.feature.add(layer)

    def update_dropout_ratio(block):
        if isinstance(block, nn.basic_layers.Dropout):
            block._rate = self.dropout_ratio

    self.feature_bgs.apply(update_dropout_ratio)
    self.feature_fgs.apply(update_dropout_ratio)

    if self.fusion_method in ('avg', 'max'):
        self.output = nn.Dense(units=self.nclass, in_units=self.feat_dim,
                               weight_initializer=init.Normal(sigma=self.init_std))
        self.output.initialize()
    elif self.fusion_method in ('out_avg', 'out_max'):
        self.output_fgs = nn.Dense(units=self.nclass, in_units=self.feat_dim,
                                   weight_initializer=init.Normal(sigma=self.init_std))
        self.output_bgs = nn.Dense(units=self.nclass, in_units=self.feat_dim,
                                   weight_initializer=init.Normal(sigma=self.init_std))
        self.output_fgs.initialize()
        self.output_bgs.initialize()
    else:
        raise ValueError('unsupported fusion method')
train_trans = gdata.vision.transforms.Compose([
    gdata.vision.transforms.Resize((224, 224)),
    gdata.vision.transforms.ToTensor()
])
test_trans = gdata.vision.transforms.Compose([
    gdata.vision.transforms.Resize((224, 224)),
    gdata.vision.transforms.ToTensor()
])

# model
pretrained_net = model_zoo.vision.mobilenet_v2_1_0(pretrained=True)
finetune_net = model_zoo.vision.mobilenet_v2_1_0(classes=4)
finetune_net.features = pretrained_net.features
finetune_net.output.initialize(init.Normal(sigma=0.05))

ratio = 250.0 / 224.0

# train function
def train(train_iter, net, loss, trainer, batch_size, num_epochs, ctx):
    """Train and evaluate a model."""
    print('training on', ctx)
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    for epoch in range(num_epochs):
        # print("lr = ", trainer.learning_rate)
        train_l_sum, train_acc_sum, n, m, start = 0.0, 0.0, 0, 0, time.time()
        for i, batch in enumerate(train_iter):
            Xs, ys, batch_size = d2l.utils._get_batch(batch, ctx)
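# A common fine-tuning refinement, not in the original snippet and shown here
# only as a sketch: train the freshly initialized output head with a larger
# learning rate than the reused pretrained features.
finetune_net.output.collect_params().setattr('lr_mult', 10)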
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
print(net[0].weight)
net.initialize()

x = nd.random.uniform(shape=(20, 2))
y = net(x)
print(y)
print(net[0].params['dense0_weight'])
print(net[1].weight)

print(net[0].weight.data()[0])
net.initialize(init=init.Normal(0.01), force_reinit=True)
print(net[0].weight.data()[0])
net[0].initialize(init=init.Normal(1), force_reinit=True)
print(net[0].weight.data()[0])

class MyInit(init.Initializer):
    def _init_weight(self, name, data):
        print('Init', name, data.shape)
        data[:] = nd.random.uniform(low=-10, high=10, shape=data.shape)
        data *= data.abs() >= 5

net.initialize(MyInit(), force_reinit=True)
print(net[0].weight.data()[0])
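# Beyond custom initializers like ``MyInit``, parameters can also be set
# directly; this follows the same d2l chapter as the snippet above.
net[0].weight.data()[:] += 1
net[0].weight.data()[0, 0] = 42
print(net[0].weight.data()[0])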
    batch_size,
    ctx,
    mx_train_data,
    mx_valid_data,
    init_type=init.Uniform(),
    path='uniform')
print('Finished Training Model 1')

print('Training Model 2 with Normal Initialization')
train_loss_hist_m1, train_acc_hist_m1, valid_loss_hist_m1, valid_acc_hist_m1 = model_fit(
    no_epochs,
    batch_size,
    ctx,
    mx_train_data,
    mx_valid_data,
    init_type=init.Normal(),
    path='normal')
print('Finished Training Model 2')

print('\nTraining Model 3 with Xavier Initialization')
train_loss_hist_m2, train_acc_hist_m2, valid_loss_hist_m2, valid_acc_hist_m2 = model_fit(
    no_epochs,
    batch_size,
    ctx,
    mx_train_data,
    mx_valid_data,
    init_type=init.Xavier(),
    path='xavier')
print('Finished Training Model 3')

print('Training Model 4 with Orthogonal Initialization')
def __init__(self, nclass=1000, pretrained=False, pretrained_base=True,
             num_segments=1, num_crop=1, feat_ext=False,
             dropout_ratio=0.5, init_std=0.01, partial_bn=False,
             ctx=None, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    super(I3D_InceptionV3, self).__init__(**kwargs)

    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_dim = 2048
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.feat_ext = feat_ext

    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        self.features.add(_make_basic_conv(in_channels=3, channels=32,
                                           kernel_size=3, strides=2, padding=(1, 0, 0),
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        if partial_bn:
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True
        self.features.add(_make_basic_conv(in_channels=32, channels=32,
                                           kernel_size=3, padding=(1, 0, 0),
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=32, channels=64,
                                           kernel_size=3, padding=1,
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2), padding=(1, 0, 0)))
        self.features.add(_make_basic_conv(in_channels=64, channels=80,
                                           kernel_size=1,
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=80, channels=192,
                                           kernel_size=3, padding=(1, 0, 0),
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2), padding=(1, 0, 0)))
        self.features.add(_make_A(192, 32, 'A1_', norm_layer, norm_kwargs))
        self.features.add(_make_A(256, 64, 'A2_', norm_layer, norm_kwargs))
        self.features.add(_make_A(288, 64, 'A3_', norm_layer, norm_kwargs))
        self.features.add(_make_B('B_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 128, 'C1_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 160, 'C2_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 160, 'C3_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 192, 'C4_', norm_layer, norm_kwargs))
        self.features.add(_make_D('D_', norm_layer, norm_kwargs))
        self.features.add(_make_E(1280, 'E1_', norm_layer, norm_kwargs))
        self.features.add(_make_E(2048, 'E2_', norm_layer, norm_kwargs))
        self.features.add(nn.GlobalAvgPool3D())

        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.output = nn.Dense(units=nclass, in_units=self.feat_dim,
                               weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.output)

        self.features.initialize(ctx=ctx)
        self.head.initialize(ctx=ctx)

        if pretrained_base and not pretrained:
            inceptionv3_2d = inception_v3(pretrained=True)
            weights2d = inceptionv3_2d.collect_params()
            weights3d = self.collect_params()
            assert len(weights2d.keys()) == len(weights3d.keys()), 'Number of parameters should be same.'

            dict2d = {}
            for key_id, key_name in enumerate(weights2d.keys()):
                dict2d[key_id] = key_name
            dict3d = {}
            for key_id, key_name in enumerate(weights3d.keys()):
                dict3d[key_id] = key_name
            dict_transform = {}
            for key_id, key_name in dict3d.items():
                dict_transform[dict2d[key_id]] = key_name

            cnt = 0
            for key2d, key3d in dict_transform.items():
                if 'conv' in key3d:
                    temporal_dim = weights3d[key3d].shape[2]
                    temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                    inflated_2d = nd.broadcast_to(temporal_2d,
                                                  shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                    assert inflated_2d.shape == weights3d[key3d].shape, \
                        'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(inflated_2d)
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'batchnorm' in key3d:
                    assert weights2d[key2d].shape == weights3d[key3d].shape, \
                        'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(weights2d[key2d].data())
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'dense' in key3d:
                    cnt += 1
                    print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)

            assert cnt == len(weights2d.keys()), \
                'Not all parameters have been ported, check the initialization.'
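# The inflation trick above, in isolation: a 2D kernel of shape (out, in, kH, kW)
# is repeated along a new temporal axis and divided by its length, so the
# inflated 3D conv initially responds to a static clip exactly as the 2D net
# did on a single frame. A minimal self-contained sketch (shapes are
# illustrative assumptions):
from mxnet import nd

w2d = nd.random.uniform(shape=(4, 3, 7, 7))         # (out, in, kH, kW)
temporal_dim = 5
w3d = nd.broadcast_to(nd.expand_dims(w2d, axis=2),   # (out, in, T, kH, kW)
                      shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
print(w3d.shape)  # (4, 3, 5, 7, 7)
print(w3d.sum(axis=2)[0, 0, 0, 0], w2d[0, 0, 0, 0])  # temporal sum recovers the 2D weight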
def __init__(self, nclass, block, layers, dropout_ratio=0.5,
             num_segments=1, num_crop=1, feat_ext=False,
             init_std=0.001, ctx=None, partial_bn=False,
             norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    super(R2Plus1D, self).__init__()

    self.partial_bn = partial_bn
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_ext = feat_ext
    self.inplanes = 64
    self.feat_dim = 512 * block.expansion

    with self.name_scope():
        self.conv1 = nn.Conv3D(in_channels=3, channels=45,
                               kernel_size=(1, 7, 7), strides=(1, 2, 2),
                               padding=(0, 3, 3), use_bias=False)
        self.bn1 = norm_layer(in_channels=45,
                              **({} if norm_kwargs is None else norm_kwargs))
        self.relu = nn.Activation('relu')
        self.conv2 = conv3x1x1(in_planes=45, out_planes=64)
        self.bn2 = norm_layer(in_channels=64,
                              **({} if norm_kwargs is None else norm_kwargs))

        if self.partial_bn:
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True

        self.layer1 = self._make_res_layer(block=block, planes=64,
                                           blocks=layers[0],
                                           layer_name='layer1_')
        self.layer2 = self._make_res_layer(block=block, planes=128,
                                           blocks=layers[1], stride=2,
                                           layer_name='layer2_')
        self.layer3 = self._make_res_layer(block=block, planes=256,
                                           blocks=layers[2], stride=2,
                                           layer_name='layer3_')
        self.layer4 = self._make_res_layer(block=block, planes=512,
                                           blocks=layers[3], stride=2,
                                           layer_name='layer4_')

        self.avgpool = nn.GlobalAvgPool3D()
        self.dropout = nn.Dropout(rate=self.dropout_ratio)
        self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                           weight_initializer=init.Normal(sigma=self.init_std))
def __init__(self, nclass, dropout_ratio=0.5, num_segments=1, num_crop=1,
             feat_ext=False, init_std=0.001, ctx=None, **kwargs):
    super(C3D, self).__init__()

    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_ext = feat_ext
    self.feat_dim = 8192

    with self.name_scope():
        self.conv1 = nn.Conv3D(in_channels=3, channels=64,
                               kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool1 = nn.MaxPool3D(pool_size=(1, 2, 2), strides=(1, 2, 2))

        self.conv2 = nn.Conv3D(in_channels=64, channels=128,
                               kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool2 = nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2))

        self.conv3a = nn.Conv3D(in_channels=128, channels=256,
                                kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv3b = nn.Conv3D(in_channels=256, channels=256,
                                kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool3 = nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2))

        self.conv4a = nn.Conv3D(in_channels=256, channels=512,
                                kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv4b = nn.Conv3D(in_channels=512, channels=512,
                                kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool4 = nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2))

        self.conv5a = nn.Conv3D(in_channels=512, channels=512,
                                kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv5b = nn.Conv3D(in_channels=512, channels=512,
                                kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool5 = nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                                  padding=(0, 1, 1))

        self.fc6 = nn.Dense(in_units=8192, units=4096,
                            weight_initializer=init.Normal(sigma=init_std))
        self.fc7 = nn.Dense(in_units=4096, units=4096,
                            weight_initializer=init.Normal(sigma=init_std))
        self.fc8 = nn.Dense(in_units=4096, units=nclass,
                            weight_initializer=init.Normal(sigma=init_std))

        self.dropout = nn.Dropout(rate=dropout_ratio)
        self.relu = nn.Activation('relu')
true_w = [2, -3.4]
true_b = 4.2
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)

batch_size = 10
dataset = gdata.ArrayDataset(features, labels)
# read random mini-batches
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

net = nn.Sequential()
net.add(nn.Dense(1))  # Dense is a fully connected layer; this one has a single output

# initialize model parameters: each weight element is sampled at
# initialization from a normal distribution with mean 0 and standard
# deviation 0.01
net.initialize(init.Normal(sigma=0.01))

# define the loss function: squared loss, also known as L2 norm loss
loss = gloss.L2Loss()

# define the optimization algorithm: mini-batch stochastic gradient descent
# ('sgd') with a learning rate of 0.03; the parameters to optimize are
# obtained via collect_params
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

# train the model
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        # step updates the parameters; passing the batch size averages the
        # per-sample gradients over the batch
        trainer.step(batch_size)
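# A quick sanity check, in the same style as the other linear-regression
# snippets in this collection: the learned parameters should land close to
# true_w and true_b.
dense = net[0]
print(true_w, dense.weight.data())
print(true_b, dense.bias.data())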
test_images = load_test_images(test_images_idx3_ubyte_file) / 255
test_labels = load_test_labels(test_labels_idx1_ubyte_file)

import mxnet as mx
from mxnet import gluon, autograd, nd, init
from mxnet.gluon import nn
from mxnet.gluon import loss as gloss
from mxnet.gluon import data as gdata

net = nn.Sequential()
net.add(nn.Conv2D(16, (5, 5), strides=(2, 2), activation="relu"))
net.add(nn.Conv2D(32, (5, 5), activation="relu"))
net.add(nn.Flatten())
net.add(nn.Dense(128, activation="relu"))
# no activation on the output layer: SoftmaxCrossEntropyLoss expects raw
# logits, and the ReLU the original placed here would zero out all negative
# scores
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01), ctx=mx.gpu())

loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

num_epochs = 100000
batch_size = 1000
dataset = gdata.ArrayDataset(
    nd.array(train_images.reshape(train_images.shape[0], 1,
                                  train_images.shape[1],
                                  train_images.shape[2]),
             ctx=mx.gpu(), dtype=np.float32),
def __init__(self, nclass, block, layers, shortcut_type='B',
             block_design=('A', 'B', 'C'), dropout_ratio=0.5,
             num_segments=1, num_crop=1, feat_ext=False,
             init_std=0.001, ctx=None, partial_bn=False,
             norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    super(P3D, self).__init__()

    self.shortcut_type = shortcut_type
    self.block_design = block_design
    self.partial_bn = partial_bn
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_ext = feat_ext
    self.inplanes = 64
    self.feat_dim = 512 * block.expansion

    with self.name_scope():
        self.conv1 = nn.Conv3D(in_channels=3, channels=64,
                               kernel_size=(1, 7, 7), strides=(1, 2, 2),
                               padding=(0, 3, 3), use_bias=False)
        self.bn1 = norm_layer(in_channels=64,
                              **({} if norm_kwargs is None else norm_kwargs))
        self.relu = nn.Activation('relu')
        self.pool = nn.MaxPool3D(pool_size=(2, 3, 3), strides=2, padding=(0, 1, 1))
        self.pool2 = nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1), padding=0)

        if self.partial_bn:
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True

        # 3D layers are only for layer1, layer2 and layer3; layer4 is C2D
        self.depth_3d = sum(layers[:3])
        self.layer_cnt = 0

        self.layer1 = self._make_res_layer(block=block, planes=64,
                                           blocks=layers[0],
                                           layer_name='layer1_')
        self.layer2 = self._make_res_layer(block=block, planes=128,
                                           blocks=layers[1], spatial_stride=2,
                                           layer_name='layer2_')
        self.layer3 = self._make_res_layer(block=block, planes=256,
                                           blocks=layers[2], spatial_stride=2,
                                           layer_name='layer3_')
        self.layer4 = self._make_res_layer(block=block, planes=512,
                                           blocks=layers[3], spatial_stride=2,
                                           layer_name='layer4_')

        self.avgpool = nn.GlobalAvgPool2D()
        self.dropout = nn.Dropout(rate=self.dropout_ratio)
        self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                           weight_initializer=init.Normal(sigma=self.init_std))
def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(force_reinit=True, ctx=ctx, init=init.Normal())
def __init__(self, nclass, block=Bottleneck, layers=None,
             num_block_temp_kernel_fast=None, num_block_temp_kernel_slow=None,
             pretrained=False, pretrained_base=False, feat_ext=False,
             num_segments=1, num_crop=1, bn_eval=True, bn_frozen=False,
             partial_bn=False, frozen_stages=-1, dropout_ratio=0.5,
             init_std=0.01, alpha=8, beta_inv=8, fusion_conv_channel_ratio=2,
             fusion_kernel_size=5, width_per_group=64, num_groups=1,
             slow_temporal_stride=16, fast_temporal_stride=2,
             slow_frames=4, fast_frames=32,
             norm_layer=BatchNorm, norm_kwargs=None, ctx=None, **kwargs):
    super(SlowFast, self).__init__()

    self.num_segments = num_segments
    self.num_crop = num_crop
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.alpha = alpha
    self.beta_inv = beta_inv
    self.fusion_conv_channel_ratio = fusion_conv_channel_ratio
    self.fusion_kernel_size = fusion_kernel_size
    self.width_per_group = width_per_group
    self.num_groups = num_groups
    self.dim_inner = self.num_groups * self.width_per_group
    self.out_dim_ratio = self.beta_inv // self.fusion_conv_channel_ratio
    self.slow_temporal_stride = slow_temporal_stride
    self.fast_temporal_stride = fast_temporal_stride
    self.slow_frames = slow_frames
    self.fast_frames = fast_frames
    self.feat_ext = feat_ext

    with self.name_scope():
        # build fast pathway
        fast = nn.HybridSequential(prefix='fast_')
        with fast.name_scope():
            self.fast_conv1 = nn.Conv3D(in_channels=3,
                                        channels=self.width_per_group // self.beta_inv,
                                        kernel_size=(5, 7, 7), strides=(1, 2, 2),
                                        padding=(2, 3, 3), use_bias=False)
            self.fast_bn1 = norm_layer(
                in_channels=self.width_per_group // self.beta_inv,
                **({} if norm_kwargs is None else norm_kwargs))
            self.fast_relu = nn.Activation('relu')
            self.fast_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3),
                                             strides=(1, 2, 2), padding=(0, 1, 1))
            self.fast_res2 = self._make_layer_fast(
                inplanes=self.width_per_group // self.beta_inv,
                planes=self.dim_inner // self.beta_inv,
                num_blocks=layers[0], head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='fast_res2_')
            self.fast_res3 = self._make_layer_fast(
                inplanes=self.width_per_group * 4 // self.beta_inv,
                planes=self.dim_inner * 2 // self.beta_inv,
                num_blocks=layers[1], strides=2, head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='fast_res3_')
            self.fast_res4 = self._make_layer_fast(
                inplanes=self.width_per_group * 8 // self.beta_inv,
                planes=self.dim_inner * 4 // self.beta_inv,
                num_blocks=layers[2],
                num_block_temp_kernel_fast=num_block_temp_kernel_fast,
                strides=2, head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='fast_res4_')
            self.fast_res5 = self._make_layer_fast(
                inplanes=self.width_per_group * 16 // self.beta_inv,
                planes=self.dim_inner * 8 // self.beta_inv,
                num_blocks=layers[3], strides=2, head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='fast_res5_')

        # build lateral connections
        self.lateral_p1 = nn.HybridSequential(prefix='lateral_p1_')
        with self.lateral_p1.name_scope():
            self.lateral_p1.add(
                nn.Conv3D(in_channels=self.width_per_group // self.beta_inv,
                          channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                          kernel_size=(self.fusion_kernel_size, 1, 1),
                          strides=(self.alpha, 1, 1),
                          padding=(self.fusion_kernel_size // 2, 0, 0),
                          use_bias=False))
            self.lateral_p1.add(
                norm_layer(in_channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                           **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_p1.add(nn.Activation('relu'))

        self.lateral_res2 = nn.HybridSequential(prefix='lateral_res2_')
        with self.lateral_res2.name_scope():
            self.lateral_res2.add(
                nn.Conv3D(in_channels=self.width_per_group * 4 // self.beta_inv,
                          channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                          kernel_size=(self.fusion_kernel_size, 1, 1),
                          strides=(self.alpha, 1, 1),
                          padding=(self.fusion_kernel_size // 2, 0, 0),
                          use_bias=False))
            self.lateral_res2.add(
                norm_layer(in_channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                           **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res2.add(nn.Activation('relu'))

        self.lateral_res3 = nn.HybridSequential(prefix='lateral_res3_')
        with self.lateral_res3.name_scope():
            self.lateral_res3.add(
                nn.Conv3D(in_channels=self.width_per_group * 8 // self.beta_inv,
                          channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                          kernel_size=(self.fusion_kernel_size, 1, 1),
                          strides=(self.alpha, 1, 1),
                          padding=(self.fusion_kernel_size // 2, 0, 0),
                          use_bias=False))
            self.lateral_res3.add(
                norm_layer(in_channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                           **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res3.add(nn.Activation('relu'))

        self.lateral_res4 = nn.HybridSequential(prefix='lateral_res4_')
        with self.lateral_res4.name_scope():
            self.lateral_res4.add(
                nn.Conv3D(in_channels=self.width_per_group * 16 // self.beta_inv,
                          channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                          kernel_size=(self.fusion_kernel_size, 1, 1),
                          strides=(self.alpha, 1, 1),
                          padding=(self.fusion_kernel_size // 2, 0, 0),
                          use_bias=False))
            self.lateral_res4.add(
                norm_layer(in_channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                           **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res4.add(nn.Activation('relu'))

        # build slow pathway
        slow = nn.HybridSequential(prefix='slow_')
        with slow.name_scope():
            self.slow_conv1 = nn.Conv3D(in_channels=3,
                                        channels=self.width_per_group,
                                        kernel_size=(1, 7, 7), strides=(1, 2, 2),
                                        padding=(0, 3, 3), use_bias=False)
            self.slow_bn1 = norm_layer(
                in_channels=self.width_per_group,
                **({} if norm_kwargs is None else norm_kwargs))
            self.slow_relu = nn.Activation('relu')
            self.slow_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3),
                                             strides=(1, 2, 2), padding=(0, 1, 1))
            self.slow_res2 = self._make_layer_slow(
                inplanes=self.width_per_group + self.width_per_group // self.out_dim_ratio,
                planes=self.dim_inner,
                num_blocks=layers[0], head_conv=1,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='slow_res2_')
            self.slow_res3 = self._make_layer_slow(
                inplanes=self.width_per_group * 4 + self.width_per_group * 4 // self.out_dim_ratio,
                planes=self.dim_inner * 2,
                num_blocks=layers[1], strides=2, head_conv=1,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='slow_res3_')
            self.slow_res4 = self._make_layer_slow(
                inplanes=self.width_per_group * 8 + self.width_per_group * 8 // self.out_dim_ratio,
                planes=self.dim_inner * 4,
                num_blocks=layers[2],
                num_block_temp_kernel_slow=num_block_temp_kernel_slow,
                strides=2, head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='slow_res4_')
            self.slow_res5 = self._make_layer_slow(
                inplanes=self.width_per_group * 16 + self.width_per_group * 16 // self.out_dim_ratio,
                planes=self.dim_inner * 8,
                num_blocks=layers[3], strides=2, head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='slow_res5_')

        # build classifier
        self.avg = nn.GlobalAvgPool3D()
        self.dp = nn.Dropout(rate=self.dropout_ratio)
        self.feat_dim = self.width_per_group * 32 // self.beta_inv + self.width_per_group * 32
        self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                           weight_initializer=init.Normal(sigma=self.init_std),
                           use_bias=True)

    self.initialize(init.MSRAPrelu(), ctx=ctx)
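# With the default width_per_group=64 and beta_inv=8, the classifier input is
# the concatenation of the fast pathway (64 * 32 // 8 = 256 channels) and the
# slow pathway (64 * 32 = 2048 channels), so feat_dim = 2304. A quick check:
width_per_group, beta_inv = 64, 8
assert width_per_group * 32 // beta_inv + width_per_group * 32 == 2304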
def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(force_reinit=True, ctx=ctx, init=init.Normal(sigma=0.01))
    # the original optimizer dict was cut off after 'momentum'; filled in to
    # match the full version of this function earlier in this collection
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0, 'wd': 0})
# which is used for finding the best reconstruction
x_recon_batch = nd.zeros((batch_size, 3, 64, 64))
x_recon_loss = nd.ones((batch_size,)) * 100000

# Use a different initialization of z for each restart
for restart in range(num_random_restarts):
    tic = time.time()
    # train_last_loss must start non-zero, since the stopping criterion below
    # divides by it (it was commented out in the original, which would raise
    # a NameError)
    train_last_loss = 2.
    train_curr_loss = 0.1

    # Put z into the dict of parameters to be optimized.
    # Only z will be updated in this algorithm.
    paramdict = gluon.ParameterDict('noise')
    paramdict.get('z', shape=(batch_size, n_z, 1, 1),
                  init=init.Normal(1))  # default sigma is 0.01
    paramdict.initialize(ctx=ctx)
    z = paramdict.get('z').data()
    trainer = gluon.Trainer(paramdict, 'Adam', {'learning_rate': learn_rate})

    # Define losses
    recon_loss = dcgan.Recon_Loss()
    z_loss = dcgan.Z_Loss()

    # Optimization process: find the best z
    for epoch in range(total_epoch):
        if abs(train_last_loss - train_curr_loss) / train_last_loss < 1e-3:
            break
        with autograd.record():
def __init__(self, nclass=1000, norm_layer=BatchNorm, num_segments=1,
             norm_kwargs=None, partial_bn=False, pretrained_base=True,
             dropout_ratio=0.5, init_std=0.01, ctx=None, **kwargs):
    super(I3D_InceptionV1, self).__init__(**kwargs)

    self.num_segments = num_segments
    self.feat_dim = 1024
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std

    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        self.features.add(_make_basic_conv(in_channels=3, channels=64,
                                           kernel_size=7, strides=2, padding=3,
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2),
                                       padding=(0, 1, 1)))
        if partial_bn:
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True
        self.features.add(_make_basic_conv(in_channels=64, channels=64,
                                           kernel_size=1,
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=64, channels=192,
                                           kernel_size=3, padding=(1, 1, 1),
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2),
                                       padding=(0, 1, 1)))
        self.features.add(_make_Mixed_3a(192, 32, 'Mixed_3a_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_3b(256, 64, 'Mixed_3b_', norm_layer, norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(2, 2, 2), padding=(1, 1, 1)))
        self.features.add(_make_Mixed_4a(480, 64, 'Mixed_4a_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4b(512, 64, 'Mixed_4b_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4c(512, 64, 'Mixed_4c_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4d(512, 64, 'Mixed_4d_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4e(528, 128, 'Mixed_4e_', norm_layer, norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=2, strides=(2, 2, 2)))
        self.features.add(_make_Mixed_5a(832, 128, 'Mixed_5a_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_5b(832, 128, 'Mixed_5b_', norm_layer, norm_kwargs))
        self.features.add(nn.GlobalAvgPool3D())

        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.output = nn.Dense(units=nclass, in_units=self.feat_dim,
                               weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.output)

        self.features.initialize(ctx=ctx)
        self.head.initialize(ctx=ctx)

        if pretrained_base:
            inceptionv1_2d = googlenet(pretrained=True)
            weights2d = inceptionv1_2d.collect_params()
            weights3d = self.collect_params()
            assert len(weights2d.keys()) == len(weights3d.keys()), 'Number of parameters should be same.'

            dict2d = {}
            for key_id, key_name in enumerate(weights2d.keys()):
                dict2d[key_id] = key_name
            dict3d = {}
            for key_id, key_name in enumerate(weights3d.keys()):
                dict3d[key_id] = key_name
            dict_transform = {}
            for key_id, key_name in dict3d.items():
                dict_transform[dict2d[key_id]] = key_name

            cnt = 0
            for key2d, key3d in dict_transform.items():
                if 'conv' in key3d:
                    temporal_dim = weights3d[key3d].shape[2]
                    temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                    inflated_2d = nd.broadcast_to(temporal_2d,
                                                  shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                    assert inflated_2d.shape == weights3d[key3d].shape, \
                        'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(inflated_2d)
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'batchnorm' in key3d:
                    assert weights2d[key2d].shape == weights3d[key3d].shape, \
                        'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(weights2d[key2d].data())
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'dense' in key3d:
                    cnt += 1
                    print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)

            assert cnt == len(weights2d.keys()), \
                'Not all parameters have been ported, check the initialization.'
    with net.name_scope():
        net.add(nn.Dense(10))
        net.add(nn.Dense(100))
    return net

x = nd.random.normal(shape=(3, 5))
net = get_net()
params = net.collect_params()
net.params
print(params)

net.initialize()
y = net(x)
print(y)

w = net[0].weight
b = net[0].bias
print('weight:', w.data())
print('bias:', b.data())
# the gradients are all zeros here, since backward has not been run yet
print('w grad:', w.grad())
print('bias grad:', b.grad())

from mxnet import init

params = net.collect_params()
print(params)
params.initialize(init=init.Normal(sigma=0.2), force_reinit=True)
print(net[0].weight.data(), net[0].bias.data())
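# To actually populate the gradients printed above, run a forward/backward
# pass under autograd; a minimal sketch using the same net and x:
from mxnet import autograd

with autograd.record():
    y = net(x)
y.backward()
print('w grad after backward:', net[0].weight.grad())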
import mxnet as mx
from mxnet import gluon, init, nd, autograd
from mxnet.gluon import data as gdata, nn, loss as gloss, utils as gutils
import numpy as np
import math
import matplotlib.pyplot as plt

layer_num = 100
net = nn.Sequential()
for i in range(layer_num):
    net.add(nn.Dense(100, activation="tanh", use_bias=False))
    net.add(nn.BatchNorm())
net.initialize(force_reinit=True, init=init.Normal())

X = nd.random.uniform(-1, 1, (128, 100))
var = []
for i in range(20):
    y0 = net[:i](X)
    y0 = y0.asnumpy()
    v = math.log(np.var(y0))
    var.append(v)
print(var)

plt.plot(range(20), var, label='Normal')
plt.show()
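# The same variance probe with Xavier initialization makes for a natural
# comparison; this extra run is a sketch, not part of the original experiment.
net.initialize(force_reinit=True, init=init.Xavier())
var_xavier = [math.log(np.var(net[:i](X).asnumpy())) for i in range(20)]
plt.plot(range(20), var, label='Normal')
plt.plot(range(20), var_xavier, label='Xavier')
plt.legend()
plt.show()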
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)

# 2. read the dataset in mini-batches
batch_size = 10
# combine the features and labels of the training data
dataset = gdata.ArrayDataset(features, labels)
# read random mini-batches
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

# define our model
net = nn.Sequential()  # a Sequential instance is a container chaining layers together
net.add(nn.Dense(1))  # add a fully connected layer: the linear regression output layer

# initialize model parameters (weights and bias)
net.initialize(init.Normal(sigma=0.01))

# define loss function
loss = gloss.L2Loss()  # squared loss, also known as L2 norm loss

# define optimization algorithm
trainer = mx.gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01})

# train the model for three epochs
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
from mxnet.gluon import nn  # nn: neural networks
# nn defines a large number of neural network layers; Sequential is a
# container chaining layers together
net = nn.Sequential()
# in linear regression the output layer is a fully connected layer,
# i.e. a Dense instance
net.add(nn.Dense(1))

# initialize model parameters
# before using net, the model parameters must be initialized; mxnet provides
# the initializer module for this
from mxnet import init

net.initialize(init.Normal(sigma=0.01))

# loss function
from mxnet.gluon import loss as gloss

loss = gloss.L2Loss()

# optimization algorithm
from mxnet import gluon

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

from mxnet import autograd

# train the model
num_epochs = 3
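# The snippet breaks off right before the loop; the standard epoch loop used
# by the other linear-regression examples in this collection would follow,
# assuming data_iter, batch_size, features and labels are defined as they are
# in those examples:
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        trainer.step(batch_size)
    l = loss(net(features), labels)
    print('epoch %d, loss: %f' % (epoch, l.mean().asnumpy()))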
plt.plot(np.arange(len(loss_train)), loss_test, label="loss_test", color='blue')
plt.legend(loc='upper right')
plt.show()

#%%
X_train, X_test, y_train, y_test = load_data()

#%%
net = gluon.nn.Sequential()
net.add(gluon.nn.Dense(1))
net.add(gluon.nn.Activation('sigmoid'))
net.initialize(init.Normal())

loss_fn = gluon.loss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.05})

epoches = 15
batch_size = 100
dataset = gluon.data.ArrayDataset(X_train, y_train)
data_iterator = gluon.data.DataLoader(dataset, batch_size, shuffle=True)

start = time.time()
loss_train = []
loss_test = []

#%%
for epoch in range(epoches):
    print("[INFO] epoch %s is running..." % epoch)
    for batch_x, batch_y in data_iterator: