def _get_decoder(self):
    output = nn.Dense(self._vocab_size, prefix='decoder0_')
    return output
from mxnet import nd, init, autograd
from mxnet.gluon import nn, loss as gloss

net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()

X = nd.random.uniform(shape=(2, 20))
Y = net(X)

# print(X)
print(net[0].params)
print(type(net[0].params))
print(net[0].weight)
print(type(net[0].weight))
# print(net[1].params, type(net[1].params))
# print(net[0].params['dense0_weight'] == net[0].weight)
# print(net[0].weight.data())
# print(net[0].weight.grad())
# print(net[1].bias.data())
# print(net.collect_params())
# print(net.collect_params('.*weight'))
# X.attach_grad()
# with autograd.record():
#     y_hat = net(X)
# y_hat.backward()
# print(net[0].weight.grad())
def __init__(self, rnn_layer, vocab_size, **kwargs):
    super(RNNModel, self).__init__(**kwargs)
    self.rnn = rnn_layer
    self.vocab_size = vocab_size
    self.dense = nn.Dense(vocab_size)
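# The forward pass is not shown above; the following is a minimal sketch
# assuming the common d2l-style RNNModel usage (inputs of shape (N, T), hidden
# state threaded through, `from mxnet import nd` in scope). This is an
# assumption, not the original author's code.
def forward(self, inputs, state):
    # Transpose to (T, N) and one-hot encode to (T, N, vocab_size).
    X = nd.one_hot(inputs.T, self.vocab_size)
    Y, state = self.rnn(X, state)
    # Collapse time and batch so the Dense layer scores every step at once.
    output = self.dense(Y.reshape((-1, Y.shape[-1])))
    return output, state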
def __init__(self, query_units, num_heads, pos_embed_units: Optional[int] = None,
             max_distance=None, bidirectional=False, num_buckets=None,
             method='transformer_xl', dropout: float = 0.0,
             dtype='float32', layout='NTK', use_einsum=False):
    """
    Parameters
    ----------
    query_units
    num_heads
    pos_embed_units
    max_distance
    bidirectional
    num_buckets
    method
    dropout
    dtype
    layout
    use_einsum
    """
    super().__init__()
    self._dropout = dropout
    self._method = method
    self._query_units = query_units
    self._num_heads = num_heads
    self._bidirectional = bidirectional
    self._num_buckets = num_buckets
    assert query_units % num_heads == 0, 'The units must be divisible by the number of heads.'
    self._head_query_units = query_units // num_heads
    self._max_distance = max_distance
    self._pos_embed_units = pos_embed_units
    self._dtype = dtype
    self._use_einsum = use_einsum
    self._layout = layout
    if self._layout not in ['NKT', 'NTK', 'TNK']:
        raise ValueError('layout="{}" is not supported'.format(self._layout))
    if method == 'transformer_xl':
        if pos_embed_units is None:
            pos_embed_units = self._num_heads * self._head_query_units
        self._rel_pos_embed = SinusoidalPositionalEmbedding(units=pos_embed_units,
                                                            dtype=self._dtype)
        self._rel_proj = nn.Dense(units=query_units,
                                  in_units=pos_embed_units,
                                  flatten=False,
                                  use_bias=False,
                                  dtype=self._dtype)
        self._dropout_layer = nn.Dropout(dropout)
    elif method == 'shaw':
        assert self._max_distance is not None, 'Must set max_distance when method="shaw".'
        if self._bidirectional:
            vocab_size = self._max_distance * 2 + 1
        else:
            vocab_size = self._max_distance + 1
        self._rel_pos_embed = LearnedPositionalEmbedding(
            units=self._num_heads * self._head_query_units,
            max_length=vocab_size,
            weight_initializer=mx.init.Xavier(rnd_type="gaussian",
                                              factor_type="in",
                                              magnitude=1),
            mode='wrap' if self._bidirectional else 'raise',
            dtype=self._dtype)
    elif method == 't5':
        if self._num_buckets is None:
            self._num_buckets = 32
        if self._max_distance is None:
            self._max_distance = 128
        self._rel_pos_embed = BucketPositionalEmbedding(
            units=num_heads,
            num_buckets=self._num_buckets,
            max_distance=self._max_distance,
            bidirectional=self._bidirectional,
            dtype=self._dtype)
    else:
        raise NotImplementedError('method="{}" is currently not supported!'.format(method))
N = X_tr.shape[0]
p = X_tr.shape[1]
X_train = nd.array(X_tr)
y_train = nd.array(y_tr)
batch_size = 200
train_dataset = ArrayDataset(X_train, y_train)
train_data = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
X_test = nd.array(X_te).as_in_context(ctx)
y_test = nd.array(y_te).as_in_context(ctx)

net = nn.Sequential()
net.add(nn.Dense(1000, activation='relu'),
        nn.Dense(1, activation=None))
net.initialize(mx.init.Xavier(), ctx=ctx)

# Run a single batch through the network so the deferred parameter shapes are
# materialized before we read them below.
for i, (data, label) in enumerate(train_data):
    aa = net(data.as_in_context(ctx))
    break

net_initial = net  # note: a reference to the same network, not a copy


def init_masks_percents(net, p):
    masks = {}
    percents = {}
    for i, layer in enumerate(net):
        masks[i] = nd.ones(layer.weight.data().shape).as_in_context(ctx)
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 20 09:46:48 2020

@author: DER
"""
import d2lzh as d2l
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn

"""
3.10.1 Define the model
"""
net = nn.Sequential()
net.add(nn.Dense(256, activation="relu"), nn.Dense(10))
print(net)
net.initialize(init.Normal(sigma=0.01))

"""
3.10.2 Train the model
"""
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), "sgd", {"learning_rate": 0.5})
num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)
def __init__(self, action_dim):
    super(Actor, self).__init__()
    self.action_dim = action_dim
    self.dense0 = nn.Dense(400, activation='relu')
    self.dense1 = nn.Dense(300, activation='relu')
    self.dense2 = nn.Dense(self.action_dim)
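# The Actor's forward pass is not shown; a minimal sketch under the assumption
# of a DDPG-style policy whose actions are bounded by a final tanh (an
# assumption, not the original author's code; assumes `from mxnet import nd`):
# def forward(self, state):
#     x = self.dense1(self.dense0(state))
#     return nd.tanh(self.dense2(x))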
def __init__(self, **kwargs):
    super(TailNegBlock, self).__init__(**kwargs)
    self.fc1 = nn.Dense(10, flatten=True)
    self.fc2 = nn.Dense(10, flatten=True)
def __init__(self, bert, prefix=None, params=None):
    super(BertForQA, self).__init__(prefix=prefix, params=params)
    self.bert = bert
    with self.name_scope():
        self.span_classifier = nn.Dense(units=2, flatten=False)
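# The forward pass is not shown above; a minimal sketch assuming the usual
# GluonNLP BertForQA pattern (argument names here are assumptions): run the
# BERT backbone, then score every token position for span start/end.
def forward(self, inputs, token_types, valid_length=None):
    # bert_output: (batch_size, seq_length, hidden_size)
    bert_output = self.bert(inputs, token_types, valid_length)
    # span logits: (batch_size, seq_length, 2), i.e. start/end scores per token
    return self.span_classifier(bert_output)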
def __init__(self, **kwargs):
    super(MLP, self).__init__(**kwargs)
    self.hidden = nn.Dense(256, activation='relu')
    self.output = nn.Dense(10)
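# A minimal forward sketch for this MLP (an assumption; the original forward is
# not shown): feed the hidden layer's activations into the output layer.
def forward(self, x):
    return self.output(self.hidden(x))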
def __init__(self, vocab_size=5, hidden_units=4):
    super().__init__()
    self.x2h_map = nn.Embedding(input_dim=vocab_size, output_dim=hidden_units)
    self.h2h_map = nn.Dense(units=hidden_units, flatten=False)
    self.vocab_map = nn.Dense(units=vocab_size, flatten=False)
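# One recurrence step these three layers suggest, as a hypothetical sketch
# (`step` is my name, not the original author's; assumes `from mxnet import nd`):
# embed the token, mix in the previous hidden state, and map the new state back
# to vocabulary logits.
def step(self, token, prev_h):
    h = nd.tanh(self.x2h_map(token) + self.h2h_map(prev_h))
    logits = self.vocab_map(h)
    return logits, h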
    loss.backward()
    trainer.step(data.shape[0])


# Your first model should be a sequential network with 3 layers. Your first
# layer should have 16 hidden units, the second should have 8 hidden units, and
# the last layer should have the correct number of output units for the
# classification task at hand. You should add ReLU activations on all hidden
# layers, but not on the output layer. You should define `network` in the cell
# below.
#
# **Hint**: You'll find classes in the `mxnet.gluon.nn` subpackage useful for
# this task.

# In[47]:

# YOUR CODE HERE
from mxnet.gluon import nn

network = nn.Sequential()
network.add(
    nn.Dense(16, activation='relu'),
    nn.Dense(8, activation='relu'),
    nn.Dense(10)
)
#raise NotImplementedError()

# In[48]:

assert isinstance(network, mx.gluon.nn.Sequential)
assert len(network) == 3
assert isinstance(network[0], mx.gluon.nn.Dense)
assert network[0].act.name.endswith('relu')
assert network[0].weight.shape[0] == 16
assert isinstance(network[1], mx.gluon.nn.Dense)
def __init__(self, channels, init_block_channels, final_block_channels, residuals,
             shortcuts, kernel_sizes, expansions, bn_epsilon=1e-3,
             bn_use_global_stats=False, in_channels=3, in_size=(224, 224),
             classes=1000):
    super(ProxylessNAS, self).__init__()
    self.in_size = in_size
    self.classes = classes

    with self.name_scope():
        self.features = nn.HybridSequential(prefix="")
        self.features.add(
            conv3x3_block(in_channels=in_channels,
                          out_channels=init_block_channels,
                          strides=2,
                          bn_epsilon=bn_epsilon,
                          bn_use_global_stats=bn_use_global_stats,
                          activation="relu6"))
        in_channels = init_block_channels
        for i, channels_per_stage in enumerate(channels):
            stage = nn.HybridSequential(prefix="stage{}_".format(i + 1))
            residuals_per_stage = residuals[i]
            shortcuts_per_stage = shortcuts[i]
            kernel_sizes_per_stage = kernel_sizes[i]
            expansions_per_stage = expansions[i]
            with stage.name_scope():
                for j, out_channels in enumerate(channels_per_stage):
                    residual = (residuals_per_stage[j] == 1)
                    shortcut = (shortcuts_per_stage[j] == 1)
                    kernel_size = kernel_sizes_per_stage[j]
                    expansion = expansions_per_stage[j]
                    strides = 2 if (j == 0) and (i != 0) else 1
                    stage.add(
                        ProxylessUnit(
                            in_channels=in_channels,
                            out_channels=out_channels,
                            kernel_size=kernel_size,
                            strides=strides,
                            bn_epsilon=bn_epsilon,
                            bn_use_global_stats=bn_use_global_stats,
                            expansion=expansion,
                            residual=residual,
                            shortcut=shortcut))
                    in_channels = out_channels
            self.features.add(stage)
        self.features.add(
            conv1x1_block(in_channels=in_channels,
                          out_channels=final_block_channels,
                          bn_epsilon=bn_epsilon,
                          bn_use_global_stats=bn_use_global_stats,
                          activation="relu6"))
        in_channels = final_block_channels
        self.features.add(nn.AvgPool2D(pool_size=7, strides=1))

        self.output = nn.HybridSequential(prefix="")
        self.output.add(nn.Flatten())
        self.output.add(nn.Dense(units=classes, in_units=in_channels))
import matplotlib.pyplot as plt
import numpy as np
import mxnet as mx
from mxnet import nd, gluon, init, autograd
from mxnet.gluon import data as gdata, nn, loss as gloss, utils as gutils

net = nn.Sequential()
net.add(nn.Dense(200, activation='tanh', use_bias=False),
        nn.Dense(200, activation='tanh', use_bias=False),
        nn.Dense(200, activation='tanh', use_bias=False),
        nn.Dense(200, activation='tanh', use_bias=False))

X = nd.random.uniform(-1, 1, (1, 100))
net.initialize(force_reinit=True, init=init.Xavier())

X.attach_grad()
with autograd.record():
    Y = net[:1](X)
Y.backward()

y0 = X.grad
y0 = y0.asnumpy()
y0 = np.round(y0, 1)
y0 = list(y0[0])
#print(y0)
y0 = sorted(y0)
print(y0)
y0set = sorted(set(y0))
print(y0set)

n = 0
#plt.figure()
x = []
    # should be dot(X_, W)
    E = self.attn(X_)  # (n, hidden) -> (n, hidden)
    attn_weights = F.softmax(E, axis=1)  # (n, w)
    attn_applied = F.elemwise_mul(attn_weights, X_)  # (n, w)
    output = self.c * (F.elemwise_mul(X_, attn_weights)) + (1 - self.c) * X_
    output = self.out(output)  # (n, output_size)
    return output


net = nn.Sequential()
with net.name_scope():
    # T: sequence_length, N: batch_size, C: feature_dimension
    net.add(rnn.GRU(num_hidden, num_layers, layout='NTC'))
    net.add(nn.BatchNorm())
    net.add(nn.Dense(sequence_length))  # converts (N, T, C) to (N, T)
    net.add(Attn(sequence_length, num_hidden))  # last-layer attention, (N, T) -> (N, T)

net.collect_params().initialize(mx.init.Normal(sigma=0.02), ctx=ctx)
print(net.collect_params())

#params = net.collect_params()
#params.load('try3.params', ctx=ctx)

square_loss = gluon.loss.L2Loss()
learning_settings = {'learning_rate': 0.005, 'momentum': 0.9}
trainer = gluon.Trainer(net.collect_params(), 'sgd', learning_settings)
#metric = mx.metric.MSE()

epochs = 20
loss_sequence = []
for e in range(epochs):
def __init__(self, levels, channels, classes=1000, block=BasicBlock, momentum=0.9,
             norm_layer=BatchNorm, norm_kwargs=None, residual_root=False,
             linear_root=False, use_feature=False, **kwargs):
    super(DLA, self).__init__(**kwargs)
    if norm_kwargs is None:
        norm_kwargs = {}
    norm_kwargs['momentum'] = momentum
    self._use_feature = use_feature
    self.channels = channels

    self.base_layer = nn.HybridSequential('base')
    self.base_layer.add(
        nn.Conv2D(in_channels=3, channels=channels[0], kernel_size=7,
                  strides=1, padding=3, use_bias=False))
    self.base_layer.add(norm_layer(in_channels=channels[0], **norm_kwargs))
    self.base_layer.add(nn.Activation('relu'))

    self.level0 = self._make_conv_level(channels[0], channels[0], levels[0],
                                        norm_layer, norm_kwargs)
    self.level1 = self._make_conv_level(channels[0], channels[1], levels[1],
                                        norm_layer, norm_kwargs, stride=2)
    self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,
                       level_root=False, root_residual=residual_root,
                       norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                       prefix='level2_')
    self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,
                       level_root=True, root_residual=residual_root,
                       norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                       prefix='level3_')
    self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,
                       level_root=True, root_residual=residual_root,
                       norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                       prefix='level4_')
    self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,
                       level_root=True, root_residual=residual_root,
                       norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                       prefix='level5_')

    if not self._use_feature:
        self.global_avg_pool = nn.GlobalAvgPool2D()
        self.fc = nn.Dense(units=classes)
def __init__(self, **kwargs):
    super(SimpleModel, self).__init__(**kwargs)
    self.fc1 = nn.Dense(20)
    self.fc2 = nn.Dense(10)
    break

mnist_valid = gluon.data.vision.FashionMNIST(train=False)
valid_data = gluon.data.DataLoader(
    mnist_valid.transform_first(transformer),
    batch_size=batch_size, num_workers=4)

# Define model
net = nn.Sequential()
net.add(
    nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
    nn.MaxPool2D(pool_size=2, strides=2),
    nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
    nn.MaxPool2D(pool_size=2, strides=2),
    nn.Flatten(),
    nn.Dense(120, activation='relu'),
    nn.Dense(84, activation='relu'),
    nn.Dense(10))
net.initialize(init=init.Xavier())

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})


def acc(output, label):
    return (output.argmax(axis=1) == label.astype('float32')).mean().asscalar()


for epoch in range(10):
    train_loss, train_acc, valid_acc = 0., 0., 0.
    tic = time.time()
def __init__(self):
    super(Critic, self).__init__()
    self.dense0 = nn.Dense(400, activation='relu')
    self.dense1 = nn.Dense(300, activation='relu')
    self.dense2 = nn.Dense(1)
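# The Critic's forward pass is not shown; a minimal sketch assuming a
# DDPG-style critic that scores a concatenated (state, action) pair (an
# assumption, not the original code; assumes `from mxnet import nd`):
# def forward(self, state, action):
#     x = nd.concat(state, action, dim=1)
#     return self.dense2(self.dense1(self.dense0(x)))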
def __init__(self, in_units=0, **kwargs):
    super(Net, self).__init__(**kwargs)
    with self.name_scope():
        self.dense0 = nn.Dense(5, in_units=in_units)
        self.dense1 = nn.Dense(5, in_units=in_units)
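# Usage sketch (my addition, not from the original source): with in_units=0 the
# weight shapes are deferred, so they are only materialized on the first
# forward pass, after which weight.shape reflects the inferred input size.
# net = Net()
# net.initialize()
# net(nd.ones((2, 7)))            # first call infers in_units=7
# print(net.dense0.weight.shape)  # -> (5, 7)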
def get_net():
    net = nn.HybridSequential()
    with net.name_scope():
        net.add(net_resnet50.features)
        net.add(nn.Dense(1062))
    return net
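# Usage sketch, assuming net_resnet50 is a pretrained model-zoo network whose
# feature extractor is reused for a 1062-class task (an assumption, not from
# the original source; `ctx` is assumed defined): only the new classifier head
# needs initializing before fine-tuning.
# net = get_net()
# net[1].initialize(mx.init.Xavier(), ctx=ctx)
# net.hybridize()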
def __init__(self, **kwargs):
    super(Net, self).__init__(**kwargs)
    with self.name_scope():
        self.dense0 = nn.Dense(10, in_units=5, use_bias=False)
def __init__(self, channels, init_block_channels, cardinality, bottleneck_width,
             group_widths, refresh_steps, bn_use_global_stats=False, in_channels=3,
             in_size=(224, 224), classes=1000, **kwargs):
    super(CRUNet, self).__init__(**kwargs)
    self.in_size = in_size
    self.classes = classes

    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        self.features.add(
            ResInitBlock(in_channels=in_channels,
                         out_channels=init_block_channels,
                         bn_use_global_stats=bn_use_global_stats))
        in_channels = init_block_channels
        for i, channels_per_stage in enumerate(channels):
            stage = nn.HybridSequential(prefix="stage{}_".format(i + 1))
            group_width = group_widths[i]
            refresh_step = refresh_steps[i]
            with stage.name_scope():
                for j, out_channels in enumerate(channels_per_stage):
                    strides = 2 if (j == 0) and (i != 0) else 1
                    if group_width != 0:
                        if ((refresh_step == 0) and (j == 0)) or \
                                ((refresh_step != 0) and (j % refresh_step == 0)):
                            conv1_params = None
                            conv2_params = None
                        unit = CRUUnit(
                            in_channels=in_channels,
                            out_channels=out_channels,
                            strides=strides,
                            group_width=group_width,
                            bn_use_global_stats=bn_use_global_stats,
                            conv1_params=conv1_params,
                            conv2_params=conv2_params)
                        if conv1_params is None:
                            conv1_params = unit.body.conv1.conv.params
                            conv2_params = unit.body.conv2.params
                        stage.add(unit)
                    else:
                        stage.add(
                            ResUnit(
                                in_channels=in_channels,
                                out_channels=out_channels,
                                strides=strides,
                                cardinality=cardinality,
                                bottleneck_width=bottleneck_width,
                                bn_use_global_stats=bn_use_global_stats))
                    in_channels = out_channels
            self.features.add(stage)
        self.features.add(
            PreResActivation(in_channels=in_channels,
                             bn_use_global_stats=bn_use_global_stats))
        self.features.add(nn.AvgPool2D(pool_size=7, strides=1))

        self.output = nn.HybridSequential(prefix='')
        self.output.add(nn.Flatten())
        self.output.add(nn.Dense(units=classes, in_units=in_channels))
def __init__(self, **kwargs):
    super(Model1, self).__init__(**kwargs)
    with self.name_scope():
        self.layers = [nn.Dense(i * 10) for i in range(6)]
def __init__(self, repeat=6, penultimate_filters=4032, stem_filters=96,
             filters_multiplier=2, classes=1000, use_aux=True):
    super(NASNetALarge, self).__init__()
    filters = penultimate_filters // 24

    self.conv0 = nn.HybridSequential(prefix='')
    self.conv0.add(nn.Conv2D(stem_filters, 3, padding=0, strides=2, use_bias=False))
    self.conv0.add(nn.BatchNorm(momentum=0.1, epsilon=0.001))

    self.cell_stem_0 = CellStem0(stem_filters,
                                 num_filters=filters // (filters_multiplier**2))
    self.cell_stem_1 = CellStem1(num_filters=filters // filters_multiplier)

    self.norm_1 = nn.HybridSequential(prefix='')
    self.norm_1.add(FirstCell(out_channels_left=filters // 2,
                              out_channels_right=filters))
    for _ in range(repeat - 1):
        self.norm_1.add(NormalCell(out_channels_left=filters,
                                   out_channels_right=filters))

    self.reduction_cell_0 = ReductionCell0(out_channels_left=2 * filters,
                                           out_channels_right=2 * filters)

    self.norm_2 = nn.HybridSequential(prefix='')
    self.norm_2.add(FirstCell(out_channels_left=filters,
                              out_channels_right=2 * filters))
    for _ in range(repeat - 1):
        self.norm_2.add(NormalCell(out_channels_left=2 * filters,
                                   out_channels_right=2 * filters))

    if use_aux:
        self.out_aux = nn.HybridSequential(prefix='')
        self.out_aux.add(nn.Conv2D(filters // 3, kernel_size=1, use_bias=False))
        self.out_aux.add(nn.BatchNorm(epsilon=0.001))
        self.out_aux.add(nn.Activation('relu'))
        self.out_aux.add(nn.Conv2D(2 * filters, kernel_size=5, use_bias=False))
        self.out_aux.add(nn.BatchNorm(epsilon=0.001))
        self.out_aux.add(nn.Activation('relu'))
        self.out_aux.add(nn.Dense(classes))
    else:
        self.out_aux = None

    self.reduction_cell_1 = ReductionCell1(out_channels_left=4 * filters,
                                           out_channels_right=4 * filters)

    self.norm_3 = nn.HybridSequential(prefix='')
    self.norm_3.add(FirstCell(out_channels_left=2 * filters,
                              out_channels_right=4 * filters))
    for _ in range(repeat - 1):
        self.norm_3.add(NormalCell(out_channels_left=4 * filters,
                                   out_channels_right=4 * filters))

    self.out = nn.HybridSequential(prefix='')
    self.out.add(nn.Activation('relu'))
    self.out.add(nn.GlobalAvgPool2D())
    self.out.add(nn.Dropout(0.5))
    self.out.add(nn.Dense(classes))
def __init__(self, **kwargs):
    super(Model2, self).__init__(**kwargs)
    with self.name_scope():
        self.layers = dict()
        self.layers['a'] = [nn.Dense(10), nn.Dense(10)]
def get_net():
    net = nn.Sequential()
    net.add(nn.Dense(64, activation="relu"),
            nn.Dense(32, activation="relu"),
            nn.Dense(1))
    return net
def __init__(self, **kwargs):
    super(Model3, self).__init__(**kwargs)
    with self.name_scope():
        self.layers = nn.Sequential()
        self.layers.add(*[nn.Dense(i * 10) for i in range(6)])
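# A quick check contrasting Model1/Model2/Model3 (my addition; assumes the
# three classes above are in scope). In Gluon, blocks held in a plain Python
# list or dict are not registered as children, so only Model3, which holds its
# layers in an nn.Sequential, exposes the Dense parameters:
# print(Model1().collect_params())  # empty: list members are not registered
# print(Model2().collect_params())  # empty: dict members are not registered
# print(Model3().collect_params())  # lists all six Dense layers' parameters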
class MySequential(nn.Block):
    def __init__(self, **kwargs):
        super(MySequential, self).__init__(**kwargs)

    def add(self, block):
        self._children[block.name] = block

    def forward(self, x):
        # Run the input through each child block in insertion order.
        for block in self._children.values():
            x = block(x)
        return x


net = MySequential()
net.add(nn.Dense(256, activation='relu'))
net.initialize()
net(x)


class FancyMLP(nn.Block):
    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        self.rand_weight = self.params.get_constant(
            'rand_weight', nd.random.uniform(shape=(20, 20)))
        self.dense = nn.Dense(20, activation='relu')

    def forward(self, x):
        x = self.dense(x)
print("Y_train: " + str(Y_train)) print("Y_test: " + str(Y_test)) ## define network num_classes = 2 num_hidden = 200 learning_rate = .01 epochs = 200 batch_size = 20 model = nn.Sequential() with model.name_scope(): model.embed = nn.Embedding(voca_size, num_embed) model.add( rnn.LSTM(num_hidden, layout='NTC', dropout=0.7, bidirectional=False)) model.add(nn.Dense(num_classes)) def eval_accuracy(x, y, batch_size): accuracy = mx.metric.Accuracy() for i in range(x.shape[0] // batch_size): data = x[i * batch_size:(i * batch_size + batch_size), ] target = y[i * batch_size:(i * batch_size + batch_size), ] output = model(data) predictions = nd.argmax(output, axis=1) accuracy.update(preds=predictions, labels=target) return accuracy.get()[1]