import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import loss as gloss, nn

batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)

net = nn.Sequential()
# No explicit flattening layer is needed: Dense flattens its input automatically.
# net.add(nn.Flatten())
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
num_epochs = 5
gb.train_cpu(net, train_iter, test_iter, loss, num_epochs, batch_size,
             None, None, trainer)

# help(nd.Activation)
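The commented-out help(nd.Activation) call above hints at MXNet's built-in activation operator. As a quick illustrative sketch (not part of the original script), the snippet below applies several of the act_type values nd.Activation accepts to a small input:

from mxnet import nd

x = nd.array([-2, -1, 0, 1, 2])
for act in ['relu', 'sigmoid', 'softrelu', 'tanh']:
    # nd.Activation applies the named nonlinearity elementwise
    print(act, nd.Activation(x, act_type=act))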
import sys sys.path.append("..") import gluonbook as gb from mxnet import gluon, nd, autograd, init from mxnet.gluon import loss as gloss, nn net = nn.Sequential() net.add(nn.Dense(128, activation='tanh')) net.add(nn.Dense(10)) net.add(nn.Dense(10)) net.initialize(init.Normal(sigma=0.01)) batch_szie = 256 train_iter, test_iter = gb.load_data_fashion_mnist(batch_szie) loss = gloss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1}) epochs = 50 gb.train_cpu(net, train_iter, test_iter, loss, epochs, batch_szie, trainer=trainer)
import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import autograd, nd
from mxnet.gluon import loss as gloss

# Fashion-MNIST: 28*28 = 784 inputs, 10 classes; hidden sizes assumed 256 each.
num_inputs, num_outputs = 784, 10
num_hiddens1, num_hiddens2 = 256, 256

W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(num_hiddens1)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(num_hiddens2)
W3 = nd.random.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)
params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

drop_prob1, drop_prob2 = 0.2, 0.5

def dropout(X, drop_prob):
    # Inverted dropout: zero each element with probability drop_prob and
    # rescale the survivors by 1 / keep_prob so the expectation is unchanged.
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        return X.zeros_like()
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    return mask * X / keep_prob

def net(X):
    X = X.reshape((-1, num_inputs))
    H1 = (nd.dot(X, W1) + b1).relu()
    if autograd.is_training():  # dropout is applied only during training
        H1 = dropout(H1, drop_prob1)
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)
    return nd.dot(H2, W3) + b3

num_epochs, lr, batch_size = 5, 0.5, 256
loss = gloss.SoftmaxCrossEntropyLoss()
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
gb.train_cpu(net, train_iter, test_iter, loss, num_epochs, batch_size,
             params, lr)
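For comparison, the same dropout architecture can be written concisely with Gluon's nn.Dropout layer, which implements the same inverted dropout and, like the manual version, is only active under autograd's training mode. A minimal sketch, reusing drop_prob1, drop_prob2, lr, and the iterators defined above:

from mxnet import gluon, init
from mxnet.gluon import nn

net_gluon = nn.Sequential()
net_gluon.add(nn.Dense(256, activation='relu'),
              nn.Dropout(drop_prob1),   # dropout after the first hidden layer
              nn.Dense(256, activation='relu'),
              nn.Dropout(drop_prob2),   # larger drop probability nearer the output
              nn.Dense(10))
net_gluon.initialize(init.Normal(sigma=0.01))
trainer = gluon.Trainer(net_gluon.collect_params(), 'sgd',
                        {'learning_rate': lr})
gb.train_cpu(net_gluon, train_iter, test_iter, loss, num_epochs, batch_size,
             None, None, trainer)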