def forward(self, X):
    # If the running statistics live on a different device than X (e.g. X is
    # on GPU memory), copy moving_mean and moving_var over to X's device.
    if self.moving_mean.context != X.context:
        self.moving_mean = self.moving_mean.copyto(X.context)
        self.moving_var = self.moving_var.copyto(X.context)
    # batch_norm returns the output together with the updated moving_mean
    # and moving_var, which are saved back onto the module.
    Y, self.moving_mean, self.moving_var = batch_norm(
        X, self.gamma.data(), self.beta.data(), self.moving_mean,
        self.moving_var, eps=1e-5, momentum=0.9)
    return Y


# LeNet-style network with a BatchNorm layer after every convolutional and
# dense layer except the final output layer.
net = nn.Sequential()
net.add(nn.Conv2D(6, kernel_size=5), BatchNorm(6, num_dims=4),
        nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(16, kernel_size=5), BatchNorm(16, num_dims=4),
        nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2),
        nn.Dense(120), BatchNorm(120, num_dims=2), nn.Activation('sigmoid'),
        nn.Dense(84), BatchNorm(84, num_dims=2), nn.Activation('sigmoid'),
        nn.Dense(10))

# Train on Fashion-MNIST with plain SGD.
lr, num_epochs, batch_size, ctx = 1.0, 5, 256, d2l.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
              num_epochs)
        # NOTE(review): this fragment starts inside the per-time-step loop of
        # an `lstm` function whose header lies outside this view.
        # LSTM cell: input (I), forget (F) and output (O) gates plus the
        # candidate memory cell, each a sigmoid/tanh of an affine map of
        # the input X and previous hidden state H.
        I = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)
        F = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) + b_f)
        O = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)
        C_tilda = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) + b_c)
        # Memory-cell and hidden-state updates.
        C = F * C + I * C_tilda
        H = O * C.tanh()
        Y = nd.dot(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H, C)


if __name__ == "__main__":
    # Character-level language model trained on the Jay Chou lyrics corpus.
    (corpus_indices, char_to_idx, idx_to_char,
     vocab_size) = d2l.load_data_jay_lyrics()
    num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
    ctx = d2l.try_gpu()
    num_epochs, num_steps, batch_size, lr, clipping_theta = 160, 35, 32, 1e2, 1e-2
    pred_period, pred_len, prefixes = 40, 50, ['分开', '不分开']
    # From-scratch trainer kept for reference:
    # d2l.train_and_predict_rnn(lstm, get_params, init_lstm_state,
    #                           num_hiddens, vocab_size, ctx, corpus_indices,
    #                           idx_to_char, char_to_idx, False, num_epochs,
    #                           num_steps, lr, clipping_theta, batch_size,
    #                           pred_period, pred_len, prefixes)
    lstm_layer = rnn.LSTM(num_hiddens)
    model = d2l.RNNModel(lstm_layer, vocab_size)
    d2l.train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                    corpus_indices, idx_to_char, char_to_idx,
                                    num_epochs, num_steps, lr, clipping_theta,
                                    batch_size, pred_period, pred_len,
                                    prefixes)
            # NOTE(review): fragment starts inside the mini-batch loop of a
            # `train` function whose header lies outside this view.
            l = loss(outputs, y).sum()
            l.backward()
            trainer.step(batch_size)
            train_l_sum += l.asscalar()
            n += y.size
        time_s = "time %.2f sec" % (time.time() - start)
        if valid_iter is not None:
            valid_loss = evaluate_loss(valid_iter, net, ctx)
            epoch_s = ("epoch %d, train loss %f, valid loss %f, "
                       % (epoch + 1, train_l_sum / n, valid_loss))
        else:
            epoch_s = ("epoch %d, train loss %f, "
                       % (epoch + 1, train_l_sum / n))
        print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))


# Train on the train/valid split first, then retrain on the combined
# train+valid set before predicting on the test set.
ctx, num_epochs, lr, wd = d2l.try_gpu(), 1, 0.01, 1e-4
lr_period, lr_decay, net = 10, 0.1, get_net(ctx)
net.hybridize()
train(net, train_iter, valid_iter, num_epochs, lr, wd, ctx, lr_period,
      lr_decay)

net = get_net(ctx)
net.hybridize()
train(net, train_valid_iter, None, num_epochs, lr, wd, ctx, lr_period,
      lr_decay)

preds = []
for data, label in test_iter:
    # Reuse the pretrained feature extractor; only the newly added output
    # head is applied on top of the extracted features.
    output_features = net.features(data.as_in_context(ctx))
    output = nd.softmax(net.output_new(output_features))
    preds.extend(output.asnumpy())
def forward(self, X):
    """Run the five SSD stages and collect their anchors and predictions.

    Returns a 3-tuple: anchors concatenated along the anchor axis, class
    predictions reshaped to (batch, total_anchors, num_classes + 1), and
    the flattened bounding-box offset predictions.
    """
    anchors, cls_preds, bbox_preds = [], [], []
    for stage in range(5):
        # getattr(self, 'blk_%d' % stage) looks up the per-stage block,
        # i.e. self.blk_0 ... self.blk_4; likewise for the two heads.
        blk = getattr(self, 'blk_%d' % stage)
        cls_head = getattr(self, 'cls_%d' % stage)
        bbox_head = getattr(self, 'bbox_%d' % stage)
        X, anc, cls, bbox = blk_forward(X, blk, sizes[stage], ratios[stage],
                                        cls_head, bbox_head)
        anchors.append(anc)
        cls_preds.append(cls)
        bbox_preds.append(bbox)
    # A leading 0 in reshape keeps the batch dimension unchanged.
    all_anchors = nd.concat(*anchors, dim=1)
    cls_out = concat_preds(cls_preds).reshape((0, -1, self.num_classes + 1))
    bbox_out = concat_preds(bbox_preds)
    return all_anchors, cls_out, bbox_out


batch_size = 32
train_iter, _ = d2l.load_data_pikachu(batch_size)
ctx, net = d2l.try_gpu(), TinySSD(num_classes=1)
net.initialize(init=init.Xavier(), ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.2, 'wd': 5e-4})

cls_loss = gloss.SoftmaxCrossEntropyLoss()
bbox_loss = gloss.L1Loss()


def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    """Total detection loss: class cross-entropy plus masked L1 box loss."""
    class_term = cls_loss(cls_preds, cls_labels)
    box_term = bbox_loss(bbox_preds * bbox_masks, bbox_labels * bbox_masks)
    return class_term + box_term
        # NOTE(review): fragment starts inside the epoch loop of a style
        # transfer `train` function whose header lies outside this view.
        nd.waitall()
        if i % 50 == 0 and i != 0:
            print('epoch %3d, content loss %.2f, style loss %.2f, '
                  'TV loss %.2f, %.2f sec'
                  % (i, nd.add_n(*contents_l).asscalar(),
                     nd.add_n(*styles_l).asscalar(), tv_l.asscalar(),
                     time.time() - start))
        # Step-decay learning-rate schedule: multiply by 0.1 every
        # lr_decay_epoch iterations.
        if i % lr_decay_epoch == 0 and i != 0:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
            print('change lr to %.1e' % trainer.learning_rate)
    return X


# In[ ]:

ctx, image_shape = d2l.try_gpu(), (225, 150)
net.collect_params().reset_ctx(ctx)
content_X, contents_Y = get_contents(image_shape, ctx)
_, styles_Y = get_styles(image_shape, ctx)
output = train(content_X, contents_Y, styles_Y, ctx, 0.01, 500, 200)


# In[ ]:

d2l.plt.imsave('../img/neural-style-1.png', postprocess(output).asnumpy())


# In[ ]:

# Second pass at a larger resolution, initialized from the small output.
image_shape = (450, 300)
_, content_Y = get_contents(image_shape, ctx)
_, style_Y = get_styles(image_shape, ctx)
X = preprocess(postprocess(output) * 255, image_shape)
            # NOTE(review): fragment starts inside the mini-batch loop of a
            # `train` function whose header lies outside this view.
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        time_s = "time %.2f sec" % (time.time() - start)
        if valid_iter is not None:
            valid_acc = d2l.evaluate_accuracy(valid_iter, net, ctx)
            epoch_s = ("epoch %d, loss %f, train acc %f, valid acc %f, "
                       % (epoch + 1, train_l_sum / n, train_acc_sum / n,
                          valid_acc))
        else:
            epoch_s = ("epoch %d, loss %f, train acc %f, "
                       % (epoch + 1, train_l_sum / n, train_acc_sum / n))
        print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))


# Train on the train/valid split first, then retrain on the combined
# train+valid set before predicting on the test set.
ctx, num_epochs, lr, wd = d2l.try_gpu(), 1, 0.1, 5e-4
lr_period, lr_decay, net = 80, 0.1, get_net(ctx)
net.hybridize()
train(net, train_iter, valid_iter, num_epochs, lr, wd, ctx, lr_period,
      lr_decay)

net, preds = get_net(ctx), []
net.hybridize()
train(net, train_valid_iter, None, num_epochs, lr, wd, ctx, lr_period,
      lr_decay)
for X, _ in test_iter:
    y_hat = net(X.as_in_context(ctx))
    preds.extend(y_hat.argmax(axis=1).astype(int).asnumpy())
sorted_ids = list(range(1, len(test_ds) + 1))
# Sort ids lexicographically ('1', '10', '100', ...) to match the order of
# the submission file.
sorted_ids.sort(key=lambda x: str(x))
                                  'fashion-mnist')):
    # NOTE(review): this is the tail of load_data_fashion_mnist's signature;
    # the def line with the remaining parameters lies outside this view.
    root = os.path.expanduser(root)  # expand the user path '~'
    transformer = []
    if resize:
        transformer += [gdata.vision.transforms.Resize(resize)]
    transformer += [gdata.vision.transforms.ToTensor()]
    transformer = gdata.vision.transforms.Compose(transformer)
    mnist_train = gdata.vision.FashionMNIST(root=root, train=True)
    mnist_test = gdata.vision.FashionMNIST(root=root, train=False)
    # Multi-worker data loading is not supported on Windows.
    num_workers = 0 if sys.platform.startswith('win32') else 4
    train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),
                                  batch_size, shuffle=True,
                                  num_workers=num_workers)
    test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),
                                 batch_size, shuffle=False,
                                 num_workers=num_workers)
    return train_iter, test_iter


batch_size = 128
# If an "out of memory" error occurs, reduce batch_size or resize.
# Halving batch_size to 64 makes one epoch take about a quarter of an hour.
# Do not shrink resize arbitrarily.
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)

lr, num_epochs, ctx = 0.01, 5, d2l.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
              num_epochs)