def train(net, lr, num_epochs):
    # `data_iter` and `batch_size` are assumed to be defined by the
    # surrounding data pipeline
    ctx = d2l.try_gpu()
    net.initialize(ctx=ctx, force_reinit=True)
    trainer = gluon.Trainer(net.collect_params(), 'adam',
                            {'learning_rate': lr})
    for epoch in range(num_epochs):
        start, l_sum, n = time.time(), 0.0, 0
        for batch in data_iter:
            center, context_negative, mask, label = [
                data.as_in_context(ctx) for data in batch]
            with autograd.record():
                pred = skip_gram(center, context_negative, net[0], net[1])
                # Use the mask variable to avoid the effect of padding on the
                # loss computation
                l = (loss(pred.reshape(label.shape), label, mask) *
                     mask.shape[1] / mask.sum(axis=1))
            l.backward()
            trainer.step(batch_size)
            l_sum += l.sum().asscalar()
            n += l.size
        print('epoch %d, loss %.2f, time %.2fs'
              % (epoch + 1, l_sum / n, time.time() - start))
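# A hypothetical invocation of the training function above; the learning
# rate and epoch count are illustrative values, not prescribed by this
# excerpt.
train(net, 0.005, 5)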
def transition_block(num_channels):
    blk = nn.Sequential()
    blk.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(num_channels, kernel_size=1),
            nn.AvgPool2D(pool_size=2, strides=2))
    return blk

net = nn.Sequential()
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
        nn.BatchNorm(), nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3, strides=2, padding=1))

num_channels, growth_rate = 64, 32
num_convs_in_dense_blocks = [4, 4, 4, 4]
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    net.add(DenseBlock(num_convs, growth_rate))
    # The number of output channels of the previous dense block
    num_channels += num_convs * growth_rate
    # Insert a transition block that halves the number of channels between
    # dense blocks
    if i != len(num_convs_in_dense_blocks) - 1:
        num_channels //= 2
        net.add(transition_block(num_channels))

net.add(nn.BatchNorm(), nn.Activation('relu'), nn.GlobalAvgPool2D(),
        nn.Dense(10))

lr, num_epochs, batch_size, ctx = 0.1, 5, 256, d2l.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
              num_epochs)
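# DenseBlock is used above but not defined in this excerpt. Below is a
# minimal sketch consistent with that usage: each convolution emits
# `growth_rate` channels and concatenates them with its input on the channel
# dimension. `conv_block` is an assumed helper, not part of the excerpt.
def conv_block(num_channels):
    blk = nn.Sequential()
    blk.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(num_channels, kernel_size=3, padding=1))
    return blk

class DenseBlock(nn.Block):
    def __init__(self, num_convs, num_channels, **kwargs):
        super(DenseBlock, self).__init__(**kwargs)
        self.net = nn.Sequential()
        for _ in range(num_convs):
            self.net.add(conv_block(num_channels))

    def forward(self, X):
        for blk in self.net:
            Y = blk(X)
            # Concatenate input and output on the channel dimension
            X = nd.concat(X, Y, dim=1)
        return X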
def predict_rnn_gluon(prefix, num_chars, model, vocab_size, ctx, idx_to_char,
                      char_to_idx):
    # Use the model's member function to initialize the hidden state
    state = model.begin_state(batch_size=1, ctx=ctx)
    output = [char_to_idx[prefix[0]]]
    for t in range(num_chars + len(prefix) - 1):
        X = nd.array([output[-1]], ctx=ctx).reshape((1, 1))
        # The forward computation does not require the model parameters to be
        # passed in
        (Y, state) = model(X, state)
        if t < len(prefix) - 1:
            output.append(char_to_idx[prefix[t + 1]])
        else:
            output.append(int(Y.argmax(axis=1).asscalar()))
    return ''.join([idx_to_char[i] for i in output])

ctx = d2l.try_gpu()
model = RNNModel(rnn_layer, vocab_size)
model.initialize(force_reinit=True, ctx=ctx)
# The prefix '分开' is a phrase from the Chinese lyrics corpus used in this
# chapter
predict_rnn_gluon('分开', 10, model, vocab_size, ctx, idx_to_char,
                  char_to_idx)

def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0, 'wd': 0})
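    # What follows is a reconstructed sketch of the rest of the training
    # loop, assuming the d2l helpers data_iter_consecutive and grad_clipping
    # and an additional `import math`; details are illustrative rather than
    # quoted from the original.
    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = d2l.data_iter_consecutive(
            corpus_indices, batch_size, num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            # Detach the hidden state from the computation graph so that
            # gradients do not propagate across iterations
            state = [s.detach() for s in state]
            with autograd.record():
                (output, state) = model(X, state)
                y = Y.T.reshape((-1,))
                l = loss(output, y).mean()
            l.backward()
            # Clip the gradients to mitigate exploding gradients
            params = [p.data() for p in model.collect_params().values()]
            d2l.grad_clipping(params, clipping_theta, ctx)
            trainer.step(1)  # the loss is already a mean, so step size is 1
            l_sum += l.asscalar() * y.size
            n += y.size
        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn_gluon(
                    prefix, pred_len, model, vocab_size, ctx, idx_to_char,
                    char_to_idx))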
        # In the reshape, 0 keeps the batch-size dimension unchanged
        return (nd.concat(*anchors, dim=1),
                concat_preds(cls_preds).reshape(
                    (0, -1, self.num_classes + 1)),
                concat_preds(bbox_preds))

net = TinySSD(num_classes=1)
net.initialize()
X = nd.zeros((32, 3, 256, 256))
anchors, cls_preds, bbox_preds = net(X)

print('output anchors:', anchors.shape)
print('output class preds:', cls_preds.shape)
print('output bbox preds:', bbox_preds.shape)

batch_size = 32
train_iter, _ = d2l.load_data_pikachu(batch_size)
ctx, net = d2l.try_gpu(), TinySSD(num_classes=1)
net.initialize(init=init.Xavier(), ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.2, 'wd': 5e-4})

cls_loss = gloss.SoftmaxCrossEntropyLoss()
bbox_loss = gloss.L1Loss()

def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    cls = cls_loss(cls_preds, cls_labels)
    bbox = bbox_loss(bbox_preds * bbox_masks, bbox_labels * bbox_masks)
    return cls + bbox
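# A sketch of evaluation helpers matching the two losses above; the names
# cls_eval and bbox_eval are illustrative, not part of the excerpt. Class
# accuracy counts correct argmax predictions over the final dimension, and
# the bounding-box metric is the masked absolute error.
def cls_eval(cls_preds, cls_labels):
    # The class predictions lie on the final dimension, so argmax over it
    return (cls_preds.argmax(axis=-1) == cls_labels).sum().asscalar()

def bbox_eval(bbox_preds, bbox_labels, bbox_masks):
    return ((bbox_labels - bbox_preds) * bbox_masks).abs().sum().asscalar()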
# The function header and initialization line below are reconstructed from
# the call sites further down; get_inits is assumed to return the generated
# image, the style Gram matrices, and the trainer
def train(X, contents_Y, styles_Y, ctx, lr, max_epochs, lr_decay_epoch):
    X, styles_Y_gram, trainer = get_inits(X, ctx, lr, styles_Y)
    for i in range(max_epochs):
        start = time.time()
        with autograd.record():
            contents_Y_hat, styles_Y_hat = extract_features(
                X, content_layers, style_layers)
            contents_l, styles_l, tv_l, l = compute_loss(
                X, contents_Y_hat, styles_Y_hat, contents_Y, styles_Y_gram)
        l.backward()
        trainer.step(1)
        nd.waitall()
        if i % 50 == 0 and i != 0:
            print('epoch %3d, content loss %.2f, style loss %.2f, '
                  'TV loss %.2f, %.2f sec' % (
                      i, nd.add_n(*contents_l).asscalar(),
                      nd.add_n(*styles_l).asscalar(), tv_l.asscalar(),
                      time.time() - start))
        if i % lr_decay_epoch == 0 and i != 0:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
            print('change lr to %.1e' % trainer.learning_rate)
    return X

ctx, image_shape = d2l.try_gpu(), (225, 150)
net.collect_params().reset_ctx(ctx)
content_X, contents_Y = get_contents(image_shape, ctx)
_, styles_Y = get_styles(image_shape, ctx)
output = train(content_X, contents_Y, styles_Y, ctx, 0.01, 500, 200)
d2l.plt.imsave('./d2l/img/neural-style-1.png',
               postprocess(output).asnumpy())

image_shape = (450, 300)
_, content_Y = get_contents(image_shape, ctx)
_, style_Y = get_styles(image_shape, ctx)
X = preprocess(postprocess(output) * 255, image_shape)
output = train(X, content_Y, style_Y, ctx, 0.01, 300, 100)
d2l.plt.imsave('./d2l/img/neural-style-2.png',
               postprocess(output).asnumpy())
def predict_sentiment(net, vocab, sentence):
    sentence = nd.array(vocab.to_indices(sentence), ctx=d2l.try_gpu())
    label = nd.argmax(net(sentence.reshape((1, -1))), axis=1)
    return 'positive' if label.asscalar() == 1 else 'negative'
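# A hypothetical call to the function above; the tokenized sentence is an
# illustrative example, assuming vocab was built on a sentiment corpus
predict_sentiment(net, vocab, ['this', 'movie', 'is', 'so', 'great'])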