import numpy as np
import gluonbook as gb
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import data as gdata, loss as gloss, nn


def fit_and_plot(weight_decay):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=1))
    # Apply L2-norm regularization, i.e. weight decay, to the weight parameters.
    trainer_w = gluon.Trainer(net.collect_params('.*weight'), 'sgd',
                              {'learning_rate': learning_rate,
                               'wd': weight_decay})
    # Do not apply L2-norm regularization to the bias parameters.
    trainer_b = gluon.Trainer(net.collect_params('.*bias'), 'sgd',
                              {'learning_rate': learning_rate})
    train_ls = []
    test_ls = []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            # Call step on both Trainer instances to update both parameter groups.
            trainer_w.step(batch_size)
            trainer_b.step(batch_size)
        train_ls.append(
            loss(net(train_features), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_features), test_labels).mean().asscalar())
    gb.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                range(1, num_epochs + 1), test_ls, ['train', 'test'])
    return 'w[:10]:', net[0].weight.data()[:, :10], 'b:', net[0].bias.data()
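# A minimal usage sketch (an assumption, not from the original): compare no
# weight decay against a weight decay of 3, assuming the globals that
# fit_and_plot relies on (learning_rate, num_epochs, batch_size, train_iter,
# loss, train_features, train_labels, test_features, test_labels) are defined.
fit_and_plot(weight_decay=0)  # no regularization: train loss drops, test loss stays high
fit_and_plot(weight_decay=3)  # weight decay narrows the gap between train and test loss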
def optimize(batch_size, lr, num_epochs, log_interval):
    [w, b], sqrs = init_params()
    # Record the initial loss, then append to ls periodically for plotting.
    ls = [loss(net(features, w, b), labels).mean().asnumpy()]
    for epoch in range(1, num_epochs + 1):
        for batch_i, (X, y) in enumerate(
                gb.data_iter(batch_size, len(features), features, labels)):
            with autograd.record():
                l = loss(net(X, w, b), y)
            l.backward()
            # Update the parameters with the AdaGrad rule.
            adagrad([w, b], sqrs, lr, batch_size)
            if batch_i * batch_size % log_interval == 0:
                ls.append(loss(net(features, w, b), labels).mean().asnumpy())
    print('w:', w, '\nb:', b, '\n')
    es = np.linspace(0, num_epochs, len(ls), endpoint=True)
    gb.semilogy(es, ls, 'epoch', 'loss')
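# The optimize function above calls an adagrad helper that is not shown in
# this excerpt. A minimal sketch (an assumption, following the standard
# AdaGrad rule): accumulate squared gradients per parameter in sqrs and scale
# the learning rate by the inverse square root of the accumulator.
def adagrad(params, sqrs, lr, batch_size):
    eps_stable = 1e-7  # small constant for numerical stability
    for param, sqr in zip(params, sqrs):
        g = param.grad / batch_size       # average the gradient over the mini-batch
        sqr[:] += g.square()              # accumulate squared gradients in place
        param[:] -= lr * g / (sqr + eps_stable).sqrt()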
def fit_and_plot(lambd):
    w, b = params = init_params()
    train_ls = []
    test_ls = []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                # Add the L2-norm penalty term to the loss.
                l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            gb.sgd(params, lr, batch_size)
        train_ls.append(
            loss(net(train_features, w, b), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_features, w, b), test_labels).mean().asscalar())
    gb.semilogy(range(1, num_epochs + 1), train_ls, 'epoch', 'loss',
                range(1, num_epochs + 1), test_ls, ['train', 'test'])
    return w[:10].T, b
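# fit_and_plot above relies on init_params and l2_penalty, which are not
# defined in this excerpt. Minimal sketches (assumptions), for a single linear
# layer with num_inputs features: the penalty is half the sum of squared
# weights, so its gradient with respect to w is simply w.
def init_params():
    w = nd.random.normal(scale=1, shape=(num_inputs, 1))
    b = nd.zeros(shape=(1,))
    w.attach_grad()  # allocate gradient buffers for autograd
    b.attach_grad()
    return w, b

def l2_penalty(w):
    return (w ** 2).sum() / 2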
def train(net, train_features, train_labels, test_features, test_labels,
          num_epochs, verbose_epoch, learning_rate, weight_decay, batch_size):
    train_ls = []
    if test_features is not None:
        test_ls = []
    train_iter = gdata.DataLoader(gdata.ArrayDataset(train_features,
                                                     train_labels),
                                  batch_size, shuffle=True)
    # The Adam optimization algorithm is used here.
    trainer = gluon.Trainer(net.collect_params(), 'adam', {
        'learning_rate': learning_rate, 'wd': weight_decay})
    net.initialize(init=init.Xavier(), force_reinit=True)
    for epoch in range(1, num_epochs + 1):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
        cur_train_l = get_rmse_log(net, train_features, train_labels)
        if epoch >= verbose_epoch:
            print("epoch %d, train loss: %f" % (epoch, cur_train_l))
        train_ls.append(cur_train_l)
        if test_features is not None:
            cur_test_l = get_rmse_log(net, test_features, test_labels)
            test_ls.append(cur_test_l)
    if test_features is not None:
        gb.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                    range(1, num_epochs + 1), test_ls, ['train', 'test'])
    else:
        gb.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss')
    if test_features is not None:
        return cur_train_l, cur_test_l
    else:
        return cur_train_l
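# train above reports its metric through get_rmse_log, which is not defined in
# this excerpt. A plausible sketch (an assumption): the root-mean-square error
# between the logs of the clipped predictions and the logs of the labels, the
# metric commonly used for house-price regression.
def get_rmse_log(net, features, labels):
    # Clip predictions to at least 1 so that taking the log is safe.
    clipped_preds = nd.clip(net(features), 1, float('inf'))
    return nd.sqrt(((clipped_preds.log() - labels.log()) ** 2).mean()).asscalar()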
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval,
             features, labels, net):
    i = 0
    dataset = gdata.ArrayDataset(features, labels)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    loss = gloss.L2Loss()
    ls = [loss(net(features), labels).mean().asnumpy()]
    for epoch in range(1, num_epochs + 1):
        # Learning-rate self-decay: after decay_epoch, shrink the rate tenfold.
        if decay_epoch and epoch > decay_epoch:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            i += 1
            trainer.step(batch_size)
            if batch_i * batch_size % log_interval == 0:
                ls.append(loss(net(features), labels).mean().asnumpy())
    # Print the number of iterations and the learned weight and bias.
    print('i:', i, 'w:', net[0].weight.data(),
          '\nb:', net[0].bias.data(), '\n')
    es = np.linspace(0, num_epochs, len(ls), endpoint=True)
    gb.semilogy(es, ls, 'epoch', 'loss')
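# A usage sketch for optimize (an assumption, not from the original): build a
# single-layer linear network, attach an sgd Trainer, and decay the learning
# rate tenfold after epoch 2. features and labels are assumed to be
# pre-generated NDArrays.
net = nn.Sequential()
net.add(nn.Dense(1))
net.initialize()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=2,
         log_interval=10, features=features, labels=labels, net=net)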
    return net  # tail of the get_net definition, truncated in this excerpt


def train(net, train_features, train_labels, test_features, test_labels,
          num_epochs, learning_rate, weight_decay, batch_size):
    train_ls, valid_ls = [], []
    train_iter = gdata.DataLoader(gdata.ArrayDataset(
        train_features, train_labels), batch_size, shuffle=True)
    trainer = gluon.Trainer(net.collect_params(), 'adam', {
        'learning_rate': learning_rate, 'wd': weight_decay})
    for epoch in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
        train_ls.append(rmse(net, train_features, train_labels))
        # Note: the validation metric uses the global valid_features and
        # valid_labels; the test_features and test_labels arguments are unused.
        valid_ls.append(rmse(net, valid_features, valid_labels))
        print('epoch ' + str(epoch) + ' done. rmse=' +
              str(train_ls[-1]) + '/' + str(valid_ls[-1]), end='\r')
    return train_ls, valid_ls


# ---
net = get_net()
train_ls, valid_ls = train(net, train_features, train_labels, None, None,
                           num_epochs, lr, wd, batch_size)
gb.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
print('train rmse %f' % train_ls[-1])
print('valid rmse %f' % valid_ls[-1])
net.save_parameters('./xyz_v2.2.param')
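# Reloading sketch (an assumption, not from the original): the parameters
# saved above can later be restored into a freshly constructed network of the
# same architecture, e.g. before generating predictions on the test set.
new_net = get_net()
new_net.load_parameters('./xyz_v2.2.param')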