import torch
import d2l  # the book's plotting/optimization helpers (semilogy, sgd)


def fit_n_plot_with_pytorch(lambd, num_inputs, num_outputs, lr, num_epochs,
                            train_iter, train_features, train_labels,
                            test_features, test_labels):
    net = torch.nn.Linear(num_inputs, num_outputs)
    loss = torch.nn.MSELoss()
    for param in net.parameters():
        param.data.uniform_()
    # Weight decay is applied by the optimizer via the weight_decay argument
    optimizer = torch.optim.SGD(net.parameters(), lr, weight_decay=lambd)
    train_ls, test_ls = [], []
    for epoch in range(num_epochs):
        for X, y in train_iter:
            with torch.enable_grad():
                optimizer.zero_grad()
                y_pred = net(X)
                loss_val = loss(y_pred, y)
                loss_val.backward()
                optimizer.step()
        train_ls.append(
            torch.mean(loss(net(train_features), train_labels)).item())
        test_ls.append(
            torch.mean(loss(net(test_features), test_labels)).item())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    # net is a single Linear layer, so its weight is accessed directly
    print('L2 norm of w:', net.weight.norm().item())
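# A hedged usage sketch for the PyTorch version above. The synthetic
# high-dimensional linear data, shapes, and hyperparameters below are
# illustrative assumptions, not values taken from the original text; they only
# show how the function's arguments fit together.
import torch
from torch.utils import data as tdata

n_train, n_test, num_inputs = 20, 100, 200
true_w, true_b = torch.ones(num_inputs, 1) * 0.01, 0.05
features = torch.randn(n_train + n_test, num_inputs)
labels = features @ true_w + true_b + 0.01 * torch.randn(n_train + n_test, 1)

batch_size = 1
train_iter_torch = tdata.DataLoader(
    tdata.TensorDataset(features[:n_train], labels[:n_train]),
    batch_size, shuffle=True)

# lambd > 0 turns weight decay on; lambd = 0 would disable it for comparison.
fit_n_plot_with_pytorch(lambd=3, num_inputs=num_inputs, num_outputs=1,
                        lr=0.003, num_epochs=100, train_iter=train_iter_torch,
                        train_features=features[:n_train],
                        train_labels=labels[:n_train],
                        test_features=features[n_train:],
                        test_labels=labels[n_train:])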
from mxnet import autograd, gluon, init
from mxnet.gluon import loss as gloss, nn


# Relies on globals defined outside this snippet: lr, num_epochs, batch_size,
# train_iter, train_features, train_labels, test_features, test_labels.
def fit_and_plot_gluon(wd):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=1))
    loss = gloss.L2Loss()
    # The weight parameters are decayed via the trainer's wd argument. Weight
    # parameter names generally end with "weight".
    trainer_w = gluon.Trainer(net.collect_params('.*weight'), 'sgd',
                              {'learning_rate': lr, 'wd': wd})
    # The bias parameter is not decayed. Bias parameter names generally end
    # with "bias".
    trainer_b = gluon.Trainer(net.collect_params('.*bias'), 'sgd',
                              {'learning_rate': lr})
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            # Call step on each of the two Trainer instances to update the
            # weights and the bias separately
            trainer_w.step(batch_size)
            trainer_b.step(batch_size)
        train_ls.append(loss(net(train_features),
                             train_labels).mean().asscalar())
        test_ls.append(loss(net(test_features),
                            test_labels).mean().asscalar())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net[0].weight.data().norm().asscalar())
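# A hedged sketch of the globals fit_and_plot_gluon expects, plus a call. The
# data dimensions and hyperparameters are assumptions chosen so the model can
# overfit without weight decay; they are not values from the original text.
from mxnet import nd
from mxnet.gluon import data as gdata

n_train, n_test, num_inputs = 20, 100, 200
true_w, true_b = nd.ones((num_inputs, 1)) * 0.01, 0.05
features = nd.random.normal(shape=(n_train + n_test, num_inputs))
labels = (nd.dot(features, true_w) + true_b
          + nd.random.normal(scale=0.01, shape=(n_train + n_test, 1)))
train_features, test_features = features[:n_train], features[n_train:]
train_labels, test_labels = labels[:n_train], labels[n_train:]

batch_size, num_epochs, lr = 1, 100, 0.003
train_iter = gdata.DataLoader(
    gdata.ArrayDataset(train_features, train_labels), batch_size, shuffle=True)

fit_and_plot_gluon(wd=3)  # compare with fit_and_plot_gluon(wd=0)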
import pandas as pd


def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    net = get_net()
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    # Apply the network to the test set
    preds = net(test_features).asnumpy()
    # Reformat the predictions for export to Kaggle
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('submission.csv', index=False)
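# train_and_pred (and k_fold below) assume a get_net factory and a train
# routine defined elsewhere; train is expected to return per-epoch RMSE lists
# for the training and (optional) validation sets. A minimal sketch of get_net
# in the Gluon style of the surrounding code, a single linear output layer, is
# shown here; the exact architecture is an assumption.
def get_net():
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize()
    return net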
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net()
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate,
                                   weight_decay, batch_size)
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        if i == 0:
            d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                         range(1, num_epochs + 1), valid_ls,
                         ['train', 'valid'])
        print('fold %d, train rmse: %f, valid rmse: %f'
              % (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
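# k_fold relies on get_k_fold_data to carve out fold i as the validation set
# and concatenate the remaining folds as the training set. The sketch below is
# one plausible NDArray-based implementation, assumed rather than taken from
# the original text.
from mxnet import nd


def get_k_fold_data(k, i, X, y):
    assert k > 1
    fold_size = X.shape[0] // k
    X_train, y_train = None, None
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size)
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            # Fold i becomes the validation set
            X_valid, y_valid = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            # All other folds are concatenated into the training set
            X_train = nd.concat(X_train, X_part, dim=0)
            y_train = nd.concat(y_train, y_part, dim=0)
    return X_train, y_train, X_valid, y_valid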
def fit_and_plot(lambd):
    w, b = init_params()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                # The L2 norm penalty term has been added
                l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            d2l.sgd([w, b], lr, batch_size)
        train_ls.append(loss(net(train_features, w, b),
                             train_labels).mean().asscalar())
        test_ls.append(loss(net(test_features, w, b),
                            test_labels).mean().asscalar())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('l2 norm of w:', w.norm().asscalar())
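# fit_and_plot implements weight decay from scratch and assumes these globals:
# init_params, l2_penalty, net, and loss, plus the data and hyperparameters set
# up in the Gluon sketch earlier. The definitions below follow the usual
# linear-regression-from-scratch pattern and are a hedged sketch, not the
# original helpers.
from mxnet import nd


def init_params():
    w = nd.random.normal(scale=1, shape=(num_inputs, 1))
    b = nd.zeros(shape=(1,))
    w.attach_grad()
    b.attach_grad()
    return w, b


def l2_penalty(w):
    # Half the squared L2 norm, so the gradient of the penalty is simply w
    return (w ** 2).sum() / 2


def net(X, w, b):
    return nd.dot(X, w) + b


def loss(y_hat, y):
    # Squared loss, halved to match the usual convention
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2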
def fit_n_plot(lambd, num_inputs, num_epochs, train_iter, net, loss,
               grad_descent, lr, batch_size, train_features, train_labels,
               test_features, test_labels):
    w, b = init_params(num_inputs)
    train_ls, test_ls = [], []
    for epoch in range(num_epochs):
        for X, y in train_iter:
            with torch.enable_grad():
                y_pred = net(X, w, b)
                # Add the L2 norm penalty term to the summed minibatch loss
                loss_val = loss(y_pred, y).sum() + lambd * l2_penalty(w)
                loss_val.backward()
            # grad_descent is expected to update w and b in place and to reset
            # their gradients (as a d2l.sgd-style helper does); otherwise
            # gradients would accumulate across minibatches
            grad_descent([w, b], lr, batch_size)
        with torch.no_grad():
            train_ls.append(
                torch.mean(loss(net(train_features, w, b),
                                train_labels)).item())
            test_ls.append(
                torch.mean(loss(net(test_features, w, b),
                                test_labels)).item())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('l2 norm of w:', torch.norm(w).item())
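# fit_n_plot takes its model, loss, and update rule as arguments and calls
# init_params(num_inputs) and l2_penalty(w) at module level. The helpers below
# are a hedged PyTorch sketch of those pieces (they mirror the MXNet scratch
# helpers above and shadow the earlier init_params); the names linreg,
# squared_loss, and sgd are illustrative, not from the original text.
import torch


def init_params(num_inputs):
    w = torch.randn(num_inputs, 1)
    w.requires_grad_(True)
    b = torch.zeros(1, requires_grad=True)
    return w, b


def l2_penalty(w):
    return (w ** 2).sum() / 2


def linreg(X, w, b):
    return X @ w + b


def squared_loss(y_hat, y):
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2


def sgd(params, lr, batch_size):
    # Minibatch SGD: the training loss above is summed over the minibatch, so
    # the gradient is divided by the batch size; gradients are then reset
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()


# Such helpers would be passed in as, for example:
# fit_n_plot(lambd=3, num_inputs=200, num_epochs=100,
#            train_iter=train_iter_torch, net=linreg, loss=squared_loss,
#            grad_descent=sgd, lr=0.003, batch_size=1,
#            train_features=..., train_labels=...,
#            test_features=..., test_labels=...)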