def fit_and_plot(train_features, test_features, train_labels, test_labels):
    # A network with a single linear layer connecting all input features to one output.
    # nn.Linear initializes its parameters automatically, so no extra setup is needed.
    net = torch.nn.Linear(train_features.shape[-1], 1)
    # Set the batch size, capped so it never exceeds the number of training examples
    batch_size = min(10, train_labels.shape[0])
    # Data loading and optimizer setup
    dataset = torch.utils.data.TensorDataset(train_features, train_labels)
    train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    train_ls, test_ls = [], []
    # The underscore marks a loop variable whose value is never used
    for _ in range(num_epochs):
        # The usual training step with loss computation
        for X, y in train_iter:
            l = loss(net(X), y.view(-1, 1))
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        # Reshape the labels into column vectors for the loss computation
        train_labels = train_labels.view(-1, 1)
        test_labels = test_labels.view(-1, 1)
        # Compute the prediction losses against the reshaped labels and record them
        train_ls.append(loss(net(train_features), train_labels).item())
        test_ls.append(loss(net(test_features), test_labels).item())
    # Print and plot the results
    print('final epoch: train loss', train_ls[-1], 'test loss', test_ls[-1])
    plot.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                  range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('weight:', net.weight.data, '\nbias:', net.bias.data)
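# Hedged usage sketch for fit_and_plot: the function reads num_epochs, loss
# and a plot module with a semilogy helper from the enclosing scope, so they
# must exist first. The cubic synthetic data below and the values
# num_epochs=100 / MSELoss are illustrative assumptions, not from the original.
import torch

num_epochs = 100
loss = torch.nn.MSELoss()

# Synthetic data: y = 1.2x - 3.4x^2 + 5.6x^3 + 5 + Gaussian noise
n_train, n_test = 100, 100
x = torch.randn(n_train + n_test, 1)
poly_features = torch.cat((x, x ** 2, x ** 3), dim=1)
labels = (1.2 * poly_features[:, 0] - 3.4 * poly_features[:, 1]
          + 5.6 * poly_features[:, 2] + 5)
labels += torch.normal(0.0, 0.01, labels.shape)

# Third-order polynomial fitting: train and test losses should both end up low
fit_and_plot(poly_features[:n_train], poly_features[n_train:],
             labels[:n_train], labels[n_train:])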
def fit_and_plot_pytorch(wd):
    net = nn.Linear(num_features, 1)
    nn.init.normal_(net.weight, mean=0, std=1)
    nn.init.normal_(net.bias, mean=0, std=1)
    # weight_decay is the penalty coefficient added to the loss function;
    # changing this hyperparameter changes the decay rate
    optim_w = torch.optim.SGD(params=[net.weight], lr=lr, weight_decay=wd)
    # Do not penalize the bias
    optim_b = torch.optim.SGD(params=[net.bias], lr=lr)
    # Training
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            # The training step itself is the same as before
            l = loss(net(X), y).mean()
            optim_w.zero_grad()
            optim_b.zero_grad()
            l.backward()
            # After backpropagation, let both optimizers step
            optim_w.step()
            optim_b.step()
        train_ls.append(loss(net(train_features), train_labels).mean().item())
        test_ls.append(loss(net(test_features), test_labels).mean().item())
    plot.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                  range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net.weight.data.norm().item())
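# Hedged comparison sketch: calling fit_and_plot_pytorch with and without
# weight decay. The values 0 and 3 are illustrative; the globals it uses
# (num_features, lr, num_epochs, loss, train_iter, train_features, ...)
# must already be defined.
fit_and_plot_pytorch(0)  # no penalty: w is free to grow large and overfit
fit_and_plot_pytorch(3)  # wd=3: the printed L2 norm of w should shrink noticeably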
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    # Build a network matching the number of input features
    net = get_net(train_features.shape[1])
    # Train with the hyperparameters tuned earlier; no validation set is passed
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    # Plot and report the training error
    plot.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    # detach() returns the predictions detached from the computation graph,
    # so they can be converted to NumPy
    preds = net(test_features).detach().numpy()
    # Write the predictions into the submission file
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('./Datasets/KaggleHouse/submission.csv', index=False)
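# train_and_pred (and k_fold below) call a get_net helper that is not shown
# in this section. A minimal sketch, assuming the same single-linear-layer
# regression design used throughout these examples; the std=0.01
# initialization is an assumption:
def get_net(feature_num):
    net = nn.Linear(feature_num, 1)
    # Small random initial weights; nn.init.normal_ works in place
    for param in net.parameters():
        nn.init.normal_(param, mean=0, std=0.01)
    return net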
def fit_and_plot(lambd):
    w, b = init_params()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            # Add the L2 penalty term written earlier to the loss;
            # it is scaled by lambd to control the decay rate
            l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l = l.sum()
            if w.grad is not None:
                w.grad.data.zero_()
                b.grad.data.zero_()
            l.backward()
            # Gradient descent step
            linear_reg.sgd([w, b], lr, batch_size)
        # Record the loss of each epoch
        train_ls.append(
            loss(net(train_features, w, b), train_labels).mean().item())
        test_ls.append(
            loss(net(test_features, w, b), test_labels).mean().item())
    # Plot and print the results
    print('L2 norm of w:', w.norm().item())
    plot.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                  range(1, num_epochs + 1), test_ls, ['train', 'test'])
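# fit_and_plot relies on the l2_penalty helper "written earlier", which is
# not shown in this section. A minimal sketch of the usual definition: half
# the squared norm of w, so the gradient of the penalty w.r.t. w is simply w.
# Only the weights are penalized, never the bias.
def l2_penalty(w):
    return (w ** 2).sum() / 2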
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    train_l_sum, valid_l_sum = 0, 0
    # For each of the k ways to split off a validation fold
    for i in range(k):
        # Get the training and validation sets for fold i
        data = get_k_fold_data(k, i, X_train, y_train)
        # Build a network sized to the number of features in X
        net = get_net(X_train.shape[1])
        # Feed the data to the training function above; *data unpacks the
        # four returned tensors into the corresponding parameters
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate,
                                   weight_decay, batch_size)
        # Index -1 takes the last element, i.e. the final epoch's loss
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        # Plot the error curves of the first fold, and print every fold's result
        if i == 0:
            plot.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                          range(1, num_epochs + 1), valid_ls,
                          ['train', 'valid'])
        print('fold %d, train rmse %f, valid rmse %f'
              % (i, train_ls[-1], valid_ls[-1]))
    # Return the averaged errors; the validation part is a good
    # measure of the generalization error
    return train_l_sum / k, valid_l_sum / k
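# k_fold depends on get_k_fold_data, which is also not shown here. A minimal
# sketch consistent with how it is called above: fold i becomes the
# validation set and the remaining k-1 folds are concatenated into the
# training set, with the four return values matching train's parameter order.
import torch

def get_k_fold_data(k, i, X, y):
    assert k > 1
    fold_size = X.shape[0] // k
    X_train, y_train = None, None
    for j in range(k):
        # Slice out fold j
        idx = slice(j * fold_size, (j + 1) * fold_size)
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            # Fold i is held out for validation
            X_valid, y_valid = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            # All other folds are stacked into the training set
            X_train = torch.cat((X_train, X_part), dim=0)
            y_train = torch.cat((y_train, y_part), dim=0)
    return X_train, y_train, X_valid, y_valid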