def fit_and_plot_gluon(wd):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=1))
    # Decay the weight parameters (parameter names generally end in "weight")
    trainer_w = gluon.Trainer(net.collect_params(".*weight"), "sgd",
                              {"learning_rate": lr, "wd": wd})
    # Do not decay the bias parameters (names generally end in "bias")
    trainer_b = gluon.Trainer(net.collect_params(".*bias"), "sgd",
                              {"learning_rate": lr})
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer_w.step(batch_size)
            trainer_b.step(batch_size)
        train_ls.append(
            loss(net(train_features), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_features), test_labels).mean().asscalar())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, "epochs", "loss",
                 range(1, num_epochs + 1), test_ls, ["train", "test"])
    print("L2 norm of w", net[0].weight.data().norm().asscalar())
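The fit_and_plot_gluon variants in this collection rely on globals (lr, num_epochs, batch_size, train_iter, loss, and the feature/label arrays) that are defined elsewhere. A minimal sketch of that assumed setup, following the d2l weight-decay section (the exact values are illustrative assumptions, not part of the snippets above):

from mxnet import autograd, gluon, init, nd
from mxnet.gluon import data as gdata, loss as gloss, nn
import d2lzh as d2l

# Assumed synthetic high-dimensional regression setup (illustrative values)
n_train, n_test, num_inputs = 20, 100, 200
true_w, true_b = nd.ones((num_inputs, 1)) * 0.01, 0.05
features = nd.random.normal(shape=(n_train + n_test, num_inputs))
labels = nd.dot(features, true_w) + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]

batch_size, num_epochs, lr = 1, 100, 0.003
loss = gloss.L2Loss()
train_iter = gdata.DataLoader(
    gdata.ArrayDataset(train_features, train_labels), batch_size,
    shuffle=True)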
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net()
        # data unpacks as (train features, train labels, valid features,
        # valid labels)
        train_ls, valid_ls = train(net, data[0], data[1], data[2], data[3],
                                   num_epochs, learning_rate, weight_decay,
                                   batch_size)
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        if i == 0:
            d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                         range(1, num_epochs + 1), valid_ls,
                         ['train', 'valid'])
        print('fold %d, train rmse %f, valid rmse %f'
              % (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
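Every k_fold variant here calls a get_k_fold_data helper that is not shown. A sketch consistent with how data[0]..data[3] are consumed, mirroring the d2l Kaggle section (treat it as an assumed implementation):

def get_k_fold_data(k, i, X, y):
    # Return fold i as the validation set and the remaining k-1 folds,
    # concatenated, as the training set.
    assert k > 1
    fold_size = X.shape[0] // k
    X_train, y_train = None, None
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size)
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            X_valid, y_valid = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            X_train = nd.concat(X_train, X_part, dim=0)
            y_train = nd.concat(y_train, y_part, dim=0)
    return X_train, y_train, X_valid, y_valid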
def fit_and_plot(train_features, test_features, train_labels, test_labels):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize()
    batch_size = min(10, train_labels.shape[0])
    train_iter = gdata.DataLoader(
        gdata.ArrayDataset(train_features, train_labels), batch_size,
        shuffle=True)
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.01})
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
        train_ls.append(
            loss(net(train_features), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_features), test_labels).mean().asscalar())
    print('final epoch: train loss', train_ls[-1], 'test loss', test_ls[-1])
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('weight:', net[0].weight.data().asnumpy(),
          '\nbias:', net[0].bias.data().asnumpy())
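This variant takes its feature/label splits as arguments; in the d2l polynomial-fitting experiment they come from a cubic ground truth. A sketch of that assumed data generation (true coefficients follow the book's example and are assumptions here):

# Assumed data generation for the underfitting/overfitting experiment
n_train, n_test = 100, 100
true_w, true_b = [1.2, -3.4, 5.6], 5
features = nd.random.normal(shape=(n_train + n_test, 1))
poly_features = nd.concat(features, nd.power(features, 2),
                          nd.power(features, 3))
labels = (true_w[0] * poly_features[:, 0] + true_w[1] * poly_features[:, 1]
          + true_w[2] * poly_features[:, 2] + true_b)
labels += nd.random.normal(scale=0.1, shape=labels.shape)

# Fitting the cubic features recovers the ground truth; passing the raw
# `features` columns instead demonstrates underfitting:
# fit_and_plot(poly_features[:n_train, :], poly_features[n_train:, :],
#              labels[:n_train], labels[n_train:])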
def fit_and_plot_gluon(wd):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=1))
    # Apply weight decay to the weight parameters
    trainer_w = gluon.Trainer(net.collect_params('.*weight'), 'sgd',
                              {'learning_rate': lr, 'wd': wd})
    # Do not apply weight decay to the bias parameters
    trainer_b = gluon.Trainer(net.collect_params('.*bias'), 'sgd',
                              {'learning_rate': lr})
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for x, y in train_iter:
            with autograd.record():
                l = loss(net(x), y)
            l.backward()
            # Call step on each Trainer to update the weights and the bias
            trainer_w.step(batch_size)
            trainer_b.step(batch_size)
        train_ls.append(
            loss(net(train_features), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_features), test_labels).mean().asscalar())
    print('L2 norm of w:', net[0].weight.data().norm().asscalar())
    print('final epoch: train loss ', train_ls[-1], 'test loss', test_ls[-1])
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
def fit_and_plot_gluon(wd):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=1))
    # Decay the weight parameters (parameter names generally end in "weight")
    trainer_w = gluon.Trainer(net.collect_params('.*weight'), 'sgd',
                              {'learning_rate': lr, 'wd': wd})
    # Do not decay the bias parameters (names generally end in "bias")
    trainer_b = gluon.Trainer(net.collect_params('.*bias'), 'sgd',
                              {'learning_rate': lr})
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            # Call step on each of the two Trainer instances to update the
            # weights and the bias separately
            trainer_w.step(batch_size)
            trainer_b.step(batch_size)
        train_ls.append(
            loss(net(train_features), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_features), test_labels).mean().asscalar())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net[0].weight.data().norm().asscalar())
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    net = get_net()
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    # Predict on the test features (not the training features)
    preds = net(test_features).asnumpy()
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, learning_rate, weight_decay, batch_size):
    net = get_net()
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, learning_rate, weight_decay, batch_size)
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    preds = net(test_features).asnumpy()
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    # Write the predictions for test_data to a submission file
    submission.to_csv('submission.csv', index=False)
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    net = get_net()
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    preds = net(test_features).asnumpy()
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('E:/Download/kaggle_house/submission.csv', index=False)
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    net = get_net()
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    # Fill the SalePrice column of test_data with the predictions, then
    # write out the CSV
    preds = net(test_features).asnumpy()
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    # index=False means the row index is not written to the file
    submission.to_csv('submission.csv', index=False)
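The train_and_pred variants above all call a train helper (and a log-rmse metric) that is not shown. A sketch consistent with the call signature, following the d2l Kaggle section and assuming the global loss = gloss.L2Loss() from the setup sketch earlier:

def log_rmse(net, features, labels):
    # Clamp predictions to >= 1 so that taking logs is stable, then compute
    # sqrt(mean((log(pred) - log(label))^2))
    clipped_preds = nd.clip(net(features), 1, float('inf'))
    rmse = nd.sqrt(2 * loss(clipped_preds.log(), labels.log()).mean())
    return rmse.asscalar()

def train(net, train_features, train_labels, test_features, test_labels,
          num_epochs, learning_rate, weight_decay, batch_size):
    train_ls, test_ls = [], []
    train_iter = gdata.DataLoader(
        gdata.ArrayDataset(train_features, train_labels), batch_size,
        shuffle=True)
    # Adam is less sensitive to the learning rate than plain SGD here
    trainer = gluon.Trainer(net.collect_params(), 'adam',
                            {'learning_rate': learning_rate,
                             'wd': weight_decay})
    for epoch in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
        train_ls.append(log_rmse(net, train_features, train_labels))
        if test_labels is not None:
            test_ls.append(log_rmse(net, test_features, test_labels))
    return train_ls, test_ls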
def fit_and_plot(lambd):
    w, b = init_params()  # Initialize the model parameters
    train_l, test_l = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                # Add the L2 norm penalty
                l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            d2l.sgd([w, b], lr, batch_size)
        train_l.append(
            loss(net(train_features, w, b), train_labels).mean().asscalar())
        test_l.append(
            loss(net(test_features, w, b), test_labels).mean().asscalar())
    d2l.semilogy(range(1, num_epochs + 1), train_l, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_l, ['train', 'test'],
                 figsize=(15, 5))
    print('L2 norm of w:', w.norm().asscalar())
def fit_and_plot(lambd):
    w, b = init_params()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            d2l.sgd([w, b], lr, batch_size)
        train_ls.append(
            loss(net(train_features, w, b), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_features, w, b), test_labels).mean().asscalar())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, "epochs", "loss",
                 range(1, num_epochs + 1), test_ls, ["train", "test"])
    print("L2 norm of w", w.norm().asscalar())
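The from-scratch fit_and_plot variants assume init_params, l2_penalty, net, and loss helpers defined elsewhere. A sketch matching the d2l weight-decay section, with num_inputs as in the setup sketch earlier (an assumed implementation, not shown in the originals):

def init_params():
    # Randomly initialize the weights and attach gradients for autograd
    w = nd.random.normal(scale=1, shape=(num_inputs, 1))
    b = nd.zeros(shape=(1,))
    w.attach_grad()
    b.attach_grad()
    return [w, b]

def l2_penalty(w):
    # Half the squared L2 norm; the 1/2 cancels when differentiating
    return (w ** 2).sum() / 2

def net(X, w, b):
    # Linear regression model
    return nd.dot(X, w) + b

def loss(y_hat, y):
    # Squared loss
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2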
def k_fold(k, x_train, y_train, num_epochs, batch_size, learning_rate,
           weight_decay):
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, x_train, y_train)
        net = get_net()
        train_ls, valid_ls = train(net, *data, num_epochs, batch_size,
                                   learning_rate, weight_decay)
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        if i == 0:
            d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                         range(1, num_epochs + 1), valid_ls,
                         ['train', 'valid'])
    return train_l_sum / k, valid_l_sum / k
def fit_and_plot(lambd):
    w, b = init_params()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                # Add the L2 norm penalty term; broadcasting turns it into a
                # vector of length batch_size
                l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            d2l.sgd([w, b], lr, batch_size)
        train_ls.append(
            loss(net(train_features, w, b), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_features, w, b), test_labels).mean().asscalar())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', w.norm().asscalar())
def k_fold(k, x_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):  # e.g. k == 5
        data = get_k_fold_data(k, i, x_train, y_train)
        net = get_net()
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate,
                                   weight_decay, batch_size)
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        if i == 0:
            d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                         range(1, num_epochs + 1), valid_ls,
                         ['train', 'valid'])
        print('fold %d, train rmse: %f, valid rmse: %f'
              % (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net()
        # data is a sequence; *data unpacks its elements into the
        # corresponding positional parameters of train
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate,
                                   weight_decay, batch_size)
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        if i == 0:
            # semilogy is the plotting helper
            d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                         range(1, num_epochs + 1), valid_ls,
                         ['train', 'valid'])
        print('fold %d, train rmse %f, valid rmse %f'
              % (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net()
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate,
                                   weight_decay, batch_size)
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        if i == 0:
            # semilogy plots x against log(y)
            d2lzh.semilogy(range(1, num_epochs + 1), train_ls, "epoch",
                           "rmse", range(1, num_epochs + 1), valid_ls,
                           legend=["train", "valid"])
        print("fold %d, train rmse %f, valid rmse %f"
              % (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
def fit_and_plot(lambd):
    w, b = init_params()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for x, y in train_iter:
            with autograd.record():
                l = loss(net(x, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            d2l.sgd([w, b], lr, batch_size)
        train_ls.append(
            loss(net(train_features, w, b), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_features, w, b), test_labels).mean().asscalar())
    print('mean of learned w:', w.mean().asscalar())
    print('L2 norm of w:', w.norm().asscalar())
    print('final epoch: train loss ', train_ls[-1], 'test loss', test_ls[-1])
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
def k_fold(k, X_train, y_train, num_epochs,
           learning_rate, weight_decay, batch_size):
    # Key steps: build the fold data, train on it, and collect the errors.
    # Note that only the training data is passed in; there are k training
    # runs in total, each running for num_epochs epochs.
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net()
        # *data unpacks the fold data into train's positional arguments
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate,
                                   weight_decay, batch_size)
        train_l_sum += train_ls[-1]  # take the loss from the final epoch
        valid_l_sum += valid_ls[-1]
        if i == 0:  # plot only for the first fold
            print('i draw once!!!')
            d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                         range(1, num_epochs + 1), valid_ls,
                         ['train', 'valid'])  # plotting helper, p. 62
        print('fold %d, train rmse %f, valid rmse %f'
              % (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
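A typical invocation of the k_fold variants with this parameter order, using the d2l Kaggle section's default hyperparameters (illustrative values, not the only reasonable choice):

# Illustrative hyperparameters (the d2l Kaggle section's defaults)
k, num_epochs, lr, weight_decay, batch_size = 5, 100, 5, 0, 64
train_l, valid_l = k_fold(k, train_features, train_labels, num_epochs, lr,
                          weight_decay, batch_size)
print('%d-fold validation: avg train rmse %f, avg valid rmse %f'
      % (k, train_l, valid_l))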
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    net = get_net()
    ###########################
    # Train with the k_fold method
    ###########################
    # train_l, valid_l = k_fold(k, train_features, train_labels, num_epochs,
    #                           lr, weight_decay, batch_size)
    # print('%d-fold validation: avg train rmse %f, avg valid rmse %f'
    #       % (k, train_l, valid_l))
    ###########################
    # Train with plain training
    ###########################
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    preds = net(test_features).asnumpy()
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('submission.csv', index=False)
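An end-to-end call might look like the following, assuming the preprocessed Kaggle house-price arrays from earlier in the pipeline (the hyperparameter values are illustrative):

# Illustrative end-to-end call; train_features, test_features, train_labels,
# and test_data come from the (assumed) Kaggle preprocessing step
num_epochs, lr, weight_decay, batch_size = 100, 5, 0, 64
train_and_pred(train_features, test_features, train_labels, test_data,
               num_epochs, lr, weight_decay, batch_size)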
def fit_and_plot(lambd):
    # w: <NDArray 200x1 @cpu(0)>, b: <NDArray 1 @cpu(0)>
    w, b = init_params()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):  # 100 epochs (100 rounds of gradient descent)
        # The training set has 20 examples; with batch_size 1, each epoch
        # draws them one at a time, i.e. 20 updates per epoch
        for X, y in train_iter:
            # X: <NDArray 1x200 @cpu(0)>, y: <NDArray 1x1 @cpu(0)>
            with autograd.record():
                # Add the L2 norm penalty term; broadcasting turns it into a
                # vector of length batch_size:
                #   penalized loss = loss + lambd / 2 * ||w||^2
                # Since l2_penalty(w) = (w ** 2).sum() / 2, we have
                #   lambd * l2_penalty(w) = lambd / 2 * (w ** 2).sum()
                # The model itself is nd.dot(X, w) + b, with
                #   X: 1x200, w: 200x1, b: 1
                net_result = net(X, w, b)
                l = loss(net_result, y) + lambd * l2_penalty(w)
            # Compute the gradients
            l.backward()
            # Gradient descent step: batch_size 1, lr 0.003, params [w, b]
            d2l.sgd([w, b], lr, batch_size)
        # train_features (20x200) * w (200x1) + b -> predictions (20x1)
        train_ls.append(
            loss(net(train_features, w, b), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_features, w, b), test_labels).mean().asscalar())
    # Over the 100 epochs the training loss keeps dropping while the test
    # loss stays high
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    # norm() example: for (1, 2, 3, 4) the squared entries sum to
    # 1 + 4 + 9 + 16 = 30, so norm() returns sqrt(30)
    # t = nd.array((1, 2, 3, 4)).reshape((1, 4)); t.norm().asscalar()
    print('L2 norm of w:', w.norm().asscalar())
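As a quick check of the identity spelled out in the comments above, the penalty term can be compared directly against the norm (a hypothetical check, using the l2_penalty sketched earlier):

# lambd * l2_penalty(w) should equal lambd / 2 * ||w||^2
lambd = 3
w_check = nd.array([1, 2, 3, 4]).reshape((4, 1))
penalty = lambd * l2_penalty(w_check)           # 3 / 2 * 30 = 45
via_norm = lambd / 2 * w_check.norm() ** 2      # same value
print(penalty.asscalar(), via_norm.asscalar())  # 45.0 45.0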