def fit_and_plot_pytorch(wd):
    net = nn.Linear(num_inputs, 1)
    nn.init.normal_(net.weight, mean=0, std=1)
    nn.init.normal_(net.bias, mean=0, std=1)
    # Apply weight decay to the weight parameter
    optimizer_w = torch.optim.SGD(params=[net.weight], lr=lr, weight_decay=wd)
    # No weight decay for the bias
    optimizer_b = torch.optim.SGD(params=[net.bias], lr=lr)
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            l = loss(net(X), y).mean()
            optimizer_w.zero_grad()
            optimizer_b.zero_grad()
            l.backward()
            # Call step() on both optimizer instances to update
            # the weight and the bias separately
            optimizer_w.step()
            optimizer_b.step()
        train_ls.append(loss(net(train_features), train_labels).mean().item())
        test_ls.append(loss(net(test_features), test_labels).mean().item())
    utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                   range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net.weight.data.norm().item())
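# Usage sketch for fit_and_plot_pytorch above. The function relies on
# module-level globals; one plausible setup is the usual high-dimensional
# synthetic regression (few training examples, many inputs) -- all names and
# hyperparameter values here are illustrative assumptions, not taken from
# this file.
import torch
from torch import nn

n_train, n_test, num_inputs = 20, 100, 200
lr, num_epochs, batch_size = 0.003, 100, 1
true_w, true_b = torch.ones(num_inputs, 1) * 0.01, 0.05
features = torch.randn((n_train + n_test, num_inputs))
labels = torch.matmul(features, true_w) + true_b
labels += 0.01 * torch.randn(labels.shape)  # small Gaussian noise
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]
dataset = torch.utils.data.TensorDataset(train_features, train_labels)
train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)
loss = nn.MSELoss()
fit_and_plot_pytorch(wd=3)  # compare against wd=0 to see the regularization effect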
def fit_and_plot_pytorch(self, wd):
    net = nn.Linear(self.n_input, 1)
    nn.init.normal_(net.weight, mean=0., std=1)
    nn.init.normal_(net.bias, mean=0., std=1)
    # Decay the weight but not the bias
    optim_w = torch.optim.SGD(params=[net.weight], lr=self.lr, weight_decay=wd)
    optim_b = torch.optim.SGD(params=[net.bias], lr=self.lr)
    train_iter, train_features, test_features, train_labels, test_labels = self.load_dataset()
    train_ls, test_ls = [], []
    for _ in range(self.n_epochs):
        for X, y in train_iter:
            l = self.loss()(net(X), y).mean()
            optim_w.zero_grad()
            optim_b.zero_grad()
            l.backward()
            optim_w.step()
            optim_b.step()
        train_ls.append(self.loss()(net(train_features), train_labels).mean().item())
        test_ls.append(self.loss()(net(test_features), test_labels).mean().item())
    utils.semilogy(range(1, self.n_epochs + 1), train_ls, 'epoch', 'loss',
                   range(1, self.n_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net.weight.data.norm().item())
def fit_and_plot(self, train_features, test_features, train_labels, test_labels):
    net = nn.Linear(train_features.shape[-1], 1)
    batch_size = min(10, train_features.shape[0])
    dataset = torch.utils.data.TensorDataset(train_features, train_labels)
    train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    train_loss, test_loss = [], []
    for epoch in range(self.n_epochs):
        for X, y in train_iter:
            loss = self.loss()(net(X), y.view(-1, 1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_labels = train_labels.view(-1, 1)
        test_labels = test_labels.view(-1, 1)
        train_loss.append(self.loss()(net(train_features), train_labels).mean().item())
        test_loss.append(self.loss()(net(test_features), test_labels).mean().item())
    print(f'final epoch train loss: {train_loss[-1]}, test loss: {test_loss[-1]}')
    utils.semilogy(range(1, self.n_epochs + 1), train_loss, 'epoch', 'loss',
                   range(1, self.n_epochs + 1), test_loss, ['train', 'test'])
    print('gt_weight: {}, gt_bias: {}'.format(self.true_w, self.true_b))
    print('pre_weight: {}, pre_bias: {}'.format(net.weight.data.numpy(),
                                                net.bias.data.numpy()))
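# Usage sketch for the fit_and_plot method above. A common way to feed it is
# cubic-polynomial synthetic data for under/overfitting experiments; the
# coefficients and the instance name `exp` are illustrative assumptions.
n_train, n_test = 100, 100
true_w, true_b = [1.2, -3.4, 5.6], 5
features = torch.randn((n_train + n_test, 1))
poly_features = torch.cat((features, torch.pow(features, 2),
                           torch.pow(features, 3)), 1)
labels = (true_w[0] * poly_features[:, 0] + true_w[1] * poly_features[:, 1]
          + true_w[2] * poly_features[:, 2] + true_b)
labels += 0.01 * torch.randn(labels.shape)
# exp.fit_and_plot(poly_features[:n_train, :], poly_features[n_train:, :],
#                  labels[:n_train], labels[n_train:])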
def fit_and_plot_gluon(wd):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=1))
    # Add weight decay to the weight parameters
    trainer_w = gluon.Trainer(net.collect_params('.*weight'), 'sgd',
                              {'learning_rate': learning_rate, 'wd': wd})
    # No weight decay for the bias
    trainer_b = gluon.Trainer(net.collect_params('.*bias'), 'sgd',
                              {'learning_rate': learning_rate})
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            # Step both trainers to update weight and bias separately
            trainer_w.step(batch_size)
            trainer_b.step(batch_size)
        train_ls.append(loss(net(train_features), train_labels).mean().asscalar())
        test_ls.append(loss(net(test_features), test_labels).mean().asscalar())
    utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                   range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print("L2 norm of w:", net[0].weight.data().norm().asscalar())
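# Usage sketch: with the surrounding globals defined as in the weight-decay
# experiment, running the Gluon version with and without decay contrasts the
# train/test gap (the value 3 is illustrative).
fit_and_plot_gluon(0)  # no weight decay: training loss low, test loss high
fit_and_plot_gluon(3)  # wd=3 shrinks the L2 norm of w and narrows the gap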
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    net = get_net(train_features.shape[1])
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    preds = net(test_features).detach().numpy()
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('./submission.csv', index=False)
def train_and_pred(self):
    k, num_epochs, lr, weight_decay, batch_size = 5, 100, 5, 0, 16
    net = self.net(self.train_features.shape[1])
    train_ls, _ = self.train(net, self.train_features, self.train_labels,
                             None, None, n_epochs=num_epochs, lr=lr,
                             weight_decay=weight_decay, batch_size=batch_size)
    utils.semilogy(range(1, 1 + num_epochs), train_ls, 'epoch', 'loss')
    print('train rmse is: {}'.format(train_ls[-1]))
    preds = net(self.test_features).detach().numpy()
    self.test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([self.test_data['Id'], self.test_data['SalePrice']],
                           axis=1)
    submission.to_csv('./submission.csv', index=False)
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net(X_train.shape[1])
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate,
                                   weight_decay, batch_size)
        train_l_sum += train_ls[-1]  # training error from the final epoch
        valid_l_sum += valid_ls[-1]
        if i == 0:
            utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                           range(1, num_epochs + 1), valid_ls, ['train', 'valid'])
        print('fold %d, train rmse %f, valid rmse %f' %
              (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
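# `get_k_fold_data` is called above but not defined in this file. A minimal
# sketch, assuming PyTorch tensors: fold i becomes the validation split and
# the remaining k-1 folds are concatenated into the training split.
def get_k_fold_data(k, i, X, y):
    assert k > 1
    fold_size = X.shape[0] // k
    X_train, y_train = None, None
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size)
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            X_valid, y_valid = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            X_train = torch.cat((X_train, X_part), dim=0)
            y_train = torch.cat((y_train, y_part), dim=0)
    return X_train, y_train, X_valid, y_valid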
def k_fold(self, k, X_train, y_train, n_epochs, lr, weight_decay, batch_size):
    train_sum, val_sum = 0, 0
    for i in range(k):
        data = self.get_k_fold_data(k, i, X_train, y_train)
        net = self.net(X_train.shape[1])
        train_ls, val_ls = self.train(net, *data, n_epochs, lr, weight_decay,
                                      batch_size)
        train_sum += train_ls[-1]
        val_sum += val_ls[-1]
        if i == 0:
            utils.semilogy(range(1, n_epochs + 1), train_ls, 'epoch', 'rmse-loss',
                           range(1, n_epochs + 1), val_ls, ['train', 'val'])
        print(f'{i} fold train rmse: {train_ls[-1]}, val rmse: {val_ls[-1]}')
    mean_train_ls, mean_val_ls = train_sum / k, val_sum / k
    print(f'{k} fold mean train rmse: {mean_train_ls}, val rmse: {mean_val_ls}')
def fit_and_plot(lambd):
    w, b = init_params()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            # Add the L2 penalty term to the loss
            l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l = l.sum()
            if w.grad is not None:
                w.grad.data.zero_()
                b.grad.data.zero_()
            l.backward()
            utils.sgd([w, b], lr, batch_size)
        train_ls.append(loss(net(train_features, w, b),
                             train_labels).mean().item())
        test_ls.append(loss(net(test_features, w, b),
                            test_labels).mean().item())
    utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                   range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', w.norm().item())
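# `init_params` and `l2_penalty` are used above but not defined in this file.
# A minimal sketch of what fit_and_plot expects, assuming a global
# `num_inputs` as in the weight-decay experiment:
def init_params():
    w = torch.randn((num_inputs, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    return w, b

def l2_penalty(w):
    # Half the squared L2 norm; the factor 1/2 keeps the gradient simply w
    return (w ** 2).sum() / 2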
def fit_and_plot(lambd):
    w = nd.random.normal(scale=1, shape=true_w.shape)
    b = nd.zeros(shape=(1,))
    w.attach_grad()
    b.attach_grad()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            utils.sgd([w, b], learning_rate, batch_size)
        train_ls.append(loss(net(train_features, w, b),
                             train_labels).mean().asscalar())
        test_ls.append(loss(net(test_features, w, b),
                            test_labels).mean().asscalar())
    utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                   range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print("L2 norm of w:", w.norm().asscalar())
def optimize(batch_size, lr, mom, num_epochs, log_interval):
    [w, b], vs = init_params()
    y_vals = [squared_loss(net(X, w, b), y).mean().asnumpy()]
    print('batch_size', batch_size)
    for epoch in range(1, num_epochs + 1):
        # Decay the learning rate after the first two epochs
        if epoch > 2:
            lr *= .1
        for batch_i, (features, label) in enumerate(
                utils.data_iter(batch_size, num_examples, X, y)):
            with autograd.record():
                output = net(features, w, b)
                loss = squared_loss(output, label)
            loss.backward()
            sgd_momentum([w, b], vs, lr, mom, batch_size)
            if batch_i * batch_size % log_interval == 0:
                y_vals.append(squared_loss(net(X, w, b), y).mean().asnumpy())
        print('epoch %d, learning_rate %f, loss %.4e' % (epoch, lr, y_vals[-1]))
    # Reshape w and convert to NumPy for readable printing
    print('w:', w.reshape((1, -1)).asnumpy(), 'b:', b.asscalar(), '\n')
    x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
    utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
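# `sgd_momentum` is called above but not defined here. A minimal sketch of
# momentum SGD on NDArrays, matching the (params, vs, lr, mom, batch_size)
# argument order used in this variant (note that the second `optimize`
# variant further below passes the same pieces in a different order):
def sgd_momentum(params, vs, lr, mom, batch_size):
    for param, v in zip(params, vs):
        # Update the velocity, then move the parameter against it
        v[:] = mom * v + lr * param.grad / batch_size
        param[:] = param - v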
def fit_and_plot(train_features, test_features, train_labels, test_labels):
    # nn.Linear initializes its parameters automatically
    net = torch.nn.Linear(train_features.shape[-1], 1)
    batch_size = min(10, train_labels.shape[0])
    dataset = torch.utils.data.TensorDataset(train_features, train_labels)
    train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            l = loss(net(X), y.view(-1, 1))
            optimizer.zero_grad()
            l.backward()
            optimizer.step()  # built-in parameter update
        train_labels = train_labels.view(-1, 1)
        test_labels = test_labels.view(-1, 1)
        train_ls.append(loss(net(train_features), train_labels).item())
        test_ls.append(loss(net(test_features), test_labels).item())
    print('final epoch: train loss', train_ls[-1], 'test loss', test_ls[-1])
    utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                   range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('weight:', net.weight.data, '\nbias:', net.bias.data)
def fit_and_plot(self, lambd):
    [w, b] = self.init_weights()
    train_iter, train_features, test_features, train_labels, test_labels = self.load_dataset()
    train_ls, test_ls = [], []
    for _ in range(self.n_epochs):
        for X, y in train_iter:
            l = self.loss()(self.net()(X, w, b), y) + lambd * self.l2_penalty(w)
            l = l.sum()
            if w.grad is not None:
                w.grad.data.zero_()
                b.grad.data.zero_()
            l.backward()
            utils.sgd([w, b], self.lr, self.batch_size)
        train_ls.append(self.loss()(self.net()(train_features, w, b),
                                    train_labels).mean().item())
        test_ls.append(self.loss()(self.net()(test_features, w, b),
                                   test_labels).mean().item())
    utils.semilogy(range(1, self.n_epochs + 1), train_ls, 'epoch', 'loss',
                   range(1, self.n_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', w.norm().item())
def optimize(batch_size, lr, mom, num_epochs, log_interval):
    num_examples = 1000
    X, y = genData(num_examples)
    [w, b], vs = init_params()
    y_vals = [utils.squared_loss(utils.linreg(X, w, b), y).mean().asnumpy()]
    for epoch in range(1, num_epochs + 1):
        # Decay the learning rate after the first two epochs
        if epoch > 2:
            lr *= 0.1
        for batch_i, (features, label) in enumerate(
                utils.data_iter(batch_size, num_examples, X, y)):
            with autograd.record():
                output = utils.linreg(features, w, b)
                loss = utils.squared_loss(output, label)
            loss.backward()
            sgd_momentum([w, b], lr, batch_size, vs, mom)
            if batch_i * batch_size % log_interval == 0:
                y_vals.append(
                    utils.squared_loss(utils.linreg(X, w, b), y).mean().asnumpy())
    print('w:', w, '\nb:', b, '\n')
    x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
    utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval, X, y,
             net):
    # num_examples = 1000
    # X, y = genData(num_examples)
    dataset = gdata.ArrayDataset(X, y)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    square_loss = gloss.L2Loss()
    y_vals = [square_loss(net(X), y).mean().asnumpy()]
    for epoch in range(1, num_epochs + 1):
        # Decay the learning rate once the decay epoch is reached
        if decay_epoch and epoch > decay_epoch:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for batch_i, (features, label) in enumerate(data_iter):
            with autograd.record():
                output = net(features)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
            if batch_i * batch_size % log_interval == 0:
                y_vals.append(square_loss(net(X), y).mean().asnumpy())
    # Print the learned weight and bias
    print('w:', net[0].weight.data(), '\nb:', net[0].bias.data(), '\n')
    x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
    utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
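# Usage sketch for the Gluon `optimize` above, driving it with a momentum
# trainer. `genData` is assumed from the surrounding file; the hyperparameter
# values are illustrative.
X, y = genData(1000)
net = nn.Sequential()
net.add(nn.Dense(1))
net.initialize(init.Normal(sigma=0.01))
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.2, 'momentum': 0.9})
optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=2,
         log_interval=10, X=X, y=y, net=net)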