def fit_and_plot_pytorch(wd):
    net = nn.Linear(num_inputs, 1)
    nn.init.normal_(net.weight, mean=0, std=1)
    nn.init.normal_(net.bias, mean=0, std=1)
    # Apply weight decay to the weight parameter
    optimizer_w = torch.optim.SGD(params=[net.weight], lr=lr, weight_decay=wd)
    # No weight decay for the bias
    optimizer_b = torch.optim.SGD(params=[net.bias], lr=lr)

    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            l = loss(net(X), y).mean()
            optimizer_w.zero_grad()
            optimizer_b.zero_grad()

            l.backward()

            # Call step() on each optimizer instance so the weight and the bias are updated separately
            optimizer_w.step()
            optimizer_b.step()

        train_ls.append(loss(net(train_features), train_labels).mean().item())
        test_ls.append(loss(net(test_features), test_labels).mean().item())
    utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                   range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net.weight.data.norm().item())
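The function above relies on module-level names (num_inputs, lr, num_epochs, loss, train_iter and the feature/label tensors) defined elsewhere in its source. A minimal sketch of that setup, assuming a synthetic high-dimensional linear-regression dataset (sizes and true parameters below are illustrative, not taken from the original):

import torch
import torch.nn as nn

# Hypothetical setup assumed by fit_and_plot_pytorch
n_train, n_test, num_inputs = 20, 100, 200
true_w, true_b = torch.ones(num_inputs, 1) * 0.01, 0.05

features = torch.randn(n_train + n_test, num_inputs)
labels = torch.matmul(features, true_w) + true_b
labels += torch.normal(0, 0.01, size=labels.shape)
train_features, test_features = features[:n_train], features[n_train:]
train_labels, test_labels = labels[:n_train], labels[n_train:]

batch_size, num_epochs, lr = 1, 100, 0.003
loss = nn.MSELoss(reduction='none')  # per-sample loss, averaged explicitly in the training loop
dataset = torch.utils.data.TensorDataset(train_features, train_labels)
train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)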
    def fit_and_plot_pytorch(self, wd):
        net = nn.Linear(self.n_inputs, 1)
        nn.init.normal_(net.weight, mean=0., std=1)
        nn.init.normal_(net.bias, mean=0., std=1)

        optim_w = torch.optim.SGD(params=[net.weight],
                                  lr=self.lr,
                                  weight_decay=wd)
        optim_b = torch.optim.SGD(params=[net.bias], lr=self.lr)

        (train_iter, train_features, test_features,
         train_labels, test_labels) = self.load_dataset()

        train_ls, test_ls = [], []
        for _ in range(self.n_epochs):
            for X, y in train_iter:
                l = self.loss()(net(X), y).mean()
                optim_w.zero_grad()
                optim_b.zero_grad()

                l.backward()
                optim_w.step()
                optim_b.step()
            train_ls.append(self.loss()(net(train_features),
                                        train_labels).mean().item())
            test_ls.append(self.loss()(net(test_features),
                                       test_labels).mean().item())
        utils.semilogy(range(1, self.n_epochs + 1), train_ls, 'epoch', 'loss',
                       range(1, self.n_epochs + 1), test_ls, ['train', 'test'])
        print('L2 norm of w:', net.weight.data.norm().item())
    def fit_and_plot(self, train_features, test_features, train_labels,
                     test_labels):
        net = nn.Linear(train_features.shape[-1], 1)
        batch_size = min(10, train_features.shape[0])
        dataset = torch.utils.data.TensorDataset(train_features, train_labels)
        train_iter = torch.utils.data.DataLoader(dataset,
                                                 batch_size,
                                                 shuffle=True)
        optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
        train_loss, test_loss = [], []
        for epoch in range(self.n_epochs):
            for X, y in train_iter:
                loss = self.loss()(net(X), y.view(-1, 1))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            train_labels = train_labels.view(-1, 1)
            test_labels = test_labels.view(-1, 1)
            train_loss.append(self.loss()(net(train_features),
                                          train_labels).mean().item())
            test_loss.append(self.loss()(net(test_features),
                                         test_labels).mean().item())
        print(f'final epoch train loss: {train_loss[-1]}, test loss: {test_loss[-1]}')

        utils.semilogy(range(1, self.n_epochs + 1), train_loss, 'epoch', 'loss',
                       range(1, self.n_epochs + 1), test_loss, ['train', 'test'])

        print('gt_weight: {}, gt_bias:{}'.format(self.true_w, self.true_b))
        print('pre_weight: {},pre_bias:{}'.format(net.weight.data.numpy(),
                                                  net.bias.data.numpy()))
Example 4
def fit_and_plot_gluon(wd):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=1))
    # Apply weight decay to the weight parameters only
    trainer_w = gluon.Trainer(net.collect_params('.*weight'), 'sgd',
                              {'learning_rate': learning_rate, 'wd': wd})
    # No weight decay for bias
    trainer_b = gluon.Trainer(net.collect_params('.*bias'), 'sgd',
                              {'learning_rate': learning_rate})
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer_w.step(batch_size)
            trainer_b.step(batch_size)
        train_ls.append(
            loss(net(train_features), train_labels).mean().asscalar())
        test_ls.append(loss(net(test_features), test_labels).mean().asscalar())
    utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                   range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print("L2 norm of w:", net[0].weight.data().norm().asscalar())
def train_and_pred(train_features, test_features, train_labels, test_data, num_epochs, lr, weight_decay, batch_size):
    net = get_net(train_features.shape[1])
    train_ls, _ = train(net, train_features, train_labels, None, None, num_epochs, lr, weight_decay, batch_size)
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    preds = net(test_features).detach().numpy()
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('./submission.csv', index=False)
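train_and_pred calls get_net and train, which are defined elsewhere in its source. A minimal, hypothetical sketch of get_net, assuming a plain linear model with torch.nn imported as nn:

def get_net(feature_num):
    # Hypothetical helper: one linear layer mapping the input features to a single price
    net = nn.Linear(feature_num, 1)
    for param in net.parameters():
        nn.init.normal_(param, mean=0, std=0.01)
    return net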
    def train_and_pred(self):
        k, num_epochs, lr, weight_decay, batch_size = 5, 100, 5, 0, 16
        net = self.net(self.train_features.shape[1])
        train_ls, _ = self.train(net, self.train_features, self.train_labels, None, None,
                                 n_epochs=num_epochs, lr=lr, weight_decay=weight_decay,
                                 batch_size=batch_size)
        utils.semilogy(range(1, 1 + num_epochs), train_ls, 'epoch', 'loss')
        print('train rmse is: {}'.format(train_ls[-1]))
        preds = net(self.test_features).detach().numpy()
        self.test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
        submission = pd.concat([self.test_data['Id'], self.test_data['SalePrice']], axis=1)
        submission.to_csv('./submission.csv', index=False)
Example 7
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay, batch_size):
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net(X_train.shape[1])
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate, weight_decay, batch_size)
        train_l_sum += train_ls[-1]  # training error of the final epoch
        valid_l_sum += valid_ls[-1]
        if i == 0:
            utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                range(1, num_epochs + 1), valid_ls, ['train', 'valid'])
        print('fold %d, train rmse %f, valid rmse %f' % (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
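k_fold depends on get_k_fold_data to carve out the i-th validation fold; the helper is not shown here, but a sketch consistent with how it is called (returning train features/labels followed by validation features/labels, PyTorch tensors assumed) could be:

import torch

def get_k_fold_data(k, i, X, y):
    # Hypothetical helper: use fold i as validation data, the rest as training data
    assert k > 1
    fold_size = X.shape[0] // k
    X_train, y_train = None, None
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size)
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            X_valid, y_valid = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            X_train = torch.cat((X_train, X_part), dim=0)
            y_train = torch.cat((y_train, y_part), dim=0)
    return X_train, y_train, X_valid, y_valid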
    def k_fold(self, k, X_train, y_train, n_epochs, lr, weight_decay, batch_size):
        train_sum, val_sum = 0, 0
        for i in range(k):
            data = self.get_k_fold_data(k, i, X_train, y_train)
            net = self.net(X_train.shape[1])
            train_ls, val_ls = self.train(net, *data, n_epochs, lr, weight_decay, batch_size)
            train_sum += train_ls[-1]
            val_sum += val_ls[-1]
            if i == 0:
                utils.semilogy(range(1, n_epochs + 1), train_ls, 'epoch', 'rmse-loss',
                               range(1, n_epochs + 1), val_ls, ['train', 'val'])
            print(f'{i} fold train rmse: {train_ls[-1]}, val rmse: {val_ls[-1]}')
        mean_train_ls, mean_val_ls = train_sum / k, val_sum / k
        print(f'{k} fold mean train rmse: {mean_train_ls}, val rmse: {mean_val_ls}')
Example 9
def fit_and_plot(lambd):
    w, b = init_params()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l = l.sum()

            if w.grad is not None:
                w.grad.data.zero_()
                b.grad.data.zero_()
            l.backward()
            utils.sgd([w, b], lr, batch_size)
        train_ls.append(loss(net(train_features, w, b), train_labels).mean().item())
        test_ls.append(loss(net(test_features, w, b), test_labels).mean().item())
    utils.semilogy(range(1, num_epochs+1), train_ls, 'epochs', 'loss',
                   range(1, num_epochs+1), test_ls, ['train', 'test'])
    print('L2 norm of w:', w.norm().item())
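This from-scratch version uses init_params, net, and l2_penalty from elsewhere in its source. Sketches consistent with how they are called above, assuming a linear model with num_inputs features (hypothetical, for illustration):

import torch

def init_params():
    # Hypothetical: random weight vector and zero bias, with gradients enabled
    w = torch.randn((num_inputs, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    return [w, b]

def net(X, w, b):
    # Plain linear regression model
    return torch.matmul(X, w) + b

def l2_penalty(w):
    # Half the squared L2 norm; scaled by lambd in fit_and_plot above
    return (w ** 2).sum() / 2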
def fit_and_plot(lambd):
    w = nd.random.normal(scale=1, shape=true_w.shape)
    b = nd.zeros(shape=(1, ))
    w.attach_grad()
    b.attach_grad()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            utils.sgd([w, b], learning_rate, batch_size)
        train_ls.append(
            loss(net(train_features, w, b), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_features, w, b), test_labels).mean().asscalar())
    utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                   range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print("L2 norm of w:", w.norm().asscalar())
def optimize(batch_size, lr, mom, num_epochs, log_interval):
    [w, b], vs = init_params()
    y_vals = [squared_loss(net(X, w, b), y).mean().asnumpy()]
    print('batch_size', batch_size)
    for epoch in range(1, num_epochs + 1):
        # Learning rate self-decay
        if epoch > 2:
            lr *= .1
        for batch_i, (features, label) in enumerate(
                utils.data_iter(batch_size, num_examples, X, y)):
            with autograd.record():
                output = net(features, w, b)
                loss = squared_loss(output, label)
            loss.backward()
            sgd_momentum([w, b], vs, lr, mom, batch_size)
            if batch_i * batch_size % log_interval == 0:
                y_vals.append(squared_loss(net(X, w, b), y).mean().asnumpy())
        print('epoch %d, learning_rate %f, loss %.4e' %
              (epoch, lr, y_vals[-1]))
    # Reshape the output and convert it to a NumPy array for easier printing
    print('w:', w.reshape((1, -1)).asnumpy(), 'b:', b.asscalar(), '\n')
    x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
    utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
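sgd_momentum is not defined in this snippet; a hypothetical implementation matching the call sgd_momentum([w, b], vs, lr, mom, batch_size) above, with MXNet NDArrays updated in place:

def sgd_momentum(params, vs, lr, mom, batch_size):
    # Hypothetical: classic momentum update; MXNet sums gradients over the
    # batch, so divide by batch_size to average them
    for param, v in zip(params, vs):
        v[:] = mom * v + lr * param.grad / batch_size
        param[:] = param - v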
Example 12
def fit_and_plot(train_features, test_features, train_labels, test_labels):
    net = torch.nn.Linear(train_features.shape[-1], 1)  # nn.Linear initializes its parameters automatically

    batch_size = min(10, train_labels.shape[0])
    dataset = torch.utils.data.TensorDataset(train_features, train_labels)
    train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)

    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            l = loss(net(X), y.view(-1, 1))
            optimizer.zero_grad()
            l.backward()
            optimizer.step()  # use the library's built-in parameter update
        train_labels = train_labels.view(-1, 1)
        test_labels = test_labels.view(-1, 1)
        train_ls.append(loss(net(train_features), train_labels).item())
        test_ls.append(loss(net(test_features), test_labels).item())
    print('final epoch: train loss', train_ls[-1], 'test loss', test_ls[-1])
    utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                   range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('weight:', net.weight.data, '\nbias:', net.bias.data)
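A hypothetical driver for this fit_and_plot, generating a cubic-polynomial dataset; the coefficients and sizes below are illustrative, and num_epochs and loss are assumed to be defined at module level as the function expects:

import torch

n_train, n_test = 100, 100
true_w, true_b = [1.2, -3.4, 5.6], 5.0
features = torch.randn((n_train + n_test, 1))
poly_features = torch.cat((features, features ** 2, features ** 3), dim=1)
labels = (true_w[0] * poly_features[:, 0] + true_w[1] * poly_features[:, 1]
          + true_w[2] * poly_features[:, 2] + true_b)
labels += torch.normal(0, 0.1, size=labels.shape)

# Third-order polynomial fitting (normal generalization expected)
fit_and_plot(poly_features[:n_train, :], poly_features[n_train:, :],
             labels[:n_train], labels[n_train:])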
    def fit_and_plot(self, lambd):
        [w, b] = self.init_weights()
        (train_iter, train_features, test_features,
         train_labels, test_labels) = self.load_dataset()
        train_ls, test_ls = [], []
        for _ in range(self.n_epochs):
            for X, y in train_iter:
                l = self.loss()(self.net()(X, w, b),
                                y) + lambd * self.l2_penalty(w)
                l = l.sum()

                if w.grad is not None:
                    w.grad.data.zero_()
                    b.grad.data.zero_()
                l.backward()
                utils.sgd([w, b], self.lr, self.batch_size)
            train_ls.append(self.loss()(self.net()(train_features, w, b),
                                        train_labels).mean().item())
            test_ls.append(self.loss()(self.net()(test_features, w, b),
                                       test_labels).mean().item())
        utils.semilogy(range(1, self.n_epochs + 1), train_ls, 'epoch', 'loss',
                       range(1, self.n_epochs + 1), test_ls, ['train', 'test'])
        print('L2 norm of w:', w.norm().item())
Example 14
def optimize(batch_size, lr, mom, num_epochs, log_interval):
    num_examples = 1000
    X, y = genData(num_examples)
    [w, b], vs = init_params()
    y_vals = [utils.squared_loss(utils.linreg(X, w, b), y).mean().asnumpy()]

    for epoch in range(1, num_epochs + 1):
        # Learning rate self-decay.
        if epoch > 2:
            lr *= 0.1
        for batch_i, (features, label) in enumerate(
                utils.data_iter(batch_size, num_examples, X, y)):
            with autograd.record():
                output = utils.linreg(features, w, b)
                loss = utils.squared_loss(output, label)
            loss.backward()
            sgd_momentum([w, b], lr, batch_size, vs, mom)
            if batch_i * batch_size % log_interval == 0:
                y_vals.append(
                    utils.squared_loss(utils.linreg(X, w, b),
                                       y).mean().asnumpy())
    print('w:', w, '\nb:', b, '\n')
    x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
    utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval, X, y,
             net):
    # num_examples = 1000
    # X, y = genData(num_examples)
    dataset = gdata.ArrayDataset(X, y)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    square_loss = gloss.L2Loss()

    y_vals = [square_loss(net(X), y).mean().asnumpy()]
    for epoch in range(1, num_epochs + 1):
        if decay_epoch and epoch > decay_epoch:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for batch_i, (features, label) in enumerate(data_iter):
            with autograd.record():
                output = net(features)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
            if batch_i * batch_size % log_interval == 0:
                y_vals.append(square_loss(net(X), y).mean().asnumpy())
    # Print the learned weight and bias.
    print('w:', net[0].weight.data(), '\nb:', net[0].bias.data(), '\n')
    x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
    utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
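A hypothetical call to this Gluon version, wiring up the net and Trainer that it expects; the dataset X, y is assumed to come from a synthetic linear-regression generator defined elsewhere:

from mxnet import gluon
from mxnet.gluon import nn

net = nn.Sequential()
net.add(nn.Dense(1))
net.initialize()
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.2, 'momentum': 0.9})
# X, y: NDArray features and labels generated elsewhere
optimize(batch_size=10, trainer=trainer, num_epochs=3, decay_epoch=2,
         log_interval=10, X=X, y=y, net=net)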