def train_pytorch_ch7(optimizer_fn, optimizer_hyperparams, features, labels,
                      batch_size=10, num_epochs=2):
    # Initialize the model
    net = nn.Sequential(
        nn.Linear(features.shape[-1], 1)
    )
    loss = nn.MSELoss()
    optimizer = optimizer_fn(net.parameters(), **optimizer_hyperparams)

    def eval_loss():
        return loss(net(features).view(-1), labels).item() / 2

    ls = [eval_loss()]
    data_iter = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(features, labels), batch_size, shuffle=True)

    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            # Divide by 2 to stay consistent with train_ch7,
            # whose squared_loss already divides by 2
            l = loss(net(X).view(-1), y) / 2

            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())
    # Print the result and plot the training curve
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    d2l.set_figsize()
    d2l.plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    d2l.plt.xlabel('epoch')
    d2l.plt.ylabel('loss')
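For reference, a minimal sketch of calling train_pytorch_ch7 with a built-in optimizer. It assumes torch, torch.nn as nn, time, numpy as np, and d2l are already imported as elsewhere in this notebook; the synthetic features and labels below are hypothetical stand-ins for the course dataset:

import numpy as np
import torch

# Hypothetical regression data: 1500 examples, 5 features
features = torch.tensor(np.random.normal(size=(1500, 5)), dtype=torch.float32)
labels = torch.tensor(np.random.normal(size=(1500,)), dtype=torch.float32)

# Mini-batch SGD with learning rate 0.05, batch size 10, 2 epochs
train_pytorch_ch7(torch.optim.SGD, {'lr': 0.05}, features, labels, 10, 2)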
def show_trace(res):
    n = max(abs(min(res)), abs(max(res)))
    f_line = np.arange(-n, n, 0.01)
    d2l.set_figsize((3.5, 2.5))
    d2l.plt.plot(f_line, [f(x) for x in f_line], '-')
    d2l.plt.plot(res, [f(x) for x in res], '-o')
    d2l.plt.xlabel('x')
    d2l.plt.ylabel('f(x)')
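show_trace plots the trajectory res against whatever global f is currently in scope. A minimal sketch of producing such a trajectory with plain gradient descent, assuming the classic illustration f(x) = x**2 (a hypothetical objective for this example, with derivative 2x):

def f(x):  # hypothetical objective for this illustration
    return x ** 2

def gd(eta, num_iters=10):
    x = 10.0
    results = [x]
    for _ in range(num_iters):
        x -= eta * 2 * x  # gradient of x**2 is 2x
        results.append(x)
    return results

show_trace(gd(0.2))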
def train_ch7(optimizer_fn, states, hyperparams, features, labels,
              batch_size=10, num_epochs=2):
    # Initialize the model
    net, loss = d2l.linreg, d2l.squared_loss

    w = torch.nn.Parameter(torch.tensor(np.random.normal(
        0, 0.01, size=(features.shape[1], 1)), dtype=torch.float32),
        requires_grad=True)
    b = torch.nn.Parameter(torch.zeros(1, dtype=torch.float32),
                           requires_grad=True)

    def eval_loss():
        return loss(net(features, w, b), labels).mean().item()

    ls = [eval_loss()]
    data_iter = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        features, labels), batch_size, shuffle=True)

    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            l = loss(net(X, w, b), y).mean()  # use the average loss

            # Reset the gradients; .data operates on the tensor values directly
            if w.grad is not None:
                w.grad.data.zero_()
                b.grad.data.zero_()

            l.backward()
            optimizer_fn([w, b], states, hyperparams)  # update the model parameters

            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())  # record the training loss every 100 examples
    # Print the result and plot the training curve
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    d2l.set_figsize()
    d2l.plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    d2l.plt.xlabel('epoch')
    d2l.plt.ylabel('loss')
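A minimal sketch of an optimizer_fn compatible with train_ch7: plain mini-batch SGD with no optimizer state. The (params, states, hyperparams) signature matches what train_ch7 passes in; the synthetic data is again a hypothetical stand-in:

def sgd(params, states, hyperparams):
    # Update each parameter in place along the negative gradient
    for p in params:
        p.data -= hyperparams['lr'] * p.grad.data

features = torch.tensor(np.random.normal(size=(1500, 5)), dtype=torch.float32)
labels = torch.tensor(np.random.normal(size=(1500,)), dtype=torch.float32)

# sgd keeps no state, so states can simply be None here
train_ch7(sgd, None, {'lr': 0.05}, features, labels)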
import os
import torch
from PIL import Image
import d2lzh1981 as d2l

file_path = ' '
assert file_path != ' ', 'Please set path'

# Make only GPU device 0 visible to the process
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Define the device: the GPU is chosen automatically if available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.__version__)
print(device)

# Display the original image
d2l.set_figsize()
img = Image.open(file_path)
d2l.plt.imshow(img)

# Display a grid of images
def show_images(imgs, num_rows, num_cols, scale=2):
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = d2l.plt.subplots(num_rows, num_cols, figsize=figsize)
    for i in range(num_rows):
        for j in range(num_cols):
            axes[i][j].imshow(imgs[i * num_cols + j])
            axes[i][j].axes.get_xaxis().set_visible(False)
            axes[i][j].axes.get_yaxis().set_visible(False)
    return axes

def apply(img, aug, num_rows=2, num_cols=4, scale=1.5):
    # Apply the augmentation aug several times and show the results in a grid
    Y = [aug(img) for _ in range(num_rows * num_cols)]
    show_images(Y, num_rows, num_cols, scale)
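With apply defined, a minimal usage sketch (assuming torchvision is installed) flips the loaded image horizontally with probability 0.5 and shows eight augmented copies:

import torchvision

# RandomHorizontalFlip operates directly on PIL images
apply(img, torchvision.transforms.RandomHorizontalFlip())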
Optimization and Deep Learning

Optimization and Estimation

Although optimization methods can minimize the value of the loss function in deep learning, the goal an optimization method pursues is fundamentally different from the goal of deep learning:

the goal of optimization is the loss function value on the training set;
the goal of deep learning is the loss function value on the test set (generalization).

%matplotlib inline
import sys
sys.path.append('/home/kesci/input')
import d2lzh1981 as d2l
from mpl_toolkits import mplot3d  # for 3D plotting
import numpy as np

def f(x): return x * np.cos(np.pi * x)
def g(x): return f(x) + 0.2 * np.cos(5 * np.pi * x)

d2l.set_figsize((5, 3))
x = np.arange(0.5, 1.5, 0.01)
fig_f, = d2l.plt.plot(x, f(x), label="train error")
fig_g, = d2l.plt.plot(x, g(x), '--', c='purple', label="test error")
fig_f.axes.annotate('empirical risk', (1.0, -1.2), (0.5, -1.1),
                    arrowprops=dict(arrowstyle='->'))
fig_g.axes.annotate('expected risk', (1.1, -1.05), (0.95, -0.5),
                    arrowprops=dict(arrowstyle='->'))
d2l.plt.xlabel('x')
d2l.plt.ylabel('risk')
d2l.plt.legend(loc="upper right")

Challenges of Optimization in Deep Learning

local minima
saddle points
vanishing gradients

Local Minima
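Reusing the f defined above, a small sketch (annotation coordinates are approximate) that plots f(x) = x * cos(pi * x) on [-1, 2) and marks both a local minimum, where gradient-based methods can get stuck, and the global minimum:

x = np.arange(-1.0, 2.0, 0.1)
fig, = d2l.plt.plot(x, f(x))
fig.axes.annotate('local minimum', xy=(-0.3, -0.25), xytext=(-0.77, -1.0),
                  arrowprops=dict(arrowstyle='->'))
fig.axes.annotate('global minimum', xy=(1.1, -0.95), xytext=(0.6, 0.8),
                  arrowprops=dict(arrowstyle='->'))
d2l.plt.xlabel('x')
d2l.plt.ylabel('f(x)')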