import numpy as np
from matplotlib import pyplot as plt


def function_plot():
    # Plot f together with its tangent line at x = 1 (slope 2, i.e. 2 * x - 3).
    # `d2l` is assumed to be imported elsewhere in this file.
    x = np.arange(0, 5, 0.1)
    d2l.plot(x, [f(x), 2 * x - 3], 'x', 'f(x)',
             legend=['f(x)', 'Tangent line (x=1)'])
    plt.show()
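# `f` is used above but not defined in this snippet. A minimal sketch that is
# consistent with the tangent line 2 * x - 3 at x = 1 (an assumption, not
# necessarily the original definition): f(1) = -1 and f'(1) = 2.
def f(x):
    return 3 * x ** 2 - 4 * x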
def plot_normal_distributions():
    """Plot normal distributions with different mean (mu) and variance (sigma)
    values to show how the mean and the variance affect the shape of the
    distribution.
    """
    # Create an evenly spaced vector from -7 to 7 with 0.01 as the spacing.
    x = np.arange(-7, 7, 0.01)
    # (mean, variance) pairs to plot.
    parameters = [(0, 1), (0, 2), (3, 1)]
    d2l.plot(
        x,
        [compute_normal_distribution(x, mean, variance)
         for mean, variance in parameters],
        xlabel="z",
        ylabel="p(z)",
        figsize=(4.5, 2.5),
        legend=[f"mean {mean}, var {variance}"
                for mean, variance in parameters],
    )
    d2l.plt.savefig("normal_distributions")
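# `compute_normal_distribution` is used above but not defined in this snippet.
# A minimal sketch, assuming it evaluates the Gaussian density
# p(z) = exp(-(z - mean)^2 / (2 * variance)) / sqrt(2 * pi * variance):
import math


def compute_normal_distribution(x, mean, variance):
    coefficient = 1 / math.sqrt(2 * math.pi * variance)
    return coefficient * np.exp(-((x - mean) ** 2) / (2 * variance))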
def plot_normal():
    # Evenly spaced grid from -7 to 7, matching plot_normal_distributions above.
    x = np.arange(-7, 7, 0.01)
    # Mean and variance pairs
    parameters = [(0, 1), (0, 2), (3, 1)]
    d2l.plot(x, [normal(x, mu, sigma) for mu, sigma in parameters],
             xlabel='z', ylabel='p(z)', figsize=(4.5, 2.5),
             legend=['mean %d, var %d' % (mu, sigma)
                     for mu, sigma in parameters])
    plt.show()
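# `normal` is used above but not defined in this snippet. Assuming it computes
# the same Gaussian density as `compute_normal_distribution` above, with
# `sigma` read as the variance (matching the "var" legend labels):
def normal(x, mu, sigma):
    return compute_normal_distribution(x, mean=mu, variance=sigma)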
from mxnet import np, npx
from mxnet.gluon import nn

npx.set_np()


# Sinusoidal positional encoding. The constructor head below is assumed: it
# follows the usage `PositionalEncoding(20, 0)` further down, and
# `max_len=1000` is an assumed default.
class PositionalEncoding(nn.Block):
    def __init__(self, num_hiddens, dropout, max_len=1000, **kwargs):
        super(PositionalEncoding, self).__init__(**kwargs)
        self.dropout = nn.Dropout(dropout)
        # Create a long enough P
        self.P = np.zeros((1, max_len, num_hiddens))
        X = np.arange(0, max_len).reshape(-1, 1) / np.power(
            10000, np.arange(0, num_hiddens, 2) / num_hiddens)
        # `0::2` / `1::2` select every second column starting at 0 and 1:
        # even columns get sine, odd columns get cosine.
        self.P[:, :, 0::2] = np.sin(X)
        self.P[:, :, 1::2] = np.cos(X)

    def forward(self, X):
        X = X + self.P[:, :X.shape[1], :].as_in_ctx(X.ctx)
        return self.dropout(X)


pe = PositionalEncoding(20, 0)
pe.initialize()
Y = pe(np.zeros((1, 100, 20)))
# Visualize a few encoding dimensions across positions.
d2l.plot(np.arange(100), Y[0, :, 4:8].T, figsize=(6, 2.5),
         legend=["dim %d" % p for p in [4, 5, 6, 7]])


# Saved in the d2l package for later use
class EncoderBlock(nn.Block):
    def __init__(self, num_hiddens, ffn_num_hiddens, num_heads, dropout,
                 **kwargs):
        super(EncoderBlock, self).__init__(**kwargs)
        self.attention = MultiHeadAttention(num_hiddens, num_heads, dropout)
        self.addnorm1 = AddNorm(dropout)
        self.ffn = PositionWiseFFN(ffn_num_hiddens, num_hiddens)
        self.addnorm2 = AddNorm(dropout)

    def forward(self, X, valid_len):
        Y = self.addnorm1(X, self.attention(X, X, X, valid_len))
        return self.addnorm2(Y, self.ffn(Y))
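# Quick shape check for `EncoderBlock` (a sketch: it assumes the
# `MultiHeadAttention`, `AddNorm`, and `PositionWiseFFN` blocks used above are
# already defined, and the sizes below are illustrative only). The encoder
# block preserves the shape of its input.
X = np.ones((2, 100, 24))
valid_len = np.array([3, 2])
encoder_blk = EncoderBlock(24, 48, 8, 0.5)
encoder_blk.initialize()
print(encoder_blk(X, valid_len).shape)  # (2, 100, 24)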
from mxnet import nd
from mxnet.gluon import nn


# Older `mxnet.nd`-based version of the positional encoding above. The
# constructor head is assumed: it follows the usage `PositionalEncoding(20, 0)`
# below, and `max_len=1000` is an assumed default.
class PositionalEncoding(nn.Block):
    def __init__(self, units, dropout, max_len=1000, **kwargs):
        super(PositionalEncoding, self).__init__(**kwargs)
        self.dropout = nn.Dropout(dropout)
        # Create a long enough P
        self.P = nd.zeros((1, max_len, units))
        X = nd.arange(0, max_len).reshape((-1, 1)) / nd.power(
            10000, nd.arange(0, units, 2) / units)
        self.P[:, :, 0::2] = nd.sin(X)
        self.P[:, :, 1::2] = nd.cos(X)

    def forward(self, X):
        X = X + self.P[:, :X.shape[1], :].as_in_context(X.context)
        return self.dropout(X)


pe = PositionalEncoding(20, 0)
pe.initialize()
Y = pe(nd.zeros((1, 100, 20)))
d2l.plot(nd.arange(100), Y[0, :, 4:8].T, figsize=(6, 2.5),
         legend=["dim %d" % p for p in [4, 5, 6, 7]])


class EncoderBlock(nn.Block):
    def __init__(self, units, hidden_size, num_heads, dropout, **kwargs):
        super(EncoderBlock, self).__init__(**kwargs)
        self.attention = MultiHeadAttention(units, num_heads, dropout)
        self.add_1 = AddNorm(dropout)
        self.ffn = PositionWiseFFN(units, hidden_size)
        self.add_2 = AddNorm(dropout)

    def forward(self, X, valid_length):
        Y = self.add_1(X, self.attention(X, X, X, valid_length))
        # Second sub-layer mirrors the `EncoderBlock` above: position-wise FFN
        # followed by add & norm.
        return self.add_2(Y, self.ffn(Y))
import math
import time

import numpy as np
import torch

# Training driver for the seq2seq model. `train`, `evaluate`, `epoch_time`,
# `model`, the data iterators, `optimizer`, `criterion`, and `CLIP` are
# defined elsewhere. The loop head and the initializations below are assumed;
# `N_EPOCHS = 10` matches the 10-point loss plot at the end.
N_EPOCHS = 10
best_dev_loss = float('inf')
train_losses, dev_losses = [], []

for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    dev_loss = evaluate(model, dev_iterator, criterion)
    train_losses.append(train_loss)
    dev_losses.append(dev_loss)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # Keep the checkpoint with the best validation loss seen so far.
    if dev_loss < best_dev_loss:
        best_dev_loss = dev_loss
        torch.save(model.state_dict(), 's2s-model.pt')

    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.5f} | '
          f'Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Dev. Loss: {dev_loss:.5f} | '
          f'Dev. PPL: {math.exp(dev_loss):7.3f}')

# Plot the losses, one point per epoch (10 epochs).
x = np.arange(1, 11)
train_losses = np.array(train_losses)
dev_losses = np.array(dev_losses)
d2l.plot(x, [train_losses, dev_losses], xlabel="epochs", ylabel="loss",
         legend=["Train Loss", "Dev Loss"])
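# `epoch_time` is used in the loop above but not defined in this snippet (in a
# runnable script it would appear before the loop). A minimal sketch, assuming
# it splits an elapsed wall-clock interval into whole minutes and leftover
# seconds:
def epoch_time(start_time, end_time):
    elapsed = end_time - start_time
    elapsed_mins = int(elapsed / 60)
    elapsed_secs = int(elapsed - elapsed_mins * 60)
    return elapsed_mins, elapsed_secs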