import torch
from torch import nn
import d2lzh_pytorch as d2l

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Load the lyrics dataset; the four returned values are unpacked below.
corpus_indices, char_to_idx, idx_to_char, vocab_size = d2l.load_data_jay_lyrics()

# Hyperparameters handed to d2l.train_and_predict_rnn_pytorch further down.
num_hiddens = 256
num_epochs = 160
num_steps = 35
batch_size = 32
clipping_theta = 1e-2
pred_period = 40
pred_len = 50
prefixes = ['分开', '不分开']
lr = 1e-2  # NOTE: remember to tune the learning rate.

# Alternative recurrent layer (disabled): swap these lines in to train a GRU.
# print("using GRU")
# gru_layer = nn.GRU(input_size=vocab_size, hidden_size=num_hiddens)
# model = d2l.RNNModel(gru_layer, vocab_size)

print("using LSTM")
lstm_layer = nn.LSTM(input_size=vocab_size, hidden_size=num_hiddens)
model = d2l.RNNModel(lstm_layer, vocab_size)

# Train the model and periodically generate text from the prefixes.
d2l.train_and_predict_rnn_pytorch(
    model,
    num_hiddens,
    vocab_size,
    device,
    corpus_indices,
    idx_to_char,
    char_to_idx,
    num_epochs,
    num_steps,
    lr,
    clipping_theta,
    batch_size,
    pred_period,
    pred_len,
    prefixes,
)
import time import math import numpy as np import torch from torch import nn, optim import torch.nn.functional as F import sys sys.path.append("..") import d2lzh_pytorch as d2l device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') (corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics() # one-hot def one_hot(x, n_class, dtype=torch.float32): x = x.long() # 转成Int64 res = torch.zeros(x.shape[0], n_class, dtype=dtype, device=x.device) # 为x中每一个样本生成一个n_class维全0向量 res.scatter_(1, x.view(-1, 1), 1) # dim=1 行维度,view(-1,1) 是index即那个位置被替换成1,最后一个1是source,如果是2 就相当于一个位置是2其余是0 return res def to_onehot(x, n_class): # x shape (batch seq_len) output shape seq_len elements of (batch, n_class) # 例如输入x 10个样本 窗口大小是5 x shaop = (10, 5), 词库大小是2000 # output 是5个 (10,2000)的10个onehot向量组成的矩阵 return [one_hot(x[:, i], n_class) for i in range(x.shape[1])] # 即为窗口内每一个字符生成onehot向量 # 初始化参数 num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size def get_params():