import torch


def evaluate(vocab: Vocabulary,
             corpus_filename: str,
             encoder: EncoderRNN,
             decoder: AttnDecoderRNN,
             max_src_length: int,
             max_tgt_length: int):

    device: torch.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    encoder.to(device)
    decoder.to(device)

    # Put dropout (and any other training-only layers) into eval mode
    encoder.eval()
    decoder.eval()

    with torch.no_grad():
        corpus = Corpus(filename=corpus_filename,
                        max_src_length=max_src_length,  # decoder.max_src_length
                        vocab=vocab,
                        device=device)

        for batch in torch.utils.data.DataLoader(dataset=corpus, batch_size=1):
            # Reshape to (seq_len, batch) as the encoder expects
            input_tensor: torch.Tensor = batch["data"].permute(1, 0)

            encoder_outputs = encoder.encode_sequence(input_tensor)

            decoder_output = decoder.decode_sequence(
                encoder_outputs=encoder_outputs,
                start_symbol=corpus.characters.start_of_sequence.integer,
                max_length=max_tgt_length)

            # Greedy decoding: keep the highest-scoring symbol at each step
            _, top_i = decoder_output.topk(k=1)
            predictions = top_i.squeeze(dim=2).squeeze(dim=1).tolist()

            predicted_string = "".join(
                [corpus.characters[i].string for i in predictions])
            print(predicted_string)
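One detail the loop above glosses over: the decoder always emits `max_tgt_length` symbols, so the printed string can run past the end of the actual prediction. A minimal sketch of truncating at the first end-of-sequence symbol, as a drop-in for the last two statements of the loop; it assumes the vocabulary exposes `corpus.characters.end_of_sequence.integer` by analogy with the `start_of_sequence` attribute used above (that attribute is an assumption, not confirmed by the original code):

            # Hedged sketch: truncate greedy output at the first
            # end-of-sequence symbol. Assumes end_of_sequence mirrors the
            # start_of_sequence attribute used above.
            eos = corpus.characters.end_of_sequence.integer
            if eos in predictions:
                predictions = predictions[:predictions.index(eos)]
            predicted_string = "".join(
                corpus.characters[i].string for i in predictions)
            print(predicted_string)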
# `checkpoint` is assumed to have been loaded earlier, e.g. via torch.load().
# Unpack the saved state dicts and the vocabulary.
encoder_sd = checkpoint['encoder']
decoder_sd = checkpoint['decoder']
encoder_optimizer_sd = checkpoint['en_opt']
decoder_optimizer_sd = checkpoint['de_opt']
embedding_sd = checkpoint['embedding']
voc.__dict__ = checkpoint['voc_dict']

# Rebuild the embedding and the encoder/decoder, then restore their weights
embedding = nn.Embedding(voc.num_words, Config.hidden_size)
embedding.load_state_dict(embedding_sd)
encoder = EncoderRNN(Config.hidden_size, embedding,
                     Config.encoder_n_layers, Config.dropout)
decoder = LuongAttnDecoderRNN(Config.attn_model, embedding,
                              Config.hidden_size, voc.num_words,
                              Config.decoder_n_layers, Config.dropout)
encoder.load_state_dict(encoder_sd)
decoder.load_state_dict(decoder_sd)

encoder = encoder.to(Config.device)
decoder = decoder.to(Config.device)

# Set dropout layers to eval mode
encoder.eval()
decoder.eval()

# Initialize search module
searcher = GreedySearchDecoder(encoder, decoder)

# Begin chatting
evaluateInput(encoder, decoder, searcher, voc)
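For reference, a checkpoint compatible with this loader must contain exactly the keys read above. A minimal sketch of the matching save call; the filename and the optimizer variables are illustrative assumptions, standing in for whatever the training loop actually used:

# Hedged sketch of the save side: keys mirror the loader above.
# encoder_optimizer / decoder_optimizer and the filename are assumptions.
torch.save({
    'encoder': encoder.state_dict(),
    'decoder': decoder.state_dict(),
    'en_opt': encoder_optimizer.state_dict(),
    'de_opt': decoder_optimizer.state_dict(),
    'embedding': embedding.state_dict(),
    'voc_dict': voc.__dict__,
}, 'checkpoint.tar')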
voc.__dict__ = checkpoint['voc_dict']

print('Building encoder and decoder ...')
# Initialize the word embeddings
embedding = nn.Embedding(voc.num_words, hidden_size)
if model_checkpoint:
    embedding.load_state_dict(embedding_sd)
# Initialize the encoder and decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                              voc.num_words, decoder_n_layers, dropout)
if model_checkpoint:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
# Move the models to the appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')


class GreedySearchDecoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length):
        # Run the encoder forward pass
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Use the encoder's final hidden state as the decoder's initial state.
        # Note: self.decoder, not the global decoder, so the module is self-contained.
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]
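The `forward` method above is cut off after initializing the decoder hidden state. A hedged sketch of the rest of the greedy search loop, following the standard pattern for this encoder/decoder interface; it assumes `device` and an `SOS_token` start-symbol constant are defined globally, which the excerpt does not show:

        # --- hedged continuation sketch; SOS_token and device are assumed ---
        # Start decoding from the start-of-sequence token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Accumulators for the decoded tokens and their scores
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        # Decode one token at a time, always taking the argmax (greedy)
        for _ in range(max_length):
            decoder_output, decoder_hidden = self.decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # The chosen token becomes the next decoder input (needs a batch dim)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        return all_tokens, all_scores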
# cleaned_news / cleaned_summaries are assumed to be the preprocessed
# token sequences produced earlier in the script.
vocab = build_vocab(cleaned_news, cleaned_summaries, min_freq=3)

# Build the datasets as tensors
news_dataset = build_dataset(vocab, cleaned_news,
                             config['max_len_news'], type='news')
summaries_dataset = build_dataset(vocab, cleaned_summaries,
                                  config['max_len_summaries'], type='summaries')
# Combine them into a single dataset
dataset = TensorDataset(news_dataset, summaries_dataset)

# Load a pretrained word2vec model (trained on Sogou news), dimension 300
pre_embeddings = get_pretrained_embedding(config['pretrained_vector_path'],
                                          vocab, vector_dim=300).to(device)

# Build the models; the hidden state and word vector dimensions are both 300
vocab_size = len(vocab)
# The encoder is a single-layer bidirectional GRU
encoder = EncoderRNN(vocab_size, 300, 300, n_layers=1, pre_embeddings=pre_embeddings)
# The decoder is a two-layer unidirectional GRU
decoder = DecoderRNN(vocab_size, 300, 300, n_layers=2, pre_embeddings=pre_embeddings)

# Move to CUDA for training
encoder.to(device)
decoder.to(device)

# Train the models
training(encoder, decoder, dataset, vocab, config['lr'],
         config['batch_size'], config['epochs'])
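This driver reads several keys from a `config` dict that the excerpt never defines. A minimal sketch of its assumed shape; every value here is an illustrative assumption, only the key names are taken from the code above:

# Illustrative config: key names match the reads above, values are assumptions.
config = {
    'max_len_news': 400,
    'max_len_summaries': 50,
    'pretrained_vector_path': 'sogou_news_word2vec.txt',  # path is an assumption
    'lr': 1e-3,
    'batch_size': 64,
    'epochs': 10,
}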