def main():
    # Load the vocabulary and map every word to a row of the embedding matrix.
    vocabulary = pickle.load(open(f'{EMBEDDING_DIR}/vocab.pkl', 'rb'))
    print("Number of words in data set: %d" % len(vocabulary))
    embedding_matrix, vocab_to_index = map_vocab_to_embedding(vocabulary)

    # Build the encoder/decoder pair and move them to the GPU if one is available.
    hidden_size = 600
    encoder = EncoderRNN(embedding_matrix, hidden_size)
    decoder = DecoderRNN(embedding_matrix, hidden_size)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Load the pre-processed training pairs and start training.
    train_file = open(os.path.join(EMBEDDING_DIR, "train.pkl"), 'rb')
    train_data = pickle.load(train_file)
    train_file.close()

    n_iters = 2000
    train(train_data, vocab_to_index, vocabulary, encoder, decoder, n_iters)
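# The helper below is a minimal sketch of what map_vocab_to_embedding could look
# like; the real helper is defined elsewhere in this project. It assumes pretrained
# word vectors stored as a pickled dict {word: vector} (the file name 'vectors.pkl'
# and the 300-dimensional size are hypothetical) and gives every vocabulary word an
# index plus a row of the embedding matrix, with unknown words drawn from a small
# uniform range.
import numpy as np

def map_vocab_to_embedding_sketch(vocabulary, embedding_dim=300):
    pretrained = pickle.load(open(f'{EMBEDDING_DIR}/vectors.pkl', 'rb'))
    vocab_to_index = {}
    embedding_matrix = np.zeros((len(vocabulary), embedding_dim), dtype=np.float32)
    for i, word in enumerate(vocabulary):
        vocab_to_index[word] = i
        if word in pretrained:
            embedding_matrix[i] = pretrained[word]
        else:
            # Words without a pretrained vector get a small random initialisation.
            embedding_matrix[i] = np.random.uniform(-0.05, 0.05, embedding_dim)
    return embedding_matrix, vocab_to_index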
def build_model(src, tgt, hidden_size, mini_batch_size, bidirectional,
                dropout, attention, init_value):
    # Log the hyper-parameters for this run.
    EXPERIMENT.param("Hidden", hidden_size)
    EXPERIMENT.param("Bidirectional", bidirectional)
    EXPERIMENT.param("Dropout", dropout)
    EXPERIMENT.param("Attention", attention)
    EXPERIMENT.param("Mini-batch", mini_batch_size)

    # Perplexity loss over the target vocabulary, ignoring the padding token.
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)

    encoder = EncoderRNN(len(src.vocab), MAX_LEN, hidden_size,
                         rnn_cell="lstm",
                         bidirectional=bidirectional,
                         dropout_p=dropout,
                         variable_lengths=False)
    decoder = DecoderRNN(
        len(tgt.vocab), MAX_LEN,
        hidden_size,  # * 2 if bidirectional else hidden_size,
        rnn_cell="lstm",
        use_attention=attention,
        eos_id=tgt.eos_id,
        sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)

    using_cuda = False
    if torch.cuda.is_available():
        using_cuda = True
        encoder.cuda()
        decoder.cuda()
        seq2seq.cuda()
        loss.cuda()
    EXPERIMENT.param("CUDA", using_cuda)

    # Initialise all model weights uniformly in [-init_value, init_value].
    for param in seq2seq.parameters():
        param.data.uniform_(-init_value, init_value)

    trainer = SupervisedTrainer(loss=loss, batch_size=mini_batch_size,
                                checkpoint_every=5000, random_seed=42,
                                print_every=1000)
    return seq2seq, trainer
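# A hedged usage sketch of build_model and the SupervisedTrainer it returns.
# The field objects (src, tgt), the torchtext datasets (train_set, dev_set) and
# every hyper-parameter value below are assumptions for illustration; they are not
# defined in this section. SupervisedTrainer.train is the pytorch-seq2seq training
# loop and returns the trained model.
def run_training(src, tgt, train_set, dev_set):
    seq2seq, trainer = build_model(src, tgt,
                                   hidden_size=256,
                                   mini_batch_size=32,
                                   bidirectional=False,
                                   dropout=0.2,
                                   attention=True,
                                   init_value=0.08)
    seq2seq = trainer.train(seq2seq, train_set,
                            num_epochs=6,
                            dev_data=dev_set,
                            teacher_forcing_ratio=0.5)
    return seq2seq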