def make_sent_dataset():
    """Build the word-to-index mapping, then the embedding, from fixed paths.

    Calls ``make_vocab`` with the training source/target text paths, the
    word2idx pickle path and ``config.vocab_size``; the value it returns is
    handed to ``make_embedding`` together with the GloVe text path and the
    embedding pickle path. Nothing is returned.
    """
    # NOTE(review): a later definition of make_sent_dataset is visible in this
    # file; if both live in the same module, that one rebinds the name and
    # this version is never called - confirm which is intended.

    # Inputs and output location for the vocabulary step.
    src_path = "./para-train.txt"
    tgt_path = "./tgt-train.txt"
    vocab_pickle = "./word2idx.pkl"

    # Inputs and output location for the embedding step.
    glove_path = "./glove.840B.300d.txt"
    embedding_pickle = "./embedding.pkl"

    # make vocab file
    vocab = make_vocab(src_path, tgt_path, vocab_pickle, config.vocab_size)
    make_embedding(glove_path, embedding_pickle, vocab)
def make_sent_dataset():
    """Build the word-to-index mapping, then the embedding, from SQuAD paths.

    Calls ``make_vocab`` with the train and dev source/target text paths, the
    word2idx pickle path and ``config.vocab_size``; the value it returns is
    handed to ``make_embedding`` together with the GloVe text path and the
    embedding pickle path. Nothing is returned.
    """
    # Training split.
    train_src_path = "../squad/train_src50.txt"
    train_tgt_path = "../squad/train_tgt50.txt"

    # dev file
    dev_src_path = "../squad/dev_src50.txt"
    dev_tgt_path = "../squad/dev_tgt50.txt"

    # Pickle locations passed to the two helpers, plus the GloVe text file.
    vocab_pickle = "./word2idx.pkl"
    glove_path = "./glove.840B.300d.txt"
    embedding_pickle = "./embedding.pkl"

    # make vocab file
    vocab = make_vocab(
        train_src_path,
        train_tgt_path,
        dev_src_path,
        dev_tgt_path,
        vocab_pickle,
        config.vocab_size,
    )
    make_embedding(glove_path, embedding_pickle, vocab)