# --- Attention-model evaluation setup ---
# Loads the validation split and a pre-built word2vec vocabulary/embedding
# (pickled), then exposes the special-token indices used by the decoder.

SCORER_FILE_PATH = 'scorer/scorer_my.py'
EMBEDDING_SAVE_PATH = 'word2vec_attention.pickle'  # '../embeddings/numberbatch-en-19.08.txt'
EMBEDDING_DIM = 300
# NOTE(review): 'inf' is a string, not float('inf'); it is unused on the
# raw=False path below, but would break any numeric comparison — confirm intent.
MIN_DISCARD_LEN = 'inf'
INPUT_LEN = 251
TARGET_LEN = 40
pretrained_ckpt = 'attention/model_best_rouge1.ckpt'
device = 'cuda'

# read data
print('reading data...')
from _utils import read_jsonl
# VALID_FILE_PATH is defined earlier in the file (not visible in this chunk).
valid_X, valid_Y = read_jsonl(VALID_FILE_PATH)
print('done')

# load pretrained word embedding
print('loading word embedding...')
from _word2vec import Word2Vec
# Fix: was a hard-coded 300 — keep the dimension in sync with EMBEDDING_DIM.
word2vec = Word2Vec(EMBEDDING_SAVE_PATH, EMBEDDING_DIM, raw=False)
embedding = word2vec.embedding
SOS_token = word2vec.word2idx['<SOS>']
EOS_token = word2vec.word2idx['<EOS>']
PAD_token = word2vec.word2idx['<PAD>']
UNK_token = word2vec.word2idx['<UNK>']
print('done')

# transform sentences to embedding
# --- Seq2seq training setup ---
# Hyperparameters, dataset loading for all three splits, and construction of
# the vocabulary/embedding matrix over the combined corpora.

MIN_DISCARD_LEN = 5
INPUT_LEN = 251
TARGET_LEN = 30
teacher_forcing_ratio = 0.5
GRAD_MAX = 5
CKPT_VALID_NAME = 'seq2seq/model_best_loss.ckpt'
CKPT_NAME = 'seq2seq/model.ckpt'
device = 'cuda'

# read data
print('reading data...')
from _utils import read_jsonl
train_X, train_Y = read_jsonl(TRAIN_FILE_PATH)
valid_X, valid_Y = read_jsonl(VALID_FILE_PATH)
test_X, _ = read_jsonl(TEST_FILE_PATH, False)
print('done')

# load pretrained word embedding and build the vocabulary over every split,
# discarding words rarer than MIN_DISCARD_LEN
print('loading word embedding...')
from _word2vec import Word2Vec
word2vec = Word2Vec(EMBEDDING_FILE_PATH, EMBEDDING_DIM)
corpora = [train_X, train_Y, valid_X, valid_Y, test_X]
embedding = word2vec.make_embedding(corpora, MIN_DISCARD_LEN)

# indices of the special tokens used by the encoder/decoder
SOS_token, EOS_token, PAD_token, UNK_token = (
    word2vec.word2idx[tok] for tok in ('<SOS>', '<EOS>', '<PAD>', '<UNK>')
)
# --- Extractive-model test setup ---
# Reads input/output paths from the command line, loads the test split (with
# original indices), and restores the pickled vocabulary/embedding.

# NOTE(review): assumes `import sys` appears earlier in the file — confirm.
TEST_FILE_PATH = sys.argv[1]
PREDICTION_FILE_PATH = sys.argv[2]
EMBEDDING_SAVE_PATH = 'word2vec_extractive.pickle'  # '../embeddings/numberbatch-en-19.08.txt'
EMBEDDING_DIM = 300
MIN_DISCARD_LEN = 2
INPUT_LEN = 301
pretrained_ckpt = 'extractive/model_best_rouge1.ckpt'
device = 'cuda'

# read data
print('reading data...')
from _utils import read_jsonl
test_X, _, idx_X = read_jsonl(TEST_FILE_PATH, False, True)
print('done')

# load pretrained word embedding
print('loading word embedding...')
from _word2vec import Word2Vec
# Fix: was a hard-coded 300 — keep the dimension in sync with EMBEDDING_DIM.
word2vec = Word2Vec(EMBEDDING_SAVE_PATH, EMBEDDING_DIM, raw=False)
embedding = word2vec.embedding
SOS_token = word2vec.word2idx['<SOS>']
EOS_token = word2vec.word2idx['<EOS>']
PAD_token = word2vec.word2idx['<PAD>']
UNK_token = word2vec.word2idx['<UNK>']
print('done')

# transform sentences to embedding
# --- Attention-model validation setup ---
# Loads the validation split (with original indices) and the pickled
# vocabulary/embedding for the attention model.

EMBEDDING_SAVE_PATH = 'word2vec_attention.pickle'  # '../embeddings/numberbatch-en-19.08.txt'
EMBEDDING_DIM = 300
# NOTE(review): 'inf' is a string, not float('inf'); it is unused on the
# raw=False path below, but would break any numeric comparison — confirm intent.
MIN_DISCARD_LEN = 'inf'
INPUT_LEN = 251
TARGET_LEN = 40
bid, tid = 4, 8
pretrained_ckpt = 'attention/model_best_rouge1.ckpt'
device = 'cuda'

# read data
print('reading data...')
from _utils import read_jsonl
# VALID_FILE_PATH is defined earlier in the file (not visible in this chunk).
valid_X, _, idx_X = read_jsonl(VALID_FILE_PATH, False, True)
print('done')

# load pretrained word embedding
print('loading word embedding...')
from _word2vec import Word2Vec
# Fix: was a hard-coded 300 — keep the dimension in sync with EMBEDDING_DIM.
word2vec = Word2Vec(EMBEDDING_SAVE_PATH, EMBEDDING_DIM, raw=False)
embedding = word2vec.embedding
SOS_token = word2vec.word2idx['<SOS>']
EOS_token = word2vec.word2idx['<EOS>']
PAD_token = word2vec.word2idx['<PAD>']
UNK_token = word2vec.word2idx['<UNK>']
print('done')

# transform sentences to embedding