def get_data_dict(strategy_str, bpe_str=""):
    """Load the indexed training/valid/test text pickles for one strategy.

    Args:
        strategy_str: Strategy tag embedded in each pickle filename.
        bpe_str: Optional filename prefix selecting a variant of the
            indexed files (empty string for the default files).

    Returns:
        Dict mapping each name in the module-level ``splits`` to the
        dialogues loaded from the corresponding pickle.
    """
    # The three files differ only in the split-name segment of the path.
    split_parts = ["training", "valid", "test"]
    paths = [
        os.path.join(
            data_path,
            f"indexed/{bpe_str}indexed_{part}_text.{strategy_str}.pkl",
        )
        for part in split_parts
    ]
    # load_pickles yields one loaded object per path, in order.
    loaded = load_pickles(paths)
    # Pair each split name with its dialogues (first three loaded objects).
    return dict(zip(splits, loaded[:3]))
Load pickled lists """ filenames = [ "vocab_all.pkl", "shared_vocab_politeness.pkl", "shared_vocab_movie.pkl", "new_vocab_politeness.pkl", "new_vocab_movie.pkl", "embedding_word2vec_politeness.pkl", "embedding_word2vec_movie.pkl", "movie_train_source.pkl", "movie_train_target.pkl", "movie_valid_source.pkl", "movie_valid_target.pkl", "movie_test_source.pkl", "movie_test_target.pkl", "polite_movie_target.pkl", "neutral_movie_target.pkl", "rude_movie_target.pkl"] files = [os.path.join(data_path, filename) for filename in filenames] # Load files data = load_pickles(files) vocab = data[0] shared_vocab_politeness = data[1] shared_vocab_movie = data[2] new_vocab_politeness = data[3] new_vocab_movie = data[4] embedding_word2vec_politeness = data[5] embedding_word2vec_movie = data[6] source_train = data[7] + data[9] target_train = data[8] + data[10] source_test = data[11] target_test = data[12] triple_lsts = data[13:] [source_train, target_train] = zip_remove_duplicates_unzip([source_train, target_train])