Example #1
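# The code in this example relies on a few names that are not defined in the
# excerpt itself: `data_path`, `splits`, and `load_pickles`. The sketch below
# is one plausible minimal version of them; the directory name and split
# labels are assumptions, not values taken from the original source.
import os
import pickle

data_path = "data"                   # assumed root directory of the pickle files
splits = ["train", "valid", "test"]  # split keys consumed by get_data_dict below

def load_pickles(files):
    """Unpickle each file in order and return the loaded objects as a list."""
    loaded = []
    for file in files:
        with open(file, "rb") as f:
            loaded.append(pickle.load(f))
    return loaded
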
def get_data_dict(strategy_str, bpe_str=""):
    filenames = [
        f"indexed/{bpe_str}indexed_training_text.{strategy_str}.pkl",
        f"indexed/{bpe_str}indexed_valid_text.{strategy_str}.pkl",
        f"indexed/{bpe_str}indexed_test_text.{strategy_str}.pkl"
    ]
    files = [os.path.join(data_path, filename) for filename in filenames]
    data = load_pickles(files)
    data_dict = {
        split: dialogues
        for (split, dialogues) in zip(splits, data[:3])
    }
    return data_dict
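
# Hypothetical usage of get_data_dict (the strategy label and BPE prefix below
# are placeholders for illustration, not values from the original source):
#
#     data_dict = get_data_dict("some_strategy", bpe_str="bpe_")
#     train_dialogues = data_dict["train"]
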
"""
Load pickled lists
"""
filenames = [
    "vocab_all.pkl",
    "shared_vocab_politeness.pkl", "shared_vocab_movie.pkl",
    "new_vocab_politeness.pkl", "new_vocab_movie.pkl",
    "embedding_word2vec_politeness.pkl", "embedding_word2vec_movie.pkl",
    "movie_train_source.pkl", "movie_train_target.pkl",
    "movie_valid_source.pkl", "movie_valid_target.pkl",
    "movie_test_source.pkl", "movie_test_target.pkl",
    "polite_movie_target.pkl",  "neutral_movie_target.pkl", "rude_movie_target.pkl"]

files = [os.path.join(data_path, filename) for filename in filenames]

# Load files
data = load_pickles(files)

vocab = data[0]
shared_vocab_politeness = data[1]
shared_vocab_movie = data[2]
new_vocab_politeness = data[3]
new_vocab_movie = data[4]
embedding_word2vec_politeness = data[5]
embedding_word2vec_movie = data[6]
# Fold the validation split into the training set.
source_train = data[7] + data[9]   # movie_train_source + movie_valid_source
target_train = data[8] + data[10]  # movie_train_target + movie_valid_target
source_test = data[11]             # movie_test_source
target_test = data[12]             # movie_test_target
triple_lsts = data[13:]            # polite / neutral / rude movie targets
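
# zip_remove_duplicates_unzip is not defined in this excerpt. Judging by its
# name and the way it is called below, it presumably zips the source/target
# lists into pairs, drops duplicate pairs, and unzips them again. A minimal
# order-preserving sketch under that assumption (the actual implementation
# may differ):
def zip_remove_duplicates_unzip(lsts):
    """Remove duplicate tuples across the zipped input lists, preserving order."""
    seen = set()
    deduped = []
    for pair in zip(*lsts):
        key = tuple(tuple(item) for item in pair)  # token-id lists -> hashable tuples
        if key not in seen:
            seen.add(key)
            deduped.append(pair)
    return [list(items) for items in zip(*deduped)]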

# Drop duplicate (source, target) pairs from the training data.
source_train, target_train = zip_remove_duplicates_unzip([source_train, target_train])