def prepare_data(config):
    """Build the vocabulary from the training corpus and load the training sets.

    Creates a vocabulary file under ``config.train_dir`` from the
    ``train.query``, ``train.answer`` and ``train.gen`` files, initializes the
    vocabulary from it, prepares the discriminator data and reads it with
    ``hier_read_data``.

    NOTE(review): a later definition of ``prepare_data`` in this file rebinds
    the name, so this version is presumably not the one callers get -- confirm
    before relying on it.

    Args:
        config: object exposing ``train_dir`` and ``vocab_size``; it is also
            passed through to ``hier_read_data``.

    Returns:
        The tuple ``(query_set, answer_set, gen_set)`` unpacked from
        ``hier_read_data``.
    """
    corpus_prefix = os.path.join(config.train_dir, "train")
    corpus_files = [
        corpus_prefix + suffix for suffix in (".query", ".answer", ".gen")
    ]
    vocab_file = os.path.join(
        config.train_dir, "vocab%d.all" % config.vocab_size
    )

    data_utils.create_vocabulary(vocab_file, corpus_files, config.vocab_size)
    # Only the forward mapping is needed below; the reverse one is discarded.
    vocab, _ = data_utils.initialize_vocabulary(vocab_file)

    print("Preparing train disc_data in %s" % config.train_dir)
    query_path, answer_path, gen_path = data_utils.hier_prepare_disc_data(
        config.train_dir, vocab, config.vocab_size
    )

    queries, answers, gens = hier_read_data(
        config, query_path, answer_path, gen_path
    )
    return queries, answers, gens
def prepare_data(config):
    """Load the pickled vocabulary and read the discriminator training sets.

    The vocabulary is not rebuilt from the training corpus here; it is
    unpickled from the files ``vocab`` and ``rev_vocab``, opened by relative
    path (i.e. resolved against the current working directory).

    Args:
        config: object exposing ``train_dir`` and ``vocab_size``; it is also
            passed through to ``hier_read_data``.

    Returns:
        The tuple ``(query_set, answer_set, gen_set)`` unpacked from
        ``hier_read_data``.

    Raises:
        OSError: if ``vocab`` or ``rev_vocab`` cannot be opened.
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # unpickling; only run this against vocabulary files you produced
    # yourself.
    # The context manager closes both handles even if opening the second
    # file or unpickling either one raises.
    with open('vocab', 'rb') as fr_vocab, \
            open('rev_vocab', 'rb') as fr_rev_vocab:
        vocab = pickle.load(fr_vocab)
        # Not used below; still loaded so that a missing or corrupt
        # rev_vocab file fails here exactly as it did before.
        _rev_vocab = pickle.load(fr_rev_vocab)

    print("Preparing train disc_data in %s" % config.train_dir)
    # Six paths are unpacked; only the three training paths are used here.
    (train_query_path, train_answer_path, train_gen_path,
     _dev_query_path, _dev_answer_path, _dev_gen_path) = \
        data_utils.hier_prepare_disc_data(
            config.train_dir, vocab, config.vocab_size)

    query_set, answer_set, gen_set = hier_read_data(
        config, train_query_path, train_answer_path, train_gen_path)
    return query_set, answer_set, gen_set