def get_PennTreeBank(data_dir=None):
    """Load the Penn Treebank train/valid/test splits as arrays of word ids.

    Any of the three split files missing from ``data_dir`` is downloaded
    before reading. The vocabulary is built from the training file only and
    then used to encode all three splits.

    Args:
        data_dir: Directory expected to hold ``ptb.train.txt``,
            ``ptb.valid.txt`` and ``ptb.test.txt``. When ``None``, the
            directory returned by ``get_dataset_path('ptb_data')`` is used.

    Returns:
        A ``(data3, word_to_id)`` tuple. ``data3`` is a list of three numpy
        arrays in train, valid, test order, each built from the result of
        ``tfreader._file_to_word_ids``. ``word_to_id`` is the value returned
        by ``tfreader._build_vocab`` for the training file (presumably a
        word -> id mapping; confirm against ``tfreader``).
    """
    if data_dir is None:
        data_dir = get_dataset_path('ptb_data')

    # Check every split, not just the training file, so that a partial or
    # interrupted download is repaired here instead of failing later when
    # the missing file is read.
    # NOTE(review): assumes each URL is saved under the matching file name
    # inside data_dir -- confirm against `download`.
    splits = [
        ('ptb.train.txt', TRAIN_URL),
        ('ptb.valid.txt', VALID_URL),
        ('ptb.test.txt', TEST_URL),
    ]
    for fname, url in splits:
        if not os.path.isfile(os.path.join(data_dir, fname)):
            download(url, data_dir)

    word_to_id = tfreader._build_vocab(os.path.join(data_dir, 'ptb.train.txt'))
    data3 = [
        np.asarray(
            tfreader._file_to_word_ids(os.path.join(data_dir, fname), word_to_id))
        for fname, _ in splits
    ]
    return data3, word_to_id
def get_PennTreeBank(data_dir=None):
    """Load the Penn Treebank train/valid/test splits as arrays of word ids.

    Any of the three split files missing from ``data_dir`` is downloaded
    before reading. The vocabulary is built from the training file only and
    then used to encode all three splits.

    Args:
        data_dir: Directory expected to hold ``ptb.train.txt``,
            ``ptb.valid.txt`` and ``ptb.test.txt``. When ``None``, the
            directory returned by ``get_dataset_path('ptb_data')`` is used.

    Returns:
        A ``(data3, word_to_id)`` tuple. ``data3`` is a list of three numpy
        arrays in train, valid, test order, each built from the result of
        ``tfreader._file_to_word_ids``. ``word_to_id`` is the value returned
        by ``tfreader._build_vocab`` for the training file (presumably a
        word -> id mapping; confirm against ``tfreader``).
    """
    if data_dir is None:
        data_dir = get_dataset_path('ptb_data')

    # Check every split, not just the training file, so that a partial or
    # interrupted download is repaired here instead of failing later when
    # the missing file is read.
    # NOTE(review): assumes each URL is saved under the matching file name
    # inside data_dir -- confirm against `download`.
    splits = [
        ('ptb.train.txt', TRAIN_URL),
        ('ptb.valid.txt', VALID_URL),
        ('ptb.test.txt', TEST_URL),
    ]
    for fname, url in splits:
        if not os.path.isfile(os.path.join(data_dir, fname)):
            download(url, data_dir)

    word_to_id = tfreader._build_vocab(os.path.join(data_dir, 'ptb.train.txt'))
    data3 = [
        np.asarray(
            tfreader._file_to_word_ids(os.path.join(data_dir, fname), word_to_id))
        for fname, _ in splits
    ]
    return data3, word_to_id