def load_data(dir=data_dir):
    """Load training and validation text and print basic corpus stats.

    Args:
        dir: glob/path of the data files to read. Defaults to the
            module-level ``data_dir`` (evaluated once at definition time).
            NOTE(review): the name shadows the ``dir`` builtin, but it is
            kept unchanged for backward compatibility with callers.

    Returns:
        Tuple ``(codetext, valitext)`` of training and validation text, as
        produced by ``util.read_data_files``.

    Bug fix: the ``dir`` parameter was previously ignored — the body always
    passed the global ``data_dir`` to ``util.read_data_files``, so the
    argument had no effect. It is now forwarded.
    """
    codetext, valitext = util.read_data_files(dir, validation=True)

    # Display some stats on the data.
    # assumes BATCHSIZE and SEQLEN are module-level int constants — TODO confirm
    epoch_size = len(codetext) // (BATCHSIZE * SEQLEN)
    print('data stats: training_len={}, validation_len={}, epoch_size={}'.format(
        len(codetext), len(valitext), epoch_size))

    return codetext, valitext
# By Donald Whyte and Alejandro Saucedo
#
# Step 3b:
# Building the Recurrent Network Model
# ==============================================================================
import numpy as np
import tensorflow as tf
# `rnn` module temporarily in contrib. It's moving back to core in TF 1.1.
from tensorflow.contrib import layers, rnn

from util import ALPHABET_SIZE, read_data_files, rnn_minibatch_generator

# A. Load Training Data
# ------------------------------------------------------------------------------
# read_data_files returns the training text, a held-out validation slice, and
# an index of the files it read (presumably name/offset pairs — verify in util).
training_data, validation_data, file_index = read_data_files(
    '../data/shakespeare/*', validation=True)

print(f'Num training characters: {len(training_data)}')
print(f'Num test/validation characters: {len(validation_data)}')
print(f'Num text files processed: {len(file_index)}')

# B. Build RNN Model
# ------------------------------------------------------------------------------
# Configurable hyperparameters.
BATCH_SIZE = 200         # sequences processed per training step
SEQUENCE_LENGTH = 30     # characters per unrolled training sequence
NUM_HIDDEN_LAYERS = 3    # stacked GRU layers
GRU_INTERNAL_SIZE = 512  # hidden-state size of each GRU cell

# *** Inputs (sequence of characters that are encoded as ints)