# Example #1 (score: 0)
def load_data(dir=data_dir):
    """Load training and validation text from *dir*.

    Args:
        dir: Directory (or glob) of data files to read. Defaults to the
            module-level ``data_dir``. (Name shadows the builtin ``dir`` —
            kept for backward compatibility with keyword callers.)

    Returns:
        Tuple ``(codetext, valitext)`` of training and validation text as
        returned by ``util.read_data_files``.
    """
    # BUG FIX: previously this ignored the `dir` argument and always read
    # from the module-level `data_dir`; now the parameter is actually used.
    codetext, valitext = util.read_data_files(dir, validation=True)

    # display some stats on the data
    epoch_size = len(codetext) // (BATCHSIZE * SEQLEN)
    print('data stats: training_len={}, validation_len={}, epoch_size={}'.format(
        len(codetext), len(valitext), epoch_size))

    return codetext, valitext
# Example #2 (score: 0)
# By Donald Whyte and Alejandro Saucedo
#
# Step 3b:
# Building the Recurrent Network Model
# ==============================================================================

import numpy as np
import tensorflow as tf
# `rnn` module temporarily in contrib. It's moving back to code in TF 1.1.
from tensorflow.contrib import layers, rnn

from util import ALPHABET_SIZE, read_data_files, rnn_minibatch_generator

# A. Load Training Data
# ------------------------------------------------------------------------------
# NOTE(review): read_data_files appears to return (training_text,
# validation_text, per-file index) — grounded only in how the three results
# are used below; confirm against util.read_data_files.
training_data, validation_data, file_index = read_data_files(
    '../data/shakespeare/*', validation=True)

print(f'Num training characters: {len(training_data)}')
print(f'Num test/validation characters: {len(validation_data)}')
print(f'Num text files processed: {len(file_index)}')

# B. Build RNN Model
# ------------------------------------------------------------------------------

# Configurable hyperparameters.
BATCH_SIZE = 200          # sequences processed per training step
SEQUENCE_LENGTH = 30      # characters unrolled per sequence (BPTT length)
NUM_HIDDEN_LAYERS = 3     # number of stacked recurrent layers
GRU_INTERNAL_SIZE = 512   # per the name: internal (hidden-state) size of each GRU cell

# *** Inputs (sequence of characters that are encoded as ints)