y = y[indices]

# Explicitly set apart 10% for validation data that we never train over
split_at = len(X) - len(X) // 10
(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
(y_train, y_val) = (y[:split_at], y[split_at:])

print(X_train.shape)
print(y_train.shape)

print("Build model...")
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
# Note: in a situation where your input sequences have a variable length,
# use input_shape=(None, nb_feature).
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, convertor.get_dim())))
# For the decoder's input, we repeat the encoded input for each time step.
model.add(RepeatVector(DIGITS + 1))
# The decoder RNN could be multiple stacked layers or a single layer.
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))
# For each step of the output sequence, decide which character should be chosen.
model.add(TimeDistributedDense(convertor.get_dim()))
model.add(Activation("softmax"))
model.compile(loss="categorical_crossentropy", optimizer="adam")

# Train the model each generation and show predictions against the validation dataset
for iteration in range(1, 200):
    print()
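    # Sketch, not from the source: the loop body is truncated above. Assuming
    # the old Keras 0.x/1.x Sequential API (nb_epoch, predict_classes) that
    # this example targets, and that numpy is imported as np earlier in the
    # file, each generation would look roughly like this:
    print("-" * 50)
    print("Iteration", iteration)
    model.fit(X_train, y_train, batch_size=BATCH_SIZE, nb_epoch=1,
              validation_data=(X_val, y_val))
    # Spot-check a few random validation samples against the model's guesses.
    for _ in range(10):
        ind = np.random.randint(0, len(X_val))
        row_X, row_y = X_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(row_X, verbose=0)
        print("expected:", row_y.argmax(axis=-1)[0], "predicted:", preds[0])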
D_y = D_y[indices]

# Explicitly set apart 10% for validation data that we never train over
split_at = len(D_X) - len(D_X) // 10
(D_X_train, D_X_val) = (slice_X(D_X, 0, split_at), slice_X(D_X, split_at))
(D_y_train, D_y_val) = (D_y[:split_at], D_y[split_at:])

print(D_X_train.shape)
print(D_y_train.shape)

import dlx.unit.core as U
import dlx.unit.recurrent as R
from dlx.model import Model

print('Build model...')
input_dim = convertor.get_dim()
output_dim = convertor.get_dim()
hidden_dim = HIDDEN_SIZE
input_length = MAXLEN
output_length = DIGITS + 1

'''Define Units'''
# Data unit
data = U.Input(3, 'X')
# RNN encoder
encoder = R.RNN(input_length, input_dim, hidden_dim, name='ENCODER')
#encoder = R.LSTM(input_length, input_dim, hidden_dim, name='ENCODER')
# RNN decoder
decoder = R.RNN(output_length, hidden_dim, hidden_dim, name='DECODER')
#decoder = R.LSTM(output_length, hidden_dim, hidden_dim, name='DECODER')
# Time Distributed Dense
tdd = U.TimeDistributedDense(output_length, hidden_dim, output_dim, 'TDD')
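# For reference: `slice_X` used above comes from the old keras.models helpers.
# A rough reimplementation sketch (hypothetical name, not the code actually
# imported) covering the two call patterns in these examples -- slicing a
# single array or a list of arrays along the first axis:
def _slice_X_sketch(X, start=None, stop=None):
    if isinstance(X, list):
        return [x[start:stop] for x in X]
    return X[start:stop]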
print("Vectorization...") convertor = CharacterDataEngine(chars, maxlen=len(chars) - 1) initial_value = convertor.encode_dataset(starts, maxlen=1) y = convertor.encode_dataset(chains) split_at = len(y) - len(y) / 10 (y_train, y_val) = (y[:split_at], y[split_at:]) (i_train, i_val) = (initial_value[:split_at], initial_value[split_at:]) (X_train, X_val) = (y_train, y_val) print(i_train.shape) print(y_train.shape) print("Build model...") HIDDEN_SIZE = 128 BATCH_SIZE = 50 MAXLEN = len(chars) - 1 input_dim = convertor.get_dim() rnn_layer = SimpleRNN(HIDDEN_SIZE, input_shape=(MAXLEN, convertor.get_dim()), return_sequences=True) shift_layer = Shift(rnn_layer, initial_value) model = Graph() model.add_input(name="initial_value", input_shape=(1, input_dim)) model.add_input(name="sequence_input", input_shape=(MAXLEN, input_dim)) model.add_node(shift_layer, name="shift", input="sequence_input") model.add_node(rnn_layer, name="rnn", input="shift") model.add_node(TimeDistributedDense(input_dim), name="tdd", input="rnn") model.add_node(Activation("softmax"), name="softmax", input="tdd") model.add_output(name="output", input="softmax") model.compile(loss={"output": "categorical_crossentropy"}, optimizer="adam") for iteration in range(1, 200): print()
print(D_X_train.shape)
print(D_y_train.shape)

import lx_layer.layer as L
import lx_layer.recurrent as R
import theano.printing as P
from keras import activations, objectives
from keras import models
from keras.optimizers import Adam
from keras import backend as K
from util import initializations

print("Build model...")
input_dim = convertor.get_dim()
output_dim = convertor.get_dim()
hidden_dim = HIDDEN_SIZE
input_length = MAXLEN
output_length = DIGITS + 1

"""Define functions"""
activation_softmax = activations.get("softmax")
activation_linear = activations.get("linear")
activation_hard_sigmoid = activations.get("hard_sigmoid")
activation_tanh = activations.get("tanh")
init_glorot_uniform = initializations.glorot_normal  # note: bound to glorot_normal despite the name
init_orthogonal = initializations.orthogonal
forget_bias_init = initializations.one

"""Define layers"""
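# Illustration for the "Define functions" section above, not from the source:
# activations.get returns a plain callable on tensors, so the retrieved softmax
# can be evaluated directly through the backend. Assumes numpy as np, as
# elsewhere in these examples.
_x = K.variable(np.array([[1.0, 2.0, 3.0]]))
print(K.eval(activation_softmax(_x)))  # ~[[0.090, 0.245, 0.665]]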
# Explicitly set apart 10% for validation data that we never train over
split_at = len(D_A) - len(D_A) // 10
(D_A_train, D_A_val) = (slice_X(D_A, 0, split_at), slice_X(D_A, split_at))
(D_B_train, D_B_val) = (slice_X(D_B, 0, split_at), slice_X(D_B, split_at))
(D_y_train, D_y_val) = (D_y[:split_at], D_y[split_at:])

print(D_A_train.shape)
print(D_B_train.shape)
print(D_y_train.shape)

import dlx.unit.core as U
import dlx.unit.attention as A
from dlx.model import Model

print("Build model...")
input_dim = convertor.get_dim() + MAXLEN
output_dim = convertor.get_dim()
hidden_dim = HIDDEN_SIZE
output_length = MAXLEN
attention_hidden_dim = HIDDEN_SIZE

"""Define Units"""
# Data units
dataA = U.Input(3, "A")
dataB = U.Input(3, "B")
# Add / remove one timestep at the beginning
add1 = U.AddOneAtBegin()
remove1 = U.RemoveOneAtBegin()
# Attention decoder
decoder = A.AttentionLSTM_X(output_length + 1, input_dim, hidden_dim,
                            input_dim, attention_hidden_dim, name="ATT")
# One to Many
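# Hypothetical reading, not from the source: input_dim above equals
# convertor.get_dim() + MAXLEN, consistent with each timestep concatenating a
# character one-hot (size get_dim()) with a position one-hot (size MAXLEN).
# A minimal sketch of that layout, assuming numpy as np:
_char_part = np.zeros(convertor.get_dim())
_char_part[0] = 1.0  # some character index
_pos_part = np.zeros(MAXLEN)
_pos_part[0] = 1.0   # first position
assert _char_part.size + _pos_part.size == input_dim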