if __name__ == '__main__':
    # Train a 1-layer Neural Turing Machine on the Copy task.
    #
    # NOTE(review): this block was recovered from a whitespace-mangled paste
    # (the whole script was collapsed onto one physical line, so the first
    # inline comment commented out everything after it). The code below is the
    # same statements reformatted; the tail of the try-block was truncated in
    # the source, so its closing handler is reconstructed and marked below.

    # Define the symbolic input and expected-output variables
    # (3D tensors: presumably (batch, time, size) — TODO confirm against CopyTask).
    input_var, target_var = T.tensor3s('input', 'target')

    # The generator to sample training examples from
    generator = CopyTask(batch_size=1, max_iter=1000000, size=8,
                         max_length=5, end_marker=True)

    # The model (1-layer Neural Turing Machine)
    l_output, l_ntm = model(input_var, batch_size=generator.batch_size,
                            size=generator.size, num_units=100,
                            memory_shape=(128, 20))

    # The generated output variable and the loss function.
    # Clip predictions away from 0/1 so binary_crossentropy never sees
    # log(0); epsilon here is 1e-10.
    pred_var = T.clip(lasagne.layers.get_output(l_output), 1e-10, 1. - 1e-10)
    loss = T.mean(lasagne.objectives.binary_crossentropy(pred_var, target_var))

    # Create the update expressions (Graves-style RMSProp on all trainable params)
    params = lasagne.layers.get_all_params(l_output, trainable=True)
    updates = graves_rmsprop(loss, params, learning_rate=1e-3)

    # Compile the function for a training step, as well as the prediction
    # function and a utility function to get the inner details of the NTM
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    ntm_fn = theano.function([input_var], pred_var)
    ntm_layer_fn = theano.function([input_var],
                                   lasagne.layers.get_output(l_ntm,
                                                             get_details=True))

    # Training loop: `generator` yields (step index, (input, target)) pairs.
    try:
        scores, all_scores = [], []
        for i, (example_input, example_output) in generator:
            score = train_fn(example_input, example_output)
            scores.append(score)
            all_scores.append(score)
            if i % 500 == 0:
                mean_scores = np.mean(scores)
                # NOTE(review): source truncated here — the original most
                # likely reports `mean_scores` and resets `scores`; confirm
                # against the upstream example before relying on this.
    except KeyboardInterrupt:
        # NOTE(review): reconstructed handler — the source was truncated
        # mid-try with no except clause visible; a KeyboardInterrupt guard is
        # the conventional way such training loops are made interruptible.
        pass
# Train a 1-layer Neural Turing Machine on the upside-down Copy task.
#
# NOTE(review): this block was recovered from a whitespace-mangled paste (the
# whole script was collapsed onto one physical line, so the first inline
# comment commented out everything after it). Unlike the sibling script above
# it has no `if __name__ == '__main__':` guard in the source, so it is kept at
# top level. The tail of the try-block was truncated in the source; its
# closing handler is reconstructed and marked below.

# Define the symbolic input and expected-output variables
# (3D tensors: presumably (batch, time, size) — TODO confirm against the task).
input_var, target_var = T.tensor3s('input', 'target')

# The generator to sample training examples from
generator = UpsideDownCopyTask(batch_size=1, max_iter=1000000, size=8,
                               max_length=5, end_marker=True)

# The model (1-layer Neural Turing Machine)
l_output, l_ntm = model(input_var, batch_size=generator.batch_size,
                        size=generator.size, num_units=100,
                        memory_shape=(128, 20))

# The generated output variable and the loss function.
# Clip predictions away from 0/1 so binary_crossentropy never sees log(0);
# epsilon here is 1e-6 (the CopyTask variant uses 1e-10 — intentional? verify).
pred_var = T.clip(lasagne.layers.get_output(l_output), 1e-6, 1. - 1e-6)
loss = T.mean(lasagne.objectives.binary_crossentropy(pred_var, target_var))

# Create the update expressions (Graves-style RMSProp on all trainable params)
params = lasagne.layers.get_all_params(l_output, trainable=True)
updates = graves_rmsprop(loss, params, learning_rate=1e-3)

# Compile the function for a training step, as well as the prediction function
# and a utility function to get the inner details of the NTM
train_fn = theano.function([input_var, target_var], loss, updates=updates)
ntm_fn = theano.function([input_var], pred_var)
ntm_layer_fn = theano.function([input_var],
                               lasagne.layers.get_output(l_ntm,
                                                         get_details=True))

# Training loop: `generator` yields (step index, (input, target)) pairs.
try:
    scores, all_scores = [], []
    for i, (example_input, example_output) in generator:
        score = train_fn(example_input, example_output)
        scores.append(score)
        all_scores.append(score)
        # NOTE(review): source truncated here — the sibling script continues
        # with periodic reporting (`if i % 500 == 0: ...`); confirm against
        # the upstream example before relying on this.
except KeyboardInterrupt:
    # NOTE(review): reconstructed handler — the source was truncated mid-try
    # with no except clause visible; a KeyboardInterrupt guard is the
    # conventional way such training loops are made interruptible.
    pass