def get_output_for(self, input, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = T.flatten(input, 2)

    activation = T.dot(input, self.W)
    if self.b is not None:
        # broadcast the (num_units,) bias vector over the batch dimension
        activation = activation + self.b.dimshuffle('x', 0)
    return self.nonlinearity(activation)
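# A standalone sketch (not part of the layer above) of what the dimshuffle-based
# bias addition does: b.dimshuffle('x', 0) turns a (num_units,) bias vector into
# a (1, num_units) row that Theano broadcasts over the batch dimension of the
# (batch_size, num_units) activation matrix. The names and shapes here are
# illustrative assumptions, not taken from the code above.
import numpy as np
import theano
import theano.tensor as T

acts = T.matrix('acts')                               # (batch_size, num_units)
b = theano.shared(np.array([0., 1., 2.], dtype=theano.config.floatX), name='b')
out = acts + b.dimshuffle('x', 0)                     # bias broadcast over the batch
f = theano.function([acts], out)
print(f(np.zeros((2, 3), dtype=theano.config.floatX)))
# [[0. 1. 2.]
#  [0. 1. 2.]]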
def main(num_epochs=NUM_EPOCHS):
    print("Building network ...")
    # First, we build the network, starting with an input layer.
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    l_in = lasagne.layers.InputLayer(shape=(N_BATCH, MAX_LENGTH, 2))
    # The network also needs a way to provide a mask for each sequence. We'll
    # use a separate input layer for that. Since the mask only determines
    # which indices are part of the sequence for each batch entry, it is
    # supplied as a matrix of dimensionality (N_BATCH, MAX_LENGTH).
    l_mask = lasagne.layers.InputLayer(shape=(N_BATCH, MAX_LENGTH))
    # We're using a bidirectional network, which means we will combine two
    # RecurrentLayers, one with the backwards=True keyword argument.
    # Setting a value for grad_clipping will clip the gradients in the layer.
    # Setting only_return_final=True makes the layers only return their output
    # for the final time step, which is all we need for this task.
    l_forward = lasagne.layers.RecurrentLayer(
        l_in, N_HIDDEN, mask_input=l_mask, grad_clipping=GRAD_CLIP,
        W_in_to_hid=lasagne.init.HeUniform(),
        W_hid_to_hid=lasagne.init.HeUniform(),
        nonlinearity=lasagne.nonlinearities.tanh, only_return_final=True)
    l_backward = lasagne.layers.RecurrentLayer(
        l_in, N_HIDDEN, mask_input=l_mask, grad_clipping=GRAD_CLIP,
        W_in_to_hid=lasagne.init.HeUniform(),
        W_hid_to_hid=lasagne.init.HeUniform(),
        nonlinearity=lasagne.nonlinearities.tanh,
        only_return_final=True, backwards=True)
    # Now, we'll concatenate the outputs to combine them.
    l_concat = lasagne.layers.ConcatLayer([l_forward, l_backward])
    # Our output layer is a simple dense connection, with 1 output unit.
    l_out = lasagne.layers.DenseLayer(
        l_concat, num_units=1, nonlinearity=lasagne.nonlinearities.tanh)

    target_values = T.vector('target_output')

    # lasagne.layers.get_output produces a variable for the output of the net.
    network_output = lasagne.layers.get_output(l_out)
    # The network output will have shape (N_BATCH, 1); let's flatten to get a
    # 1-dimensional vector of predicted values.
    predicted_values = T.flatten(network_output)
    # Our cost will be mean-squared error.
    cost = T.mean((predicted_values - target_values) ** 2)
    # Retrieve all parameters from the network.
    all_params = lasagne.layers.get_all_params(l_out)
    # Compute SGD updates for training.
    print("Computing updates ...")
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)
    # Theano functions for training and computing cost.
    print("Compiling functions ...")
    import time
    start_time = time.time()
    train = theano.function(
        [l_in.input_var, target_values, l_mask.input_var],
        cost, updates=updates)
    compute_cost = theano.function(
        [l_in.input_var, target_values, l_mask.input_var], cost)
    print("Compiling took %f seconds" % (time.time() - start_time))

    # We'll use this "validation set" to periodically check progress.
    X_val, y_val, mask_val = gen_data()

    print("Training ...")
    try:
        for epoch in range(num_epochs):
            start_time = time.time()
            for _ in range(EPOCH_SIZE):
                X, y, m = gen_data()
                train(X, y, m)
            cost_val = compute_cost(X_val, y_val, mask_val)
            print("Epoch {} validation cost = {}; spent {} seconds".format(
                epoch, cost_val, time.time() - start_time))
    except KeyboardInterrupt:
        pass
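# The training loop above assumes a gen_data() helper that returns a batch of
# (inputs, targets, mask) with shapes (N_BATCH, MAX_LENGTH, 2), (N_BATCH,) and
# (N_BATCH, MAX_LENGTH). Below is a minimal sketch of such a helper, assuming
# the usual "adding" toy task: feature 0 holds random values, feature 1 marks
# the two entries whose sum is the target. The min_length default and the value
# range are assumptions chosen so the target fits the tanh output, not details
# taken from the code above.
import numpy as np

def gen_data(n_batch=N_BATCH, max_length=MAX_LENGTH, min_length=50):
    X = np.zeros((n_batch, max_length, 2), dtype=np.float32)
    mask = np.zeros((n_batch, max_length), dtype=np.float32)
    y = np.zeros((n_batch,), dtype=np.float32)
    for n in range(n_batch):
        # Each sequence gets a random length; the mask flags the valid steps.
        length = np.random.randint(min_length, max_length + 1)
        mask[n, :length] = 1
        # Values in [-0.5, 0.5] so the sum of two of them stays in tanh's range.
        X[n, :length, 0] = np.random.uniform(-0.5, 0.5, size=length)
        # Mark two distinct positions; their values sum to the target.
        i, j = np.random.choice(length, size=2, replace=False)
        X[n, [i, j], 1] = 1
        y[n] = X[n, i, 0] + X[n, j, 0]
    return X, y, mask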