Example #1
  def gradients(self, x, loss_function, y_target):
    '''
    Inputs: 
      x: network input 
      loss_function: The loss function; its derivative w.r.t. the network output is needed to compute the gradients.
      y_target: Target values for the network output. 
    Return value:
      gradients: Gradients of the loss function w.r.t. all weights and biases of the network. 
                 Gradients have a weights and a biases member; indexing starts with 0 for the first hidden layer (W_1, b_1)
                 and ends with the output layer (W_out, b_out).
    '''
    gradients = sup.Variables()
    
    # Outputs of each layer (layer_evaluations[0] is input x)
    layer_evaluations = []
    for layer_idx, layer in enumerate(self.layers):
      layer_evaluations.append(self.evaluateLayer(layer_idx, x))

    # Output equals the evaluation of the last layer
    network_output = self.output(x)

    # Derivative of cost w.r.t. the network output
    dCost_dy = None  # TODO: implement cost function derivative w.r.t. output variables of network
    # Element-wise multiplication with sigmoid derivative (sigmoid is applied element-wise)
    delta_fused = None  # TODO: start backpropagating the error
    
    # Gradient backpropagation
    ## Start from last layer and propagate error gradient through until first layer
    ## Attention!!!: layer_evaluations[0] is the network input while self.layers[0] is the first hidden layer
    for layer_idx in np.arange(len(self.layers)-1, -1, -1):
      logger.debug('Computing the gradient for layer {}'.format(layer_idx))
      # If layer is not last layer, update delta_fused (which is accumulating the back-propagated gradient)
      # TODO: implement backpropagation of gradient for arbitrary number of layers
    return gradients
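The two TODOs above ask for the standard backpropagation recurrence. Below is a minimal numpy sketch of that recurrence for a two-layer sigmoid network with a squared-error loss; all names and shapes (x, W_1, b_1, W_out, b_out, y_target) are illustrative assumptions and do not come from the exercise framework.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# Illustrative shapes only: one sample, 3 inputs, 5 hidden units, 2 outputs
x = np.random.rand(1, 3)
W_1, b_1 = np.random.rand(3, 5), np.random.rand(1, 5)
W_out, b_out = np.random.rand(5, 2), np.random.rand(1, 2)
y_target = np.random.rand(1, 2)

h = sigmoid(x @ W_1 + b_1)                    # hidden layer evaluation
y = sigmoid(h @ W_out + b_out)                # network output

dCost_dy = y - y_target                       # derivative of 0.5 * ||y - y_target||^2
delta_fused = dCost_dy * y * (1 - y)          # fuse with the sigmoid derivative
grad_W_out = h.T @ delta_fused                # output layer weight gradient
grad_b_out = delta_fused                      # output layer bias gradient

delta_fused = (delta_fused @ W_out.T) * h * (1 - h)   # propagate the error to the hidden layer
grad_W_1 = x.T @ delta_fused                  # hidden layer weight gradient
grad_b_1 = delta_fused                        # hidden layer bias gradient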
    def testBatchGradient(self):
        # Manually build a gradient list
        gradient_list = []
        n_layers = 2
        batch_size = 3
        w = np.ones([3, 5])
        b = np.ones([1, 5])
        grad = sup.Variables()
        for i in range(n_layers):
            grad.weights.append(w)
            grad.biases.append(b)

        for j in range(batch_size):
            gradient_list.append(grad)

        # Manually compute batch gradient
        batch_gradient_manual = grad * batch_size

        batch_gradient = self.optimizer.computeBatchGradient(gradient_list)

        self.assertTrue(batch_gradient_manual == batch_gradient)
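The assertion relies on computeBatchGradient accumulating the per-sample gradients, so three identical gradients must sum to grad * 3. A sketch of such an accumulation, assuming the Variables addition operator that testVariables below exercises (the real optimizer implementation is not shown in this listing):

def computeBatchGradient(gradient_list):
    # Sum the per-sample gradients element-wise via Variables.__add__
    batch_gradient = gradient_list[0]
    for sample_gradient in gradient_list[1:]:
        batch_gradient = batch_gradient + sample_gradient
    return batch_gradient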
    def testVariables(self):
        var = sup.Variables()
        var.weights.append(np.ones([2, 2]))
        var.weights.append(np.ones([2, 2]))
        var.biases.append(np.ones([1, 2]))
        var.biases.append(np.ones([1, 2]))

        # Multiplication
        var_neg = var * (-1)
        for i in range(len(var)):
            self.assertTrue(
                np.all(var.weights[i] +
                       var_neg.weights[i] == np.zeros_like(var.weights[i])))
            self.assertTrue(
                np.all(var.biases[i] +
                       var_neg.biases[i] == np.zeros_like(var.biases[i])))

        # Addition
        var_add = var + var_neg
        for i in range(len(var_add)):
            self.assertTrue(
                np.all(var_add.weights[i] == np.zeros_like(var.weights[i])))
            self.assertTrue(
                np.all(var_add.biases[i] == np.zeros_like(var.biases[i])))

        # Subtraction
        var_sub = var - var
        for i in range(len(var_sub)):
            self.assertTrue(
                np.all(var_sub.weights[i] == np.zeros_like(var.weights[i])))
            self.assertTrue(
                np.all(var_sub.biases[i] == np.zeros_like(var.biases[i])))

        # Equality
        self.assertTrue(var == var)
        self.assertFalse(var == var_sub)

        # Inequality
        self.assertFalse(var != var)
        self.assertTrue(var != var_sub)
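The assertions above only pass if sup.Variables overloads +, -, scalar *, == and != element-wise over its weights and biases lists. A hypothetical minimal container that would satisfy them (the actual sup.Variables implementation is not part of this listing):

import numpy as np

class Variables:
    def __init__(self):
        self.weights, self.biases = [], []

    def __len__(self):
        return len(self.weights)

    def __mul__(self, scalar):
        out = Variables()
        out.weights = [w * scalar for w in self.weights]
        out.biases = [b * scalar for b in self.biases]
        return out

    def __add__(self, other):
        out = Variables()
        out.weights = [w + ow for w, ow in zip(self.weights, other.weights)]
        out.biases = [b + ob for b, ob in zip(self.biases, other.biases)]
        return out

    def __sub__(self, other):
        return self + other * (-1)

    def __eq__(self, other):
        same_weights = all(np.array_equal(w, ow)
                           for w, ow in zip(self.weights, other.weights))
        same_biases = all(np.array_equal(b, ob)
                          for b, ob in zip(self.biases, other.biases))
        return len(self) == len(other) and same_weights and same_biases

    def __ne__(self, other):
        return not self == other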
hidden_layer_specs = []
hidden_layer_specs.append({
    'activation': activation.SigmoidActivation(),
    'dim': w_1.shape[1]
})
output_dim = y_target.shape[1]

fc_net = network.FCNetwork(input_dim, output_dim, hidden_layer_specs)

fc_net.layers[0].setWeights(w_1)
fc_net.layers[0].setBiases(b_1)

fc_net.layers[1].setWeights(w_out)
fc_net.layers[1].setBiases(b_out)

correct_gradients = sup.Variables()
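With the weights fixed as above, the network's forward pass can be cross-checked directly in numpy. This is only a sketch: it assumes a hypothetical input batch x and that both the hidden and the output layer apply a sigmoid, which matches the backpropagation code in Example #5 further down but is not shown in the fixture itself.

hidden = 1.0 / (1.0 + np.exp(-(np.dot(x, w_1) + b_1)))
expected_output = 1.0 / (1.0 + np.exp(-(np.dot(hidden, w_out) + b_out)))
# np.testing.assert_allclose(fc_net.output(x), expected_output)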


class TestActivationFunctions(unittest.TestCase):
    """Test activation functions for value and gradient."""
    def testUnit(self):
        unitActivation = activation.UnitActivation()
        input = np.random.rand(10)
        self.assertTrue(np.all(input == unitActivation.evaluate(input)))
        self.assertTrue(
            np.all(np.ones_like(input) == unitActivation.derivative(input)))

    def testSigmoid(self):
        sigmoidActivation = activation.SigmoidActivation()
        input = np.random.rand(10)
        self.assertTrue(
            np.allclose(1.0 / (1.0 + np.exp(-input)),
                        sigmoidActivation.evaluate(input)))
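The hand-written backpropagation in Example #5 below replaces the activation's derivative call with the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)). A quick stand-alone numerical check of that identity (not part of the test suite):

import numpy as np

z = np.linspace(-5.0, 5.0, 11)
s = 1.0 / (1.0 + np.exp(-z))
eps = 1e-6
numeric_derivative = (1.0 / (1.0 + np.exp(-(z + eps))) - s) / eps   # forward difference
np.testing.assert_allclose(numeric_derivative, s * (1 - s), atol=1e-4)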
Example #5
    def getParameters(self):
        nn_params = sup.Variables()
        for l in self.layers:
            nn_params.weights.append(l.getWeights())
            nn_params.biases.append(l.getBiases())
        return nn_params
    def gradients(self, x, loss_function, y_target):
        '''
    Inputs:
      x: network input
      loss_function: The loss function; its derivative w.r.t. the network output is needed to compute the gradients.
      y_target: Target values for the network output.
    Return value:
      gradients: Gradients of the loss function w.r.t. all weights and biases of the network.
                 Gradients have a weights and a biases member; indexing starts with 0 for the first hidden layer (W_1, b_1)
                 and ends with the output layer (W_out, b_out).
    '''
        gradients = sup.Variables()
        # Start with None so that an indexing problem later definitely throws an error
        gradients.weights = [None] * self.numberOfLayers()
        gradients.biases = [None] * self.numberOfLayers()

        # Outputs of each layer (layer_evaluations[0] is input x)
        layer_evaluations = []
        for layer_idx, layer in enumerate(self.layers):
            layer_evaluations.append(self.evaluateLayer(layer_idx, x))

        # Output equals the evaluation of the last layer
        network_output = self.output(x)
        print("Network output shape: {}".format(network_output))

        ## Output layer
        dCost_dy = loss_function.derivative(network_output, y_target)
        # Element-wise multiplication with the sigmoid derivative (sigmoid is applied element-wise).
        # delta_fused = dCost_dy * self.layers[-1].derivativeActivation(network_output)
        # would be more general, but does not work right now, so the sigmoid derivative is written out directly:
        delta_fused = dCost_dy * network_output * (1 - network_output)
        # L_w_out = np.dot(network_output.T, delta_fused)
        # L_b_out = delta_fused
        print("Output weights shape: {}".format(
            np.dot(layer_evaluations[-1].T, delta_fused)))
        gradients.weights[self.numberOfLayers() - 1] = np.dot(
            layer_evaluations[-1].T, delta_fused)
        gradients.biases[self.numberOfLayers() - 1] = delta_fused

        print(layer_evaluations)

        # Gradient backpropagation
        ## Start from last layer and propagate error gradient through until first layer
        ## Attention!!!: layer_evaluations[0] is the network input while self.layers[0] is the first hidden layer
        for layer_idx in np.arange(len(self.layers) - 2, -1, -1):
            logger.debug(
                'Computing the gradient for layer {}'.format(layer_idx))
            print("Gradient for layer_idx: {}".format(layer_idx))
            # If layer is not last layer, update delta_fused (which is accumulating the back-propagated gradient)
            # TODO: implement backpropagation of gradient for arbitrary number of layers
            L_w_prev = self.layers[
                layer_idx + 1].getWeights()  # 'prev' w.r.t. the back prop
            #   print("Current layer evaluations shape: {}".format(layer_evaluations[layer_idx]))
            print("\tPrevious layer weights (index {}) shape: {}".format(
                layer_idx + 1, L_w_prev.shape))
            print("\tPrevious layer biases (index {}) shape: {}".format(
                layer_idx + 1, gradients.biases[layer_idx + 1].shape))
            print("\tCurrent delta_fused shape: {}".format(delta_fused.shape))

            #   delta_fused = np.dot(delta_fused, L_w_prev.T) * self.layers[layer_idx].derivativeActivation(layer_evaluations[layer_idx])
            delta_fused = np.dot(delta_fused, L_w_prev.T) * layer_evaluations[
                layer_idx + 1] * (1 - layer_evaluations[layer_idx + 1])

            gradients.weights[layer_idx] = np.dot(
                layer_evaluations[layer_idx].T, delta_fused)  # weight
            gradients.biases[layer_idx] = delta_fused  # bias

            print(
                "\tLayer index {} will have weight shape {} and bias shape {}".
                format(layer_idx, gradients.weights[layer_idx].shape,
                       gradients.biases[layer_idx].shape))

        return gradients
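A standard way to validate the gradients above is a finite-difference check. The sketch below perturbs one weight at a time and compares against a central-difference estimate of the loss; it assumes x and y_target hold a single sample and that the loss object also exposes an evaluate(y, y_target) method (only its derivative is used in the code above), so treat it as an illustration rather than part of the exercise framework.

import numpy as np

def finite_difference_check(net, x, loss_function, y_target, eps=1e-5, tol=1e-4):
    analytic = net.gradients(x, loss_function, y_target)
    for layer_idx, layer in enumerate(net.layers):
        W = layer.getWeights()
        for i in range(W.shape[0]):
            for j in range(W.shape[1]):
                W_perturbed = W.copy()
                W_perturbed[i, j] += eps
                layer.setWeights(W_perturbed)
                loss_plus = np.sum(loss_function.evaluate(net.output(x), y_target))
                W_perturbed[i, j] -= 2 * eps
                layer.setWeights(W_perturbed)
                loss_minus = np.sum(loss_function.evaluate(net.output(x), y_target))
                layer.setWeights(W)  # restore the original weights
                numeric = (loss_plus - loss_minus) / (2 * eps)
                assert abs(analytic.weights[layer_idx][i, j] - numeric) < tol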