Example 1
    def backward(self, train_data, y_true):
        loss, self.gradients["A3"] = losses.cross_entropy_loss(self.nodes["A3"], y_true)
        self.gradients["W3"], self.gradients["B3"], self.gradients["Z2"] = \
            layer.fc_backward(self.gradients["A3"], self.Parameters["W3"], self.nodes["Z2"])

        self.gradients["A2"] = activations.relu_backward(self.gradients["Z2"].T, self.nodes["A2"])
        self.gradients["W2"], self.gradients["B2"], self.gradients["Z1"] = \
            layer.fc_backward(self.gradients["A2"], self.Parameters["W2"], self.nodes["Z1"])

        self.gradients["A1"] = activations.relu_backward(self.gradients["Z1"].T, self.nodes["A1"])
        self.gradients["W1"], self.gradients["B1"], self.gradients["Z1"] = \
            layer.fc_backward(self.gradients["A1"], self.Parameters["W1"], self.nodes["X2"])

        self.gradients["Z1"] = self.gradients["Z1"].reshape((128, 16, 5, 5))

        self.gradients["Maxpool2"] = layer.max_pooling_backward(self.gradients["Z1"], self.nodes["Conv2"], (2, 2))
        self.gradients["K2"], self.gradients["Kb2"], self.gradients["KZ2"] = \
            layer.conv_backward(self.gradients["Maxpool2"], self.Parameters["K2"], self.nodes["Maxpool1"])

        self.gradients["Maxpool1"] = \
            layer.max_pooling_backward(self.gradients["KZ2"], self.nodes["Conv1"], (2, 2))
        self.gradients["K1"], self.gradients["Kb1"], self.gradients["KZ1"] = \
            layer.conv_backward(self.gradients["Maxpool1"], self.Parameters["K1"], train_data)

        return loss
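The helpers from the losses, activations, and layer modules used above are not shown, so the exact shape conventions behind the .T transposes come from that project. As a rough sketch, and only an assumption about that code, layer.fc_backward appears to follow the contract (upstream gradient, weights, layer input) -> (dW, db, gradient w.r.t. the input), e.g. with a batch-major layout:

def fc_backward(d_out, W, A_prev):
    # d_out:  gradient w.r.t. the layer output, shape (batch, n_out) -- assumed layout
    # W:      weight matrix, shape (n_in, n_out)
    # A_prev: input that was fed to the layer, shape (batch, n_in)
    dW = A_prev.T.dot(d_out)    # gradient w.r.t. the weights
    db = d_out.sum(axis=0)      # gradient w.r.t. the bias
    d_input = d_out.dot(W.T)    # gradient propagated to the previous layer
    return dW, db, d_input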
Example 2
 def backward(self, train_data, y_true):
     loss, self.gradients["y"] = cross_entropy_loss(self.nurons["y"], y_true)
     self.gradients["W3"], self.gradients["b3"], self.gradients["z3_relu"] = fc_backward(self.gradients["y"],
                                                                                         self.weights["W3"],
                                                                                         self.nurons["z3_relu"])
     self.gradients["z3"] = relu_backward(self.gradients["z3_relu"], self.nurons["z3"])
     self.gradients["W2"], self.gradients["b2"], self.gradients["z2_relu"] = fc_backward(self.gradients["z3"],
                                                                                         self.weights["W2"],
                                                                                         self.nurons["z2_relu"])
     self.gradients["z2"] = relu_backward(self.gradients["z2_relu"], self.nurons["z2"])
     self.gradients["W1"], self.gradients["b1"], _ = fc_backward(self.gradients["z2"],
                                                                 self.weights["W1"],
                                                                 train_data)
     return loss
Example 3
def linear_activation_backward(dA, AL, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    dA -- post-activation gradient for current layer l
    AL -- output-layer activations, forwarded to softmax_backward
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "softmax" or "relu"
    
    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == "softmax":
        dZ = softmax_backward(dA, AL, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
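Example 3 and the similar examples below assume deeplearning.ai-style helpers in which the activation cache holds the pre-activation Z and the linear cache holds (A_prev, W, b). A minimal sketch of commonly used definitions of relu_backward, sigmoid_backward, and linear_backward (an assumption about those repositories, not code taken from them):

import numpy as np

def relu_backward(dA, activation_cache):
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0                      # ReLU passes the gradient only where Z > 0
    return dZ

def sigmoid_backward(dA, activation_cache):
    Z = activation_cache
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)             # chain rule through the sigmoid

def linear_backward(dZ, linear_cache):
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]                 # examples are stored as columns
    dW = dZ.dot(A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = W.T.dot(dZ)
    return dA_prev, dW, db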
Example 4
File: model.py Project: Darvaron/ML
def linear_activation_backward(dA, cache, activation):
    '''
    Implements backpropagation for the LINEAR->ACTIVATION step of the current layer
    Arguments:
    dA: gradient of the cost with respect to the activation, same shape as A
    cache: tuple containing the cache of the linear part and the cache of the activation
    activation: string with the name of the activation used, "Sigmoid" or "Relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation of the previous layer, same shape as A_prev
    dW -- Gradient of the cost with respect to the weights of the current layer, same shape as W
    db -- Gradient of the cost with respect to the biases of the current layer, same shape as b
    '''

    linear_c, activation_c = cache

    if activation == "Relu":
        dZ = relu_backward(dA, activation_c)
        dA_prev, dW, db = linear_backward(dZ, linear_c)

    elif activation == "Sigmoid":
        dZ = sigmoid_backward(dA, activation_c)
        dA_prev, dW, db = linear_backward(dZ, linear_c)

    return dA_prev, dW, db
Example 5
    def backward(self, train_data, y_true):
        loss, self.gradients["A3"] = losses.cross_entropy_loss(
            self.nodes["A3"], y_true)
        self.gradients["W3"], self.gradients["B3"], self.gradients["Z2"] = \
            layer.fc_backward(self.gradients["A3"], self.Parameters["W3"], self.nodes["Z2"])

        self.gradients["A2"] = activations.relu_backward(
            self.gradients["Z2"].T, self.nodes["A2"])
        self.gradients["W2"], self.gradients["B2"], self.gradients["Z1"] = \
            layer.fc_backward(self.gradients["A2"], self.Parameters["W2"], self.nodes["Z1"])

        self.gradients["A1"] = activations.relu_backward(
            self.gradients["Z1"].T, self.nodes["A1"])
        self.gradients["W1"], self.gradients["B1"], self.gradients["Z1"] = \
            layer.fc_backward(self.gradients["A1"], self.Parameters["W1"], train_data)

        return loss
Example 6
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
Example 7
    def linear_activation_backward(self, dA, cache, activation):
        linear_cache, activation_cache = cache

        if activation == 'relu':
            dZ = relu_backward(dA, activation_cache)
        elif activation == 'sigmoid':
            dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = self.linear_backward(dZ, linear_cache)

        return dA_prev, dW, db
Example 8
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache

    if activation == 'relu':
        # relu_backward already applies the chain rule element-wise, so no extra np.dot is needed
        dZ = activations.relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == 'sigmoid':
        dZ = activations.sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    
    return dA_prev, dW, db
Example 9
def linear_activation_backward(dA, cache, activation):
    # Retrieving cache
    linear_cache, activation_cache = cache

    # Backward activation step
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)

    # Linear backward step
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
Example 10
def linear_act_backward(dA, cache, act):
    """
    Implements the linear and activation function derivatives of single node
    """
    lin_cache, act_cache = cache

    if act == "relu":
        dZ = relu_backward(dA, act_cache)
        dA, dW, db = linear_backward(dZ, lin_cache)
    elif act == "sigmoid":
        dZ = sigmoid_backward(dA, act_cache)
        dA, dW, db = linear_backward(dZ, lin_cache)

    return dA, dW, db
Example 11
def linear_activation_backward_with_regularization(dA, cache, activation,
                                                   _lambda):
    # Retrieving cache
    linear_cache, activation_cache = cache

    # Activation backward step
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)

    # Linear backward step
    dA_prev, dW, db = linear_backward_with_regularization(
        dZ, linear_cache, _lambda)

    return dA_prev, dW, db
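linear_backward_with_regularization is not shown in this example. Under the usual L2 weight-decay formulation it differs from plain linear_backward only in the dW term; a possible sketch, assuming the same (A_prev, W, b) linear cache as above:

import numpy as np

def linear_backward_with_regularization(dZ, linear_cache, _lambda):
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]
    dW = dZ.dot(A_prev.T) / m + (_lambda / m) * W   # extra L2 (weight-decay) term
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = W.T.dot(dZ)
    return dA_prev, dW, db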
Example 12
def linear_activation_backward_with_dropout(dA, cache, activation, keep_prob):
    # Retrieving cache
    linear_cache, activation_cache, D = cache

    # Linear backward and activation steps
    if activation == 'relu':
        # Implementing dropout
        dA = dA * D  # Apply mask D to shut down the same neurons as during the forward propagation
        dA = np.divide(
            dA, keep_prob
        )  # Scale the value of neurons that haven't been shut down

        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward_with_dropout(dZ, linear_cache, D,
                                                       keep_prob)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
Example 13
def linear_activation_backward(dA,
                               cache,
                               lambd,
                               activation,
                               sparse_ae_parameters=()):
    """
	Implement the backward propagation for the LINEAR->ACTIVATION layer.
	
	Arguments:
	dA -- post-activation gradient for current layer l 
	cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
	activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
	
	Returns:
	dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
	dW -- Gradient of the cost with respect to W (current layer l), same shape as W
	db -- Gradient of the cost with respect to b (current layer l), same shape as b
	"""
    linear_cache, activation_cache = cache

    if activation == "relu":
        if sparse_ae_parameters:
            sparse_beta, rho, rho_hat = sparse_ae_parameters
            #print("dA1's shape:", dA.shape)
            #print("rho_hat's shape:", rho_hat.shape)
            dA = dA + sparse_beta * (-rho / rho_hat + (1 - rho) /
                                     (1 - rho_hat))
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache, lambd)

    elif activation == "sigmoid":
        if sparse_ae_parameters:
            sparse_beta, rho, rho_hat = sparse_ae_parameters
            #print("dA1's shape:", dA.shape)
            #print("rho_hat's shape:", rho_hat.shape)
            dA = dA + sparse_beta * (-rho / rho_hat + (1 - rho) /
                                     (1 - rho_hat))
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache, lambd)

    return dA_prev, dW, db
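The extra term that Example 13 adds to dA is the derivative of the KL-divergence sparsity penalty KL(rho || rho_hat) with respect to rho_hat. How rho_hat is computed is not shown; in a typical sparse autoencoder it is the mean activation of each hidden unit over the batch, roughly as in this assumed sketch (rho, sparse_beta, and the activation shape are illustrative, not from the original code):

import numpy as np

rho, sparse_beta = 0.05, 3.0                   # target sparsity and penalty weight (illustrative values)
A1 = np.random.rand(64, 128)                   # hypothetical hidden activations, shape (n_hidden, m)

rho_hat = np.mean(A1, axis=1, keepdims=True)   # average activation of each hidden unit

# sparsity penalty added to the cost: beta * sum of KL(rho || rho_hat)
kl = rho * np.log(rho / rho_hat) + (1 - rho) * np.log((1 - rho) / (1 - rho_hat))
sparsity_cost = sparse_beta * np.sum(kl)

# its derivative w.r.t. rho_hat is exactly the term added to dA above
d_sparse = sparse_beta * (-rho / rho_hat + (1 - rho) / (1 - rho_hat))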
Example 14
def network(params,
            layers,
            data,
            labels,
            reconstruction=False,
            addnoise=False):

    l = len(layers)
    batch_size = layers[1]['batch_size']

    param_grad = {}
    cp = {}
    output = {}

    data_orig = copy.deepcopy(data)
    if addnoise:
        noise = np.random.binomial(1, 0.75, size=data.shape)
        data = data * noise

    output[1] = {
        'data': data,
        'height': layers[1]['height'],
        'channel': layers[1]['channel'],
        'batch_size': layers[1]['batch_size'],
        'diff': 0
    }

    for i in range(2, l + 1):
        if layers[i]['type'] == 'IP':
            output[i] = fully_connected.inner_product_forward(
                output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i] = activations.relu_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Sigmoid':
            output[i] = activations.sigmoid_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Tanh':
            output[i] = activations.tanh_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'LOSS':
            [obj, grad_w, grad_b, input_back_deriv,
             success_rate] = loss_func(params[i - 1]['w'], params[i - 1]['b'],
                                       output[i - 1]['data'], labels,
                                       layers[i]['num'], 1)
            param_grad[i - 1] = {
                'w': grad_w / batch_size,
                'b': grad_b / batch_size
            }
        elif layers[i]['type'] == 'autoEnc':
            [obj, input_back_deriv,
             success_rate] = autoEnc_loss(output[i - 1]['data'], data_orig)
            param_grad[i - 1] = {'w': 0.0, 'b': 0.0}

    if reconstruction:
        return output[i - 1]['data']

    for i in range(l - 1, 1, -1):
        param_grad[i - 1] = {}
        param_grad[i - 1]['w'] = np.array([])
        param_grad[i - 1]['b'] = np.array([])
        if layers[i]['type'] == 'IP':
            output[i]['diff'] = input_back_deriv
            param_grad[i - 1], input_back_deriv = \
                fully_connected.inner_product_backward(
                    output[i], output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.relu_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Sigmoid':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.sigmoid_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Tanh':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.tanh_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])

    return (obj / batch_size), success_rate, param_grad
Example 15
def network(params, layers, data, labels):

    l = len(layers)
    batch_size = layers[1]['batch_size']

    param_grad = {}
    cp = {}
    output = {}
    output[1] = {
        'data': data,
        'height': layers[1]['height'],
        'channel': layers[1]['channel'],
        'batch_size': layers[1]['batch_size'],
        'diff': 0
    }

    for i in range(2, l):
        if layers[i]['type'] == 'IP':
            output[i] = fully_connected.inner_product_forward(
                output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i] = activations.relu_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Sigmoid':
            output[i] = activations.sigmoid_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Tanh':
            output[i] = activations.tanh_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'batch_norm':
            output[i] = activations.batch_normalization_forward(
                output[i - 1], layers[i], params[i - 1])
    i = l
    [obj, grad_w, grad_b, input_back_deriv,
     success_rate] = loss_func(params[i - 1]['w'], params[i - 1]['b'],
                               output[i - 1]['data'], labels, layers[i]['num'],
                               1)

    param_grad[i - 1] = {'w': grad_w / batch_size, 'b': grad_b / batch_size}

    for i in range(l - 1, 1, -1):
        param_grad[i - 1] = {}
        param_grad[i - 1]['w'] = np.array([])
        param_grad[i - 1]['b'] = np.array([])
        if layers[i]['type'] == 'IP':
            output[i]['diff'] = input_back_deriv
            param_grad[i - 1], input_back_deriv = \
                fully_connected.inner_product_backward(
                    output[i], output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.relu_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Sigmoid':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.sigmoid_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Tanh':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.tanh_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'batch_norm':
            output[i]['diff'] = input_back_deriv
            param_grad[i - 1], input_back_deriv = \
                activations.batch_normalization_backward(
                    output[i], output[i - 1], layers[i], params[i - 1])
        param_grad[i - 1]['w'] = param_grad[i - 1]['w'] / batch_size
        param_grad[i - 1]['b'] = param_grad[i - 1]['b'] / batch_size

    return (obj / batch_size), success_rate, param_grad
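Both network() variants expect 1-indexed layers and params dictionaries whose exact schema comes from the surrounding project. A rough, hypothetical illustration of that configuration (the 'DATA' type on layer 1, the 'num' field on the IP layer, and the weight shapes are assumptions, not taken from the code above):

import numpy as np

# hypothetical 784 -> 100 -> 10 classifier configuration
layers = {
    1: {'type': 'DATA', 'height': 28, 'channel': 1, 'batch_size': 64},
    2: {'type': 'IP', 'num': 100},
    3: {'type': 'RELU'},
    4: {'type': 'LOSS', 'num': 10},
}

# parameters are indexed by layer index minus one; activation layers carry no weights
params = {
    1: {'w': 0.01 * np.random.randn(784, 100), 'b': np.zeros(100)},
    2: {'w': np.array([]), 'b': np.array([])},
    3: {'w': 0.01 * np.random.randn(100, 10), 'b': np.zeros(10)},
}

# loss, accuracy, param_grad = network(params, layers, data, labels)   # one mini-batch step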