def linear_activation_backward(dA, cache, activation):
    """Implement backpropagation for the LINEAR->ACTIVATION step of the current layer.

    Arguments:
    dA -- gradient of the cost with respect to the activation output, same shape as A
    cache -- tuple (linear_cache, activation_cache) saved during forward propagation
    activation -- name of the activation used in this layer: "Sigmoid" or "Relu"

    Returns:
    dA_prev -- gradient of the cost w.r.t. the previous layer's activation, same shape as A_prev
    dW -- gradient of the cost w.r.t. the current layer's weights, same shape as W
    db -- gradient of the cost w.r.t. the current layer's biases, same shape as b

    Raises:
    ValueError -- if `activation` is neither "Relu" nor "Sigmoid".  (The
        original code fell through and crashed later with an
        UnboundLocalError on dZ, which hid the real problem.)
    """
    linear_c, activation_c = cache

    # Undo the non-linearity first to obtain dZ; the linear step is shared.
    if activation == "Relu":
        dZ = relu_backward(dA, activation_c)
    elif activation == "Sigmoid":
        dZ = sigmoid_backward(dA, activation_c)
    else:
        raise ValueError(f"Unknown activation: {activation!r}")

    dA_prev, dW, db = linear_backward(dZ, linear_c)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    """Run the backward step for one LINEAR->ACTIVATION layer.

    `cache` is the (linear_cache, activation_cache) pair saved on the
    forward pass; `activation` selects the derivative ("relu" or
    "sigmoid").  Returns the tuple (dA_prev, dW, db).
    """
    lin_part, act_part = cache

    # Convert the post-activation gradient into dZ, then apply the
    # linear backward step shared by both activations.
    if activation == "relu":
        dZ = relu_backward(dA, act_part)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, act_part)

    return linear_backward(dZ, lin_part)
def linear_activation_backward(self, dA, cache, activation):
    """Backward step through one LINEAR->ACTIVATION layer of this network.

    Splits `cache` into its linear and activation halves, maps the
    post-activation gradient back through the chosen non-linearity, and
    delegates the affine part to self.linear_backward.
    Returns (dA_prev, dW, db).
    """
    lin_cache, act_cache = cache

    if activation == 'relu':
        dZ = relu_backward(dA, act_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, act_cache)

    grad_prev, grad_W, grad_b = self.linear_backward(dZ, lin_cache)
    return grad_prev, grad_W, grad_b
def linear_activation_backward(dA, cache, activation):
    """Backward pass for one LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for the current layer
    cache -- (linear_cache, activation_cache) tuple from the forward pass
    activation -- 'relu' or 'sigmoid'

    Returns:
    dA_prev, dW, db -- gradients w.r.t. the previous layer's activation,
    the current layer's weights, and its biases.
    """
    linear_cache, activation_cache = cache

    # BUG FIX: the original wrapped the helper's result in
    # np.dot(dA, ...).  A matrix product between two same-shaped (n, m)
    # gradients is dimensionally invalid for m != n and is not the chain
    # rule for an elementwise activation.  The *_backward helpers are
    # assumed to follow the standard contract of returning
    # dZ = dA * g'(Z) directly (as the sibling implementations in this
    # file do) -- TODO confirm against the activations module.
    if activation == 'relu':
        dZ = activations.relu_backward(dA, activation_cache)
    elif activation == 'sigmoid':
        dZ = activations.sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    """Propagate gradients backward through one LINEAR->ACTIVATION pair.

    Unpacks the forward-pass cache, undoes the non-linearity to obtain
    dZ, then runs the linear backward step.
    Returns (dA_prev, dW, db).
    """
    # Split the cache saved during forward propagation.
    lin_cache, act_cache = cache

    # Activation derivative first...
    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA, act_cache)
    elif activation == 'relu':
        dZ = relu_backward(dA, act_cache)

    # ...then the affine transform.
    return linear_backward(dZ, lin_cache)
def linear_act_backward(dA, cache, act):
    """Compute the activation and linear derivatives for a single layer.

    `cache` holds the (linear, activation) caches from the forward pass;
    `act` names the non-linearity ("relu" or "sigmoid").  Returns
    (dA, dW, db), where dA is the gradient handed to the previous layer.
    """
    lin_cache, act_cache = cache

    # Each branch chains the activation derivative straight into the
    # linear backward step.
    if act == "relu":
        dA, dW, db = linear_backward(relu_backward(dA, act_cache), lin_cache)
    elif act == "sigmoid":
        dA, dW, db = linear_backward(sigmoid_backward(dA, act_cache), lin_cache)

    return dA, dW, db
def linear_activation_backward_with_regularization(dA, cache, activation,
                                                   _lambda):
    """Backward step for LINEAR->ACTIVATION with regularization.

    Works like the plain backward step, but delegates the affine part to
    linear_backward_with_regularization, which incorporates the penalty
    term controlled by `_lambda`.  Returns (dA_prev, dW, db).
    """
    # Unpack the forward-pass cache.
    lin_cache, act_cache = cache

    # Map the post-activation gradient back to dZ.
    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA, act_cache)
    elif activation == 'relu':
        dZ = relu_backward(dA, act_cache)

    # Regularized linear backward step.
    return linear_backward_with_regularization(dZ, lin_cache, _lambda)
def linear_activation_backward_with_dropout(dA, cache, activation, keep_prob):
    """Backward step for LINEAR->ACTIVATION when inverted dropout was used.

    `cache` is (linear_cache, activation_cache, D), where D is the dropout
    mask saved on the forward pass.  For 'relu' layers the incoming
    gradient is masked with D and rescaled by 1/keep_prob before the
    activation and dropout-aware linear backward steps; for 'sigmoid'
    layers (presumably the output layer, where no dropout was applied)
    the plain backward path is used.  Returns (dA_prev, dW, db).
    """
    lin_cache, act_cache, mask = cache

    if activation == 'relu':
        # Shut down the same neurons as during forward propagation, then
        # scale the survivors so their expected value is unchanged.
        masked_dA = np.divide(dA * mask, keep_prob)
        dZ = relu_backward(masked_dA, act_cache)
        dA_prev, dW, db = linear_backward_with_dropout(dZ, lin_cache, mask,
                                                       keep_prob)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, act_cache)
        dA_prev, dW, db = linear_backward(dZ, lin_cache)

    return dA_prev, dW, db
def linear_activation_backward(dA, cache, lambd, activation,
                               sparse_ae_parameters=()):
    """Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for
        computing backward propagation efficiently
    lambd -- regularization factor forwarded to linear_backward (presumably
        an L2 weight-decay coefficient -- confirm against linear_backward)
    activation -- the activation to be used in this layer, stored as a text
        string: "sigmoid" or "relu"
    sparse_ae_parameters -- optional (sparse_beta, rho, rho_hat) tuple; when
        non-empty, the sparsity-penalty gradient of a sparse autoencoder
        (the derivative of beta * KL(rho || rho_hat)) is added to dA before
        the activation backward step

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the
        previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    # The sparsity adjustment was previously duplicated verbatim in both
    # activation branches; it does not depend on the activation, so apply
    # it exactly once here.
    if sparse_ae_parameters:
        sparse_beta, rho, rho_hat = sparse_ae_parameters
        dA = dA + sparse_beta * (-rho / rho_hat + (1 - rho) / (1 - rho_hat))

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache, lambd)
    return dA_prev, dW, db
def network(params, layers, data, labels, reconstruction=False, addnoise=False):
    """Run one forward/backward pass of a layered network with an (auto)encoder loss.

    Arguments:
    params -- dict of per-layer parameter dicts; params[i] is read with
        keys 'w' and 'b' at the loss layer and passed whole elsewhere
    layers -- dict of layer-config dicts, 1-indexed; layers[1] describes the
        input ('height', 'channel', 'batch_size'); later entries carry a
        'type' of 'IP', 'RELU', 'Sigmoid', 'Tanh', 'LOSS' or 'autoEnc'
    data -- input batch placed in layer 1's 'data' field
    labels -- targets handed to loss_func for a 'LOSS' layer
    reconstruction -- when True, return the activations feeding the loss
        layer instead of running backpropagation
    addnoise -- when True, corrupt the input with a Bernoulli(0.75) mask
        (denoising-autoencoder-style); the uncorrupted copy is kept as the
        reconstruction target

    Returns:
    (obj / batch_size, success_rate, param_grad) -- batch-mean objective,
    the success rate reported by the loss, and per-layer gradient dicts.
    """
    l = len(layers)
    batch_size = layers[1]['batch_size']
    param_grad = {}
    cp = {}  # NOTE(review): never used in this function
    output = {}
    # Keep the clean input: the autoencoder loss compares against the
    # uncorrupted data even when noise is injected below.
    data_orig = copy.deepcopy(data)
    if addnoise:
        # Randomly zero out ~25% of input entries (denoising corruption).
        noise = np.random.binomial(1, 0.75, size=data.shape)
        data = data * noise
    # Layer 1 is the data layer; 'diff' will hold incoming gradients.
    output[1] = {
        'data': data,
        'height': layers[1]['height'],
        'channel': layers[1]['channel'],
        'batch_size': layers[1]['batch_size'],
        'diff': 0
    }
    # ---- Forward pass (layers 2..l); the loss layer also produces the
    # objective and the gradient seed `input_back_deriv`.
    for i in range(2, l + 1):
        if layers[i]['type'] == 'IP':
            output[i] = fully_connected.inner_product_forward(
                output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i] = activations.relu_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Sigmoid':
            output[i] = activations.sigmoid_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Tanh':
            output[i] = activations.tanh_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'LOSS':
            # Classification loss: also yields gradients for this layer's
            # own w/b, normalized per sample.
            [obj, grad_w, grad_b, input_back_deriv, success_rate] = loss_func(
                params[i - 1]['w'], params[i - 1]['b'],
                output[i - 1]['data'], labels, layers[i]['num'], 1)
            param_grad[i - 1] = {
                'w': grad_w / batch_size,
                'b': grad_b / batch_size
            }
        elif layers[i]['type'] == 'autoEnc':
            # Reconstruction loss against the clean input; no parameters.
            [obj, input_back_deriv, success_rate] = autoEnc_loss(
                output[i - 1]['data'], data_orig)
            param_grad[i - 1] = {'w': 0.0, 'b': 0.0}
    # NOTE(review): in the collapsed original the placement of this early
    # return was ambiguous; here it sits after the forward loop, where `i`
    # still holds the last layer index -- confirm against the original file.
    if reconstruction:
        return output[i - 1]['data']
    # ---- Backward pass (layers l-1 down to 2).  `input_back_deriv`
    # carries the gradient flowing toward the input.
    for i in range(l - 1, 1, -1):
        # Default to empty gradients; parameterized layers overwrite them.
        param_grad[i - 1] = {}
        param_grad[i - 1]['w'] = np.array([])
        param_grad[i - 1]['b'] = np.array([])
        if layers[i]['type'] == 'IP':
            output[i]['diff'] = input_back_deriv
            param_grad[
                i - 1], input_back_deriv = fully_connected.inner_product_backward(
                    output[i], output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.relu_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Sigmoid':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.sigmoid_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Tanh':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.tanh_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
    return (obj / batch_size), success_rate, param_grad
def network(params, layers, data, labels):
    """Run one forward/backward pass of a fully-connected classification network.

    Arguments:
    params -- dict of per-layer parameter dicts; params[l - 1] supplies the
        loss layer's 'w' and 'b'
    layers -- dict of layer-config dicts, 1-indexed; layers[1] describes the
        input ('height', 'channel', 'batch_size'); layers 2..l-1 carry a
        'type' of 'IP', 'RELU', 'Sigmoid', 'Tanh' or 'batch_norm'; layer l
        is the loss layer ('num' = number of classes)
    data -- input batch placed in layer 1's 'data' field
    labels -- targets handed to loss_func

    Returns:
    (obj / batch_size, success_rate, param_grad) -- batch-mean objective,
    the success rate reported by the loss, and per-layer gradient dicts.
    """
    l = len(layers)
    batch_size = layers[1]['batch_size']
    param_grad = {}
    cp = {}  # NOTE(review): never used in this function
    output = {}
    # Layer 1 is the data layer; 'diff' will hold incoming gradients.
    output[1] = {
        'data': data,
        'height': layers[1]['height'],
        'channel': layers[1]['channel'],
        'batch_size': layers[1]['batch_size'],
        'diff': 0
    }
    # ---- Forward pass over the hidden layers (layer l, the loss layer,
    # is handled separately below).
    for i in range(2, l):
        if layers[i]['type'] == 'IP':
            output[i] = fully_connected.inner_product_forward(
                output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i] = activations.relu_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Sigmoid':
            output[i] = activations.sigmoid_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Tanh':
            output[i] = activations.tanh_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'batch_norm':
            output[i] = activations.batch_normalization_forward(
                output[i - 1], layers[i], params[i - 1])
    # ---- Loss layer: produces the objective, the gradients for its own
    # w/b, and the gradient seed `input_back_deriv` for backpropagation.
    i = l
    [obj, grad_w, grad_b, input_back_deriv, success_rate] = loss_func(
        params[i - 1]['w'], params[i - 1]['b'], output[i - 1]['data'],
        labels, layers[i]['num'], 1)
    param_grad[i - 1] = {'w': grad_w / batch_size, 'b': grad_b / batch_size}
    # ---- Backward pass (layers l-1 down to 2).
    for i in range(l - 1, 1, -1):
        # Default to empty gradients; parameterized layers overwrite them.
        param_grad[i - 1] = {}
        param_grad[i - 1]['w'] = np.array([])
        param_grad[i - 1]['b'] = np.array([])
        if layers[i]['type'] == 'IP':
            output[i]['diff'] = input_back_deriv
            param_grad[
                i - 1], input_back_deriv = fully_connected.inner_product_backward(
                    output[i], output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.relu_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Sigmoid':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.sigmoid_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Tanh':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.tanh_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'batch_norm':
            output[i]['diff'] = input_back_deriv
            param_grad[
                i - 1], input_back_deriv = activations.batch_normalization_backward(
                    output[i], output[i - 1], layers[i], params[i - 1])
            # Batch-norm parameter gradients are normalized per sample.
            param_grad[i - 1]['w'] = param_grad[i - 1]['w'] / batch_size
            param_grad[i - 1]['b'] = param_grad[i - 1]['b'] / batch_size
    return (obj / batch_size), success_rate, param_grad