def backward(self, train_data, y_true): loss, self.gradients["A3"] = losses.cross_entropy_loss(self.nodes["A3"], y_true) self.gradients["W3"], self.gradients["B3"], self.gradients["Z2"] = \ layer.fc_backward(self.gradients["A3"], self.Parameters["W3"], self.nodes["Z2"]) self.gradients["A2"] = activations.relu_backward(self.gradients["Z2"].T, self.nodes["A2"]) self.gradients["W2"], self.gradients["B2"], self.gradients["Z1"] = \ layer.fc_backward(self.gradients["A2"], self.Parameters["W2"], self.nodes["Z1"]) self.gradients["A1"] = activations.relu_backward(self.gradients["Z1"].T, self.nodes["A1"]) self.gradients["W1"], self.gradients["B1"], self.gradients["Z1"] = \ layer.fc_backward(self.gradients["A1"], self.Parameters["W1"], self.nodes["X2"]) self.gradients["Z1"] = self.gradients["Z1"].reshape((128, 16, 5, 5)) self.gradients["Maxpool2"] = layer.max_pooling_backward(self.gradients["Z1"], self.nodes["Conv2"], (2, 2)) self.gradients["K2"], self.gradients["Kb2"], self.gradients["KZ2"] = \ layer.conv_backward(self.gradients["Maxpool2"], self.Parameters["K2"], self.nodes["Maxpool1"]) self.gradients["Maxpool1"] = \ layer.max_pooling_backward(self.gradients["KZ2"], self.nodes["Conv1"], (2, 2)) self.gradients["K1"], self.gradients["Kb1"], self.gradients["KZ1"] = \ layer.conv_backward(self.gradients["Maxpool1"], self.Parameters["K1"], train_data) return loss
def backward(self, train_data, y_true): loss, self.gradients["y"] = cross_entropy_loss(self.nurons["y"], y_true) self.gradients["W3"], self.gradients["b3"], self.gradients["z3_relu"] = fc_backward(self.gradients["y"], self.weights["W3"], self.nurons["z3_relu"]) self.gradients["z3"] = relu_backward(self.gradients["z3_relu"], self.nurons["z3"]) self.gradients["W2"], self.gradients["b2"], self.gradients["z2_relu"] = fc_backward(self.gradients["z3"], self.weights["W2"], self.nurons["z2_relu"]) self.gradients["z2"] = relu_backward(self.gradients["z2_relu"], self.nurons["z2"]) self.gradients["W1"], self.gradients["b1"], _ = fc_backward(self.gradients["z2"], self.weights["W1"], train_data) return loss
def linear_activation_backward(dA, AL, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    AL -- post-activation output of the final layer, passed through to softmax_backward
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "softmax" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "softmax":
        dZ = softmax_backward(dA, AL, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
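# `softmax_backward` is not shown in this snippet. One way it could look, assuming
# `AL` holds the softmax output with classes along axis 0 and one example per column,
# is the generic softmax Jacobian-vector product below; the `activation_cache`
# argument is accepted only to match the call signature used above.
import numpy as np

def softmax_backward(dA, AL, activation_cache):
    # dZ = AL * (dA - sum_j dA_j * AL_j), computed per example (column-wise).
    return AL * (dA - np.sum(dA * AL, axis=0, keepdims=True))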
def linear_activation_backward(dA, cache, activation):
    '''
    Implements backpropagation for the LINEAR->ACTIVATION step of the current layer.

    Arguments:
    dA: gradient of the cost with respect to the activation, same shape as A
    cache: tuple containing the cache of the linear part and the cache of the activation
    activation: string with the name of the activation used, "Sigmoid" or "Relu"

    Returns:
    dA_prev -- gradient of the cost with respect to the activation of the previous layer, same shape as A_prev
    dW -- gradient of the cost with respect to the weights of the current layer, same shape as W
    db -- gradient of the cost with respect to the biases of the current layer, same shape as b
    '''
    linear_c, activation_c = cache

    if activation == "Relu":
        dZ = relu_backward(dA, activation_c)
        dA_prev, dW, db = linear_backward(dZ, linear_c)
    elif activation == "Sigmoid":
        dZ = sigmoid_backward(dA, activation_c)
        dA_prev, dW, db = linear_backward(dZ, linear_c)

    return dA_prev, dW, db
def backward(self, train_data, y_true): loss, self.gradients["A3"] = losses.cross_entropy_loss( self.nodes["A3"], y_true) self.gradients["W3"], self.gradients["B3"], self.gradients["Z2"] = \ layer.fc_backward(self.gradients["A3"], self.Parameters["W3"], self.nodes["Z2"]) self.gradients["A2"] = activations.relu_backward( self.gradients["Z2"].T, self.nodes["A2"]) self.gradients["W2"], self.gradients["B2"], self.gradients["Z1"] = \ layer.fc_backward(self.gradients["A2"], self.Parameters["W2"], self.nodes["Z1"]) self.gradients["A1"] = activations.relu_backward( self.gradients["Z1"].T, self.nodes["A1"]) self.gradients["W1"], self.gradients["B1"], self.gradients["Z1"] = \ layer.fc_backward(self.gradients["A1"], self.Parameters["W1"], train_data) return loss
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
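# The `linear_activation_backward` variants in this family assume helper functions
# with the usual column-per-example convention: `activation_cache` stores the
# pre-activation Z and `linear_cache` stores (A_prev, W, b). A minimal sketch of
# those helpers, under that assumption, could look like this.
import numpy as np

def relu_backward(dA, activation_cache):
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0                      # gradient is zero where the ReLU was inactive
    return dZ

def sigmoid_backward(dA, activation_cache):
    Z = activation_cache
    s = 1.0 / (1.0 + np.exp(-Z))
    return dA * s * (1 - s)             # chain rule through the sigmoid

def linear_backward(dZ, linear_cache):
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]                 # number of examples
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db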
def linear_activation_backward(self, dA, cache, activation):
    linear_cache, activation_cache = cache

    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = self.linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache

    if activation == 'relu':
        dZ = activations.relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == 'sigmoid':
        dZ = activations.sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    # Retrieve cache
    linear_cache, activation_cache = cache

    # Backward activation step
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)

    # Linear backward step
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def linear_act_backward(dA, cache, act):
    """
    Implements the linear and activation function derivatives of a single node.
    """
    lin_cache, act_cache = cache

    if act == "relu":
        dZ = relu_backward(dA, act_cache)
        dA, dW, db = linear_backward(dZ, lin_cache)
    elif act == "sigmoid":
        dZ = sigmoid_backward(dA, act_cache)
        dA, dW, db = linear_backward(dZ, lin_cache)

    return dA, dW, db
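# A hedged sketch of how these per-layer backward functions are typically chained
# over the caches collected during the forward pass, assuming a sigmoid output layer
# trained with binary cross-entropy and ReLU hidden layers. The name
# `l_model_backward` and the cache layout are illustrative, not taken from the
# snippets above.
import numpy as np

def l_model_backward(AL, Y, caches):
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the binary cross-entropy cost with respect to AL.
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer (sigmoid).
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, caches[L - 1], "sigmoid")

    # Hidden layers (ReLU), walking backwards.
    for l in reversed(range(L - 1)):
        dA_prev, dW, db = linear_activation_backward(
            grads["dA" + str(l + 1)], caches[l], "relu")
        grads["dA" + str(l)] = dA_prev
        grads["dW" + str(l + 1)] = dW
        grads["db" + str(l + 1)] = db

    return grads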
def linear_activation_backward_with_regularization(dA, cache, activation, _lambda):
    # Retrieve cache
    linear_cache, activation_cache = cache

    # Activation backward step
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)

    # Linear backward step with L2 regularization
    dA_prev, dW, db = linear_backward_with_regularization(dZ, linear_cache, _lambda)

    return dA_prev, dW, db
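# `linear_backward_with_regularization` is not shown here. Under the usual L2
# (weight decay) formulation it only differs from plain `linear_backward` by the
# (_lambda / m) * W term added to dW; the sketch below assumes the same
# column-per-example cache layout as the helpers sketched earlier.
import numpy as np

def linear_backward_with_regularization(dZ, linear_cache, _lambda):
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T) / m + (_lambda / m) * W   # extra L2 penalty gradient
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db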
def linear_activation_backward_with_dropout(dA, cache, activation, keep_prob):
    # Retrieve cache
    linear_cache, activation_cache, D = cache

    # Activation backward and linear backward steps
    if activation == 'relu':
        # Apply mask D to shut down the same neurons as during the forward propagation
        dA = dA * D
        # Scale the values of the neurons that have not been shut down (inverted dropout)
        dA = np.divide(dA, keep_prob)
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward_with_dropout(dZ, linear_cache, D, keep_prob)
    elif activation == 'sigmoid':
        # The sigmoid branch (output layer) does not apply dropout
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
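# For reference, a sketch of the forward step that would produce the dropout mask D
# stored in the cache above (inverted dropout): the mask is sampled once per forward
# pass, applied to the activation, and rescaled by keep_prob so the expected
# activation is unchanged. The function name and cache layout are assumptions made
# for illustration.
import numpy as np

def linear_activation_forward_with_dropout(A_prev, W, b, keep_prob):
    Z = np.dot(W, A_prev) + b
    A = np.maximum(0, Z)                                       # ReLU activation
    D = (np.random.rand(*A.shape) < keep_prob).astype(float)   # dropout mask
    A = A * D / keep_prob                                      # shut down and rescale
    cache = ((A_prev, W, b), Z, D)
    return A, cache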
def linear_activation_backward(dA, cache, lambd, activation, sparse_ae_parameters=()):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    lambd -- L2 regularization parameter, passed through to linear_backward
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    sparse_ae_parameters -- optional tuple (sparse_beta, rho, rho_hat) for the sparse-autoencoder penalty

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        if sparse_ae_parameters:
            # Add the gradient of the KL sparsity penalty to the post-activation gradient.
            sparse_beta, rho, rho_hat = sparse_ae_parameters
            dA = dA + sparse_beta * (-rho / rho_hat + (1 - rho) / (1 - rho_hat))
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache, lambd)
    elif activation == "sigmoid":
        if sparse_ae_parameters:
            sparse_beta, rho, rho_hat = sparse_ae_parameters
            dA = dA + sparse_beta * (-rho / rho_hat + (1 - rho) / (1 - rho_hat))
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache, lambd)

    return dA_prev, dW, db
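# The sparse-autoencoder term added to dA above is the derivative, with respect to
# the hidden activations, of the KL-divergence sparsity penalty
# sparse_beta * sum_j KL(rho || rho_hat_j) (up to the chain rule through the batch
# mean). A sketch of how rho_hat and the penalty itself would be computed, assuming
# hidden activations with one column per example, is:
import numpy as np

def kl_sparsity_penalty(A_hidden, rho, sparse_beta):
    # Average activation of each hidden unit over the batch.
    rho_hat = np.mean(A_hidden, axis=1, keepdims=True)
    kl = rho * np.log(rho / rho_hat) + (1 - rho) * np.log((1 - rho) / (1 - rho_hat))
    return sparse_beta * np.sum(kl), rho_hat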
def network(params, layers, data, labels, reconstruction=False, addnoise=False):
    l = len(layers)
    batch_size = layers[1]['batch_size']
    param_grad = {}
    cp = {}
    output = {}
    data_orig = copy.deepcopy(data)

    # Optionally corrupt the input (denoising-autoencoder style).
    if addnoise:
        noise = np.random.binomial(1, 0.75, size=data.shape)
        data = data * noise

    # Input layer.
    output[1] = {
        'data': data,
        'height': layers[1]['height'],
        'channel': layers[1]['channel'],
        'batch_size': layers[1]['batch_size'],
        'diff': 0
    }

    # Forward pass.
    for i in range(2, l + 1):
        if layers[i]['type'] == 'IP':
            output[i] = fully_connected.inner_product_forward(
                output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i] = activations.relu_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Sigmoid':
            output[i] = activations.sigmoid_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Tanh':
            output[i] = activations.tanh_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'LOSS':
            [obj, grad_w, grad_b, input_back_deriv, success_rate] = loss_func(
                params[i - 1]['w'], params[i - 1]['b'], output[i - 1]['data'],
                labels, layers[i]['num'], 1)
            param_grad[i - 1] = {
                'w': grad_w / batch_size,
                'b': grad_b / batch_size
            }
        elif layers[i]['type'] == 'autoEnc':
            # Reconstruction loss against the uncorrupted input.
            [obj, input_back_deriv, success_rate] = autoEnc_loss(
                output[i - 1]['data'], data_orig)
            param_grad[i - 1] = {'w': 0.0, 'b': 0.0}

    # If only the reconstruction is requested, return the decoder output.
    if reconstruction:
        return output[i - 1]['data']

    # Backward pass.
    for i in range(l - 1, 1, -1):
        param_grad[i - 1] = {}
        param_grad[i - 1]['w'] = np.array([])
        param_grad[i - 1]['b'] = np.array([])

        if layers[i]['type'] == 'IP':
            output[i]['diff'] = input_back_deriv
            param_grad[i - 1], input_back_deriv = fully_connected.inner_product_backward(
                output[i], output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.relu_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Sigmoid':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.sigmoid_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Tanh':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.tanh_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])

    return (obj / batch_size), success_rate, param_grad
def network(params, layers, data, labels):
    l = len(layers)
    batch_size = layers[1]['batch_size']
    param_grad = {}
    cp = {}
    output = {}

    # Input layer.
    output[1] = {
        'data': data,
        'height': layers[1]['height'],
        'channel': layers[1]['channel'],
        'batch_size': layers[1]['batch_size'],
        'diff': 0
    }

    # Forward pass through the hidden layers.
    for i in range(2, l):
        if layers[i]['type'] == 'IP':
            output[i] = fully_connected.inner_product_forward(
                output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i] = activations.relu_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Sigmoid':
            output[i] = activations.sigmoid_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'Tanh':
            output[i] = activations.tanh_forward(output[i - 1], layers[i])
        elif layers[i]['type'] == 'batch_norm':
            output[i] = activations.batch_normalization_forward(
                output[i - 1], layers[i], params[i - 1])

    # Loss layer (layer l).
    i = l
    [obj, grad_w, grad_b, input_back_deriv, success_rate] = loss_func(
        params[i - 1]['w'], params[i - 1]['b'], output[i - 1]['data'],
        labels, layers[i]['num'], 1)
    param_grad[i - 1] = {'w': grad_w / batch_size, 'b': grad_b / batch_size}

    # Backward pass.
    for i in range(l - 1, 1, -1):
        param_grad[i - 1] = {}
        param_grad[i - 1]['w'] = np.array([])
        param_grad[i - 1]['b'] = np.array([])

        if layers[i]['type'] == 'IP':
            output[i]['diff'] = input_back_deriv
            param_grad[i - 1], input_back_deriv = fully_connected.inner_product_backward(
                output[i], output[i - 1], layers[i], params[i - 1])
        elif layers[i]['type'] == 'RELU':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.relu_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Sigmoid':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.sigmoid_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'Tanh':
            output[i]['diff'] = input_back_deriv
            input_back_deriv = activations.tanh_backward(
                output[i], output[i - 1], layers[i])
            param_grad[i - 1]['w'] = np.array([])
            param_grad[i - 1]['b'] = np.array([])
        elif layers[i]['type'] == 'batch_norm':
            output[i]['diff'] = input_back_deriv
            param_grad[i - 1], input_back_deriv = activations.batch_normalization_backward(
                output[i], output[i - 1], layers[i], params[i - 1])
            param_grad[i - 1]['w'] = param_grad[i - 1]['w'] / batch_size
            param_grad[i - 1]['b'] = param_grad[i - 1]['b'] / batch_size

    return (obj / batch_size), success_rate, param_grad
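# A hedged sketch of how the `network` functions above are typically driven: repeatedly
# compute the loss and per-layer gradients, then apply a plain SGD update to every layer
# that actually has parameters (the RELU/Sigmoid/Tanh entries carry empty arrays). The
# `train` name, learning rate, batching and parameter layout are assumptions made for
# illustration.
import numpy as np

def train(params, layers, x_batches, y_batches, lr=0.01, epochs=10):
    for epoch in range(epochs):
        for x_batch, y_batch in zip(x_batches, y_batches):
            obj, success_rate, param_grad = network(params, layers, x_batch, y_batch)
            for i in param_grad:
                grad_w = np.asarray(param_grad[i]['w'])
                if grad_w.size == 0:
                    continue                      # activation layers have no parameters
                params[i]['w'] = params[i]['w'] - lr * param_grad[i]['w']
                params[i]['b'] = params[i]['b'] - lr * param_grad[i]['b']
    return params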