def f2():
    # Calculate the backward pass with separate softmax and loss objects
    activation = Activation_Softmax()
    activation.output = softmax_output
    loss = Loss_CategoricalCrossentropy()
    loss.backward(softmax_output, class_targets)
    activation.backward(loss.dinputs)
    dvalues2 = activation.dinputs
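# --- Timing sketch (illustrative, not from the book text) ---
# To put a number on the speed difference between the two backward
# implementations, both helpers can be timed with Python's built-in
# timeit module. This assumes f1() (the combined version defined
# elsewhere in this file), f2() above, softmax_output and class_targets
# are all available; exact timings depend on hardware, the ratio is
# what matters.
from timeit import timeit

t1 = timeit(lambda: f1(), number=10000)
t2 = timeit(lambda: f2(), number=10000)
print('Separate / combined time ratio:', t2 / t1)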
class Activation_Softmax_Loss_CategoricalCrossentropy():

    # Create activation and loss function objects
    def __init__(self):
        self.activation = Activation_Softmax()
        self.loss = Loss_CategoricalCrossentropy()

    # Forward pass
    def forward(self, inputs, y_true):
        # Output layer's activation function
        self.activation.forward(inputs)
        # Set the output
        self.output = self.activation.output
        # Calculate and return loss value
        return self.loss.calculate(self.output, y_true)

    # Backward pass
    def backward(self, dvalues, y_true):
        # Number of samples
        samples = len(dvalues)
        # If labels are one-hot encoded,
        # turn them into discrete values
        if len(y_true.shape) == 2:
            # argmax along axis 1 returns the index of the highest value
            y_true = np.argmax(y_true, axis=1)
        # Copy so we can safely modify
        self.dinputs = dvalues.copy()
        # Calculate gradient
        self.dinputs[range(samples), y_true] -= 1
        # Normalize gradient
        self.dinputs = self.dinputs / samples
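# --- Minimal usage sketch of the combined class (illustrative only) ---
# The example logits and targets below are made-up values; the sketch only
# assumes Activation_Softmax and Loss_CategoricalCrossentropy are defined,
# as in the rest of this file. Forward returns the loss; backward fills
# dinputs with (predictions - one-hot targets) / samples.
example_inputs = np.array([[1.0, 2.0, 0.5],
                           [0.3, 0.1, 2.2]])   # raw outputs of a final dense layer
example_targets = np.array([0, 2])             # sparse class labels
softmax_ce = Activation_Softmax_Loss_CategoricalCrossentropy()
example_loss = softmax_ce.forward(example_inputs, example_targets)
softmax_ce.backward(softmax_ce.output, example_targets)
print(example_loss)
print(softmax_ce.dinputs)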
class Activation_Softmax_Loss_CategoricalCrossentropy():

    # Create activation and loss function objects
    def __init__(self):
        self.activation = Activation_Softmax()
        self.loss = Loss_CategoricalCrossentropy()

    # Forward pass
    def forward(self, inputs, y_true):
        # Output layer's activation function
        self.activation.forward(inputs)
        # Set the output
        self.output = self.activation.output
        # Calculate and return loss value
        return self.loss.calculate(self.output, y_true)

    # Regularization loss calculation
    def regularization_loss(self, layer):
        # 0 by default
        regularization_loss = 0

        # L1 regularization - weights
        # calculate only when factor is greater than 0
        if layer.weight_regularizer_l1 > 0:
            regularization_loss += layer.weight_regularizer_l1 * np.sum(
                np.abs(layer.weights))

        # L2 regularization - weights
        if layer.weight_regularizer_l2 > 0:
            regularization_loss += layer.weight_regularizer_l2 * np.sum(
                layer.weights * layer.weights)

        # L1 regularization - biases
        if layer.bias_regularizer_l1 > 0:
            regularization_loss += layer.bias_regularizer_l1 * np.sum(
                np.abs(layer.biases))

        # L2 regularization - biases
        if layer.bias_regularizer_l2 > 0:
            regularization_loss += layer.bias_regularizer_l2 * np.sum(
                layer.biases * layer.biases)

        return regularization_loss

    # Backward pass
    def backward(self, dvalues, y_true):
        # Number of samples
        samples = len(dvalues)
        # If labels are one-hot encoded,
        # turn them into discrete values
        if len(y_true.shape) == 2:
            # argmax along axis 1 returns the index of the highest value
            y_true = np.argmax(y_true, axis=1)
        # Copy so we can safely modify
        self.dinputs = dvalues.copy()
        # Calculate gradient
        self.dinputs[range(samples), y_true] -= 1
        # Normalize gradient
        self.dinputs = self.dinputs / samples
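# --- Illustrative check of regularization_loss (not from the book text) ---
# Rather than relying on Layer_Dense's constructor, this sketch uses a tiny
# stand-in object that only provides the attributes the method above actually
# reads: weights, biases and the four *_regularizer_* factors. All values are
# made up for illustration.
class _DummyLayer:
    def __init__(self):
        self.weights = np.array([[0.2, -0.5], [1.0, 0.3]])
        self.biases = np.array([[0.1, -0.2]])
        self.weight_regularizer_l1 = 0.0
        self.weight_regularizer_l2 = 5e-4
        self.bias_regularizer_l1 = 0.0
        self.bias_regularizer_l2 = 5e-4

softmax_ce = Activation_Softmax_Loss_CategoricalCrossentropy()
# Only the L2 terms contribute here:
# 5e-4 * sum(weights**2) + 5e-4 * sum(biases**2)
print(softmax_ce.regularization_loss(_DummyLayer()))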
# Finding an intelligent way to adjust the neurons' weights and biases to
# minimize loss is the main difficulty of neural networks.

# Initialize dataset
nnfs.init()

# Create dataset
x, y = vertical_data(samples=100, classes=3)

# Create model
dense1 = Layer_Dense(2, 3)  # First dense layer: 2 inputs, 3 outputs
activation_relu = Activation_ReLU()
dense2 = Layer_Dense(3, 3)  # Second dense layer: 3 inputs, 3 outputs
activation_softmax = Activation_Softmax()

# Create loss function
loss_function = Loss_CategoricalCrossentropy()

# Then create some variables to track the best loss and the associated
# weights and biases
lowest_loss = 9999999  # some initial value
best_dense1_weights = dense1.weights.copy()
best_dense1_biases = dense1.biases.copy()
best_dense2_weights = dense2.weights.copy()
best_dense2_biases = dense2.biases.copy()

# We iterate as many times as desired, pick random values for the weights and
# biases, and save the weights and biases if they generate the lowest-seen loss:
for iteration in range(10000):

    # Generate a new set of weights for this iteration
    dense1.weights += 0.05 * np.random.randn(2, 3)
import math

import numpy as np

from libraries import Loss_CategoricalCrossentropy

# Consider a scenario with a neural network that performs classification
# between three classes, and the neural network classifies in batches of three.
# After running a batch of samples through the softmax activation function
# with 3 classes, the network's output layer yields:

# Probabilities for 3 samples
softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])

# Let's assume we are trying to classify something as a "dog", "cat" or "human".
# A dog is class 0 (index 0), a cat is class 1 (index 1), and a human is class 2 (index 2).
# In the book the author calls these categorical labels:
# class_targets = np.array([0, 1, 1])
# Here the equivalent one-hot-encoded targets are used instead:
class_targets = np.array([[1, 0, 0],
                          [0, 1, 0],
                          [0, 1, 0]])

loss_class = Loss_CategoricalCrossentropy()
loss = loss_class.calculate(softmax_outputs, class_targets)
print(loss)
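# --- Manual cross-entropy check (illustrative, not from the book text) ---
# This reproduces by hand what the calculate() call above is expected to do
# for one-hot targets: pick the predicted confidence at the correct class,
# take the negative log, and average over the batch. The clipping range
# 1e-7 .. 1 - 1e-7 is an assumption used to avoid log(0).
clipped = np.clip(softmax_outputs, 1e-7, 1 - 1e-7)
correct_confidences = np.sum(clipped * class_targets, axis=1)
manual_loss = np.mean(-np.log(correct_confidences))
print(manual_loss)  # should match the loss printed above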
# Dummy output from the softmax activation
softmax_output = np.array([[0.7, 0.1, 0.2],
                           [0.1, 0.5, 0.4],
                           [0.02, 0.9, 0.08]])
class_targets = np.array([0, 1, 1])

softmax_loss = Activation_Softmax_Loss_CategoricalCrossentropy()
softmax_loss.backward(softmax_output, class_targets)
dvalues1 = softmax_loss.dinputs

activation = Activation_Softmax()
activation.output = softmax_output
loss = Loss_CategoricalCrossentropy()
loss.backward(softmax_output, class_targets)
# Run the softmax activation's backward pass on the loss gradient
activation.backward(loss.dinputs)
dvalues2 = activation.dinputs

print('Gradients: combined loss and activation:')
print(dvalues1)
print('Gradients: separate loss and activation:')
print(dvalues2)

# Now we measure how much faster the combined backward step is
# compared to the separate one.
def f1():
    # Calculate the backward pass with the combined softmax and loss object