import numpy as np

def backprop(y, cache):
    # Walk the network backwards, collecting dL/dW and dL/db for every layer.
    # cache[0] is the network input; cache[i] is the output of layer i.
    # `layers` is the module-level list of layer objects built earlier.
    derivative_w = []
    derivative_b = []

    # Loss at the output, and its gradient with respect to the prediction.
    loss = Loss(cache[-1], y)
    loss_value = loss.forward()
    dA = loss.backward()

    # Every layer except the first, iterated last-to-second.
    for index, layer in reversed(list(enumerate(layers))[1:]):
        # dL/dW = (input to this layer)^T . (upstream gradient).
        derivative_w.append(np.dot(cache[index].T, dA))
        derivative_b.append(np.sum(dA, axis=0, keepdims=True))
        # Push the gradient through the weights, then through the activation.
        dZ = np.dot(dA, layer.w.T)
        dA = dZ * layer.backward(cache[index])

    # First layer: its input is the raw network input, cache[0].
    derivative_w.append(np.dot(cache[0].T, dA))
    derivative_b.append(np.sum(dA, axis=0, keepdims=True))

    # Gradients were collected back-to-front; flip them into layer order.
    derivative_w = derivative_w[::-1]
    derivative_b = derivative_b[::-1]
    return derivative_w, derivative_b, loss_value
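Note that backprop() leans on names defined outside this snippet: the module-level layers list, a Loss class constructed as Loss(prediction, target), and layer objects exposing .w plus an activation-derivative .backward(). The sketch below is one minimal set of classes that satisfies that contract, so the function above can actually run; the ReLU activation and mean-squared-error loss are assumptions for illustration, not necessarily what the original network uses.

class Layer:
    # Hypothetical layer exposing exactly what backprop() touches: .w and .backward().
    def __init__(self, n_in, n_out):
        self.w = 0.01 * np.random.randn(n_in, n_out)
        self.b = np.zeros((1, n_out))

    def forward(self, x):
        # ReLU folded into the layer for brevity (an assumption).
        return np.maximum(0.0, np.dot(x, self.w) + self.b)

    def backward(self, a):
        # ReLU derivative, evaluated on the cached post-activation output.
        return (a > 0).astype(a.dtype)

class Loss:
    # Hypothetical mean-squared-error loss with the Loss(prediction, target) signature.
    def __init__(self, y_hat, y):
        self.y_hat, self.y = y_hat, y

    def forward(self):
        return np.mean((self.y_hat - self.y) ** 2)

    def backward(self):
        # dLoss/dprediction for MSE, averaged over the batch.
        return 2.0 * (self.y_hat - self.y) / self.y.shape[0]

# The cache backprop() expects: the network input first, then each layer's output.
layers = [Layer(2, 16), Layer(16, 1)]
X = np.random.randn(8, 2)
y = np.random.randn(8, 1)
cache = [X]
for layer in layers:
    cache.append(layer.forward(cache[-1]))

grads_w, grads_b, loss_value = backprop(y, cache)

This also makes the indexing concrete: cache[index] is the input feeding layer index, which is why it appears both in that layer's weight gradient and in the activation-derivative term.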
optimizer = Adam(learningRate=0.05, decay=4e-8)

for epoch in range(10001):
    # Forward pass: dense -> activation -> dense -> activation.
    layer1.passForward(X)
    activation1.forward(layer1.output)
    layer2.passForward(activation1.output)
    activation2.forward(layer2.output)

    loss = lossFunc.forward(activation2.output, y)

    # Accuracy: the index of the largest score in each row, compared to the target class.
    predictions = np.argmax(activation2.output, axis=1)
    accuracy = np.mean(predictions == y)

    # Backward pass: propagate the loss gradient through the network in reverse.
    lossFunc.backward(activation2.output, y)
    activation2.backward(lossFunc.dvalues)
    layer2.backward(activation2.dvalues)
    activation1.backward(layer2.dvalues)
    layer1.backward(activation1.dvalues)
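The loop above computes gradients but never applies them; the Adam optimizer constructed at the top is presumably invoked right after the backward pass, in code beyond this excerpt. As a reference for what that step involves, here is a minimal sketch of an Adam update with the decayed learning rate the constructor's decay argument suggests. The update(layer) method and the attribute names layer.w, layer.b, layer.dweights, and layer.dbiases are assumptions, not confirmed by the snippet.

class Adam:
    # Sketch only: one plausible Adam matching Adam(learningRate=..., decay=...).
    def __init__(self, learningRate=0.001, decay=0.0,
                 beta1=0.9, beta2=0.999, epsilon=1e-7):
        self.learningRate = learningRate
        self.decay = decay
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.iterations = 0

    def update(self, layer):
        # Learning-rate decay: the step size shrinks as iterations accumulate.
        lr = self.learningRate / (1.0 + self.decay * self.iterations)
        self.iterations += 1
        t = self.iterations

        # Lazily allocate per-parameter moment buffers on first use.
        if not hasattr(layer, "m_w"):
            layer.m_w, layer.v_w = np.zeros_like(layer.w), np.zeros_like(layer.w)
            layer.m_b, layer.v_b = np.zeros_like(layer.b), np.zeros_like(layer.b)

        # First moment (momentum) and second moment (squared-gradient average).
        layer.m_w = self.beta1 * layer.m_w + (1 - self.beta1) * layer.dweights
        layer.v_w = self.beta2 * layer.v_w + (1 - self.beta2) * layer.dweights ** 2
        layer.m_b = self.beta1 * layer.m_b + (1 - self.beta1) * layer.dbiases
        layer.v_b = self.beta2 * layer.v_b + (1 - self.beta2) * layer.dbiases ** 2

        # Bias correction: undoes the zero-initialization bias of the moments.
        m_w_hat = layer.m_w / (1 - self.beta1 ** t)
        v_w_hat = layer.v_w / (1 - self.beta2 ** t)
        m_b_hat = layer.m_b / (1 - self.beta1 ** t)
        v_b_hat = layer.v_b / (1 - self.beta2 ** t)

        # Per-weight adaptive step.
        layer.w -= lr * m_w_hat / (np.sqrt(v_w_hat) + self.epsilon)
        layer.b -= lr * m_b_hat / (np.sqrt(v_b_hat) + self.epsilon)

With a class along these lines, each epoch would end with optimizer.update(layer1) and optimizer.update(layer2), assuming the layers store their weight and bias gradients during their backward() calls.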