def backward(self, dout):
    """Run the backward pass through the affine, ReLU, affine sequence.

    The three caches stored in ``self.cache`` are consumed in reverse
    order: second affine layer, then the ReLU, then the first affine layer.

    Args:
        dout: Upstream gradient passed to the second affine layer's
            backward helper.

    Returns:
        A ``(dx, grads)`` tuple. ``dx`` is the first value returned by
        ``affine_backward`` for the first layer (presumably the gradient
        with respect to the layer input; confirm against the helper).
        ``grads`` maps ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'`` to the
        matching values returned by the two ``affine_backward`` calls.
    """
    first_affine_cache, activation_cache, second_affine_cache = self.cache

    # Walk the layers last-to-first, feeding each result into the next call.
    grad_after_relu, grad_W2, grad_b2 = affine_backward(
        dout, second_affine_cache
    )
    grad_before_relu = relu_backward(grad_after_relu, activation_cache)
    dx, grad_W1, grad_b1 = affine_backward(
        grad_before_relu, first_affine_cache
    )

    grads = {}
    grads['W1'] = grad_W1
    grads['b1'] = grad_b1
    grads['W2'] = grad_W2
    grads['b2'] = grad_b2
    return dx, grads
def backward(self, dout):
    """Run the backward pass through the affine, sigmoid, scale/shift sequence.

    The three caches stored in ``self.cache`` are consumed in reverse
    order: scale/shift first, then the sigmoid, then the affine layer.

    Args:
        dout: Upstream gradient passed to ``scale_shift_backward``.

    Returns:
        A ``(dx, grads)`` tuple. ``dx`` is the first value returned by
        ``affine_backward`` (presumably the gradient with respect to the
        layer input; confirm against the helper). ``grads`` maps ``'W'``
        and ``'b'`` to the affine helper's remaining outputs, and
        ``'sigma'`` and ``'mu'`` to the second and third outputs of
        ``scale_shift_backward``.
    """
    affine_cache, activation_cache, rescale_cache = self.cache

    # Walk the layers last-to-first, feeding each result into the next call.
    grad_after_sigmoid, grad_sigma, grad_mu = scale_shift_backward(
        dout, rescale_cache
    )
    grad_before_sigmoid = sigmoid_backward(grad_after_sigmoid, activation_cache)
    dx, grad_W, grad_b = affine_backward(grad_before_sigmoid, affine_cache)

    grads = {}
    grads['W'] = grad_W
    grads['b'] = grad_b
    grads['sigma'] = grad_sigma
    grads['mu'] = grad_mu
    return dx, grads
def backward(self, dout):
    """Run the backward pass through a single affine layer.

    Args:
        dout: Upstream gradient passed to ``affine_backward`` together
            with ``self.cache``.

    Returns:
        A ``(dx, grads)`` tuple. ``dx`` is the first value returned by
        ``affine_backward`` (presumably the gradient with respect to the
        layer input; confirm against the helper). ``grads`` maps ``'W'``
        and ``'b'`` to the helper's second and third outputs.
    """
    dx, grad_W, grad_b = affine_backward(dout, self.cache)

    grads = {}
    grads['W'] = grad_W
    grads['b'] = grad_b
    return dx, grads