def conv_layer(x):
    """
    The derivative check in the gradient checker relates to the input of the
    function; hence the input should be x, since the backward step computes
    dloss/dx.
    """
    conv1 = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=2)
    relu1 = nn.Relu()
    conv2 = nn.Conv2d(in_channels=2, out_channels=4, kernel_size=2)
    relu2 = nn.Relu()
    flatten = nn.Flatten()
    linear = nn.Linear(4, 2)
    softmax = nn.Softmax()

    # forward pass
    a = relu1(conv1(x))
    a = relu2(conv2(a))
    a_flatten = flatten(a)
    dist = softmax(linear(a_flatten))

    # backward pass: cross-entropy loss with class 1 as the one-hot target
    labels = np.zeros(dist.shape)
    labels[:, 1] = 1
    loss = -np.log(np.sum(dist * labels, axis=1))

    softmax_grad = softmax.backward(labels)
    linear_grad = linear.backward(softmax_grad)
    flatten_grad = flatten.backward(linear_grad)
    relu2_grad = relu2.backward(flatten_grad)
    conv2_grad = conv2.backward(relu2_grad)
    relu1_grad = relu1.backward(conv2_grad)
    conv1_grad = conv1.backward(relu1_grad)
    return loss, conv1_grad
def conv(b):
    """
    The derivative check in the gradient checker relates to the input of the
    function; here the checked input is the bias vector b, since the backward
    step computes dloss/db.
    """
    # simulate the end of a classification network
    conv = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=2)
    relu = nn.Relu()
    flatten = nn.Flatten()
    linear = nn.Linear(in_dimension=12, out_dimension=4)
    softmax = nn.Softmax()

    conv.set_biases(b.reshape(3, 1))

    # forward pass (x is assumed to be available from the enclosing scope)
    a = flatten(relu(conv(x)))
    dist = softmax(linear(a))

    # backward pass: cross-entropy loss with class 1 as the one-hot target
    labels = np.zeros(dist.shape)
    labels[:, 1] = 1
    loss = -np.log(np.sum(dist * labels, axis=1))

    softmax_grad = softmax.backward(labels)
    linear_grad = linear.backward(softmax_grad)
    flatten_grad = flatten.backward(linear_grad)
    relu_grad = relu.backward(flatten_grad)
    conv_grad = conv.backward(relu_grad)  # running backward populates conv.b_grad
    b_grad = conv.b_grad
    return loss, b_grad
def relu_layer(x):
    relu = nn.Relu()
    softmax = nn.Softmax()

    # forward pass
    a = softmax(relu(x))

    # backward pass: cross-entropy loss with class 0 as the one-hot target
    labels = np.zeros(x.shape)
    labels[:, 0] = 1
    loss = -np.log(np.sum(a * labels, axis=1))

    softmax_grad = softmax.backward(labels)
    relu_grad = relu.backward(softmax_grad)
    return loss, relu_grad
def test_linear_module_relu_2(self):
    x = np.array([[1, 2, 0, -1],
                  [1, 2, -1, -2]])
    w = np.array([[0., 1., 0., 0.],
                  [0., 2., 2., 0.]])
    b = np.array([-5., -1.])
    # expected: relu(x @ w.T + b), so negative pre-activations are clamped to zero
    expected_res = np.array([[0., 3.],
                             [0., 1.]])

    linear_layer = nn.Linear(4, 2)
    linear_layer.set_weights(w)
    linear_layer.set_biases(b)
    relu_layer = nn.Relu()

    z = linear_layer(x)
    a = relu_layer(z)
    np.testing.assert_allclose(a, expected_res, atol=0.0001)
def linear_layer(z):
    """
    The derivative check in the gradient checker relates to the input of the
    function; hence the input should be z, since the backward step computes
    dloss/dz.
    """
    # simulate the end of a classification network
    relu_layer = nn.Relu()
    linear = nn.Linear(in_dimension=2, out_dimension=5)
    softmax = nn.Softmax()

    # forward pass
    a_L_minus_1 = relu_layer(z)
    z_L = linear(a_L_minus_1)
    a_L = softmax(z_L)

    # backward pass: cross-entropy loss with class 1 as the one-hot target
    labels = np.zeros(a_L.shape)
    labels[:, 1] = 1
    loss = -np.log(np.sum(a_L * labels, axis=1))

    softmax_grad = softmax.backward(labels)
    layer_L_grad = linear.backward(softmax_grad)
    relu_grad = relu_layer.backward(layer_L_grad)
    return loss, relu_grad
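
# The helpers above each return (loss, analytic_grad) so that a numerical
# checker can compare the analytic gradient against a central-difference
# estimate. The sketch below is an illustration only: `gradient_check` is a
# hypothetical helper, not part of the nn module under test. It assumes
# `numpy as np` is imported as elsewhere in this file, that `func` is
# deterministic across calls (i.e. layer weights are seeded or fixed), that
# x is a float array, and that the loss comes back per sample with shape
# (batch,), as in the functions above.
def gradient_check(func, x, epsilon=1e-6, tolerance=1e-4):
    """Compare func's analytic gradient at x with a central-difference estimate."""
    _, analytic_grad = func(x)
    numeric_grad = np.zeros_like(x, dtype=float)

    for idx in np.ndindex(x.shape):
        original = x[idx]

        x[idx] = original + epsilon
        loss_plus, _ = func(x)
        x[idx] = original - epsilon
        loss_minus, _ = func(x)
        x[idx] = original  # restore the perturbed entry

        # Each perturbed entry affects only its own sample's loss, so summing
        # over the batch gives the derivative of the total loss w.r.t. x[idx].
        numeric_grad[idx] = np.sum(loss_plus - loss_minus) / (2 * epsilon)

    np.testing.assert_allclose(analytic_grad, numeric_grad, atol=tolerance)

# Example usage (hypothetical): gradient_check(relu_layer, np.random.randn(3, 4))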