def backward(self, inputs, labels, outputs):
    """Backward pass of neural network.

    Changes weights and biases of each layer to reduce loss.

    Args:
        inputs (torch.tensor): inputs to train neural network. Size (batch_size, N_in)
        labels (torch.tensor): correct labels. Size (batch_size)
        outputs (torch.tensor): outputs predicted by neural network. Size (batch_size, N_out)

    Returns:
        dw1 (torch.tensor): Gradient of loss w.r.t. w1. Size like w1
        db1 (torch.tensor): Gradient of loss w.r.t. b1. Size like b1
        dw2 (torch.tensor): Gradient of loss w.r.t. w2. Size like w2
        db2 (torch.tensor): Gradient of loss w.r.t. b2. Size like b2
        dw3 (torch.tensor): Gradient of loss w.r.t. w3. Size like w3
        db3 (torch.tensor): Gradient of loss w.r.t. b3. Size like b3
    """
    # Derivative of the loss w.r.t. the final weighted sum
    # (combined softmax + cross-entropy derivative).
    delta_out = loss.delta_cross_entropy_softmax(outputs, labels)
    # Backpropagate to layer 2: (delta_out . w3) elementwise sigmoid'(z2)
    delta_2 = torch.matmul(delta_out, self.weights['w3']) \
        * activation.delta_sigmoid(self.cache['z2'])
    # Backpropagate to layer 1: (delta_2 . w2) elementwise sigmoid'(z1)
    delta_1 = torch.matmul(delta_2, self.weights['w2']) \
        * activation.delta_sigmoid(self.cache['z1'])
    # Convert the per-layer deltas into gradients for every weight and bias.
    dw1, db1, dw2, db2, dw3, db3 = self.calculate_grad(
        inputs, delta_1, delta_2, delta_out)
    return dw1, db1, dw2, db2, dw3, db3
def test_delta_sigmoid(self):
    """delta_sigmoid returns elementwise sigmoid gradients with input shape.

    Checks return type, shape, AND numeric values. The original test only
    asserted type and shape, so a delta_sigmoid returning zeros (or any
    tensor of the right shape) would still pass; sibling tests verify
    values, and this one now does too, using the analytic identity
    sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
    """
    batch_size = 6
    N_hn = 512
    precision = 0.000001
    x = torch.rand((batch_size, N_hn), dtype=torch.float)
    grads = activation.delta_sigmoid(x)
    assert isinstance(grads, torch.FloatTensor)
    assert grads.size() == torch.Size([batch_size, N_hn])
    # Verify the actual gradient values, not just metadata.
    s = torch.sigmoid(x)
    expected = s * (1 - s)
    self.assertTrue(torch.le(torch.abs(grads - expected), precision).all())
def test_delta_sigmoid(self):
    """Compare delta_sigmoid against gradients computed by torch autograd."""
    batch_size, N_hn = 6, 512
    tolerance = 0.000001
    x = torch.rand((batch_size, N_hn), dtype=torch.float, requires_grad=True)
    grads = activation.delta_sigmoid(x)
    # Let autograd populate x.grad with d(sigmoid(x))/dx for comparison.
    x.sigmoid().backward(torch.ones_like(x))
    assert isinstance(grads, torch.FloatTensor)
    assert grads.size() == torch.Size([batch_size, N_hn])
    difference = torch.abs(grads - x.grad)
    self.assertTrue(torch.le(difference, tolerance).all())
def test_delta_sigmoid(self):
    """Check delta_sigmoid against precomputed reference gradient values."""
    row = [-10, -0.2, -0.6, 0, 0.1, 0.5, 2, 50]
    # Reference sigmoid'(x) values computed by hand for each entry of `row`.
    expected_row = [
        4.53958e-05, 0.247517, 0.228784, 0.25,
        0.249376, 0.235004, 0.104994, 1.92875e-22,
    ]
    # Two identical rows exercise the batch dimension.
    x = torch.FloatTensor([row, row])
    y = torch.FloatTensor([expected_row, expected_row])
    grads = activation.delta_sigmoid(x)
    assert isinstance(grads, torch.FloatTensor)
    assert grads.size() == x.size()
    precision = 0.0009
    self.assertTrue(torch.le(torch.abs(grads - y), precision).all())