def _affine_backward(self, x, w, b, dout):
    # Helper: run a throwaway Linear layer with the given weight/bias so that
    # its backward pass computes the affine gradients for us.
    layer = Linear(w.shape[0], w.shape[1])
    layer.weight = w
    layer.bias = b
    layer.forward(x)  # forward pass caches x, which backward() needs
    layer.backward(dout)
    return layer.dx, layer.dw, layer.db
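# Hedged sketch: a finite-difference check for _affine_backward. The helper
# name check_affine_grad, the eps/atol values, and the assumption that
# Linear.forward caches its input for backward are mine, not the repo's.
# It uses the identity: for L = sum(forward(x) * dout), dL/dw equals the dw
# that backward(dout) returns.
def check_affine_grad(tester, x, w, b, dout, eps=1e-4):
    def scalar_loss():
        layer = Linear(w.shape[0], w.shape[1])
        layer.weight = w
        layer.bias = b
        return (layer.forward(x) * dout).sum().item()

    _, dw, _ = tester._affine_backward(x, w, b, dout)
    num_dw = torch.zeros_like(w)
    flat_w, flat_num = w.view(-1), num_dw.view(-1)  # views share storage with w
    for i in range(flat_w.numel()):
        orig = flat_w[i].item()
        flat_w[i] = orig + eps
        plus = scalar_loss()
        flat_w[i] = orig - eps
        minus = scalar_loss()
        flat_w[i] = orig
        flat_num[i] = (plus - minus) / (2 * eps)  # central difference
    assert torch.allclose(dw, num_dw, atol=1e-2)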
class NN2(object):
    def __init__(self, in_layer_size, hidden_layer_size, out_layer_size):
        self.fc1 = Linear(in_layer_size, hidden_layer_size)
        self.ac1 = ReLu()
        self.fc2 = Linear(hidden_layer_size, out_layer_size)

    def forward(self, x):
        s1 = self.fc1.forward(x)
        a1 = self.ac1.forward(s1)
        a2 = self.fc2.forward(a1)
        return a2

    def update(self, params):
        self.fc1.update([params[0]])
        self.fc2.update([params[1]])

    def backward(self, dL_dy2):
        '''
        Propagate dL/dy2 back through fc2 -> ac1 -> fc1. Each layer
        multiplies the incoming gradient by its local Jacobian; for a
        linear layer s = wx + b this uses ds/dw = x and ds/dx = w.
        '''
        dL_dy1 = self.fc2.backward(dL_dy2)
        dL_ds1 = self.ac1.backward(dL_dy1)
        dL_dy0 = self.fc1.backward(dL_ds1)
        return dL_dy0

    def param(self):
        return [self.fc1.param()[0], self.fc2.param()[0]]
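# A minimal training-step sketch for NN2 (assumptions: MSE loss with gradient
# dL/dy = 2*(y - t)/n, that param() returns (weight, grad) pairs, and that
# update() accepts new weight values back in the same order; lr is
# illustrative).
def train_step_nn2(model, x, target, lr=0.01):
    y = model.forward(x)
    dL_dy = 2.0 * (y - target) / y.shape[0]   # gradient of mean squared error
    model.backward(dL_dy)                      # fills each layer's weight grads
    model.update([w - lr * dw for (w, dw) in model.param()])
    return ((y - target) ** 2).mean().item()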
class NN(object):
    def __init__(self, in_layer_size, out_layer_size):
        self.fc1 = Linear(in_layer_size, out_layer_size, bias=False)
        self.ac1 = Tanh()

    def forward(self, x):
        s1 = self.fc1.forward(x)
        a1 = self.ac1.forward(s1)
        return a1

    def update(self, params):
        self.fc1.update([params[0]])
        if len(params) > 1:
            self.ac1.update(params[1])

    def backward(self, dL_dy):
        '''
        Propagate dL/dy back through ac1 -> fc1: the Tanh layer scales the
        gradient by 1 - tanh(s)^2, then the linear layer returns dL/dx and
        caches dL/dw (for s = wx, ds/dw = x).
        '''
        dL_ds = self.ac1.backward(dL_dy)
        dL_dy0 = self.fc1.backward(dL_ds)
        return dL_dy0

    def param(self):
        return [self.fc1.param()[0], self.ac1.param()[0]]
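# A minimal training-loop sketch for NN (assumptions: MSE loss, update()
# taking new parameter values, and toy data xs/ys; only fc1's weight is
# stepped here, so any ac1-side parameter is left untouched).
model = NN(in_layer_size=2, out_layer_size=1)
xs, ys = torch.randn(64, 2), torch.randn(64, 1)
lr = 0.05
for epoch in range(100):
    pred = model.forward(xs)
    model.backward(2.0 * (pred - ys) / xs.shape[0])  # dL/dy for mean squared error
    w, dw = model.param()[0]                          # fc1's (weight, grad) pair
    model.update([w - lr * dw])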
# Scratch check of the Linear/ReLU backward math on a small example.
bias = torch.Tensor([1, 2, 3, 4])
bias.shape

'''
input = input[:, :, None]
weights.matmul(input).squeeze() + bias
'''

input = torch.randn(3, 5)  # placeholder batch: 3 samples of 5 features, matching Linear(5, 4)
lin = Linear(5, 4, ReLU())
output = lin.forward(input)
target = torch.Tensor([[0, 0, 1, 0],
                       [0, 0, 0, 1],
                       [0, 0, 1, 0]])
d_loss = dloss(output, target)
prev_dl_dx = lin.backward(d_loss)
prev_dl_dx.shape

ex_dloss = torch.Tensor([[.1, .2, .2, .1],
                         [.1, .2, .2, .1],
                         [.1, .2, .2, .1]])
dl_ds = drelu(output) * ex_dloss
dl_db = dl_ds.sum(0) / dl_ds.shape[0]  # average over the batch dim only, keeping one grad per bias entry
(drelu(output) * ex_dloss).sum(0)
# `weights` here refers to the layer's weight tensor from the commented-out sketch above
prev_dl_dx = weights.transpose(1, 2).matmul(dl_ds[:, :, None]).squeeze()
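# Hedged cross-check of the manual gradients above using torch.autograd
# (a sketch; the w/x/b shapes follow the example: batch 3, in 5, out 4, and
# the affine convention s = x @ w.T + b is an assumption).
import torch

x = torch.randn(3, 5, requires_grad=True)
w = torch.randn(4, 5, requires_grad=True)
b = torch.randn(4, requires_grad=True)

s = x.matmul(w.t()) + b          # affine pre-activation: (3, 4)
out = s.clamp(min=0)             # ReLU
upstream = torch.full((3, 4), 0.1)
out.backward(upstream)           # vector-Jacobian product with the upstream grad

# Manual versions of the same gradients:
dl_ds = (s > 0).float() * upstream                           # drelu(s) * upstream
assert torch.allclose(b.grad, dl_ds.sum(0))                  # bias grad: sum over batch
assert torch.allclose(w.grad, dl_ds.t().matmul(x.detach()))  # dW = dl_ds^T x
assert torch.allclose(x.grad, dl_ds.matmul(w.detach()))      # dx = dl_ds W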