def testModelCreation(self):
    class NN(Model):
        def __init__(self):
            self.linear1 = Linear(64, 32)
            self.linear2 = Linear(32, 16)
            self.linear3 = Linear(16, 8)
            self.linear4 = Linear(8, 4)
            self.linear5 = Linear(4, 2)
            self.final = Linear(2, 1)

        def forward(self, x):
            x = self.linear1(x)
            x = self.linear2(x)
            x = self.linear3(x)
            x = self.linear4(x)
            x = self.linear5(x)
            x = self.final(x)
            return x

    model = NN()
    sgd = SGD(lr=0.002)
    model.compile(sgd, MSE)
    # model.summary()

    testInput = Tensor(np.random.randn(64, 1))
    testOutput = model(testInput)
    assert testOutput.shape == (1, 1)
def matmulBackward1(grad: Tensor, t1: 'Tensor', t2: 'Tensor') -> Tensor:
    """Gradient Function that is used when a tensor is
    matrix-multiplied with a tensor that requires gradient.

    - Math:
        let A.shape == m x n
        let B.shape == n x p
        Y = A @ B
        F(Y) = L
        dF/dB = (dY/dB).T dF/dY

    returns:
        dF/dB = A.T @ grad
    where:
        dF/dY -> grad
    """
    try:
        result = np.matmul(t1.data.T, grad.data)
    except ValueError as e:
        raise RuntimeError(
            f"Caught exception while trying to matrix-multiply two matrices "
            f"with shapes: {t1.data.T.shape} {grad.shape}"
        ) from e
    return Tensor(result)
def matmulBackward0(grad: Tensor, t1: 'Tensor', t2: 'Tensor') -> Tensor:
    """Gradient Function that is used when a tensor that
    requires gradient is matrix-multiplied with another tensor.

    - Math:
        let A.shape == m x n
        let B.shape == n x p
        Y = A @ B
        F(Y) = L
        dF/dA = dF/dY dY/dA

    returns:
        dF/dA = grad @ B.T
    where:
        dF/dY -> grad
    """
    try:
        result = np.matmul(grad.data, t2.data.T)
    except ValueError as e:
        raise RuntimeError(
            f"Caught exception while trying to matrix-multiply two matrices "
            f"with shapes: {grad.shape} {t2.data.T.shape}"
        ) from e
    return Tensor(result)
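# --- Illustrative check (not part of the library) ------------------------------
# A plain-NumPy finite-difference sketch of the two matmul gradient formulas
# above (dF/dA = grad @ B.T and dF/dB = A.T @ grad). It assumes only numpy;
# the upstream gradient is modelled by a fixed matrix G, chosen so that
# F(Y) = sum(Y * G) has dF/dY == G.
import numpy as np

def _check_matmul_grads(m=3, n=4, p=2, eps=1e-6):
    A = np.random.randn(m, n)
    B = np.random.randn(n, p)
    G = np.random.randn(m, p)                      # plays the role of `grad`

    dA_analytic = G @ B.T                          # what matmulBackward0 computes
    dB_analytic = A.T @ G                          # what matmulBackward1 computes

    # Numerical gradient w.r.t. a single entry of A
    i, j = 1, 2
    A_plus, A_minus = A.copy(), A.copy()
    A_plus[i, j] += eps
    A_minus[i, j] -= eps
    dA_numeric = (np.sum((A_plus @ B) * G) - np.sum((A_minus @ B) * G)) / (2 * eps)

    assert np.isclose(dA_analytic[i, j], dA_numeric, atol=1e-4)
    assert dB_analytic.shape == B.shape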
def meanBackward(grad: Tensor, t1: Tensor) -> Tensor:
    """Gradient Function that is used when tensor.mean()
    is executed in the computation graph
    """
    data = np.ones_like(t1.data) / np.size(t1.data)
    data = grad.data * data
    return Tensor(data)
def sumBackward(grad: Tensor, t1: 'Tensor') -> Tensor:
    """Gradient Function that is used when tensor.sum()
    is executed in the computation graph
    """
    return Tensor(grad.data * np.ones_like(t1.data))
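# --- Illustrative check (not part of the library) ------------------------------
# Quick NumPy sketch of the mean()/sum() gradients above: for y = mean(x) every
# element of x contributes 1/N to the gradient, for y = sum(x) every element
# contributes 1. The scalar upstream gradient is modelled by `g`.
import numpy as np

x = np.random.randn(4, 3)
g = 2.0                                    # upstream dF/dy for the scalar output

grad_mean = g * np.ones_like(x) / x.size   # what meanBackward computes
grad_sum = g * np.ones_like(x)             # what sumBackward computes

eps = 1e-6
xp, xm = x.copy(), x.copy()
xp[0, 0] += eps
xm[0, 0] -= eps
numeric_mean = g * (np.mean(xp) - np.mean(xm)) / (2 * eps)
assert np.isclose(grad_mean[0, 0], numeric_mean)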
class Linear(Layer):
    def __init__(self, in_features: int, out_features: int,
                 use_bias: bool = False, name: str = 'Linear') -> None:
        self._use_bias = use_bias
        self._in_features = in_features
        self._out_features = out_features
        self._name = name
        self.weights = Tensor(data=np.random.randn(out_features, in_features),
                              requires_grad=True)
        if use_bias:
            # NOTE: self.bias should support broadcasting across the batch
            # dimension; this is not implemented yet.
            self.bias = Tensor(np.random.randn(out_features, 1),
                               requires_grad=True)

    def forward(self, x: 'Tensor') -> 'Tensor':
        """Forward Propagation"""
        assert self.weights.shape[-1] == x.shape[0], \
            f"\nInput shape is not compatible with this layer's in_features\n" \
            f"Expected: ({self.weights.shape[-1]}, x) Got: ({x.shape[0]}, x)"
        if self._use_bias:
            output = self.weights @ x + self.bias
        else:
            output = self.weights @ x
        return output

    def __str__(self) -> str:
        return f'{self._name} = ({self._in_features}, {self._out_features})'

    def zero_grad(self) -> None:
        self.weights.zero_grad()
        if self._use_bias:
            self.bias.zero_grad()
        return
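# --- Usage sketch (assumes the Tensor and Linear classes above) ------------------
# A single sample is a column vector of shape (in_features, 1), so Linear(8, 3)
# maps an (8, 1) input to a (3, 1) output. Layers are called directly, as in the
# tests (e.g. self.linear1(x)).
import numpy as np

layer = Linear(8, 3)
x = Tensor(np.random.randn(8, 1))
y = layer(x)
assert y.shape == (3, 1)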
def negBackward(grad: Tensor, t1: Tensor) -> Tensor:
    """Gradient Function that is used when -tensor
    is executed in the computation graph
    """
    data = np.negative(grad.data)
    return Tensor(data)
def testModelBackward(self):
    class NN(Model):
        def __init__(self):
            self.linear1 = Linear(10, 5)
            self.linear2 = Linear(5, 1)

        def forward(self, x):
            x = self.linear1(x)
            x = self.linear2(x)
            return x

    model = NN()
    sgd = SGD(lr=0.01)
    model.compile(sgd, MSE)

    testData = Tensor(np.random.uniform(-10, 10, size=(10, 1)))
    output = model(testData)

    testGrad = Tensor(np.random.uniform(-10, 10, size=(1, 1)))
    output.backward(testGrad)
    return
def mulBackward1(grad: Tensor, t1: 'Tensor', t2: 'Tensor') -> Tensor:
    """Gradient function that is used when a tensor is
    multiplied element-wise with a tensor that requires gradient.

    - Math:
        Y = A * B
        dY/dA = B
        dY/dB = A

    returns:
        dF/dB = grad * A
    where:
        dF/dY -> grad
    """
    return Tensor(grad.data * t1.data)
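# --- Illustrative check (not part of the library) ------------------------------
# Plain-NumPy sketch of the element-wise product gradient: for Y = A * B and an
# upstream gradient G = dF/dY, dF/dB = G * A (what mulBackward1 returns) and
# dF/dA = G * B.
import numpy as np

A = np.random.randn(3, 2)
B = np.random.randn(3, 2)
G = np.random.randn(3, 2)                 # upstream gradient dF/dY

dB = G * A                                # what mulBackward1 returns
dA = G * B                                # the symmetric case

eps = 1e-6
Bp, Bm = B.copy(), B.copy()
Bp[0, 0] += eps
Bm[0, 0] -= eps
numeric = (np.sum(A * Bp * G) - np.sum(A * Bm * G)) / (2 * eps)
assert np.isclose(dB[0, 0], numeric)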
def forward(self, x: Tensor, y: Tensor) -> Tensor:
    if np.any(x.data < 0) or np.any(y.data < 0):
        raise ValueError("Only non-negative values are allowed as input to this function")
    xData = np.array(x.data, dtype=float)
    yData = np.array(y.data, dtype=float)
    # Normalise both inputs so they form probability distributions
    xData /= np.sum(xData)
    yData /= np.sum(yData)
    # Drop entries where y is zero to avoid log(0)
    mask = yData > 0
    xData = xData[mask]
    yData = yData[mask]
    result = -np.sum(xData * np.log(yData))
    return Tensor(result, x.requires_grad)
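# --- Illustrative example (not part of the library) -----------------------------
# Cross entropy H(p, q) = -sum(p * log(q)) on two small distributions, computed
# with plain NumPy. The forward above additionally normalises its inputs and
# masks out zero entries of q before taking the log.
import numpy as np

p = np.array([0.7, 0.2, 0.1])       # "true" distribution
q = np.array([0.6, 0.3, 0.1])       # predicted distribution
h = -np.sum(p * np.log(q))          # ~= 0.83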
def powBackward(grad: Tensor, t1: Tensor, power: Number) -> Tensor:
    """Gradient Function that is used when tensor.pow(n) or
    tensor ** n is executed in the computation graph
    """
    # d(x ** n)/dx = n * x ** (n - 1)
    data = grad.data * np.multiply(power, (t1.data ** (power - 1)))
    return Tensor(data)
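# --- Illustrative check (not part of the library) ------------------------------
# Finite-difference sketch of the power rule used by powBackward:
# d(x ** n)/dx == n * x ** (n - 1).
import numpy as np

x = 1.7
n = 3
eps = 1e-6
analytic = n * x ** (n - 1)
numeric = ((x + eps) ** n - (x - eps) ** n) / (2 * eps)
assert np.isclose(analytic, numeric)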
def forward(self, t1: Tensor) -> Tensor:
    expData = np.exp(t1.data)
    data = expData / np.sum(expData, axis=0)
    requires_grad = t1.requires_grad
    return Tensor(data, requires_grad)
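# --- Illustrative check (not part of the library) ------------------------------
# Plain-NumPy softmax over axis 0 (columns), matching the forward above: every
# column of the result is non-negative and sums to 1.
import numpy as np

logits = np.random.randn(4, 2)
exp = np.exp(logits)
probs = exp / np.sum(exp, axis=0)
assert np.allclose(np.sum(probs, axis=0), 1.0)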
def ReLU(t1: Tensor) -> Tensor:
    # In-place operation: the third argument is np.maximum's `out`,
    # so the result is written back into t1.data
    data = np.maximum(0, t1.data, t1.data)
    return Tensor(data, t1.requires_grad)
def reluBackward(grad: Tensor, t1: Tensor) -> Tensor:
    """Gradient Function that is used when ReLU is
    executed in the computation graph
    """
    # dReLU(x)/dx is 1 where x > 0 and 0 elsewhere
    data = grad.data * np.where(t1.data > 0, 1, 0)
    return Tensor(data)
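# --- Illustrative check (not part of the library) ------------------------------
# Plain-NumPy sketch of the ReLU derivative used by reluBackward: the upstream
# gradient passes through unchanged for positive inputs and is zeroed elsewhere.
import numpy as np

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
g = np.ones_like(x)                       # upstream gradient
dx = g * np.where(x > 0, 1, 0)
assert np.array_equal(dx, np.array([0.0, 0.0, 0.0, 1.0, 1.0]))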
def Tanh(t1: Tensor) -> Tensor:
    data = np.tanh(t1.data)
    requires_grad = t1.requires_grad
    return Tensor(data, requires_grad)
def tanhBackward(grad: Tensor, t1: Tensor) -> Tensor:
    """Gradient Function that is used when Tanh is
    executed in the computation graph
    """
    # d(tanh(x))/dx = 1 - tanh(x) ** 2
    data = grad.data * (1 - np.tanh(t1.data) ** 2)
    return Tensor(data)
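# --- Illustrative check (not part of the library) ------------------------------
# Finite-difference sketch of the tanh derivative used by tanhBackward:
# d(tanh(x))/dx == 1 - tanh(x) ** 2.
import numpy as np

x = 0.3
eps = 1e-6
analytic = 1 - np.tanh(x) ** 2
numeric = (np.tanh(x + eps) - np.tanh(x - eps)) / (2 * eps)
assert np.isclose(analytic, numeric)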
def subBackward1(grad: Tensor, t1: Tensor, t2: Tensor) -> Tensor:
    """Gradient Function that is used when a tensor is
    subtracted by a tensor that requires gradient
    """
    # d(A - B)/dB = -1, so the upstream gradient is negated
    data = np.negative(grad.data)
    return Tensor(data)
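# --- Illustrative check (not part of the library) ------------------------------
# Plain-NumPy sketch covering negBackward and subBackward1: for Y = -A the
# gradient w.r.t. A is -grad, and for Y = A - B the gradient w.r.t. B is also
# -grad, since d(A - B)/dB = -1. The upstream gradient is modelled by G.
import numpy as np

A = np.random.randn(2, 2)
B = np.random.randn(2, 2)
G = np.random.randn(2, 2)                 # upstream gradient dF/dY

dA_neg = -G                               # what negBackward returns
dB_sub = -G                               # what subBackward1 returns

eps = 1e-6
Bp, Bm = B.copy(), B.copy()
Bp[0, 1] += eps
Bm[0, 1] -= eps
numeric = (np.sum((A - Bp) * G) - np.sum((A - Bm) * G)) / (2 * eps)
assert np.isclose(dB_sub[0, 1], numeric)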