def test_broadcasted_scalar_add(self) -> None:
    """In this test t2 is a scalar."""
    t1 = Tensor([[1, 2, 3], [4, 5, 6]], requires_grad=True)
    t2 = Tensor(1, requires_grad=True)

    t3 = t1 + t2
    assert t3.data.tolist() == [[2, 3, 4], [5, 6, 7]]

    t3.backward(np.asarray([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]))
    assert t1.grad.tolist() == [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
    # The scalar t2 is broadcast to all six positions of t1, so its
    # gradient is the sum of the upstream gradient over those positions.
    assert t2.grad.tolist() == 6.0

    # Also try the reverse direction
    t1.zero_grad(), t2.zero_grad()
    t4 = t2 + t1
    assert t4.data.tolist() == [[2, 3, 4], [5, 6, 7]]

    t4.backward(np.asarray([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]))
    assert t1.grad.tolist() == [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
    # Same as above: the scalar's gradient sums over the six broadcast
    # positions.
    assert t2.grad.tolist() == 6.0
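# Hedged sketch (not part of the test suite): one plausible way an add
# backward can reduce the upstream gradient back to a broadcast operand's
# shape is to sum over the broadcast dimensions. This is plain NumPy and
# only illustrates why the scalar's gradient above accumulates to 6.0;
# the actual Tensor implementation may differ in detail.
import numpy as np

def reduce_grad_to_shape(grad: np.ndarray, shape: tuple) -> np.ndarray:
    # Sum away the leading dimensions added by broadcasting.
    while grad.ndim > len(shape):
        grad = grad.sum(axis=0)
    # Sum over dimensions that were stretched from size 1.
    for i, dim in enumerate(shape):
        if dim == 1:
            grad = grad.sum(axis=i, keepdims=True)
    return grad

upstream = np.ones((2, 3))
assert reduce_grad_to_shape(upstream, ()).tolist() == 6.0  # scalar operand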
def test_axis_sum(self) -> None:
    t1 = Tensor([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], requires_grad=True)

    # First axis
    t2 = t1.sum(0)
    assert t2.data.tolist() == [2, 4, 6]
    t2.backward(np.asarray([1.0, 2.0, 3.0]))
    assert t1.grad.tolist() == [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]]

    # Second axis
    t1.zero_grad()
    t3 = t1.sum(1)
    assert t3.data.tolist() == [6, 6]
    t3.backward(np.asarray([1.0, 2.0]))
    assert t1.grad.tolist() == [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]
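# Hedged sketch (illustration only): the backward of a sum along an axis
# can be viewed as re-inserting the reduced axis and broadcasting the
# upstream gradient back to the input's shape. Plain NumPy, assuming the
# Tensor.sum backward behaves along these lines.
import numpy as np

x = np.array([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]])
upstream = np.array([1.0, 2.0])  # gradient w.r.t. x.sum(1)
grad_x = np.broadcast_to(np.expand_dims(upstream, 1), x.shape)
assert grad_x.tolist() == [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]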
def test_simple_sum(self) -> None:
    t1 = Tensor([1, 2, 3], requires_grad=True)

    t2 = t1.sum()
    assert t2.data.tolist() == 6
    assert t2.requires_grad
    t2.backward()
    assert t1.grad.tolist() == [1.0, 1.0, 1.0]

    # Also try with a specified grad
    t1.zero_grad()
    t3 = t1.sum()
    assert t3.data.tolist() == 6
    t3.backward(2)
    assert t1.grad.tolist() == [2.0, 2.0, 2.0]
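# Hedged sketch: for a full sum, every input element contributes with
# weight 1, so the gradient is just the upstream value spread over the
# input's shape (plain NumPy illustration, not the Tensor code itself).
import numpy as np

x = np.array([1.0, 2.0, 3.0])
upstream = 2.0
assert (upstream * np.ones_like(x)).tolist() == [2.0, 2.0, 2.0]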
def test_broadcasted_scalar_mul(self) -> None:
    """In this test t2 is a scalar."""
    t1 = Tensor([[1, 2, 3], [4, 5, 6]], requires_grad=True)
    t2 = Tensor(2, requires_grad=True)

    t3 = t1 * t2
    assert t3.data.tolist() == [[2, 4, 6], [8, 10, 12]]

    t3.backward(np.asarray([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]))
    assert t1.grad.tolist() == [[2.0, 2.0, 2.0], [2.0, 2.0, 2.0]]
    assert t2.grad.tolist() == 21.0

    # Also try the reverse direction
    t1.zero_grad(), t2.zero_grad()
    t4 = t2 * t1
    assert t4.data.tolist() == [[2, 4, 6], [8, 10, 12]]

    t4.backward(np.asarray([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]))
    assert t1.grad.tolist() == [[2.0, 2.0, 2.0], [2.0, 2.0, 2.0]]
    assert t2.grad.tolist() == 21.0
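# Hedged sketch: for t1 * t2 with t2 a scalar, the derivative of each
# output element w.r.t. t2 is the matching element of t1, so the scalar's
# gradient is the sum of upstream * t1 — which is why it comes out to 21.0
# above. Plain NumPy illustration only.
import numpy as np

t1_data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
upstream = np.ones((2, 3))
assert float((upstream * t1_data).sum()) == 21.0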
bias = 2
y = x @ coefs + bias

# Our model
w = Tensor(np.random.randn(3), requires_grad=True)
b = Tensor(np.random.rand(), requires_grad=True)

# Train the model
lr = 0.001
batch_size = 25
for _ in range(1000):
    # Train in batches
    idx = np.arange(x.shape[0])
    np.random.shuffle(idx)
    w.zero_grad(), b.zero_grad()
    for start in range(0, x.shape[0], batch_size):
        batch_idx = idx[start:start + batch_size]
        pred = x[batch_idx] @ w + b
        errors = y[batch_idx] - pred
        mse_loss = (errors * errors).sum()
        mse_loss.backward()
    print(mse_loss.data)

    # Gradient Descent
    w.data -= lr * w.grad
    b.data -= lr * b.grad

print(f"Target function: coefficients={coefs.data}, bias={bias}")
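# Hedged follow-up (illustration): after training, the learned parameters
# can be printed next to the target function for a sanity check. How
# closely they match depends on the data, the learning rate, and the
# number of epochs, so no exact equality is asserted here.
print(f"Learned function: coefficients={w.data}, bias={b.data}")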