def test_sgd_momentum(monkeypatch: pytest.MonkeyPatch) -> None:
    """Compare one ``SGDMomentum.update`` call against precomputed arrays.

    ``SGDMomentum.init_context`` is patched so that the optimizer is built
    with the fixed context ``(0.9, velocity)`` defined in this test.
    """
    rows, cols = 4, 5

    def grid(start, stop):
        # Evenly spaced values laid out as a (rows, cols) matrix.
        return np.linspace(start, stop, num=rows * cols).reshape(rows, cols)

    weights = grid(-0.4, 0.6)
    grads = grid(-0.6, 0.4)
    velocity = grid(0.6, 0.9)

    model = Linear(rows, cols)

    def fixed_context(self, w):
        # 0.9 is presumably the momentum coefficient -- confirm against
        # SGDMomentum.init_context.
        return (0.9, velocity)

    # Patch before constructing the optimizer so the fixed context is the
    # one in effect when the optimizer is created.
    monkeypatch.setattr(SGDMomentum, "init_context", fixed_context)
    optimizer = SGDMomentum(model, lr=1e-3)

    next_w, updated_context = optimizer.update(
        optimizer.context["w"], weights, grads
    )
    _, next_v = updated_context

    expected_next_w = np.array(
        [
            [0.1406, 0.20738947, 0.27417895, 0.34096842, 0.40775789],
            [0.47454737, 0.54133684, 0.60812632, 0.67491579, 0.74170526],
            [0.80849474, 0.87528421, 0.94207368, 1.00886316, 1.07565263],
            [1.14244211, 1.20923158, 1.27602105, 1.34281053, 1.4096],
        ]
    )
    expected_velocity = np.array(
        [
            [0.5406, 0.55475789, 0.56891579, 0.58307368, 0.59723158],
            [0.61138947, 0.62554737, 0.63970526, 0.65386316, 0.66802105],
            [0.68217895, 0.69633684, 0.71049474, 0.72465263, 0.73881053],
            [0.75296842, 0.76712632, 0.78128421, 0.79544211, 0.8096],
        ]
    )

    assert np.allclose(next_w, expected_next_w)
    assert np.allclose(next_v, expected_velocity)
def test_rmsprop(monkeypatch: pytest.MonkeyPatch) -> None:
    """Compare one ``RMSProp.update`` call against precomputed arrays.

    ``RMSProp.init_context`` is patched to return the one-element tuple
    ``(cache,)`` built in this test.
    """
    rows, cols = 4, 5

    def grid(start, stop):
        # Evenly spaced values laid out as a (rows, cols) matrix.
        return np.linspace(start, stop, num=rows * cols).reshape(rows, cols)

    weights = grid(-0.4, 0.6)
    grads = grid(-0.6, 0.4)
    cache = grid(0.6, 0.9)

    model = Linear(rows, cols)

    def fixed_context(self, w):
        return (cache,)

    # Patch before constructing the optimizer so the fixed context is the
    # one in effect when the optimizer is created.
    monkeypatch.setattr(RMSProp, "init_context", fixed_context)
    optimizer = RMSProp(model, lr=1e-2)

    next_w, updated_context = optimizer.update(
        optimizer.context["w"], weights, grads
    )
    (next_v,) = updated_context

    expected_next_w = np.array(
        [
            [-0.39223849, -0.34037513, -0.28849239, -0.23659121, -0.18467247],
            [-0.132737, -0.08078555, -0.02881884, 0.02316247, 0.07515774],
            [0.12716641, 0.17918792, 0.23122175, 0.28326742, 0.33532447],
            [0.38739248, 0.43947102, 0.49155973, 0.54365823, 0.59576619],
        ]
    )
    expected_velocity = np.array(
        [
            [0.5976, 0.6126277, 0.6277108, 0.64284931, 0.65804321],
            [0.67329252, 0.68859723, 0.70395734, 0.71937285, 0.73484377],
            [0.75037008, 0.7659518, 0.78158892, 0.79728144, 0.81302936],
            [0.82883269, 0.84469141, 0.86060554, 0.87657507, 0.8926],
        ]
    )

    assert np.allclose(next_w, expected_next_w)
    assert np.allclose(next_v, expected_velocity)
def test_linear_backward() -> None:
    """Check ``Linear.backward`` against gradients from ``estimate_gradients``.

    Random inputs, parameters and an upstream gradient are drawn, the
    estimated gradients are computed, and then the analytic gradients from
    a forward/backward pass must match them under ``np.allclose``.
    """
    # NOTE(review): the RNG is not seeded inside this test, so the inputs
    # differ between runs unless a seed is set elsewhere -- confirm.
    inputs = np.random.randn(10, 2, 3)
    weight = np.random.randn(6, 5)
    bias = np.random.randn(5)
    upstream = np.random.randn(10, 5)

    model = Linear(10, 5)

    dx_num, dw_num, db_num = estimate_gradients(
        model, upstream, inputs, {"w": weight, "b": bias}
    )

    # The forward pass runs before backward; its output is not needed here.
    model(inputs)
    dx, dw, db = model.backward(upstream)

    comparisons = ((dx_num, dx), (dw_num, dw), (db_num, db))
    for estimated, analytic in comparisons:
        assert np.allclose(estimated, analytic)
class MissingGradients(Module):
    """Two stacked ``Linear`` layers whose ``backward`` returns no gradients.

    Both layers are registered via ``set_parameters``, yet ``backward``
    always returns an empty dict. Presumably a fixture for exercising
    missing-gradient handling -- confirm against the tests that use it.
    """

    def __init__(self) -> None:
        super().__init__()
        self.fc1 = Linear(784, 30)
        self.fc2 = Linear(30, 10)
        registered = ("fc1", "fc2")
        self.set_parameters(*registered)

    def forward(self, x: NPArray) -> NPArray:
        """Apply ``fc1`` and then ``fc2`` to ``x``."""
        return self.fc2(self.fc1(x))

    def backward(self, dout: NPArray) -> dict[str, NPArray]:
        """Run both layers' backward passes and return an empty dict."""
        upstream, _, _ = self.fc2.backward(dout)
        # The layer gradients are discarded; only the calls themselves
        # (and their three-value results) are exercised.
        _, _, _ = self.fc1.backward(upstream)
        return dict()
class MissingParameters(Module):
    """Two stacked ``Linear`` layers that never call ``set_parameters``.

    ``backward`` still reports gradients for both layers. Presumably a
    fixture for exercising unregistered-parameter handling -- confirm
    against the tests that use it.
    """

    def __init__(self) -> None:
        super().__init__()
        self.fc1 = Linear(784, 30)
        self.fc2 = Linear(30, 10)

    def forward(self, x: NPArray) -> NPArray:
        """Apply ``fc1`` and then ``fc2`` to ``x``."""
        hidden = self.fc1(x)
        return self.fc2(hidden)

    def backward(self, dout: NPArray) -> dict[str, dict[str, NPArray]]:
        """Backpropagate through ``fc2`` then ``fc1``.

        Returns a dict keyed by layer name (``"fc2"`` first, then
        ``"fc1"``), each mapping ``"w"`` and ``"b"`` to that layer's
        gradients.
        """
        upstream, fc2_dw, fc2_db = self.fc2.backward(dout)
        _, fc1_dw, fc1_db = self.fc1.backward(upstream)
        return {
            "fc2": {"w": fc2_dw, "b": fc2_db},
            "fc1": {"w": fc1_dw, "b": fc1_db},
        }
def test_linear_forward() -> None:
    """Check the ``Linear`` forward output against precomputed values.

    The input has shape ``(2, 4, 5, 6)``; the layer's ``w`` and ``b`` are
    overwritten with ``linspace`` values of shapes ``(120, 3)`` and
    ``(3,)`` before the layer is called.
    """
    lead_dim = 2
    trailing_shape = (4, 5, 6)
    n_outputs = 3

    # Number of elements in one slice along the leading axis (4 * 5 * 6).
    flat_size = np.prod(trailing_shape)

    x = np.linspace(-0.1, 0.5, num=lead_dim * flat_size)
    x = x.reshape(lead_dim, *trailing_shape)

    model = Linear(lead_dim, n_outputs)
    # Replace whatever parameters the constructor created with
    # deterministic ones.
    model.w = np.linspace(-0.2, 0.3, num=n_outputs * flat_size).reshape(
        flat_size, n_outputs
    )
    model.b = np.linspace(-0.3, 0.1, num=n_outputs)

    expected = np.array(
        [
            [1.49834967, 1.70660132, 1.91485297],
            [3.25553199, 3.5141327, 3.77273342],
        ]
    )
    assert np.allclose(model(x), expected)
def test_sgd() -> None:
    """Compare one ``SGD.update`` call (``lr=1e-2``) with precomputed values."""
    rows, cols = 4, 5
    count = rows * cols

    weights = np.linspace(-0.4, 0.6, num=count).reshape(rows, cols)
    grads = np.linspace(-0.6, 0.4, num=count).reshape(rows, cols)

    optimizer = SGD(Linear(rows, cols), lr=1e-2)

    # update returns a pair; the second item is not checked here.
    next_w, _ = optimizer.update(optimizer.context["w"], weights, grads)

    expected_next_w = np.array(
        [
            [-0.394, -0.34189474, -0.28978947, -0.23768421, -0.18557895],
            [-0.13347368, -0.08136842, -0.02926316, 0.02284211, 0.07494737],
            [0.12705263, 0.17915789, 0.23126316, 0.28336842, 0.33547368],
            [0.38757895, 0.43968421, 0.49178947, 0.54389474, 0.596],
        ]
    )
    assert np.allclose(next_w, expected_next_w)
def test_adam(monkeypatch: pytest.MonkeyPatch) -> None:
    """Compare one ``Adam.update`` call against precomputed arrays.

    ``Adam.init_context`` is patched to return ``(first, second, 5)``, where
    the two arrays are built in this test. The third item of the returned
    context is not checked.
    """
    rows, cols = 4, 5

    def grid(start, stop):
        # Evenly spaced values laid out as a (rows, cols) matrix.
        return np.linspace(start, stop, num=rows * cols).reshape(rows, cols)

    weights = grid(-0.4, 0.6)
    grads = grid(-0.6, 0.4)
    first = grid(0.6, 0.9)
    second = grid(0.7, 0.5)

    model = Linear(rows, cols)

    def fixed_context(self, w):
        # The trailing 5 is presumably the step counter -- confirm against
        # Adam.init_context.
        return (first, second, 5)

    # Patch before constructing the optimizer so the fixed context is the
    # one in effect when the optimizer is created.
    monkeypatch.setattr(Adam, "init_context", fixed_context)
    optimizer = Adam(model, lr=1e-2)

    next_w, updated_context = optimizer.update(
        optimizer.context["w"], weights, grads
    )
    next_m, next_v, _ = updated_context

    expected_next_w = np.array(
        [
            [-0.40094747, -0.34836187, -0.29577703, -0.24319299, -0.19060977],
            [-0.1380274, -0.08544591, -0.03286534, 0.01971428, 0.0722929],
            [0.1248705, 0.17744702, 0.23002243, 0.28259667, 0.33516969],
            [0.38774145, 0.44031188, 0.49288093, 0.54544852, 0.59801459],
        ]
    )
    expected_m = np.array(
        [
            [0.48, 0.49947368, 0.51894737, 0.53842105, 0.55789474],
            [0.57736842, 0.59684211, 0.61631579, 0.63578947, 0.65526316],
            [0.67473684, 0.69421053, 0.71368421, 0.73315789, 0.75263158],
            [0.77210526, 0.79157895, 0.81105263, 0.83052632, 0.85],
        ]
    )
    expected_v = np.array(
        [
            [0.69966, 0.68908382, 0.67851319, 0.66794809, 0.65738853],
            [0.64683452, 0.63628604, 0.6257431, 0.61520571, 0.60467385],
            [0.59414753, 0.58362676, 0.57311152, 0.56260183, 0.55209767],
            [0.54159906, 0.53110598, 0.52061845, 0.51013645, 0.49966],
        ]
    )

    assert np.allclose(next_w, expected_next_w)
    assert np.allclose(next_m, expected_m)
    assert np.allclose(next_v, expected_v)
def __init__(self) -> None:
    """Initialise the base class and create the single ``fc1`` layer."""
    super().__init__()
    fc1_args = (784, 30)
    self.fc1 = Linear(*fc1_args)
def __init__(self) -> None:
    """Create ``fc1`` and ``fc2`` and register ``"fc1"`` and ``"fc3"``."""
    super().__init__()
    fc1_args, fc2_args = (784, 30), (30, 10)
    self.fc1 = Linear(*fc1_args)
    self.fc2 = Linear(*fc2_args)
    # NOTE(review): "fc3" is not an attribute assigned in this initializer;
    # presumably deliberate, to exercise handling of an unknown parameter
    # name -- confirm against the tests that use this class.
    self.set_parameters("fc1", "fc3")