def test_update():
    """Run two hand-computed optimizer steps on a 2x2 ``Linear`` layer.

    The layer starts with identity weights and a zero bias. After each
    backward pass the gradients are handed to the optimizer and the
    resulting parameters are compared against values worked out by hand.
    """
    identity = numpy.asarray([1.0, 0.0, 0.0, 1.0], dtype="f").reshape((2, 2))
    zero_bias = numpy.asarray([0.0, 0.0], dtype="f")

    model = Linear(2, 2)
    model.set_param("W", identity)
    model.set_param("b", zero_bias)
    sgd = SGD(1.0, L2=0.0, grad_clip=0.0)
    sgd.averages = None

    # The four boolean input pairs; only ``ff`` and ``tf`` are used below.
    ff = numpy.asarray([[0.0, 0.0]], dtype="f")
    tf = numpy.asarray([[1.0, 0.0]], dtype="f")
    ft = numpy.asarray([[0.0, 1.0]], dtype="f")  # noqa: F841
    tt = numpy.asarray([[1.0, 1.0]], dtype="f")  # noqa: F841

    # --- Step 1: input ff, i.e. 0, 0 ---
    scores, backprop = model.begin_update(ff)
    assert_allclose(scores[0, 0], scores[0, 1])
    # Tell it the answer was 'f'
    d_scores = numpy.asarray([[-1.0, 0.0]], dtype="f")
    backprop(d_scores)
    for key, (param, d_param) in model.get_gradients().items():
        updated, cleared = sgd(key, param, d_param)
        name = key[1]
        model.set_param(name, updated)
        model.set_grad(name, cleared)

    bias_now = model.get_param("b")
    weights_now = model.get_param("W")
    assert bias_now[0] == 1.0
    assert bias_now[1] == 0.0
    # Unchanged -- input was zeros, so can't get gradient for weights.
    expected_after_ff = {(0, 0): 1.0, (0, 1): 0.0, (1, 0): 0.0, (1, 1): 1.0}
    for index, expected in expected_after_ff.items():
        assert weights_now[index] == expected

    # --- Step 2: input tf, i.e. 1, 0 ---
    scores, backprop = model.begin_update(tf)
    # Tell it the answer was 'T'
    d_scores = numpy.asarray([[0.0, -1.0]], dtype="f")
    backprop(d_scores)
    # The optimizer's return values are not written back here, so the
    # assertions below presumably rely on it updating the arrays in place.
    for key, (param, d_param) in model.get_gradients().items():
        sgd(key, param, d_param)

    bias_now = model.get_param("b")
    weights_now = model.get_param("W")
    assert bias_now[0] == 1.0
    assert bias_now[1] == 1.0
    # Gradient for weights should have been outer(gradient, input)
    # so outer([0, -1.], [1., 0.])
    # =  [[0., 0.], [-1., 0.]]
    expected_after_tf = {
        (0, 0): 1.0 - 0.0,
        (0, 1): 0.0 - 0.0,
        (1, 0): 0.0 - -1.0,
        (1, 1): 1.0 - 0.0,
    }
    for index, expected in expected_after_tf.items():
        assert weights_now[index] == expected
def test_pytorch_unwrapped(nN, nI, nO):
    """Check that a plain ``Linear`` layer passes ``check_learns_zero_output``.

    Inputs are ``nN`` rows of ``nI`` uniform random float32 values and the
    targets are an all-zero ``(nN, nO)`` float32 array.
    """
    # Build the model before drawing random inputs, as initialisation may
    # itself consume random state (not visible from here).
    model = Linear(nO, nI).initialize()
    inputs = numpy.random.uniform(size=(nN, nI)).astype("f")
    optimizer = SGD(0.01)
    targets = numpy.zeros((nN, nO), dtype="f")
    check_learns_zero_output(model, optimizer, inputs, targets)
def test_pytorch_wrapper(nN, nI, nO):
    """Exercise a ``torch.nn.Linear`` layer wrapped in ``PyTorchWrapper``.

    Runs one forward/backward/update cycle, checking that outputs are numpy
    arrays of the expected shapes, then hands the model to
    ``check_learns_zero_output`` and finally checks ``predict``'s return type.
    """
    import torch.nn

    torch_layer = torch.nn.Linear(nI, nO)
    model = PyTorchWrapper(torch_layer).initialize()
    optimizer = SGD(0.001)
    inputs = numpy.random.uniform(size=(nN, nI)).astype("f")
    targets = numpy.zeros((nN, nO), dtype="f")

    # One manual training step.
    guesses, backprop = model.begin_update(inputs)
    assert isinstance(guesses, numpy.ndarray)
    assert guesses.shape == (nN, nO)
    # Error signal: difference from the targets, divided by the row count.
    d_guesses = (guesses - targets) / guesses.shape[0]
    d_inputs = backprop(d_guesses)
    model.finish_update(optimizer)
    assert d_inputs.shape == (nN, nI)

    check_learns_zero_output(model, optimizer, inputs, targets)
    predictions = model.predict(inputs)
    assert isinstance(predictions, numpy.ndarray)
def sgd():
    """Return a new ``SGD`` built with ``0.001`` and a ``NumpyOps`` instance.

    NOTE(review): another ``sgd`` definition appears later in this view and
    would rebind the name if both live in the same module -- confirm.
    """
    numpy_ops = NumpyOps()
    return SGD(0.001, ops=numpy_ops)
def sgd():
    """Return a new ``SGD`` built with ``0.001`` and no other arguments.

    NOTE(review): an earlier ``sgd`` definition appears in this view; if both
    live in the same module this one replaces it -- confirm.
    """
    optimizer = SGD(0.001)
    return optimizer