def train(model, optim, steps, BS=128):
    losses, accuracies = [], []
    t = trange(steps)
    for i in t:
        optim.zero_grad()
        samp = np.random.randint(0, X_train.shape[0], size=(BS))

        x = Tensor(X_train[samp].reshape((-1, 28 * 28)).astype(np.float32))
        Y = Y_train[samp]
        y = np.zeros((len(samp), 10), np.float32)
        # correct loss for NLL, torch NLL loss returns one per row
        y[range(y.shape[0]), Y] = -10.0
        y = Tensor(y)

        # network
        out = model.forward(x)

        # NLL loss function
        loss = out.mul(y).mean()
        loss.backward()
        optim.step()

        cat = np.argmax(out.data, axis=1)
        accuracy = (cat == Y).mean()

        # printing
        loss = loss.data
        losses.append(loss)
        accuracies.append(accuracy)
        t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))
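# Hedged driver sketch for train(): it reads X_train/Y_train from module scope, so
# the MNIST arrays must be loaded first. The SGD class, the TinyBobNet name, and
# passing the model's two weight Tensors directly as the parameter list are
# assumptions for illustration, not confirmed API of this repository.
if __name__ == "__main__":
    X_train, Y_train, X_test, Y_test = fetch_mnist()
    model = TinyBobNet()
    sgd = SGD([model.l1, model.l2], lr=0.001)
    train(model, sgd, steps=1000, BS=128)
    print("test accuracy: %.4f" % numpy_eval())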
def __init__(self):
    # https://keras.io/examples/vision/mnist_convnet/
    conv = 3
    #inter_chan, out_chan = 32, 64
    inter_chan, out_chan = 8, 16  # for speed
    self.c1 = Tensor(layer_init_uniform(inter_chan, 1, conv, conv))
    self.c2 = Tensor(layer_init_uniform(out_chan, inter_chan, conv, conv))
    self.l1 = Tensor(layer_init_uniform(out_chan * 5 * 5, 10))
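# Why l1 expects out_chan * 5 * 5 inputs: following the Keras mnist_convnet recipe,
# a 28x28 image goes through two valid 3x3 convolutions, each followed by 2x2 max
# pooling: 28 -> 26 -> 13 -> 11 -> 5. A forward pass might look like the sketch
# below; max_pool2d, reshape(shape=...), and logsoftmax are assumed Tensor ops here,
# not confirmed parts of this library's API.
def forward(self, x):
    x = x.reshape(shape=(-1, 1, 28, 28))
    x = x.conv2d(self.c1).relu().max_pool2d()   # (BS, inter_chan, 13, 13)
    x = x.conv2d(self.c2).relu().max_pool2d()   # (BS, out_chan, 5, 5)
    x = x.reshape(shape=(x.shape[0], -1))       # (BS, out_chan * 5 * 5)
    return x.dot(self.l1).logsoftmax()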
def test_conv2d(self):
    inn = np.random.rand(1, 3, 32, 32)
    w_in = np.random.rand(5, 3, 5, 5)

    x = Tensor(inn, requires_grad=True)
    w = Tensor(w_in, requires_grad=True)
    out = x.conv2d(w, padding=1)

    out_torch = F.conv2d(torch.tensor(inn), torch.tensor(w_in), stride=1, padding=1)
    np.testing.assert_allclose(out.data, out_torch.data, rtol=1e-06)
def test_add_gradient():
    a = Tensor([1, 2, 3, 4, 5], requires_grad=True)
    b = Tensor([2, 2, 2, 2, 2], requires_grad=True)
    c = a + b
    c.backward()
    assert (a.grad.data == np.array([1, 1, 1, 1, 1])).all()
class TinyNet():
    def __init__(self):
        self.x = Tensor(x_init.copy())
        self.W = Tensor(W_init.copy())
        self.m = Tensor(m_init.copy())

    def forward(self):
        out = self.x.dot(self.W).relu()
        out = out.logsoftmax()
        out = out.mul(self.m).add(self.m).sum()
        return out
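# TinyNet is the kind of small fixed graph used to check gradients against PyTorch:
# run the same ops in torch on copies of x_init/W_init/m_init and compare. A hedged
# sketch of that comparison, assuming logsoftmax acts over the last dimension and
# that backward() populates .grad on the leaf Tensors:
def test_tinynet_backward():
    net = TinyNet()
    out = net.forward()
    out.backward()

    xt = torch.tensor(x_init.copy(), requires_grad=True)
    Wt = torch.tensor(W_init.copy(), requires_grad=True)
    mt = torch.tensor(m_init.copy())
    outt = torch.nn.functional.log_softmax(xt.matmul(Wt).relu(), dim=1)
    outt = outt.mul(mt).add(mt).sum()
    outt.backward()

    np.testing.assert_allclose(net.x.grad.data, xt.grad.numpy(), atol=1e-5)
    np.testing.assert_allclose(net.W.grad.data, Wt.grad.numpy(), atol=1e-5)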
def __init__(self, n_inputs, n_outputs, init_fn=None):
    super().__init__(init_fn=init_fn)
    if self.init_fn is None:
        # He initialization scaled by the fan-in
        W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0 / n_inputs)
    else:
        W = init_fn(n_inputs, n_outputs)
    self.weight = Tensor(W, requires_grad=True)
    self.parameters.append(self.weight)
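# Hedged usage sketch, assuming this __init__ belongs to a fully-connected layer
# class (called Linear here purely for illustration): the weight is He-initialized
# from the fan-in unless an explicit init_fn is supplied.
layer = Linear(784, 128)
assert layer.weight.data.shape == (784, 128)

custom = Linear(784, 128, init_fn=lambda i, o: np.zeros((i, o)))
assert (custom.weight.data == 0).all()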
def helper_test_op(shps, torch_fxn, slowgrad_fxn, atol=0, rtol=1e-6,
                   grad_atol=0, grad_rtol=1e-6, forward_only=False):
    torch.manual_seed(0)
    ts = [torch.rand(x, requires_grad=True) for x in shps]
    tst = [Tensor(x.detach().numpy(), requires_grad=True) for x in ts]

    out = torch_fxn(*ts)
    ret = slowgrad_fxn(*tst)
    np.testing.assert_allclose(ret.data, out.detach().numpy(), atol=atol, rtol=rtol)

    if not forward_only:
        out.mean().backward()
        ret.mean().backward()
        for t, tt in zip(ts, tst):
            np.testing.assert_allclose(t.grad, tt.grad.data, atol=grad_atol, rtol=grad_rtol)

    # speed
    torch_fp = timeit.Timer(functools.partial(torch_fxn, *ts)).timeit(5) * 1000 / 5
    slowgrad_fp = timeit.Timer(functools.partial(slowgrad_fxn, *tst)).timeit(5) * 1000 / 5

    if not forward_only:
        torch_fbp = timeit.Timer(
            functools.partial(lambda f, x: f(*x).mean().backward(), torch_fxn, ts)).timeit(5) * 1000 / 5
        slowgrad_fbp = timeit.Timer(
            functools.partial(lambda f, x: f(*x).mean().backward(), slowgrad_fxn, tst)).timeit(5) * 1000 / 5
    else:
        torch_fbp, slowgrad_fbp = np.nan, np.nan

    print("testing %30r   torch/slowgrad fp: %.2f / %.2f ms   bp: %.2f / %.2f ms" %
          (shps, torch_fp, slowgrad_fp, torch_fbp - torch_fp, slowgrad_fbp - slowgrad_fp))
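# helper_test_op is meant to be called from per-op tests; a hedged usage sketch.
# Tensor.add and Tensor.dot are taken from ops used elsewhere in this section, but
# the exact test names and shapes below are illustrative.
def test_add(self):
    helper_test_op([(45, 65), (45, 65)], lambda x, y: x + y, Tensor.add)

def test_dot(self):
    helper_test_op([(45, 65), (65, 100)], lambda x, y: x.matmul(y), Tensor.dot)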
def __init__(self, in_channels, out_channels, kernel_size=(2, 2), stride=1,
             padding=0, init_fn=None):
    super().__init__(init_fn=init_fn)
    shape = (out_channels, in_channels, *kernel_size)
    if self.init_fn is None:
        # He initialization scaled by the fan-in of the kernel
        # (the original referenced an undefined n_inputs)
        fan_in = in_channels * kernel_size[0] * kernel_size[1]
        W = np.random.randn(*shape) * np.sqrt(2.0 / fan_in)
    else:
        W = init_fn(*shape)
    self.weight = Tensor(W, requires_grad=True)
    self.parameters.append(self.weight)
def numpy_eval():
    Y_test_preds_out = model.forward(
        Tensor(X_test.reshape((-1, 28 * 28)).astype(np.float32)))
    Y_test_preds = np.argmax(Y_test_preds_out.data, axis=1)
    return (Y_test == Y_test_preds).mean()
X_train, Y_train, X_test, Y_test = fetch_mnist()

model = TinyConvNet()
optim = optim.SGD(model.parameters(), lr=0.001)

BS = 128
losses, accuracies = [], []
steps = 1000
t = trange(steps)
for i in t:
    optim.zero_grad()
    samp = np.random.randint(0, X_train.shape[0], size=(BS))

    x = Tensor(X_train[samp].reshape((-1, 28 * 28)).astype(np.float32))
    Y = Y_train[samp]
    y = np.zeros((len(samp), 10), np.float32)
    # correct loss for NLL, torch NLL loss returns one per row
    y[range(y.shape[0]), Y] = -10.0
    y = Tensor(y)

    # network
    out = model.forward(x)

    # NLL loss function
    loss = out.mul(y).mean()
    loss.backward()
    optim.step()

    cat = np.argmax(out.data, axis=1)
    accuracy = (cat == Y).mean()

    # printing
    loss = loss.data
    losses.append(loss)
    accuracies.append(accuracy)
    t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))
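# Once the loop finishes, test-set accuracy can be checked with numpy_eval() from
# above; the 0.95 threshold is an illustrative target, not a measured result for
# this reduced-channel configuration.
assert numpy_eval() > 0.95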
def __init__(self):
    self.l1 = Tensor(layer_init_uniform(784, 128), requires_grad=True)
    self.l2 = Tensor(layer_init_uniform(128, 10), requires_grad=True)
def __init__(self):
    self.x = Tensor(x_init.copy())
    self.W = Tensor(W_init.copy())
    self.m = Tensor(m_init.copy())
def test_create_tensor():
    data = np.zeros([5, 5])
    tensor = Tensor(data)
    assert type(tensor) == Tensor
    assert (tensor.data == data).all()