def __init__(self, n_inputs, n_outputs, init_fn=None):
    super().__init__(init_fn=init_fn)
    if self.init_fn is None:
        # He initialization: scale by sqrt(2 / fan_in)
        W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0 / n_inputs)
    else:
        W = init_fn(n_inputs, n_outputs)
    self.weight = Tensor(W, requires_grad=True)
    self.parameters.append(self.weight)
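# The constructor above calls super().__init__(init_fn=init_fn) and appends to
# self.parameters, so a small base class along these lines is assumed. This is a
# minimal sketch; the name Layer and the exact attributes are assumptions, not
# the project's actual implementation.
class Layer:
    def __init__(self, init_fn=None):
        self.init_fn = init_fn   # optional custom weight initializer
        self.parameters = []     # Tensors the optimizer should update

    def forward(self, x):
        raise NotImplementedError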
def helper_test_op(shps, torch_fxn, slowgrad_fxn, atol=0, rtol=1e-6,
                   grad_atol=0, grad_rtol=1e-6, forward_only=False):
    torch.manual_seed(0)
    ts = [torch.rand(x, requires_grad=True) for x in shps]
    tst = [Tensor(x.detach().numpy(), requires_grad=True) for x in ts]

    # forward pass must match torch
    out = torch_fxn(*ts)
    ret = slowgrad_fxn(*tst)
    np.testing.assert_allclose(ret.data, out.detach().numpy(), atol=atol, rtol=rtol)

    # backward pass: gradients must match torch
    if not forward_only:
        out.mean().backward()
        ret.mean().backward()
        for t, tt in zip(ts, tst):
            np.testing.assert_allclose(t.grad, tt.grad.data, atol=grad_atol, rtol=grad_rtol)

    # speed: mean of 5 runs, reported in milliseconds
    torch_fp = timeit.Timer(functools.partial(torch_fxn, *ts)).timeit(5) * 1000 / 5
    slowgrad_fp = timeit.Timer(functools.partial(slowgrad_fxn, *tst)).timeit(5) * 1000 / 5
    if not forward_only:
        torch_fbp = timeit.Timer(
            functools.partial(lambda f, x: f(*x).mean().backward(), torch_fxn, ts)).timeit(5) * 1000 / 5
        slowgrad_fbp = timeit.Timer(
            functools.partial(lambda f, x: f(*x).mean().backward(), slowgrad_fxn, tst)).timeit(5) * 1000 / 5
    else:
        torch_fbp, slowgrad_fbp = np.nan, np.nan

    print("testing %30r   torch/slowgrad fp: %.2f / %.2f ms   bp: %.2f / %.2f ms" %
          (shps, torch_fp, slowgrad_fp, torch_fbp - torch_fp, slowgrad_fbp - slowgrad_fp))
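# Example use of helper_test_op, comparing a slowgrad op against its torch
# counterpart. The op names on the slowgrad Tensor (matmul, relu) are
# assumptions about its API; adjust to whatever the library actually exposes.
def test_matmul():
    helper_test_op([(64, 64), (64, 128)], lambda a, b: a.matmul(b), lambda a, b: a.matmul(b))

def test_relu():
    helper_test_op([(45, 65)], lambda x: x.relu(), lambda x: x.relu())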
def __init__(self, in_channels, out_channels, kernel_size=(2, 2), stride=1, padding=0, init_fn=None):
    super().__init__(init_fn=init_fn)
    shape = (out_channels, in_channels, *kernel_size)
    if self.init_fn is None:
        # He initialization; for a conv layer fan_in = in_channels * prod(kernel_size)
        fan_in = in_channels * kernel_size[0] * kernel_size[1]
        W = np.random.randn(*shape) * np.sqrt(2.0 / fan_in)
    else:
        W = init_fn(*shape)
    self.weight = Tensor(W, requires_grad=True)
    self.parameters.append(self.weight)
def numpy_eval():
    Y_test_preds_out = model.forward(
        Tensor(X_test.reshape((-1, 28 * 28)).astype(np.float32)))
    Y_test_preds = np.argmax(Y_test_preds_out.data, axis=1)
    return (Y_test == Y_test_preds).mean()
X_train, Y_train, X_test, Y_test = fetch_mnist()
model = TinyConvNet()
optimizer = optim.SGD(model.parameters(), lr=0.001)

BS = 128
losses, accuracies = [], []
steps = 1000

t = trange(steps)
for i in t:
    optimizer.zero_grad()

    # sample a random minibatch
    samp = np.random.randint(0, X_train.shape[0], size=(BS))
    x = Tensor(X_train[samp].reshape((-1, 28 * 28)).astype(np.float32))
    Y = Y_train[samp]

    # correct scaling for NLL: torch's NLL loss returns one value per row
    y = np.zeros((len(samp), 10), np.float32)
    y[range(y.shape[0]), Y] = -10.0
    y = Tensor(y)

    # network
    out = model.forward(x)

    # NLL loss function
    loss = out.mul(y).mean()
    loss.backward()
    optimizer.step()

    # track stats for the progress bar
    cat = np.argmax(out.data, axis=1)
    accuracy = (cat == Y).mean()
    losses.append(loss.data)
    accuracies.append(accuracy)
    t.set_description("loss %.2f accuracy %.2f" % (loss.data, accuracy))
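# After training, the numpy_eval helper defined earlier can report test-set
# accuracy. The 0.95 threshold below is illustrative, not a figure from the source.
accuracy = numpy_eval()
print("test set accuracy is %f" % accuracy)
assert accuracy > 0.95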
def __init__(self):
    self.l1 = Tensor(layer_init_uniform(784, 128), requires_grad=True)
    self.l2 = Tensor(layer_init_uniform(128, 10), requires_grad=True)
def __init__(self):
    self.x = Tensor(x_init.copy())
    self.W = Tensor(W_init.copy())
    self.m = Tensor(m_init.copy())
def test_create_tensor():
    data = np.zeros([5, 5])
    tensor = Tensor(data)
    assert type(tensor) == Tensor
    assert (tensor.data == data).all()
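# A companion test sketch, built only from the Tensor API visible above
# (requires_grad, .mean(), .backward(), .grad.data). The gradient of mean()
# with respect to each of the N inputs should be 1/N; exact attribute names
# are assumptions about the slowgrad API.
def test_backward_of_mean():
    t = Tensor(np.random.randn(5, 5).astype(np.float32), requires_grad=True)
    t.mean().backward()
    np.testing.assert_allclose(t.grad.data, np.full((5, 5), 1 / 25.0), rtol=1e-6)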