def test_mul():
    N = 1024
    X0 = np.random.rand(N, N).astype(dtype)
    X1 = np.random.rand(N, N).astype(dtype)

    with Graph(NumpyRuntime) as g_np:
        x0 = g_np.Placeholder(X0.shape)
        x1 = g_np.Placeholder(X1.shape)
        g_np.Mul(x0, x1)

    def f_np():
        return g_np.forward(X0, X1)

    with Graph(TvmRuntime) as g_tvm:
        x0 = g_tvm.Placeholder(X0.shape)
        x1 = g_tvm.Placeholder(X1.shape)
        g_tvm.Mul(x0, x1)

    def f_tvm():
        return g_tvm.forward(X0, X1)

    def f_pytorch():
        x0 = torch.tensor(X0, requires_grad=True, dtype=torch.float32)
        x1 = torch.tensor(X1, requires_grad=True, dtype=torch.float32)
        mul = x0 * x1
        return mul.data.numpy()

    compare('Mul', f_np, f_tvm, f_pytorch)
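# `compare` is called by every forward test in this file but its definition is
# not shown here. The sketch below is only an assumption of what it does
# (run each backend once, check the NumPy and TVM outputs against the PyTorch
# reference, and report per-call timings); the name `compare_sketch`, the
# `repeats`/`rtol` parameters, and the timing scheme are hypothetical, not the
# project's actual implementation.
def compare_sketch(name, f_np, f_tvm, f_pytorch, repeats=10, rtol=1e-4):
    from timeit import timeit
    y_np, y_tvm, y_pt = f_np(), f_tvm(), f_pytorch()
    np.testing.assert_allclose(y_np, y_pt, rtol=rtol)   # NumPy runtime vs. PyTorch
    np.testing.assert_allclose(y_tvm, y_pt, rtol=rtol)  # TVM runtime vs. PyTorch
    t_np = timeit(f_np, number=repeats) / repeats
    t_tvm = timeit(f_tvm, number=repeats) / repeats
    t_pt = timeit(f_pytorch, number=repeats) / repeats
    print(f'Time: {name} forward')
    print(f'tvm: {t_tvm}\nnumpy: {t_np}\npytorch: {t_pt}')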
def test_repeat():
    N = 1024
    M = 512
    X = np.random.rand(1, N).astype(dtype)

    with Graph(NumpyRuntime) as g_np:
        x = g_np.Placeholder(X.shape)
        g_np.Repeat(x, M, axis=0)

    def f_np():
        return g_np.forward(X)

    with Graph(TvmRuntime) as g_tvm:
        x = g_tvm.Placeholder(X.shape)
        g_tvm.Repeat(x, M, axis=0)

    def f_tvm():
        return g_tvm.forward(X)

    def f_pytorch():
        x = torch.tensor(X, requires_grad=True, dtype=torch.float32)
        x = torch.reshape(x, (N,))
        y = x.repeat(M)
        y = torch.reshape(y, (M, N))
        return y.data.numpy()

    compare('Repeat', f_np, f_tvm, f_pytorch)
def test_matmul():
    N = 64
    M = 256
    K = 128
    X0 = np.random.rand(N, K).astype(dtype)
    X1 = np.random.rand(K, M).astype(dtype)

    with Graph(NumpyRuntime) as g_np:
        m0 = g_np.Placeholder(X0.shape)
        m1 = g_np.Placeholder(X1.shape)
        g_np.Matmul(m0, m1)

    def f_np():
        return g_np.forward(X0, X1)

    with Graph(TvmRuntime) as g_tvm:
        m0 = g_tvm.Placeholder(X0.shape)
        m1 = g_tvm.Placeholder(X1.shape)
        g_tvm.Matmul(m0, m1)

    def f_tvm():
        return g_tvm.forward(X0, X1)

    def f_pytorch():
        x0 = torch.tensor(X0, requires_grad=True, dtype=torch.float32)
        x1 = torch.tensor(X1, requires_grad=True, dtype=torch.float32)
        mm = torch.matmul(x0, x1)
        return mm.data.numpy()

    compare('Matmul', f_np, f_tvm, f_pytorch)
def test_mul():
    N = 1
    X0 = np.random.rand(N).astype(dtype)
    X1 = np.random.rand(N).astype(dtype)

    with Graph(NumpyRuntime) as g:
        x0 = g.Placeholder(X0.shape, name='x0')
        x1 = g.Placeholder(X1.shape, name='x1')
        mul = g.Mul(x0, x1)

    numerical_grads = g.get_numerical_gradient(X0, X1)
    time_ng = timeit(lambda: g.get_numerical_gradient(X0, X1), number=10) / 10

    g.propagate(X0, X1)
    np_backward_grads = g.get_grads()
    time_np = timeit(lambda: g.propagate(X0, X1), number=10) / 10

    with Graph(TvmRuntime) as g:
        x0 = g.Placeholder(X0.shape, name='x0')
        x1 = g.Placeholder(X1.shape, name='x1')
        mul = g.Mul(x0, x1)

    g.propagate(X0, X1)
    tvm_backward_grads = g.get_grads()
    time_tvm = timeit(lambda: g.propagate(X0, X1), number=10) / 10

    def pytorch_mul():
        x0 = torch.tensor(X0, requires_grad=True, dtype=torch.float32)
        x1 = torch.tensor(X1, requires_grad=True, dtype=torch.float32)
        mul = x0 * x1
        mul.backward()
        return x0, x1

    x0_pt, x1_pt = pytorch_mul()
    pt_backward_grads = {
        'x0': x0_pt.grad.data.numpy(),
        'x1': x1_pt.grad.data.numpy()
    }
    time_pt = timeit(lambda: pytorch_mul(), number=10) / 10

    for v in [x0, x1]:
        name = v.name
        np.testing.assert_allclose(numerical_grads[name], pt_backward_grads[name], rtol=1e-04)
        np.testing.assert_allclose(np_backward_grads[name], numerical_grads[name], rtol=1e-04)
        np.testing.assert_allclose(tvm_backward_grads[name], np_backward_grads[name], rtol=1e-06)

    print('Time: Mul backward')
    print(f'tvm: {time_tvm}\nnumpy: {time_np}\npytorch: {time_pt}\nnumerical: {time_ng}')
def test_softmax_with_cross_entropy_loss():
    N = 1024
    X = np.random.rand(N)
    T = np.zeros((N,), dtype='int')
    T[7] = 1  # 7th category is the answer

    with Graph(NumpyRuntime) as g_np:
        x = g_np.Placeholder(X.shape)
        t = g_np.Placeholder(T.shape)
        g_np.SoftmaxWithCrossEntropyLoss(x, t)

    def f_np():
        return g_np.forward(X, T)

    with Graph(TvmRuntime) as g_tvm:
        x = g_tvm.Placeholder(X.shape)
        t = g_tvm.Placeholder(T.shape, require_grad=False)
        g_tvm.SoftmaxWithCrossEntropyLoss(x, t)

    def f_tvm():
        return g_tvm.forward(X, T)

    def f_pytorch():
        if X.ndim == 1:
            m = 1
            n = X.size
        elif X.ndim == 2:
            m, n = X.shape
        else:
            raise NotImplementedError

        x = torch.tensor(X, requires_grad=True, dtype=torch.float32)
        t = torch.tensor(T, requires_grad=False, dtype=torch.float32)
        loss = PytorchFuncHelper.softmax_with_cross_entropy_loss(x, t, m)
        return loss.data.numpy()

    compare('SoftmaxWithCrossEntropyLoss', f_np, f_tvm, f_pytorch)
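# PytorchFuncHelper.softmax_with_cross_entropy_loss is used as the PyTorch
# reference here and in test_two_layer_net but is not defined in this file.
# The sketch below only illustrates what it is assumed to compute: a softmax
# cross-entropy against a one-hot target, averaged over the batch size passed
# as the third argument. The function name and exact reduction are assumptions,
# not the project's actual helper.
def softmax_with_cross_entropy_loss_sketch(x, t, batch_size):
    log_probs = torch.log_softmax(x, dim=-1)    # log-softmax over the category axis
    return -(t * log_probs).sum() / batch_size  # mean negative log-likelihood per sample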
def test_softmax():
    N = 1024
    X = np.random.rand(N)

    with Graph(NumpyRuntime) as g_np:
        x = g_np.Placeholder(X.shape)
        g_np.Softmax(x)

    def f_np():
        return g_np.forward(X)

    with Graph(TvmRuntime) as g_tvm:
        x = g_tvm.Placeholder(X.shape)
        g_tvm.Softmax(x)

    def f_tvm():
        return g_tvm.forward(X)

    def f_pytorch():
        x = torch.tensor(X, requires_grad=True, dtype=torch.float32)
        softmax = torch.softmax(x, dim=0)
        return softmax.data.numpy().astype('float32')

    compare('Softmax', f_np, f_tvm, f_pytorch)
def test_sigmoid():
    N = 1024
    X = np.random.rand(N, N)

    with Graph(NumpyRuntime) as g_np:
        x = g_np.Placeholder(X.shape)
        g_np.Sigmoid(x)

    def f_np():
        return g_np.forward(X)

    with Graph(TvmRuntime) as g_tvm:
        x = g_tvm.Placeholder(X.shape)
        g_tvm.Sigmoid(x)

    def f_tvm():
        return g_tvm.forward(X)

    def f_pytorch():
        x = torch.tensor(X, requires_grad=True, dtype=torch.float32)
        sigmoid = torch.sigmoid(x)
        return sigmoid.data.numpy()

    compare('Sigmoid', f_np, f_tvm, f_pytorch)
def test_sum():
    N = 1024
    X = np.random.rand(N, N).astype(dtype)

    with Graph(NumpyRuntime) as g_np:
        x = g_np.Placeholder(X.shape)
        g_np.Sum(x)

    def f_np():
        return g_np.forward(X)

    with Graph(TvmRuntime) as g_tvm:
        x = g_tvm.Placeholder(X.shape)
        g_tvm.Sum(x)

    def f_tvm():
        return g_tvm.forward(X)

    def f_pytorch():
        x = torch.tensor(X, requires_grad=True, dtype=torch.float32)
        sum_ = torch.sum(x, dim=-1, keepdim=True)
        return sum_.data.numpy()

    compare('Sum', f_np, f_tvm, f_pytorch)
def test_mul():
    X0 = np.array([2]).astype(dtype)
    X1 = np.array([3]).astype(dtype)

    with Graph(NumpyRuntime) as g:
        x0 = g.Placeholder(X0.shape, name='x0')
        x1 = g.Placeholder(X1.shape, name='x1')
        mul = g.Mul(x0, x1)

    grads = g.get_numerical_gradient(X0, X1)

    answers = {'x0': 3.0, 'x1': 2.0}
    for v in [x0, x1]:
        name = v.name
        np.testing.assert_allclose(grads[name], answers[name], rtol=1e-04)
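# Graph.get_numerical_gradient is the reference that the backward tests check
# analytic gradients against. The sketch below shows the standard
# central-difference estimate it is assumed to implement for a scalar-valued
# function f of an array x; the function name, signature, and step size h are
# assumptions, not the project's actual Graph API.
def numerical_gradient_sketch(f, x, h=1e-4):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + h
        f_plus = f(x)            # f evaluated with the entry nudged up
        x[idx] = orig - h
        f_minus = f(x)           # f evaluated with the entry nudged down
        x[idx] = orig            # restore the perturbed entry
        grad[idx] = (f_plus - f_minus) / (2 * h)
        it.iternext()
    return grad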
def test_matmul():
    M0 = np.array([[1, 2, 3], [3, 5, 4]]).astype(dtype)
    M1 = np.array([[5, 1], [2, 2], [3, 4]]).astype(dtype)

    with Graph(NumpyRuntime) as g:
        m0 = g.Placeholder(M0.shape, name='m0')
        m1 = g.Placeholder(M1.shape, name='m1')
        mm = g.Matmul(m0, m1)
        mmsum = g.Sum(mm)

    grads = g.get_numerical_gradient(mmsum, M0, M1)

    # tm0 = np.array([[1, 2, 3], [3, 5, 4]]).astype(dtype)
    # answers = {'m0': 3.0, 'm1': 2.0}
    for m in [m0, m1]:
        name = m.name
        # np.testing.assert_allclose(grads[name], answers[name], rtol=1e-04)
        print(f'name: {name}, val: {m.tensor}, grad: {grads[name]}')
def __init__(self, input_size, hidden_size, output_size):
    self.placeholders = []

    I = input_size
    H = hidden_size
    O = output_size

    with Graph(NumpyRuntime) as g:
        x0 = g.Placeholder((1, I))
        w0 = g.Variable((I, H), RandomInitializer)
        mm0 = g.Matmul(x0, w0)
        b0 = g.Variable((1, H), RandomInitializer)
        add0 = g.Add(mm0, b0)

        w1 = g.Variable((H, O), RandomInitializer)
        mm1 = g.Matmul(add0, w1)
        b1 = g.Variable((1, O), RandomInitializer)
        add1 = g.Add(mm1, b1)

    self.graph = g
    self.placeholders.append(x0)
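# Usage sketch for the constructor above, mirroring how graphs are driven in
# the forward tests (g.forward fed with values for the placeholders). The
# sizes and the assumption that Variables need no explicit inputs are
# hypothetical, not taken from the project's training code.
#
#     net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
#     X = np.random.rand(1, 784).astype(dtype)
#     y = net.graph.forward(X)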
def test_two_layer_net():
    N = 256
    K = 512
    M = 10
    X0 = np.random.rand(1, N).astype(dtype)
    W0 = np.random.rand(N, K).astype(dtype)
    B0 = np.random.rand(1, K).astype(dtype)
    W1 = np.random.rand(K, M).astype(dtype)
    B1 = np.random.rand(1, M).astype(dtype)
    T = np.zeros((1, M), dtype='int')
    T[0, 3] = 1

    with Graph(NumpyRuntime) as g_np:
        x0 = g_np.Placeholder(X0.shape)
        w0 = g_np.Placeholder(W0.shape)
        b0 = g_np.Placeholder(B0.shape)
        w1 = g_np.Placeholder(W1.shape)
        b1 = g_np.Placeholder(B1.shape)
        t = g_np.Placeholder(T.shape)

        affine0 = g_np.Matmul(x0, w0)
        add0 = g_np.Add(affine0, b0)
        sigm0 = g_np.Sigmoid(add0)
        affine1 = g_np.Matmul(sigm0, w1)
        add1 = g_np.Add(affine1, b1)
        sigm1 = g_np.Sigmoid(add1)
        g_np.SoftmaxWithCrossEntropyLoss(sigm1, t)

    def f_np():
        return g_np.forward(X0, W0, B0, W1, B1, T)

    with Graph(TvmRuntime) as g_tvm:
        x0 = g_tvm.Placeholder(X0.shape)
        w0 = g_tvm.Placeholder(W0.shape)
        b0 = g_tvm.Placeholder(B0.shape)
        w1 = g_tvm.Placeholder(W1.shape)
        b1 = g_tvm.Placeholder(B1.shape)
        t = g_tvm.Placeholder(T.shape)

        affine0 = g_tvm.Matmul(x0, w0)
        add0 = g_tvm.Add(affine0, b0)
        sigm0 = g_tvm.Sigmoid(add0)
        affine1 = g_tvm.Matmul(sigm0, w1)
        add1 = g_tvm.Add(affine1, b1)
        sigm1 = g_tvm.Sigmoid(add1)
        g_tvm.SoftmaxWithCrossEntropyLoss(sigm1, t)

    def f_tvm():
        return g_tvm.forward(X0, W0, B0, W1, B1, T)

    def f_pytorch():
        x0 = torch.tensor(X0, requires_grad=True, dtype=torch.float32)
        w0 = torch.tensor(W0, requires_grad=True, dtype=torch.float32)
        b0 = torch.tensor(B0, requires_grad=True, dtype=torch.float32)
        w1 = torch.tensor(W1, requires_grad=True, dtype=torch.float32)
        b1 = torch.tensor(B1, requires_grad=True, dtype=torch.float32)
        t = torch.tensor(T, requires_grad=False, dtype=torch.float32)

        mm0 = torch.matmul(x0, w0)
        add0 = torch.add(mm0, b0)
        sigm0 = torch.sigmoid(add0)
        mm1 = torch.matmul(sigm0, w1)
        add1 = torch.add(mm1, b1)
        sigm1 = torch.sigmoid(add1)

        batch_size = 1
        loss = PytorchFuncHelper.softmax_with_cross_entropy_loss(sigm1, t, batch_size)
        return loss.data.numpy()

    compare('TwoLayerNet', f_np, f_tvm, f_pytorch)