def test_conv_transpose_grad(self):
    N, H, W, C = 1, 5, 5, 2
    Kh, Kw, Kc = 3, 3, 2
    x = jt.random([N, H, W, C])
    w = jt.random([Kh, Kw, C, Kc])
    y, yy = conv_transpose(x, w)
    mask = jt.random(y.shape)
    loss = (y * mask).sum()
    dx, dw = jt.grad(loss, [x, w])
    jdx, jdw = jt.fetch_sync([dx, dw])
    check_fused(len(x.shape))
    nmask = mask.data
    _, (ndx, ndw) = ngrad(
        lambda args: (conv_transpose_naive(args[0], args[1]) * nmask).sum(),
        [np.float64(x.data), np.float64(w.data)], 1e-7)
    assert np.allclose(ndx, jdx), (ndx, jdx, ndx - jdx)
    assert np.allclose(ndw, jdw), (ndw, jdw)
def test_binary_dep(self):
    a = jt.random([10])
    b, = self.where(a > 0.5)
    b = b + 1
    assert (b.data == np.where(a.data > 0.5)[0] + 1).all()
    b, = self.where(a > 1)
    b = b + 1
    assert (b.data == np.where(a.data > 1)[0] + 1).all()
def test_pool(self):
    N, H, W, C = 3, 10, 10, 4
    size = 3
    for op in ops:
        x = jt.random([N, H, W, C])
        y = pool(x, size, op)
        ny = pool_naive(x.data, size, op)
        assert np.allclose(y.data, ny), (op, y.data, ny)
def test_reindex_dep(self):
    a = jt.random([10])
    b, = self.where(a > 1)
    assert len(b.data) == 0
    b, = self.where(a > 0.5)
    assert (b.data == np.where(a.data > 0.5)).all()
    b = a.reindex_var(self.where(a > 0.5))
    assert (b.data == a.data[a.data > 0.5]).all()
def check():
    a = jt.random((5, 1))
    func = Func()
    b = func(a)
    assert numpy.allclose(b.data, (a + a).data)
    da = jt.grad(b, a)
    one = numpy.ones(a.shape)
    assert numpy.allclose(da.data, one * 2.0)
def check(shape, fail_cond, fail_func):
    a = jt.random(shape)
    selected = jt.candidate(a, fail_cond)
    a_ = a.data
    selected_out = selected.data
    selected_ans = check_candidate(a_, fail_func)
    assert selected_out.tolist() == selected_ans.tolist(), (selected_out, selected_ans)
def _test_primitive(self, op_name, op_func, N, C_in, C_out, expand, stride):
    op = op_func(C_in, C_out, expand, stride)
    input = jt.random([N, C_in, 7, 7]).float32()
    output = op(input)
    self.assertEqual(
        output.shape[:2], [N, C_out],
        'Primitive {} failed for shape {}.'.format(op_name, input.shape))
def test_normalize_3d_tensor(self):
    jt.seed(28)
    n_channels = 3
    img_size = 10
    mean = jt.random((n_channels,)).data
    std = jt.random((n_channels,)).data
    img = jt.random((n_channels, img_size, img_size)).data
    target = transform.image_normalize(img, mean, std)
    mean_unsqueezed = mean.reshape(-1, 1, 1)
    std_unsqueezed = std.reshape(-1, 1, 1)
    result1 = transform.image_normalize(img, mean_unsqueezed, std_unsqueezed)
    result2 = transform.image_normalize(img, mean_unsqueezed, std_unsqueezed)
    assert_array_almost_equal(target, result1)
    assert_array_almost_equal(target, result2)
def test_normalize_different_dtype(self):
    for dtype1 in ['float32', 'float64']:
        img = jt.random((3, 10, 10), dtype=dtype1)
        for dtype2 in ['int64', 'float32', 'float64']:
            mean = jt.array([1, 2, 3], dtype=dtype2)
            std = jt.array([1, 2, 1], dtype=dtype2)
            # checks that it doesn't crash
            transform.image_normalize(img, mean, std)
def test_reduce_dep(self):
    a = jt.random([100, 100])
    index = (a > 0.5).where()
    x = a.reindex_var(index)
    xsum = x.sum()
    na = a.data
    assert np.allclose(np.sum(na[na > 0.5]), xsum.data), (x.data, xsum.data, np.sum(na[na > 0.5]))
def test(self):
    a = jt.random([10])
    b = jt.code(a.shape, a.dtype, [a],
        cpu_src='''
            for (int i=0; i<in0_shape0; i++)
                @out(i) = @in0(i)*@in0(i)*2;
        ''',
        cpu_grad_src=['''
            for (int i=0; i<in0_shape0; i++) {
                @out(i) = @dout(i)*@in0(i)*4;
            }
        '''])
    na, nb = jt.fetch_sync([a, b])
    assert np.allclose(na*na*2, nb)
    c = jt.random([10])
    da = jt.grad(c*b, a)
    assert np.allclose(c.data*na*4, da.data), (c.data*na*4, da.data)
def test_cuda2(self):
    a = jt.random((100, 100))
    b = jt.random((100, 100))
    c = jt.code(a.shape, a.dtype, [a, b],
        cuda_header='''
            namespace jittor {
            __global__ static void kernel1(@ARGS_DEF) {
                @PRECALC
                for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
                for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
                    @out(i,j) = @in0(i,j)*@in1(i,j);
            }

            __global__ static void kernel2(@ARGS_DEF) {
                @PRECALC
                for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
                for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
                    @out(i,j) = @dout(i,j)*@in1(i,j);
            }

            __global__ static void kernel3(@ARGS_DEF) {
                @PRECALC
                for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
                for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
                    @out(i,j) = @dout(i,j)*@in0(i,j);
            }
            }
        ''',
        cuda_src='''
            kernel1<<<32, 32>>>(@ARGS);
        ''',
        cuda_grad_src=['''
            kernel2<<<32, 32>>>(@ARGS);
        ''', '''
            kernel3<<<32, 32>>>(@ARGS);
        '''])
    da, db = jt.grad(c, [a, b])
    assert np.allclose(c.data, a.data * b.data), (c.data, a.data * b.data)
    assert np.allclose(da.data, b.data)
    assert np.allclose(db.data, a.data)
def check(xshape, wshape, stride=(1, 1, 1), padding=(0, 0, 0), dilation=(1, 1, 1), group=1):
    with jt.flag_scope(use_cuda=1):
        x = jt.random(xshape)
        w = jt.random(wshape)
        # y = jt.cudnn.ops.cudnn_conv3d(x, w, *stride, *padding, *dilation, group)
        y = jt.nn.conv3d(x, w, None, stride, padding, dilation, group)
        masky = jt.rand_like(y)
        dx, dw = jt.grad(masky * y, [x, w])
    y2 = jt.nn.conv3d(x, w, None, stride, padding, dilation, group)
    dx2, dw2 = jt.grad(masky * y2, [x, w])
    np.testing.assert_allclose(y.data, y2.data)
    np.testing.assert_allclose(dx.data, dx2.data, rtol=1e-5, atol=1e-3)
    np.testing.assert_allclose(dw.data, dw2.data, rtol=1e-5, atol=1e-3)
def test_pool_grad(self):
    jt.set_seed(1)
    N, H, W, C = 2, 7, 7, 2
    size = 3
    # ops = ["maximum"]
    for op in ops:
        x = jt.random([N, H, W, C])
        y = pool(x, size, op)
        mask = jt.random(y.shape)
        loss = (y * mask).sum()
        dx = jt.grad(loss, x)
        jdx = dx.data
        nx = x.data
        nmask = mask.data
        _, (ndx,) = ngrad(
            lambda args: (pool_naive(args[0], size, op) * nmask).sum(),
            [nx], 1e-6)
        assert np.allclose(jdx, ndx), (op, jdx[0, :, :, 0], ndx[0, :, :, 0])
def test_float64(self):
    jt.set_seed(3)
    with jt.log_capture_scope(
            log_silent=1,
            log_v=0, log_vprefix="op.cc=100") as raw_log:
        t = jt.random([5, 5], dtype='float64')
        t.data
    logs = find_log_with_re(raw_log,
        "(Jit op key (not )?found: " + "curand_random" + ".*)")
    assert len(logs) == 1
def check_forward(xshape, wshape, stride, padding, dilation, groups, use_cuda, nhwc): assert nhwc == 0 test_func = test_nchw # only check cudnn with jt.log_capture_scope(use_cuda=use_cuda, enable_tuner=1, log_v=10, log_vprefix="conv_tuner.cc=1000") as raw_log: x = jt.random(xshape) w = jt.random(wshape) y = test_func(x, w, stride, padding, dilation, groups) y.sync() with jt.flag_scope(use_cuda=0, enable_tuner=0): cy = test_func(x, w, stride, padding, dilation, groups) cy.sync() assert np.allclose(y.data, cy.data)
def test_reduce_opt(self):
    a = jt.random((16, 512, 38, 38))
    b = jt.random((16, 512, 38, 38))
    jt.sync([a, b])
    with jt.profile_scope(rerun=10, warmup=10) as rep:
        norm = a.sqr().sum(1, keepdims=True).sqrt()
        c = a / norm
        da = jt.grad(c * b, a)
        jt.sync([c, da])
        gpu_c = c.numpy()
        gpu_da = da.numpy()
    with jt.flag_scope(use_cuda=0):
        norm = a.sqr().sum(1, keepdims=True).sqrt()
        c = a / norm
        da = jt.grad(c * b, a)
        assert np.allclose(gpu_c, c.data, 1e-3)
        assert (np.abs(gpu_da - da.data).max() < 1e-6)
    assert float(rep[1][3]) < 15e6, float(rep[1][3])  # 15ms (about 8ms)
def execute(self, input):
    output = input
    if self.p > 0 and self.is_train:
        if self.p == 1:
            noise = jt.zeros(input.shape)
        else:
            noise = jt.random(input.shape)
            noise = (noise > self.p).int()
        output = output * noise
    return output
def test5(self):
    with jt.flag_scope(use_cuda=1):
        f32 = jt.float32
        np.random.seed(0)
        jt.set_seed(3)
        x = f32(np.random.rand(1, 1))
        w = (jt.random([x.shape[-1], 10]) - f32(0.5)) / f32(x.shape[-1]) ** f32(0.5)
        jt.nn.matmul(x, w).data
def execute(self, x):
    if self.drop_prob == 0. or not self.is_training():
        return x
    keep_prob = 1 - self.drop_prob
    # work with tensors of any dimension, not just 2D ConvNets
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = keep_prob + jt.random(shape, dtype=x.dtype)
    random_tensor = jt.floor(random_tensor)  # binarize
    output = (x / keep_prob) * random_tensor
    return output
def check_forward(xshape, wshape, stride, padding, dilation, groups, use_cuda, nhwc): assert nhwc == 0 test_func = test_nchw # only check cudnn with jt.log_capture_scope(use_cuda=use_cuda, enable_tuner=1, log_v=10, log_vprefix="op.cc=100,conv_tuner=1000" ) as raw_log: x = jt.random(xshape) w = jt.random(wshape) y = test_func(x, w, stride, padding, dilation, groups) y.sync() with jt.flag_scope(use_cuda=0, enable_tuner=0): cy = test_func(x, w, stride, padding, dilation, groups) cy.sync() logs = find_log_with_re(raw_log, "(Jit op key (not )?found: .*conv.*)") assert len(logs)==1 assert np.allclose(y.data, cy.data)
def test_pad(self):
    size = 10
    lpad = 3
    rpad = 4
    a = jt.random([size])
    b = a.reindex([size + lpad + rpad], [f"i0-{lpad}"], -1)
    na, nb = jt.fetch_sync([a, b])
    assert (nb[lpad:lpad + size] == na).all()
    assert (nb[:lpad] == -1).all()
    assert (nb[-rpad:] == -1).all()
def test_reduce(self):
    x = jt.random([5, 5])
    y = x.mpi_reduce(root=0)
    y.sync()
    if mpi.world_rank() == 0:
        assert np.allclose(y.data, (x * n).data)
    else:
        assert np.allclose(y.data, np.zeros([5, 5]))
    g = jt.grad(y, x)
    assert np.allclose(g.data, np.ones([5, 5]))
def test_data(self):
    test_img = np.random.random((64, 3, 224, 224)).astype('float32')
    jittor_test_img = jt.array(test_img)
    lr = 100
    jittor_model = jtmodels.__dict__['mobilenet_v2']()
    jittor_model2 = jtmodels.__dict__['mobilenet_v2']()
    # Set eval to avoid dropout layer & bn errors
    jittor_model.train()
    jittor_model.classifier[0].eval()
    for m in jittor_model.modules():
        if isinstance(m, jt.nn.BatchNorm):
            m.eval()
    jittor_model2.train()
    jittor_model2.classifier[0].eval()
    for m in jittor_model2.modules():
        if isinstance(m, jt.nn.BatchNorm):
            m.eval()
    load_parameters(jittor_model2, jittor_model)
    for m in jittor_model.modules():
        if isinstance(m, jt.nn.Conv):
            m.is_depthwise_conv = False
    cnt = 0
    for m in jittor_model2.modules():
        if isinstance(m, jt.nn.Conv):
            if m.is_depthwise_conv:
                cnt += 1
    assert cnt == 17, (cnt, '!=', 17)
    jt_optimizer = jt.nn.SGD(jittor_model.parameters(), lr=lr)
    jt_optimizer2 = jt.nn.SGD(jittor_model2.parameters(), lr=lr)
    jittor_result = jittor_model(jittor_test_img)
    mask = jt.random(jittor_result.shape, jittor_result.dtype)
    loss = jittor_result * mask
    jt_optimizer.step(loss)
    jt.sync_all(True)
    jittor_result2 = jittor_model2(jittor_test_img)
    loss = jittor_result2 * mask
    x = jittor_result2.data + 1e-8
    y = jittor_result.data + 1e-8
    relative_error = abs(x - y) / abs(y)
    diff = relative_error.mean()
    assert diff < 1e-4, (diff, 'forward')
    jt_optimizer2.step(loss)
    jt.sync_all(True)
    compare_parameters(jittor_model, jittor_model2)
    jt.clean()
    jt.gc()
def check():
    a = jt.random((5, 1))
    b = jt.random((5, 1))
    c, d = jt.numpy_code(
        [a.shape, a.shape],
        [a.dtype, a.dtype],
        [a, b],
        forward_code,
        [backward_code1, backward_code2],
    )
    assert numpy.allclose(c.data, (a + b).data)
    assert numpy.allclose(d.data, (a - b).data)
    dca, dcb = jt.grad(c, [a, b])
    dda, ddb = jt.grad(d, [a, b])
    one = numpy.ones(a.shape)
    mone = one * -1.0
    assert numpy.allclose(dca.data, one)
    assert numpy.allclose(dcb.data, one)
    assert numpy.allclose(dda.data, one)
    assert numpy.allclose(ddb.data, mone)
def test_profile_fused_op(self):
    size = 1000
    r1 = []
    r2 = []
    for size in range(1024, 1025, 1):
        with performance_test_scope(2, 10) as report:
            a = jt.random([size, size, 1])
            b = jt.random([1, size, size])
            c = (a * b).sum(1)
            c.sync()
        assert len(report) == 3
        tp_np = get_np_matmul_toughtput(size)
        tp_jt = float(report[1][-1])
        r1.append(tp_jt)
        r2.append(tp_np)
        na = a.data.reshape((size, size))
        nb = b.data.reshape((size, size))
        nc = np.matmul(na, nb)
        assert (np.abs(nc - c.data) < 1e-2).all(), np.abs(nc - c.data).max()
def test_cuda2_use_func(self):
    class Func(Function):
        def execute(self, a, b):
            self.save_vars = a, b
            return jt.code(a.shape, a.dtype, [a, b],
                cuda_src='''
                    __global__ static void kernel1(@ARGS_DEF) {
                        @PRECALC
                        for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
                        for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
                            @out(i,j) = @in0(i,j)*@in1(i,j);
                    }
                    kernel1<<<32, 32>>>(@ARGS);
                ''')

        def grad(self, grad):
            a, b = self.save_vars
            return jt.code([a.shape, b.shape], [a.dtype, b.dtype], [a, b, grad],
                cuda_src='''
                    __global__ static void kernel2(@ARGS_DEF) {
                        @PRECALC
                        for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
                        for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x) {
                            @out0(i,j) = @in2(i,j)*@in1(i,j);
                            @out1(i,j) = @in2(i,j)*@in0(i,j);
                        }
                    }
                    kernel2<<<32, 32>>>(@ARGS);
                ''')

    a = jt.random((100, 100))
    b = jt.random((100, 100))
    func = Func()
    c = func(a, b)
    da, db = jt.grad(c, [a, b])
    assert np.allclose(c.data, a.data * b.data), (c.data, a.data * b.data)
    assert np.allclose(da.data, b.data)
    assert np.allclose(db.data, a.data)
def test_memory_leak(self):
    def forward_code(np, data):
        a, b = data["inputs"]
        c, d = data["outputs"]
        np.add(a, b, out=c)
        np.subtract(a, b, out=d)

    def backward_code1(np, data):
        dout = data["dout"]
        out = data["outputs"][0]
        np.copyto(out, dout)

    def backward_code2(np, data):
        dout = data["dout"]
        out_index = data["out_index"]
        out = data["outputs"][0]
        if out_index == 0:
            np.copyto(out, dout)
        else:
            np.negative(dout, out)

    for i in range(1000000):
        a = jt.random((10000, 1))
        b = jt.random((10000, 1))
        c, d = jt.numpy_code(
            [a.shape, a.shape],
            [a.dtype, a.dtype],
            [a, b],
            forward_code,
            [backward_code1, backward_code2],
        )
        assert numpy.allclose(c.data, (a + b).data)
        assert numpy.allclose(d.data, (a - b).data)
        dca, dcb = jt.grad(c, [a, b])
        dda, ddb = jt.grad(d, [a, b])
        one = numpy.ones(a.shape)
        mone = one * -1.0
        assert numpy.allclose(dca.data, one)
        assert numpy.allclose(dcb.data, one)
        assert numpy.allclose(dda.data, one)
        assert numpy.allclose(ddb.data, mone)
def compute_gradient_penalty(D, X):
    """Calculates the gradient penalty loss for DRAGAN"""
    alpha = jt.array(np.random.random(size=X.shape).astype(np.float32)).stop_grad()
    interpolates = (alpha * X) + ((1 - alpha) * (X + (0.5 * std(X)) * jt.random(X.shape)))
    d_interpolates = D(interpolates)
    gradients = jt.grad(d_interpolates, interpolates)
    gradient_penalty = lambda_gp * ((norm(gradients, 2, dim=1) - 1).sqr()).mean()
    return gradient_penalty
def calc_gradient_penalty(netD, real_data, generated_data):
    LAMBDA = 10
    b_size = real_data.shape[0]
    alpha = jt.random([b_size, 1, 1, 1])
    alpha = alpha.broadcast(real_data)
    interpolated = (alpha * real_data.data) + ((1 - alpha) * generated_data.data)
    prob_interpolated = netD(interpolated)
    gradients = jt.grad(prob_interpolated, interpolated)
    gradients = jt.reshape(gradients, [b_size, -1])
    gradients_norm = jt.sqrt(jt.sum(gradients ** 2, dim=1) + 1e-12)
    return LAMBDA * ((gradients_norm - 1) ** 2).mean()