def test_conv_transpose3d(self):
    jt.set_global_seed(10)

    def check(xshape, wshape, stride=(1,1,1), padding=(0,0,0), dilation=(1,1,1), group=1):
        with jt.flag_scope(use_cuda=1):
            x = jt.random(xshape)
            w = jt.random(wshape)
            jt.sync_all()
            y2 = jt.nn.conv_transpose3d(x, w, None, stride, padding, 0, group, dilation)
            jt.sync_all()
        with jt.flag_scope(use_cuda=1):
            # y = jt.cudnn.ops.cudnn_conv3d_backward_x(w, x, *y2.shape[2:], *stride, *padding, *dilation, group)
            y = jt.nn.conv_transpose3d(x, w, None, stride, padding, 0, group, dilation)
            masky = jt.rand_like(y)
            dx, dw = jt.grad(masky*y, [x, w])
            jt.sync_all()
        dx2, dw2 = jt.grad(masky*y2, [x, w])
        jt.sync_all()

        np.testing.assert_allclose(y.numpy(), y2.numpy(), rtol=1e-6, atol=1e-4)
        np.testing.assert_allclose(dx.numpy(), dx2.numpy(), rtol=1e-6, atol=1e-4)
        np.testing.assert_allclose(dw.numpy(), dw2.numpy(), rtol=1e-5, atol=1e-3)

    check((2,5,10,10,10), (5,4,3,3,3), (1,1,1), (1,1,1))
    check((2,5,10,10,10), (5,4,3,3,3), (2,2,2), (1,1,1))
    check((2,5,10,10,10), (5,4,3,3,3), (2,2,2), (0,0,0))
    check((2,5,10,10,10), (5,4,3,3,3), (1,2,3), (0,0,0))
    check((2,5,10,10,10), (5,4,3,4,5), (1,1,1), (1,1,1))
    check((2,5,10,10,10), (5,4,3,4,5), (1,2,3), (0,0,0))
    check((2,5,10,10,10), (5,4,3,3,3), (1,1,1), (1,1,1), dilation=(1,2,3))
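# The checks above rely on the standard transposed-convolution output-size
# relation. This helper is an illustrative sketch added for clarity and is
# not part of the original test:
#   out = (in - 1) * stride - 2 * padding + dilation * (kernel - 1) + output_padding + 1
def _expected_deconv_size(in_size, kernel, stride=1, padding=0, dilation=1, output_padding=0):
    # e.g. in_size=10, kernel=3, stride=2, padding=1 -> 19
    return (in_size - 1) * stride - 2 * padding + dilation * (kernel - 1) + output_padding + 1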
def test_jtrand(self):
    import random
    jt.set_global_seed(0)
    dataset = YourDataset4().set_attrs(batch_size=1, shuffle=True, num_workers=4)
    for _ in range(10):
        dd = []
        for d in dataset:
            dd.append(d.numpy())
        # every collected batch should differ when the workers are seeded correctly
        for i in range(len(dd)):
            for j in range(i + 1, len(dd)):
                assert not np.allclose(dd[i], dd[j])
def test_dict(self):
    import random
    jt.set_global_seed(0)
    dataset = YourDataset5().set_attrs(batch_size=1, shuffle=True, num_workers=4)
    for _ in range(10):
        dd = []
        for d in dataset:
            # breakpoint()
            assert isinstance(d, dict)
            assert isinstance(d['a'], jt.Var)
            np.testing.assert_allclose(d['a'].numpy(), [[1, 2, 3]])
def check_gpu_with_cpu(T, C, N, S, S_min):
    jt.set_global_seed(1)

    # Initialize random batch of input vectors, for *size = (T,N,C)
    input = jt.randn(T, N, C).log_softmax(2)
    # input = -jt.ones((T, N, C))
    # input[0,0,1] += 0.01

    # Initialize random batch of targets (0 = blank, 1:C = classes)
    target = jt.randint(low=1, high=C, shape=(N, S), dtype=jt.int)
    _input_jt = input

    input_lengths = jt.full((N,), T, dtype=jt.int)
    target_lengths = jt.randint(low=S_min, high=S + 1, shape=(N,), dtype=jt.int)
    # ctc_loss = nn.CTCLoss()
    loss = jt.ctc_loss(input, target, input_lengths, target_lengths, reduction='none')
    _loss_jt = loss
    loss_jt = loss.numpy()

    dinput_jt = jt.grad(_loss_jt, _input_jt)
    dinput_jt.sync()

    with jt.flag_scope(use_cuda=1):
        input = input.copy()
        target = target.copy()
        input_lengths = input_lengths.copy()
        target_lengths = target_lengths.copy()
        loss = jt.ctc_loss(input, target, input_lengths, target_lengths, reduction='none')
        grad = jt.grad(loss, input)
        np.testing.assert_allclose(_loss_jt.numpy(), loss.numpy(), atol=1e-5, rtol=1e-5)
        np.testing.assert_allclose(dinput_jt.numpy(), grad.numpy(), atol=1e-5, rtol=1e-5)
def check(T, C, N, S, S_min):
    jt.set_global_seed(0)

    # Initialize random batch of input vectors, for *size = (T,N,C)
    input = jt.randn(T, N, C).log_softmax(2)
    # input = -jt.ones((T, N, C))
    # input[0,0,1] += 0.01

    # Initialize random batch of targets (0 = blank, 1:C = classes)
    target = jt.randint(low=1, high=C, shape=(N, S), dtype=jt.int)
    _input_jt = input

    input_lengths = jt.full((N,), T, dtype=jt.int)
    target_lengths = jt.randint(low=S_min, high=S + 1, shape=(N,), dtype=jt.int)
    # ctc_loss = nn.CTCLoss()
    loss = jt.ctc_loss(input, target, input_lengths, target_lengths, reduction='none')
    _loss_jt = loss
    loss_jt = loss.numpy()

    # compute the same loss with PyTorch as a reference
    input = torch.Tensor(input.numpy()).detach().requires_grad_()
    input_lengths = torch.full(size=(N,), fill_value=T, dtype=torch.long)
    target_lengths = torch.LongTensor(target_lengths.numpy())
    input_lengths = torch.LongTensor(input_lengths.numpy())
    target = torch.LongTensor(target.numpy())
    loss = tnn.CTCLoss(reduction='none')(input, target, input_lengths, target_lengths)
    np.testing.assert_allclose(loss.detach().numpy(), loss_jt, rtol=1e-5, atol=1e-5)

    dinput_jt = jt.grad(_loss_jt, _input_jt)
    dinput_jt.sync()

    loss.sum().backward()
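# A minimal sketch of how `check` and `check_gpu_with_cpu` might be driven from
# a test method; the (T, C, N, S, S_min) values below are illustrative
# assumptions, not taken from the original suite.
def test_ctc_loss_sketch():
    for T, C, N, S, S_min in [(50, 20, 16, 30, 10), (30, 10, 8, 15, 5)]:
        check(T, C, N, S, S_min)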
def test_jtrand(self):
    import random

    class YourDataset(Dataset):
        def __init__(self):
            super().__init__()
            self.set_attrs(total_len=160)

        def __getitem__(self, k):
            return jt.rand(2)

    jt.set_global_seed(0)
    dataset = YourDataset().set_attrs(batch_size=1, shuffle=True, num_workers=4)
    dd = []
    for d in dataset:
        dd.append(d.numpy())
    # every collected batch should differ when the workers are seeded correctly
    for i in range(len(dd)):
        for j in range(i + 1, len(dd)):
            assert not np.allclose(dd[i], dd[j])
def test_dict(self):
    import random

    class YourDataset(Dataset):
        def __init__(self):
            super().__init__()
            self.set_attrs(total_len=160)

        def __getitem__(self, k):
            return {"a": np.array([1, 2, 3])}

    jt.set_global_seed(0)
    dataset = YourDataset().set_attrs(batch_size=1, shuffle=True, num_workers=4)
    for _ in range(10):
        dd = []
        for d in dataset:
            # breakpoint()
            assert isinstance(d, dict)
            assert isinstance(d['a'], jt.Var)
            np.testing.assert_allclose(d['a'].numpy(), [[1, 2, 3]])
def _worker_main(self, worker_id, buffer, status):
    import jittor_utils
    jittor_utils.cc.init_subprocess()
    jt.jt_init_subprocess()
    # derive a distinct, deterministic seed for each worker from the global seed
    seed = jt.get_seed()
    wseed = (seed ^ (worker_id * 1167)) ^ 1234
    jt.set_global_seed(wseed)
    # The parallel_op_compiler is still problematic: it does not work on
    # Ubuntu 16.04, although it works on Ubuntu 20.04. It seems the static
    # state of the parallel compiler is not correctly initialized, so it is
    # disabled in worker processes.
    jt.flags.use_parallel_op_compiler = 0
    import time
    try:
        gid_obj = self.gid.get_obj()
        gid_lock = self.gid.get_lock()
        start = time.time()
        while True:
            # get the id of the next batch to produce
            with gid_lock:
                while buffer.is_stop() or self.idqueue.is_stop() or \
                        gid_obj.value >= self.batch_len:
                    self.num_idle.value += 1
                    self.num_idle_c.notify()
                    self.gidc.wait()
                    self.num_idle.value -= 1
                cid = gid_obj.value
                batch_index_list = self.index_list_numpy[
                    cid * self.real_batch_size:
                    min(self.real_len, (cid + 1) * self.real_batch_size)].copy()
                gid_obj.value += 1
                with self.idqueue_lock:
                    self.idqueue.push(worker_id)
            now = time.time()
            other_time = now - start
            start = now

            # load and transform data
            batch = []
            if mp_log_v:
                print(f"#{worker_id} {os.getpid()} load batch",
                      cid * self.real_batch_size,
                      min(self.real_len, (cid + 1) * self.real_batch_size))
            for i in batch_index_list:
                batch.append(self[i])
            batch = self.collate_batch(batch)
            now = time.time()
            data_time = now - start
            start = now

            # send data to main process
            if mp_log_v:
                print(f"#{worker_id} {os.getpid()} send", type(batch).__name__,
                      [type(b).__name__ for b in batch], buffer)
            try:
                buffer.send(batch)
            except:
                if buffer.is_stop():
                    continue
                raise
            now = time.time()
            send_time = now - start
            start = now
            status[0], status[1], status[2], status[3], status[4] = \
                other_time, data_time, send_time, \
                other_time + data_time + send_time, \
                img_open_hook.duration
            img_open_hook.duration = 0.0
    except:
        import traceback
        line = traceback.format_exc()
        print(line)
        os.kill(os.getppid(), signal.SIGINT)
        exit(0)
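# Illustrative sketch (not part of the original loader): the seed derivation
# used above gives each worker a distinct but deterministic random stream, so
# runs remain reproducible from the global seed while workers do not produce
# identical samples.
def _derive_worker_seed(global_seed, worker_id):
    return (global_seed ^ (worker_id * 1167)) ^ 1234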
def set_seed(seed):
    # seed both NumPy and Jittor so random values are reproducible across runs
    np.random.seed(seed)
    jittor.set_global_seed(seed)
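# Illustrative usage (an assumption, not part of the original module):
# seeding once at startup makes NumPy- and Jittor-generated values repeat
# across runs.
if __name__ == "__main__":
    set_seed(42)
    print(jittor.randn(2, 2))  # identical output on every run with the same seed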