def _init_workers(self):
    """Create the shared-memory bookkeeping structures and the worker
    processes used for parallel batch loading.

    Only allocates and wires up the shared state; workers are stored on
    ``self.workers`` (presumably started elsewhere — TODO confirm).
    """
    # Free cached graphs / allocator memory before forking workers so the
    # children do not inherit a large heap.
    jt.clean()
    jt.gc()
    # Shared index list, one int32 slot per sample; lock-free because it is
    # written through the numpy view below (see index_list_numpy).
    self.index_list = mp.Array('i', self.real_len, lock=False)
    workers = []
    # batch id to worker id
    self.idmap = mp.Array('i', self.batch_len, lock=False)
    # global token index
    self.gid = mp.Value('i', self.batch_len)
    # global token index condition
    # NOTE: gidc deliberately reuses gid's lock, so waiting on the condition
    # and mutating gid are serialized by the same mutex.
    self.gidc = mp.Condition(self.gid.get_lock())
    # number of idle workers
    self.num_idle = mp.Value('i', 0, lock=False)
    # number of idle workers condition
    # num_idle_c also shares gid's lock (single global mutex for all signaling).
    self.num_idle_c = mp.Condition(self.gid.get_lock())
    for i in range(self.num_workers):
        # Each worker runs self._worker_main(i) with its own ring buffer.
        w = Worker(target=self._worker_main, args=(i, ),
                   buffer_size=self.buffer_size,
                   keep_numpy_array=self.keep_numpy_array)
        workers.append(w)
    self.workers = workers
    # Zero-copy numpy view over the shared index list so the parent can
    # fill indices without per-element ctypes access.
    self.index_list_numpy = np.ndarray(dtype='int32', shape=self.real_len, buffer=self.index_list)
def predict_shadow(self, sketch_mat):
    """Run each per-part sketch model over its crop of ``sketch_mat``,
    collect the latent vectors, and synthesize the final image.

    Parameters
    ----------
    sketch_mat : np.ndarray
        Full sketch image; assumed (512, 512, C) uint8 — TODO confirm
        against callers.

    Side effects: fills ``self.shadow`` per part, sets ``self.vector_part``
    and ``self.generated``.
    """
    width = 512
    # sketch = (self.sketch_img*255).astype(np.uint8)
    # BUG FIX: these were only initialized inside the `if` branch, so the
    # `else` path crashed with NameError at `self.vector_part = vector_part`.
    fake = {}
    shadow = {}
    vector_part = {}
    if self.inmodel:
        for key in self.model.keys():
            loc = self.part[key]
            # Crop the square region (x=loc[0], y=loc[1], side=loc[2]) for this part.
            sketch_part = sketch_mat[loc[1]:loc[1] + loc[2],
                                     loc[0]:loc[0] + loc[2], :]
            if key == '':
                # Background part: blank out every named part region so the
                # background model only sees what the part models do not.
                for key_p in self.model.keys():
                    if key_p != '':
                        loc_p = self.part[key_p]
                        sketch_part[loc_p[1]:loc_p[1] + loc_p[2],
                                    loc_p[0]:loc_p[0] + loc_p[2], :] = 255
            shadow_, vector_part[key] = self.model[key].get_inter(
                sketch_part[:, :, 0], self.sample_Num,
                w_c=self.part_weight[key], sex=self.sex)
            # shadow_ = shadow_[loc[1]:loc[1] + loc[2], loc[0]:loc[0] + loc[2], :]
            if key == '':
                # Attenuate the background shadow inside each part region.
                for key_p in self.model.keys():
                    if key_p != '':
                        loc_p = self.part[key_p]
                        shadow_[loc_p[1]:loc_p[1] + loc_p[2],
                                loc_p[0]:loc_p[0] + loc_p[2], :] = 255 - (
                            255 - shadow_[loc_p[1]:loc_p[1] + loc_p[2],
                                          loc_p[0]:loc_p[0] + loc_p[2], :]
                        ) * (1 - (1 - self.mask[key_p]) * 0.2)
            # Paste the part shadow back into a white full-size canvas.
            self.shadow[key] = np.ones((width, width, 1), dtype=np.uint8) * 255
            self.shadow[key][loc[1]:loc[1] + loc[2],
                             loc[0]:loc[0] + loc[2], :] = \
                255 - (255 - shadow_) * (1 - self.mask[key])
            jt.gc()
    else:
        # BUG FIX: the original referenced undefined `sketch` here (it only
        # existed in the commented-out line above); use the actual argument.
        fake = np.ones_like(sketch_mat) * 255
    # self.fakes = fake
    self.vector_part = vector_part
    # NOTE(review): this overwrites the per-part shadows assigned above with
    # the (still empty) local `shadow` dict — looks suspicious, but behavior
    # is kept as-is; verify against the UI code that reads self.shadow.
    self.shadow = shadow
    # print(vector_part.keys())
    self.generated = self.combine_model.inference(vector_part)
    jt.gc()
def test_data(self):
    """Train two weight-shared MobileNetV2 copies for one SGD step and check
    that a depthwise-conv-enabled model matches the depthwise-disabled one.

    Ordering matters: model1 steps before model2's forward so both see the
    same pre-step weights at their respective forward passes.
    """
    test_img = np.random.random((64,3,224,224)).astype('float32')
    jittor_test_img = jt.array(test_img)
    # Deliberately huge lr so a single step produces a measurable weight change.
    lr = 100
    jittor_model = jtmodels.__dict__['mobilenet_v2']()
    jittor_model2 = jtmodels.__dict__['mobilenet_v2']()
    # Set eval to avoid dropout layer & bn errors
    jittor_model.train()
    jittor_model.classifier[0].eval()
    for m in jittor_model.modules():
        if isinstance(m, jt.nn.BatchNorm):
            m.eval()
    jittor_model2.train()
    jittor_model2.classifier[0].eval()
    for m in jittor_model2.modules():
        if isinstance(m, jt.nn.BatchNorm):
            m.eval()
    # Copy model1's weights into model2 so both start identical.
    load_parameters(jittor_model2, jittor_model)
    # model1: force the generic conv path; model2 keeps depthwise convs.
    for m in jittor_model.modules():
        if isinstance(m, jt.nn.Conv):
            m.is_depthwise_conv = False
    cnt = 0
    for m in jittor_model2.modules():
        if isinstance(m, jt.nn.Conv):
            if (m.is_depthwise_conv):
                cnt += 1
    # MobileNetV2 is expected to expose exactly 17 depthwise convs.
    assert cnt == 17, (cnt, '!=', 17)
    jt_optimizer = jt.nn.SGD(jittor_model.parameters(), lr = lr)
    jt_optimizer2 = jt.nn.SGD(jittor_model2.parameters(), lr = lr)
    jittor_result = jittor_model(jittor_test_img)
    # Random mask gives a scalar-free surrogate loss; reused for both models.
    mask = jt.random(jittor_result.shape, jittor_result.dtype)
    loss = jittor_result * mask
    jt_optimizer.step(loss)
    jt.sync_all(True)
    # model2 forward happens after model1's step but uses model2's own
    # (still pre-step) weights, so outputs should match model1's forward.
    jittor_result2 = jittor_model2(jittor_test_img)
    loss = jittor_result2 * mask
    x = jittor_result2.data + 1e-8
    y = jittor_result.data + 1e-8
    relative_error = abs(x - y) / abs(y)
    diff = relative_error.mean()
    assert diff < 1e-4, (diff, 'forword')
    jt_optimizer2.step(loss)
    jt.sync_all(True)
    # After both models stepped with the same loss, weights must agree too.
    compare_parameters(jittor_model, jittor_model2)
    jt.clean()
    jt.gc()
def test(h, w, total_alloc_call, total_alloc_byte, total_free_call=0, total_free_byte=0):
    """Exercise a tiny compute graph under the stat allocator and verify the
    four allocation counters.

    ``h``/``w`` give the tensor shape; the remaining arguments are the
    expected values of the corresponding ``stat_allocator_*`` flags.
    """
    # Start from a clean allocator so the counters reflect only this graph.
    jt.clean()
    jt.gc()
    with jt.flag_scope(use_stat_allocator=1):
        base = jt.random([h, w])
        doubled = base + base
        product = base * doubled
        product.data  # force evaluation so the allocations actually occur
        del base, doubled, product
        gc.collect()
    observed = (
        jt.flags.stat_allocator_total_alloc_call,
        jt.flags.stat_allocator_total_alloc_byte,
        jt.flags.stat_allocator_total_free_call,
        jt.flags.stat_allocator_total_free_byte,
    )
    expected = (total_alloc_call, total_alloc_byte, total_free_call, total_free_byte)
    assert observed == expected, (observed, expected)
def test_models(self):
    """For every model name in ``self.models``, load torchvision weights into
    the Jittor twin and require the forward outputs to agree within 1e-2
    mean relative error."""
    def to_cuda(x):
        # Move a torch tensor/module to GPU only when Jittor has CUDA.
        if jt.has_cuda:
            return x.cuda()
        return x

    threshold = 1e-2
    bs = 1

    def fresh_inputs(side):
        # Build matched torch/jittor inputs from one random array.
        arr = np.random.random((bs, 3, side, side)).astype('float32')
        return to_cuda(torch.Tensor(arr)), jt.array(arr)

    pytorch_test_img, jittor_test_img = fresh_inputs(224)
    for test_model in self.models:
        print("test model", test_model)
        if test_model == "inception_v3":
            # inception_v3 requires a larger spatial size than the others.
            pytorch_test_img, jittor_test_img = fresh_inputs(300)
        # Instantiate both frameworks' versions of the architecture.
        pytorch_model = to_cuda(tcmodels.__dict__[test_model]())
        jittor_model = jtmodels.__dict__[test_model]()
        # eval() disables dropout so the comparison is deterministic.
        pytorch_model.eval()
        jittor_model.eval()
        # Share weights so any divergence comes from the ops, not the init.
        jittor_model.load_parameters(pytorch_model.state_dict())
        # +1 shifts values away from zero before taking a relative error.
        ref = pytorch_model(pytorch_test_img).detach().cpu().numpy() + 1
        out = jittor_model(jittor_test_img).data + 1
        diff = (abs(ref - out) / abs(out)).mean()
        assert diff < threshold, f"[*] {test_model} forward fails..., Relative Error: {diff}"
        print(f"[*] {test_model} forword passes with Relative Error {diff}")
        jt.clean()
        jt.gc()
        torch.cuda.empty_cache()
    print('all models pass test.')
# Batch-drive the combined sketch model over every image in fileRoot and
# write the generated results to ori<idx>.jpg.
images_path = sorted(glob.glob(fileRoot + r"/*"))
# One parameter row per input image:
# [eye1, eye2, nose, mouth, background part weights, sex flag].
params = [[0.80, 0.63, 1.0, 0.88, 0.93, 1],
          [1.0, 1.0, 1.0, 1.0, 0.84, 0],
          [0.1, 0.39, 0.58, 0.63, 0.49, 1],
          [1.0, 1.0, 1.0, 1.0, 1.0, 1],
          [0.78, 1.0, 1.0, 1.0, 0.79, 1]]
for x, fileName in enumerate(images_path):
    #fileName = fileRoot + str(x) + "_out_Similarity.jpg"
    print(fileName)
    mat_img = cv2.imread(fileName)
    mat_img = cv2.resize(mat_img, (512, 512), interpolation=cv2.INTER_CUBIC)
    # imread returns BGR; swap channels before feeding the model.
    mat_img = cv2.cvtColor(mat_img, cv2.COLOR_RGB2BGR)
    sketch = (mat_img).astype(np.uint8)  # kept: may be read by later code
    # FIX: the original kept a manual counter `i` that always equaled the
    # enumerate index `x` (which the output filename already used) — use x.
    combine_model.sex = params[x][5]  #666
    combine_model.part_weight['eye1'] = params[x][0]
    combine_model.part_weight['eye2'] = params[x][1]
    combine_model.part_weight['nose'] = params[x][2]
    combine_model.part_weight['mouth'] = params[x][3]
    combine_model.part_weight[''] = params[x][4]
    combine_model.predict_shadow(mat_img)
    print(combine_model.generated)
    cv2.imwrite('ori' + str(x) + '.jpg',
                cv2.cvtColor(combine_model.generated, cv2.COLOR_BGR2RGB))
    jt.gc()
def tearDown(self):
    # Drop cached graphs and free allocator memory after each test so one
    # test's allocations cannot skew the next test's measurements.
    jt.clean()
    jt.gc()
def test_allmodels(bs=1):
    """Benchmark and cross-check every model in the module-level ``models``
    list between Jittor and PyTorch.

    Parameters
    ----------
    bs : int
        Batch size; also scales how many warmup/timed iterations run
        (roughly 512 total samples per phase).

    Relies on module globals: ``models``, ``to_cuda``, ``threshold``,
    ``tcmodels``, ``jtmodels``.
    """
    # Define numpy input image
    test_img = np.random.random((bs,3,224,224)).astype('float32')
    # Define pytorch & jittor input image
    pytorch_test_img = to_cuda(torch.Tensor(test_img))
    jittor_test_img = jt.array(test_img)
    for model in models:
        if model == "inception_v3":
            # inception_v3 needs a larger spatial input than the rest.
            test_img = np.random.random((bs,3,300,300)).astype('float32')
            pytorch_test_img = to_cuda(torch.Tensor(test_img))
            jittor_test_img = jt.array(test_img)
        # Inference only: no gradients on either side.
        jittor_test_img.stop_grad()
        pytorch_test_img.requires_grad = False
        # Define pytorch & jittor model
        pytorch_model = to_cuda(tcmodels.__dict__[model]())
        jittor_model = jtmodels.__dict__[model]()
        # Set eval to avoid dropout layer
        pytorch_model.eval()
        jittor_model.eval()
        # Jittor loads pytorch parameters to ensure forward alignment
        jittor_model.load_parameters(pytorch_model.state_dict())
        total = 512
        warmup = max(2, total // bs // 8)
        rerun = max(2, total // bs)
        print("=" * 20 + model + "=" * 20)
        # Jittor warms up (JIT compilation happens here, not in the timed loop)
        for i in range(warmup):
            jittor_result = jittor_model(jittor_test_img)
        jt.sync_all(True)
        # Test jittor and once forward time
        sta = time.time()
        for i in range(rerun):
            jittor_result = jittor_model(jittor_test_img)
            jittor_result.sync()
        jt.sync_all(True)
        end = time.time()
        # BUG FIX: the average used time.time() again after `end` was taken,
        # inconsistent with the FPS figure and the PyTorch branch below.
        print(f"- Jittor {model} forward average time cost: {round((end - sta) / rerun,5)}, Batch Size: {bs}, FPS: {round(bs * rerun / (end - sta),2)}")
        # pytorch warmup
        for i in range(warmup):
            pytorch_result = pytorch_model(pytorch_test_img)
        # Test pytorch and once forward time
        torch.cuda.synchronize()
        sta = time.time()
        for i in range(rerun):
            pytorch_result = pytorch_model(pytorch_test_img)
        torch.cuda.synchronize()
        end = time.time()
        print(f"- Pytorch {model} forward average time cost: {round((end - sta) / rerun,5)}, Batch Size: {bs}, FPS: {round(bs * rerun / (end - sta),2)}")
        # Judge pytorch & jittor forward relative error; +1 keeps the
        # denominator away from zero.
        x = pytorch_result.detach().cpu().numpy() + 1
        y = jittor_result.numpy() + 1
        relative_error = abs(x - y) / abs(y)
        diff = relative_error.mean()
        assert diff < threshold, f"[*] {model} forward fails..., Relative Error: {diff}"
        print(f"[*] {model} forword passes with Relative Error {diff}")
        torch.cuda.empty_cache()
        jt.clean()
        jt.gc()
def train():
    """NeRF training entry point (Jittor port).

    Loads one of the llff/blender/deepvoxels datasets, builds the NeRF
    networks via ``create_nerf``, then runs the ray-sampling / render /
    backprop loop with periodic checkpointing, video rendering, and
    TensorBoard logging.
    """
    parser = config_parser()
    args = parser.parse_args()

    # ---- Load data -----------------------------------------------------
    intrinsic = None
    if args.dataset_type == 'llff':
        images, poses, bds, render_poses, i_test = load_llff_data(
            args.datadir, args.factor, recenter=True, bd_factor=.75,
            spherify=args.spherify)
        hwf = poses[0, :3, -1]
        poses = poses[:, :3, :4]
        print('Loaded llff', images.shape, render_poses.shape, hwf,
              args.datadir)
        if not isinstance(i_test, list):
            i_test = [i_test]
        if args.llffhold > 0:
            # Hold out every llffhold-th image for testing.
            print('Auto LLFF holdout,', args.llffhold)
            i_test = np.arange(images.shape[0])[::args.llffhold]
        i_val = i_test
        i_train = np.array([
            i for i in np.arange(int(images.shape[0]))
            if (i not in i_test and i not in i_val)
        ])
        print('DEFINING BOUNDS')
        if args.no_ndc:
            near = np.ndarray.min(bds) * .9
            far = np.ndarray.max(bds) * 1.
        else:
            # NDC space: rays are mapped to the [0, 1] depth range.
            near = 0.
            far = 1.
        print('NEAR FAR', near, far)
    elif args.dataset_type == 'blender':
        testskip = args.testskip
        faketestskip = args.faketestskip
        if jt.mpi and jt.mpi.local_rank() != 0:
            # Non-root MPI ranks load a sparser test split.
            testskip = faketestskip
            faketestskip = 1
        # NOTE(review): the adjusted `testskip` local is never passed below —
        # both calls still use args.testskip; looks like a latent bug, verify.
        if args.do_intrinsic:
            images, poses, intrinsic, render_poses, hwf, i_split = \
                load_blender_data(args.datadir, args.half_res, args.testskip,
                                  args.blender_factor, True)
        else:
            images, poses, render_poses, hwf, i_split = load_blender_data(
                args.datadir, args.half_res, args.testskip,
                args.blender_factor)
        print('Loaded blender', images.shape, render_poses.shape, hwf,
              args.datadir)
        i_train, i_val, i_test = i_split
        # Keep the full test split; i_test becomes the thinned-out version.
        i_test_tot = i_test
        i_test = i_test[::args.faketestskip]
        near = args.near
        far = args.far
        print(args.do_intrinsic)
        print("hwf", hwf)
        print("near", near)
        print("far", far)
        if args.white_bkgd:
            # Composite RGBA onto a white background using the alpha channel.
            images = images[..., :3] * images[..., -1:] + (1.
                                                           - images[..., -1:])
        else:
            images = images[..., :3]
    elif args.dataset_type == 'deepvoxels':
        images, poses, render_poses, hwf, i_split = load_dv_data(
            scene=args.shape, basedir=args.datadir, testskip=args.testskip)
        print('Loaded deepvoxels', images.shape, render_poses.shape, hwf,
              args.datadir)
        i_train, i_val, i_test = i_split
        # Camera distance from origin sets the near/far shell.
        hemi_R = np.mean(np.linalg.norm(poses[:, :3, -1], axis=-1))
        near = hemi_R - 1.
        far = hemi_R + 1.
    else:
        print('Unknown dataset type', args.dataset_type, 'exiting')
        return
    # NOTE(review): i_test_tot is only defined on the blender path but is
    # read in the testset logging below — llff/deepvoxels would NameError
    # there; confirm intended dataset coverage.

    # ---- Cast intrinsics to right types --------------------------------
    H, W, focal = hwf
    H, W = int(H), int(W)
    hwf = [H, W, focal]
    render_poses = np.array(poses[i_test])

    # ---- Create log dir and copy the config file -----------------------
    basedir = args.basedir
    expname = args.expname
    os.makedirs(os.path.join(basedir, expname), exist_ok=True)
    f = os.path.join(basedir, expname, 'args.txt')
    with open(f, 'w') as file:
        for arg in sorted(vars(args)):
            attr = getattr(args, arg)
            file.write('{} = {}\n'.format(arg, attr))
    if args.config is not None:
        f = os.path.join(basedir, expname, 'config.txt')
        with open(f, 'w') as file:
            file.write(open(args.config, 'r').read())

    # ---- Create nerf model ---------------------------------------------
    render_kwargs_train, render_kwargs_test, start, grad_vars, optimizer = \
        create_nerf(args)
    global_step = start
    bds_dict = {
        'near': near,
        'far': far,
    }
    render_kwargs_train.update(bds_dict)
    render_kwargs_test.update(bds_dict)

    # Move testing data to GPU
    render_poses = jt.array(render_poses)

    # Short circuit if only rendering out from trained model
    if args.render_only:
        print('RENDER ONLY')
        with jt.no_grad():
            testsavedir = os.path.join(
                basedir, expname, 'renderonly_{}_{:06d}'.format(
                    'test' if args.render_test else 'path', start))
            os.makedirs(testsavedir, exist_ok=True)
            print('test poses shape', render_poses.shape)
            rgbs, _ = render_path(render_poses, hwf, args.chunk,
                                  render_kwargs_test, savedir=testsavedir,
                                  render_factor=args.render_factor)
            print('Done rendering', testsavedir)
            imageio.mimwrite(os.path.join(testsavedir, 'video.mp4'),
                             to8b(rgbs), fps=30, quality=8)
            return

    # ---- Prepare raybatch tensor if batching random rays ---------------
    accumulation_steps = 1
    N_rand = args.N_rand // accumulation_steps
    use_batching = not args.no_batching
    if use_batching:
        # For random ray batching: precompute every ray of every train image.
        print('get rays')
        rays = np.stack([get_rays_np(H, W, focal, p)
                         for p in poses[:, :3, :4]], 0)  # [N, ro+rd, H, W, 3]
        print('done, concats')
        rays_rgb = np.concatenate([rays, images[:, None]],
                                  1)  # [N, ro+rd+rgb, H, W, 3]
        rays_rgb = np.transpose(rays_rgb,
                                [0, 2, 3, 1, 4])  # [N, H, W, ro+rd+rgb, 3]
        rays_rgb = np.stack([rays_rgb[i] for i in i_train],
                            0)  # train images only
        rays_rgb = np.reshape(rays_rgb,
                              [-1, 3, 3])  # [(N-1)*H*W, ro+rd+rgb, 3]
        rays_rgb = rays_rgb.astype(np.float32)
        print('shuffle rays')
        np.random.shuffle(rays_rgb)
        print('done')
        i_batch = 0

    # Move training data to GPU
    images = jt.array(images.astype(np.float32))
    poses = jt.array(poses)
    if use_batching:
        rays_rgb = jt.array(rays_rgb)

    N_iters = 51000
    print('Begin')
    print('TRAIN views are', i_train)
    print('TEST views are', i_test)
    print('VAL views are', i_val)

    # Summary writers
    # writer = SummaryWriter(os.path.join(basedir, 'summaries', expname))
    # Only the root rank writes TensorBoard summaries.
    if not jt.mpi or jt.mpi.local_rank() == 0:
        date = str(datetime.datetime.now())
        # Timestamp like YYYYMMDD_HHMM for a unique log directory name.
        date = date[:date.rfind(":")].replace("-", "")\
            .replace(":", "")\
            .replace(" ", "_")
        gpu_idx = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
        log_dir = os.path.join("./logs", "summaries",
                               "log_" + date + "_gpu" + gpu_idx)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        writer = SummaryWriter(log_dir=log_dir)

    start = start + 1
    for i in trange(start, N_iters):
        # jt.display_memory_info()
        time0 = time.time()
        # ---- Sample random ray batch -----------------------------------
        if use_batching:
            # Random over all images
            batch = rays_rgb[i_batch:i_batch + N_rand]  # [B, 2+1, 3*?]
            batch = jt.transpose(batch, (1, 0, 2))
            batch_rays, target_s = batch[:2], batch[2]
            i_batch += N_rand
            if i_batch >= rays_rgb.shape[0]:
                print("Shuffle data after an epoch!")
                rand_idx = jt.randperm(rays_rgb.shape[0])
                rays_rgb = rays_rgb[rand_idx]
                i_batch = 0
        else:
            # Random from one image; seeding with i makes sampling
            # reproducible per iteration (and identical across MPI ranks).
            np.random.seed(i)
            img_i = np.random.choice(i_train)
            target = images[img_i]  #.squeeze(0)
            pose = poses[img_i, :3, :4]  #.squeeze(0)
            if N_rand is not None:
                rays_o, rays_d = pinhole_get_rays(
                    H, W, focal, pose, intrinsic)  # (H, W, 3), (H, W, 3)
                if i < args.precrop_iters:
                    # Early iterations sample only the image center.
                    dH = int(H // 2 * args.precrop_frac)
                    dW = int(W // 2 * args.precrop_frac)
                    coords = jt.stack(
                        jt.meshgrid(
                            jt.linspace(H // 2 - dH, H // 2 + dH - 1, 2 * dH),
                            jt.linspace(W // 2 - dW, W // 2 + dW - 1, 2 * dW)),
                        -1)
                    if i == start:
                        print(
                            f"[Config] Center cropping of size {2*dH} x {2*dW} is enabled until iter {args.precrop_iters}"
                        )
                else:
                    coords = jt.stack(
                        jt.meshgrid(jt.linspace(0, H - 1, H),
                                    jt.linspace(0, W - 1, W)),
                        -1)  # (H, W, 2)
                coords = jt.reshape(coords, [-1, 2])  # (H * W, 2)
                select_inds = np.random.choice(coords.shape[0],
                                               size=[N_rand],
                                               replace=False)  # (N_rand,)
                select_coords = coords[select_inds].int()  # (N_rand, 2)
                rays_o = rays_o[select_coords[:, 0],
                                select_coords[:, 1]]  # (N_rand, 3)
                rays_d = rays_d[select_coords[:, 0],
                                select_coords[:, 1]]  # (N_rand, 3)
                batch_rays = jt.stack([rays_o, rays_d], 0)
                target_s = target[select_coords[:, 0],
                                  select_coords[:, 1]]  # (N_rand, 3)

        #####  Core optimization loop  #####
        rgb, disp, acc, extras = render(H, W, focal,
                                        chunk=args.chunk,
                                        rays=batch_rays,
                                        verbose=i < 10,
                                        retraw=True,
                                        **render_kwargs_train)
        img_loss = img2mse(rgb, target_s)
        trans = extras['raw'][..., -1]  # NOTE(review): computed but unused
        loss = img_loss
        psnr = mse2psnr(img_loss)
        if 'rgb0' in extras:
            # Auxiliary loss from the coarse network's output.
            img_loss0 = img2mse(extras['rgb0'], target_s)
            loss = loss + img_loss0
            psnr0 = mse2psnr(img_loss0)
        optimizer.backward(loss / accumulation_steps)
        if i % accumulation_steps == 0:
            optimizer.step()

        ###   update learning rate   ###
        # Exponential decay: lr *= 0.1 every (lrate_decay * 1000) steps.
        decay_rate = 0.1
        decay_steps = args.lrate_decay * accumulation_steps * 1000
        new_lrate = args.lrate * (decay_rate**(global_step / decay_steps))
        for param_group in optimizer.param_groups:
            param_group['lr'] = new_lrate
        ################################
        dt = time.time() - time0

        # ---- Rest is logging -------------------------------------------
        if (i + 1) % args.i_weights == 0 and (not jt.mpi
                                              or jt.mpi.local_rank() == 0):
            print(i)
            path = os.path.join(basedir, expname, '{:06d}.tar'.format(i))
            jt.save(
                {
                    'global_step':
                    global_step,
                    'network_fn_state_dict':
                    render_kwargs_train['network_fn'].state_dict(),
                    'network_fine_state_dict':
                    render_kwargs_train['network_fine'].state_dict(),
                }, path)
            print('Saved checkpoints at', path)

        if i % args.i_video == 0 and i > 0:
            # Turn on testing mode
            with jt.no_grad():
                rgbs, disps = render_path(render_poses, hwf, args.chunk,
                                          render_kwargs_test,
                                          intrinsic=intrinsic)
            if not jt.mpi or jt.mpi.local_rank() == 0:
                print('Done, saving', rgbs.shape, disps.shape)
                moviebase = os.path.join(
                    basedir, expname, '{}_spiral_{:06d}_'.format(expname, i))
                print('movie base ', moviebase)
                imageio.mimwrite(moviebase + 'rgb.mp4',
                                 to8b(rgbs),
                                 fps=30,
                                 quality=8)
                imageio.mimwrite(moviebase + 'disp.mp4',
                                 to8b(disps / np.max(disps)),
                                 fps=30,
                                 quality=8)

        if i % args.i_print == 0:
            tqdm.write(
                f"[TRAIN] Iter: {i} Loss: {loss.item()} PSNR: {psnr.item()}")

        if i % args.i_img == 0:
            # Render one random validation view and log it to TensorBoard.
            img_i = np.random.choice(i_val)
            target = images[img_i]
            pose = poses[img_i, :3, :4]
            with jt.no_grad():
                rgb, disp, acc, extras = render(H, W, focal,
                                                chunk=args.chunk,
                                                c2w=pose,
                                                intrinsic=intrinsic,
                                                **render_kwargs_test)
            psnr = mse2psnr(img2mse(rgb, target))
            rgb = rgb.numpy()
            disp = disp.numpy()
            acc = acc.numpy()
            if not jt.mpi or jt.mpi.local_rank() == 0:
                writer.add_image('test/rgb',
                                 to8b(rgb),
                                 global_step,
                                 dataformats="HWC")
                writer.add_image('test/target',
                                 target.numpy(),
                                 global_step,
                                 dataformats="HWC")
                writer.add_scalar('test/psnr', psnr.item(), global_step)
        # Free per-iteration graph/allocator state to bound memory growth.
        jt.clean_graph()
        jt.sync_all()
        jt.gc()

        if i % args.i_testset == 0 and i > 0:
            # Every i_tottest iterations render the FULL test split,
            # otherwise the thinned one.
            si_test = i_test_tot if i % args.i_tottest == 0 else i_test
            testsavedir = os.path.join(basedir, expname,
                                       'testset_{:06d}'.format(i))
            os.makedirs(testsavedir, exist_ok=True)
            print('test poses shape', poses[si_test].shape)
            with jt.no_grad():
                rgbs, disps = render_path(jt.array(poses[si_test]), hwf,
                                          args.chunk, render_kwargs_test,
                                          savedir=testsavedir,
                                          intrinsic=intrinsic,
                                          expname=expname)
            jt.gc()
        global_step += 1