def draw_sequentially(self, mode, m_noise, m_image):
    """Push the first input image through every stored scale in order.

    Args:
        mode: ``'rec'`` reuses the stored noise maps in ``self.Zs``;
            ``'rand'`` draws fresh noise with ``generate_noise``. Any other
            value leaves the input untouched.
        m_noise: Callable applied to freshly drawn noise before it is mixed
            with the image (only used in ``'rand'`` mode).
        m_image: Callable applied to the cropped previous image at each scale.

    Returns:
        The upscaled output of the last processed scale, cropped to the
        spatial size of the following real image, or
        ``self.first_img_input`` when nothing was processed.
    """
    result = self.first_img_input
    if not self.Gs:
        return result

    def _run_scale(prev, generator, noise, cur_real, next_real, amp):
        # Crop to the current scale, add amplified noise, generate, then
        # upscale and crop to the next scale's spatial size.
        prev = prev[:, :, 0:cur_real.shape[2], 0:cur_real.shape[3]]
        padded_img = m_image(prev)
        noisy_img = amp * noise + padded_img
        generated = generator(noisy_img.detach(), padded_img)
        enlarged = resize_img(generated, 1/self.config.scale_factor, self.config)
        return enlarged[:, :, 0:next_real.shape[2], 0:next_real.shape[3]]

    if mode == 'rec':
        scales = zip(self.Gs, self.Zs, self.reals, self.reals[1:],
                     self.noise_amps)
        for generator, rec_z, cur_real, next_real, amp in scales:
            result = _run_scale(result, generator, rec_z, cur_real,
                                next_real, amp)
    elif mode == 'rand':
        margin = int(((self.config.kernel_size - 1) * self.config.num_layers) / 2)
        scales = zip(self.Gs, self.Zs, self.reals, self.reals[1:],
                     self.noise_amps)
        for level, (generator, rec_z, cur_real, next_real, amp) in enumerate(scales):
            # Fresh noise covers the stored noise map minus its padding.
            noise_h = rec_z.shape[2] - 2 * margin
            noise_w = rec_z.shape[3] - 2 * margin
            if level == 0:
                # First scale: one noise channel shared across 3 channels.
                noise = generate_noise([1, noise_h, noise_w],
                                       device=self.config.device)
                noise = noise.expand(1, 3, noise.shape[2], noise.shape[3])
            else:
                # Later scales: independent noise per image channel.
                noise = generate_noise(
                    [self.config.img_channel, noise_h, noise_w],
                    device=self.config.device)
            result = _run_scale(result, generator, m_noise(noise), cur_real,
                                next_real, amp)
    return result
def scale_image(self, img, mask, kp, vis, sfm_pose):
    """Rescale an image, its mask and its annotations to ``self.img_size``.

    The scale factor maps the larger of the image's first two dimensions to
    ``self.img_size``. ``kp`` and ``sfm_pose`` are modified in place and
    also returned.

    Args:
        img: Image array; only its first two dimensions are inspected here.
        mask: Mask resized with the same factor as ``img``.
        kp: Keypoint array; the first two columns of the rows selected by
            ``vis`` are multiplied by the scale factor.
        vis: Index/mask selecting which keypoint rows to rescale.
        sfm_pose: Indexable whose entries 0 and 1 are multiplied by the
            scale factor.

    Returns:
        Tuple ``(img_scale, mask_scale, kp, sfm_pose)``.
    """
    dims = np.shape(img)
    longest_side = max(dims[0], dims[1])
    scale = self.img_size / float(longest_side)

    img_scale, _ = image_utils.resize_img(img, scale)
    mask_scale, _ = image_utils.resize_img(mask, scale)

    # Annotations follow the image: scale them by the same factor, in place.
    kp[vis, :2] *= scale
    for entry in (0, 1):
        sfm_pose[entry] *= scale

    return img_scale, mask_scale, kp, sfm_pose
def preprocess_image(img_path, img_size=256):
    """Load an image file and turn it into a network-ready tensor.

    Steps: read and divide by 255, replicate a 2-D (grayscale) image to three
    channels, resize by the factor that maps the longer side to ``img_size``,
    crop an ``img_size`` box around the centre, move channels first, convert
    to a float tensor, apply ``resnet_transform`` and flip along the last
    axis with probability 0.5.

    Args:
        img_path: Path passed to ``io.imread``.
        img_size: Target side length used for both resizing and cropping.

    Returns:
        The transformed ``torch`` tensor.
    """
    image = io.imread(img_path) / 255.

    # A grayscale image has no channel axis; add one and repeat it 3 times.
    if len(image.shape) == 2:
        image = np.repeat(np.expand_dims(image, 2), 3, axis=2)

    # Resize so that the longer of the two spatial sides matches img_size.
    longest_side = np.max(image.shape[:2])
    image, _ = img_util.resize_img(image, float(img_size) / longest_side)

    # Centre of the resized image, reversed from (row, col) to (x, y).
    center = np.round(np.array(image.shape[:2]) / 2).astype(int)[::-1]
    half = img_size / 2.
    bbox = np.hstack([center - half, center + half])
    image = img_util.crop(image, bbox, bgval=1.)

    # HxWxC -> CxHxW, then the preprocessing expected by the ResNet.
    image = np.transpose(image, (2, 0, 1))
    tensor = resnet_transform(torch.tensor(image, dtype=torch.float))

    # Random flip along the last (width) axis.
    if np.random.rand(1) > 0.5:
        tensor = torch.flip(tensor, (2,))
    return tensor
def create_sr_inference_input(self, real, iter_num):
    """Rebuild the model pyramid for super-resolution inference.

    The finest generator, discriminator and noise amplitude are kept, every
    per-scale list on ``self`` is cleared, and ``iter_num`` new scales are
    appended. Each new scale reuses the finest models and holds ``real``
    upscaled once more by ``1 / config.scale_factor`` together with an
    all-zero, zero-padded noise map.

    Args:
        real: Image tensor to start upscaling from.
        iter_num: Number of scales to create.

    Returns:
        The first upscaled image, ``self.reals[0]``.

    Raises:
        IndexError: If ``self.Gs``, ``self.Ds`` or ``self.noise_amps`` is
            empty, or if ``iter_num`` is 0 (no scale is created, so
            ``self.reals[0]`` does not exist).
    """
    resized_real = real
    pad = nn.ZeroPad2d(5)

    # Remember the finest-scale models before the lists are cleared.
    finest_G = self.Gs[-1]
    finest_D = self.Ds[-1]
    finest_noise_amp = self.noise_amps[-1]

    self.Zs = []
    self.Gs = []
    self.Ds = []
    self.reals = []
    self.noise_amps = []

    for _ in range(iter_num):
        resized_real = resize_img(resized_real, 1 / self.config.scale_factor,
                                  self.config)
        # NOTE(review): `{0}` is the literal 0, so every iteration overwrites
        # the same file; confirm whether the loop index was intended.
        print(f'{self.config.infer_dir}/{0}_real.png')
        plt.imsave(f'{self.config.infer_dir}/{0}_real.png',
                   torch2np(resized_real), vmin=0, vmax=1)

        self.reals.append(resized_real)
        self.Gs.append(finest_G)
        self.Ds.append(finest_D)
        self.noise_amps.append(finest_noise_amp)

        # torch.full(shape, 0) infers an integer dtype from the int fill
        # value on current PyTorch; torch.zeros keeps the noise map float.
        rec_z = torch.zeros(resized_real.shape, device=self.config.device)
        self.Zs.append(pad(rec_z))

    return self.reals[0]
def preprocess_image(img_path, img_size=256):
    """Load an image and return it resized, centre-cropped and channel-first.

    The image is read from ``img_path`` and divided by 255, resized by the
    factor that maps its longer side to ``img_size``, cropped with a box of
    side ``img_size`` around its centre, and transposed from HxWxC to CxHxW.
    A 2-D (grayscale) image is first replicated to three channels so that
    the final transpose is valid.

    Args:
        img_path: Path passed to ``io.imread``.
        img_size: Target side length used for both resizing and cropping.

    Returns:
        The preprocessed image as a channel-first array.
    """
    img = io.imread(img_path) / 255.

    # Grayscale input has no channel axis; without this step the transpose
    # to (2, 0, 1) below would fail.
    if img.ndim == 2:
        img = np.repeat(np.expand_dims(img, 2), 3, axis=2)

    # Scale the image so that its longer side becomes img_size.
    scale_factor = float(img_size) / np.max(img.shape[:2])
    img, _ = img_util.resize_img(img, scale_factor)

    # Crop a bounding box of side img_size outward from the image centre.
    # NOTE(review): an earlier comment here observed that the crop comes out
    # one pixel larger than the resize (257 vs 256); presumably
    # img_util.crop treats the box ends as inclusive - confirm.
    center = np.round(np.array(img.shape[:2]) / 2).astype(int)
    # Image centre in (x, y) order.
    center = center[::-1]
    bbox = np.hstack([center - img_size / 2., center + img_size / 2.])
    img = img_util.crop(img, bbox, bgval=1.)

    # Transpose the image to 3xHxW.
    img = np.transpose(img, (2, 0, 1))
    return img
def create_sr_inference_input(self, real, iter_num):
    """Rebuild the generator pyramid for super-resolution inference.

    The finest generator and noise amplitude are kept, every per-scale list
    on ``self`` is cleared, and ``iter_num`` new scales are appended. Each
    new scale reuses the finest generator and holds ``real`` upscaled once
    more by ``1 / config.scale_factor`` together with an all-zero,
    zero-padded noise map.

    Args:
        real: Image tensor to start upscaling from.
        iter_num: Number of scales to create.

    Returns:
        The first upscaled image, ``self.reals[0]``.

    Raises:
        IndexError: If ``self.Gs`` or ``self.noise_amps`` is empty, or if
            ``iter_num`` is 0 (no scale is created, so ``self.reals[0]``
            does not exist).
    """
    pad = nn.ZeroPad2d(5)

    # Remember the finest-scale generator before the lists are cleared.
    finest_G = self.Gs[-1]
    finest_noise_amp = self.noise_amps[-1]

    self.Zs = []
    self.Gs = []
    self.reals = []
    self.noise_amps = []

    current = real
    for _ in range(iter_num):
        current = resize_img(current, 1 / self.config.scale_factor,
                             self.config)
        self.reals.append(current)
        self.Gs.append(finest_G)
        self.noise_amps.append(finest_noise_amp)

        # torch.full(shape, 0) infers an integer dtype from the int fill
        # value on current PyTorch; torch.zeros keeps the noise map float.
        zero_noise = torch.zeros(current.shape, device=self.config.device)
        self.Zs.append(pad(zero_noise))

    return self.reals[0]
def train(self):
    """Train one generator/discriminator pair per pyramid scale.

    Builds the pyramid of real images, then for each scale from 0 to
    ``config.stop_scale``: sets the channel widths, saves the real image of
    that scale, initialises the models (warm-starting from the previous
    scale's weights when the width did not change), trains the scale,
    freezes both models and checkpoints the accumulated lists under
    ``config.exp_dir``.
    """
    cfg = self.config

    # Prepare image pyramid
    source_img = read_img(cfg)
    real = resize_img(source_img, cfg.start_scale, cfg)
    self.reals = creat_reals_pyramid(real, self.reals, cfg)

    self.writer = SummaryWriter(f'{self.config.exp_dir}/logs')

    last_nfc = 0
    # Pyramid training
    for scale_iter in range(cfg.stop_scale + 1):
        # Widths double every four scales and are capped at 128.
        growth = 2 ** (scale_iter // 4)
        cfg.nfc = min(cfg.nfc_init * growth, 128)
        cfg.min_nfc = min(cfg.min_nfc_init * growth, 128)

        # Prepare directory to save images
        cfg.result_dir = f'{self.config.exp_dir}/{scale_iter}'
        os.makedirs(cfg.result_dir, exist_ok=True)
        plt.imsave(f'{self.config.result_dir}/real_scale.png',
                   torch2np(self.reals[scale_iter]), vmin=0, vmax=1)

        cur_discriminator, cur_generator = self.init_models()

        # Same width as the previous scale: start from its saved weights.
        if last_nfc == cfg.nfc:
            generator_state = torch.load(
                f'{self.config.exp_dir}/{scale_iter - 1}/generator.pth')
            cur_generator.load_state_dict(generator_state)
            discriminator_state = torch.load(
                f'{self.config.exp_dir}/{scale_iter - 1}/discriminator.pth')
            cur_discriminator.load_state_dict(discriminator_state)

        cur_z, cur_generator = self.train_single_stage(cur_discriminator,
                                                       cur_generator)

        # Freeze the finished models and switch them to eval mode.
        cur_generator = reset_grads(cur_generator, False)
        cur_generator.eval()
        cur_discriminator = reset_grads(cur_discriminator, False)
        cur_discriminator.eval()

        self.Gs.append(cur_generator)
        self.Zs.append(cur_z)
        self.noise_amps.append(cfg.noise_amp)

        # Checkpoint the accumulated state after every scale.
        checkpoints = (
            (self.Zs, 'Zs.pth'),
            (self.Gs, 'Gs.pth'),
            (self.reals, 'reals.pth'),
            (self.noise_amps, 'noiseAmp.pth'),
        )
        for payload, filename in checkpoints:
            torch.save(payload, f'{self.config.exp_dir}/{filename}')

        last_nfc = cfg.nfc
        del cur_discriminator, cur_generator
def preprocess_image(img_path, img_size=256):
    """Read an image file and prepare a centre-cropped, channel-first array.

    Processing order: read and divide by 255, replicate a 2-D (grayscale)
    image to three channels, resize by the factor that maps the longer side
    to ``img_size``, crop an ``img_size`` box around the centre, and
    transpose HxWxC to CxHxW.

    Args:
        img_path: Path passed to ``io.imread``.
        img_size: Target side length used for both resizing and cropping.

    Returns:
        The preprocessed image as a channel-first array.
    """
    image = io.imread(img_path) / 255.

    # A grayscale image has no channel axis, which would make the final
    # (2, 0, 1) transpose fail; replicate it to three channels first.
    if image.ndim == 2:
        image = np.repeat(np.expand_dims(image, 2), 3, axis=2)

    # Scale the max image size to be img_size.
    scale_factor = float(img_size) / np.max(image.shape[:2])
    image, _ = img_util.resize_img(image, scale_factor)

    # Crop img_size x img_size from the centre; the centre is reversed from
    # (row, col) to (x, y) before the box is built.
    center_xy = np.round(np.array(image.shape[:2]) / 2).astype(int)[::-1]
    half_side = img_size / 2.
    bbox = np.hstack([center_xy - half_side, center_xy + half_side])
    image = img_util.crop(image, bbox, bgval=1.)

    # Transpose the image to 3xHxW.
    return np.transpose(image, (2, 0, 1))
def adjust_scales(real, config):
    """Derive the pyramid settings from the input size and resize the input.

    Writes ``num_scales``, ``stop_scale``, ``start_scale`` and
    ``scale_factor`` onto ``config``.

    Args:
        real: Tensor whose dimensions 2 and 3 are the spatial sizes.
        config: Settings object providing ``min_size``, ``max_size`` and
            ``scale_factor_init``; updated in place.

    Returns:
        ``real`` resized by ``config.start_scale``.
    """
    shorter = min(real.shape[2], real.shape[3])
    longer = max(real.shape[2], real.shape[3])

    def _scales_to_stop():
        # Number of scales needed to bring the longer side down to max_size.
        capped = min([config.max_size, longer])
        return math.ceil(math.log(capped / longer, config.scale_factor_init))

    config.num_scales = math.ceil(
        math.log(config.min_size / shorter, config.scale_factor_init)) + 1
    config.stop_scale = config.num_scales - _scales_to_stop()

    # Never upscale: the start scale is capped at 1.
    config.start_scale = min(config.max_size / longer, 1)
    resized_real = resize_img(real, config.start_scale, config)

    # Factor that reaches min_size from the resized shorter side in exactly
    # stop_scale steps.
    resized_shorter = min(resized_real.shape[2], resized_real.shape[3])
    config.scale_factor = math.pow(config.min_size / resized_shorter,
                                   1 / config.stop_scale)

    # stop_scale is evaluated a second time after the resize.
    config.stop_scale = config.num_scales - _scales_to_stop()
    return resized_real
def inference(self, start_img_input):
    """Generate ``config.num_samples`` images by running every scale in order.

    At each scale a noise map is drawn (or the stored one is reused when
    ``config.use_fixed_noise`` is set and the scale is below
    ``config.gen_start_scale``), added to the upscaled output of the
    previous scale and passed through that scale's generator. Images are
    written to ``config.infer_dir`` for every scale when
    ``config.save_all_pyramid`` is set, otherwise only for the last scale.
    When ``config.save_attention_map`` is set, the discriminator's attention
    maps are saved alongside every written image.

    Args:
        start_img_input: Image fed to the first scale. ``None`` means an
            all-zero tensor shaped like ``self.reals[0]``.

    Returns:
        The last generated image, detached.

    Raises:
        UnboundLocalError: If no image is generated (no scales, or
            ``config.num_samples`` is 0).
    """
    # Module-level names read by the heat-map saving code; they are only
    # assigned when attention maps are requested.
    global global_att_dir
    global global_epoch

    if self.config.save_attention_map:
        global_att_dir = f'{self.config.infer_dir}/attention'
        os.makedirs(global_att_dir, exist_ok=True)

    if start_img_input is None:
        # torch.full(shape, 0) infers an integer dtype from the int fill
        # value on current PyTorch; torch.zeros keeps the start image float.
        start_img_input = torch.zeros(self.reals[0].shape,
                                      device=self.config.device)

    cur_images = []
    scales = zip(self.Gs, self.Ds, self.Zs, self.noise_amps, self.reals)
    for idx, (G, D, Z_opt, noise_amp, real) in enumerate(scales):
        padding_size = ((self.config.kernel_size - 1) * self.config.num_layers) / 2
        pad = nn.ZeroPad2d(int(padding_size))
        # Unpadded size of the stored noise map, stretched by the requested
        # output scale.
        output_h = (Z_opt.shape[2] - padding_size * 2) * self.config.scale_h
        output_w = (Z_opt.shape[3] - padding_size * 2) * self.config.scale_w

        prev_images = cur_images
        cur_images = []
        for i in tqdm(range(self.config.num_samples)):
            if idx == 0:
                # First scale: one noise channel shared across 3 channels.
                random_z = generate_noise([1, output_h, output_w],
                                          device=self.config.device)
                random_z = random_z.expand(1, 3, random_z.shape[2],
                                           random_z.shape[3])
            else:
                random_z = generate_noise(
                    [self.config.img_channel, output_h, output_w],
                    device=self.config.device)
            padded_random_z = pad(random_z)

            if self.config.use_fixed_noise and idx < self.config.gen_start_scale:
                padded_random_z = Z_opt

            if not prev_images:
                padded_random_img = pad(start_img_input)
            else:
                upscaled_prev = resize_img(prev_images[i],
                                           1 / self.config.scale_factor,
                                           self.config)
                if self.config.mode == "train_SR":
                    padded_random_img = pad(upscaled_prev)
                else:
                    # Crop to the scaled real size, pad, then match the
                    # noise map's spatial size exactly.
                    crop_h = round(self.config.scale_h * self.reals[idx].shape[2])
                    crop_w = round(self.config.scale_w * self.reals[idx].shape[3])
                    upscaled_prev = upscaled_prev[:, :, 0:crop_h, 0:crop_w]
                    padded_random_img = pad(upscaled_prev)
                    padded_random_img = padded_random_img[
                        :, :, 0:padded_random_z.shape[2],
                        0:padded_random_z.shape[3]]
                    padded_random_img = upsampling(padded_random_img,
                                                   padded_random_z.shape[2],
                                                   padded_random_z.shape[3])

            padded_random_img_with_z = noise_amp * padded_random_z + padded_random_img
            cur_image = G(padded_random_img_with_z.detach(), padded_random_img)
            np_cur_image = torch2np(cur_image.detach())

            # Every scale is written when save_all_pyramid is set; otherwise
            # only the last scale is written.
            if self.config.save_all_pyramid:
                out_path = f'{self.config.infer_dir}/{i}_{idx}.png'
            elif idx == len(self.reals) - 1:
                out_path = f'{self.config.infer_dir}/{i}.png'
            else:
                out_path = None

            if out_path is not None:
                plt.imsave(out_path, np_cur_image, vmin=0, vmax=1)
                if self.config.save_attention_map:
                    np_real = torch2np(real)
                    _, _, cur_add_att_maps, cur_sub_att_maps = D(cur_image.detach())
                    cur_add_att_maps = cur_add_att_maps.detach().to(
                        torch.device('cpu')).numpy().transpose(1, 2, 3, 0)
                    cur_sub_att_maps = cur_sub_att_maps.detach().to(
                        torch.device('cpu')).numpy().transpose(1, 2, 3, 0)
                    global_epoch = f'{i}_{idx}thG'
                    parmap.map(save_heatmap,
                               [[np_cur_image, cur_add_att_maps, 'infer_add'],
                                [np_real, cur_sub_att_maps, 'infer_sub']],
                               pm_pbar=False, pm_processes=2)

            cur_images.append(cur_image)

    return cur_image.detach()
def inference(self, start_img_input):
    """Generate ``config.num_samples`` images by running every scale in order.

    At each scale a noise map is drawn (or the stored one is reused when
    ``config.use_fixed_noise`` is set and the scale is below
    ``config.gen_start_scale``), added to the upscaled output of the
    previous scale and passed through that scale's generator. Images are
    written to ``config.infer_dir`` for every scale when
    ``config.save_all_pyramid`` is set, otherwise only for the last scale.

    Args:
        start_img_input: Image fed to the first scale. ``None`` means an
            all-zero tensor shaped like ``self.reals[0]``.

    Returns:
        The last generated image, detached.

    Raises:
        UnboundLocalError: If no image is generated (no scales, or
            ``config.num_samples`` is 0).
    """
    cfg = self.config

    if start_img_input is None:
        # torch.full(shape, 0) infers an integer dtype from the int fill
        # value on current PyTorch; torch.zeros keeps the start image float.
        start_img_input = torch.zeros(self.reals[0].shape, device=cfg.device)

    cur_images = []
    for idx, (G, Z_opt, noise_amp) in tqdm(
            enumerate(zip(self.Gs, self.Zs, self.noise_amps))):
        padding_size = ((cfg.kernel_size - 1) * cfg.num_layers) / 2
        pad = nn.ZeroPad2d(int(padding_size))
        # Unpadded size of the stored noise map, stretched by the requested
        # output scale.
        output_h = (Z_opt.shape[2] - padding_size * 2) * cfg.scale_h
        output_w = (Z_opt.shape[3] - padding_size * 2) * cfg.scale_w

        prev_images = cur_images
        cur_images = []
        for i in range(cfg.num_samples):
            if idx == 0:
                # First scale: one noise channel shared across 3 channels.
                random_z = generate_noise([1, output_h, output_w],
                                          device=cfg.device)
                random_z = random_z.expand(1, 3, random_z.shape[2],
                                           random_z.shape[3])
            else:
                random_z = generate_noise(
                    [cfg.img_channel, output_h, output_w],
                    device=cfg.device)
            padded_random_z = pad(random_z)

            if cfg.use_fixed_noise and idx < cfg.gen_start_scale:
                padded_random_z = Z_opt

            if not prev_images:
                padded_random_img = pad(start_img_input)
            else:
                upscaled_prev = resize_img(prev_images[i],
                                           1 / cfg.scale_factor, cfg)
                if cfg.mode == "train_SR":
                    padded_random_img = pad(upscaled_prev)
                else:
                    # Crop to the scaled real size, pad, then match the
                    # noise map's spatial size exactly.
                    crop_h = round(cfg.scale_h * self.reals[idx].shape[2])
                    crop_w = round(cfg.scale_w * self.reals[idx].shape[3])
                    upscaled_prev = upscaled_prev[:, :, 0:crop_h, 0:crop_w]
                    padded_random_img = pad(upscaled_prev)
                    padded_random_img = padded_random_img[
                        :, :, 0:padded_random_z.shape[2],
                        0:padded_random_z.shape[3]]
                    padded_random_img = upsampling(padded_random_img,
                                                   padded_random_z.shape[2],
                                                   padded_random_z.shape[3])

            padded_random_img_with_z = noise_amp * padded_random_z + padded_random_img
            cur_image = G(padded_random_img_with_z.detach(), padded_random_img)

            if cfg.save_all_pyramid:
                plt.imsave(f'{self.config.infer_dir}/{i}_{idx}.png',
                           torch2np(cur_image.detach()), vmin=0, vmax=1)
            elif idx == len(self.reals) - 1:
                plt.imsave(f'{self.config.infer_dir}/{i}.png',
                           torch2np(cur_image.detach()), vmin=0, vmax=1)

            cur_images.append(cur_image)

    return cur_image.detach()
def creat_reals_pyramid(real, reals, config):
    """Append resized copies of ``real`` to ``reals``, one per scale.

    Scale ``i`` (for ``i`` from 0 to ``config.stop_scale``) is resized by
    ``config.scale_factor ** (config.stop_scale - i)``, so the last appended
    entry uses a factor of 1.

    Args:
        real: Image passed to ``resize_img`` for every scale.
        reals: List extended in place.
        config: Settings providing ``stop_scale`` and ``scale_factor``.

    Returns:
        The same ``reals`` list that was passed in.
    """
    # Walk the exponent down from stop_scale to 0.
    for exponent in range(config.stop_scale, -1, -1):
        factor = math.pow(config.scale_factor, exponent)
        reals.append(resize_img(real, factor, config))
    return reals