def _compute_mean(self):
    meanstd_file = 'dataset/mpii_for_mpii_mean.pth.tar'
    if os.path.isfile(meanstd_file):
        meanstd = torch.load(meanstd_file)
    else:
        mean = torch.zeros(3)
        std = torch.zeros(3)
        for index in self.train:
            a = self.anno[index]
            img_path = os.path.join(self.img_folder, a['img_paths'])
            img = imutils.load_image(img_path)  # CxHxW
            mean += img.view(img.size(0), -1).mean(1)
            std += img.view(img.size(0), -1).std(1)
        mean /= len(self.train)
        std /= len(self.train)
        meanstd = {'mean': mean, 'std': std}
        torch.save(meanstd, meanstd_file)
    if self.is_train:
        print('    Mean: %.4f, %.4f, %.4f' %
              (meanstd['mean'][0], meanstd['mean'][1], meanstd['mean'][2]))
        print('    Std:  %.4f, %.4f, %.4f' %
              (meanstd['std'][0], meanstd['std'][1], meanstd['std'][2]))
    return meanstd['mean'], meanstd['std']
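# A minimal usage sketch of the statistics above, assuming the per-channel
# mean/std are applied to a CxHxW tensor in [0, 1] before it is fed to the
# network; color_normalize_example is a hypothetical helper name, not part of
# the original file.
def color_normalize_example(img, mean, std):
    # img: 3xHxW float tensor; mean, std: 3-element tensors from _compute_mean
    for t, m, s in zip(img, mean, std):
        t.sub_(m).div_(s)  # normalize each channel in place
    return img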
def test_load_image_th(self):
    previous_image_dim_ordering = K.image_dim_ordering()
    K.set_image_dim_ordering('th')
    blue_im = load_image(dir + '/../fixture/blue.png')
    K.set_image_dim_ordering(previous_image_dim_ordering)
    self.assertEqual(blue_im.shape, (3, 600, 600))
def __getitem__(self, index):
    if self.is_train:
        a = self.anno[self.train[index]]
    else:
        a = self.anno[self.valid[index]]
    img_path = os.path.join(self.img_folder, a['img_paths'])
    pts = torch.Tensor(a['joint_self'])
    pts = pts[:, 0:2]
    c = torch.Tensor(a['objpos'])
    s = torch.Tensor([a['scale_provided']])
    if a['dataset'] == 'MPII':
        c[1] = c[1] + 15 * s[0]
        s = s * 1.25
        normalizer = a['normalizer'] * 0.6
    elif a['dataset'] == 'LEEDS':
        print('using lsp data')
        s = s * 1.4375
        normalizer = torch.dist(pts[2, :], pts[13, :])
    else:
        print('no such dataset {}'.format(a['dataset']))

    # For single-person pose estimation with a centered/scaled figure
    img = imutils.load_image(img_path)

    imgs = [None] * self.num
    heatmaps = [None] * self.num
    pts_augs = [None] * self.num
    scales = [None] * self.num
    rotations = [None] * self.num
    centers = [None] * self.num
    origin_pts = [None] * self.num
    normalizers = [None] * self.num
    rot_idx = torch.zeros(self.num).long()
    for i in range(0, len(self.rotation_means)):
        rot_idx[i] = i
        scales[i] = s.clone()
        rotations[i] = torch.Tensor([self.rotation_means[i]])
        centers[i] = c.clone()
        origin_pts[i] = pts.clone()
        normalizers[i] = normalizer
        imgs[i], heatmaps[i], pts_augs[i] = self.gen_img_heatmap(
            c.clone(), s.clone(), self.rotation_means[i],
            img.clone(), pts.clone())
    return imgs, heatmaps, centers, scales, rotations, \
        origin_pts, normalizers, rot_idx, index, pts_augs
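# A consumption sketch, assuming a standard DataLoader around the class above
# (`dataset` is a placeholder instance). Because __getitem__ returns Python
# lists of per-rotation tensors, PyTorch's default collate transposes them,
# so each list entry comes back stacked across the batch.
from torch.utils.data import DataLoader

loader = DataLoader(dataset, batch_size=4, shuffle=True)
imgs, heatmaps, centers, scales, rotations, \
    origin_pts, normalizers, rot_idx, index, pts_augs = next(iter(loader))
# imgs is a list of length dataset.num; imgs[i] stacks the i-th rotation
# over the batch (4 x C x H x W)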
def generate_data_from_image_list(image_list, size, style_fullpath_prefix,
                                  input_len=1, output_len=1, batch_size=4,
                                  transform_f=None, preprocess_type='none',
                                  verbose=False):
    if transform_f is not None:
        file = h5py.File(style_fullpath_prefix + '_' + str(size[0]) + '.hdf5', 'r')
        y_style1 = np.array(file.get('conv_1_2'))
        y_style2 = np.array(file.get('conv_2_2'))
        y_style3 = np.array(file.get('conv_3_4'))
        y_style4 = np.array(file.get('conv_4_2'))

    # Init inputs/outputs
    inputs = [[] for _ in range(input_len)]
    outputs = [[] for _ in range(output_len)]
    nb_element = 0
    while True:
        random.shuffle(image_list)
        for fullpath in image_list:
            nb_element += 1
            im = load_image(fullpath, size,
                            preprocess_type=preprocess_type, verbose=verbose)
            if transform_f is not None:
                f_input = [preprocess(np.array([im]), type='vgg19')]
                # First element in the list of results, first element of the output batch
                y_content = transform_f(f_input)[0][0]
                inputs[0].append(im)
                outputs[0].append(y_content)
                outputs[1].append(y_style1)
                outputs[2].append(y_style2)
                outputs[3].append(y_style3)
                outputs[4].append(y_style4)
                outputs[5].append(np.zeros_like(im))
            else:
                for i in range(input_len):
                    inputs[i].append(im)
                for i in range(output_len):
                    outputs[i].append(im)

            if nb_element >= batch_size:
                nb_element = 0
                inputs_list = [np.array(inputs[i]) for i in range(input_len)]
                outputs_list = [np.array(outputs[i]) for i in range(output_len)]
                yield (inputs_list, outputs_list)
                # Reset inputs/outputs
                for i in range(input_len):
                    inputs[i] = []
                for i in range(output_len):
                    outputs[i] = []
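# A minimal usage sketch, assuming the Keras 1-style API used elsewhere in
# these scripts; st_model, image_list, style_fullpath_prefix and
# true_content_f are placeholders taken from the surrounding code.
train_generator = generate_data_from_image_list(
    image_list, (256, 256), style_fullpath_prefix,
    input_len=1, output_len=6, batch_size=4,
    transform_f=true_content_f, preprocess_type='none')
st_model.fit_generator(train_generator,
                       samples_per_epoch=len(image_list),
                       nb_epoch=2)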
def __getitem__(self, index):
    if self.is_train:
        a = self.anno[self.train[index]]
    else:
        a = self.anno[self.valid[index]]
    img_path = os.path.join(self.img_folder, a['img_paths'])
    pts = torch.Tensor(a['joint_self'])
    pts = pts[:, 0:2]
    c = torch.Tensor(a['objpos'])
    s = torch.Tensor([a['scale_provided']])
    if a['dataset'] == 'MPII':
        c[1] = c[1] + 15 * s[0]
        s = s * 1.25
        normalizer = a['normalizer'] * 0.6
    elif a['dataset'] == 'LEEDS':
        print('using lsp data')
        s = s * 1.4375
        normalizer = torch.dist(pts[2, :], pts[13, :])
    else:
        print('no such dataset {}'.format(a['dataset']))

    # For single-person pose estimation with a centered/scaled figure
    img = imutils.load_image(img_path)

    inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(),
                        0, self.inp_res, self.std_size)
    inp = imutils.im_to_torch(inp).float()

    tmp_scale_distri = self.grnd_scale_distri[index] / \
        self.grnd_scale_distri[index].sum()
    tmp_rot_distri = self.grnd_rotation_distri[index] / \
        self.grnd_rotation_distri[index].sum()
    return inp, tmp_scale_distri, tmp_rot_distri, index
def test_import_model(self):
    data_model_folder = dir + "/../fixture/model_conv2d_relu"
    should_convert = K._BACKEND == "theano"
    model = import_model(data_model_folder, should_convert=should_convert)
    input_img = np.array([
        load_image(dir + '/../fixture/blue.png', size=None,
                   preprocess_type='st', verbose=False)
    ])
    output = model.predict([input_img]).astype('int32')
    true_output = np.array([[[[0, 0, 0], [0, 0, 0], [0, 0, 0]],
                             [[131, 116, 153], [153, 281, 364], [103, 254, 318]],
                             [[52, 1, 0], [0, 0, 0], [0, 0, 0]]]])
    self.assertEqual(len(model.layers), 3)
    self.assertTrue((output == true_output).all())
def __getitem__(self, index):
    if self.is_train:
        a = self.anno[self.train[index]]
    else:
        a = self.anno[self.valid[index]]
    img_path = os.path.join(self.img_folder, a['img_paths'])
    pts_path = os.path.join(self.img_folder, a['pts_paths'])
    if pts_path[-4:] == '.txt':
        pts = np.loadtxt(pts_path)  # L x 2
    elif pts_path[-4:] == '.pts':
        pts = FacePts.Pts2Lmk(pts_path)  # L x 2
    pts = torch.Tensor(pts)
    assert torch.sum(pts - torch.Tensor(a['pts'])) == 0
    s = torch.Tensor([a['scale_provided_det']]) * 1.1
    c = torch.Tensor(a['objpos_det'])

    # For single-person pose estimation with a centered/scaled figure
    img = imutils.load_image(img_path)

    r = 0
    if self.is_train:
        s = s * (2 ** sample_from_bounded_gaussian(self.scale_factor))
        r = sample_from_bounded_gaussian(self.rot_factor)
        if np.random.uniform(0, 1, 1) <= 0.6:
            r = np.array([0])

        # (Horizontal flip augmentation is disabled in this variant.)

        # Color
        img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
        img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
        img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

    # Prepare image and ground-truth map
    inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(),
                        r, self.inp_res, self.std_size)
    inp = imutils.im_to_torch(inp).float()
    pts_input_res = HumanAug.TransformPts(pts.numpy(), c.numpy(), s.numpy(),
                                          r, self.inp_res, self.std_size)
    pts_aug = pts_input_res * (1. * self.out_res / self.inp_res)

    # Generate ground truth
    heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug,
                                            [self.out_res, self.out_res],
                                            sigma=1)
    heatmap = torch.from_numpy(heatmap).float()

    if self.is_train:
        return inp, heatmap, pts_input_res
    else:
        return inp, heatmap, pts, index, c, s, img_path
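# sample_from_bounded_gaussian is not defined in this section. A plausible
# definition, consistent with the inline
# torch.randn(1).mul_(rf).clamp(-2 * rf, 2 * rf) pattern in the next snippet,
# is a zero-mean Gaussian draw clipped to twice its standard deviation; this
# is an assumption, not the original helper.
import numpy as np

def sample_from_bounded_gaussian(x):
    # Draw from N(0, x^2) and clip to [-2x, 2x]
    return np.clip(np.random.randn() * x, -2 * x, 2 * x)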
def __getitem__(self, index):
    sf = self.scale_factor
    rf = self.rot_factor
    if self.is_train:
        a = self.anno[self.train_list[index]]
    else:
        a = self.anno[self.valid_list[index]]

    img_path = os.path.join(self.img_folder, a['img_paths'])
    pts = torch.Tensor(a['joint_self'])
    c = torch.Tensor(a['objpos'])
    s = a['scale_provided']

    # Adjust center/scale slightly to avoid cropping limbs
    if c[0] != -1:
        c[1] = c[1] + 15 * s
        s = s * 1.25

    # For single-person pose estimation with a centered/scaled figure
    nparts = pts.size(0)
    img = load_image(img_path)  # CxHxW

    r = 0
    if self.is_train:
        s = s * torch.randn(1).mul_(sf).add_(1).clamp(1 - sf, 1 + sf)[0]
        r = torch.randn(1).mul_(rf).clamp(
            -2 * rf, 2 * rf)[0] if random.random() <= 0.6 else 0

        # Flip
        if random.random() <= 0.5:
            img = fliplr(img)
            pts = shufflelr(pts, img.size(2), self.DATA_INFO.hflip_indices)
            c[0] = img.size(2) - c[0]

        # Color
        img[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
        img[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
        img[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)

    # Prepare image and groundtruth map
    inp = crop(img, c, s, self.inp_res, rot=r)
    inp = color_normalize(inp, self.DATA_INFO.rgb_mean,
                          self.DATA_INFO.rgb_stddev)

    # Generate ground truth
    tpts = pts.clone()
    target = torch.zeros(nparts, *self.out_res)
    target_weight = tpts[:, 2].clone().view(nparts, 1)
    for i in range(nparts):
        # if tpts[i, 2] > 0: # This is evil!!
        if tpts[i, 1] > 0:
            tpts[i, 0:2] = to_torch(
                transform(tpts[i, 0:2] + 1, c, s, self.out_res, rot=r))
            target[i], vis = draw_labelmap(target[i], tpts[i] - 1,
                                           self.sigma, type=self.label_type)
            target_weight[i, 0] *= vis

    # Meta info
    if not isinstance(s, torch.Tensor):
        s = torch.Tensor([s])  # wrap the scalar scale in a 1-element tensor
    meta = {
        'index': index,
        'center': c,
        'scale': s,
        'pts': pts,
        'tpts': tpts,
        'target_weight': target_weight
    }
    return inp, target, meta
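# A sketch of what draw_labelmap above presumably does, under the usual
# hourglass-style convention (an assumption, not the library's actual code):
# render an unnormalized 2D Gaussian of width sigma at the keypoint and
# report whether the point lands inside the map.
import torch

def draw_gaussian_sketch(heatmap, pt, sigma):
    h, w = heatmap.shape
    ys = torch.arange(h).view(-1, 1).float()
    xs = torch.arange(w).view(1, -1).float()
    g = torch.exp(-((xs - pt[0]) ** 2 + (ys - pt[1]) ** 2) / (2 * sigma ** 2))
    vis = 1 if (0 <= pt[0] < w and 0 <= pt[1] < h) else 0
    return torch.max(heatmap, g), vis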
print("Empty string") #=============================================================================== # # Plot all groundtruth #=============================================================================== print("\nPlotting all ground truth now on selected images ...") full_folder = os.path.join(IMAGE_DIR, "aflw_ours/original_res") makedir(full_folder) for i in range(num_images): index = indices_in_json[i] img_data = data_all[index] fig = plt.figure(dpi=DPI) img_path = img_data['img_paths'] img = imutils.load_image(img_path) pts = np.array(img_data['pts']) # Assume all points are visible for a dataset. This is a multiclass # visibility vis = np.ones(pts.shape[0]) # The pts which are labelled -1 in both x and y are not visible points self_occluded_landmark = (pts[:, 0] == -1) & (pts[:, 1] == -1) external_occluded_landmark = (pts[:, 0] < -1) & (pts[:, 1] < -1) vis[self_occluded_landmark] = 0 vis[external_occluded_landmark] = 2 pts = np.abs(pts) # Get visible points which have 1 in the visibility
def __getitem__(self, index):
    if self.is_train:
        a = self.anno[self.train[index]]
    else:
        a = self.anno[self.valid[index]]
    img_path = os.path.join(self.img_folder, a['img_paths'])
    pts = torch.Tensor(a['joint_self'])
    pts = pts[:, 0:2]
    c = torch.Tensor(a['objpos'])
    s = torch.Tensor([a['scale_provided']])
    if a['dataset'] == 'MPII':
        c[1] = c[1] + 15 * s[0]
        s = s * 1.25
        normalizer = a['normalizer'] * 0.6
    elif a['dataset'] == 'LEEDS':
        print('using lsp data')
        s = s * 1.4375
        normalizer = torch.dist(pts[2, :], pts[13, :])
    else:
        print('no such dataset {}'.format(a['dataset']))

    # For single-person pose estimation with a centered/scaled figure
    img = imutils.load_image(img_path)

    # Un-augmented (standard) crop and heatmap
    inp_std, heatmap_std = self.gen_img_heatmap(c.clone(), s.clone(), 0,
                                                img.clone(), pts.clone())
    if self.is_train:
        s = s * (2 ** sample_from_bounded_gaussian(self.scale_factor))
        r = sample_from_bounded_gaussian(self.rot_factor)
        if np.random.uniform(0, 1, 1) <= 0.6:
            r = np.array([0])

        # Flip
        if np.random.random() <= 0.5:
            img = torch.from_numpy(HumanAug.fliplr(img.numpy())).float()
            pts = HumanAug.shufflelr(pts, width=img.size(2), dataset='mpii')
            c[0] = img.size(2) - c[0]

        # Color
        img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
        img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
        img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

        # Augmented image and ground-truth map
        inp, heatmap = self.gen_img_heatmap(c.clone(), s.clone(), r,
                                            img.clone(), pts.clone())
        r = torch.FloatTensor([r])
        return inp_std, inp, heatmap, c, s, r, pts, normalizer, index
    else:
        r = torch.FloatTensor([0])
        return inp_std, heatmap_std, c, s, r, pts, normalizer, index
def __getitem__(self, index):
    if self.img_index_list is None:
        a = self.anno[self.train[index]]
    else:
        idx = self.img_index_list[index]
        a = self.anno[self.train[idx]]
    img_path = os.path.join(self.img_folder, a['img_paths'])
    pts = torch.Tensor(a['joint_self'])
    pts = pts[:, 0:2]
    c = torch.Tensor(a['objpos'])
    s = torch.Tensor([a['scale_provided']])
    if a['dataset'] == 'MPII':
        c[1] = c[1] + 15 * s[0]
        s = s * 1.25
        normalizer = a['normalizer'] * 0.6
    elif a['dataset'] == 'LEEDS':
        print('using lsp data')
        s = s * 1.4375
        normalizer = torch.dist(pts[2, :], pts[13, :])
    else:
        print('no such dataset {}'.format(a['dataset']))

    # For single-person pose estimation with a centered/scaled figure
    img = imutils.load_image(img_path)

    if self.img_index_list is None:
        s_aug = s * (2 ** sample_from_large_gaussian(self.scale_factor))
        r_aug = sample_from_large_gaussian(self.rot_factor)
        if np.random.uniform(0, 1, 1) <= 0.6:
            r_aug = np.array([0])
    else:
        gaussian_mean_scale = self.scale_means[self.scale_index_list[index]]
        scale_factor = sample_from_small_gaussian(gaussian_mean_scale,
                                                  self.scale_var)
        gaussian_mean_rotation = self.rotation_means[
            self.rotation_index_list[index]]
        r_aug = sample_from_small_gaussian(gaussian_mean_rotation,
                                           self.rotation_var)
        s_aug = s * (2 ** scale_factor)

    if self.separate_s_r:
        img_list = [None] * 2
        heatmap_list = [None] * 2
        c_list = [c.clone()] * 2
        s_list = [s_aug.clone(), s.clone()]
        r_list = [torch.FloatTensor([0]), torch.FloatTensor([r_aug])]
        grnd_pts_list = [pts.clone(), pts.clone()]
        normalizer_list = [normalizer, normalizer]
        img_list[0], heatmap_list[0] = self.gen_img_heatmap(
            c.clone(), s_aug.clone(), 0, img.clone(), pts.clone())
        img_list[1], heatmap_list[1] = self.gen_img_heatmap(
            c.clone(), s.clone(), r_aug, img.clone(), pts.clone())
        if self.img_index_list is not None:
            return img_list, heatmap_list, c_list, s_list, \
                r_list, grnd_pts_list, normalizer_list, idx
        else:
            inp_std, _ = self.gen_img_heatmap(c.clone(), s.clone(), 0,
                                              img.clone(), pts.clone())
            return inp_std, img_list, heatmap_list, c_list, s_list, r_list, \
                grnd_pts_list, normalizer_list, index
    else:
        # Flip
        if np.random.random() <= 0.5:
            img = torch.from_numpy(HumanAug.fliplr(img.numpy())).float()
            pts = HumanAug.shufflelr(pts, width=img.size(2), dataset='mpii')
            c[0] = img.size(2) - c[0]

        # Color
        img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
        img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
        img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

        inp, heatmap = self.gen_img_heatmap(c.clone(), s_aug.clone(), r_aug,
                                            img.clone(), pts.clone())
        # When self.separate_s_r is False, self.img_index_list is not None,
        # so return idx instead of index.
        r_aug = torch.FloatTensor([r_aug])
        return inp, heatmap, c, s_aug, r_aug, pts, normalizer, idx
def train_weights(input_dir, size, model, train_iteratee, cv_input_dir=None,
                  max_iter=2000, batch_size=4, callbacks=[], load_result=False):
    losses = {'loss': [], 'val_loss': [], 'best_loss': 1e15}
    best_weights = model.get_weights()

    need_more_training = True
    current_iter = 0
    current_epoch = 1

    files = [input_dir + '/' + name for name in os.listdir(input_dir)
             if len(re.findall(r'\.(jpe?g|png)$', name))]
    batch_size = min(batch_size, len(files))
    print('total_files %d' % len(files))
    max_epoch = math.floor((batch_size * max_iter) / len(files)) + 1

    while need_more_training:
        print('Epoch %d/%d' % (current_epoch, max_epoch))
        nb_elem = min((max_iter - current_iter) * batch_size, len(files))
        progbar = Progbar(nb_elem)
        progbar_values = []

        ims = []
        y_ims = []
        for idx, fullpath in enumerate(files):
            if load_result:
                im, y_im = load_image(fullpath, size=size, preprocess_type='st',
                                      verbose=False,
                                      load_result=load_result)  # th ordering, BGR
                y_ims.append(y_im)
            else:
                im = load_image(fullpath, size=size, preprocess_type='st',
                                verbose=False,
                                load_result=load_result)  # th ordering, BGR
            ims.append(im)
            if len(ims) >= batch_size or idx == len(files) - 1:
                current_iter += 1
                if load_result:
                    data = train_iteratee([np.array(ims), np.array(y_ims), True])
                else:
                    data = train_iteratee([np.array(ims), True])
                loss = data[0].item(0)
                losses['loss'].append(loss)
                progbar_values.append(('loss', loss))
                for loss_idx, subloss in enumerate(data):
                    if loss_idx < 1:
                        continue
                    progbar_values.append(('loss ' + str(loss_idx), subloss))
                progbar.update(idx + 1, progbar_values)

                if loss < losses['best_loss']:
                    losses['best_loss'] = loss
                    best_weights = model.get_weights()

                for callback in callbacks:
                    callback({
                        'current_iter': current_iter,
                        'losses': losses,
                        'model': model,
                        'data': data,
                    })

                ims = []
                y_ims = []  # reset alongside ims so batches stay aligned
                if current_iter >= max_iter:
                    need_more_training = False
                    break

        current_epoch += 1

    last_weights = model.get_weights()
    print("final best loss:", losses['best_loss'])
    return (best_weights, last_weights), losses
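# A minimal call sketch, assuming train_iteratee is a K.function-style
# callable returning [total_loss, subloss_1, ...]; the path and st_model are
# placeholders, not values from the original scripts.
(best_weights, last_weights), losses = train_weights(
    'data/train', (256, 256), st_model, train_iteratee,
    max_iter=600, batch_size=4)
st_model.set_weights(best_weights)
print('best loss:', losses['best_loss'])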
def __getitem__(self, index):
    if self.is_train:
        a = self.anno[self.train[index]]
    else:
        a = self.anno[self.valid[index]]
    img_path = os.path.join(self.img_folder, a['img_paths'])

    if a['pts_paths'] == "unknown.xyz":
        pts = a['pts']
    else:
        pts_path = os.path.join(self.img_folder, a['pts_paths'])
        if pts_path[-4:] == '.txt':
            pts = np.loadtxt(pts_path)  # L x 2
        else:
            pts = a['pts']
    pts = np.array(pts)

    # Assume all points are visible for a dataset. This is a multiclass
    # visibility.
    visible_multiclass = np.ones(pts.shape[0])

    if a['dataset'] == 'aflw_ours' or a['dataset'] == 'cofw_68':
        # Points labelled -1 in both x and y are self-occluded (not visible);
        # points with both coordinates below -1 are externally occluded.
        self_occluded_landmark = (pts[:, 0] == -1) & (pts[:, 1] == -1)
        external_occluded_landmark = (pts[:, 0] < -1) & (pts[:, 1] < -1)
        visible_multiclass[self_occluded_landmark] = 0
        visible_multiclass[external_occluded_landmark] = 2

        # Valid landmarks are those which are externally occluded or not
        # occluded at all.
        valid_landmark = (pts[:, 0] != -1) & (pts[:, 1] != -1)

        # Externally occluded points have both coordinates negative but not
        # -1; make them positive.
        pts = np.abs(pts)

        # valid_landmark is 0 for to-be-masked and 1 for not-to-be-masked;
        # the mask is 1 for to-be-masked and 0 for not-to-be-masked.
        pts_masked = np.ma.array(pts, mask=np.column_stack(
            (1 - valid_landmark, 1 - valid_landmark)))
        pts_mean = np.mean(pts_masked, axis=0)

        # Replace -1 by the mean of the valid landmarks. Otherwise taking the
        # min when computing the geometric mean of the box can create issues
        # later.
        pts[self_occluded_landmark] = pts_mean.data
        scale_mul_factor = 1.1
    elif a['dataset'] == "aflw" or a['dataset'] == "wflw":
        self_occluded_landmark = (pts[:, 0] <= 0) | (pts[:, 1] <= 0)
        valid_landmark = 1 - self_occluded_landmark
        visible_multiclass[self_occluded_landmark] = 0

        pts_masked = np.ma.array(pts, mask=np.column_stack(
            (1 - valid_landmark, 1 - valid_landmark)))
        pts_mean = np.mean(pts_masked, axis=0)
        pts[self_occluded_landmark] = pts_mean.data
        scale_mul_factor = 1.25
    else:
        scale_mul_factor = 1.1

    pts = torch.Tensor(pts)  # size is 68*2
    s = torch.Tensor([a['scale_provided_det']]) * scale_mul_factor
    c = torch.Tensor(a['objpos_det'])

    # For single-person pose estimation with a centered/scaled figure;
    # the image at its original size
    img = imutils.load_image(img_path)

    r = 0
    s_rand = 1
    if self.is_train:
        # Data augmentation for training data
        s_rand = 1 + sample_from_bounded_gaussian(self.scale_factor / 2.)
        s = s * s_rand
        r = sample_from_bounded_gaussian(self.rot_factor / 2.)

        if self.use_flipping:
            # Flip
            if np.random.random() <= 0.5:
                img = torch.from_numpy(HumanAug.fliplr(img.numpy())).float()
                pts = HumanAug.shufflelr(pts, width=img.size(2),
                                         dataset='face')
                c[0] = img.size(2) - c[0]

            # Color
            img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

        if self.use_occlusion:
            # Apply a random black occlusion patch (C x H x W)
            patch_center_row = randint(1, img.size(1))
            patch_center_col = randint(1, img.size(2))
            patch_height = randint(1, img.size(1) // 2)
            patch_width = randint(1, img.size(2) // 2)
            row_min = max(0, patch_center_row - patch_height)
            row_max = min(img.size(1), patch_center_row + patch_height)
            col_min = max(0, patch_center_col - patch_width)
            col_max = min(img.size(2), patch_center_col + patch_width)
            img[:, row_min:row_max, col_min:col_max] = 0

    # Prepare points first
    pts_input_res = HumanAug.TransformPts(pts.numpy(), c.numpy(), s.numpy(),
                                          r, self.inp_res, self.std_size)

    # Some landmark points can go outside the image after transformation.
    # Determine the extra scaling required. This can only be done for the
    # training points; for validation we do not know the point locations.
    if self.is_train and self.keep_pts_inside:
        # visible_copy takes care of whether a point is visible or not.
        visible_copy = visible_multiclass.copy()
        visible_copy[visible_multiclass > 1] = 1
        scale_down = get_ideal_scale(pts_input_res, self.inp_res,
                                     img_path, visible=visible_copy)
        s = s / scale_down
        s_rand = s_rand / scale_down
        pts_input_res = HumanAug.TransformPts(pts.numpy(), c.numpy(),
                                              s.numpy(), r, self.inp_res,
                                              self.std_size)

    if a['dataset'] == "aflw":
        meta_box_size = a['box_size']
        # Convert the meta_box size to the input resolution as well. The
        # meta_box is not formed by the landmark points but is supplied
        # externally. Treat it as the two points [meta_box_size, 0] and
        # [0, 0] and apply the transformation on top of them.
        temp = HumanAug.TransformPts(
            np.array([[meta_box_size, 0], [0, 0]]),  # passed as a 2 x 2 array
            c.numpy(), s.numpy(), r, self.inp_res, self.std_size)
        # We only want the transformed distance between the two points.
        meta_box_size_input_res = np.linalg.norm(temp[1] - temp[0])
    else:
        meta_box_size_input_res = -10  # some invalid number

    # pts_input_res is at the 256 x 256 input resolution; bring it down to
    # 64 x 64 since the heatmap will be 64 x 64.
    pts_aug = pts_input_res * (1. * self.out_res / self.inp_res)

    # Prepare image
    inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(),
                        r, self.inp_res, self.std_size)
    inp_vis = inp
    inp = imutils.im_to_torch(inp).float()  # 3 x 256 x 256

    # Generate proxy ground-truth heatmap
    heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug,
                                            [self.out_res, self.out_res],
                                            sigma=self.sigma)
    heatmap = torch.from_numpy(heatmap).float()
    heatmap_mask = HumanPts.pts2mask(pts_aug, [self.out_res, self.out_res],
                                     bb=10)

    if self.is_train:
        return inp, heatmap, pts_input_res, heatmap_mask, s_rand, \
            visible_multiclass, meta_box_size_input_res
    else:
        return inp, heatmap, pts_input_res, c, s, index, inp_vis, s_rand, \
            visible_multiclass, meta_box_size_input_res
style_output_layers = [
    grams(layer_dict[ls_name].output) for ls_name in style_layers
]
predict_style = K.function([vgg_model.input], style_output_layers)

content_layers = ['conv_3_2']
content_output_layers = [
    layer_dict[lc_name].output for lc_name in content_layers
]
predict_content = K.function([vgg_model.input], content_output_layers)

if 'results_style_dir' in locals():
    image_list = get_image_list(args.style_dir)
    for image_path in image_list:
        X_train_style = np.array([
            load_image(image_path, size=(height, width),
                       preprocess_type='vgg19', verbose=True)
        ])
        results = predict_style([X_train_style])
        filename = image_path.split('/')[-1].split('.')[0]
        output_filename = results_style_dir + '/' + filename + '_' + \
            str(args.image_size) + '.hdf5'
        with h5py.File(output_filename, 'w') as hf:
            for idx, style_layer in enumerate(style_layers):
                hf.create_dataset(style_layer, data=results[idx][0])

if 'results_content_dir' in locals():
    print('Be careful: every file dumped takes 22 MB; '
          'check that you have enough space.')
def __getitem__(self, index):
    if self.is_train:
        a = self.anno[self.train[index]]
    else:
        a = self.anno[self.valid[index]]
    img_path = os.path.join(self.img_folder, a['img_paths'])
    pts = torch.Tensor(a['joint_self'])
    pts = pts[:, 0:2]
    c = torch.Tensor(a['objpos'])
    s = torch.Tensor([a['scale_provided']])
    if a['dataset'] == 'MPII':
        c[1] = c[1] + 15 * s[0]
        s = s * 1.25
        normalizer = a['normalizer'] * 0.6
    elif a['dataset'] == 'LEEDS':
        print('using lsp data')
        s = s * 1.4375
        normalizer = torch.dist(pts[2, :], pts[13, :])
    else:
        print('no such dataset {}'.format(a['dataset']))

    # For single-person pose estimation with a centered/scaled figure
    img = imutils.load_image(img_path)

    r = 0
    if self.is_train:
        s = s * (2 ** sample_from_bounded_gaussian(self.scale_factor))
        r = sample_from_bounded_gaussian(self.rot_factor)
        if np.random.uniform(0, 1, 1) <= 0.6:
            r = 0

        # Flip
        if np.random.random() <= 0.5:
            img = torch.from_numpy(HumanAug.fliplr(img.numpy())).float()
            pts = HumanAug.shufflelr(pts, width=img.size(2), dataset='mpii')
            c[0] = img.size(2) - c[0]

        # Color
        img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
        img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
        img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

    # Prepare image and groundtruth map
    inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(),
                        r, self.inp_res, self.std_size)
    inp = imutils.im_to_torch(inp).float()
    pts_aug = HumanAug.TransformPts(pts.numpy(), c.numpy(), s.numpy(),
                                    r, self.out_res, self.std_size)

    # Generate ground truth
    heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug,
                                            [self.out_res, self.out_res],
                                            sigma=1)
    heatmap = torch.from_numpy(heatmap).float()

    r = torch.FloatTensor([r])
    if self.is_train:
        return inp, heatmap, c, s, r, pts, normalizer
    else:
        return inp, heatmap, c, s, r, pts, normalizer, index
parser.add_argument('--pooling_type', default='avg', type=str,
                    choices=['max', 'avg'], help='VGG pooling type.')
parser.add_argument('--image_size', default=256, type=int,
                    help='Input image size.')
parser.add_argument('--max_iter', default=600, type=int,
                    help='Number of training iterations.')
args = parser.parse_args()

dim_ordering = K.image_dim_ordering()
channels = 3
width = args.image_size
height = args.image_size
size = (height, width)
if dim_ordering == 'th':
    input_shape = (channels, width, height)
else:
    input_shape = (width, height, channels)

X_train = np.array([
    load_image(args.content, size=(height, width),
               preprocess_type='vgg19', verbose=True)
])
print("X_train shape:", X_train.shape)

X_train_style = np.array([
    load_image(args.style, size=(height, width),
               preprocess_type='vgg19', verbose=True)
])
print("X_train_style shape:", X_train_style.shape)

print('Loading VGG headless 5')
modelWeights = "%s/%s-%s-%s%s" % (vgg19Dir, 'vgg-19', dim_ordering,
                                  K._BACKEND, '_headless_5_weights.hdf5')
model = VGG_19_headless_5(input_shape, modelWeights, trainable=False,
                          pooling_type=args.pooling_type)
layer_dict, layers_names = get_layer_data(model, 'conv_')
print('Layers found: ' + ', '.join(layers_names))

input_layer = model.input

print('Building white noise images')
input_data = create_noise_tensor(height, width, channels)
val_image_list = val_image_list[:10]
nb_val_samples = len(val_image_list)
val_generator = generate_data_from_image_list(
    val_image_list, (height, width), style_fullpath_prefix,
    input_len=1, output_len=6, batch_size=args.batch_size,
    transform_f=true_content_f, preprocess_type='none', verbose=False)

# The TensorBoard callback doesn't handle generators so far, and we actually
# only need a few images to see the qualitative result.
validation_data = []
validation_data.append(load_image(train_image_list[0], size=(height, width),
                                  preprocess_type='none'))
validation_data.append(load_image(val_image_list[0], size=(height, width),
                                  preprocess_type='none'))
st_model.validation_data = [validation_data]

print('Iterating over hyper parameters')
current_iter = 0
# Alpha needs to be much lower than in the Gatys paper. This is probably
# because here we look at a new content picture each batch while the style
# stays the same, so the optimizer can descend the style gradient much faster
# than the content one, which is noisier.
for alpha in [1e1]:
    for beta in [1.]:
        for gamma in [1e-5]: