def _compute_mean(self):
        meanstd_file = 'dataset/mpii_for_mpii_mean.pth.tar'
        if os.path.isfile(meanstd_file):
            meanstd = torch.load(meanstd_file)
        else:
            mean = torch.zeros(3)
            std = torch.zeros(3)
            for index in self.train:
                a = self.anno[index]
                img_path = os.path.join(self.img_folder, a['img_paths'])
                img = imutils.load_image(img_path)  # CxHxW
                mean += img.view(img.size(0), -1).mean(1)
                std += img.view(img.size(0), -1).std(1)
            mean /= len(self.train)
            std /= len(self.train)
            meanstd = {
                'mean': mean,
                'std': std,
            }
            torch.save(meanstd, meanstd_file)
        if self.is_train:
            print('    Mean: %.4f, %.4f, %.4f' %
                  (meanstd['mean'][0], meanstd['mean'][1], meanstd['mean'][2]))
            print('    Std:  %.4f, %.4f, %.4f' %
                  (meanstd['std'][0], meanstd['std'][1], meanstd['std'][2]))

        return meanstd['mean'], meanstd['std']
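A minimal sketch of how these per-channel statistics are typically consumed; the color_normalize calls are commented out in the later examples, so this helper is an assumption, not the repo's own code:

def color_normalize(img, mean, std):
    # img: CxHxW float tensor in [0, 1]; mean/std: per-channel tensors of size 3
    for c in range(img.size(0)):
        img[c, :, :].sub_(mean[c]).div_(std[c])
    return img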
Example #2
    def test_load_image_th(self):
        previous_image_dim_ordering = K.image_dim_ordering()
        K.set_image_dim_ordering('th')
        blue_im = load_image(dir + '/../fixture/blue.png')
        K.set_image_dim_ordering(previous_image_dim_ordering)

        self.assertEqual(blue_im.shape, (3, 600, 600))
Example #3
    def __getitem__(self, index):
        # print('loading image', index)
        if self.is_train:
            a = self.anno[self.train[index]]
        else:
            a = self.anno[self.valid[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts = torch.Tensor(a['joint_self'])
        # pts[:, 0:2] -= 1  # Convert pts to zero based
        pts = pts[:, 0:2]
        # c = torch.Tensor(a['objpos']) - 1
        c = torch.Tensor(a['objpos'])
        # print(c)
        s = torch.Tensor([a['scale_provided']])
        # r = torch.Tensor([1])
        # exit()
        if a['dataset'] == 'MPII':
            c[1] = c[1] + 15 * s[0]
            s = s * 1.25
            normalizer = a['normalizer'] * 0.6
        elif a['dataset'] == 'LEEDS':
            print('using lsp data')
            s = s * 1.4375
            normalizer = torch.dist(pts[2, :], pts[13, :])
        else:
            print('no such dataset {}'.format(a['dataset']))

        # For single-person pose estimation with a centered/scaled figure
        img = imutils.load_image(img_path)
        # img = Image.open(img_path)
        # SCALE_VAR, ROTATE_VAR = 0.25, 30
        imgs = [None] * (self.num)
        # imgs_flip = [None] * (self.num)
        # imgs[0], _ = self.gen_img_heatmap(np.array([1]), np.array([0]), origin_img, annot)
        heatmaps = [None] * self.num
        pts_augs = [None] * self.num
        # heatmaps_flip = [None] * self.num
        scales = [None] * self.num
        rotations = [None] * self.num
        centers = [None] * self.num
        origin_pts = [None] * self.num
        normalizers = [None] * self.num
        rot_idx = torch.zeros(self.num).long()
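        # Note: this assumes self.num == len(self.rotation_means); otherwise the
        # trailing entries of the lists above stay None.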
        for i in range(0, len(self.rotation_means)):
            rot_idx[i] = i
            scales[i] = s.clone()
            rotations[i] = torch.Tensor([self.rotation_means[i]])
            centers[i] = c.clone()
            origin_pts[i] = pts.clone()
            normalizers[i] = normalizer
            imgs[i], heatmaps[i], pts_augs[i] = self.gen_img_heatmap(
                c.clone(), s.clone(), self.rotation_means[i], img.clone(),
                pts.clone())
        # exit()
        return imgs, heatmaps, centers, scales, rotations, \
               origin_pts, normalizers, rot_idx, index, pts_augs
Example #4
def generate_data_from_image_list(image_list, size, style_fullpath_prefix, input_len=1, output_len=1, batch_size=4, transform_f=None, preprocess_type='none', verbose=False):
    if transform_f is not None:
        file = h5py.File(style_fullpath_prefix + '_' + str(size[0]) + '.hdf5', 'r')
        y_style1 = np.array(file.get('conv_1_2'))
        y_style2 = np.array(file.get('conv_2_2'))
        y_style3 = np.array(file.get('conv_3_4'))
        y_style4 = np.array(file.get('conv_4_2'))

    # Init inputs/outputs
    inputs = []
    outputs = []
    for i in range(input_len):
        inputs.append([])
    for i in range(output_len):
        outputs.append([])
    nb_element = 0
    while 1:
        random.shuffle(image_list)
        for fullpath in image_list:
            nb_element += 1
            
            im = load_image(fullpath, size, preprocess_type=preprocess_type, verbose=verbose)

            if transform_f is not None:
                f_input = [ preprocess(np.array([im]), type='vgg19') ]
                y_content = transform_f(f_input)[0][0]  # First element in the list of results, first element of the output batch
                inputs[0].append(im)
                outputs[0].append(y_content)
                outputs[1].append(y_style1)
                outputs[2].append(y_style2)
                outputs[3].append(y_style3)
                outputs[4].append(y_style4)
                outputs[5].append(np.zeros_like(im))
            else:
                for i in range(input_len):
                    inputs[i].append(im)
                for i in range(output_len):
                    outputs[i].append(im)

            if nb_element >= batch_size:
                nb_element = 0

                inputs_list = []
                for i in range(input_len):
                    inputs_list.append(np.array(inputs[i]))
                outputs_list = []
                for i in range(output_len):
                    outputs_list.append(np.array(outputs[i]))

                yield(inputs_list, outputs_list)

                # reset inputs/outputs
                for i in range(input_len):
                    inputs[i] = []
                for i in range(output_len):
                    outputs[i] = []
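Since the generator loops forever (while 1), it is meant to drive Keras 1.x's fit_generator; a minimal usage sketch, with hypothetical paths, model, and transform function:

train_gen = generate_data_from_image_list(
    train_image_list, (256, 256), 'styles/starry_night',  # hypothetical inputs
    input_len=1, output_len=6, batch_size=4,
    transform_f=true_content_f, preprocess_type='none')
st_model.fit_generator(train_gen,
                       samples_per_epoch=len(train_image_list),
                       nb_epoch=10)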
Example #5
    def __getitem__(self, index):
        # print('loading image', index)
        if self.is_train:
            a = self.anno[self.train[index]]
        else:
            a = self.anno[self.valid[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts = torch.Tensor(a['joint_self'])
        # pts[:, 0:2] -= 1  # Convert pts to zero based
        pts = pts[:, 0:2]

        # c = torch.Tensor(a['objpos']) - 1
        c = torch.Tensor(a['objpos'])
        # print(c)
        s = torch.Tensor([a['scale_provided']])
        # exit()
        if a['dataset'] == 'MPII':
            c[1] = c[1] + 15 * s[0]
            s = s * 1.25
            normalizer = a['normalizer'] * 0.6
        elif a['dataset'] == 'LEEDS':
            print('using lsp data')
            s = s * 1.4375
            normalizer = torch.dist(pts[2, :], pts[13, :])
        else:
            print('no such dataset {}'.format(a['dataset']))

        # For single-person pose estimation with a centered/scaled figure
        img = imutils.load_image(img_path)
        # img = Image.open(img_path)

        inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(), 0,
                            self.inp_res, self.std_size)
        inp = imutils.im_to_torch(inp).float()
        # inp = self.color_normalize(inp, self.mean, self.std)
        # pts_aug = HumanAug.TransformPts(pts.numpy(), c.numpy(),
        #                                 s.numpy(), 0, self.out_res, self.std_size)
        #
        # # Generate ground truth
        # heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug, [self.out_res, self.out_res], sigma=1)
        # heatmap = torch.from_numpy(heatmap).float()
        tmp_scale_distri = self.grnd_scale_distri[
            index] / self.grnd_scale_distri[index].sum()
        tmp_rot_distri = self.grnd_rotation_distri[
            index] / self.grnd_rotation_distri[index].sum()
        return inp, tmp_scale_distri, tmp_rot_distri, index
Example #6
    def test_import_model(self):
        data_model_folder = dir + "/../fixture/model_conv2d_relu"

        should_convert = K._BACKEND == "theano"
        model = import_model(data_model_folder, should_convert=should_convert)
        input_img = np.array([
            load_image(dir + '/../fixture/blue.png',
                       size=None,
                       preprocess_type='st',
                       verbose=False)
        ])

        output = model.predict([input_img]).astype('int32')
        true_output = np.array([[[[0, 0, 0], [0, 0, 0], [0, 0, 0]],
                                 [[131, 116, 153], [153, 281, 364],
                                  [103, 254, 318]],
                                 [[52, 1, 0], [0, 0, 0], [0, 0, 0]]]])

        self.assertEqual(len(model.layers), 3)
        self.assertEqual(True, (output == true_output).all())
Example #7
    def __getitem__(self, index):

        if self.is_train:
            a = self.anno[self.train[index]]
        else:
            a = self.anno[self.valid[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts_path = os.path.join(self.img_folder, a['pts_paths'])

        skip_pts = [33, 36, 39, 42, 45, 48, 51, 54, 57]

        if pts_path[-4:] == '.txt':
            pts = np.loadtxt(pts_path)  # L x 2
            # pts = pts[skip_pts, :]
        elif pts_path[-4:] == '.pts':
            pts = FacePts.Pts2Lmk(pts_path)  # L x 2
            # pts = pts[skip_pts, :]

        # print(pts)

        pts = torch.Tensor(pts)
        assert torch.sum(pts - torch.Tensor(a['pts'])) == 0
        s = torch.Tensor([a['scale_provided_det']]) * 1.1
        c = torch.Tensor(a['objpos_det'])
        # For single-person pose estimation with a centered/scaled figure
        img = imutils.load_image(img_path)
        # print img.size()
        # exit()
        # img = scipy.misc.imread(img_path, mode='RGB') # CxHxW
        # img = torch.from_numpy(img)

        r = 0
        if self.is_train:
            s = s * (2**(sample_from_bounded_gaussian(self.scale_factor)))
            r = sample_from_bounded_gaussian(self.rot_factor)
            if np.random.uniform(0, 1, 1) <= 0.6:
                r = np.array([0])

            # Flip
            #if np.random.random() <= 0.5:
            #    img = torch.from_numpy(HumanAug.fliplr(img.numpy())).float()
            #    pts = HumanAug.shufflelr(pts, width=img.size(2), dataset='face')
            #    c[0] = img.size(2) - c[0]

            # Color
            img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

        # Prepare image and groundtruth map
        inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(), r,
                            self.inp_res, self.std_size)
        inp = imutils.im_to_torch(inp).float()
        # inp = self.color_normalize(inp, self.mean, self.std)
        # pts_aug = HumanAug.TransformPts(pts.numpy(), c.numpy(),
        #                                 s.numpy(), r, self.out_res, self.std_size)
        pts_input_res = HumanAug.TransformPts(pts.numpy(), c.numpy(),
                                              s.numpy(), r, self.inp_res,
                                              self.std_size)
        pts_aug = pts_input_res * (1. * self.out_res / self.inp_res)

        #check_res = pts_input_res - pts

        #print('diff.... -> {}'.format(check_res))

        # Generate ground truth
        heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug,
                                                [self.out_res, self.out_res],
                                                sigma=1)
        heatmap = torch.from_numpy(heatmap).float()
        # pts_aug = torch.from_numpy(pts_aug).float()

        if self.is_train:
            return inp, heatmap, pts_input_res
        else:
            # Meta info
            #meta = {'index': index, 'center': c, 'scale': s,
            #        'pts': pts, 'tpts': pts_aug}

            return inp, heatmap, pts, index, c, s, img_path
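sample_from_bounded_gaussian is not defined in these snippets; a plausible sketch, assuming it mirrors the clipped torch.randn(1).mul_(rf).clamp(-2 * rf, 2 * rf) pattern used in Example #8:

import numpy as np

def sample_from_bounded_gaussian(x):
    # Zero-mean Gaussian sample with std x, clipped to [-2x, 2x] (assumed contract)
    return np.clip(np.random.randn() * x, -2 * x, 2 * x)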
Example #8
    def __getitem__(self, index):
        sf = self.scale_factor
        rf = self.rot_factor
        if self.is_train:
            a = self.anno[self.train_list[index]]
        else:
            a = self.anno[self.valid_list[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts = torch.Tensor(a['joint_self'])
        # pts[:, 0:2] -= 1  # Convert pts to zero based

        # c = torch.Tensor(a['objpos']) - 1
        c = torch.Tensor(a['objpos'])
        s = a['scale_provided']

        # Adjust center/scale slightly to avoid cropping limbs
        if c[0] != -1:
            c[1] = c[1] + 15 * s
            s = s * 1.25

        # For single-person pose estimation with a centered/scaled figure
        nparts = pts.size(0)
        img = load_image(img_path)  # CxHxW

        r = 0
        if self.is_train:
            s = s * torch.randn(1).mul_(sf).add_(1).clamp(1 - sf, 1 + sf)[0]
            r = torch.randn(1).mul_(rf).clamp(
                -2 * rf, 2 * rf)[0] if random.random() <= 0.6 else 0

            # Flip
            if random.random() <= 0.5:
                img = fliplr(img)
                pts = shufflelr(pts, img.size(2), self.DATA_INFO.hflip_indices)
                c[0] = img.size(2) - c[0]

            # Color
            img[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            img[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            img[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)

        # Prepare image and groundtruth map
        inp = crop(img, c, s, self.inp_res, rot=r)
        inp = color_normalize(inp, self.DATA_INFO.rgb_mean,
                              self.DATA_INFO.rgb_stddev)

        # Generate ground truth
        tpts = pts.clone()
        target = torch.zeros(nparts, *self.out_res)
        target_weight = tpts[:, 2].clone().view(nparts, 1)

        for i in range(nparts):
            # if tpts[i, 2] > 0: # This is evil!!
            if tpts[i, 1] > 0:
                tpts[i, 0:2] = to_torch(
                    transform(tpts[i, 0:2] + 1, c, s, self.out_res, rot=r))
                target[i], vis = draw_labelmap(target[i],
                                               tpts[i] - 1,
                                               self.sigma,
                                               type=self.label_type)
                target_weight[i, 0] *= vis

        # Meta info
        if not isinstance(s, torch.Tensor):
            s = torch.Tensor(s)

        meta = {
            'index': index,
            'center': c,
            'scale': s,
            'pts': pts,
            'tpts': tpts,
            'target_weight': target_weight
        }

        return inp, target, meta
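draw_labelmap is not shown in these snippets; a minimal sketch with the same assumed contract (write a 2D Gaussian at pt into img, return the map and a visibility flag that is 0 when the Gaussian falls entirely outside), assuming type='Gaussian' and an integer sigma:

import numpy as np
import torch

def draw_gaussian(img, pt, sigma):
    # img: HxW tensor; pt: (x, y) in heatmap coordinates
    h, w = img.shape[0], img.shape[1]
    ul = [int(pt[0] - 3 * sigma), int(pt[1] - 3 * sigma)]          # patch upper-left
    br = [int(pt[0] + 3 * sigma + 1), int(pt[1] + 3 * sigma + 1)]  # patch bottom-right
    if ul[0] >= w or ul[1] >= h or br[0] < 0 or br[1] < 0:
        return img, 0  # Gaussian falls fully outside the image
    size = 6 * sigma + 1
    x = np.arange(0, size, 1, float)
    y = x[:, np.newaxis]
    g = np.exp(-((x - 3 * sigma) ** 2 + (y - 3 * sigma) ** 2) / (2 * sigma ** 2))
    # Usable ranges: intersection of the Gaussian patch with the image bounds
    g_x = max(0, -ul[0]), min(br[0], w) - ul[0]
    g_y = max(0, -ul[1]), min(br[1], h) - ul[1]
    img_x = max(0, ul[0]), min(br[0], w)
    img_y = max(0, ul[1]), min(br[1], h)
    img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = torch.from_numpy(
        g[g_y[0]:g_y[1], g_x[0]:g_x[1]]).float()
    return img, 1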
Example #9
        print("Empty string")

#===============================================================================
# # Plot all groundtruth
#===============================================================================
print("\nPlotting all ground truth now on selected images ...")
full_folder = os.path.join(IMAGE_DIR, "aflw_ours/original_res")
makedir(full_folder)

for i in range(num_images):
    index = indices_in_json[i]
    img_data = data_all[index]

    fig = plt.figure(dpi=DPI)
    img_path = img_data['img_paths']
    img = imutils.load_image(img_path)
    pts = np.array(img_data['pts'])

    # Start by assuming all points are visible; vis holds a multiclass
    # visibility label
    vis = np.ones(pts.shape[0])
    # The pts which are labelled -1 in both x and y are not visible points
    self_occluded_landmark = (pts[:, 0] == -1) & (pts[:, 1] == -1)
    external_occluded_landmark = (pts[:, 0] < -1) & (pts[:, 1] < -1)

    vis[self_occluded_landmark] = 0
    vis[external_occluded_landmark] = 2

    pts = np.abs(pts)

    # Get visible points which have 1 in the visibility
Example #10
    def __getitem__(self, index):

        if self.is_train:
            a = self.anno[self.train[index]]
        else:
            a = self.anno[self.valid[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts = torch.Tensor(a['joint_self'])
        # pts[:, 0:2] -= 1  # Convert pts to zero based
        pts = pts[:, 0:2]

        # c = torch.Tensor(a['objpos']) - 1
        c = torch.Tensor(a['objpos'])
        # print c
        s = torch.Tensor([a['scale_provided']])
        # print s
        # exit()
        if a['dataset'] == 'MPII':
            c[1] = c[1] + 15 * s[0]
            s = s * 1.25
            normalizer = a['normalizer'] * 0.6
        elif a['dataset'] == 'LEEDS':
            print('using lsp data')
            s = s * 1.4375
            normalizer = torch.dist(pts[2, :], pts[13, :])
        else:
            print('no such dataset {}'.format(a['dataset']))

        # For single-person pose estimation with a centered/scaled figure
        img = imutils.load_image(img_path)
        # print img.size()
        # exit()
        # img = scipy.misc.imread(img_path, mode='RGB') # CxHxW
        # img = torch.from_numpy(img)
        inp_std, heatmap_std = self.gen_img_heatmap(c.clone(), s.clone(), 0,
                                                    img.clone(), pts.clone())
        # r = 0
        if self.is_train:
            s = s * (2 ** (sample_from_bounded_gaussian(self.scale_factor)))
            r = sample_from_bounded_gaussian(self.rot_factor)
            if np.random.uniform(0, 1, 1) <= 0.6:
                r = np.array([0])

            # Flip
            if np.random.random() <= 0.5:
                img = torch.from_numpy(HumanAug.fliplr(img.numpy())).float()
                pts = HumanAug.shufflelr(pts, width=img.size(2), dataset='mpii')
                c[0] = img.size(2) - c[0]

            # Color
            img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

            # aug image and groundtruth map
            inp, heatmap = self.gen_img_heatmap(c.clone(), s.clone(), r,
                                                img.clone(), pts.clone())

            r = torch.FloatTensor([r])
            return inp_std, inp, heatmap, c, s, r, pts, normalizer, index
        else:
            # Meta info
            #meta = {'index': index, 'center': c, 'scale': s,
            #        'pts': pts, 'tpts': pts_aug}
            r = torch.FloatTensor([0])
            return inp_std, heatmap_std, c, s, r, pts, normalizer, index
Example #11
    def __getitem__(self, index):
        # print('loading image', index)
        if self.img_index_list is None:
            a = self.anno[self.train[index]]
        else:
            idx = self.img_index_list[index]
            a = self.anno[self.train[idx]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts = torch.Tensor(a['joint_self'])
        # pts[:, 0:2] -= 1  # Convert pts to zero based
        pts = pts[:, 0:2]
        # c = torch.Tensor(a['objpos']) - 1
        c = torch.Tensor(a['objpos'])
        # print(c)
        s = torch.Tensor([a['scale_provided']])
        # r = torch.FloatTensor([0])
        # exit()
        if a['dataset'] == 'MPII':
            c[1] = c[1] + 15 * s[0]
            s = s * 1.25
            normalizer = a['normalizer'] * 0.6
        elif a['dataset'] == 'LEEDS':
            print('using lsp data')
            s = s * 1.4375
            normalizer = torch.dist(pts[2, :], pts[13, :])
        else:
            print('no such dataset {}'.format(a['dataset']))

        # For single-person pose estimation with a centered/scaled figure
        img = imutils.load_image(img_path)
        if self.img_index_list is None:
            s_aug = s * (2**(sample_from_large_gaussian(self.scale_factor)))
            r_aug = sample_from_large_gaussian(self.rot_factor)
            if np.random.uniform(0, 1, 1) <= 0.6:
                r_aug = np.array([0])
        else:
            gaussian_mean_scale = self.scale_means[
                self.scale_index_list[index]]
            scale_factor = sample_from_small_gaussian(gaussian_mean_scale,
                                                      self.scale_var)
            gaussian_mean_rotation = self.rotation_means[
                self.rotation_index_list[index]]
            r_aug = sample_from_small_gaussian(gaussian_mean_rotation,
                                               self.rotaiton_var)
            s_aug = s * (2**scale_factor)
        if self.separate_s_r:
            img_list = [None] * 2
            heatmap_list = [None] * 2
            c_list = [c.clone()] * 2
            s_list = [s_aug.clone(), s.clone()]
            r_list = [torch.FloatTensor([0]), torch.FloatTensor([r_aug])]
            grnd_pts_list = [pts.clone(), pts.clone()]
            # print('type of normalizaer: ', type(normalizer))
            normalizer_list = [normalizer, normalizer]
            img_list[0], heatmap_list[0] = self.gen_img_heatmap(
                c.clone(), s_aug.clone(), 0, img.clone(), pts.clone())
            img_list[1], heatmap_list[1] = self.gen_img_heatmap(
                c.clone(), s.clone(), r_aug, img.clone(), pts.clone())
            if self.img_index_list is not None:
                return img_list, heatmap_list, c_list, s_list,\
                       r_list, grnd_pts_list, normalizer_list, idx
            else:
                inp_std, _ = self.gen_img_heatmap(c.clone(), s.clone(), 0,
                                                  img.clone(), pts.clone())
                return inp_std, img_list, heatmap_list, c_list, s_list, r_list,\
                       grnd_pts_list, normalizer_list, index
        else:
            if np.random.random() <= 0.5:
                img = torch.from_numpy(HumanAug.fliplr(img.numpy())).float()
                pts = HumanAug.shufflelr(pts,
                                         width=img.size(2),
                                         dataset='mpii')
                c[0] = img.size(2) - c[0]

            # Color
            img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

            inp, heatmap = self.gen_img_heatmap(c.clone(),
                                                s_aug.clone(), r_aug,
                                                img.clone(), pts.clone())
            # if self.separate_s_r is false, then self.img_index_list is not None
            # so return idx instead of index
            r_aug = torch.FloatTensor([r_aug])
            return inp, heatmap, c, s_aug, r_aug, pts, normalizer, idx
Example #12
def train_weights(input_dir,
                  size,
                  model,
                  train_iteratee,
                  cv_input_dir=None,
                  max_iter=2000,
                  batch_size=4,
                  callbacks=[],
                  load_result=False):
    losses = {'loss': [], 'val_loss': [], 'best_loss': 1e15}

    best_weights = model.get_weights()

    need_more_training = True
    current_iter = 0
    current_epoch = 1
    files = [
        input_dir + '/' + name for name in os.listdir(input_dir)
        if len(re.findall(r'\.(jpe?g|png)$', name))
    ]
    batch_size = min(batch_size, len(files))
    print('total_files %d' % len(files))

    max_epoch = math.floor((batch_size * max_iter) / len(files)) + 1
    while need_more_training:
        print('Epoch %d/%d' % (current_epoch, max_epoch))
        nb_elem = min((max_iter - current_iter) * batch_size, len(files))
        progbar = Progbar(nb_elem)
        progbar_values = []

        ims = []
        y_ims = []
        for idx, fullpath in enumerate(files):
            if load_result:
                im, y_im = load_image(
                    fullpath,
                    size=size,
                    preprocess_type='st',
                    verbose=False,
                    load_result=load_result)  # th ordering, BGR
                y_ims.append(y_im)
            else:
                im = load_image(fullpath,
                                size=size,
                                preprocess_type='st',
                                verbose=False,
                                load_result=load_result)  # th ordering, BGR
            ims.append(im)

            if len(ims) >= batch_size or idx == len(files) - 1:
                current_iter += 1
                if load_result:
                    data = train_iteratee(
                        [np.array(ims), np.array(y_ims), True])
                else:
                    data = train_iteratee([np.array(ims), True])

                loss = data[0].item(0)
                losses['loss'].append(loss)
                progbar_values.append(('loss', loss))
                for loss_idx, subloss in enumerate(data):
                    if loss_idx < 1:
                        continue
                    progbar_values.append(('loss ' + str(loss_idx), subloss))
                progbar.update(idx + 1, progbar_values)

                if loss < losses['best_loss']:
                    losses['best_loss'] = loss
                    best_weights = model.get_weights()

                for callback in callbacks:
                    # Keys must be strings; the bare names would be used as
                    # (unhashable) dict keys and raise a TypeError
                    callback({
                        'current_iter': current_iter,
                        'losses': losses,
                        'model': model,
                        'data': data
                    })

                ims = []
                if current_iter >= max_iter:
                    need_more_training = False
                    break

        current_epoch += 1

    last_weights = model.get_weights()
    print("final best loss:", losses['best_loss'])
    return (best_weights, last_weights), losses
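A minimal driver for train_weights, assuming model is a compiled Keras model and train_iteratee is a K.function returning [total_loss, subloss_1, ...] for a batch (all names hypothetical):

(best_weights, last_weights), losses = train_weights(
    'data/train', (600, 600), model, train_iteratee,  # hypothetical inputs
    max_iter=2000, batch_size=4, callbacks=[])
model.set_weights(best_weights)  # restore the snapshot with the lowest loss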
Example #13
    def __getitem__(self, index):

        if self.is_train:
            a = self.anno[self.train[index]]
        else:
            a = self.anno[self.valid[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])

        if a['pts_paths'] == "unknown.xyz":
            pts = a['pts']
        else:
            pts_path = os.path.join(self.img_folder, a['pts_paths'])

            if pts_path[-4:] == '.txt':
                pts = np.loadtxt(pts_path)  # L x 2
            else:
                pts = a['pts']

        pts = np.array(pts)
        # Start by assuming all points are visible; visible_multiclass holds a
        # multiclass visibility label
        visible_multiclass = np.ones(pts.shape[0])

        if a['dataset'] == 'aflw_ours' or a['dataset'] == 'cofw_68':
            # The pts which are labelled -1 in both x and y are not visible points
            self_occluded_landmark = (pts[:, 0] == -1) & (pts[:, 1] == -1)
            external_occluded_landmark = (pts[:, 0] < -1) & (pts[:, 1] < -1)

            visible_multiclass[self_occluded_landmark] = 0
            visible_multiclass[external_occluded_landmark] = 2

            # Valid landmarks are those that are either visible or externally
            # occluded (i.e. not self-occluded)
            valid_landmark = (pts[:, 0] != -1) & (pts[:, 1] != -1)

            # Points that are externally occluded have both coordinates negative
            # (but not -1); make them positive
            pts = np.abs(pts)

            # valid_landmark is 0 for to be masked and 1 for not to be masked
            # mask is 1 for to be masked and 0 for not to be masked
            pts_masked = np.ma.array(pts,
                                     mask=np.column_stack(
                                         (1 - valid_landmark,
                                          1 - valid_landmark)))
            pts_mean = np.mean(pts_masked, axis=0)

            # Replace -1 by the mean of valid landmarks. Otherwise taking the min
            # when calculating the geometric mean of the box can create issues later.
            pts[self_occluded_landmark] = pts_mean.data

            scale_mul_factor = 1.1

        elif a['dataset'] == "aflw" or a['dataset'] == "wflw":
            self_occluded_landmark = (pts[:, 0] <= 0) | (pts[:, 1] <= 0)
            valid_landmark = 1 - self_occluded_landmark
            visible_multiclass[self_occluded_landmark] = 0

            # valid_landmark is 0 for to be masked and 1 for not to be masked
            # mask is 1 for to be masked and 0 for not to be masked
            pts_masked = np.ma.array(pts,
                                     mask=np.column_stack(
                                         (1 - valid_landmark,
                                          1 - valid_landmark)))
            pts_mean = np.mean(pts_masked, axis=0)

            # Replace -1 by the mean of valid landmarks. Otherwise taking the min
            # when calculating the geometric mean of the box can create issues later.
            pts[self_occluded_landmark] = pts_mean.data

            scale_mul_factor = 1.25

        else:
            scale_mul_factor = 1.1

        pts = torch.Tensor(pts)  # size is 68*2
        s = torch.Tensor([a['scale_provided_det']]) * scale_mul_factor
        c = torch.Tensor(a['objpos_det'])

        # For single-person pose estimation with a centered/scaled figure
        # the image in the original size
        img = imutils.load_image(img_path)

        r = 0
        s_rand = 1
        if self.is_train:  #data augmentation for training data
            s_rand = (1 + sample_from_bounded_gaussian(self.scale_factor / 2.))
            s = s * s_rand

            r = sample_from_bounded_gaussian(self.rot_factor / 2.)

            #print('s shape is ', s.size(), 's is ', s)
            #if np.random.uniform(0, 1, 1) <= 0.6:
            #    r = np.array([0])

            if self.use_flipping:
                # Flip
                if np.random.random() <= 0.5:
                    img = torch.from_numpy(HumanAug.fliplr(
                        img.numpy())).float()
                    pts = HumanAug.shufflelr(pts,
                                             width=img.size(2),
                                             dataset='face')
                    c[0] = img.size(2) - c[0]

            # Color
            img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

            if self.use_occlusion:
                # Apply a random black occlusion
                # C x H x W
                patch_center_row = randint(1, img.size(1))
                patch_center_col = randint(1, img.size(2))

                patch_height = randint(1, img.size(1) // 2)
                patch_width = randint(1, img.size(2) // 2)

                row_min = max(0, patch_center_row - patch_height)
                row_max = min(img.size(1), patch_center_row + patch_height)
                col_min = max(0, patch_center_col - patch_width)
                col_max = min(img.size(2), patch_center_col + patch_width)

                img[:, row_min:row_max, col_min:col_max] = 0

        # Prepare points first
        pts_input_res = HumanAug.TransformPts(pts.numpy(), c.numpy(),
                                              s.numpy(), r, self.inp_res,
                                              self.std_size)

        # Some landmark points can go outside after transformation. Determine the
        # extra scaling required.
        # This can only be done for the training points. For validation, we do
        # not know the point locations.
        if self.is_train and self.keep_pts_inside:
            # visible copy takes care of whether point is visible or not.
            visible_copy = visible_multiclass.copy()
            visible_copy[visible_multiclass > 1] = 1
            scale_down = get_ideal_scale(pts_input_res,
                                         self.inp_res,
                                         img_path,
                                         visible=visible_copy)
            s = s / scale_down
            s_rand = s_rand / scale_down
            pts_input_res = HumanAug.TransformPts(pts.numpy(), c.numpy(),
                                                  s.numpy(), r, self.inp_res,
                                                  self.std_size)

        if a['dataset'] == "aflw":
            meta_box_size = a['box_size']
            # We convert the meta_box size also to the input res. The meta_box
            # is not formed by the landmark point but is supplied externally.
            # We assume the meta_box as two points [meta_box_size, 0] and [0, 0]
            # apply the transformation on top of it
            temp = HumanAug.TransformPts(
                np.array([[meta_box_size, 0], [0, 0]]), c.numpy(), s.numpy(),
                r, self.inp_res, self.std_size)
            # Passed as array of 2 x 2
            # we only want the transformed distance between the points
            meta_box_size_input_res = np.linalg.norm(temp[1] - temp[0])
        else:
            meta_box_size_input_res = -10  # some invalid number

        # pts_input_res is in the size of 256 x 256
        # Bring down to 64 x 64 since finally heatmap will be 64 x 64
        pts_aug = pts_input_res * (1. * self.out_res / self.inp_res)

        # Prepare image
        inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(), r,
                            self.inp_res, self.std_size)
        inp_vis = inp
        inp = imutils.im_to_torch(inp).float()  # 3*256*256

        # Generate proxy ground truth heatmap
        heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug,
                                                [self.out_res, self.out_res],
                                                sigma=self.sigma)
        heatmap = torch.from_numpy(heatmap).float()
        heatmap_mask = HumanPts.pts2mask(pts_aug, [self.out_res, self.out_res],
                                         bb=10)

        if self.is_train:
            return inp, heatmap, pts_input_res, heatmap_mask, s_rand, visible_multiclass, meta_box_size_input_res
        else:
            return inp, heatmap, pts_input_res, c, s, index, inp_vis, s_rand, visible_multiclass, meta_box_size_input_res
Example #14
style_output_layers = [
    grams(layer_dict[ls_name].output) for ls_name in style_layers
]
predict_style = K.function([vgg_model.input], style_output_layers)

content_layers = ['conv_3_2']
content_output_layers = [
    layer_dict[lc_name].output for lc_name in content_layers
]
predict_content = K.function([vgg_model.input], content_output_layers)

if 'results_style_dir' in locals():
    image_list = get_image_list(args.style_dir)
    for image_path in image_list:
        X_train_style = np.array([
            load_image(image_path,
                       size=(height, width),
                       preprocess_type='vgg19',
                       verbose=True)
        ])
        results = predict_style([X_train_style])

        filename = image_path.split('/')[-1].split('.')[0]
        output_filename = results_style_dir + '/' + filename + '_' + str(
            args.image_size) + '.hdf5'
        with h5py.File(output_filename, 'w') as hf:
            for idx, style_layer in enumerate(style_layers):
                hf.create_dataset(style_layer, data=results[idx][0])

if 'results_content_dir' in locals():
    print(
        'be careful, every file dumped takes 22MB; check you have enough space'
    )
Example #15
    def __getitem__(self, index):

        if self.is_train:
            a = self.anno[self.train[index]]
        else:
            a = self.anno[self.valid[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts = torch.Tensor(a['joint_self'])
        # pts[:, 0:2] -= 1  # Convert pts to zero based
        pts = pts[:, 0:2]

        # c = torch.Tensor(a['objpos']) - 1
        c = torch.Tensor(a['objpos'])
        # print c
        s = torch.Tensor([a['scale_provided']])
        # print s
        # exit()
        if a['dataset'] == 'MPII':
            c[1] = c[1] + 15 * s[0]
            s = s * 1.25
            normalizer = a['normalizer'] * 0.6
        elif a['dataset'] == 'LEEDS':
            print('using lsp data')
            s = s * 1.4375
            normalizer = torch.dist(pts[2, :], pts[13, :])
        else:
            print('no such dataset {}'.format(a['dataset']))

        # For single-person pose estimation with a centered/scaled figure
        img = imutils.load_image(img_path)
        # print img.size()
        # exit()
        # img = scipy.misc.imread(img_path, mode='RGB') # CxHxW
        # img = torch.from_numpy(img)

        r = 0
        if self.is_train:
            s = s * (2**(sample_from_bounded_gaussian(self.scale_factor)))
            r = sample_from_bounded_gaussian(self.rot_factor)
            if np.random.uniform(0, 1, 1) <= 0.6:
                r = 0

            # Flip
            if np.random.random() <= 0.5:
                img = torch.from_numpy(HumanAug.fliplr(img.numpy())).float()
                pts = HumanAug.shufflelr(pts,
                                         width=img.size(2),
                                         dataset='mpii')
                c[0] = img.size(2) - c[0]

            # Color
            img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

        # Prepare image and groundtruth map
        inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(), r,
                            self.inp_res, self.std_size)
        inp = imutils.im_to_torch(inp).float()
        # inp = self.color_normalize(inp, self.mean, self.std)
        pts_aug = HumanAug.TransformPts(pts.numpy(), c.numpy(), s.numpy(), r,
                                        self.out_res, self.std_size)

        #idx_indicator = (pts[:, 0] <= 0) | (pts[:, 1] <= 0)
        #idx = torch.arange(0, pts.size(0)).long()
        #idx = idx[idx_indicator]
        #pts_aug[idx, :] = 0
        # Generate ground truth
        heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug,
                                                [self.out_res, self.out_res],
                                                sigma=1)
        heatmap = torch.from_numpy(heatmap).float()
        # pts_aug = torch.from_numpy(pts_aug).float()

        r = torch.FloatTensor([r])
        #normalizer = torch.FloatTensor([normalizer])
        if self.is_train:
            #print 'inp size: ', inp.size()
            #print 'heatmap size: ', heatmap.size()
            #print 'c size: ', c.size()
            #print 's size: ', s.size()
            #print 'r size: ', r.size()
            #print 'pts size: ', pts.size()
            #print 'normalizer size: ', normalizer.size()
            #print 'r: ', r
            #    if len(r.size()) != 1:
            #	print 'r: ', r
            #    if len(c.size()) != 1:
            #	print 'c: ', c
            return inp, heatmap, c, s, r, pts, normalizer
        else:
            # Meta info
            #meta = {'index': index, 'center': c, 'scale': s,
            #        'pts': pts, 'tpts': pts_aug}

            return inp, heatmap, c, s, r, pts, normalizer, index
Example #16
parser.add_argument('--pooling_type', default='avg', type=str, choices=['max', 'avg'], help='VGG pooling type.')
parser.add_argument('--image_size', default=256, type=int, help='Input image size.')
parser.add_argument('--max_iter', default=600, type=int, help='Number of training iter.')
args = parser.parse_args()

dim_ordering = K.image_dim_ordering()
channels = 3
width = args.image_size
height = args.image_size
size = (height, width)
if dim_ordering == 'th':
    input_shape = (channels, width, height)
else:
    input_shape = (width, height, channels)

X_train = np.array([load_image(args.content, size=(height, width), preprocess_type='vgg19', verbose=True)])
print("X_train shape:", X_train.shape)

X_train_style = np.array([load_image(args.style, size=(height, width), preprocess_type='vgg19', verbose=True)])
print("X_train_style shape:", X_train_style.shape)

print('Loading VGG headless 5')
modelWeights = "%s/%s-%s-%s%s" % (vgg19Dir, 'vgg-19', dim_ordering, K._BACKEND, '_headless_5_weights.hdf5')
model = VGG_19_headless_5(input_shape, modelWeights, trainable=False, pooling_type=args.pooling_type)
layer_dict, layers_names = get_layer_data(model, 'conv_')
print('Layers found:' + ', '.join(layers_names))

input_layer = model.input

print('Building white noise images')
input_data = create_noise_tensor(height, width, channels)
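create_noise_tensor is not shown here; a plausible stand-in, assuming it returns a batch of one uniform-noise image laid out for the active dim ordering (the [0, 255] range is an assumption):

import numpy as np
from keras import backend as K

def create_noise_tensor(height, width, channels):
    # Batch of one random image in the backend's expected layout
    if K.image_dim_ordering() == 'th':
        return np.random.rand(1, channels, height, width) * 255.
    return np.random.rand(1, height, width, channels) * 255.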
Example #17
parser.add_argument('--max_iter',
                    default=600,
                    type=int,
                    help='Number of training iter.')
args = parser.parse_args()

dim_ordering = K.image_dim_ordering()
channels = 3
width = args.image_size
height = args.image_size
size = (height, width)
if dim_ordering == 'th':
    input_shape = (channels, width, height)
else:
    input_shape = (width, height, channels)

X_train = np.array([
    load_image(args.content,
               size=(height, width),
               preprocess_type='vgg19',
               verbose=True)
])
print("X_train shape:", X_train.shape)

X_train_style = np.array([
    load_image(args.style,
               size=(height, width),
               preprocess_type='vgg19',
               verbose=True)
])
print("X_train_style shape:", X_train_style.shape)

print('Loading VGG headless 5')
modelWeights = "%s/%s-%s-%s%s" % (vgg19Dir, 'vgg-19', dim_ordering, K._BACKEND,
                                  '_headless_5_weights.hdf5')
Example #18
    val_image_list = val_image_list[:10]
nb_val_samples = len(val_image_list)
val_generator = generate_data_from_image_list(val_image_list, (height, width),
                                              style_fullpath_prefix,
                                              input_len=1,
                                              output_len=6,
                                              batch_size=args.batch_size,
                                              transform_f=true_content_f,
                                              preprocess_type='none',
                                              verbose=False)
# The TensorBoard callback doesn't handle generators so far, and we actually
# only need a few images to see the qualitative result
validation_data = []
validation_data.append(
    load_image(train_image_list[0],
               size=(height, width),
               preprocess_type='none'))
validation_data.append(
    load_image(val_image_list[0], size=(height, width),
               preprocess_type='none'))
st_model.validation_data = [validation_data]

print('Iterating over hyper parameters')
current_iter = 0
# Alpha needs to be a lot lower than in the gatys_paper.
# This is probably because here we look at a new content picture on each batch,
# while the style is always the same, so the optimizer can descend the style
# gradient much faster than the noisier content one.
for alpha in [1e1]:
    for beta in [1.]:
        for gamma in [1e-5]: