Esempio n. 1
0
    def __getitem__(self, index):
        """Return the cropped, normalized image and its target for ``index``.

        The sample id is taken from ``self.train`` when ``self.is_train`` is
        set and from ``self.valid`` otherwise. The crop center and scale are
        derived from the box returned by ``vdb.seg2bb`` for the mask of
        ``self.color`` in the segmentation image. In training mode the sample
        is additionally augmented with a random shift, scale, rotation,
        lightness change and horizontal flip.

        Args:
            index: Position inside ``self.train`` / ``self.valid``.

        Returns:
            A tuple ``(inp, target)`` where ``inp`` is the result of ``crop``
            followed by ``color_normalize`` and ``target`` is whatever
            ``self.load_angles(img_path)`` returns.
        """
        ids = self.train[index] if self.is_train else self.valid[index]

        images = self.dataset.get_image([self.cam_name], [ids])
        img_path = images[0]

        img = load_image(img_path)  # CxHxW
        target = self.load_angles(img_path)

        segmasks = self.dataset.get_seg([self.cam_name], [ids])
        segmask = io.imread(segmasks[0])

        binary_arm = vdb.get_obj_mask(segmask, self.color)
        x0, x1, y0, y1 = vdb.seg2bb(binary_arm)

        # Crop center is the middle of the box; the scale is proportional to
        # the square root of the box area.
        c = np.array([(x0 + x1), (y0 + y1)]) / 2
        s = np.sqrt((y1 - y0) * (x1 - x0)) / 60.0
        r = 0

        if self.is_train:
            # Random move: shift the center by up to +/-30 on each axis.
            c = c + np.array([
                -30 + 60 * random.random(), -30 + 60 * random.random()
            ])
            # Random scale: multiply by a factor in [0.6, 1.8).
            s *= 0.6 * (1 + 2 * random.random())

            # Random rotation in [-rf, rf).
            rf = 15
            r = -rf + 2 * random.random() * rf

            # Color: rescale channel 0 of the LAB image (lightness) by a
            # random factor and convert back to RGB.
            im_rgb = im_to_numpy(img)
            im_lab = cv2.cvtColor(im_rgb, cv2.COLOR_RGB2LAB)
            im_lab[:, :, 0] = np.clip(
                im_lab[:, :, 0] * (random.uniform(0.3, 1.3)), 0, 255)
            img = im_to_torch(cv2.cvtColor(im_lab, cv2.COLOR_LAB2RGB))

            if random.random() <= 0.5:
                img = torch.from_numpy(fliplr(img.numpy())).float()
                # The crop center must be mirrored together with the image,
                # otherwise the crop is no longer centered on the object.
                c[0] = img.size(2) - c[0]
                # NOTE(review): ``target`` is not changed by the flip --
                # confirm the angles are invariant to mirroring.

        inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=r)
        inp = color_normalize(inp, self.mean, self.std)

        return inp, target
Esempio n. 2
0
    def __getitem__(self, index):
        """Return one sample; its layout depends on ``self.anno_type``.

        When ``self.multi_scale`` is set, ``index`` enumerates
        (sample, scale) pairs: the scale is ``self.scales[index % len]`` and
        the sample index is ``index // len``.

        * ``'3d'`` / ``'2d'``: keypoints are loaded, the image is cropped
          around the object box (with random augmentation when
          ``self.is_train``), and one label map per keypoint is drawn.
        * ``'none'``: only the image is cropped, around the box stored in
          ``self.anno`` when available and around the image center otherwise.

        Args:
            index: Sample index (or sample/scale index, see above).

        Returns:
            ``(inp, target, meta)`` for ``'3d'`` / ``'2d'``, where ``target``
            has shape ``(nparts, self.out_res, self.out_res)``, or
            ``(inp, [], meta)`` for ``'none'``. ``meta`` is a dict with the
            keys ``index``, ``pts``, ``tpts``, ``center``, ``original_size``,
            ``scale`` and ``img_name``.

        Raises:
            ValueError: If ``self.anno_type`` is not one of ``'3d'``,
                ``'2d'`` or ``'none'``.
        """
        scale_factor = 60.0

        if self.multi_scale:
            scale = self.scales[index % len(self.scales)]
            index = index // len(self.scales)

        if self.anno_type in ('3d', '2d'):
            ids = self.train[index] if self.is_train else self.valid[index]

            if self.anno_type == '3d':
                joint_2d, vertex_2d, img_path = get_joint_vertex_2d(
                    self.dataset, ids, self.cam_name, self.actor_name)
                # Discard the first joint, as we do not predict it.
                joint_2d = joint_2d[1:]

                # vertex.json maps raw vertices to final keypoints: each
                # entry selects the vertices averaged into one keypoint.
                # NOTE(review): this file is re-read on every call; consider
                # caching it if loading becomes a bottleneck.
                with open(os.path.join(self.meta_dir, 'vertex.json'),
                          'r') as f:
                    vertex_seq = json.load(f)

                num_vertex = len(vertex_seq)
                pts = np.zeros((num_vertex, 2))
                for i, vertex_ids in enumerate(vertex_seq):
                    pts[i] = np.average(vertex_2d[vertex_ids], axis=0)

                pts = np.concatenate((joint_2d, pts), axis=0)
            else:
                # Data with only 2d annotations.
                img_path = os.path.join(self.img_folder, 'imgs',
                                        self.dataset[index])
                pred_name = os.path.splitext(
                    os.path.basename(img_path))[0] + '.json'
                with open(
                        os.path.join(self.img_folder, 'd3_preds', pred_name),
                        'r') as f:
                    obj = json.load(f)
                pts = np.transpose(np.array(obj['reprojection']))

                if self.ignore_invis_pts and 'visibility' in obj:
                    # Build an explicit boolean mask: np.invert on a raw list
                    # of 0/1 integers would be a bitwise NOT (-1/-2) and
                    # would index the wrong rows.
                    visible = np.asarray(obj['visibility'][:-2], dtype=bool)
                    pts[~visible, :] = -1.0

            nparts = pts.shape[0]

            img = self._load_input_image(img_path)
            original_size = np.array((img.shape[2], img.shape[1]))

            if self.anno_type == '3d':
                segmasks = self.dataset.get_seg([self.cam_name], [ids])
                segmask = io.imread(segmasks[0])

                binary_arm = vdb.get_obj_mask(segmask, self.color)
                x0, x1, y0, y1 = vdb.seg2bb(binary_arm)
            else:
                bb = self.bbox_anno[os.path.basename(img_path)]
                x0, x1, y0, y1 = bb[0][0], bb[1][0], bb[0][1], bb[1][1]

            # Crop center is the middle of the box; the scale is proportional
            # to the square root of the box area.
            c = np.array([(x0 + x1), (y0 + y1)]) / 2
            s = np.sqrt((y1 - y0) * (x1 - x0)) / scale_factor
            r = 0

            if self.is_train:
                # Random move: shift the center by up to +/-30 on each axis.
                c = c + np.array([
                    -30 + 60 * random.random(), -30 + 60 * random.random()
                ])
                # Random scale: multiply by a factor in [0.6, 1.8).
                s *= 0.6 * (1 + 2 * random.random())

                # Random rotation in [-rf, rf).
                rf = 15
                r = -rf + 2 * random.random() * rf

                # Color: rescale channel 0 of the LAB image (lightness) by a
                # random factor and convert back to RGB.
                im_rgb = im_to_numpy(img)
                im_lab = cv2.cvtColor(im_rgb, cv2.COLOR_RGB2LAB)
                im_lab[:, :, 0] = np.clip(
                    im_lab[:, :, 0] * (random.uniform(0.3, 1.3)), 0, 255)
                img = im_to_torch(cv2.cvtColor(im_lab, cv2.COLOR_LAB2RGB))

                if random.random() <= 0.5:
                    # Flip: mirror the image, the keypoint x-coordinates and
                    # the crop center, and swap the paired keypoints.
                    img = torch.from_numpy(fliplr(img.numpy())).float()
                    pts[:, 0] = img.size(2) - pts[:, 0]
                    for pair in self.lr_pairs:
                        pts[[pair[0], pair[1]]] = pts[[pair[1], pair[0]]]
                    c[0] = img.size(2) - c[0]

            if self.multi_scale:
                s = s * scale

            inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=r)
            inp = color_normalize(inp, self.mean, self.std)

            tpts = pts.copy()
            target = torch.zeros(nparts, self.out_res, self.out_res)
            for i in range(nparts):
                # A keypoint is drawn only when its y-coordinate is positive;
                # points marked invisible above carry -1 and are skipped.
                if tpts[i, 1] > 0:
                    tpts[i, 0:2] = to_torch(
                        transform(tpts[i, 0:2],
                                  c,
                                  s, [self.out_res, self.out_res],
                                  rot=r))
                    target[i] = draw_labelmap(target[i],
                                              tpts[i],
                                              self.sigma,
                                              type=self.label_type)

            # Meta info
            meta = {
                'index': index,
                'pts': pts,
                'tpts': tpts,
                'center': c,
                'original_size': original_size,
                'scale': s,
                'img_name': os.path.splitext(os.path.basename(img_path))[0]
            }

            return inp, target, meta

        if self.anno_type == 'none':
            img_path = self.dataset[index]

            img = self._load_input_image(img_path)
            original_size = np.array((img.shape[2], img.shape[1]))

            if self.anno is not None:
                joints = self.anno[self.F[index]]
                x0, y0 = joints[0][0], joints[0][1]
                x1, y1 = joints[1][0], joints[1][1]
                c = np.array([(x0 + x1), (y0 + y1)]) / 2
                s = np.sqrt((y1 - y0) * (x1 - x0)) / scale_factor
                if self.multi_scale:
                    s = s * scale
            else:
                # No box available: crop around the image center with a
                # fixed scale.
                c = np.array([img.shape[2] / 2, img.shape[1] / 2])
                s = 5.0

            r = 0

            inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=r)
            inp = color_normalize(inp, self.mean, self.std)

            meta = {
                'index': index,
                'pts': [],
                'tpts': [],
                'center': c,
                'original_size': original_size,
                'scale': s,
                'img_name': os.path.splitext(os.path.basename(img_path))[0]
            }

            return inp, [], meta

        # Fail loudly instead of implicitly returning None.
        raise ValueError('unsupported anno_type: {!r}'.format(self.anno_type))

    def _load_input_image(self, img_path):
        """Load ``img_path``, replacing the background if ``self.replace_bg``.

        Without background replacement the image comes from ``load_image``.
        Otherwise it is read with ``cv2.imread``, passed through
        ``self.background_replace.replace(..., 'white')``, converted from BGR
        to RGB and handed to ``im_to_torch``.
        """
        if not self.replace_bg:
            return load_image(img_path)  # CxHxW
        replaced = self.background_replace.replace(
            cv2.imread(img_path), 'white')
        return im_to_torch(cv2.cvtColor(replaced, cv2.COLOR_BGR2RGB))