Ejemplo n.º 1
0
    def __getitem__(self, idx):
        """Return the cropped image, landmark target maps and metadata.

        Row ``idx`` of ``self.landmarks_frame`` supplies the image path
        (column 0), the scale (column 1), the crop centre (columns 2-3) and
        the flattened landmark coordinates (columns 4 onwards). When
        ``self.is_train`` is set the sample is randomly rescaled, rotated
        and (if ``self.flip``) mirrored before cropping.

        Returns:
            tuple: ``(img, target, meta)``. ``img`` is the normalised crop
            transposed with axes ``[2, 0, 1]``; ``target`` is a tensor with
            one ``output_size`` map per landmark; ``meta`` is a dict with
            the keys ``index``, ``center``, ``scale``, ``pts`` and ``tpts``.
        """
        frame = self.landmarks_frame
        image_path = os.path.join(self.data_root, frame.iloc[idx, 0])

        # The annotated scale is enlarged by a fixed factor of 1.25.
        scale = frame.iloc[idx, 1] * 1.25
        center = torch.Tensor([frame.iloc[idx, 2], frame.iloc[idx, 3]])

        # Remaining columns are reshaped into rows of two coordinates.
        landmarks = frame.iloc[idx, 4:].values.astype('float').reshape(-1, 2)
        num_points = landmarks.shape[0]

        img = np.array(Image.open(image_path).convert('RGB'),
                       dtype=np.float32)

        rot = 0
        if self.is_train:
            low, high = 1 - self.scale_factor, 1 + self.scale_factor
            scale = scale * random.uniform(low, high)

            # A rotation is only drawn when the first random test passes.
            if random.random() <= 0.6:
                rot = random.uniform(-self.rot_factor, self.rot_factor)
            else:
                rot = 0

            # The random draw happens before the flip switch is consulted.
            if random.random() <= 0.5 and self.flip:
                img = np.fliplr(img)
                landmarks = fliplr_joints(landmarks,
                                          width=img.shape[1],
                                          dataset='WFLW')
                center[0] = img.shape[1] - center[0]

        img = crop(img, center, scale, self.input_size, rot=rot)

        out_h, out_w = self.output_size[0], self.output_size[1]
        target = np.zeros((num_points, out_h, out_w))
        tpts = landmarks.copy()

        for k in range(num_points):
            # Landmarks whose second coordinate is not positive are skipped
            # and keep an all-zero target map.
            if tpts[k, 1] > 0:
                # Coordinates are shifted by +1 for the transform and by -1
                # again when the target map is rendered.
                tpts[k, 0:2] = transform_pixel(tpts[k, 0:2] + 1, center,
                                               scale, self.output_size,
                                               rot=rot)
                target[k] = generate_target(target[k], tpts[k] - 1,
                                            self.sigma,
                                            label_type=self.label_type)

        img = (img.astype(np.float32) / 255.0 - self.mean) / self.std
        img = img.transpose([2, 0, 1])

        meta = {
            'index': idx,
            'center': torch.Tensor(center),
            'scale': scale,
            'pts': torch.Tensor(landmarks),
            'tpts': torch.Tensor(tpts),
        }

        return img, torch.Tensor(target), meta
Ejemplo n.º 2
0
def data_augmentation(sample, is_train):
    """Read, augment and normalise one pose-estimation sample.

    The image named by ``sample['image']`` is loaded, optionally rescaled,
    rotated and flipped (training only), warped to ``cfg.IMAGE_SIZE`` and
    normalised with ``cfg.MEAN`` / ``cfg.STD``. The joints are mapped
    through the same affine transform before the targets are generated.

    The caller's ``sample`` is never modified: the joints, visibility flags
    and centre are copied first, because the flip and affine steps write
    into those containers in place.

    Args:
        sample: mapping with the keys ``image``, ``joints_3d``,
            ``joints_3d_vis``, ``center`` and ``scale``; ``filename`` and
            ``score`` are optional.
        is_train: enables the random augmentation and selects the short
            return form.

    Returns:
        tuple: ``(input, target, target_weight)`` when ``is_train`` is true,
        otherwise ``(input, target, target_weight, c, s, score,
        image_file)``.

    Raises:
        ValueError: if the image cannot be read.
    """
    import copy  # function-scope import keeps this block self-contained

    image_file = sample['image']
    filename = sample['filename'] if 'filename' in sample else ''
    # Private copies: see the docstring for why these must not alias sample.
    joints = copy.deepcopy(sample['joints_3d'])
    joints_vis = copy.deepcopy(sample['joints_3d_vis'])
    c = copy.deepcopy(sample['center'])
    s = sample['scale']
    score = sample['score'] if 'score' in sample else 1
    r = 0

    # used for ce: fixed seeds make the random augmentation repeatable
    if 'ce_mode' in os.environ:
        random.seed(0)
        np.random.seed(0)

    data_numpy = cv2.imread(image_file,
                            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    # cv2.imread signals failure by returning None instead of raising.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    if is_train:
        sf = cfg.SCALE_FACTOR
        rf = cfg.ROT_FACTOR
        # Scale multiplier: normal around 1, clipped to [1 - sf, 1 + sf].
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation: drawn only when the random test passes, clipped to
        # [-2 * rf, 2 * rf]; otherwise no rotation.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if cfg.FLIP and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               cfg.FLIP_PAIRS)
            # Mirror the crop centre around the image width.
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, cfg.IMAGE_SIZE)
    image = cv2.warpAffine(data_numpy,
                           trans,
                           (int(cfg.IMAGE_SIZE[0]), int(cfg.IMAGE_SIZE[1])),
                           flags=cv2.INTER_LINEAR)

    # Only joints flagged as visible are moved into the warped frame.
    for i in range(cfg.NUM_JOINTS):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # Numpy target
    target, target_weight = generate_target(cfg, joints, joints_vis)

    if cfg.DEBUG:
        visualize(cfg, filename, data_numpy, image.copy(), joints, target)

    # Normalization: channels first, scale to [0, 1], then mean / std.
    image = image.astype('float32').transpose((2, 0, 1)) / 255
    image -= np.array(cfg.MEAN).reshape((3, 1, 1))
    image /= np.array(cfg.STD).reshape((3, 1, 1))

    if is_train:
        return image, target, target_weight
    else:
        return image, target, target_weight, c, s, score, image_file
Ejemplo n.º 3
0
    def __getitem__(self, idx):
        """Return the cropped image, landmark target maps and metadata.

        The image comes from ``self.images[idx][0]`` and the landmarks from
        the first 58 values of ``self.pts[idx]``. The crop centre and scale
        are derived from the landmarks' bounding box. When ``self.is_train``
        is set the sample is randomly rescaled, rotated and (if
        ``self.flip``) mirrored before cropping.

        Returns:
            tuple: ``(img, target, meta)``. ``img`` is the normalised crop
            transposed with axes ``[2, 0, 1]``; ``target`` is a tensor with
            one ``output_size`` map per landmark; ``meta`` is a dict with
            the keys ``index``, ``center``, ``scale``, ``pts`` and ``tpts``.
        """
        img = self.images[idx][0]

        # A two-dimensional image is replicated into three channels.
        if len(img.shape) == 2:
            height, width = img.shape[0], img.shape[1]
            img = np.repeat(img.reshape(height, width, 1), 3, axis=2)

        landmarks = self.pts[idx][0:58].reshape(2, -1).transpose()
        num_points = landmarks.shape[0]

        # Bounding box of the landmarks, rounded outwards to whole pixels.
        left = math.floor(np.min(landmarks[:, 0]))
        right = math.ceil(np.max(landmarks[:, 0]))
        top = math.floor(np.min(landmarks[:, 1]))
        bottom = math.ceil(np.max(landmarks[:, 1]))

        center = torch.Tensor([(left + right) / 2.0, (top + bottom) / 2.0])

        # Longest box side divided by 200, then enlarged by a factor of 1.25.
        scale = max(right - left, bottom - top) / 200.0
        scale = scale * 1.25

        rot = 0
        if self.is_train:
            low, high = 1 - self.scale_factor, 1 + self.scale_factor
            scale = scale * random.uniform(low, high)

            # A rotation is only drawn when the first random test passes.
            if random.random() <= 0.6:
                rot = random.uniform(-self.rot_factor, self.rot_factor)
            else:
                rot = 0

            # The random draw happens before the flip switch is consulted.
            if random.random() <= 0.5 and self.flip:
                img = np.fliplr(img)
                landmarks = fliplr_joints(landmarks,
                                          width=img.shape[1],
                                          dataset='COFW')
                center[0] = img.shape[1] - center[0]

        img = crop(img, center, scale, self.input_size, rot=rot)

        out_h, out_w = self.output_size[0], self.output_size[1]
        target = np.zeros((num_points, out_h, out_w))
        tpts = landmarks.copy()

        for k in range(num_points):
            # Landmarks whose second coordinate is not positive are skipped
            # and keep an all-zero target map.
            if tpts[k, 1] > 0:
                tpts[k, 0:2] = transform_pixel(tpts[k, 0:2] + 1, center,
                                               scale, self.output_size,
                                               rot=rot)
                target[k] = generate_target(target[k], tpts[k] - 1,
                                            self.sigma,
                                            label_type=self.label_type)

        img = (img.astype(np.float32) / 255 - self.mean) / self.std
        img = img.transpose([2, 0, 1])

        meta = {
            'index': idx,
            'center': torch.Tensor(center),
            'scale': scale,
            'pts': torch.Tensor(landmarks),
            'tpts': torch.Tensor(tpts),
        }

        return img, torch.Tensor(target), meta
Ejemplo n.º 4
0
def get_final_preds_match(config, outputs, center, scale, flip_pairs=None):
    """Match queries to joints and map their coordinates back to the image.

    For every batch element the per-query class probabilities are turned
    into an assignment problem (``linear_sum_assignment`` on the negated,
    transposed probabilities), giving one query per joint. The matched
    coordinates are optionally un-flipped, scaled by
    ``config.MODEL.IMAGE_SIZE`` and passed through ``transform_preds``.

    Args:
        config: configuration providing ``TEST.INCLUDE_BG_LOGIT`` and
            ``MODEL.IMAGE_SIZE``.
        outputs: mapping with the tensors ``pred_logits`` and
            ``pred_coords``.
        center: per-sample centres, indexed by batch position.
        scale: per-sample scales, indexed by batch position.
        flip_pairs: when not ``None``, the matched predictions are passed
            through ``fliplr_joints`` with these pairs.

    Returns:
        tuple: ``(matched_coord, matched_score, orig_coord)`` stacked over
        the batch; ``orig_coord`` holds the patch-scaled coordinates before
        ``transform_preds``.
    """
    logits = outputs['pred_logits'].detach().cpu()
    coords = outputs['pred_coords'].detach().cpu()

    # The last logit is excluded from the joint classes.
    num_joints = logits.shape[-1] - 1
    joint_index = list(np.arange(num_joints))

    # Either softmax over every logit and drop the last column, or drop it
    # first and softmax over the joint classes only.
    if config.TEST.INCLUDE_BG_LOGIT:
        prob = F.softmax(logits, dim=-1)[..., :-1]
    else:
        prob = F.softmax(logits[..., :-1], dim=-1)

    image_size = config.MODEL.IMAGE_SIZE
    patch_coords, scores, image_coords = [], [], []

    for b, sample_prob in enumerate(prob):
        # Cost Matrix: [17, N]
        cost = -sample_prob.transpose(0, 1)
        _, query_ind = linear_sum_assignment(cost)

        score = sample_prob[query_ind, joint_index][..., None].numpy()
        pred_raw = coords[b, query_ind].numpy()

        if flip_pairs is not None:
            pred_raw, score = fliplr_joints(pred_raw,
                                            score,
                                            1,
                                            flip_pairs,
                                            pixel_align=False,
                                            is_vis_logit=True)

        # scale to the whole patch (in place, so the dtype is preserved)
        pred_raw *= np.array(image_size)
        # transform back w.r.t. the entire img
        pred = transform_preds(pred_raw, center[b], scale[b], image_size)

        patch_coords.append(pred_raw)
        scores.append(score)
        image_coords.append(pred)

    matched_score = np.stack(scores)
    matched_coord = np.stack(image_coords)

    return matched_coord, matched_score, np.stack(patch_coords)
Ejemplo n.º 5
0
    def __getitem__(self, idx):
        """Load sample ``idx``, augment it and build its training targets.

        A deep copy of ``self.db[idx]`` is used, so the in-place edits made
        here (centre flip, joint transforms) never touch the stored record.

        Returns:
            tuple: ``(input, target, target_weight, meta)``. ``input`` is
            the affine-warped image, passed through ``self.transform`` when
            one is set. ``target`` and ``target_weight`` are the outputs of
            ``self.generate_target`` converted to tensors. ``meta`` carries
            the file names, transformed joints, centre, scale, rotation and
            score.

        Raises:
            ValueError: if the image cannot be read.
        """
        db_rec = copy.deepcopy(self.db[idx])
        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        if self.data_format == 'zip':
            # Imported lazily: only needed when the data is zip-packed.
            from utils import zipreader
            data_numpy = zipreader.imread(image_file, read_flags)
        else:
            data_numpy = cv2.imread(image_file, read_flags)

        # Fail loudly on an unreadable image. This has to happen before the
        # colour conversion, which would otherwise raise its own error on a
        # None image and hide this message.
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        if self.color_rgb:
            # Images are read in BGR channel order; convert to RGB.
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        # Centre and scale as stored in the record.
        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            # Half-body augmentation: attempted only when enough joints are
            # flagged visible and the random test passes.
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                # Keep the annotated centre/scale when no half body is found.
                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            # Scale multiplier: normal around 1, clipped to [1 - sf, 1 + sf].
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # Rotation: drawn only when the random test passes, clipped to
            # [-2 * rf, 2 * rf]; otherwise no rotation.
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                # Mirror the image horizontally, then the joints and centre.
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        # One affine matrix encodes the crop, scale and rotation; the output
        # image always has the size given by self.image_size.
        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        # Move the joints flagged as visible into the warped image frame.
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, target, target_weight, meta
    def __getitem__(self, idx):
        """Return the augmented image, targets and metadata for ``idx``.

        The record is deep-copied from ``self.db`` so that the in-place
        edits below leave the stored annotations untouched.

        Returns:
            tuple: ``(input, target, target_weight, meta)``. ``input`` is
            the affine-warped image, passed through ``self.transform`` when
            one is set. ``target`` and ``target_weight`` come from
            ``self.generate_target`` and are converted to tensors. ``meta``
            is a dict describing the sample and the applied transform.

        Raises:
            ValueError: if the image cannot be read.
        """
        # Fetch the sample record for this index.
        db_rec = copy.deepcopy(self.db[idx])
        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        # Read through the zip reader for zip-packed data, else from disk.
        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # Report an unreadable image before touching the pixel data: the
        # colour conversion below would raise its own error on None and the
        # message here would never be produced.
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        # Convert from BGR to RGB channel order when requested.
        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        # Keypoint coordinates and their visibility flags.
        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        # Crop centre and scale stored with the record.
        c = db_rec['center']
        s = db_rec['scale']

        # Records without a score default to 1.
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            # Half-body augmentation: attempted when the number of visible
            # joints exceeds the threshold and the random test passes.
            enough_visible = (np.sum(joints_vis[:, 0]) >
                              self.num_joints_half_body)
            if enough_visible and np.random.rand() < self.prob_half_body:
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor

            # Scale multiplier lies in [1 - sf, 1 + sf].
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # Rotation lies in [-2 * rf, 2 * rf], or is 0 when the random
            # test fails.
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            # Random horizontal flip of image, joints and centre.
            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        # Build the affine matrix from centre, scale and rotation, then warp
        # the image to self.image_size with it.
        trans = get_affine_transform(c, s, r, self.image_size)
        image = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        # Optional extra transform supplied to the dataset.
        if self.transform:
            image = self.transform(image)

        # Apply the same affine transform to the visible keypoints.
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        # Ground-truth targets and their per-joint weights.
        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return image, target, target_weight, meta
Ejemplo n.º 7
0
    def __getitem__(self, idx):
        """Return the warped image, targets and metadata for sample ``idx``.

        The image path is assembled from ``self.root``, the record's
        ``source`` and ``image`` fields and, for zip-packed data, an archive
        prefix. Random scale / rotation / flip augmentation is applied only
        in training mode and only to records whose source is not ``h36m``.

        Returns:
            tuple: ``(input, target, target_weight, meta)``. ``target`` and
            ``target_weight`` are the outputs of ``self.generate_target``
            converted to tensors.

        Raises:
            ValueError: if the image cannot be read.
        """
        db_rec = copy.deepcopy(self.db[idx])

        if db_rec['source'] == 'h36m' and self.no_distortion:
            image_dir_zip = 'images_nodistortion.zip@'
        else:
            image_dir_zip = 'images.zip@'

        image_dir = image_dir_zip if self.data_format == 'zip' else ''
        # special process for coco dataset
        if db_rec['source'] == 'coco':
            image_dir = ''
        image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                              db_rec['image'])

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # cv2.imread returns None on failure; stop here with a clear message
        # instead of failing later inside slicing or warpAffine.
        if data_numpy is None:
            raise ValueError('Fail to read {}'.format(image_file))

        if db_rec['source'] == 'h36m' and self.pseudo_label:
            joints = db_rec['joints_2d_pseudo'].copy()  # [union_joints, 2]
            joints_vis = db_rec['joints_vis_pseudo'].copy()[:, :2]  # [union_joints, 2]
        else:
            joints = db_rec['joints_2d'].copy()  # [union_joints, 2]
            joints_vis = db_rec['joints_vis'].copy()[:, :2]  # [union_joints, 2]
        assert len(joints) == self.num_joints
        assert len(joints_vis) == self.num_joints

        # crop and scale according to ground truth
        center = np.array(db_rec['center']).copy()
        scale = np.array(db_rec['scale']).copy()
        rotation = 0

        if self.is_train and db_rec['source'] != 'h36m':
            # Augmentation parameters are looked up per data source.
            aug_param = self.aug_param_dict[db_rec['source']]
            sf = aug_param['scale_factor']
            rf = aug_param['rotation_factor']
            # Scale multiplier: normal around 1, clipped to [1 - sf, 1 + sf].
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # Rotation: drawn only when the random test passes, clipped to
            # [-2 * rf, 2 * rf]; otherwise no rotation.
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if aug_param['flip'] and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
                center[0] = data_numpy.shape[1] - center[0] - 1

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            if self.color_jitter:
                # The jitter works in RGB while the rest of the pipeline
                # keeps BGR, so the channels are swapped around it.
                # NOTE(review): color_jitter presumably returns a PIL image
                # (its result is used via .split()) - confirm.
                input = input[:, :, ::-1]  # BGR -> RGB
                input = self.color_jitter(input)
                r, g, b = input.split()
                input = Image.merge("RGB", (b, g, r))  # RGB -> BGR
            input = self.transform(input)

        # Move the joints flagged as visible into the warped image frame.
        visible_joints = joints_vis[:, 0] > 0
        if np.any(visible_joints):
            joints[visible_joints, :2] = affine_transform(joints[visible_joints, :2], trans)

        target, target_weight = self.generate_target(joints, joints_vis, db_rec['source'])

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'scale': scale,
            'center': center,
            'rotation': rotation,
            'joints_2d': db_rec['joints_2d'],
            'joints_2d_transformed': joints,
            'joints_vis': joints_vis,
            'source': db_rec['source'],
            'subject': db_rec['subject'] if db_rec['source'] == 'h36m' else -1
        }
        return input, target, target_weight, meta
Ejemplo n.º 8
0
    def __getitem__(self, idx):
        """Return the image plus combined targets for the sample and its
        interference instances.

        Besides the targets of the annotated joints, targets are generated
        for every entry of the record's ``interference`` list and merged
        with an element-wise maximum. Records without an ``interference``
        entry fall back to a copy of their own joints.

        Returns:
            tuple: ``(input, all_ins_target, all_ins_target_weight,
            ae_targets, meta)``. The three target arrays are converted to
            tensors; ``meta['interference_maps']`` keeps the merged
            interference targets as a numpy array.

        Raises:
            ValueError: if the image cannot be read.
        """
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # The read check must precede the colour conversion; cvtColor would
        # otherwise raise its own error on None and hide this message.
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))
        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']
        if 'interference' in db_rec.keys():
            interference_joints = db_rec['interference']
            interference_joints_vis = db_rec['interference_vis']
        else:
            # Use copies: the joints are transformed in place further down,
            # and aliasing them here would transform the same array twice.
            interference_joints = [joints.copy()]
            interference_joints_vis = [joints_vis.copy()]

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            # Half-body augmentation: attempted only when enough joints are
            # flagged visible and the random test passes.
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            # Scale multiplier: normal around 1, clipped to [1 - sf, 1 + sf].
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # Rotation: drawn only when the random test passes, clipped to
            # [-2 * rf, 2 * rf]; otherwise no rotation.
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1
                # Every interference instance is mirrored the same way.
                for i in range(len(interference_joints)):
                    interference_joints[i], interference_joints_vis[
                        i] = fliplr_joints(interference_joints[i],
                                           interference_joints_vis[i],
                                           data_numpy.shape[1],
                                           self.flip_pairs)

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)
        if self.transform:
            input = self.transform(input)

        # Move the joints flagged as visible into the warped image frame.
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
        target, target_weight = self.generate_target(joints, joints_vis)

        # interference joints heatmaps: merged over all instances by maximum
        inter_target = np.zeros_like(target)
        inter_target_weight = np.zeros_like(target_weight)
        for i in range(len(interference_joints)):
            inter_joints = interference_joints[i]
            inter_joints_vis = interference_joints_vis[i]
            for j in range(self.num_joints):
                if inter_joints_vis[j, 0] > 0.0:
                    inter_joints[j, 0:2] = affine_transform(
                        inter_joints[j, 0:2], trans)
            _inter_target, _inter_target_weight = self.generate_target(
                inter_joints, inter_joints_vis)
            inter_target = np.maximum(inter_target, _inter_target)
            inter_target_weight = np.maximum(inter_target_weight,
                                             _inter_target_weight)
        all_ins_target = np.maximum(inter_target, target)
        all_ins_target_weight = np.maximum(inter_target_weight, target_weight)

        # AE labels
        All_joints = [joints] + interference_joints
        ae_targets = self.generate_joints_ae_targets(All_joints)

        # Convert the numpy targets to tensors.
        all_ins_target = torch.from_numpy(all_ins_target)
        all_ins_target_weight = torch.from_numpy(all_ins_target_weight)
        ae_targets = torch.from_numpy(ae_targets)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score,
            'interference_maps': inter_target,
        }
        return input, all_ins_target, all_ins_target_weight, ae_targets, meta
    def __getitem__(self, idx):
        """Return the warped image, targets and metadata for sample ``idx``.

        The image path is assembled from ``self.root``, the record's
        ``source`` and ``image`` fields and, for zip-packed data, an archive
        prefix. Random scale / rotation / flip augmentation is applied only
        in training mode and only to records whose source is ``mpii``.

        Returns:
            tuple: ``(input, target, target_weight, meta)``. ``target`` and
            ``target_weight`` are the outputs of ``self.generate_target``
            converted to tensors.

        Raises:
            ValueError: if the image cannot be read.
        """
        db_rec = copy.deepcopy(self.db[idx])

        image_dir = 'images.zip@' if self.data_format == 'zip' else ''
        image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                              db_rec['image'])
        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # cv2.imread returns None on failure; stop here with a clear message
        # instead of failing later inside slicing or warpAffine.
        if data_numpy is None:
            raise ValueError('Fail to read {}'.format(image_file))

        joints = db_rec['joints_2d'].copy()  # [union_joints, 2]
        joints_vis = db_rec['joints_vis'].copy()[:, :2]  # [union_joints, 2]
        assert len(joints) == self.num_joints
        assert len(joints_vis) == self.num_joints

        # crop and scale according to ground truth
        center = np.array(db_rec['center']).copy()
        scale = np.array(db_rec['scale']).copy()
        rotation = 0

        if self.is_train and db_rec['source'] == 'mpii':
            sf = self.mpii_scale_factor
            rf = self.mpii_rotation_factor
            # Scale multiplier: normal around 1, clipped to [1 - sf, 1 + sf].
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # Rotation: drawn only when the random test passes, clipped to
            # [-2 * rf, 2 * rf]; otherwise no rotation.
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if self.mpii_flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.mpii_flip_pairs)
                # Mirror the crop centre around the image width.
                center[0] = data_numpy.shape[1] - center[0] - 1

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        # Move the joints flagged as visible into the warped image frame.
        visible_joints = joints_vis[:, 0] > 0
        if np.any(visible_joints):
            joints[visible_joints, :2] = affine_transform(
                joints[visible_joints, :2], trans)

        target, target_weight = self.generate_target(joints, joints_vis,
                                                     db_rec['source'])

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'scale': scale,
            'center': center,
            'rotation': rotation,
            'joints_2d': db_rec['joints_2d'],
            'joints_2d_transformed': joints,
            'joints_vis': joints_vis,
            'source': db_rec['source']
        }
        return input, target, target_weight, meta
Ejemplo n.º 10
0
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = self.read_image(image_file)

            ##### supporting frame
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                T = self.timestep_delta_range
                temp = image_file.split('/')
                prev_nm = temp[len(temp) - 1]
                ref_idx = int(prev_nm.replace('.jpg', ''))

                if self.timestep_delta_rand:
                    delta = -T + np.random.randint(T * 2 + 1)
                else:
                    delta = self.timestep_delta

                sup_idx = ref_idx + delta
                ########

                if 'nframes' in db_rec:
                    nframes = db_rec['nframes']
                    if not self.is_posetrack18:
                        sup_idx = np.clip(sup_idx, 1, nframes)
                    else:
                        sup_idx = np.clip(sup_idx, 0, nframes - 1)

                if not self.is_posetrack18:
                    new_sup_image_file = image_file.replace(
                        prev_nm,
                        str(sup_idx).zfill(8) + '.jpg')
                else:
                    new_sup_image_file = image_file.replace(
                        prev_nm,
                        str(sup_idx).zfill(6) + '.jpg')

                if os.path.exists(new_sup_image_file):
                    sup_image_file = new_sup_image_file
                else:
                    sup_image_file = image_file
                ##########

                data_numpy_sup = self.read_image(sup_image_file)
            ###########

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                data_numpy_sup = cv2.cvtColor(data_numpy_sup,
                                              cv2.COLOR_BGR2RGB)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))
        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):
            if data_numpy_sup is None:
                logger.error('=> SUP: fail to read {}'.format(sup_image_file))
                raise ValueError('SUP: Fail to read {}'.format(sup_image_file))

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                if (self.is_train and self.use_warping_train) or (
                        not self.is_train and self.use_warping_test):
                    data_numpy_sup = data_numpy_sup[:, ::-1, :]

                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        ##### supportingimage
        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):
            input_sup = cv2.warpAffine(
                data_numpy_sup,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)
        #########

        if self.transform:
            input = self.transform(input)
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                input_sup = self.transform(input_sup)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):

            meta = {
                'image': image_file,
                'sup_image': sup_image_file,
                'filename': filename,
                'imgnum': imgnum,
                'joints': joints,
                'joints_vis': joints_vis,
                'center': c,
                'scale': s,
                'rotation': r,
                'score': score
            }

            return input, input_sup, target, target_weight, meta

        else:
            meta = {
                'image': image_file,
                'filename': filename,
                'imgnum': imgnum,
                'joints': joints,
                'joints_vis': joints_vis,
                'center': c,
                'scale': s,
                'rotation': r,
                'score': score
            }

            return input, target, target_weight, meta
Ejemplo n.º 11
0
    def __getitem__(self, idx):
        """Load sample ``idx``, augment it and build its training targets.

        Returns:
            A 5-tuple ``(input, target, target_weight, meta, onehot_heatmap)``:
            the warped (and, if ``self.transform`` is set, transformed) image,
            the two tensors built from ``self.generate_target``, a metadata
            dict, and the result of ``self.render_onehot_heatmap``.

        Raises:
            ValueError: if the image file cannot be read.
        """
        # Work on a private copy so augmentation never writes into self.db.
        record = copy.deepcopy(self.db[idx])

        image_file = record['image']
        filename = record.get('filename', '')
        imgnum = record.get('imgnum', '')

        read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        if self.data_format == 'zip':
            from utils import zipreader
            frame = zipreader.imread(image_file, read_flags)
        else:
            frame = cv2.imread(image_file, read_flags)

        if frame is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = record['joints_3d']
        joints_vis = record['joints_3d_vis']

        center = record['center']
        scale = record['scale']
        score = record.get('score', 1)
        rotation = 0

        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor

            # Scale jitter is always applied; rotation only 60% of the time.
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if random.random() <= 0.6:
                rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            # Horizontal flip with probability 0.5 when enabled.
            if self.flip and random.random() <= 0.5:
                frame = frame[:, ::-1, :]
                width = frame.shape[1]
                joints, joints_vis = fliplr_joints(joints, joints_vis, width,
                                                   self.flip_pairs)
                center[0] = width - center[0] - 1

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        out_size = (int(self.image_size[0]), int(self.image_size[1]))
        net_input = cv2.warpAffine(frame, trans, out_size,
                                   flags=cv2.INTER_LINEAR)

        if self.transform:
            net_input = self.transform(net_input)

        # Move every visible joint into the coordinate frame of the warped image.
        for joint_id in range(self.num_joints):
            if joints_vis[joint_id, 0] > 0.0:
                joints[joint_id, 0:2] = affine_transform(
                    joints[joint_id, 0:2], trans)

        heatmaps, weights = self.generate_target(joints, joints_vis)
        target = torch.from_numpy(heatmaps)
        target_weight = torch.from_numpy(weights)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': center,
            'scale': scale,
            'rotation': rotation,
            'score': score
        }
        onehot_heatmap = self.render_onehot_heatmap(joints,
                                                    net_input.shape[1])

        return net_input, target, target_weight, meta, onehot_heatmap
Ejemplo n.º 12
0
    def __getitem__(self, idx):
        """Load sample ``idx``, augment it and build its heatmap targets.

        The image is warped to ``self.image_size``; the joints used for the
        heatmap are transformed separately with an affine map built for
        ``self.heatmap_size``.

        Returns:
            ``(input, target, target_weight, meta)`` where ``target`` and
            ``target_weight`` are tensors built from ``self.generate_target``
            and ``meta`` carries the file name, the joints in input-image
            coordinates and the augmentation parameters.

        Raises:
            ValueError: if the image file cannot be read.
        """
        # Deep copy so the in-place edits below never leak into self.db.
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec["image"]
        filename = db_rec["filename"] if "filename" in db_rec else ""
        imgnum = db_rec["imgnum"] if "imgnum" in db_rec else ""

        if self.data_format == "zip":
            from utils import zipreader

            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # Validate the read BEFORE touching the pixels: cv2.cvtColor on a
        # failed read raises an unrelated cv2 error and hides the file name.
        if data_numpy is None:
            logger.error("=> fail to read {}".format(image_file))
            raise ValueError("Fail to read {}".format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec["joints_3d"]
        joints_vis = db_rec["joints_3d_vis"]

        c = db_rec["center"]
        s = db_rec["scale"]
        score = db_rec["score"] if "score" in db_rec else 1
        r = 0

        if self.is_train:
            # Optionally re-centre on a body half when enough joints are visible.
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            # Scale jitter always; rotation jitter with probability 0.6.
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            # Horizontal flip with probability 0.5 when enabled.
            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        # Two affine maps from the same (c, s, r): one to the network input
        # resolution, one directly to the heatmap resolution.
        joints_heatmap = joints.copy()
        trans = get_affine_transform(c, s, r, self.image_size)
        trans_heatmap = get_affine_transform(c, s, r, self.heatmap_size)

        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
                joints_heatmap[i, 0:2] = affine_transform(
                    joints_heatmap[i, 0:2], trans_heatmap)

        target, target_weight = self.generate_target(joints_heatmap,
                                                     joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            "image": image_file,
            "filename": filename,
            "imgnum": imgnum,
            "joints": joints,
            "joints_vis": joints_vis,
            "center": c,
            "scale": s,
            "rotation": r,
            "score": score,
        }

        return input, target, target_weight, meta
Ejemplo n.º 13
0
    def __getitem__(self, idx):
        """Load sample ``idx`` and build both a plain-resize and an augmented view.

        Two versions of the sample are produced:

        * a plain 256x256 resize of the raw image (``img_resize256``) with the
          landmarks rescaled to it (``joints_256``) and the target generated
          from those landmarks (``target_256_64``); these are only returned
          through ``meta``;
        * a 250x250 resize that is warped with a randomly drifted centre (plus
          scale/rotation/flip when ``self.is_train``) to ``self.image_size``,
          passed through ``augmentation`` and ``self.transform``; this is the
          returned ``input`` and its landmarks drive ``target``.

        Returns:
            ``(input, target, target_weight, meta)``.

        Raises:
            ValueError: if the image file cannot be read.
        """
        # Private copy of the record (image path plus landmark coordinates).
        db_rec = copy.deepcopy(self.db[idx])
        image_file = db_rec['image']  # image path
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        data_numpy = cv2.imread(image_file, cv2.IMREAD_COLOR
                                | cv2.IMREAD_IGNORE_ORIENTATION)

        # Validate the read before any use of the pixels, so a missing file
        # is reported with its name instead of failing inside OpenCV.
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        # NOTE(review): this looks like a leftover debug dump; it rewrites the
        # same file in the working directory on every access. Kept so the
        # observable side effect is unchanged - confirm and remove.
        cv2.imwrite("data_numpy.jpg", data_numpy)
        img_raw = copy.deepcopy(data_numpy)
        raw_h, raw_w = img_raw.shape[0], img_raw.shape[1]

        joints = db_rec['joints']
        joints_raw = copy.deepcopy(db_rec['joints'])
        joints_vis = db_rec['joints_vis']

        # Plain resize to 256x256; goes straight into meta.
        img_resize256 = cv2.resize(data_numpy, (256, 256))

        # Column 0 is x (scaled by the width ratio), column 1 is y (scaled by
        # the height ratio). Integer storage is kept from the original code,
        # so the rescaled coordinates are truncated.
        joints_256 = np.zeros((joints.shape[0], 2), dtype=int)
        joints_256[:, 0] = joints[:, 0] * 256 / raw_w
        joints_256[:, 1] = joints[:, 1] * 256 / raw_h

        # Target for the un-augmented view; its weights are not used.
        target_256_64, _ = self.generate_target(joints_256, joints_vis)

        data_numpy = cv2.resize(data_numpy, (250, 250))

        # Rescale the labels to the 250x250 image: x by width, y by height.
        joints[:, 0] = joints[:, 0] * 250 / raw_w
        joints[:, 1] = joints[:, 1] * 250 / raw_h

        # Randomly drifted centre (replaces db_rec['center']). Applied
        # whether or not self.is_train is set.
        c = np.array([
            125.0 + random.uniform(-30.0, 30.0),
            125.0 + random.uniform(-30.0, 30.0)
        ])

        s = 1.0  # replaces db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:  # scale/rotation/flip only while training
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 0.7, 1.2)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        # Affine map from the 250x250 image to self.image_size.
        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        # Snapshot of the warped image before augmentation / self.transform.
        img_256 = copy.deepcopy(input)

        # Apply the same affine map to every visible label.
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        input = augmentation(input)  # image-level augmentation helper

        # Without a transform the "BN" view falls back to the plain resized
        # image so that meta can always be built.
        img_resize256_BN = img_resize256
        if self.transform:
            input = self.transform(input)
            img_resize256_BN = self.transform(img_resize256)

        # Heatmap for the affine-transformed labels.
        target, target_weight = self.generate_target(joints, joints_vis)
        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,  # file path
            'img_raw': img_raw,  # raw pixel array
            'img_resize256': img_resize256,
            'img_resize256_BN': img_resize256_BN,
            'img_256': img_256,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_raw': joints_raw,  # labels as stored in the record
            'joints_256': joints_256,
            'target_256_64': target_256_64,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }
        return input, target, target_weight, meta
    def __getitem__(self, idx):
        """Return the augmented image, heatmap target, weights and metadata.

        The record keys read here are 'image', 'center', 'scale',
        'joints_3d' and 'joints_3d_vis', plus the optional 'filename',
        'imgnum' and 'score'.

        Raises:
            ValueError: if the image file cannot be read.
        """
        # Private copy: the flip below edits the centre in place.
        record = copy.deepcopy(self.db[idx])

        image_file = record['image']
        filename = record.get('filename', '')
        imgnum = record.get('imgnum', '')

        read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        if self.data_format == 'zip':
            from utils import zipreader
            frame = zipreader.imread(image_file, read_flags)
        else:
            frame = cv2.imread(image_file, read_flags)

        if frame is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = record['joints_3d']
        joints_vis = record['joints_3d_vis']

        center = record['center']
        scale = record['scale']
        score = record.get('score', 1)
        rotation = 0

        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor

            # Scale augmentation, always applied.
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            # Rotation augmentation, applied with probability 0.6.
            if random.random() <= 0.6:
                rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            # Horizontal flip, applied with probability 0.5 when enabled.
            if self.flip and random.random() <= 0.5:
                frame = frame[:, ::-1, :]
                width = frame.shape[1]
                joints, joints_vis = fliplr_joints(joints, joints_vis, width,
                                                   self.flip_pairs)
                center[0] = width - center[0] - 1  # mirror the centre too

        # Affine warp of the image to self.image_size.
        trans = get_affine_transform(center, scale, rotation, self.image_size)
        out_size = (int(self.image_size[0]), int(self.image_size[1]))
        net_input = cv2.warpAffine(frame, trans, out_size,
                                   flags=cv2.INTER_LINEAR)

        if self.transform:
            net_input = self.transform(net_input)

        # Visible joints follow the same affine map as the image.
        for joint_id in range(self.num_joints):
            if joints_vis[joint_id, 0] > 0.0:
                joints[joint_id, 0:2] = affine_transform(
                    joints[joint_id, 0:2], trans)

        heatmaps, weights = self.generate_target(joints, joints_vis)
        target = torch.from_numpy(heatmaps)
        target_weight = torch.from_numpy(weights)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': center,
            'scale': scale,
            'rotation': rotation,
            'score': score
        }

        return net_input, target, target_weight, meta
Ejemplo n.º 15
0
    def __getitem__(self, idx):
        """Load landmark sample ``idx`` and build per-scale targets and offsets.

        For every entry of ``self.output_size`` a one-hot target map (one
        channel per landmark) and a pair of offset channels from
        ``self._generate_offset`` are produced.

        Returns:
            ``(img, targets, offsets, meta)``: the normalised channel-first
            image array, a list of target tensors, a list of
            ``[xx, yy]`` offset tensor pairs, and a dict with the sample
            index, centre, scale, original points and transformed points
            (those of the last output size).
        """
        # The listed name 'image_092_01.jpg' is rewritten to
        # 'image_092 _01.jpg' (with a space) - presumably the name the file
        # has on disk; verify against the dataset. An explicit membership
        # test is used because str.find() returns -1 (truthy) when nothing
        # matches and 0 (falsy) for a match at the very start, so it must
        # not be used as a boolean. The cell is only rewritten when the
        # name actually changes.
        rel_path = self.landmarks_frame.iloc[idx, 0]
        if 'image_092_01.jpg' in rel_path:
            rel_path = rel_path.replace('image_092_01.jpg',
                                        'image_092 _01.jpg')
            self.landmarks_frame.iloc[idx, 0] = rel_path

        image_path = os.path.join(self.data_root, rel_path)
        scale = self.landmarks_frame.iloc[idx, 1]

        center_w = self.landmarks_frame.iloc[idx, 2]
        center_h = self.landmarks_frame.iloc[idx, 3]
        center = torch.Tensor([center_w, center_h])

        # Remaining columns are the landmark coordinates, two per point.
        pts = self.landmarks_frame.iloc[idx, 4:].values
        pts = pts.astype('float').reshape(-1, 2)

        scale *= 1.3
        nparts = pts.shape[0]
        img = np.array(Image.open(image_path).convert('RGB'), dtype=np.float32)

        r = 0
        t_x, t_y = (1, 1)
        if self.is_train:
            # Scale jitter always; rotation with p=0.6; each translation
            # factor with p=0.5; horizontal flip with p=0.5 when enabled.
            scale = scale * (random.uniform(1 - self.scale_factor,
                                            1 + self.scale_factor))
            r = random.uniform(-self.rot_factor, self.rot_factor) \
                if random.random() <= 0.6 else 0
            t_x = random.uniform(1-self.trans_factor, 1+self.trans_factor) \
                if random.random() <= 0.5 else 1
            t_y = random.uniform(1-self.trans_factor, 1+self.trans_factor) \
                if random.random() <= 0.5 else 1
            if random.random() <= 0.5 and self.flip:
                img = np.fliplr(img)
                pts = fliplr_joints(pts, width=img.shape[1], dataset='300W')
                center[0] = img.shape[1] - center[0]

        img = crop(img,
                   center,
                   scale,
                   self.input_size,
                   translation=(t_x, t_y),
                   rot=r)

        if self.is_train:
            if random.random() <= 0.3 and self.gaussian_blur:
                # The drawn value is passed as the (square) kernel size.
                radius = random.choice([1, 3, 5])
                img = cv2.GaussianBlur(img, (radius, radius), sigmaX=1.0)
            # random.random() is always <= 1.0, so occlusion is applied
            # whenever it is enabled; the draw is kept so the random stream
            # is consumed exactly as before.
            if random.random() <= 1.0 and self.occlusion:
                img = add_occlusion(img, max_size=102)

        targets = []
        offsets = []
        for output_size in self.output_size:
            target = np.zeros((nparts, output_size[0], output_size[1]))
            tpts = pts.copy()

            for i in range(nparts):
                if tpts[i, 1] > 0:
                    tpts[i, 0:2] = transform_pixel(tpts[i, 0:2],
                                                   center,
                                                   scale,
                                                   output_size,
                                                   translation=(t_x, t_y),
                                                   rot=r)
                    x, y = tpts[i].astype(int)

                    # Mark the landmark only if it lands inside the map.
                    if 0 <= y < output_size[0] and 0 <= x < output_size[1]:
                        target[i, y, x] = 1

            xx_channel, yy_channel = self._generate_offset(
                nparts, tpts, output_size, self.offset_mode, self.offset_dim)
            offset = [torch.Tensor(xx_channel), torch.Tensor(yy_channel)]

            targets.append(torch.Tensor(target))
            offsets.append(offset)

        # Normalise and switch from HWC to CHW.
        img = img.astype(np.float32)
        img = (img / 255.0 - self.mean) / self.std
        img = img.transpose([2, 0, 1])
        # tpts here is the set computed for the last entry of self.output_size.
        tpts = torch.Tensor(tpts)
        center = torch.Tensor(center)

        meta = {
            'index': idx,
            'center': center,
            'scale': scale,
            'pts': torch.Tensor(pts),
            'tpts': tpts
        }

        return img, targets, offsets, meta
Ejemplo n.º 16
0
    def __getitem__(self, idx):
        """Return the (optionally warped) image, heatmap target, weights and meta.

        When the key 'TEST_MODE' is present in ``self.cfg`` the image is
        returned without the affine warp; the joints are still transformed
        with the computed affine map in either case.

        Raises:
            ValueError: if the image file cannot be read.
        """
        # Private copy: the flip below edits the centre in place.
        record = copy.deepcopy(self.db[idx])

        image_file = record['image']
        filename = record.get('filename', '')
        imgnum = record.get('imgnum', '')

        read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        if self.data_format == 'zip':
            from utils import zipreader
            frame = zipreader.imread(image_file, read_flags)
        else:
            frame = cv2.imread(image_file, read_flags)

        if frame is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = record['joints_3d']
        joints_vis = record['joints_3d_vis']

        center = record['center']
        scale = record['scale']
        score = record.get('score', 1)
        rotation = 0

        # ---- data augmentation (training only) ----
        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor

            # Scale jitter always; rotation jitter with probability 0.6.
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if random.random() <= 0.6:
                rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            # Horizontal flip with probability 0.5 when enabled.
            if self.flip and random.random() <= 0.5:
                frame = frame[:, ::-1, :]
                width = frame.shape[1]
                joints, joints_vis = fliplr_joints(joints, joints_vis, width,
                                                   self.flip_pairs)
                center[0] = width - center[0] - 1

            # A brightness-shift augmentation used to live here but is
            # disabled.

        trans = get_affine_transform(center, scale, rotation, self.image_size)

        # NOTE: the warp scales and crops the image to self.image_size; it is
        # skipped (raw image passed through) when 'TEST_MODE' is configured.
        net_input = frame
        if 'TEST_MODE' not in self.cfg:
            out_size = (int(self.image_size[0]), int(self.image_size[1]))
            net_input = cv2.warpAffine(frame, trans, out_size,
                                       flags=cv2.INTER_LINEAR)

        if self.transform:
            net_input = self.transform(net_input)

        for joint_id in range(self.num_joints):
            if joints_vis[joint_id, 0] > 0.0:
                joints[joint_id, 0:2] = affine_transform(
                    joints[joint_id, 0:2], trans)

        heatmaps, weights = self.generate_target(joints, joints_vis)
        target = torch.from_numpy(heatmaps)
        target_weight = torch.from_numpy(weights)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': center,
            'scale': scale,
            'rotation': rotation,
            'score': score
        }

        return net_input, target, target_weight, meta
Ejemplo n.º 17
0
    def __getitem__(self, index):
        """Build one RGB + sparse-depth sample with keypoint targets.

        The RGB image and the depth image are cropped to the annotated
        bounding box and resized to 256x256. The sparse depth input keeps
        only column 128 of the resized depth; the full depth, resized to
        64x64, is appended to the keypoint target.

        Returns:
            When 'test' is in ``self.list_path``:
            ``(image_rgb, image_depth, size, image_id)``.
            Otherwise: ``(image, target, target_weight, size, image_id,
            joints, joints_vis)`` where ``image`` stacks RGB and sparse depth
            along axis 0 and ``size`` is the shape of the uncropped RGB image.
        """
        item = self.files[index]
        image_id = item["name"]
        # The annotation index is the second '_'-separated field of the name.
        num = int(image_id.split('_')[1])
        image_rgb = cv2.imread(item["img_rgb"], cv2.IMREAD_COLOR)
        label = cv2.imread(item["img_depth"], cv2.IMREAD_GRAYSCALE)
        size = image_rgb.shape

        annotations = self.annot_t if 'train' in image_id else self.annot_v
        record = annotations["annotations"][num]
        bbox = list(map(int, record["bbox"]))
        keypoints = record["keypoints"]
        # bbox is used below as (x, y, width, height).
        box_x, box_y, box_w, box_h = bbox[0], bbox[1], bbox[2], bbox[3]

        # crop
        image_rgb = image_rgb[box_y:box_y + box_h, box_x:box_x + box_w]
        label = label[box_y:box_y + box_h, box_x:box_x + box_w]

        # resize
        image_rgb = cv2.resize(image_rgb, (256, 256),
                               interpolation=cv2.INTER_LINEAR)
        label = cv2.resize(label, (256, 256), interpolation=cv2.INTER_LINEAR)

        # Sparse depth input: a single column of the resized depth image.
        image_depth = np.zeros((256, 256))
        image_depth[:, 128] = label[:, 128]

        label = cv2.resize(label, (64, 64), interpolation=cv2.INTER_LINEAR)

        if 'test' in self.list_path:
            image_rgb = self.input_transform(image_rgb)
            image_depth = self.label_transform(image_depth)
            image_rgb = image_rgb.transpose((2, 0, 1))

            return image_rgb.copy(), image_depth.copy(), np.array(
                size), image_id

        image_rgb, image_depth, label = self.gen_sample(
            image_rgb, image_depth, label)

        # keypoints is a flat list of (x, y, visibility) triples, 15 joints.
        joints = np.array([[keypoints[3 * i], keypoints[3 * i + 1], 0]
                           for i in range(15)])
        joints_vis = np.array([[keypoints[3 * i + 2], keypoints[3 * i + 2], 0]
                               for i in range(15)])

        # Map the joints into the cropped-and-resized 256x256 frame.
        joints[:, 0] = (joints[:, 0] - box_x) * 256 / box_w
        joints[:, 1] = (joints[:, 1] - box_y) * 256 / box_h

        image_depth = np.expand_dims(image_depth, axis=0)
        image = np.concatenate((image_rgb, image_depth), axis=0)

        label = np.expand_dims(label, axis=0)

        if self.flip:
            # flip is -1 (mirror the last axis) or +1 (leave as is).
            flip = np.random.choice(2) * 2 - 1
            image = image[:, :, ::flip]
            label = label[:, :, ::flip]
            # Mirror the joints only when the image was actually mirrored;
            # otherwise the keypoints would no longer match the pixels.
            if flip == -1:
                joints, joints_vis = fliplr_joints(joints, joints_vis, 256,
                                                   self.flip_pairs)

        target, target_weight = self.generate_target(joints, joints_vis)
        target = np.concatenate((target, label), axis=0)

        return image.copy(), target.copy(), target_weight.copy(), np.array(
            size), image_id, joints.copy(), joints_vis.copy()
Ejemplo n.º 18
0
    def __getitem__(self, idx):
        """Return the augmented image, heatmap target, weights and metadata.

        When ``cf.args.task`` (from ``boxx``) equals 'ssm', ``meta`` also
        gets 'joints_h': the joints divided by the image/heatmap stride,
        rounded by adding 0.5 and cast to int32.

        Raises:
            ValueError: if the image file cannot be read.
        """
        # Private copy: the flip below edits the centre in place.
        record = copy.deepcopy(self.db[idx])

        image_file = record['image']
        filename = record.get('filename', '')
        imgnum = record.get('imgnum', '')

        read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        if self.data_format == 'zip':
            from utils import zipreader
            frame = zipreader.imread(image_file, read_flags)
        else:
            frame = cv2.imread(image_file, read_flags)

        if frame is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = record['joints_3d']
        joints_vis = record['joints_3d_vis']

        center = record['center']
        scale = record['scale']
        score = record.get('score', 1)
        rotation = 0

        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor

            # Scale jitter always; rotation jitter with probability 0.6.
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if random.random() <= 0.6:
                rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            # Horizontal flip with probability 0.5 when enabled.
            if self.flip and random.random() <= 0.5:
                frame = frame[:, ::-1, :]
                width = frame.shape[1]
                joints, joints_vis = fliplr_joints(joints, joints_vis, width,
                                                   self.flip_pairs)
                center[0] = width - center[0] - 1

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        out_size = (int(self.image_size[0]), int(self.image_size[1]))
        net_input = cv2.warpAffine(frame, trans, out_size,
                                   flags=cv2.INTER_LINEAR)

        if self.transform:
            net_input = self.transform(net_input)

        # Visible joints follow the same affine map as the image.
        for joint_id in range(self.num_joints):
            if joints_vis[joint_id, 0] > 0.0:
                joints[joint_id, 0:2] = affine_transform(
                    joints[joint_id, 0:2], trans)

        heatmaps, weights = self.generate_target(joints, joints_vis)
        target = torch.from_numpy(heatmaps)
        target_weight = torch.from_numpy(weights)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': center,
            'scale': scale,
            'rotation': rotation,
            'score': score
        }

        from boxx import cf
        if cf.args.task == 'ssm':
            stride = self.image_size / self.heatmap_size
            heatmap_joints = copy.deepcopy(joints)
            # TODO: reduce the quantisation loss of this rounding.
            heatmap_joints[:, 0] = heatmap_joints[:, 0] / stride[0] + 0.5
            heatmap_joints[:, 1] = heatmap_joints[:, 1] / stride[1] + 0.5
            meta['joints_h'] = heatmap_joints.astype(np.int32)

        return net_input, target, target_weight, meta
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):
            prev_image_file1 = db_rec['image']
            prev_image_file2 = db_rec['image']
            next_image_file1 = db_rec['image']
            next_image_file2 = db_rec['image']

        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = self.read_image(image_file)

            ##### supporting frames
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                T = self.timestep_delta_range
                temp = prev_image_file1.split('/')
                prev_nm = temp[len(temp) - 1]
                ref_idx = int(prev_nm.replace('.jpg', ''))

                ### setting deltas
                prev_delta1 = -1
                prev_delta2 = -2
                next_delta1 = 1
                next_delta2 = 2

                #### image indices
                prev_idx1 = ref_idx + prev_delta1
                prev_idx2 = ref_idx + prev_delta2
                next_idx1 = ref_idx + next_delta1
                next_idx2 = ref_idx + next_delta2

                if 'nframes' in db_rec:
                    nframes = db_rec['nframes']
                    if not self.is_posetrack18:
                        prev_idx1 = np.clip(prev_idx1, 1, nframes)
                        prev_idx2 = np.clip(prev_idx2, 1, nframes)
                        next_idx1 = np.clip(next_idx1, 1, nframes)
                        next_idx2 = np.clip(next_idx2, 1, nframes)
                    else:
                        prev_idx1 = np.clip(prev_idx1, 0, nframes - 1)
                        prev_idx2 = np.clip(prev_idx2, 0, nframes - 1)
                        next_idx1 = np.clip(next_idx1, 0, nframes - 1)
                        next_idx2 = np.clip(next_idx2, 0, nframes - 1)

                if self.is_posetrack18:
                    z = 6
                else:
                    z = 8

                ### delta -1
                new_prev_image_file1 = prev_image_file1.replace(
                    prev_nm,
                    str(prev_idx1).zfill(z) + '.jpg')
                #### delta -2
                new_prev_image_file2 = prev_image_file1.replace(
                    prev_nm,
                    str(prev_idx2).zfill(z) + '.jpg')
                ### delta 1
                new_next_image_file1 = next_image_file1.replace(
                    prev_nm,
                    str(next_idx1).zfill(z) + '.jpg')
                #### delta 2
                new_next_image_file2 = next_image_file1.replace(
                    prev_nm,
                    str(next_idx2).zfill(z) + '.jpg')

                ###### checking for files existence
                if os.path.exists(new_prev_image_file1):
                    prev_image_file1 = new_prev_image_file1
                if os.path.exists(new_prev_image_file2):
                    prev_image_file2 = new_prev_image_file2
                if os.path.exists(new_next_image_file1):
                    next_image_file1 = new_next_image_file1
                if os.path.exists(new_next_image_file2):
                    next_image_file2 = new_next_image_file2

                ##########

            data_numpy_prev1 = self.read_image(prev_image_file1)
            data_numpy_prev2 = self.read_image(prev_image_file2)
            data_numpy_next1 = self.read_image(next_image_file1)
            data_numpy_next2 = self.read_image(next_image_file2)
            ###########

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                data_numpy_prev1 = cv2.cvtColor(data_numpy_prev1,
                                                cv2.COLOR_BGR2RGB)
                data_numpy_prev2 = cv2.cvtColor(data_numpy_prev2,
                                                cv2.COLOR_BGR2RGB)
                data_numpy_next1 = cv2.cvtColor(data_numpy_next1,
                                                cv2.COLOR_BGR2RGB)
                data_numpy_next2 = cv2.cvtColor(data_numpy_next2,
                                                cv2.COLOR_BGR2RGB)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))
        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):
            if data_numpy_prev1 is None:
                logger.error(
                    '=> PREV SUP: fail to read {}'.format(prev_image_file1))
                raise ValueError(
                    'PREV SUP: Fail to read {}'.format(prev_image_file1))
            if data_numpy_prev2 is None:
                logger.error(
                    '=> PREV SUP: fail to read {}'.format(prev_image_file2))
                raise ValueError(
                    'PREV SUP: Fail to read {}'.format(prev_image_file2))
            if data_numpy_next1 is None:
                logger.error(
                    '=> NEXT SUP: fail to read {}'.format(next_image_file1))
                raise ValueError(
                    'NEXT SUP: Fail to read {}'.format(next_image_file1))
            if data_numpy_next2 is None:
                logger.error(
                    '=> NEXT SUP: fail to read {}'.format(next_image_file2))
                raise ValueError(
                    'NEXT SUP: Fail to read {}'.format(next_image_file2))
        ##########

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                #####
                if (self.is_train and self.use_warping_train) or (
                        not self.is_train and self.use_warping_test):
                    data_numpy_prev1 = data_numpy_prev1[:, ::-1, :]
                    data_numpy_prev2 = data_numpy_prev2[:, ::-1, :]
                    data_numpy_next1 = data_numpy_next1[:, ::-1, :]
                    data_numpy_next2 = data_numpy_next2[:, ::-1, :]
                ##########

                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):
            input_prev1 = cv2.warpAffine(
                data_numpy_prev1,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)
            input_prev2 = cv2.warpAffine(
                data_numpy_prev2,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)
            input_next1 = cv2.warpAffine(
                data_numpy_next1,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)
            input_next2 = cv2.warpAffine(
                data_numpy_next2,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)
        #########

        if self.transform:
            input = self.transform(input)
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                input_prev1 = self.transform(input_prev1)
                input_prev2 = self.transform(input_prev2)
                input_next1 = self.transform(input_next1)
                input_next2 = self.transform(input_next2)
            ############
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):

            meta = {
                'image': image_file,
                'sup_image': prev_image_file1,
                'filename': filename,
                'imgnum': imgnum,
                'joints': joints,
                'joints_vis': joints_vis,
                'center': c,
                'scale': s,
                'rotation': r,
                'score': score
            }

            return input, input_prev1, input_prev2, input_next1, input_next2, target, target_weight, meta

        else:
            meta = {
                'image': image_file,
                'filename': filename,
                'imgnum': imgnum,
                'joints': joints,
                'joints_vis': joints_vis,
                'center': c,
                'scale': s,
                'rotation': r,
                'score': score
            }

            return input, target, target_weight, meta
Ejemplo n.º 20
0
    def __getitem__(self, idx):
        """Load sample ``idx`` with target and interference heatmaps.

        The record is deep-copied from ``self.db``, its image is read with
        OpenCV and, when ``self.is_train`` is set, augmented (optional
        half-body crop, random scale and rotation, optional horizontal
        flip).  The image is then warped to ``self.image_size`` and heatmaps
        are generated for the annotated joints and for every entry of the
        record's ``'interference'`` list.

        Args:
            idx: Index into ``self.db``.

        Returns:
            A tuple ``(input, target, target_weight, all_ins_target,
            all_ins_target_weight, meta)``.  ``all_ins_target`` is the
            element-wise maximum of ``target`` and the interference heatmaps
            scaled by 0.5; ``meta`` is a dict describing the sample and the
            transform that was applied.

        Raises:
            ValueError: If the image file cannot be read.
        """
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec.get('filename', '')
        imgnum = db_rec.get('imgnum', '')

        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # Validate the read *before* any further OpenCV call; otherwise
        # cvtColor fails first with an error that hides the file name.
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']
        if 'interference' in db_rec:
            interference_joints = db_rec['interference']
            interference_joints_vis = db_rec['interference_vis']
        else:
            # Fall back to the sample's own joints.  These must be
            # independent copies: the affine step below writes into the
            # arrays in place, and aliasing ``joints`` would transform the
            # same array twice.
            interference_joints = [copy.deepcopy(joints)]
            interference_joints_vis = [copy.deepcopy(joints_vis)]

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec.get('score', 1)
        r = 0

        if self.is_train:
            # Occasionally replace the crop by a half-body crop.
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            # Random scale in [1 - sf, 1 + sf]; random rotation (applied
            # with probability 0.6) in [-2 * rf, 2 * rf].
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                width = data_numpy.shape[1]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   width, self.flip_pairs)
                c[0] = width - c[0] - 1
                # Index-based on purpose: results are stored back into the
                # container, which may be a list or an array.
                for i in range(len(interference_joints)):
                    (interference_joints[i],
                     interference_joints_vis[i]) = fliplr_joints(
                         interference_joints[i],
                         interference_joints_vis[i],
                         width,
                         self.flip_pairs)

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        # Map the visible joints into the warped image's coordinates.
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        # Interference heatmaps: element-wise maximum over all entries.
        inter_target = np.zeros_like(target)
        inter_target_weight = np.zeros_like(target_weight)
        for inter_joints, inter_joints_vis in zip(interference_joints,
                                                  interference_joints_vis):
            for j in range(self.num_joints):
                if inter_joints_vis[j, 0] > 0.0:
                    inter_joints[j, 0:2] = affine_transform(
                        inter_joints[j, 0:2], trans)

            _inter_target, _inter_target_weight = self.generate_target(
                inter_joints, inter_joints_vis)

            inter_target = np.maximum(inter_target, _inter_target)
            inter_target_weight = np.maximum(inter_target_weight,
                                             _inter_target_weight)

        # Interference responses are halved before being merged with the
        # sample's own heatmaps.
        all_ins_target = np.maximum(inter_target * 0.5, target)
        all_ins_target_weight = np.maximum(inter_target_weight, target_weight)

        # Computed from the numpy heatmaps, before the tensor conversion.
        kpts_onehots = self.heatmap2onehot(target)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)
        all_ins_target = torch.from_numpy(all_ins_target)
        all_ins_target_weight = torch.from_numpy(all_ins_target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score,
            'interference_maps': inter_target,
            'kpt_cat_maps': kpts_onehots,
        }
        return input, target, target_weight, all_ins_target, all_ins_target_weight, meta
    def __getitem__(self, idx):
        """Load sample ``idx`` with joint heatmaps and per-limb body targets.

        The record is deep-copied from ``self.db``, its image is read (from
        a zip archive when ``self.data_format == 'zip'``), optionally
        augmented when ``self.is_train`` is set, and warped to
        ``self.image_size``.  For every pair in ``self.skeletons`` a
        normalised length and a signed angle relative to ``self.axis_y`` are
        computed from the transformed joints.

        Args:
            idx: Index into ``self.db``.

        Returns:
            A tuple ``(input, joint_target, joint_target_weight,
            body_target, body_target_weight, body, body_vis, meta)``.
            ``body`` holds ``[length / 332.55, angle / pi, 1]`` per skeleton
            entry (rows of skipped entries stay zero) and ``body_vis`` holds
            ``[1, 1, 0]`` for the entries that were computed.

        Raises:
            ValueError: If the image file cannot be read.
        """
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec.get('filename', '')
        imgnum = db_rec.get('imgnum', '')

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
            )
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
            )

        # Validate the read *before* the colour conversion; otherwise
        # cvtColor fails first with an error that hides the file name.
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        # Untouched copy of the annotations; only its third column is read
        # below, to decide which limbs to skip.
        joints_copy = db_rec['joints_3d_copy']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec.get('score', 1)
        r = 0

        if self.is_train:
            # Occasionally replace the crop by a half-body crop.
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis
                )

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            # Random scale factor.
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # Random rotation factor, applied with probability 0.6.
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
                # NOTE(review): the original author left a reminder here to
                # add their own symmetry handling; ``joints_copy`` is not
                # flipped - confirm whether that is intended.
                # Recompute the centre for the mirrored image.
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans,
            (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        # Map the visible joints into the warped image's coordinates.
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        # ``np.float`` was removed from NumPy; ``np.float64`` is the dtype
        # it used to alias.
        body = np.zeros((self.num_body, 3), dtype=np.float64)
        body_vis = np.zeros((self.num_body, 3), dtype=np.float64)
        for idbody, skeleton in enumerate(self.skeletons):
            point_a = joints[skeleton[0]]
            point_b = joints[skeleton[1]]
            # Skip limbs with an endpoint whose third component is 0 in the
            # untransformed annotations.
            if joints_copy[skeleton[0]][2] == 0 or joints_copy[skeleton[1]][2] == 0:
                continue
            axis_x = (point_b - point_a)[:-1]
            lx = np.sqrt(axis_x.dot(axis_x))
            if lx == 0:
                # Degenerate limb: both endpoints coincide.
                continue
            # ``self.axis_y`` is treated as unit length (the original code
            # divided by a hard-coded length of 1).  Clip so that rounding
            # error cannot push the cosine outside arccos' domain and
            # produce NaN.
            cos_angle = np.clip(axis_x.dot(self.axis_y) / lx, -1.0, 1.0)
            # Angle expressed as a fraction of pi, signed by the second
            # component of the limb vector.
            angle = np.arccos(cos_angle) / np.pi
            if axis_x[1] < 0:
                angle = -angle
            # NOTE(review): 332.55 is an unexplained length normaliser
            # inherited from the original code - confirm its origin.
            body[idbody] = [lx / 332.55, angle, 1]
            body_vis[idbody] = [1, 1, 0]

        joint_target, joint_target_weight = self.generate_target(joints, joints_vis)
        body_target, body_target_weight = self.generate_body_target(joints, joints_copy, body_vis)

        joint_target = torch.from_numpy(joint_target)
        joint_target_weight = torch.from_numpy(joint_target_weight)
        body_target = torch.from_numpy(body_target)
        body_target_weight = torch.from_numpy(body_target_weight)
        body = torch.from_numpy(body)
        body_vis = torch.from_numpy(body_vis)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'body': body,
            'body_vis': body_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, joint_target, joint_target_weight, body_target, body_target_weight, body, body_vis, meta
Ejemplo n.º 22
0
    def __getitem__(self, idx):
        """Load sample ``idx`` and return the warped image with its heatmaps.

        The record is deep-copied from ``self.db``, its image is read (from
        a zip archive when ``self.data_format == 'zip'``), optionally
        augmented when ``self.is_train`` is set (half-body crop, random
        scale and rotation, horizontal flip), and warped to
        ``self.image_size``.

        Args:
            idx: Index into ``self.db``.

        Returns:
            A tuple ``(input, target, target_weight, meta)`` where
            ``target`` and ``target_weight`` are tensors built from
            ``self.generate_target`` and ``meta`` is a dict describing the
            sample and the transform that was applied.

        Raises:
            ValueError: If the image file cannot be read.
        """
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec.get('filename', '')
        imgnum = db_rec.get('imgnum', '')

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # Validate the read *before* the colour conversion; otherwise
        # cvtColor fails first with an error that hides the file name.
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec.get('score', 1)
        r = 0

        if self.is_train:
            # Occasionally replace the crop by a half-body crop.
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            # Random scale in [1 - sf, 1 + sf]; random rotation (applied
            # with probability 0.6) in [-2 * rf, 2 * rf].
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                # Mirror the crop centre together with the image.
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        # Map the visible joints into the warped image's coordinates.
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, target, target_weight, meta
    def __getitem__(self, idx):
        """Fetch sample ``idx``; training samples also carry memory metadata.

        The record is deep-copied from ``self.db``, its image is read (from
        a zip archive when ``self.data_format == 'zip'``), optionally
        augmented with random scale, rotation and horizontal flip when
        ``self.is_train`` is set, and warped to ``self.image_size``.

        Args:
            idx: Index into ``self.db``.

        Returns:
            ``(input, target, target_weight, info)`` when not training.
            When training, ``(input, target, target_weight, info, meta)``,
            where ``meta`` holds the sample index, an initial
            ``'memory_difficult'`` value and a ``'forget_degree'`` of 100.

        Raises:
            ValueError: If the image file cannot be read.
        """
        record = copy.deepcopy(self.db[idx])

        image_file = record['image']
        filename = record.get('filename', '')
        imgnum = record.get('imgnum', '')

        read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        if self.data_format == 'zip':
            from utils import zipreader
            read_image = zipreader.imread
        else:
            read_image = cv2.imread
        data_numpy = read_image(image_file, read_flags)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = record['joints_3d']
        joints_vis = record['joints_3d_vis']

        center = record['center']
        scale = record['scale']
        score = record.get('score', 1)
        rotation = 0

        if self.is_train:
            sf, rf = self.scale_factor, self.rotation_factor
            # Random scale in [1 - sf, 1 + sf].
            scale = scale * np.clip(np.random.randn() * sf + 1,
                                    1 - sf, 1 + sf)
            # Random rotation in [-2 * rf, 2 * rf], applied 60% of the time.
            if random.random() <= 0.6:
                rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                width = data_numpy.shape[1]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   width, self.flip_pairs)
                # Mirror the crop centre together with the image.
                center[0] = width - center[0] - 1

        trans = get_affine_transform(center, scale, rotation,
                                     self.image_size)
        out_size = (int(self.image_size[0]), int(self.image_size[1]))
        input = cv2.warpAffine(data_numpy, trans, out_size,
                               flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        # Map the visible joints into the warped image's coordinates.
        for joint_id in range(self.num_joints):
            if joints_vis[joint_id, 0] > 0.0:
                joints[joint_id, 0:2] = affine_transform(
                    joints[joint_id, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)
        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        info = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': center,
            'scale': scale,
            'rotation': rotation,
            'score': score
        }

        if not self.is_train:
            return input, target, target_weight, info

        # @yangsen: initialise the difficulty coefficient and the
        # forgetting degree of the sample.
        noise = record['invisible_keypoints']  # labelled but invisible points
        w = np.array(record['num_keypoints'])
        decay_beta = 2  # controls the weight decay
        # Chosen so that the difficulty D reaches its maximum, 1, when
        # exactly one keypoint is annotated.
        lamda = np.exp(1 / decay_beta) + 1

        sigmoid = 1 / (1 + np.exp(-np.sqrt(w - noise) / decay_beta))
        # Original author's stated range: (0, 1].
        initial_difficult = lamda * (1 - sigmoid)

        meta = {
            'index': idx,
            # Original author's stated range: (0, 100].
            'memory_difficult': int(100 * initial_difficult),
            # [0, 100]: 0 represents remembered, 100 represents forgotten.
            'forget_degree': 100,
        }
        return input, target, target_weight, info, meta