Example #1
    def __getitem__(self, index):
        val_item = self.val_list[index]
        # Load the image
        im_path = os.path.join(self.root, self.dataset + '_images', val_item + '.jpg')
        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0
        trans = get_affine_transform(person_center, s, r, self.crop_size)
        input = cv2.warpAffine(
            im,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))
        input = self.transform(input)
        if self.flip:
            # only compute the flipped copy when it is actually used
            flip_input = input.flip(dims=[-1])
            batch_input_im = torch.stack([input, flip_input])
        else:
            batch_input_im = input

        meta = {
            'name': val_item,
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        return batch_input_im, meta
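Most of the loaders on this page call a `_box2cs` helper that is not shown. A minimal standalone sketch of the usual `_box2cs`/`_xywh2cs` pair: it turns an `[x, y, w, h]` box into a center and a scale, padding the box to the network's aspect ratio. Note this is an assumption about the repos above; HRNet-style code additionally divides the scale by a 200-pixel `pixel_std` and enlarges it by 1.25, so check the convention of the repo you are reading.

import numpy as np

def box2cs(box, aspect_ratio):
    # convert an [x, y, w, h] box to (center, scale); sketch of the common _box2cs helper
    x, y, w, h = box[:4]
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
    # pad the box to the target aspect ratio so the crop is not distorted
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    scale = np.array([w, h], dtype=np.float32)
    return center, scale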
Example #2
    def get_image_info(self, index):

        info = self.gt_db[index]
        imgpath = info['image']
        image = cv2.imread(imgpath)[:, :, ::-1]
        joints = info['joints_3d']
        joints_vis = info['joints_3d_vis'][:, 0]

        c = info['center']
        s = info['scale']
        r = 0
        if self.train_flag:
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

        trans = get_affine_transform(c, s, r, (self.crop_size, self.crop_size))
        dst_image = cv2.warpAffine(image,
                                   trans, (self.crop_size, self.crop_size),
                                   flags=cv2.INTER_LINEAR)

        for i in range(self.num_joints):
            if joints_vis[i] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
        kp2d = np.concatenate([joints[:, 0:2], joints_vis[:, None]],
                              1)[self.mpii_2_lsp14]

        result_dir = '{}/{}'.format(self.save_dir, os.path.basename(imgpath))
        metas = ('mpii', imgpath, result_dir, self.empty_kp3d, self.empty_kp3d,
                 self.empty_param, self.empty_gr)

        return dst_image, kp2d, self.const_box, metas
Example #3
    def __getitem__(self, index):
        img_name = self.file_list[index]
        img_path = os.path.join(self.root, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        h, w, _ = img.shape

        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0
        trans = get_affine_transform(person_center, s, r, self.input_size)
        input = cv2.warpAffine(
            img,
            trans, (int(self.input_size[1]), int(self.input_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        input = self.transform(input)
        meta = {
            'name': img_name,
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        return input, meta
Example #4
def get_pose_estimation_prediction(pose_model, image, center, scale):
    rotation = 0

    # pose estimation transformation
    trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE)
    model_input = cv2.warpAffine(
        image,
        trans,
        (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])),
        flags=cv2.INTER_LINEAR)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # pose estimation inference
    model_input = transform(model_input).unsqueeze(0)
    # switch to evaluate mode
    pose_model.eval()
    with torch.no_grad():
        # compute output heatmap
        output = pose_model(model_input.to(device))
        preds, _ = get_final_preds(
            cfg,
            output.clone().cpu().numpy(),
            np.asarray([center]),
            np.asarray([scale]))

        return preds
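All of these examples assume a `get_affine_transform(center, scale, rot, output_size, inv=0)` helper. The sketch below follows the widely copied HRNet reference implementation; the `* 200.0` scale convention is repo-dependent (the LIP-style loaders above omit it), so treat that line as an assumption.

import numpy as np
import cv2

def _get_dir(src_point, rot_rad):
    # rotate a 2D point by rot_rad radians
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    return np.array([src_point[0] * cs - src_point[1] * sn,
                     src_point[0] * sn + src_point[1] * cs], dtype=np.float32)

def _get_3rd_point(a, b):
    # third point completing a right angle, to pin down the affine map
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)

def get_affine_transform(center, scale, rot, output_size,
                         shift=np.array([0, 0], dtype=np.float32), inv=0):
    if not isinstance(scale, (np.ndarray, list)):
        scale = np.array([scale, scale])

    scale_tmp = scale * 200.0  # HRNet convention: scale is in 200px units
    src_w = scale_tmp[0]
    dst_w, dst_h = output_size[0], output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = _get_dir(np.array([0, src_w * -0.5], np.float32), rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    # three corresponding point pairs fully determine the affine transform
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    src[2:, :] = _get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = _get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        return cv2.getAffineTransform(np.float32(dst), np.float32(src))
    return cv2.getAffineTransform(np.float32(src), np.float32(dst))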
Example #5
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_dir = 'images.zip@' if self.data_format == 'zip' else ''
        image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                              db_rec['image'])
        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        joints = db_rec['joints_2d'].copy()
        joints_vis = db_rec['joints_vis'].copy()

        center = np.array(db_rec['center']).copy()
        scale = np.array(db_rec['scale']).copy()
        rotation = 0

        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
                if (np.min(joints[i, :2]) < 0
                        or joints[i, 0] >= self.image_size[0]
                        or joints[i, 1] >= self.image_size[1]):
                    joints_vis[i, :] = 0

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'scale': scale,
            'center': center,
            'rotation': rotation,
            'joints_2d': db_rec['joints_2d'],
            'joints_2d_transformed': joints,
            'joints_vis': joints_vis,
            'source': db_rec['source']
        }
        return input, target, target_weight, meta
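The `affine_transform` helper used above to remap joint coordinates is, in the HRNet reference code, a one-point matrix multiply; a minimal sketch:

import numpy as np

def affine_transform(pt, t):
    # apply a 2x3 affine matrix t to a single (x, y) point
    new_pt = np.array([pt[0], pt[1], 1.0])
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]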
Example #6
def get_pose_estimation_prediction(pose_model, image, centers, scales,
                                   transform):
    rotation = 0

    # pose estimation transformation
    model_inputs = []
    for center, scale in zip(centers, scales):
        trans = get_affine_transform(center, scale, rotation,
                                     cfg.MODEL.IMAGE_SIZE)
        # Crop a smaller image around each person
        model_input = cv2.warpAffine(
            image,
            trans,
            (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])),
            flags=cv2.INTER_LINEAR)

        # HWC image -> CHW tensor
        model_input = transform(model_input)
        model_inputs.append(model_input)

    # stack n CHW tensors into an NCHW batch
    model_inputs = torch.stack(model_inputs)

    # compute output heatmap
    output = pose_model(model_inputs.to(CTX))
    coords, _ = get_final_preds(cfg,
                                output.cpu().detach().numpy(),
                                np.asarray(centers), np.asarray(scales))

    return coords
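`get_final_preds` decodes the output heatmaps back to original-image coordinates. A simplified sketch: take each joint's argmax, then push the heatmap coordinates through the inverse affine transform. It reuses `get_affine_transform` and `affine_transform` from the sketches above; the HRNet reference version also nudges each peak a quarter pixel toward its higher neighbour, which is omitted here.

import numpy as np

def get_max_preds(batch_heatmaps):
    # batch_heatmaps: (N, K, H, W) -> (N, K, 2) coords and (N, K, 1) scores
    n, k, h, w = batch_heatmaps.shape
    flat = batch_heatmaps.reshape(n, k, -1)
    idx = flat.argmax(axis=2)
    maxvals = flat.max(axis=2).reshape(n, k, 1)
    preds = np.stack([idx % w, idx // w], axis=2).astype(np.float32)
    preds *= (maxvals > 0.0)  # suppress joints whose heatmap is empty
    return preds, maxvals

def transform_preds(coords, center, scale, output_size):
    # map heatmap-space coords back to the original image via the inverse affine
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    target_coords = np.zeros(coords.shape)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords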
Example #7
    def __getitem__(self, index):
        # Load image
        im_name = self.im_list[index]
        im_path = os.path.join(self.root, im_name)
        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        # Get center and scale
        center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0

        trans = get_affine_transform(center, s, r, self.crop_size)
        input = cv2.warpAffine(
            im,
            trans, (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        if self.transform:
            input = self.transform(input)

        meta = {
            # drop the file extension such as ".jpg" or ".png"
            'name': os.path.splitext(os.path.basename(im_name))[0],
            'center': center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        return input, meta
Example #8
def data_augmentation(sample, is_train):
    image_file = sample['image']
    filename = sample['filename'] if 'filename' in sample else ''
    joints = sample['joints_3d']
    joints_vis = sample['joints_3d_vis']
    c = sample['center']
    s = sample['scale']
    score = sample['score'] if 'score' in sample else 1
    # imgnum = sample['imgnum'] if 'imgnum' in sample else ''
    r = 0

    # fix the seeds for reproducibility in CE (continuous evaluation) mode
    if 'ce_mode' in os.environ:
        random.seed(0)
        np.random.seed(0)

    data_numpy = cv2.imread(image_file,
                            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    if is_train:
        sf = cfg.SCALE_FACTOR
        rf = cfg.ROT_FACTOR
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

        if cfg.FLIP and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               cfg.FLIP_PAIRS)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, cfg.IMAGE_SIZE)
    input = cv2.warpAffine(data_numpy,
                           trans,
                           (int(cfg.IMAGE_SIZE[0]), int(cfg.IMAGE_SIZE[1])),
                           flags=cv2.INTER_LINEAR)

    for i in range(cfg.NUM_JOINTS):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # Numpy target
    target, target_weight = generate_target(cfg, joints, joints_vis)

    if cfg.DEBUG:
        visualize(cfg, filename, data_numpy, input.copy(), joints, target)

    # Normalization
    input = input.astype('float32').transpose((2, 0, 1)) / 255
    input -= np.array(cfg.MEAN).reshape((3, 1, 1))
    input /= np.array(cfg.STD).reshape((3, 1, 1))

    if is_train:
        return input, target, target_weight
    else:
        return input, target, target_weight, c, s, score, image_file
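`generate_target` builds one Gaussian heatmap per joint plus a per-joint weight. A simplified sketch against the same cfg fields used in Example #8; the HRNet original restricts each Gaussian to a 3*sigma window for speed, so this dense version matches it only up to that truncation.

import numpy as np

def generate_target(cfg, joints, joints_vis):
    # joints: (K, 3) in input-image pixels; joints_vis: (K, 3)
    num_joints = joints.shape[0]
    hm_w, hm_h = int(cfg.HEATMAP_SIZE[0]), int(cfg.HEATMAP_SIZE[1])
    stride_x = cfg.IMAGE_SIZE[0] / hm_w
    stride_y = cfg.IMAGE_SIZE[1] / hm_h

    target_weight = joints_vis[:, 0:1].astype(np.float32).copy()
    target = np.zeros((num_joints, hm_h, hm_w), dtype=np.float32)

    xs = np.arange(hm_w, dtype=np.float32)
    ys = np.arange(hm_h, dtype=np.float32)[:, None]
    for i in range(num_joints):
        mu_x = joints[i, 0] / stride_x
        mu_y = joints[i, 1] / stride_y
        # drop joints that are invisible or fall outside the heatmap
        if target_weight[i, 0] == 0 or not (0 <= mu_x < hm_w and 0 <= mu_y < hm_h):
            target_weight[i, 0] = 0
            continue
        # unnormalised Gaussian peaked at the (downsampled) joint location
        target[i] = np.exp(-((xs - mu_x) ** 2 + (ys - mu_y) ** 2)
                           / (2 * cfg.SIGMA ** 2))
    return target, target_weight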
Example #9
    def __getitem__(self, idx: int):
        data = self.data[idx]
        frame_idx = data["image_id"]
        x, y, w, h = data['bbox']
        # x1,y1,x2,y2 = data['orig_bbox']

        # seek the video capture to the requested frame
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        _, img = self.cap.read()
        
        aspect_ratio = self.cfg.MODEL.IMAGE_SIZE[1] / self.cfg.MODEL.IMAGE_SIZE[0]
        centre = np.array([x+w*.5, y+h*.5])

        if w > aspect_ratio * h:
            h = w / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio
        scale = np.array([w, h]) * 1.25
        rotation = 0
        
        trans = get_affine_transform(centre, scale, rotation,
                                     (self.cfg.MODEL.IMAGE_SIZE[1], self.cfg.MODEL.IMAGE_SIZE[0]))
        cropped_img = cv2.warpAffine(img, trans,
                                     (self.cfg.MODEL.IMAGE_SIZE[1], self.cfg.MODEL.IMAGE_SIZE[0]),
                                     flags=cv2.INTER_LINEAR)
        cropped_img = normalize_input(cropped_img, self.cfg)
        
        # cv2.imshow("orig", img)
        # cropped_show = denormalize_input(cropped_img, self.cfg).copy().astype(np.uint8)
        # cv2.imshow("crop", cropped_show)
        # cv2.waitKey()
        # cv2.destroyAllWindows()
        
        estimated_joints = np.zeros((self.cfg.MODEL.NUM_JOINTS, 3), dtype=np.float64)
        offsets = np.zeros((self.cfg.MODEL.NUM_JOINTS, 2), dtype=np.float64)
        offsets[:, 0] = self.frame_area[0]
        offsets[:, 1] = self.frame_area[1]
        
        estimated_joints[:, :2] = np.array(data['joints']).reshape(self.cfg.MODEL.NUM_JOINTS, 2)
        estimated_joints[:, :2] += offsets
        estimated_joints[:,  2] = np.array(data['score'])
        
        for j in range(self.cfg.MODEL.NUM_JOINTS):
            if estimated_joints[j, 2] > 0:
                estimated_joints[j, :2] = affine_transform(estimated_joints[j, :2], trans)
                # zero the score of joints that land outside the crop
                estimated_joints[j, 2] *= ((estimated_joints[j, 0] >= 0) &
                                           (estimated_joints[j, 0] < self.cfg.MODEL.IMAGE_SIZE[1]) &
                                           (estimated_joints[j, 1] >= 0) &
                                           (estimated_joints[j, 1] < self.cfg.MODEL.IMAGE_SIZE[0]))

        input_pose_coord = estimated_joints[:,:2]
        input_pose_valid = np.array([1 if i not in self.cfg.ignore_kps else 0 for i in range(self.cfg.MODEL.NUM_JOINTS)])
        input_pose_score = estimated_joints[:, 2]
        
        crop_info = np.asarray([centre[0]-scale[0]*0.5, centre[1]-scale[1]*0.5, centre[0]+scale[0]*0.5, centre[1]+scale[1]*0.5])
        
        
        return [torch.from_numpy(cropped_img).float().permute(2, 0, 1), 
                input_pose_coord,
                input_pose_valid, 
                input_pose_score,
                crop_info,
                frame_idx,
        ]
Example #10
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # check for a failed read before any further processing
        if data_numpy is None:
            print('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)
        target_map = torch.from_numpy(self.generate_paf(joints, joints_vis))

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, target, target_weight, target_map, meta
Example #11
    def getitem(self):  # returns (input, meta)
        db_rec = self._load_coco_keypoint_annotation_kernal()
        image_file = db_rec[0]['image']
        filename = db_rec[0]['filename'] if 'filename' in db_rec[0] else ''
        imgnum = db_rec[0]['imgnum'] if 'imgnum' in db_rec[0] else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(  # (file path, flags)
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        c = db_rec[0]['center']
        s = db_rec[0]['scale']
        score = db_rec[0]['score'] if 'score' in db_rec[0] else 1
        r = 0

        trans = get_affine_transform(
            c, s, r, self.image_size)  # 2x3 matrix mapping the scaled, rotated box to image_size
        input = cv2.warpAffine(  # input is the rotated/scaled crop of the original image
            data_numpy,  # source image
            trans,  # transform matrix
            (int(self.image_size[0]), int(self.image_size[1])),  # output size, e.g. 192x256
            flags=cv2.INTER_LINEAR)  # interpolation method

        if self.transform:
            input = self.transform(input)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'center': c,
            'scale': s,
            'score': score,
            'joints_vis': [[(1, 1, 0)] * 17]
        }

        return input, meta
Example #12
def resize_align_multi_scale(image, input_size, current_scale, min_scale):
    size_resized, center, scale = get_multi_scale_size(image, input_size,
                                                       current_scale,
                                                       min_scale)
    trans = get_affine_transform(center, scale, 0, size_resized)

    # size_resized is already the (w_resized, h_resized) tuple
    image_resized = cv2.warpAffine(image, trans, size_resized)

    return image_resized, center, scale
Example #13
    def get_humankeypoints(self, img_orig, objs, threshold=0.6):

        n_objs = len(objs)
        np_inputs = np.zeros(shape=(n_objs, 3, 256, 192), dtype=np.float32)
        # output_data = np.zeros(shape=(n_objs, 17, 64, 48), dtype=np.float32)

        for idx in range(n_objs):
            c = objs[idx]['center']
            s = objs[idx]['scale']
            trans = get_affine_transform(c, s, 0, (192, 256), inv=0)
            warp_img = cv2.warpAffine(img_orig,
                                      trans, (192, 256),
                                      flags=cv2.INTER_LINEAR)
            np_input = cv2.cvtColor(warp_img, cv2.COLOR_BGR2RGB)
            np_input = np.expand_dims(np_input, 0).astype(np.float32)
            np_inputs_nchw = np_input.transpose(0, 3, 1, 2) / 255
            np_inputs[idx] = self.standardization(np_inputs_nchw[0])

        output_data = self.rep.run(np_inputs)[0]

        list_c = [obj['center'] for obj in objs]
        list_s = [obj['scale'] for obj in objs]
        preds, maxvals = get_final_preds(output_data, list_c, list_s)

        annotations = []
        for obj_idx in range(len(objs)):
            keypoints = []
            cnt_num_point = 0  # count keypoints per object, not cumulatively
            for idx, ptval in enumerate(zip(preds[obj_idx], maxvals[obj_idx])):
                point, maxval = ptval
                x, y = np.array(point, dtype=np.float64)
                if maxval > threshold:
                    keypoints.extend([x, y, 2])
                    cnt_num_point += 1
                else:
                    keypoints.extend([0, 0, 0])

            x, y, w, h = objs[obj_idx]['bbox']

            annotation = usrcoco.create_annotation_info(
                annotation_id=obj_idx + 1,
                image_id=1,
                category_info=1,
                keypoints=keypoints,
                num_keypoints=cnt_num_point,
                bounding_box=objs[obj_idx]['bbox'])
            annotations.append(annotation)

        return annotations
Example #14
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        # filename = db_rec['filename'] if 'filename' in db_rec else ''
        # imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # check for a failed read before converting colour spaces
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        # joints = db_rec['joints_3d']
        # joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        meta = {
            'image': image_file,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, meta
Example #15
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        data_numpy = cv2.imread(
            self.root + image_file,
            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # check for a failed read before converting colour spaces
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        #input = np.swapaxes(input, 1, 2)
        #target = np.swapaxes(target, 1, 2)

        return input, meta
Example #16
def get_pose_estimation_prediction(pose_model, image, centers, scales, box,
                                   transform):
    rotation = 0

    # pose estimation transformation
    model_inputs = []
    for center, scale in zip(centers, scales):
        cv2.imwrite('../data/nlos/nlos_result/first_input.jpg', image)
        trans = get_affine_transform(center, scale, rotation,
                                     cfg.MODEL.IMAGE_SIZE)
        # Crop a smaller image around each person
        model_input = cv2.warpAffine(
            image,
            trans,
            (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])),
            flags=cv2.INTER_LINEAR)

        #print('model_input(w/ trans)', model_input.shape)
        img = model_input
        cv2.imwrite('../data/nlos/nlos_result/trans_input.jpg', img)

        #inv_trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE, inv=1)

        # HWC image -> CHW tensor
        model_input = transform(model_input)
        model_inputs.append(model_input)

    # stack n CHW tensors into an NCHW batch
    model_inputs = torch.stack(model_inputs)
    zero_heatmap = torch.cuda.FloatTensor(
        int(cfg.MODEL.HEATMAP_SIZE[0]),
        int(cfg.MODEL.HEATMAP_SIZE[1])).fill_(0)
    # compute output heatmap
    output = pose_model(model_inputs.to(CTX))

    # using heatmap, get inverse transformed coordinates
    coords, _ = get_final_preds(cfg,
                                output.cpu().detach().numpy(),
                                np.asarray(centers), np.asarray(scales))

    for idx1, mat in enumerate(coords[0]):
        x_coord, y_coord = int(mat[0]), int(mat[1])
        if not (in_box(x_coord, y_coord, box)):
            coords[0][idx1] = [-1, -1]
            output[0][idx1] = zero_heatmap

    return output, coords
Example #17
def get_pose_estimation_prediction(pose_model, image, centers, scales, box,
                                   transform):
    rotation = 0

    #print("img shape ", image.shape)
    #print("centers ", centers)
    #print("scales ", scales)
    #print(box)
    # pose estimation transformation
    model_inputs = []
    for center, scale in zip(centers, scales):
        trans = get_affine_transform(center, scale, rotation,
                                     cfg.MODEL.IMAGE_SIZE)
        #print("trans", trans)
        # Crop a smaller image around each person
        model_input = cv2.warpAffine(
            image,
            trans,
            (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])),
            flags=cv2.INTER_LINEAR)

        img = model_input
        cv2.imwrite('trans_input.jpg', img)
        # HWC image -> CHW tensor
        model_input = transform(model_input)
        model_inputs.append(model_input)

    # stack n CHW tensors into an NCHW batch
    model_inputs = torch.stack(model_inputs)
    #zero_heatmap = np.zeros((120, 120), dtype=np.float32)
    zero_heatmap = torch.cuda.FloatTensor(120, 120).fill_(0)
    # compute output heatmap
    output = pose_model(model_inputs.to(CTX))
    # heatmap output :
    coords, _ = get_final_preds(cfg,
                                output.cpu().detach().numpy(),
                                np.asarray(centers), np.asarray(scales))

    for idx1, mat in enumerate(coords[0]):
        x_coord, y_coord = int(mat[0]), int(mat[1])
        if not (in_box(x_coord, y_coord, box)):
            coords[0][idx1] = [-1, -1]
            output[0][idx1] = zero_heatmap

    return output, coords
Example #18
def compute_unary_term(heatmap, grid, bbox2D, cam, imgSize):
    """
    Args:
        heatmap: array of size (n * k * h * w)
                -n: number of views,  -k: number of joints
                -h: heatmap height,   -w: heatmap width
        grid: list of k ndarrays of size (nbins * 3)
                    -k: number of joints; 1 when the grid is shared in PSM
                    -nbins: number of bins in the grid
        bbox2D: bounding box on which heatmap is computed
    Returns:
        unary_of_all_joints: a list of ndarray of size nbins
    """

    n, k = heatmap.shape[0], heatmap.shape[1]
    h, w = heatmap.shape[2], heatmap.shape[3]
    nbins = grid[0].shape[0]

    unary_of_all_joints = []
    for j in range(k):
        unary = np.zeros(nbins)
        for c in range(n):

            grid_id = 0 if len(grid) == 1 else j
            xy = cameras.project_pose(grid[grid_id], cam[c])
            trans = get_affine_transform(bbox2D[c]['center'],
                                         bbox2D[c]['scale'], 0, imgSize)

            xy = affine_transform_pts(xy, trans) * np.array([w, h]) / imgSize
            # for i in range(nbins):
            #     xy[i] = affine_transform(xy[i], trans) * np.array([w, h]) / imgSize

            hmap = heatmap[c, j, :, :]
            # grid axes must match the transposed heatmap: x spans width w, y spans height h
            point_x, point_y = np.arange(w), np.arange(h)
            rgi = RegularGridInterpolator(points=[point_x, point_y],
                                          values=hmap.transpose(),
                                          bounds_error=False,
                                          fill_value=0)
            score = rgi(xy)
            unary = unary + np.reshape(score, newshape=unary.shape)
        unary_of_all_joints.append(unary)

    return unary_of_all_joints
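`affine_transform_pts` is the batched counterpart of the per-point `affine_transform`: it maps a whole (N, 2) array through the 2x3 matrix in one shot (some repos' `affine_transform` itself accepts arrays, as in Example #24 below). A plausible sketch under that assumption:

import numpy as np

def affine_transform_pts(pts, t):
    # pts: (N, 2) array of points; t: 2x3 affine matrix from get_affine_transform
    ones = np.ones((pts.shape[0], 1))
    homo = np.concatenate([pts, ones], axis=1)  # (N, 3) homogeneous coordinates
    return homo @ t.T                           # (N, 2) transformed points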
Example #19
    def __getitem__(self, idx):
        assert self.last_idx_read is None or self.last_idx_read == idx - 1, \
            "idx jump: %d -> %d" % (self.last_idx_read, idx)
        db_rec = copy.deepcopy(self.db[idx])
        self.last_idx_read = idx

        image_file = db_rec['image']

        frame = self._get_img(image_file)

        if frame is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            frame,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        meta = {
            'image': image_file,
            'origbox': db_rec['origbox'],
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, meta
Example #20
    def get_item(self, img):
        h, w, _ = img.shape
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0
        trans = get_affine_transform(person_center, s, r, self.input_size)
        input = cv2.warpAffine(
            img,
            trans, (int(self.input_size[1]), int(self.input_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        input = self.transform(input)
        input = input.unsqueeze(0)
        meta = {
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        return input, meta
Example #21
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # check for a failed read before converting colour spaces
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']
        if 'interference' in db_rec.keys():
            interference_joints = db_rec['interference']
            interference_joints_vis = db_rec['interference_vis']
        else:
            interference_joints = [joints]
            interference_joints_vis = [joints_vis]

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1
                for i in range(len(interference_joints)):
                    interference_joints[i], interference_joints_vis[i] = \
                        fliplr_joints(interference_joints[i],
                                      interference_joints_vis[i],
                                      data_numpy.shape[1],
                                      self.flip_pairs)

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)
        # cv2.imwrite('img.jpg',input[:,:,::-1])
        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
        target, target_weight = self.generate_target(joints, joints_vis)

        # interference joints heatmaps
        inter_target = np.zeros_like(target)
        inter_target_weight = np.zeros_like(target_weight)
        for i in range(len(interference_joints)):
            inter_joints = interference_joints[i]
            inter_joints_vis = interference_joints_vis[i]
            for j in range(self.num_joints):
                if inter_joints_vis[j, 0] > 0.0:
                    inter_joints[j, 0:2] = affine_transform(
                        inter_joints[j, 0:2], trans)
            _inter_target, _inter_target_weight = self.generate_target(
                inter_joints, inter_joints_vis)
            inter_target = np.maximum(inter_target, _inter_target)
            inter_target_weight = np.maximum(inter_target_weight,
                                             _inter_target_weight)
        all_ins_target = np.maximum(inter_target, target)
        all_ins_target_weight = np.maximum(inter_target_weight, target_weight)

        # AE labels
        All_joints = [joints] + interference_joints
        ae_targets = self.generate_joints_ae_targets(All_joints)

        # convert targets to torch tensors
        all_ins_target = torch.from_numpy(all_ins_target)
        all_ins_target_weight = torch.from_numpy(all_ins_target_weight)
        ae_targets = torch.from_numpy(ae_targets)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score,
            'interference_maps': inter_target,
        }
        return input, all_ins_target, all_ins_target_weight, ae_targets, meta
Example #22
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # check for a failed read before converting colour spaces
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
        # cv2.imwrite('ori_img.jpg', data_numpy[:, :, ::-1])

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']
        if 'interference' in db_rec.keys():
            interference_joints = db_rec['interference']
            interference_joints_vis = db_rec['interference_vis']
        else:
            interference_joints = [joints]
            interference_joints_vis = [joints_vis]

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1

        size = db_rec['obj_size']
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1
                for i in range(len(interference_joints)):
                    interference_joints[i], interference_joints_vis[i] = \
                        fliplr_joints(interference_joints[i],
                                      interference_joints_vis[i],
                                      data_numpy.shape[1],
                                      self.flip_pairs)

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)
        # cv2.imwrite('img.jpg',input[:,:,::-1])
        if self.transform:
            input = self.transform(input)

        # relation_joints = []
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)
        # all_points = np.asarray(np.where(target == 1))[::-1].transpose()
        # for p in all_points:
        #     relation_joints += [c[0], c[1], size[0], size[1], p[0], p[1], p[2], 1]
        # interference joints heatmaps
        inter_target = np.zeros_like(target)
        inter_target_weight = np.zeros_like(target_weight)
        for i in range(len(interference_joints)):
            inter_joints = interference_joints[i]
            inter_joints_vis = interference_joints_vis[i]
            for j in range(self.num_joints):
                if inter_joints_vis[j, 0] > 0.0:
                    inter_joints[j, 0:2] = affine_transform(
                        inter_joints[j, 0:2], trans)

            _inter_target, _inter_target_weight = self.generate_target(
                inter_joints, inter_joints_vis)

            inter_target = np.maximum(inter_target, _inter_target)
            inter_target_weight = np.maximum(inter_target_weight,
                                             _inter_target_weight)
        # if inter_target.max()>0:
        #     all_points = np.asarray(np.where(inter_target == 1))[::-1].transpose()
        #     for p in all_points:
        #         relation_joints += [c[0], c[1], size[0], size[1], p[0], p[1], p[2], 0]

        # all_ins_target = np.maximum(inter_target, target)
        all_ins_target = np.maximum(inter_target * 0.5, target)
        # points = self.generate_candidate_points_from_heatmaps(inter_target)
        all_ins_target_weight = np.maximum(inter_target_weight, target_weight)
        # cv2.imwrite('heatmap.jpg',np.max(target,axis=0)*255)
        # cv2.imwrite('inter_heatmap.jpg', np.max(inter_target, axis=0) * 255)
        # relation labels
        # relation_joints = np.asarray(relation_joints).reshape((-1,8))
        kpts_onehots = self.heatmap2onehot(target)
        # if kpts_onehots.shape[0]!=15:
        #     print(target.shape)
        # target_amaps, target_aweights = self.generate_association_map_from_gt_heatmaps(target, all_ins_target)

        # amaps = self.generate_association_map_from_labels(relation_joints)
        # max_points = self.num_joints * 5
        # num_points = len(relation_joints) if len(relation_joints) <= max_points else max_points
        # target_relation_points = np.zeros((max_points, 8))
        # target_amaps = np.zeros((max_points, max_points))
        # target_relation_points[:num_points] = relation_joints[:num_points]
        # target_amaps[:num_points, :num_points] = amaps[:num_points, :num_points]

        # heatmap labels
        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)
        all_ins_target = torch.from_numpy(all_ins_target)
        all_ins_target_weight = torch.from_numpy(all_ins_target_weight)
        # target_amaps = torch.from_numpy(target_amaps)
        # target_aweights = torch.from_numpy(target_aweights)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score,
            # 'relation_joints': target_relation_points,
            # 'num_points': num_points,
            # 'association_maps': target_amaps,
            # 'association_weights': target_aweights,
            'interference_maps': inter_target,
            'kpt_cat_maps': kpts_onehots,
        }
        # return input, target, target_weight, meta
        return input, target, target_weight, all_ins_target, all_ins_target_weight, meta
Example #23
    def __getitem__(self, index):
        im_name = self.im_list[index]
        im_path = os.path.join(self.root, self.dataset + '_images',
                               im_name + '.jpg')
        parsing_anno_path = os.path.join(self.root,
                                         self.dataset + '_segmentations',
                                         im_name + '.png')

        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        parsing_anno = np.zeros((h, w), dtype=np.int64)

        # get pose anno
        if self.dataset == 'train' or self.dataset == 'val':
            joints_all_info = np.array(self.pose_info[im_name])
            joints_loc = np.zeros((joints_all_info.shape[0], 2))
            joints_loc[:, :] = joints_all_info[:, 0:2]  # 1st and 2nd column

            # get visibility of joints
            coord_sum = np.sum(joints_loc, axis=1)
            visibility = coord_sum != 0

        # Get center and scale
        center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0

        if self.dataset != 'test':
            parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)

            if self.dataset == 'train' or self.dataset == 'trainval':

                sf = self.scale_factor
                rf = self.rotation_factor
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
                r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                    if random.random() <= 0.6 else 0

                if random.random() <= self.flip_prob:
                    im = im[:, ::-1, :]
                    parsing_anno = parsing_anno[:, ::-1]

                    center[0] = im.shape[1] - center[0] - 1
                    right_idx = [15, 17, 19]
                    left_idx = [14, 16, 18]
                    for i in range(0, 3):
                        right_pos = np.where(parsing_anno == right_idx[i])
                        left_pos = np.where(parsing_anno == left_idx[i])
                        parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
                        parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]

                    # flip the joints
                    joints_loc = flip_joints(joints_loc, w)

                    # swap the visibility of left and right joints
                    r_joint = [0, 1, 2, 10, 11, 12]
                    l_joint = [3, 4, 5, 13, 14, 15]
                    for i in range(0, 6):
                        temp_visibility = visibility[r_joint[i]]
                        visibility[r_joint[i]] = visibility[l_joint[i]]
                        visibility[l_joint[i]] = temp_visibility

        trans = get_affine_transform(center, s, r, self.crop_size)

        input = cv2.warpAffine(
            im,
            trans, (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        if self.transform:
            input = self.transform(input)

        meta = {
            'name': im_name,
            'center': center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        if self.dataset == 'test':
            return input, meta
        else:

            label_parsing = cv2.warpAffine(
                parsing_anno,
                trans, (int(self.crop_size[1]), int(self.crop_size[0])),
                flags=cv2.INTER_NEAREST,
                borderMode=cv2.BORDER_CONSTANT,
                borderValue=(255))

            grid_x = int(self.crop_size[1] / self.pose_net_stride)
            grid_y = int(self.crop_size[0] / self.pose_net_stride)

            for i in range(joints_all_info.shape[0]):
                if visibility[i] > 0:
                    joints_loc[i, 0:2] = self.affine_trans(
                        joints_loc[i, 0:2], trans)

            label_pose = generate_pose(joints_loc, visibility, trans, grid_x,
                                       grid_y, self.pose_net_stride,
                                       self.sigma)
            label_edge = generate_edge(label_parsing)

            return input, label_parsing, label_pose, label_edge, meta
Example #24
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_dir = 'images.zip@' if self.data_format == 'zip' else ''
        image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                              db_rec['image'])
        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        joints = db_rec['joints_2d'].copy()  # [union_joints, 2]
        joints_vis = db_rec['joints_vis'].copy()[:, :2]  # [union_joints, 2]
        assert len(joints) == self.num_joints
        assert len(joints_vis) == self.num_joints

        # crop and scale according to ground truth
        center = np.array(db_rec['center']).copy()
        scale = np.array(db_rec['scale']).copy()
        rotation = 0

        if self.is_train and db_rec['source'] == 'mpii':
            sf = self.mpii_scale_factor
            rf = self.mpii_rotation_factor
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if self.mpii_flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.mpii_flip_pairs)
                center[0] = data_numpy.shape[1] - center[0] - 1

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        visible_joints = joints_vis[:, 0] > 0
        if np.any(visible_joints):
            joints[visible_joints, :2] = affine_transform(
                joints[visible_joints, :2], trans)
            # zero_indices = np.any(
            #         np.concatenate((joints[:, :2]<0,
            #         joints[:, [0]] >= self.image_size[0],
            #         joints[:, [1]] >= self.image_size[1]),
            #         axis=1),
            #         axis=1)
            # joints_vis[zero_indices, :] = 0

        target, target_weight = self.generate_target(joints, joints_vis,
                                                     db_rec['source'])

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'scale': scale,
            'center': center,
            'rotation': rotation,
            'joints_2d': db_rec['joints_2d'],
            'joints_2d_transformed': joints,
            'joints_vis': joints_vis,
            'source': db_rec['source']
        }
        return input, target, target_weight, meta
Example #25
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            # logger.error('=> fail to read {}'.format(image_file))
            # raise ValueError('Fail to read {}'.format(image_file))
            return None, None, None, None, None, None

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_2d']
        joints_vis = db_rec['joints_2d_vis']
        joints_3d = db_rec['joints_3d']
        joints_3d_vis = db_rec['joints_3d_vis']

        nposes = len(joints)
        assert nposes <= self.maximum_person, 'too many persons'

        height, width, _ = data_numpy.shape
        c = np.array([width / 2.0, height / 2.0])
        s = get_scale((width, height), self.image_size)
        r = 0

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for n in range(nposes):
            for i in range(len(joints[0])):
                if joints_vis[n][i, 0] > 0.0:
                    joints[n][i, 0:2] = affine_transform(joints[n][i, 0:2],
                                                         trans)
                    if (np.min(joints[n][i, :2]) < 0
                            or joints[n][i, 0] >= self.image_size[0]
                            or joints[n][i, 1] >= self.image_size[1]):
                        joints_vis[n][i, :] = 0

        if 'pred_pose2d' in db_rec and db_rec['pred_pose2d'] is not None:
            # For convenience, we use predicted poses and corresponding values at the original heatmaps
            # to generate 2d heatmaps for Campus and Shelf dataset.
            # You can also use other 2d backbone trained on COCO to generate 2d heatmaps directly.
            pred_pose2d = db_rec['pred_pose2d']
            for n in range(len(pred_pose2d)):
                for i in range(len(pred_pose2d[n])):
                    pred_pose2d[n][i, 0:2] = affine_transform(
                        pred_pose2d[n][i, 0:2], trans)

            input_heatmap = self.generate_input_heatmap(pred_pose2d)
            input_heatmap = torch.from_numpy(input_heatmap)
        else:
            input_heatmap = torch.zeros(self.cfg.NETWORK.NUM_JOINTS,
                                        self.heatmap_size[1],
                                        self.heatmap_size[0])

        target_heatmap, target_weight = self.generate_target_heatmap(
            joints, joints_vis)
        target_heatmap = torch.from_numpy(target_heatmap)
        target_weight = torch.from_numpy(target_weight)

        # make joints and joints_vis having same shape
        joints_u = np.zeros((self.maximum_person, self.num_joints, 2))
        joints_vis_u = np.zeros((self.maximum_person, self.num_joints, 2))
        for i in range(nposes):
            joints_u[i] = joints[i]
            joints_vis_u[i] = joints_vis[i]

        joints_3d_u = np.zeros((self.maximum_person, self.num_joints, 3))
        joints_3d_vis_u = np.zeros((self.maximum_person, self.num_joints, 3))
        for i in range(nposes):
            joints_3d_u[i] = joints_3d[i][:, 0:3]
            joints_3d_vis_u[i] = joints_3d_vis[i][:, 0:3]

        target_3d = self.generate_3d_target(joints_3d)
        target_3d = torch.from_numpy(target_3d)

        if isinstance(self.root_id, int):
            roots_3d = joints_3d_u[:, self.root_id]
        elif isinstance(self.root_id, list):
            roots_3d = np.mean([joints_3d_u[:, j] for j in self.root_id],
                               axis=0)
        meta = {
            'image': image_file,
            'num_person': nposes,
            'joints_3d': joints_3d_u,
            'joints_3d_vis': joints_3d_vis_u,
            'roots_3d': roots_3d,
            'joints': joints_u,
            'joints_vis': joints_vis_u,
            'center': c,
            'scale': s,
            'rotation': r,
            'camera': db_rec['camera']
        }

        return input, target_heatmap, target_weight, target_3d, meta, input_heatmap
Example #26
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        from boxx import cf
        if cf.args.task == 'ssm':
            feat_stride = self.image_size / self.heatmap_size
            joints_h = copy.deepcopy(joints)
            # TODO: reduce quantization loss
            joints_h[:, 0] = (joints_h[:, 0] / feat_stride[0] + 0.5)
            joints_h[:, 1] = (joints_h[:, 1] / feat_stride[1] + 0.5)
            joints_h = joints_h.astype(np.int32)
            meta['joints_h'] = joints_h

        return input, target, target_weight, meta
Example #27
# (the statement above this snippet was truncated during extraction;
#  img_files is assumed to be a COCO-style annotation dict loaded from JSON)
image_names = []
for i in range(len(img_files['images'])):
    image_names.append(img_files['images'][i]['file_name'])

data_numpy = cv2.imread(image_file,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
if data_numpy is None:
    raise ValueError('Fail to read {}'.format(image_file))

# object detection box in (x, y, w, h) format
box = [300, 100, 200, 250]
c, s = _box2cs(box, data_numpy.shape[0], data_numpy.shape[1])
r = 0

trans = get_affine_transform(c, s, r, config.MODEL.IMAGE_SIZE)
input = cv2.warpAffine(
    data_numpy,
    trans, (int(config.MODEL.IMAGE_SIZE[0]), int(config.MODEL.IMAGE_SIZE[1])),
    flags=cv2.INTER_LINEAR)

# vis transformed image
cv2.imshow('image', input)
cv2.waitKey(1000)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
input = transform(input).unsqueeze(0)
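
The _box2cs call above converts a detection box into the center/scale pair that get_affine_transform expects. A plausible sketch, modeled on the common HRNet helper, follows; the aspect_ratio default, pixel_std = 200, and the 1.25 enlargement are assumptions, and the height/width arguments are kept only to match the call signature above (the common implementation ignores them).

import numpy as np

def _box2cs(box, image_height, image_width,
            aspect_ratio=192.0 / 256.0, pixel_std=200):
    # Convert an (x, y, w, h) box into (center, scale) for get_affine_transform.
    x, y, w, h = box[:4]
    center = np.array([x + (w - 1) * 0.5, y + (h - 1) * 0.5], dtype=np.float32)

    # Pad the box to the model's input aspect ratio before scaling.
    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio

    scale = np.array([w / pixel_std, h / pixel_std], dtype=np.float32)
    if center[0] != -1:
        scale = scale * 1.25  # enlarge slightly so the person is not clipped
    return center, scale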
Example #28
    def __getitem__(self, index):
        train_item = self.train_list[index]

        im_path = os.path.join(self.root, self.dataset + '_images', train_item + '.jpg')
        parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', train_item + '.png')

        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        parsing_anno = np.zeros((h, w), dtype=np.int64)  # np.long is removed in NumPy >= 1.24

        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0

        if self.dataset != 'test':
            # Load the parsing (segmentation) annotation
            parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)
            if self.dataset == 'train' or self.dataset == 'trainval':
                sf = self.scale_factor
                rf = self.rotation_factor
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
                r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0

                if random.random() <= self.flip_prob:
                    im = im[:, ::-1, :]
                    parsing_anno = parsing_anno[:, ::-1]
                    person_center[0] = im.shape[1] - person_center[0] - 1
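                    # Swap left/right part labels to match the flipped image
                    # (LIP-style label ids assumed: 14/15 arms, 16/17 legs, 18/19 shoes).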
                    right_idx = [15, 17, 19]
                    left_idx = [14, 16, 18]
                    for i in range(0, 3):
                        right_pos = np.where(parsing_anno == right_idx[i])
                        left_pos = np.where(parsing_anno == left_idx[i])
                        parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
                        parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]

        trans = get_affine_transform(person_center, s, r, self.crop_size)
        input = cv2.warpAffine(
            im,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        if self.transform:
            input = self.transform(input)

        meta = {
            'name': train_item,
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        if self.dataset == 'val' or self.dataset == 'test':
            return input, meta
        else:
            label_parsing = cv2.warpAffine(
                parsing_anno,
                trans,
                (int(self.crop_size[1]), int(self.crop_size[0])),
                flags=cv2.INTER_NEAREST,
                borderMode=cv2.BORDER_CONSTANT,
                borderValue=255)  # 255 = ignore index for parsing labels

            label_parsing = torch.from_numpy(label_parsing)

            return input, label_parsing, meta
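
get_affine_transform itself appears in every example here without a definition. The sketch below follows the standard HRNet construction: two matched source/destination points plus a derived third point are fed to cv2.getAffineTransform, with scale interpreted in units of pixel_std = 200 (an assumption consistent with the _box2cs sketch above). Passing inv=1 yields the inverse crop-to-image mapping used when projecting predictions back to original coordinates; affine_transform, used to warp individual joints, is included for completeness.

import cv2
import numpy as np

def get_dir(src_point, rot_rad):
    # Rotate a 2D point by rot_rad radians.
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    return np.array([src_point[0] * cs - src_point[1] * sn,
                     src_point[0] * sn + src_point[1] * cs], dtype=np.float32)

def get_3rd_point(a, b):
    # Third point completing a right angle with the segment a -> b.
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)

def get_affine_transform(center, scale, rot, output_size,
                         shift=np.array([0, 0], dtype=np.float32), inv=0):
    if not isinstance(scale, (np.ndarray, list, tuple)):
        scale = np.array([scale, scale], dtype=np.float32)
    scale_tmp = np.asarray(scale, dtype=np.float32) * 200.0  # pixel_std = 200
    src_w = scale_tmp[0]
    dst_w, dst_h = output_size[0], output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], dtype=np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    src[2, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        return cv2.getAffineTransform(np.float32(dst), np.float32(src))
    return cv2.getAffineTransform(np.float32(src), np.float32(dst))

def affine_transform(pt, t):
    # Apply a 2x3 affine matrix to a single (x, y) point.
    new_pt = np.array([pt[0], pt[1], 1.0], dtype=np.float32)
    return (t @ new_pt)[:2]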
Example #29
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, target, target_weight, meta
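
Example #29 also calls self.half_body_transform, which is not shown. A minimal standalone sketch of the standard HRNet-style helper follows; the explicit num_joints/upper_body_ids/aspect_ratio/pixel_std parameters stand in for the attributes the real method reads from self and are assumptions.

import numpy as np

def half_body_transform(joints, joints_vis, num_joints, upper_body_ids,
                        aspect_ratio, pixel_std=200):
    # Randomly restrict the crop to the upper or lower half of the body.
    upper, lower = [], []
    for joint_id in range(num_joints):
        if joints_vis[joint_id][0] > 0:
            (upper if joint_id in upper_body_ids else lower).append(joints[joint_id])

    if np.random.randn() < 0.5 and len(upper) > 2:
        selected = upper
    else:
        selected = lower if len(lower) > 2 else upper
    if len(selected) < 2:
        return None, None  # caller falls back to the full-body crop

    selected = np.array(selected, dtype=np.float32)
    center = selected.mean(axis=0)[:2]
    left_top = np.amin(selected, axis=0)
    right_bottom = np.amax(selected, axis=0)
    w = right_bottom[0] - left_top[0]
    h = right_bottom[1] - left_top[1]
    # Pad the half-body box to the input aspect ratio, as _box2cs does.
    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    scale = np.array([w / pixel_std, h / pixel_std], dtype=np.float32) * 1.5
    return center, scale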
Example #30
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        ############################################## data augmentation
        if self.is_train:

            # scale and rotation augmentation
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            # flips images
            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

            # Brighten/darken by shifting all pixel values (left disabled; benefit unverified).
#             if self.brighten and random.random() <= 0.5:
#                 shift = 2 * np.random.randn()
#                 data_numpy = np.clip(data_numpy + shift, 0, 255).astype(np.uint8)

        trans = get_affine_transform(c, s, r, self.image_size)

        # NOTE: this scales and crops the image to self.image_size; in
        # TEST_MODE the raw image is passed through instead (used during eval).
        input = data_numpy
        if 'TEST_MODE' not in self.cfg:
            input = cv2.warpAffine(
                data_numpy,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, target, target_weight, meta
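
fliplr_joints, used by Examples #26, #29, and #30 during horizontal flips, is likewise imported rather than shown. A sketch under the usual convention (matched_parts lists (left, right) index pairs; the returned joints are masked by visibility, as the HRNet helper does):

import numpy as np

def fliplr_joints(joints, joints_vis, width, matched_parts):
    # Mirror x coordinates around the vertical image axis.
    joints[:, 0] = width - joints[:, 0] - 1
    # Swap each (left, right) joint pair so labels stay anatomically correct.
    for pair in matched_parts:
        joints[pair[0], :], joints[pair[1], :] = \
            joints[pair[1], :], joints[pair[0], :].copy()
        joints_vis[pair[0], :], joints_vis[pair[1], :] = \
            joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
    return joints * joints_vis, joints_vis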