def __init__(self, path, background_path, data_indices, img_height=480, img_width=640,
                 n_class=9,
                 gaussian_noise=False,
                 gamma_augmentation=False,
                 avaraging=False,
                 salt_pepper_noise=False,
                 contrast=False,
                 random_iteration=False,
                 bg_flip=True,
                 channel_swap=False,
                 metric_filter=1.0):
        self.base_path = path
        self.n_class = n_class
        self.data_indices = data_indices
        self.img_height = img_height
        self.img_width = img_width

        ## augmentation option
        self.gaussian_noise = gaussian_noise
        self.gamma_augmentation = gamma_augmentation
        self.avaraging = avaraging
        self.salt_pepper_noise = salt_pepper_noise
        self.contrast = contrast
        if self.contrast:
            self.contrast_server = preprocess_utils.ContrastAugmentation()

        self.bg_flip = bg_flip

        self.bg_fpaths = glob.glob(os.path.join(background_path, '*.jpg'))

        self.random_iteration = random_iteration

        ## load models
        model_fpath_mask = os.path.join(path, 'models_ply', '{0}.ply')
        dummy_model_fpath_mask = os.path.join(path, 'dummy_models_ply', '{0}.ply')
        self.objs = ['Ape', 'Can', 'Cat', 'Driller', 'Duck', 'Eggbox', 'Glue', 'Holepuncher']
        self.dummy_objs = ['benchviseblue', 'cam', 'iron', 'lamp', 'phone']
        self.models = []
        self.dummy_models = []
        for obj in self.objs:
            print 'Loading data:', obj
            model_fpath = model_fpath_mask.format(obj)
            self.models.append(inout.load_ply(model_fpath))
        for obj in self.dummy_objs:
            print 'Loading data:', obj
            model_fpath = dummy_model_fpath_mask.format(obj)
            self.dummy_models.append(inout.load_ply(model_fpath))

        self.K = np.array([[572.41140, 0, 325.26110],
                           [0, 573.57043, 242.04899],
                           [0, 0, 1]])

        self.channel_swap = channel_swap
        self.metric_filter = metric_filter
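
This __init__ matches the positional signature of DualCPNetAutoGenerateDataset as called in Example 7 (base path, background path, index range), so it presumably belongs to that class. A minimal construction sketch under that assumption; the augmentation choices are illustrative, not prescribed by the snippet:

import numpy as np

# Hypothetical usage, assuming this __init__ belongs to
# DualCPNetAutoGenerateDataset (cf. the call in Example 7).
dataset = DualCPNetAutoGenerateDataset(
    '../../train_data/OcclusionChallengeICCV2015',    # root containing models_ply/
    '../../train_data/VOCdevkit/VOC2012/JPEGImages',  # pool of background .jpg files
    np.arange(100),                                   # data indices to draw from
    gaussian_noise=True,                              # a couple of example augmentations
    contrast=True,
    metric_filter=0.17)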
Example 2
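
The snippets on this page are excerpts and omit their imports. A plausible preamble for the example below, assuming the SIXD-toolkit-style helper modules the code calls into; the pysixd package path is an assumption:

import argparse
import os

import matplotlib.pyplot as plt
import numpy as np

# Assumed module locations; in the SIXD toolkit these helpers live under
# pysixd (inout.load_ply, renderer.render, transform.rotation_matrix).
from pysixd import inout, renderer, transform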
def main():
    parser = argparse.ArgumentParser(description='Data Augmentation Samples')
    parser.add_argument(
        '--path',
        '-p',
        default="../../train_data/OcclusionChallengeICCV2015/models_ply",
        help='Path to ply data')
    parser.add_argument('--out',
                        '-o',
                        default="output.png",
                        help='output image name')
    parser.add_argument(
        '--objs',
        nargs='+',
        default=[
            'Ape', 'Can', 'Cat', 'Driller', 'Duck', 'Eggbox', 'Glue',
            'Holepuncher'
        ],
        help='object names to load')
    args = parser.parse_args()

    # load object ply
    models = []
    for obj_name in args.objs:
        print 'load ply:', obj_name
        models.append(
            inout.load_ply(os.path.join(args.path, '{}.ply'.format(obj_name))))

    # Camera parameters
    K = np.eye(3)
    K[0, 0] = 500.0  # fx
    K[1, 1] = 500.0  # fy
    K[0, 2] = 250.0  # cx
    K[1, 2] = 250.0  # cy
    im_size = (500, 500)

    # pose
    R = np.zeros((3, 3, 3))
    R[0] = transform.rotation_matrix(np.pi, (1, 0, 0))[:3, :3]
    R[1] = transform.rotation_matrix(np.pi, (1, 0.5, 0.5))[:3, :3]
    R[2] = transform.rotation_matrix(np.pi, (1, 0, 1))[:3, :3]

    t = np.array([[0, 0, 0.3]]).T

    # output fig
    fig = plt.figure(figsize=(len(models), 3))

    for i in range(len(models)):
        for j in range(3):
            depth = renderer.render(models[i],
                                    im_size,
                                    K,
                                    R[j],
                                    t,
                                    0.1,
                                    2.0,
                                    mode='depth')
            ax = fig.add_subplot(3, len(models), i + len(models) * j + 1)
            ax.imshow(depth)

    plt.show()
Example 3
    def __init__(self,
                 path,
                 objs_indices,
                 background_path,
                 models_path=None,
                 img_height=480,
                 img_width=640,
                 gaussian_noise=False,
                 gamma_augmentation=False,
                 avaraging=False,
                 salt_pepper_noise=False,
                 contrast=False,
                 bg_flip=True,
                 channel_swap=True,
                 random_iteration=False,
                 mode='test',
                 interval=1,
                 resize_rate=0.5,
                 iteration_per_epoch=1000,
                 metric_filter=1.0):

        super(LinemodSIXDRenderingDataset,
              self).__init__(path,
                             objs_indices,
                             background_path,
                             img_height=img_height,
                             img_width=img_width,
                             gaussian_noise=gaussian_noise,
                             gamma_augmentation=gamma_augmentation,
                             avaraging=avaraging,
                             salt_pepper_noise=salt_pepper_noise,
                             contrast=contrast,
                             mode=mode,
                             interval=interval,
                             resize_rate=resize_rate,
                             random_iteration=random_iteration,
                             iteration_per_epoch=iteration_per_epoch,
                             bg_flip=bg_flip,
                             channel_swap=channel_swap,
                             load_poses=False,
                             metric_filter=metric_filter)
        if models_path is None:
            models_path = os.path.join(path, 'models')
        models_fpath_mask = os.path.join(models_path, 'obj_{0:0>2}.ply')
        self.models = []
        for obj in objs_indices:
            print 'Loading data: obj_{0:0>2}'.format(obj)
            model_fpath = models_fpath_mask.format(obj)
            self.models.append(inout.load_ply(model_fpath))

        self.K = np.array([[572.41140, 0, 325.26110],
                           [0, 573.57043, 242.04899], [0, 0, 1]])

        self.iteration_per_epoch = iteration_per_epoch
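
A minimal construction sketch for this subclass, with paths and indices taken from the other examples on this page; the exact options are assumptions:

import numpy as np

# Hypothetical usage; paths mirror those in Examples 5 and 6.
train = LinemodSIXDRenderingDataset(
    '../../train_data/linemodSIXD2017',               # root containing models/
    np.arange(15) + 1,                                # object ids 1..15
    '../../train_data/VOCdevkit/VOC2012/JPEGImages',  # background images
    mode='train',
    iteration_per_epoch=1000,
    metric_filter=0.17)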
Example 4
def main():
    # Camera parameters
    im_size = (640, 480)
    K = np.array([[572.41140, 0, 325.26110],
                  [0, 573.57043, 242.04899],
                  [0, 0, 1]])

    objs = ['Ape', 'Can', 'Cat', 'Driller', 'Duck', 'Eggbox', 'Glue', 'Holepuncher']
    model_fpath_mask = os.path.join('../../train_data/OcclusionChallengeICCV2015',
                                    'models_ply', '{0}.ply')

    pose_fpath_mask = os.path.join('../../train_data/OcclusionChallengeICCV2015',
                                   'poses', '{0}', '*.txt')
    models = []
    gt_poses = []
    for obj in objs:
        print 'Loading data:', obj
        model_fpath = model_fpath_mask.format(obj)
        models.append(inout.load_ply(model_fpath))

        gt_fpaths = sorted(glob.glob(pose_fpath_mask.format(obj)))
        gt_poses_obj = []
        for gt_fpath in gt_fpaths:
            gt_poses_obj.append(
                inout.load_gt_pose_dresden(gt_fpath))
        gt_poses.append(gt_poses_obj)

    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(16, 10))
    repeats = 100
    for i in range(repeats):
        print "multi object rendering {} / {}".format(i, repeats)
        rot_list = []
        pos_list = []
        model_list = []
        labels = []
        for obj_id, obj_name in enumerate(objs):
            # pos = np.random.rand(3) - 0.5
            # pos = np.zeros(3)
            # pos[2] += -2.0
            # pos = np.array([0,0,-1.5]).T
            # rot = np.eye(3)
            pose = gt_poses[obj_id][int(i)]
            if pose['R'].size != 0 and pose['t'].size != 0:
                pos = pose['t']
                rot = pose['R']
                pos_list.append(pos)
                rot_list.append(rot)
                model_list.append(models[obj_id])
                # rgb_ren, depth_ren = renderer.render(
                #     models[obj_id], im_size, K, rot, pos, 0.1, 4.0, mode='rgb+depth')
                labels.append(obj_id + 1)
        rgb_ren, depth_ren, label_ren = multi_object_renderer.render(
            model_list, im_size, K, rot_list, pos_list, 0.1, 4.0,
            labels=labels, mode='rgb+depth+label')

        label_img = np.zeros((im_size[1], im_size[0], 3))
        n_colors = len(DEFAULT_COLORS)
        for lbl_id in labels:
            color = color_dict[DEFAULT_COLORS[lbl_id % n_colors]]
            label_img[(label_ren == lbl_id), :] = color

        # Clear axes
        for ax in axes.flatten():
            ax.clear()
        axes[0].imshow(rgb_ren.astype(np.uint8))
        axes[0].set_title('RGB image')
        axes[1].imshow(depth_ren)
        axes[1].set_title('Depth image')
        axes[2].imshow(label_img)
        axes[2].set_title('Label image')
        fig.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95,
                            hspace=0.15, wspace=0.15)
        plt.draw()
        # plt.pause(0.01)
        plt.waitforbuttonpress()
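
The example assumes module-level DEFAULT_COLORS and color_dict globals. A minimal stand-in that satisfies the way they are indexed above; the concrete color names are assumptions:

from matplotlib import colors as mcolors

# Stand-ins for the globals the snippet indexes: DEFAULT_COLORS is a tuple of
# color names and color_dict maps a name to an RGB triple in [0, 1].
DEFAULT_COLORS = ('red', 'green', 'blue', 'yellow', 'magenta', 'cyan')
color_dict = dict((name, mcolors.colorConverter.to_rgb(name))
                  for name in DEFAULT_COLORS)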
Example 5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file')
    parser.add_argument('--auto-data', dest='auto_data', action='store_true')
    parser.set_defaults(auto_data=False)
    parser.add_argument('-g', '--gpu', default=-1, type=int,
                        help='if -1, use cpu only (default: -1)')
    args = parser.parse_args()
    alpha = 0.3
    image_alpha = 1
    render_eps = 0.015

    data_path = '../../train_data/linemodSIXD2017'
    bg_path = '../../train_data/VOCdevkit/VOC2012/JPEGImages'

    ## delta for visibility correspondance
    delta = 0.015 # [m]
    # objs =['background', 'Ape', 'Can', 'Cat', 'Driller', 'Duck', 'Eggbox', 'Glue', 'Holepuncher']
    objs = np.arange(15) + 1
    n_class = len(objs) + 1
    distance_sanity = 0.05
    min_distance = 0.005
    output_scale = 0.14
    prob_eps = 0.4
    eps = 0.05
    im_size = (640, 480)
    interval = 15

    K_orig = np.array([[572.41140000, 0.00000000, 325.26110000],
                       [0.00000000, 573.57043000, 242.04899000],
                       [0.00000000, 0.00000000, 1.00000000]])

    ## load object models
    obj_model_fpath_mask = os.path.join(data_path, 'models', 'obj_{0:0>2}.ply')
    obj_models = []
    for obj in objs:
        print 'Loading data: obj_{0:0>2}'.format(obj)
        obj_model_fpath = obj_model_fpath_mask.format(obj)
        obj_models.append(inout.load_ply(obj_model_fpath))

    ## load network model
    model = DualCenterProposalNetworkRes50_predict7(n_class=n_class)
    chainer.serializers.load_npz(args.model_file, model)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    test = LinemodSIXDExtendedDataset(data_path, objs,
                                      mode='test',
                                      interval=interval,
                                      metric_filter=output_scale + eps)

    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(16, 10))
    im_ids = range(len(test))

    # pose estimator instance
    # pei = SimplePoseEstimationInterface(distance_sanity=distance_sanity,
    #                                     base_path=data_path,
    #                                     min_distance=min_distance, eps=eps, im_size=im_size)

    pei = PoseEstimationInterface(objs=['obj_{0:0>2}'.format(i) for i in objs],
                                  base_path=data_path,
                                  distance_sanity=distance_sanity,
                                  model_scale=1000.0,
                                  model_partial=1,
                                  min_distance=min_distance, eps=prob_eps, im_size=im_size)

    imagenet_mean = np.array(
        [103.939, 116.779, 123.68], dtype=np.float32)[np.newaxis, np.newaxis, :]
    colors = ('grey', 'red', 'blue', 'yellow', 'magenta', 'green', 'indigo', 'darkorange', 'cyan', 'pink',
              'yellowgreen', 'blueviolet', 'brown', 'darkmagenta', 'aqua', 'crimson')

    # save output img
    save_image = False

    if save_image:
        save_dir = os.path.join("bvise_demo_data")
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

    for im_id in im_ids:
        print "executing {0} / {1}".format(im_id, test.__len__())
        img_rgb, label, img_depth, img_cp, img_ocp, pos, rot, pc, obj_mask, nonnan_mask, K = test.get_example(im_id)
        x_data = np.expand_dims(img_rgb, axis=0)
        with chainer.no_backprop_mode():
            if args.gpu >= 0:
                x_data = cuda.to_gpu(x_data)
            x = chainer.Variable(x_data)
            with chainer.using_config('train', False):
                y_cls_d, y_cp_d, y_ocp_d = model(x)
                cls_pred = chainer.functions.argmax(y_cls_d, axis=1)[0]
                cls_prob = chainer.functions.max(y_cls_d, axis=1)[0]
                cls_pred = chainer.cuda.to_cpu(cls_pred.data)
                cls_prob = chainer.cuda.to_cpu(cls_prob.data)
                y_cls = chainer.cuda.to_cpu(y_cls_d.data)[0]
                y_cp = chainer.cuda.to_cpu(y_cp_d.data)[0]
                y_ocp = chainer.cuda.to_cpu(y_ocp_d.data)[0]

        cls_pred[cls_prob < prob_eps] = 0

        y_pos, y_rot = pei.execute(y_cls, y_cp * output_scale, y_ocp * output_scale, img_depth, K)

        img_rgb = (img_rgb.transpose(1, 2, 0) * 255.0 + imagenet_mean).astype(np.uint8)
        img_rgb_resize = cv2.resize(img_rgb, (img_rgb.shape[1] / 2, img_rgb.shape[0] / 2))
        # gray = img_as_float(rgb2gray(img_rgb_resize))
        gray = img_as_float(rgb2gray(img_rgb))
        gray = gray2rgb(gray) * image_alpha + (1 - image_alpha)

        cls_gt = np.zeros_like(img_rgb_resize)
        cls_mask = np.zeros_like(img_rgb_resize)
        # cls ground truth / prediction overlays; objs holds the 1-based class
        # ids (0 is background), so compare against the id itself rather than
        # the enumerate index.
        for cls in objs:
            color = color_dict[colors[cls]]
            acls = (cls_pred == cls)[:, :, np.newaxis] * color
            acls_gt = (label == cls)[:, :, np.newaxis] * color
            cls_mask = cls_mask + acls
            cls_gt = cls_gt + acls_gt

        # Keep the overlays in [0, 1]; they are scaled to uint8 only after
        # blending with the gray image below.
        cls_mask = cv2.resize(cls_mask, im_size, interpolation=cv2.INTER_NEAREST)
        cls_gt = cv2.resize(cls_gt, im_size, interpolation=cv2.INTER_NEAREST)

        cls_vis_gt = cls_gt * alpha + gray * (1 - alpha)
        cls_vis_gt = (cls_vis_gt * 255).astype(np.uint8)
        cls_vis = cls_mask * alpha + gray * (1 - alpha)
        cls_vis = (cls_vis * 255).astype(np.uint8)

        pose_vis_gt = (gray.copy() * 255).astype(np.uint8)
        pose_vis_pred = (gray.copy() * 255).astype(np.uint8)
        vis_render = True

        for i in six.moves.range(n_class - 1):
            if np.sum(pos[i]) != 0 and  np.sum(rot[i]) != 0:
                pos_diff = np.linalg.norm(y_pos[i] - pos[i])
                quat = quaternion.from_rotation_matrix(np.dot(y_rot[i].T, rot[i]))
                quat_w = min(1, abs(quat.w))
                diff_angle = np.rad2deg(np.arccos(quat_w)) * 2
                print "obj_{0:0>2} : position_diff = {1}, rotation_diff = {2}".format(i + 1, pos_diff, diff_angle)

            if not vis_render:
                continue
            # Render the object model
            img_depth_resize = cv2.resize(img_depth, im_size)
            if np.sum(rot[i]) != 0:
                ren_gt = renderer.render(
                    obj_models[i], im_size,  K_orig,
                    rot[i], (pos[i].T * 1000), 100, 4000, mode='rgb+depth')
                mask = np.logical_and((ren_gt[1] != 0), np.logical_or((ren_gt[1] / 1000.0 < img_depth_resize + render_eps), (img_depth_resize == 0)))
                pose_vis_gt[mask, :] = ren_gt[0][mask]
            if np.sum(y_rot[i]) != 0:
                ren_pred = renderer.render(
                    obj_models[i], im_size, K_orig, y_rot[i],
                    (y_pos[i].T * 1000), 100, 4000, mode='rgb+depth')
                mask = np.logical_and((ren_pred[1] != 0), np.logical_or((ren_pred[1] / 1000.0 < img_depth_resize + render_eps), (img_depth_resize == 0)))
                pose_vis_pred[mask, :] = ren_pred[0][mask]

        if save_image:
            cv2.imwrite(os.path.join(save_dir, "rgb_{0:0>4}.png".format(im_id)), img_rgb.astype(np.uint8))
            cv2.imwrite(os.path.join(save_dir, "mask_vis_{0:0>4}.png".format(im_id)), cls_vis[:, :, ::-1].astype(np.uint8))
            cv2.imwrite(os.path.join(save_dir, "pose_{0:0>4}.png".format(im_id)), pose_vis_pred[:, :, ::-1].astype(np.uint8))
            cv2.imwrite(os.path.join(save_dir, "gt_pose_{0:0>4}.png".format(im_id)), pose_vis_gt[:, :, ::-1].astype(np.uint8))

        # Clear axes
        for ax in axes.flatten():
            ax.clear()
        axes[0, 0].imshow(img_rgb[:, :, ::-1].astype(np.uint8))
        axes[0, 0].set_title('RGB image')
        axes[0, 1].imshow(img_depth)
        axes[0, 1].set_title('Depth image')
        axes[1, 0].imshow(cls_vis_gt)
        axes[1, 0].set_title('class gt')
        axes[1, 1].imshow(cls_vis)
        axes[1, 1].set_title('class pred')
        axes[2, 0].imshow(pose_vis_gt)
        axes[2, 0].set_title('pose gt vis')
        axes[2, 1].imshow(pose_vis_pred)
        axes[2, 1].set_title('pose pred vis')

        fig.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95, hspace=0.15, wspace=0.15)
        plt.draw()
        plt.pause(0.01)
        plt.waitforbuttonpress()
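
The rotation error above is computed inline from the quaternion of the relative rotation; the same computation factored out as a helper (a sketch mirroring the code, not a function from the original module):

import numpy as np
import quaternion  # numpy-quaternion, as used by the example

def rotation_diff_deg(R_pred, R_gt):
    # Geodesic distance between two rotations in degrees: the relative
    # rotation's quaternion has w = cos(theta / 2).
    q = quaternion.from_rotation_matrix(np.dot(R_pred.T, R_gt))
    w = min(1.0, abs(q.w))  # clamp against numerical overshoot
    return np.rad2deg(2.0 * np.arccos(w))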
Example 6

def main():
    parser = argparse.ArgumentParser(description='generate mask image of LinemodSIXD dataset')
    parser.add_argument('--mode',  default='train',
                        help='select train or test')
    args = parser.parse_args()
    mode = args.mode
    if mode not in ['train', 'test']:
        sys.stderr.write("Error: mode should be 'train' or 'test', but mode is {}\n".format(mode))
        sys.exit()

    objs = np.arange(15) + 1
    im_size = (640, 480)
    data_basepath = '../train_data/linemodSIXD2017'

    model_fpath_mask = os.path.join(data_basepath, 'models', 'obj_{0:0>2}.ply')
    rgb_fpath_mask = os.path.join(data_basepath, mode, '{0:0>2}', 'rgb', '{1:0>4}.png')
    depth_fpath_mask = os.path.join(data_basepath, mode, '{0:0>2}', 'depth', '{1:0>4}.png')
    gt_poses_mask = os.path.join(data_basepath, mode, '{0:0>2}','gt.yml')
    info_mask = os.path.join(data_basepath, mode, '{0:0>2}','info.yml')
    gt_mask_dir = os.path.join(data_basepath, mode, '{0:0>2}', 'mask')
    mask_fpath_mask = os.path.join(data_basepath, mode, '{0:0>2}', 'mask', '{1:0>4}_{2:0>2}.png')

    # load object models
    models = []
    if mode == 'test':
        for obj in objs:
            print 'Loading data: {0:0>2}'.format(obj)
            model_fpath = model_fpath_mask.format(obj)
            models.append(inout.load_ply(model_fpath))

    ## delta for visibility correspondance
    delta = 15 # [mm]

    for scene_id in six.moves.range(len(objs)):
        gt_poses = yaml.load(open(gt_poses_mask.format(objs[scene_id])))
        info = yaml.load(open(info_mask.format(objs[scene_id])))
        if not os.path.exists(gt_mask_dir.format(objs[scene_id])):
            os.makedirs(gt_mask_dir.format(objs[scene_id]))
        n_imgs = len(glob.glob(rgb_fpath_mask.format(objs[scene_id], '****')))  # '****' fills the 4-digit slot as a glob pattern
        for im_id in six.moves.range(n_imgs):
            print "scene : obj_{0:0>2}, image id : {1}".format(objs[scene_id], im_id)
            if mode == 'test':
                depth_fpath = depth_fpath_mask.format(objs[scene_id], im_id)
                depth = cv2.imread(depth_fpath, cv2.IMREAD_UNCHANGED).astype(np.float32)
                depth = preprocess_utils.depth_inpainting(depth)
                K = np.asarray(info[im_id]['cam_K']).reshape(3, 3)
                # Convert the input depth image to a distance image
                dist = depth_im_to_dist_im(depth, K)
                poses = gt_poses[im_id]
                for pose in poses:
                    obj_id = pose['obj_id']
                    rot = np.asarray(pose['cam_R_m2c']).reshape(3, 3)
                    trans = np.asarray(pose['cam_t_m2c']).T
                    # Render the object model
                    depth_ren_gt = renderer.render(
                        models[obj_id - 1], im_size, K, rot, trans, 100, 2500, mode='depth')
                    dist_ren_gt = depth_im_to_dist_im(depth_ren_gt, K)
                    visib_mask = estimate_visib_mask(dist, dist_ren_gt, delta)

                    cv2.imwrite(mask_fpath_mask.format(objs[scene_id], im_id, obj_id), visib_mask * 255)

            elif mode == 'train':
                depth_fpath = depth_fpath_mask.format(objs[scene_id], im_id)
                depth = cv2.imread(depth_fpath, cv2.IMREAD_UNCHANGED).astype(np.float32)
                mask = (depth > 0) * 255
                cv2.imwrite(mask_fpath_mask.format(objs[scene_id], im_id, objs[scene_id]), mask)
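
This script leans on depth_im_to_dist_im and estimate_visib_mask, which are not shown. A minimal sketch of what they compute, following the SIXD toolkit convention; treat the exact signatures as assumptions:

import numpy as np

def depth_im_to_dist_im(depth, K):
    # Convert depth (Z along the optical axis) to per-pixel Euclidean
    # distance from the camera center, using the intrinsics K.
    ys, xs = np.mgrid[:depth.shape[0], :depth.shape[1]]
    Xs = (xs - K[0, 2]) * depth / K[0, 0]
    Ys = (ys - K[1, 2]) * depth / K[1, 1]
    return np.sqrt(Xs ** 2 + Ys ** 2 + depth ** 2)

def estimate_visib_mask(d_test, d_model, delta):
    # A rendered-model pixel counts as visible when both distance maps are
    # valid there and the rendered surface is not occluded by more than delta.
    valid = np.logical_and(d_test > 0, d_model > 0)
    return np.logical_and(d_model - d_test <= delta, valid)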
Example 7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file')
    parser.add_argument('--auto-data', dest='auto_data', action='store_true')
    parser.set_defaults(auto_data=False)
    parser.add_argument('-g',
                        '--gpu',
                        default=-1,
                        type=int,
                        help='if -1, use cpu only (default: -1)')
    args = parser.parse_args()
    alpha = 0.3
    image_alpha = 1
    render_eps = 0.015

    data_path = '../../train_data/OcclusionChallengeICCV2015'
    bg_path = '../../train_data/VOCdevkit/VOC2012/JPEGImages'

    ## delta for visibility correspondance
    delta = 0.015  # [m]

    objs = [
        'background', 'Ape', 'Can', 'Cat', 'Driller', 'Duck', 'Eggbox', 'Glue',
        'Holepuncher'
    ]
    n_class = len(objs)
    distance_sanity = 0.05
    min_distance = 0.005
    output_scale = 0.12
    eps = 0.2
    im_size = (640, 480)
    # im_size=(512, 384)

    ## load object models
    obj_model_fpath_mask = os.path.join(data_path, 'models_ply', '{0}.ply')
    obj_models = []
    for obj in objs:
        if obj != 'background':
            print 'Loading data:', obj
            obj_model_fpath = obj_model_fpath_mask.format(obj)
            obj_models.append(inout.load_ply(obj_model_fpath))

    ## load network model
    model = DualCenterProposalNetworkRes50_predict7(n_class=n_class)
    chainer.serializers.load_npz(args.model_file, model)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    if args.auto_data:
        test_range = np.arange(100)
        test = DualCPNetAutoGenerateDataset(data_path,
                                            bg_path,
                                            test_range,
                                            random_iteration=True,
                                            metric_filter=output_scale + eps)
    else:
        train_range, test_range = train_test_split(np.arange(1213),
                                                   test_size=100,
                                                   random_state=1234)
        test = DualCPNetDataset(data_path,
                                test_range,
                                img_height=im_size[1],
                                img_width=im_size[0])

    # pose estimator instance
    pei = SimplePoseEstimationInterface(distance_sanity=distance_sanity,
                                        base_path=data_path,
                                        min_distance=min_distance,
                                        eps=eps,
                                        im_size=im_size)

    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(16, 10))
    n_colors = len(DEFAULT_COLORS)
    im_ids = range(len(test))

    imagenet_mean = np.array([103.939, 116.779, 123.68],
                             dtype=np.float32)[np.newaxis, np.newaxis, :]

    for im_id in im_ids:
        print "executing {0} / {1}".format(im_id, test.__len__())
        img_rgb, label, img_depth, img_cp, img_ocp, pos, rot, pc, obj_mask, nonnan_mask, K = test.get_example(
            im_id)
        x_data = np.expand_dims(img_rgb, axis=0)
        with chainer.no_backprop_mode():
            if args.gpu >= 0:
                x_data = cuda.to_gpu(x_data)
            x = chainer.Variable(x_data)
            with chainer.using_config('train', False):
                y_cls_d, y_cp_d, y_ocp_d = model(x)
                cls_pred = chainer.functions.argmax(y_cls_d, axis=1)[0]
                cls_pred = chainer.cuda.to_cpu(cls_pred.data)
                y_cls = chainer.cuda.to_cpu(y_cls_d.data)[0]
                y_cp = chainer.cuda.to_cpu(y_cp_d.data)[0]
                y_ocp = chainer.cuda.to_cpu(y_ocp_d.data)[0]

        estimated_ocp, estimated_R = pei.execute(y_cls, y_cp * output_scale,
                                                 y_ocp * output_scale,
                                                 img_depth, K)

        img_rgb = (img_rgb.transpose(1, 2, 0) * 255.0 + imagenet_mean).astype(
            np.uint8)
        img_rgb_resize = cv2.resize(
            img_rgb, (img_rgb.shape[1] / 2, img_rgb.shape[0] / 2))
        gray = img_as_float(rgb2gray(img_rgb_resize))
        gray = gray2rgb(gray) * image_alpha + (1 - image_alpha)

        cls_gt = np.zeros_like(img_rgb_resize)
        cls_mask = np.zeros_like(img_rgb_resize)
        # cls ground truth
        for cls_id, cls in enumerate(objs):
            if cls_id != 0:
                color = color_dict[DEFAULT_COLORS[cls_id % n_colors]]
                acls = (cls_pred == cls_id)[:, :, np.newaxis] * color
                acls_gt = (label == cls_id)[:, :, np.newaxis] * color
                cls_mask = cls_mask + acls
                cls_gt = cls_gt + acls_gt
        cls_vis_gt = cls_gt * alpha + gray * (1 - alpha)
        cls_vis_gt = (cls_vis_gt * 255).astype(np.uint8)
        cls_vis = cls_mask * alpha + gray * (1 - alpha)
        cls_vis = (cls_vis * 255).astype(np.uint8)

        pose_vis_gt = (gray.copy() * 255).astype(np.uint8)
        pose_vis_pred = (gray.copy() * 255).astype(np.uint8)
        for obj_id, obj_name in enumerate(objs):
            if obj_name == 'background':
                continue
            # Render the object model
            if np.sum(rot[obj_id - 1]) != 0:
                ren_gt = renderer.render(obj_models[obj_id - 1],
                                         (im_size[0] / 2, im_size[1] / 2),
                                         K,
                                         rot[obj_id - 1],
                                         pos[obj_id - 1].T,
                                         0.1,
                                         4.0,
                                         mode='rgb+depth')
                mask = np.logical_and(
                    (ren_gt[1] != 0),
                    np.logical_or((ren_gt[1] < img_depth + render_eps),
                                  (img_depth == 0)))
                pose_vis_gt[mask, :] = ren_gt[0][mask]
            if np.sum(estimated_R[obj_id - 1]) != 0:
                ren_pred = renderer.render(obj_models[obj_id - 1],
                                           (im_size[0] / 2, im_size[1] / 2),
                                           K,
                                           estimated_R[obj_id - 1],
                                           estimated_ocp[obj_id - 1].T,
                                           0.1,
                                           4.0,
                                           mode='rgb+depth')
                mask = np.logical_and(
                    (ren_pred[1] != 0),
                    np.logical_or((ren_pred[1] < img_depth + render_eps),
                                  (img_depth == 0)))
                pose_vis_pred[mask, :] = ren_pred[0][mask]

        # Clear axes
        for ax in axes.flatten():
            ax.clear()
        axes[0, 0].imshow(img_rgb[:, :, ::-1].astype(np.uint8))
        axes[0, 0].set_title('RGB image')
        axes[0, 1].imshow(img_depth)
        axes[0, 1].set_title('Depth image')
        axes[1, 0].imshow(cls_vis_gt)
        axes[1, 0].set_title('class gt')
        axes[1, 1].imshow(cls_vis)
        axes[1, 1].set_title('class pred')
        axes[2, 0].imshow(pose_vis_gt)
        axes[2, 0].set_title('pose gt vis')
        axes[2, 1].imshow(pose_vis_pred)
        axes[2, 1].set_title('pose pred vis')

        fig.subplots_adjust(left=0.05,
                            bottom=0.05,
                            right=0.95,
                            top=0.95,
                            hspace=0.15,
                            wspace=0.15)
        plt.draw()
        # plt.pause(0.01)
        plt.waitforbuttonpress()
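
Examples 5 and 7 repeat the same occlusion-aware paste of a rendered model onto the grayscale backdrop; the test condenses to a small helper (a sketch of the logic above, not an existing function in the repo):

import numpy as np

def paste_render(canvas, ren_rgb, ren_depth, scene_depth, eps):
    # Overwrite canvas pixels where the model was rendered and is not hidden:
    # either no depth was measured there, or the rendered surface lies in
    # front of (or within eps of) the measured one.
    visible = np.logical_and(
        ren_depth != 0,
        np.logical_or(ren_depth < scene_depth + eps, scene_depth == 0))
    canvas[visible, :] = ren_rgb[visible]
    return canvas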
Example 8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file')
    parser.add_argument('-g',
                        '--gpu',
                        default=0,
                        type=int,
                        help='if -1, use cpu only (default: 0)')
    args = parser.parse_args()
    alpha = 0.3
    image_alpha = 1
    render_eps = 0.015

    data_path = '../../train_data/linemodSIXD2017'

    ## delta for visibility correspondance
    delta = 0.015  # [m]
    objs = np.arange(15) + 1
    n_class = len(objs) + 1
    distance_sanity = 0.02
    min_distance = 0.005
    output_scale = 0.14
    prob_eps = 0.4
    eps = 0.02
    im_size = (640, 480)
    interval = 15

    ## load object models
    obj_model_fpath_mask = os.path.join(data_path, 'models', 'obj_{0:0>2}.ply')
    obj_models = []
    for obj in objs:
        print 'Loading data: obj_{0:0>2}'.format(obj)
        obj_model_fpath = obj_model_fpath_mask.format(obj)
        obj_models.append(inout.load_ply(obj_model_fpath))

    ## load network model
    model = DualCenterProposalNetworkRes50_predict7(n_class=n_class)
    chainer.serializers.load_npz(args.model_file, model)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # pose estimator instance
    # pei = SimplePoseEstimationInterface(distance_sanity=distance_sanity,
    #                                     min_distance=min_distance, eps=prob_eps)
    pei = PoseEstimationInterface(objs=['obj_{0:0>2}'.format(i) for i in objs],
                                  base_path=data_path,
                                  n_ransac=500,
                                  distance_sanity=distance_sanity,
                                  model_scale=1000.0,
                                  model_partial=1,
                                  min_distance=min_distance,
                                  eps=prob_eps,
                                  im_size=im_size)
    scores_pos = []
    scores_rot = []
    ids_arr = []
    scores_5cm5deg = []
    scores_6dpose = []
    test_objs = np.delete(np.arange(15) + 1, [2, 6])
    for obj_id in test_objs:
        test = LinemodSIXDSingleInstanceDataset(data_path,
                                                obj_id,
                                                mode='test',
                                                interval=interval,
                                                metric_filter=output_scale + eps)
        im_ids = len(test)
        # im_ids = 50
        detect_cnt = 0
        cnt_5cm5deg = 0
        cnt_6dpose = 0
        sum_pos = 0
        sum_rot = 0
        for im_id in tqdm.trange(im_ids):
            # print "executing {0} / {1}".format(im_id, test.__len__())
            img_rgb, img_depth, pos, rot, K = test.get_example(im_id)
            x_data = np.expand_dims(img_rgb, axis=0)
            with chainer.no_backprop_mode():
                if args.gpu >= 0:
                    x_data = cuda.to_gpu(x_data)
                x = chainer.Variable(x_data)
                with chainer.using_config('train', False):
                    y_cls_d, y_cp_d, y_ocp_d = model(x)
                    y_cls = chainer.cuda.to_cpu(y_cls_d.data)[0]
                    y_cp = chainer.cuda.to_cpu(y_cp_d.data)[0]
                    y_ocp = chainer.cuda.to_cpu(y_ocp_d.data)[0]

            y_pos, y_rot = pei.execute(y_cls,
                                       y_cp * output_scale,
                                       y_ocp * output_scale,
                                       img_depth,
                                       K,
                                       estimate_idx=[obj_id - 1])
            for i in six.moves.range(n_class - 1):
                if np.sum(pos[i]) != 0 and np.sum(rot[i]) != 0:
                    # 5cm 5deg metric
                    pos_diff = np.linalg.norm(y_pos[i] - pos[i])
                    quat = quaternion.from_rotation_matrix(
                        np.dot(y_rot[i].T, rot[i]))
                    quat_w = min(1, abs(quat.w))
                    diff_angle = np.rad2deg(np.arccos(quat_w)) * 2
                    # print "obj_{0:0>2} : position_diff = {1}, rotation_diff = {2}".format(i + 1, pos_diff, diff_angle)
                    if i + 1 == obj_id and pos_diff < 1.0:
                        sum_pos += pos_diff
                        sum_rot += diff_angle
                        detect_cnt += 1
                        if pos_diff < 0.05 and diff_angle < 5:
                            cnt_5cm5deg += 1
                    # 6d pose (ADD) metric; the predicted projection uses the
                    # estimated translation y_pos, not the ground truth.
                    model_pts = obj_models[i]['pts'].transpose(1, 0)
                    gt_proj = np.dot(rot[i].T, model_pts) + pos[i, :, np.newaxis]
                    pred_proj = np.dot(y_rot[i].T, model_pts) + y_pos[i, :, np.newaxis]
                    diff_mean = np.mean(np.linalg.norm(gt_proj - pred_proj, axis=0))
                    if (i + 1 == obj_id and pos_diff < 1.0
                            and diff_mean < 0.1 * object_diameters[i]):
                        cnt_6dpose += 1

        print "obj_{0:0>2} : detection_rate = {1},\n position_diff = {2}, rotation_diff = {3}, 5cm5deg = {4}, 6d pose = {5}" \
            .format(obj_id, detect_cnt / (1.0 * im_ids), sum_pos / detect_cnt, sum_rot / detect_cnt, cnt_5cm5deg / (1.0 * im_ids), cnt_6dpose / (1.0 * im_ids))
        scores_pos.append(sum_pos / im_ids)
        scores_rot.append(sum_rot / im_ids)
        scores_5cm5deg.append(cnt_5cm5deg)
        scores_6dpose.append(cnt_6dpose)
        ids_arr.append(im_ids)

    print "-- results --"
    print scores_pos
    print scores_rot
    print "-- total results --"
    ids_arr = np.asarray(ids_arr)
    scores_5cm5deg = np.asarray(scores_5cm5deg)
    ave_5cm5deg = np.sum(scores_5cm5deg) / (1.0 * np.sum(ids_arr))
    ave_6dpose = np.sum(scores_6dpose) / (1.0 * np.sum(ids_arr))
    print "5cm5deg metric : {}".format(ave_5cm5deg)
    print "6d pose metric : {}".format(ave_6dpose)

    # scores_pos = scores_pos * 1000 # m -> mm
    scores_5cm5deg = scores_5cm5deg / (1.0 * ids_arr)  # force float division
    scores_6dpose = scores_6dpose / (1.0 * ids_arr)
    scores_ave = np.array([ave_5cm5deg, ave_6dpose])

    if not os.path.exists('eval_results'):
        os.makedirs('eval_results')
    np.save('eval_results/scores_pos.npy', scores_pos)
    np.save('eval_results/scores_rot.npy', scores_rot)
    np.save('eval_results/scores_5cm5deg.npy', scores_5cm5deg)
    np.save('eval_results/scores_6dpose.npy', scores_6dpose)
    np.save('eval_results/scores_ave.npy', scores_ave)
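
The 6d-pose check in the loop above is the ADD metric: the mean distance between model points under the ground-truth and predicted poses, accepted below 0.1 of the object diameter. As a standalone sketch, keeping the snippet's transposed-rotation convention:

import numpy as np

def add_metric(R_gt, t_gt, R_pred, t_pred, model_pts):
    # model_pts: (3, N) array of model vertices. Returns the mean distance
    # between corresponding points under the two poses.
    gt_proj = np.dot(R_gt.T, model_pts) + t_gt[:, np.newaxis]
    pred_proj = np.dot(R_pred.T, model_pts) + t_pred[:, np.newaxis]
    return np.mean(np.linalg.norm(gt_proj - pred_proj, axis=0))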