Example #1

This example tests a single-frame FCN on an image database: it segments each frame, optionally fuses the predicted labels into a KinectFusion volume, and optionally estimates object poses with RANSAC from the predicted vertex map.
def test_net_single_frame(sess, net, imdb, weights_filename, rig_filename, is_kfusion):
    """Test a FCN on an image database."""

    output_dir = get_output_dir(imdb, weights_filename)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    print imdb.name
    if os.path.exists(seg_file):
        with open(seg_file, 'rb') as fid:
            segmentations = cPickle.load(fid)
        imdb.evaluate_segmentations(segmentations, output_dir)
        return

    """Test a FCN on an image database."""
    num_images = len(imdb.image_index)
    segmentations = [[] for _ in xrange(num_images)]

    # timers
    _t = {'im_segment' : Timer(), 'misc' : Timer()}

    # kinect fusion
    if is_kfusion:
        KF = kfusion.PyKinectFusion(rig_filename)

    # pose estimation
    if cfg.TEST.VERTEX_REG and cfg.TEST.RANSAC:
        RANSAC = ransac.PyRansac3D()

    # construct per-class colors as a flat [R0, G0, B0, R1, G1, B1, ...] array
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    if cfg.TEST.VISUALIZE:
        # visualize every 5th image
        perm = xrange(0, num_images, 5)
    else:
        perm = xrange(num_images)

    video_index = ''
    have_prediction = False
    for i in perm:

        # parse the video index from the image name ('<video>/<frame>')
        image_index = imdb.image_index[i]
        pos = image_index.find('/')
        if video_index == '':
            video_index = image_index[:pos]
            have_prediction = False
        elif video_index != image_index[:pos]:
            have_prediction = False
            video_index = image_index[:pos]
            print 'start video {}'.format(video_index)

        # read color image; zero out fully transparent pixels if there is an alpha channel
        rgba = pad_im(cv2.imread(imdb.image_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:,:,:3])
            alpha = rgba[:,:,3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # read depth image
        im_depth = pad_im(cv2.imread(imdb.depth_path_at(i), cv2.IMREAD_UNCHANGED), 16)

        # load meta data
        meta_data = scipy.io.loadmat(imdb.metadata_path_at(i))

        # read label image
        labels_gt = pad_im(cv2.imread(imdb.label_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        if len(labels_gt.shape) == 2:
            im_label_gt = imdb.labels_to_image(im, labels_gt)
        else:
            # swap the B and R channels (OpenCV loads BGR)
            im_label_gt = np.copy(labels_gt[:,:,:3])
            im_label_gt[:,:,0] = labels_gt[:,:,2]
            im_label_gt[:,:,2] = labels_gt[:,:,0]

        _t['im_segment'].tic()
        labels, probs, vertex_pred = im_segment_single_frame(sess, net, im, im_depth, meta_data, imdb.num_classes)
        if cfg.TEST.VERTEX_REG:
            vertmap = _extract_vertmap(labels, vertex_pred, imdb._extents, imdb.num_classes)
            if cfg.TEST.RANSAC:
                # pose estimation using RANSAC
                # camera intrinsics: focal lengths (fx, fy) and principal point (px, py)
                fx = meta_data['intrinsic_matrix'][0, 0]
                fy = meta_data['intrinsic_matrix'][1, 1]
                px = meta_data['intrinsic_matrix'][0, 2]
                py = meta_data['intrinsic_matrix'][1, 2]
                depth_factor = meta_data['factor_depth'][0, 0]
                poses = RANSAC.estimate_pose(im_depth, probs, vertex_pred[0,:,:,:] / cfg.TRAIN.VERTEX_W, imdb._extents, fx, fy, px, py, depth_factor)

                # print gt poses
                # cls_indexes = meta_data['cls_indexes']
                # poses_gt = meta_data['poses']
                # for j in xrange(len(cls_indexes)):
                #    print 'object {}'.format(cls_indexes[j])
                #    print poses_gt[:,:,j]
            else:
                poses = []

        _t['im_segment'].toc()

        _t['misc'].tic()
        labels = unpad_im(labels, 16)
        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        # initialize the voxel grid at the start of each video
        if not have_prediction and is_kfusion:
            KF.set_voxel_grid(-3, -3, -3, 6, 6, 7)

        # run kinect fusion
        if is_kfusion:
            height = im.shape[0]
            width = im.shape[1]
            labels_kfusion = np.zeros((height, width), dtype=np.int32)

            # swap B and R channels (OpenCV loads BGR) before feeding KinectFusion
            im_rgb = np.copy(im)
            im_rgb[:, :, 0] = im[:, :, 2]
            im_rgb[:, :, 2] = im[:, :, 0]
            KF.feed_data(im_depth, im_rgb, im.shape[1], im.shape[0], float(meta_data['factor_depth']))
            KF.back_project()
            if have_prediction:
                pose_world2live, pose_live2world = KF.solve_pose()

            KF.feed_label(im_label, probs, colors)
            KF.fuse_depth()
            labels_kfusion = KF.extract_surface(labels_kfusion)
            im_label_kfusion = imdb.labels_to_image(im, labels_kfusion)
            KF.render()
            filename = os.path.join(output_dir, 'images', '{:04d}'.format(i))
            KF.draw(filename, 0)
        have_prediction = True

        if is_kfusion:
            seg = {'labels': labels_kfusion}
        else:
            seg = {'labels': labels}
        segmentations[i] = seg

        _t['misc'].toc()

        print 'im_segment {}: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(video_index, i + 1, num_images, _t['im_segment'].diff, _t['misc'].diff)

        if cfg.TEST.VISUALIZE:
            if cfg.TEST.VERTEX_REG:
                # centers_gt = _vote_centers(labels_gt, meta_data['cls_indexes'], meta_data['center'], imdb.num_classes)
                vertmap_gt = pad_im(cv2.imread(imdb.vertmap_path_at(i), cv2.IMREAD_UNCHANGED), 16)
                vertmap_gt = vertmap_gt[:, :, (2, 1, 0)]
                vertmap_gt = vertmap_gt.astype(np.float32) / 255.0
                vertmap_gt = _unscale_vertmap(vertmap_gt, imdb._process_label_image(labels_gt), imdb._extents, imdb.num_classes)
                print 'visualization'
                vis_segmentations_vertmaps(im, im_depth, im_label, im_label_gt, imdb._class_colors, \
                    vertmap_gt, vertmap, labels, labels_gt, poses, meta_data['intrinsic_matrix'])
            else:
                vis_segmentations(im, im_depth, im_label, im_label_gt, imdb._class_colors)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    with open(seg_file, 'wb') as f:
        cPickle.dump(segmentations, f, cPickle.HIGHEST_PROTOCOL)

    # evaluation
    imdb.evaluate_segmentations(segmentations, output_dir)
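
For context, a minimal driver for this function might look like the sketch below. The get_imdb call mirrors Example #2; get_network, the dataset name, and the checkpoint/rig paths are assumptions for illustration, not part of the original code.

import tensorflow as tf
from datasets.factory import get_imdb      # assumed module path (see Example #2)
from networks.factory import get_network   # hypothetical network factory

def main():
    imdb = get_imdb('shapenet_scene_val')   # hypothetical dataset name
    net = get_network('vgg16_convs')        # hypothetical network name
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, 'output/model.ckpt')   # hypothetical checkpoint
        test_net_single_frame(sess, net, imdb, 'model.ckpt',
                              'data/rig.json', is_kfusion=False)

if __name__ == '__main__':
    main()
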
Example #2

This truncated snippet shows the setup of a similar test script: parsing arguments, loading the dataset, allocating a voxel label grid, and preparing a KinectFusion instance and per-class colors before the per-image loop.
    args = parse_args()
    is_save = 0

    print('Called with args:')
    print(args)

    # imdb
    imdb = get_imdb(args.imdb_name)
    """Test a FCN on an image database."""
    num_images = len(imdb.image_index)

    # voxel labels
    labels_voxel = np.zeros((128, 128, 128), dtype=np.int32)

    # kinect fusion
    KF = kfusion.PyKinectFusion(args.rig_name)

    # construct per-class colors as a 3 x num_classes array; class 0 is set to white
    colors = np.zeros((3, imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[0, i] = imdb._class_colors[i][0]
        colors[1, i] = imdb._class_colors[i][1]
        colors[2, i] = imdb._class_colors[i][2]
    colors[:, 0] = 255

    video_index = ''
    have_prediction = False
    for i in xrange(num_images):
        print i
        # parse image name
        image_index = imdb.image_index[i]
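
The snippet above cuts off inside the per-image loop. It relies on a parse_args helper that is not shown; a minimal sketch consistent with the attributes the script reads (args.imdb_name, args.rig_name) might be:

import argparse

def parse_args():
    # a sketch: the flag names are assumptions; only the dest names
    # (imdb_name, rig_name) are taken from the snippet above
    parser = argparse.ArgumentParser(description='Test a FCN with KinectFusion')
    parser.add_argument('--imdb', dest='imdb_name', required=True,
                        help='name of the dataset to test on')
    parser.add_argument('--rig', dest='rig_name', required=True,
                        help='rig file for PyKinectFusion')
    return parser.parse_args()
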
Example #3

This example tests a recurrent FCN that carries hidden state, weights, and 3D points across the frames of each video, tracking the camera pose either with KinectFusion or from the ground-truth extrinsics in the metadata.
def test_net(sess, net, imdb, weights_filename, rig_filename, is_kfusion):
    """Test a FCN on an image database."""

    output_dir = get_output_dir(imdb, weights_filename)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    print imdb.name
    if os.path.exists(seg_file):
        with open(seg_file, 'rb') as fid:
            segmentations = cPickle.load(fid)
        imdb.evaluate_segmentations(segmentations, output_dir)
        return

    """Test a FCN on an image database."""
    num_images = len(imdb.image_index)
    segmentations = [[] for _ in xrange(num_images)]

    # timers
    _t = {'im_segment' : Timer(), 'misc' : Timer()}

    # voxelizer
    voxelizer = Voxelizer(cfg.TEST.GRID_SIZE, imdb.num_classes)
    voxelizer.setup(-3, -3, -3, 3, 3, 4)
    # voxelizer.setup(-2, -2, -2, 2, 2, 2)

    # kinect fusion
    if is_kfusion:
        KF = kfusion.PyKinectFusion(rig_filename)

    # construct per-class colors as a flat [R0, G0, B0, R1, G1, B1, ...] array
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    if cfg.TEST.VISUALIZE:
        perm = np.random.permutation(np.arange(num_images))
    else:
        perm = xrange(num_images)

    video_index = ''
    have_prediction = False
    for i in perm:
        rgba = pad_im(cv2.imread(imdb.image_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        height = rgba.shape[0]
        width = rgba.shape[1]

        # parse the video index from the image name ('<video>/<frame>')
        image_index = imdb.image_index[i]
        pos = image_index.find('/')
        if video_index == '':
            # first frame overall: reset the recurrent state
            video_index = image_index[:pos]
            have_prediction = False
            state = np.zeros((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            weights = np.ones((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            points = np.zeros((1, height, width, 3), dtype=np.float32)
        elif video_index != image_index[:pos]:
            # new video: reset the recurrent state
            have_prediction = False
            video_index = image_index[:pos]
            state = np.zeros((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            weights = np.ones((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            points = np.zeros((1, height, width, 3), dtype=np.float32)
            print 'start video {}'.format(video_index)

        # handle the alpha channel: zero out fully transparent pixels
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:,:,:3])
            alpha = rgba[:,:,3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # read depth image
        im_depth = pad_im(cv2.imread(imdb.depth_path_at(i), cv2.IMREAD_UNCHANGED), 16)

        # load meta data
        meta_data = scipy.io.loadmat(imdb.metadata_path_at(i))

        # backprojection for the first frame of each video
        if not have_prediction:
            if is_kfusion:
                KF.set_voxel_grid(voxelizer.min_x, voxelizer.min_y, voxelizer.min_z,
                                  voxelizer.max_x - voxelizer.min_x,
                                  voxelizer.max_y - voxelizer.min_y,
                                  voxelizer.max_z - voxelizer.min_z)
                # identity transformation
                RT_world = np.zeros((3,4), dtype=np.float32)
                RT_world[0, 0] = 1
                RT_world[1, 1] = 1
                RT_world[2, 2] = 1
            else:
                # store the RT for the first frame
                RT_world = meta_data['rotation_translation_matrix']

        # run kinect fusion
        if is_kfusion:
            # swap B and R channels (OpenCV loads BGR) before feeding KinectFusion
            im_rgb = np.copy(im)
            im_rgb[:, :, 0] = im[:, :, 2]
            im_rgb[:, :, 2] = im[:, :, 0]
            KF.feed_data(im_depth, im_rgb, im.shape[1], im.shape[0], float(meta_data['factor_depth']))
            KF.back_project()
            if have_prediction:
                pose_world2live, pose_live2world = KF.solve_pose()
                RT_live = pose_world2live
            else:
                RT_live = RT_world
        else:
            # compute camera poses
            RT_live = meta_data['rotation_translation_matrix']

        # relative transform from the first frame of the video to the live frame
        pose_world2live = se3_mul(RT_live, se3_inverse(RT_world))
        pose_live2world = se3_inverse(pose_world2live)

        _t['im_segment'].tic()
        labels, probs, state, weights, points = im_segment(sess, net, im, im_depth, state, weights, points, meta_data, voxelizer, pose_world2live, pose_live2world)
        _t['im_segment'].toc()

        _t['misc'].tic()
        labels = unpad_im(labels, 16)

        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        if is_kfusion:
            labels_kfusion = np.zeros((height, width), dtype=np.int32)
            # pad the class probabilities with zeros up to 10 channels
            if probs.shape[2] < 10:
                probs_new = np.zeros((probs.shape[0], probs.shape[1], 10), dtype=np.float32)
                probs_new[:,:,:imdb.num_classes] = probs
                probs = probs_new
            KF.feed_label(im_label, probs, colors)
            KF.fuse_depth()
            labels_kfusion = KF.extract_surface(labels_kfusion)
            im_label_kfusion = imdb.labels_to_image(im, labels_kfusion)
            KF.render()
            filename = os.path.join(output_dir, 'images', '{:04d}'.format(i))
            KF.draw(filename, 0)
        have_prediction = True

        # carry the live pose forward as the reference for the next frame
        RT_world = RT_live

        if is_kfusion:
            seg = {'labels': labels_kfusion}
        else:
            seg = {'labels': labels}
        segmentations[i] = seg

        _t['misc'].toc()

        if cfg.TEST.VISUALIZE:
            # read label image
            labels_gt = pad_im(cv2.imread(imdb.label_path_at(i), cv2.IMREAD_UNCHANGED), 16)
            if len(labels_gt.shape) == 2:
                im_label_gt = imdb.labels_to_image(im, labels_gt)
            else:
                # swap the B and R channels (OpenCV loads BGR)
                im_label_gt = np.copy(labels_gt[:,:,:3])
                im_label_gt[:,:,0] = labels_gt[:,:,2]
                im_label_gt[:,:,2] = labels_gt[:,:,0]
            vis_segmentations(im, im_depth, im_label, im_label_gt, imdb._class_colors)

        print 'im_segment: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_segment'].diff, _t['misc'].diff)

    if is_kfusion:
        KF.draw(filename, 1)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    with open(seg_file, 'wb') as f:
        cPickle.dump(segmentations, f, cPickle.HIGHEST_PROTOCOL)

    # evaluation
    imdb.evaluate_segmentations(segmentations, output_dir)
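
Example #3 composes camera poses with se3_mul and se3_inverse (see the pose_world2live computation above). A plausible implementation for 3x4 rigid transforms [R | t], consistent with that usage, is sketched below; the project's actual helpers may differ in detail.

import numpy as np

def se3_inverse(RT):
    """Invert a 3x4 rigid transform [R | t]: the inverse is [R^T | -R^T t]."""
    R = RT[:, :3]
    t = RT[:, 3]
    RT_inv = np.zeros((3, 4), dtype=np.float32)
    RT_inv[:, :3] = R.T
    RT_inv[:, 3] = -R.T.dot(t)
    return RT_inv

def se3_mul(RT1, RT2):
    """Compose two 3x4 rigid transforms: apply RT2 first, then RT1."""
    R1, t1 = RT1[:, :3], RT1[:, 3]
    R2, t2 = RT2[:, :3], RT2[:, 3]
    RT = np.zeros((3, 4), dtype=np.float32)
    RT[:, :3] = R1.dot(R2)
    RT[:, 3] = R1.dot(t2) + t1
    return RT

With these definitions, se3_mul(RT_live, se3_inverse(RT_world)) yields the world-to-live transform used to drive the recurrent network.
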