Example no. 1
def test_net(sess, net, imdb, weights_filename):
    """Test a FCN on an image database."""

    output_dir = get_output_dir(imdb, weights_filename)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    print imdb.name
    if os.path.exists(seg_file):
        with open(seg_file, 'rb') as fid:
            segmentations = cPickle.load(fid)
        imdb.evaluate_segmentations(segmentations, output_dir)
        return

    """Test a FCN on an image database."""
    num_images = len(imdb.image_index)
    segmentations = [[] for _ in xrange(num_images)]

    roidb = imdb.roidb

    # timers
    _t = {'im_segment' : Timer(), 'misc' : Timer()}

    perm = np.random.permutation(np.arange(num_images))

    # for i in xrange(num_images):
    for i in perm:
        im = cv2.imread(roidb[i]['image'])
        im_depth = cv2.imread(roidb[i]['depth'], cv2.IMREAD_UNCHANGED)
        meta_data = roidb[i]['meta_data']

        _t['im_segment'].tic()
        labels = im_segment(sess, net, im, im_depth, meta_data, imdb.num_classes)
        _t['im_segment'].toc()

        _t['misc'].tic()
        seg = {'labels': labels}
        segmentations[i] = seg
        _t['misc'].toc()

        vis_segmentations(im, im_depth, labels)
        print 'im_segment: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_segment'].average_time, _t['misc'].average_time)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    with open(seg_file, 'wb') as f:
        cPickle.dump(segmentations, f, cPickle.HIGHEST_PROTOCOL)

    # evaluation
    imdb.evaluate_segmentations(segmentations, output_dir)
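
The Timer helper used throughout these examples is not shown. Below is a minimal sketch consistent with how it is called above (tic(), toc(), and the diff and average_time attributes); the actual utility in the source repository may differ in detail.

import time

class Timer(object):
    """Simple timer exposing tic()/toc() plus the diff and average_time fields."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        # start (or restart) the timer
        self.start_time = time.time()

    def toc(self):
        # stop the timer, record the last interval and the running average
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.diff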
Example no. 2
def setup():
    cfg_from_file("experiments/cfgs/lov_color_box.yml")
    cfg.GPU_ID = 0
    device_name = '/gpu:{:d}'.format(0)
    print(device_name)

    cfg.TRAIN.NUM_STEPS = 1
    cfg.TRAIN.GRID_SIZE = cfg.TEST.GRID_SIZE
    cfg.TRAIN.TRAINABLE = False

    cfg.RIG = "data/LOV/camera.json"
    cfg.CAD = "data/LOV/models.txt"
    cfg.POSE = "data/LOV/poses.txt"
    cfg.BACKGROUND = "data/cache/backgrounds.pkl"
    cfg.IS_TRAIN = False
    print('Using config:')
    pprint.pprint(cfg)
    set_seed()

    imdb = get_imdb("lov_single_000_box_train")
    output_dir = get_output_dir(imdb, None)

    # This cannot be imported at the top as it normally would be; it has to be imported after the default config has been merged with the file config.
    from networks.factory import get_network
    network = get_network("vgg16_convs")

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=False,
                                            gpu_options=gpu_options))

    with open("generate_dataset/config.yaml", "r") as config:
        config_dict = yaml.safe_load(config)

    model = config_dict["model"]
    print('Loading model weights from {:s}'.format(model))
    saver = tf.train.Saver()
    saver.restore(sess, model)

    return config_dict, sess, network, imdb, output_dir
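
A hypothetical driver for the setup() above, assuming a test routine with the signature of test_net() from Example no. 1; the wiring below is an illustration, not taken from the source.

if __name__ == '__main__':
    # build the session, network and dataset, then hand them to the test loop
    config_dict, sess, network, imdb, output_dir = setup()
    test_net(sess, network, imdb, config_dict["model"])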
Example no. 3
    # prepare dataset
    cfg.MODE = 'TRAIN'
    dataset = get_dataset(args.dataset_name)
    worker_init_fn = dataset.worker_init_fn if hasattr(dataset, 'worker_init_fn') else None
    num_workers = 4
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=cfg.TRAIN.IMS_PER_BATCH, shuffle=True, 
        num_workers=num_workers, worker_init_fn=worker_init_fn)
    print('Use dataset `{:s}` for training'.format(dataset.name))

    # overwrite intrinsics
    if len(cfg.INTRINSICS) > 0:
        K = np.array(cfg.INTRINSICS).reshape(3, 3)
        dataset._intrinsic_matrix = K
        print(dataset._intrinsic_matrix)

    output_dir = get_output_dir(dataset, None)
    print('Output will be saved to `{:s}`'.format(output_dir))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # prepare network
    if args.pretrained:
        network_data = torch.load(args.pretrained)
        if isinstance(network_data, dict) and 'model' in network_data:
            network_data = network_data['model']
        print("=> using pre-trained network '{}'".format(args.network_name))
    else:
        network_data = None
        print("=> creating network '{}'".format(args.network_name))

    network = networks.__dict__[args.network_name](dataset.num_classes, cfg.TRAIN.NUM_UNITS, network_data).cuda()
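
A minimal sketch of how the network and dataloader built above would be consumed for one pass of training; the optimizer settings and the assumption that the network returns its training loss are illustrative, not taken from the source.

# hypothetical hyperparameters, for illustration only
optimizer = torch.optim.SGD(network.parameters(), lr=0.001, momentum=0.9)
network.train()
for batch in dataloader:
    optimizer.zero_grad()
    loss = network(batch)  # assumption: the wrapper computes and returns its loss
    loss.backward()
    optimizer.step()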
Example no. 4
        cfg_from_file(args.cfg_file)

    print('Using config:')
    pprint.pprint(cfg)

    if not args.randomize:
        # fix the random seeds (numpy and caffe) for reproducibility
        np.random.seed(cfg.RNG_SEED)

    imdb = get_imdb(args.imdb_name)
    print 'Loaded dataset `{:s}` for training'.format(imdb.name)
    print 'symmetry'
    print imdb._symmetry
    roidb = get_training_roidb(imdb)

    output_dir = get_output_dir(imdb, None)
    print 'Output will be saved to `{:s}`'.format(output_dir)

    device_name = '/gpu:{:d}'.format(args.gpu_id)
    cfg.GPU_ID = args.gpu_id
    print device_name

    if cfg.NETWORK == 'FCN8VGG':
        path = osp.abspath(osp.join(cfg.ROOT_DIR, args.pretrained_model))
        cfg.TRAIN.MODEL_PATH = path
        pretrained_model = None
    else:
        pretrained_model = args.pretrained_model

    cfg.RIG = args.rig_name
    cfg.CAD = args.cad_name
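
cfg_from_file(), used here and in Example no. 2, merges a YAML experiment file into the global config. Below is a minimal sketch in the spirit of the Fast R-CNN style config modules these scripts descend from; the recursive merge is an assumption about the implementation, not a copy of it.

import yaml
from easydict import EasyDict as edict

# stand-in for the module-level config object these scripts import as cfg
cfg = edict()

def _merge_into(src, dst):
    # recursively copy keys from the loaded YAML dict into the config
    for key, value in src.items():
        if isinstance(value, dict) and isinstance(dst.get(key), dict):
            _merge_into(value, dst[key])
        else:
            dst[key] = value

def cfg_from_file(filename):
    with open(filename, 'r') as f:
        _merge_into(yaml.safe_load(f), cfg)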
Example no. 5
def test_net_single_frame(sess, net, imdb, weights_filename, rig_filename, is_kfusion):
    """Test a FCN on an image database."""

    output_dir = get_output_dir(imdb, weights_filename)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    print imdb.name
    if os.path.exists(seg_file):
        with open(seg_file, 'rb') as fid:
            segmentations = cPickle.load(fid)
        imdb.evaluate_segmentations(segmentations, output_dir)
        return

    """Test a FCN on an image database."""
    num_images = len(imdb.image_index)
    segmentations = [[] for _ in xrange(num_images)]

    # timers
    _t = {'im_segment' : Timer(), 'misc' : Timer()}

    # kinect fusion
    if is_kfusion:
        KF = kfusion.PyKinectFusion(rig_filename)

    # pose estimation
    if cfg.TEST.VERTEX_REG and cfg.TEST.RANSAC:
        RANSAC = ransac.PyRansac3D()

    # construct colors
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    if cfg.TEST.VISUALIZE:
        # perm = np.random.permutation(np.arange(num_images))
        perm = xrange(0, num_images, 5)
    else:
        perm = xrange(num_images)

    video_index = ''
    have_prediction = False
    for i in perm:

        # parse image name
        image_index = imdb.image_index[i]
        pos = image_index.find('/')
        if video_index == '':
            video_index = image_index[:pos]
            have_prediction = False
        else:
            if video_index != image_index[:pos]:
                have_prediction = False
                video_index = image_index[:pos]
                print 'start video {}'.format(video_index)

        # read color image
        rgba = pad_im(cv2.imread(imdb.image_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:,:,:3])
            alpha = rgba[:,:,3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # read depth image
        im_depth = pad_im(cv2.imread(imdb.depth_path_at(i), cv2.IMREAD_UNCHANGED), 16)

        # load meta data
        meta_data = scipy.io.loadmat(imdb.metadata_path_at(i))

        # read label image
        labels_gt = pad_im(cv2.imread(imdb.label_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        if len(labels_gt.shape) == 2:
            im_label_gt = imdb.labels_to_image(im, labels_gt)
        else:
            im_label_gt = np.copy(labels_gt[:,:,:3])
            im_label_gt[:,:,0] = labels_gt[:,:,2]
            im_label_gt[:,:,2] = labels_gt[:,:,0]

        _t['im_segment'].tic()
        labels, probs, vertex_pred = im_segment_single_frame(sess, net, im, im_depth, meta_data, imdb.num_classes)
        if cfg.TEST.VERTEX_REG:
            vertmap = _extract_vertmap(labels, vertex_pred, imdb._extents, imdb.num_classes)
            if cfg.TEST.RANSAC:
                # pose estimation using RANSAC
                fx = meta_data['intrinsic_matrix'][0, 0]
                fy = meta_data['intrinsic_matrix'][1, 1]
                px = meta_data['intrinsic_matrix'][0, 2]
                py = meta_data['intrinsic_matrix'][1, 2]
                depth_factor = meta_data['factor_depth'][0, 0]
                poses = RANSAC.estimate_pose(im_depth, probs,
                    vertex_pred[0,:,:,:] / cfg.TRAIN.VERTEX_W,
                    imdb._extents, fx, fy, px, py, depth_factor)

                # print gt poses
                # cls_indexes = meta_data['cls_indexes']
                # poses_gt = meta_data['poses']
                # for j in xrange(len(cls_indexes)):
                #    print 'object {}'.format(cls_indexes[j])
                #    print poses_gt[:,:,j]
            else:
                poses = []

        _t['im_segment'].toc()

        _t['misc'].tic()
        labels = unpad_im(labels, 16)
        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        if not have_prediction and is_kfusion:
            KF.set_voxel_grid(-3, -3, -3, 6, 6, 7)

        # run kinect fusion
        if is_kfusion:
            height = im.shape[0]
            width = im.shape[1]
            labels_kfusion = np.zeros((height, width), dtype=np.int32)

            im_rgb = np.copy(im)
            im_rgb[:, :, 0] = im[:, :, 2]
            im_rgb[:, :, 2] = im[:, :, 0]
            KF.feed_data(im_depth, im_rgb, im.shape[1], im.shape[0], float(meta_data['factor_depth']))
            KF.back_project()
            if have_prediction:
                pose_world2live, pose_live2world = KF.solve_pose()

            KF.feed_label(im_label, probs, colors)
            KF.fuse_depth()
            labels_kfusion = KF.extract_surface(labels_kfusion)
            im_label_kfusion = imdb.labels_to_image(im, labels_kfusion)
            KF.render()
            filename = os.path.join(output_dir, 'images', '{:04d}'.format(i))
            KF.draw(filename, 0)
        have_prediction = True

        if is_kfusion:
            seg = {'labels': labels_kfusion}
        else:
            seg = {'labels': labels}
        segmentations[i] = seg

        _t['misc'].toc()

        print 'im_segment {}: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(video_index, i + 1, num_images, _t['im_segment'].diff, _t['misc'].diff)

        if cfg.TEST.VISUALIZE:
            if cfg.TEST.VERTEX_REG:
                # centers_gt = _vote_centers(labels_gt, meta_data['cls_indexes'], meta_data['center'], imdb.num_classes)
                vertmap_gt = pad_im(cv2.imread(imdb.vertmap_path_at(i), cv2.IMREAD_UNCHANGED), 16)
                vertmap_gt = vertmap_gt[:, :, (2, 1, 0)]
                vertmap_gt = vertmap_gt.astype(np.float32) / 255.0
                vertmap_gt = _unscale_vertmap(vertmap_gt, imdb._process_label_image(labels_gt), imdb._extents, imdb.num_classes)
                print 'visualization'
                vis_segmentations_vertmaps(im, im_depth, im_label, im_label_gt, imdb._class_colors, \
                    vertmap_gt, vertmap, labels, labels_gt, poses, meta_data['intrinsic_matrix'])
            else:
                vis_segmentations(im, im_depth, im_label, im_label_gt, imdb._class_colors)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    with open(seg_file, 'wb') as f:
        cPickle.dump(segmentations, f, cPickle.HIGHEST_PROTOCOL)

    # evaluation
    imdb.evaluate_segmentations(segmentations, output_dir)
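
pad_im() pads an image so that its height and width become multiples of the given factor before it is fed to the network, and unpad_im() strips that padding from the predicted labels. The helpers themselves are not shown; here is a minimal pad_im sketch consistent with the calls above, where the bottom/right zero-padding is an assumption. The exact contract of unpad_im() is not visible from these snippets.

import numpy as np

def pad_im(im, factor):
    # pad bottom/right so height and width become multiples of factor
    pad_h = (factor - im.shape[0] % factor) % factor
    pad_w = (factor - im.shape[1] % factor) % factor
    pads = [(0, pad_h), (0, pad_w)] + [(0, 0)] * (im.ndim - 2)
    return np.pad(im, pads, mode='constant')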
Example no. 6
def test_net(sess, net, imdb, weights_filename, rig_filename, is_kfusion):
    """Test a FCN on an image database."""

    output_dir = get_output_dir(imdb, weights_filename)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    print imdb.name
    if os.path.exists(seg_file):
        with open(seg_file, 'rb') as fid:
            segmentations = cPickle.load(fid)
        imdb.evaluate_segmentations(segmentations, output_dir)
        return

    """Test a FCN on an image database."""
    num_images = len(imdb.image_index)
    segmentations = [[] for _ in xrange(num_images)]

    # timers
    _t = {'im_segment' : Timer(), 'misc' : Timer()}

    # voxelizer
    voxelizer = Voxelizer(cfg.TEST.GRID_SIZE, imdb.num_classes)
    voxelizer.setup(-3, -3, -3, 3, 3, 4)
    # voxelizer.setup(-2, -2, -2, 2, 2, 2)

    # kinect fusion
    if is_kfusion:
        KF = kfusion.PyKinectFusion(rig_filename)

    # construct colors
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    if cfg.TEST.VISUALIZE:
        perm = np.random.permutation(np.arange(num_images))
    else:
        perm = xrange(num_images)

    video_index = ''
    have_prediction = False
    for i in perm:
        rgba = pad_im(cv2.imread(imdb.image_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        height = rgba.shape[0]
        width = rgba.shape[1]

        # parse image name
        image_index = imdb.image_index[i]
        pos = image_index.find('/')
        if video_index == '':
            video_index = image_index[:pos]
            have_prediction = False
            state = np.zeros((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            weights = np.ones((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            points = np.zeros((1, height, width, 3), dtype=np.float32)
        else:
            if video_index != image_index[:pos]:
                have_prediction = False
                video_index = image_index[:pos]
                state = np.zeros((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
                weights = np.ones((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
                points = np.zeros((1, height, width, 3), dtype=np.float32)
                print 'start video {}'.format(video_index)

        # read color image
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:,:,:3])
            alpha = rgba[:,:,3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # read depth image
        im_depth = pad_im(cv2.imread(imdb.depth_path_at(i), cv2.IMREAD_UNCHANGED), 16)

        # load meta data
        meta_data = scipy.io.loadmat(imdb.metadata_path_at(i))

        # backprojection for the first frame
        if not have_prediction:    
            if is_kfusion:
                # KF.set_voxel_grid(-3, -3, -3, 6, 6, 7)
                KF.set_voxel_grid(voxelizer.min_x, voxelizer.min_y, voxelizer.min_z,
                                  voxelizer.max_x - voxelizer.min_x,
                                  voxelizer.max_y - voxelizer.min_y,
                                  voxelizer.max_z - voxelizer.min_z)
                # identity transformation
                RT_world = np.zeros((3,4), dtype=np.float32)
                RT_world[0, 0] = 1
                RT_world[1, 1] = 1
                RT_world[2, 2] = 1
            else:
                # store the RT for the first frame
                RT_world = meta_data['rotation_translation_matrix']

        # run kinect fusion
        if is_kfusion:
            im_rgb = np.copy(im)
            im_rgb[:, :, 0] = im[:, :, 2]
            im_rgb[:, :, 2] = im[:, :, 0]
            KF.feed_data(im_depth, im_rgb, im.shape[1], im.shape[0], float(meta_data['factor_depth']))
            KF.back_project()
            if have_prediction:
                pose_world2live, pose_live2world = KF.solve_pose()
                RT_live = pose_world2live
            else:
                RT_live = RT_world
        else:
            # compute camera poses
            RT_live = meta_data['rotation_translation_matrix']

        pose_world2live = se3_mul(RT_live, se3_inverse(RT_world))
        pose_live2world = se3_inverse(pose_world2live)

        _t['im_segment'].tic()
        labels, probs, state, weights, points = im_segment(
            sess, net, im, im_depth, state, weights, points, meta_data,
            voxelizer, pose_world2live, pose_live2world)
        _t['im_segment'].toc()
        # time.sleep(3)

        _t['misc'].tic()
        labels = unpad_im(labels, 16)

        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        if is_kfusion:
            labels_kfusion = np.zeros((height, width), dtype=np.int32)
            if probs.shape[2] < 10:
                probs_new = np.zeros((probs.shape[0], probs.shape[1], 10), dtype=np.float32)
                probs_new[:,:,:imdb.num_classes] = probs
                probs = probs_new
            KF.feed_label(im_label, probs, colors)
            KF.fuse_depth()
            labels_kfusion = KF.extract_surface(labels_kfusion)
            im_label_kfusion = imdb.labels_to_image(im, labels_kfusion)
            KF.render()
            filename = os.path.join(output_dir, 'images', '{:04d}'.format(i))
            KF.draw(filename, 0)
        have_prediction = True

        # compute the delta transformation between frames
        RT_world = RT_live

        if is_kfusion:
            seg = {'labels': labels_kfusion}
        else:
            seg = {'labels': labels}
        segmentations[i] = seg

        _t['misc'].toc()

        if cfg.TEST.VISUALIZE:
            # read label image
            labels_gt = pad_im(cv2.imread(imdb.label_path_at(i), cv2.IMREAD_UNCHANGED), 16)
            if len(labels_gt.shape) == 2:
                im_label_gt = imdb.labels_to_image(im, labels_gt)
            else:
                im_label_gt = np.copy(labels_gt[:,:,:3])
                im_label_gt[:,:,0] = labels_gt[:,:,2]
                im_label_gt[:,:,2] = labels_gt[:,:,0]
            vis_segmentations(im, im_depth, im_label, im_label_gt, imdb._class_colors)

        print 'im_segment: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_segment'].diff, _t['misc'].diff)

    if is_kfusion:
        KF.draw(filename, 1)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    with open(seg_file, 'wb') as f:
        cPickle.dump(segmentations, f, cPickle.HIGHEST_PROTOCOL)

    # evaluation
    imdb.evaluate_segmentations(segmentations, output_dir)
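
se3_mul() and se3_inverse() operate on the 3x4 [R|t] pose matrices loaded from the meta data. The sketches below match the standard rigid-transform algebra that the pose chaining above relies on; the source's helpers presumably do the same, but this is a reconstruction rather than a copy.

import numpy as np

def se3_inverse(RT):
    # invert a 3x4 rigid transform: [R|t] -> [R^T | -R^T t]
    R, t = RT[:, :3], RT[:, 3]
    RT_inv = np.zeros((3, 4), dtype=np.float32)
    RT_inv[:, :3] = R.transpose()
    RT_inv[:, 3] = -R.transpose().dot(t)
    return RT_inv

def se3_mul(RT1, RT2):
    # compose two 3x4 rigid transforms: apply RT2 first, then RT1
    RT = np.zeros((3, 4), dtype=np.float32)
    RT[:, :3] = RT1[:, :3].dot(RT2[:, :3])
    RT[:, 3] = RT1[:, :3].dot(RT2[:, 3]) + RT1[:, 3]
    return RT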
Example no. 7
    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    print('Using config:')
    pprint.pprint(cfg)

    if not args.randomize:
        # fix the random seeds (numpy and caffe) for reproducibility
        np.random.seed(cfg.RNG_SEED)

    imdb = get_imdb(args.imdb_name)
    print 'Loaded dataset `{:s}` for training'.format(imdb.name)
    roidb = get_training_roidb(imdb)

    output_dir = get_output_dir(imdb, None)
    print 'Output will be saved to `{:s}`'.format(output_dir)

    device_name = '/gpu:{:d}'.format(args.gpu_id)
    cfg.GPU_ID = args.gpu_id
    print device_name

    network = get_network(args.network_name, args.pretrained_model)
    print 'Use network `{:s}` in training'.format(args.network_name)

    train_net(network, imdb, roidb, output_dir,
              pretrained_model=args.pretrained_model,
              max_iters=args.max_iters)
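
The args object consumed by Examples no. 4 and 7 is assumed to come from an argparse parser along these lines; the flag strings and defaults are illustrative, and only the destination attribute names (gpu_id, max_iters, pretrained_model, cfg_file, imdb_name, network_name, rig_name, cad_name, randomize) are taken from the snippets above.

import argparse

def parse_args():
    # hypothetical parser; only the dest names appear in the source snippets
    parser = argparse.ArgumentParser(description='Train a FCN on an image database')
    parser.add_argument('--gpu', dest='gpu_id', type=int, default=0)
    parser.add_argument('--iters', dest='max_iters', type=int, default=40000)
    parser.add_argument('--weights', dest='pretrained_model', default=None)
    parser.add_argument('--cfg', dest='cfg_file', default=None)
    parser.add_argument('--imdb', dest='imdb_name', default='lov_train')
    parser.add_argument('--network', dest='network_name', default=None)
    parser.add_argument('--rig', dest='rig_name', default=None)
    parser.add_argument('--cad', dest='cad_name', default=None)
    parser.add_argument('--rand', dest='randomize', action='store_true')
    return parser.parse_args()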
Example no. 8
def test_net(sess, net, imdb, weights_filename, rig_filename, is_kfusion):
    """Test a FCN on an image database."""
    output_dir = get_output_dir(imdb, weights_filename)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    print 'The Output DIR is:', output_dir

    print 'Test a FCN on an image database'
    num_images = len(imdb.image_index)

    # timers
    _t = {'im_segment': Timer(), 'misc': Timer()}

    # voxelizer
    voxelizer = Voxelizer(cfg.TEST.GRID_SIZE, imdb.num_classes)
    voxelizer.setup(-3, -3, -3, 3, 3, 4)
    # voxelizer.setup(-2, -2, -2, 2, 2, 2)

    # construct colors
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]
    # print colors

    if cfg.TEST.VISUALIZE:
        perm = np.random.permutation(np.arange(num_images))
    else:
        perm = xrange(num_images)

    video_index = ''
    have_prediction = False
    i = 0
    while True:
        print i

        data_chunk = rgbd_getter.data_getter()

        im = data_chunk['rgb_image']
        im_depth = data_chunk['depth_image']

        rgba = im[..., [2, 1, 0]]
        rgba = rgba.astype(np.uint8)
        rgba = pad_im(rgba, 16)

        height = rgba.shape[0]
        width = rgba.shape[1]

        # parse image name
        image_index = imdb.image_index[i]

        if i == 0:
            have_prediction = False
            state = np.zeros((1, height, width, cfg.TRAIN.NUM_UNITS),
                             dtype=np.float32)
            weights = np.ones((1, height, width, cfg.TRAIN.NUM_UNITS),
                              dtype=np.float32)
            points = np.zeros((1, height, width, 3), dtype=np.float32)

        # read color image
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # read depth image
        im_depth = pad_im(im_depth, 16)

        # load meta data
        meta_data = data_chunk['meta_data']

        # backprojection for the first frame
        if not have_prediction:
            RT_world = meta_data['rotation_translation_matrix']

        RT_live = meta_data['rotation_translation_matrix']

        pose_world2live = se3_mul(RT_live, se3_inverse(RT_world))
        pose_live2world = se3_inverse(pose_world2live)

        # print "--- %s seconds ---" % (time.time() - start_time)

        _t['im_segment'].tic()
        print 'before feed dict----------------------------------'
        labels, probs, state, weights, points = im_segment(
            sess, net, im, im_depth, state, weights, points, meta_data,
            voxelizer, pose_world2live, pose_live2world)
        print 'after feed dict----------------------------------'
        _t['im_segment'].toc()

        # print "--- %s seconds ---" % (time.time() - start_time)
        # time.sleep(3)

        _t['misc'].tic()
        labels = unpad_im(labels, 16)

        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        im_label_post, lbl_pcd_color = post_proc_da.post_proc(
            im, data_chunk['point_cloud_array'], im_label,
            data_chunk['camera_info'], data_chunk['rgb_image'])

        # print "--- %s seconds ---" % (time.time() - start_time)
        # kernel = np.ones((3,3),np.uint8)
        #
        # im_ero = cv2.erode(im_label,kernel,iterations=1)
        #
        # label_path = '/home/weizhang/DA-RNN/data/LabScene/data/0000/' + '{:04d}_ero_3by3.png'.format(i)
        #
        # cv2.imwrite(label_path,im_ero)

        # label_path = '/home/weizhang/DA-RNN/data/LabScene/data/0000/' + '{:04d}_label.png'.format(i)
        # cv2.imwrite(label_path,im_label)
        # Press Q on keyboard to  exit
        # if cv2.waitKey(25) & 0xFF == ord('q'):
        #     break

        have_prediction = True

        # compute the delta transformation between frames
        RT_world = RT_live

        _t['misc'].toc()

        print 'im_segment: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_segment'].diff, _t['misc'].diff)

        if cfg.TEST.VISUALIZE:
            # read label image
            labels_gt = pad_im(
                cv2.imread(imdb.label_path_at(i), cv2.IMREAD_UNCHANGED), 16)
            if len(labels_gt.shape) == 2:
                im_label_gt = imdb.labels_to_image(im, labels_gt)
            else:
                im_label_gt = np.copy(labels_gt[:, :, :3])
                im_label_gt[:, :, 0] = labels_gt[:, :, 2]
                im_label_gt[:, :, 2] = labels_gt[:, :, 0]
            vis_segmentations(im, im_depth, im_label, im_label_post,
                              imdb._class_colors)


        i += 1
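
The flat per-class color buffer built with a loop near the top of this example (and in Examples no. 5 and 6) can be produced in one vectorized step; an equivalent one-liner, assuming imdb._class_colors is a sequence of RGB triples:

# same layout as the loop: [r0, g0, b0, r1, g1, b1, ...]
colors = np.asarray(imdb._class_colors, dtype=np.uint8).reshape(-1)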