def augment_bv(blobs, scale=0.5):
    # apply a random translation to the bird's-eye-view image; scale is in meters
    sx, sy = np.random.uniform(-scale, scale, 2)
    # shift gt_boxes_3d, gt_boxes_corners and gt_boxes_bv
    blobs['gt_boxes_3d'][:, 0] += sx
    blobs['gt_boxes_3d'][:, 1] += sy
    # WZN: shift the corner representations as well
    blobs['gt_boxes_corners'][:, 0:8] += sx
    blobs['gt_boxes_corners'][:, 8:16] += sy
    blobs['gt_boxes_bv_corners'][:, 0:8] = lidar_cnr_to_bv_cnr(
        blobs['gt_boxes_corners'][:, 0:24])
    blobs['gt_boxes_bv'][:, 0:4] = lidar_3d_to_bv(blobs['gt_boxes_3d'])

    # shift lidar_bv_data
    sx_bv, sy_bv = _lidar_shift_to_bv_shift(sx, sy)
    sx_bv = np.round(sx_bv).astype(int)
    sy_bv = np.round(sy_bv).astype(int)
    blobs['lidar_bv_data'][0, :, :, :] = np.roll(
        blobs['lidar_bv_data'][0, :, :, :], [sy_bv, sx_bv], axis=[0, 1])
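    # np.roll wraps rows/columns around the array border, so the block below
    # zeroes the wrapped strip to keep stale occupancy from leaking in from
    # the opposite side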

    if sy_bv >= 0:
        blobs['lidar_bv_data'][0, 0:sy_bv, :, :] = 0
    else:
        blobs['lidar_bv_data'][0, sy_bv:, :, :] = 0
    if sx_bv >= 0:
        blobs['lidar_bv_data'][0, :, 0:sx_bv, :] = 0
    else:
        blobs['lidar_bv_data'][0, :, sx_bv:, :] = 0

    # shift bv_index; clip bv_index and img_index
    blobs['bv_index'][:, 0] += sx_bv
    blobs['bv_index'][:, 1] += sy_bv
    clip_indx = np.logical_and(
        blobs['bv_index'][:, 0] >= 0,
        blobs['bv_index'][:, 0] < blobs['lidar_bv_data'].shape[1])
    clip_indy = np.logical_and(
        blobs['bv_index'][:, 1] >= 0,
        blobs['bv_index'][:, 1] < blobs['lidar_bv_data'].shape[2])
    clip_indx = np.logical_and(clip_indx, clip_indy)
    blobs['bv_index'] = blobs['bv_index'][clip_indx, :]
    blobs['img_index'] = blobs['img_index'][:, clip_indx]
    # shift calib to stay consistent with the translated cloud
    Tr = np.reshape(blobs['calib'][3, :], (3, 4))
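    # translating the cloud by s = (sx, sy, 0) changes the velo-to-cam
    # transform as t' = t - R * s, which the next line applies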
    Tr[:, 3] += np.dot(Tr[:, 0:3], -np.array([sx, sy, 0]))
    blobs['calib'][3, :] = np.reshape(Tr, (-1))
    return blobs
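
# A minimal usage sketch (hypothetical loader; blobs must already carry the
# keys touched above -- 'gt_boxes_3d', 'gt_boxes_corners',
# 'gt_boxes_bv_corners', 'gt_boxes_bv', 'lidar_bv_data' of shape
# (1, H, W, C), 'bv_index', 'img_index' and 'calib' of shape (4, 12)):
#
#   blobs = get_minibatch(roidb)          # assumed data loader, not shown
#   blobs = augment_bv(blobs, scale=0.5)  # jitter up to +/-0.5 m in x and y
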
def proposal_layer_3d(rpn_cls_prob_reshape,
                      rpn_bbox_pred,
                      im_info,
                      calib,
                      cfg_key,
                      _feat_stride=[8],
                      anchor_scales=[1.0, 1.0]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    #layer_params = yaml.load(self.param_str_)

    _anchors = generate_anchors_bv()
    #  _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    #print "aaaaaaa",_anchors.shape (4,4)
    #print "bbbbbbb",im_info          (601,601,1)
    #print "ccccccc", calib.shape   (4,12)

    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # print rpn_cls_prob_reshape.shape

    height, width = rpn_cls_prob_reshape.shape[1:3]
    # scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])
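    # the reshape above splits each anchor's two softmax channels into a
    # trailing (bg, fg) pair and keeps index 1, the foreground probability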

    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Reshape predicted bbox transformations to get them into the same
    # order as the anchors:
    #
    # bbox deltas come in (1, H, W, A * 6) TF layout; reshape to
    # (1 * H * W * A, 6) where rows are ordered by (h, w, a)
    bbox_deltas = bbox_deltas.reshape((-1, 6))

    # print "bbox_deltas",bbox_deltas.shape
    # print anchors.shape
    # Same story for the scores, already reshaped to (1, H, W, A) above:
    scores = scores.reshape((-1, 1))

    # print np.sort(scores.ravel())[-30:]

    # convert anchors bv to anchors_3d
    anchors_3d = bv_anchor_to_lidar(anchors)
    # Convert anchors into proposals via bbox transformations
    proposals_3d = bbox_transform_inv_3d(anchors_3d, bbox_deltas)
    # convert back to lidar_bv
    proposals_bv = lidar_3d_to_bv(proposals_3d)

    lidar_corners = lidar_3d_to_corners(proposals_3d)
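    # each calib row is a flattened 3x4 matrix; judging by augment_bv above
    # and the KITTI convention, row 3 is Tr_velo2cam, row 2 R0_rect and
    # row 0 P2 (an assumption, not confirmed by this file)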
    proposals_img = lidar_cnr_to_img(lidar_corners, calib[3], calib[2],
                                     calib[0])

    if DEBUG:
        # print("bbox_deltas: ", bbox_deltas[:10])
        # print("proposals number: ", proposals_3d[:10])
        print("proposals_bv shape: ", proposals_bv.shape)
        print("proposals_3d shape: ", proposals_3d.shape)
        print("proposals_img shape:", proposals_img.shape)

    # 2. clip predicted boxes to image
    proposals_bv = clip_boxes(proposals_bv, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals_bv, min_size * im_info[2])
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    # TODO: pass real image_info
    keep = _filter_img_boxes(proposals_img, [375, 1242])
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print("proposals after clip")
        print("proposals_bv shape: ", proposals_bv.shape)
        print("proposals_3d shape: ", proposals_3d.shape)
        print("proposals_img shape: ", proposals_img.shape)
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals_bv = proposals_bv[order, :]
    proposals_3d = proposals_3d[order, :]
    proposals_img = proposals_img[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
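    # nms expects each row as [x1, y1, x2, y2, score], hence the hstack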
    keep = nms(np.hstack((proposals_bv, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print("proposals after nms")
        print("proposals_bv shape: ", proposals_bv.shape)
        print("proposals_3d shape: ", proposals_3d.shape)

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals_bv.shape[0], 1), dtype=np.float32)
    blob_bv = np.hstack((batch_inds, proposals_bv.astype(np.float32,
                                                         copy=False)))
    blob_img = np.hstack(
        (batch_inds, proposals_img.astype(np.float32, copy=False)))
    blob_3d = np.hstack((batch_inds, proposals_3d.astype(np.float32,
                                                         copy=False)))

    if DEBUG:
        print("blob shape ====================:")
        print(blob_bv.shape)
        print(blob_img.shape)
        # print('3d', blob_3d[:10])
        # print(lidar_corners[:10])
        # print('bv', blob_bv[:10])
        # print('img', blob_img[:10])

    return blob_bv, blob_img, blob_3d
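
# A hedged call sketch (shapes inferred from the reshapes above; cfg must
# provide TRAIN/TEST entries with the RPN_* fields read at the top):
#
#   blob_bv, blob_img, blob_3d = proposal_layer_3d(
#       rpn_cls_prob_reshape,  # (1, H, W, A * 2) softmax scores
#       rpn_bbox_pred,         # (1, H, W, A * 6) regression deltas
#       im_info, calib, 'TEST')
#
# Each blob is a zero batch-index column stacked with the box parameters.
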
def proposal_layer_3d_debug(rpn_cls_prob_reshape,
                            rpn_bbox_pred,
                            im_info,
                            calib,
                            cfg_in,
                            _feat_stride=[8],
                            anchor_scales=[1.0, 1.0],
                            debug_state=True):
    # copy of part of proposal_layer_3d, for debugging
    _anchors = generate_anchors_bv()
    #  _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]


    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # print rpn_cls_prob_reshape.shape

    height, width = rpn_cls_prob_reshape.shape[1:3]
    # scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])

    bbox_deltas = rpn_bbox_pred

    if debug_state:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    bbox_deltas = bbox_deltas.reshape((-1, 6))
    scores = scores.reshape((-1, 1))

    # convert anchors bv to anchors_3d
    anchors_3d = bv_anchor_to_lidar(anchors)
    # Convert anchors into proposals via bbox transformations
    proposals_3d = bbox_transform_inv_3d(anchors_3d, bbox_deltas)
    # convert back to lidar_bv
    proposals_bv = lidar_3d_to_bv(proposals_3d)  # [x1, y1, x2, y2]

    lidar_corners = lidar_3d_to_corners(proposals_3d)
    proposals_img = lidar_cnr_to_img(lidar_corners, calib[3], calib[2],
                                     calib[0])

    if debug_state:
        # print("bbox_deltas: ", bbox_deltas[:10])
        # print("proposals number: ", proposals_3d[:10])
        print("proposals_bv shape: ", proposals_bv.shape)
        print("proposals_3d shape: ", proposals_3d.shape)
        print("scores shape:", scores.shape)

    # 2. clip predicted boxes to image
    # WZN: drop proposals whose anchors fall outside the image
    ind_inside = clip_anchors(anchors, im_info[:2])
    # ind_inside = np.logical_and(ind_inside, clip_anchors(proposals_bv, im_info[:2]))
    proposals_bv = proposals_bv[ind_inside, :]
    proposals_3d = proposals_3d[ind_inside, :]
    proposals_img = proposals_img[ind_inside, :]
    scores = scores[ind_inside, :]
    proposals_bv = clip_boxes(proposals_bv, im_info[:2])
    
    
    # TODO: pass real image_info
    #keep = _filter_img_boxes(proposals_img, [375, 1242])
    #proposals_bv = proposals_bv[keep, :]
    #proposals_3d = proposals_3d[keep, :]
    #proposals_img = proposals_img[keep, :]
    #scores = scores[keep]

    if debug_state:
        print("proposals after clip")
        print("proposals_bv shape: ", proposals_bv.shape)
        print("proposals_3d shape: ", proposals_3d.shape)
        print("proposals_img shape: ", proposals_img.shape)
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if cfg_in['pre_keep_topN'] > 0:
        order = order[:cfg_in['pre_keep_topN']]
    #keep = keep[order]
    proposals_bv = proposals_bv[order, :]
    proposals_3d = proposals_3d[order, :]
    proposals_img = proposals_img[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if cfg_in['use_nms']:
        keep = nms(np.hstack((proposals_bv, scores)), cfg_in['nms_thresh'])
        if cfg_in['nms_topN'] > 0:
            keep = keep[:cfg_in['nms_topN']]
        proposals_bv = proposals_bv[keep, :]
        proposals_3d = proposals_3d[keep, :]
        proposals_img = proposals_img[keep, :]
        scores = scores[keep]

        if debug_state:
            print("proposals after nms")
            print("proposals_bv shape: ", proposals_bv.shape)
            print("proposals_3d shape: ", proposals_3d.shape)

    # debug only: keep probabilities above a threshold
    if cfg_in['prob_thresh']:
        keep_ind = scores[:, 0] > cfg_in['prob_thresh']
        print('scores: ', scores)
        print('threshold: ', cfg_in['prob_thresh'])
        print('score shape:', scores.shape)
        #print keep_ind.shape
        #print keep.shape
        #keep = keep[keep_ind]
        proposals_bv = proposals_bv[keep_ind, :]
        proposals_3d = proposals_3d[keep_ind, :]
        proposals_img = proposals_img[keep_ind, :]
        scores = scores[keep_ind]

    return proposals_bv, proposals_3d, proposals_img, scores
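
# cfg_in is read ad hoc above; a plausible example (values illustrative,
# not taken from the source):
#
#   cfg_in = {'pre_keep_topN': 6000, 'use_nms': True, 'nms_thresh': 0.7,
#             'nms_topN': 300, 'prob_thresh': 0.5}
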
    def _load_kitti_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the KITTI
        format.
        """
        # filename = '$Faster-RCNN_TF/data/KITTI/object/training/label_2/000000.txt'
        filename = os.path.join(self._data_path, 'training/label_2',
                                index + '.txt')
        #         print("Loading: ", filename)

        # calib
        calib = self._load_kitti_calib(index)
        Tr = calib['Tr_velo2cam']

        # print 'Loading: {}'.format(filename)
        with open(filename, 'r') as f:
            lines = f.readlines()
        num_objs = len(lines)
        translation = np.zeros((num_objs, 3), dtype=np.float32)
        rys = np.zeros((num_objs), dtype=np.float32)
        lwh = np.zeros((num_objs, 3), dtype=np.float32)
        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        boxes_bv = np.zeros((num_objs, 4), dtype=np.float32)
        boxes3D = np.zeros((num_objs, 6), dtype=np.float32)
        boxes3D_lidar = np.zeros((num_objs, 6), dtype=np.float32)
        boxes3D_cam_cnr = np.zeros((num_objs, 24), dtype=np.float32)
        boxes3D_corners = np.zeros((num_objs, 24), dtype=np.float32)
        alphas = np.zeros((num_objs), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        # print(boxes3D.shape)

        # Load object bounding boxes into a data frame.
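        # KITTI label columns (space-separated): type, truncation, occlusion,
        # alpha, bbox x1 y1 x2 y2, dimensions h w l, location x y z in the
        # camera frame, rotation_y -- matching the obj[...] indices below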
        ix = -1
        for line in lines:
            obj = line.strip().split(' ')
            try:
                cls = self._class_to_ind[obj[0].strip()]
                # print cls
            except KeyError:
                # skip classes that are not trained on
                continue
            # ignore objects with undetermined difficulty level
            # level = self._get_obj_level(obj)
            # if level > 3:
            #     continue
            ix += 1
            # 0-based coordinates
            alpha = float(obj[3])
            x1 = float(obj[4])
            y1 = float(obj[5])
            x2 = float(obj[6])
            y2 = float(obj[7])
            h = float(obj[8])
            w = float(obj[9])
            l = float(obj[10])
            tx = float(obj[11])
            ty = float(obj[12])
            tz = float(obj[13])
            ry = float(obj[14])

            rys[ix] = ry
            lwh[ix, :] = [l, w, h]
            alphas[ix] = alpha
            translation[ix, :] = [tx, ty, tz]
            boxes[ix, :] = [x1, y1, x2, y2]
            boxes3D[ix, :] = [tx, ty, tz, l, w, h]
            # convert boxes3D cam to 8 corners(cam)
            boxes3D_cam_cnr_single = computeCorners3D(boxes3D[ix, :], ry)
            boxes3D_cam_cnr[ix, :] = boxes3D_cam_cnr_single.reshape(24)
            # convert 8 corners(cam) to 8 corners(lidar)
            boxes3D_corners[ix, :] = camera_to_lidar_cnr(
                boxes3D_cam_cnr_single, Tr)
            # convert 8 corners(cam) to lidar boxes3D
            boxes3D_lidar[ix, :] = lidar_cnr_to_3d(boxes3D_corners[ix, :],
                                                   lwh[ix, :])
            # convert 8 corners(lidar) to lidar bird view
            boxes_bv[ix, :] = lidar_3d_to_bv(boxes3D_lidar[ix, :])
            # boxes3D_corners[ix, :] = lidar_to_corners_single(boxes3D_lidar[ix, :])
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        rys.resize(ix + 1)
        lwh.resize(ix + 1, 3)
        translation.resize(ix + 1, 3)
        alphas.resize(ix + 1)
        boxes.resize(ix + 1, 4)
        boxes_bv.resize(ix + 1, 4)
        boxes3D.resize(ix + 1, 6)
        boxes3D_lidar.resize(ix + 1, 6)
        boxes3D_cam_cnr.resize(ix + 1, 24)
        boxes3D_corners.resize(ix + 1, 24)
        gt_classes.resize(ix + 1)
        # print(self.num_classes)
        overlaps.resize(ix + 1, self.num_classes)
        # if index == '000142':
        #     print(index)
        #     print(overlaps)
        overlaps = scipy.sparse.csr_matrix(overlaps)
        # if index == '000142':
        #     print(overlaps)

        return {
            'ry': rys,
            'lwh': lwh,
            'boxes': boxes,
            'boxes_bv': boxes_bv,
            'boxes_3D_cam': boxes3D,
            'boxes_3D': boxes3D_lidar,
            'boxes3D_cam_corners': boxes3D_cam_cnr,
            'boxes_corners': boxes3D_corners,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'xyz': translation,
            'alphas': alphas,
            'flipped': False
        }
    def _load_kitti_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the KITTI
        format.
        WZN: non-interested regions ('DontCare') are simply ignored
        (treated the same as background).
        """
        if self._image_set == 'test':
            return {
                'ry': np.array([]),
                'lwh': np.array([]),
                'boxes': np.array([]),  # xy box in image
                #'boxes_bv': boxes_bv,  # xy box in bird view
                'boxes_3D_cam': np.array([]),  # [xyz_center, lwh] in 3D, cam frame
                #'boxes_3D': boxes3D_lidar,  # [xyz_center, lwh] in 3D, absolute
                'boxes3D_cam_corners': np.array([]),  # 8 corners of box in 3D, cam frame
                #'boxes_corners': boxes3D_corners,  # 8 corners of box in 3D
                #'boxes_bv_corners': boxes_bv_corners,  # 4 corners of box in bird view
                'gt_classes': np.array([]),  # classes
                'gt_overlaps': np.array([]),  # default 1, changed later
                'xyz': np.array([]),
                'alphas': np.array([]),
                'diff_level': np.array([]),
                'flipped': False
            }
        else:
            # filename = '$Faster-RCNN_TF/data/KITTI/object/training/label_2/000000.txt'
            filename = os.path.join(self.gt_dir, index + '.txt')
            #         print("Loading: ", filename)

            # calib
            calib = self._load_kitti_calib(index)
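            # compose the rectification rotation with the velodyne-to-camera
            # transform so lidar corners map straight into the rectified frame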
            Tr = np.dot(calib['R0'], calib['Tr_velo2cam'])

            # print 'Loading: {}'.format(filename)
            with open(filename, 'r') as f:
                lines = f.readlines()
            num_objs = len(lines)
            translation = np.zeros((num_objs, 3), dtype=np.float32)
            rys = np.zeros((num_objs), dtype=np.float32)
            lwh = np.zeros((num_objs, 3), dtype=np.float32)
            boxes = np.zeros((num_objs, 4), dtype=np.float32)
            boxes_bv = np.zeros((num_objs, 4), dtype=np.float32)
            boxes3D = np.zeros((num_objs, 6), dtype=np.float32)
            boxes3D_lidar = np.zeros((num_objs, 6), dtype=np.float32)
            boxes3D_cam_cnr = np.zeros((num_objs, 24), dtype=np.float32)
            boxes3D_corners = np.zeros((num_objs, 24), dtype=np.float32)
            alphas = np.zeros((num_objs), dtype=np.float32)
            gt_classes = np.zeros((num_objs), dtype=np.int32)
            overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
            # new difficulty level for in training evaluation
            diff_level = np.zeros((num_objs), dtype=np.int32)

            # print(boxes3D.shape)

            # Load object bounding boxes into a data frame.
            ix = -1
            for line in lines:
                obj = line.strip().split(' ')
                try:
                    # WZN: .strip() removes whitespace
                    cls = self._class_to_ind[obj[0].strip()]
                    # print cls
                except KeyError:
                    continue
                # ignore objects with undetermined difficulty level
                level = self._get_obj_level(obj)
                if level > 3:
                    continue
                ix += 1

                # 0-based coordinates
                alpha = float(obj[3])
                x1 = float(obj[4])
                y1 = float(obj[5])
                x2 = float(obj[6])
                y2 = float(obj[7])
                h = float(obj[8])
                w = float(obj[9])
                l = float(obj[10])
                tx = float(obj[11])
                ty = float(obj[12])
                tz = float(obj[13])
                ry = float(obj[14])

                diff_level[ix] = level
                if obj[0].strip() == 'Person_sitting':
                    diff_level[ix] = -1
                rys[ix] = ry
                lwh[ix, :] = [l, w, h]
                alphas[ix] = alpha
                translation[ix, :] = [tx, ty, tz]
                boxes[ix, :] = [x1, y1, x2, y2]
                boxes3D[ix, :] = [tx, ty, tz, l, w, h]
                # convert boxes3D cam to 8 corners(cam)
                boxes3D_cam_cnr_single = computeCorners3D(boxes3D[ix, :], ry)
                boxes3D_cam_cnr[ix, :] = boxes3D_cam_cnr_single.reshape(24)
                # convert 8 corners(cam) to 8 corners(lidar)
                boxes3D_corners[ix, :] = camera_to_lidar_cnr(
                    boxes3D_cam_cnr_single, Tr)

                # convert 8 corners(cam) to lidar boxes3D; note this is not
                # invertible because we LOSE ry!
                boxes3D_lidar[ix, :] = lidar_cnr_to_3d(boxes3D_corners[ix, :],
                                                       lwh[ix, :])
                # convert 8 corners(lidar) to lidar bird view
                boxes_bv[ix, :] = lidar_3d_to_bv(boxes3D_lidar[ix, :])
                # boxes3D_corners[ix, :] = lidar_to_corners_single(boxes3D_lidar[ix, :])
                gt_classes[ix] = cls
                overlaps[ix, cls] = 1.0

            rys.resize(ix + 1)
            lwh.resize(ix + 1, 3)
            translation.resize(ix + 1, 3)
            alphas.resize(ix + 1)
            boxes.resize(ix + 1, 4)
            boxes_bv.resize(ix + 1, 4)
            boxes3D.resize(ix + 1, 6)
            boxes3D_lidar.resize(ix + 1, 6)
            boxes3D_cam_cnr.resize(ix + 1, 24)
            boxes3D_corners.resize(ix + 1, 24)

            boxes_bv_corners = lidar_cnr_to_bv_cnr(boxes3D_corners)

            gt_classes.resize(ix + 1)
            # print(self.num_classes)
            overlaps.resize(ix + 1, self.num_classes)
            diff_level.resize(ix + 1)
            # if index == '000142':
            #     print(index)
            #     print(overlaps)
            overlaps = scipy.sparse.csr_matrix(overlaps)
            # if index == '000142':
            #     print(overlaps)

            #if ix>=0:
            #    print index

            return {
                'ry': rys,
                'lwh': lwh,
                'boxes': boxes,  # xy box in image
                #'boxes_bv': boxes_bv,  # xy box in bird view
                'boxes_3D_cam': boxes3D,  # [xyz_center, lwh] in 3D, cam frame
                #'boxes_3D': boxes3D_lidar,  # [xyz_center, lwh] in 3D, absolute
                'boxes3D_cam_corners': boxes3D_cam_cnr,  # 8 corners of box in 3D, cam frame
                #'boxes_corners': boxes3D_corners,  # 8 corners of box in 3D
                #'boxes_bv_corners': boxes_bv_corners,  # 4 corners of box in bird view
                'gt_classes': gt_classes,  # classes
                'gt_overlaps': overlaps,  # default 1, changed later
                'xyz': translation,
                'alphas': alphas,
                'diff_level': diff_level,
                'flipped': False
            }