Example #1
            location = np.dot(np.transpose(Rtilt), location)
            print("rtilted location")
            print(location)
            location_changed = [location[i] for i in sun_permutation]
            location_changed[1] *= -1
            print("내가 location_changed")
            print(location_changed)
            print("dim")
            print(dim)
            #dim_changed = [dim[i]*2 if dim[i] > 0 else print("error" + str(image_id)) for i in sun_permutation]  # because dim stores sizes
            dim_changed = [dim[i] * 2
                           for i in sun_permutation]  # because dim stores sizes (SUN RGB-D half-extents), so double them
            print(dim_changed)

            # Project the Rtilt-applied location onto the 2D image.
            ct3d_to_ct2d = project_to_image(np.array(
                [location_changed]), calib)  # project the 3D bounding box onto the image
            print(ct3d_to_ct2d)
            cv2.circle(image,
                       (int(ct3d_to_ct2d[0][0]), int(ct3d_to_ct2d[0][1])), 10,
                       (255, 0, 0), -1)
            # rotation_y = -rotation_y
            box_3d = compute_box_3d(dim_changed, location_changed, rotation_y,
                                    image_id)
            # print("before neg")
            # print(rotation_y)
            # rotation_y = -rotation_y
            # print("after neg")
            # print(rotation_y)
            # in case you want to negate the rotation_y coordinate
            alpha = rotation_y - np.arctan2(ct_x - calib[0][2], calib[0][0])
            if alpha > np.pi:
                alpha -= 2 * np.pi
            if alpha < -np.pi:
                alpha += 2 * np.pi  # rotation_y is already theta_c, so only this wrap is needed
            # print("ann")
            # print(ann)
            ret['annotations'].append(ann)
            # if DEBUG and cat_id in class_final:
            if DEBUG:
                # EXT = [45.75, -0.34, 0.005]
                # calib[0][3] = 45.75
                # calib[1][3] = -0.34
                # calib[2][3] = 0.005
                # show the image
                print(cat_id)
                box_3d = compute_box_3d_sun_8(dim, location, rotation_y, Rtilt)
                print(dim, location, rotation_y, Rtilt)
                print(box_3d)
                box_2d = project_to_image(
                    box_3d, calib)  # project the 3D bounding box onto the image

                print('box_2d', box_2d)

                # show the image
                c = np.cos(rotation_y)
                s = np.sin(rotation_y)

                image = draw_box_3d(image, box_2d)

                print("alpha:" + str(alpha))
                '''
                print('rot_y, alpha2rot_y, dlt', tmp[0],
                      rotation_y, alpha2rot_y(alpha, x, calib[0, 2], calib[0, 0]),
                      np.cos(
                        rotation_y - alpha2rot_y(alpha, x, calib[0, 2], calib[0, 0])))
                '''
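All of the examples on this page share a project_to_image helper. Signatures vary slightly (Example #8's variant also returns visibility info), but a minimal sketch of the common core, assuming pts_3d is an (N, 3) array of camera-frame points and calib a 3 x 4 numpy projection matrix, would be:

import numpy as np

def project_to_image(pts_3d, calib):
    # Homogenize, apply P = K[R|t], and divide by depth.
    pts_3d_h = np.concatenate([pts_3d, np.ones((pts_3d.shape[0], 1))], axis=1)
    pts_2d = pts_3d_h @ calib.T           # (N, 3) homogeneous image points
    return pts_2d[:, :2] / pts_2d[:, 2:]  # perspective divide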
Example #3
      f = open(out_path, 'w')
      ori_anns = []
      for ann_ind, txt in enumerate(anns):
        tmp = txt[:-1].split(' ')
        cat_id = cat_ids[tmp[0]]
        truncated = int(float(tmp[1]))
        occluded = int(tmp[2])
        alpha = float(tmp[3])
        bbox = [float(tmp[4]), float(tmp[5]), float(tmp[6]), float(tmp[7])]
        dim = [float(tmp[8]), float(tmp[9]), float(tmp[10])]
        location = [float(tmp[11]), float(tmp[12]), float(tmp[13])]
        rotation_y = float(tmp[14])

        # Some labels contain boxes outside the image pixel range; a label is skipped when more than 6 of its projected 3D box vertices fall outside the image (filter commented out below).
        box_3d = compute_box_3d(dim, location, rotation_y)
        box_2d = project_to_image(box_3d, calib)
        img_size = np.asarray([IMG_W, IMG_H], dtype=np.int32)
        # inds = np.greater(box_2d,img_size)
        # out_num = (inds[:,0] * inds[:,1]).sum()
        # if out_num > 6:
        #   continue

        # Compute alpha from the 3D box center and the intrinsic matrix; note that location is the center of the 3D box's bottom face (only x is used, so no extra computation is needed).
        alpha = _rot_y2alpha(rotation_y, box_2d[:,0][0:4].sum()/4, 
                                 calib[0, 2], calib[0, 0])
        
        # Compute the 2D bbox in pixel coordinates as the extremes of the projected 3D box corners along each axis, then clip it to the image.
        bbox = (np.min(box_2d[:,0]), np.min(box_2d[:,1]), np.max(box_2d[:,0]), np.max(box_2d[:,1]))
        bbox_crop = tuple(max(0, b) for b in bbox)
        bbox_crop = (min(img_size[0], bbox_crop[0]),
                     min(img_size[1], bbox_crop[1]),
                     min(img_size[0], bbox_crop[2]),
                     min(img_size[1], bbox_crop[3]))  # clip x by width, y by height
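The _rot_y2alpha helper used above converts the global yaw rotation_y into the observation angle alpha by subtracting the viewing-ray angle of the given pixel column. A plausible sketch (assumed implementation, mirroring the wrap-around used in Example #1):

import numpy as np

def _rot_y2alpha(rot_y, x, cx, fx):
    # alpha = rot_y minus the angle of the camera ray through pixel column x.
    alpha = rot_y - np.arctan2(x - cx, fx)
    if alpha > np.pi:
        alpha -= 2 * np.pi
    if alpha < -np.pi:
        alpha += 2 * np.pi
    return alpha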
Example #4
def main():
  SCENE_SPLITS['mini-val'] = SCENE_SPLITS['val']
  if not os.path.exists(OUT_PATH):
    os.mkdir(OUT_PATH)
  for split in SPLITS:
    data_path = DATA_PATH + '{}/'.format(SPLITS[split])
    nusc = NuScenes(
      version=SPLITS[split], dataroot=data_path, verbose=True)
    out_path = OUT_PATH + '{}.json'.format(split)
    categories_info = [{'name': CATS[i], 'id': i + 1} for i in range(len(CATS))]
    ret = {'images': [], 'annotations': [], 'categories': categories_info, 
           'videos': [], 'attributes': ATTRIBUTE_TO_ID}
    num_images = 0
    num_anns = 0
    num_videos = 0

    # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
    for sample in nusc.sample:
      scene_name = nusc.get('scene', sample['scene_token'])['name']
      if not (split in ['mini', 'test']) and \
        not (scene_name in SCENE_SPLITS[split]):
        continue
      if sample['prev'] == '':
        print('scene_name', scene_name)
        num_videos += 1
        ret['videos'].append({'id': num_videos, 'file_name': scene_name})
        frame_ids = {k: 0 for k in sample['data']}
        track_ids = {}
      # We decompose a sample into 6 images in our case.
      for sensor_name in sample['data']:
        if sensor_name in USED_SENSOR:
          image_token = sample['data'][sensor_name]
          image_data = nusc.get('sample_data', image_token)
          num_images += 1

          # Complex coordinate transform. This will take time to understand.
          sd_record = nusc.get('sample_data', image_token)
          cs_record = nusc.get(
            'calibrated_sensor', sd_record['calibrated_sensor_token'])
          pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
          global_from_car = transform_matrix(pose_record['translation'],
            Quaternion(pose_record['rotation']), inverse=False)
          car_from_sensor = transform_matrix(
            cs_record['translation'], Quaternion(cs_record['rotation']),
            inverse=False)
          trans_matrix = np.dot(global_from_car, car_from_sensor)
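          # global_from_car maps ego-vehicle -> global, car_from_sensor maps
          # camera -> ego-vehicle, so trans_matrix maps camera -> global; its
          # inverse is applied below to bring global-frame velocities back
          # into the camera frame.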
          _, boxes, camera_intrinsic = nusc.get_sample_data(
            image_token, box_vis_level=BoxVisibility.ANY)
          calib = np.eye(4, dtype=np.float32)
          calib[:3, :3] = camera_intrinsic
          calib = calib[:3]
          frame_ids[sensor_name] += 1

          # image information in COCO format
          image_info = {'id': num_images,
                        'file_name': image_data['filename'],
                        'calib': calib.tolist(), 
                        'video_id': num_videos,
                        'frame_id': frame_ids[sensor_name],
                        'sensor_id': SENSOR_ID[sensor_name],
                        'sample_token': sample['token'],
                        'trans_matrix': trans_matrix.tolist(),
                        'width': sd_record['width'],
                        'height': sd_record['height'],
                        'pose_record_trans': pose_record['translation'],
                        'pose_record_rot': pose_record['rotation'],
                        'cs_record_trans': cs_record['translation'],
                        'cs_record_rot': cs_record['rotation']}
          ret['images'].append(image_info)
          anns = []
          for box in boxes:
            det_name = category_to_detection_name(box.name)
            if det_name is None:
              continue
            num_anns += 1
            v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
            yaw = -np.arctan2(v[2], v[0])
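            # v is the box heading (its rotated x-axis); yaw is the heading's
            # angle in the camera x-z plane, negated to match the KITTI
            # rotation_y convention stored below.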
            box.translate(np.array([0, box.wlh[2] / 2, 0]))
            category_id = CAT_IDS[det_name]

            amodel_center = project_to_image(
              np.array([box.center[0], box.center[1] - box.wlh[2] / 2, box.center[2]], 
                np.float32).reshape(1, 3), calib)[0].tolist()
            sample_ann = nusc.get(
              'sample_annotation', box.token)
            instance_token = sample_ann['instance_token']
            if not (instance_token in track_ids):
              track_ids[instance_token] = len(track_ids) + 1
            attribute_tokens = sample_ann['attribute_tokens']
            attributes = [nusc.get('attribute', att_token)['name'] \
              for att_token in attribute_tokens]
            att = '' if len(attributes) == 0 else attributes[0]
            if len(attributes) > 1:
              print(attributes)
              import pdb; pdb.set_trace()
            track_id = track_ids[instance_token]
            vel = nusc.box_velocity(box.token) # global frame
            vel = np.dot(np.linalg.inv(trans_matrix), 
              np.array([vel[0], vel[1], vel[2], 0], np.float32)).tolist()
            
            # instance information in COCO format
            ann = {
              'id': num_anns,
              'image_id': num_images,
              'category_id': category_id,
              'dim': [box.wlh[2], box.wlh[0], box.wlh[1]],
              'location': [box.center[0], box.center[1], box.center[2]],
              'depth': box.center[2],
              'occluded': 0,
              'truncated': 0,
              'rotation_y': yaw,
              'amodel_center': amodel_center,
              'iscrowd': 0,
              'track_id': track_id,
              'attributes': ATTRIBUTE_TO_ID[att],
              'velocity': vel
            }

            bbox = KittiDB.project_kitti_box_to_image(
              copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900))
            alpha = _rot_y2alpha(yaw, (bbox[0] + bbox[2]) / 2, 
                                 camera_intrinsic[0, 2], camera_intrinsic[0, 0])
            ann['bbox'] = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
            ann['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            ann['alpha'] = alpha
            anns.append(ann)

          # Filter out boxes that lie completely behind and inside a nearer box (fully occluded)
          visable_anns = []
          for i in range(len(anns)):
            vis = True
            for j in range(len(anns)):
              if anns[i]['depth'] - min(anns[i]['dim']) / 2 > \
                 anns[j]['depth'] + max(anns[j]['dim']) / 2 and \
                _bbox_inside(anns[i]['bbox'], anns[j]['bbox']):
                vis = False
                break
            if vis:
              visable_anns.append(anns[i])

          for ann in visable_anns:
            ret['annotations'].append(ann)
          if DEBUG:
            img_path = data_path + image_info['file_name']
            img = cv2.imread(img_path)
            img_3d = img.copy()
            for ann in visable_anns:
              bbox = ann['bbox']
              cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 
                            (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])), 
                            (0, 0, 255), 3, lineType=cv2.LINE_AA)
              box_3d = compute_box_3d(ann['dim'], ann['location'], ann['rotation_y'])
              box_2d = project_to_image(box_3d, calib)
              img_3d = draw_box_3d(img_3d, box_2d)

              pt_3d = unproject_2d_to_3d(ann['amodel_center'], ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('location', ann['location'])
              print('loc model', pt_3d)
              pt_2d = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32)
              pt_3d = unproject_2d_to_3d(pt_2d, ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('loc      ', pt_3d)
            cv2.imshow('img', img)
            cv2.imshow('img_3d', img_3d)
            cv2.waitKey()
            nusc.render_sample_data(image_token)
            plt.show()
    print('reordering images')
    images = ret['images']
    video_sensor_to_images = {}
    for image_info in images:
      tmp_seq_id = image_info['video_id'] * 20 + image_info['sensor_id']
      if tmp_seq_id in video_sensor_to_images:
        video_sensor_to_images[tmp_seq_id].append(image_info)
      else:
        video_sensor_to_images[tmp_seq_id] = [image_info]
    ret['images'] = []
    for tmp_seq_id in sorted(video_sensor_to_images):
      ret['images'] = ret['images'] + video_sensor_to_images[tmp_seq_id]

    print('{} {} images {} boxes'.format(
      split, len(ret['images']), len(ret['annotations'])))
    print('out_path', out_path)
    json.dump(ret, open(out_path, 'w'))
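The occlusion filter in this example depends on a _bbox_inside helper that is not shown. A plausible sketch, assuming [x, y, w, h] boxes and a strict-containment test:

def _bbox_inside(box1, box2):
    # True if box1 lies strictly inside box2; both boxes are [x, y, w, h].
    return box1[0] > box2[0] and box1[0] + box1[2] < box2[0] + box2[2] and \
           box1[1] > box2[1] and box1[1] + box1[3] < box2[1] + box2[3]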
Example #5
        anns = open(ann_path, 'r')
        anns_lines = anns.readlines()
        anns_list = removeWhiteSpaceInfile(anns_lines)
        loc_2d = []
        loc_flag = False

        # generate gt_dict_per_img
        for txt in anns_list:
            tmp = txt.split(',')  # whitespace already removed
            cat_id, gt_label_list, box_3d, loc_w_gt_tilt = read_gt_label(tmp, Rtilt)
            # for table/desk only (filter disabled):
            # if cat_id != 'table' and cat_id != 'desk':
            #     continue
            yaw_gt = str(gt_label_list[2] * 180 / math.pi)[:4]  # yaw in degrees, truncated string
            gt_dict_per_img[cat_id].append(gt_label_list)
            box_2d = project_to_image(box_3d, calib)
            loc_c_gt_tilt = [loc_w_gt_tilt[i] for i in [0,2,1]]
            loc_c_gt_tilt[1] = -loc_c_gt_tilt[1]
            # img = draw_box_3d(img, box_2d, c=(128, 128, 128))
            if loc_flag:
                continue
            if cat_id == 'table' or cat_id == 'desk':
                loc_flag = True
                loc_2d = project_to_image(np.array([loc_c_gt_tilt]), calib)

                cv2.circle(img, (int(loc_2d[0][0]), int(loc_2d[0][1])), 5, (0, 0, 0),
                           -1)
                rot_end_x = loc_2d[0][0] + np.cos(gt_label_list[2]) * 100
                rot_end_y = loc_2d[0][1] - np.sin(gt_label_list[2]) * 100
                cv2.arrowedLine(img, (int(loc_2d[0][0]), int(loc_2d[0][1])),
                                (int(rot_end_x), int(rot_end_y)), (0, 255, 255), 10, tipLength=0.3)
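removeWhiteSpaceInfile is not shown either; given that its output is split on commas afterwards, a plausible sketch is:

def removeWhiteSpaceInfile(lines):
    # Strip surrounding whitespace and drop empty lines (assumed behavior).
    return [ln.strip() for ln in lines if ln.strip()]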
Example #6
def main():
    SCENE_SPLITS["mini-val"] = SCENE_SPLITS["val"]
    if not os.path.exists(DATA_PATH):
        os.mkdir(DATA_PATH)
    if not os.path.exists(OUT_PATH):
        os.mkdir(OUT_PATH)
    for split in SPLITS:
        data_path = DATA_PATH  # + '{}/'.format(SPLITS[split])
        nusc = NuScenes(version=SPLITS[split],
                        dataroot=data_path,
                        verbose=True)
        out_path = OUT_PATH + "{}.json".format(split)
        categories_info = [{
            "name": CATS[i],
            "id": i + 1
        } for i in range(len(CATS))]
        ret = {
            "images": [],
            "annotations": [],
            "categories": categories_info,
            "videos": [],
            "attributes": ATTRIBUTE_TO_ID,
        }
        num_images = 0
        num_anns = 0
        num_videos = 0

        # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
        for sample in nusc.sample:
            scene_name = nusc.get("scene", sample["scene_token"])["name"]
            if not (split in ["mini", "test"
                              ]) and not (scene_name in SCENE_SPLITS[split]):
                continue
            if sample["prev"] == "":
                print("scene_name", scene_name)
                num_videos += 1
                ret["videos"].append({
                    "id": num_videos,
                    "file_name": scene_name
                })
                frame_ids = {k: 0 for k in sample["data"]}
                track_ids = {}
            # We decompose a sample into 6 images in our case.
            for sensor_name in sample["data"]:
                if sensor_name in USED_SENSOR:
                    image_token = sample["data"][sensor_name]
                    image_data = nusc.get("sample_data", image_token)
                    num_images += 1

                    # Complex coordinate transform. This will take time to understand.
                    sd_record = nusc.get("sample_data", image_token)
                    cs_record = nusc.get("calibrated_sensor",
                                         sd_record["calibrated_sensor_token"])
                    pose_record = nusc.get("ego_pose",
                                           sd_record["ego_pose_token"])
                    global_from_car = transform_matrix(
                        pose_record["translation"],
                        Quaternion(pose_record["rotation"]),
                        inverse=False,
                    )
                    car_from_sensor = transform_matrix(
                        cs_record["translation"],
                        Quaternion(cs_record["rotation"]),
                        inverse=False,
                    )
                    trans_matrix = np.dot(global_from_car, car_from_sensor)
                    _, boxes, camera_intrinsic = nusc.get_sample_data(
                        image_token, box_vis_level=BoxVisibility.ANY)
                    calib = np.eye(4, dtype=np.float32)
                    calib[:3, :3] = camera_intrinsic
                    calib = calib[:3]
                    frame_ids[sensor_name] += 1

                    # image information in COCO format
                    image_info = {
                        "id": num_images,
                        "file_name": image_data["filename"],
                        "calib": calib.tolist(),
                        "video_id": num_videos,
                        "frame_id": frame_ids[sensor_name],
                        "sensor_id": SENSOR_ID[sensor_name],
                        "sample_token": sample["token"],
                        "trans_matrix": trans_matrix.tolist(),
                        "width": sd_record["width"],
                        "height": sd_record["height"],
                        "pose_record_trans": pose_record["translation"],
                        "pose_record_rot": pose_record["rotation"],
                        "cs_record_trans": cs_record["translation"],
                        "cs_record_rot": cs_record["rotation"],
                    }
                    ret["images"].append(image_info)
                    anns = []
                    for box in boxes:
                        det_name = category_to_detection_name(box.name)
                        if det_name is None:
                            continue
                        num_anns += 1
                        v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                        yaw = -np.arctan2(v[2], v[0])
                        box.translate(np.array([0, box.wlh[2] / 2, 0]))
                        category_id = CAT_IDS[det_name]

                        amodel_center = project_to_image(
                            np.array(
                                [
                                    box.center[0],
                                    box.center[1] - box.wlh[2] / 2,
                                    box.center[2],
                                ],
                                np.float32,
                            ).reshape(1, 3),
                            calib,
                        )[0].tolist()
                        sample_ann = nusc.get("sample_annotation", box.token)
                        instance_token = sample_ann["instance_token"]
                        if not (instance_token in track_ids):
                            track_ids[instance_token] = len(track_ids) + 1
                        attribute_tokens = sample_ann["attribute_tokens"]
                        attributes = [
                            nusc.get("attribute", att_token)["name"]
                            for att_token in attribute_tokens
                        ]
                        att = "" if len(attributes) == 0 else attributes[0]
                        if len(attributes) > 1:
                            print(attributes)
                            import pdb

                            pdb.set_trace()
                        track_id = track_ids[instance_token]
                        vel = nusc.box_velocity(box.token)  # global frame
                        vel = np.dot(
                            np.linalg.inv(trans_matrix),
                            np.array([vel[0], vel[1], vel[2], 0], np.float32),
                        ).tolist()

                        # instance information in COCO format
                        ann = {
                            "id": num_anns,
                            "image_id": num_images,
                            "category_id": category_id,
                            "dim": [box.wlh[2], box.wlh[0], box.wlh[1]],
                            "location": [box.center[0], box.center[1], box.center[2]],
                            "depth": box.center[2],
                            "occluded": 0,
                            "truncated": 0,
                            "rotation_y": yaw,
                            "amodel_center": amodel_center,
                            "iscrowd": 0,
                            "track_id": track_id,
                            "attributes": ATTRIBUTE_TO_ID[att],
                            "velocity": vel,
                        }

                        bbox = KittiDB.project_kitti_box_to_image(
                            copy.deepcopy(box),
                            camera_intrinsic,
                            imsize=(1600, 900))
                        alpha = _rot_y2alpha(
                            yaw,
                            (bbox[0] + bbox[2]) / 2,
                            camera_intrinsic[0, 2],
                            camera_intrinsic[0, 0],
                        )
                        ann["bbox"] = [
                            bbox[0],
                            bbox[1],
                            bbox[2] - bbox[0],
                            bbox[3] - bbox[1],
                        ]
                        ann["area"] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                        ann["alpha"] = alpha
                        anns.append(ann)

                    # Filter out boxes that lie completely behind and inside a nearer box (fully occluded)
                    visable_anns = []
                    for i in range(len(anns)):
                        vis = True
                        for j in range(len(anns)):
                            if anns[i]["depth"] - min(anns[i][
                                    "dim"]) / 2 > anns[j]["depth"] + max(
                                        anns[j]["dim"]) / 2 and _bbox_inside(
                                            anns[i]["bbox"], anns[j]["bbox"]):
                                vis = False
                                break
                        if vis:
                            visable_anns.append(anns[i])

                    for ann in visable_anns:
                        ret["annotations"].append(ann)
                    if DEBUG:
                        img_path = data_path + image_info["file_name"]
                        img = cv2.imread(img_path)
                        img_3d = img.copy()
                        for ann in visable_anns:
                            bbox = ann["bbox"]
                            cv2.rectangle(
                                img,
                                (int(bbox[0]), int(bbox[1])),
                                (int(bbox[2] + bbox[0]),
                                 int(bbox[3] + bbox[1])),
                                (0, 0, 255),
                                3,
                                lineType=cv2.LINE_AA,
                            )
                            box_3d = compute_box_3d(ann["dim"],
                                                    ann["location"],
                                                    ann["rotation_y"])
                            box_2d = project_to_image(box_3d, calib)
                            img_3d = draw_box_3d(img_3d, box_2d)

                            pt_3d = unproject_2d_to_3d(ann["amodel_center"],
                                                       ann["depth"], calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("location", ann["location"])
                            print("loc model", pt_3d)
                            pt_2d = np.array(
                                [(bbox[0] + bbox[2]) / 2,
                                 (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32,
                            )
                            pt_3d = unproject_2d_to_3d(pt_2d, ann["depth"],
                                                       calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("loc      ", pt_3d)
                        cv2.imshow("img", img)
                        cv2.imshow("img_3d", img_3d)
                        cv2.waitKey()
                        nusc.render_sample_data(image_token)
                        plt.show()
        print("reordering images")
        images = ret["images"]
        video_sensor_to_images = {}
        for image_info in images:
            tmp_seq_id = image_info["video_id"] * 20 + image_info["sensor_id"]
            if tmp_seq_id in video_sensor_to_images:
                video_sensor_to_images[tmp_seq_id].append(image_info)
            else:
                video_sensor_to_images[tmp_seq_id] = [image_info]
        ret["images"] = []
        for tmp_seq_id in sorted(video_sensor_to_images):
            ret["images"] = ret["images"] + video_sensor_to_images[tmp_seq_id]

        print("{} {} images {} boxes".format(split, len(ret["images"]),
                                             len(ret["annotations"])))
        print("out_path", out_path)
        json.dump(ret, open(out_path, "w"))
Example #7
            print("location_ori_changed")
            print(location_ori_changed)
            location_tilt = np.dot(np.transpose(Rtilt), location)
            print("rtilted location")
            print(location)
            location_changed = [location_tilt[i] for i in sun_permutation]
            location_changed[1] *= -1
            ct_3d_from_2d = [ct_x * location_ori_changed[2], ct_y * location_ori_changed[2], location_ori_changed[2]]
            calib_3 = calib[0:3, 0:3]  # 3x3 intrinsic block K
            inv_K = inv(calib_3)
            ct_3d_from_2d2 = np.dot(inv_K, ct_3d_from_2d)
            ct_3d_from_2d2_world = [ct_3d_from_2d2[i] for i in sun_permutation]
            ct_3d_from_2d2_world[2] *= -1
            box_3d_new = compute_box_3d_world(dim, ct_3d_from_2d2_world, rotation_y, theta)
            box_2d_new = project_to_image(box_3d_new, calib)
            image5 = draw_box_3d_world(image, box_2d_new, image_id, c=(255, 0, 0))

            print(ct_3d_from_2d2)
            print("내가 location_changed")
            print(location_changed)
            print("dim")
            print(dim)
            #dim_changed = [dim[i]*2 if dim[i] > 0 else print("error" + str(image_id)) for i in sun_permutation]  # because dim stores sizes
            dim_changed = [dim[i]*2 for i in sun_permutation]  # because dim stores sizes (half-extents), so double them
            #dim_2 = [dim[i]*2 for i in range(0,3)]  # because dim stores sizes

            #print(dim_changed)

            # Project the Rtilt-applied location onto the 2D image.
            ct3d_to_ct2d = project_to_image(np.array([location_changed]), calib)  # project the 3D bounding box onto the image
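The back-projection above follows the pinhole relation K [X, Y, Z]^T = z [u, v, 1]^T. The same computation as a standalone sketch (hypothetical helper; K is the 3 x 3 intrinsic block):

import numpy as np
from numpy.linalg import inv

def unproject_pixel(u, v, z, K):
    # Solve K @ [X, Y, Z]^T = [u*z, v*z, z]^T for the camera-frame point.
    return inv(K) @ np.array([u * z, v * z, z], dtype=np.float64)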
Example #8
                rotation_y = float(tmp[14])
                num_keypoints = 0
                box_2d_as_point = [0] * 27
                bbox = [0., 0., 0., 0.]
                calib_list = np.reshape(calib, (12)).tolist()
                if tmp[0] in det_cats:
                    image = cv2.imread(
                        os.path.join(image_set_path, image_info['file_name']))
                    bbox = [
                        float(tmp[4]),
                        float(tmp[5]),
                        float(tmp[6]),
                        float(tmp[7])
                    ]
                    box_3d = compute_box_3d(dim, location, rotation_y)
                    box_2d_as_point, vis_num, pts_center = project_to_image(
                        box_3d, calib, image.shape)
                    box_2d_as_point = np.reshape(box_2d_as_point, (1, 27))
                    #box_2d_as_point = box_2d_as_point.astype(np.int32)
                    box_2d_as_point = box_2d_as_point.tolist()[0]
                    num_keypoints = vis_num

                    off_set = (calib[0, 3] - calib0[0, 3]) / calib[0, 0]
                    location[0] += off_set  # shift x by the camera offset between calib and calib0 (original note: "confuse")
                    alpha = rotation_y - math.atan2(
                        pts_center[0, 0] - calib[0, 2], calib[0, 0])
                    ann = {
                        'segmentation': [[0, 0, 0, 0, 0, 0]],
                        'num_keypoints': num_keypoints,
                        'area': 1,
                        'iscrowd': 0,
Example #9
                    yaw_gt_eval = key_val_gt_list[index_gt][2]
                    bbox2D_gt_eval = key_val_gt_list[index_gt][3]
                    # world(x, y, z) = cam(x, -z, y)
                    depth_gt = loc_w_gt_eval[1]


                    #cam_loc
                    z = depth_gt
                    x = (center_2d_pd_x * z - calib[0, 2] * z) / calib[0, 0]
                    y = (center_2d_pd_y * z - calib[1, 2] * z) / calib[1, 1]
                    #world_loc
                    pt_3d_world = np.array([x, z, -y], dtype=np.float32)

                    rot_yaw_w_gt = compute_rot_yaw_w(yaw_gt_eval)
                    box_3d_loc_gt = compute_3d_box(dim_w_gt_eval, loc_w_gt_eval, rot_yaw_w_gt)
                    box_2d_loc_gt = project_to_image(box_3d_loc_gt, calib)
                    draw_box_3d(img, box_2d_loc_gt, (128, 128, 128))

                    rot_yaw_w_pd = compute_rot_yaw_w(yaw_pd_eval)
                    cos_pd = np.cos(yaw_pd_eval)
                    sin_pd = np.sin(yaw_pd_eval)
                    # rot_yaw_w_pd = np.array([[cos_pd, -sin_pd, 0], [sin_pd, cos_pd, 0], [0, 0, 1]], dtype=np.float32)
                    loc_test = [loc_w_pd_eval[0], loc_w_gt_eval[1], loc_w_pd_eval[2]]
                    # box_3d_loc_pd = compute_3d_box(dim_w_pd_eval, loc_w_pd_eval, rot_yaw_w_pd)
                    box_3d_loc_pd = compute_3d_box(dim_w_pd_eval, pt_3d_world, rot_yaw_w_pd)
                    box_2d_loc_pd = project_to_image(box_3d_loc_pd, calib)
                    draw_box_3d(img, box_2d_loc_pd, (0, 255, 255))

                    # box_3d_loc = compute_3d_box(dim_w_pd_eval, loc_w_pd_eval, rot_yaw_w_pd)
                    # box_2d_loc = project_to_image(box_3d_loc, calib)
                    # draw_box_3d(img, box_2d_loc, (0, 255, 0))
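compute_rot_yaw_w is not shown, but the commented-out matrix above it suggests a rotation about the world up-axis. A sketch under that assumption:

import numpy as np

def compute_rot_yaw_w(yaw):
    # Rotation by yaw about the world z (up) axis.
    c, s = np.cos(yaw), np.sin(yaw)
    return np.array([[c, -s, 0],
                     [s,  c, 0],
                     [0,  0, 1]], dtype=np.float32)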
Example #10
    def create_pc_pillars(self, img, img_info, pc_2d, pc_3d, inp_trans,
                          out_trans):
        pillar_wh = np.zeros((2, pc_3d.shape[1]))
        boxes_2d = np.zeros((0, 8, 2))
        pillar_dim = self.opt.pillar_dims
        v = np.dot(np.eye(3), np.array([1, 0, 0]))
        ry = -np.arctan2(v[2], v[0])
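        # np.eye(3) leaves [1, 0, 0] unchanged, so ry = -arctan2(0, 1) = 0:
        # every pillar is axis-aligned in the camera frame.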

        for i, center in enumerate(pc_3d[:3, :].T):
            # Create a 3D pillar at pc location for the full-size image
            box_3d = compute_box_3d(dim=pillar_dim,
                                    location=center,
                                    rotation_y=ry)
            box_2d = project_to_image(box_3d, img_info['calib']).T  # [2x8]

            ## save the box for debug plots
            if self.opt.debug:
                box_2d_img, m = self._transform_pc(box_2d,
                                                   inp_trans,
                                                   self.opt.input_w,
                                                   self.opt.input_h,
                                                   filter_out=False)
                boxes_2d = np.concatenate(
                    (boxes_2d, np.expand_dims(box_2d_img.T, 0)), 0)

            # transform points
            box_2d_t, m = self._transform_pc(box_2d, out_trans,
                                             self.opt.output_w,
                                             self.opt.output_h)

            if box_2d_t.shape[1] <= 1:
                continue

            # get the bounding box in [xyxy] format
            bbox = [
                np.min(box_2d_t[0, :]),
                np.min(box_2d_t[1, :]),
                np.max(box_2d_t[0, :]),
                np.max(box_2d_t[1, :])
            ]  # format: xyxy

            # store height and width of the 2D box
            pillar_wh[0, i] = bbox[2] - bbox[0]
            pillar_wh[1, i] = bbox[3] - bbox[1]

        ## DEBUG #################################################################
        if self.opt.debug:
            img_2d = copy.deepcopy(img)
            # img_3d = copy.deepcopy(img)
            img_2d_inp = cv2.warpAffine(img,
                                        inp_trans,
                                        (self.opt.input_w, self.opt.input_h),
                                        flags=cv2.INTER_LINEAR)
            img_2d_out = cv2.warpAffine(img,
                                        out_trans,
                                        (self.opt.output_w, self.opt.output_h),
                                        flags=cv2.INTER_LINEAR)
            img_3d = cv2.warpAffine(img,
                                    inp_trans,
                                    (self.opt.input_w, self.opt.input_h),
                                    flags=cv2.INTER_LINEAR)
            blank_image = 255 * np.ones(
                (self.opt.input_h, self.opt.input_w, 3), np.uint8)
            overlay = img_2d_inp.copy()
            output = img_2d_inp.copy()

            pc_inp, _ = self._transform_pc(pc_2d, inp_trans, self.opt.input_w,
                                           self.opt.input_h)
            pc_out, _ = self._transform_pc(pc_2d, out_trans, self.opt.output_w,
                                           self.opt.output_h)

            pill_wh_inp = pillar_wh * (self.opt.input_w / self.opt.output_w)
            pill_wh_out = pillar_wh
            pill_wh_ori = pill_wh_inp * 2

            for i, p in enumerate(pc_inp[:3, :].T):
                color = int((p[2].tolist() / 60.0) * 255)
                color = (0, color, 0)

                # Clamp to the image: np.min(x, 0) in the original passed 0 as
                # the axis argument (a no-op on scalars); max(x, 0) is the intent.
                rect_tl = (max(int(p[0] - pill_wh_inp[0, i] / 2), 0),
                           max(int(p[1] - pill_wh_inp[1, i]), 0))
                rect_br = (max(int(p[0] + pill_wh_inp[0, i] / 2), 0), int(p[1]))
                cv2.rectangle(img_2d_inp,
                              rect_tl,
                              rect_br, (0, 0, 255),
                              1,
                              lineType=cv2.LINE_AA)
                img_2d_inp = cv2.circle(img_2d_inp, (int(p[0]), int(p[1])), 3,
                                        color, -1)

                ## On original-sized image
                rect_tl_ori = (max(int(pc_2d[0, i] - pill_wh_ori[0, i] / 2), 0),
                               max(int(pc_2d[1, i] - pill_wh_ori[1, i]), 0))
                rect_br_ori = (max(int(pc_2d[0, i] + pill_wh_ori[0, i] / 2), 0),
                               int(pc_2d[1, i]))
                cv2.rectangle(img_2d,
                              rect_tl_ori,
                              rect_br_ori, (0, 0, 255),
                              2,
                              lineType=cv2.LINE_AA)
                img_2d = cv2.circle(img_2d,
                                    (int(pc_2d[0, i]), int(pc_2d[1, i])), 6,
                                    color, -1)

                p2 = pc_out[:3, i].T
                rect_tl2 = (max(int(p2[0] - pill_wh_out[0, i] / 2), 0),
                            max(int(p2[1] - pill_wh_out[1, i]), 0))
                rect_br2 = (max(int(p2[0] + pill_wh_out[0, i] / 2), 0), int(p2[1]))
                cv2.rectangle(img_2d_out,
                              rect_tl2,
                              rect_br2, (0, 0, 255),
                              1,
                              lineType=cv2.LINE_AA)
                img_2d_out = cv2.circle(img_2d_out, (int(p2[0]), int(p2[1])), 3,
                                        (255, 0, 0), -1)  # use p2 (output-resolution point), not p

                # on blank image
                cv2.rectangle(blank_image,
                              rect_tl,
                              rect_br,
                              color,
                              -1,
                              lineType=cv2.LINE_AA)

                # overlay
                alpha = 0.1
                cv2.rectangle(overlay,
                              rect_tl,
                              rect_br,
                              color,
                              -1,
                              lineType=cv2.LINE_AA)
                cv2.addWeighted(overlay, alpha, output, 1 - alpha, 0, output)

                # plot 3d pillars
                img_3d = draw_box_3d(img_3d,
                                     boxes_2d[i].astype(np.int32),
                                     [114, 159, 207],
                                     same_color=False)

            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_2d_inp.' + self.opt.img_format) \
                        .format(self.img_ind), img_2d_inp)
            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_2d_ori.' + self.opt.img_format) \
                        .format(self.img_ind), img_2d)
            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_2d_out.' + self.opt.img_format) \
                        .format(self.img_ind), img_2d_out)
            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_2d_blank.' + self.opt.img_format) \
                        .format(self.img_ind), blank_image)
            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_2d_overlay.' + self.opt.img_format) \
                        .format(self.img_ind), output)
            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_3d.' + self.opt.img_format) \
                        .format(self.img_ind), img_3d)
            self.img_ind += 1
        ## DEBUG #################################################################
        return pillar_wh
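For reference, nearly every example above builds its corners with compute_box_3d. A minimal sketch of the usual KITTI-style version (assumed convention: dim = [h, w, l], location = bottom-face center in camera coordinates, rotation about the camera y-axis; the SUN RGB-D and world-frame variants in Examples #1, #7 and #9 differ):

import numpy as np

def compute_box_3d(dim, location, rotation_y):
    # Rotation about the camera y-axis.
    c, s = np.cos(rotation_y), np.sin(rotation_y)
    R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32)
    h, w, l = dim
    # Eight corners relative to the bottom-face center (y points down).
    x_c = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
    y_c = [0, 0, 0, 0, -h, -h, -h, -h]
    z_c = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
    corners = R @ np.array([x_c, y_c, z_c], dtype=np.float32)
    return (corners + np.array(location, dtype=np.float32).reshape(3, 1)).T  # (8, 3)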