Example #1
def map_ground_bounding_box_to_2D(distance_img, world_transform,
                                  obstacle_transform, bounding_box,
                                  rgb_transform, rgb_intrinsic, rgb_img_size):
    (image_width, image_height) = rgb_img_size
    extrinsic_mat = world_transform * rgb_transform
    obj_transform = Transform(obstacle_transform.to_transform_pb2())
    bbox_pb2 = bounding_box.to_bounding_box_pb2()
    bbox_transform = Transform(bbox_pb2.transform)
    ext = bbox_pb2.extent

    # 8 bounding box vertices relative to (0,0,0)
    bbox = np.array([[ext.x, ext.y, ext.z], [ext.x, -ext.y, ext.z],
                     [ext.x, ext.y, -ext.z], [ext.x, -ext.y, -ext.z],
                     [-ext.x, ext.y, ext.z], [-ext.x, -ext.y, ext.z],
                     [-ext.x, ext.y, -ext.z], [-ext.x, -ext.y, -ext.z]])

    # Transform the vertices with respect to the bounding box transform.
    bbox = bbox_transform.transform_points(bbox)

    # The bounding box transform is with respect to the object transform.
    # Transform the points relative to its transform.
    bbox = obj_transform.transform_points(bbox)

    # Object's transform is relative to the world. Thus, the bbox contains
    # the 3D bounding box vertices relative to the world.

    coords = []
    for vertex in bbox:
        pos_vector = np.array([
            [vertex[0, 0]],  # [[X,
            [vertex[0, 1]],  #   Y,
            [vertex[0, 2]],  #   Z,
            [1.0]  #   1.0]]
        ])
        # Transform the points to camera.
        transformed_3d_pos = np.dot(inv(extrinsic_mat.matrix), pos_vector)
        # Transform the points to 2D.
        pos2d = np.dot(rgb_intrinsic, transformed_3d_pos[:3])

        # Normalize the 2D points.
        pos2d = np.array([pos2d[0] / pos2d[2], pos2d[1] / pos2d[2], pos2d[2]])

        # Keep the point only if it projects in front of the camera.
        if pos2d[2] > 0:
            x_2d = float(image_width - pos2d[0])
            y_2d = float(image_height - pos2d[1])
            # Loose bounds check; the `or` lets vertices slightly outside the
            # image pass, so partially visible boxes are kept.
            if (x_2d >= 0 or y_2d >= 0) and (x_2d < image_width
                                             or y_2d < image_height):
                coords.append((x_2d, y_2d, pos2d[2]))

    return coords
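
A minimal call-site sketch for the function above. Every name besides map_ground_bounding_box_to_2D itself (depth_frame, measurements, agent, camera_to_car_transform, K) is an assumption standing in for objects from the surrounding application:

# Hypothetical usage sketch; all names other than the function are assumed.
coords = map_ground_bounding_box_to_2D(
    distance_img=depth_frame,
    world_transform=Transform(measurements.player_measurements.transform),
    obstacle_transform=agent.vehicle.transform,
    bounding_box=agent.vehicle.bounding_box,
    rgb_transform=camera_to_car_transform,
    rgb_intrinsic=K,
    rgb_img_size=(800, 600))
if coords:
    xs = [c[0] for c in coords]
    ys = [c[1] for c in coords]
    bbox_2d = (min(xs), min(ys), max(xs), max(ys))  # axis-aligned 2D box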
Example #2
def run_carla_client(args):
    # Here we will run 150 episodes with 500 frames each.
    number_of_episodes = 150
    frames_per_episode = 500

    # Example commands for launching the CARLA server:
    #   ./CarlaUE4.sh -carla-server -benchmark -fps=17 -windowed
    #   carla-server "/usr/local/carla/Unreal/CarlaUE4/CarlaUE4.uproject" /usr/local/carla/Maps/Town03 -benchmark -fps=10 -windowed


    # We assume the CARLA server is already waiting for a client to connect at
    # host:port. To create a connection we can use the `make_carla_client`
    # context manager, it creates a CARLA client object and starts the
    # connection. It will throw an exception if something goes wrong. The
    # context manager makes sure the connection is always cleaned up on exit.
    with make_carla_client(args.host, args.port) as client:

        print('CarlaClient connected')
        global episode_nbr
        print(episode_nbr)
        for episode in range(0, number_of_episodes):
            if episode % 1 == 0:  # Always true; adjust the modulus to skip episodes.
                output_folder = 'Datasets/carla-sync/train/test_' + str(episode)
                if not os.path.exists(output_folder+"/cameras.p"):
                    # Start a new episode.
                    episode_nbr = episode
                    frame_step = 1  # Save an image every frame.
                    pointcloud_step = 50  # Save a point cloud every 50 frames.
                    image_size = [800, 600]
                    camera_local_pos = [0.3, 0.0, 1.3]  # [X, Y, Z]
                    camera_local_rotation = [0, 0, 0]  # [pitch(Y), yaw(Z), roll(X)]
                    fov = 70
                    # Create a CarlaSettings object. This object is a wrapper around
                    # the CarlaSettings.ini file. Here we set the configuration we
                    # want for the new episode.
                    settings = CarlaSettings()
                    settings.set(
                        SynchronousMode=True,
                        SendNonPlayerAgentsInfo=True,
                        NumberOfVehicles=50,
                        NumberOfPedestrians=200,
                        WeatherId=random.choice([1, 3, 7, 8, 14]),
                        QualityLevel=args.quality_level)
                    settings.randomize_seeds()

                    # Now we want to add a couple of cameras to the player vehicle.
                    # We will collect the images produced by these cameras every
                    # frame.


                    camera1 = Camera('CameraDepth', PostProcessing='Depth', FOV=fov)
                    camera1.set_image_size(*image_size)
                    camera1.set_position(*camera_local_pos)
                    camera1.set_rotation(*camera_local_rotation)
                    settings.add_sensor(camera1)

                    camera2 = Camera('CameraRGB', PostProcessing='SceneFinal', FOV=fov)
                    camera2.set_image_size(*image_size)
                    camera2.set_position(*camera_local_pos)
                    camera2.set_rotation(*camera_local_rotation)
                    settings.add_sensor(camera2)

                    camera3 = Camera('CameraSeg', PostProcessing='SemanticSegmentation', FOV=fov)
                    camera3.set_image_size(*image_size)
                    camera3.set_position(*camera_local_pos)
                    camera3.set_rotation(*camera_local_rotation)
                    settings.add_sensor(camera3)



                    # Now we load these settings into the server. The server replies
                    # with a scene description containing the available start spots for
                    # the player. Here we can provide a CarlaSettings object or a
                    # CarlaSettings.ini file as string.
                    scene = client.load_settings(settings)

                    # Choose one player start at random.
                    number_of_player_starts = len(scene.player_start_spots)
                    player_start = episode  # Alternatively: random.randint(0, max(0, number_of_player_starts - 1))

                    # Notify the server that we want to start the episode at the
                    # player_start index. This function blocks until the server is ready
                    # to start the episode.
                    print('Starting new episode...')
                    client.start_episode(player_start)


                    cameras_dict = {}
                    pedestrians_dict = {}
                    cars_dict = {}
                    # Compute the camera transform matrix
                    camera_to_car_transform = camera2.get_unreal_transform()
                    # (Intrinsic) (3, 3) K Matrix
                    K = np.identity(3)
                    K[0, 2] = image_size[0] / 2.0
                    K[1, 2] = image_size[1] / 2.0
                    K[0, 0] = K[1, 1] = image_size[0] / (2.0 * np.tan(fov * np.pi / 360.0))
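                    # With image_size[0] = 800 and fov = 70, the focal length is
                    # 800 / (2 * tan(35 deg)) ~= 571.3 pixels; K projects a
                    # camera-space point [X, Y, Z] to pixels via K @ [X, Y, Z],
                    # followed by division by Z.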
                    with open(output_folder + '/camera_intrinsics.p', 'wb') as camfile:
                        pickle.dump(K, camfile)


                    # Iterate every frame in the episode.
                    for frame in range(0, frames_per_episode):

                        # Read the data produced by the server this frame.
                        measurements, sensor_data = client.read_data()

                        # Print some of the measurements.
                        #print_measurements(measurements)
                        if not frame % frame_step:
                            # Save the images to disk if requested.

                            for name, measurement in sensor_data.items():
                                filename = args.out_filename_format.format(episode, name, frame)
                                print(filename)
                                measurement.save_to_disk(filename)

                            # We can access the encoded data of a given image as numpy
                            # array using its "data" property. For instance, to get the
                            # depth value (normalized) at pixel X, Y
                            #
                            #     depth_array = sensor_data['CameraDepth'].data
                            #     value_at_pixel = depth_array[Y, X]
                            #

                            # Now we have to send the instructions to control the vehicle.
                            # If we are in synchronous mode the server will pause the
                            # simulation until we send this control.

                            # RGB image [[[r,g,b],..[r,g,b]],..[[r,g,b],..[r,g,b]]]
                            image_RGB = to_rgb_array(sensor_data['CameraRGB'])

                            labels = labels_to_array(sensor_data['CameraSeg'])[:, :, np.newaxis]

                            image_seg = np.tile(labels, (1, 1, 3))
                            # CARLA's depth is normalized to [0, 1]; scale by the
                            # 1000 m far plane to get depth in meters.
                            depth_array = sensor_data['CameraDepth'].data * 1000


                            # 2D to (camera) local 3D.
                            # We use image_RGB to colorize each 3D point (optional).
                            # "max_depth" keeps only points near the camera; 1.0
                            # corresponds to the farthest points (sky).
                            if not frame % pointcloud_step:
                                point_cloud = depth_to_local_point_cloud(
                                    sensor_data['CameraDepth'],
                                    image_RGB,
                                    max_depth=args.far
                                )

                                point_cloud_seg = depth_to_local_point_cloud(
                                    sensor_data['CameraDepth'],
                                    segmentation=image_seg,
                                    max_depth=args.far
                                )

                            # (Camera) local 3d to world 3d.
                            # Get the transform from the player protobuf transformation.
                            world_transform = Transform(
                                measurements.player_measurements.transform
                            )

                            # Compute the final transformation matrix.
                            car_to_world_transform = world_transform * camera_to_car_transform

                            # Car-to-world transformation applied to the 3D points.
                            # Guarded so the same cloud is not transformed again on
                            # frames where no new point cloud was computed.
                            if not frame % pointcloud_step:
                                point_cloud.apply_transform(car_to_world_transform)
                                point_cloud_seg.apply_transform(car_to_world_transform)

                            Rt = car_to_world_transform.matrix
                            Rt_inv = car_to_world_transform.inverse().matrix  # world-to-camera matrix
                            # R_inv = world_transform.inverse().matrix
                            cameras_dict[frame] = {}
                            cameras_dict[frame]['inverse_rotation'] = Rt_inv[:]
                            cameras_dict[frame]['rotation'] = Rt[:]
                            cameras_dict[frame]['translation'] = Rt_inv[0:3, 3]
                            cameras_dict[frame]['camera_to_car'] = camera_to_car_transform.matrix

                            # Get non-player info
                            vehicles = {}
                            pedestrians = {}
                            for agent in measurements.non_player_agents:
                                # check if the agent is a vehicle.
                                if agent.HasField('vehicle'):
                                    pos = agent.vehicle.transform.location
                                    pos_vector = np.array([[pos.x], [pos.y], [pos.z], [1.0]])

                                    transformed_3d_pos = np.dot(Rt_inv, pos_vector)
                                    pos2d = np.dot(K, transformed_3d_pos[:3])

                                    # Normalize the point
                                    norm_pos2d = np.array([
                                        pos2d[0] / pos2d[2],
                                        pos2d[1] / pos2d[2],
                                        pos2d[2]])

                                    # Now, pos2d contains the [x, y, d] values of the image in pixels (where d is the depth)
                                    # You can use the depth to know the points that are in front of the camera (positive depth).

                                    x_2d = image_size[0] - norm_pos2d[0]
                                    y_2d = image_size[1] - norm_pos2d[1]
                                    vehicles[agent.id] = {}
                                    vehicles[agent.id]['transform'] = [agent.vehicle.transform.location.x,
                                                                       agent.vehicle.transform.location.y,
                                                                       agent.vehicle.transform.location.z]
                                    vehicles[agent.id][
                                        'bounding_box.transform'] = agent.vehicle.bounding_box.transform.location.z

                                    vehicles[agent.id]['yaw'] = agent.vehicle.transform.rotation.yaw
                                    vehicles[agent.id]['bounding_box'] = [agent.vehicle.bounding_box.extent.x,
                                                                          agent.vehicle.bounding_box.extent.y,
                                                                          agent.vehicle.bounding_box.extent.z]
                                    vehicle_transform = Transform(agent.vehicle.bounding_box.transform)
                                    pos = agent.vehicle.transform.location

                                    bbox3d = agent.vehicle.bounding_box.extent

                                    # Compute the 3D bounding box vertices; the first
                                    # four rows of f are the bottom face.
                                    f = np.array([[pos.x + bbox3d.x, pos.y - bbox3d.y,
                                                   pos.z - bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x + bbox3d.x, pos.y + bbox3d.y,
                                                   pos.z - bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x - bbox3d.x, pos.y + bbox3d.y,
                                                   pos.z - bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x - bbox3d.x, pos.y - bbox3d.y,
                                                   pos.z - bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x + bbox3d.x, pos.y - bbox3d.y,
                                                   pos.z + bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x + bbox3d.x, pos.y + bbox3d.y,
                                                   pos.z + bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x - bbox3d.x, pos.y + bbox3d.y,
                                                   pos.z + bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x - bbox3d.x, pos.y - bbox3d.y,
                                                   pos.z + bbox3d.z + agent.vehicle.bounding_box.transform.location.z]])

                                    f_rotated = vehicle_transform.transform_points(f)
                                    f_2D_rotated = []
                                    vehicles[agent.id]['bounding_box_coord'] = f_rotated

                                    for i in range(f.shape[0]):
                                        point = np.array([[f_rotated[i, 0]], [f_rotated[i, 1]], [f_rotated[i, 2]], [1]])
                                        transformed_2d_pos = np.dot(Rt_inv, point)
                                        pos2d = np.dot(K, transformed_2d_pos[:3])
                                        norm_pos2d = np.array([
                                            pos2d[0] / pos2d[2],
                                            pos2d[1] / pos2d[2],
                                            pos2d[2]])
                                        # print([image_size[0] - (pos2d[0] / pos2d[2]), image_size[1] - (pos2d[1] / pos2d[2])])
                                        f_2D_rotated.append(
                                            np.array([image_size[0] - norm_pos2d[0], image_size[1] - norm_pos2d[1]]))
                                    vehicles[agent.id]['bounding_box_2D'] = f_2D_rotated


                                elif agent.HasField('pedestrian'):
                                    pedestrians[agent.id] = {}
                                    pedestrians[agent.id]['transform'] = [agent.pedestrian.transform.location.x,
                                                                          agent.pedestrian.transform.location.y,
                                                                          agent.pedestrian.transform.location.z]
                                    pedestrians[agent.id]['yaw'] = agent.pedestrian.transform.rotation.yaw
                                    pedestrians[agent.id]['bounding_box'] = [agent.pedestrian.bounding_box.extent.x,
                                                                             agent.pedestrian.bounding_box.extent.y,
                                                                             agent.pedestrian.bounding_box.extent.z]
                                    # Get the needed transformations. Wrap them in Transform()
                                    # explicitly so transform_points() is available.
                                    pedestrian_transform = Transform(agent.pedestrian.transform)
                                    bbox_transform = Transform(agent.pedestrian.bounding_box.transform)

                                    # get the box extent
                                    ext = agent.pedestrian.bounding_box.extent
                                    # 8 bounding box vertices relative to (0,0,0)
                                    bbox = np.array([
                                        [  ext.x,   ext.y,   ext.z],
                                        [- ext.x,   ext.y,   ext.z],
                                        [  ext.x, - ext.y,   ext.z],
                                        [- ext.x, - ext.y,   ext.z],
                                        [  ext.x,   ext.y, - ext.z],
                                        [- ext.x,   ext.y, - ext.z],
                                        [  ext.x, - ext.y, - ext.z],
                                        [- ext.x, - ext.y, - ext.z]
                                    ])

                                    # Transform the vertices with respect to the bounding box transform.
                                    bbox = bbox_transform.transform_points(bbox)

                                    # The bounding box transform is relative to the pedestrian transform,
                                    # so transform the points relative to it.
                                    bbox = pedestrian_transform.transform_points(bbox)

                                    # The pedestrian's transform is relative to the world, so bbox now
                                    # holds the 3D bounding box vertices in world coordinates.
                                    pedestrians[agent.id]['bounding_box_coord'] = copy.deepcopy(bbox)

                                    # Project the vertices to 2D to verify the result.
                                    f_2D_rotated = []
                                    ys = []
                                    xs = []
                                    zs = []
                                    for vertex in bbox:
                                        pos_vector = np.array([
                                            [vertex[0,0]],  # [[X,
                                            [vertex[0,1]],  #   Y,
                                            [vertex[0,2]],  #   Z,
                                            [1.0]           #   1.0]]
                                        ])

                                        # Transform the point to camera space.
                                        transformed_3d_pos = np.dot(Rt_inv, pos_vector)
                                        zs.append(transformed_3d_pos[2])
                                        # Project the point to 2D.
                                        pos2d = np.dot(K, transformed_3d_pos[:3])

                                        # normalize the 2D points
                                        pos2d = np.array([
                                            pos2d[0] / pos2d[2],
                                            pos2d[1] / pos2d[2],
                                            pos2d[2]
                                        ])

                                        # Keep the points that project in front of the camera.
                                        if pos2d[2] > 0:
                                            x_2d = image_size[0] - pos2d[0]
                                            y_2d = image_size[1] - pos2d[1]
                                            ys.append(y_2d)
                                            xs.append(x_2d)
                                            f_2D_rotated.append((y_2d, x_2d))
                                    if len(xs) > 1:
                                        bbox = [[int(min(xs)), int(max(xs))], [int(min(ys)), int(max(ys))]]
                                        clipped_seg = labels[bbox[1][0]:bbox[1][1], bbox[0][0]:bbox[0][1]]
                                        recounted = Counter(clipped_seg.flatten())

                                        # Label 4 is 'Pedestrian' in CARLA's semantic segmentation;
                                        # require pedestrian pixels to cover at least 10% of the crop.
                                        if 4 in recounted.keys() and recounted[4] > 0.1 * len(clipped_seg.flatten()):
                                            clipped_depth = depth_array[bbox[1][0]:bbox[1][1], bbox[0][0]:bbox[0][1]]
                                            people_indx = np.where(clipped_seg == 4)
                                            masked_depth = []
                                            for people in zip(people_indx[0], people_indx[1]):
                                                masked_depth.append(clipped_depth[people])
                                            avg_depth = np.mean(masked_depth)
                                            # Accept the detection only if the average segmented depth
                                            # agrees with the projected vertex depths.
                                            if avg_depth < 700 and min(zs) - 10 <= avg_depth <= max(zs) + 10:
                                                pedestrians[agent.id]['bounding_box_2D'] = f_2D_rotated
                                                pedestrians[agent.id]['bounding_box_2D_size'] = recounted[4]
                                                pedestrians[agent.id]['bounding_box_2D_avg_depth'] = avg_depth
                                                pedestrians[agent.id]['bounding_box_2D_depths'] = zs



                            cars_dict[frame] = vehicles
                            pedestrians_dict[frame] = pedestrians

                            # Save PLY to disk
                            # This generates the PLY string with the 3D points and the RGB colors
                            # for each row of the file.
                            if not frame % pointcloud_step:
                                point_cloud.save_to_disk(os.path.join(
                                    output_folder, '{:0>5}.ply'.format(frame))
                                )
                                point_cloud_seg.save_to_disk(os.path.join(
                                    output_folder, '{:0>5}_seg.ply'.format(frame))
                                )

                        if not args.autopilot:
                            # Keep the car stationary.
                            client.send_control(hand_brake=True)

                        else:

                            # Together with the measurements, the server has sent the
                            # control that the in-game autopilot would do this frame. We
                            # can enable autopilot by sending back this control to the
                            # server. We can modify it if wanted, here for instance we
                            # will add some noise to the steer.

                            control = measurements.player_measurements.autopilot_control
                            control.steer += random.uniform(-0.1, 0.1)
                            client.send_control(control)
                    print ("Start pickle save")
                    with open(output_folder + '/cameras.p', 'w') as camerafile:
                        pickle.dump(cameras_dict, camerafile)
                    with open(output_folder + '/people.p', 'w') as peoplefile:
                        pickle.dump(pedestrians_dict, peoplefile)
                    with open(output_folder + '/cars.p', 'w') as carfile:
                        pickle.dump(cars_dict, carfile)
                    print ("Episode done")
Example #3
def run_carla_client(host, port, far):
    # Run 150 episodes (one per start position) with 30 frames each.
    number_of_frames = 30
    frame_step = 10  # Save data every 10 frames.

    image_size = [800, 600]
    camera_local_pos = [0.3, 0.0, 1.3]  # [X, Y, Z]
    camera_local_rotation = [0, 0, 0]  # [pitch(Y), yaw(Z), roll(X)]
    fov = 70
    autopilot = False
    control = VehicleControl()
    for start_i in range(150):
        output_folder = '/home2/mariap/Packages/CARLA_0.8.2/PythonClient/_out/pos_' + str(
            start_i)
        if not os.path.exists(output_folder):
            os.mkdir(output_folder)
            print("make " + str(output_folder))

    # Connect with the server
    with make_carla_client(host, port) as client:
        print('CarlaClient connected')
        for start_i in range(150):
            output_folder = '/home2/mariap/Packages/CARLA_0.8.2/PythonClient/_out/pos_' + str(
                start_i)
            print(output_folder)

            # Here we load the settings.
            settings = CarlaSettings()
            settings.set(SynchronousMode=True,
                         SendNonPlayerAgentsInfo=True,
                         NumberOfVehicles=100,
                         NumberOfPedestrians=500,
                         WeatherId=random.choice([1, 3, 7, 8, 14]))
            settings.randomize_seeds()

            camera1 = Camera('CameraDepth', PostProcessing='Depth', FOV=fov)
            camera1.set_image_size(*image_size)
            camera1.set_position(*camera_local_pos)
            camera1.set_rotation(*camera_local_rotation)
            settings.add_sensor(camera1)

            camera2 = Camera('CameraRGB', PostProcessing='SceneFinal', FOV=fov)
            camera2.set_image_size(*image_size)
            camera2.set_position(*camera_local_pos)
            camera2.set_rotation(*camera_local_rotation)
            settings.add_sensor(camera2)

            camera3 = Camera('CameraSeg',
                             PostProcessing='SemanticSegmentation')
            camera3.set_image_size(*image_size)
            camera3.set_position(*camera_local_pos)
            camera3.set_rotation(*camera_local_rotation)
            settings.add_sensor(camera3)

            client.load_settings(settings)

            # Start the episode at this start-position index.
            client.start_episode(start_i)

            cameras_dict = {}
            pedestrians_dict = {}
            cars_dict = {}
            # Compute the camera transform matrix
            camera_to_car_transform = camera2.get_unreal_transform()
            # (Intrinsic) (3, 3) K Matrix
            K = np.identity(3)
            K[0, 2] = image_size[0] / 2.0
            K[1, 2] = image_size[1] / 2.0
            K[0, 0] = K[1, 1] = image_size[0] / (2.0 * np.tan(fov * np.pi / 360.0))
            with open(output_folder + '/camera_intrinsics.p', 'wb') as camfile:
                pickle.dump(K, camfile)

            # Iterate every frame in the episode except for the first one.
            for frame in range(1, number_of_frames):
                # Read the data produced by the server this frame.
                measurements, sensor_data = client.read_data()

                # Save one image every 'frame_step' frames
                if not frame % frame_step:
                    for name, measurement in sensor_data.items():
                        filename = '{:s}/{:0>6d}'.format(name, frame)
                        measurement.save_to_disk(
                            os.path.join(output_folder, filename))
                    # Start timing the transformations.
                    timer = StopWatch()

                    # RGB image [[[r,g,b],..[r,g,b]],..[[r,g,b],..[r,g,b]]]
                    image_RGB = to_rgb_array(sensor_data['CameraRGB'])
                    image_seg = np.tile(
                        labels_to_array(sensor_data['CameraSeg']), (1, 1, 3))

                    # 2D to (camera) local 3D.
                    # We use image_RGB to colorize each 3D point (optional).
                    # "max_depth" keeps only points near the camera; 1.0
                    # corresponds to the farthest points (sky).
                    point_cloud = depth_to_local_point_cloud(
                        sensor_data['CameraDepth'], image_RGB, max_depth=far)

                    point_cloud_seg = depth_to_local_point_cloud(
                        sensor_data['CameraDepth'], image_seg, max_depth=far)

                    # (Camera) local 3d to world 3d.
                    # Get the transform from the player protobuf transformation.
                    world_transform = Transform(
                        measurements.player_measurements.transform)

                    # Compute the final transformation matrix.
                    car_to_world_transform = world_transform * camera_to_car_transform

                    # Car to World transformation given the 3D points and the
                    # transformation matrix.
                    point_cloud.apply_transform(car_to_world_transform)
                    point_cloud_seg.apply_transform(car_to_world_transform)

                    Rt = car_to_world_transform.matrix
                    Rt_inv = car_to_world_transform.inverse().matrix  # world-to-camera matrix
                    # R_inv = world_transform.inverse().matrix
                    cameras_dict[frame] = {}
                    cameras_dict[frame]['inverse_rotation'] = Rt_inv[:]
                    cameras_dict[frame]['rotation'] = Rt[:]
                    cameras_dict[frame]['translation'] = Rt_inv[0:3, 3]
                    cameras_dict[frame]['camera_to_car'] = camera_to_car_transform.matrix

                    # Get non-player info
                    vehicles = {}
                    pedestrians = {}
                    for agent in measurements.non_player_agents:
                        # check if the agent is a vehicle.
                        if agent.HasField('vehicle'):
                            pos = agent.vehicle.transform.location
                            pos_vector = np.array([[pos.x], [pos.y], [pos.z],
                                                   [1.0]])

                            transformed_3d_pos = np.dot(Rt_inv, pos_vector)
                            pos2d = np.dot(K, transformed_3d_pos[:3])

                            # Normalize the point
                            norm_pos2d = np.array([
                                pos2d[0] / pos2d[2], pos2d[1] / pos2d[2],
                                pos2d[2]
                            ])

                            # Now, pos2d contains the [x, y, d] values of the image in pixels (where d is the depth)
                            # You can use the depth to know the points that are in front of the camera (positive depth).

                            x_2d = image_size[0] - norm_pos2d[0]
                            y_2d = image_size[1] - norm_pos2d[1]
                            vehicles[agent.id] = {}
                            vehicles[agent.id]['transform'] = [
                                agent.vehicle.transform.location.x,
                                agent.vehicle.transform.location.y,
                                agent.vehicle.transform.location.z
                            ]
                            vehicles[agent.id][
                                'bounding_box.transform'] = agent.vehicle.bounding_box.transform.location.z

                            vehicles[agent.id][
                                'yaw'] = agent.vehicle.transform.rotation.yaw
                            vehicles[agent.id]['bounding_box'] = [
                                agent.vehicle.bounding_box.extent.x,
                                agent.vehicle.bounding_box.extent.y,
                                agent.vehicle.bounding_box.extent.z
                            ]
                            vehicle_transform = Transform(
                                agent.vehicle.bounding_box.transform)
                            pos = agent.vehicle.transform.location

                            bbox3d = agent.vehicle.bounding_box.extent

                            # Compute the 3D bounding box vertices; the first
                            # four rows of f are the bottom face.
                            z_off = agent.vehicle.bounding_box.transform.location.z
                            f = np.array([
                                [pos.x + bbox3d.x, pos.y - bbox3d.y, pos.z - bbox3d.z + z_off],
                                [pos.x + bbox3d.x, pos.y + bbox3d.y, pos.z - bbox3d.z + z_off],
                                [pos.x - bbox3d.x, pos.y + bbox3d.y, pos.z - bbox3d.z + z_off],
                                [pos.x - bbox3d.x, pos.y - bbox3d.y, pos.z - bbox3d.z + z_off],
                                [pos.x + bbox3d.x, pos.y - bbox3d.y, pos.z + bbox3d.z + z_off],
                                [pos.x + bbox3d.x, pos.y + bbox3d.y, pos.z + bbox3d.z + z_off],
                                [pos.x - bbox3d.x, pos.y + bbox3d.y, pos.z + bbox3d.z + z_off],
                                [pos.x - bbox3d.x, pos.y - bbox3d.y, pos.z + bbox3d.z + z_off]])

                            f_rotated = vehicle_transform.transform_points(f)
                            f_2D_rotated = []
                            vehicles[agent.id]['bounding_box_coord'] = f_rotated

                            for i in range(f.shape[0]):
                                point = np.array([[f_rotated[i, 0]],
                                                  [f_rotated[i, 1]],
                                                  [f_rotated[i, 2]], [1]])
                                transformed_2d_pos = np.dot(Rt_inv, point)  # 3D position in camera space
                                pos2d = np.dot(K, transformed_2d_pos[:3])  # Conversion to camera frustum space
                                norm_pos2d = np.array([
                                    pos2d[0] / pos2d[2], pos2d[1] / pos2d[2],
                                    pos2d[2]
                                ])
                                f_2D_rotated.append(
                                    np.array([
                                        image_size[0] - norm_pos2d[0],
                                        image_size[1] - norm_pos2d[1]
                                    ]))
                            vehicles[agent.id]['bounding_box_2D'] = f_2D_rotated

                        elif agent.HasField('pedestrian'):

                            pedestrians[agent.id] = {}
                            pedestrians[agent.id]['transform'] = [
                                agent.pedestrian.transform.location.x,
                                agent.pedestrian.transform.location.y,
                                agent.pedestrian.transform.location.z
                            ]
                            pedestrians[agent.id][
                                'yaw'] = agent.pedestrian.transform.rotation.yaw
                            pedestrians[agent.id]['bounding_box'] = [
                                agent.pedestrian.bounding_box.extent.x,
                                agent.pedestrian.bounding_box.extent.y,
                                agent.pedestrian.bounding_box.extent.z
                            ]
                            bbox_transform = Transform(
                                agent.pedestrian.bounding_box.transform)
                            pos = agent.pedestrian.transform.location

                            bbox3d = agent.pedestrian.bounding_box.extent

                            # Compute the 3D bounding box vertices; the first
                            # four rows of f are the bottom face.
                            f = np.array([
                                [pos.x + bbox3d.x, pos.y - bbox3d.y, pos.z],
                                [pos.x + bbox3d.x, pos.y + bbox3d.y, pos.z],
                                [pos.x - bbox3d.x, pos.y + bbox3d.y, pos.z],
                                [pos.x - bbox3d.x, pos.y - bbox3d.y, pos.z],
                                [pos.x + bbox3d.x, pos.y - bbox3d.y, pos.z + bbox3d.z],
                                [pos.x + bbox3d.x, pos.y + bbox3d.y, pos.z + bbox3d.z],
                                [pos.x - bbox3d.x, pos.y + bbox3d.y, pos.z + bbox3d.z],
                                [pos.x - bbox3d.x, pos.y - bbox3d.y, pos.z + bbox3d.z]])

                            f_rotated = bbox_transform.transform_points(f)
                            pedestrians[agent.id]['bounding_box_coord'] = f_rotated
                            f_2D_rotated = []

                            for i in range(f.shape[0]):
                                point = np.array([[f_rotated[i, 0]],
                                                  [f_rotated[i, 1]],
                                                  [f_rotated[i, 2]], [1]])
                                transformed_2d_pos = np.dot(Rt_inv, point)  # See the vehicle branch above
                                pos2d = np.dot(K, transformed_2d_pos[:3])
                                norm_pos2d = np.array([
                                    pos2d[0] / pos2d[2], pos2d[1] / pos2d[2],
                                    pos2d[2]
                                ])
                                f_2D_rotated.append([
                                    image_size[0] - norm_pos2d[0],
                                    image_size[1] - norm_pos2d[1]
                                ])
                            pedestrians[agent.id]['bounding_box_2D'] = f_2D_rotated

                    cars_dict[frame] = vehicles
                    pedestrians_dict[frame] = pedestrians

                    # Stop timing the transformations.
                    timer.stop()

                    # Save PLY to disk
                    # This generates the PLY string with the 3D points and the RGB colors
                    # for each row of the file.
                    point_cloud.save_to_disk(
                        os.path.join(output_folder,
                                     '{:0>5}.ply'.format(frame)))
                    point_cloud_seg.save_to_disk(
                        os.path.join(output_folder,
                                     '{:0>5}_seg.ply'.format(frame)))

                    print_message(timer.milliseconds(), len(point_cloud),
                                  frame)

                if autopilot:
                    client.send_control(
                        measurements.player_measurements.autopilot_control)
                else:
                    control.hand_brake = True
                    client.send_control(control)
            with open(output_folder + '/cameras.p', 'wb') as camerafile:
                pickle.dump(cameras_dict, camerafile)
                print(output_folder + "/cameras.p")
            with open(output_folder + '/people.p', 'wb') as peoplefile:
                pickle.dump(pedestrians_dict, peoplefile)
            with open(output_folder + '/cars.p', 'wb') as carfile:
                pickle.dump(cars_dict, carfile)
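
Examples #2 and #3 repeat the same world-to-pixel projection inline. A small helper capturing that step, as a sketch under the same assumptions (numpy imported as np, Rt_inv the world-to-camera matrix, K the 3x3 intrinsic matrix):

import numpy as np

def project_to_image(world_point, Rt_inv, K, image_size):
    # world_point: [x, y, z] in world coordinates.
    pos_vector = np.array([[world_point[0]],
                           [world_point[1]],
                           [world_point[2]],
                           [1.0]])
    cam = np.dot(Rt_inv, pos_vector)   # world -> camera space
    pos2d = np.dot(K, cam[:3])         # camera -> frustum space
    x_2d = image_size[0] - pos2d[0, 0] / pos2d[2, 0]
    y_2d = image_size[1] - pos2d[1, 0] / pos2d[2, 0]
    # A positive returned depth means the point is in front of the camera.
    return x_2d, y_2d, pos2d[2, 0]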
Example #4
def get_bbox(self, measurement, seg):
    # WIDTH, HEIGHT and coll_veh_num are module-level globals (read-only here).
    width = WIDTH
    height = HEIGHT
    extrinsic = Transform(measurement.player_measurements.transform) * self.obs_to_car_transform 
    bbox_list = []
    orientation_list = []
    distance_list = []
    # main_rotation = measurement.player_measurements.transform.rotation
    player_location = measurement.player_measurements.transform.location
    player_location = np.array([player_location.x, player_location.y, player_location.z])
    # collect the 2bbox generated from the 3d-bbox of non-player agents
    for agent in measurement.non_player_agents:
        if agent.HasField("vehicle"):
            # veh_id = agent.id
            # idx = self.nonplayer_ids[veh_id]
            vehicle_transform = Transform(agent.vehicle.transform)
            bbox_transform = Transform(agent.vehicle.bounding_box.transform)
            ext = agent.vehicle.bounding_box.extent
            bbox = np.array([
                [  ext.x,   ext.y,   ext.z],
                [- ext.x,   ext.y,   ext.z],
                [  ext.x, - ext.y,   ext.z],
                [- ext.x, - ext.y,   ext.z],
                [  ext.x,   ext.y, - ext.z],
                [- ext.x,   ext.y, - ext.z],
                [  ext.x, - ext.y, - ext.z],
                [- ext.x, - ext.y, - ext.z]
            ])

            # Box-local -> vehicle-local -> world coordinates.
            bbox = bbox_transform.transform_points(bbox)
            bbox = vehicle_transform.transform_points(bbox)
            
            orientation = agent.vehicle.transform.orientation
            vehicle_location = agent.vehicle.transform.location
            cur_location = np.array([vehicle_location.x, vehicle_location.y, vehicle_location.z])
            distance = np.linalg.norm(player_location - cur_location)

            vertices = []
            for vertex in bbox:
                pos_vector = np.array([
                    [vertex[0,0]],  # [[X,
                    [vertex[0,1]],  #   Y,
                    [vertex[0,2]],  #   Z,
                    [1.0]           #   1.0]]
                ])
                transformed_3d_pos = np.dot(inv(extrinsic.matrix), pos_vector)
                pos2d = np.dot(self.intrinsic, transformed_3d_pos[:3])
                pos2d = np.array([
                    pos2d[0] / pos2d[2], pos2d[1] / pos2d[2], pos2d[2]
                ])
                
                if pos2d[2] > 0:
                    x_2d = width - pos2d[0]
                    y_2d = height - pos2d[1]
                    vertices.append([x_2d, y_2d])
            if len(vertices) > 1:
                # vehicle_rotation = agent.vehicle.transform.rotation
                vertices = np.array(vertices)
                bbox_list.append([np.min(vertices[:, 0]), np.min(vertices[:, 1]),
                    np.max(vertices[:, 0]), np.max(vertices[:, 1])])
                orientation_list.append(orientation)
                distance_list.append(distance)
    seg_bboxes = seg_to_bbox(seg)
    final_bboxes = []
    final_directions = []
    final_distances = []
    assert len(bbox_list) == len(orientation_list)
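    # Refine each projected box by snapping it to the segmentation-derived
    # boxes that fall inside it (with a 5-pixel margin).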
    for i in range(len(bbox_list)):
        bbox = bbox_list[i]
        direction = orientation_list[i]
        xmin, ymin, xmax, ymax = bbox
        x1, y1, x2, y2 = width, height, 0, 0
        for segbbox in seg_bboxes:
            xmin0, ymin0, xmax0, ymax0 = segbbox
            if xmin0 >= xmin - 5 and ymin0 >= ymin - 5 and xmax0 < xmax + 5 and ymax0 < ymax + 5:
                x1 = min(x1, xmin0)
                y1 = min(y1, ymin0)
                x2 = max(x2, xmax0)
                y2 = max(y2, ymax0)
        if x2 > x1 and y2 > y1 and [int(x1), int(y1), int(x2), int(y2)] not in final_bboxes:
            final_bboxes.append([int(x1), int(y1), int(x2), int(y2)])
            relative_orientation = get_angle(direction.x, direction.y, self.orientation.x, self.orientation.y)
            final_directions.append(relative_orientation)
            final_distances.append(distance_list[i])
    # for angle in final_directions:
    #     self.angle_logger.write("timestep {}: {}\n".format(self.timestep, angle))
    #     self.angle_logger.flush()
    final_distances = np.array(final_distances)
    # Mark the `visible_coll_num` closest vehicles as collision candidates.
    visible_coll_num = min(coll_veh_num, final_distances.size)
    coll_idx = np.argpartition(final_distances, visible_coll_num - 1)[:visible_coll_num]
    final_colls = [1 if i in coll_idx else 0 for i in range(final_distances.size)]
    return final_bboxes, final_directions, final_colls
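
A hypothetical call site for get_bbox, assuming it is a method on an environment wrapper (env below) that holds self.intrinsic, self.obs_to_car_transform and self.orientation; client and labels_to_array follow the earlier examples, and all names here are assumptions:

measurements, sensor_data = client.read_data()
seg = labels_to_array(sensor_data['CameraSeg'])
bboxes, directions, colls = env.get_bbox(measurements, seg)
for (x1, y1, x2, y2), angle, near in zip(bboxes, directions, colls):
    print('bbox=({}, {}, {}, {}) angle={:.2f} collision-candidate={}'.format(
        x1, y1, x2, y2, angle, near))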