Example #1
0
def convert_image(sensor_data, simple_seg=True):
    obs = np.frombuffer(sensor_data['CameraRGB'].raw_data,
                        dtype=np.uint8).reshape((256, 256, 4))[:, :, :3]
    seg = labels_to_array(sensor_data['CameraSegmentation'])
    if simple_seg:
        seg = simplify_seg(seg)
    return obs, seg
Example #2
0
 def __get_traffic_sign_bboxes(self, segmented_frame):
     segmented_frame = labels_to_array(segmented_frame)
     # Shape is height, width
     traffic_signs_frame = np.zeros(
         (segmented_frame.shape[0], segmented_frame.shape[1]),
         dtype=np.bool)
     # 12 is the key for TrafficSigns segmentation in Carla.
     # Apply mask to only select traffic signs and traffic lights.
     traffic_signs_frame[np.where(segmented_frame == 12)] = True
     bboxes = get_bounding_boxes_from_segmented(traffic_signs_frame)
     traffic_sign_bboxes = [('traffic sign', bbox) for bbox in bboxes]
     return traffic_sign_bboxes
Example #3
0
def run_carla_client( args):
    # Here we will run 3 episodes with 300 frames each.
    number_of_episodes = 150
    frames_per_episode = 500

    # for start_i in range(150):
    #     if start_i%4==0:
    #         output_folder = 'Packages/CARLA_0.8.2/PythonClient/new_data-viz/test_' + str(start_i)
    #         if not os.path.exists(output_folder):
    #             os.mkdir(output_folder)
    #             print( "make "+str(output_folder))
    # ./CarlaUE4.sh -carla-server  -benchmark -fps=17 -windowed
    # carla-server "/usr/local/carla/Unreal/CarlaUE4/CarlaUE4.uproject" /usr/local/carla/Maps/Town03 -benchmark -fps=10 -windowed


    # We assume the CARLA server is already waiting for a client to connect at
    # host:port. To create a connection we can use the `make_carla_client`
    # context manager, it creates a CARLA client object and starts the
    # connection. It will throw an exception if something goes wrong. The
    # context manager makes sure the connection is always cleaned up on exit.
    with make_carla_client(args.host, args.port) as client:

        print('CarlaClient connected')
        global episode_nbr
        print (episode_nbr)
        for episode in range(0,number_of_episodes):
            if episode % 1 == 0:
                output_folder = 'Datasets/carla-sync/train/test_' + str(episode)
                if not os.path.exists(output_folder+"/cameras.p"):
                    # Start a new episode.
                    episode_nbr=episode
                    frame_step = 1  # Save one image every 100 frames
                    pointcloud_step=50
                    image_size = [800, 600]
                    camera_local_pos = [0.3, 0.0, 1.3]  # [X, Y, Z]
                    camera_local_rotation = [0, 0, 0]  # [pitch(Y), yaw(Z), roll(X)]
                    fov = 70
                    # Create a CarlaSettings object. This object is a wrapper around
                    # the CarlaSettings.ini file. Here we set the configuration we
                    # want for the new episode.
                    settings = CarlaSettings()
                    settings.set(
                        SynchronousMode=True,
                        SendNonPlayerAgentsInfo=True,
                        NumberOfVehicles=50,
                        NumberOfPedestrians=200,
                        WeatherId=random.choice([1, 3, 7, 8, 14]),
                        QualityLevel=args.quality_level)
                    settings.randomize_seeds()

                    # Now we want to add a couple of cameras to the player vehicle.
                    # We will collect the images produced by these cameras every
                    # frame.


                    camera1 = Camera('CameraDepth', PostProcessing='Depth', FOV=fov)
                    camera1.set_image_size(*image_size)
                    camera1.set_position(*camera_local_pos)
                    camera1.set_rotation(*camera_local_rotation)
                    settings.add_sensor(camera1)

                    camera2 = Camera('CameraRGB', PostProcessing='SceneFinal', FOV=fov)
                    camera2.set_image_size(*image_size)
                    camera2.set_position(*camera_local_pos)
                    camera2.set_rotation(*camera_local_rotation)
                    settings.add_sensor(camera2)

                    camera3 = Camera('CameraSeg', PostProcessing='SemanticSegmentation', FOV=fov)
                    camera3.set_image_size(*image_size)
                    camera3.set_position(*camera_local_pos)
                    camera3.set_rotation(*camera_local_rotation)
                    settings.add_sensor(camera3)



                    # Now we load these settings into the server. The server replies
                    # with a scene description containing the available start spots for
                    # the player. Here we can provide a CarlaSettings object or a
                    # CarlaSettings.ini file as string.
                    scene = client.load_settings(settings)

                    # Choose one player start at random.
                    number_of_player_starts = len(scene.player_start_spots)
                    player_start = episode#random.randint(0, max(0, number_of_player_starts - 1))

                    output_folder = 'Datasets/carla-sync/train/test_' + str(episode)
                    # Notify the server that we want to start the episode at the
                    # player_start index. This function blocks until the server is ready
                    # to start the episode.
                    print('Starting new episode...')
                    client.start_episode(player_start)


                    cameras_dict = {}
                    pedestrians_dict = {}
                    cars_dict = {}
                    # Compute the camera transform matrix
                    camera_to_car_transform = camera2.get_unreal_transform()
                    # (Intrinsic) (3, 3) K Matrix
                    K = np.identity(3)
                    K[0, 2] = image_size[0] / 2.0
                    K[1, 2] = image_size[1] / 2.0
                    K[0, 0] = K[1, 1] = image_size[0] / (2.0 * np.tan(fov * np.pi / 360.0))
                    with open(output_folder + '/camera_intrinsics.p', 'w') as camfile:
                        pickle.dump(K, camfile)


                    # Iterate every frame in the episode.
                    for frame in range(0, frames_per_episode):

                        # Read the data produced by the server this frame.
                        measurements, sensor_data = client.read_data()

                        # Print some of the measurements.
                        #print_measurements(measurements)
                        if not frame % frame_step:
                            # Save the images to disk if requested.

                            for name, measurement in sensor_data.items():
                                filename = args.out_filename_format.format(episode, name, frame)
                                print (filename)
                                measurement.save_to_disk(filename)

                            # We can access the encoded data of a given image as numpy
                            # array using its "data" property. For instance, to get the
                            # depth value (normalized) at pixel X, Y
                            #
                            #     depth_array = sensor_data['CameraDepth'].data
                            #     value_at_pixel = depth_array[Y, X]
                            #

                            # Now we have to send the instructions to control the vehicle.
                            # If we are in synchronous mode the server will pause the
                            # simulation until we send this control.

                            # RGB image [[[r,g,b],..[r,g,b]],..[[r,g,b],..[r,g,b]]]
                            image_RGB = to_rgb_array(sensor_data['CameraRGB'])

                            labels=labels_to_array(sensor_data['CameraSeg'])[:,:,np.newaxis]

                            image_seg = np.tile(labels, (1, 1, 3))
                            depth_array = sensor_data['CameraDepth'].data*1000


                            # 2d to (camera) local 3d
                            # We use the image_RGB to colorize each 3D point, this is optional.
                            # "max_depth" is used to keep only the points that are near to the
                            # camera, meaning 1.0 the farest points (sky)
                            if not frame % pointcloud_step:
                                point_cloud = depth_to_local_point_cloud(
                                    sensor_data['CameraDepth'],
                                    image_RGB,
                                    max_depth=args.far
                                )

                                point_cloud_seg = depth_to_local_point_cloud(
                                    sensor_data['CameraDepth'],
                                    segmentation=image_seg,
                                    max_depth=args.far
                                )

                            # (Camera) local 3d to world 3d.
                            # Get the transform from the player protobuf transformation.
                            world_transform = Transform(
                                measurements.player_measurements.transform
                            )

                            # Compute the final transformation matrix.
                            car_to_world_transform = world_transform * camera_to_car_transform

                            # Car to World transformation given the 3D points and the
                            # transformation matrix.
                            point_cloud.apply_transform(car_to_world_transform)
                            point_cloud_seg.apply_transform(car_to_world_transform)

                            Rt = car_to_world_transform.matrix
                            Rt_inv = car_to_world_transform.inverse().matrix
                            # R_inv=world_transform.inverse().matrix
                            cameras_dict[frame] = {}
                            cameras_dict[frame]['inverse_rotation'] = Rt_inv[:]
                            cameras_dict[frame]['rotation'] = Rt[:]
                            cameras_dict[frame]['translation'] = Rt_inv[0:3, 3]
                            cameras_dict[frame]['camera_to_car'] = camera_to_car_transform.matrix

                            # Get non-player info
                            vehicles = {}
                            pedestrians = {}
                            for agent in measurements.non_player_agents:
                                # check if the agent is a vehicle.
                                if agent.HasField('vehicle'):
                                    pos = agent.vehicle.transform.location
                                    pos_vector = np.array([[pos.x], [pos.y], [pos.z], [1.0]])

                                    trnasformed_3d_pos = np.dot(Rt_inv, pos_vector)
                                    pos2d = np.dot(K, trnasformed_3d_pos[:3])

                                    # Normalize the point
                                    norm_pos2d = np.array([
                                        pos2d[0] / pos2d[2],
                                        pos2d[1] / pos2d[2],
                                        pos2d[2]])

                                    # Now, pos2d contains the [x, y, d] values of the image in pixels (where d is the depth)
                                    # You can use the depth to know the points that are in front of the camera (positive depth).

                                    x_2d = image_size[0] - norm_pos2d[0]
                                    y_2d = image_size[1] - norm_pos2d[1]
                                    vehicles[agent.id] = {}
                                    vehicles[agent.id]['transform'] = [agent.vehicle.transform.location.x,
                                                                       agent.vehicle.transform.location.y,
                                                                       agent.vehicle.transform.location.z]
                                    vehicles[agent.id][
                                        'bounding_box.transform'] = agent.vehicle.bounding_box.transform.location.z

                                    vehicles[agent.id]['yaw'] = agent.vehicle.transform.rotation.yaw
                                    vehicles[agent.id]['bounding_box'] = [agent.vehicle.bounding_box.extent.x,
                                                                          agent.vehicle.bounding_box.extent.y,
                                                                          agent.vehicle.bounding_box.extent.z]
                                    vehicle_transform = Transform(agent.vehicle.bounding_box.transform)
                                    pos = agent.vehicle.transform.location

                                    bbox3d = agent.vehicle.bounding_box.extent

                                    # Compute the 3D bounding boxes
                                    # f contains the 4 points that corresponds to the bottom
                                    f = np.array([[pos.x + bbox3d.x, pos.y - bbox3d.y,
                                                   pos.z - bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x + bbox3d.x, pos.y + bbox3d.y,
                                                   pos.z - bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x - bbox3d.x, pos.y + bbox3d.y,
                                                   pos.z - bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x - bbox3d.x, pos.y - bbox3d.y,
                                                   pos.z - bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x + bbox3d.x, pos.y - bbox3d.y,
                                                   pos.z + bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x + bbox3d.x, pos.y + bbox3d.y,
                                                   pos.z + bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x - bbox3d.x, pos.y + bbox3d.y,
                                                   pos.z + bbox3d.z + agent.vehicle.bounding_box.transform.location.z],
                                                  [pos.x - bbox3d.x, pos.y - bbox3d.y,
                                                   pos.z + bbox3d.z + agent.vehicle.bounding_box.transform.location.z]])

                                    f_rotated = vehicle_transform.transform_points(f)
                                    f_2D_rotated = []
                                    vehicles[agent.id]['bounding_box_coord'] = f_rotated

                                    for i in range(f.shape[0]):
                                        point = np.array([[f_rotated[i, 0]], [f_rotated[i, 1]], [f_rotated[i, 2]], [1]])
                                        transformed_2d_pos = np.dot(Rt_inv, point)
                                        pos2d = np.dot(K, transformed_2d_pos[:3])
                                        norm_pos2d = np.array([
                                            pos2d[0] / pos2d[2],
                                            pos2d[1] / pos2d[2],
                                            pos2d[2]])
                                        # print([image_size[0] - (pos2d[0] / pos2d[2]), image_size[1] - (pos2d[1] / pos2d[2])])
                                        f_2D_rotated.append(
                                            np.array([image_size[0] - norm_pos2d[0], image_size[1] - norm_pos2d[1]]))
                                    vehicles[agent.id]['bounding_box_2D'] = f_2D_rotated


                                elif agent.HasField('pedestrian'):
                                    pedestrians[agent.id] = {}
                                    pedestrians[agent.id]['transform'] = [agent.pedestrian.transform.location.x,
                                                                          agent.pedestrian.transform.location.y,
                                                                          agent.pedestrian.transform.location.z]
                                    pedestrians[agent.id]['yaw'] = agent.pedestrian.transform.rotation.yaw
                                    pedestrians[agent.id]['bounding_box'] = [agent.pedestrian.bounding_box.extent.x,
                                                                             agent.pedestrian.bounding_box.extent.y,
                                                                             agent.pedestrian.bounding_box.extent.z]
                                    # get the needed transformations
                                    # remember to explicitly make it Transform() so you can use transform_points()
                                    pedestrian_transform = Transform(agent.pedestrian.transform)
                                    bbox_transform = Transform(agent.pedestrian.bounding_box.transform)

                                    # get the box extent
                                    ext = agent.pedestrian.bounding_box.extent
                                    # 8 bounding box vertices relative to (0,0,0)
                                    bbox = np.array([
                                        [  ext.x,   ext.y,   ext.z],
                                        [- ext.x,   ext.y,   ext.z],
                                        [  ext.x, - ext.y,   ext.z],
                                        [- ext.x, - ext.y,   ext.z],
                                        [  ext.x,   ext.y, - ext.z],
                                        [- ext.x,   ext.y, - ext.z],
                                        [  ext.x, - ext.y, - ext.z],
                                        [- ext.x, - ext.y, - ext.z]
                                    ])

                                    # transform the vertices respect to the bounding box transform
                                    bbox = bbox_transform.transform_points(bbox)

                                    # the bounding box transform is respect to the pedestrian transform
                                    # so let's transform the points relative to it's transform
                                    bbox = pedestrian_transform.transform_points(bbox)

                                    # pedestrian's transform is relative to the world, so now,
                                    # bbox contains the 3D bounding box vertices relative to the world
                                    pedestrians[agent.id]['bounding_box_coord'] =copy.deepcopy(bbox)

                                    # Additionally, you can print these vertices to check that is working
                                    f_2D_rotated=[]
                                    ys=[]
                                    xs=[]
                                    zs=[]
                                    for vertex in bbox:
                                        pos_vector = np.array([
                                            [vertex[0,0]],  # [[X,
                                            [vertex[0,1]],  #   Y,
                                            [vertex[0,2]],  #   Z,
                                            [1.0]           #   1.0]]
                                        ])

                                        # transform the points to camera
                                        transformed_3d_pos =np.dot(Rt_inv, pos_vector)# np.dot(inv(self._extrinsic.matrix), pos_vector)
                                        zs.append(transformed_3d_pos[2])
                                        # transform the points to 2D
                                        pos2d =np.dot(K, transformed_3d_pos[:3]) #np.dot(self._intrinsic, transformed_3d_pos[:3])

                                        # normalize the 2D points
                                        pos2d = np.array([
                                            pos2d[0] / pos2d[2],
                                            pos2d[1] / pos2d[2],
                                            pos2d[2]
                                        ])

                                        # print the points in the screen
                                        if pos2d[2] > 0: # if the point is in front of the camera
                                            x_2d = image_size[0]-pos2d[0]#WINDOW_WIDTH - pos2d[0]
                                            y_2d = image_size[1]-pos2d[1]#WINDOW_HEIGHT - pos2d[1]
                                            ys.append(y_2d)
                                            xs.append(x_2d)
                                            f_2D_rotated.append( (y_2d, x_2d))
                                    if len(xs)>1:
                                        bbox=[[int(min(xs)), int(max(xs))],[int(min(ys)), int(max(ys))]]
                                        clipped_seg=labels[bbox[1][0]:bbox[1][1],bbox[0][0]:bbox[0][1]]
                                        recounted = Counter(clipped_seg.flatten())


                                        if 4 in recounted.keys() and recounted[4]>0.1*len(clipped_seg.flatten()):
                                            clipped_depth=depth_array[bbox[1][0]:bbox[1][1],bbox[0][0]:bbox[0][1]]
                                            #print (clipped_depth.shape)
                                            people_indx=np.where(clipped_seg==4)
                                            masked_depth=[]
                                            for people in zip(people_indx[0],people_indx[1] ):
                                                #print(people)
                                                masked_depth.append(clipped_depth[people])
                                            #masked_depth=clipped_depth[np.where(clipped_seg==4)]
                                            #print (masked_depth)
                                            #print ("Depth "+ str(min(zs))+" "+ str(max(zs)))
                                            #recounted = Counter(masked_depth)
                                            #print(recounted)
                                            avg_depth=np.mean(masked_depth)
                                            if avg_depth<700 and avg_depth>=min(zs)-10 and avg_depth<= max(zs)+10:
                                                #print("Correct depth")
                                                pedestrians[agent.id]['bounding_box_2D'] = f_2D_rotated
                                                pedestrians[agent.id]['bounding_box_2D_size']=recounted[4]
                                                pedestrians[agent.id]['bounding_box_2D_avg_depth']=avg_depth
                                                pedestrians[agent.id]['bounding_box_2D_depths']=zs
                                                #print ( pedestrians[agent.id].keys())
                                            #else:
                                                # print(recounted)
                                                # print ("Depth "+ str(min(zs))+" "+ str(max(zs)))


                                    #if sum(norm(depth_array-np.mean(zs))<1.0):


                                    # pedestrians[agent.id] = {}
                                    # pedestrians[agent.id]['transform'] = [agent.pedestrian.transform.location.x,
                                    #                                       agent.pedestrian.transform.location.y,
                                    #                                       agent.pedestrian.transform.location.z]
                                    # pedestrians[agent.id]['yaw'] = agent.pedestrian.transform.rotation.yaw
                                    # pedestrians[agent.id]['bounding_box'] = [agent.pedestrian.bounding_box.extent.x,
                                    #                                          agent.pedestrian.bounding_box.extent.y,
                                    #                                          agent.pedestrian.bounding_box.extent.z]
                                    # vehicle_transform = Transform(agent.pedestrian.bounding_box.transform)
                                    # pos = agent.pedestrian.transform.location
                                    #
                                    # bbox3d = agent.pedestrian.bounding_box.extent
                                    #
                                    # # Compute the 3D bounding boxes
                                    # # f contains the 4 points that corresponds to the bottom
                                    # f = np.array([[pos.x + bbox3d.x, pos.y - bbox3d.y, pos.z- bbox3d.z ],
                                    #               [pos.x + bbox3d.x, pos.y + bbox3d.y, pos.z- bbox3d.z ],
                                    #               [pos.x - bbox3d.x, pos.y + bbox3d.y, pos.z- bbox3d.z ],
                                    #               [pos.x - bbox3d.x, pos.y - bbox3d.y, pos.z- bbox3d.z ],
                                    #               [pos.x + bbox3d.x, pos.y - bbox3d.y, pos.z + bbox3d.z],
                                    #               [pos.x + bbox3d.x, pos.y + bbox3d.y, pos.z + bbox3d.z],
                                    #               [pos.x - bbox3d.x, pos.y + bbox3d.y, pos.z + bbox3d.z],
                                    #               [pos.x - bbox3d.x, pos.y - bbox3d.y, pos.z + bbox3d.z]])
                                    #
                                    # f_rotated = pedestrian_transform.transform_points(f)
                                    # pedestrians[agent.id]['bounding_box_coord'] = f_rotated
                                    # f_2D_rotated = []
                                    #
                                    # for i in range(f.shape[0]):
                                    #     point = np.array([[f_rotated[i, 0]], [f_rotated[i, 1]], [f_rotated[i, 2]], [1]])
                                    #     transformed_2d_pos = np.dot(Rt_inv, point)
                                    #     pos2d = np.dot(K, transformed_2d_pos[:3])
                                    #     norm_pos2d = np.array([
                                    #         pos2d[0] / pos2d[2],
                                    #         pos2d[1] / pos2d[2],
                                    #         pos2d[2]])
                                    #     f_2D_rotated.append([image_size[0] - norm_pos2d[0], image_size[1] - norm_pos2d[1]])
                                    # pedestrians[agent.id]['bounding_box_2D'] = f_2D_rotated

                            cars_dict[frame] = vehicles

                            pedestrians_dict[frame] = pedestrians
                            #print("End of Episode")
                            #print(len(pedestrians_dict[frame]))

                            # Save PLY to disk
                            # This generates the PLY string with the 3D points and the RGB colors
                            # for each row of the file.
                            if not frame % pointcloud_step:
                                point_cloud.save_to_disk(os.path.join(
                                    output_folder, '{:0>5}.ply'.format(frame))
                                )
                                point_cloud_seg.save_to_disk(os.path.join(
                                    output_folder, '{:0>5}_seg.ply'.format(frame))
                                )

                        if not args.autopilot:

                            client.send_control(
                                hand_brake=True)

                        else:

                            # Together with the measurements, the server has sent the
                            # control that the in-game autopilot would do this frame. We
                            # can enable autopilot by sending back this control to the
                            # server. We can modify it if wanted, here for instance we
                            # will add some noise to the steer.

                            control = measurements.player_measurements.autopilot_control
                            control.steer += random.uniform(-0.1, 0.1)
                            client.send_control(control)
                    print ("Start pickle save")
                    with open(output_folder + '/cameras.p', 'w') as camerafile:
                        pickle.dump(cameras_dict, camerafile)
                    with open(output_folder + '/people.p', 'w') as peoplefile:
                        pickle.dump(pedestrians_dict, peoplefile)
                    with open(output_folder + '/cars.p', 'w') as carfile:
                        pickle.dump(cars_dict, carfile)
                    print ("Episode done")
Example #4
0
def run_carla_client(host, port, far):
    # Here we will run a single episode with 300 frames.
    number_of_frames = 30
    frame_step = 10  # Save one image every 100 frames

    image_size = [800, 600]
    camera_local_pos = [0.3, 0.0, 1.3]  # [X, Y, Z]
    camera_local_rotation = [0, 0, 0]  # [pitch(Y), yaw(Z), roll(X)]
    fov = 70
    autopilot = False
    control = VehicleControl()
    for start_i in range(150):
        output_folder = '/home2/mariap/Packages/CARLA_0.8.2/PythonClient/_out/pos_' + str(
            start_i)
        if not os.path.exists(output_folder):
            os.mkdir(output_folder)
            print("make " + str(output_folder))

    # Connect with the server
    with make_carla_client(host, port) as client:
        print('CarlaClient connected')
        for start_i in range(150):
            output_folder = '/home2/mariap/Packages/CARLA_0.8.2/PythonClient/_out/pos_' + str(
                start_i)
            print(output_folder)

            # Here we load the settings.
            settings = CarlaSettings()
            settings.set(SynchronousMode=True,
                         SendNonPlayerAgentsInfo=True,
                         NumberOfVehicles=100,
                         NumberOfPedestrians=500,
                         WeatherId=random.choice([1, 3, 7, 8, 14]))
            settings.randomize_seeds()

            camera1 = Camera('CameraDepth', PostProcessing='Depth', FOV=fov)
            camera1.set_image_size(*image_size)
            camera1.set_position(*camera_local_pos)
            camera1.set_rotation(*camera_local_rotation)
            settings.add_sensor(camera1)

            camera2 = Camera('CameraRGB', PostProcessing='SceneFinal', FOV=fov)
            camera2.set_image_size(*image_size)
            camera2.set_position(*camera_local_pos)
            camera2.set_rotation(*camera_local_rotation)
            settings.add_sensor(camera2)

            camera3 = Camera('CameraSeg',
                             PostProcessing='SemanticSegmentation')
            camera3.set_image_size(*image_size)
            camera3.set_position(*camera_local_pos)
            camera3.set_rotation(*camera_local_rotation)
            settings.add_sensor(camera3)

            client.load_settings(settings)

            # Start at location index id '0'
            client.start_episode(start_i)

            cameras_dict = {}
            pedestrians_dict = {}
            cars_dict = {}
            # Compute the camera transform matrix
            camera_to_car_transform = camera2.get_unreal_transform()
            # (Intrinsic) (3, 3) K Matrix
            K = np.identity(3)
            K[0, 2] = image_size[0] / 2.0
            K[1, 2] = image_size[1] / 2.0
            K[0,
              0] = K[1,
                     1] = image_size[0] / (2.0 * np.tan(fov * np.pi / 360.0))
            with open(output_folder + '/camera_intrinsics.p', 'w') as camfile:
                pickle.dump(K, camfile)

            # Iterate every frame in the episode except for the first one.
            for frame in range(1, number_of_frames):
                # Read the data produced by the server this frame.
                measurements, sensor_data = client.read_data()

                # Save one image every 'frame_step' frames
                if not frame % frame_step:
                    for name, measurement in sensor_data.items():
                        filename = '{:s}/{:0>6d}'.format(name, frame)
                        measurement.save_to_disk(
                            os.path.join(output_folder, filename))
                    # Start transformations time mesure.
                    timer = StopWatch()

                    # RGB image [[[r,g,b],..[r,g,b]],..[[r,g,b],..[r,g,b]]]
                    image_RGB = to_rgb_array(sensor_data['CameraRGB'])
                    image_seg = np.tile(
                        labels_to_array(sensor_data['CameraSeg']), (1, 1, 3))

                    # 2d to (camera) local 3d
                    # We use the image_RGB to colorize each 3D point, this is optional.
                    # "max_depth" is used to keep only the points that are near to the
                    # camera, meaning 1.0 the farest points (sky)
                    point_cloud = depth_to_local_point_cloud(
                        sensor_data['CameraDepth'], image_RGB, max_depth=far)

                    point_cloud_seg = depth_to_local_point_cloud(
                        sensor_data['CameraDepth'], image_seg, max_depth=far)

                    # (Camera) local 3d to world 3d.
                    # Get the transform from the player protobuf transformation.
                    world_transform = Transform(
                        measurements.player_measurements.transform)

                    # Compute the final transformation matrix.
                    car_to_world_transform = world_transform * camera_to_car_transform

                    # Car to World transformation given the 3D points and the
                    # transformation matrix.
                    point_cloud.apply_transform(car_to_world_transform)
                    point_cloud_seg.apply_transform(car_to_world_transform)

                    Rt = car_to_world_transform.matrix
                    Rt_inv = car_to_world_transform.inverse(
                    ).matrix  # Rt_inv is the world to camera matrix !!
                    #R_inv=world_transform.inverse().matrix
                    cameras_dict[frame] = {}
                    cameras_dict[frame]['inverse_rotation'] = Rt_inv[:]
                    cameras_dict[frame]['rotation'] = Rt[:]
                    cameras_dict[frame]['translation'] = Rt_inv[0:3, 3]
                    cameras_dict[frame][
                        'camera_to_car'] = camera_to_car_transform.matrix

                    # Get non-player info
                    vehicles = {}
                    pedestrians = {}
                    for agent in measurements.non_player_agents:
                        # check if the agent is a vehicle.
                        if agent.HasField('vehicle'):
                            pos = agent.vehicle.transform.location
                            pos_vector = np.array([[pos.x], [pos.y], [pos.z],
                                                   [1.0]])

                            trnasformed_3d_pos = np.dot(Rt_inv, pos_vector)
                            pos2d = np.dot(K, trnasformed_3d_pos[:3])

                            # Normalize the point
                            norm_pos2d = np.array([
                                pos2d[0] / pos2d[2], pos2d[1] / pos2d[2],
                                pos2d[2]
                            ])

                            # Now, pos2d contains the [x, y, d] values of the image in pixels (where d is the depth)
                            # You can use the depth to know the points that are in front of the camera (positive depth).

                            x_2d = image_size[0] - norm_pos2d[0]
                            y_2d = image_size[1] - norm_pos2d[1]
                            vehicles[agent.id] = {}
                            vehicles[agent.id]['transform'] = [
                                agent.vehicle.transform.location.x,
                                agent.vehicle.transform.location.y,
                                agent.vehicle.transform.location.z
                            ]
                            vehicles[agent.id][
                                'bounding_box.transform'] = agent.vehicle.bounding_box.transform.location.z

                            vehicles[agent.id][
                                'yaw'] = agent.vehicle.transform.rotation.yaw
                            vehicles[agent.id]['bounding_box'] = [
                                agent.vehicle.bounding_box.extent.x,
                                agent.vehicle.bounding_box.extent.y,
                                agent.vehicle.bounding_box.extent.z
                            ]
                            vehicle_transform = Transform(
                                agent.vehicle.bounding_box.transform)
                            pos = agent.vehicle.transform.location

                            bbox3d = agent.vehicle.bounding_box.extent

                            # Compute the 3D bounding boxes
                            # f contains the 4 points that corresponds to the bottom
                            f = np.array([[
                                pos.x + bbox3d.x, pos.y - bbox3d.y,
                                pos.z - bbox3d.z +
                                agent.vehicle.bounding_box.transform.location.z
                            ],
                                          [
                                              pos.x + bbox3d.x,
                                              pos.y + bbox3d.y,
                                              pos.z - bbox3d.z + agent.vehicle.
                                              bounding_box.transform.location.z
                                          ],
                                          [
                                              pos.x - bbox3d.x,
                                              pos.y + bbox3d.y,
                                              pos.z - bbox3d.z + agent.vehicle.
                                              bounding_box.transform.location.z
                                          ],
                                          [
                                              pos.x - bbox3d.x,
                                              pos.y - bbox3d.y,
                                              pos.z - bbox3d.z + agent.vehicle.
                                              bounding_box.transform.location.z
                                          ],
                                          [
                                              pos.x + bbox3d.x,
                                              pos.y - bbox3d.y,
                                              pos.z + bbox3d.z + agent.vehicle.
                                              bounding_box.transform.location.z
                                          ],
                                          [
                                              pos.x + bbox3d.x,
                                              pos.y + bbox3d.y,
                                              pos.z + bbox3d.z + agent.vehicle.
                                              bounding_box.transform.location.z
                                          ],
                                          [
                                              pos.x - bbox3d.x,
                                              pos.y + bbox3d.y,
                                              pos.z + bbox3d.z + agent.vehicle.
                                              bounding_box.transform.location.z
                                          ],
                                          [
                                              pos.x - bbox3d.x,
                                              pos.y - bbox3d.y,
                                              pos.z + bbox3d.z + agent.vehicle.
                                              bounding_box.transform.location.z
                                          ]])

                            f_rotated = vehicle_transform.transform_points(f)
                            f_2D_rotated = []
                            vehicles[
                                agent.id]['bounding_box_coord'] = f_rotated

                            for i in range(f.shape[0]):
                                point = np.array([[f_rotated[i, 0]],
                                                  [f_rotated[i, 1]],
                                                  [f_rotated[i, 2]], [1]])
                                transformed_2d_pos = np.dot(
                                    Rt_inv,
                                    point)  # 3d Position in camera space
                                pos2d = np.dot(
                                    K, transformed_2d_pos[:3]
                                )  # Conversion to camera frustum space
                                norm_pos2d = np.array([
                                    pos2d[0] / pos2d[2], pos2d[1] / pos2d[2],
                                    pos2d[2]
                                ])
                                #print([image_size[0] - (pos2d[0] / pos2d[2]), image_size[1] - (pos2d[1] / pos2d[2])])
                                f_2D_rotated.append(
                                    np.array([
                                        image_size[0] - norm_pos2d[0],
                                        image_size[1] - norm_pos2d[1]
                                    ]))
                            vehicles[
                                agent.id]['bounding_box_2D'] = f_2D_rotated

                        elif agent.HasField('pedestrian'):

                            pedestrians[agent.id] = {}
                            pedestrians[agent.id]['transform'] = [
                                agent.pedestrian.transform.location.x,
                                agent.pedestrian.transform.location.y,
                                agent.pedestrian.transform.location.z
                            ]
                            pedestrians[agent.id][
                                'yaw'] = agent.pedestrian.transform.rotation.yaw
                            pedestrians[agent.id]['bounding_box'] = [
                                agent.pedestrian.bounding_box.extent.x,
                                agent.pedestrian.bounding_box.extent.y,
                                agent.pedestrian.bounding_box.extent.z
                            ]
                            vehicle_transform = Transform(
                                agent.pedestrian.bounding_box.transform)
                            pos = agent.pedestrian.transform.location

                            bbox3d = agent.pedestrian.bounding_box.extent

                            # Compute the 3D bounding boxes
                            # f contains the 4 points that corresponds to the bottom
                            f = np.array(
                                [[pos.x + bbox3d.x, pos.y - bbox3d.y, pos.z],
                                 [pos.x + bbox3d.x, pos.y + bbox3d.y, pos.z],
                                 [pos.x - bbox3d.x, pos.y + bbox3d.y, pos.z],
                                 [pos.x - bbox3d.x, pos.y - bbox3d.y, pos.z],
                                 [
                                     pos.x + bbox3d.x, pos.y - bbox3d.y,
                                     pos.z + bbox3d.z
                                 ],
                                 [
                                     pos.x + bbox3d.x, pos.y + bbox3d.y,
                                     pos.z + bbox3d.z
                                 ],
                                 [
                                     pos.x - bbox3d.x, pos.y + bbox3d.y,
                                     pos.z + bbox3d.z
                                 ],
                                 [
                                     pos.x - bbox3d.x, pos.y - bbox3d.y,
                                     pos.z + bbox3d.z
                                 ]])

                            f_rotated = vehicle_transform.transform_points(f)
                            pedestrians[
                                agent.id]['bounding_box_coord'] = f_rotated
                            f_2D_rotated = []

                            for i in range(f.shape[0]):
                                point = np.array([[f_rotated[i, 0]],
                                                  [f_rotated[i, 1]],
                                                  [f_rotated[i, 2]], [1]])
                                transformed_2d_pos = np.dot(
                                    Rt_inv, point)  # See above for cars
                                pos2d = np.dot(K, transformed_2d_pos[:3])
                                norm_pos2d = np.array([
                                    pos2d[0] / pos2d[2], pos2d[1] / pos2d[2],
                                    pos2d[2]
                                ])
                                f_2D_rotated.append([
                                    image_size[0] - norm_pos2d[0],
                                    image_size[1] - norm_pos2d[1]
                                ])
                            pedestrians[
                                agent.id]['bounding_box_2D'] = f_2D_rotated

                    cars_dict[frame] = vehicles
                    pedestrians_dict[frame] = pedestrians

                    # End transformations time mesure.
                    timer.stop()

                    # Save PLY to disk
                    # This generates the PLY string with the 3D points and the RGB colors
                    # for each row of the file.
                    point_cloud.save_to_disk(
                        os.path.join(output_folder,
                                     '{:0>5}.ply'.format(frame)))
                    point_cloud_seg.save_to_disk(
                        os.path.join(output_folder,
                                     '{:0>5}_seg.ply'.format(frame)))

                    print_message(timer.milliseconds(), len(point_cloud),
                                  frame)

                if autopilot:
                    client.send_control(
                        measurements.player_measurements.autopilot_control)
                else:
                    control.hand_brake = True
                    client.send_control(control)
            with open(output_folder + '/cameras.p', 'w') as camerafile:
                pickle.dump(cameras_dict, camerafile)
                print(output_folder + "cameras.p")
            with open(output_folder + '/people.p', 'w') as peoplefile:
                pickle.dump(pedestrians_dict, peoplefile)
            with open(output_folder + '/cars.p', 'w') as carfile:
                pickle.dump(cars_dict, carfile)
    def on_ground_segmented_frame(self, msg):
        # We ignore the first several seconds of the simulation because the car
        # is not moving at beginning.
        if msg.timestamp.coordinates[
                0] > self._flags.eval_ground_truth_ignore_first:
            start_time = time.time()
            # We don't fully transform it to cityscapes palette to avoid
            # introducing extra latency.
            frame_array = labels_to_array(msg.data)
            frame_masks = generate_masks(frame_array)

            if len(self._ground_masks) > 0:
                if self._time_delta is None:
                    self._time_delta = (msg.timestamp.coordinates[0] -
                                        self._ground_masks[0][0])
                else:
                    # Check that no frames got dropped.
                    recv_time_delta = (msg.timestamp.coordinates[0] -
                                       self._ground_masks[-1][0])
                    assert self._time_delta == recv_time_delta

                # Pop the oldest frame if it's older than the max latency
                # we're interested in.
                if (msg.timestamp.coordinates[0] - self._ground_masks[0][0] >
                        self._flags.eval_ground_truth_max_latency):
                    self._ground_masks.popleft()

                cur_time = time_epoch_ms()
                for timestamp, ground_masks in self._ground_masks:
                    (mean_iou, class_iou) = compute_semantic_iou_from_masks(
                        ground_masks, frame_masks)
                    time_diff = msg.timestamp.coordinates[0] - timestamp
                    self._logger.info(
                        'Segmentation ground latency {} ; mean IoU {}'.format(
                            time_diff, mean_iou))
                    self._csv_logger.info('{},{},mIoU,{},{}'.format(
                        cur_time, self.name, time_diff, mean_iou))
                    pedestrian_key = 4
                    if pedestrian_key in class_iou:
                        self._logger.info(
                            'Segmentation ground latency {} ; pedestrian IoU {}'
                            .format(time_diff, class_iou[pedestrian_key]))
                        self._csv_logger.info(
                            '{},{},pedestrianIoU,{},{}'.format(
                                cur_time, self.name, time_diff,
                                class_iou[pedestrian_key]))

                    vehicle_key = 10
                    if vehicle_key in class_iou:
                        self._logger.info(
                            'Segmentation ground latency {} ; vehicle IoU {}'.
                            format(time_diff, class_iou[vehicle_key]))
                        self._csv_logger.info('{},{},vehicleIoU,{},{}'.format(
                            cur_time, self.name, time_diff,
                            class_iou[vehicle_key]))

            # Append the processed image to the buffer.
            self._ground_masks.append(
                (msg.timestamp.coordinates[0], frame_masks))

            runtime = (time.time() - start_time) * 1000
            self._logger.info(
                'Segmentation eval ground runtime {}'.format(runtime))