Example #1
def get_latest_checkpoint_validation():
    """
        Returns the latest checkpoint in TEST_SCHEDULE that already has a
        validation result (CSV file) written, or None if none has been validated yet.

    """
    csv_file_path = os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME,
                                 g_conf.EXPERIMENT_NAME,
                                 g_conf.PROCESS_NAME + '_csv')

    csv_files = os.listdir(csv_file_path)

    if len(csv_files) == 0:
        return None

    sort_nicely(csv_files)

    csv_file_numbers = set(
        [float(re.findall(r'\d+', file)[0]) for file in csv_files])

    not_evaluated_logs = list(
        set(g_conf.TEST_SCHEDULE).difference(csv_file_numbers))

    not_evaluated_logs = sorted(not_evaluated_logs, reverse=False)

    if len(not_evaluated_logs) == 0:  # Just in case that is the last one
        return g_conf.TEST_SCHEDULE[-1]

    if g_conf.TEST_SCHEDULE.index(not_evaluated_logs[0]) == 0:
        return None

    return g_conf.TEST_SCHEDULE[
        g_conf.TEST_SCHEDULE.index(not_evaluated_logs[0]) - 1]

def get_latest_saved_checkpoint():
    """
        Returns the latest checkpoint number that was saved

    """
    checkpoint_files = os.listdir(os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME,
                                               g_conf.EXPERIMENT_NAME, 'checkpoints'))
    if len(checkpoint_files) == 0:
        return None
    else:
        sort_nicely(checkpoint_files)
        return checkpoint_files[-1]
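Both snippets above rely on sort_nicely, which is imported from elsewhere in the project and not shown here. A minimal natural-sort sketch, under the assumption that it sorts the list in place so that embedded numbers are compared numerically ('2.pth' before '10.pth'):

import re

def sort_nicely(items):
    # Hypothetical stand-in: sort in place, comparing embedded integers numerically.
    def natural_key(text):
        return [int(tok) if tok.isdigit() else tok for tok in re.split(r'(\d+)', text)]
    items.sort(key=natural_key)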
Example #3
def print_folder_process_names(exp_batch):
    experiments_list = os.listdir(os.path.join('configs', exp_batch))
    sort_nicely(experiments_list)

    for experiment in experiments_list:
        if '.yaml' in experiment:
            g_conf.immutable(False)

            merge_with_yaml(os.path.join('configs', exp_batch, experiment))

            print(
                experiment.split('.')[-2] + ': ' +
                g_conf.EXPERIMENT_GENERATED_NAME)

def get_latest_saved_checkpoint():
    """
        Returns the latest checkpoint number that was saved

    """
    checkpoint_files = glob.glob(os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME,
                                              g_conf.EXPERIMENT_NAME, 'checkpoints/*0.pth'))
    if checkpoint_files == []:
        return None
    else:
        sort_nicely(checkpoint_files)
        return checkpoint_files[-1].split('/')[-1]
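This variant returns a file name such as '100000.pth' rather than a number. A hypothetical helper (not part of the original code) to recover the iteration count from that name:

import re

def checkpoint_iteration(checkpoint_file):
    # '660000.pth' -> 660000; None if the name contains no digits.
    match = re.search(r'\d+', checkpoint_file)
    return int(match.group()) if match else None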
Example #5
def get_latest_checkpoint_validation():
    """
    Get the checkpoint to validate
    """
    # Path to the directory containing all the csv files
    csv_file_path = os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME,
                                 g_conf.EXPERIMENT_NAME,
                                 f'{g_conf.PROCESS_NAME}_csv')

    # List them
    csv_files = os.listdir(csv_file_path)

    # Return None if there aren't any
    if len(csv_files) == 0:
        return None

    # Otherwise, we should sort them and return the latest one
    sort_nicely(csv_files)

    # Get the checkpoint number from each file name (as a float) and drop duplicates
    csv_file_numbers = set(
        [float(re.findall(r'\d+', file)[0]) for file in csv_files])

    # Remove the checkpoints that have already been tested and sort them
    not_evaluated_logs = sorted(
        set(g_conf.TEST_SCHEDULE).difference(csv_file_numbers))

    if len(not_evaluated_logs) == 0:  # Just in case that is the last one
        return g_conf.TEST_SCHEDULE[-1]

    if g_conf.TEST_SCHEDULE.index(not_evaluated_logs[0]) == 0:
        return None

    return g_conf.TEST_SCHEDULE[
        g_conf.TEST_SCHEDULE.index(not_evaluated_logs[0]) - 1]
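A self-contained toy run of the same set-difference logic, with made-up values, to make the three return cases concrete:

test_schedule = [2000, 4000, 6000, 8000]   # stand-in for g_conf.TEST_SCHEDULE
csv_file_numbers = {2000.0, 4000.0}        # checkpoints that already have CSV results

not_evaluated = sorted(set(test_schedule).difference(csv_file_numbers))
if len(not_evaluated) == 0:
    latest = test_schedule[-1]             # everything has already been evaluated
elif test_schedule.index(not_evaluated[0]) == 0:
    latest = None                          # nothing has been evaluated yet
else:
    latest = test_schedule[test_schedule.index(not_evaluated[0]) - 1]

print(latest)  # -> 4000, the latest checkpoint with validation results on disk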
Example #6
    def _pre_load_image_folders(self, path, dataset_index=0):
        """
        Pre load the image folders for each episode, keep in mind that we only take
        the measurements that we think that are interesting for now.

        Args
            the path for the dataset

        Returns
            sensor data names: it is a vector with n dimensions being one for each sensor modality
            for instance, rgb only dataset will have a single vector with all the image names.
            float_data: all the wanted float data is loaded inside a vector, that is a vector
            of dictionaries.

        """

        episodes_list = glob.glob(os.path.join(path, 'episode_*'))
        sort_nicely(episodes_list)
        # Do a check if the episodes list is empty
        if len(episodes_list) == 0:
            raise ValueError(
                "There are no episodes on the training dataset folder %s" %
                path)

        sensor_data_names = []
        float_dicts = []

        number_of_hours_pre_loaded = 0

        # Now we go through each episode and try to find all the measurements
        for episode in episodes_list:

            print('Episode ', episode)

            available_measurements_dict = data_parser.check_available_measurements(
                episode)

            if number_of_hours_pre_loaded > g_conf.NUMBER_OF_HOURS:
                # The number of wanted hours achieved
                break

            # Get all the measurements from this episode
            measurements_list = glob.glob(os.path.join(episode,
                                                       'measurement*'))
            sort_nicely(measurements_list)

            if len(measurements_list) == 0:
                print("EMPTY EPISODE")
                continue

            # A simple count to keep track of how many measurements were added this episode.
            count_added_measurements = 0

            for measurement in measurements_list:

                data_point_number = measurement.split('_')[-1].split('.')[0]

                with open(measurement) as f:
                    measurement_data = json.load(f)

                # Depending on the configuration file, we eliminate the kinds of measurements
                # that are not going to be used for this experiment.
                # We extract the interesting subset from the measurement dict.

                speed = data_parser.get_speed(measurement_data)

                directions = measurement_data['directions']
                final_measurement = self._get_final_measurement(
                    speed, measurement_data, 0, directions,
                    available_measurements_dict)

                if self.is_measurement_partof_experiment(final_measurement):
                    sensor_data_names.append([])
                    float_dicts.append(final_measurement)
                    rgb = 'CentralRGB_' + data_point_number + '.png'
                    sensor_data_names[-1].append(
                        os.path.join(episode.split('/')[-1], rgb))
                    seg = 'SemanticSeg_' + data_point_number + '.png'
                    sensor_data_names[-1].append(
                        os.path.join(episode.split('/')[-1], seg))
                    count_added_measurements += 1

                # We do measurement augmentation for the left-side camera
                # We convert the speed to km/h for the augmentation

                # We extract the interesting subset from the measurement dict

                final_measurement = self._get_final_measurement(
                    speed, measurement_data, -30.0, directions,
                    available_measurements_dict)

                if self.is_measurement_partof_experiment(final_measurement):
                    sensor_data_names.append([])
                    float_dicts.append(final_measurement)
                    rgb = 'LeftRGB_' + data_point_number + '.png'
                    sensor_data_names[-1].append(
                        os.path.join(episode.split('/')[-1], rgb))
                    count_added_measurements += 1

                # We do measurement augmentation for the right-side camera

                final_measurement = self._get_final_measurement(
                    speed, measurement_data, 30.0, directions,
                    available_measurements_dict)

                if self.is_measurement_partof_experiment(final_measurement):
                    sensor_data_names.append([])
                    float_dicts.append(final_measurement)
                    rgb = 'RightRGB_' + data_point_number + '.png'
                    sensor_data_names[-1].append(
                        os.path.join(episode.split('/')[-1], rgb))
                    count_added_measurements += 1

            # Check how many hours were actually added
            last_data_point_number = measurements_list[-1].split(
                '_')[-1].split('.')[0]
            number_of_hours_pre_loaded += (
                float(count_added_measurements / 10.0) / 3600.0)  # assumes data recorded at 10 fps
            print(" Loaded ", number_of_hours_pre_loaded, " hours of data")

        # Make the path to save the pre loaded datasets
        if not os.path.exists('_preloads'):
            os.mkdir('_preloads')
        # If there is a name, we save the pre-loaded data
        if self.preload_names[dataset_index] is not None:
            np.save(
                os.path.join('_preloads', self.preload_names[dataset_index]),
                [sensor_data_names, float_dicts])

        return sensor_data_names, float_dicts
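The method caches its result under _preloads/<preload_name>.npy (np.save appends the extension). A sketch of reading that cache back, assuming the two-element layout written above; allow_pickle=True is needed because the saved array holds Python objects:

import os
import numpy as np

def load_preload(preload_name):
    # Hypothetical reader for the cache written by _pre_load_image_folders.
    preload_path = os.path.join('_preloads', preload_name + '.npy')
    if not os.path.exists(preload_path):
        return None
    sensor_data_names, float_dicts = np.load(preload_path, allow_pickle=True)
    return sensor_data_names, float_dicts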
Example #7
    def _pre_load_image_folders(self, path):
        """
        Pre load the image folders for each episode, keep in mind that we only take
        the measurements that we think that are interesting for now.

        Args
            the path for the dataset

        Returns
            sensor data names: it is a vector with n dimensions being one for each sensor modality
            for instance, rgb only dataset will have a single vector with all the image names.
            float_data: all the wanted float data is loaded inside a vector, that is a vector
            of dictionaries.

        """

        episodes_list = glob.glob(os.path.join(path, 'episode_*'))
        sort_nicely(episodes_list)
        # Do a check if the episodes list is empty
        if len(episodes_list) == 0:
            raise ValueError("There are no episodes on the training dataset folder %s" % path)

        sensor_data_names = []
        float_dicts = []

        number_of_hours_pre_loaded = 0

        # Now we go through each episode and try to find all the measurements
        for episode in episodes_list:

            print('Episode ', episode)

            available_measurements_dict = data_parser.check_available_measurements(episode)

            if number_of_hours_pre_loaded > g_conf.NUMBER_OF_HOURS:
                # The number of wanted hours achieved
                break

            # Get all the measurements from this episode
            measurements_list = glob.glob(os.path.join(episode, 'measurement*'))
            sort_nicely(measurements_list)

            if len(measurements_list) == 0:
                print("EMPTY EPISODE")
                continue

            # A simple count to keep track of how many measurements were added this episode.
            count_added_measurements = 0

            for measurement in measurements_list[:-3]:

                data_point_number = measurement.split('_')[-1].split('.')[0]

                with open(measurement) as f:
                    measurement_data = json.load(f)

                # Depending on the configuration file, we eliminate the kinds of measurements
                # that are not going to be used for this experiment.
                # We extract the interesting subset from the measurement dict.

                speed = data_parser.get_speed(measurement_data)

                directions = measurement_data['directions']
                final_measurement = self._get_final_measurement(speed, measurement_data, 0,
                                                                directions,
                                                                available_measurements_dict)

                if self.is_measurement_partof_experiment(final_measurement):
                    float_dicts.append(final_measurement)
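A worked toy example of the filename parsing these loaders use, with hypothetical paths, showing how a measurement JSON is mapped to the camera image it belongs to:

import os

measurement = '/data/episode_00003/measurement_00042.json'    # hypothetical path
data_point_number = measurement.split('_')[-1].split('.')[0]  # '00042'
rgb = 'CentralRGB_' + data_point_number + '.png'
episode = '/data/episode_00003'
print(os.path.join(episode.split('/')[-1], rgb))  # episode_00003/CentralRGB_00042.png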
Example #8
    def _pre_load_image_folders_old(self, path):
        """
        Pre load the image folders for each episode, keep in mind that we only take
        the measurements that we think that are interesting for now.

        Args
            the path for the dataset

        Returns
            sensor data names: it is a vector with n dimensions being one for each sensor modality
            for instance, rgb only dataset will have a single vector with all the image names.
            float_data: all the wanted float data is loaded inside a vector, that is a vector
            of dictionaries.

        """
        containers_list = glob.glob(os.path.join(path, 'Container_*'))
        sort_nicely(containers_list)
        # Do a check if the containers list is empty
        if len(containers_list) == 0:
            raise ValueError(
                f"There are no containers on the training dataset folder: {path}"
            )

        # We will check one image to see if it matches the size expected by the network
        checked_image = False
        sensor_data_names = {}
        float_dicts = []

        number_of_hours_pre_loaded = 0

        # Now we go through each container and try to find all the measurements
        for container in containers_list:
            print(f'Container name: {container}')
            if number_of_hours_pre_loaded > g_conf.NUMBER_OF_HOURS:
                # The number of wanted hours achieved
                break
            # A simple count to keep track of how many measurements were added this container.
            count_added_measurements = 0
            # We may have more than one client for each container, so the data_point_number might clash later
            client_list = glob.glob(os.path.join(container, '**/Client_*'),
                                    recursive=True)

            for client in client_list:
                # Get all the measurements from this client
                measurements_list = glob.glob(os.path.join(client, 'can_bus*'))
                sort_nicely(measurements_list)

                if len(measurements_list) == 0:
                    print("Empty client")
                    continue

                for measurement in tqdm(measurements_list):
                    data_point_number = os.path.splitext(measurement)[0][
                        -6:]  # /pth/to/can_bus000019.json => 000019
                    with open(measurement) as f:
                        measurement_data = json.load(f)
                    # Delete some non-floatable fields;
                    # depending on the configuration file, we eliminate the kinds of measurements
                    # that are not going to be used for this experiment
                    del measurement_data['hand_brake']
                    # TODO: not relevant now, but we might be interested later on
                    del measurement_data['reverse']
                    del measurement_data['ego_position']
                    del measurement_data['route_nodes_xyz']
                    del measurement_data['route_nodes']

                    # We extract the interesting subset from the measurement dict
                    speed = data_parser.get_speed(measurement_data)

                    for sensor in g_conf.SENSORS.keys():
                        # We will go through each of the cameras
                        cameras = (('central', 0), ('left', -30.0), ('right', 30.0))
                        sensor_name = sensor.split('_')[0]
                        for cam in cameras:
                            # We do measurements for the three cameras
                            # We convert the speed to KM/h for the augmentation
                            # We extract the interesting subset from the measurement dict
                            final_measurement = self._get_final_measurement(
                                speed, measurement_data, cam[1])
                            if self.is_measurement_part_of_experiment(
                                    final_measurement):
                                float_dicts.append(final_measurement)
                                sensor_path = glob.glob(
                                    os.path.join(client,
                                                 f'**/{sensor_name}_{cam[0]}{data_point_number}.png'),
                                    recursive=True)
                                if len(sensor_path) == 0:
                                    continue
                                if not checked_image:
                                    if not check_size(*sensor_path,
                                                      g_conf.SENSORS[sensor]):
                                        raise RuntimeError(
                                            'Unexpected image size for the network!'
                                        )
                                    checked_image = True

                                if sensor_name in sensor_data_names:
                                    sensor_data_names[sensor_name].append(*sensor_path)
                                else:
                                    sensor_data_names[sensor_name] = [*sensor_path]
                                count_added_measurements += 1

            # Check how many hours were actually added
            number_of_hours_pre_loaded += (
                float(count_added_measurements / g_conf.TRAIN_DATA_FPS) /
                3600.0)
            print(f"Loaded {number_of_hours_pre_loaded} hours of data")

        # Make the path to save the pre loaded datasets
        if not os.path.exists('_preloads'):
            os.mkdir('_preloads')
        # If there is a name, we save the pre-loaded data
        if self.preload_name is not None:
            np.save(os.path.join('_preloads', self.preload_name),
                    [sensor_data_names, float_dicts])

        return sensor_data_names, float_dicts
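The hour bookkeeping in this version is simply frames divided by the capture rate and by seconds per hour. A tiny check, assuming g_conf.TRAIN_DATA_FPS is 10 (the value hard-coded as 10.0 in Example #6):

count_added_measurements = 36000   # hypothetical number of frames added
train_data_fps = 10                # stand-in for g_conf.TRAIN_DATA_FPS
hours = (count_added_measurements / train_data_fps) / 3600.0
print(hours)  # -> 1.0 hour of data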