Beispiel #1
0
 def get_dataset_next(dataset):
     """
     Fetch the next element from an iterable dataset.

     Args:
         dataset: Iterator yielding dataset elements.

     Returns:
         The next element, or None when a tf.errors.UnknownError occurs.
     """
     try:
         return next(dataset)
     except tf.errors.UnknownError as e:
         # sometimes encountered when reading from google drive
         default_logger.error(f'Error occurred during reading from dataset\n{e}')
         return None
def write_tf_record(output_path, groups, data, trainer=None):
    """
    Write data to TFRecord.

    Args:
        output_path: Full path to save.
        groups: pandas GroupBy object keyed by image path.
        data: pandas DataFrame whose columns describe the grouped objects.
        trainer: Main.Trainer object; when given, output_path is registered
            on it as the train or validation record based on its name.

    Returns:
        None
    """
    print(f'Processing {os.path.split(output_path)[-1]}')
    if trainer:
        # Register the record path on the trainer so training can locate it.
        if 'train' in output_path:
            trainer.train_tf_record = output_path
        if 'test' in output_path:
            trainer.valid_tf_record = output_path
    # Hoisted loop invariant: len(groups) was computed twice per iteration.
    total = len(groups)
    with tf.io.TFRecordWriter(output_path) as r_writer:
        for current_image, (image_path, objects) in enumerate(groups, 1):
            print(
                f'\rBuilding example: {current_image}/{total} ... '
                f'{os.path.split(image_path)[-1]} '
                f'{round(100 * (current_image / total))}% completed',
                end='',
            )
            separate_data = pd.DataFrame(
                objects, columns=data.columns
            ).T.to_numpy()
            (
                image_width,
                image_height,
                x_min,
                y_min,
                x_max,
                y_max,
            ) = separate_data[2:8]
            # In-place division: normalizes the coordinate rows of
            # separate_data itself to [0, 1] before create_example sees it.
            x_min /= image_width
            x_max /= image_width
            y_min /= image_height
            y_max /= image_height
            try:
                # Context manager closes the handle; the previous bare
                # open(...).read() leaked the file descriptor.
                with open(image_path, 'rb') as image_file:
                    image_data = image_file.read()
                key = hashlib.sha256(image_data).hexdigest()
                training_example = create_example(
                    separate_data, key, image_data
                )
                r_writer.write(training_example.SerializeToString())
            except Exception as e:
                # Deliberate best-effort: log and skip unreadable or
                # malformed entries, continue building the record.
                default_logger.error(e)
    print()
Beispiel #3
0
 def __init__(
     self,
     labels_file,
     augmentation_map,
     workers=32,
     converted_coordinates_file=None,
     image_folder=None,
 ):
     """
     Initialize augmentation session.

     Args:
         labels_file: csv file containing relative image labels.
         augmentation_map: A structured dictionary containing categorized
             augmentation sequences.
         workers: Parallel threads.
         converted_coordinates_file: csv file containing coordinates
             converted from relative values.
         image_folder: Folder containing images other than Data/Photos/

     Raises:
         ValueError: If no photos are found in the image folder.
     """
     # Guard: all augmentation modules must have been imported successfully.
     assert all([ia, iaa, iap])
     self.labels_file = labels_file
     self.mapping = pd.read_csv(labels_file)
     # Path accepts multiple segments directly; no need for os.path.join.
     self.image_folder = Path('..', 'Data', 'Photos').absolute().resolve()
     if image_folder:
         self.image_folder = Path(image_folder).absolute().resolve()
     # Skip hidden files (e.g. .DS_Store) when collecting photos.
     self.image_paths = [
         (self.image_folder / image).absolute().resolve()
         for image in os.listdir(self.image_folder)
         if not image.startswith('.')
     ]
     self.image_paths_copy = self.image_paths.copy()
     if not self.image_paths:
         default_logger.error(
             f'Augmentation aborted: no photos found in {self.image_folder}'
         )
         # Removed extraneous f-prefix: message has no placeholders.
         raise ValueError('No photos given')
     # NOTE(review): dimensions are read from the first image only —
     # presumably all photos share one size; verify against callers.
     self.image_width, self.image_height = imagesize.get(
         self.image_paths[0])
     self.converted_coordinates = (
         pd.read_csv(converted_coordinates_file)
         if converted_coordinates_file
         else self.relative_to_coordinates()
     )
     self.converted_groups = self.converted_coordinates.groupby('image')
     self.augmentation_data = []
     self.augmentation_sequences = []
     self.augmentation_map = augmentation_map
     self.workers = workers
     self.augmented_images = 0
     self.total_images = len(self.image_paths)
     # Random 7-digit id distinguishing this augmentation session.
     self.session_id = np.random.randint(10 ** 6, 10 ** 7)
    def check_tf_records(self):
        """
        Ensure TFRecords are specified to start training.

        Returns:
            None

        Raises:
            ValueError: If the training or validation TFRecord is missing.
        """
        required = (
            (self.train_tf_record, 'No training TFRecord specified'),
            (self.valid_tf_record, 'No validation TFRecord specified'),
        )
        for record, issue in required:
            if not record:
                default_logger.error(issue)
                raise ValueError(issue)