def __init__(
    self,
    labels_file,
    augmentation_map,
    workers=32,
    converted_coordinates_file=None,
    image_folder=None,
):
    """
    Initialize augmentation session.

    Args:
        labels_file: csv file containing relative image labels.
        augmentation_map: A structured dictionary containing categorized
            augmentation sequences.
        workers: Number of parallel threads.
        converted_coordinates_file: csv file containing coordinates
            converted from relative to absolute.
        image_folder: Folder containing the images, if different from
            data/photos/.
    """
    # Ensure the imgaug modules were imported successfully.
    assert all([ia, iaa, iap])
    self.labels_file = labels_file
    self.mapping = pd.read_csv(labels_file)
    self.image_folder = Path('data', 'photos').absolute().resolve()
    if image_folder:
        self.image_folder = Path(image_folder).absolute().resolve()
    self.image_paths = [
        (Path(self.image_folder) / image).absolute().resolve()
        for image in os.listdir(self.image_folder)
        if not image.startswith('.')
    ]
    self.image_paths_copy = self.image_paths.copy()
    if not self.image_paths:
        LOGGER.error(
            f'Augmentation aborted: no photos found in {self.image_folder}'
        )
        raise ValueError('No photos given')
    self.image_width, self.image_height = imagesize.get(self.image_paths[0])
    self.converted_coordinates = (
        pd.read_csv(converted_coordinates_file)
        if converted_coordinates_file
        else self.relative_to_coordinates()
    )
    self.converted_groups = self.converted_coordinates.groupby('image')
    self.augmentation_data = []
    self.augmentation_sequences = []
    self.augmentation_map = augmentation_map
    self.workers = workers
    self.augmented_images = 0
    self.total_images = len(self.image_paths)
    self.session_id = np.random.randint(10 ** 6, 10 ** 7)
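# Usage sketch (hedged): constructing an augmentation session. The class
# name `DataAugment`, the csv paths, and the structure of the augmentation
# map below (category name -> list of augmenter specs) are assumptions for
# illustration only; consult the actual class and map format in this repo.
example_augmentation_map = {
    'meta': [{'augmentation': 'Fliplr', 'p': 0.5}],
    'blur': [{'augmentation': 'GaussianBlur', 'sigma': 3.0}],
}
augmenter = DataAugment(
    labels_file='data/labels.csv',  # hypothetical relative-labels csv
    augmentation_map=example_augmentation_map,
    workers=16,
    image_folder='data/photos',
)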
def write_tf_record(output_path, groups, data, trainer=None):
    """
    Write data to TFRecord.

    Args:
        output_path: Full path to save.
        groups: pandas GroupBy object (data grouped by image path).
        data: pandas DataFrame containing the object annotations.
        trainer: core.Trainer object.

    Returns:
        None
    """
    print(f'Processing {os.path.split(output_path)[-1]}')
    if trainer:
        if 'train' in output_path:
            trainer.train_tf_record = output_path
        if 'test' in output_path:
            trainer.valid_tf_record = output_path
    with tf.io.TFRecordWriter(output_path) as r_writer:
        for current_image, (image_path, objects) in enumerate(groups, 1):
            print(
                f'\rBuilding example: {current_image}/{len(groups)} ... '
                f'{os.path.split(image_path)[-1]} '
                f'{round(100 * (current_image / len(groups)))}% completed',
                end='',
            )
            # Transpose so each row of separate_data holds one field
            # across all objects in the current image.
            separate_data = pd.DataFrame(
                objects, columns=data.columns
            ).T.to_numpy()
            (
                image_width,
                image_height,
                x_min,
                y_min,
                x_max,
                y_max,
            ) = separate_data[2:8]
            # In-place division on these numpy views normalizes the box
            # coordinates to [0, 1] inside separate_data as well.
            x_min /= image_width
            x_max /= image_width
            y_min /= image_height
            y_max /= image_height
            try:
                with open(image_path, 'rb') as image_file:
                    image_data = image_file.read()
                key = hashlib.sha256(image_data).hexdigest()
                training_example = create_example(
                    separate_data, key, image_data
                )
                r_writer.write(training_example.SerializeToString())
            except Exception as e:
                LOGGER.error(e)
    print()
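# Usage sketch (hedged): writing a TFRecord from an annotations frame. The
# column names and order below are assumptions chosen so that columns 2..7
# line up with the image_width..y_max slice taken by write_tf_record; the
# grouping key and file paths are likewise hypothetical.
example_data = pd.DataFrame(
    [
        ['data/photos/img1.jpg', 'car', 1280, 720, 100, 50, 400, 300],
        ['data/photos/img1.jpg', 'dog', 1280, 720, 500, 200, 700, 450],
    ],
    columns=[
        'image_path', 'object_name', 'image_width', 'image_height',
        'x_min', 'y_min', 'x_max', 'y_max',
    ],
)
write_tf_record(
    'data/train.tfrecord',
    example_data.groupby('image_path'),
    example_data,
)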
def check_tf_records(self):
    """
    Ensure tfrecords are specified to start training.

    Returns:
        None
    """
    if not self.train_tf_record:
        issue = 'No training TFRecord specified'
        LOGGER.error(issue)
        raise ValueError(issue)
    if not self.valid_tf_record:
        issue = 'No validation TFRecord specified'
        LOGGER.error(issue)
        raise ValueError(issue)
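# Usage sketch (hedged): the guard is expected to run before training
# starts. Bypassing __init__ below is purely for illustration, since the
# real Trainer constructor signature is not shown here.
trainer = Trainer.__new__(Trainer)
trainer.train_tf_record = None
trainer.valid_tf_record = 'data/test.tfrecord'
try:
    trainer.check_tf_records()
except ValueError as error:
    print(error)  # No training TFRecord specified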
def get_dataset_next(dataset):
    """Return the next dataset item, or None if reading fails."""
    try:
        return next(dataset)
    except tf.errors.UnknownError as e:
        # sometimes encountered when reading from google drive
        LOGGER.error(f'Error occurred during reading from dataset\n{e}')
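# Usage sketch (hedged): get_dataset_next wraps next() so transient read
# failures are logged and surfaced as None instead of crashing a training
# loop. The range dataset below is a stand-in for a parsed TFRecord dataset.
dataset_iterator = iter(tf.data.Dataset.range(3))
first_item = get_dataset_next(dataset_iterator)
if first_item is not None:
    print(first_item.numpy())  # 0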