def run_with_reraise(fn, k, example_data, tensor_info):
  """Applies `fn` to one feature, annotating any failure with the feature name.

  Args:
    fn: Serialization function to apply.
    k: Name of the feature being serialized (used in the error message).
    example_data: The data to serialize.
    tensor_info: Spec describing the feature being serialized.

  Returns:
    Whatever `fn(example_data, tensor_info)` returns.
  """
  try:
    return fn(example_data, tensor_info)
  except Exception as e:  # pylint: disable=broad-except
    # Re-raise with the feature name prepended so the failing feature is
    # identifiable from the traceback.
    msg = f"Error while serializing feature `{k}`: `{tensor_info}`: "
    utils.reraise(e, msg)
def run_with_reraise(fn, k, example_data, tensor_info):
  """Applies `fn` to one feature, annotating any failure with the feature name.

  Args:
    fn: Serialization function to apply.
    k: Name of the feature being serialized (used in the error message).
    example_data: The data to serialize.
    tensor_info: Spec describing the feature being serialized.

  Returns:
    Whatever `fn(example_data, tensor_info)` returns.
  """
  try:
    return fn(example_data, tensor_info)
  # Bug fix: the exception must be bound and passed to `utils.reraise` as its
  # first argument. Previously only the message string was passed, so the
  # original exception (and its traceback) was dropped and `reraise` received
  # a `str` where it expects an exception.
  except Exception as e:  # pylint: disable=broad-except
    utils.reraise(
        e,
        "Error while serializing feature `{}`: `{}`: ".format(k, tensor_info),
    )
def set_file_format(
    self,
    file_format: Union[None, str, file_adapters.FileFormat],
) -> None:
  """Internal function to define the file format.

  The file format is set during `FileReaderBuilder.__init__`,
  not `DatasetInfo.__init__`.

  Args:
    file_format: The file format.
  """
  # Resolution order: explicit argument (tfds.builder(..., file_format=x)),
  # then the format restored from dataset_info.json, then the library default.
  resolved = (
      file_format
      or self.file_format
      or file_adapters.DEFAULT_FILE_FORMAT)
  try:
    new_file_format = file_adapters.FileFormat(resolved)
  except ValueError as e:
    # Surface the list of accepted values alongside the original error.
    all_values = [f.value for f in file_adapters.FileFormat]
    utils.reraise(e, suffix=f". Valid file formats: {all_values}")
  # Once a format has been set, later calls must agree with it.
  if self.file_format and self.file_format != new_file_format:
    raise ValueError(
        f"File format is already set to {self.file_format}. "
        f"Got {new_file_format}")
  self.as_proto.file_format = new_file_format.value
def _build_from_generator(
    self,
    split_name: str,
    generator: Iterable[KeyExample],
    path: type_utils.PathLike,
) -> _SplitInfoFuture:
  """Split generator for example generators.

  Encodes and writes every `(key, example)` yielded by `generator` to sharded
  record files, then returns a future resolving to the split's metadata.

  Args:
    split_name: Name of the split being written (e.g. `'train'`).
    generator: Iterable yielding `(key, example)` pairs.
    path: Destination path for the written records.

  Returns:
    future: The future containing the `tfds.core.SplitInfo`.
  """
  if self._max_examples_per_split is not None:
    # Debug/testing mode: truncate every split to at most N examples.
    logging.warning('Splits capped at %s examples max.',
                    self._max_examples_per_split)
    generator = itertools.islice(generator, self._max_examples_per_split)
    total_num_examples = self._max_examples_per_split
  else:
    # If dataset info has been pre-downloaded from the internet,
    # we can use the pre-computed number of example for the progression bar.
    split_info = self._split_dict.get(split_name)
    if split_info and split_info.num_examples:
      total_num_examples = split_info.num_examples
    else:
      # Unknown length: tqdm shows a counter without a percentage.
      total_num_examples = None

  writer = tfrecords_writer.Writer(
      example_specs=self._features.get_serialized_info(),
      path=path,
      hash_salt=split_name,  # Salted per split so shuffling differs per split.
      file_format=self._file_format,
  )
  for key, example in utils.tqdm(
      generator,
      desc=f'Generating {split_name} examples...',
      unit=' examples',
      total=total_num_examples,
      leave=False,
  ):
    try:
      example = self._features.encode_example(example)
    except Exception as e:  # pylint: disable=broad-except
      # Attach the offending example to the error for easier debugging.
      utils.reraise(e, prefix=f'Failed to encode example:\n{example}\n')
    writer.write(key, example)
  # finalize() flushes the shards and reports per-shard sizes.
  shard_lengths, total_size = writer.finalize()

  split_info = splits_lib.SplitInfo(
      name=split_name,
      shard_lengths=shard_lengths,
      num_bytes=total_size,
  )
  # Writing is synchronous here, so the "future" is already resolved; the
  # lambda closes over the computed split_info.
  return _SplitInfoFuture(lambda: split_info)
def encode_example(self, example_dict):
  """See base class for details."""
  encoded = {}
  for name, (feature, value) in utils.zip_dict(
      self._feature_dict, example_dict):
    # Encode each sub-feature independently; on failure, prepend the
    # feature class and name so the bad field is identifiable.
    try:
      encoded[name] = feature.encode_example(value)
    except Exception as e:  # pylint: disable=broad-except
      utils.reraise(
          e,
          prefix=f'In <{feature.__class__.__name__}> with name "{name}":\n')
  return encoded
def encode_example(self, audio_or_path_or_fobj):
  """Encodes an audio example.

  Args:
    audio_or_path_or_fobj: Either already-decoded samples (`np.ndarray` or
      `list`, returned unchanged), a path-like object (opened and decoded,
      with the file extension as format fallback), or a file object
      (decoded with `self._file_format`).

  Returns:
    The encoded audio data.
  """
  if isinstance(audio_or_path_or_fobj, (np.ndarray, list)):
    return audio_or_path_or_fobj
  elif isinstance(audio_or_path_or_fobj, epath.PathLikeCls):
    filename = os.fspath(audio_or_path_or_fobj)
    # Fall back to the file extension when no format was set explicitly.
    file_format = self._file_format or filename.split('.')[-1]
    with tf.io.gfile.GFile(filename, 'rb') as audio_f:
      try:
        return self._encode_file(audio_f, file_format)
      except Exception as e:  # pylint: disable=broad-except
        # Bug fix: the message previously contained the literal "(unknown)"
        # (a placeholder-less f-string) even though `filename` is known here;
        # include it so decoding errors identify the offending file.
        utils.reraise(e, prefix=f'Error for {filename}: ')
  else:
    return self._encode_file(audio_or_path_or_fobj, self._file_format)
def reraise_with_context(error_cls: Type[Exception]) -> Iterator[None]: """Contextmanager which reraises an exception with an additional message. Args: error_cls: The exception to be reraised. Yields: None. """ # If current_context_msg exists, we are already within the scope of the # session contextmanager. if hasattr(context_holder, 'current_context_msg'): yield return context_holder.current_context_msg = ErrorContext() try: yield except error_cls as e: context_msg = '\n'.join(context_holder.current_context_msg.messages) utils.reraise(e, suffix=context_msg) finally: del context_holder.current_context_msg
def _build_from_generator(
    self,
    split_name: str,
    generator: Iterable[KeyExample],
    filename_template: naming.ShardedFileTemplate,
    disable_shuffling: bool,
) -> _SplitInfoFuture:
  """Split generator for example generators.

  Encodes and writes every `(key, example)` yielded by `generator` to sharded
  files named by `filename_template`, then returns a future resolving to the
  split's metadata.

  Args:
    split_name: Name of the split being written (e.g. `'train'`).
    generator: Iterable yielding `(key, example)` pairs.
    filename_template: Template to format the filename for a shard.
    disable_shuffling: Specifies whether to shuffle the examples,

  Returns:
    future: The future containing the `tfds.core.SplitInfo`.
  """
  if self._max_examples_per_split is not None:
    # Debug/testing mode: truncate every split to at most N examples.
    logging.warning('Splits capped at %s examples max.',
                    self._max_examples_per_split)
    generator = itertools.islice(generator, self._max_examples_per_split)
    total_num_examples = self._max_examples_per_split
  else:
    # If dataset info has been pre-downloaded from the internet,
    # we can use the pre-computed number of example for the progression bar.
    split_info = self._split_dict.get(split_name)
    if split_info and split_info.num_examples:
      total_num_examples = split_info.num_examples
    else:
      # Unknown length: tqdm shows a counter without a percentage.
      total_num_examples = None

  writer = writer_lib.Writer(
      serializer=example_serializer.ExampleSerializer(
          self._features.get_serialized_info()),
      filename_template=filename_template,
      hash_salt=split_name,  # Salted per split so shuffling differs per split.
      disable_shuffling=disable_shuffling,
      # TODO(weide) remove this because it's already in filename_template?
      file_format=self._file_format,
  )
  for key, example in utils.tqdm(
      generator,
      desc=f'Generating {split_name} examples...',
      unit=' examples',
      total=total_num_examples,
      leave=False,
  ):
    try:
      example = self._features.encode_example(example)
    except Exception as e:  # pylint: disable=broad-except
      # Attach the offending example to the error for easier debugging.
      utils.reraise(e, prefix=f'Failed to encode example:\n{example}\n')
    writer.write(key, example)
  # finalize() flushes the shards and reports per-shard sizes.
  shard_lengths, total_size = writer.finalize()

  split_info = splits_lib.SplitInfo(
      name=split_name,
      shard_lengths=shard_lengths,
      num_bytes=total_size,
      filename_template=filename_template,
  )
  # Writing is synchronous here, so the "future" is already resolved; the
  # lambda closes over the computed split_info.
  return _SplitInfoFuture(lambda: split_info)