def __init__(self, args):
    """Initialize the dataset state from ``args``.

    Stores the input tarball path, builds the feature extractor, checks
    that the ``soundfile`` and ``pydub`` packages are importable, and
    loads the optional exclusion list.

    Args:
        args: An object supporting item access that provides the keys
            ``"input_tarball"`` and ``"excluded_file"``; it is also passed
            unchanged to ``build_feature_extractor``.

    Raises:
        ImportError: If ``soundfile`` or ``pydub`` cannot be imported.
        OSError: If importing ``soundfile`` raises an ``OSError``.
        ValueError: If ``excluded_file`` is not None but does not exist.
    """
    super(RawAudioDataset, self).__init__()
    self._input_tarball = args["input_tarball"]
    self._transcripts = None
    self._translations = None
    self._feature_extractor = build_feature_extractor(args)

    # Probe the audio dependencies up front so a missing package is
    # reported with an installation hint.
    try:
        import soundfile
    except ImportError:
        raise ImportError(
            "Please install soundfile with: pip3 install soundfile")
    except OSError:
        raise OSError("sndfile library not found. Please install with: "
                      "apt-get install libsndfile1")
    else:
        # Reference the module so the import is not reported as unused.
        _ = soundfile

    try:
        from pydub import AudioSegment
    except ImportError:
        raise ImportError("Please install pydub with: pip3 install pydub")
    else:
        _ = AudioSegment

    excluded_file = args["excluded_file"]
    self._excluded_str = None
    if excluded_file is None:
        return
    if not tf.io.gfile.exists(excluded_file):
        raise ValueError(f"`excluded_file` not found: {excluded_file}")
    with tf.io.gfile.GFile(excluded_file) as reader:
        # De-duplicate the stripped, lower-cased lines and keep them as a
        # single space-separated string.
        unique_entries = {line.strip().lower() for line in reader}
        self._excluded_str = " ".join(unique_entries)
def _main(_):
    """Parse the program flags, build the inputs, and invoke ``main``.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``build_dataset`` returns None.
    """
    # Define and parse program flags.
    parser = flags_core.define_flags(FLAG_LIST, with_config_file=True)
    parsed_args, leftover_argv = flags_core.intelligent_parse_flags(
        FLAG_LIST, parser)
    flags_core.verbose_flags(FLAG_LIST, parsed_args, leftover_argv)

    audio_dataset = build_dataset(parsed_args)
    extractor = build_feature_extractor(parsed_args)
    # The dataset is validated only after both objects have been built.
    if audio_dataset is None:
        raise ValueError("dataset must be provided.")
    main(audio_dataset, extractor)
def __init__(self, args):
    """Initialize the dataset state from ``args``.

    Stores the input tarball path, builds the feature extractor, and
    checks that the ``soundfile`` package is importable.

    Args:
        args: An object supporting item access that provides the key
            ``"input_tarball"``; it is also passed unchanged to
            ``build_feature_extractor``.

    Raises:
        ImportError: If ``soundfile`` cannot be imported.
        OSError: If importing ``soundfile`` raises an ``OSError``.
    """
    super(RawAudioDataset, self).__init__()
    self._input_tarball = args["input_tarball"]
    self._transcripts = None
    self._translations = None
    self._feature_extractor = build_feature_extractor(args)

    # Probe the audio dependency up front so a missing package is
    # reported with an installation hint.
    try:
        import soundfile
    except ImportError:
        raise ImportError(
            "Please install soundfile with: pip3 install soundfile")
    except OSError:
        raise OSError("sndfile library not found. Please install with: "
                      "apt-get install libsndfile1")
    else:
        # Reference the module so the import is not reported as unused.
        _ = soundfile
def __init__(self, args):
    """Initialize the dataset state from ``args``.

    Stores the input tarball path, builds the feature extractor, sets up
    an optional ``sox`` transformer, and loads the optional exclusion list.

    Args:
        args: An object supporting item access that provides the keys
            ``"input_tarball"`` and ``"excluded_file"``; it is also passed
            unchanged to ``build_feature_extractor``.

    Raises:
        ValueError: If ``excluded_file`` is not None but does not exist.
    """
    super(RawAudioDataset, self).__init__()
    self._input_tarball = args["input_tarball"]
    self._transcripts = None
    self._translations = None
    self._feature_extractor = build_feature_extractor(args)

    # `sox` is optional: when it cannot be imported the transformer is
    # left as None. ModuleNotFoundError is a subclass of ImportError, so
    # catching ImportError covers both.
    try:
        import sox
        transformer = sox.Transformer()
        transformer.set_output_format(rate=16000, file_type="wav")
    except ImportError:
        transformer = None
    self._sox_transformer = transformer

    excluded_file = args["excluded_file"]
    self._excluded_str = None
    if excluded_file is None:
        return
    if not tf.io.gfile.exists(excluded_file):
        raise ValueError(f"`excluded_file` not found: {excluded_file}")
    with tf.io.gfile.GFile(excluded_file) as reader:
        # De-duplicate the stripped, lower-cased lines and keep them as a
        # single space-separated string.
        unique_entries = {line.strip().lower() for line in reader}
        self._excluded_str = " ".join(unique_entries)