def __init__(
        self,
        real_wave_mfcc=None,
        synt_wave_mfcc=None,
        real_wave_path=None,
        synt_wave_path=None,
        rconf=None,
        logger=None
):
    """
    Create the aligner from already extracted MFCCs and/or
    from the paths of the real and synthesized wave files.

    For each of the two waves, if no MFCC object is given
    but a path is given, an ``AudioFileMFCC`` object
    is created from the file at that path.

    :param real_wave_mfcc: the MFCCs of the real wave, or ``None``
    :type  real_wave_mfcc: AudioFileMFCC (or a subclass)
    :param synt_wave_mfcc: the MFCCs of the synthesized wave, or ``None``
    :type  synt_wave_mfcc: AudioFileMFCC (or a subclass)
    :param string real_wave_path: the path of the real wave, or ``None``
    :param string synt_wave_path: the path of the synthesized wave, or ``None``
    :param rconf: a runtime configuration, or ``None``
    :param logger: the logger object, or ``None``
    :raises: ValueError: if an MFCC argument is neither ``None`` nor
                         an ``AudioFileMFCC``, if a given path cannot be read,
                         or if the DTW algorithm in ``rconf`` is not allowed
    """
    # isinstance() (instead of an exact type() comparison)
    # so that subclasses of AudioFileMFCC are accepted as well
    if (real_wave_mfcc is not None) and (not isinstance(real_wave_mfcc, AudioFileMFCC)):
        raise ValueError(u"Real wave mfcc must be None or of type AudioFileMFCC")
    if (synt_wave_mfcc is not None) and (not isinstance(synt_wave_mfcc, AudioFileMFCC)):
        raise ValueError(u"Synt wave mfcc must be None or of type AudioFileMFCC")
    if (real_wave_path is not None) and (not gf.file_can_be_read(real_wave_path)):
        raise ValueError(u"Real wave cannot be read")
    if (synt_wave_path is not None) and (not gf.file_can_be_read(synt_wave_path)):
        raise ValueError(u"Synt wave cannot be read")
    if (rconf is not None) and (rconf[RuntimeConfiguration.DTW_ALGORITHM] not in DTWAlgorithm.ALLOWED_VALUES):
        raise ValueError(u"Algorithm value not allowed")
    super(DTWAligner, self).__init__(rconf=rconf, logger=logger)
    self.real_wave_mfcc = real_wave_mfcc
    self.synt_wave_mfcc = synt_wave_mfcc
    self.real_wave_path = real_wave_path
    self.synt_wave_path = synt_wave_path
    # an explicitly given MFCC object always wins over the path
    if (self.real_wave_mfcc is None) and (self.real_wave_path is not None):
        self.real_wave_mfcc = AudioFileMFCC(self.real_wave_path, rconf=self.rconf, logger=self.logger)
    if (self.synt_wave_mfcc is None) and (self.synt_wave_path is not None):
        self.synt_wave_mfcc = AudioFileMFCC(self.synt_wave_path, rconf=self.rconf, logger=self.logger)
    self.dtw = None
def _read_from_file(self):
    """
    Load the text fragments from the file at ``self.file_path``,
    parsing it according to ``self.file_format``.

    The existing fragments are cleared before parsing.
    """
    # the file must be readable and its declared format must be a known one
    if not gf.file_can_be_read(self.file_path):
        self.log_exc(u"File '%s' cannot be read" % self.file_path, None, True, OSError)
    if self.file_format not in TextFileFormat.ALLOWED_VALUES:
        self.log_exc(u"Text file format '%s' is not supported." % self.file_format, None, True, ValueError)

    # slurp all the lines, decoding them as UTF-8
    self.log([u"Reading contents of file '%s'", self.file_path])
    with io.open(self.file_path, "r", encoding="utf-8") as source:
        lines = source.readlines()

    # start from an empty list of fragments
    self.clear()

    # dispatch to the parser associated with the file format
    parsers = dict((
        (TextFileFormat.MPLAIN, self._read_mplain),
        (TextFileFormat.MUNPARSED, self._read_munparsed),
        (TextFileFormat.PARSED, self._read_parsed),
        (TextFileFormat.PLAIN, self._read_plain),
        (TextFileFormat.SUBTITLES, self._read_subtitles),
        (TextFileFormat.UNPARSED, self._read_unparsed),
    ))
    parse = parsers[self.file_format]
    parse(lines)

    # report how many fragments were obtained
    self.log([u"Parsed %d fragments", len(self.fragments)])
def _select_tts_engine(self):
    """
    Select the TTS engine to be used by looking at the rconf object.

    The value of ``rconf[RuntimeConfiguration.TTS]`` is compared against
    the engine codes defined on this object; if none matches,
    the eSpeak wrapper is used.

    For the custom engine, the Python source file at
    ``rconf[RuntimeConfiguration.TTS_PATH]`` is loaded as the module
    ``CustomTTSWrapperModule`` and its ``CustomTTSWrapper`` class
    is instantiated.
    """
    self.log(u"Selecting TTS engine...")
    requested_tts_engine = self.rconf[RuntimeConfiguration.TTS]
    if requested_tts_engine == self.CUSTOM:
        self.log(u"TTS engine: custom")
        tts_path = self.rconf[RuntimeConfiguration.TTS_PATH]
        if tts_path is None:
            self.log_exc(u"You must specify a value for tts_path", None, True, ValueError)
        if not gf.file_can_be_read(tts_path):
            self.log_exc(u"Cannot read tts_path", None, True, OSError)
        try:
            module_name = "CustomTTSWrapperModule"
            # the imp module was removed in Python 3.12:
            # prefer importlib, and fall back to imp only on
            # interpreters where the importlib machinery is missing
            try:
                import importlib.machinery
                import importlib.util
                use_importlib = hasattr(importlib.util, "module_from_spec")
            except ImportError:
                use_importlib = False
            self.log([u"Loading CustomTTSWrapper module from '%s'...", tts_path])
            if use_importlib:
                import sys
                # an explicit SourceFileLoader is used so that, like imp.load_source,
                # the file is loaded as Python source whatever its extension is
                loader = importlib.machinery.SourceFileLoader(module_name, tts_path)
                spec = importlib.util.spec_from_file_location(module_name, tts_path, loader=loader)
                custom_module = importlib.util.module_from_spec(spec)
                # register the module, as imp.load_source used to do
                sys.modules[module_name] = custom_module
                try:
                    loader.exec_module(custom_module)
                except Exception:
                    # do not leave a half-initialized module behind
                    sys.modules.pop(module_name, None)
                    raise
            else:
                import imp
                custom_module = imp.load_source(module_name, tts_path)
            self.log([u"Loading CustomTTSWrapper module from '%s'... done", tts_path])
            self.log(u"Importing CustomTTSWrapper...")
            CustomTTSWrapper = custom_module.CustomTTSWrapper
            self.log(u"Importing CustomTTSWrapper... done")
            self.log(u"Creating CustomTTSWrapper instance...")
            self.tts_engine = CustomTTSWrapper(rconf=self.rconf, logger=self.logger)
            self.log(u"Creating CustomTTSWrapper instance... done")
        except Exception as exc:
            self.log_exc(u"Unable to load custom TTS wrapper", exc, True, OSError)
    elif requested_tts_engine == self.AWS:
        # the import only checks that boto3 is available
        try:
            import boto3
        except ImportError as exc:
            self.log_exc(u"Unable to import boto3 for AWS Polly TTS API wrapper", exc, True, ImportError)
        self.log(u"TTS engine: AWS Polly TTS API")
        self.tts_engine = AWSTTSWrapper(rconf=self.rconf, logger=self.logger)
    elif requested_tts_engine == self.NUANCE:
        # the import only checks that requests is available
        try:
            import requests
        except ImportError as exc:
            self.log_exc(u"Unable to import requests for Nuance TTS API wrapper", exc, True, ImportError)
        self.log(u"TTS engine: Nuance TTS API")
        self.tts_engine = NuanceTTSWrapper(rconf=self.rconf, logger=self.logger)
    elif requested_tts_engine == self.ESPEAKNG:
        self.log(u"TTS engine: eSpeak-ng")
        self.tts_engine = ESPEAKNGTTSWrapper(rconf=self.rconf, logger=self.logger)
    elif requested_tts_engine == self.FESTIVAL:
        self.log(u"TTS engine: Festival")
        self.tts_engine = FESTIVALTTSWrapper(rconf=self.rconf, logger=self.logger)
    elif requested_tts_engine == self.MACOS:
        self.log(u"TTS engine: macOS")
        self.tts_engine = MacOSTTSWrapper(rconf=self.rconf, logger=self.logger)
    else:
        # any other value falls back to eSpeak
        self.log(u"TTS engine: eSpeak")
        self.tts_engine = ESPEAKTTSWrapper(rconf=self.rconf, logger=self.logger)
    self.log(u"Selecting TTS engine... done")
def check_input_file_or_directory(self, path):
    """
    Tell whether the given path is a readable file or a directory.

    If it is neither, two error messages are printed
    and ``False`` is returned; otherwise ``True`` is returned.

    :param path: the path of the input file or directory
    :type  path: string (path)
    :rtype: bool
    """
    if gf.file_can_be_read(path) or os.path.isdir(path):
        return True
    self.print_error(u"Unable to read file or directory '%s'" % path)
    self.print_error(u"Make sure the path is written/escaped correctly and that you have read permission on it")
    return False
def download(
        self,
        expected_size,
        download_format=None,
        largest_audio=True,
):
    """
    Download the audio of ``self.URL_VALID`` through ``self.audio_from_youtube``
    and assert that the downloaded file can be read
    and that it has the expected size.

    The downloaded file is always deleted,
    even when one of the assertions fails.

    :param int expected_size: the expected size of the downloaded file
    :param download_format: passed through to ``self.audio_from_youtube``
    :param bool largest_audio: passed through to ``self.audio_from_youtube``
    """
    path = self.audio_from_youtube(
        self.URL_VALID,
        download=True,
        output_file_path=None,
        download_format=download_format,
        largest_audio=largest_audio
    )
    try:
        self.assertTrue(gf.file_can_be_read(path))
        self.assertEqual(gf.file_size(path), expected_size)
    finally:
        # a failing assertion must not leave the downloaded file on disk
        gf.delete_file(None, path)
def check_file_encoding(self, input_file_path):
    """
    Verify that the file at the given path is encoded in UTF-8.

    A fresh result object is stored in ``self.result`` and returned.
    If the file cannot be read, the failure is recorded in the result.

    :param string input_file_path: the path of the file to be checked
    :rtype: :class:`~aeneas.validator.ValidatorResult`
    """
    self.log([u"Checking encoding of file '%s'", input_file_path])
    self.result = ValidatorResult()
    if gf.file_can_be_read(input_file_path):
        # read the raw bytes and let the helper check their encoding
        with io.open(input_file_path, "rb") as raw_file:
            raw_bytes = raw_file.read()
            self._check_utf8_encoding(raw_bytes)
    else:
        self._failed(u"File '%s' cannot be read." % input_file_path)
    return self.result
def read_properties(self):
    """
    Fill this object with the properties of the audio file
    at ``self.file_path``.

    The file size is obtained from the file system, while
    length, format, sample rate, and number of channels are obtained through
    :class:`~aeneas.ffprobewrapper.FFPROBEWrapper`.

    :raises: :class:`~aeneas.audiofile.AudioFileProbeError`: if the path to the ``ffprobe`` executable cannot be called
    :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
    :raises: OSError: if the audio file cannot be read
    """
    self.log(u"Reading properties...")

    # nothing can be done if the file is not readable
    if not gf.file_can_be_read(self.file_path):
        self.log_exc(u"File '%s' cannot be read" % self.file_path, None, True, OSError)

    # size of the file
    self.log([u"Getting file size for '%s'", self.file_path])
    self.file_size = gf.file_size(self.file_path)
    self.log([u"File size for '%s' is '%d'", self.file_path, self.file_size])

    # audio properties, as reported by ffprobe;
    # the wrapper errors are mapped onto the audio file ones
    try:
        self.log(u"Reading properties with FFPROBEWrapper...")
        prober = FFPROBEWrapper(rconf=self.rconf, logger=self.logger)
        probed = prober.read_properties(self.file_path)
        self.log(u"Reading properties with FFPROBEWrapper... done")
    except FFPROBEPathError:
        self.log_exc(u"Unable to call ffprobe executable", None, True, AudioFileProbeError)
    except (FFPROBEUnsupportedFormatError, FFPROBEParsingError):
        self.log_exc(u"Audio file format not supported by ffprobe", None, True, AudioFileUnsupportedFormatError)

    # copy the relevant properties into this object
    self.audio_length = TimeValue(probed[FFPROBEWrapper.STDOUT_DURATION])
    self.audio_format = probed[FFPROBEWrapper.STDOUT_CODEC_NAME]
    self.audio_sample_rate = gf.safe_int(probed[FFPROBEWrapper.STDOUT_SAMPLE_RATE])
    self.audio_channels = gf.safe_int(probed[FFPROBEWrapper.STDOUT_CHANNELS])
    stored_values = (
        (u"Stored audio_length: '%s'", self.audio_length),
        (u"Stored audio_format: '%s'", self.audio_format),
        (u"Stored audio_sample_rate: '%s'", self.audio_sample_rate),
        (u"Stored audio_channels: '%s'", self.audio_channels),
    )
    for message, stored in stored_values:
        self.log([message, stored])
    self.log(u"Reading properties... done")
def read(self, sync_map_format, input_file_path, parameters=None):
    """
    Parse the given file, written in the specified sync map format,
    and add the fragments read from it to the current (this) sync map.

    If ``parameters`` specifies a sync map language,
    the language of the text fragment of every fragment
    of this sync map is overwritten with it.

    :param sync_map_format: the format of the sync map
    :type  sync_map_format: :class:`~aeneas.syncmap.SyncMapFormat`
    :param string input_file_path: the path to the input file to read
    :param dict parameters: additional parameters (e.g., for ``SMIL`` input)
    :raises: ValueError: if ``sync_map_format`` is ``None`` or it is not an allowed value
    :raises: OSError: if ``input_file_path`` does not exist
    """
    # validate the arguments
    if sync_map_format is None:
        self.log_exc(u"Sync map format is None", None, True, ValueError)
    if sync_map_format not in SyncMapFormat.CODE_TO_CLASS:
        self.log_exc(u"Sync map format '%s' is not allowed" % sync_map_format, None, True, ValueError)
    if not gf.file_can_be_read(input_file_path):
        self.log_exc(u"Cannot read sync map file '%s'. Wrong permissions?" % input_file_path, None, True, OSError)

    self.log([u"Input format:     '%s'", sync_map_format])
    self.log([u"Input path:       '%s'", input_file_path])
    self.log([u"Input parameters: '%s'", parameters])

    # build the reader associated with the requested format
    reader_class = SyncMapFormat.CODE_TO_CLASS[sync_map_format]
    reader = reader_class(
        variant=sync_map_format,
        parameters=parameters,
        rconf=self.rconf,
        logger=self.logger
    )

    # read the whole file as UTF-8 text and let the reader parse it
    self.log(u"Reading input file...")
    with io.open(input_file_path, "r", encoding="utf-8") as input_file:
        contents = input_file.read()
    reader.parse(input_text=contents, syncmap=self)
    self.log(u"Reading input file... done")

    # overwrite the language of the fragments, if requested
    requested_language = gf.safe_get(parameters, gc.PPN_SYNCMAP_LANGUAGE, None)
    if requested_language is None:
        return
    self.log([u"Overwriting language to '%s'", requested_language])
    for fragment in self.fragments:
        fragment.text_fragment.language = requested_language
def _select_tts_engine(self):
    """
    Select the TTS engine to be used by looking at the rconf object.

    The value of ``rconf[RuntimeConfiguration.TTS]`` is compared against
    the engine codes defined on this object; if none matches,
    the eSpeak wrapper is used.

    For the custom engine, the Python source file at
    ``rconf[RuntimeConfiguration.TTS_PATH]`` is loaded as the module
    ``CustomTTSWrapperModule`` and its ``CustomTTSWrapper`` class
    is instantiated.
    """
    self.log(u"Selecting TTS engine...")
    requested_tts_engine = self.rconf[RuntimeConfiguration.TTS]
    if requested_tts_engine == self.CUSTOM:
        self.log(u"TTS engine: custom")
        tts_path = self.rconf[RuntimeConfiguration.TTS_PATH]
        if not gf.file_can_be_read(tts_path):
            self.log_exc(u"Cannot read tts_path", None, True, OSError)
        try:
            module_name = "CustomTTSWrapperModule"
            # the imp module was removed in Python 3.12:
            # prefer importlib, and fall back to imp only on
            # interpreters where the importlib machinery is missing
            try:
                import importlib.machinery
                import importlib.util
                use_importlib = hasattr(importlib.util, "module_from_spec")
            except ImportError:
                use_importlib = False
            self.log([u"Loading CustomTTSWrapper module from '%s'...", tts_path])
            if use_importlib:
                import sys
                # an explicit SourceFileLoader is used so that, like imp.load_source,
                # the file is loaded as Python source whatever its extension is
                loader = importlib.machinery.SourceFileLoader(module_name, tts_path)
                spec = importlib.util.spec_from_file_location(module_name, tts_path, loader=loader)
                custom_module = importlib.util.module_from_spec(spec)
                # register the module, as imp.load_source used to do
                sys.modules[module_name] = custom_module
                try:
                    loader.exec_module(custom_module)
                except Exception:
                    # do not leave a half-initialized module behind
                    sys.modules.pop(module_name, None)
                    raise
            else:
                import imp
                custom_module = imp.load_source(module_name, tts_path)
            self.log([u"Loading CustomTTSWrapper module from '%s'... done", tts_path])
            self.log(u"Importing CustomTTSWrapper...")
            CustomTTSWrapper = custom_module.CustomTTSWrapper
            self.log(u"Importing CustomTTSWrapper... done")
            self.log(u"Creating CustomTTSWrapper instance...")
            self.tts_engine = CustomTTSWrapper(rconf=self.rconf, logger=self.logger)
            self.log(u"Creating CustomTTSWrapper instance... done")
        except Exception as exc:
            self.log_exc(u"Unable to load custom TTS wrapper", exc, True, OSError)
    elif requested_tts_engine == self.FESTIVAL:
        self.log(u"TTS engine: Festival")
        self.tts_engine = FESTIVALWrapper(rconf=self.rconf, logger=self.logger)
    elif requested_tts_engine == self.NUANCETTSAPI:
        # the import only checks that requests is available
        try:
            import requests
        except ImportError as exc:
            self.log_exc(u"Unable to import requests for Nuance TTS API wrapper", exc, True, ImportError)
        self.log(u"TTS engine: Nuance TTS API")
        self.tts_engine = NuanceTTSAPIWrapper(rconf=self.rconf, logger=self.logger)
    else:
        # any other value falls back to eSpeak
        self.log(u"TTS engine: eSpeak")
        self.tts_engine = ESPEAKWrapper(rconf=self.rconf, logger=self.logger)
    self.log(u"Selecting TTS engine... done")
def file_path(self, file_path):
    """
    Store the path of the text file.

    ``None`` is stored without any check;
    any other value is first checked for readability,
    and an error is logged (with ``OSError`` as the error type)
    if the file cannot be read.

    :param string file_path: the path of the text file, or ``None``
    """
    must_check = file_path is not None
    if must_check and (not gf.file_can_be_read(file_path)):
        self.log_exc(u"Text file '%s' cannot be read" % file_path, None, True, OSError)
    self.__file_path = file_path
def _synthesize_single_subprocess_helper(self, text, voice_code, output_file_path=None, return_audio_data=True):
    """
    This is an helper function to synthesize a single text fragment via ``subprocess``.

    If ``output_file_path`` is ``None``,
    the audio data will not persist to file at the end of the method.

    If ``return_audio_data`` is ``True``,
    return the audio data at the end of the function call;
    if ``False``, just return ``(True, None)`` in case of success.

    The temporary files created by this function
    (the text file passed to the TTS engine and,
    when ``output_file_path`` is ``None``, the output file)
    are deleted on every exit path, including the failure ones.

    :param string text: the text to be synthesized
    :param voice_code: the voice code, passed to the TTS engine
    :param string output_file_path: the path of the output file, or ``None``
    :param bool return_audio_data: if ``True``, return the audio data
    :rtype: tuple (result, (duration, sample_rate, codec, data)) or (result, None)
    """
    # return zero if text is the empty string
    if len(text) == 0:
        #
        # NOTE sample_rate, codec, data do not matter
        #      if the duration is 0.000 => set them to None
        #
        self.log(u"len(text) is zero: returning 0.000")
        return (True, (TimeValue("0.000"), None, None, None))

    # create a temporary output file if needed
    synt_tmp_file = (output_file_path is None)
    output_file_handler = None
    if synt_tmp_file:
        self.log(u"Synthesizer helper called with output_file_path=None => creating temporary output file")
        output_file_handler, output_file_path = gf.tmp_file(
            suffix=u".wav",
            root=self.rconf[RuntimeConfiguration.TMP_PATH]
        )
        self.log([u"Temporary output file path is '%s'", output_file_path])

    # these stay None unless the TTS engine reads the text from file
    tmp_text_file_handler = None
    tmp_text_file_path = None
    try:
        try:
            # if the TTS engine reads text from file,
            # write the text into a temporary file
            if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
                self.log(u"TTS engine reads text from file")
                tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(
                    suffix=u".txt",
                    root=self.rconf[RuntimeConfiguration.TMP_PATH]
                )
                self.log([u"Creating temporary text file '%s'...", tmp_text_file_path])
                with io.open(tmp_text_file_path, "w", encoding="utf-8") as tmp_text_file:
                    tmp_text_file.write(text)
                self.log([u"Creating temporary text file '%s'... done", tmp_text_file_path])
            else:
                self.log(u"TTS engine reads text from stdin")

            # copy all relevant arguments,
            # replacing the placeholders with the actual values
            self.log(u"Creating arguments list...")
            arguments = []
            for arg in self.subprocess_arguments:
                if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                    arguments.extend(self._voice_code_to_subprocess(voice_code))
                elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                    arguments.append(voice_code)
                elif arg == self.CLI_PARAMETER_TEXT_PATH:
                    arguments.append(tmp_text_file_path)
                elif arg == self.CLI_PARAMETER_WAVE_PATH:
                    arguments.append(output_file_path)
                elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                    # placeholder, do not append
                    pass
                elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                    # placeholder, do not append
                    pass
                else:
                    arguments.append(arg)
            self.log(u"Creating arguments list... done")

            # actual call via subprocess
            self.log(u"Calling TTS engine...")
            self.log([u"Calling with arguments '%s'", arguments])
            self.log([u"Calling with text '%s'", text])
            proc = subprocess.Popen(
                arguments,
                stdout=subprocess.PIPE,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True
            )
            if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
                self.log(u"Passing text via stdin...")
                if gf.PY2:
                    (stdoutdata, stderrdata) = proc.communicate(input=gf.safe_bytes(text))
                else:
                    (stdoutdata, stderrdata) = proc.communicate(input=text)
                self.log(u"Passing text via stdin... done")
            else:
                self.log(u"Passing text via file...")
                (stdoutdata, stderrdata) = proc.communicate()
                self.log(u"Passing text via file... done")
            proc.stdout.close()
            proc.stdin.close()
            proc.stderr.close()

            if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
                # NOTE(review): with universal_newlines=True the stdout data
                #               is text on Python 3, while the file is opened
                #               in binary mode: confirm this branch works
                #               for engines writing audio data to stdout
                self.log(u"TTS engine wrote audio data to stdout")
                self.log([u"Writing audio data to file '%s'...", output_file_path])
                with io.open(output_file_path, "wb") as output_file:
                    output_file.write(stdoutdata)
                self.log([u"Writing audio data to file '%s'... done", output_file_path])
            else:
                self.log(u"TTS engine wrote audio data to file")

            self.log(u"Calling TTS ... done")
        except Exception as exc:
            self.log_exc(u"An unexpected error occurred while calling TTS engine via subprocess", exc, False, None)
            return (False, None)
        finally:
            # delete the temporary text file (if any),
            # no matter whether the call succeeded or failed
            if tmp_text_file_path is not None:
                self.log([u"Delete temporary text file '%s'", tmp_text_file_path])
                gf.delete_file(tmp_text_file_handler, tmp_text_file_path)

        # check the file can be read
        if not gf.file_can_be_read(output_file_path):
            self.log_exc(u"Output file '%s' cannot be read" % (output_file_path), None, True, None)
            return (False, None)

        # return audio data or (True, None)
        if return_audio_data:
            return self._read_audio_data(output_file_path)
        return (True, None)
    finally:
        # if the output file was temporary, remove it,
        # on the failure paths as well
        if synt_tmp_file:
            self.log([u"Removing temporary output file path '%s'", output_file_path])
            gf.delete_file(output_file_handler, output_file_path)
def read_samples_from_file(self):
    """
    Load the audio samples from file into memory.

    If ``self.is_mono_wave`` is ``False``,
    the file will be first converted
    to a temporary PCM16 mono WAVE file.
    Audio data will be read from this temporary file,
    which will be then deleted from disk immediately,
    even if reading the samples fails.

    If ``self.is_mono_wave`` is ``True``,
    the audio data will be read directly
    from the given file,
    which will not be deleted from disk.

    :raises: :class:`~aeneas.audiofile.AudioFileConverterError`: if the path to the ``ffmpeg`` executable cannot be called
    :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
    :raises: OSError: if the audio file cannot be read
    """
    self.log(u"Loading audio data...")

    # check the file can be read
    if not gf.file_can_be_read(self.file_path):
        self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)

    # convert file to PCM16 mono WAVE
    if self.is_mono_wave:
        self.log(u"is_mono_wave=True => reading self.file_path directly")
        tmp_handler = None
        tmp_file_path = self.file_path
    else:
        self.log(u"is_mono_wave=False => converting self.file_path")
        tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        self.log([u"Temporary PCM16 mono WAVE file: '%s'", tmp_file_path])
        try:
            self.log(u"Converting audio file to mono...")
            converter = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
            converter.convert(self.file_path, tmp_file_path)
            self.log(u"Converting audio file to mono... done")
        except FFMPEGPathError:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log_exc(u"Unable to call ffmpeg executable", None, True, AudioFileConverterError)
        except OSError:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log_exc(u"Audio file format not supported by ffmpeg", None, True, AudioFileUnsupportedFormatError)

    # TODO allow calling C extension cwave to read samples faster
    try:
        self.audio_format = "pcm16"
        self.audio_channels = 1
        self.audio_sample_rate, self.__samples = scipywavread(tmp_file_path)
        # scipy reads a sample as an int16_t, that is, a number in [-32768, 32767]
        # so we convert it to a float64 in [-1, 1]
        self.__samples = self.__samples.astype("float64") / 32768
        self.__samples_capacity = len(self.__samples)
        self.__samples_length = self.__samples_capacity
        self._update_length()
    except ValueError:
        self.log_exc(u"Audio format not supported by scipywavread", None, True, AudioFileUnsupportedFormatError)
    finally:
        # the temporary converted file must not outlive this call,
        # not even when reading the samples fails
        if not self.is_mono_wave:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log([u"Deleted temporary PCM16 mono WAVE file: '%s'", tmp_file_path])

    # NOTE(review): this second call looks redundant with the one above;
    #               confirm before removing it
    self._update_length()
    self.log([u"Sample length:  %.3f", self.audio_length])
    self.log([u"Sample rate:    %d", self.audio_sample_rate])
    self.log([u"Audio format:   %s", self.audio_format])
    self.log([u"Audio channels: %d", self.audio_channels])
    self.log(u"Loading audio data... done")
def read_properties(self, audio_file_path):
    """
    Read the properties of an audio file
    and return them as a dictionary.

    The properties are parsed from the ``key=value`` lines
    that ``ffprobe`` writes to stdout, up to the end of the first stream.
    If the duration found there is not a valid time value,
    the ``ffprobe`` stderr output is scanned for a duration instead.

    Example: ::

        d["index"]=0
        d["codec_name"]=mp3
        d["codec_long_name"]=MP3 (MPEG audio layer 3)
        d["profile"]=unknown
        d["codec_type"]=audio
        d["codec_time_base"]=1/44100
        d["codec_tag_string"]=[0][0][0][0]
        d["codec_tag"]=0x0000
        d["sample_fmt"]=s16p
        d["sample_rate"]=44100
        d["channels"]=1
        d["channel_layout"]=mono
        d["bits_per_sample"]=0
        d["id"]=N/A
        d["r_frame_rate"]=0/0
        d["avg_frame_rate"]=0/0
        d["time_base"]=1/14112000
        d["start_pts"]=0
        d["start_time"]=0.000000
        d["duration_ts"]=1545083190
        d["duration"]=109.487188
        d["bit_rate"]=128000
        d["max_bit_rate"]=N/A
        d["bits_per_raw_sample"]=N/A
        d["nb_frames"]=N/A
        d["nb_read_frames"]=N/A
        d["nb_read_packets"]=N/A
        d["DISPOSITION:default"]=0
        d["DISPOSITION:dub"]=0
        d["DISPOSITION:original"]=0
        d["DISPOSITION:comment"]=0
        d["DISPOSITION:lyrics"]=0
        d["DISPOSITION:karaoke"]=0
        d["DISPOSITION:forced"]=0
        d["DISPOSITION:hearing_impaired"]=0
        d["DISPOSITION:visual_impaired"]=0
        d["DISPOSITION:clean_effects"]=0
        d["DISPOSITION:attached_pic"]=0

    :param string audio_file_path: the path of the audio file to analyze
    :rtype: dict
    :raises: TypeError: if ``audio_file_path`` is None
    :raises: OSError: if the file at ``audio_file_path`` cannot be read
    :raises: FFPROBEParsingError: if the call to ``ffprobe`` does not produce any output
    :raises: FFPROBEPathError: if the path to the ``ffprobe`` executable cannot be called
    :raises: FFPROBEUnsupportedFormatError: if the file has a format not supported by ``ffprobe``
    """
    # test if we can read the file at audio_file_path
    if audio_file_path is None:
        self.log_exc(u"The audio file path is None", None, True, TypeError)
    if not gf.file_can_be_read(audio_file_path):
        self.log_exc(u"Input file '%s' cannot be read" % (audio_file_path), None, True, OSError)

    # call ffprobe
    arguments = [self.rconf[RuntimeConfiguration.FFPROBE_PATH]]
    arguments.extend(self.FFPROBE_PARAMETERS)
    arguments.append(audio_file_path)
    self.log([u"Calling with arguments '%s'", arguments])
    try:
        proc = subprocess.Popen(
            arguments,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        (stdoutdata, stderrdata) = proc.communicate()
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()
    except OSError as exc:
        self.log_exc(u"Unable to call the '%s' ffprobe executable" % (self.rconf[RuntimeConfiguration.FFPROBE_PATH]), exc, True, FFPROBEPathError)
    self.log(u"Call completed")

    # check there is some output
    if (stdoutdata is None) or (len(stderrdata) == 0):
        self.log_exc(u"ffprobe produced no output", None, True, FFPROBEParsingError)

    # decode stdoutdata and stderrdata to Unicode string
    try:
        stdoutdata = gf.safe_unicode(stdoutdata)
        stderrdata = gf.safe_unicode(stderrdata)
    except UnicodeDecodeError as exc:
        self.log_exc(u"Unable to decode ffprobe out/err", exc, True, FFPROBEParsingError)

    # dictionary for the results
    results = {
        self.STDOUT_CHANNELS: None,
        self.STDOUT_CODEC_NAME: None,
        self.STDOUT_DURATION: None,
        self.STDOUT_SAMPLE_RATE: None
    }

    # scan the first audio stream the ffprobe stdout output
    # TODO more robust parsing
    # TODO deal with multiple audio streams
    for line in stdoutdata.splitlines():
        if line == self.STDOUT_END_STREAM:
            self.log(u"Reached end of the stream")
            break
        # only lines made of exactly one key and one value are considered
        parts = line.split("=")
        if len(parts) == 2:
            key, value = parts
            results[key] = value
            self.log([u"Found property '%s'='%s'", key, value])

    try:
        self.log([u"Duration found in stdout: '%s'", results[self.STDOUT_DURATION]])
        results[self.STDOUT_DURATION] = TimeValue(results[self.STDOUT_DURATION])
        self.log(u"Valid duration")
    except Exception:
        # "except Exception" (instead of a bare "except")
        # so that KeyboardInterrupt and SystemExit are not swallowed
        self.log_warn(u"Invalid duration")
        results[self.STDOUT_DURATION] = None
        # try scanning ffprobe stderr output
        for line in stderrdata.splitlines():
            match = self.STDERR_DURATION_REGEX.search(line)
            if match is not None:
                self.log([u"Found matching line '%s'", line])
                results[self.STDOUT_DURATION] = gf.time_from_hhmmssmmm(line)
                self.log([u"Extracted duration '%.3f'", results[self.STDOUT_DURATION]])
                break

    if results[self.STDOUT_DURATION] is None:
        self.log_exc(u"No duration found in stdout or stderr. Unsupported audio file format?", None, True, FFPROBEUnsupportedFormatError)

    # return dictionary
    self.log(u"Returning dict")
    return results
def _synthesize_single_subprocess(self, text, voice_code, output_file_path):
    """
    Synthesize a single text fragment via ``subprocess``.

    The TTS engine is called with the arguments listed in
    ``self.subprocess_arguments``, after replacing the placeholders
    for the voice code, the text file path, and the output file path.
    The synthesized audio file is then read to get its duration and samples.

    If a temporary text file is created to pass the text to the TTS engine,
    it is deleted whether the call succeeds or fails.

    :param string text: the text to be synthesized
    :param voice_code: the voice code, passed to the TTS engine
    :param string output_file_path: the path of the output audio file
    :rtype: tuple (result, (duration, sample_rate, encoding, samples))
    """
    self.log(u"Synthesizing using pure Python...")
    # these stay None unless the TTS engine reads the text from file
    tmp_text_file_handler = None
    tmp_text_file_path = None
    try:
        # if the TTS engine reads text from file,
        # write the text into a temporary file
        if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
            self.log(u"TTS engine reads text from file")
            tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(suffix=u".txt", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Creating temporary text file '%s'...", tmp_text_file_path])
            with io.open(tmp_text_file_path, "w", encoding="utf-8") as tmp_text_file:
                tmp_text_file.write(text)
            self.log([u"Creating temporary text file '%s'... done", tmp_text_file_path])
        else:
            self.log(u"TTS engine reads text from stdin")

        # copy all relevant arguments,
        # replacing the placeholders with the actual values
        self.log(u"Creating arguments list...")
        arguments = []
        for arg in self.subprocess_arguments:
            if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                arguments.extend(self._voice_code_to_subprocess(voice_code))
            elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                arguments.append(voice_code)
            elif arg == self.CLI_PARAMETER_TEXT_PATH:
                arguments.append(tmp_text_file_path)
            elif arg == self.CLI_PARAMETER_WAVE_PATH:
                arguments.append(output_file_path)
            elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                # placeholder, do not append
                pass
            elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                # placeholder, do not append
                pass
            else:
                arguments.append(arg)
        self.log(u"Creating arguments list... done")

        # actual call via subprocess
        self.log(u"Calling TTS engine...")
        self.log([u"Calling with arguments '%s'", arguments])
        self.log([u"Calling with text '%s'", text])
        proc = subprocess.Popen(
            arguments,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True
        )
        if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
            self.log(u"Passing text via stdin...")
            if gf.PY2:
                (stdoutdata, stderrdata) = proc.communicate(input=gf.safe_bytes(text))
            else:
                (stdoutdata, stderrdata) = proc.communicate(input=text)
            self.log(u"Passing text via stdin... done")
        else:
            self.log(u"Passing text via file...")
            (stdoutdata, stderrdata) = proc.communicate()
            self.log(u"Passing text via file... done")
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()

        if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
            # NOTE(review): with universal_newlines=True the stdout data
            #               is text on Python 3, while the file is opened
            #               in binary mode: confirm this branch works
            #               for engines writing audio data to stdout
            self.log(u"TTS engine wrote audio data to stdout")
            self.log([u"Writing audio data to file '%s'...", output_file_path])
            with io.open(output_file_path, "wb") as output_file:
                output_file.write(stdoutdata)
            self.log([u"Writing audio data to file '%s'... done", output_file_path])
        else:
            self.log(u"TTS engine wrote audio data to file")

        self.log(u"Calling TTS ... done")
    except Exception as exc:
        self.log_exc(u"An unexpected error occurred while calling TTS engine via subprocess", exc, False, None)
        return (False, None)
    finally:
        # delete the temporary text file (if any),
        # no matter whether the call succeeded or failed
        if tmp_text_file_path is not None:
            self.log([u"Delete temporary text file '%s'", tmp_text_file_path])
            gf.delete_file(tmp_text_file_handler, tmp_text_file_path)

    # check the file can be read
    if not gf.file_can_be_read(output_file_path):
        self.log_exc(u"Output file '%s' cannot be read" % (output_file_path), None, True, None)
        return (False, None)

    # return the duration of the output file
    try:
        # if we know the TTS outputs to PCM16 mono WAVE,
        # we can read samples directly from it,
        # without an intermediate conversion through ffmpeg
        audio_file = AudioFile(
            file_path=output_file_path,
            is_mono_wave=self.OUTPUT_MONO_WAVE,
            rconf=self.rconf,
            logger=self.logger
        )
        audio_file.read_samples_from_file()
        self.log([u"Duration of '%s': %f", output_file_path, audio_file.audio_length])
        self.log(u"Synthesizing using pure Python... done")
        return (True, (
            audio_file.audio_length,
            audio_file.audio_sample_rate,
            audio_file.audio_format,
            audio_file.audio_samples
        ))
    except (AudioFileUnsupportedFormatError, OSError) as exc:
        self.log_exc(u"An unexpected error occurred while trying to read the sythesized audio file", exc, True, None)
        return (False, None)
def read_samples_from_file(self):
    """
    Load the audio samples from file into memory.

    If ``self.file_format`` is ``None``, or if safety checks are enabled
    and it is not ``("pcm_s16le", 1, self.rconf.sample_rate)``,
    the file will be first converted
    to a temporary PCM16 mono WAVE file.
    Audio data will be read from this temporary file,
    which will be then deleted from disk immediately,
    even if reading the samples fails.

    Otherwise,
    the audio data will be read directly
    from the given file,
    which will not be deleted from disk.

    :raises: :class:`~aeneas.audiofile.AudioFileConverterError`: if the path to the ``ffmpeg`` executable cannot be called
    :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
    :raises: OSError: if the audio file cannot be read
    """
    self.log(u"Loading audio data...")

    # check the file can be read
    if not gf.file_can_be_read(self.file_path):
        self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)

    # determine if we need to convert the audio file
    convert_audio_file = (
        (self.file_format is None) or
        (
            (self.rconf.safety_checks) and
            (self.file_format != ("pcm_s16le", 1, self.rconf.sample_rate))
        )
    )

    # convert the audio file if needed
    if convert_audio_file:
        # convert file to PCM16 mono WAVE with correct sample rate
        self.log(u"self.file_format is None or not good => converting self.file_path")
        tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        self.log([u"Temporary PCM16 mono WAVE file: '%s'", tmp_file_path])
        try:
            self.log(u"Converting audio file to mono...")
            converter = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
            converter.convert(self.file_path, tmp_file_path)
            self.file_format = ("pcm_s16le", 1, self.rconf.sample_rate)
            self.log(u"Converting audio file to mono... done")
        except FFMPEGPathError:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log_exc(u"Unable to call ffmpeg executable", None, True, AudioFileConverterError)
        except OSError:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log_exc(u"Audio file format not supported by ffmpeg", None, True, AudioFileUnsupportedFormatError)
    else:
        # read the file directly
        if self.rconf.safety_checks:
            self.log(u"self.file_format is good => reading self.file_path directly")
        else:
            self.log_warn(u"Safety checks disabled => reading self.file_path directly")
        tmp_handler = None
        tmp_file_path = self.file_path

    # TODO allow calling C extension cwave to read samples faster
    try:
        self.audio_format = "pcm16"
        self.audio_channels = 1
        self.audio_sample_rate, self.__samples = scipywavread(tmp_file_path)
        # scipy reads a sample as an int16_t, that is, a number in [-32768, 32767]
        # so we convert it to a float64 in [-1, 1]
        self.__samples = self.__samples.astype("float64") / 32768
        self.__samples_capacity = len(self.__samples)
        self.__samples_length = self.__samples_capacity
        self._update_length()
    except ValueError:
        self.log_exc(u"Audio format not supported by scipywavread", None, True, AudioFileUnsupportedFormatError)
    finally:
        # if we converted the audio file, delete the temporary converted audio file,
        # even when reading the samples fails
        if convert_audio_file:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log([u"Deleted temporary audio file: '%s'", tmp_file_path])

    # NOTE(review): this second call looks redundant with the one above;
    #               confirm before removing it
    self._update_length()
    self.log([u"Sample length:  %.3f", self.audio_length])
    self.log([u"Sample rate:    %d", self.audio_sample_rate])
    self.log([u"Audio format:   %s", self.audio_format])
    self.log([u"Audio channels: %d", self.audio_channels])
    self.log(u"Loading audio data... done")
def test_file_can_be_read_true(self):
    """
    A temporary file just created by ``gf.tmp_file`` can be read.

    The temporary file is deleted even when the assertion fails.
    """
    handler, path = gf.tmp_file()
    try:
        self.assertTrue(gf.file_can_be_read(path))
    finally:
        # a failing assertion must not leave the temporary file on disk
        gf.delete_file(handler, path)
def convert(
        self,
        input_file_path,
        output_file_path,
        head_length=None,
        process_length=None
):
    """
    Convert the audio file at ``input_file_path``
    into ``output_file_path``,
    using the parameters set in the constructor
    or through the ``parameters`` property.

    You can skip the beginning of the audio file
    by specifying ``head_length`` seconds to skip
    (if it is ``None``, start at time zero),
    and you can specify to convert
    only ``process_length`` seconds
    (if it is ``None``, process the entire input file length).

    By specifying both ``head_length`` and ``process_length``,
    you can skip a portion at the beginning and at the end
    of the original input file.

    :param string input_file_path: the path of the audio file to convert
    :param string output_file_path: the path of the converted audio file
    :param float head_length: skip these many seconds
                              from the beginning of the audio file
    :param float process_length: process these many seconds of the audio file
    :rtype: string
    :raises: :class:`~aeneas.ffmpegwrapper.FFMPEGPathError`: if the path to the ``ffmpeg`` executable cannot be called
    :raises: OSError: if ``input_file_path`` does not exist
                      or ``output_file_path`` cannot be written
    """
    # test if we can read the input file
    if not gf.file_can_be_read(input_file_path):
        self.log_exc(u"Input file '%s' cannot be read" % (input_file_path), None, True, OSError)

    # test if we can write the output file
    if not gf.file_can_be_written(output_file_path):
        self.log_exc(u"Output file '%s' cannot be written" % (output_file_path), None, True, OSError)

    # call ffmpeg
    arguments = [self.rconf[RuntimeConfiguration.FFMPEG_PATH]]
    arguments.extend(["-i", input_file_path])
    # every element of the argument list must be a string:
    # subprocess rejects numeric values with a TypeError,
    # so the time values are converted explicitly
    if head_length is not None:
        arguments.extend(["-ss", str(head_length)])
    if process_length is not None:
        arguments.extend(["-t", str(process_length)])
    if self.rconf.sample_rate in self.FFMPEG_PARAMETERS_MAP:
        arguments.extend(self.FFMPEG_PARAMETERS_MAP[self.rconf.sample_rate])
    else:
        arguments.extend(self.FFMPEG_PARAMETERS_DEFAULT)
    arguments.append(output_file_path)
    self.log([u"Calling with arguments '%s'", arguments])
    try:
        proc = subprocess.Popen(
            arguments,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        proc.communicate()
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()
    except OSError as exc:
        self.log_exc(u"Unable to call the '%s' ffmpeg executable" % (self.rconf[RuntimeConfiguration.FFMPEG_PATH]), exc, True, FFMPEGPathError)
    self.log(u"Call completed")

    # check if the output file exists
    if not gf.file_exists(output_file_path):
        self.log_exc(u"Output file '%s' was not written" % (output_file_path), None, True, OSError)

    # returning the output file path
    self.log([u"Returning output file path '%s'", output_file_path])
    return output_file_path
def test_file_can_be_read_false(self):
    """
    The path ``/foo/bar/baz`` must be reported as not readable.
    """
    self.assertFalse(gf.file_can_be_read("/foo/bar/baz"))
def convert(self, input_file_path, output_file_path, head_length=None, process_length=None):
    """
    Convert the audio file at ``input_file_path``
    into ``output_file_path``,
    using the parameters set in the constructor
    or through the ``parameters`` property.

    You can skip the beginning of the audio file
    by specifying ``head_length`` seconds to skip
    (if it is ``None``, start at time zero),
    and you can specify to convert
    only ``process_length`` seconds
    (if it is ``None``, process the entire input file length).

    By specifying both ``head_length`` and ``process_length``,
    you can skip a portion at the beginning and at the end
    of the original input file.

    :param string input_file_path: the path of the audio file to convert
    :param string output_file_path: the path of the converted audio file
    :param float head_length: skip these many seconds
                              from the beginning of the audio file
    :param float process_length: process these many seconds of the audio file
    :rtype: string
    :raises: :class:`~aeneas.ffmpegwrapper.FFMPEGPathError`: if the path to the ``ffmpeg`` executable cannot be called
    :raises: OSError: if ``input_file_path`` does not exist
                      or ``output_file_path`` cannot be written
    """
    # test if we can read the input file
    if not gf.file_can_be_read(input_file_path):
        self.log_exc(u"Input file '%s' cannot be read" % (input_file_path), None, True, OSError)

    # test if we can write the output file
    if not gf.file_can_be_written(output_file_path):
        self.log_exc(
            u"Output file '%s' cannot be written" % (output_file_path), None, True, OSError)

    # call ffmpeg
    arguments = [self.rconf[RuntimeConfiguration.FFMPEG_PATH]]
    arguments.extend(["-i", input_file_path])
    # the time values are documented as floats, but subprocess only
    # accepts strings in the argument list (anything else raises
    # TypeError), hence the explicit str() conversions
    if head_length is not None:
        arguments.extend(["-ss", str(head_length)])
    if process_length is not None:
        arguments.extend(["-t", str(process_length)])
    if self.rconf.sample_rate in self.FFMPEG_PARAMETERS_MAP:
        arguments.extend(
            self.FFMPEG_PARAMETERS_MAP[self.rconf.sample_rate])
    else:
        arguments.extend(self.FFMPEG_PARAMETERS_DEFAULT)
    arguments.append(output_file_path)
    self.log([u"Calling with arguments '%s'", arguments])
    try:
        proc = subprocess.Popen(arguments,
                                stdout=subprocess.PIPE,
                                stdin=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        proc.communicate()
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()
    except OSError as exc:
        self.log_exc(
            u"Unable to call the '%s' ffmpeg executable" % (self.rconf[RuntimeConfiguration.FFMPEG_PATH]), exc, True, FFMPEGPathError)
    self.log(u"Call completed")

    # check if the output file exists
    if not gf.file_exists(output_file_path):
        self.log_exc(
            u"Output file '%s' was not written" % (output_file_path), None, True, OSError)

    # returning the output file path
    self.log([u"Returning output file path '%s'", output_file_path])
    return output_file_path
def file_path(self, file_path):
    """
    Store the path of the text file.

    ``None`` is stored without any check;
    any other value must point to a file that can be read.

    :param file_path: the path of the text file, or ``None``
    :raises: OSError: if ``file_path`` is not ``None`` and cannot be read
    """
    if file_path is not None:
        # only an actual path is checked for readability
        if not gf.file_can_be_read(file_path):
            self.log_exc(u"Text file '%s' cannot be read" % (file_path), None, True, OSError)
    self.__file_path = file_path
def test_file_can_be_read_true(self):
    """
    The path returned by ``gf.tmp_file()`` must be reported as readable.
    """
    handler, path = gf.tmp_file()
    try:
        self.assertTrue(gf.file_can_be_read(path))
    finally:
        # always remove the temporary file, even on assertion failure
        gf.delete_file(handler, path)
def read_properties(self, audio_file_path):
    """
    Read the properties of an audio file
    and return them as a dictionary.

    Example: ::

        d["index"]=0
        d["codec_name"]=mp3
        d["codec_long_name"]=MP3 (MPEG audio layer 3)
        d["profile"]=unknown
        d["codec_type"]=audio
        d["codec_time_base"]=1/44100
        d["codec_tag_string"]=[0][0][0][0]
        d["codec_tag"]=0x0000
        d["sample_fmt"]=s16p
        d["sample_rate"]=44100
        d["channels"]=1
        d["channel_layout"]=mono
        d["bits_per_sample"]=0
        d["id"]=N/A
        d["r_frame_rate"]=0/0
        d["avg_frame_rate"]=0/0
        d["time_base"]=1/14112000
        d["start_pts"]=0
        d["start_time"]=0.000000
        d["duration_ts"]=1545083190
        d["duration"]=109.487188
        d["bit_rate"]=128000
        d["max_bit_rate"]=N/A
        d["bits_per_raw_sample"]=N/A
        d["nb_frames"]=N/A
        d["nb_read_frames"]=N/A
        d["nb_read_packets"]=N/A
        d["DISPOSITION:default"]=0
        d["DISPOSITION:dub"]=0
        d["DISPOSITION:original"]=0
        d["DISPOSITION:comment"]=0
        d["DISPOSITION:lyrics"]=0
        d["DISPOSITION:karaoke"]=0
        d["DISPOSITION:forced"]=0
        d["DISPOSITION:hearing_impaired"]=0
        d["DISPOSITION:visual_impaired"]=0
        d["DISPOSITION:clean_effects"]=0
        d["DISPOSITION:attached_pic"]=0

    The duration is taken from the ``ffprobe`` stdout output;
    if it is missing or cannot be converted,
    the ``ffprobe`` stderr output is scanned for it instead.

    :param string audio_file_path: the path of the audio file to analyze
    :rtype: dict
    :raises: TypeError: if ``audio_file_path`` is None
    :raises: OSError: if the file at ``audio_file_path`` cannot be read
    :raises: FFPROBEParsingError: if the call to ``ffprobe`` does not produce any output
    :raises: FFPROBEPathError: if the path to the ``ffprobe`` executable cannot be called
    :raises: FFPROBEUnsupportedFormatError: if the file has a format not supported by ``ffprobe``
    """
    # test if we can read the file at audio_file_path
    if audio_file_path is None:
        self.log_exc(u"The audio file path is None", None, True, TypeError)
    if not gf.file_can_be_read(audio_file_path):
        self.log_exc(u"Input file '%s' cannot be read" % (audio_file_path), None, True, OSError)

    # call ffprobe
    arguments = [self.rconf[RuntimeConfiguration.FFPROBE_PATH]]
    arguments.extend(self.FFPROBE_PARAMETERS)
    arguments.append(audio_file_path)
    self.log([u"Calling with arguments '%s'", arguments])
    try:
        proc = subprocess.Popen(
            arguments,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        (stdoutdata, stderrdata) = proc.communicate()
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()
    except OSError as exc:
        self.log_exc(u"Unable to call the '%s' ffprobe executable" % (self.rconf[RuntimeConfiguration.FFPROBE_PATH]), exc, True, FFPROBEPathError)
    self.log(u"Call completed")

    # check there is some output
    # NOTE(review): this tests stdout for None but stderr for emptiness;
    #               confirm the asymmetry is intended before changing it
    if (stdoutdata is None) or (len(stderrdata) == 0):
        self.log_exc(u"ffprobe produced no output", None, True, FFPROBEParsingError)

    # decode stdoutdata and stderrdata to Unicode string
    try:
        stdoutdata = gf.safe_unicode(stdoutdata)
        stderrdata = gf.safe_unicode(stderrdata)
    except UnicodeDecodeError as exc:
        self.log_exc(u"Unable to decode ffprobe out/err", exc, True, FFPROBEParsingError)

    # dictionary for the results
    results = {
        self.STDOUT_CHANNELS: None,
        self.STDOUT_CODEC_NAME: None,
        self.STDOUT_DURATION: None,
        self.STDOUT_SAMPLE_RATE: None
    }

    # scan the first audio stream the ffprobe stdout output
    # TODO more robust parsing
    # TODO deal with multiple audio streams
    for line in stdoutdata.splitlines():
        if line == self.STDOUT_END_STREAM:
            self.log(u"Reached end of the stream")
            break
        # only lines containing exactly one "=" are treated as key=value pairs
        fields = line.split("=")
        if len(fields) == 2:
            key, value = fields
            results[key] = value
            self.log([u"Found property '%s'='%s'", key, value])

    try:
        self.log([u"Duration found in stdout: '%s'", results[self.STDOUT_DURATION]])
        results[self.STDOUT_DURATION] = TimeValue(results[self.STDOUT_DURATION])
        self.log(u"Valid duration")
    except Exception:
        # any conversion failure means the stdout duration is unusable;
        # Exception (instead of a bare except) lets KeyboardInterrupt
        # and SystemExit propagate
        self.log_warn(u"Invalid duration")
        results[self.STDOUT_DURATION] = None
        # try scanning ffprobe stderr output
        for line in stderrdata.splitlines():
            match = self.STDERR_DURATION_REGEX.search(line)
            if match is not None:
                self.log([u"Found matching line '%s'", line])
                results[self.STDOUT_DURATION] = gf.time_from_hhmmssmmm(line)
                self.log([u"Extracted duration '%.3f'", results[self.STDOUT_DURATION]])
                break

    if results[self.STDOUT_DURATION] is None:
        self.log_exc(u"No duration found in stdout or stderr. Unsupported audio file format?", None, True, FFPROBEUnsupportedFormatError)

    # return dictionary
    self.log(u"Returning dict")
    return results
def test_file_can_be_read_false(self):
    """
    Check that ``/foo/bar/baz`` is reported as a file that cannot be read.
    """
    readable = gf.file_can_be_read("/foo/bar/baz")
    self.assertFalse(readable)