def __init__(
        self,
        file_path=None,
        file_path_is_mono_wave=False,
        mfcc_matrix=None,
        audio_file=None,
        rconf=None,
        logger=None
):
    """
    Set up the object from one of three possible inputs:
    a precomputed MFCC matrix, an audio file object,
    or the path of an audio file.

    When ``mfcc_matrix`` is given it is stored as is and
    the audio length is derived as ``all_length * rconf.mws``.
    Otherwise the MFCCs are computed from the audio file,
    which is created from ``file_path`` if ``audio_file`` is ``None``;
    an audio file created here is cleared and dropped afterwards.

    :param file_path: path used to create the audio file
                      when ``audio_file`` is ``None``
    :param bool file_path_is_mono_wave: forwarded as ``is_mono_wave``
                                        to the audio file created from ``file_path``
    :param mfcc_matrix: precomputed MFCC matrix
                        (its ``shape[1]`` is read as the end of the middle interval)
    :param audio_file: an already created audio file object
    :param rconf: runtime configuration, forwarded to the parent constructor
    :param logger: logger, forwarded to the parent constructor
    :raises: ValueError: if ``file_path``, ``audio_file``
                         and ``mfcc_matrix`` are all ``None``
    """
    possible_sources = (file_path, audio_file, mfcc_matrix)
    if all(source is None for source in possible_sources):
        raise ValueError(u"You must initialize with at least one of: file_path, audio_file, or mfcc_matrix")

    super(AudioFileMFCC, self).__init__(rconf=rconf, logger=logger)

    # public state
    self.file_path = file_path
    self.audio_file = audio_file
    self.is_reversed = False

    # private state, initially empty
    self.__mfcc = None
    self.__mfcc_mask = None
    self.__mfcc_mask_map = None
    self.__speech_intervals = None
    self.__nonspeech_intervals = None

    self.log(u"Initializing MFCCs...")
    if mfcc_matrix is not None:
        # precomputed matrix: no audio needs to be read
        self.__mfcc = mfcc_matrix
        self.audio_length = self.all_length * self.rconf.mws
    elif not ((self.file_path is None) and (self.audio_file is None)):
        # remember whether the audio file object is created here,
        # so that it can be released once the MFCCs are available
        created_here = self.audio_file is None
        if created_here:
            self.audio_file = AudioFile(
                self.file_path,
                is_mono_wave=file_path_is_mono_wave,
                rconf=self.rconf,
                logger=self.logger
            )
        # NOTE load audio samples into memory, if not present already
        self.audio_file.audio_samples
        c_implementation = self._compute_mfcc_c_extension
        python_implementation = self._compute_mfcc_pure_python
        gf.run_c_extension_with_fallback(
            self.log,
            "cmfcc",
            c_implementation,
            python_implementation,
            (),
            rconf=self.rconf
        )
        self.audio_length = self.audio_file.audio_length
        if created_here:
            self.log(u"Clearing the audio data...")
            self.audio_file.clear_data()
            self.audio_file = None
            self.log(u"Clearing the audio data... done")

    # the middle interval initially spans all the MFCC columns
    self.__middle_begin = 0
    self.__middle_end = self.__mfcc.shape[1]
    self.log(u"Initializing MFCCs... done")
def __init__(
        self,
        file_path=None,
        file_path_is_mono_wave=False,
        mfcc_matrix=None,
        audio_file=None,
        rconf=None,
        logger=None
):
    """
    Set up the object from one of three possible inputs:
    a precomputed MFCC matrix, an audio file object,
    or the path of an audio file.

    When ``mfcc_matrix`` is given it is stored as is and
    the audio length is derived as ``all_length * rconf.mws``.
    Otherwise the MFCCs are computed from the audio file,
    which is created from ``file_path`` if ``audio_file`` is ``None``;
    an audio file created here is cleared and dropped afterwards.

    :param file_path: path used to create the audio file
                      when ``audio_file`` is ``None``
    :param bool file_path_is_mono_wave: forwarded as ``is_mono_wave``
                                        to the audio file created from ``file_path``
    :param mfcc_matrix: precomputed MFCC matrix
                        (its ``shape[1]`` is read as the end of the middle interval)
    :param audio_file: an already created audio file object
    :param rconf: runtime configuration, forwarded to the parent constructor
    :param logger: logger, forwarded to the parent constructor
    :raises: ValueError: if ``file_path``, ``audio_file``
                         and ``mfcc_matrix`` are all ``None``
    """
    possible_sources = (file_path, audio_file, mfcc_matrix)
    if all(source is None for source in possible_sources):
        raise ValueError(u"You must initialize with at least one of: file_path, audio_file, or mfcc_matrix")

    super(AudioFileMFCC, self).__init__(rconf=rconf, logger=logger)

    # public state
    self.file_path = file_path
    self.audio_file = audio_file
    self.is_reversed = False

    # private state, initially empty
    self.__mfcc = None
    self.__mfcc_mask = None
    self.__mfcc_mask_map = None
    self.__speech_intervals = None
    self.__nonspeech_intervals = None

    self.log(u"Initializing MFCCs...")
    if mfcc_matrix is not None:
        # precomputed matrix: no audio needs to be read
        self.__mfcc = mfcc_matrix
        self.audio_length = self.all_length * self.rconf.mws
    elif not ((self.file_path is None) and (self.audio_file is None)):
        # remember whether the audio file object is created here,
        # so that it can be released once the MFCCs are available
        created_here = self.audio_file is None
        if created_here:
            self.audio_file = AudioFile(
                self.file_path,
                is_mono_wave=file_path_is_mono_wave,
                rconf=self.rconf,
                logger=self.logger
            )
        # NOTE load audio samples into memory, if not present already
        self.audio_file.audio_samples
        c_implementation = self._compute_mfcc_c_extension
        python_implementation = self._compute_mfcc_pure_python
        gf.run_c_extension_with_fallback(
            self.log,
            "cmfcc",
            c_implementation,
            python_implementation,
            (),
            c_extension=self.rconf[RuntimeConfiguration.C_EXTENSIONS]
        )
        self.audio_length = self.audio_file.audio_length
        if created_here:
            self.log(u"Clearing the audio data...")
            self.audio_file.clear_data()
            self.audio_file = None
            self.log(u"Clearing the audio data... done")

    # the middle interval initially spans all the MFCC columns
    self.__middle_begin = 0
    self.__middle_end = self.__mfcc.shape[1]
    self.log(u"Initializing MFCCs... done")
def compute_path(self):
    """
    Compute the path by delegating to
    ``gf.run_c_extension_with_fallback()``.

    The helper receives the ``"cdtw"`` extension name,
    the C extension implementation, the pure Python implementation,
    an empty argument tuple, and the runtime configuration.

    :rtype: whatever ``gf.run_c_extension_with_fallback()`` returns
    """
    c_implementation = self._compute_path_c_extension
    python_implementation = self._compute_path_pure_python
    return gf.run_c_extension_with_fallback(
        self.log,
        "cdtw",
        c_implementation,
        python_implementation,
        (),
        rconf=self.rconf
    )
def compute_accumulated_cost_matrix(self):
    """
    Compute the accumulated cost matrix by delegating to
    ``gf.run_c_extension_with_fallback()``.

    The helper receives the ``"cdtw"`` extension name,
    the C extension implementation, the pure Python implementation,
    an empty argument tuple, and the runtime configuration.

    :rtype: whatever ``gf.run_c_extension_with_fallback()`` returns
    """
    c_implementation = self._compute_acm_c_extension
    python_implementation = self._compute_acm_pure_python
    return gf.run_c_extension_with_fallback(
        self.log,
        "cdtw",
        c_implementation,
        python_implementation,
        (),
        rconf=self.rconf
    )
def compute_path(self):
    """
    Compute the path by delegating to
    ``gf.run_c_extension_with_fallback()``.

    The helper receives the ``"cdtw"`` extension name,
    the C extension implementation, the pure Python implementation,
    an empty argument tuple, and the runtime configuration.

    :rtype: whatever ``gf.run_c_extension_with_fallback()`` returns
    """
    fallback_kwargs = {"rconf": self.rconf}
    implementations = (
        self._compute_path_c_extension,
        self._compute_path_pure_python,
    )
    return gf.run_c_extension_with_fallback(
        self.log,
        "cdtw",
        implementations[0],
        implementations[1],
        (),
        **fallback_kwargs
    )
def compute_accumulated_cost_matrix(self):
    """
    Compute the accumulated cost matrix by delegating to
    ``gf.run_c_extension_with_fallback()``.

    The helper receives the ``"cdtw"`` extension name,
    the C extension implementation, the pure Python implementation,
    an empty argument tuple, and the runtime configuration.

    :rtype: whatever ``gf.run_c_extension_with_fallback()`` returns
    """
    fallback_kwargs = {"rconf": self.rconf}
    implementations = (
        self._compute_acm_c_extension,
        self._compute_acm_pure_python,
    )
    return gf.run_c_extension_with_fallback(
        self.log,
        "cdtw",
        implementations[0],
        implementations[1],
        (),
        **fallback_kwargs
    )
def synthesize_multiple(self, text_file, output_file_path, quit_after=None, backwards=False):
    """
    Synthesize all the fragments of the given text file
    into a single WAVE file.

    Return a tuple (anchors, total_time, num_chars).

    The Python call, if available, is attempted first;
    if it is not available or it does not succeed,
    the C extension or the subprocess call is used.

    Concrete subclasses must implement at least one
    of the following private functions:

        1. ``_synthesize_multiple_python()``
        2. ``_synthesize_multiple_c_extension()``
        3. ``_synthesize_multiple_subprocess()``

    :param text_file: the text file to be synthesized
    :type  text_file: :class:`~aeneas.textfile.TextFile`
    :param string output_file_path: the path to the output audio file
    :param quit_after: stop synthesizing as soon as
                       reaching this many seconds
    :type  quit_after: :class:`~aeneas.exacttiming.TimeValue`
    :param bool backwards: if ``True``, synthesize from the end of the text file
    :rtype: tuple (anchors, total_time, num_chars)
    :raises: TypeError: if ``text_file`` is ``None`` or
                        one of the text fragments is not a Unicode string
    :raises: ValueError: if ``self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]`` is ``False``
                         and a fragment has a language code not supported by the TTS engine, or
                         if ``text_file`` has no fragments or all its fragments are empty
    :raises: OSError: if output file cannot be written to ``output_file_path``
    :raises: RuntimeError: if both the C extension and
                           the pure Python code did not succeed.
    """
    # the text file must exist, have fragments, and have some text
    if text_file is None:
        self.log_exc(u"text_file is None", None, True, TypeError)
    if len(text_file) < 1:
        self.log_exc(u"The text file has no fragments", None, True, ValueError)
    if text_file.chars == 0:
        self.log_exc(u"All fragments in the text file are empty", None, True, ValueError)

    # unless unlisted languages are allowed,
    # every fragment must have a language known to this engine
    if not self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]:
        for frag in text_file.fragments:
            if frag.language in self.LANGUAGE_TO_VOICE_CODE:
                continue
            self.log_exc(
                u"Language '%s' is not supported by the selected TTS engine" % (frag.language),
                None,
                True,
                ValueError
            )

    # every line of every fragment must be a Unicode string
    all_lines = (
        text_line
        for frag in text_file.fragments
        for text_line in frag.lines
    )
    for text_line in all_lines:
        if not gf.is_unicode(text_line):
            self.log_exc(
                u"The text file contain a line which is not a Unicode string",
                None,
                True,
                TypeError
            )

    # log parameters
    if quit_after is not None:
        self.log([u"Quit after reaching %.3f", quit_after])
    if backwards:
        self.log(u"Synthesizing backwards")

    # the output file must be writable
    if not gf.file_can_be_written(output_file_path):
        self.log_exc(
            u"Cannot write to output file '%s'" % (output_file_path),
            None,
            True,
            OSError
        )

    # first attempt: the Python call, if the engine offers one;
    # an unsuccessful or failing call falls through to the next attempt
    if self.HAS_PYTHON_CALL:
        self.log(u"Calling TTS engine via Python")
        try:
            succeeded, python_result = self._synthesize_multiple_python(
                text_file,
                output_file_path,
                quit_after,
                backwards
            )
            if not succeeded:
                self.log(u"The _synthesize_multiple_python call failed")
            else:
                self.log(u"The _synthesize_multiple_python call was successful, returning anchors")
                return python_result
        except Exception as exc:
            self.log_exc(
                u"An unexpected error occurred while calling _synthesize_multiple_python",
                exc,
                False,
                None
            )

    # second attempt: the C extension or the subprocess call,
    # passing None for whichever of them is not available
    self.log(u"Calling TTS engine via C extension or subprocess")
    c_extension_function = None
    if self.HAS_C_EXTENSION_CALL:
        c_extension_function = self._synthesize_multiple_c_extension
    subprocess_function = None
    if self.HAS_SUBPROCESS_CALL:
        subprocess_function = self._synthesize_multiple_subprocess
    call_arguments = (text_file, output_file_path, quit_after, backwards)
    return gf.run_c_extension_with_fallback(
        self.log,
        self.C_EXTENSION_NAME,
        c_extension_function,
        subprocess_function,
        call_arguments,
        rconf=self.rconf
    )
def synthesize_single(self, text, language, output_file_path):
    """
    Synthesize ``text`` into a mono WAVE audio file.

    The ``text`` must be a Unicode string encodable with UTF-8.

    Return the duration of the synthesized audio file, in seconds;
    an empty ``text`` yields ``TimeValue("0.000")``
    without calling the TTS engine.

    The Python call, if available, is attempted first;
    if it is not available or it raises,
    the C extension or the subprocess call is used.

    Concrete subclasses can (but they are not required to) implement one
    of the following private functions:

        1. ``_synthesize_single_python()``
        2. ``_synthesize_single_c_extension()``
        3. ``_synthesize_single_subprocess()``

    :param string text: the text to synthesize
    :param language: the language to use
    :type  language: :class:`~aeneas.language.Language`
    :param string output_file_path: the path of the output audio file
    :rtype: :class:`~aeneas.timevalue.TimeValue`
    :raises: TypeError: if ``text`` is ``None`` or it is not a Unicode string
    :raises: ValueError: if ``self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]`` is ``False``
                         and ``language`` is not supported by the TTS engine
    :raises: OSError: if output file cannot be written to ``output_file_path``
    :raises: RuntimeError: if both the C extension and
                           the pure Python code did not succeed.
    """
    # the text must be present and be a Unicode string
    if text is None:
        self.log_exc(u"text is None", None, True, TypeError)
    if not gf.is_unicode(text):
        self.log_exc(u"text is not a Unicode string", None, True, TypeError)

    # the output file must be writable
    if not gf.file_can_be_written(output_file_path):
        self.log_exc(
            u"Cannot write to output file '%s'" % (output_file_path),
            None,
            True,
            OSError
        )

    # a language unknown to this engine is rejected,
    # unless unlisted languages are explicitly allowed
    if not (
            (language in self.LANGUAGE_TO_VOICE_CODE) or
            self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]
    ):
        self.log_exc(
            u"Language '%s' is not supported by the selected TTS engine" % (language),
            None,
            True,
            ValueError
        )

    # log parameters
    logged_parameters = (
        (u"Synthesizing text: '%s'", text),
        (u"Synthesizing language: '%s'", language),
        (u"Synthesizing to file: '%s'", output_file_path),
    )
    for template, value in logged_parameters:
        self.log([template, value])

    # nothing to synthesize for the empty string
    if len(text) < 1:
        self.log(u"len(text) is zero: returning 0.000")
        return TimeValue("0.000")

    # map the language to the voice code understood by the engine
    voice_code = self._language_to_voice_code(language)
    self.log([u"Using voice code: '%s'", voice_code])

    # first attempt: the Python call, if the engine offers one;
    # a failing call falls through to the next attempt
    if self.has_python_call:
        self.log(u"Calling TTS engine via Python")
        try:
            return self._synthesize_single_python(text, voice_code, output_file_path)[0]
        except Exception as exc:
            self.log_exc(
                u"An unexpected error occurred while calling _synthesize_single_python",
                exc,
                False,
                None
            )

    # second attempt: the C extension or the subprocess call,
    # passing None for whichever of them is not available
    self.log(u"Calling TTS engine via C extension or subprocess")
    c_extension_function = None
    if self.has_c_extension_call:
        c_extension_function = self._synthesize_single_c_extension
    subprocess_function = None
    if self.has_subprocess_call:
        subprocess_function = self._synthesize_single_subprocess
    call_arguments = (text, voice_code, output_file_path)
    # only the first element of the returned sequence is handed back
    return gf.run_c_extension_with_fallback(
        self.log,
        "cew",
        c_extension_function,
        subprocess_function,
        call_arguments,
        rconf=self.rconf
    )[0]
def synthesize_multiple(self, text_file, output_file_path, quit_after=None, backwards=False):
    """
    Synthesize all the fragments of the given text file
    into a single WAVE file.

    Return a tuple (anchors, total_time, num_chars).

    The Python call, if available, is attempted first;
    if it is not available or it does not succeed,
    the C extension or the subprocess call is used.

    Concrete subclasses must implement at least one
    of the following private functions:

        1. ``_synthesize_multiple_python()``
        2. ``_synthesize_multiple_c_extension()``
        3. ``_synthesize_multiple_subprocess()``

    :param text_file: the text file to be synthesized
    :type  text_file: :class:`~aeneas.textfile.TextFile`
    :param string output_file_path: the path to the output audio file
    :param quit_after: stop synthesizing as soon as
                       reaching this many seconds
    :type  quit_after: :class:`~aeneas.timevalue.TimeValue`
    :param bool backwards: if ``True``, synthesize from the end of the text file
    :rtype: tuple (anchors, total_time, num_chars)
    :raises: TypeError: if ``text_file`` is ``None`` or
                        one of the text fragments is not a Unicode string
    :raises: ValueError: if ``self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]`` is ``False``
                         and a fragment has a language code not supported by the TTS engine, or
                         if ``text_file`` has no fragments
    :raises: OSError: if output file cannot be written to ``output_file_path``
    :raises: RuntimeError: if both the C extension and
                           the pure Python code did not succeed.
    """
    # the text file must exist and have at least one fragment
    if text_file is None:
        self.log_exc(u"text_file is None", None, True, TypeError)
    if len(text_file) < 1:
        self.log_exc(u"The text file has no fragments", None, True, ValueError)

    # unless unlisted languages are allowed,
    # every fragment must have a language known to this engine
    if not self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]:
        for frag in text_file.fragments:
            if frag.language in self.LANGUAGE_TO_VOICE_CODE:
                continue
            self.log_exc(
                u"Language '%s' is not supported by the selected TTS engine" % (frag.language),
                None,
                True,
                ValueError
            )

    # every line of every fragment must be a Unicode string
    all_lines = (
        text_line
        for frag in text_file.fragments
        for text_line in frag.lines
    )
    for text_line in all_lines:
        if not gf.is_unicode(text_line):
            self.log_exc(
                u"The text file contain a line which is not a Unicode string",
                None,
                True,
                TypeError
            )

    # log parameters
    if quit_after is not None:
        self.log([u"Quit after reaching %.3f", quit_after])
    if backwards:
        self.log(u"Synthesizing backwards")

    # the output file must be writable
    if not gf.file_can_be_written(output_file_path):
        self.log_exc(
            u"Cannot write to output file '%s'" % (output_file_path),
            None,
            True,
            OSError
        )

    # first attempt: the Python call, if the engine offers one;
    # an unsuccessful or failing call falls through to the next attempt
    if self.has_python_call:
        self.log(u"Calling TTS engine via Python")
        try:
            succeeded, python_result = self._synthesize_multiple_python(
                text_file,
                output_file_path,
                quit_after,
                backwards
            )
            if not succeeded:
                self.log(u"The _synthesize_multiple_python call failed")
            else:
                self.log(u"The _synthesize_multiple_python call was successful, returning anchors")
                return python_result
        except Exception as exc:
            self.log_exc(
                u"An unexpected error occurred while calling _synthesize_multiple_python",
                exc,
                False,
                None
            )

    # second attempt: the C extension or the subprocess call,
    # passing None for whichever of them is not available
    self.log(u"Calling TTS engine via C extension or subprocess")
    c_extension_function = None
    if self.has_c_extension_call:
        c_extension_function = self._synthesize_multiple_c_extension
    subprocess_function = None
    if self.has_subprocess_call:
        subprocess_function = self._synthesize_multiple_subprocess
    call_arguments = (text_file, output_file_path, quit_after, backwards)
    return gf.run_c_extension_with_fallback(
        self.log,
        "cew",
        c_extension_function,
        subprocess_function,
        call_arguments,
        rconf=self.rconf
    )