Example #1
0
 def __init__(
         self,
         file_path=None,
         file_path_is_mono_wave=False,
         mfcc_matrix=None,
         audio_file=None,
         rconf=None,
         logger=None
 ):
     """
     Set up the MFCC data from a precomputed matrix, from an audio file
     object, or from the path of an audio file.

     When ``mfcc_matrix`` is given, it is stored directly and
     no MFCC computation takes place.
     Otherwise the MFCCs are computed from ``audio_file`` or,
     if that is ``None``, from an ``AudioFile`` object created
     for ``file_path``; an object created this way is cleared
     and dropped again as soon as the computation has finished.

     :param file_path: the path of the audio file
     :param bool file_path_is_mono_wave: passed to ``AudioFile`` as ``is_mono_wave``
     :param mfcc_matrix: an already computed MFCC matrix
     :param audio_file: an audio file object
     :param rconf: passed to the parent constructor
     :param logger: passed to the parent constructor
     :raises: ValueError: if ``file_path``, ``audio_file``, and ``mfcc_matrix`` are all ``None``
     """
     sources = (file_path, audio_file, mfcc_matrix)
     if all(source is None for source in sources):
         raise ValueError(u"You must initialize with at least one of: file_path, audio_file, or mfcc_matrix")
     super(AudioFileMFCC, self).__init__(rconf=rconf, logger=logger)

     # public state
     self.file_path = file_path
     self.audio_file = audio_file
     self.is_reversed = False

     # private state, filled in below or later on
     self.__mfcc = None
     self.__mfcc_mask = None
     self.__mfcc_mask_map = None
     self.__speech_intervals = None
     self.__nonspeech_intervals = None

     self.log(u"Initializing MFCCs...")
     if mfcc_matrix is not None:
         # precomputed matrix: store it and derive the audio length from it
         self.__mfcc = mfcc_matrix
         self.audio_length = self.all_length * self.rconf.mws
     elif not ((self.file_path is None) and (self.audio_file is None)):
         # remember whether the AudioFile object is created here,
         # so that it can be released once the MFCCs are available
         created_here = self.audio_file is None
         if created_here:
             self.audio_file = AudioFile(
                 self.file_path,
                 is_mono_wave=file_path_is_mono_wave,
                 rconf=self.rconf,
                 logger=self.logger
             )
             # NOTE the bare attribute access is intentional:
             #      it loads the audio samples into memory, if not present already
             self.audio_file.audio_samples
         gf.run_c_extension_with_fallback(
             self.log,
             "cmfcc",
             self._compute_mfcc_c_extension,
             self._compute_mfcc_pure_python,
             (),
             rconf=self.rconf
         )
         self.audio_length = self.audio_file.audio_length
         if created_here:
             self.log(u"Clearing the audio data...")
             self.audio_file.clear_data()
             self.audio_file = None
             self.log(u"Clearing the audio data... done")

     # the middle portion initially spans all the columns of the matrix
     self.__middle_begin = 0
     self.__middle_end = self.__mfcc.shape[1]
     self.log(u"Initializing MFCCs... done")
Example #2
0
 def __init__(
         self,
         file_path=None,
         file_path_is_mono_wave=False,
         mfcc_matrix=None,
         audio_file=None,
         rconf=None,
         logger=None
 ):
     """
     Initialize the MFCC data, using one of three possible sources:
     a precomputed MFCC matrix, an audio file object,
     or the path of an audio file.

     A given ``mfcc_matrix`` is stored directly, without computing anything.
     Failing that, the MFCCs are computed from ``audio_file``;
     if ``audio_file`` is ``None`` as well, an ``AudioFile`` object
     is created for ``file_path``, used for the computation,
     and then cleared and discarded.

     :param file_path: the path of the audio file
     :param bool file_path_is_mono_wave: passed to ``AudioFile`` as ``is_mono_wave``
     :param mfcc_matrix: an already computed MFCC matrix
     :param audio_file: an audio file object
     :param rconf: passed to the parent constructor
     :param logger: passed to the parent constructor
     :raises: ValueError: if ``file_path``, ``audio_file``, and ``mfcc_matrix`` are all ``None``
     """
     candidates = (file_path, audio_file, mfcc_matrix)
     if all(candidate is None for candidate in candidates):
         raise ValueError(u"You must initialize with at least one of: file_path, audio_file, or mfcc_matrix")
     super(AudioFileMFCC, self).__init__(rconf=rconf, logger=logger)

     # public state
     self.file_path = file_path
     self.audio_file = audio_file
     self.is_reversed = False

     # private state, filled in below or later on
     self.__mfcc = None
     self.__mfcc_mask = None
     self.__mfcc_mask_map = None
     self.__speech_intervals = None
     self.__nonspeech_intervals = None

     self.log(u"Initializing MFCCs...")
     if mfcc_matrix is not None:
         # precomputed matrix: store it and derive the audio length from it
         self.__mfcc = mfcc_matrix
         self.audio_length = self.all_length * self.rconf.mws
     elif not ((self.file_path is None) and (self.audio_file is None)):
         # a temporary AudioFile object is needed only
         # when the caller did not provide one
         temporary_audio_file = self.audio_file is None
         if temporary_audio_file:
             self.audio_file = AudioFile(
                 self.file_path,
                 is_mono_wave=file_path_is_mono_wave,
                 rconf=self.rconf,
                 logger=self.logger
             )
             # NOTE the bare attribute access is intentional:
             #      it loads the audio samples into memory, if not present already
             self.audio_file.audio_samples
         use_c_extension = self.rconf[RuntimeConfiguration.C_EXTENSIONS]
         gf.run_c_extension_with_fallback(
             self.log,
             "cmfcc",
             self._compute_mfcc_c_extension,
             self._compute_mfcc_pure_python,
             (),
             c_extension=use_c_extension
         )
         self.audio_length = self.audio_file.audio_length
         if temporary_audio_file:
             self.log(u"Clearing the audio data...")
             self.audio_file.clear_data()
             self.audio_file = None
             self.log(u"Clearing the audio data... done")

     # the middle portion initially spans all the columns of the matrix
     self.__middle_begin = 0
     self.__middle_end = self.__mfcc.shape[1]
     self.log(u"Initializing MFCCs... done")
Example #3
0
 def compute_path(self):
     """
     Compute the path by delegating to ``gf.run_c_extension_with_fallback()``
     for the ``cdtw`` extension.

     The helper receives the C extension implementation
     (``_compute_path_c_extension``), the pure Python implementation
     (``_compute_path_pure_python``), an empty tuple of arguments,
     and the runtime configuration of this object.

     :returns: whatever the helper returns
     """
     no_arguments = ()
     path = gf.run_c_extension_with_fallback(
         self.log,
         "cdtw",
         self._compute_path_c_extension,
         self._compute_path_pure_python,
         no_arguments,
         rconf=self.rconf
     )
     return path
Example #4
0
 def compute_accumulated_cost_matrix(self):
     """
     Compute the accumulated cost matrix by delegating to
     ``gf.run_c_extension_with_fallback()`` for the ``cdtw`` extension.

     The helper receives the C extension implementation
     (``_compute_acm_c_extension``), the pure Python implementation
     (``_compute_acm_pure_python``), an empty tuple of arguments,
     and the runtime configuration of this object.

     :returns: whatever the helper returns
     """
     no_arguments = ()
     accumulated_cost_matrix = gf.run_c_extension_with_fallback(
         self.log,
         "cdtw",
         self._compute_acm_c_extension,
         self._compute_acm_pure_python,
         no_arguments,
         rconf=self.rconf
     )
     return accumulated_cost_matrix
Example #5
0
 def compute_path(self):
     """
     Return the result of running the path computation through
     ``gf.run_c_extension_with_fallback()``.

     The ``cdtw`` extension name is passed along with both implementations
     of the computation, ``_compute_path_c_extension`` and
     ``_compute_path_pure_python``; neither takes extra arguments,
     hence the empty tuple.
     """
     # bind the two implementations first, in the order the helper expects them
     c_extension_function = self._compute_path_c_extension
     pure_python_function = self._compute_path_pure_python
     return gf.run_c_extension_with_fallback(
         self.log, "cdtw", c_extension_function, pure_python_function, (), rconf=self.rconf
     )
Example #6
0
 def compute_accumulated_cost_matrix(self):
     """
     Return the result of running the accumulated cost matrix computation
     through ``gf.run_c_extension_with_fallback()``.

     The ``cdtw`` extension name is passed along with both implementations
     of the computation, ``_compute_acm_c_extension`` and
     ``_compute_acm_pure_python``; neither takes extra arguments,
     hence the empty tuple.
     """
     # bind the two implementations first, in the order the helper expects them
     c_extension_function = self._compute_acm_c_extension
     pure_python_function = self._compute_acm_pure_python
     return gf.run_c_extension_with_fallback(
         self.log, "cdtw", c_extension_function, pure_python_function, (), rconf=self.rconf
     )
Example #7
0
    def synthesize_multiple(self, text_file, output_file_path, quit_after=None, backwards=False):
        """
        Synthesize all the fragments of the given text file
        into a single WAVE file.

        The result is a tuple (anchors, total_time, num_chars).

        The work is carried out by one of the following private functions,
        at least one of which must be implemented by concrete subclasses:

            1. ``_synthesize_multiple_python()``
            2. ``_synthesize_multiple_c_extension()``
            3. ``_synthesize_multiple_subprocess()``

        The Python function is tried first, if the class declares it;
        if it is not declared, reports a failure, or raises an exception,
        the C extension and subprocess functions are handed
        to ``gf.run_c_extension_with_fallback()``.

        :param text_file: the text file to be synthesized
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :param string output_file_path: the path to the output audio file
        :param quit_after: stop synthesizing as soon as
                           reaching this many seconds
        :type quit_after: :class:`~aeneas.exacttiming.TimeValue`
        :param bool backwards: if ``True``, synthesize from the end of the text file
        :rtype: tuple (anchors, total_time, num_chars)
        :raises: TypeError: if ``text_file`` is ``None`` or
                            one of the text fragments is not a Unicode string
        :raises: ValueError: if ``self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]`` is ``False``
                             and a fragment has a language code not supported by the TTS engine, or
                             if ``text_file`` has no fragments or all its fragments are empty
        :raises: OSError: if output file cannot be written to ``output_file_path``
        :raises: RuntimeError: if both the C extension and
                               the pure Python code did not succeed.
        """
        # the text file must exist and contain some text
        if text_file is None:
            self.log_exc(u"text_file is None", None, True, TypeError)
        if len(text_file) < 1:
            self.log_exc(u"The text file has no fragments", None, True, ValueError)
        if text_file.chars == 0:
            self.log_exc(u"All fragments in the text file are empty", None, True, ValueError)

        # every fragment language must be known, unless unlisted languages are allowed
        if not self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]:
            for frag in text_file.fragments:
                if frag.language not in self.LANGUAGE_TO_VOICE_CODE:
                    message = u"Language '%s' is not supported by the selected TTS engine" % (frag.language)
                    self.log_exc(message, None, True, ValueError)

        # every line of every fragment must be a Unicode string
        for frag in text_file.fragments:
            for text_line in frag.lines:
                if not gf.is_unicode(text_line):
                    self.log_exc(u"The text file contain a line which is not a Unicode string", None, True, TypeError)

        # log the optional parameters
        if quit_after is not None:
            self.log([u"Quit after reaching %.3f", quit_after])
        if backwards:
            self.log(u"Synthesizing backwards")

        # the output file must be writable
        if not gf.file_can_be_written(output_file_path):
            message = u"Cannot write to output file '%s'" % (output_file_path)
            self.log_exc(message, None, True, OSError)

        # first attempt: the Python function, if the class declares one;
        # any failure here is only logged, and the next attempt follows
        if self.HAS_PYTHON_CALL:
            self.log(u"Calling TTS engine via Python")
            try:
                computed, result = self._synthesize_multiple_python(
                    text_file,
                    output_file_path,
                    quit_after,
                    backwards
                )
                if computed:
                    self.log(u"The _synthesize_multiple_python call was successful, returning anchors")
                    return result
                self.log(u"The _synthesize_multiple_python call failed")
            except Exception as exc:
                self.log_exc(u"An unexpected error occurred while calling _synthesize_multiple_python", exc, False, None)

        # second attempt: the C extension and/or the subprocess function,
        # each one passed along only if the class declares it
        self.log(u"Calling TTS engine via C extension or subprocess")
        via_c_extension = None
        if self.HAS_C_EXTENSION_CALL:
            via_c_extension = self._synthesize_multiple_c_extension
        via_subprocess = None
        if self.HAS_SUBPROCESS_CALL:
            via_subprocess = self._synthesize_multiple_subprocess
        call_arguments = (text_file, output_file_path, quit_after, backwards)
        return gf.run_c_extension_with_fallback(
            self.log,
            self.C_EXTENSION_NAME,
            via_c_extension,
            via_subprocess,
            call_arguments,
            rconf=self.rconf
        )
Example #8
0
    def synthesize_single(self, text, language, output_file_path):
        """
        Synthesize the given text into a mono WAVE audio file,
        and return the duration of that file, in seconds.

        The ``text`` must be a Unicode string encodable with UTF-8.
        An empty ``text`` yields a zero duration
        without calling any synthesis function.

        The work is carried out by one of the following private functions,
        which concrete subclasses can (but are not required to) implement:

            1. ``_synthesize_single_python()``
            2. ``_synthesize_single_c_extension()``
            3. ``_synthesize_single_subprocess()``

        The Python function is tried first, if the object declares it;
        if it is not declared or raises an exception,
        the C extension and subprocess functions are handed
        to ``gf.run_c_extension_with_fallback()``.

        :param string text: the text to synthesize
        :param language: the language to use
        :type  language: :class:`~aeneas.language.Language`
        :param string output_file_path: the path of the output audio file
        :rtype: :class:`~aeneas.timevalue.TimeValue`
        :raises: TypeError: if ``text`` is ``None`` or it is not a Unicode string
        :raises: ValueError: if ``self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]`` is ``False``
                             and ``language`` is not supported by the TTS engine
        :raises: OSError: if output file cannot be written to ``output_file_path``
        :raises: RuntimeError: if both the C extension and
                               the pure Python code did not succeed.
        """
        # the text must be present...
        if text is None:
            self.log_exc(u"text is None", None, True, TypeError)

        # ...and it must be a Unicode string
        if not gf.is_unicode(text):
            self.log_exc(u"text is not a Unicode string", None, True, TypeError)

        # the output file must be writable
        if not gf.file_can_be_written(output_file_path):
            message = u"Cannot write to output file '%s'" % (output_file_path)
            self.log_exc(message, None, True, OSError)

        # the language must be known, unless unlisted languages are allowed
        if language not in self.LANGUAGE_TO_VOICE_CODE:
            if not self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]:
                message = u"Language '%s' is not supported by the selected TTS engine" % (language)
                self.log_exc(message, None, True, ValueError)

        self.log([u"Synthesizing text: '%s'", text])
        self.log([u"Synthesizing language: '%s'", language])
        self.log([u"Synthesizing to file: '%s'", output_file_path])

        # nothing to synthesize: the duration is zero
        if len(text) < 1:
            self.log(u"len(text) is zero: returning 0.000")
            return TimeValue("0.000")

        # map the language to the voice code
        voice_code = self._language_to_voice_code(language)
        self.log([u"Using voice code: '%s'", voice_code])

        # first attempt: the Python function, if the object declares one;
        # an exception here is only logged, and the next attempt follows
        if self.has_python_call:
            self.log(u"Calling TTS engine via Python")
            try:
                return self._synthesize_single_python(text, voice_code, output_file_path)[0]
            except Exception as exc:
                self.log_exc(u"An unexpected error occurred while calling _synthesize_single_python", exc, False, None)

        # second attempt: the C extension and/or the subprocess function,
        # each one passed along only if the object declares it
        self.log(u"Calling TTS engine via C extension or subprocess")
        via_c_extension = None
        if self.has_c_extension_call:
            via_c_extension = self._synthesize_single_c_extension
        via_subprocess = None
        if self.has_subprocess_call:
            via_subprocess = self._synthesize_single_subprocess
        call_arguments = (text, voice_code, output_file_path)
        # only the first element of the result is returned
        return gf.run_c_extension_with_fallback(
            self.log,
            "cew",
            via_c_extension,
            via_subprocess,
            call_arguments,
            rconf=self.rconf
        )[0]
Example #9
0
    def synthesize_multiple(self, text_file, output_file_path, quit_after=None, backwards=False):
        """
        Synthesize all the fragments of the given text file
        into a single WAVE file.

        The result is a tuple (anchors, total_time, num_chars).

        The work is carried out by one of the following private functions,
        at least one of which must be implemented by concrete subclasses:

            1. ``_synthesize_multiple_python()``
            2. ``_synthesize_multiple_c_extension()``
            3. ``_synthesize_multiple_subprocess()``

        The Python function is tried first, if the object declares it;
        if it is not declared, reports a failure, or raises an exception,
        the C extension and subprocess functions are handed
        to ``gf.run_c_extension_with_fallback()``.

        :param text_file: the text file to be synthesized
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :param string output_file_path: the path to the output audio file
        :param quit_after: stop synthesizing as soon as
                           reaching this many seconds
        :type quit_after: :class:`~aeneas.timevalue.TimeValue`
        :param bool backwards: if ``True``, synthesize from the end of the text file
        :rtype: tuple (anchors, total_time, num_chars)
        :raises: TypeError: if ``text_file`` is ``None`` or
                            one of the text fragments is not a Unicode string
        :raises: ValueError: if ``self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]`` is ``False``
                             and a fragment has a language code not supported by the TTS engine, or
                             if ``text_file`` has no fragments
        :raises: OSError: if output file cannot be written to ``output_file_path``
        :raises: RuntimeError: if both the C extension and
                               the pure Python code did not succeed.
        """
        # the text file must exist and have at least one fragment
        if text_file is None:
            self.log_exc(u"text_file is None", None, True, TypeError)
        if len(text_file) < 1:
            self.log_exc(u"The text file has no fragments", None, True, ValueError)

        # every fragment language must be known, unless unlisted languages are allowed
        if not self.rconf[RuntimeConfiguration.ALLOW_UNLISTED_LANGUAGES]:
            for frag in text_file.fragments:
                if frag.language not in self.LANGUAGE_TO_VOICE_CODE:
                    message = u"Language '%s' is not supported by the selected TTS engine" % (frag.language)
                    self.log_exc(message, None, True, ValueError)

        # every line of every fragment must be a Unicode string
        for frag in text_file.fragments:
            for text_line in frag.lines:
                if not gf.is_unicode(text_line):
                    self.log_exc(u"The text file contain a line which is not a Unicode string", None, True, TypeError)

        # log the optional parameters
        if quit_after is not None:
            self.log([u"Quit after reaching %.3f", quit_after])
        if backwards:
            self.log(u"Synthesizing backwards")

        # the output file must be writable
        if not gf.file_can_be_written(output_file_path):
            message = u"Cannot write to output file '%s'" % (output_file_path)
            self.log_exc(message, None, True, OSError)

        # first attempt: the Python function, if the object declares one;
        # any failure here is only logged, and the next attempt follows
        if self.has_python_call:
            self.log(u"Calling TTS engine via Python")
            try:
                computed, result = self._synthesize_multiple_python(
                    text_file,
                    output_file_path,
                    quit_after,
                    backwards
                )
                if computed:
                    self.log(u"The _synthesize_multiple_python call was successful, returning anchors")
                    return result
                self.log(u"The _synthesize_multiple_python call failed")
            except Exception as exc:
                self.log_exc(u"An unexpected error occurred while calling _synthesize_multiple_python", exc, False, None)

        # second attempt: the C extension and/or the subprocess function,
        # each one passed along only if the object declares it
        self.log(u"Calling TTS engine via C extension or subprocess")
        via_c_extension = None
        if self.has_c_extension_call:
            via_c_extension = self._synthesize_multiple_c_extension
        via_subprocess = None
        if self.has_subprocess_call:
            via_subprocess = self._synthesize_multiple_subprocess
        call_arguments = (text_file, output_file_path, quit_after, backwards)
        return gf.run_c_extension_with_fallback(
            self.log,
            "cew",
            via_c_extension,
            via_subprocess,
            call_arguments,
            rconf=self.rconf
        )