예제 #1
0
    def perform_command(self):
        """
        Synthesize the requested text into an audio file
        and return the appropriate exit code.

        The positional arguments are read from ``self.actual_arguments``
        in this order: text format, text (the text itself for the
        ``list`` format, otherwise the path of an input file),
        language, and output file path.

        The synthesizer cache is cleared as soon as the synthesis
        attempt is over, whether it succeeded or raised.

        :rtype: int
        """
        if len(self.actual_arguments) < 4:
            return self.print_help()
        text_format = gf.safe_unicode(self.actual_arguments[0])
        if text_format == u"list":
            text = gf.safe_unicode(self.actual_arguments[1])
        elif text_format in TextFileFormat.ALLOWED_VALUES:
            text = self.actual_arguments[1]
            if not self.check_input_file(text):
                return self.ERROR_EXIT_CODE
        else:
            return self.print_help()

        l1_id_regex = self.has_option_with_value(u"--l1-id-regex")
        l2_id_regex = self.has_option_with_value(u"--l2-id-regex")
        l3_id_regex = self.has_option_with_value(u"--l3-id-regex")
        id_regex = self.has_option_with_value(u"--id-regex")
        class_regex = self.has_option_with_value(u"--class-regex")
        sort = self.has_option_with_value(u"--sort")
        backwards = self.has_option([u"-b", u"--backwards"])
        quit_after = gf.safe_float(self.has_option_with_value(u"--quit-after"),
                                   None)
        start_fragment = gf.safe_int(self.has_option_with_value(u"--start"),
                                     None)
        end_fragment = gf.safe_int(self.has_option_with_value(u"--end"), None)
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: l1_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: l2_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: l3_id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX: class_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT: sort,
        }
        # munparsed needs all three level regexes
        if (text_format == TextFileFormat.MUNPARSED) and (
            (l1_id_regex is None) or (l2_id_regex is None) or
            (l3_id_regex is None)):
            self.print_error(
                u"You must specify --l1-id-regex and --l2-id-regex and --l3-id-regex for munparsed format"
            )
            return self.ERROR_EXIT_CODE
        # unparsed needs at least one of the two regexes
        if (text_format == TextFileFormat.UNPARSED) and (
                id_regex is None) and (class_regex is None):
            self.print_error(
                u"You must specify --id-regex and/or --class-regex for unparsed format"
            )
            return self.ERROR_EXIT_CODE

        language = gf.safe_unicode(self.actual_arguments[2])

        output_file_path = self.actual_arguments[3]
        if not self.check_output_file(output_file_path):
            return self.ERROR_EXIT_CODE

        text_file = self.get_text_file(text_format, text, parameters)
        if text_file is None:
            self.print_error(
                u"Unable to build a TextFile from the given parameters")
            return self.ERROR_EXIT_CODE
        elif len(text_file) == 0:
            self.print_error(u"No text fragments found")
            return self.ERROR_EXIT_CODE
        text_file.set_language(language)
        self.print_info(u"Read input text with %d fragments" %
                        (len(text_file)))
        if start_fragment is not None:
            self.print_info(u"Slicing from index %d" % (start_fragment))
        if end_fragment is not None:
            self.print_info(u"Slicing to index %d" % (end_fragment))
        text_slice = text_file.get_slice(start_fragment, end_fragment)
        self.print_info(u"Synthesizing %d fragments" % (len(text_slice)))

        if quit_after is not None:
            self.print_info(u"Stop synthesizing upon reaching %.3f seconds" %
                            (quit_after))

        try:
            synt = Synthesizer(rconf=self.rconf, logger=self.logger)
            try:
                synt.synthesize(text_slice,
                                output_file_path,
                                quit_after=quit_after,
                                backwards=backwards)
            finally:
                # clear the cache even when the synthesis raised;
                # an error raised here is still reported by the
                # handlers of the enclosing try
                synt.clear_cache()
            self.print_success(u"Created file '%s'" % output_file_path)
            return self.NO_ERROR_EXIT_CODE
        except ImportError as exc:
            # an ImportError is reported with install instructions
            # when the configured TTS is one of the API wrappers
            tts = self.rconf[RuntimeConfiguration.TTS]
            if tts == Synthesizer.AWS:
                self.print_error(
                    u"You need to install Python module boto3 to use the AWS Polly TTS API wrapper. Run:"
                )
                self.print_error(u"$ pip install boto3")
                self.print_error(u"or, to install for all users:")
                self.print_error(u"$ sudo pip install boto3")
            elif tts == Synthesizer.NUANCE:
                self.print_error(
                    u"You need to install Python module requests to use the Nuance TTS API wrapper. Run:"
                )
                self.print_error(u"$ pip install requests")
                self.print_error(u"or, to install for all users:")
                self.print_error(u"$ sudo pip install requests")
            else:
                self.print_error(
                    u"An unexpected error occurred while synthesizing text:")
                self.print_error(u"%s" % exc)
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while synthesizing text:")
            self.print_error(u"%s" % exc)

        return self.ERROR_EXIT_CODE
예제 #2
0
    def perform_command(self):
        """
        Synthesize the requested text into an audio file
        and return the appropriate exit code.

        The positional arguments are read from ``self.actual_arguments``
        in this order: text format, text (the text itself for the
        ``list`` format, otherwise the path of an input file),
        language, and output file path.

        The synthesizer cache is cleared as soon as the synthesis
        attempt is over, whether it succeeded or raised.

        :rtype: int
        """
        if len(self.actual_arguments) < 4:
            return self.print_help()
        text_format = gf.safe_unicode(self.actual_arguments[0])
        if text_format == u"list":
            text = gf.safe_unicode(self.actual_arguments[1])
        elif text_format in TextFileFormat.ALLOWED_VALUES:
            text = self.actual_arguments[1]
            if not self.check_input_file(text):
                return self.ERROR_EXIT_CODE
        else:
            return self.print_help()

        l1_id_regex = self.has_option_with_value(u"--l1-id-regex")
        l2_id_regex = self.has_option_with_value(u"--l2-id-regex")
        l3_id_regex = self.has_option_with_value(u"--l3-id-regex")
        id_regex = self.has_option_with_value(u"--id-regex")
        class_regex = self.has_option_with_value(u"--class-regex")
        sort = self.has_option_with_value(u"--sort")
        backwards = self.has_option([u"-b", u"--backwards"])
        quit_after = gf.safe_float(self.has_option_with_value(u"--quit-after"), None)
        start_fragment = gf.safe_int(self.has_option_with_value(u"--start"), None)
        end_fragment = gf.safe_int(self.has_option_with_value(u"--end"), None)
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: l1_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: l2_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: l3_id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX: class_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT: sort,
        }
        # munparsed needs all three level regexes
        if (text_format == TextFileFormat.MUNPARSED) and ((l1_id_regex is None) or (l2_id_regex is None) or (l3_id_regex is None)):
            self.print_error(u"You must specify --l1-id-regex and --l2-id-regex and --l3-id-regex for munparsed format")
            return self.ERROR_EXIT_CODE
        # unparsed needs at least one of the two regexes
        if (text_format == TextFileFormat.UNPARSED) and (id_regex is None) and (class_regex is None):
            self.print_error(u"You must specify --id-regex and/or --class-regex for unparsed format")
            return self.ERROR_EXIT_CODE

        language = gf.safe_unicode(self.actual_arguments[2])

        output_file_path = self.actual_arguments[3]
        if not self.check_output_file(output_file_path):
            return self.ERROR_EXIT_CODE

        text_file = self.get_text_file(text_format, text, parameters)
        if text_file is None:
            self.print_error(u"Unable to build a TextFile from the given parameters")
            return self.ERROR_EXIT_CODE
        elif len(text_file) == 0:
            self.print_error(u"No text fragments found")
            return self.ERROR_EXIT_CODE
        text_file.set_language(language)
        self.print_info(u"Read input text with %d fragments" % (len(text_file)))
        if start_fragment is not None:
            self.print_info(u"Slicing from index %d" % (start_fragment))
        if end_fragment is not None:
            self.print_info(u"Slicing to index %d" % (end_fragment))
        text_slice = text_file.get_slice(start_fragment, end_fragment)
        self.print_info(u"Synthesizing %d fragments" % (len(text_slice)))

        if quit_after is not None:
            self.print_info(u"Stop synthesizing upon reaching %.3f seconds" % (quit_after))

        try:
            synt = Synthesizer(rconf=self.rconf, logger=self.logger)
            try:
                synt.synthesize(
                    text_slice,
                    output_file_path,
                    quit_after=quit_after,
                    backwards=backwards
                )
            finally:
                # clear the cache even when the synthesis raised;
                # an error raised here is still reported by the
                # handlers of the enclosing try
                synt.clear_cache()
            self.print_success(u"Created file '%s'" % output_file_path)
            return self.NO_ERROR_EXIT_CODE
        except ImportError as exc:
            # an ImportError is reported with install instructions
            # when the configured TTS is one of the API wrappers
            tts = self.rconf[RuntimeConfiguration.TTS]
            if tts == Synthesizer.AWS:
                self.print_error(u"You need to install Python module boto3 to use the AWS Polly TTS API wrapper. Run:")
                self.print_error(u"$ pip install boto3")
                self.print_error(u"or, to install for all users:")
                self.print_error(u"$ sudo pip install boto3")
            elif tts == Synthesizer.NUANCE:
                self.print_error(u"You need to install Python module requests to use the Nuance TTS API wrapper. Run:")
                self.print_error(u"$ pip install requests")
                self.print_error(u"or, to install for all users:")
                self.print_error(u"$ sudo pip install requests")
            else:
                self.print_error(u"An unexpected error occurred while synthesizing text:")
                self.print_error(u"%s" % exc)
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while synthesizing text:")
            self.print_error(u"%s" % exc)

        return self.ERROR_EXIT_CODE
예제 #3
0
 def test_clear_cache(self):
     """Clearing the cache of a freshly built synthesizer must not raise."""
     Synthesizer().clear_cache()
예제 #4
0
class ExecuteTask(Loggable):
    """
    Execute a task, that is, compute the sync map for it.

    :param task: the task to be executed
    :type  task: :class:`~aeneas.task.Task`
    :param rconf: a runtime configuration
    :type  rconf: :class:`~aeneas.runtimeconfiguration.RuntimeConfiguration`
    :param logger: the logger object
    :type  logger: :class:`~aeneas.logger.Logger`
    """

    TAG = u"ExecuteTask"

    def __init__(self, task=None, rconf=None, logger=None):
        """
        Initialize the step bookkeeping and, if a task is given, load it.
        """
        super(ExecuteTask, self).__init__(rconf=rconf, logger=logger)
        # step bookkeeping used by the _step_* helpers
        self.step_index, self.step_label = 1, u""
        self.step_begin_time, self.step_total = None, 0.000
        self.synthesizer = None
        self.task = task
        if self.task is not None:
            # validate the task type through load_task
            self.load_task(self.task)

    def load_task(self, task):
        """
        Store the given ``Task`` object as the task to be executed.

        :param task: the task to load
        :type  task: :class:`~aeneas.task.Task`
        :raises: :class:`~aeneas.executetask.ExecuteTaskInputError`: if ``task`` is not an instance of :class:`~aeneas.task.Task`
        """
        is_task = isinstance(task, Task)
        if not is_task:
            self.log_exc(
                u"task is not an instance of Task",
                None,
                True,
                ExecuteTaskInputError
            )
        self.task = task

    def _step_begin(self, label, log=True):
        """ Log the begin of a step, remembering its label and begin time """
        if not log:
            return
        self.step_label = label
        begin_message = u"STEP %d BEGIN (%s)" % (self.step_index, label)
        # the value returned by log() is subtracted in _step_end
        self.step_begin_time = self.log(begin_message)

    def _step_end(self, log=True):
        """
        Log the end of the current step and its duration,
        add the duration to the running total,
        and advance the step index.

        Nothing is done when ``log`` is ``False``.

        :param bool log: if ``True``, log the end of the step
        """
        if log:
            step_end_time = self.log(u"STEP %d END (%s)" %
                                     (self.step_index, self.step_label))
            diff = (step_end_time - self.step_begin_time)
            # total_seconds() also accounts for the ``days`` component,
            # which ``seconds + microseconds`` alone silently drops
            diff = float(diff.total_seconds())
            self.step_total += diff
            self.log(u"STEP %d DURATION %.3f (%s)" %
                     (self.step_index, diff, self.step_label))
            self.step_index += 1

    def _step_failure(self, exc):
        """ Log the failure of the current step and pass ``exc`` to ``log_exc`` """
        failure_message = u"STEP %d (%s) FAILURE" % (self.step_index,
                                                     self.step_label)
        self.log_crit(failure_message)
        self.step_index += 1
        self.log_exc(
            u"Unexpected error while executing task",
            exc,
            True,
            ExecuteTaskExecutionError
        )

    def _step_total(self):
        """ Log the duration accumulated over all the steps """
        total_message = u"STEP T DURATION %.3f" % (self.step_total)
        self.log(total_message)

    def execute(self):
        """
        Check the audio and text inputs of the task, then execute it.
        The sync map produced will be stored inside the task object.

        Multilevel text formats are handled by the multi-level procedure,
        every other format by the single-level one.

        :raises: :class:`~aeneas.executetask.ExecuteTaskInputError`: if there is a problem with the input parameters
        :raises: :class:`~aeneas.executetask.ExecuteTaskExecutionError`: if there is a problem during the task execution
        """
        self.log(u"Executing task...")

        # audio checks: present, positive length, within the maximum (if set)
        audio_file = self.task.audio_file
        if audio_file is None:
            self.log_exc(
                u"The task does not seem to have its audio file set",
                None, True, ExecuteTaskInputError)
        audio_length = audio_file.audio_length
        if (audio_length is None) or (audio_length <= 0):
            self.log_exc(
                u"The task seems to have an invalid audio file",
                None, True, ExecuteTaskInputError)
        max_audio_length = self.rconf[
            RuntimeConfiguration.TASK_MAX_AUDIO_LENGTH]
        if (max_audio_length > 0) and (audio_length > max_audio_length):
            self.log_exc(
                u"The audio file of the task has length %.3f, more than the maximum allowed (%.3f)."
                % (audio_length, max_audio_length),
                None, True, ExecuteTaskInputError)

        # text checks: present, not empty, within the maximum (if set)
        text_file = self.task.text_file
        if text_file is None:
            self.log_exc(
                u"The task does not seem to have its text file set",
                None, True, ExecuteTaskInputError)
        fragment_count = len(text_file)
        if fragment_count == 0:
            self.log_exc(
                u"The task text file seems to have no text fragments",
                None, True, ExecuteTaskInputError)
        max_text_length = self.rconf[
            RuntimeConfiguration.TASK_MAX_TEXT_LENGTH]
        if (max_text_length > 0) and (fragment_count > max_text_length):
            self.log_exc(
                u"The text file of the task has %d fragments, more than the maximum allowed (%d)."
                % (fragment_count, max_text_length),
                None, True, ExecuteTaskInputError)
        if text_file.chars == 0:
            self.log_exc(
                u"The task text file seems to have empty text",
                None, True, ExecuteTaskInputError)

        self.log(u"Both audio and text input file are present")

        # reset the step counters, then dispatch on the text format
        self.step_index = 1
        self.step_total = 0.000
        is_multilevel = (
            text_file.file_format in TextFileFormat.MULTILEVEL_VALUES)
        if is_multilevel:
            self._execute_multi_level_task()
        else:
            self._execute_single_level_task()
        self.log(u"Executing task... done")

    def _execute_single_level_task(self):
        """
        Execute a single-level task.

        Any exception raised by a step is handed to ``_step_failure``.
        """
        self.log(u"Executing single level task...")
        try:
            # step: MFCCs of the real wave, read from the task audio path
            self._step_begin(u"extract MFCC real wave")
            audio_mfcc = self._extract_mfcc(
                file_path=self.task.audio_file_path_absolute,
                file_format=None,
            )
            self._step_end()

            # step: head / process / tail lengths, stored on the MFCC object
            self._step_begin(u"compute head tail")
            (head, middle, tail) = self._compute_head_process_tail(audio_mfcc)
            audio_mfcc.set_head_middle_tail(head, middle, tail)
            self._step_end()

            # alignment: the time intervals are collected under ``tree``
            self._set_synthesizer()
            tree = Tree()
            self._execute_inner(
                audio_mfcc,
                self.task.text_file,
                sync_root=tree,
                force_aba_auto=False,
                log=True,
                leaf_level=True
            )
            self._clear_cache_synthesizer()

            # step: build the sync map from the tree and attach it to the task
            self._step_begin(u"create sync map")
            self._create_sync_map(sync_root=tree)
            self._step_end()

            # total duration of all the steps
            self._step_total()
            self.log(u"Executing single level task... done")
        except Exception as exc:
            self._step_failure(exc)

    def _execute_multi_level_task(self):
        """
        Execute a multi-level task.

        One clone of the runtime configuration is prepared per level
        (levels 1 to 3), and ``self.rconf`` is swapped to the clone of
        the level being processed. The original configuration is put
        back right before the sync map is created.

        Any exception raised by a step is handed to ``_step_failure``.
        NOTE(review): the original rconf is restored only on the success
        path; if a step raises earlier, ``self.rconf`` is left pointing
        at one of the per-level clones.
        """
        self.log(u"Executing multi level task...")

        self.log(u"Saving rconf...")
        # save original rconf
        orig_rconf = self.rconf.clone()
        # clone rconfs and set granularity
        # TODO the following code assumes 3 levels: generalize this
        # index 0 is a placeholder, so that list index == level number
        level_rconfs = [
            None,
            self.rconf.clone(),
            self.rconf.clone(),
            self.rconf.clone()
        ]
        level_mfccs = [None, None, None, None]
        # only the last level (3) is run with force_aba_auto=True
        force_aba_autos = [None, False, False, True]
        for i in range(1, len(level_rconfs)):
            level_rconfs[i].set_granularity(i)
            self.log([u"Level %d mmn: %s", i, level_rconfs[i].mmn])
            self.log([u"Level %d mwl: %.3f", i, level_rconfs[i].mwl])
            self.log([u"Level %d mws: %.3f", i, level_rconfs[i].mws])
            level_rconfs[i].set_tts(i)
            self.log([u"Level %d tts: %s", i, level_rconfs[i].tts])
            self.log([u"Level %d tts_path: %s", i, level_rconfs[i].tts_path])
        self.log(u"Saving rconf... done")
        try:
            self.log(u"Creating AudioFile object...")
            audio_file = self._load_audio_file()
            self.log(u"Creating AudioFile object... done")

            # extract MFCC for each level
            for i in range(1, len(level_rconfs)):
                self._step_begin(u"extract MFCC real wave level %d" % i)
                # recompute only for level 1, or when mws or mwl differ
                # from the previous level; otherwise share the same object
                if (i == 1) or (
                        level_rconfs[i].mws != level_rconfs[i - 1].mws) or (
                            level_rconfs[i].mwl != level_rconfs[i - 1].mwl):
                    # _extract_mfcc reads self.rconf, so swap it first
                    self.rconf = level_rconfs[i]
                    level_mfccs[i] = self._extract_mfcc(audio_file=audio_file)
                else:
                    self.log(u"Keeping MFCC real wave from previous level")
                    level_mfccs[i] = level_mfccs[i - 1]
                self._step_end()

            self.log(u"Clearing AudioFile object...")
            self.rconf = level_rconfs[1]
            self._clear_audio_file(audio_file)
            self.log(u"Clearing AudioFile object... done")

            # compute head tail for the entire real wave (level 1)
            self._step_begin(u"compute head tail")
            (head_length, process_length,
             tail_length) = self._compute_head_process_tail(level_mfccs[1])
            level_mfccs[1].set_head_middle_tail(head_length, process_length,
                                                tail_length)
            self._step_end()

            # compute alignment at each level
            # each call to _execute_level returns the text files and
            # sync roots to be processed at the next level
            sync_root = Tree()
            sync_roots = [sync_root]
            text_files = [self.task.text_file]
            number_levels = len(level_rconfs)
            for i in range(1, number_levels):
                self._step_begin(u"compute alignment level %d" % i)
                self.rconf = level_rconfs[i]
                text_files, sync_roots = self._execute_level(
                    level=i,
                    audio_file_mfcc=level_mfccs[i],
                    text_files=text_files,
                    sync_roots=sync_roots,
                    force_aba_auto=force_aba_autos[i],
                )
                self._step_end()

            # restore original rconf, and create syncmap and add it to task
            self._step_begin(u"create sync map")
            self.rconf = orig_rconf
            self._create_sync_map(sync_root=sync_root)
            self._step_end()

            # log total
            self._step_total()
            self.log(u"Executing multi level task... done")
        except Exception as exc:
            self._step_failure(exc)

    def _execute_level(self,
                       level,
                       audio_file_mfcc,
                       text_files,
                       sync_roots,
                       force_aba_auto=False):
        """
        Align every text (sub)tree of the given level
        against its own interval of the audio.

        Return a pair (next_level_text_files, next_level_sync_roots),
        containing two lists of text file subtrees and sync map subtrees
        on the next level.

        :param int level: the level
        :param audio_file_mfcc: the audio MFCC representation for this level
        :type  audio_file_mfcc: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        :param list text_files: a list of :class:`~aeneas.textfile.TextFile` objects,
                                each representing a (sub)tree of the Task text file
        :param list sync_roots: a list of :class:`~aeneas.tree.Tree` objects,
                                each representing a SyncMapFragment tree,
                                one for each element in ``text_files``
        :param bool force_aba_auto: if ``True``, force using the AUTO ABA algorithm
        :rtype: (list, list)
        """
        self._set_synthesizer()
        text_children = []
        sync_children = []
        for index, text_file in enumerate(text_files):
            self.log([u"Text level %d, fragment %d", level, index])
            self.log([u"  Len:   %d", len(text_file)])
            sync_root = sync_roots[index]

            # below the first level, a single fragment or an empty parent
            # interval needs no alignment: a trivial tree is appended
            trivial_reason = None
            if level > 1:
                if len(text_file) == 1:
                    trivial_reason = u"Level > 1 and only one text fragment => return trivial tree"
                elif sync_root.value.begin == sync_root.value.end:
                    trivial_reason = u"Level > 1 and parent has begin == end => return trivial tree"

            if trivial_reason is not None:
                self.log(trivial_reason)
                self._append_trivial_tree(text_file, sync_root)
            else:
                self.log(
                    u"Level == 1 or more than one text fragment with non-zero parent => compute tree"
                )
                if sync_root.is_empty:
                    self.log(u"  No begin or end to set")
                else:
                    # restrict the audio to the interval of the parent node
                    begin = sync_root.value.begin
                    end = sync_root.value.end
                    self.log([u"  Setting begin: %.3f", begin])
                    self.log([u"  Setting end:   %.3f", end])
                    audio_file_mfcc.set_head_middle_tail(
                        head_length=begin,
                        middle_length=(end - begin)
                    )
                self._execute_inner(
                    audio_file_mfcc,
                    text_file,
                    sync_root=sync_root,
                    force_aba_auto=force_aba_auto,
                    log=False,
                    leaf_level=(level == 3)
                )

            # collect the roots for the next level
            text_children.extend(text_file.children_not_empty)
            # head and tail were added: they must not reach the next level
            sync_children.extend(sync_root.children[1:-1])
        self._clear_cache_synthesizer()
        return (text_children, sync_children)

    def _execute_inner(self,
                       audio_file_mfcc,
                       text_file,
                       sync_root=None,
                       force_aba_auto=False,
                       log=True,
                       leaf_level=False):
        """
        Align a subinterval of the given AudioFileMFCC
        with the given TextFile.

        This method returns nothing: ``sync_root`` is handed to
        ``_adjust_boundaries`` together with the computed indices.

        The begin and end positions inside the AudioFileMFCC
        must have been set ahead by the caller.

        The text fragments being aligned are the vchildren of ``text_file``.

        The reference ("synt") wave is obtained in one of three ways,
        chosen by substring match on ``text_file.file_path``:

        1. path contains ``timings``: wave path and anchors are read
           from the file by ``_provide_times``; the wave is NOT deleted;
        2. path contains ``clips``: ``_time_and_combine`` builds
           a temporary wave from existing audio clips;
        3. otherwise: ``_synthesize`` produces a temporary wave.

        In cases 2 and 3 the temporary wave is deleted
        once its MFCCs have been extracted, even if the extraction fails.

        :param audio_file_mfcc: the audio file MFCC representation
        :type  audio_file_mfcc: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        :param text_file: the text file subtree to align
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :param sync_root: the tree node to which fragments should be appended
        :type  sync_root: :class:`~aeneas.tree.Tree`
        :param bool force_aba_auto: passed unchanged to ``_adjust_boundaries``
        :param bool log: if ``True``, log steps
        :param bool leaf_level: alert aba if the computation is at a leaf level
        """
        if 'timings' in text_file.file_path:
            self._step_begin(u"extract timings", log=log)
            synt_path, synt_anchors, synt_format = self._provide_times(
                text_file)
            self._step_end(log=log)
            # the wave listed in the timings file is not ours to delete
            synt_handler = None
            synt_is_temporary = False
        else:
            self._step_begin(u"synthesize text", log=log)
            if 'clips' in text_file.file_path:
                produce_wave = self._time_and_combine
            else:
                produce_wave = self._synthesize
            synt_handler, synt_path, synt_anchors, synt_format = produce_wave(
                text_file)
            self._step_end(log=log)
            synt_is_temporary = True

        self._step_begin(u"extract MFCC synt wave", log=log)
        try:
            synt_wave_mfcc = self._extract_mfcc(
                file_path=synt_path,
                file_format=synt_format,
            )
        finally:
            # do not leak the temporary wave if the extraction raises
            if synt_is_temporary:
                gf.delete_file(synt_handler, synt_path)
        self._step_end(log=log)

        self._step_begin(u"align waves", log=log)
        indices = self._align_waves(audio_file_mfcc, synt_wave_mfcc,
                                    synt_anchors)
        self._step_end(log=log)

        self._step_begin(u"adjust boundaries", log=log)
        self._adjust_boundaries(indices, text_file, audio_file_mfcc, sync_root,
                                force_aba_auto, leaf_level)
        self._step_end(log=log)

    def _provide_times(self, text_file):
        """
        Read the anchors from the timings file
        located at ``text_file.file_path``.

        Each non-blank line of the file must consist of exactly three
        comma-separated fields, ``identifier,start,audio_path``.
        Blank lines are skipped.

        Return a tuple consisting of:

        1. the audio path found in the last field of the first line
        2. the list of anchors, one ``[TimeValue(start), identifier, audio_path]``
           list per line
        3. a tuple describing the format of the audio file

        :param text_file: the object whose ``file_path`` is the timings file
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :rtype: tuple (string, list, tuple)
        :raises: ValueError: if the file contains no timings,
                             or a line does not have exactly three fields
        """
        with open(text_file.file_path) as timings_file:
            rows = [
                line.strip().split(',')
                for line in timings_file
                if line.strip()
            ]
        if not rows:
            raise ValueError(
                u"No timings found in '%s'" % text_file.file_path)

        synt_anchors = [[TimeValue(start), identifier, audio_path]
                        for identifier, start, audio_path in rows]

        synt_wav = rows[0][-1]
        # NOTE(review): 'pcm_s161e' (digit one before the "e") looks like
        #               a typo for 'pcm_s16le' -- confirm before changing
        synt_format = ('pcm_s161e', 1, 2)
        return synt_wav, synt_anchors, synt_format

    def _time_and_combine(self, text_file):
        """
        Combine original audio clips into a single WAV file.

        ``text_file.file_path`` is passed to ``ffmpeg -f concat``
        as the list of clips to concatenate.
        The name of each clip is taken from the text of the corresponding
        fragment, as the part between its first pair of single quotes.

        Return a tuple consisting of:

        1. the handler of the generated audio file
        2. the path of the generated audio file
        3. the list of anchors, one ``[start, identifier, audio_path]``
           list per text fragment, where ``start`` is the sum of
           the lengths of the preceding clips
        4. a tuple describing the format of the audio file

        :param text_file: the text with audio clips to be timed/combined
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :rtype: tuple (handler, string, list, tuple)
        :raises: OSError: if ``ffmpeg`` cannot be started
        """
        import subprocess

        # Concatenate all clips into a single, temporary file
        handler, path = gf.tmp_file(
            suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        # argument list and no shell: paths containing spaces or
        # shell metacharacters reach ffmpeg verbatim
        cmd = [
            "ffmpeg", "-y", "-f", "concat", "-i", text_file.file_path,
            "-c", "copy", path
        ]
        try:
            return_code = subprocess.call(cmd)
        except OSError:
            # ffmpeg could not be started: do not leave the temporary file
            gf.delete_file(handler, path)
            raise
        if return_code != 0:
            self.log([u"ffmpeg concat exited with return code %d", return_code])

        # NOTE(review): 'pcm_s161e' (digit one before the "e") looks like
        #               a typo for 'pcm_s16le' -- confirm before changing
        audio_format = ('pcm_s161e', 1, 2)

        # Build "synt" anchor times
        anchor_time, anchors = TimeValue('0.0'), []
        for fragment in text_file.fragments:
            # NOTE(review): the clips directory is hard-coded
            audio_path = 'output/sample/{}'.format(fragment.text.split("'")[1])
            # TODO: Investigate faster ways to get the audio_length
            audio_file = AudioFileMFCC(file_path=audio_path,
                                       file_format=audio_format)
            anchors.append([anchor_time, fragment.identifier, audio_path])
            anchor_time += audio_file.audio_length

        return (handler, path, anchors, audio_format)

    def _load_audio_file(self):
        """
        Read the samples of the task audio file into memory,
        logging the operation as a step.

        :rtype: :class:`~aeneas.audiofile.AudioFile`
        """
        self._step_begin(u"load audio file")
        # NOTE file_format=None forces conversion to
        #      PCM16 mono WAVE with default sample rate
        loaded = AudioFile(
            file_path=self.task.audio_file_path_absolute,
            file_format=None,
            rconf=self.rconf,
            logger=self.logger
        )
        loaded.read_samples_from_file()
        self._step_end()
        return loaded

    def _clear_audio_file(self, audio_file):
        """
        Clear audio from memory.

        :param audio_file: the object to clear
        :type  audio_file: :class:`~aeneas.audiofile.AudioFile`
        """
        self._step_begin(u"clear audio file")
        audio_file.clear_data()
        audio_file = None
        self._step_end()

    def _extract_mfcc(self, file_path=None, file_format=None, audio_file=None):
        """
        Create the MFCC representation of an audio file.

        The three arguments are forwarded, unchanged, to
        :class:`~aeneas.audiofilemfcc.AudioFileMFCC`.
        If ``self.rconf.mmn`` is true, VAD is also run on the new object,
        using the four ``MFCC_MASK_*`` values read from ``self.rconf``.

        :param file_path: forwarded as ``file_path``
        :param file_format: forwarded as ``file_format``
        :param audio_file: forwarded as ``audio_file``
        :rtype: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        """
        mfcc = AudioFileMFCC(
            file_path=file_path,
            file_format=file_format,
            audio_file=audio_file,
            rconf=self.rconf,
            logger=self.logger
        )
        if not self.rconf.mmn:
            # no masking requested: nothing else to do
            return mfcc
        self.log(u"Running VAD inside _extract_mfcc...")
        settings = self.rconf
        vad_arguments = {
            "log_energy_threshold": settings[RuntimeConfiguration.MFCC_MASK_LOG_ENERGY_THRESHOLD],
            "min_nonspeech_length": settings[RuntimeConfiguration.MFCC_MASK_MIN_NONSPEECH_LENGTH],
            "extend_before": settings[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_BEFORE],
            "extend_after": settings[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_AFTER],
        }
        mfcc.run_vad(**vad_arguments)
        self.log(u"Running VAD inside _extract_mfcc... done")
        return mfcc

    def _compute_head_process_tail(self, audio_file_mfcc):
        """
        Determine the lengths of the head, process, and tail
        portions of the audio file.

        If at least one of ``i_a_head``, ``i_a_process``, ``i_a_tail``
        is set in the Task configuration, the three configured values
        are returned as they are.
        Otherwise the process length is ``None``, and head and tail
        are zero unless the corresponding min/max values are set,
        in which case SD is used to detect them.

        :param audio_file_mfcc: the audio representation passed to SD
        :returns: the triple ``(head, process, tail)``
        :rtype: tuple
        """
        configuration = self.task.configuration
        head_length = configuration["i_a_head"]
        process_length = configuration["i_a_process"]
        tail_length = configuration["i_a_tail"]
        head_max = configuration["i_a_head_max"]
        head_min = configuration["i_a_head_min"]
        tail_max = configuration["i_a_tail_max"]
        tail_min = configuration["i_a_tail_min"]
        nothing_explicit = (
            (head_length is None) and
            (process_length is None) and
            (tail_length is None)
        )
        if nothing_explicit:
            self.log(u"Detecting head tail...")
            detector = SD(
                audio_file_mfcc,
                self.task.text_file,
                rconf=self.rconf,
                logger=self.logger
            )
            head_length = TimeValue("0.000")
            process_length = None
            tail_length = TimeValue("0.000")
            if not ((head_min is None) and (head_max is None)):
                self.log(u"Detecting HEAD...")
                head_length = detector.detect_head(head_min, head_max)
                self.log([u"Detected HEAD: %.3f", head_length])
                self.log(u"Detecting HEAD... done")
            if not ((tail_min is None) and (tail_max is None)):
                self.log(u"Detecting TAIL...")
                tail_length = detector.detect_tail(tail_min, tail_max)
                self.log([u"Detected TAIL: %.3f", tail_length])
                self.log(u"Detecting TAIL... done")
            self.log(u"Detecting head tail... done")
        else:
            self.log(u"Setting explicit head process tail")
        summary = (
            (u"Head:    %s", head_length),
            (u"Process: %s", process_length),
            (u"Tail:    %s", tail_length),
        )
        for template, length in summary:
            self.log([template, gf.safe_float(length, None)])
        return (head_length, process_length, tail_length)

    def _set_synthesizer(self):
        """
        Create a new synthesizer, using the current ``rconf`` and ``logger``,
        and store it in ``self.synthesizer``.
        """
        self.log(u"Setting synthesizer...")
        fresh = Synthesizer(rconf=self.rconf, logger=self.logger)
        self.synthesizer = fresh
        self.log(u"Setting synthesizer... done")

    def _clear_cache_synthesizer(self):
        """ Clear the cache of the synthesizer """
        self.log(u"Clearing synthesizer...")
        self.synthesizer.clear_cache()
        self.log(u"Clearing synthesizer... done")

    def _synthesize(self, text_file):
        """
        Synthesize text into a WAVE file.

        Return a tuple consisting of:

        1. the handler of the generated audio file
        2. the path of the generated audio file
        3. the list of anchors, that is, the first element of
           the value returned by the synthesizer
        4. the output audio format reported by the synthesizer

        The caller owns the temporary file and must delete it.
        If the synthesis fails, the temporary file is deleted here,
        since the caller would never receive its handler and path.

        :param text_file: the text to be synthesized
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :rtype: tuple (handler, string, list, tuple)
        """
        handler, path = gf.tmp_file(
            suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        try:
            result = self.synthesizer.synthesize(text_file, path)
            anchors = result[0]
            audio_format = self.synthesizer.output_audio_format
        except Exception:
            # do not leak the temporary file, then let the error propagate
            gf.delete_file(handler, path)
            raise
        return (handler, path, anchors, audio_format)

    def _align_waves(self, real_wave_mfcc, synt_wave_mfcc, synt_anchors):
        """
        Align the real wave with the synthesized wave,
        both given as AudioFileMFCC objects.

        :param real_wave_mfcc: the MFCC representation of the real wave
        :param synt_wave_mfcc: the MFCC representation of the synthesized wave
        :param synt_anchors: the anchors passed to ``compute_boundaries``
        :returns: the boundary indices computed by the DTW aligner
        """
        self.log(u"Creating DTWAligner...")
        dtw = DTWAligner(
            real_wave_mfcc,
            synt_wave_mfcc,
            rconf=self.rconf,
            logger=self.logger
        )
        self.log(u"Creating DTWAligner... done")

        self.log(u"Computing boundary indices...")
        computed = dtw.compute_boundaries(synt_anchors)
        self.log(u"Computing boundary indices... done")
        return computed

    def _adjust_boundaries(self,
                           boundary_indices,
                           text_file,
                           real_wave_mfcc,
                           sync_root,
                           force_aba_auto=False,
                           leaf_level=False):
        """
        Adjust boundaries as requested by the user,
        and append the resulting fragment list to ``sync_root``.

        This method does not return a value:
        its result is the modification of ``sync_root``.

        :param boundary_indices: the boundary indices in the all_mfcc of
                                 ``real_wave_mfcc``, starting with
                                 (head-1st fragment) and ending with
                                 (-1th fragment-tail)
        :param text_file: the text file passed to the ABA algorithm
        :param real_wave_mfcc: the MFCC representation of the real wave
        :param sync_root: the tree node to which fragments are appended
        :param bool force_aba_auto: if ``True``, use the AUTO algorithm
                                    instead of the one configured in the task
        :param bool leaf_level: passed to ABA as ``allow_arbitrary_shift``
        """
        parameters = self.task.configuration.aba_parameters()
        if force_aba_auto:
            self.log(u"Forced running algorithm: 'auto'")
            # only the algorithm is replaced: the other aba settings
            # (nonspeech and nozero) remain as specified by the user
            parameters["algorithm"] = (AdjustBoundaryAlgorithm.AUTO, [])
        self.log([u"ABA parameters: %s", parameters])
        adjuster = AdjustBoundaryAlgorithm(rconf=self.rconf, logger=self.logger)
        adjuster.adjust(
            aba_parameters=parameters,
            real_wave_mfcc=real_wave_mfcc,
            boundary_indices=boundary_indices,
            text_file=text_file,
            allow_arbitrary_shift=leaf_level
        )
        adjuster.append_fragment_list_to_sync_root(sync_root=sync_root)

    def _append_trivial_tree(self, text_file, sync_root):
        """
        Append to ``sync_root`` a trivial tree, made by one HEAD,
        one sync map fragment for each element of ``text_file``,
        and one TAIL, without running any alignment.

        This function is called if either ``text_file`` has only one element,
        or if ``sync_root.value`` is an interval with zero length
        (i.e., ``sync_root.value.begin == sync_root.value.end``).

        :param text_file: the text file whose elements get a fragment
        :param sync_root: the tree node to which fragments are appended
        """
        span = sync_root.value
        fragment_count = len(text_file)
        if fragment_count != 1:
            # here span.begin == span.end:
            # every boundary collapses onto the same time value
            boundaries = [span.begin] * (3 + fragment_count)
        else:
            # zero-length HEAD and TAIL around the only fragment
            boundaries = [span.begin, span.begin, span.end, span.end]
        builder = AdjustBoundaryAlgorithm(rconf=self.rconf, logger=self.logger)
        builder.intervals_to_fragment_list(
            text_file=text_file,
            time_values=boundaries
        )
        builder.append_fragment_list_to_sync_root(sync_root=sync_root)

    def _create_sync_map(self, sync_root):
        """
        Build the sync map from the given tree and add it to the Task.

        If ``self.rconf.safety_checks`` is true, the leaves of the sync map
        are checked for consistency first, and a step failure
        is reported if they are not consistent.

        :param sync_root: the root of the tree of sync map fragments
        """
        computed = SyncMap(tree=sync_root, rconf=self.rconf, logger=self.logger)
        if not self.rconf.safety_checks:
            self.log(u"Not running sanity check on computed sync map")
        else:
            self.log(u"Running sanity check on computed sync map...")
            consistent = computed.leaves_are_consistent
            if not consistent:
                failure = ValueError(
                    u"The computed sync map contains inconsistent fragments"
                )
                self._step_failure(failure)
            self.log(u"Running sanity check on computed sync map... passed")
        self.task.sync_map = computed
예제 #5
0
 def test_clear_cache(self):
     """Clearing the cache of a default-constructed synthesizer must not raise."""
     Synthesizer().clear_cache()
예제 #6
0
class ExecuteTask(Loggable):
    """
    Execute a task, that is, compute the sync map for it.

    :param task: the task to be executed
    :type  task: :class:`~aeneas.task.Task`
    :param rconf: a runtime configuration
    :type  rconf: :class:`~aeneas.runtimeconfiguration.RuntimeConfiguration`
    :param logger: the logger object
    :type  logger: :class:`~aeneas.logger.Logger`
    """

    TAG = u"ExecuteTask"

    def __init__(self, task=None, rconf=None, logger=None):
        """
        Initialize the step bookkeeping and, if a task is given, load it.

        :param task: the task to be executed, or ``None``
        :param rconf: a runtime configuration, passed to the parent class
        :param logger: the logger object, passed to the parent class
        """
        super(ExecuteTask, self).__init__(rconf=rconf, logger=logger)
        self.task = task
        # state used by the _step_* logging helpers
        self.step_index, self.step_label = 1, u""
        self.step_begin_time, self.step_total = None, 0.000
        # created later, by _set_synthesizer()
        self.synthesizer = None
        if self.task is not None:
            self.load_task(task)

    def load_task(self, task):
        """
        Store the given ``Task`` object as the task to be executed.

        :param task: the task to load
        :type  task: :class:`~aeneas.task.Task`
        :raises: :class:`~aeneas.executetask.ExecuteTaskInputError`: if ``task`` is not an instance of :class:`~aeneas.task.Task`
        """
        is_task = isinstance(task, Task)
        if not is_task:
            self.log_exc(
                u"task is not an instance of Task",
                None,
                True,
                ExecuteTaskInputError
            )
        self.task = task

    def _step_begin(self, label, log=True):
        """ Log begin of a step """
        if log:
            self.step_label = label
            self.step_begin_time = self.log(u"STEP %d BEGIN (%s)" % (self.step_index, label))

    def _step_end(self, log=True):
        """ Log end of a step """
        if log:
            step_end_time = self.log(u"STEP %d END (%s)" % (self.step_index, self.step_label))
            diff = (step_end_time - self.step_begin_time)
            diff = float(diff.seconds + diff.microseconds / 1000000.0)
            self.step_total += diff
            self.log(u"STEP %d DURATION %.3f (%s)" % (self.step_index, diff, self.step_label))
            self.step_index += 1

    def _step_failure(self, exc):
        """
        Log the failure of the current step, advance the step index,
        and report the error through ``log_exc``,
        requesting an ``ExecuteTaskExecutionError``.

        :param exc: the exception that caused the failure
        """
        failed_step = self.step_index
        self.log_crit(u"STEP %d (%s) FAILURE" % (failed_step, self.step_label))
        self.step_index += 1
        self.log_exc(
            u"Unexpected error while executing task",
            exc,
            True,
            ExecuteTaskExecutionError
        )

    def _step_total(self):
        """ Log total """
        self.log(u"STEP T DURATION %.3f" % (self.step_total))

    def execute(self):
        """
        Execute the task.
        The sync map produced will be stored inside the task object.

        Before running, the input is validated: the task must be set,
        its audio file must be set and have a positive length
        not exceeding ``TASK_MAX_AUDIO_LENGTH`` (if positive),
        and its text file must be set, have at least one fragment,
        not more than ``TASK_MAX_TEXT_LENGTH`` fragments (if positive),
        and a non-zero number of characters.
        Then the step counters are reset, and the multi-level or
        single-level execution is run, depending on the text file format.

        :raises: :class:`~aeneas.executetask.ExecuteTaskInputError`: if there is a problem with the input parameters
        :raises: :class:`~aeneas.executetask.ExecuteTaskExecutionError`: if there is a problem during the task execution
        """
        self.log(u"Executing task...")

        # the constructor accepts task=None: report the documented
        # input error, instead of failing with an AttributeError below
        if self.task is None:
            self.log_exc(u"The task is not set", None, True, ExecuteTaskInputError)

        # check that we have the AudioFile object
        if self.task.audio_file is None:
            self.log_exc(u"The task does not seem to have its audio file set", None, True, ExecuteTaskInputError)
        if (
                (self.task.audio_file.audio_length is None) or
                (self.task.audio_file.audio_length <= 0)
        ):
            self.log_exc(u"The task seems to have an invalid audio file", None, True, ExecuteTaskInputError)
        # a non-positive maximum disables the check
        task_max_audio_length = self.rconf[RuntimeConfiguration.TASK_MAX_AUDIO_LENGTH]
        if (
                (task_max_audio_length > 0) and
                (self.task.audio_file.audio_length > task_max_audio_length)
        ):
            self.log_exc(u"The audio file of the task has length %.3f, more than the maximum allowed (%.3f)." % (self.task.audio_file.audio_length, task_max_audio_length), None, True, ExecuteTaskInputError)

        # check that we have the TextFile object
        if self.task.text_file is None:
            self.log_exc(u"The task does not seem to have its text file set", None, True, ExecuteTaskInputError)
        if len(self.task.text_file) == 0:
            self.log_exc(u"The task text file seems to have no text fragments", None, True, ExecuteTaskInputError)
        # a non-positive maximum disables the check
        task_max_text_length = self.rconf[RuntimeConfiguration.TASK_MAX_TEXT_LENGTH]
        if (
                (task_max_text_length > 0) and
                (len(self.task.text_file) > task_max_text_length)
        ):
            self.log_exc(u"The text file of the task has %d fragments, more than the maximum allowed (%d)." % (len(self.task.text_file), task_max_text_length), None, True, ExecuteTaskInputError)
        if self.task.text_file.chars == 0:
            self.log_exc(u"The task text file seems to have empty text", None, True, ExecuteTaskInputError)

        self.log(u"Both audio and text input file are present")

        # execute, restarting the step bookkeeping
        self.step_index = 1
        self.step_total = 0.000
        if self.task.text_file.file_format in TextFileFormat.MULTILEVEL_VALUES:
            self._execute_multi_level_task()
        else:
            self._execute_single_level_task()
        self.log(u"Executing task... done")

    def _execute_single_level_task(self):
        """
        Execute a single-level task.

        The steps are: extract the MFCCs of the real wave,
        compute and set head/process/tail, align the text
        (appending the result to a new tree), and create the sync map.

        Any unexpected error is reported once through ``_step_failure``.
        An ``ExecuteTaskExecutionError`` raised by an inner call has
        already been reported by ``_step_failure`` (for example by the
        sanity check while creating the sync map), hence it is
        propagated as it is, instead of being reported a second time.

        :raises: :class:`~aeneas.executetask.ExecuteTaskExecutionError`: if there is a problem during the task execution
        """
        self.log(u"Executing single level task...")
        try:
            # load audio file, extract MFCCs from real wave, clear audio file
            self._step_begin(u"extract MFCC real wave")
            real_wave_mfcc = self._extract_mfcc(
                file_path=self.task.audio_file_path_absolute,
                file_format=None,
            )
            self._step_end()

            # compute head and/or tail and set it
            self._step_begin(u"compute head tail")
            (head_length, process_length, tail_length) = self._compute_head_process_tail(real_wave_mfcc)
            real_wave_mfcc.set_head_middle_tail(head_length, process_length, tail_length)
            self._step_end()

            # compute alignment, outputting a tree of time intervals
            self._set_synthesizer()
            sync_root = Tree()
            self._execute_inner(
                real_wave_mfcc,
                self.task.text_file,
                sync_root=sync_root,
                force_aba_auto=False,
                log=True,
                leaf_level=True
            )
            self._clear_cache_synthesizer()

            # create syncmap and add it to task
            self._step_begin(u"create sync map")
            self._create_sync_map(sync_root=sync_root)
            self._step_end()

            # log total
            self._step_total()
            self.log(u"Executing single level task... done")
        except ExecuteTaskExecutionError:
            # already logged and counted by _step_failure: do not repeat
            raise
        except Exception as exc:
            self._step_failure(exc)

    def _execute_multi_level_task(self):
        """
        Execute a multi-level task.

        One clone of the runtime configuration is created for each level,
        and ``self.rconf`` is switched to the clone of the level
        being processed. A clone of the configuration in use when this
        method was called is put back in ``self.rconf`` at the end,
        also when the execution fails.

        Any unexpected error is reported once through ``_step_failure``.
        An ``ExecuteTaskExecutionError`` raised by an inner call has
        already been reported by ``_step_failure``, hence it is
        propagated as it is, instead of being reported a second time.

        :raises: :class:`~aeneas.executetask.ExecuteTaskExecutionError`: if there is a problem during the task execution
        """
        self.log(u"Executing multi level task...")

        self.log(u"Saving rconf...")
        # save original rconf
        orig_rconf = self.rconf.clone()
        # clone rconfs and set granularity
        # TODO the following code assumes 3 levels: generalize this
        # NOTE index 0 is a placeholder, so that list index == level
        level_rconfs = [None, self.rconf.clone(), self.rconf.clone(), self.rconf.clone()]
        level_mfccs = [None, None, None, None]
        force_aba_autos = [None, False, False, True]
        for i in range(1, len(level_rconfs)):
            level_rconfs[i].set_granularity(i)
            self.log([u"Level %d mmn: %s", i, level_rconfs[i].mmn])
            self.log([u"Level %d mwl: %.3f", i, level_rconfs[i].mwl])
            self.log([u"Level %d mws: %.3f", i, level_rconfs[i].mws])
            level_rconfs[i].set_tts(i)
            self.log([u"Level %d tts: %s", i, level_rconfs[i].tts])
            self.log([u"Level %d tts_path: %s", i, level_rconfs[i].tts_path])
        self.log(u"Saving rconf... done")
        try:
            self.log(u"Creating AudioFile object...")
            audio_file = self._load_audio_file()
            self.log(u"Creating AudioFile object... done")

            # extract MFCC for each level
            for i in range(1, len(level_rconfs)):
                self._step_begin(u"extract MFCC real wave level %d" % i)
                # the MFCCs are extracted again only if the window
                # parameters differ from those of the previous level
                if (i == 1) or (level_rconfs[i].mws != level_rconfs[i - 1].mws) or (level_rconfs[i].mwl != level_rconfs[i - 1].mwl):
                    self.rconf = level_rconfs[i]
                    level_mfccs[i] = self._extract_mfcc(audio_file=audio_file)
                else:
                    self.log(u"Keeping MFCC real wave from previous level")
                    level_mfccs[i] = level_mfccs[i - 1]
                self._step_end()

            self.log(u"Clearing AudioFile object...")
            self.rconf = level_rconfs[1]
            self._clear_audio_file(audio_file)
            self.log(u"Clearing AudioFile object... done")

            # compute head tail for the entire real wave (level 1)
            self._step_begin(u"compute head tail")
            (head_length, process_length, tail_length) = self._compute_head_process_tail(level_mfccs[1])
            level_mfccs[1].set_head_middle_tail(head_length, process_length, tail_length)
            self._step_end()

            # compute alignment at each level
            sync_root = Tree()
            sync_roots = [sync_root]
            text_files = [self.task.text_file]
            number_levels = len(level_rconfs)
            for i in range(1, number_levels):
                self._step_begin(u"compute alignment level %d" % i)
                self.rconf = level_rconfs[i]
                text_files, sync_roots = self._execute_level(
                    level=i,
                    audio_file_mfcc=level_mfccs[i],
                    text_files=text_files,
                    sync_roots=sync_roots,
                    force_aba_auto=force_aba_autos[i],
                )
                self._step_end()

            # restore original rconf, and create syncmap and add it to task
            self._step_begin(u"create sync map")
            self.rconf = orig_rconf
            self._create_sync_map(sync_root=sync_root)
            self._step_end()

            self._step_total()
            self.log(u"Executing multi level task... done")
        except ExecuteTaskExecutionError:
            # already logged and counted by _step_failure: do not repeat
            raise
        except Exception as exc:
            self._step_failure(exc)
        finally:
            # do not leave a per-level configuration in place on failure
            self.rconf = orig_rconf

    def _execute_level(self, level, audio_file_mfcc, text_files, sync_roots, force_aba_auto=False):
        """
        Compute the alignment for all the nodes in the given level.

        Return a pair (next_level_text_files, next_level_sync_roots),
        containing two lists of text file subtrees and sync map subtrees
        on the next level.

        :param int level: the level
        :param audio_file_mfcc: the audio MFCC representation for this level
        :type  audio_file_mfcc: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        :param list text_files: a list of :class:`~aeneas.textfile.TextFile` objects,
                                each representing a (sub)tree of the Task text file
        :param list sync_roots: a list of :class:`~aeneas.tree.Tree` objects,
                                each representing a SyncMapFragment tree,
                                one for each element in ``text_files``
        :param bool force_aba_auto: if ``True``, force using the AUTO ABA algorithm
        :rtype: (list, list)
        """
        self._set_synthesizer()
        next_level_text_files = []
        next_level_sync_roots = []
        for text_file_index, text_file in enumerate(text_files):
            self.log([u"Text level %d, fragment %d", level, text_file_index])
            self.log([u"  Len:   %d", len(text_file)])
            sync_root = sync_roots[text_file_index]
            if (level > 1) and (len(text_file) == 1):
                self.log(u"Level > 1 and only one text fragment => return trivial tree")
                self._append_trivial_tree(text_file, sync_root)
            elif (level > 1) and (sync_root.value.begin == sync_root.value.end):
                self.log(u"Level > 1 and parent has begin == end => return trivial tree")
                self._append_trivial_tree(text_file, sync_root)
            else:
                self.log(u"Level == 1 or more than one text fragment with non-zero parent => compute tree")
                if not sync_root.is_empty:
                    begin = sync_root.value.begin
                    end = sync_root.value.end
                    self.log([u"  Setting begin: %.3f", begin])
                    self.log([u"  Setting end:   %.3f", end])
                    audio_file_mfcc.set_head_middle_tail(head_length=begin, middle_length=(end - begin))
                else:
                    self.log(u"  No begin or end to set")
                self._execute_inner(
                    audio_file_mfcc,
                    text_file,
                    sync_root=sync_root,
                    force_aba_auto=force_aba_auto,
                    log=False,
                    leaf_level=(level == 3)
                )
            # store next level roots
            next_level_text_files.extend(text_file.children_not_empty)
            # we added head and tail, we must not pass them to the next level
            next_level_sync_roots.extend(sync_root.children[1:-1])
        self._clear_cache_synthesizer()
        return (next_level_text_files, next_level_sync_roots)

    def _execute_inner(self, audio_file_mfcc, text_file, sync_root=None, force_aba_auto=False, log=True, leaf_level=False):
        """
        Align a subinterval of the given AudioFileMFCC
        with the given TextFile.

        The text is synthesized, the MFCCs of the synthesized wave are
        extracted, the two waves are aligned, and the boundaries are
        adjusted. The result is appended to ``sync_root``
        by ``_adjust_boundaries``: this method does not return a value.

        The temporary file holding the synthesized wave is deleted
        as soon as its MFCCs have been extracted,
        also when the extraction fails.

        The begin and end positions inside the AudioFileMFCC
        must have been set ahead by the caller.

        :param audio_file_mfcc: the audio file MFCC representation
        :type  audio_file_mfcc: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        :param text_file: the text file subtree to align
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :param sync_root: the tree node to which fragments should be appended
        :type  sync_root: :class:`~aeneas.tree.Tree`
        :param bool force_aba_auto: passed to ``_adjust_boundaries``,
                                    which forces the AUTO aba algorithm if ``True``
        :param bool log: if ``True``, log steps
        :param bool leaf_level: alert aba if the computation is at a leaf level
        """
        self._step_begin(u"synthesize text", log=log)
        synt_handler, synt_path, synt_anchors, synt_format = self._synthesize(text_file)
        self._step_end(log=log)

        self._step_begin(u"extract MFCC synt wave", log=log)
        try:
            synt_wave_mfcc = self._extract_mfcc(
                file_path=synt_path,
                file_format=synt_format,
            )
        finally:
            # the synthesized wave is no longer needed, whatever happened
            gf.delete_file(synt_handler, synt_path)
        self._step_end(log=log)

        self._step_begin(u"align waves", log=log)
        indices = self._align_waves(audio_file_mfcc, synt_wave_mfcc, synt_anchors)
        self._step_end(log=log)

        self._step_begin(u"adjust boundaries", log=log)
        self._adjust_boundaries(indices, text_file, audio_file_mfcc, sync_root, force_aba_auto, leaf_level)
        self._step_end(log=log)

    def _load_audio_file(self):
        """
        Read the audio file of the task into memory,
        logging the whole operation as one step.

        :returns: the audio file object, with its samples read
        :rtype: :class:`~aeneas.audiofile.AudioFile`
        """
        self._step_begin(u"load audio file")
        # NOTE file_format=None forces conversion to
        #      PCM16 mono WAVE with default sample rate
        arguments = {
            "file_path": self.task.audio_file_path_absolute,
            "file_format": None,
            "rconf": self.rconf,
            "logger": self.logger,
        }
        loaded = AudioFile(**arguments)
        loaded.read_samples_from_file()
        self._step_end()
        return loaded

    def _clear_audio_file(self, audio_file):
        """
        Clear audio from memory.

        :param audio_file: the object to clear
        :type  audio_file: :class:`~aeneas.audiofile.AudioFile`
        """
        self._step_begin(u"clear audio file")
        audio_file.clear_data()
        audio_file = None
        self._step_end()

    def _extract_mfcc(self, file_path=None, file_format=None, audio_file=None):
        """
        Create the MFCC representation of an audio file.

        The three arguments are forwarded, unchanged, to
        :class:`~aeneas.audiofilemfcc.AudioFileMFCC`.
        If ``self.rconf.mmn`` is true, VAD is also run on the new object,
        using the four ``MFCC_MASK_*`` values read from ``self.rconf``.

        :param file_path: forwarded as ``file_path``
        :param file_format: forwarded as ``file_format``
        :param audio_file: forwarded as ``audio_file``
        :rtype: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        """
        mfcc = AudioFileMFCC(
            file_path=file_path,
            file_format=file_format,
            audio_file=audio_file,
            rconf=self.rconf,
            logger=self.logger
        )
        if not self.rconf.mmn:
            # no masking requested: nothing else to do
            return mfcc
        self.log(u"Running VAD inside _extract_mfcc...")
        settings = self.rconf
        vad_arguments = {
            "log_energy_threshold": settings[RuntimeConfiguration.MFCC_MASK_LOG_ENERGY_THRESHOLD],
            "min_nonspeech_length": settings[RuntimeConfiguration.MFCC_MASK_MIN_NONSPEECH_LENGTH],
            "extend_before": settings[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_BEFORE],
            "extend_after": settings[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_AFTER],
        }
        mfcc.run_vad(**vad_arguments)
        self.log(u"Running VAD inside _extract_mfcc... done")
        return mfcc

    def _compute_head_process_tail(self, audio_file_mfcc):
        """
        Determine the lengths of the head, process, and tail
        portions of the audio file.

        If at least one of ``i_a_head``, ``i_a_process``, ``i_a_tail``
        is set in the Task configuration, the three configured values
        are returned as they are.
        Otherwise the process length is ``None``, and head and tail
        are zero unless the corresponding min/max values are set,
        in which case SD is used to detect them.

        :param audio_file_mfcc: the audio representation passed to SD
        :returns: the triple ``(head, process, tail)``
        :rtype: tuple
        """
        configuration = self.task.configuration
        head_length = configuration["i_a_head"]
        process_length = configuration["i_a_process"]
        tail_length = configuration["i_a_tail"]
        head_max = configuration["i_a_head_max"]
        head_min = configuration["i_a_head_min"]
        tail_max = configuration["i_a_tail_max"]
        tail_min = configuration["i_a_tail_min"]
        nothing_explicit = (
            (head_length is None) and
            (process_length is None) and
            (tail_length is None)
        )
        if nothing_explicit:
            self.log(u"Detecting head tail...")
            detector = SD(
                audio_file_mfcc,
                self.task.text_file,
                rconf=self.rconf,
                logger=self.logger
            )
            head_length = TimeValue("0.000")
            process_length = None
            tail_length = TimeValue("0.000")
            if not ((head_min is None) and (head_max is None)):
                self.log(u"Detecting HEAD...")
                head_length = detector.detect_head(head_min, head_max)
                self.log([u"Detected HEAD: %.3f", head_length])
                self.log(u"Detecting HEAD... done")
            if not ((tail_min is None) and (tail_max is None)):
                self.log(u"Detecting TAIL...")
                tail_length = detector.detect_tail(tail_min, tail_max)
                self.log([u"Detected TAIL: %.3f", tail_length])
                self.log(u"Detecting TAIL... done")
            self.log(u"Detecting head tail... done")
        else:
            self.log(u"Setting explicit head process tail")
        summary = (
            (u"Head:    %s", head_length),
            (u"Process: %s", process_length),
            (u"Tail:    %s", tail_length),
        )
        for template, length in summary:
            self.log([template, gf.safe_float(length, None)])
        return (head_length, process_length, tail_length)

    def _set_synthesizer(self):
        """
        Create a new synthesizer, using the current ``rconf`` and ``logger``,
        and store it in ``self.synthesizer``.
        """
        self.log(u"Setting synthesizer...")
        fresh = Synthesizer(rconf=self.rconf, logger=self.logger)
        self.synthesizer = fresh
        self.log(u"Setting synthesizer... done")

    def _clear_cache_synthesizer(self):
        """ Clear the cache of the synthesizer """
        self.log(u"Clearing synthesizer...")
        self.synthesizer.clear_cache()
        self.log(u"Clearing synthesizer... done")

    def _synthesize(self, text_file):
        """
        Synthesize text into a WAVE file.

        Return a tuple consisting of:

        1. the handler of the generated audio file
        2. the path of the generated audio file
        3. the list of anchors, that is, the first element of
           the value returned by the synthesizer
        4. the output audio format reported by the synthesizer

        The caller owns the temporary file and must delete it.
        If the synthesis fails, the temporary file is deleted here,
        since the caller would never receive its handler and path.

        :param text_file: the text to be synthesized
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :rtype: tuple (handler, string, list, tuple)
        """
        handler, path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        try:
            result = self.synthesizer.synthesize(text_file, path)
            anchors = result[0]
            audio_format = self.synthesizer.output_audio_format
        except Exception:
            # do not leak the temporary file, then let the error propagate
            gf.delete_file(handler, path)
            raise
        return (handler, path, anchors, audio_format)

    def _align_waves(self, real_wave_mfcc, synt_wave_mfcc, synt_anchors):
        """
        Align the real wave with the synthesized wave,
        both given as AudioFileMFCC objects.

        :param real_wave_mfcc: the MFCC representation of the real wave
        :param synt_wave_mfcc: the MFCC representation of the synthesized wave
        :param synt_anchors: the anchors passed to ``compute_boundaries``
        :returns: the boundary indices computed by the DTW aligner
        """
        self.log(u"Creating DTWAligner...")
        dtw = DTWAligner(real_wave_mfcc, synt_wave_mfcc, rconf=self.rconf, logger=self.logger)
        self.log(u"Creating DTWAligner... done")

        self.log(u"Computing boundary indices...")
        computed = dtw.compute_boundaries(synt_anchors)
        self.log(u"Computing boundary indices... done")
        return computed

    def _adjust_boundaries(self, boundary_indices, text_file, real_wave_mfcc, sync_root, force_aba_auto=False, leaf_level=False):
        """
        Adjust boundaries as requested by the user,
        and append the resulting fragment list to ``sync_root``.

        This method does not return a value:
        its result is the modification of ``sync_root``.

        :param boundary_indices: the boundary indices in the all_mfcc of
                                 ``real_wave_mfcc``, starting with
                                 (head-1st fragment) and ending with
                                 (-1th fragment-tail)
        :param text_file: the text file passed to the ABA algorithm
        :param real_wave_mfcc: the MFCC representation of the real wave
        :param sync_root: the tree node to which fragments are appended
        :param bool force_aba_auto: if ``True``, use the AUTO algorithm
                                    instead of the one configured in the task
        :param bool leaf_level: passed to ABA as ``allow_arbitrary_shift``
        """
        parameters = self.task.configuration.aba_parameters()
        if force_aba_auto:
            self.log(u"Forced running algorithm: 'auto'")
            # only the algorithm is replaced: the other aba settings
            # (nonspeech and nozero) remain as specified by the user
            parameters["algorithm"] = (AdjustBoundaryAlgorithm.AUTO, [])
        self.log([u"ABA parameters: %s", parameters])
        adjuster = AdjustBoundaryAlgorithm(rconf=self.rconf, logger=self.logger)
        adjuster.adjust(
            aba_parameters=parameters,
            real_wave_mfcc=real_wave_mfcc,
            boundary_indices=boundary_indices,
            text_file=text_file,
            allow_arbitrary_shift=leaf_level
        )
        adjuster.append_fragment_list_to_sync_root(sync_root=sync_root)

    def _append_trivial_tree(self, text_file, sync_root):
        """
        Append to ``sync_root`` a trivial tree, made by one HEAD,
        one sync map fragment for each element of ``text_file``,
        and one TAIL, without running any alignment.

        This function is called if either ``text_file`` has only one element,
        or if ``sync_root.value`` is an interval with zero length
        (i.e., ``sync_root.value.begin == sync_root.value.end``).

        :param text_file: the text file whose elements get a fragment
        :param sync_root: the tree node to which fragments are appended
        """
        span = sync_root.value
        fragment_count = len(text_file)
        if fragment_count != 1:
            # here span.begin == span.end:
            # every boundary collapses onto the same time value
            boundaries = [span.begin] * (3 + fragment_count)
        else:
            # zero-length HEAD and TAIL around the only fragment
            boundaries = [span.begin, span.begin, span.end, span.end]
        builder = AdjustBoundaryAlgorithm(rconf=self.rconf, logger=self.logger)
        builder.intervals_to_fragment_list(text_file=text_file, time_values=boundaries)
        builder.append_fragment_list_to_sync_root(sync_root=sync_root)

    def _create_sync_map(self, sync_root):
        """
        Build the sync map from the given tree and add it to the Task.

        If ``self.rconf.safety_checks`` is true, the leaves of the sync map
        are checked for consistency first, and a step failure
        is reported if they are not consistent.

        :param sync_root: the root of the tree of sync map fragments
        """
        computed = SyncMap(tree=sync_root, rconf=self.rconf, logger=self.logger)
        if not self.rconf.safety_checks:
            self.log(u"Not running sanity check on computed sync map")
        else:
            self.log(u"Running sanity check on computed sync map...")
            consistent = computed.leaves_are_consistent
            if not consistent:
                failure = ValueError(u"The computed sync map contains inconsistent fragments")
                self._step_failure(failure)
            self.log(u"Running sanity check on computed sync map... passed")
        self.task.sync_map = computed
예제 #7
0
class ExecuteTask(Loggable):
    """
    Execute a task, that is, compute the sync map for it.

    :param task: the task to be executed
    :type  task: :class:`~aeneas.task.Task`
    :param rconf: a runtime configuration
    :type  rconf: :class:`~aeneas.runtimeconfiguration.RuntimeConfiguration`
    :param logger: the logger object
    :type  logger: :class:`~aeneas.logger.Logger`
    """

    TAG = u"ExecuteTask"

    def __init__(self, task=None, rconf=None, logger=None):
        """
        Set up the step bookkeeping and, if given, load ``task``.

        :param task: the task to be executed, or ``None``
        :param rconf: a runtime configuration, forwarded to the parent class
        :param logger: the logger object, forwarded to the parent class
        """
        super(ExecuteTask, self).__init__(rconf=rconf, logger=logger)
        # step bookkeeping, updated by the _step_* helpers
        self.step_index, self.step_label = 1, u""
        self.step_begin_time, self.step_total = None, 0.000
        self.synthesizer = None
        self.task = task
        if task is None:
            return
        self.load_task(task)

    def load_task(self, task):
        """
        Store ``task`` as the task to be executed.

        :param task: the task to load
        :type  task: :class:`~aeneas.task.Task`
        :raises: :class:`~aeneas.executetask.ExecuteTaskInputError`: if ``task`` is not an instance of :class:`~aeneas.task.Task`
        """
        is_task = isinstance(task, Task)
        if not is_task:
            self.log_exc(
                u"task is not an instance of Task",
                None,
                True,
                ExecuteTaskInputError
            )
        self.task = task

    def _step_begin(self, label, log=True):
        """ Log begin of a step """
        if log:
            self.step_label = label
            self.step_begin_time = self.log(u"STEP %d BEGIN (%s)" %
                                            (self.step_index, label))

    def _step_end(self, log=True):
        """ Log end of a step """
        if log:
            step_end_time = self.log(u"STEP %d END (%s)" %
                                     (self.step_index, self.step_label))
            diff = (step_end_time - self.step_begin_time)
            diff = float(diff.seconds + diff.microseconds / 1000000.0)
            self.step_total += diff
            self.log(u"STEP %d DURATION %.3f (%s)" %
                     (self.step_index, diff, self.step_label))
            self.step_index += 1

    def _step_failure(self, exc):
        """
        Log the failure of the current step.

        A critical message is logged, the step index is advanced, and
        ``exc`` is handed to ``log_exc`` together with
        ``ExecuteTaskExecutionError``.

        :param exc: the exception that made the step fail
        """
        message = u"STEP %d (%s) FAILURE" % (self.step_index, self.step_label)
        self.log_crit(message)
        self.step_index += 1
        self.log_exc(
            u"Unexpected error while executing task",
            exc,
            True,
            ExecuteTaskExecutionError
        )

    def _step_total(self):
        """ Log total """
        self.log(u"STEP T DURATION %.3f" % (self.step_total))

    def execute(self):
        """
        Execute the task.
        The sync map produced will be stored inside the task object.

        The audio and text inputs are validated first; then the task is
        run as multi-level or single-level depending on the format of
        its text file.

        :raises: :class:`~aeneas.executetask.ExecuteTaskInputError`: if there is a problem with the input parameters
        :raises: :class:`~aeneas.executetask.ExecuteTaskExecutionError`: if there is a problem during the task execution
        """
        def reject(message):
            # report an input problem, asking log_exc for an ExecuteTaskInputError
            self.log_exc(message, None, True, ExecuteTaskInputError)

        self.log(u"Executing task...")

        # the AudioFile object must be set and have a positive length
        audio_file = self.task.audio_file
        if audio_file is None:
            reject(u"The task does not seem to have its audio file set")
        audio_length = audio_file.audio_length
        if (audio_length is None) or (audio_length <= 0):
            reject(u"The task seems to have an invalid audio file")
        max_audio_length = self.rconf[RuntimeConfiguration.TASK_MAX_AUDIO_LENGTH]
        # a non-positive maximum disables the check
        if (max_audio_length > 0) and (audio_length > max_audio_length):
            reject(
                u"The audio file of the task has length %.3f, more than the maximum allowed (%.3f)."
                % (audio_length, max_audio_length)
            )

        # the TextFile object must be set and must not be empty
        text_file = self.task.text_file
        if text_file is None:
            reject(u"The task does not seem to have its text file set")
        if len(text_file) == 0:
            reject(u"The task text file seems to have no text fragments")
        max_text_length = self.rconf[RuntimeConfiguration.TASK_MAX_TEXT_LENGTH]
        # a non-positive maximum disables the check
        if (max_text_length > 0) and (len(text_file) > max_text_length):
            reject(
                u"The text file of the task has %d fragments, more than the maximum allowed (%d)."
                % (len(text_file), max_text_length)
            )
        if text_file.chars == 0:
            reject(u"The task text file seems to have empty text")

        self.log(u"Both audio and text input file are present")

        # reset the step bookkeeping and run the appropriate executor
        self.step_index = 1
        self.step_total = 0.000
        is_multilevel = self.task.text_file.file_format in TextFileFormat.MULTILEVEL_VALUES
        if is_multilevel:
            self._execute_multi_level_task()
        else:
            self._execute_single_level_task()
        self.log(u"Executing task... done")

    def _execute_single_level_task(self):
        """ Execute a single-level task """
        self.log(u"Executing single level task...")
        try:
            # load audio file, extract MFCCs from real wave, clear audio file
            self._step_begin(u"extract MFCC real wave")
            real_wave_mfcc = self._extract_mfcc(
                file_path=self.task.audio_file_path_absolute, file_format=None)
            self._step_end()

            # compute head and/or tail and set it
            self._step_begin(u"compute head tail")
            (head_length, process_length,
             tail_length) = self._compute_head_process_tail(real_wave_mfcc)
            real_wave_mfcc.set_head_middle_tail(head_length, process_length,
                                                tail_length)
            self._step_end()

            # compute a time map alignment
            self._set_synthesizer()
            time_map = self._execute_inner(real_wave_mfcc,
                                           self.task.text_file,
                                           adjust_boundaries=True,
                                           log=True)
            self._clear_cache_synthesizer()

            # convert time_map to tree and create syncmap and add it to task
            self._step_begin(u"create sync map")
            tree = self._level_time_map_to_tree(self.task.text_file, time_map)
            self.task.sync_map = self._create_syncmap(tree)
            self._step_end()

            # check for fragments with zero duration
            self._step_begin(u"check zero duration")
            self._check_no_zero(self.rconf.mws)
            self._step_end()

            # log total
            self._step_total()
            self.log(u"Executing single level task... done")
        except Exception as exc:
            self._step_failure(exc)

    def _execute_multi_level_task(self):
        """
        Execute a multi-level task.

        Three per-level runtime configurations are cloned from
        ``self.rconf`` (levels 1, 2, 3), each with its own granularity
        and TTS settings. ``self.rconf`` is swapped to the configuration
        of the level being processed, and set back to the saved original
        right before the sync map is created.

        Any exception raised inside the ``try`` block is passed to
        ``_step_failure``.
        """
        self.log(u"Executing multi level task...")

        self.log(u"Saving rconf...")
        # save original rconf
        orig_rconf = self.rconf.clone()
        # clone rconfs and set granularity
        # (index 0 is a placeholder, so that list indices match level numbers)
        level_rconfs = [
            None,
            self.rconf.clone(),
            self.rconf.clone(),
            self.rconf.clone()
        ]
        level_mfccs = [None, None, None, None]
        for i in range(1, len(level_rconfs)):
            level_rconfs[i].set_granularity(i)
            self.log([u"Level %d mws: %.3f", i, level_rconfs[i].mws])
            self.log([u"Level %d mwl: %.3f", i, level_rconfs[i].mwl])
            level_rconfs[i].set_tts(i)
            self.log([u"Level %d tts: %s", i, level_rconfs[i].tts])
            self.log([u"Level %d tts_path: %s", i, level_rconfs[i].tts_path])
        self.log(u"Saving rconf... done")

        # NOTE(review): if anything below raises, self.rconf is left pointing
        #               at a level rconf; orig_rconf is only restored on the
        #               success path -- confirm this is acceptable for callers
        try:
            self.log(u"Creating AudioFile object...")
            audio_file = self._load_audio_file()
            self.log(u"Creating AudioFile object... done")

            # extract MFCC for each level
            for i in range(1, len(level_rconfs)):
                self._step_begin(u"extract MFCC real wave level %d" % i)
                # recompute only for level 1 or when mws/mwl differ
                # from the previous level; otherwise share the MFCC object
                if (i == 1) or (
                        level_rconfs[i].mws != level_rconfs[i - 1].mws) or (
                            level_rconfs[i].mwl != level_rconfs[i - 1].mwl):
                    # _extract_mfcc reads self.rconf, so it must be swapped first
                    self.rconf = level_rconfs[i]
                    level_mfccs[i] = self._extract_mfcc(audio_file=audio_file)
                else:
                    self.log(u"Keeping MFCC real wave from previous level")
                    level_mfccs[i] = level_mfccs[i - 1]
                self._step_end()

            self.log(u"Clearing AudioFile object...")
            self.rconf = level_rconfs[1]
            self._clear_audio_file(audio_file)
            self.log(u"Clearing AudioFile object... done")

            # compute head tail for the entire real wave (level 1)
            self._step_begin(u"compute head tail")
            (head_length, process_length,
             tail_length) = self._compute_head_process_tail(level_mfccs[1])
            level_mfccs[1].set_head_middle_tail(head_length, process_length,
                                                tail_length)
            self._step_end()

            # compute alignment at each level
            tree = Tree()
            sync_roots = [tree]
            text_files = [self.task.text_file]
            # per-level flags, indexed by level number (index 0 unused):
            # aht = add head/tail nodes, aba = run the adjust boundary algorithm
            aht = [None, True, False, False]
            aba = [None, True, True, False]
            for i in range(1, len(level_rconfs)):
                self._step_begin(u"compute alignment level %d" % i)
                self.rconf = level_rconfs[i]
                # the returned lists become the inputs of the next level
                text_files, sync_roots = self._execute_level(
                    i, level_mfccs[i], text_files, sync_roots, aht[i], aba[i])
                self._step_end()

            self._step_begin(u"select levels")
            tree = self._select_levels(tree)
            self._step_end()

            self._step_begin(u"create sync map")
            # back to the original rconf, saved before the per-level clones
            self.rconf = orig_rconf
            self.task.sync_map = self._create_syncmap(tree)
            self._step_end()

            self._step_begin(u"check zero duration")
            # pass the mws of the last level
            self._check_no_zero(level_rconfs[-1].mws)
            self._step_end()

            self._step_total()
            self.log(u"Executing multi level task... done")
        except Exception as exc:
            self._step_failure(exc)

    def _execute_level(self, level, audio_file_mfcc, text_files, sync_roots,
                       add_head_tail, adjust_boundaries):
        """
        Compute the alignment for all the nodes in the given level.

        Return a pair (next_level_text_files, next_level_sync_roots),
        containing two lists of text file subtrees and sync map subtrees
        on the next level.

        :param int level: the level
        :param audio_file_mfcc: the audio MFCC representation for this level
        :type  audio_file_mfcc: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        :param list text_files: a list of :class:`~aeneas.textfile.TextFile` objects,
                                each representing a (sub)tree of the Task text file
        :param list sync_roots: a list of :class:`~aeneas.tree.Tree` objects,
                                each representing a SyncMapFragment tree,
                                one for each element in ``text_files``
        :param bool add_head_tail: if ``True``, add head and tail nodes to the sync map tree
        :param bool adjust_boundaries: if ``True``, execute the adjust boundary algorithm
        :rtype: (list, list)
        """
        self._set_synthesizer()
        children_text_files = []
        children_sync_roots = []
        for index, subtree in enumerate(text_files):
            self.log([u"Text level %d, fragment %d", level, index])
            self.log([u"  Len:   %d", len(subtree)])
            root = sync_roots[index]
            single_child_below_top = (level > 1) and (len(subtree) == 1)
            if not single_child_below_top:
                self.log(
                    u"  Level 1 or more than one child => computing timemap")
                if root.is_empty:
                    self.log(u"  No begin or end to set")
                else:
                    # restrict the alignment to the interval of this node
                    begin = root.value.begin
                    end = root.value.end
                    self.log([u"  Begin: %.3f", begin])
                    self.log([u"  End:   %.3f", end])
                    audio_file_mfcc.set_head_middle_tail(
                        head_length=begin,
                        middle_length=(end - begin)
                    )
                time_map = self._execute_inner(
                    audio_file_mfcc,
                    subtree,
                    adjust_boundaries=adjust_boundaries,
                    log=False
                )
            else:
                self.log(
                    u"  Level > 1 and only one child => returning trivial timemap"
                )
                # three intervals: before the node, the node itself, after the node
                time_map = [
                    (TimeValue("0.000"), root.value.begin),
                    (root.value.begin, root.value.end),
                    (root.value.end, audio_file_mfcc.audio_length),
                ]
            self.log([u"  Map:   %s", str(time_map)])
            self._level_time_map_to_tree(
                subtree,
                time_map,
                root,
                add_head_tail=add_head_tail
            )
            # collect the roots for the next level
            children_text_files.extend(subtree.children_not_empty)
            nodes = root.children
            if add_head_tail:
                # head and tail were added to this node:
                # they must not be passed to the next level
                nodes = nodes[1:-1]
            children_sync_roots.extend(nodes)
        self._clear_cache_synthesizer()
        return (children_text_files, children_sync_roots)

    def _execute_inner(self,
                       audio_file_mfcc,
                       text_file,
                       adjust_boundaries=True,
                       log=True):
        """
        Align a subinterval of the given AudioFileMFCC
        with the given TextFile.

        Return the computed time map, as a list of intervals.

        The begin and end positions inside the AudioFileMFCC
        must have been set ahead by the caller.

        The text fragments being aligned are the vchildren of ``text_file``.

        The temporary file holding the synthesized wave is deleted
        as soon as its MFCCs have been extracted, also when the
        extraction fails.

        :param audio_file_mfcc: the audio file MFCC representation
        :type  audio_file_mfcc: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        :param text_file: the text file subtree to align
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :param bool adjust_boundaries: if ``True``, execute the adjust boundary algorithm
        :param bool log: if ``True``, log steps
        :rtype: list
        """
        self._step_begin(u"synthesize text", log=log)
        synt_handler, synt_path, synt_anchors, synt_format = self._synthesize(
            text_file)
        self._step_end(log=log)

        self._step_begin(u"extract MFCC synt wave", log=log)
        try:
            synt_wave_mfcc = self._extract_mfcc(file_path=synt_path,
                                                file_format=synt_format)
        finally:
            # do not leave the temporary file behind if the extraction raises
            gf.delete_file(synt_handler, synt_path)
        self._step_end(log=log)

        self._step_begin(u"align waves", log=log)
        indices = self._align_waves(audio_file_mfcc, synt_wave_mfcc,
                                    synt_anchors)
        self._step_end(log=log)

        self._step_begin(u"adjust boundaries", log=log)
        time_map = self._adjust_boundaries(audio_file_mfcc, text_file, indices,
                                           adjust_boundaries)
        self._step_end(log=log)

        return time_map

    def _load_audio_file(self):
        """
        Create the AudioFile object for the task audio
        and read its samples, as a logged step.

        :rtype: :class:`~aeneas.audiofile.AudioFile`
        """
        self._step_begin(u"load audio file")
        loaded = AudioFile(
            file_path=self.task.audio_file_path_absolute,
            # NOTE file_format=None forces conversion to
            #      PCM16 mono WAVE with proper sample rate
            file_format=None,
            rconf=self.rconf,
            logger=self.logger
        )
        loaded.read_samples_from_file()
        self._step_end()
        return loaded

    def _clear_audio_file(self, audio_file):
        """
        Clear audio from memory.

        :param audio_file: the object to clear
        :type  audio_file: :class:`~aeneas.audiofile.AudioFile`
        """
        self._step_begin(u"clear audio file")
        audio_file.clear_data()
        audio_file = None
        self._step_end()

    def _extract_mfcc(self, file_path=None, file_format=None, audio_file=None):
        """
        Build the MFCC representation of an audio source.

        All the arguments are forwarded to ``AudioFileMFCC``, together
        with the current ``self.rconf`` and ``self.logger``.

        :param file_path: forwarded as ``file_path``
        :param file_format: forwarded as ``file_format``
        :param audio_file: forwarded as ``audio_file``
        :rtype: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        """
        options = {
            "file_path": file_path,
            "file_format": file_format,
            "audio_file": audio_file,
            "rconf": self.rconf,
            "logger": self.logger,
        }
        return AudioFileMFCC(**options)

    def _compute_head_process_tail(self, audio_file_mfcc):
        """
        Determine the audio head, process and tail lengths.

        If any of the explicit values (``i_a_head``, ``i_a_process``,
        ``i_a_tail``) is set in the Task configuration, the explicit
        values are returned as they are.
        Otherwise SD is used: head and tail default to zero and are
        detected only when the corresponding min/max bound is set,
        while the process length is ``None``.

        :param audio_file_mfcc: the audio MFCC representation passed to SD
        :rtype: tuple (head_length, process_length, tail_length)
        """
        configuration = self.task.configuration
        head_length = configuration["i_a_head"]
        process_length = configuration["i_a_process"]
        tail_length = configuration["i_a_tail"]
        head_max = configuration["i_a_head_max"]
        head_min = configuration["i_a_head_min"]
        tail_max = configuration["i_a_tail_max"]
        tail_min = configuration["i_a_tail_min"]

        explicit_values = (head_length, process_length, tail_length)
        if any(value is not None for value in explicit_values):
            self.log(u"Setting explicit head process tail")
        else:
            self.log(u"Detecting head tail...")
            detector = SD(
                audio_file_mfcc,
                self.task.text_file,
                rconf=self.rconf,
                logger=self.logger
            )
            head_length = TimeValue("0.000")
            process_length = None
            tail_length = TimeValue("0.000")
            if not ((head_min is None) and (head_max is None)):
                self.log(u"Detecting HEAD...")
                head_length = detector.detect_head(head_min, head_max)
                self.log([u"Detected HEAD: %.3f", head_length])
                self.log(u"Detecting HEAD... done")
            if not ((tail_min is None) and (tail_max is None)):
                self.log(u"Detecting TAIL...")
                tail_length = detector.detect_tail(tail_min, tail_max)
                self.log([u"Detected TAIL: %.3f", tail_length])
                self.log(u"Detecting TAIL... done")
            self.log(u"Detecting head tail... done")

        summary = (
            (u"Head:    %s", head_length),
            (u"Process: %s", process_length),
            (u"Tail:    %s", tail_length),
        )
        for template, value in summary:
            self.log([template, gf.safe_float(value, None)])
        return (head_length, process_length, tail_length)

    def _set_synthesizer(self):
        """ Create a new synthesizer from the current rconf and store it """
        self.log(u"Setting synthesizer...")
        synthesizer = Synthesizer(rconf=self.rconf, logger=self.logger)
        self.synthesizer = synthesizer
        self.log(u"Setting synthesizer... done")

    def _clear_cache_synthesizer(self):
        """ Clear the cache of the synthesizer """
        self.log(u"Clearing synthesizer...")
        self.synthesizer.clear_cache()
        self.log(u"Clearing synthesizer... done")

    def _synthesize(self, text_file):
        """
        Synthesize text into a WAVE file.

        Return a tuple consisting of:

        1. the handler of the generated audio file
        2. the path of the generated audio file
        3. the list of anchors, that is, a list of floats
           each representing the start time of the corresponding
           text fragment in the generated wave file
           ``[start_1, start_2, ..., start_n]``
        4. a tuple describing the format of the audio file

        The generated file is a temporary file created under the
        ``TMP_PATH`` of the runtime configuration: the caller is
        responsible for deleting it. If the synthesis itself raises,
        the temporary file is deleted here before the exception
        is propagated.

        :param text_file: the text to be synthesized
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :rtype: tuple (handler, string, list, tuple)
        """
        handler, path = gf.tmp_file(
            suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        try:
            result = self.synthesizer.synthesize(text_file, path)
        except Exception:
            # nobody else knows about this temporary file yet: do not leak it
            gf.delete_file(handler, path)
            raise
        # the first element of the synthesizer result holds the anchors
        return (handler, path, result[0], self.synthesizer.output_audio_format)

    def _align_waves(self, real_wave_mfcc, synt_wave_mfcc, synt_anchors):
        """
        Align two AudioFileMFCC objects, representing WAVE files,
        with a DTWAligner.

        :param real_wave_mfcc: the MFCC representation of the real wave
        :param synt_wave_mfcc: the MFCC representation of the synthesized wave
        :param synt_anchors: the anchors passed to ``compute_boundaries``
        :returns: the boundary indices computed by the aligner
        """
        self.log(u"Creating DTWAligner...")
        dtw = DTWAligner(
            real_wave_mfcc,
            synt_wave_mfcc,
            rconf=self.rconf,
            logger=self.logger
        )
        self.log(u"Creating DTWAligner... done")

        self.log(u"Computing boundary indices...")
        indices = dtw.compute_boundaries(synt_anchors)
        self.log(u"Computing boundary indices... done")
        return indices

    def _adjust_boundaries(self,
                           real_wave_mfcc,
                           text_file,
                           boundary_indices,
                           adjust_boundaries=True):
        """
        Adjust boundaries as requested by the user.

        When ``adjust_boundaries`` is true, the algorithm and its
        parameters come from the Task configuration; otherwise the
        ``AUTO`` algorithm is forced, with no parameters.

        Return the computed time map, that is,
        a list of pairs ``[start_time, end_time]``,
        of length equal to number of fragments + 2,
        where the two extra elements are for
        the HEAD (first) and TAIL (last).
        """
        # boundary_indices holds the boundary indices in the all_mfcc of real_wave_mfcc,
        # from the (head-1st fragment) boundary to the (-1th fragment-tail) one
        if not adjust_boundaries:
            self.log(u"Forced running algorithm: 'auto'")
            algorithm, parameters = AdjustBoundaryAlgorithm.AUTO, None
        else:
            algorithm, parameters = self.task.configuration.aba_parameters()
            self.log([u"Running algorithm: '%s'", algorithm])
        adjuster = AdjustBoundaryAlgorithm(
            algorithm=algorithm,
            parameters=parameters,
            real_wave_mfcc=real_wave_mfcc,
            boundary_indices=boundary_indices,
            text_file=text_file,
            rconf=self.rconf,
            logger=self.logger
        )
        return adjuster.to_time_map()

    def _level_time_map_to_tree(self,
                                text_file,
                                time_map,
                                tree=None,
                                add_head_tail=True):
        """
        Convert a level time map into a Tree of SyncMapFragments.

        The time map is
        a list of pairs ``[start_time, end_time]``,
        of length equal to number of fragments + 2,
        where the two extra elements are for
        the HEAD (first) and TAIL (last).

        :param text_file: the text file object
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :param list time_map: the time map
        :param tree: the tree; if ``None``, a new Tree will be built
        :type  tree: :class:`~aeneas.tree.Tree`
        :param bool add_head_tail: if ``True``, also add HEAD and TAIL nodes;
                                   otherwise the first time map entry is skipped
        :rtype: :class:`~aeneas.tree.Tree`
        """
        if tree is None:
            tree = Tree()
        if not add_head_tail:
            # time_map[0] is the HEAD interval: start from the first real fragment
            fragments = text_file.fragments
            first_index = 1
        else:
            fragments = (
                [TextFragment(u"HEAD", self.task.configuration["language"], [u""])]
                + text_file.fragments
                + [TextFragment(u"TAIL", self.task.configuration["language"], [u""])]
            )
            first_index = 0
        for position, fragment in enumerate(fragments, first_index):
            interval = time_map[position]
            node_value = SyncMapFragment(fragment, interval[0], interval[1])
            tree.add_child(Tree(value=node_value))
        return tree

    def _select_levels(self, tree):
        """
        Select the correct levels in the tree,
        reading the ``os_task_file_levels``
        parameter in the Task configuration.

        If ``None`` or invalid, return the current sync map tree
        unchanged.
        Otherwise, return only the levels appearing in it.

        :param tree: a Tree of SyncMapFragments
        :type  tree: :class:`~aeneas.tree.Tree`
        :rtype: :class:`~aeneas.tree.Tree`
        """
        levels = self.task.configuration["o_levels"]
        self.log([u"Levels: '%s'", levels])
        if (levels is None) or (len(levels) < 1):
            return tree
        try:
            levels = [int(l) for l in levels if int(l) > 0]
            self.log([u"Converted levels: %s", levels])
        except ValueError:
            self.log_warn(
                u"Cannot convert levels to list of int, returning unchanged")
            return tree
        # remove head and tail nodes
        head = tree.vchildren[0]
        tail = tree.vchildren[-1]
        tree.remove_child(0)
        tree.remove_child(-1)
        # keep only the selected levels
        tree.keep_levels(levels)
        # add head and tail back
        tree.add_child(Tree(value=head), as_last=False)
        tree.add_child(Tree(value=tail), as_last=True)
        # return the new tree
        return tree

    def _create_syncmap(self, tree):
        """
        Return a sync map built on the given tree, after applying
        the head/tail format (``o_h_t_format``) of the Task configuration.

        Unless the format is ``ADD``, the HEAD and TAIL children are
        removed from the tree; with ``STRETCH``, the first fragment is
        also made to begin where HEAD begins, and the last fragment
        to end where TAIL ends.

        :param tree: a Tree of SyncMapFragments
        :type  tree: :class:`~aeneas.tree.Tree`
        :rtype: :class:`~aeneas.syncmap.SyncMap`
        """
        self.log([u"Fragments in time map (including HEAD/TAIL): %d", len(tree)])
        head_tail_format = self.task.configuration["o_h_t_format"]
        self.log([u"Head/tail format: %s", str(head_tail_format)])

        values = tree.vchildren
        head, first = values[0], values[1]
        last, tail = values[-2], values[-1]
        strip_head_tail = head_tail_format != SyncMapHeadTailFormat.ADD

        # drop the HEAD fragment, if requested
        if strip_head_tail:
            tree.remove_child(0)
            self.log(u"Removed HEAD")

        # extend the first and last fragment over HEAD and TAIL, if requested
        if head_tail_format == SyncMapHeadTailFormat.STRETCH:
            self.log([u"Stretched first.begin: %.3f => %.3f (head)", first.begin, head.begin])
            self.log([u"Stretched last.end:    %.3f => %.3f (tail)", last.end, tail.end])
            first.begin = head.begin
            last.end = tail.end

        # drop the TAIL fragment, if requested
        if strip_head_tail:
            tree.remove_child(-1)
            self.log(u"Removed TAIL")

        result = SyncMap()
        result.fragments_tree = tree
        return result

    # TODO can this be done during the alignment?
    def _check_no_zero(self, min_mws):
        """
        Check for fragments with zero duration and, if the ``o_no_zero``
        entry of the Task configuration is set, try to fix them.

        A run of consecutive zero-duration leaves is fixed by moving
        boundaries forward in steps of 0.001, provided that the leaf
        following the run is long enough to absorb the shift; otherwise
        a warning is logged and the run is left untouched.

        The leaves are modified in place; nothing is returned.

        :param min_mws: not used by the current implementation
        """
        if self.task.configuration["o_no_zero"]:
            self.log(u"Checking for fragments with zero duration...")
            delta = TimeValue("0.001")
            leaves = self.task.sync_map.fragments_tree.vleaves_not_empty
            # first and last leaves are HEAD and TAIL, skipping them
            # NOTE(review): confirm HEAD and TAIL are still present here
            #               when the head/tail format is not ADD
            max_index = len(leaves) - 1
            self.log([u"Fragment min index: %d", 1])
            self.log([u"Fragment max index: %d", max_index - 1])
            for i in range(1, max_index):
                self.log([u"Checking index:     %d", i])
                # advance j over the leaves ending exactly where leaf i begins:
                # leaves i..j-1 form a run of zero-duration fragments
                j = i
                while (j < max_index) and (leaves[j].end == leaves[i].begin):
                    j += 1
                if j != i:
                    self.log(u"Fragment(s) with zero duration:")
                    for k in range(i, j):
                        self.log([u"  %d : %s", k, leaves[k]])

                    # leaf j is the first leaf after the run
                    # (it can be the last leaf, when j == max_index)
                    if leaves[j].end - leaves[j].begin > (j - i) * delta:
                        # there is room after
                        # to move each zero fragment forward by 0.001
                        for k in range(j - i):
                            # the k-th leaf of the run ends (k + 1) * delta later,
                            # and the next leaf begins at that same time
                            shift = (k + 1) * delta
                            leaves[i + k].end += shift
                            leaves[i + k + 1].begin += shift
                            self.log([
                                u"  Moved fragment %d forward by %.3f", i + k,
                                shift
                            ])
                    else:
                        self.log_warn(u"  Unable to fix")
                    # NOTE(review): rebinding i does not affect the enclosing
                    #               ``for i in range(...)``: the next iteration
                    #               still gets the next value from the range,
                    #               so the rest of the run is examined again
                    i = j - 1
            self.log(u"Checking for fragments with zero duration... done")
        else:
            self.log(u"Not checking for fragments with zero duration")