def test_safe_bytes(self):
    """
    Check ``gf.safe_bytes``: ``None`` must map to ``None``,
    and every sample input must map to the expected byte string.
    """
    # (input, expected output) pairs
    cases = (
        ("", b""),
        ("foo", b"foo"),
        (b"", b""),
        (b"foo", b"foo"),
        (b"fo\x99", b"fo\x99"),
    )
    self.assertIsNone(gf.safe_bytes(None))
    for given, expected in cases:
        self.assertEqual(gf.safe_bytes(given), expected)
def main():
    """
    Run ``aeneas.cew``, reading input text from file
    and writing audio and interval data to file.

    The five command line arguments are, in order:
    ``QUIT_AFTER BACKWARDS TEXT_FILE_PATH AUDIO_FILE_PATH DATA_FILE_PATH``.

    Each line of the text file is split at its first space
    into a ``(voice_code, text)`` pair; lines without a space,
    or starting with a space, are skipped.

    Errors raised while parsing the arguments or reading the text file
    are not caught here; errors raised while synthesizing or writing
    the data file are printed and reported through the return value.

    :rtype: int
    :returns: ``0`` on success, ``1`` if too few arguments were given
              or if synthesizing/writing the output failed
    """
    # make sure we have enough parameters
    if len(sys.argv) < 6:
        print("You must pass five arguments: QUIT_AFTER BACKWARDS TEXT_FILE_PATH AUDIO_FILE_PATH DATA_FILE_PATH")
        return 1

    # read parameters
    c_quit_after = float(sys.argv[1])  # NOTE: cew needs float, not TimeValue
    c_backwards = int(sys.argv[2])
    text_file_path = sys.argv[3]
    audio_file_path = sys.argv[4]
    data_file_path = sys.argv[5]

    # read (voice_code, text) from file
    s_text = []
    with io.open(text_file_path, "r", encoding="utf-8") as text:
        # iterate the file lazily instead of materializing all lines first
        for line in text:
            # NOTE: not using strip() to avoid removing trailing blank characters
            line = line.replace(u"\n", u"").replace(u"\r", u"")
            idx = line.find(" ")
            if idx > 0:
                s_text.append((line[:idx], line[idx + 1:]))

    # convert to bytes (Python 2) or Unicode (Python 3), as required by cew
    convert = gf.safe_bytes if gf.PY2 else gf.safe_unicode
    c_text = [(convert(f_voice_code), convert(f_text)) for f_voice_code, f_text in s_text]

    try:
        import aeneas.cew.cew
        sr, sf, intervals = aeneas.cew.cew.synthesize_multiple(
            audio_file_path,
            c_quit_after,
            c_backwards,
            c_text
        )
        with io.open(data_file_path, "w", encoding="utf-8") as data:
            data.write(u"%d\n" % (sr))
            data.write(u"%d\n" % (sf))
            data.write(u"\n".join([u"%.3f %.3f" % (i[0], i[1]) for i in intervals]))
    except Exception as exc:
        print(u"Unexpected error: %s" % str(exc))
        # report the failure, consistently with the usage error above
        return 1
    return 0
def main():
    """
    Run ``aeneas.cew``, reading input text from file
    and writing audio and interval data to file.

    Expected command line arguments, in order:
    ``QUIT_AFTER BACKWARDS TEXT_FILE_PATH AUDIO_FILE_PATH DATA_FILE_PATH``.

    Every line of the text file is split at its first space
    into a ``(voice_code, text)`` pair; a line with no space,
    or with a space in first position, is ignored.

    Exceptions raised while converting the arguments or reading
    the text file propagate to the caller; exceptions raised while
    synthesizing or writing the data file are printed and turned
    into a non-zero return value.

    :rtype: int
    :returns: ``0`` on success, ``1`` on usage error or synthesis/write failure
    """
    # make sure we have enough parameters
    if len(sys.argv) < 6:
        print(
            "You must pass five arguments: QUIT_AFTER BACKWARDS TEXT_FILE_PATH AUDIO_FILE_PATH DATA_FILE_PATH"
        )
        return 1

    # read parameters
    c_quit_after = float(sys.argv[1])  # NOTE: cew needs float, not TimeValue
    c_backwards = int(sys.argv[2])
    text_file_path = sys.argv[3]
    audio_file_path = sys.argv[4]
    data_file_path = sys.argv[5]

    # read (voice_code, text) from file
    s_text = []
    with io.open(text_file_path, "r", encoding="utf-8") as text:
        # the file object yields lines one at a time: no need for readlines()
        for line in text:
            # NOTE: not using strip() to avoid removing trailing blank characters
            line = line.replace(u"\n", u"").replace(u"\r", u"")
            idx = line.find(" ")
            if idx > 0:
                f_voice_code = line[:idx]
                f_text = line[idx + 1:]
                s_text.append((f_voice_code, f_text))

    # cew wants byte strings on Python 2 and Unicode strings on Python 3
    if gf.PY2:
        convert = gf.safe_bytes
    else:
        convert = gf.safe_unicode
    c_text = [(convert(pair[0]), convert(pair[1])) for pair in s_text]

    try:
        import aeneas.cew.cew
        sr, sf, intervals = aeneas.cew.cew.synthesize_multiple(
            audio_file_path, c_quit_after, c_backwards, c_text)
        with io.open(data_file_path, "w", encoding="utf-8") as data:
            data.write(u"%d\n" % (sr))
            data.write(u"%d\n" % (sf))
            data.write(u"\n".join(
                [u"%.3f %.3f" % (i[0], i[1]) for i in intervals]))
    except Exception as exc:
        print(u"Unexpected error: %s" % str(exc))
        # a failed run must not look like a successful one
        return 1
    return 0
def parse(self, input_text, syncmap):
    """
    Read fragments from an XML document containing
    ``TIME_SLOT`` and ``ALIGNABLE_ANNOTATION`` elements.

    Each ``ALIGNABLE_ANNOTATION`` yields one fragment:
    its ``ANNOTATION_ID`` is the identifier,
    its ``TIME_SLOT_REF1`` and ``TIME_SLOT_REF2`` attributes
    select the begin and end times among the time slots,
    and the text of each nested ``ANNOTATION_VALUE`` is one line.

    :param input_text: the XML document
                       (converted with ``gf.safe_bytes`` before parsing)
    :param syncmap: the sync map passed to ``self._add_fragment``
    :raises KeyError: if an annotation references an undefined time slot id
    """
    from lxml import etree

    # get root
    root = etree.fromstring(gf.safe_bytes(input_text))

    # get time slots: TIME_SLOT_ID => parsed TIME_VALUE divided by 1000
    time_slots = dict()
    for ts in root.iter("TIME_SLOT"):
        time_slots[ts.get("TIME_SLOT_ID")] = gf.time_from_ssmmm(ts.get("TIME_VALUE")) / 1000

    # parse annotations
    for alignable in root.iter("ALIGNABLE_ANNOTATION"):
        identifier = gf.safe_unicode(alignable.get("ANNOTATION_ID"))
        begin = time_slots[alignable.get("TIME_SLOT_REF1")]
        end = time_slots[alignable.get("TIME_SLOT_REF2")]
        lines = [gf.safe_unicode(value.text) for value in alignable.iter("ANNOTATION_VALUE")]
        self._add_fragment(
            syncmap=syncmap,
            identifier=identifier,
            lines=lines,
            begin=begin,
            end=end
        )
def check_config_xml(self, contents):
    """
    Check whether the given XML config file contents
    is well-formed and it has all the required parameters.

    The checks run in order (well-formedness, job parameters,
    parameters of each task) and stop at the first one
    that leaves ``self.result.passed`` false.
    If the safety checks are disabled for ``check_config_xml``,
    a fresh result is returned without running any check.

    :param string contents: the XML config file contents or XML config string
    :rtype: :class:`~aeneas.validator.ValidatorResult`
    """
    self.log(u"Checking contents XML config file")
    self.result = ValidatorResult()
    if self._are_safety_checks_disabled(u"check_config_xml"):
        return self.result

    # the remaining checks work on the byte string form of the contents
    contents = gf.safe_bytes(contents)

    self.log(u"Checking that contents is well formed")
    self.check_raw_string(contents, is_bstring=True)
    if not self.result.passed:
        return self.result

    self.log(u"Checking required parameters for job")
    job_parameters = gf.config_xml_to_dict(contents, self.result, parse_job=True)
    self._check_required_parameters(self.XML_JOB_REQUIRED_PARAMETERS, job_parameters)
    if not self.result.passed:
        return self.result

    self.log(u"Checking required parameters for task")
    tasks_parameters = gf.config_xml_to_dict(contents, self.result, parse_job=False)
    for parameters in tasks_parameters:
        self.log([u"Checking required parameters for task: '%s'", parameters])
        self._check_required_parameters(self.XML_TASK_REQUIRED_PARAMETERS, parameters)
        if not self.result.passed:
            # stop at the first task that fails
            break

    return self.result
def parse(self, input_text, syncmap):
    """
    Read fragments from an XML document containing
    ``TIME_SLOT`` and ``ALIGNABLE_ANNOTATION`` elements.

    For each ``ALIGNABLE_ANNOTATION`` one fragment is added:
    the identifier is its ``ANNOTATION_ID`` attribute,
    the begin and end times are the time slots named by its
    ``TIME_SLOT_REF1`` and ``TIME_SLOT_REF2`` attributes,
    and the lines are the texts of its ``ANNOTATION_VALUE`` descendants.

    :param input_text: the XML document
                       (converted with ``gf.safe_bytes`` before parsing)
    :param syncmap: the sync map passed to ``self._add_fragment``
    :raises KeyError: if an annotation references an undefined time slot id
    """
    from lxml import etree

    # get root
    root = etree.fromstring(gf.safe_bytes(input_text))

    # get time slots: TIME_SLOT_ID => parsed TIME_VALUE divided by 1000
    time_slots = dict()
    for ts in root.iter("TIME_SLOT"):
        time_slots[ts.get("TIME_SLOT_ID")] = gf.time_from_ssmmm(
            ts.get("TIME_VALUE")) / 1000

    # parse annotations
    for alignable in root.iter("ALIGNABLE_ANNOTATION"):
        identifier = gf.safe_unicode(alignable.get("ANNOTATION_ID"))
        begin = time_slots[alignable.get("TIME_SLOT_REF1")]
        end = time_slots[alignable.get("TIME_SLOT_REF2")]
        lines = [
            gf.safe_unicode(value.text)
            for value in alignable.iter("ANNOTATION_VALUE")
        ]
        self._add_fragment(syncmap=syncmap,
                           identifier=identifier,
                           lines=lines,
                           begin=begin,
                           end=end)
def parse(self, input_text, syncmap):
    """
    Read fragments from a SMIL document.

    Limitations:

    1. only ``<par>`` elements are parsed, in document order
    2. timings must be in ``hh:mm:ss.mmm`` or ``ss.mmm`` format
       (the presence of a colon selects the former)
    3. the ``<audio>`` element must carry both
       the ``clipBegin`` and the ``clipEnd`` attributes
    """
    from lxml import etree

    def clip_time(value):
        # a colon marks the hh:mm:ss.mmm form, otherwise ss.mmm is parsed
        if ":" in value:
            return gf.time_from_hhmmssmmm(value)
        return gf.time_from_ssmmm(value)

    smil_ns = "{http://www.w3.org/ns/SMIL}"
    text_tag = smil_ns + "text"
    audio_tag = smil_ns + "audio"

    root = etree.fromstring(gf.safe_bytes(input_text))
    for par in root.iter(smil_ns + "par"):
        for node in par:
            tag = node.tag
            if tag == text_tag:
                # the identifier is the second component of the split src URL
                identifier = gf.safe_unicode(gf.split_url(node.get("src"))[1])
            elif tag == audio_tag:
                begin = clip_time(node.get("clipBegin"))
                end = clip_time(node.get("clipEnd"))
        # TODO read text from additional text_file?
        self._add_fragment(
            syncmap=syncmap,
            identifier=identifier,
            lines=[u""],
            begin=begin,
            end=end
        )
def _synthesize_single_c_extension(self, text, voice_code, output_file_path):
    """
    Synthesize a single text fragment, using the cew extension.

    If the ``CEW_SUBPROCESS_ENABLED`` runtime setting is on,
    ``aeneas.cewsubprocess`` is tried first; if it fails
    or returns ``None``, ``aeneas.cew`` is called directly.

    Return the duration of the synthesized text, in seconds.

    :rtype: (bool, (:class:`~aeneas.timevalue.TimeValue`, ))
    """
    self.log(u"Synthesizing using C extension...")

    end = None
    if self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED]:
        self.log(u"Using cewsubprocess to call aeneas.cew")
        try:
            self.log(u"Importing aeneas.cewsubprocess...")
            from aeneas.cewsubprocess import CEWSubprocess
            self.log(u"Importing aeneas.cewsubprocess... done")
            self.log(u"Calling aeneas.cewsubprocess...")
            runner = CEWSubprocess(rconf=self.rconf, logger=self.logger)
            end = runner.synthesize_single(output_file_path, voice_code, text)
            self.log(u"Calling aeneas.cewsubprocess... done")
        except Exception as exc:
            # NOTE not critical: fall through and call aeneas.cew directly
            self.log_exc(u"An unexpected error occurred while running cewsubprocess", exc, False, None)

    if end is None:
        self.log(u"Preparing c_text...")
        # Python 2 => pass byte strings, Python 3 => pass Unicode strings
        c_text = gf.safe_bytes(text) if gf.PY2 else gf.safe_unicode(text)
        self.log(u"Preparing c_text... done")

        self.log(u"Calling aeneas.cew directly")
        try:
            self.log(u"Importing aeneas.cew...")
            import aeneas.cew.cew
            self.log(u"Importing aeneas.cew... done")
            self.log(u"Calling aeneas.cew...")
            # only the third returned value (the end time) is used
            _, _, end = aeneas.cew.cew.synthesize_single(
                output_file_path,
                voice_code,
                c_text
            )
            end = TimeValue(end)
            self.log(u"Calling aeneas.cew... done")
        except Exception as exc:
            self.log_exc(u"An unexpected error occurred while running cew", exc, False, None)
            return (False, None)

    self.log(u"Synthesizing using C extension... done")
    return (True, (end, ))
def parse(self, input_text, syncmap):
    """
    Read fragments from an XML document whose root children
    each contain ``identifier``, ``start`` and ``end`` child elements.

    One fragment, with a single empty line of text,
    is added to ``syncmap`` for each child of the root.
    """
    from lxml import etree
    root = etree.fromstring(gf.safe_bytes(input_text))
    for fragment_node in root:
        for field in fragment_node:
            tag = field.tag
            value = field.text
            if tag == "identifier":
                identifier = gf.safe_unicode(value)
            elif tag == "start":
                begin = gf.time_from_ssmmm(value)
            elif tag == "end":
                end = gf.time_from_ssmmm(value)
        # TODO read text from additional text_file?
        self._add_fragment(syncmap=syncmap,
                           identifier=identifier,
                           lines=[u""],
                           begin=begin,
                           end=end)
def parse(self, input_text, syncmap):
    """
    Read fragments from a TTML document.

    The ``xml:lang`` attribute of the root is used as the language
    of every fragment; each ``<p>`` element in the TTML namespace
    yields one fragment, with its ``xml:id`` as identifier
    and its ``begin`` and ``end`` attributes as times.
    """
    from lxml import etree
    ttml_ns = "{http://www.w3.org/ns/ttml}"
    xml_ns = "{http://www.w3.org/XML/1998/namespace}"
    lang_attr = xml_ns + "lang"
    id_attr = xml_ns + "id"

    root = etree.fromstring(gf.safe_bytes(input_text))
    language = root.get(lang_attr)
    for paragraph in root.iter(ttml_ns + "p"):
        identifier = gf.safe_unicode(paragraph.get(id_attr))
        begin = gf.time_from_ttml(paragraph.get("begin"))
        end = gf.time_from_ttml(paragraph.get("end"))
        self._add_fragment(syncmap=syncmap,
                           identifier=identifier,
                           language=language,
                           lines=self._get_lines_from_node_text(paragraph),
                           begin=begin,
                           end=end)
def parse(self, input_text, syncmap):
    """
    Read fragments from an XML document whose root children
    carry ``id``, ``begin`` and ``end`` attributes
    and contain the fragment text in ``line`` child elements.

    One fragment is added to ``syncmap`` for each child of the root.
    """
    from lxml import etree
    root = etree.fromstring(gf.safe_bytes(input_text))
    for fragment_node in root:
        identifier = gf.safe_unicode(fragment_node.get("id"))
        begin = gf.time_from_ssmmm(fragment_node.get("begin"))
        end = gf.time_from_ssmmm(fragment_node.get("end"))
        # keep only the text of the <line> children, in document order
        lines = [
            gf.safe_unicode(node.text)
            for node in fragment_node
            if node.tag == "line"
        ]
        self._add_fragment(
            syncmap=syncmap,
            identifier=identifier,
            lines=lines,
            begin=begin,
            end=end
        )
def parse(self, input_text, syncmap):
    """
    Read fragments from a TTML document.

    Every ``<p>`` element in the TTML namespace becomes a fragment:
    ``xml:id`` gives the identifier, ``begin`` and ``end`` give the times,
    and the language is the ``xml:lang`` attribute of the root element.
    """
    from lxml import etree
    xml_ns = "{http://www.w3.org/XML/1998/namespace}"
    ttml_ns = "{http://www.w3.org/ns/ttml}"
    paragraph_tag = ttml_ns + "p"

    root = etree.fromstring(gf.safe_bytes(input_text))
    language = root.get(xml_ns + "lang")
    for node in root.iter(paragraph_tag):
        fragment_id = gf.safe_unicode(node.get(xml_ns + "id"))
        begin_time = gf.time_from_ttml(node.get("begin"))
        end_time = gf.time_from_ttml(node.get("end"))
        text_lines = self._get_lines_from_node_text(node)
        self._add_fragment(
            syncmap=syncmap,
            identifier=fragment_id,
            language=language,
            lines=text_lines,
            begin=begin_time,
            end=end_time
        )
def parse(self, input_text, syncmap):
    """
    Read fragments from an XML document in which each child of the root
    holds ``identifier``, ``start`` and ``end`` child elements.

    For each child of the root, one fragment with a single
    empty line of text is added to ``syncmap``.
    """
    from lxml import etree
    document_root = etree.fromstring(gf.safe_bytes(input_text))
    for entry in document_root:
        for item in entry:
            name = item.tag
            if name == "identifier":
                identifier = gf.safe_unicode(item.text)
            elif name == "start":
                begin = gf.time_from_ssmmm(item.text)
            elif name == "end":
                end = gf.time_from_ssmmm(item.text)
        # TODO read text from additional text_file?
        self._add_fragment(
            syncmap=syncmap,
            identifier=identifier,
            lines=[u""],
            begin=begin,
            end=end
        )
def _synthesize_multiple_c_extension(self, text_file, output_file_path, quit_after=None, backwards=False):
    """
    Synthesize multiple text fragments, using the cfw extension.

    Return a tuple (anchors, total_time, num_chars).

    :rtype: (bool, (list, :class:`~aeneas.exacttiming.TimeValue`, int))
    """
    self.log(u"Synthesizing using C extension...")

    # turn the Python arguments into the plain values passed to the extension
    try:
        c_quit_after = float(quit_after)
    except TypeError:
        c_quit_after = 0.0
    c_backwards = 1 if backwards else 0
    self.log([u"output_file_path: %s", output_file_path])
    self.log([u"c_quit_after:     %.3f", c_quit_after])
    self.log([u"c_backwards:      %d", c_backwards])

    # one (voice_code, text) pair per fragment
    self.log(u"Preparing u_text...")
    fragments = text_file.fragments
    u_text = []
    for frag in fragments:
        lang = frag.language
        text = frag.filtered_text
        if lang is None:
            lang = self.DEFAULT_LANGUAGE
        voice_code = self.VOICE_CODE_TO_SUBPROCESS[self._language_to_voice_code(lang)]
        u_text.append((voice_code, u"" if text is None else text))
    self.log(u"Preparing u_text... done")

    # call C extension
    sr = sf = intervals = None

    self.log(u"Preparing c_text...")
    # Python 2 => pass byte strings, Python 3 => pass Unicode strings
    to_c = gf.safe_bytes if gf.PY2 else gf.safe_unicode
    c_text = [(to_c(pair[0]), to_c(pair[1])) for pair in u_text]
    self.log(u"Preparing c_text... done")

    self.log(u"Calling aeneas.cfw directly")
    try:
        self.log(u"Importing aeneas.cfw...")
        import aeneas.cfw.cfw
        self.log(u"Importing aeneas.cfw... done")
        self.log(u"Calling aeneas.cfw...")
        sr, sf, intervals = aeneas.cfw.cfw.synthesize_multiple(
            output_file_path,
            c_quit_after,
            c_backwards,
            c_text
        )
        self.log(u"Calling aeneas.cfw... done")
    except Exception as exc:
        self.log_exc(u"An unexpected error occurred while running cfw", exc, False, None)
        return (False, None)

    self.log([u"sr: %d", sr])
    self.log([u"sf: %d", sf])

    # create output
    anchors = []
    num_chars = 0
    current_time = TimeValue("0.000")
    if backwards:
        fragments = fragments[::-1]
    for index in range(sf):
        # the index-th interval belongs to the index-th fragment
        frag = fragments[index]
        interval = intervals[index]
        # store for later output
        anchors.append([
            TimeValue(interval[0]),
            frag.identifier,
            frag.filtered_text
        ])
        # increase the character counter
        num_chars += frag.characters
        # update current_time
        current_time = TimeValue(interval[1])

    # return output
    # NOTE anchors do not make sense if backwards == True
    self.log([u"Returning %d time anchors", len(anchors)])
    self.log([u"Current time %.3f", current_time])
    self.log([u"Synthesized %d characters", num_chars])
    self.log(u"Synthesizing using C extension... done")
    return (True, (anchors, current_time, num_chars))
def perform_command(self):
    """
    Extract the MFCCs of the input audio file and save them
    to the output file, in the format selected by the options.

    Return the appropriate exit code.

    :rtype: int
    """
    arguments = self.actual_arguments
    if len(arguments) < 2:
        return self.print_help()
    input_file_path, output_file_path = arguments[0], arguments[1]

    # output options; the text format defaults to "%.18e"
    text_format = self.has_option_with_value(u"--format")
    if text_format is None:
        text_format = u"%.18e"
    as_binary = self.has_option([u"-b", u"--binary"])
    as_npz = self.has_option([u"-z", u"--npz"])
    as_npy = self.has_option([u"-n", u"--npy"])
    drop_first = self.has_option([u"-d", u"--delete-first"])
    transposed = self.has_option([u"-t", u"--transpose"])

    self.check_c_extensions("cmfcc")
    # the output file is checked only if the input file check succeeds
    if not (self.check_input_file(input_file_path) and self.check_output_file(output_file_path)):
        return self.ERROR_EXIT_CODE

    try:
        mfccs = AudioFileMFCC(input_file_path, rconf=self.rconf, logger=self.logger).all_mfcc
        if drop_first:
            mfccs = mfccs[1:, :]
        if transposed:
            mfccs = mfccs.transpose()
        if as_binary:
            # save as a raw C float64 binary file
            mapped = numpy.memmap(output_file_path, dtype="float64", mode="w+", shape=mfccs.shape)
            mapped[:] = mfccs[:]
            mapped.flush()
            del mapped
        elif as_npz or as_npy:
            # save as a .npz compressed binary file, or as a .npy binary file;
            # .npz wins if both options are given
            save_function = numpy.savez if as_npz else numpy.save
            with io.open(output_file_path, "wb") as output_file:
                save_function(output_file, mfccs)
        else:
            # save as a text file
            # NOTE: in Python 2, passing the fmt value a Unicode string crashes NumPy
            #       hence, converting back to bytes, which works in Python 3 too
            numpy.savetxt(output_file_path, mfccs, fmt=gf.safe_bytes(text_format))
        self.print_info(u"MFCCs shape: %d %d" % (mfccs.shape))
        self.print_success(u"MFCCs saved to '%s'" % (output_file_path))
        return self.NO_ERROR_EXIT_CODE
    except AudioFileConverterError:
        self.print_error(u"Unable to call the ffmpeg executable '%s'" % (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
        self.print_error(u"Make sure the path to ffmpeg is correct")
    except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
        self.print_error(u"Cannot read file '%s'" % (input_file_path))
        self.print_error(u"Check that its format is supported by ffmpeg")
    except OSError:
        self.print_error(u"Cannot write file '%s'" % (output_file_path))

    # reached only after one of the handled errors above
    return self.ERROR_EXIT_CODE
def _synthesize_single_subprocess_helper(self, text, voice_code, output_file_path=None, return_audio_data=True):
    """
    This is a helper function to synthesize a single text fragment via ``subprocess``.

    The command line is built from ``self.subprocess_arguments``,
    replacing the placeholder values (voice code, text path, wave path)
    and dropping the stdin/stdout placeholders.
    The text is passed to the TTS engine either through a temporary file
    or through stdin, and the audio data is obtained either from
    the file written by the engine or from its stdout.

    If ``output_file_path`` is ``None``,
    a temporary output file is created and removed
    at the end of the method, so the audio data does not persist to file.

    If ``return_audio_data`` is ``True``,
    return the result of ``self._read_audio_data``
    at the end of the function call;
    if ``False``, just return ``(True, None)`` in case of success.

    If ``text`` is empty, no TTS engine is called and
    ``(True, (TimeValue("0.000"), None, None, None))`` is returned.

    In case of failure, ``(False, None)`` is returned.

    :rtype: tuple (result, (duration, sample_rate, codec, data)) or (result, None)
    """
    # return zero if text is the empty string
    if len(text) == 0:
        #
        # NOTE sample_rate, codec, data do not matter
        #      if the duration is 0.000 => set them to None
        #
        self.log(u"len(text) is zero: returning 0.000")
        return (True, (TimeValue("0.000"), None, None, None))

    # create a temporary output file if needed
    synt_tmp_file = (output_file_path is None)
    if synt_tmp_file:
        self.log(
            u"Synthesizer helper called with output_file_path=None => creating temporary output file"
        )
        output_file_handler, output_file_path = gf.tmp_file(
            suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        self.log([u"Temporary output file path is '%s'", output_file_path])

    try:
        # if the TTS engine reads text from file,
        # write the text into a temporary file
        if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
            self.log(u"TTS engine reads text from file")
            tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(
                suffix=u".txt", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([
                u"Creating temporary text file '%s'...", tmp_text_file_path
            ])
            with io.open(tmp_text_file_path, "w",
                         encoding="utf-8") as tmp_text_file:
                tmp_text_file.write(text)
            self.log([
                u"Creating temporary text file '%s'... done",
                tmp_text_file_path
            ])
        else:
            self.log(u"TTS engine reads text from stdin")
            tmp_text_file_handler = None
            tmp_text_file_path = None

        # copy all relevant arguments,
        # substituting each placeholder with its actual value
        self.log(u"Creating arguments list...")
        arguments = []
        for arg in self.subprocess_arguments:
            if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                # the voice code may expand to several arguments
                arguments.extend(
                    self._voice_code_to_subprocess(voice_code))
            elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                arguments.append(voice_code)
            elif arg == self.CLI_PARAMETER_TEXT_PATH:
                arguments.append(tmp_text_file_path)
            elif arg == self.CLI_PARAMETER_WAVE_PATH:
                arguments.append(output_file_path)
            elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                # placeholder, do not append
                pass
            elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                # placeholder, do not append
                pass
            else:
                arguments.append(arg)
        self.log(u"Creating arguments list... done")

        # actual call via subprocess
        self.log(u"Calling TTS engine...")
        self.log([u"Calling with arguments '%s'", arguments])
        self.log([u"Calling with text '%s'", text])
        # NOTE(review): universal_newlines=True opens the pipes in text mode,
        #               while stdoutdata is written below to a file opened
        #               in binary mode -- confirm this works on Python 3
        proc = subprocess.Popen(arguments,
                                stdout=subprocess.PIPE,
                                stdin=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
            self.log(u"Passing text via stdin...")
            if gf.PY2:
                # Python 2 => pass the text as a byte string
                (stdoutdata,
                 stderrdata) = proc.communicate(input=gf.safe_bytes(text))
            else:
                (stdoutdata, stderrdata) = proc.communicate(input=text)
            self.log(u"Passing text via stdin... done")
        else:
            self.log(u"Passing text via file...")
            (stdoutdata, stderrdata) = proc.communicate()
            self.log(u"Passing text via file... done")
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()

        if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
            self.log(u"TTS engine wrote audio data to stdout")
            self.log(
                [u"Writing audio data to file '%s'...", output_file_path])
            with io.open(output_file_path, "wb") as output_file:
                output_file.write(stdoutdata)
            self.log([
                u"Writing audio data to file '%s'... done",
                output_file_path
            ])
        else:
            self.log(u"TTS engine wrote audio data to file")

        # the temporary text file is deleted only when this point is reached:
        # if an exception is raised above, it is left on disk
        if tmp_text_file_path is not None:
            self.log(
                [u"Delete temporary text file '%s'", tmp_text_file_path])
            gf.delete_file(tmp_text_file_handler, tmp_text_file_path)

        self.log(u"Calling TTS ... done")
    except Exception as exc:
        # NOTE(review): a temporary output file created above
        #               is not removed on this path
        self.log_exc(
            u"An unexpected error occurred while calling TTS engine via subprocess",
            exc, False, None)
        return (False, None)

    # check the file can be read
    if not gf.file_can_be_read(output_file_path):
        # NOTE(review): a temporary output file created above
        #               is not removed on this path either
        self.log_exc(
            u"Output file '%s' cannot be read" % (output_file_path), None,
            True, None)
        return (False, None)

    # read audio data
    ret = self._read_audio_data(
        output_file_path) if return_audio_data else (True, None)

    # if the output file was temporary, remove it
    if synt_tmp_file:
        self.log([
            u"Removing temporary output file path '%s'", output_file_path
        ])
        gf.delete_file(output_file_handler, output_file_path)

    # return audio data or (True, None)
    return ret
def _synthesize_single_subprocess(self, text, voice_code, output_file_path):
    """
    Synthesize a single text fragment via ``subprocess``.

    The command line is built from ``self.subprocess_arguments``,
    replacing the placeholder values (voice code, text path, wave path)
    and dropping the stdin/stdout placeholders.
    The text is passed to the TTS engine either through a temporary file
    or through stdin, and the audio data is obtained either from
    the file written by the engine or from its stdout.
    The resulting file is then read back with ``AudioFile``.

    In case of failure, ``(False, None)`` is returned.

    :rtype: tuple (result, (duration, sample_rate, encoding, samples))
    """
    self.log(u"Synthesizing using pure Python...")
    try:
        # if the TTS engine reads text from file,
        # write the text into a temporary file
        if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
            self.log(u"TTS engine reads text from file")
            tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(suffix=u".txt", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Creating temporary text file '%s'...", tmp_text_file_path])
            with io.open(tmp_text_file_path, "w", encoding="utf-8") as tmp_text_file:
                tmp_text_file.write(text)
            self.log([u"Creating temporary text file '%s'... done", tmp_text_file_path])
        else:
            self.log(u"TTS engine reads text from stdin")
            tmp_text_file_handler = None
            tmp_text_file_path = None

        # copy all relevant arguments,
        # substituting each placeholder with its actual value
        self.log(u"Creating arguments list...")
        arguments = []
        for arg in self.subprocess_arguments:
            if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                # the voice code may expand to several arguments
                arguments.extend(self._voice_code_to_subprocess(voice_code))
            elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                arguments.append(voice_code)
            elif arg == self.CLI_PARAMETER_TEXT_PATH:
                arguments.append(tmp_text_file_path)
            elif arg == self.CLI_PARAMETER_WAVE_PATH:
                arguments.append(output_file_path)
            elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                # placeholder, do not append
                pass
            elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                # placeholder, do not append
                pass
            else:
                arguments.append(arg)
        self.log(u"Creating arguments list... done")

        # actual call via subprocess
        self.log(u"Calling TTS engine...")
        self.log([u"Calling with arguments '%s'", arguments])
        self.log([u"Calling with text '%s'", text])
        # NOTE(review): universal_newlines=True opens the pipes in text mode,
        #               while stdoutdata is written below to a file opened
        #               in binary mode -- confirm this works on Python 3
        proc = subprocess.Popen(
            arguments,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True
        )
        if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
            self.log(u"Passing text via stdin...")
            if gf.PY2:
                # Python 2 => pass the text as a byte string
                (stdoutdata, stderrdata) = proc.communicate(input=gf.safe_bytes(text))
            else:
                (stdoutdata, stderrdata) = proc.communicate(input=text)
            self.log(u"Passing text via stdin... done")
        else:
            self.log(u"Passing text via file...")
            (stdoutdata, stderrdata) = proc.communicate()
            self.log(u"Passing text via file... done")
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()

        if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
            self.log(u"TTS engine wrote audio data to stdout")
            self.log([u"Writing audio data to file '%s'...", output_file_path])
            with io.open(output_file_path, "wb") as output_file:
                output_file.write(stdoutdata)
            self.log([u"Writing audio data to file '%s'... done", output_file_path])
        else:
            self.log(u"TTS engine wrote audio data to file")

        # the temporary text file is deleted only when this point is reached:
        # if an exception is raised above, it is left on disk
        if tmp_text_file_path is not None:
            self.log([u"Delete temporary text file '%s'", tmp_text_file_path])
            gf.delete_file(tmp_text_file_handler, tmp_text_file_path)

        self.log(u"Calling TTS ... done")
    except Exception as exc:
        self.log_exc(u"An unexpected error occurred while calling TTS engine via subprocess", exc, False, None)
        return (False, None)

    # check the file can be read
    if not gf.file_can_be_read(output_file_path):
        self.log_exc(u"Output file '%s' cannot be read" % (output_file_path), None, True, None)
        return (False, None)

    # return the duration of the output file,
    # together with its sample rate, format and samples
    try:
        # if we know the TTS outputs to PCM16 mono WAVE,
        # we can read samples directly from it,
        # without an intermediate conversion through ffmpeg
        audio_file = AudioFile(
            file_path=output_file_path,
            is_mono_wave=self.OUTPUT_MONO_WAVE,
            rconf=self.rconf,
            logger=self.logger
        )
        audio_file.read_samples_from_file()
        self.log([u"Duration of '%s': %f", output_file_path, audio_file.audio_length])
        self.log(u"Synthesizing using pure Python... done")
        return (True, (
            audio_file.audio_length,
            audio_file.audio_sample_rate,
            audio_file.audio_format,
            audio_file.audio_samples
        ))
    except (AudioFileUnsupportedFormatError, OSError) as exc:
        self.log_exc(u"An unexpected error occurred while trying to read the sythesized audio file", exc, True, None)
        return (False, None)
def _synthesize_multiple_c_extension(self, text_file, output_file_path, quit_after=None, backwards=False):
    """
    Synthesize multiple text fragments, using the cew extension.

    If the ``CEW_SUBPROCESS_ENABLED`` runtime setting is on,
    ``aeneas.cewsubprocess`` is tried first; if it fails
    or returns no sample rate, ``aeneas.cew`` is called directly.

    Return a tuple (anchors, total_time, num_chars).

    :rtype: (bool, (list, :class:`~aeneas.timevalue.TimeValue`, int))
    """
    self.log(u"Synthesizing using C extension...")

    # turn the Python arguments into the plain values passed to the extension
    try:
        c_quit_after = float(quit_after)
    except TypeError:
        c_quit_after = 0.0
    c_backwards = 1 if backwards else 0
    self.log([u"output_file_path: %s", output_file_path])
    self.log([u"c_quit_after:     %.3f", c_quit_after])
    self.log([u"c_backwards:      %d", c_backwards])

    # one (voice_code, text) pair per fragment
    self.log(u"Preparing u_text...")
    fragments = text_file.fragments
    u_text = []
    for frag in fragments:
        lang = frag.language
        text = frag.filtered_text
        if lang is None:
            lang = self.DEFAULT_LANGUAGE
        voice_code = self._language_to_voice_code(lang)
        u_text.append((voice_code, u"" if text is None else text))
    self.log(u"Preparing u_text... done")

    # call C extension
    sr = sf = intervals = None

    if self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED]:
        self.log(u"Using cewsubprocess to call aeneas.cew")
        try:
            self.log(u"Importing aeneas.cewsubprocess...")
            from aeneas.cewsubprocess import CEWSubprocess
            self.log(u"Importing aeneas.cewsubprocess... done")
            self.log(u"Calling aeneas.cewsubprocess...")
            runner = CEWSubprocess(rconf=self.rconf, logger=self.logger)
            sr, sf, intervals = runner.synthesize_multiple(output_file_path, c_quit_after, c_backwards, u_text)
            self.log(u"Calling aeneas.cewsubprocess... done")
        except Exception as exc:
            # NOTE not critical: fall through and call aeneas.cew directly
            self.log_exc(u"An unexpected error occurred while running cewsubprocess", exc, False, None)

    if sr is None:
        self.log(u"Preparing c_text...")
        # Python 2 => pass byte strings, Python 3 => pass Unicode strings
        to_c = gf.safe_bytes if gf.PY2 else gf.safe_unicode
        c_text = [(to_c(pair[0]), to_c(pair[1])) for pair in u_text]
        self.log(u"Preparing c_text... done")

        self.log(u"Calling aeneas.cew directly")
        try:
            self.log(u"Importing aeneas.cew...")
            import aeneas.cew.cew
            self.log(u"Importing aeneas.cew... done")
            self.log(u"Calling aeneas.cew...")
            sr, sf, intervals = aeneas.cew.cew.synthesize_multiple(
                output_file_path,
                c_quit_after,
                c_backwards,
                c_text
            )
            self.log(u"Calling aeneas.cew... done")
        except Exception as exc:
            self.log_exc(u"An unexpected error occurred while running cew", exc, False, None)
            return (False, None)

    self.log([u"sr: %d", sr])
    self.log([u"sf: %d", sf])

    # create output
    anchors = []
    num_chars = 0
    current_time = TimeValue("0.000")
    if backwards:
        fragments = fragments[::-1]
    for index in range(sf):
        # the index-th interval belongs to the index-th fragment
        frag = fragments[index]
        interval = intervals[index]
        # store for later output
        anchors.append([
            TimeValue(interval[0]),
            frag.identifier,
            frag.filtered_text
        ])
        # increase the character counter
        num_chars += frag.characters
        # update current_time
        current_time = TimeValue(interval[1])

    # return output
    # NOTE anchors do not make sense if backwards == True
    self.log([u"Returning %d time anchors", len(anchors)])
    self.log([u"Current time %.3f", current_time])
    self.log([u"Synthesized %d characters", num_chars])
    self.log(u"Synthesizing using C extension... done")
    return (True, (anchors, current_time, num_chars))