def go(input_filename, mapping_dir, output_xml_filename,
       output_fsg_filename, output_dict_filename,
       unit, word_unit, out_orth):
    """Run ``end_to_end`` on one XML file and write its three outputs.

    The XML loaded from ``input_filename`` is handed to ``end_to_end``
    together with ``input_filename``, ``unit``, ``word_unit``, ``out_orth``
    and ``mapping_dir``.  ``end_to_end`` returns three values, which are
    saved in this order: the XML to ``output_xml_filename`` (via
    ``save_xml``), then two text results to ``output_fsg_filename`` and
    ``output_dict_filename`` (via ``save_txt``).
    """
    source_xml = load_xml(input_filename)
    processed_xml, fsg_text, dict_text = end_to_end(
        source_xml, input_filename, unit, word_unit, out_orth, mapping_dir
    )
    save_xml(output_xml_filename, processed_xml)
    save_txt(output_fsg_filename, fsg_text)
    save_txt(output_dict_filename, dict_text)
def main(argv=None):
    """Command-line entry point: align an input document with an audio file.

    Parses ``argv`` with the parser returned by ``make_argparse()``, checks
    that none of the output files already exist (unless
    ``args.force_overwrite`` is set), runs ``align_audio`` and then writes
    three files that all share the ``args.outputfile`` prefix:

    * the tokenized XML (``.xhtml`` when ``args.output_xhtml`` is set,
      otherwise the input file's extension),
    * a copy of the audio file, keeping its original extension,
    * a ``.smil`` file produced by ``make_smil``.

    Args:
        argv: Argument list passed straight to ``parser.parse_args``.
    """
    parser = make_argparse()
    args = parser.parse_args(argv)

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level)

    # Both create_input_xml and align_audio receive the same save_temps value.
    temps_prefix = args.outputfile if args.save_temps else None

    def refuse_existing(path):
        # Report an error through the parser when `path` would be overwritten.
        if os.path.exists(path) and not args.force_overwrite:
            parser.error("Output file %s exists already, did you mean to do that?"
                         % path)

    if args.text_input:
        # create_input_xml returns a pair; the second item replaces the input
        # path.  The first item is bound to a local so it stays referenced
        # until main() returns (presumably a temporary-file handle -- confirm).
        _temp_handle, args.inputfile = create_input_xml(
            args.inputfile,
            text_language=args.text_language,
            save_temps=temps_prefix,
        )

    # Work out every output path up front, checking each one as we go so the
    # first clash is reported before any alignment work starts.
    if args.output_xhtml:
        tokenized_xml_path = '%s.xhtml' % args.outputfile
    else:
        input_ext = os.path.splitext(args.inputfile)[1]
        tokenized_xml_path = '%s%s' % (args.outputfile, input_ext)
    refuse_existing(tokenized_xml_path)

    smil_path = args.outputfile + '.smil'
    refuse_existing(smil_path)

    wav_ext = os.path.splitext(args.wavfile)[1]
    wav_path = args.outputfile + wav_ext
    refuse_existing(wav_path)

    results = align_audio(args.inputfile, args.wavfile, save_temps=temps_prefix)

    tokenized = results['tokenized']
    if args.output_xhtml:
        convert_to_xhtml(tokenized)
    save_xml(tokenized_xml_path, tokenized)

    # The SMIL refers to its sibling files by base name only.
    smil = make_smil(os.path.basename(tokenized_xml_path),
                     os.path.basename(wav_path),
                     results)
    shutil.copy(args.wavfile, wav_path)
    save_txt(smil_path, smil)
def main(input_xml_path, input_smil_path, output_smil_path):
    """Write a sentence-level SMIL file derived from an existing SMIL file.

    The SMIL at ``input_smil_path`` is scanned for ``par`` elements.  For
    each one, the fragment ids of its ``text`` sources are paired with the
    ``(src, clipBegin, clipEnd)`` of its ``audio`` elements.  Only text
    sources whose file name (base name) equals that of ``input_xml_path``
    and that carry a non-empty ``#fragment`` are used; anything else is
    ignored.

    Every ``s`` element of the XML is then passed to
    ``iterate_over_children`` along with that id-to-clips mapping, and one
    entry per returned audio path is rendered through ``SMIL_TEMPLATE`` and
    saved to ``output_smil_path``.

    Args:
        input_xml_path: Path of the XML document whose ``s`` elements are
            to be timed.
        input_smil_path: Path of the existing SMIL file.
        output_smil_path: Path the rendered SMIL text is written to.

    Raises:
        KeyError: If a used ``audio`` element lacks ``src``, ``clipBegin``
            or ``clipEnd``, or an ``s`` element that yields results lacks
            ``id``.
        ValueError: If ``clipBegin``/``clipEnd`` is not a plain float.
    """
    xml = load_xml(input_xml_path)
    xml_filename = os.path.basename(input_xml_path)
    smil = load_xml(input_smil_path)

    # text fragment id -> list of (audio file, begin, end) clips
    clips_by_id = defaultdict(list)
    for par in xpath_default(smil, ".//i:par"):
        matching_ids = []
        for text_src in xpath_default(par, ".//i:text/@src"):
            # partition() tolerates a src without any "#fragment"; the id is
            # then empty and the source is skipped below.
            filename, _, text_id = text_src.partition("#")
            if os.path.basename(filename) != xml_filename:
                # Reference into some other document: not ours.
                continue
            if text_id:
                matching_ids.append(text_id)
        if not matching_ids:
            continue

        clips = [
            (
                audio.attrib["src"],
                float(audio.attrib["clipBegin"]),
                float(audio.attrib["clipEnd"]),
            )
            for audio in xpath_default(par, ".//i:audio")
        ]
        for text_id in matching_ids:
            for clip in clips:
                clips_by_id[text_id].append(clip)

    results = {"sentences": []}
    for sentence in xpath_default(xml, ".//i:s"):
        # Seed values handed to iterate_over_children: a very large start and
        # a negative end (presumably narrowed there via min/max -- confirm).
        beginnings = defaultdict(lambda: 100000000000000.0)
        endings = defaultdict(lambda: -1.0)
        beginnings, endings = iterate_over_children(
            sentence, clips_by_id, beginnings, endings
        )
        for audio_path, beginning in beginnings.items():
            results["sentences"].append({
                "text_path": xml_filename,
                "text_id": sentence.attrib["id"],
                "audio_path": audio_path,
                "start": beginning,
                "end": endings[audio_path],
            })

    output_smil_text = pystache.render(SMIL_TEMPLATE, results)
    save_txt(output_smil_path, output_smil_text)
def go(input_filename, output_filename, unit):
    """Load the XML at ``input_filename``, pass it to ``make_dict`` together
    with ``input_filename`` and ``unit``, and save the returned text to
    ``output_filename``.
    """
    save_txt(
        output_filename,
        make_dict(load_xml(input_filename), input_filename, unit),
    )
def go(seg_path, text_path, audio_path, output_path):
    """Parse ``seg_path`` with ``parse_hypseg``, build SMIL text from the
    result with ``make_smil(text_path, audio_path, ...)`` and save it to
    ``output_path``.
    """
    segmentation = parse_hypseg(seg_path)
    smil_text = make_smil(text_path, audio_path, segmentation)
    save_txt(output_path, smil_text)
def go(input_filename, output_filename, unit):
    """Load the XML at ``input_filename``, pass it to ``make_fsg`` together
    with ``input_filename`` and ``unit``, and save the returned text to
    ``output_filename``.
    """
    save_txt(
        output_filename,
        make_fsg(load_xml(input_filename), input_filename, unit),
    )
def save_txt_to_dir(output_path, dest_path, txt):
    """Write ``txt`` to ``dest_path`` inside the directory ``output_path``.

    The three-argument signature mimics the interface of ``save_txt_zip``;
    the two path components are joined with ``os.path.join`` and the
    actual writing is delegated to ``save_txt``.
    """
    destination = os.path.join(output_path, dest_path)
    save_txt(destination, txt)
def go(input_filename, output_filename, unit):
    """Load the XML at ``input_filename``, pass it to ``make_jsgf`` together
    with ``input_filename`` and ``unit``, and save the returned text to
    ``output_filename``.
    """
    save_txt(
        output_filename,
        make_jsgf(load_xml(input_filename), input_filename, unit),
    )