Esempio n. 1
0
def go(input_filename, mapping_dir, output_xml_filename, output_fsg_filename,
       output_dict_filename, unit, word_unit, out_orth):
    """Run ``end_to_end`` on one XML file and write its three outputs.

    The document is read from ``input_filename`` with ``load_xml`` and
    handed to ``end_to_end`` together with ``input_filename``, ``unit``,
    ``word_unit``, ``out_orth`` and ``mapping_dir`` (forwarded unchanged).

    ``end_to_end`` returns three values, written in this order:

    * the first with ``save_xml`` to ``output_xml_filename``;
    * the second with ``save_txt`` to ``output_fsg_filename``;
    * the third with ``save_txt`` to ``output_dict_filename``.
    """
    source_doc = load_xml(input_filename)
    pipeline_outputs = end_to_end(source_doc, input_filename, unit,
                                  word_unit, out_orth, mapping_dir)
    converted_doc, grammar_text, dictionary_text = pipeline_outputs

    save_xml(output_xml_filename, converted_doc)
    save_txt(output_fsg_filename, grammar_text)
    save_txt(output_dict_filename, dictionary_text)
Esempio n. 2
0
def main(argv=None):
    """Command-line entry point: align text with audio and write the results.

    ``argv`` is passed to the parser built by ``make_argparse``; ``None``
    makes argparse fall back to ``sys.argv``.

    Three files are derived from ``args.outputfile`` and written:

    * the tokenized XML (``.xhtml`` when ``--output_xhtml`` is set,
      otherwise the input file's own extension);
    * a ``.smil`` file produced by ``make_smil``;
    * a copy of the input audio, keeping the audio file's extension.

    If any of these already exists and ``args.force_overwrite`` is not set,
    the parser reports an error (argparse's ``error`` exits the program)
    before any alignment work is done.
    """
    parser = make_argparse()
    args = parser.parse_args(argv)
    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO)

    # Intermediate files are only kept when the user asked for them.
    temps_prefix = args.outputfile if args.save_temps else None

    def refuse_to_clobber(path):
        """Abort through the parser when ``path`` exists and overwriting is off."""
        if not args.force_overwrite and os.path.exists(path):
            parser.error(
                "Output file %s exists already, did you mean to do that?"
                % path)

    if args.text_input:
        # NOTE(review): the first return value is never used afterwards;
        # presumably it is a temporary-file object that must stay referenced
        # so the generated XML is not removed early -- confirm against
        # create_input_xml.
        temp_handle, args.inputfile = create_input_xml(
            args.inputfile,
            text_language=args.text_language,
            save_temps=temps_prefix)

    if args.output_xhtml:
        xml_ext = '.xhtml'
    else:
        xml_ext = os.path.splitext(args.inputfile)[1]
    tokenized_xml_path = '%s%s' % (args.outputfile, xml_ext)
    refuse_to_clobber(tokenized_xml_path)

    smil_path = args.outputfile + '.smil'
    refuse_to_clobber(smil_path)

    wav_path = args.outputfile + os.path.splitext(args.wavfile)[1]
    refuse_to_clobber(wav_path)

    alignment = align_audio(args.inputfile, args.wavfile,
                            save_temps=temps_prefix)
    tokenized = alignment['tokenized']
    if args.output_xhtml:
        # Return value is ignored, so the conversion is expected to modify
        # the tree in place.
        convert_to_xhtml(tokenized)
    save_xml(tokenized_xml_path, tokenized)

    # The SMIL refers to its sibling files by base name only.
    smil_text = make_smil(os.path.basename(tokenized_xml_path),
                          os.path.basename(wav_path), alignment)
    shutil.copy(args.wavfile, wav_path)
    save_txt(smil_path, smil_text)
def main(input_xml_path, input_smil_path, output_smil_path):
    """Build a sentence-level SMIL from an XML document and an existing SMIL.

    Steps:

    1. Collect, for every ``par`` element of the input SMIL, the audio clips
       ``(src, clipBegin, clipEnd)`` attached to each element id that a
       ``text`` child references in *this* XML file (matched by base name).
    2. For every ``s`` element of the XML, let ``iterate_over_children``
       fill per-audio-file beginning and ending times from those clips.
    3. Render one entry per (sentence, audio file) pair through
       ``SMIL_TEMPLATE`` and save the text to ``output_smil_path``.

    A ``text`` reference whose ``src`` has no ``#fragment`` (or an empty
    one) names no element, so it is skipped rather than raising.

    Args:
        input_xml_path: path of the XML document containing the sentences.
        input_smil_path: path of the SMIL whose clips are re-grouped.
        output_smil_path: path the rendered SMIL text is written to.

    Raises:
        KeyError: if an ``audio`` element lacks ``src``, ``clipBegin`` or
            ``clipEnd``, or a sentence lacks an ``id`` attribute.
        ValueError: if ``clipBegin``/``clipEnd`` is not parseable by
            ``float``.
    """
    xml = load_xml(input_xml_path)
    xml_filename = os.path.basename(input_xml_path)
    smil = load_xml(input_smil_path)

    # element id -> list of (audio src, clip begin, clip end)
    clips_by_id = defaultdict(list)
    for par in xpath_default(smil, ".//i:par"):
        for text_src in xpath_default(par, ".//i:text/@src"):
            # partition() tolerates a src without "#": the id is then "".
            text_file, _, element_id = text_src.partition("#")
            if os.path.basename(text_file) != xml_filename:
                continue  # reference into some other document
            if not element_id:
                continue  # nothing to attach the clips to
            for audio in xpath_default(par, ".//i:audio"):
                clips_by_id[element_id].append((
                    audio.attrib["src"],
                    float(audio.attrib["clipBegin"]),
                    float(audio.attrib["clipEnd"]),
                ))

    results = {"sentences": []}

    for sentence in xpath_default(xml, ".//i:s"):
        # Sentinel defaults handed to iterate_over_children; presumably it
        # narrows them with min()/max() over the clips -- confirm there.
        beginnings = defaultdict(lambda: 100000000000000.0)
        endings = defaultdict(lambda: -1.0)

        beginnings, endings = iterate_over_children(sentence, clips_by_id,
                                                    beginnings, endings)

        for audio_path, beginning in beginnings.items():
            results["sentences"].append({
                "text_path": xml_filename,
                "text_id": sentence.attrib["id"],
                "audio_path": audio_path,
                "start": beginning,
                "end": endings[audio_path],
            })

    output_smil_text = pystache.render(SMIL_TEMPLATE, results)
    save_txt(output_smil_path, output_smil_text)
Esempio n. 4
0
def go(input_filename, output_filename, unit):
    """Write the output of ``make_dict`` for one XML file.

    The document loaded from ``input_filename`` is passed to ``make_dict``
    along with ``input_filename`` and ``unit``; the returned text is saved
    to ``output_filename`` with ``save_txt``.
    """
    source_doc = load_xml(input_filename)
    dictionary_text = make_dict(source_doc, input_filename, unit)
    save_txt(output_filename, dictionary_text)
Esempio n. 5
0
def go(seg_path, text_path, audio_path, output_path):
    """Turn a segmentation file into SMIL text and save it.

    ``seg_path`` is parsed with ``parse_hypseg``; the parsed result is
    passed to ``make_smil`` together with ``text_path`` and ``audio_path``,
    and the text it returns is written to ``output_path`` with ``save_txt``.
    """
    segmentation = parse_hypseg(seg_path)
    smil_text = make_smil(text_path, audio_path, segmentation)
    save_txt(output_path, smil_text)
def go(input_filename, output_filename, unit):
    """Write the output of ``make_fsg`` for one XML file.

    The document loaded from ``input_filename`` is passed to ``make_fsg``
    along with ``input_filename`` and ``unit``; the returned text is saved
    to ``output_filename`` with ``save_txt``.
    """
    source_doc = load_xml(input_filename)
    grammar_text = make_fsg(source_doc, input_filename, unit)
    save_txt(output_filename, grammar_text)
Esempio n. 7
0
def save_txt_to_dir(output_path, dest_path, txt):
    """Write ``txt`` to ``dest_path`` inside the directory ``output_path``.

    The argument order mirrors ``save_txt_zip`` so the two functions can be
    used interchangeably; the actual writing is delegated to ``save_txt``.
    """
    target_file = os.path.join(output_path, dest_path)
    save_txt(target_file, txt)
Esempio n. 8
0
def go(input_filename, output_filename, unit):
    """Write the output of ``make_jsgf`` for one XML file.

    The document loaded from ``input_filename`` is passed to ``make_jsgf``
    along with ``input_filename`` and ``unit``; the returned text is saved
    to ``output_filename`` with ``save_txt``.
    """
    source_doc = load_xml(input_filename)
    grammar_text = make_jsgf(source_doc, input_filename, unit)
    save_txt(output_filename, grammar_text)