def go(
    input_filename,
    output_filename,
    word_unit="w",
    output_orthography="eng-arpabet",
):
    """Load an XML file, pass it through ``convert_xml``, and save the result.

    Args:
        input_filename: path handed to ``load_xml``.
        output_filename: path handed to ``save_xml`` for the converted document.
        word_unit: forwarded unchanged to ``convert_xml``.
        output_orthography: forwarded unchanged to ``convert_xml``.
    """
    source_tree = load_xml(input_filename)
    save_xml(
        output_filename,
        convert_xml(source_tree, word_unit, output_orthography),
    )
def go(
    input_filename,
    output_xml_filename,
    output_fsg_filename,
    output_dict_filename,
    unit,
    word_unit,
    out_orth,
):
    """Run ``end_to_end`` on one XML file and write its three outputs.

    Args:
        input_filename: path handed to ``load_xml``; also forwarded to
            ``end_to_end``.
        output_xml_filename: destination for the first ``end_to_end`` result,
            written with ``save_xml``.
        output_fsg_filename: destination for the second result, written with
            ``save_txt``.
        output_dict_filename: destination for the third result, written with
            ``save_txt``.
        unit: forwarded unchanged to ``end_to_end``.
        word_unit: forwarded unchanged to ``end_to_end``.
        out_orth: forwarded unchanged to ``end_to_end``.
    """
    source_tree = load_xml(input_filename)
    outputs = end_to_end(source_tree, input_filename, unit, word_unit, out_orth)
    # end_to_end yields exactly three values: (xml, fsg, dict), in that order.
    processed_tree, fsg_output, dict_output = outputs

    save_xml(output_xml_filename, processed_tree)
    save_txt(output_fsg_filename, fsg_output)
    save_txt(output_dict_filename, dict_output)
def main(input_path, output_path, unit="p"):
    """Load an XML file, apply ``add_lang_ids`` to it, and save it.

    Args:
        input_path: path handed to ``load_xml``.
        output_path: path handed to ``save_xml``.
        unit: forwarded unchanged to ``add_lang_ids``.
    """
    document = load_xml(input_path)
    # The return value of add_lang_ids is ignored; the same object that was
    # loaded is the one that gets saved.
    add_lang_ids(document, unit)
    save_xml(output_path, document)
def go(input_filename, output_filename, unit):
    """Build an FSG from the ``unit`` matches in an XML file and save it.

    Args:
        input_filename: path handed to ``load_xml``; also forwarded to
            ``make_fsg``.
        output_filename: path handed to ``save_txt`` for the ``make_fsg``
            result.
        unit: name appended to ``".//"`` to form the XPath query; also
            forwarded to ``make_fsg``.
    """
    document = load_xml(input_filename)
    matches = document.xpath(".//" + unit)
    save_txt(output_filename, make_fsg(matches, input_filename, unit))
def go(input_filename, output_filename):
    """Load an XML file, run ``tokenize_xml`` on it, and save the result.

    Args:
        input_filename: path handed to ``load_xml``.
        output_filename: path handed to ``save_xml`` for the value returned by
            ``tokenize_xml``.
    """
    source_tree = load_xml(input_filename)
    tokenized_tree = tokenize_xml(source_tree)
    save_xml(output_filename, tokenized_tree)
def go(input_filename, output_filename, unit):
    """Build an FSG from a whole XML document and save it as text.

    Args:
        input_filename: path handed to ``load_xml``; also forwarded to
            ``make_fsg``.
        output_filename: path handed to ``save_txt`` for the ``make_fsg``
            result.
        unit: forwarded unchanged to ``make_fsg``.
    """
    document = load_xml(input_filename)
    save_txt(output_filename, make_fsg(document, input_filename, unit))
def go(input_filename: str, output_filename: str) -> None:
    """Load an XML file, run ``add_ids`` on it, and save the result.

    Args:
        input_filename: path handed to ``load_xml``.
        output_filename: path handed to ``save_xml`` for the value returned by
            ``add_ids``.
    """
    source_tree = load_xml(input_filename)
    save_xml(output_filename, add_ids(source_tree))
def main(input_filename, output_filename, unit):
    """Load a file with ``load_xml`` and pass it to ``make_jsgf``.

    Args:
        input_filename: path handed to ``load_xml``.
        output_filename: accepted but not referenced by this body.
        unit: forwarded unchanged to ``make_jsgf``.

    NOTE(review): the value returned by ``make_jsgf`` is never written out and
    ``output_filename`` is unused here. Confirm whether a save step is missing
    or whether ``make_jsgf`` is only called for its side effects.
    """
    smil_tree = load_xml(input_filename)
    _unsaved_grammar = make_jsgf(smil_tree, unit)
def go(input_filename, output_filename, unit):
    """Build a JSGF from an XML document and save it as text.

    Args:
        input_filename: path handed to ``load_xml``; also forwarded to
            ``make_jsgf``.
        output_filename: path handed to ``save_txt`` for the ``make_jsgf``
            result.
        unit: forwarded unchanged to ``make_jsgf``.
    """
    document = load_xml(input_filename)
    grammar = make_jsgf(document, input_filename, unit)
    save_txt(output_filename, grammar)
def extract_files_from_SMIL(input_path):
    """Collect the files referenced by a SMIL document, plus the SMIL itself.

    The SMIL file is scanned for ``text`` and ``audio`` ``src`` attributes.
    Every XHTML file found that way is then opened and scanned for ``img``
    ``src`` attributes. Finally the SMIL file itself is added under the id
    ``"overlay"``.

    Args:
        input_path: path of the SMIL file. Paths of referenced XHTML files are
            resolved relative to this file's directory.

    Returns:
        A dict with two keys:

        * ``"media"``: the values view of the collected entries. Each entry is
          whatever ``process_src_attrib`` returned (this function reads its
          ``"origin_path"``, ``"mimetype"``, ``"id"`` and ``"ext"`` keys), with
          an extra ``"overlay"`` key set on XHTML entries, plus one
          hand-built entry for the SMIL file.
        * ``"xhtml"``: a list of ``{"id": ...}`` dicts, one per XHTML entry.
    """
    smil = load_xml(input_path)
    found_files = {}
    xhtml_ids = []
    dirname = os.path.dirname(input_path)

    # add media referenced in the SMIL file itself
    queries = [
        {
            "xpath": ".//i:text/@src",
            "id_prefix": "",
            "mimetypes": {"xhtml": "application/xhtml+xml"},
        },
        {
            "xpath": ".//i:audio/@src",
            "id_prefix": "audio-",
            "mimetypes": {"wav": "audio/wav", "mp3": "audio/mpeg"},
        },
    ]
    for query in queries:
        for src_text in xpath_default(smil, query["xpath"]):
            entry = process_src_attrib(
                src_text, query["id_prefix"], query["mimetypes"]
            )
            # Skip unrecognized sources and keep only the first occurrence
            # of each path.
            if entry is None or entry["origin_path"] in found_files:
                continue
            if entry["mimetype"] == "application/xhtml+xml":
                entry["overlay"] = 'media-overlay="overlay"'
                xhtml_ids.append({"id": entry["id"]})
            found_files[entry["origin_path"]] = entry

    # add media referenced within the xhtml files (e.g. imgs)
    within_xhtml_queries = [
        {
            "xpath": ".//i:img/@src",
            "id_prefix": "img-",
            "mimetypes": {
                "png": "image/png",
                "jpg": "image/jpeg",
                "jpeg": "image/jpeg",
                "gif": "image/gif",
            },
        }
    ]
    SEARCHABLE_EXTENSIONS = ["xhtml"]
    # Iterate over a snapshot: the loop body inserts into found_files, and
    # mutating a dict while iterating its live view raises RuntimeError.
    # Entries added here are images, which are never searchable themselves,
    # so skipping them in this pass loses nothing.
    for container in list(found_files.values()):
        if container["ext"] not in SEARCHABLE_EXTENSIONS:
            continue
        origin_path = os.path.join(dirname, container["origin_path"])
        xhtml = load_xml_with_encoding(origin_path)
        for query in within_xhtml_queries:
            for src_text in xpath_default(xhtml, query["xpath"]):
                embedded = process_src_attrib(
                    src_text, query["id_prefix"], query["mimetypes"]
                )
                if (
                    embedded is not None
                    and embedded["origin_path"] not in found_files
                ):
                    found_files[embedded["origin_path"]] = embedded

    # add this file
    found_files[input_path] = {
        "origin_path": input_path,
        "dest_path": os.path.basename(input_path),
        "id": "overlay",
        "mimetype": "application/smil+xml",
        "ext": "smil",
    }

    return {"media": found_files.values(), "xhtml": xhtml_ids}
def go(input_filename, output_filename, unit):
    """Build a dictionary from the ``unit`` matches in an XML file and save it.

    Args:
        input_filename: path handed to ``load_xml``; also forwarded to
            ``make_dict``.
        output_filename: path handed to ``save_txt`` for the ``make_dict``
            result.
        unit: name appended to ``".//"`` to form the XPath query; also
            forwarded to ``make_dict``.
    """
    document = load_xml(input_filename)
    matches = document.xpath(".//" + unit)
    save_txt(output_filename, make_dict(matches, input_filename, unit))