def go(input_filename, mapping_dir, output_xml_filename, output_fsg_filename,
       output_dict_filename, unit, word_unit, out_orth):
    """Run ``end_to_end`` on one XML file and write its three results.

    The document at ``input_filename`` is loaded and handed to
    ``end_to_end`` together with ``unit``, ``word_unit``, ``out_orth`` and
    ``mapping_dir``.  The three values it returns are then written out:
    the first with ``save_xml`` to ``output_xml_filename``, the second and
    third with ``save_txt`` to ``output_fsg_filename`` and
    ``output_dict_filename`` respectively.
    """
    source_xml = load_xml(input_filename)
    processed_xml, fsg_text, dict_text = end_to_end(
        source_xml, input_filename, unit, word_unit, out_orth, mapping_dir
    )

    save_xml(output_xml_filename, processed_xml)
    save_txt(output_fsg_filename, fsg_text)
    save_txt(output_dict_filename, dict_text)
def main(input_xml_path, input_smil_path, output_smil_path):
    """Render a SMIL file with one entry per sentence and audio file.

    The ``<par>`` elements of the input SMIL are scanned for ``<text>``
    references into ``input_xml_path`` and for the ``<audio>`` clips that
    accompany them.  For every ``i:s`` element of the XML document those
    clips are passed to ``iterate_over_children``, and each
    ``(audio file, start, end)`` it reports becomes one item of the
    ``"sentences"`` list rendered through ``SMIL_TEMPLATE``.

    Args:
        input_xml_path: Path of the XML document whose sentences are timed.
        input_smil_path: Path of the SMIL file holding the existing timings.
        output_smil_path: Path the rendered SMIL text is saved to.

    Raises:
        KeyError: If an ``<audio>`` element of a relevant ``<par>`` lacks
            ``src``, ``clipBegin`` or ``clipEnd``, or an ``i:s`` element
            that yields timings has no ``id`` attribute.
        ValueError: If ``clipBegin``/``clipEnd`` is not parseable by
            ``float``.
    """
    xml = load_xml(input_xml_path)
    xml_filename = os.path.basename(input_xml_path)
    smil = load_xml(input_smil_path)

    # Maps an element id of the XML document to its (audio src, begin, end)
    # clips.  Kept as a defaultdict because it is handed as-is to
    # iterate_over_children.
    clips_by_id = defaultdict(list)
    for par in xpath_default(smil, ".//i:par"):
        text_id = ""
        for text_src in xpath_default(par, ".//i:text/@src"):
            # partition() tolerates a src without "#": such a reference has
            # no fragment and therefore contributes no element id.
            src_path, _, fragment = text_src.partition("#")
            # Only adopt ids that point into the document being processed.
            # Previously the id was assigned before the filename was
            # compared, so references to other documents leaked through.
            if os.path.basename(src_path) == xml_filename:
                text_id = fragment
        if not text_id:
            continue
        for audio in xpath_default(par, ".//i:audio"):
            clips_by_id[text_id].append((
                audio.attrib["src"],
                float(audio.attrib["clipBegin"]),
                float(audio.attrib["clipEnd"]),
            ))

    results = {"sentences": []}
    for sentence in xpath_default(xml, ".//i:s"):
        # Sentinel defaults: a very large start and a negative end.
        # NOTE(review): presumably iterate_over_children narrows these with
        # min/max per audio file; confirm against its implementation.
        beginnings = defaultdict(lambda: 100000000000000.0)
        endings = defaultdict(lambda: -1.0)
        beginnings, endings = iterate_over_children(
            sentence, clips_by_id, beginnings, endings
        )
        for audio_path, beginning in beginnings.items():
            results["sentences"].append({
                "text_path": xml_filename,
                "text_id": sentence.attrib["id"],
                "audio_path": audio_path,
                "start": beginning,
                "end": endings[audio_path],
            })

    output_smil_text = pystache.render(SMIL_TEMPLATE, results)
    save_txt(output_smil_path, output_smil_text)
def go(input_filename, output_filename, word_unit="w",
       output_orthography="eng-arpabet", mapping_dir=None):
    """Load an XML file, run ``convert_xml`` on it, and save the result.

    ``word_unit`` and ``output_orthography`` are forwarded positionally to
    ``convert_xml``; ``mapping_dir`` is forwarded as a keyword argument.
    The value ``convert_xml`` returns is written to ``output_filename``
    with ``save_xml``.
    """
    source_xml = load_xml(input_filename)
    result_xml = convert_xml(
        source_xml,
        word_unit,
        output_orthography,
        mapping_dir=mapping_dir,
    )
    save_xml(output_filename, result_xml)
def go(input_filename, output_filename, inventory_dir=None):
    """Load an XML file, pass it through ``tokenize_xml``, and save it.

    ``inventory_dir`` is forwarded unchanged to ``tokenize_xml``; the
    value that call returns is what gets written to ``output_filename``.
    """
    source_xml = load_xml(input_filename)
    tokenized_xml = tokenize_xml(source_xml, inventory_dir)
    save_xml(output_filename, tokenized_xml)
def go(input_filename, output_filename):
    """Load an XML file, pass it through ``add_ids``, and save the result.

    The value returned by ``add_ids`` is what gets written to
    ``output_filename`` with ``save_xml``.
    """
    source_xml = load_xml(input_filename)
    xml_with_ids = add_ids(source_xml)
    save_xml(output_filename, xml_with_ids)
def main(input_path, output_path, mapping_dir, unit="p"):
    """Load an XML file, call ``add_lang_ids`` on it, and save it.

    ``add_lang_ids`` is invoked only for its effect on the loaded tree:
    its return value is ignored and the same object that ``load_xml``
    produced is written to ``output_path``.
    """
    document = load_xml(input_path)
    # Return value intentionally unused; the loaded object itself is saved.
    add_lang_ids(document, mapping_dir, unit)
    save_xml(output_path, document)
def go(input_filename, output_filename, unit):
    """Build a dictionary from an XML file with ``make_dict`` and save it.

    ``make_dict`` receives the loaded document, the input filename and
    ``unit``; whatever it returns is written to ``output_filename`` with
    ``save_txt``.
    """
    source_xml = load_xml(input_filename)
    dict_text = make_dict(source_xml, input_filename, unit)
    save_txt(output_filename, dict_text)
def go(input_filename, output_filename, unit):
    """Build an FSG from an XML file with ``make_fsg`` and save it.

    ``make_fsg`` receives the loaded document, the input filename and
    ``unit``; whatever it returns is written to ``output_filename`` with
    ``save_txt``.
    """
    source_xml = load_xml(input_filename)
    fsg_text = make_fsg(source_xml, input_filename, unit)
    save_txt(output_filename, fsg_text)
def extract_files_from_SMIL(input_path):
    """Collect the files referenced by a SMIL file and by its XHTML texts.

    Three sources contribute entries, in this order:

    1. ``<text src>`` and ``<audio src>`` attributes of the SMIL file;
    2. ``<img src>`` attributes inside every XHTML file found in step 1
       (resolved relative to the SMIL file's directory);
    3. the SMIL file itself, registered under the id ``"overlay"``.

    Each ``src`` is turned into an entry by ``process_src_attrib``; entries
    for which it returns ``None`` are skipped, and the first entry seen for
    a given ``origin_path`` wins.

    Args:
        input_path: Path of the SMIL file to inspect.

    Returns:
        A dict with two keys: ``"media"`` is a list of all entry dicts
        collected, and ``"xhtml"`` is a list of ``{"id": ...}`` dicts, one
        per XHTML document referenced by the SMIL file.
    """
    smil = load_xml(input_path)
    found_files = {}
    xhtml_ids = []
    dirname = os.path.dirname(input_path)

    # add media referenced in the SMIL file itself
    queries = [{
        "xpath": ".//i:text/@src",
        "id_prefix": "",
        "mimetypes": {
            "xhtml": "application/xhtml+xml"
        }
    }, {
        "xpath": ".//i:audio/@src",
        "id_prefix": "audio-",
        "mimetypes": {
            "wav": "audio/wav",
            "mp3": "audio/mpeg"
        }
    }]

    for query in queries:
        for src_text in xpath_default(smil, query["xpath"]):
            entry = process_src_attrib(
                src_text, query["id_prefix"], query["mimetypes"]
            )
            if entry is None or entry["origin_path"] in found_files:
                continue
            if entry['mimetype'] == 'application/xhtml+xml':
                # XHTML documents get the media-overlay attribute and are
                # also reported separately through "xhtml".
                entry['overlay'] = 'media-overlay="overlay"'
                xhtml_ids.append({"id": entry['id']})
            found_files[entry["origin_path"]] = entry

    # add media referenced within the xhtml files (e.g. imgs)
    within_xhtml_queries = [{
        "xpath": ".//i:img/@src",
        "id_prefix": "img-",
        "mimetypes": {
            "png": "image/png",
            "jpg": "image/jpeg",
            "jpeg": "image/jpeg",
            "gif": "image/gif"
        }
    }]

    SEARCHABLE_EXTENSIONS = ["xhtml"]
    # Iterate over a snapshot: new entries are added to found_files inside
    # this loop, and mutating a dict while iterating its live view raises
    # RuntimeError on Python 3.
    for container in list(found_files.values()):
        if container["ext"] not in SEARCHABLE_EXTENSIONS:
            continue
        origin_path = os.path.join(dirname, container["origin_path"])
        xhtml = load_xml_with_encoding(origin_path)
        for query in within_xhtml_queries:
            for src_text in xpath_default(xhtml, query["xpath"]):
                embedded = process_src_attrib(
                    src_text, query["id_prefix"], query["mimetypes"]
                )
                if (embedded is not None
                        and embedded["origin_path"] not in found_files):
                    found_files[embedded["origin_path"]] = embedded

    # add this file
    found_files[input_path] = {
        "origin_path": input_path,
        "dest_path": os.path.basename(input_path),
        "id": "overlay",
        "mimetype": "application/smil+xml",
        "ext": "smil"
    }

    # Return a real list rather than a live view tied to a local dict.
    return {"media": list(found_files.values()), "xhtml": xhtml_ids}
def go(input_filename, output_filename, unit):
    """Build a JSGF from an XML file with ``make_jsgf`` and save it.

    ``make_jsgf`` receives the loaded document, the input filename and
    ``unit``; whatever it returns is written to ``output_filename`` with
    ``save_txt``.
    """
    source_xml = load_xml(input_filename)
    jsgf_text = make_jsgf(source_xml, input_filename, unit)
    save_txt(output_filename, jsgf_text)