Ejemplo n.º 1
0
def go(input_filename,
       output_filename,
       word_unit="w",
       output_orthography="eng-arpabet"):
    """Run ``convert_xml`` over an XML file and save what it returns.

    The document is read from ``input_filename`` with ``load_xml``, handed to
    ``convert_xml`` together with ``word_unit`` and ``output_orthography``,
    and the value it returns is written to ``output_filename`` with
    ``save_xml``.
    """
    source_tree = load_xml(input_filename)
    result_tree = convert_xml(
        source_tree,
        word_unit,
        output_orthography,
    )
    save_xml(output_filename, result_tree)
Ejemplo n.º 2
0
def go(
    input_filename,
    output_xml_filename,
    output_fsg_filename,
    output_dict_filename,
    unit,
    word_unit,
    out_orth,
):
    """Run ``end_to_end`` on an XML file and write out its three results.

    ``end_to_end`` is expected to return a 3-tuple; the first item is saved
    with ``save_xml`` to ``output_xml_filename``, the second and third are
    saved with ``save_txt`` to ``output_fsg_filename`` and
    ``output_dict_filename`` respectively.
    """
    source_tree = load_xml(input_filename)
    processed_tree, grammar, dictionary = end_to_end(
        source_tree, input_filename, unit, word_unit, out_orth
    )
    save_xml(output_xml_filename, processed_tree)
    # Both text outputs go through the same writer, in the original order.
    for destination, text in (
        (output_fsg_filename, grammar),
        (output_dict_filename, dictionary),
    ):
        save_txt(destination, text)
Ejemplo n.º 3
0
def main(input_path, output_path, unit="p"):
    """Load an XML file, apply ``add_lang_ids`` to it, and save it.

    ``input_path`` is read with ``load_xml`` and the same object is written
    to ``output_path`` with ``save_xml`` after ``add_lang_ids(document, unit)``
    has been called on it.
    """
    document = load_xml(input_path)
    # The return value is ignored; presumably add_lang_ids edits the tree
    # in place -- confirm against its definition.
    add_lang_ids(document, unit)
    save_xml(output_path, document)
Ejemplo n.º 4
0
def go(input_filename, output_filename, unit):
    """Build an FSG from the ``unit`` elements of an XML file and save it.

    Every descendant matching ``.//<unit>`` is selected from the loaded
    document and passed to ``make_fsg``; the value it returns is written to
    ``output_filename`` with ``save_txt``.
    """
    document = load_xml(input_filename)
    descendant_query = ".//" + unit
    matching_elements = document.xpath(descendant_query)
    grammar = make_fsg(matching_elements, input_filename, unit)
    save_txt(output_filename, grammar)
Ejemplo n.º 5
0
def go(input_filename, output_filename):
    """Load an XML file, run ``tokenize_xml`` on it, and save the result.

    The value returned by ``tokenize_xml`` (not the originally loaded
    object) is what gets written to ``output_filename``.
    """
    source_tree = load_xml(input_filename)
    tokenized_tree = tokenize_xml(source_tree)
    save_xml(output_filename, tokenized_tree)
Ejemplo n.º 6
0
def go(input_filename, output_filename, unit):
    """Build an FSG from an XML file and save it as text.

    The whole loaded document is passed to ``make_fsg`` along with
    ``input_filename`` and ``unit``; the returned value is written to
    ``output_filename`` with ``save_txt``.
    """
    document = load_xml(input_filename)
    grammar = make_fsg(document, input_filename, unit)
    save_txt(output_filename, grammar)
Ejemplo n.º 7
0
def go(input_filename: str, output_filename: str) -> None:
    """Load an XML file, run ``add_ids`` on it, and save the result.

    The value returned by ``add_ids`` is what gets written to
    ``output_filename`` with ``save_xml``.
    """
    source_tree = load_xml(input_filename)
    tree_with_ids = add_ids(source_tree)
    save_xml(output_filename, tree_with_ids)
Ejemplo n.º 8
0
def main(input_filename, output_filename, unit):
    """Load a SMIL/XML file and build a JSGF from it with ``make_jsgf``.

    NOTE(review): as written, ``output_filename`` is never used and the
    value returned by ``make_jsgf`` is discarded, so nothing is saved.
    This looks like a missing final write step -- confirm against the
    callers before relying on this function to produce a file.
    """
    smil_tree = load_xml(input_filename)
    grammar = make_jsgf(smil_tree, unit)
Ejemplo n.º 9
0
def go(input_filename, output_filename, unit):
    """Build a JSGF from an XML file and save it as text.

    The loaded document is passed to ``make_jsgf`` along with
    ``input_filename`` and ``unit``; the returned value is written to
    ``output_filename`` with ``save_txt``.
    """
    document = load_xml(input_filename)
    grammar = make_jsgf(document, input_filename, unit)
    save_txt(output_filename, grammar)
Ejemplo n.º 10
0
def extract_files_from_SMIL(input_path):
    """Collect the files referenced by a SMIL file, plus the SMIL file itself.

    The SMIL document is searched for ``text`` and ``audio`` ``src``
    attributes; every XHTML file found that way is then opened and searched
    for ``img`` ``src`` attributes. Each ``src`` value is turned into an
    entry by ``process_src_attrib``; entries are de-duplicated on their
    ``"origin_path"``.

    Args:
        input_path: Path of the SMIL file. Referenced XHTML files are
            resolved relative to this file's directory.

    Returns:
        A dict with two keys:

        * ``"media"``: a view over all collected entries (referenced files
          first, the SMIL file itself last).
        * ``"xhtml"``: a list of ``{"id": ...}`` dicts, one per XHTML entry
          found in the SMIL file.
    """
    smil = load_xml(input_path)
    found_files = {}
    xhtml_ids = []
    dirname = os.path.dirname(input_path)

    # add media referenced in the SMIL file itself
    queries = [
        {
            "xpath": ".//i:text/@src",
            "id_prefix": "",
            "mimetypes": {
                "xhtml": "application/xhtml+xml"
            },
        },
        {
            "xpath": ".//i:audio/@src",
            "id_prefix": "audio-",
            "mimetypes": {
                "wav": "audio/wav",
                "mp3": "audio/mpeg"
            },
        },
    ]

    for query in queries:
        for src_text in xpath_default(smil, query["xpath"]):
            entry = process_src_attrib(src_text, query["id_prefix"],
                                       query["mimetypes"])
            # process_src_attrib may return None; such sources are skipped,
            # as are files that were already recorded.
            if entry is None or entry["origin_path"] in found_files:
                continue
            if entry["mimetype"] == "application/xhtml+xml":
                entry["overlay"] = 'media-overlay="overlay"'
                xhtml_ids.append({"id": entry["id"]})
            found_files[entry["origin_path"]] = entry

    # add media referenced within the xhtml files (e.g. imgs)
    within_xhtml_queries = [{
        "xpath": ".//i:img/@src",
        "id_prefix": "img-",
        "mimetypes": {
            "png": "image/png",
            "jpg": "image/jpeg",
            "jpeg": "image/jpeg",
            "gif": "image/gif",
        },
    }]

    SEARCHABLE_EXTENSIONS = ["xhtml"]
    # Iterate over a snapshot: the loop body inserts into found_files, and
    # growing a dict while iterating its live view raises RuntimeError.
    for container in list(found_files.values()):
        if container["ext"] not in SEARCHABLE_EXTENSIONS:
            continue
        origin_path = os.path.join(dirname, container["origin_path"])
        xhtml = load_xml_with_encoding(origin_path)
        for query in within_xhtml_queries:
            for src_text in xpath_default(xhtml, query["xpath"]):
                entry = process_src_attrib(src_text, query["id_prefix"],
                                           query["mimetypes"])
                if entry is not None and entry[
                        "origin_path"] not in found_files:
                    found_files[entry["origin_path"]] = entry

    # add this file
    found_files[input_path] = {
        "origin_path": input_path,
        "dest_path": os.path.basename(input_path),
        "id": "overlay",
        "mimetype": "application/smil+xml",
        "ext": "smil",
    }

    return {"media": found_files.values(), "xhtml": xhtml_ids}
Ejemplo n.º 11
0
def go(input_filename, output_filename, unit):
    """Build a dictionary from the ``unit`` elements of an XML file and save it.

    Every descendant matching ``.//<unit>`` is selected from the loaded
    document and passed to ``make_dict``; the value it returns is written to
    ``output_filename`` with ``save_txt``.
    """
    document = load_xml(input_filename)
    descendant_query = ".//" + unit
    matching_elements = document.xpath(descendant_query)
    dictionary = make_dict(matching_elements, input_filename, unit)
    save_txt(output_filename, dictionary)