Example #1
0
def go(input_filename, mapping_dir, output_xml_filename, output_fsg_filename,
       output_dict_filename, unit, word_unit, out_orth):
    """Run the end-to-end pipeline on one XML file and write its three outputs.

    The document at ``input_filename`` is loaded and handed to
    ``end_to_end`` together with ``unit``, ``word_unit``, ``out_orth`` and
    ``mapping_dir``. The three values it returns are written, in order, to
    ``output_xml_filename`` (as XML), ``output_fsg_filename`` and
    ``output_dict_filename`` (both as text).
    """
    source_tree = load_xml(input_filename)
    processed_xml, grammar_text, dictionary_text = end_to_end(
        source_tree, input_filename, unit, word_unit, out_orth, mapping_dir)

    # Same write order as the pipeline's return order: XML, FSG, dictionary.
    save_xml(output_xml_filename, processed_xml)
    for destination, text in ((output_fsg_filename, grammar_text),
                              (output_dict_filename, dictionary_text)):
        save_txt(destination, text)
def main(input_xml_path, input_smil_path, output_smil_path):
    """Render a sentence-level SMIL file from an existing SMIL alignment.

    Args:
        input_xml_path: Path of the XML text document; only SMIL ``text``
            references whose file basename matches this document are used.
        input_smil_path: Path of the SMIL file whose ``par`` elements pair
            text references with audio clips.
        output_smil_path: Path the rendered SMIL text is written to.

    For every ``s`` element of the XML document, one entry per audio file is
    emitted, carrying the start/end values that ``iterate_over_children``
    reports for that sentence.
    """
    xml = load_xml(input_xml_path)
    xml_filename = os.path.basename(input_xml_path)
    smil = load_xml(input_smil_path)

    # element id -> list of (audio file, clipBegin, clipEnd) tuples
    clips_by_id = defaultdict(list)
    for par in xpath_default(smil, ".//i:par"):
        for text_src in xpath_default(par, ".//i:text/@src"):
            # partition() tolerates a src without a "#fragment": the id is
            # then empty and the reference is skipped instead of raising
            # ValueError as tuple-unpacking a split() result would.
            text_file, _, text_id = text_src.partition("#")
            if os.path.basename(text_file) != xml_filename:
                continue
            if not text_id:
                # Nothing to key the clips on; checked once per reference
                # rather than once per audio element.
                continue
            for audio in xpath_default(par, ".//i:audio"):
                clips_by_id[text_id].append((
                    audio.attrib["src"],
                    float(audio.attrib["clipBegin"]),
                    float(audio.attrib["clipEnd"]),
                ))

    results = {"sentences": []}

    for sentence in xpath_default(xml, ".//i:s"):
        # Sentinel defaults: a very large start and a negative end.
        # NOTE(review): presumably iterate_over_children narrows these to the
        # earliest start / latest end per audio file - confirm in the helper.
        beginnings = defaultdict(lambda: 100000000000000.0)
        endings = defaultdict(lambda: -1.0)

        beginnings, endings = iterate_over_children(sentence, clips_by_id,
                                                    beginnings, endings)

        for audio_path, beginning in beginnings.items():
            results["sentences"].append({
                "text_path": xml_filename,
                "text_id": sentence.attrib["id"],
                "audio_path": audio_path,
                "start": beginning,
                "end": endings[audio_path],
            })

    output_smil_text = pystache.render(SMIL_TEMPLATE, results)
    save_txt(output_smil_path, output_smil_text)
Example #3
0
def go(input_filename,
       output_filename,
       word_unit="w",
       output_orthography="eng-arpabet",
       mapping_dir=None):
    """Convert an XML file with ``convert_xml`` and save the result.

    The document is read from ``input_filename``, passed to ``convert_xml``
    with ``word_unit``, ``output_orthography`` and ``mapping_dir``, and the
    returned document is written to ``output_filename``.
    """
    source_tree = load_xml(input_filename)
    save_xml(
        output_filename,
        convert_xml(source_tree, word_unit, output_orthography,
                    mapping_dir=mapping_dir),
    )
Example #4
0
def go(input_filename, output_filename, inventory_dir=None):
    """Tokenize an XML file with ``tokenize_xml`` and save the result.

    ``inventory_dir`` is forwarded unchanged to ``tokenize_xml``; the
    document it returns is written to ``output_filename``.
    """
    tokenized = tokenize_xml(load_xml(input_filename), inventory_dir)
    save_xml(output_filename, tokenized)
def go(input_filename, output_filename):
    """Load an XML file, pass it through ``add_ids``, and save the result."""
    document = load_xml(input_filename)
    save_xml(output_filename, add_ids(document))
Example #6
0
def main(input_path, output_path, mapping_dir, unit="p"):
    """Load an XML file, apply ``add_lang_ids`` to it, and save it.

    ``mapping_dir`` and ``unit`` are forwarded to ``add_lang_ids``.
    NOTE(review): the helper's return value is discarded and the loaded tree
    itself is saved, so it presumably annotates the tree in place - confirm.
    """
    tree = load_xml(input_path)
    add_lang_ids(tree, mapping_dir, unit)
    save_xml(output_path, tree)
Example #7
0
def go(input_filename, output_filename, unit):
    """Build a dictionary from an XML file with ``make_dict`` and save it.

    The loaded document, ``input_filename`` and ``unit`` are passed to
    ``make_dict``; its result is written as text to ``output_filename``.
    """
    document = load_xml(input_filename)
    save_txt(output_filename, make_dict(document, input_filename, unit))
def go(input_filename, output_filename, unit):
    """Build an FSG from an XML file with ``make_fsg`` and save it.

    The loaded document, ``input_filename`` and ``unit`` are passed to
    ``make_fsg``; its result is written as text to ``output_filename``.
    """
    document = load_xml(input_filename)
    grammar_text = make_fsg(document, input_filename, unit)
    save_txt(output_filename, grammar_text)
Example #9
0
def extract_files_from_SMIL(input_path):
    """Collect the files referenced by a SMIL file, plus the SMIL file itself.

    Text and audio ``src`` references are read from the SMIL document; each
    referenced ``xhtml`` file is then loaded (relative to the SMIL file's
    directory) and scanned for ``img`` sources. Every reference is turned
    into an entry by ``process_src_attrib``; references it returns ``None``
    for, and repeated ``origin_path`` values, are ignored.

    Args:
        input_path: Path of the SMIL file.

    Returns:
        A dict with two keys:
            "media": list of entry dicts, one per distinct file, ending with
                an entry for the SMIL file itself (id ``"overlay"``).
            "xhtml": list of ``{"id": ...}`` dicts, one per XHTML entry
                found in the SMIL file.
    """
    smil = load_xml(input_path)
    found_files = {}
    xhtml_ids = []
    dirname = os.path.dirname(input_path)

    # add media referenced in the SMIL file itself
    queries = [{
        "xpath": ".//i:text/@src",
        "id_prefix": "",
        "mimetypes": {
            "xhtml": "application/xhtml+xml"
        }
    }, {
        "xpath": ".//i:audio/@src",
        "id_prefix": "audio-",
        "mimetypes": {
            "wav": "audio/wav",
            "mp3": "audio/mpeg"
        }
    }]

    for query in queries:
        for src_text in xpath_default(smil, query["xpath"]):
            entry = process_src_attrib(src_text, query["id_prefix"],
                                       query["mimetypes"])
            if entry is None or entry["origin_path"] in found_files:
                continue
            if entry['mimetype'] == 'application/xhtml+xml':
                # XHTML documents get the media-overlay attribute and are
                # additionally reported by id under the "xhtml" key.
                entry['overlay'] = 'media-overlay="overlay"'
                xhtml_ids.append({"id": entry['id']})
            found_files[entry["origin_path"]] = entry

    # add media referenced within the xhtml files (e.g. imgs)
    within_xhtml_queries = [{
        "xpath": ".//i:img/@src",
        "id_prefix": "img-",
        "mimetypes": {
            "png": "image/png",
            "jpg": "image/jpeg",
            "jpeg": "image/jpeg",
            "gif": "image/gif"
        }
    }]

    SEARCHABLE_EXTENSIONS = ["xhtml"]
    # Iterate over a snapshot: the loop body inserts into found_files, and
    # growing a dict while iterating its live values() view raises
    # RuntimeError on Python 3.
    for container in list(found_files.values()):
        if container["ext"] not in SEARCHABLE_EXTENSIONS:
            continue
        origin_path = os.path.join(dirname, container["origin_path"])
        xhtml = load_xml_with_encoding(origin_path)
        for query in within_xhtml_queries:
            for src_text in xpath_default(xhtml, query["xpath"]):
                embedded = process_src_attrib(src_text, query["id_prefix"],
                                              query["mimetypes"])
                if (embedded is not None
                        and embedded["origin_path"] not in found_files):
                    found_files[embedded["origin_path"]] = embedded

    # add this file
    found_files[input_path] = {
        "origin_path": input_path,
        "dest_path": os.path.basename(input_path),
        "id": "overlay",
        "mimetype": "application/smil+xml",
        "ext": "smil"
    }

    # Return a concrete list rather than a live dict view so callers get a
    # stable, indexable sequence.
    return {"media": list(found_files.values()), "xhtml": xhtml_ids}
Example #10
0
def go(input_filename, output_filename, unit):
    """Build a JSGF grammar from an XML file with ``make_jsgf`` and save it.

    The loaded document, ``input_filename`` and ``unit`` are passed to
    ``make_jsgf``; its result is written as text to ``output_filename``.
    """
    grammar_text = make_jsgf(load_xml(input_filename), input_filename, unit)
    save_txt(output_filename, grammar_text)