def xml_names_from_facsimiles():
    """Translate every facsimile URI into the path of a transcript XML file.

    For each item of ``faust.facsimiles()`` the first
    ``len(faust.faust_scheme + "://facsimile/")`` characters are dropped
    (the prefix itself is not checked), ``".xml"`` is appended, and the
    result is joined below ``<faust.xml_dir>/transcript``.

    Returns whatever ``map`` yields: a list on Python 2, a lazy iterator
    on Python 3.
    """
    skip = len(faust.faust_scheme + "://facsimile/")

    def transcript_path(uri):
        # Everything after the URI prefix is reused as the relative file name.
        relative_name = uri[skip:] + ".xml"
        return os.path.join(faust.xml_dir, "transcript", relative_name)

    return map(transcript_path, faust.facsimiles())
def invalid_facsimile_links():
    """Return the transcript files that contain an invalid facsimile link.

    A facsimile link is the ``url`` attribute of a
    ``tei:facsimile/tei:graphic`` element. It counts as invalid when it is
    not among the URIs returned by ``faust.facsimiles()``.

    Returns the result of ``filter`` over ``faust.transcript_files()``
    (a list on Python 2, a lazy iterator on Python 3). Nothing is printed.
    """
    # Build the lookup once: membership is tested for every link of every file.
    known_facsimiles = set(faust.facsimiles())
    graphic_urls = faust.xpath("//tei:facsimile/tei:graphic/@url")

    def facs_invalid(path):
        xml = lxml.etree.parse(path)
        # A file is reported as soon as one of its links is unknown. The
        # previous check used `in`, which flagged files with *valid* links.
        return any(url not in known_facsimiles for url in graphic_urls(xml))

    return filter(facs_invalid, faust.transcript_files())
# coding=UTF-8
#
# Correct the links to facsimile files
#
import faust
import transform
import lxml
import lxml.etree  # `import lxml` alone does not load the etree submodule
import os
import sys
import rev_desc

# Parsed XML document read from <faust.xml_dir>/template/tei.xml.
doc_template = lxml.etree.parse(
    os.path.join(faust.xml_dir, "template", "tei.xml"))
# XPath selecting the tei:graphic children of tei:facsimile elements.
graphic_xp = faust.xpath("//tei:facsimile/tei:graphic")
# XPath selecting the teiHeader of the root tei:TEI element.
header_xp = faust.xpath("/tei:TEI/tei:teiHeader")
# Whatever faust.facsimiles() returns at import time.
valid_graphic_uris = faust.facsimiles()


def xml_names_from_facsimiles():
    """Translate every facsimile URI into the path of a transcript XML file.

    For each item of ``faust.facsimiles()`` the first
    ``len(faust.faust_scheme + "://facsimile/")`` characters are dropped
    (the prefix itself is not checked), ``".xml"`` is appended, and the
    result is joined below ``<faust.xml_dir>/transcript``.

    Returns whatever ``map`` yields: a list on Python 2, a lazy iterator
    on Python 3.
    """
    skip = len(faust.faust_scheme + "://facsimile/")

    def to_xml_path(facs_uri):
        relative_name = facs_uri[skip:] + ".xml"
        return os.path.join(faust.xml_dir, "transcript", relative_name)

    return map(to_xml_path, faust.facsimiles())


def facs_uri_from_xml(path):
    """Build a facsimile URI from the path of an XML file.

    Drops the first ``len(faust.xml_dir + "/facsimile/")`` characters and
    the trailing ``len(".xml")`` characters of ``path``, then prepends
    ``faust.faust_scheme + "://facsimile"``. Neither the prefix nor the
    suffix of ``path`` is checked.
    """
    # NOTE(review): xml_names_from_facsimiles() builds paths below
    # "transcript", not "facsimile". "/facsimile/" is exactly as long as
    # "/transcript", so for such a path the slice presumably keeps the
    # separator after "transcript", which would explain why the URI prefix
    # below has no trailing slash. Confirm this is intended.
    head = len(faust.xml_dir + "/facsimile/")
    tail = len(".xml")
    stripped = path[head:-tail]
    return faust.faust_scheme + "://facsimile" + stripped
def xml_names_from_facsimiles():
    """Return the transcript XML path belonging to each facsimile URI.

    Each item of ``faust.facsimiles()`` loses its first
    ``len(faust.faust_scheme + "://facsimile/")`` characters (without
    verifying that they really are that prefix), gains a ``".xml"``
    suffix, and is placed below ``<faust.xml_dir>/transcript``.

    The return value is the result of ``map``: a list on Python 2, a lazy
    iterator on Python 3.
    """
    uri_prefix = faust.faust_scheme + "://facsimile/"
    offset = len(uri_prefix)
    return map(
        lambda uri: os.path.join(
            faust.xml_dir, "transcript", uri[offset:] + ".xml"),
        faust.facsimiles())
#!/usr/bin/env python
# coding=UTF-8
#
# Correct the links to facsimile files
#
import faust
import transform
import lxml
import lxml.etree  # `import lxml` alone does not load the etree submodule
import os
import sys
import rev_desc

# Parsed XML document read from <faust.xml_dir>/template/tei.xml.
doc_template = lxml.etree.parse(
    os.path.join(faust.xml_dir, "template", "tei.xml"))
# XPath selecting the tei:graphic children of tei:facsimile elements.
graphic_xp = faust.xpath("//tei:facsimile/tei:graphic")
# XPath selecting the teiHeader of the root tei:TEI element.
header_xp = faust.xpath("/tei:TEI/tei:teiHeader")
# Whatever faust.facsimiles() returns at import time.
valid_graphic_uris = faust.facsimiles()


def xml_names_from_facsimiles():
    """Translate every facsimile URI into the path of a transcript XML file.

    For each item of ``faust.facsimiles()`` the first
    ``len(faust.faust_scheme + "://facsimile/")`` characters are dropped
    (the prefix itself is not checked), ``".xml"`` is appended, and the
    result is joined below ``<faust.xml_dir>/transcript``.

    Returns whatever ``map`` yields: a list on Python 2, a lazy iterator
    on Python 3.
    """
    skip = len(faust.faust_scheme + "://facsimile/")

    def to_xml_path(facs_uri):
        relative_name = facs_uri[skip:] + ".xml"
        return os.path.join(faust.xml_dir, "transcript", relative_name)

    return map(to_xml_path, faust.facsimiles())


def facs_uri_from_xml(path):
    """Build a facsimile URI from the path of an XML file.

    Drops the first ``len(faust.xml_dir + "/facsimile/")`` characters and
    the trailing ``len(".xml")`` characters of ``path``, then prepends
    ``faust.faust_scheme + "://facsimile"``. Neither the prefix nor the
    suffix of ``path`` is checked.
    """
    # NOTE(review): xml_names_from_facsimiles() builds paths below
    # "transcript", not "facsimile". "/facsimile/" is exactly as long as
    # "/transcript", so for such a path the slice presumably keeps the
    # separator after "transcript", which would explain why the URI prefix
    # below has no trailing slash. Confirm this is intended.
    head = len(faust.xml_dir + "/facsimile/")
    tail = len(".xml")
    stripped = path[head:-tail]
    return faust.faust_scheme + "://facsimile" + stripped


def make_xml_templates():
    xml_templates = xml_names_from_facsimiles()
    # check if all directories exist