#!/usr/bin/env python import faust import query import lxml.etree import os.path text_xp = faust.xpath("//tei:text") for f in query.matches(query.documentary_by_name(), "//tei:text and not(//ge:document)"): relpath = faust.relative_path(f) xml = lxml.etree.parse(f) text = text_xp(xml)[0] gedocument = lxml.etree.Element(faust.ns("ge") + "document", nsmap=faust.namespaces) surface = lxml.etree.Element(faust.ns("tei") + "surface") gedocument.append(surface) zone = lxml.etree.Element(faust.ns("tei") + "zone") zone.set("type", "main") surface.append(zone) text.addprevious(gedocument) out = os.path.join("/tmp/faust/" + relpath) outdir = os.path.dirname(out) try: os.makedirs(outdir) except:
import query
import faust
import os.path
import sys
import os
import shutil
from bn import relpath


def destination(file):
    """Return the path under ``<faust.xml_dir>/attic`` that mirrors *file*.

    The location of *file* is first expressed relative to ``faust.xml_dir``
    (via ``bn.relpath``) and then re-rooted below the ``attic`` directory.
    """
    inside_xml_dir = relpath(file, faust.xml_dir)
    attic_root = os.path.join(faust.xml_dir, 'attic')
    return os.path.join(attic_root, inside_xml_dir)


if __name__ == "__main__":
    # Transcripts matched by query.deleatur_xp are listed, and moved into
    # the attic only when the script is called with -e.
    deleatur_transcripts = query.matches(faust.transcript_files(), query.deleatur_xp)
    execute = '-e' in sys.argv

    if execute:
        print("executing")

    for source in deleatur_transcripts:
        # Always show the planned move: source first, then its target.
        print(' ' + source)
        target = destination(source)
        print('-->' + target)
        print('')

        if execute:
            target_dir = os.path.dirname(target)
            print(target_dir)
            # Create the mirrored directory on demand before moving.
            if not os.path.isdir(target_dir):
                os.makedirs(target_dir)
            shutil.move(source, target)

    if not execute:
        print("To execute, call with -e option")
#!/usr/bin/env python
# For every file returned by query.documentary_by_name() that has a tei:text
# element but no ge:document element, insert an empty ge:document skeleton
# (one tei:surface holding one tei:zone of type "main") directly before the
# first tei:text, and write the result below /tmp/faust/ at the file's
# relative path. The input files themselves are not overwritten.
import faust
import query
import lxml.etree
import os.path

text_xp = faust.xpath("//tei:text")

for f in query.matches(query.documentary_by_name(),
                       "//tei:text and not(//ge:document)"):
    relpath = faust.relative_path(f)
    xml = lxml.etree.parse(f)
    # The query above requires a tei:text element, so index 0 exists.
    text = text_xp(xml)[0]

    # Build <ge:document><tei:surface><tei:zone type="main"/></tei:surface></ge:document>.
    # NOTE(review): faust.ns(...) presumably yields the "{uri}" tag prefix - confirm.
    gedocument = lxml.etree.Element(faust.ns("ge") + "document",
                                    nsmap=faust.namespaces)
    surface = lxml.etree.Element(faust.ns("tei") + "surface")
    gedocument.append(surface)
    zone = lxml.etree.Element(faust.ns("tei") + "zone")
    zone.set("type", "main")
    surface.append(zone)

    # Place the skeleton as the sibling immediately preceding tei:text.
    text.addprevious(gedocument)

    # os.path.join with a single argument is a no-op, so this is plain
    # string concatenation onto the /tmp/faust/ prefix.
    out = os.path.join("/tmp/faust/" + relpath)
    outdir = os.path.dirname(out)
    try:
        os.makedirs(outdir)
    except OSError:
        # An already existing output directory is expected on repeated
        # runs; any other failure (permissions, a file in the way, ...)
        # must not be silently ignored.
        if not os.path.isdir(outdir):
            raise
    xml.write(out, encoding="UTF-8")
def delete_empty_text_elements():
    """Run the delete_empty_text_elements stylesheet over matching transcripts.

    The transcripts are selected with an XPath that looks for a tei:text
    element without any descendant text node, in a document that has no
    tei:div which is of type 'template' or contains a comment. Each selected
    file is handed to tei_transform together with the compiled stylesheet
    "xsl/delete_empty_text_elements.xsl" by way of transform_all.
    """
    empty_text_xp = "//tei:text[not(.//text() or //tei:div[@type='template' or .//comment()])]"
    candidates = query.matches(faust.transcript_files(), empty_text_xp)

    stylesheet = lxml.etree.parse("xsl/delete_empty_text_elements.xsl")
    delete_empty = lxml.etree.XSLT(stylesheet)

    def apply_stylesheet(transcript):
        # Per-file callback expected by transform_all.
        return tei_transform(transcript, delete_empty)

    transform_all(candidates, apply_stylesheet)
def to_convert_unencoded():
    """Return the to-convert files that do not match query.encoded_xp.

    The relative paths from static_to_convert() are made absolute with
    faust.absolute_path and then filtered through query.matches using the
    negation of query.encoded_xp.
    """
    absolute_files = []
    for rel_file in static_to_convert():
        absolute_files.append(faust.absolute_path(rel_file))

    not_encoded_xp = "not(" + query.encoded_xp + ")"
    return query.matches(absolute_files, not_encoded_xp)