error_msg = "[%s:%s] %s" % (line, column, msg) if xml_file in validation_report: validation_report[xml_file].append(error_msg) else: validation_report[xml_file] = [error_msg] validation_queue = [] # Validate all XML documents for xml_file in faust.xml_files(): try: if faust.is_tei_document(xml_file): validation_queue.append(xml_file) validate() except IOError: sys.stderr.write("I/O error while validating " + xml_file + "\n") validate(True) # Generate validation report if len(validation_report) > 0: report = "" xml_url = faust.config.get("xml", "url") xml_files = validation_report.keys() xml_files.sort() for xml_file in xml_files: report += (xml_url + xml_file + "\n\n") report += ("\n".join(validation_report[xml_file]) + "\n\n") report += ("".rjust(78, "=") + "\n\n") faust.send_report("TEI-P5 Validierungsfehler", report)
sys.stderr.write("I/O error while extracting status from " + xml_file + "\n") except lxml.etree.XMLSyntaxError: sys.stderr.write("XML error while extracting status from " + xml_file + "\n") if len(status) == 0: # no status given status_unknown += 1 else: for s in status: # increment relevant status entry status_dict[s] += 1 return status_dict, status_unknown if __name__ == "__main__": status_dict, status_unknown = count() status_keys = status_dict.keys() # generate and send report report = "".rjust(40, "=") + "\n" for status in status_keys: report += status + str(status_dict[status]).rjust(40 - len(status)) + "\n" report += "".rjust(40, "-") + "\n" report += "n/a" + str(status_unknown).rjust(40 - len("n/a")) + "\n" report += "".rjust(40, "=") + "\n" faust.send_report("Stand der Transkription", report)
except lxml.etree.XMLSyntaxError: sys.stderr.write("XML error while extracting status from " + xml_file + "\n") if len(status) == 0: # no status given status_unknown += 1 else: for s in status: # increment relevant status entry status_dict[s] += 1 return status_dict, status_unknown if __name__ == "__main__": status_dict, status_unknown = count() status_keys = status_dict.keys() # generate and send report report = "".rjust(40, "=") + "\n" for status in status_keys: report += status + str( status_dict[status]).rjust(40 - len(status)) + "\n" report += "".rjust(40, "-") + "\n" report += "n/a" + str(status_unknown).rjust(40 - len("n/a")) + "\n" report += "".rjust(40, "=") + "\n" faust.send_report("Stand der Transkription", report)