def get_entities():
    """Build a mapping of entity names to their text from the Voko DTDs.

    Entities are read from ``src/dtd/vokosgn.dtd`` and then
    ``src/dtd/vokomll.dtd`` (a later declaration overrides an earlier one),
    the ``apos`` and ``quot`` defaults are added, and the first ``&name;``
    reference found inside a value is replaced by the text of that entity.

    Returns:
        dict mapping entity name to entity text.

    Raises:
        KeyError: if a value references an entity that is not in the mapping.
    """
    entities = {}
    for dtd_path in ('src/dtd/vokosgn.dtd', 'src/dtd/vokomll.dtd'):
        with open(dtd_path, 'rb') as f:
            dtd = etree.DTD(f)
            for entity in dtd.iterentities():
                entities[entity.name] = entity.content

    # Seed some default entities.
    entities[u'apos'] = "'"
    entities[u'quot'] = '"'

    # Entities sometimes refer to a canonical entity.  Iterate over a snapshot
    # because the mapping is updated inside the loop.
    for key, value in list(entities.items()):
        if not value:
            # Nothing to resolve in an empty or missing value.
            continue
        amp = value.find('&')
        if amp == -1:
            continue
        # Look for the terminator *after* the ampersand; a ';' that appears
        # earlier in the text does not close the reference.
        semi = value.find(';', amp + 1)
        if semi == -1:
            continue
        sym = value[amp + 1:semi]
        entities[key] = u"%s%s%s" % (value[:amp], entities[sym],
                                     value[semi + 1:])
    return entities
def __init__(self, validation_file):
    """Load the validator that corresponds to ``validation_file``.

    The file is opened as a package resource relative to this module.  XSD
    files are compiled with ``etree.XMLSchema``, RNG files with
    ``etree.RelaxNG`` and DTD files with ``etree.DTD``; the result is stored
    in ``self._dtd``.  For any other value a message is printed and
    ``self._dtd`` is not set.

    Args:
        validation_file: one of the ``ValidationFiles`` constants.
    """
    xsd_files = (ValidationFiles.EAD_3_XSD, ValidationFiles.EAD_2002_XSD)
    rng_files = (ValidationFiles.EAD_3_RNG, ValidationFiles.EAD_2002_RNG)
    dtd_files = (ValidationFiles.EAD_2002_DTD, ValidationFiles.EAD_3_DTD)

    if validation_file in xsd_files:
        with resource_stream(__name__, validation_file) as fh:
            self._dtd = etree.XMLSchema(etree.parse(fh))
    elif validation_file in rng_files:
        with resource_stream(__name__, validation_file) as fh:
            self._dtd = etree.RelaxNG(etree.parse(fh))
    elif validation_file in dtd_files:
        with resource_stream(__name__, validation_file) as fh:
            # Read from the opened resource stream.  Passing the resource
            # name instead would make lxml treat it as a filesystem path and
            # ignore the stream that was just opened.
            self._dtd = etree.DTD(fh)
    else:
        # Add other formats as examples are found
        print("EAD format not found")
def load_dtd_as_file_object(dtd_url=None, dtd_path=None, verbose=0):
    """Load a DTD from a URL or, failing that, from a local path.

    ``dtd_url`` takes precedence: when it is given, ``dtd_path`` is ignored.
    A URL that does not answer with HTTP status 200 yields ``None``.

    Args:
        dtd_url: address to download the DTD from, or None.
        dtd_path: local file to read the DTD from, or None.
        verbose: when >= 1, print a blank line and a status message.

    Returns:
        The ``etree.DTD`` object, or None when nothing was loaded.
    """
    dtd = None
    source = None

    if dtd_url is not None:
        source = dtd_url
        response = requests.get(dtd_url)
        if response.status_code == 200:
            dtd = etree.DTD(io.StringIO(response.text))
    elif dtd_path is not None:
        source = dtd_path
        with open(dtd_path) as infile:
            dtd = etree.DTD(infile)

    # A status line is only printed when a source was actually given.
    if source is not None and verbose >= 1:
        print()
        if dtd is None:
            print(f'failed to load dtd from {source}')
        else:
            print(f'succesfully loaded dtd from {source}')

    return dtd
def main(dtd_file, dtd_dir, dtd_url, src):
    """
    Validates DASTA ver.3 document against appropriate DTD.

    The document is parsed from ``src``.  When ``dtd_file`` is given it is
    used as the DTD; otherwise the DTD name is derived from the document and
    opened through ``dtd_source`` using ``dtd_dir`` and ``dtd_url``.  Every
    failure is reported through ``verbose`` and ends the process with
    ``sys.exit(-1)``.
    """
    try:
        document = etree.parse(src)
    except etree.XMLSyntaxError as error:
        verbose(
            "document cannot be parsed, error detail: '{0}'".format(error))
        sys.exit(-1)
    verbose("document parsed")

    if dtd_file:
        verbose(f"validation against file {dtd_file.name}")
        validator = etree.DTD(dtd_file)
    else:
        name = dtd_file_name(document)
        if not name:
            verbose(
                "cannot find out DTD file name, validations is not possible")
            sys.exit(-1)
        with dtd_source(name, dtd_dir, dtd_url) as stream:
            validator = etree.DTD(stream)

    try:
        validator.assertValid(document)
    except etree.DocumentInvalid as error:
        verbose("document is not valid, error detail: '{0}'".format(error))
        sys.exit(-1)
    verbose("document is valid")
def load_data(self, file):
    """Replace the current data with railroads and stations read from XML.

    The existing state is cleared first.  Every ``Railroads/Railroad``
    element becomes a ``Railroad`` and every ``Stations/Station`` below it a
    ``Station``; both are registered in ``self.railroads`` and
    ``self.stations`` by their integer id.  The loaded data is printed at
    the end.

    Args:
        file: path of the XML file to load.

    Returns:
        0 on success, None when the path is not a file or the document or
        the DTD cannot be read or parsed.
    """
    # self.save_to_db()
    self.clean()
    if not os.path.isfile(file):
        print("incorrect path")
        return

    try:
        # Read bytes rather than text: lxml refuses str input that carries an
        # XML encoding declaration, and the parser handles decoding itself.
        with open(file, "rb") as xml_file:
            xml_doc = ET.fromstring(xml_file.read())
        # The DTD is only loaded here, which checks that it can be read and
        # parsed.  Validating the document against it is currently disabled.
        with open(self.dtd_path) as dtd_file:
            ET.DTD(dtd_file)
    except Exception:
        print("Error while parsing. Invalid document")
        return

    for xml_railroad in xml_doc.findall("Railroads/Railroad"):
        road_id = int(xml_railroad.get('id'))
        road_name = xml_railroad.get('road_name')
        railroad = Railroad(road_id, road_name)
        for xml_station in xml_railroad.findall("Stations/Station"):
            station_id = int(xml_station.get('id'))
            station_name = xml_station.get('name')
            station_address = xml_station.get('address')
            station_tracks = int(xml_station.get('tracks'))
            station = Station(station_id, station_name, station_address,
                              station_tracks)
            railroad.stations.add(station)
            self.stations[station_id] = station
        self.railroads[road_id] = railroad

    self.print_data()
    return 0
def analyze_lxml(self, filename):
    """Validate an XHTML file against the configured XHTML DTD.

    PARAMETERS:
        filename: file to parse with the XHTML parser.

    RETURN:
        The errors from the DTD error log, or a one-element list with a
        message when the file is not well-formed.
    """
    try:
        validator = etree.DTD(external_id=AnalyzerMarkup.xhtml_validator)
        # Parse the file with the XHTML parser, then validate the tree.
        document = etree.parse(filename, html.XHTMLParser())
        validator.validate(document)
        # Only the entries of error level are reported.
        return validator.error_log.filter_from_errors()
    except etree.XMLSyntaxError as err:
        return [
            "Failed parsing XHTML, check well-formed tags:" + str(err)
        ]
def yml_validate(file):
    """Validate an XML file against the bundled ``static/s1/shops.dtd``.

    Args:
        file: path of the XML file to check.

    Returns:
        The result of ``DTD.validate`` for the parsed document.
    """
    from lxml import etree, objectify
    import os

    # os.path.join keeps the path relative when dirname(__file__) is empty;
    # string formatting would turn it into '/static/s1/shops.dtd'.
    dtd_path = os.path.join(os.path.dirname(__file__), 'static', 's1',
                            'shops.dtd')
    with open(dtd_path, 'rb') as dtd_file:
        dtd = etree.DTD(dtd_file)
    with open(file, 'rb') as xml_file:
        tree = objectify.parse(xml_file)
    return dtd.validate(tree)
def test_individual_xml_validates(self):
    """The directory API XML for one individual matches the expected DTD.

    The individual is the first cnetid returned by
    ``get_all_library_cnetids_from_directory``.
    """
    dtd_source = """
        <!ELEMENT responseData (response, totalResults, individuals)>
        <!ELEMENT response (#PCDATA)>
        <!ELEMENT totalResults (#PCDATA)>
        <!ELEMENT individuals (individual+)>
        <!ELEMENT individual (name, displayName, cnetid, chicagoid, contacts, resources)>
        <!ELEMENT name (#PCDATA)>
        <!ELEMENT displayName (#PCDATA)>
        <!ELEMENT cnetid (#PCDATA)>
        <!ELEMENT chicagoid (#PCDATA)>
        <!ELEMENT contacts (contact)>
        <!ELEMENT contact (title, division, department, subDepartment, email, phone, facultyExchange)>
        <!ELEMENT title (#PCDATA)>
        <!ELEMENT division (name, resources)>
        <!ELEMENT resources (directoryURL, xmlURL)>
        <!ELEMENT directoryURL (#PCDATA)>
        <!ELEMENT xmlURL (#PCDATA)>
        <!ELEMENT department (name, resources)>
        <!ELEMENT subDepartment (name, resources)>
        <!ELEMENT email (#PCDATA)>
        <!ELEMENT phone (#PCDATA)>
        <!ELEMENT facultyExchange (#PCDATA)>
        """
    validator = etree.DTD(StringIO(dtd_source))

    cnetids = get_all_library_cnetids_from_directory()
    url = ('https://directory.uchicago.edu/api/v2/individuals/'
           + cnetids[0] + '.xml')
    document_root = etree.XML(get_xml_from_directory_api(url))

    self.assertEqual(validator.validate(document_root), True)
def assert_xml(xml_content, expected_xml=None, dtd_path=None):
    """Assert that ``xml_content`` is valid XML and matches expectations.

    Args:
        xml_content: XML string under test; it must parse.
        expected_xml: optional XML string the content must equal after both
            are re-serialized.
        dtd_path: optional DTD the content must validate against.

    Raises:
        AssertionError: when DTD validation fails (carrying the DTD error
            log) or when the content differs from ``expected_xml``.
    """

    def to_xml(string):
        """A shortcut function to load xml."""
        return etree.fromstring(string)

    def normalize_xml(xml_string):
        """Re-serialize ``xml_string`` so that differently formatted XML can
        be compared.  Parsing it also checks that it is well-formed.
        """
        # Serialize the argument itself, so that the expected XML is really
        # what gets compared against the content.
        return etree.tostring(to_xml(xml_string))

    validated_xml = normalize_xml(xml_content)

    if dtd_path:
        dtd = etree.DTD(dtd_path)
        if not dtd.validate(to_xml(xml_content)):
            raise AssertionError(dtd.error_log)

    if expected_xml is not None:
        assert_equal(validated_xml, normalize_xml(expected_xml))
def validate_dtd(xml_file, dtd_file=None):
    """
    Validate an xml file against its dtd.

    When ``dtd_file`` is not given, the parser validates against the DTD
    referenced by the document itself; otherwise the document is parsed
    without validation and then checked against ``dtd_file``.

    Args:
        xml_file: the xml file
        dtd_file: the optional dtd file
    @throws XMLSyntaxError if parsing, loading the dtd or validation fails
    """
    # perform parsing, use dtd validation if dtd file is not specified
    parser = etree.XMLParser(dtd_validation=not dtd_file)
    try:
        xml = etree.parse(xml_file, parser=parser)
    except etree.LxmlError as err:
        # Report the failure even when the parser log is empty, instead of
        # continuing without a parsed document.
        raise XMLSyntaxError(parser.error_log or err.error_log)
    if parser.error_log:
        raise XMLSyntaxError(parser.error_log)

    # perform dtd validation if the dtd file is specified
    if not dtd_file:
        return

    try:
        dtd = etree.DTD(dtd_file)
    except etree.LxmlError as err:
        # No DTD object exists at this point, so the log has to come from
        # the exception.
        raise XMLSyntaxError(err.error_log)

    try:
        valid = dtd.validate(xml.getroot())
    except etree.LxmlError:
        raise XMLSyntaxError(dtd.error_log)
    if not valid:
        raise XMLSyntaxError(dtd.error_log)
def is_valid(self):
    """
    Validate if the NZB File is okay; this will generate some overhead
    but at the same time it caches a lot of the results it returns so
    future calls will be speedy

    The result is cached in ``self._lazy_is_valid``.  When ``self.open()``
    fails nothing is cached, so the next call tries again.

    The function returns True if the nzb file is valid, otherwise it
    returns False
    """
    if self._lazy_is_valid is None:
        if self.open():
            # Open DTD file and create dtd object; the file is only needed
            # while the DTD is being parsed.
            with open(NZB_XML_DTD_FILE) as dtdfd:
                dtd = etree.DTD(dtdfd)

            # Verify our dtd file against our current stream
            try:
                nzb = etree.parse(self.filepath)

            except XMLSyntaxError as e:
                # We have corruption.  Any syntax error means there is no
                # parsed document left to validate.
                logger.error("NZB-File '%s' is corrupt" % self.filepath)
                logger.debug(
                    'NZB-File XMLSyntaxError Exception %s' % str(e))
                # Mark situation
                self._lazy_is_valid = False
                # We failed
                return False

            self._lazy_is_valid = dtd.validate(nzb)

    return self._lazy_is_valid is True
def build_dtd(dtd_file):
    """Create a DTD validator from a DTD file.

    dtd_file -- DTD file

    Return the validator (lxml.etree.DTD) on success.
    Return None when the DTD cannot be parsed; the reason is logged.

    The lxml.etree.DTD class:
    https://lxml.de/validation.html#id1
    https://lxml.de/api/lxml.etree.DTD-class.html
    """
    try:
        return etree.DTD(file=dtd_file)
    except etree.DTDParseError as parse_error:
        last_error = parse_error.error_log.last_error
        if last_error.line == 0:
            # The DTD could not be parsed at all, e.g. "failed to load
            # external entity" when the file does not exist.
            logger.error(parse_error)
            logger.error(last_error.message)
            return None

        logger.error("'%s' is not a valid DTD file", dtd_file)
        # Each DTD parse error is logged as a warning with its position.
        # E.g. level_name: "FATAL", domain_name: "PARSER",
        # type_name: "ERR_EXT_SUBSET_NOT_FINISHED"
        for entry in parse_error.error_log:
            logger.warning("line %i, column %i: %s", entry.line,
                           entry.column, entry.message)
        return None
def validate(self):
    """Check that the dtd is valid.

    Raises ValidationError when lxml reports errors for the dtd.
    May also raise etree.ParseError when the dtd cannot be parsed.
    """
    # Prefer the raw content: reading self.content validates the dtd and
    # could recurse, yet a dtd must be checkable before it is fetched.
    content = self._content or self.content

    # The validation doesn't work using a StringIO for an unknown reason, so
    # the content goes through a temporary file.
    fd, tmp_path = tempfile.mkstemp()
    try:
        # TODO: Get encoding from the dtd file (xml tag).
        with os.fdopen(fd, 'wb') as tmp_file:
            tmp_file.write(content.encode('utf-8'))
        dtd_obj = etree.DTD(tmp_path)
    finally:
        os.remove(tmp_path)

    if dtd_obj.error_log:
        raise ValidationError(dtd_obj.error_log)

    # etree.DTD doesn't raise when a sub element is not defined, but
    # self.parse does, so it catches what the check above misses.
    self.parse()
def build(self):
    """Package the extension into the archive named by ``self.xpi``.

    Existing ``*.xpi`` files in the current directory are removed first.
    ``install.rdf`` is written with its ``em:version`` set to
    ``self.version`` and ``about.dtd`` with its ``odfscan.version`` entity
    set to the same value; every other file is stored unchanged.
    """
    for stale_archive in glob.glob('*.xpi'):
        os.remove(stale_archive)

    about_dtd = 'chrome/locale/en-US/about.dtd'

    with zipfile.ZipFile(self.xpi, 'w', zipfile.ZIP_DEFLATED) as archive:
        members = ['chrome.manifest', 'bootstrap.js', 'install.rdf']
        members += glob.glob('resource/**/*', recursive=True)
        members += glob.glob('chrome/**/*', recursive=True)

        for member in members:
            if member == 'install.rdf':
                rdf = etree.parse('install.rdf')
                version_node = rdf.find('.//em:version',
                                        namespaces=self.namespaces(rdf))
                version_node.text = self.version
                archive.writestr(member,
                                 etree.tostring(rdf, pretty_print=True))
            elif member == about_dtd:
                with open(about_dtd) as f:
                    dtd = etree.DTD(f)
                # The version entity comes first; the declaration already in
                # the file is dropped in favour of the current version.
                declarations = [
                    '<!ENTITY odfscan.version "' + self.version + '">\n'
                ]
                declarations.extend(
                    '<!ENTITY ' + entity.name + ' "' + entity.content + '">\n'
                    for entity in dtd.entities()
                    if entity.name != 'odfscan.version')
                archive.writestr(member, ''.join(declarations))
            else:
                archive.write(member)
def test_assertXmlValidDTD_DTD(self):
    """Check that assertXmlValidDTD accepts an lxml DTD object."""
    test_case = XmlTestCase(methodName='assertXmlValidDTD')

    dtd_source = """<!ELEMENT root (child)>
<!ELEMENT child EMPTY>
<!ATTLIST child id ID #REQUIRED>
"""
    schema = etree.DTD(io.StringIO(dtd_source))

    valid_document = b"""<?xml version="1.0" encoding="utf-8"?>
<root>
    <child id="child1"/>
</root>
"""
    # A document with a single child passes without raising.
    valid_root = test_case.assertXmlDocument(valid_document)
    test_case.assertXmlValidDTD(valid_root, schema)

    invalid_document = b"""<?xml version="1.0" encoding="utf-8"?>
<root>
    <child id="child1"/>
    <child id="child1"/>
</root>
"""
    # Document is invalid according to DTD (multiple child element)
    invalid_root = test_case.assertXmlDocument(invalid_document)
    with self.assertRaises(test_case.failureException):
        test_case.assertXmlValidDTD(invalid_root, schema)
def xml4dtd(xmlFile, dtdFile):
    """Load an XML file as an objectify tree and validate it against a DTD.

    The program exits when the DTD or the XML file has a syntax error.  The
    validation result is printed, followed by the first error if there is
    one.

    Returns:
        Tuple of (root node, validation result).
    """
    # Load the DTD and check its syntax.
    try:
        validator = etree.DTD(dtdFile)
    except lxml.etree.DTDParseError:
        print('WARNING: DTD had a bad syntax.')
        sys.exit()

    # Load the XML and check its syntax.
    try:
        parsed = objectify.parse(xmlFile)
    except lxml.etree.XMLSyntaxError:
        print('WARNING: The file is empty or had a bad syntax.')
        sys.exit()

    # Objectify the XML by re-reading its serialized form.
    root_node = objectify.fromstring(etree.tostring(parsed))

    # Validate the loaded XML.
    is_valid = validator.validate(root_node)
    print('XML validation = {}'.format(is_valid))

    # Show the first reason why the document does not validate.
    errors = validator.error_log.filter_from_errors()
    if errors:
        print('\nReasons:\n--------')
        print(errors[0])

    # Return the node and its validation state.
    return root_node, is_valid
def importXML(xml, dtd):
    """Return the root of the XML file, creating the file when missing.

    An existing file is validated against the DTD first.  A file that cannot
    be read or does not validate is reported through ``showinfo`` and the
    program exits.  A missing file is created with an empty ``Annuaire``
    root holding ``Fournisseurs``, ``Mutuelles`` and ``Magasins``.

    Args:
        xml: path of the XML file.
        dtd: path of the DTD file.

    Returns:
        The root element of the loaded or newly created document.
    """
    if not os.path.exists(xml):
        root = etree.Element("Annuaire")
        etree.SubElement(root, "Fournisseurs")
        etree.SubElement(root, "Mutuelles")
        etree.SubElement(root, "Magasins")
        tree = etree.ElementTree(root)
        tree.write(xml, pretty_print=True)
        return tree.getroot()

    try:
        # Both files are only needed while they are parsed, so they are
        # closed again right away.
        with open(dtd, 'rb') as dtd_file:
            validator = etree.DTD(dtd_file)
        with open(xml, 'rb') as xml_file:
            tree = objectify.parse(xml_file)
    except Exception as e:
        showinfo(
            "Erreur", "Le fichier %s est corrompu. "
            "Corrigez le fichier ou supprimez le. "
            "L'erreur est la suivante: %s" % (xml, e))
        sys.exit(0)

    if not validator.validate(tree):
        showinfo(
            "Erreur", "Le fichier %s n'est pas conforme."
            "Corrigez le fichier ou supprimez le. L'erreur est "
            "la suivant: %s" % (xml, validator.error_log.filter_from_errors()))
        sys.exit(0)

    # Parse again without blank text for the tree that is handed back.
    parser = etree.XMLParser(remove_blank_text=True)
    returndata = etree.parse(xml, parser)
    return returndata.getroot()
def check_translations():
    """Report how the en-US DTD entities are used by the XUL files.

    Every entity reference in each ``content/*.xul`` file is replaced by the
    entity text, and the result must parse as XML; otherwise the error is
    printed and the program exits with status 1.  Afterwards unused entities
    are listed, as well as entities used in a pane that their name is not
    prefixed with.
    """
    print('checking XUL translations')

    dtd_path = os.path.join(root, 'locale', 'en-US',
                            'zotero-better-bibtex.dtd')
    with open(dtd_path) as dtd:
        entities = list(etree.DTD(dtd).entities())
    # Entity name -> names of the panes that reference it.
    translations = {entity.name: [] for entity in entities}

    for xul in glob.glob(os.path.join(root, 'content', '*.xul')):
        with open(xul, 'r') as handle:
            markup = handle.read()
        pane_name = os.path.splitext(os.path.basename(xul))[0]

        for entity in entities:
            expanded = markup.replace(f'&{entity.name};', entity.content)
            if expanded != markup:
                translations[entity.name].append(pane_name)
                markup = expanded

        try:
            etree.fromstring(markup)
        except etree.XMLSyntaxError as err:
            print(os.path.relpath(xul, root), ':', err)
            sys.exit(1)

    for string, panes in translations.items():
        if not panes:
            print(f' Unused translation string "{string}"')
        for pane in panes:
            if not string.startswith(f'better-bibtex.{pane}.'):
                print(f' {string} used in {pane}')
def validateExternalDTD(self, source):
    """
    Check the current NAF document against an external DTD.

    The loaded DTD is kept in ``self.dtd``.

    :param source: The DTD source
    :return True or False
    """
    validator = etree.DTD(source)
    self.dtd = validator
    return validator.validate(self.root)
def Body_Former(self, scrypt, config, path, test_file):
    """Write the body of a generated test script from an XML scenario.

    The scenario at ``path`` is parsed and validated against the DTD named
    in its DOCTYPE declaration, looked up under ``cwd + "/tests/xml/"``.
    When it is valid, each child of the root element is translated into
    lines of Python source that are written to ``test_file``.  The handled
    tags are ``sock_open``, ``sock_close``, ``send`` and ``recv``.

    ``scrypt`` and ``config`` are not used in the visible part of this
    method, and nothing is written when validation fails.
    NOTE(review): the block layout was reconstructed from flattened source
    and the method may continue beyond what is shown here -- confirm.
    """
    code = 0
    strcount = 0
    scenopenflag = False
    recvbuf = 15000
    m2ua_ppid = "0x02000000"
    # XML parse and validate
    xmlparser = etree.XMLParser(strip_cdata=False)
    tree = etree.parse(path, xmlparser)
    docinfo = tree.docinfo
    # The DTD file name is the first double-quoted part of the DOCTYPE.
    dtdfilename = docinfo.doctype.split('"', 2)
    dtdfilepath = cwd + "/tests/xml/" + dtdfilename[1]
    dtd = etree.DTD(dtdfilepath)
    if dtd.validate(tree):
        # If valid XML file
        root = tree.getroot()
        testname = root.attrib # Scriptname
        for line in root:
            if line.tag == "sock_open":
                # Remembered for the "send" elements that follow.
                proto = line.attrib["proto"]
                if line.attrib["transport"] == "sctp":
                    test_file.write ("\n#Create SCTP socket\n")
                    test_file.write ("recv_buf = {0}\n".format(recvbuf))
                    test_file.write ("sk{0} = sctp.sctpsocket_tcp(socket.AF_INET)\n".format(line.attrib["local"]))
                    try:
                        # Raises KeyError when no timeout attribute is given,
                        # which selects the default of 30 below.
                        xxx = line.attrib["timeout"]
                        test_file.write ("sk{0}.settimeout({1})\n".format(line.attrib["local"], int(line.attrib["timeout"])))
                    except KeyError:
                        test_file.write ("sk{0}.settimeout({1})\n".format(line.attrib["local"], 30))
                elif line.attrib["transport"] == "tcp":
                    pass
                else:
                    pass
                test_file.write ("sk{0}.bind((address{0}, port{0}))\n".format(line.attrib["local"]))
                test_file.write ("sk{0}.connect((address{1}, port{1}))\n".format(line.attrib["local"], line.attrib["remote"]))
            if line.tag == "sock_close":
                test_file.write ("\n#Close SCTP socket\n")
                test_file.write ("sk{0}.close()\n".format(line.attrib["local"]))
            if line.tag == "send":
                sendinfo = executor.Values_Exec(line.text.strip(), 'send')
                # sendinfo[1] names the message builder; sendinfo[3] is passed
                # as its arguments when sendinfo[2] is truthy.
                if sendinfo[2]:
                    test_file.write ("\nmessage = builder.Build_{0}_Message({1})\n".format(sendinfo[1], sendinfo[3]))
                else:
                    test_file.write ("\nmessage = builder.Build_{0}_Message()\n".format(sendinfo[1]))
                # NOTE(review): proto is only assigned by a preceding
                # sock_open element -- confirm that one always comes first.
                if proto == "m2ua":
                    test_file.write ("sk{0}.sctp_send(message,ppid={1})\n".format(line.attrib["socket"], m2ua_ppid))
            if line.tag == "recv":
                recvinfo = executor.Values_Exec(line.text.strip(), "recv")
                if recvinfo[1] == "NO MESSAGE":
                    # The generated code expects the receive to time out.
                    test_file.write ("\ntry:\n")
                    test_file.write (" message = sk{0}.recv({1})\n".format(line.attrib["socket"], recvbuf))
                    test_file.write (" print ('NOK')\n")
                    test_file.write ("except socket.timeout:\n")
                    test_file.write (" print('No message OK')\n")
                else:
                    test_file.write ("\nmessage = sk{0}.recv({1})\n".format(line.attrib["socket"], recvbuf))
                    test_file.write ("obj_message = parser.Parse_Message(message)\n")
                    if recvinfo[2]:
                        test_file.write ("validation_result = validator.Validate_Message_W_Params(obj_message, '{0}', '{1}', {2})\n".format(line.attrib["class"], recvinfo[1], recvinfo[3]))
                    else:
                        test_file.write ("validation_result = validator.Validate_Message(obj_message, '{0}', '{1}')\n".format(line.attrib["class"], recvinfo[1]))
def validate(eadfile, dtd=None, xsd=None):
    """Validate an EAD file with a DTD or an XSD, depending on its content.

    A document that contains elements in the ``urn:isbn:1-931666-22-9``
    namespace is validated against the XSD, any other document against the
    DTD.  Both default to the files in the ``ents`` folder next to this
    script.

    Returns:
        Tuple of (error log or None, validation result, number of errors).
    """
    # Info: http://stackoverflow.com/a/6098238/1763984
    # realpath() keeps the script working even when it is symlinked.
    this_file = inspect.getfile(inspect.currentframe())
    cmd_folder = os.path.realpath(
        os.path.abspath(os.path.split(this_file)[0]))

    document = etree.parse(eadfile)
    namespaced = document.xpath(
        "//*[namespace-uri()='urn:isbn:1-931666-22-9']")

    dtd = dtd or "%s/ents/ead.dtd" % cmd_folder
    xsd = xsd or "%s/ents/ead.xsd" % cmd_folder

    if namespaced:
        # looks like XSD style
        validator = etree.XMLSchema(etree.parse(xsd))
    else:
        # looks like DTD style
        validator = etree.DTD(dtd)

    valid = validator.validate(document)
    if valid:
        return None, valid, 0

    message = validator.error_log
    return message, valid, len(message)
def test_validating_against_dtd(self):
    """The PubMed export of the raw JSON validates against PubMed.dtd."""
    xml = ET.XML(export.Export(self._raw_json).pipeline_pubmed())

    # Close the DTD file once it has been parsed instead of leaking it.
    with open('tests/dtd/scielo_pubmed/PubMed.dtd', 'r') as dtd_file:
        dtd = ET.DTD(dtd_file)

    self.assertEqual(True, dtd.validate(xml))
def ex(datasource):
    """Return the validated log entries of a data source as JSON.

    The log file of the data source is parsed and validated against the
    remote log DTD.  For every ``entry`` whose ``valid`` attribute is
    ``TPF`` or ``EmptyTPF`` the result holds a tuple of the request text,
    the data source and the serialized non-empty ``bgp`` children.  An
    unknown data source gives an empty result.

    Raises:
        AssertionError: when the log file does not validate.
    """
    log_files = {
        'dbpedia3.8': 'tests/test4.xml',
        'lift': 'tests/test1.xml',
    }
    d = []
    log_file = log_files.get(datasource)
    if log_file is None:
        return jsonify(result=d)

    parser = etree.XMLParser(recover=True, strip_cdata=True)
    tree = etree.parse(log_file, parser)

    dtd = etree.DTD('http://documents.ls2n.fr/be4dbp/log.dtd')
    # Raised explicitly so the check also runs under "python -O", and the
    # message names the file that was actually loaded.
    if not dtd.validate(tree):
        raise AssertionError('%s non valide au chargement : %s' % (
            log_file, dtd.error_log.filter_from_errors()[0]))

    for entry in tree.getroot():
        if entry.tag != 'entry':
            continue
        if entry.get("valid") not in ('TPF', 'EmptyTPF'):
            # Entries that are not validated are not loaded.
            continue
        rep = ''.join(
            etree.tostring(x).decode('utf-8')
            for x in entry
            if x.tag == 'bgp' and len(x) > 0)
        d.append((entry.find('request').text, datasource, rep))

    return jsonify(result=d)
def genxml(root_menu, configdir):
    '''Generate the applications.menu XML file in the user's directory.

    The menu is validated against ``lib/menu-1.0.dtd`` first.  With
    ``options.verbose`` the XML is printed; unless ``options.simulate`` is
    set it is written to ``configdir``, which is created when missing.

    Returns -1 when the menu does not validate, otherwise None.
    '''
    dtd = etree.DTD(os.path.join(BASEDIR, "lib", "menu-1.0.dtd"))
    if not dtd.validate(root_menu):
        print(dtd.error_log.filter_from_errors())
        return -1

    serialized = etree.tostring(root_menu, pretty_print=True)
    if options.verbose:
        print(serialized)

    if options.simulate:
        return None

    if not os.path.exists(configdir):
        os.makedirs(configdir)

    targets = []
    if options.xfce:
        targets.append('/xfce-applications.menu')
    if options.kde4:
        targets.append('/kde-4-applications.menu')
    # NOTE(review): the generic applications.menu is written whenever
    # options.kde is off, also next to the xfce/kde4 files -- confirm that
    # this is intended rather than an if/elif chain.
    if options.kde:
        targets.append('/kf5-applications.menu')
    else:
        targets.append('/applications.menu')

    # Every file is closed as soon as it is written.
    for target in targets:
        with open(configdir + target, "w") as mymenu:
            mymenu.write(serialized)
def parseDBLP(facultydict):
    """Print the author aliases found in ``dblp.xml-full`` as CSV.

    For every ``www`` node with author children, each author after the
    first is printed as ``alias,name`` where ``name`` is the first author.
    ``facultydict`` is not used.
    """
    print("alias,name")
    dtd = ElementTree.DTD(file='dblp.dtd')
    with open('dblp.xml-full', mode='rb') as f:
        previous = None
        for (event, node) in ElementTree.iterparse(
                f, events=["start", "end"], load_dtd=True):
            # Clear the node of the previous event to keep memory down.
            if previous is not None:
                previous.clear()
            previous = node

            if node.tag != "www":
                continue

            names = []
            for child in node:
                if child.tag == "author" and child.text:
                    names.append(child.text.strip().encode("utf-8"))

            if names:
                primary = names[0]
                for alias in names[1:]:
                    print(alias.decode("utf-8") + "," +
                          primary.decode("utf-8"))
def write(self, language, dtd=None, filename=None):
    """
    Serializes the contexts into a TS file.

    The contexts are written in sorted name order below a ``TS`` root
    element with version "2.1".

    @param language: the target language code
    @type language: str
    @param dtd: if set, the xml is validated against this dtd text before
                it is written, defaults to None; an Exception is raised
                when the validation fails
    @type dtd: str
    @param filename: if given, the path to write to (it replaces
                     self.filename), defaults to None
    @type filename: str
    """
    if filename:
        self.filename = filename

    root = etree.Element(
        "TS", attrib={"version": "2.1", "language": language})
    for name in sorted(self.contexts.keys()):
        root.append(self.contexts[name].toXml(name))
    document = etree.ElementTree(root)

    if dtd:
        validator = etree.DTD(StringIO(dtd))
        if not validator.validate(document):
            first_error = validator.error_log.filter_from_errors()[0]
            raise Exception("Error validating: %s" % first_error)

    document.write(self.filename, encoding='utf-8', xml_declaration=True,
                   pretty_print=True)
def _get_validator(dtd=None):
    """Return the cached ``validate`` method of the DTD at ``dtd``.

    A relative path is resolved against the directory of this module.  The
    DTD is loaded on first use and kept in ``_validator_cache`` under its
    resolved path.

    Args:
        dtd: path of the DTD file.

    Returns:
        The bound ``validate`` method of the loaded ``etree.DTD``.
    """
    # Resolve the path before the lookup, so that the key that is read is
    # the same as the key that is written.
    if not path.isabs(dtd):
        dtd = path.join(path.dirname(__file__), dtd)

    validator = _validator_cache.get(dtd)
    if validator is None:
        validator = _validator_cache[dtd] = etree.DTD(dtd).validate
    return validator
def dtd_1_0(filename):
    """
    Checks ``filename`` against the CellML 1.0 DTD, prints the outcome and
    exits: with status 0 when the file validates, with status 1 after
    listing the errors otherwise.
    """
    # Parse the file without network access.
    xml = etree.parse(filename, etree.XMLParser(no_network=True))

    # Load the DTD.
    dtd = etree.DTD(check.cellml_1_0('cellml_1_0.dtd'))

    if not dtd.validate(xml):
        # Messages show the namespace as '{uri}'; print 'cellml:' instead.
        namespace_tag = '{' + check.CELLML_1_0_NS + '}'
        for e in dtd.error_log:
            print(colored('fail', '[fail]') + ' Error on line ' + str(e.line))
            print(e.message.replace(namespace_tag, 'cellml:'))
            print()
        sys.exit(1)

    print(
        colored('warning', '[pass]') +
        ' This file validates against the CellML 1.0 DTD.')
    sys.exit(0)
def test_directory_xml_validates(self):
    """The directory API XML for division 16 matches the expected DTD."""
    dtd_source = """
        <!ELEMENT responseData (response, totalResults, organizations)>
        <!ELEMENT response (#PCDATA)>
        <!ELEMENT totalResults (#PCDATA)>
        <!ELEMENT organizations (organization+)>
        <!ELEMENT organization (name, type, departments, members, resources)>
        <!ELEMENT name (#PCDATA)>
        <!ELEMENT type (#PCDATA)>
        <!ELEMENT departments (department+)>
        <!ELEMENT department (name, resources)>
        <!-- name (see above) -->
        <!ELEMENT resources (directoryURL, xmlURL)>
        <!ELEMENT directoryURL (#PCDATA)>
        <!ELEMENT xmlURL (#PCDATA)>
        <!ELEMENT members (member+)>
        <!ELEMENT member (name, displayName, cnetid, chicagoid, title, email, phone, facultyExchange, resources)>
        <!-- name (see above) -->
        <!ELEMENT displayName (#PCDATA)>
        <!ELEMENT cnetid (#PCDATA)>
        <!ELEMENT chicagoid (#PCDATA)>
        <!ELEMENT title (#PCDATA)>
        <!ELEMENT email (#PCDATA)>
        <!ELEMENT phone (#PCDATA)>
        <!ELEMENT facultyExchange (#PCDATA)>
        <!-- resources (see above) -->
        """
    validator = etree.DTD(StringIO(dtd_source))

    url = 'https://directory.uchicago.edu/api/v2/divisions/16.xml'
    document_root = etree.XML(get_xml_from_directory_api(url))

    self.assertEqual(validator.validate(document_root), True)
def validate_manifest(tree, dtd_file, logger): ''' Validates the given XML tree against the given DTD. This is a common function used by ManifestParser and ManifestWriter. Parameters: - tree, an etree.ElementTree - dtd_file, the path to a DTD file - logger, where to log errors to Returns: - Nothing On success, this method returns; on error it raises an exception. Raises: - ManifestError is raised if the DTD file cannot be loaded, or if validation fails. ''' try: dtd = etree.DTD(dtd_file) except etree.DTDParseError, error: msg = "Unable to parse DTD file [%s]:" % (dtd_file) logger.exception(msg) logger.exception(str(error)) raise ManifestError(msg, orig_exception=error)