def test_validate_xml_against_xsd(self):
    """Validate generated DataCite XML for all public records.

    Iterates every record in the 'zenodo' collection, checks that
    related-identifier schemes are lower-case, and validates the
    formatted 'dcite' output against ``self.schema`` (an lxml
    XMLSchema).  On any failure, prints the record id and the XML
    (when available) before re-raising, to aid debugging.
    """
    from invenio.websearch_model import Collection
    from invenio.bibformat import format_record
    from invenio.bibfield import get_record
    # Reset lxml's global error log so errors reported belong to this run.
    etree.clear_error_log()
    for recid in Collection.query.filter_by(name='zenodo').first().reclist:
        try:
            xml = None  # kept around so the except block can dump it
            record = get_record(recid)
            for identifier in record.get('related_identifiers', []):
                # Schemes are expected lower-case; flag any that are not.
                if identifier['scheme'] != identifier['scheme'].lower():
                    raise Exception(
                        "Record %s has problem with upper-case scheme %s"
                        % (recid, identifier['scheme']))
            # Only records with a DOI produce DataCite XML.
            if record.get('doi', None):
                xml = StringIO(format_record(recid, 'dcite'))
                xml_doc = etree.parse(xml)
                self.schema.assertValid(xml_doc)  # raises DocumentInvalid on failure
        except Exception, e:
            # Emit context (record id, generated XML) before propagating.
            print recid
            if xml:
                print xml.getvalue()
            raise e
def loadUtr(modelManager):
    """Load the XBRL Unit Type Registry into ``modelManager.utrDict``.

    Builds a dictionary of item types that are constrained by the UTR,
    keyed by item type name; each value maps a registry entry id to a
    "RegEntry" list (index layout documented inline).  The ``utrDict``
    attribute is unbound on ``modelManager`` until this function runs.
    Download/parse failures are logged and leave ``utrDict`` partial.
    """
    modelManager.utrDict = {}
    utrUrl = "http://www.xbrl.org/utr/utr.xml"
    #utrUrl = os.path.join(modelManager.cntlr.configDir, "utr.xml")
    modelManager.cntlr.showStatus(_("Loading Unit Type Registry"))
    utrNs = "{http://www.xbrl.org/2009/utr}"  # hoisted: was repeated per findtext
    try:
        xmldoc = etree.parse(modelManager.cntlr.webCache.getfilename(utrUrl))
        for unitElt in xmldoc.iter(tag=utrNs + "unit"):
            entryId = unitElt.get("id")  # renamed: original "id" shadowed the builtin
            unitId = unitElt.findtext(utrNs + "unitId")
            nsUnit = unitElt.findtext(utrNs + "nsUnit")
            itemType = unitElt.findtext(utrNs + "itemType")
            nsItemType = unitElt.findtext(utrNs + "nsItemType")
            numeratorItemType = unitElt.findtext(utrNs + "numeratorItemType")
            nsNumeratorItemType = unitElt.findtext(utrNs + "nsNumeratorItemType")
            denominatorItemType = unitElt.findtext(utrNs + "denominatorItemType")
            nsDenominatorItemType = unitElt.findtext(utrNs + "nsDenominatorItemType")
            # TO DO: This indexing scheme assumes that there are no name
            # clashes in item types of the registry.
            # setdefault replaces the original "get(itemType) == None" test.
            regEntries = modelManager.utrDict.setdefault(itemType, {})
            # a RegEntry is just an array.
            regEntries[entryId] = [unitId, nsUnit                          # 0,1
                                   , nsNumeratorItemType, numeratorItemType  # 2,3
                                   , nsDenominatorItemType, denominatorItemType  # 4,5
                                   , nsItemType  # 6 often None
                                   ]
    except (EnvironmentError, etree.LxmlError) as err:
        modelManager.cntlr.addToLog("Unit Type Registry Import error: {0}".format(err))
        etree.clear_error_log()
def validate(self): """ Validate the provided data file for correctness against the provided schema file. Returns: A flag indicating if the data validates against the schema. """ # clear any previous xml errors clear_error_log() if self.schemafile is not None: try: # Attempt parsing the schema file schdoc = parse(self.schemafile) except XMLSyntaxError, e: # The schema was not parsable XML logging.warning('The schema XML file could not be parsed.') for item in e.error_log: logging.info(item) return False try: theschema = XMLSchema(schdoc) except XMLSchemaParseError, e: # The schema document is XML, but it's not a schema logging.warning('The schema XML file was parsed, but it does not appear to be a valid XML Schema document.') for item in e.error_log: logging.info(item) return False
def test(xhtml_file: Path, dtd: DTD, schematron: Schematron) -> bool:
    """
    Test that an XHTML file matches a DTD and passes Schematron tests.

    Error messages are printed to stderr if the file doesn't pass.

    :param xhtml_file: the XHTML file to test
    :param dtd: the DTD
    :param schematron: the Schematron
    :return: True if the file passes
    """
    if settings.verbose:
        print(xhtml_file)
    # start from a clean global lxml error log
    clear_error_log()
    parser = XHTMLParser(dtd_validation=True, ns_clean=True)
    try:
        document = parse(source=str(xhtml_file), parser=parser).getroot()
    except IOError as e:
        print(f"{xhtml_file}: {e.strerror}", file=stderr)
        return False
    except XMLSyntaxError:
        print_error_log(parser.error_log)
        return False
    # guard-clause chain: stop at the first failing validator
    if not dtd.validate(document):
        print_error_log(dtd.error_log)
        return False
    if not schematron.validate(document):
        print_schematron_error_log(document, schematron)
        return False
    # links are checked before images; both must pass
    return test_links(xhtml_file, document) and test_images(xhtml_file, document)
def transformIterable(self, result, encoding):
    """Apply the transform if required

    Parses the response body, runs the Diazo XSLT transform over it,
    and (in development mode with diazo.debug enabled) appends a
    runtrace debug panel to the page body.  Returns the (possibly
    transformed) result, or None when no transform applies.
    """
    result = self.parseTree(result)
    if result is None:
        return None
    DevelopmentMode = Globals.DevelopmentMode
    # Runtrace is only honoured in development mode, via ?diazo.debug=...
    runtrace = (DevelopmentMode and self.request.get(
        'diazo.debug', '').lower() in ('1', 'y', 'yes', 't', 'true'))
    try:
        # Fresh lxml error log so a failure reports only this transform.
        etree.clear_error_log()
        settings = self.getSettings()
        if settings.doctype:
            result.doctype = settings.doctype
            if not result.doctype.endswith('\n'):
                result.doctype += '\n'
        transform = self.setupTransform(runtrace=runtrace)
        if transform is None:
            return None
        # Theme-parameter cache is bypassed entirely in development mode.
        cache = None
        if not DevelopmentMode:
            cache = getCache(settings)
        parameterExpressions = settings.parameterExpressions or {}
        params = prepareThemeParameters(findContext(self.request),
                                        self.request,
                                        parameterExpressions, cache)
        transformed = transform(result.tree, **params)
        # Capture the log before any swap, for the runtrace panel below.
        error_log = transform.error_log
        if transformed is not None:
            # Transformed worked, swap content with result
            result.tree = transformed
    except etree.LxmlError as e:
        # In production a transform error propagates; in development we
        # fall through and render the error via the runtrace panel.
        if not (DevelopmentMode):
            raise
        error_log = e.error_log
        runtrace = True
    if runtrace:
        from diazo.runtrace import generate_debug_html
        # Add debug information to end of body
        body = result.tree.xpath('/html/body')[0]
        body.insert(
            -1,
            generate_debug_html(
                findContext(self.request).portal_url() +
                '/++resource++diazo-debug',
                rules=settings.rules,
                rules_parser=getParser('rules', settings.readNetwork),
                error_log=error_log,
            ))
    return result
def test_validate_xml_against_xsd(self):
    """Validate generated DataCite XML for all public records.

    For each record in the 'zenodo' collection: rejects upper-case
    related-identifier schemes, then validates the 'dcite' output of
    DOI-bearing records against ``self.schema``.  Failures dump the
    record id and XML before re-raising.
    """
    from invenio.websearch_model import Collection
    from invenio.bibformat import format_record
    from invenio.bibfield import get_record
    # Reset lxml's global error log for a clean run.
    etree.clear_error_log()
    for recid in Collection.query.filter_by(name='zenodo').first().reclist:
        try:
            xml = None  # so the except block can tell whether XML was built
            record = get_record(recid)
            for identifier in record.get('related_identifiers', []):
                # DataCite schemes must be lower-case.
                if identifier['scheme'] != identifier['scheme'].lower():
                    raise Exception("Record %s has problem with upper-case scheme %s" % (recid, identifier['scheme']))
            if record.get('doi', None):
                xml = StringIO(format_record(recid, 'dcite'))
                xml_doc = etree.parse(xml)
                self.schema.assertValid(xml_doc)  # raises if invalid
        except Exception, e:
            # Print debugging context, then propagate the failure.
            print recid
            if xml:
                print xml.getvalue()
            raise e
def loadUtr(modelManager):
    """Load the Unit Type Registry into
    ``modelManager.disclosureSystem.utrItemTypeEntries``.

    Builds a dict of item types constrained by the UTR: itemType ->
    {entry id -> UtrEntry}.  Parse/IO failures are logged; the stream
    is now closed in a ``finally`` block so it is released even when an
    unexpected exception escapes (the original closed it only after the
    handled paths).
    """
    # Build a dictionary of item types that are constrained by the UTR
    utrItemTypeEntries = defaultdict(dict)
    # print('UTR LOADED FROM '+utrUrl);
    modelManager.cntlr.showStatus(_("Loading Unit Type Registry"))
    file = None
    try:
        from arelle.FileSource import openXmlFileStream
        # normalize any relative paths to config directory
        file = openXmlFileStream(modelManager.cntlr,
                                 modelManager.disclosureSystem.utrUrl,
                                 stripDeclaration=True)[0]
        xmldoc = etree.parse(file)
        utrNs = "{http://www.xbrl.org/2009/utr}"
        for unitElt in xmldoc.iter(tag=utrNs + "unit"):
            u = UtrEntry()
            u.id = unitElt.get("id")
            u.unitId = unitElt.findtext(utrNs + "unitId")
            u.nsUnit = (unitElt.findtext(utrNs + "nsUnit") or None)  # None if empty entry
            u.itemType = unitElt.findtext(utrNs + "itemType")
            u.nsItemType = unitElt.findtext(utrNs + "nsItemType")
            u.numeratorItemType = unitElt.findtext(utrNs + "numeratorItemType")
            u.nsNumeratorItemType = unitElt.findtext(utrNs + "nsNumeratorItemType")
            u.denominatorItemType = unitElt.findtext(utrNs + "denominatorItemType")
            u.nsDenominatorItemType = unitElt.findtext(utrNs + "nsDenominatorItemType")
            u.isSimple = u.numeratorItemType is None and u.denominatorItemType is None
            # TO DO: This indexing scheme assumes that there are no name
            # clashes in item types of the registry.
            utrItemTypeEntries[u.itemType][u.id] = u
        modelManager.disclosureSystem.utrItemTypeEntries = utrItemTypeEntries
    except (EnvironmentError, etree.LxmlError) as err:
        modelManager.cntlr.addToLog("Unit Type Registry Import error: {0}".format(err))
        etree.clear_error_log()
    finally:
        # release the stream even on unexpected exceptions
        if file:
            file.close()
def trim_xml(
        xml_file_in: str,
        xml_file_out: str
) -> Tuple[bool, Optional[str], Optional[etree.ElementTree]]:
    """Re-serialize ``xml_file_in`` pretty-printed (blank text removed)
    into ``xml_file_out``.

    Returns ``(ok, error_kind, tree)``: ``error_kind`` is ``'IO'`` or
    ``'Syntax'`` on failure; ``tree`` is the parsed document on success.
    """
    # clear global error log for lxml
    etree.clear_error_log()
    blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
    try:
        document = etree.parse(xml_file_in, blank_stripping_parser)
    except etree.XMLSyntaxError:
        return (False, 'Syntax', None)
    except IOError:
        return (False, 'IO', None)
    serialized = etree.tostring(document.getroot(),
                                pretty_print=True,
                                encoding='UTF-8')
    try:
        with open(xml_file_out, 'wb') as out_handle:
            out_handle.write(serialized)
    except IOError:
        return (False, 'IO', None)
    return (True, None, document)
def validate(self): """ Validate the provided data file for correctness against the provided schema file. @return: A flag indicating if the data validates against the schema. """ # clear any previous xml errors clear_error_log() if self.schemafile is not None: try: # Attempt parsing the schema file schdoc = parse(self.schemafile) except XMLSyntaxError, e: # The schema was not parsable XML logging.warning('The schema XML file could not be parsed.') for item in e.error_log: logging.info(item) return False try: theschema = XMLSchema(schdoc) except XMLSchemaParseError, e: # The schema document is XML, but it's not a schema logging.warning( 'The schema XML file was parsed, but it does not appear to be a valid XML Schema document.' ) for item in e.error_log: logging.info(item) return False
def quickDumpFunctionStereotypeInfo(xmlDocument, xsltDocument,):
    """Run the stereotype XSLT over xmlDocument and report, on stderr,
    the functions that are immediately preceded by a comment."""
    try:
        resultingDoc = executeTransform(xmlDocument, xsltDocument)
        # Dump the whole transformed document for inspection.
        print >> sys.stderr, et.tostring(resultingDoc)
        if len(xsltDocument.error_log) > 0:
            print >> sys.stderr, xsltDocument.error_log
            et.clear_error_log()
    except:
        # NOTE(review): bare except -- re-raises, so only used to print
        # context before propagating.
        print "Failed to execute transformation"
        raise
    matches = []
    try:
        # Functions whose immediately preceding sibling is a comment.
        matches = resultingDoc.xpath(
            "//src:function[preceding-sibling::*[1][self::src:comment]]",
            namespaces=xmlNamespaces
        )
        print >> sys.stderr, "Number of Functions located: {0}".format(len(matches))
    except:
        print >> sys.stderr, "Failed to test stereotype data"
        # print "transformed document"
        # print et.tostring(resultingDoc)
        # print "\n\n\nMatches: "
        # for m in matches:
        #     print et.tostring(m)
        raise
def validate(self, filepath, expected=None):
    """Syntax-validate the XML file at ``filepath``.

    Records one Validation row per lxml error on failure (bulk-created
    in batches of 100) and raises ValidationError; records a single
    passing Validation row on success.  ``expected`` is accepted for
    interface compatibility but unused here.
    """
    logger.debug('Validating syntax of {xml}'.format(xml=filepath))
    # Clean lxml error log so e.error_log covers only this parse.
    etree.clear_error_log()
    started = timezone.now()
    try:
        etree.parse(filepath)
    except etree.XMLSyntaxError as e:
        msg = 'Syntax validation of {xml} failed'.format(xml=filepath)
        logger.exception(msg)
        done = timezone.now()
        validation_objs = []
        for error in e.error_log:
            message = '{line}: {msg}'.format(line=error.line, msg=error.message)
            validation_objs.append(
                Validation(
                    passed=False,
                    validator=self.__class__.__name__,
                    filename=filepath,
                    message=message,
                    time_started=started,
                    time_done=done,
                    information_package_id=self.ip,
                    task=self.task,
                ))
        # batch size 100 keeps individual INSERTs bounded
        Validation.objects.bulk_create(validation_objs, 100)
        raise ValidationError(msg, errors=[o.message for o in validation_objs])
    except Exception as e:
        # Non-parse failure (I/O etc.): record it, then re-raise as-is.
        logger.exception(
            'Unknown error during syntax validation of {xml}'.format(
                xml=filepath))
        done = timezone.now()
        Validation.objects.create(
            passed=False,
            validator=self.__class__.__name__,
            filename=filepath,
            message=str(e),
            time_started=started,
            time_done=done,
            information_package_id=self.ip,
            task=self.task,
        )
        raise
    # Success path: persist a passing Validation record.
    Validation.objects.create(
        passed=True,
        validator=self.__class__.__name__,
        filename=filepath,
        time_started=started,
        time_done=timezone.now(),
        information_package_id=self.ip,
        task=self.task,
    )
    logger.info(
        "Successful syntax validation of {xml}".format(xml=filepath))
def loadStandardTaxonomiesDict(self):
    """Populate the standard-taxonomy lookup tables from the selected
    disclosure system's taxonomies file plus xbrlschemafiles.xml.

    Fills standardTaxonomiesDict (namespace -> (href, localHref) and
    href -> "Allowed"+attType), standardLocalHrefs, standardAuthorities,
    and baseTaxonomyNamespaces.  Parse/IO errors are logged, not raised.
    """
    if self.selection:
        self.standardTaxonomiesDict = {}
        self.standardLocalHrefs = set()
        self.standardAuthorities = set()
        if not self.standardTaxonomiesUrl:
            return
        basename = os.path.basename(self.standardTaxonomiesUrl)
        self.modelManager.cntlr.showStatus(
            _("parsing {0}").format(basename))
        try:
            # Two sources: the (possibly cached) selected taxonomies file,
            # then the bundled xbrlschemafiles.xml.
            for file in (self.modelManager.cntlr.webCache.getfilename(
                    self.standardTaxonomiesUrl),
                    os.path.join(self.modelManager.cntlr.configDir, "xbrlschemafiles.xml")):
                xmldoc = etree.parse(file)
                for locElt in xmldoc.iter(tag="Loc"):
                    # Collect the child fields of this Loc entry.
                    href = None
                    localHref = None
                    namespaceUri = None
                    attType = None
                    family = None
                    for childElt in locElt.iterchildren():
                        ln = childElt.tag
                        value = childElt.text.strip()
                        if ln == "Href":
                            href = value
                        elif ln == "LocalHref":
                            localHref = value
                        elif ln == "Namespace":
                            namespaceUri = value
                        elif ln == "AttType":
                            attType = value
                        elif ln == "Family":
                            family = value
                    if href:
                        # Schema/entry-point entries index by namespace too.
                        if namespaceUri and (attType == "SCH" or attType == "ENT"):
                            if namespaceUri not in self.standardTaxonomiesDict:
                                self.standardTaxonomiesDict[
                                    namespaceUri] = (href, localHref)
                            authority = UrlUtil.authority(namespaceUri)
                            self.standardAuthorities.add(authority)
                            if family == "BASE":
                                self.baseTaxonomyNamespaces.add(
                                    namespaceUri)
                        # first occurrence wins for the href index
                        if href not in self.standardTaxonomiesDict:
                            self.standardTaxonomiesDict[
                                href] = "Allowed" + attType
                        if localHref:
                            self.standardLocalHrefs.add(localHref)
                    elif attType == "SCH" and family == "BASE":
                        # Base-family schema with no Href: namespace only.
                        self.baseTaxonomyNamespaces.add(namespaceUri)
        except (EnvironmentError, etree.LxmlError) as err:
            self.modelManager.cntlr.addToLog(
                "{0}: import error: {1}".format(basename, err))
            etree.clear_error_log()
def _validate_xhtml(func_name, *args, **kwargs):
    """Parse the current browser page as XML when it declares the XHTML
    1.0 Strict DTD; raise TwillAssertionError with the formatted lxml
    error log if parsing fails.  Pages without the DTD are skipped."""
    page = b.get_html()
    if "xhtml1-strict.dtd" not in page:
        return
    # Fresh lxml error log so e.error_log covers only this page.
    etree.clear_error_log()
    try:
        doc = etree.parse(StringIO(page), base_url=b.get_url())
    except etree.XMLSyntaxError, e:
        raise twill.errors.TwillAssertionError(
            _format_error_log(page, e.error_log))
def validate(self, xml_data):
    """Parse ``xml_data`` as XML, optionally wrapping it first in
    ``self.additional_root_element`` so fragments become well-formed.
    Syntax errors are reported via raiseValidationError."""
    self.errors = []
    if self.additional_root_element:
        # Wrap fragment content in a synthetic root element.
        xml_data = u"<%(are)s>\n%(data)s\n</%(are)s>" % {
            "are": self.additional_root_element,
            "data": xml_data}
    # Clean lxml error log so e.error_log is specific to this parse.
    etree.clear_error_log()
    try:
        doc = etree.parse(StringIO(xml_data))
    except etree.XMLSyntaxError, e:
        self.raiseValidationError(xml_data, e.error_log)
def transformIterable(self, result, encoding):
    """Apply the transform if required

    Runs the Diazo XSLT transform over the parsed response tree; in
    development mode with diazo.debug set, appends a runtrace debug
    panel.  Returns the (possibly transformed) result or None.
    """
    result = self.parseTree(result)
    if result is None:
        return None
    DevelopmentMode = Globals.DevelopmentMode
    # diazo.debug is only honoured in development mode.
    runtrace = (DevelopmentMode and
                self.request.get('diazo.debug', '').lower() in ('1', 'y', 'yes', 't', 'true'))
    try:
        # Fresh lxml error log for this transform run.
        etree.clear_error_log()
        settings = self.getSettings()
        if settings.doctype:
            result.doctype = settings.doctype
            if not result.doctype.endswith('\n'):
                result.doctype += '\n'
        transform = self.setupTransform(runtrace=runtrace)
        if transform is None:
            return None
        # Parameter cache is disabled in development mode.
        cache = None
        if not DevelopmentMode:
            cache = getCache(settings)
        parameterExpressions = settings.parameterExpressions or {}
        params = prepareThemeParameters(findContext(self.request),
                                        self.request,
                                        parameterExpressions, cache)
        transformed = transform(result.tree, **params)
        # Saved before the swap; used by the runtrace panel below.
        error_log = transform.error_log
        if transformed is not None:
            # Transformed worked, swap content with result
            result.tree = transformed
    except etree.LxmlError as e:
        # Production: propagate.  Development: show via runtrace panel.
        if not(DevelopmentMode):
            raise
        error_log = e.error_log
        runtrace = True
    if runtrace:
        from diazo.runtrace import generate_debug_html
        # Add debug information to end of body
        body = result.tree.xpath('/html/body')[0]
        body.insert(-1, generate_debug_html(
            findContext(self.request).portal_url() + '/++resource++diazo-debug',
            rules=settings.rules,
            rules_parser=getParser('rules', settings.readNetwork),
            error_log = error_log,
        ))
    return result
def _validate_xml(self):
    """Read the 'inner_xml' parameter into a Record, reporting a
    ValidationError with line/column info on XML syntax errors."""
    inner_xml = self.require_unique_param('inner_xml')
    result = Record()
    # Reset lxml's global error log before any parsing activity.
    etree.clear_error_log()
    try:
        # NOTE(review): str.strip() cannot raise XMLSyntaxError -- an
        # etree parse/validation call appears to be missing from this
        # try block (perhaps Record.inner_xml's setter parses); confirm.
        result.inner_xml = inner_xml.strip()
    except etree.XMLSyntaxError, e:
        entry = e.error_log.last_error
        # line - 1 compensates for a wrapper line added elsewhere;
        # presumably matches how inner_xml is embedded -- verify.
        raise ValidationError("Invalid XML supplied: %s, "
                              "at line %d, character %d"
                              % (entry.message, entry.line - 1, entry.column))
def loadMappings(self):
    """Parse the mappings document at ``self.mappingsUrl`` and populate
    ``self.mappedFiles`` (file substitutions) and ``self.mappedPaths``
    (ordered path-prefix substitutions).  Errors are logged, not raised."""
    basename = os.path.basename(self.mappingsUrl)
    self.modelManager.cntlr.showStatus(_("parsing {0}").format(basename))
    try:
        mappingsDoc = etree.parse(self.mappingsUrl)
        # file-to-file substitutions
        for mapElt in mappingsDoc.iter(tag="mapFile"):
            self.mappedFiles[mapElt.get("from")] = mapElt.get("to")
        # path-prefix substitutions; order of appearance is preserved
        for mapElt in mappingsDoc.iter(tag="mapPath"):
            self.mappedPaths.append((mapElt.get("from"), mapElt.get("to")))
    except (EnvironmentError, etree.LxmlError) as err:
        self.modelManager.cntlr.addToLog(
            "{0}: import error: {1}".format(basename, err))
        etree.clear_error_log()
def loadMappings(self):
    """Read the mappings XML and record its mapFile entries in
    ``self.mappedFiles`` and its mapPath entries (in document order)
    in ``self.mappedPaths``.  Failures are logged via the controller."""
    basename = os.path.basename(self.mappingsUrl)
    self.modelManager.cntlr.showStatus(_("parsing {0}").format(basename))
    try:
        doc = etree.parse(self.mappingsUrl)
        for fileElt in doc.iter(tag="mapFile"):
            source, target = fileElt.get("from"), fileElt.get("to")
            self.mappedFiles[source] = target
        for pathElt in doc.iter(tag="mapPath"):
            source, target = pathElt.get("from"), pathElt.get("to")
            self.mappedPaths.append((source, target))
    except (EnvironmentError, etree.LxmlError) as err:
        self.modelManager.cntlr.addToLog("{0}: import error: {1}".format(
            basename, err))
        etree.clear_error_log()
def loadStandardTaxonomiesDict(self):
    """Build the standard-taxonomy lookups for the selected disclosure
    system from its taxonomies file and xbrlschemafiles.xml.

    Populates standardTaxonomiesDict, standardLocalHrefs,
    standardAuthorities and baseTaxonomyNamespaces.  Errors are logged.
    """
    if self.selection:
        self.standardTaxonomiesDict = {}
        self.standardLocalHrefs = set()
        self.standardAuthorities = set()
        if not self.standardTaxonomiesUrl:
            return
        basename = os.path.basename(self.standardTaxonomiesUrl)
        self.modelManager.cntlr.showStatus(_("parsing {0}").format(basename))
        try:
            # Selected taxonomies file (web-cached) plus bundled config file.
            for file in (self.modelManager.cntlr.webCache.getfilename(self.standardTaxonomiesUrl),
                         os.path.join(self.modelManager.cntlr.configDir,"xbrlschemafiles.xml")):
                xmldoc = etree.parse(file)
                for locElt in xmldoc.iter(tag="Loc"):
                    # Gather the fields of this Loc entry.
                    href = None
                    localHref = None
                    namespaceUri = None
                    attType = None
                    family = None
                    for childElt in locElt.iterchildren():
                        ln = childElt.tag
                        value = childElt.text.strip()
                        if ln == "Href":
                            href = value
                        elif ln == "LocalHref":
                            localHref = value
                        elif ln == "Namespace":
                            namespaceUri = value
                        elif ln == "AttType":
                            attType = value
                        elif ln == "Family":
                            family = value
                    if href:
                        # SCH/ENT entries are additionally indexed by namespace.
                        if namespaceUri and (attType == "SCH" or attType == "ENT"):
                            if namespaceUri not in self.standardTaxonomiesDict:
                                self.standardTaxonomiesDict[namespaceUri] = (href, localHref)
                            authority = UrlUtil.authority(namespaceUri)
                            self.standardAuthorities.add(authority)
                            if family == "BASE":
                                self.baseTaxonomyNamespaces.add(namespaceUri)
                        # first occurrence wins for the href index
                        if href not in self.standardTaxonomiesDict:
                            self.standardTaxonomiesDict[href] = "Allowed" + attType
                        if localHref:
                            self.standardLocalHrefs.add(localHref)
                    elif attType == "SCH" and family == "BASE":
                        # Href-less base schema entry: record namespace only.
                        self.baseTaxonomyNamespaces.add(namespaceUri)
        except (EnvironmentError, etree.LxmlError) as err:
            self.modelManager.cntlr.addToLog("{0}: import error: {1}".format(basename,err))
            etree.clear_error_log()
def html_to_article(content, language):
    """Extract the main article HTML from raw page ``content`` using the
    newspaper library's cleaner/extractor/formatter pipeline.

    Returns '' for empty/unparsable input or when no best node is found.
    The explicit ``del`` + clear_error_log() calls release lxml state
    between invocations.
    """
    content = content.strip()
    if not len(content):
        return ''
    config = NewspaperConfig()
    config.language = language
    doc = config.get_parser().fromstring(content.strip())
    if doc is None:
        return ''
    # Split block-level elements with newlines
    for tag in _BLOCKLEVEL_TAGS:
        if tag in _MEANINGLESS_TAGS:
            continue
        for node in doc.xpath('//{}'.format(tag)):
            node.append(etree.Element('br'))
            node.append(etree.Element('br'))
    # Initial cleanup
    cleaner = _NewspaperCleaner(config)
    doc = cleaner.clean(doc)
    # Best node estimation
    extractor = NewspaperExtractor(config)
    top = extractor.calculate_best_node(doc)
    if top is None:
        # No candidate article node: release resources and bail out.
        del doc, cleaner, extractor
        etree.clear_error_log()
        return ''
    top = extractor.post_cleanup(top)
    # Cleanup dummy nodes used for estimation
    for dummy in top.xpath("//p[@newspaper='dummy']"):
        dummy.getparent().remove(dummy)
    # Custom formatting to avoid unnecessary computations
    formatter = NewspaperFormatter(config)
    formatter.top_node = top
    formatter.remove_negativescores_nodes()
    content = formatter.convert_to_html()
    content = str(content).strip()
    content = unescape(content)
    # Drop references and flush lxml's error log before returning.
    del doc, top, cleaner, extractor, formatter
    etree.clear_error_log()
    return content
def _validate_xhtml(func_name, *args, **kwargs):
    """Parse the current page as XML when it references the XHTML 1.0
    Strict DTD; raise TwillAssertionError with the formatted error log
    on syntax errors.  Other pages are ignored."""
    page = b.get_html()
    if "xhtml1-strict.dtd" not in page:
        return
    # Fresh lxml error log so e.error_log only covers this parse.
    etree.clear_error_log()
    try:
        # lxml will try to convert the URL to unicode by itself,
        # this won't work for non-ascii URLs, so help him
        url = b.get_url()
        if isinstance(url, str):
            url = unicode(url, "latin1")
        etree.parse(StringIO(page), base_url=url)
    except etree.XMLSyntaxError, e:
        raise twill.errors.TwillAssertionError(_format_error_log(page, e.error_log))
def _validate_xhtml(func_name, *args, **kwargs):
    """Parse the current page as XML when it references the XHTML 1.0
    Strict DTD; raise TwillAssertionError with the formatted error log
    on syntax errors.  Other pages are ignored."""
    page = b.get_html()
    if "xhtml1-strict.dtd" not in page:
        return
    # Fresh lxml error log so e.error_log only covers this parse.
    etree.clear_error_log()
    try:
        # lxml will try to convert the URL to unicode by itself,
        # this won't work for non-ascii URLs, so help him
        url = b.get_url()
        if isinstance(url, str):
            url = unicode(url, 'latin1')
        etree.parse(StringIO(page), base_url=url)
    except etree.XMLSyntaxError as e:
        raise twill.errors.TwillAssertionError(
            _format_error_log(page, e.error_log))
def validate(file_path):
    """Validate a given file_path using the Validator from the Daisy Pipeline.
    Return an empty string if the validation was successful. Return a list of
    error messages as delivered by the Daisy Pipeline otherwise."""
    # Load the minimal XSD bundled with the project.
    xmlschema_doc = etree.parse(
        join(settings.PROJECT_DIR, 'documents', 'schema', 'minimalSchema.xsd'))
    xmlschema = etree.XMLSchema(xmlschema_doc)
    # Fresh lxml error log so e.error_log covers only this document.
    etree.clear_error_log()
    try:
        doc = etree.parse(file_path)
    except etree.XMLSyntaxError, e:
        # Only fatal errors are reported back to the caller.
        entries = e.error_log.filter_from_level(etree.ErrorLevels.FATAL)
        return [("%s on line %s" % (entry.message, entry.line)) for entry in entries]
    # NOTE(review): the schema validation against xmlschema presumably
    # follows in code not shown here; confirm against the full source.
def validateXML(xml_string):
    """Parse ``xml_string`` and return the resulting element on success.

    On a syntax error, returns a list of ``(line, message)`` tuples for
    every entry in lxml's error log; on any other failure, returns a
    single-item list ``[(0, "unknown error ...")]``.
    """
    try:
        etree.clear_error_log()  # keep the error log scoped to this parse
        parsed = etree.fromstring(xml_string, etree.XMLParser())
        return parsed
    except etree.XMLSyntaxError as e:
        return [(error.line, error.message) for error in e.error_log]
    except Exception as e:
        return [(0, "unknown error " + str(e))]
def update_pmode(obj, mode="add"): etree.clear_error_log() # parse the pmode xml template try: pmode = etree.parse(PMODE_FILE) parties = pmode.find("/businessProcesses/parties") init_parties = pmode.find("//initiatorParties") party_exists = pmode.xpath("//party[@name='{}']".format( obj.gateway_party_name)) if not party_exists and mode == 'add': new_party = etree.SubElement( parties, 'party', { 'name': obj.gateway_party_name, 'endpoint': obj.endpoint, 'allowChunking': 'false' }) etree.SubElement(new_party, 'identifier', { 'partyId': obj.gateway_party_id, 'partyIdType': 'partyTypeUrn' }) # add to initiator parties etree.SubElement(init_parties, 'initiatorParty', {'name': obj.gateway_party_name}) else: # get party to remove for party in parties.xpath(".//party[@name='{}']".format( obj.gateway_party_name)): party.getparent().remove(party) # also remove from initiatorParties for init_party in init_parties.xpath( ".//initiatorParty[@name='{}']".format( obj.gateway_party_name)): init_party.getparent().remove(init_party) # write the reult tree back to xml f = open(PMODE_FILE, 'w') f.write( etree.tostring(pmode, pretty_print=True, xml_declaration=True, encoding='UTF-8')) f.close() except Exception, e: return { 'success': False, 'msg': "Could not parse pmode template: {}".format(e.message) }
def validateXML(xml_doc, schema, Resultfile2):
    """Validate parsed ``xml_doc`` against ``schema`` (an XMLSchema),
    appending any validation errors to <Resultfile2>XSDVALIDATION.log.
    Clears the document root afterwards to free memory."""
    # Fresh lxml error log before validating.
    etree.clear_error_log()
    root_new = xml_doc.getroot()
    print "Parsing Done"
    if (schema.validate(xml_doc)):
        print "File is valid against the given Schema: "
    else:
        log = schema.error_log
        # NOTE(review): file1 is not defined in this function --
        # presumably a module-level global naming the input file; verify.
        print file1 + " is not valid against Schema"
        #logpath = filepath.split(".")[0]+".log"
        logpath = Resultfile2 + "XSDVALIDATION.log"
        # Append mode: log accumulates across runs.
        fs = open(logpath, "a+")
        for error in iter(log):
            fs.write(error.message)
            fs.write("\n")
        fs.close()
    # Release the tree's children to reduce memory footprint.
    root_new.clear()
def loadUtr( modelManager ):
    """Load the XBRL Unit Type Registry into ``modelManager.utrDict``.

    Builds a dictionary of item types constrained by the UTR, keyed by
    item type name; each value maps a registry entry id to a "RegEntry"
    list (index layout below).  ``utrDict`` is unbound on
    ``modelManager`` until this function is called.  Errors are logged
    and leave ``utrDict`` possibly partial.
    """
    modelManager.utrDict = { }
    utrUrl = "http://www.xbrl.org/utr/utr.xml"
    #utrUrl = os.path.join(modelManager.cntlr.configDir, "utr.xml")
    modelManager.cntlr.showStatus(_("Loading Unit Type Registry"))
    utrNs = "{http://www.xbrl.org/2009/utr}"  # hoisted namespace prefix
    try:
        xmldoc = etree.parse(modelManager.cntlr.webCache.getfilename(utrUrl))
        for unitElt in xmldoc.iter(tag=utrNs + "unit"):
            entryId = unitElt.get("id")  # renamed: "id" shadowed the builtin
            unitId = unitElt.findtext(utrNs + "unitId")
            nsUnit = unitElt.findtext(utrNs + "nsUnit")
            itemType = unitElt.findtext(utrNs + "itemType")
            nsItemType = unitElt.findtext(utrNs + "nsItemType")
            numeratorItemType = unitElt.findtext(utrNs + "numeratorItemType")
            nsNumeratorItemType = unitElt.findtext(utrNs + "nsNumeratorItemType")
            denominatorItemType = unitElt.findtext(utrNs + "denominatorItemType")
            nsDenominatorItemType = unitElt.findtext(utrNs + "nsDenominatorItemType")
            # TO DO: This indexing scheme assumes that there are no name
            # clashes in item types of the registry.
            # setdefault replaces the former "get(itemType) == None" test.
            regEntries = modelManager.utrDict.setdefault(itemType, {})
            # a RegEntry is just an array.
            regEntries[entryId] = [
                unitId, nsUnit  # 0,1
                , nsNumeratorItemType, numeratorItemType  # 2,3
                , nsDenominatorItemType, denominatorItemType  # 4,5
                , nsItemType  # 6 often None
            ]
    except (EnvironmentError, etree.LxmlError) as err:
        modelManager.cntlr.addToLog(
            "Unit Type Registry Import error: {0}".format(err))
        etree.clear_error_log()
def loadMappings(self):
    """Parse the disclosure-system mappings document, expanding any
    XInclude references first, and record its mapFile entries in
    ``self.mappedFiles`` and mapPath entries in ``self.mappedPaths``.
    Failures are reported through the controller's structured log."""
    basename = os.path.basename(self.mappingsUrl)
    self.modelManager.cntlr.showStatus(_("parsing {0}").format(basename))
    try:
        mappingsDoc = etree.parse(self.mappingsUrl)
        mappingsDoc.xinclude()  # pull in any xi:include'd fragments
        for mapElt in mappingsDoc.iter(tag="mapFile"):
            self.mappedFiles[mapElt.get("from")] = mapElt.get("to")
        for mapElt in mappingsDoc.iter(tag="mapPath"):
            self.mappedPaths.append((mapElt.get("from"), mapElt.get("to")))
    except (EnvironmentError, etree.LxmlError) as err:
        self.modelManager.cntlr.addToLog(
            _("Disclosure System \"%(name)s\" import %(importFile)s, error: %(error)s"),
            messageCode="arelle:disclosureSystemImportError",
            messageArgs={"error": str(err),
                         "name": self.name,
                         "importFile": basename},
            level=logging.ERROR)
        etree.clear_error_log()
def check_syntax(
        xml_file: str
) -> Tuple[bool, Optional[str], Optional[str], Optional[etree.ElementTree]]:
    """Parse ``xml_file`` and report the outcome.

    Returns ``(ok, kind, detail, tree)``: on success ``(True, None,
    None, tree)``; on failure ``kind`` is ``'IO'`` or ``'Syntax'`` with
    a human-readable ``detail`` and ``tree`` is None.
    """
    # clear global error log for lxml
    etree.clear_error_log()
    try:
        parsed = etree.parse(xml_file)
    except IOError:
        return (False, 'IO', 'Invalid File', None)
    except etree.XMLSyntaxError as err:
        return (False, 'Syntax', str(err.error_log), None)  #pylint: disable=no-member
    return (True, None, None, parsed)
def loadUtr( modelManager ):
    """Load the Unit Type Registry into
    ``modelManager.disclosureSystem.utrItemTypeEntries``
    (itemType -> {entry id -> UtrEntry}, including unit symbols).

    The input stream is now closed in a ``finally`` block so it is
    released even when an unexpected exception escapes (the original
    closed it only after the handled paths).
    """
    # Build a dictionary of item types that are constrained by the UTR
    modelManager.disclosureSystem.utrItemTypeEntries = utrItemTypeEntries = defaultdict(
        dict)
    # print('UTR LOADED FROM '+utrUrl);
    # skip status message as it hides prior activity during which this might
    # have just obtained symbols
    # modelManager.cntlr.showStatus(_("Loading Unit Type Registry"))
    file = None
    try:
        from arelle.FileSource import openXmlFileStream
        # normalize any relative paths to config directory
        file = openXmlFileStream(modelManager.cntlr,
                                 modelManager.disclosureSystem.utrUrl,
                                 stripDeclaration=True)[0]
        xmldoc = etree.parse(file)
        utrNs = "{http://www.xbrl.org/2009/utr}"
        for unitElt in xmldoc.iter(tag=utrNs + "unit"):
            u = UtrEntry()
            u.id = unitElt.get("id")
            u.unitId = unitElt.findtext(utrNs + "unitId")
            u.nsUnit = (unitElt.findtext(utrNs + "nsUnit") or None)  # None if empty entry
            u.itemType = unitElt.findtext(utrNs + "itemType")
            u.nsItemType = unitElt.findtext(utrNs + "nsItemType")
            u.numeratorItemType = unitElt.findtext(utrNs + "numeratorItemType")
            u.nsNumeratorItemType = unitElt.findtext(utrNs + "nsNumeratorItemType")
            u.denominatorItemType = unitElt.findtext(utrNs + "denominatorItemType")
            u.nsDenominatorItemType = unitElt.findtext(utrNs + "nsDenominatorItemType")
            u.isSimple = u.numeratorItemType is None and u.denominatorItemType is None
            u.symbol = unitElt.findtext(utrNs + "symbol")
            # TO DO: This indexing scheme assumes that there are no name
            # clashes in item types of the registry.
            utrItemTypeEntries[u.itemType][u.id] = u
    except (EnvironmentError, etree.LxmlError) as err:
        modelManager.cntlr.addToLog(
            "Unit Type Registry Import error: {0}".format(err))
        etree.clear_error_log()
    finally:
        # always release the stream, even on unexpected exceptions
        if file:
            file.close()
def fragment_to_text(html):
    """Convert an HTML fragment to plain text, approximating how a
    browser would render line breaks around block-level elements.

    Raises ValueError if BeautifulSoup cannot build a tree.
    """
    if not len(html.strip()):
        return ''
    soup = BeautifulSoup(html, 'lxml')
    if soup is None:
        raise ValueError('Can\'t build DOM tree with LXML')
    # Drop comments
    for comment in soup(text=lambda txt: isinstance(txt, Comment)):
        comment.extract()
    # Drop non-meaning tags
    for node in soup(_MEANINGLESS_TAGS):
        node.replace_with(soup.new_tag('br'))
    # Insert linebreaks around block-level tags
    for node in soup(_BLOCKLEVEL_TAGS):
        node.insert_before(soup.new_tag('br'))
        node.insert_before(soup.new_tag('br'))
        node.insert_after(soup.new_tag('br'))
        node.insert_after(soup.new_tag('br'))
    # Remove linebreaks inside text nodes (as browser does)
    for node in soup(string=lambda string: '\n' in string):
        node.string.replace_with(node.string.replace('\n', ' '))
    # Swap html linebreaks to normal ones
    for node in soup('br'):
        node.replace_with('\n')
    # Cleanup final text: collapse spaces, trim around newlines,
    # and cap consecutive blank lines at one.
    text = soup.getText()
    text = str(text).strip()
    text = re.sub(' {2,}', ' ', text)
    text = text.replace(' \n', '\n').replace('\n ', '\n')
    text = re.sub('\n{3,}', '\n\n', text)
    # Free the tree and flush lxml's accumulated error log.
    soup.decompose()
    etree.clear_error_log()
    del soup
    return text
def insert_entries(self, entries_xml, taxids): """insert UniProt entries from XML""" # to avoid memory leak reload of etree is necessary if 'etree' in sys.modules: importlib.reload(etree) parser = etree.XMLParser(collect_ids=False) entries = etree.fromstringlist(entries_xml, parser) for entry in entries: self.insert_entry(entry, taxids) entry.clear() del entry etree.clear_error_log() del entries self.session.commit()
def run(xhtml_files: List[Path], dtd_file: Path, images: bool, links: bool) -> bool:
    """Validate every file in ``xhtml_files`` against the DTD at
    ``dtd_file`` (plus image/link checks per the flags).

    Returns True only if the DTD loads and every file passes.
    Fixes a NameError: the verbose print referenced ``xhtml_file``
    while the loop variable was named ``file``.
    """
    try:
        dtd = DTD(str(dtd_file))
    except DTDParseError as e:
        print(e.error_log, file=stderr)
        clear_error_log()
        return False
    else:
        success = True
        for xhtml_file in xhtml_files:
            # if you reuse the parser on too many documents it gets confused
            parser = XHTMLParser(dtd_validation=True, ns_clean=True)
            dtd = DTD(str(dtd_file))
            if settings.verbose:
                print(xhtml_file)  # was a NameError: loop var was "file"
            if not test(xhtml_file, parser, dtd, images, links):
                success = False
        return success
def validate_cib(self, new_cib_elem):
    """Validate a CIB element tree against the Pacemaker Relax-NG schema.

    Returns ``(is_valid, detail_msg)`` where detail_msg concatenates the
    schema error-log entries on failure.  Raises PacemakerError when the
    schema cannot be located/expanded or either document fails to parse.
    """
    detail_msg = ""
    if self.is_local:
        schema_f = os.path.join(self.local_dir, self.schema_filename)
    else:
        # Remote case: expand the schema into a temporary file.
        try:
            tmp_f = self.tmp_schema_f()
        except EnvironmentError as msg:
            raise PacemakerError("Cannot expand the Relax-NG schema: " + str(msg))
        if tmp_f is None:
            raise PacemakerError("Cannot expand the Relax-NG schema")
        else:
            schema_f = tmp_f
    # Round-trip through serialization to get a standalone copy to validate.
    try:
        cib_elem = etree.fromstring(etree.tostring(new_cib_elem))
    except etree.Error as msg:
        raise PacemakerError("Failed to parse the CIB XML: " + str(msg))
    try:
        schema = etree.RelaxNG(file=schema_f)
    except etree.Error as msg:
        raise PacemakerError("Failed to parse the Relax-NG schema: " + str(msg))
    # Best-effort: clearing the global error log must never abort validation.
    try:
        etree.clear_error_log()
    except:
        pass
    is_valid = schema.validate(cib_elem)
    if not is_valid:
        for error_entry in schema.error_log:
            detail_msg += error_entry.level_name + ": " + error_entry.message + "\n"
    if not self.is_local:
        # Best-effort cleanup of the temporary schema directory.
        try:
            delete_dir(os.path.dirname(tmp_f))
        except:
            pass
    return (is_valid, detail_msg)
def test(xhtml_file: Path, parser: XHTMLParser, dtd: DTD, images: bool, links: bool) -> bool:
    """Parse and DTD-validate one XHTML file, then optionally run the
    image and link checks.  Errors go to stderr; the lxml error log is
    cleared on every exit path.  Returns True when all checks pass."""
    passed = False
    try:
        try:
            document = parse(source=str(xhtml_file), parser=parser).getroot()
            dtd.assertValid(document)
        except IOError as e:
            print(f"{xhtml_file}: {e.strerror}", file=stderr)
        except (XMLSyntaxError, DocumentInvalid) as e:
            # both carry an error_log; identical reporting
            print(str(e.error_log), file=stderr)
        else:
            passed = True
        # `passed and ...` short-circuits, so the extra checks (and the
        # reference to `document`) are skipped when validation failed
        if images:
            passed = passed and test_images(xhtml_file, document)
        if links:
            passed = passed and test_links(xhtml_file, document)
    finally:
        clear_error_log()
    return passed
def validate_xml(xml_doc: etree.ElementTree,
                 xsd_file: str) -> Tuple[bool, Optional[str], Optional[str]]:
    """Validate ``xml_doc`` against the XML Schema at ``xsd_file``.

    Returns ``(ok, kind, detail)``: ``(True, None, None)`` on success;
    otherwise ``kind`` is ``'IO'``, ``'Syntax'`` (schema problems) or
    ``'Schema'`` (document invalid) with a descriptive ``detail``.
    """
    # clear global error log for lxml
    etree.clear_error_log()
    try:
        schema = etree.XMLSchema(etree.parse(xsd_file))
    except IOError:
        return (False, 'IO', 'XSD file I/O error')
    except etree.XMLSyntaxError as err:
        return (False, 'Syntax', str(err.error_log))  #pylint: disable=no-member
    try:
        schema.assertValid(xml_doc)
    except etree.DocumentInvalid as err:
        return (False, 'Schema', str(err.error_log))  #pylint: disable=no-member
    return (True, None, None)
def parse_schedule(xml, filename):
    """
    Parses a schedule definition in XML.

    :param str xml: The XML with a schedule definition
    :param str filename: Name of the originating file (used in error messages only).

    :rtype: enarksh.xml_reader.node.ScheduleNode
    """
    # Load the XSD so the parser validates the document while parsing.
    with open(os.path.join(C.HOME, 'etc/enarksh.xsd'), 'rb') as f:
        xsd = f.read()
    # Reset lxml's global error log so only this parse's errors are reported.
    etree.clear_error_log()
    schema_root = etree.XML(xsd)
    schema = etree.XMLSchema(schema_root)
    parser = etree.XMLParser(schema=schema, encoding='utf8')
    try:
        root = etree.fromstring(bytes(xml, 'utf8'), parser)
        # Root element must be a schedule.
        if root.tag != 'Schedule':
            raise Exception("Root element must be 'Schedule' but '{0!s}' was found.".format(root.tag))
        schedule = create_node('Schedule')
        schedule.read_xml(root)
        # Semantic validation beyond what the XSD can express.
        error = schedule.validate()
        if error:
            raise Exception(
                "File '{0!s}' is not a valid schedule configuration file.\n{1!s}".format(filename, error))
        # Set recursion and dependency levels.
        schedule.set_levels()
    except etree.XMLSyntaxError as exception:
        # Log warnings and above from lxml's error log, then re-raise.
        log = logging.getLogger('enarksh')
        log.error(exception.error_log.filter_from_level(etree.ErrorLevels.WARNING))
        raise exception
    return schedule
def check_html(filename, html_lines, html_hints, quiet):
    """Validates the given HTML (as XHTML actually)

    html_lines is a sequence of (linenum, line) pairs; html_hints is a
    mutable list of (linenum, hint_substring) pairs naming errors that may
    be ignored.  Returns the number of real (non-ignored) errors.
    """
    global etree
    print("\n# -- HTML check for '%s'" % filename)
    # re-build the page content, replacing the DTD with the XHTML DTD,
    # or adding it if missing. Jinja2 expressions are removed.
    opened_braces = 0
    normalized_lines = []
    has_html_elt = has_head_elt = has_body_elt = False
    for linenum, line in html_lines:
        has_html_elt = has_html_elt or '<html>' in line
        has_head_elt = has_head_elt or '<head>' in line
        has_body_elt = has_body_elt or '<body>' in line
        if line.strip() != '<!DOCTYPE html>':
            # opened_braces carries multi-line Jinja2 expression state.
            normalized, opened_braces = remove_jinja_exprs(
                linenum, line, opened_braces)
            normalized_lines.append(normalized)
    is_xml = html_lines[0][1].startswith('<?xml ')
    if not is_xml:
        # Wrap fragments into a complete XHTML document so lxml can parse them.
        if not has_body_elt:
            normalized_lines[0] = '<body>' + normalized_lines[0]
            normalized_lines[-1] = normalized_lines[-1] + '</body>'
        if not has_head_elt:
            normalized_lines[0] = '<head><title/></head>' + normalized_lines[0]
        if not has_html_elt:
            normalized_lines[0] = '<html>' + normalized_lines[0]
            normalized_lines[-1] = normalized_lines[-1] + '</html>'
        normalized_lines[0] = XHTML_DOCTYPE + normalized_lines[0]
    page = '\n'.join(normalized_lines)
    ## print('LINES %s' % ''.join("%5d: %s" % l for l in html_lines)) # DEBUG
    ## print('PAGE %s' %
    ##       '\n'.join("%5d: %s" % l for l in enumerate(normalized_lines)))
    ## print('HINTS', repr(html_hints)) # DEBUG
    etree.clear_error_log()
    try:
        # lxml will try to convert the URL to unicode by itself,
        # this won't work for non-ascii URLs, so help him
        etree.parse(StringIO(page), base_url='.')  # base_url ??
        if not quiet:
            for lineinfo in html_lines:
                # NOTE(review): trailing comma after print() looks like a
                # Python-2 leftover; in Python 3 it builds a throwaway tuple.
                print('%5d %s' % lineinfo),
        return 0
    except etree.XMLSyntaxError as e:
        errors = []
        for entry in e.error_log:
            errors.append((entry.line, entry.column, entry.message))
        real_errors = []

        def process_error(linenum, col, msg):
            # Advance past hints for earlier lines, keeping the last one.
            hint_linenum = hint = None
            while html_hints:
                hint_linenum, hint = html_hints[0]
                if hint_linenum >= linenum or len(html_hints) == 1:
                    break
                del html_hints[0]
            if hint and hint in msg:
                # Hint matched: this error is expected, consume the hint.
                del html_hints[0]
                ignored = ' (IGNORED "%s")' % hint
            else:
                real_errors.append(linenum)
                ignored = ''
            print('%s:%s:%s: %s%s' % (filename, linenum, col, msg, ignored))

        # Interleave source listing with the errors reported on each line.
        for linenum, line in html_lines:
            if not quiet:
                print('%5d %s' % (linenum, line)),
            while errors and errors[0][0] == linenum:
                err = errors[0]
                del errors[0]
                process_error(*err)
        # in case some errors haven't been flushed at this point...
        for err in errors:
            process_error(*err)
        return len(real_errors)
def loadUtr(modelXbrl):  # Build a dictionary of item types that are constrained by the UTR
    """Load the XBRL Unit Type Registry into the disclosure system.

    Populates ``modelManager.disclosureSystem.utrItemTypeEntries`` as
    itemType -> {entry id -> UtrEntry}, reporting duplicate and malformed
    registry entries via modelXbrl.error.
    """
    modelManager = modelXbrl.modelManager
    modelManager.disclosureSystem.utrItemTypeEntries = utrItemTypeEntries = defaultdict(dict)
    # print('UTR LOADED FROM '+utrUrl);
    # skip status message as it hides prior activity during which this might have just obtained symbols
    # modelManager.cntlr.showStatus(_("Loading Unit Type Registry"))
    file = None
    try:
        from arelle.FileSource import openXmlFileStream
        # normalize any relative paths to config directory
        unitDupCheck = set()
        file = openXmlFileStream(modelManager.cntlr, modelManager.disclosureSystem.utrUrl, stripDeclaration=True)[0]
        xmldoc = etree.parse(file)
        for unitElt in xmldoc.iter(tag="{http://www.xbrl.org/2009/utr}unit"):
            u = UtrEntry()
            u.id = unitElt.get("id")
            u.unitId = unitElt.findtext("{http://www.xbrl.org/2009/utr}unitId")
            u.nsUnit = (unitElt.findtext("{http://www.xbrl.org/2009/utr}nsUnit") or None)  # None if empty entry
            u.itemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}itemType")
            u.nsItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}nsItemType")
            u.numeratorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}numeratorItemType")
            u.nsNumeratorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}nsNumeratorItemType")
            u.denominatorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}denominatorItemType")
            u.nsDenominatorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}nsDenominatorItemType")
            # A "simple" unit has no numerator/denominator decomposition.
            u.isSimple = all(e is None for e in (u.numeratorItemType, u.nsNumeratorItemType,
                                                 u.denominatorItemType, u.nsDenominatorItemType))
            u.symbol = unitElt.findtext("{http://www.xbrl.org/2009/utr}symbol")
            u.status = unitElt.findtext("{http://www.xbrl.org/2009/utr}status")
            # NOTE(review): only REC (recommended) entries appear to be
            # registered and consistency-checked — confirm intended scope.
            if u.status == "REC":
                # TO DO: This indexing scheme assumes that there are no name clashes in item types of the registry.
                (utrItemTypeEntries[u.itemType])[u.id] = u
                unitDupKey = (u.unitId, u.nsUnit, u.status)
                if unitDupKey in unitDupCheck:
                    modelXbrl.error("arelleUtrLoader:entryDuplication",
                                    "Unit Type Registry entry duplication: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                                    modelObject=modelXbrl, id=u.id, unitId=u.unitId, nsUnit=u.nsUnit, status=u.status)
                unitDupCheck.add(unitDupKey)
                if u.isSimple:
                    if not u.itemType:
                        modelXbrl.error("arelleUtrLoader:simpleDefMissingField",
                                        "Unit Type Registry simple unit definition missing item type: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                                        modelObject=modelXbrl, id=u.id, unitId=u.unitId, nsUnit=u.nsUnit, status=u.status)
                    if u.numeratorItemType or u.denominatorItemType or u.nsNumeratorItemType or u.nsDenominatorItemType:
                        modelXbrl.error("arelleUtrLoader",
                                        "Unit Type Registry simple unit definition may not have complex fields: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                                        modelObject=modelXbrl, id=u.id, unitId=u.unitId, nsUnit=u.nsUnit, status=u.status)
                else:
                    if u.symbol:
                        modelXbrl.error("arelleUtrLoader:complexDefSymbol",
                                        "Unit Type Registry complex unit definition may not have symbol: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                                        modelObject=modelXbrl, id=u.id, unitId=u.unitId, nsUnit=u.nsUnit, status=u.status)
                    if not u.numeratorItemType or not u.denominatorItemType:
                        modelXbrl.error("arelleUtrLoader:complexDefMissingField",
                                        "Unit Type Registry complex unit definition must have numerator and denominator fields: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                                        modelObject=modelXbrl, id=u.id, unitId=u.unitId, nsUnit=u.nsUnit, status=u.status)
    except (EnvironmentError, etree.LxmlError) as err:
        modelManager.modelXbrl.error("arelleUtrLoader:error",
                                     "Unit Type Registry Import error: %(error)s",
                                     modelObject=modelXbrl, error=err)
        etree.clear_error_log()
    # Close the registry stream regardless of success or failure.
    if file:
        file.close()
def loadStandardTaxonomiesDict(self):
    """Load the standard-taxonomies tables for the selected disclosure system.

    Parses standardTaxonomiesUrl plus the config xbrlschemafiles.xml, filling
    standardTaxonomiesDict (namespace -> hrefs and href -> "Allowed"+attType),
    standardLocalHrefs, standardAuthorities, standardPrefixes and familyHrefs.
    """
    if self.selection:
        self.standardTaxonomiesDict = defaultdict(set)
        self.familyHrefs = defaultdict(set)
        self.standardLocalHrefs = defaultdict(set)
        self.standardAuthorities = set()
        self.standardPrefixes = {}
        if not self.standardTaxonomiesUrl:
            return
        basename = os.path.basename(self.standardTaxonomiesUrl)
        self.modelManager.cntlr.showStatus(_("parsing {0}").format(basename))
        try:
            from arelle.FileSource import openXmlFileStream
            for filepath in (self.standardTaxonomiesUrl,
                             os.path.join(self.modelManager.cntlr.configDir, "xbrlschemafiles.xml")):
                xmldoc = etree.parse(filepath)  # must open with file path for xinclude to know base of file
                xmldoc.xinclude()  # to include elements below root use xpointer(/*/*)
                # Pick up the Erxl version from the first Erxl element only.
                for erxlElt in xmldoc.iter(tag="Erxl"):
                    v = erxlElt.get("version")
                    if v and re.match(r"[0-9]+([.][0-9]+)*$", v):
                        vSplit = v.split('.')
                        # at least 3 digits always!
                        self.version = tuple(int(n) for n in vSplit) + tuple(0 for n in range(3 - len(vSplit)))
                    break
                for locElt in xmldoc.iter(tag="Loc"):
                    href = None
                    localHref = None
                    namespaceUri = None
                    prefix = None
                    attType = None
                    family = None
                    elements = None
                    version = None
                    # Collect the child fields of this Loc entry.
                    for childElt in locElt.iterchildren():
                        ln = childElt.tag
                        value = childElt.text.strip()
                        if ln == "Href":
                            href = value
                        elif ln == "LocalHref":
                            localHref = value
                        elif ln == "Namespace":
                            namespaceUri = value
                        elif ln == "Prefix":
                            prefix = value
                        elif ln == "AttType":
                            attType = value
                        elif ln == "Family":
                            family = value
                        elif ln == "Elements":
                            elements = value
                        elif ln == "Version":
                            version = value
                    if href:
                        if namespaceUri and (attType == "SCH" or attType == "ENT"):
                            self.standardTaxonomiesDict[namespaceUri].add(href)
                            if localHref:
                                self.standardLocalHrefs[namespaceUri].add(localHref)
                            authority = UrlUtil.authority(namespaceUri)
                            self.standardAuthorities.add(authority)
                            if family == "BASE":
                                self.baseTaxonomyNamespaces.add(namespaceUri)
                            if prefix:
                                self.standardPrefixes[namespaceUri] = prefix
                        if href not in self.standardTaxonomiesDict:
                            self.standardTaxonomiesDict[href] = "Allowed" + attType
                        if family:
                            self.familyHrefs[family].add(ErxlLoc(family, version, href, attType, elements, namespaceUri))
                    elif attType == "SCH" and family == "BASE":
                        self.baseTaxonomyNamespaces.add(namespaceUri)
        except (EnvironmentError, etree.LxmlError) as err:
            self.modelManager.cntlr.addToLog(_("Disclosure System \"%(name)s\" import %(importFile)s, error: %(error)s"),
                                             messageCode="arelle:disclosureSystemImportError",
                                             messageArgs={"error": str(err), "name": self.name, "importFile": basename},
                                             level=logging.ERROR)
            etree.clear_error_log()
def process_response(self, request, response):
    """Django middleware hook: apply the Diazo theme transform to a response.

    Returns the response unchanged when theming is disabled or the body
    cannot be parsed; otherwise replaces the response content with the
    transformed tree, optionally injecting runtrace debug HTML.
    """
    settings = getSettings()
    if settings is None or not isThemeEnabled(request, response, settings):
        return response
    result = parseTree(response)
    if result is None:
        return response
    # Runtrace debugging only in development mode and when explicitly requested.
    runtrace = (DevelopmentMode and request.GET.get(u'diazo.debug', u'').lower() in TRUE)
    try:
        etree.clear_error_log()
        if settings.get('doctype'):
            result.doctype = settings.get('doctype')
            if not result.doctype.endswith('\n'):
                result.doctype += '\n'
        transform = setupTransform(request, response, runtrace)
        if transform is None:
            return response
        parameterExpressions = settings.get('parameter_expressions') or {}
        params = prepareThemeParameters(request, parameterExpressions)
        transformed = transform(result.tree, **params)
        error_log = transform.error_log
        if transformed is not None:
            # Transformed worked, swap content with result
            result.tree = transformed
    except etree.LxmlError as e:
        # In production a transform failure is fatal; in development we fall
        # through and render the error log via the runtrace debug panel.
        if not(DevelopmentMode):
            raise
        error_log = e.error_log
        runtrace = True
    if runtrace:
        from diazo.runtrace import generate_debug_html
        # Add debug information to end of body
        base_url = request.build_absolute_uri()[:-len(request.path)]
        body = result.tree.xpath('/html/body')
        if body:
            body = body[0]
        else:
            # No <body>: create one so the debug panel has somewhere to go.
            html = result.tree.xpath('/html')[0]
            body = etree.Element('body')
            html.append(body)
        body.insert(-1, generate_debug_html(
            base_url + '/diazo-debug',
            rules=settings.get('rules'),
            rules_parser=getParser('rules', settings.get('read_network')),
            error_log=error_log,
        ))
    response.content = str(result)
    if settings.get('update_content_length'):
        response['Content-Length'] = str(len(response.content))
    return response
def parse(self, doc, options):
    """Lint an XML document, appending diagnostics to ``doc.diagnostics``.

    Detects whether the document is itself a schema (XSD/RelaxNG) and checks
    it for schema errors; also looks for a schema reference in XML comments
    and validates the document against it when found.
    """
    doc.diagnostics = []
    doc_type = 'XML'
    # Reset lxml's global error log so stale errors aren't reported.
    etree.clear_error_log()
    with open(doc.data_path) as f:
        source = f.read()
    try:
        # parse the XML for errors
        if os.path.isabs(doc.path):
            doc_schema = self.get_schema(doc.path, doc.path, source)
            xml = doc_schema['xml']
            if doc_schema['type'] != None:
                doc_type = doc_schema['type']
        else:
            xml = etree.fromstring(source)
        # if the doc is a schema itself, parse it for schema errors
        try:
            if doc_type == "XSD":
                etree.XMLSchema(xml)
            elif doc_type == "RelaxNG":
                etree.RelaxNG(xml)
        except (etree.RelaxNGError, etree.XMLSchemaParseError) as e:
            for error in e.error_log:
                doc.diagnostics.append(
                    self.format_error(doc_type + " parsing error", error))
        except Exception as e:
            doc.diagnostics.append(
                self.format_error(doc_type + " parsing error", e))
        # parse XML comments in document for a reference to a schema
        try:
            (schema_ref, schema_location, comment_line) = self.look_for_schema(doc.path, xml)
            if schema_ref != None:
                try:
                    if schema_ref['type'] == "XSD":
                        schema = etree.XMLSchema(schema_ref['xml'])
                    elif schema_ref['type'] == "RelaxNG":
                        schema = etree.RelaxNG(schema_ref['xml'])
                    schema.assertValid(xml)
                except (etree.DocumentInvalid, etree.RelaxNGValidateError, etree.XMLSchemaValidateError):
                    # Document failed validation: report every logged error.
                    for error in schema.error_log:
                        doc.diagnostics.append(
                            self.format_error(
                                schema_ref['type'] + " validation error", error))
                except (etree.RelaxNGError, etree.XMLSchemaParseError):
                    # Referenced schema itself is broken.
                    doc.diagnostics.append(
                        self.format_error(
                            schema_ref['type'] + " error",
                            "Schema is invalid " + schema_location, comment_line))
                except Exception as e:
                    doc.diagnostics.append(
                        self.format_error(schema_ref['type'] + " error", e))
        except etree.XMLSyntaxError as e:
            doc.diagnostics.append(
                self.format_error(
                    "Schema error",
                    "Unable to parse schema XML " + schema_location, comment_line))
        except Exception as e:
            doc.diagnostics.append(
                self.format_error("Schema error", e, comment_line))
    # handle XML parse errors
    except etree.XMLSyntaxError as e:
        for error in e.error_log:
            doc.diagnostics.append(
                self.format_error("XML parsing error", error))
    # ignore other exceptions
    except:
        pass
def _test(self, sample):
    """Time how long lxml takes to parse the sample's HTML content."""
    # Drop stale entries from lxml's global error log before the run.
    etree.clear_error_log()
    markup = sample['content']

    def _parse():
        return lxml.html.fromstring(markup)

    elapsed_seconds = self.timeit(_parse)
    return {'time': elapsed_seconds}
def transformIterable(self, result, encoding): """Apply the transform if required """ # Obtain settings. Do nothing if not found policy = theming_policy(self.request) settings = policy.getSettings() if settings is None: return None if not policy.isThemeEnabled(): return None result = self.parseTree(result) if result is None: return None debug_mode = getConfiguration().debug_mode runtrace = self.debug_theme() try: etree.clear_error_log() if settings.doctype: result.doctype = settings.doctype if not result.doctype.endswith('\n'): result.doctype += '\n' transform = self.setupTransform(runtrace=runtrace) if transform is None: return None cache = None if not debug_mode: cache = policy.getCache() parameterExpressions = settings.parameterExpressions or {} params = prepareThemeParameters(findContext(self.request), self.request, parameterExpressions, cache) transformed = transform(result.tree, **params) error_log = transform.error_log if transformed is not None: # Transformed worked, swap content with result result.tree = transformed except etree.LxmlError as e: if not (debug_mode): raise error_log = e.error_log runtrace = True if runtrace: from diazo.runtrace import generate_debug_html # Add debug information to end of body body = result.tree.xpath('/html/body')[0] debug_url = findContext( self.request).portal_url() + '/++resource++diazo-debug' body.insert( -1, generate_debug_html( debug_url, rules=settings.rules, rules_parser=getParser('rules', settings.readNetwork), error_log=error_log, )) return result
# NOTE(review): truncated fragment — this is the tail of a Python-2 variant of
# validate_cib (its "def" header is missing from this chunk) and uses the
# legacy "except ExcType, name:" syntax, which is a SyntaxError in Python 3.
else:
    schema_f = tmp_f
# Round-trip serialize/parse the CIB element tree.
try:
    cib_elem = etree.fromstring(etree.tostring(new_cib_elem))
except etree.Error, msg:
    raise PacemakerError("Failed to parse the CIB XML: " + str(msg))
try:
    schema = etree.RelaxNG(file=schema_f)
except etree.Error, msg:
    raise PacemakerError("Failed to parse the Relax-NG schema: " + str(msg))
# Best-effort reset of lxml's global error log.
try:
    etree.clear_error_log()
except:
    pass
is_valid = schema.validate(cib_elem)
if not is_valid:
    # Collect validator messages for the caller.
    for error_entry in schema.error_log:
        detail_msg += error_entry.level_name + ": " + error_entry.message + "\n"
# Best-effort cleanup of the temporary expanded schema directory.
if not self.is_local:
    try:
        delete_dir(os.path.dirname(tmp_f))
    except:
        pass
return (is_valid, detail_msg)
def transformIterable(self, result, encoding): """Apply the transform if required """ # Obtain settings. Do nothing if not found policy = theming_policy(self.request) settings = policy.getSettings() if settings is None: return None if not policy.isThemeEnabled(): return None result = self.parseTree(result) if result is None: return None debug_mode = getConfiguration().debug_mode runtrace = self.debug_theme() try: etree.clear_error_log() if settings.doctype: result.doctype = settings.doctype if not result.doctype.endswith('\n'): result.doctype += '\n' transform = self.setupTransform(runtrace=runtrace) if transform is None: return None cache = None if not debug_mode: cache = policy.getCache() parameterExpressions = settings.parameterExpressions or {} params = prepareThemeParameters( findContext(self.request), self.request, parameterExpressions, cache ) transformed = transform(result.tree, **params) error_log = transform.error_log if transformed is not None: # Transformed worked, swap content with result result.tree = transformed except etree.LxmlError as e: if not(debug_mode): raise error_log = e.error_log runtrace = True if runtrace: from diazo.runtrace import generate_debug_html # Add debug information to end of body body = result.tree.xpath('/html/body')[0] debug_url = findContext( self.request ).portal_url() + '/++resource++diazo-debug' body.insert( -1, generate_debug_html( debug_url, rules=settings.rules, rules_parser=getParser('rules', settings.readNetwork), error_log=error_log, ) ) return result
def select(self, name):
    """Select and load the named disclosure system from the configured URLs.

    Scans each config document (last plugin first) for a DisclosureSystem
    element whose "names" list contains *name*, copies its attributes onto
    self, then loads mappings and the standard-taxonomies tables.
    Returns True on success (or when *name* is empty — nothing to load).
    """
    self.clear()
    if not name:
        return True  # nothing to load
    result = False
    status = _("loading disclosure system and mappings")
    try:
        if name:
            isSelected = False
            for url in self.urls:  # urls in revese order, last plugin first
                xmldoc = etree.parse(url)
                for dsElt in xmldoc.iter(tag="DisclosureSystem"):
                    namesStr = dsElt.get("names")
                    if namesStr:
                        names = namesStr.split("|")
                        if name in names:
                            # Matched: copy configuration onto this instance.
                            self.names = names
                            self.name = self.names[0]
                            self.validationType = dsElt.get("validationType")
                            self.exclusiveTypesPattern = compileAttrPattern(dsElt, "exclusiveTypesPattern", patternIfNoAttr=None)
                            if self.validationType not in self.pluginTypes:
                                self.EFM = self.validationType == "EFM"
                                self.GFM = self.validationType == "GFM"
                                self.EFMorGFM = self.EFM or self.GFM
                                self.HMRC = self.validationType == "HMRC"
                                self.SBRNL = self.validationType == "SBR.NL"
                            # Let plugins define additional type-test flags.
                            for pluginXbrlMethod in pluginClassMethods("DisclosureSystem.Types"):
                                for typeName, typeTestVariable in pluginXbrlMethod(self):
                                    setattr(self, typeTestVariable, self.validationType == typeName)
                            self.validateFileText = dsElt.get("validateFileText") == "true"
                            if dsElt.get("allowedExternalHrefPattern"):
                                self.allowedExternalHrefPattern = re.compile(dsElt.get("allowedExternalHrefPattern"))
                            self.blockDisallowedReferences = dsElt.get("blockDisallowedReferences") == "true"
                            try:
                                self.maxSubmissionSubdirectoryEntryNesting = int(dsElt.get("maxSubmissionSubdirectoryEntryNesting"))
                            except (ValueError, TypeError):
                                self.maxSubmissionSubdirectoryEntryNesting = 0
                            self.defaultXmlLang = dsElt.get("defaultXmlLang")
                            if dsElt.get("defaultXmlEncoding", default=None) is not None:
                                # don't reset from utf-8 unless supplied with a value
                                self.defaultXmlEncoding = dsElt.get("defaultXmlEncoding")  # may be an empty string
                            self.xmlLangPattern = compileAttrPattern(dsElt, "xmlLangPattern")
                            self.defaultLanguage = dsElt.get("defaultLanguage")
                            self.standardTaxonomiesUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                dsElt.get("standardTaxonomiesUrl"), url)
                            if dsElt.get("mappingsUrl"):
                                self.mappingsUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                    dsElt.get("mappingsUrl"), url)
                            if dsElt.get("utrUrl"):  # may be mapped by mappingsUrl entries, see below
                                self.utrUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                    dsElt.get("utrUrl"), url)
                            self.identifierSchemePattern = compileAttrPattern(dsElt, "identifierSchemePattern")
                            self.identifierValuePattern = compileAttrPattern(dsElt, "identifierValuePattern")
                            self.identifierValueName = dsElt.get("identifierValueName")
                            self.contextElement = dsElt.get("contextElement")
                            self.roleDefinitionPattern = compileAttrPattern(dsElt, "roleDefinitionPattern")
                            self.labelCheckPattern = compileAttrPattern(dsElt, "labelCheckPattern", re.DOTALL)
                            self.labelTrimPattern = compileAttrPattern(dsElt, "labelTrimPattern", re.DOTALL)
                            self.deiNamespacePattern = compileAttrPattern(dsElt, "deiNamespacePattern")
                            self.deiAmendmentFlagElement = dsElt.get("deiAmendmentFlagElement")
                            self.deiCurrentFiscalYearEndDateElement = dsElt.get("deiCurrentFiscalYearEndDateElement")
                            self.deiDocumentFiscalYearFocusElement = dsElt.get("deiDocumentFiscalYearFocusElement")
                            self.deiDocumentPeriodEndDateElement = dsElt.get("deiDocumentPeriodEndDateElement")
                            self.deiFilerIdentifierElement = dsElt.get("deiFilerIdentifierElement")
                            self.deiFilerNameElement = dsElt.get("deiFilerNameElement")
                            self.logLevelFilter = dsElt.get("logLevelFilter")
                            self.logCodeFilter = dsElt.get("logCodeFilter")
                            self.standardTaxonomyDatabase = dsElt.get("standardTaxonomyDatabase")
                            self.standardTaxonomyUrlPattern = compileAttrPattern(dsElt, "standardTaxonomyUrlPattern")
                            self.selection = self.name
                            isSelected = True
                            result = True
                            break
                if isSelected:
                    break
            self.loadMappings()
            self.utrUrl = self.mappedUrl(self.utrUrl)  # utr may be mapped, change to its mapped entry
            self.loadStandardTaxonomiesDict()
            self.utrTypeEntries = None  # clear any prior loaded entries
            # set log level filters (including resetting prior disclosure systems values if no such filter)
            self.modelManager.cntlr.setLogLevelFilter(self.logLevelFilter)  # None or "" clears out prior filter if any
            self.modelManager.cntlr.setLogCodeFilter(self.logCodeFilter)
            if result:
                status = _("loaded")
            else:
                status = _("unable to load disclosure system {}").format(name)
                self.modelManager.cntlr.addToLog(_("Disclosure System \"%(name)s\" not recognized (a plug-in may be needed)."),
                                                 messageCode="arelle:disclosureSystemName",
                                                 messageArgs={"name": name},
                                                 level=logging.ERROR)
    except (EnvironmentError, etree.LxmlError) as err:
        status = _("exception during loading")
        result = False
        self.modelManager.cntlr.addToLog(_("Disclosure System \"%(name)s\" loading error: %(error)s"),
                                         messageCode="arelle:disclosureSystemLoadingError",
                                         messageArgs={"error": str(err), "name": name},
                                         level=logging.ERROR)
        etree.clear_error_log()
    self.modelManager.cntlr.showStatus(_("Disclosure system and mappings {0}: {1}").format(status, name), 3500)
    return result
def select(self, name):
    """Select and load the named disclosure system (single config URL variant).

    Scans self.url for a DisclosureSystem element whose "names" list contains
    *name*, copies its attributes onto self, then loads mappings and the
    standard-taxonomies tables.  Returns True on success.
    """
    self.clear()
    status = _("loading disclosure system and mappings")
    try:
        if name:
            xmldoc = etree.parse(self.url)
            for dsElt in xmldoc.iter(tag="DisclosureSystem"):
                namesStr = dsElt.get("names")
                if namesStr:
                    names = namesStr.split("|")
                    if name in names:
                        # Matched: copy configuration onto this instance.
                        self.names = names
                        self.name = self.names[0]
                        self.validationType = dsElt.get("validationType")
                        self.EFM = self.validationType == "EFM"
                        self.GFM = self.validationType == "GFM"
                        self.EFMorGFM = self.EFM or self.GFM
                        self.HMRC = self.validationType == "HMRC"
                        self.SBRNL = self.validationType == "SBR-NL"
                        self.validateFileText = dsElt.get("validateFileText") == "true"
                        self.blockDisallowedReferences = dsElt.get("blockDisallowedReferences") == "true"
                        try:
                            self.maxSubmissionSubdirectoryEntryNesting = int(dsElt.get("maxSubmissionSubdirectoryEntryNesting"))
                        except (ValueError, TypeError):
                            self.maxSubmissionSubdirectoryEntryNesting = 0
                        self.defaultXmlLang = dsElt.get("defaultXmlLang")
                        self.xmlLangPattern = compileAttrPattern(dsElt, "xmlLangPattern")
                        self.defaultLanguage = dsElt.get("defaultLanguage")
                        self.standardTaxonomiesUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                            dsElt.get("standardTaxonomiesUrl"), self.url)
                        if dsElt.get("mappingsUrl"):
                            self.mappingsUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                dsElt.get("mappingsUrl"), self.url)
                        if dsElt.get("utrUrl"):  # may be mapped by mappingsUrl entries, see below
                            self.utrUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                dsElt.get("utrUrl"), self.url)
                        self.identifierSchemePattern = compileAttrPattern(dsElt, "identifierSchemePattern")
                        self.identifierValuePattern = compileAttrPattern(dsElt, "identifierValuePattern")
                        self.identifierValueName = dsElt.get("identifierValueName")
                        self.contextElement = dsElt.get("contextElement")
                        self.roleDefinitionPattern = compileAttrPattern(dsElt, "roleDefinitionPattern")
                        self.labelCheckPattern = compileAttrPattern(dsElt, "labelCheckPattern", re.DOTALL)
                        self.labelTrimPattern = compileAttrPattern(dsElt, "labelTrimPattern", re.DOTALL)
                        self.deiNamespacePattern = compileAttrPattern(dsElt, "deiNamespacePattern")
                        self.deiAmendmentFlagElement = dsElt.get("deiAmendmentFlagElement")
                        self.deiCurrentFiscalYearEndDateElement = dsElt.get("deiCurrentFiscalYearEndDateElement")
                        self.deiDocumentFiscalYearFocusElement = dsElt.get("deiDocumentFiscalYearFocusElement")
                        self.deiDocumentPeriodEndDateElement = dsElt.get("deiDocumentPeriodEndDateElement")
                        self.deiFilerIdentifierElement = dsElt.get("deiFilerIdentifierElement")
                        self.deiFilerNameElement = dsElt.get("deiFilerNameElement")
                        self.logLevelFilter = dsElt.get("logLevelFilter")
                        self.logCodeFilter = dsElt.get("logCodeFilter")
                        self.selection = self.name
                        break
            self.loadMappings()
            self.utrUrl = self.mappedUrl(self.utrUrl)  # utr may be mapped, change to its mapped entry
            self.loadStandardTaxonomiesDict()
            self.utrTypeEntries = None  # clear any prior loaded entries
            # set log level filters (including resetting prior disclosure systems values if no such filter)
            self.modelManager.cntlr.setLogLevelFilter(self.logLevelFilter)  # None or "" clears out prior filter if any
            self.modelManager.cntlr.setLogCodeFilter(self.logCodeFilter)
        # NOTE(review): success status/result are set even when name is empty,
        # matching the sibling select() which returns True for "nothing to load".
        status = _("loaded")
        result = True
    except (EnvironmentError, etree.LxmlError) as err:
        status = _("exception during loading")
        result = False
        self.modelManager.cntlr.addToLog("disclosuresystems.xml: import error: {0}".format(err))
        etree.clear_error_log()
    self.modelManager.cntlr.showStatus(_("Disclosure system and mappings {0}: {1}").format(status, name), 3500)
    return result
def loadStandardTaxonomiesDict(self):
    """Load the standard-taxonomies tables (file-stream variant).

    Reads standardTaxonomiesUrl plus the config xbrlschemafiles.xml through
    openXmlFileStream, filling standardTaxonomiesDict, standardLocalHrefs,
    standardAuthorities, standardPrefixes and familyHrefs.
    """
    if self.selection:
        self.standardTaxonomiesDict = defaultdict(set)
        self.familyHrefs = defaultdict(set)
        self.standardLocalHrefs = defaultdict(set)
        self.standardAuthorities = set()
        self.standardPrefixes = {}
        if not self.standardTaxonomiesUrl:
            return
        basename = os.path.basename(self.standardTaxonomiesUrl)
        self.modelManager.cntlr.showStatus(_("parsing {0}").format(basename))
        file = None
        try:
            from arelle.FileSource import openXmlFileStream
            for filepath in (self.standardTaxonomiesUrl,
                             os.path.join(self.modelManager.cntlr.configDir, "xbrlschemafiles.xml")):
                file = openXmlFileStream(self.modelManager.cntlr, filepath, stripDeclaration=True)[0]
                xmldoc = etree.parse(file)
                file.close()
                for locElt in xmldoc.iter(tag="Loc"):
                    href = None
                    localHref = None
                    namespaceUri = None
                    prefix = None
                    attType = None
                    family = None
                    elements = None
                    version = None
                    # Collect the child fields of this Loc entry.
                    for childElt in locElt.iterchildren():
                        ln = childElt.tag
                        value = childElt.text.strip()
                        if ln == "Href":
                            href = value
                        elif ln == "LocalHref":
                            localHref = value
                        elif ln == "Namespace":
                            namespaceUri = value
                        elif ln == "Prefix":
                            prefix = value
                        elif ln == "AttType":
                            attType = value
                        elif ln == "Family":
                            family = value
                        elif ln == "Elements":
                            elements = value
                        elif ln == "Version":
                            version = value
                    if href:
                        if namespaceUri and (attType == "SCH" or attType == "ENT"):
                            self.standardTaxonomiesDict[namespaceUri].add(href)
                            if localHref:
                                self.standardLocalHrefs[namespaceUri].add(localHref)
                            authority = UrlUtil.authority(namespaceUri)
                            self.standardAuthorities.add(authority)
                            if family == "BASE":
                                self.baseTaxonomyNamespaces.add(namespaceUri)
                            if prefix:
                                self.standardPrefixes[namespaceUri] = prefix
                        if href not in self.standardTaxonomiesDict:
                            self.standardTaxonomiesDict[href] = "Allowed" + attType
                        if family:
                            self.familyHrefs[family].add(ErxlLoc(family, version, href, attType, elements, namespaceUri))
                    elif attType == "SCH" and family == "BASE":
                        self.baseTaxonomyNamespaces.add(namespaceUri)
        except (EnvironmentError, etree.LxmlError) as err:
            self.modelManager.cntlr.addToLog("{0}: import error: {1}".format(basename, err))
            etree.clear_error_log()
        # Close the stream if an exception left it open (double close is harmless).
        if file:
            file.close()
def parse(self):
    """
    Traverses through the XML document and parses the data, applying it to the
    model specified in the :py:class:`~feedmapper.models.Mapping`.
    """
    self.mapping.parse_attempted = now()
    try:
        tree = etree.parse(self.data_source)
        root = tree.getroot()
        model_mappings = self.mapping.data_map['models']
        purge_filter = self.mapping.data_map.get('purge_filter')
        for model_string, configuration in model_mappings.items():
            baseurl = configuration.get('baseurl', None)
            if not self.validate_model_format(model_string):
                raise ValueError("Invalid model format in JSON mapping: %s" % model_string)
            identifier = configuration.get('identifier')
            identifier_fieldname = configuration.get('identifier_fieldname')
            if not identifier and not self.mapping.purge:
                raise UserWarning("Purging is off and the JSON mapping doesn't supply an identifier.")
            model = get_model(*model_string.split('.'))
            node_path = configuration['nodePath'].replace('.', '/')
            fields = configuration['fields']
            nodes = root.xpath(node_path, namespaces=self.nsmap)
            if self.mapping.purge:
                # remove existing items
                existing_items = model.objects.all()
                if purge_filter:
                    filter_kwargs = self.generate_filter_kwargs(purge_filter)
                    if filter_kwargs:
                        existing_items = existing_items.filter(**filter_kwargs)
                existing_items.delete()
            for node in nodes:
                if self.mapping.purge:
                    instance = model()
                else:
                    # purge is turned off, retrieve an existing instance
                    identifier_value = node.find(identifier, namespaces=self.nsmap).text.strip()
                    try:
                        kw = {identifier_fieldname: identifier_value}
                        instance = model.objects.get(**kw)
                    except model.DoesNotExist:
                        instance = model()
                for field, target in fields.items():
                    extra_args = {}
                    # NOTE(review): when target is a string, `"extra" in target`
                    # is a substring test, not a key lookup — confirm intended.
                    if "extra" in target:
                        extra_args.update(**target['extra'])
                    if baseurl:
                        extra_args['baseurl'] = baseurl
                    if isinstance(target, basestring):
                        if target == "feed_label":
                            # provides the feed label
                            value = self.mapping.label
                        else:
                            # maps one model field to one feed node
                            value = self.get_value(node, target)
                    elif isinstance(target, list):
                        # maps one model field to multiple feed nodes
                        value = self.join_fields(node, target)
                    elif isinstance(target, dict):
                        value = None
                        if 'transformer' in target:
                            # maps one model field to a transformer method
                            transformer = getattr(instance, target['transformer'])
                            text_list = [self.get_value(node, target_field) for target_field in target['fields']]
                            value = transformer(*text_list, **extra_args)
                        if 'default' in target and not value:
                            # maps one model field to a default value
                            value = target['default']
                    if isinstance(value, basestring):
                        value = value.strip()
                    setattr(instance, field, value)
                instance.save()
        self.mapping.parse_succeeded = True
        self.mapping.parse_log = ""
    except etree.Error as e:
        self.mapping.parse_succeeded = False
        self.mapping.parse_log = str(e.error_log)
    except IOError as e:
        self.mapping.parse_succeeded = False
        self.mapping.parse_log = e.args[0]
    # clear the lxml error log so errors don't compound
    etree.clear_error_log()
    self.mapping.save()
    # notify the authorities if a failure occured
    if not self.mapping.parse_succeeded and self.mapping.notification_recipients:
        self.notify_failure()
def parse(self):
    """
    Traverses through the XML document and parses the data, applying it to the model
    specified in the :py:class:`~feedmapper.models.Mapping`.

    Compared to the sibling parser, this variant also supports identifier
    transformers, implicit per-field ``parse_<field>`` transformer methods,
    and many-to-many targets (field paths ending in ``*``).
    """
    self.mapping.parse_attempted = datetime.now()
    try:
        tree = etree.parse(self.data_source)
        root = tree.getroot()
        model_mappings = self.mapping.data_map['models']
        purge_filter = self.mapping.data_map.get('purge_filter')
        for model_string, configuration in model_mappings.items():
            if not self.validate_model_format(model_string):
                raise ValueError("Invalid model format in JSON mapping: %s" % model_string)
            identifier = configuration.get('identifier')
            # allow transformation of identifiers
            identifier_transformer = None
            if isinstance(identifier, dict):
                identifier_transformer = identifier["transformer"]
                identifier = identifier["field"]
            if not identifier and not self.mapping.purge:
                raise UserWarning("Purging is off and the JSON mapping doesn't supply an identifier.")
            model = get_model(*model_string.split('.'))
            node_path = configuration['nodePath'].replace('.', '/')
            fields = configuration['fields']
            nodes = root.xpath(node_path, namespaces=self.nsmap)
            if self.mapping.purge:
                # remove existing items
                existing_items = model.objects.all()
                if purge_filter:
                    filter_kwargs = self.generate_filter_kwargs(purge_filter)
                    if filter_kwargs:
                        existing_items = existing_items.filter(**filter_kwargs)
                existing_items.delete()
            for node in nodes:
                if self.mapping.purge:
                    instance = model()
                else:
                    # purge is turned off, retrieve an existing instance
                    identifier_value = node.find(identifier, namespaces=self.nsmap).text
                    if identifier_transformer:
                        # NOTE(review): the transformer is looked up on the model
                        # class (not the instance) and called with parser=self.
                        identifier_value = getattr(model, identifier_transformer)(identifier_value, parser=self)
                    kwargs = {identifier: identifier_value}
                    try:
                        instance = model.objects.get(**kwargs)
                    except model.DoesNotExist:
                        # seed the new instance with its identifier value
                        instance = model(**kwargs)
                # m2m assignments are deferred until after instance.save()
                many_to_many = {}
                for field, target in fields.items():
                    # implicit transformer: a parse_<field> method on the model
                    transformer = getattr(instance, "parse_%s" % field, None)
                    if not transformer:
                        if isinstance(target, basestring):
                            # maps one model field to one feed node
                            value = self.get_value(node, target)
                        elif isinstance(target, list):
                            # maps one model field to multiple feed nodes
                            value = self.join_fields(node, target)
                        # NOTE(review): a dict target without a parse_<field>
                        # method falls through with no branch taken here, so
                        # `value` keeps its previous binding — confirm intended.
                    elif transformer or isinstance(target, dict):
                        # we may have a transformer (parse_fieldname method) or an extended definition
                        value = None
                        if 'transformer' in target:
                            # maps one model field to a transformer method
                            transformer = getattr(instance, target['transformer'])
                        else:
                            # we've got a single field definition with an implicit transformer
                            target = {"fields": [target]}
                        if transformer:
                            transformer_args = []
                            field_is_m2m = False
                            if len(target["fields"]) == 1 and target["fields"][0].endswith("*"):
                                # we've hit a many2many relation
                                transformer_args = self.get_value(node, target["fields"][0][:-1], as_text=False)
                                field_is_m2m = True
                            else:
                                for target_field in target["fields"]:
                                    if target_field.endswith("*"):
                                        raise ValueError(u"M2m fields can only contain one target field")
                                    else:
                                        transformer_args.append(self.get_value(node, target_field))
                            if field_is_m2m:
                                # defer: m2m needs a saved instance with a pk
                                many_to_many[field] = (
                                    transformer, transformer_args, {"parser": self}
                                )
                                continue
                            else:
                                value = transformer(*transformer_args, parser=self)
                        if 'default' in target and not value:
                            # maps one model field to a default value
                            value = target['default']
                    setattr(instance, field, value)
                instance.save()
                # handle m2m
                for transformer, args, kwargs in many_to_many.values():
                    transformer(*args, **kwargs)
        self.mapping.parse_succeeded = True
        self.mapping.parse_log = ""
    except etree.Error as e:
        self.mapping.parse_succeeded = False
        self.mapping.parse_log = str(e.error_log)
    except IOError as e:
        self.mapping.parse_succeeded = False
        self.mapping.parse_log = e.args[0]
    # clear the lxml error log so errors don't compound
    etree.clear_error_log()
    self.mapping.save()
    # notify the authorities if a failure occured
    if not self.mapping.parse_succeeded and self.mapping.notification_recipients:
        self.notify_failure()
def select(self, name):
    """Select and load the named disclosure system from the config XML files.

    Scans ``self.urls`` for a ``DisclosureSystem`` element whose ``names``
    attribute (|-separated) contains *name*, copies its attributes onto
    ``self``, then loads mappings and the standard-taxonomies dictionary.
    Returns True on success (also when *name* is empty), False otherwise.
    """
    self.clear()
    if not name:
        return True  # nothing to load
    result = False
    status = _("loading disclosure system and mappings")
    try:
        if name:
            isSelected = False
            for url in self.urls:  # urls in revese order, last plugin first
                xmldoc = etree.parse(url)
                for dsElt in xmldoc.iter(tag="DisclosureSystem"):
                    namesStr = dsElt.get("names")
                    if namesStr:
                        names = namesStr.split("|")
                        if name in names:
                            # first name in the list is the canonical one
                            self.names = names
                            self.name = self.names[0]
                            self.validationType = dsElt.get("validationType")
                            self.exclusiveTypesPattern = compileAttrPattern(
                                dsElt, "exclusiveTypesPattern", patternIfNoAttr=None)
                            # built-in validation type flags (plugins handle their own below)
                            if self.validationType not in self.pluginTypes:
                                self.EFM = self.validationType == "EFM"
                                self.GFM = self.validationType == "GFM"
                                self.EFMorGFM = self.EFM or self.GFM
                                self.HMRC = self.validationType == "HMRC"
                                self.SBRNL = self.validationType == "SBR.NL"
                            # let plugins declare their own type-test attributes
                            for pluginXbrlMethod in pluginClassMethods("DisclosureSystem.Types"):
                                for typeName, typeTestVariable in pluginXbrlMethod(self):
                                    setattr(self, typeTestVariable,
                                            self.validationType == typeName)
                            self.validateFileText = dsElt.get("validateFileText") == "true"
                            if dsElt.get("allowedExternalHrefPattern"):
                                self.allowedExternalHrefPattern = re.compile(
                                    dsElt.get("allowedExternalHrefPattern"))
                            self.blockDisallowedReferences = dsElt.get("blockDisallowedReferences") == "true"
                            # missing or non-numeric attribute defaults to 0
                            try:
                                self.maxSubmissionSubdirectoryEntryNesting = int(
                                    dsElt.get("maxSubmissionSubdirectoryEntryNesting"))
                            except (ValueError, TypeError):
                                self.maxSubmissionSubdirectoryEntryNesting = 0
                            self.defaultXmlLang = dsElt.get("defaultXmlLang")
                            if dsElt.get("defaultXmlEncoding", default=None) is not None:
                                # don't reset from utf-8 unless supplied with a value
                                self.defaultXmlEncoding = dsElt.get("defaultXmlEncoding")  # may be an empty string
                            self.xmlLangPattern = compileAttrPattern(dsElt, "xmlLangPattern")
                            self.xmlLangIsInheritable = dsElt.get("xmlLangIsInheritable", "true") == "true"
                            self.defaultLanguage = dsElt.get("defaultLanguage")
                            # URLs are normalized relative to the config file they came from
                            if dsElt.get("standardTaxonomiesUrl"):
                                self.standardTaxonomiesUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                    dsElt.get("standardTaxonomiesUrl"), url)
                            if dsElt.get("mappingsUrl"):
                                self.mappingsUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                    dsElt.get("mappingsUrl"), url)
                            if dsElt.get("utrUrl"):  # may be mapped by mappingsUrl entries, see below
                                self.utrUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                    dsElt.get("utrUrl"), url)
                            self.identifierSchemePattern = compileAttrPattern(
                                dsElt, "identifierSchemePattern")
                            self.identifierValuePattern = compileAttrPattern(
                                dsElt, "identifierValuePattern")
                            self.identifierValueName = dsElt.get("identifierValueName")
                            self.contextElement = dsElt.get("contextElement")
                            self.roleDefinitionPattern = compileAttrPattern(
                                dsElt, "roleDefinitionPattern")
                            self.labelCheckPattern = compileAttrPattern(
                                dsElt, "labelCheckPattern", re.DOTALL)
                            self.labelTrimPattern = compileAttrPattern(
                                dsElt, "labelTrimPattern", re.DOTALL)
                            self.deiNamespacePattern = compileAttrPattern(
                                dsElt, "deiNamespacePattern")
                            self.deiAmendmentFlagElement = dsElt.get("deiAmendmentFlagElement")
                            self.deiCurrentFiscalYearEndDateElement = dsElt.get("deiCurrentFiscalYearEndDateElement")
                            self.deiDocumentFiscalYearFocusElement = dsElt.get("deiDocumentFiscalYearFocusElement")
                            self.deiDocumentPeriodEndDateElement = dsElt.get("deiDocumentPeriodEndDateElement")
                            self.deiFilerIdentifierElement = dsElt.get("deiFilerIdentifierElement")
                            self.deiFilerNameElement = dsElt.get("deiFilerNameElement")
                            self.logLevelFilter = dsElt.get("logLevelFilter")
                            self.logCodeFilter = dsElt.get("logCodeFilter")
                            self.standardTaxonomyDatabase = dsElt.get("standardTaxonomyDatabase")
                            self.standardTaxonomyUrlPattern = compileAttrPattern(
                                dsElt, "standardTaxonomyUrlPattern")
                            self.selection = self.name
                            isSelected = True
                            result = True
                            break
                if isSelected:
                    break
        self.loadMappings()
        self.utrUrl = self.mappedUrl(self.utrUrl)  # utr may be mapped, change to its mapped entry
        self.loadStandardTaxonomiesDict()
        self.utrTypeEntries = None  # clear any prior loaded entries
        # set log level filters (including resetting prior disclosure systems values if no such filter)
        self.modelManager.cntlr.setLogLevelFilter(self.logLevelFilter)  # None or "" clears out prior filter if any
        self.modelManager.cntlr.setLogCodeFilter(self.logCodeFilter)
        if result:
            status = _("loaded")
        else:
            status = _("unable to load disclosure system {}").format(name)
            self.modelManager.cntlr.addToLog(
                _("Disclosure System \"%(name)s\" not recognized (a plug-in may be needed)."),
                messageCode="arelle:disclosureSystemName",
                messageArgs={"name": name},
                level=logging.ERROR)
    except (EnvironmentError, etree.LxmlError) as err:
        status = _("exception during loading")
        result = False
        self.modelManager.cntlr.addToLog(
            _("Disclosure System \"%(name)s\" loading error: %(error)s"),
            messageCode="arelle:disclosureSystemLoadingError",
            messageArgs={
                "error": str(err),
                "name": name
            },
            level=logging.ERROR)
        etree.clear_error_log()
    self.modelManager.cntlr.showStatus(
        _("Disclosure system and mappings {0}: {1}").format(status, name), 3500)
    return result
def loadStandardTaxonomiesDict(self):
    """Build the standard-taxonomies lookup tables for the selected system.

    Parses ``self.standardTaxonomiesUrl`` plus the bundled
    ``xbrlschemafiles.xml`` (with XInclude processing) and fills
    ``standardTaxonomiesDict``, ``standardLocalHrefs``, ``familyHrefs``,
    ``standardAuthorities``, ``standardPrefixes`` and the
    ``baseTaxonomyNamespaces`` set. No-op unless a disclosure system has
    been selected.
    """
    if self.selection:
        self.standardTaxonomiesDict = defaultdict(set)
        self.familyHrefs = defaultdict(set)
        self.standardLocalHrefs = defaultdict(set)
        self.standardAuthorities = set()
        self.standardPrefixes = {}
        if not self.standardTaxonomiesUrl:
            return
        basename = os.path.basename(self.standardTaxonomiesUrl)
        self.modelManager.cntlr.showStatus(_("parsing {0}").format(basename))
        try:
            from arelle.FileSource import openXmlFileStream
            for filepath in (self.standardTaxonomiesUrl,
                             os.path.join(self.modelManager.cntlr.configDir, "xbrlschemafiles.xml")):
                xmldoc = etree.parse(filepath)  # must open with file path for xinclude to know base of file
                xmldoc.xinclude()  # to include elements below root use xpointer(/*/*)
                # take the version from the first well-formed Erxl element only
                for erxlElt in xmldoc.iter(tag="Erxl"):
                    v = erxlElt.get("version")
                    if v and re.match(r"[0-9]+([.][0-9]+)*$", v):
                        vSplit = v.split('.')
                        # at least 3 digits always!
                        self.version = tuple(int(n) for n in vSplit) + tuple(
                            0 for n in range(3 - len(vSplit)))
                        break
                for locElt in xmldoc.iter(tag="Loc"):
                    href = None
                    localHref = None
                    namespaceUri = None
                    prefix = None
                    attType = None
                    family = None
                    elements = None
                    version = None
                    # collect this Loc entry's child-element values
                    for childElt in locElt.iterchildren():
                        ln = childElt.tag
                        value = childElt.text.strip()
                        if ln == "Href":
                            href = value
                        elif ln == "LocalHref":
                            localHref = value
                        elif ln == "Namespace":
                            namespaceUri = value
                        elif ln == "Prefix":
                            prefix = value
                        elif ln == "AttType":
                            attType = value
                        elif ln == "Family":
                            family = value
                        elif ln == "Elements":
                            elements = value
                        elif ln == "Version":
                            version = value
                    if href:
                        if namespaceUri and (attType == "SCH" or attType == "ENT"):
                            self.standardTaxonomiesDict[namespaceUri].add(href)
                            if localHref:
                                self.standardLocalHrefs[namespaceUri].add(localHref)
                            authority = UrlUtil.authority(namespaceUri)
                            self.standardAuthorities.add(authority)
                            if family == "BASE":
                                self.baseTaxonomyNamespaces.add(namespaceUri)
                            if prefix:
                                self.standardPrefixes[namespaceUri] = prefix
                        # NOTE: mixed value types by design — namespace keys map
                        # to sets of hrefs, href keys map to an "Allowed…" string
                        if href not in self.standardTaxonomiesDict:
                            self.standardTaxonomiesDict[href] = "Allowed" + attType
                        if family:
                            self.familyHrefs[family].add(
                                ErxlLoc(family, version, href, attType, elements, namespaceUri))
                    elif attType == "SCH" and family == "BASE":
                        # entry without an Href can still declare a base namespace
                        self.baseTaxonomyNamespaces.add(namespaceUri)
        except (EnvironmentError, etree.LxmlError) as err:
            self.modelManager.cntlr.addToLog(
                _("Disclosure System \"%(name)s\" import %(importFile)s, error: %(error)s"),
                messageCode="arelle:disclosureSystemImportError",
                messageArgs={
                    "error": str(err),
                    "name": self.name,
                    "importFile": basename
                },
                level=logging.ERROR)
            etree.clear_error_log()
def loadUtr(modelXbrl):  # Build a dictionary of item types that are constrained by the UTR
    """Load the XBRL Unit Type Registry (UTR) for *modelXbrl*'s disclosure system.

    Populates ``modelManager.disclosureSystem.utrItemTypeEntries`` as
    {itemType: {entry id: UtrEntry}}, keeping only entries whose status is
    "REC", and reports duplicated or malformed registry entries as errors
    on *modelXbrl*.
    """
    modelManager = modelXbrl.modelManager
    modelManager.disclosureSystem.utrItemTypeEntries = utrItemTypeEntries = defaultdict(dict)
    # print('UTR LOADED FROM '+utrUrl);
    # skip status message as it hides prior activity during which this might have just obtained symbols
    # modelManager.cntlr.showStatus(_("Loading Unit Type Registry"))
    file = None
    try:
        from arelle.FileSource import openXmlFileStream
        # normalize any relative paths to config directory
        unitDupCheck = set()
        file = openXmlFileStream(modelManager.cntlr,
                                 modelManager.disclosureSystem.utrUrl,
                                 stripDeclaration=True)[0]
        xmldoc = etree.parse(file)
        for unitElt in xmldoc.iter(tag="{http://www.xbrl.org/2009/utr}unit"):
            u = UtrEntry()
            u.id = unitElt.get("id")
            u.unitId = unitElt.findtext("{http://www.xbrl.org/2009/utr}unitId")
            u.nsUnit = (unitElt.findtext("{http://www.xbrl.org/2009/utr}nsUnit") or None)  # None if empty entry
            u.itemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}itemType")
            u.nsItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}nsItemType")
            u.numeratorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}numeratorItemType")
            u.nsNumeratorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}nsNumeratorItemType")
            u.denominatorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}denominatorItemType")
            u.nsDenominatorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}nsDenominatorItemType")
            # a "simple" unit defines no numerator/denominator parts at all
            u.isSimple = all(e is None for e in (u.numeratorItemType, u.nsNumeratorItemType,
                                                 u.denominatorItemType, u.nsDenominatorItemType))
            u.symbol = unitElt.findtext("{http://www.xbrl.org/2009/utr}symbol")
            u.status = unitElt.findtext("{http://www.xbrl.org/2009/utr}status")
            if u.status == "REC":
                # TO DO: This indexing scheme assumes that there are no name clashes in item types of the registry.
                (utrItemTypeEntries[u.itemType])[u.id] = u
            unitDupKey = (u.unitId, u.nsUnit, u.status)
            if unitDupKey in unitDupCheck:
                modelXbrl.error(
                    "arelleUtrLoader:entryDuplication",
                    "Unit Type Registry entry duplication: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                    modelObject=modelXbrl, id=u.id, unitId=u.unitId,
                    nsUnit=u.nsUnit, status=u.status)
            unitDupCheck.add(unitDupKey)
            if u.isSimple:
                if not u.itemType:
                    modelXbrl.error(
                        "arelleUtrLoader:simpleDefMissingField",
                        "Unit Type Registry simple unit definition missing item type: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                        modelObject=modelXbrl, id=u.id, unitId=u.unitId,
                        nsUnit=u.nsUnit, status=u.status)
                if u.numeratorItemType or u.denominatorItemType or u.nsNumeratorItemType or u.nsDenominatorItemType:
                    modelXbrl.error(
                        "arelleUtrLoader",
                        "Unit Type Registry simple unit definition may not have complex fields: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                        modelObject=modelXbrl, id=u.id, unitId=u.unitId,
                        nsUnit=u.nsUnit, status=u.status)
            else:
                if u.symbol:
                    modelXbrl.error(
                        "arelleUtrLoader:complexDefSymbol",
                        "Unit Type Registry complex unit definition may not have symbol: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                        modelObject=modelXbrl, id=u.id, unitId=u.unitId,
                        nsUnit=u.nsUnit, status=u.status)
                if not u.numeratorItemType or not u.denominatorItemType:
                    modelXbrl.error(
                        "arelleUtrLoader:complexDefMissingField",
                        "Unit Type Registry complex unit definition must have numerator and denominator fields: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                        modelObject=modelXbrl, id=u.id, unitId=u.unitId,
                        nsUnit=u.nsUnit, status=u.status)
    except (EnvironmentError, etree.LxmlError) as err:
        # NOTE(review): errors are reported via modelManager.modelXbrl rather
        # than the modelXbrl argument, as in the original — confirm they refer
        # to the same object.
        modelManager.modelXbrl.error(
            "arelleUtrLoader:error",
            "Unit Type Registry Import error: %(error)s",
            modelObject=modelXbrl, error=err)
        etree.clear_error_log()
    finally:
        # BUG FIX: the close previously ran after the try/except, so any
        # exception not caught above left the stream open; finally guarantees
        # the file is closed on every exit path.
        if file:
            file.close()
def run(self):
    """Validate the buffer's XML in this worker, then schedule the UI update.

    Collects errors via self.addError; on completion (unless cancelled)
    arranges finish_in_idle to run on the GLib main loop.
    """
    doc_kind = 'XML'
    etree.clear_error_log()
    try:
        # parse the XML for errors
        if self.source_file != '<unknown>':
            schema_info = self.getSchema(self.source_file, self.source_contents)
            root = schema_info['xml']
            if schema_info['type'] is not None:
                doc_kind = schema_info['type']
        else:
            root = etree.fromstring(self.source_contents)

        # if the doc is a schema itself, parse it for schema errors
        try:
            if doc_kind == "XSD":
                etree.XMLSchema(root)
            elif doc_kind == "RelaxNG":
                etree.RelaxNG(root)
        except (etree.RelaxNGError, etree.XMLSchemaParseError) as exc:
            for entry in exc.error_log:
                self.addError(doc_kind + " parsing error", entry)
        except Exception as exc:
            self.addError(doc_kind + " parsing error", exc)

        # parse XML comments in document for a reference to a schema
        try:
            (ref, location, comment_line) = self.lookForSchema(root)
            if ref is not None:
                try:
                    if ref['type'] == "XSD":
                        schema = etree.XMLSchema(ref['xml'])
                    elif ref['type'] == "RelaxNG":
                        schema = etree.RelaxNG(ref['xml'])
                    schema.assertValid(root)
                except (etree.DocumentInvalid,
                        etree.RelaxNGValidateError,
                        etree.XMLSchemaValidateError):
                    for entry in schema.error_log:
                        self.addError(ref['type'] + " validation error", entry)
                except (etree.RelaxNGError, etree.XMLSchemaParseError):
                    self.addError(ref['type'] + " error",
                                  "Schema is invalid " + location, comment_line)
                except Exception as exc:
                    self.addError(ref['type'] + " error", exc)
        except etree.XMLSyntaxError as exc:
            self.addError("Schema error",
                          "Unable to parse schema XML " + location, comment_line)
        except Exception as exc:
            self.addError("Schema error", exc, comment_line)
    # handle XML parse errors
    except etree.XMLSyntaxError as exc:
        for entry in exc.error_log:
            self.addError("XML parsing error", entry)
    # ignore other exceptions
    except:
        pass
    self.clock.acquire()
    if not self.cancelled:
        self.idle_finish = GLib.idle_add(self.finish_in_idle)
    self.clock.release()
def parse(self, doc, options):
    """Validate the document's XML and record problems on doc.diagnostics.

    Reads the raw text from doc.data_path, parses it, optionally checks
    it as an XSD/RelaxNG schema, and validates it against any schema
    referenced from XML comments. `options` is accepted but unused here.
    """
    doc.diagnostics = []
    diagnostics = doc.diagnostics  # local alias for the report list
    doc_kind = 'XML'
    etree.clear_error_log()
    with open(doc.data_path) as handle:
        source = handle.read()
    try:
        # parse the XML for errors
        if os.path.isabs(doc.path):
            schema_info = self.get_schema(doc.path, doc.path, source)
            root = schema_info['xml']
            if schema_info['type'] is not None:
                doc_kind = schema_info['type']
        else:
            root = etree.fromstring(source)

        # if the doc is a schema itself, parse it for schema errors
        try:
            if doc_kind == "XSD":
                etree.XMLSchema(root)
            elif doc_kind == "RelaxNG":
                etree.RelaxNG(root)
        except (etree.RelaxNGError, etree.XMLSchemaParseError) as exc:
            for entry in exc.error_log:
                diagnostics.append(self.format_error(doc_kind + " parsing error", entry))
        except Exception as exc:
            diagnostics.append(self.format_error(doc_kind + " parsing error", exc))

        # parse XML comments in document for a reference to a schema
        try:
            (ref, location, comment_line) = self.look_for_schema(doc.path, root)
            if ref is not None:
                try:
                    if ref['type'] == "XSD":
                        schema = etree.XMLSchema(ref['xml'])
                    elif ref['type'] == "RelaxNG":
                        schema = etree.RelaxNG(ref['xml'])
                    schema.assertValid(root)
                except (etree.DocumentInvalid,
                        etree.RelaxNGValidateError,
                        etree.XMLSchemaValidateError):
                    for entry in schema.error_log:
                        diagnostics.append(self.format_error(ref['type'] + " validation error", entry))
                except (etree.RelaxNGError, etree.XMLSchemaParseError):
                    diagnostics.append(self.format_error(ref['type'] + " error",
                                                         "Schema is invalid " + location,
                                                         comment_line))
                except Exception as exc:
                    diagnostics.append(self.format_error(ref['type'] + " error", exc))
        except etree.XMLSyntaxError as exc:
            diagnostics.append(self.format_error("Schema error",
                                                 "Unable to parse schema XML " + location,
                                                 comment_line))
        except Exception as exc:
            diagnostics.append(self.format_error("Schema error", exc, comment_line))
    # handle XML parse errors
    except etree.XMLSyntaxError as exc:
        for entry in exc.error_log:
            diagnostics.append(self.format_error("XML parsing error", entry))
    # ignore other exceptions
    except:
        pass
raise PacemakerError("Cannot expand the Relax-NG schema") else: schema_f = tmp_f try: cib_elem = etree.fromstring(etree.tostring(new_cib_elem)) except etree.Error, msg: raise PacemakerError("Failed to parse the CIB XML: " + str(msg)) try: schema = etree.RelaxNG(file=schema_f) except etree.Error, msg: raise PacemakerError("Failed to parse the Relax-NG schema: " + str(msg)) try: etree.clear_error_log() except: pass is_valid = schema.validate(cib_elem) if not is_valid: for error_entry in schema.error_log: detail_msg += error_entry.level_name + ": " + error_entry.message + "\n" if not self.is_local: try: delete_dir(os.path.dirname(tmp_f)) except: pass return (is_valid, detail_msg)
def do_ConfirmReservation(self, elem, *args, **kw): log.debug("=============== XenBEEClient2BrokerProtocol::do_ConfirmReservation") try: confirm = message.MessageBuilder.from_xml(elem.getroottree()) except Exception, e: return message.BrokerError(confirm.uuid(), errcode.ILLEGAL_REQUEST, str(e)) ticket = TicketStore.getInstance().lookup(confirm.ticket()) if ticket is None: return message.BrokerError(confirm.uuid(), errcode.TICKET_INVALID, confirm.ticket()) log.debug("got confirmation with ticket %s" % confirm.ticket()) xbed = XBEDaemon.getInstance() jsdl_doc = jsdl.JsdlDocument(schema_map=xbed.schema_map) try: if hasattr(etree, 'clearErrorLog'): etree.clearErrorLog() if hasattr(etree, 'clear_error_log'): etree.clear_error_log() parsed_jsdl = jsdl_doc.parse(confirm.jsdl()) except etree.DocumentInvalid, e: log.info("got invalid document: %s" % str(e.error_log)) # TaskManager.getInstance().removeTask(ticket.task) # del ticket.task # TicketStore.getInstance().release(ticket) return message.BrokerError(confirm.uuid(), errcode.ILLEGAL_REQUEST, "JSDL document is invalid: %s" % (e.error_log,)) try: # does the job have our InstanceDescription element? # otherwise drop the job jsdl_doc.lookup_path( "JobDefinition/JobDescription/Resources/"+ "InstanceDefinition/InstanceDescription") except Exception, e: