def to_xml(text):
    """Parse ``text`` into an element, falling back to lxml recovery on bad XML.

    A leading BOM is removed before parsing. When the regular parser rejects the
    document, it is parsed again by lxml in recovery mode; the recovered tree is
    serialised and handed back to the regular parser.

    :param text: the XML document as a text string
    :return: whatever ``fromstring`` returns for the (possibly recovered) document
    :raises ParseError: if the document still cannot be parsed; the message
        includes an excerpt around the failing position when one is known
    """
    def _without_bom():
        # Only slice when the document really starts with the BOM
        if text.startswith(BOM):
            return text[BOM_LEN:]
        return text

    try:
        if PY2:
            # On python2, fromstring expects an encoded string
            return fromstring(_without_bom().encode('utf-8'))
        return fromstring(_without_bom())
    except ParseError:
        from lxml.etree import XMLParser, parse, tostring
        # Exchange servers may spit out the weirdest XML. lxml is pretty good at recovering from errors
        log.warning('Fallback to lxml processing of faulty XML')
        recovering_parser = XMLParser(recover=True)
        stripped = _without_bom()
        recovered = parse(io.BytesIO(stripped.encode('utf-8')), recovering_parser)
        try:
            return fromstring(tostring(recovered))
        except ParseError as e:
            # Copy the (line, column) pair onto lineno/offset when the error carries one
            if hasattr(e, 'position'):
                e.lineno, e.offset = e.position
            if not e.lineno:
                raise ParseError('%s' % text_type(e))
            lines = stripped.splitlines()
            try:
                bad_line = lines[e.lineno - 1]
            except IndexError:
                raise ParseError('%s' % text_type(e))
            # Show at most 20 characters on either side of the reported column
            start, stop = max(0, e.offset - 20), e.offset + 20
            raise ParseError('%s\nOffending text: [...]%s[...]' % (text_type(e), bad_line[start:stop]))
        except TypeError:
            raise ParseError('This is not XML: %s' % text)
def to_xml(text, encoding):
    """Parse ``text`` into an element, falling back to lxml recovery on bad XML.

    :param text: the XML document as a text string
    :param encoding: encoding used to turn ``text`` into bytes for the parsers;
        'utf-8' is used when this is empty or None
    :return: the element returned by ``fromstring``
    :raises ParseError: if the document cannot be parsed even after lxml recovery.
        When the parser reports a position, the message carries an ASCII excerpt
        of up to 20 bytes on either side of it.
    """
    from xml.etree.ElementTree import fromstring, ParseError
    # NOTE(review): lstrip() removes any leading run of the characters in BOM, not
    # BOM as a prefix. Harmless if BOM is a single character; confirm otherwise.
    processed = text.lstrip(BOM).encode(encoding or 'utf-8')
    try:
        return fromstring(processed)
    except ParseError:
        from io import BytesIO
        from lxml.etree import XMLParser, parse, tostring
        # Exchange servers may spit out the weirdest XML. lxml is pretty good at recovering from errors
        log.warning('Fallback to lxml processing of faulty XML')
        magical_parser = XMLParser(encoding=encoding or 'utf-8', recover=True)
        root = parse(BytesIO(processed), magical_parser)
        try:
            return fromstring(tostring(root))
        except ParseError as e:
            # ElementTree stores the location in 'position'; the lineno/offset
            # attributes inherited from SyntaxError are normally left as None.
            line_no, col_no = getattr(e, 'position', None) or (e.lineno, e.offset)
            if not line_no or col_no is None:
                # No usable location, so report the bare parser message
                raise ParseError('%s' % text_type(e))
            try:
                offending_line = processed.splitlines()[line_no - 1]
            except IndexError:
                # 'processed' is bytes, so the fallback must be bytes for .decode() to work
                offending_line = b''
            offending_excerpt = offending_line[max(0, col_no - 20):col_no + 20].decode('ascii', 'ignore')
            raise ParseError('%s\nOffending text: [...]%s[...]' % (text_type(e), offending_excerpt))
        except TypeError:
            raise ParseError('This is not XML: %s' % text)
def _parse_oidc_backends_config(self, config_file):
    """Load the OIDC provider definitions from ``config_file``.

    The document root must be ``OIDC``. Each ``provider`` child carrying a
    ``name`` attribute is inspected; only the ``google`` provider is parsed
    (via ``self._parse_google_config``) and stored, keyed by its lower-cased
    name, in ``self.oidc_backends_config``. Other children are logged and skipped.

    :param config_file: source handed to ``ET.parse``
    :raises ParseError: if the XML is malformed, the root tag is wrong, or no
        provider configuration was stored
    """
    self.oidc_backends_config = {}
    try:
        root = ET.parse(config_file).getroot()
        if root.tag != 'OIDC':
            raise ParseError(
                "The root element in OIDC config xml file is expected to be `OIDC`, "
                "found `{}` instead -- unable to continue.".format(root.tag))
        for node in root:
            if node.tag != 'provider':
                log.error(
                    "Expect a node with `provider` tag, found a node with `{}` tag instead; "
                    "skipping the node.".format(node.tag))
                continue
            if 'name' not in node.attrib:
                log.error(
                    "Could not find a node attribute 'name'; skipping the node '{}'."
                    .format(node.tag))
                continue
            provider_name = node.get('name').lower()
            if provider_name == 'google':
                self.oidc_backends_config[provider_name] = self._parse_google_config(node)
        if not self.oidc_backends_config:
            raise ParseError("No valid provider configuration parsed.")
    except ParseError as e:
        # Re-raise with the location of the offending configuration attached
        raise ParseError(
            "Invalid configuration at `{}`: {} -- unable to continue.".format(config_file, e))
def __init__(self, cap_file):
    """!Initialize xml.etree.ElementTree

    @param cap_file - path of an existing XML file, or the XML document itself

    If cap_file names an existing path it is parsed as a file; otherwise it
    is parsed as XML content. Raises ParseError when the input cannot be
    opened or parsed, or when no root node is found.
    """
    import errno

    is_file = False
    try:
        is_file = pathlib.Path(cap_file).exists()
    except OSError as exc:
        # A whole XML document passed as cap_file can exceed the file name
        # limit; that just means "not a file". The numeric code differs
        # between platforms, so compare against the symbolic constant.
        if exc.errno != errno.ENAMETOOLONG:
            raise

    if is_file:
        try:
            etree.ElementTree.__init__(self, file=cap_file)
        except ParseError:
            raise ParseError(_("Unable to parse XML file"))
        except IOError as error:
            # Interpolate after the gettext lookup so the untouched msgid is
            # what gets looked up in the catalog.
            raise ParseError(
                _("Unable to open XML file '%s'.\n%s\n") % (cap_file, error)
            )
    else:
        try:
            etree.ElementTree.__init__(self, element=etree.fromstring(cap_file))
        except ParseError:
            raise ParseError(_("Unable to parse XML file"))

    if self.getroot() is None:
        raise ParseError(_("Root node was not found."))
def _parse_oidc_config(self, config_file):
    """Load typed settings from the OIDC configuration file.

    The document root must be ``OIDC``. Every ``Setter`` child with
    ``Property``, ``Value`` and ``Type`` attributes is converted by calling the
    built-in named by ``Type`` on ``Value``; the result is stored in
    ``self.oidc_config`` under ``Property``. Malformed children are logged and
    skipped.

    :param config_file: source handed to ``ET.parse``
    :raises ParseError: if the XML is malformed or the root tag is not ``OIDC``
    :raises ImportError: if no module of built-ins can be imported
    """
    self.oidc_config = {}
    try:
        root = ET.parse(config_file).getroot()
        if root.tag != 'OIDC':
            raise ParseError("The root element in OIDC_Config xml file is expected to be `OIDC`, "
                             "found `{}` instead -- unable to continue.".format(root.tag))
        # The module of built-ins is named `__builtin__` on Python 2 and
        # `builtins` on Python 3; resolve it once for all nodes.
        try:
            builtins_module = importlib.import_module('__builtin__')
        except ImportError:
            builtins_module = importlib.import_module('builtins')
        for child in root:
            if child.tag != 'Setter':
                log.error("Expect a node with `Setter` tag, found a node with `{}` tag instead; "
                          "skipping this node.".format(child.tag))
                continue
            if 'Property' not in child.attrib or 'Value' not in child.attrib or 'Type' not in child.attrib:
                log.error("Could not find the node attributes `Property` and/or `Value` and/or `Type`;"
                          " found these attributes: `{}`; skipping this node.".format(child.attrib))
                continue
            try:
                func = getattr(builtins_module, child.get('Type'))
            except AttributeError:
                # Format the message itself; log.error() returns None
                log.error("The value of attribute `Type`, `{}`, is not a valid built-in type;"
                          " skipping this node".format(child.get('Type')))
                continue
            # NOTE(security): `Type` may name ANY built-in (including `eval`), so this
            # file must come from a trusted source. Consider a whitelist of types.
            self.oidc_config[child.get('Property')] = func(child.get('Value'))
    except ParseError as e:
        raise ParseError("Invalid configuration at `{}`: {} -- unable to continue.".format(config_file, e))
def test_parse_error_from_xml_parse_error(self):
    """ParseError.from_exception copies message, line and column from an XML error."""
    xml_error = XmlParseError('xml parse error')
    xml_error.code = 123
    xml_error.position = (1, 2)

    converted = ParseError.from_exception('file', xml_error)

    self.assertEqual(ParseError('file', 'xml parse error', 1, 2), converted)
def __init__(self, cap_file):
    """!Initialize xml.etree.ElementTree

    @param cap_file - XML file handed to ElementTree as its `file` argument

    Raises ParseError when the file cannot be opened or parsed, or when no
    root node is found.
    """
    try:
        etree.ElementTree.__init__(self, file=cap_file)
    except ParseError:
        raise ParseError(_("Unable to parse XML file"))
    except IOError as error:
        # Interpolate after the gettext lookup so the untouched msgid is what
        # gets looked up in the catalog.
        raise ParseError(_("Unable to open XML file '%s'.\n%s\n") % (cap_file, error))

    if self.getroot() is None:
        raise ParseError(_("Root node was not found."))
def test_elementtree_parse_file(self, mock_parse):
    """elementtree_parse_file appends the file name and line to a parse error."""
    from xml.etree.ElementTree import ParseError
    from cumulusci.tasks.metadata.package import elementtree_parse_file

    injected = ParseError()
    injected.msg = 'it broke'
    injected.lineno = 1
    mock_parse.side_effect = injected

    try:
        elementtree_parse_file('test_file')
    except ParseError as caught:
        self.assertEqual(str(caught), 'it broke (test_file, line 1)')
        return
    self.fail('Expected ParseError')
def from_response(cls, requested_api_version, response):
    """Build an instance from the server version info in a SOAP response.

    :param requested_api_version: the API version string sent in the request
    :param response: response object exposing ``text`` and ``encoding``
    :return: ``cls(build, api_version)``
    :raises EWSWarning: (via ``raise_from``) if the response is not parseable XML
        or has no SOAP header
    :raises TransportError: if the header has no usable ServerVersionInfo
    """
    try:
        header = to_xml(response.text, encoding=response.encoding).find('{%s}Header' % SOAPNS)
        # find() returns None when nothing matches. Do not test truthiness: an
        # element without children is falsy even though it exists.
        if header is None:
            raise ParseError()
    except ParseError as e:
        raise_from(EWSWarning('Unknown XML response from %s (response: %s)' % (response, response.text)), e)
    info = header.find('{%s}ServerVersionInfo' % TNS)
    if info is None:
        raise TransportError('No ServerVersionInfo in response: %s' % response.text)
    try:
        build = Build.from_xml(info)
    except ValueError:
        raise TransportError('Bad ServerVersionInfo in response: %s' % response.text)
    # Not all Exchange servers send the Version element
    api_version_from_server = info.get('Version') or build.api_version()
    if api_version_from_server != requested_api_version:
        if api_version_from_server.startswith(('V2_', 'V2015_', 'V2016_')):
            # Office 365 is an expert in sending invalid API version strings...
            log.info('API version "%s" worked but server reports version "%s". Using "%s"', requested_api_version,
                     api_version_from_server, requested_api_version)
            api_version_from_server = requested_api_version
        else:
            # Work around a bug in Exchange that reports a bogus API version in the XML response. Trust server
            # response except 'V2_nn' or 'V201[5,6]_nn_mm' which is bogus
            log.info('API version "%s" worked but server reports version "%s". Using "%s"', requested_api_version,
                     api_version_from_server, api_version_from_server)
    return cls(build, api_version_from_server)
def _project_version() -> str:
    """Return the text of the top-level ``<version>`` tag of the project's pom.xml.

    :raises ParseError: if the root element has no direct ``version`` child in
        the Maven POM 4.0.0 namespace
    """
    pom_root = ElementTree.parse(_project_dir / 'pom.xml').getroot()
    version_element = pom_root.find('{http://maven.apache.org/POM/4.0.0}version')
    if version_element is not None:
        return version_element.text
    raise ParseError('not a pom.xml file (<version> tag not found)')
def from_response(cls, requested_api_version, response):
    """Build an instance from the server version info in a SOAP response.

    :param requested_api_version: the API version string sent in the request
    :param response: the response document, passed to ``to_xml`` as is
    :return: ``cls(build, api_version)``
    :raises TransportError: if the response has no SOAP header or no usable
        ServerVersionInfo element
    """
    try:
        header = to_xml(response).find('{%s}Header' % SOAPNS)
        if header is None:
            raise ParseError()
    except ParseError:
        raise TransportError('Unknown XML response (%s)' % response)

    version_info = header.find('{%s}ServerVersionInfo' % TNS)
    if version_info is None:
        raise TransportError('No ServerVersionInfo in response: %s' % response)
    try:
        build = Build.from_xml(elem=version_info)
    except ValueError:
        raise TransportError('Bad ServerVersionInfo in response: %s' % response)

    # Not all Exchange servers send the Version element
    reported = version_info.get('Version') or build.api_version()
    if reported == requested_api_version:
        return cls(build, reported)

    if cls._is_invalid_version_string(reported):
        # For unknown reasons, Office 365 may respond with an API version strings that is invalid in a request.
        # Detect these so we can fallback to a valid version string.
        log.info('API version "%s" worked but server reports version "%s". Using "%s"', requested_api_version,
                 reported, requested_api_version)
        return cls(build, requested_api_version)

    # Work around a bug in Exchange that reports a bogus API version in the XML response. Trust server
    # response except 'V2_nn' or 'V201[5,6]_nn_mm' which is bogus
    log.info('API version "%s" worked but server reports version "%s". Using "%s"', requested_api_version,
             reported, reported)
    return cls(build, reported)
def Parse(self, data):
    """Decode a CryXmlB buffer and feed its node tree to the parser.

    The header is read from the start of ``data``. If the signature is not
    ``CryXmlB`` but the data starts with ``<``, the buffer is fed to a regular
    ``XMLParser`` bound to ``self.target`` and ``_StandardXmlFile`` is raised.

    :param data: buffer holding the file contents
    :raises ValueError: if ``data`` is smaller than the header structure
    :raises ParseError: if the signature is neither CryXmlB nor XML-like
    :raises _StandardXmlFile: after the data was parsed as regular XML
    """
    if len(data) < sizeof(CryXMLBHeader):
        raise ValueError(
            "File is not a binary XML file (file size is too small).")

    self._data = data
    self._header = CryXMLBHeader.from_buffer(data, 0)

    # TODO: actually do header validation - see references
    signature = self._header.signature
    if signature != b"CryXmlB":
        if not signature.startswith(b"<"):
            raise ParseError("Invalid CryXmlB Signature")
        # try parsing as a normal xml file
        plain_parser = XMLParser(target=self.target)
        plain_parser.feed(self._data)
        raise _StandardXmlFile()

    # Materialise the three tables in the same order as before
    self._attributes = [
        self._read_attribute(index)
        for index in range(self._header.attributes_count)
    ]
    self._child_indices = [
        self._read_child_index(index)
        for index in range(self._header.child_table_count)
    ]
    self._nodes = [
        self._read_node(index)
        for index in range(self._header.node_count)
    ]

    first_node = self._read_node(0)
    assert first_node.parent_index == CRYXML_NO_PARENT
    self._iter_parse_nodes(first_node)
def from_file(cls, f):
    """Create an instance from the first event read from file object ``f``.

    :param f: file object handed to ``EventPointer.from_file``
    :return: ``cls(element, pointer)`` for the first (event, element) pair
    :raises ParseError: on a parse error; the message is extended with up to
        500 characters read from ``f`` at its current position
    """
    try:
        pointer = EventPointer.from_file(f)
        first_element = next(pointer)[1]
        return cls(first_element, pointer)
    except ParseError as e:
        message = "{0}: {1}...".format(str(e), f.read(500))
        raise ParseError(message)
def __init__(self, pom_path, xml=XmlUtils()):
    """Parse the pom file at ``pom_path`` and keep the tree and its root.

    :param pom_path: path of the pom file
    :param xml: object whose ``parse(path)`` returns the parsed tree
    :raises IOError: if ``pom_path`` does not exist
    :raises ParseError: if the file cannot be parsed; the message names the
        absolute path
    """
    absolute_path = os.path.abspath(pom_path)
    if not os.path.lexists(pom_path):
        raise IOError('Pom file not found %s' % absolute_path)

    try:
        parsed = xml.parse(absolute_path)
    except ParseError as e:
        raise ParseError('Error parsing %s: %s' % (absolute_path, e.msg))

    self.pom_xml = parsed
    self.root_element = self.pom_xml.getroot()
def from_file(cls, f):
    """Create an instance from the first event read from file object ``f``.

    Python 2 code: relies on ``pointer.next()`` and ``unicode``.

    :param f: file object handed to ``EventPointer.from_file``
    :return: ``cls(element, pointer)`` for the first (event, element) pair
    :raises ParseError: on a parse error; the message is extended with up to
        500 characters read from ``f`` at its current position
    """
    try:
        pointer = EventPointer.from_file(f)
        first_element = pointer.next()[1]
        return cls(first_element, pointer)
    except ParseError as e:
        message = u"{0}: {1}...".format(unicode(e), f.read(500))
        raise ParseError(message)
def __init__(self, cap_file, force_version=None):
    """!Parses WMS capabilities file.
    If the capabilities file cannot be parsed, it raises xml.etree.ElementTree.ParseError.

    The class manages inheritance in 'Layer' elements. Inherited elements are added to 'Layer' element.
    The class also removes elements which are in invalid form and are needed by wxGUI capabilities dialog.

    @param cap_file - capabilities file
    @param force_version - force capabilities file version (1.1.1, 1.3.0)
    """
    BaseCapabilitiesTree.__init__(self, cap_file)
    self.xml_ns = WMSXMLNsHandler(self)

    grass.debug('Checking WMS capabilities tree.', 4)

    root = self.getroot()
    if "version" not in root.attrib:
        raise ParseError(
            _("Missing version attribute root node "
              "in Capabilities XML file"))
    wms_version = root.attrib["version"]

    # WMS 1.3.0 names the projection element CRS; other versions use SRS
    self.proj_tag = "CRS" if wms_version == "1.3.0" else "SRS"

    if force_version is not None and wms_version != force_version:
        # The server answered with wms_version, so the version it does NOT
        # support is the one that was forced.
        raise ParseError(
            _("WMS server does not support '%s' version.") % force_version)

    capability = self._find(root, "Capability")
    root_layer = self._find(capability, "Layer")

    self._checkFormats(capability)
    self._checkLayerTree(root_layer)

    grass.debug('Check of WMS capabilities tree was finished.', 4)
def collect_group_xmls(self):
    """Parse ``group.xml`` from ``self.dirname`` and record its root element.

    The root element is stored in ``self.ret`` under ``self.dirname``.

    :return: ``self.ret``
    :raises ParseError: if ``group.xml`` is not well-formed; the message names
        the file and carries the parser's details
    """
    group_file = os.path.join(self.dirname, "group.xml")
    try:
        # ElementTree reads and decodes the file on its own
        group_root = ElementTree.parse(group_file).getroot()
        self.ret[self.dirname] = group_root
    except ParseError as par_err:
        reported_path = os.path.join(self.dirname, "group.xml")
        raise ParseError(
            "Encountered a parse error in file %s.\nDetails: %s" % (reported_path, par_err))
    return self.ret
def _find(self, etreeElement, tag):
    """!Find child element.
    If the element is not found it raises xml.etree.ElementTree.ParseError.

    @param etreeElement - element whose children are searched
    @param tag - tag to look for

    @return the first matching child element
    """
    res = etreeElement.find(tag)

    if res is None:
        # Keep the message in one literal: a backslash continuation inside the
        # string would leak the source indentation into the text shown to users.
        raise ParseError(
            _("Unable to parse tile service file. \nTag <%s> was not found.") % tag)

    return res
def iter_elements(self, filename: str) -> Generator[Element, None, None]:
    """Yield every element of ``filename`` whose tag equals ``self.search_key``.

    The file is parsed incrementally. Once the consumer resumes the generator
    after a yielded element, the document root is cleared.

    :param filename: path of the XML file to read
    :raises ParseError: if the file is not well-formed; the message names the file
    """
    try:
        parse_events = cElementTree.iterparse(filename, events=("start", "end"))
        # The very first event is the start of the root element
        root = next(parse_events)[1]
        for kind, node in parse_events:
            if kind != "end" or node.tag != self.search_key:
                continue
            yield node
            root.clear()
    except ParseError as e:
        raise ParseError("unable to parse file [{}]. Error: {}".format(filename, e))
def _findall(self, etreeElement, tag):
    """!Find all children element.
    If no element is found it raises xml.etree.ElementTree.ParseError.

    @param etreeElement - element whose children are searched
    @param tag - tag to look for; it is passed through self.xml_ns.Ns() first

    @return list of the matching child elements
    """
    res = etreeElement.findall(self.xml_ns.Ns(tag))

    if not res:
        # Keep the message in one literal: a backslash continuation inside the
        # string would leak the source indentation into the text shown to users.
        raise ParseError(
            _("Unable to parse capabilities file. \nTag <%s> was not found.") % tag)

    return res
def __init__(self, caps):
    """!Handle XML namespaces according to WMS version of capabilities.

    @param caps - parsed capabilities tree (must provide getroot())

    Sets self.use_ns to False when the root has a plain 'Service' child and to
    True when the child is in the WMS namespace. Raises
    xml.etree.ElementTree.ParseError if neither is present.
    """
    self.namespace = "{http://www.opengis.net/wms}"

    if caps.getroot().find("Service") is not None:
        self.use_ns = False
    elif caps.getroot().find(self.namespace + "Service") is not None:
        self.use_ns = True
    else:
        # Keep the message in one literal: a backslash continuation inside the
        # string would leak the source indentation into the text shown to users.
        raise ParseError(
            _("Unable to parse capabilities file.\nTag <%s> was not found.") % "Service")
def _findall(self, etreeElement, tag, ns=None):
    """!Find all children element.
    If no element is found it raises xml.etree.ElementTree.ParseError.

    @param etreeElement - element whose children are searched
    @param tag - tag to look for
    @param ns - optional callable applied to tag before the search

    @return list of the matching child elements
    """
    if not ns:
        res = etreeElement.findall(tag)
    else:
        res = etreeElement.findall(ns(tag))

    if not res:
        # Keep the message in one literal: a backslash continuation inside the
        # string would leak the source indentation into the text shown to users.
        raise ParseError(
            _("Unable to parse capabilities file. \nTag '%s' was not found.") % tag)

    return res
def define_xml_node_value(xml_file_name, node):
    """
    Return the text of the first ``node`` element found in an XML file.

    A missing file, a missing node, or a node whose first child carries no
    text data is logged as an error and reported as ``None``. Malformed XML
    is not handled here and propagates to the caller.

    :param xml_file_name: path of the XML file to read
    :param node: tag name to look up
    :return: the ``data`` of the node's first child, or None on failure
    """
    try:
        xml_dom = minidom.parse(xml_file_name)
    except FileNotFoundError:
        logger.error('No such file: %s', xml_file_name, exc_info=True)
        return None

    try:
        # IndexError: no element with that tag.
        # AttributeError: the element is empty or its first child has no `data`.
        return xml_dom.getElementsByTagName(node)[0].firstChild.data
    except (IndexError, AttributeError):
        logger.error('no such node (%s) in the %s', node, xml_file_name, exc_info=True)
        return None
def _get_version_from_service(cls, protocol, api_version):
    """Ask the service for its version by posting a minimal request.

    :param protocol: object providing credentials, sessions, endpoint and
        transport settings for the request
    :param api_version: API version string to put in the request (must be truthy)
    :return: the result of ``cls.from_response`` for the reply, or None when the
        service redirects (302) or is unavailable (503)
    :raises UnauthorizedError: on HTTP 401
    :raises EWSWarning: on HTTP 400, on an invalid-server-version HTTP 500, or
        (via ``raise_from``) when the reply has no parseable SOAP header
    :raises TransportError: on any other non-200 status or when the header has
        no ServerVersionInfo
    """
    assert api_version
    xml = dummy_xml(version=api_version, name=protocol.credentials.username)
    # Create a minimal, valid EWS request to force Exchange into accepting the request and returning EWS xml
    # containing server version info. Some servers will only reply with their version if a valid POST is sent.
    session = protocol.get_session()
    log.debug('Test if service API version is %s using auth %s', api_version, session.auth.__class__.__name__)
    r, session = post_ratelimited(protocol=protocol, session=session, url=protocol.service_endpoint, headers=None,
                                  data=xml, timeout=protocol.TIMEOUT, verify=protocol.verify_ssl,
                                  allow_redirects=False)
    protocol.release_session(session)

    status = r.status_code
    if status == 401:
        raise UnauthorizedError('Wrong username or password for %s' % protocol.service_endpoint)
    if status == 302:
        log.debug('We were redirected. Unable to get version info from service')
        return None
    if status == 503:
        log.debug('Service is unavailable. Unable to get version info from service')
        return None
    if status == 400:
        raise EWSWarning('Bad request')
    if status == 500:
        if 'The specified server version is invalid' in r.text \
                or 'ErrorInvalidSchemaVersionForMailboxVersion' in r.text:
            raise EWSWarning('Invalid server version')
    if status != 200:
        if 'The referenced account is currently locked out' in r.text:
            raise TransportError('The service account is currently locked out')
        raise TransportError('Unexpected HTTP status %s when getting %s (%s)' % (
            status, protocol.service_endpoint, r.text))

    log.debug('Response data: %s', r.text)
    try:
        header = to_xml(r.text, encoding=r.encoding).find('{%s}Header' % SOAPNS)
        if header is None:
            raise ParseError()
    except ParseError as e:
        raise_from(EWSWarning('Unknown XML response from %s (response: %s)' % (protocol.service_endpoint, r.text)), e)
    if header.find('{%s}ServerVersionInfo' % TNS) is None:
        raise TransportError('No ServerVersionInfo in response: %s' % r.text)

    version = cls.from_response(requested_api_version=api_version, response=r)
    log.debug('Service version is: %s', version)
    return version
def error_with_file(error: ParseError, file: str) -> ParseError:
    """Build a copy of an XML parse error that also names the file.

    The original ``error`` gets its ``filename`` attribute set as well.

    :param error: Original XML parse error.
    :param file: Filename to add.
    :return: A new parse error (of the same type as `error`) carrying the
        filename, line, column and text, plus the original `code` and `position`.
    """
    error.filename = file
    line, column = error.position[0], error.position[1]
    details = (file, line, column, error.text)
    annotated = type(error)(error.msg, details)
    annotated.code = error.code
    annotated.position = error.position
    return annotated
def __init__(self, model_type, model_name, set_type):
    """Store the model selection and open the matching catalogs.

    :param model_type: key into the top-level catalog's ``catalog_refs``
    :param model_name: key into the model-type catalog's ``catalog_refs``
    :param set_type: stored on the instance as ``set_type``
    :raises ParseError: re-raised with a "may be unavailable" message if the
        model reference lookup raises ParseError
    :raises HTTPError: re-raised with the same message if opening the model
        catalog fails
    """
    self.model_type = model_type
    self.model_name = model_name
    self.set_type = set_type

    self.catalog = TDSCatalog(self.catalog_url)
    type_ref = self.catalog.catalog_refs[model_type]
    self.fm_models = TDSCatalog(type_ref.href)
    self.fm_models_list = sorted(self.fm_models.catalog_refs.keys())

    try:
        model_url = self.fm_models.catalog_refs[model_name].href
    except ParseError:
        raise ParseError(self.model_name + ' model may be unavailable.')

    try:
        self.model = TDSCatalog(model_url)
    except HTTPError:
        raise HTTPError(self.model_name + ' model may be unavailable.')

    self.datasets_list = list(self.model.datasets.keys())
    self.set_dataset()
def connect_to_catalog(self):
    """Open the catalogs for the configured model and mark the instance connected.

    Opening the model catalog is attempted a second time if the first attempt
    raises HTTPError.

    :raises ParseError: re-raised with a "may be unavailable" message if the
        model reference lookup raises ParseError
    :raises HTTPError: re-raised with the same message if both attempts to open
        the model catalog fail
    """
    self.catalog = TDSCatalog(self.catalog_url)
    type_ref = self.catalog.catalog_refs[self.model_type]
    self.fm_models = TDSCatalog(type_ref.href)
    self.fm_models_list = sorted(self.fm_models.catalog_refs.keys())

    try:
        model_url = self.fm_models.catalog_refs[self.model_name].href
    except ParseError:
        raise ParseError(self.model_name + ' model may be unavailable.')

    try:
        self.model = TDSCatalog(model_url)
    except HTTPError:
        # One retry before giving up
        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            raise HTTPError(self.model_name + ' model may be unavailable.')

    self.datasets_list = list(self.model.datasets.keys())
    self.set_dataset()
    self.connected = True
def __enter__(self):
    """Load the XML document at ``self.path`` and expose its tree and root.

    On IOError a new tree with a single ``self.top_element`` root is created
    and ``self.write_xml`` is set. On ParseError the error is logged, a dialog
    is shown and ``self.__exit__`` is called with a ParseError instance.

    :return: ``self``, with ``self.tree`` and ``self.root`` set
    """
    try:
        self.tree = defused_etree.parse(self.path)
    except IOError:
        # Document is blank or missing
        if self.force_create is False:
            LOG.debug('%s does not seem to exist; not creating', self.path)
            # This will abort __enter__
            # NOTE(review): that only holds if __exit__ raises when handed an
            # exception instance; otherwise execution continues below - confirm.
            self.__exit__(IOError('File not found'), None, None)
        # Create topmost xml entry
        self.tree = etree.ElementTree(etree.Element(self.top_element))
        self.write_xml = True
    except ParseError:
        LOG.error('Error parsing %s', self.path)
        # "Kodi cannot parse {0}. PKC will not function correctly. Please
        # visit {1} and correct your file!"
        messageDialog(lang(29999), lang(39716).format(
            self.filename, 'http://kodi.wiki'))
        # NOTE(review): presumably __exit__ raises here too; if it returns,
        # self.tree may be unset when getroot() is called below - confirm.
        self.__exit__(ParseError('Error parsing XML'), None, None)
    self.root = self.tree.getroot()
    return self
def _raiseerror(self, value):
    """Raise a ParseError constructed from ``value``."""
    raise ParseError(value)
def exception_side_effect(x):
    """Raise an argument-less ParseError; ``x`` is accepted and ignored."""
    failure = ParseError()
    raise failure
def _raiseerror(self, value):
    """Raise a ParseError wrapping ``value``.

    The raised error carries ``value.code`` as ``code`` and the pair
    ``(value.lineno, value.offset)`` as ``position``.
    """
    error = ParseError(value)
    error.code, error.position = value.code, (value.lineno, value.offset)
    raise error