def test_xmlsyntaxerror_in_read(fc_class_mock):
    """
    Simulate XMLSyntaxError exceptions while data is being read.

    Successive ``call_action`` calls on the mocked connection alternate
    between a dummy result and an ``XMLSyntaxError``; ``MOCK.process()`` is
    then run against that connection.
    """
    # Dummy result first, then two (error, dummy result) rounds.
    outcomes = [{0}]
    for _ in range(2):
        outcomes.append(XMLSyntaxError(0, 0, 0, 0))
        outcomes.append({0})

    connection = FritzConnectionMock()
    connection.call_action.side_effect = outcomes
    fc_class_mock.return_value = connection

    MOCK.process()
def test_incorrect_password(fc_class_mock):
    """
    Simulate an incorrect password on the router.

    The mocked connection answers the first ``call_action`` with a dummy
    result and raises ``XMLSyntaxError`` on the second; ``MOCK.process`` is
    expected to surface that as an ``IOError``.
    """
    router = FritzConnectionMock()
    router.call_action.side_effect = [{0}, XMLSyntaxError(0, 0, 0, 0)]
    fc_class_mock.return_value = router

    with pytest.raises(IOError):
        bad_credentials = CollectdConfig({'Password': '******'})
        MOCK.process(bad_credentials)
def _build_doc(self):
    """
    Parse ``self.io`` with lxml's HTML parser and return the parsed root.

    URLs are opened with ``urlopen`` and parsed from the resulting handle;
    anything else is handed to ``lxml.html.parse`` directly. If that fails
    with ``UnicodeDecodeError`` or ``OSError`` and the input is not a URL,
    the input is parsed again as a literal string of HTML.

    Raises
    ------
    UnicodeDecodeError, OSError
        * Re-raised when parsing fails and ``self.io`` is a URL.

    XMLSyntaxError
        * If the object produced by ``parse`` has no ``text_content``
          attribute.

    See Also
    --------
    pandas.io.html._HtmlFrameParser._build_doc
    """
    from lxml.etree import XMLSyntaxError
    from lxml.html import (
        HTMLParser,
        fromstring,
        parse,
    )

    def _unwrap(node):
        # Use ``getroot()`` when the object offers it; otherwise the object
        # itself is what gets returned.
        try:
            return node.getroot()
        except AttributeError:
            return node

    html_parser = HTMLParser(recover=True, encoding=self.encoding)

    try:
        if is_url(self.io):
            with urlopen(self.io) as handle:
                doc = parse(handle, parser=html_parser)
        else:
            # try to parse the input in the simplest way
            doc = parse(self.io, parser=html_parser)
        doc = _unwrap(doc)
    except (UnicodeDecodeError, OSError) as err:
        if is_url(self.io):
            raise err
        # if the input is a blob of html goop
        doc = _unwrap(fromstring(self.io, parser=html_parser))
    else:
        if not hasattr(doc, "text_content"):
            raise XMLSyntaxError("no text parsed from document", 0, 0, 0)

    # Prefix a newline to the tail text of every matched <br>.
    for br in doc.xpath("*//br"):
        br.tail = "\n" + (br.tail or "")

    return doc
def _build_doc(self):
    """
    Parse ``self.io`` with a non-recovering lxml HTML parser.

    ``lxml.html.parse`` is tried first. If it fails with
    ``UnicodeDecodeError`` or ``IOError`` and the input is not a URL, the
    input is parsed again as a literal string of HTML.

    Raises
    ------
    ValueError
        * If parsing a URL failed and its scheme is not one of
          ``_valid_schemes``.

    UnicodeDecodeError, IOError
        * Re-raised when parsing a URL with a valid scheme failed.

    XMLSyntaxError
        * If the object produced by ``parse`` has no ``text_content``
          attribute.

    See Also
    --------
    pandas.io.html._HtmlFrameParser._build_doc
    """
    from lxml.html import parse, fromstring, HTMLParser
    from lxml.etree import XMLSyntaxError

    def _unwrap(node):
        # Use ``getroot()`` when the object offers it; otherwise the object
        # itself is what gets returned.
        try:
            return node.getroot()
        except AttributeError:
            return node

    strict_parser = HTMLParser(recover=False, encoding=self.encoding)

    try:
        # try to parse the input in the simplest way
        doc = _unwrap(parse(self.io, parser=strict_parser))
    except (UnicodeDecodeError, IOError):
        if _is_url(self.io):
            scheme = parse_url(self.io).scheme
            if scheme in _valid_schemes:
                # something else happened: maybe a faulty connection
                raise
            # lxml can't parse it
            msg = (('{invalid!r} is not a valid url scheme, valid '
                    'schemes are {valid}')
                   .format(invalid=scheme, valid=_valid_schemes))
            raise ValueError(msg)
        # if the input is a blob of html goop
        doc = _unwrap(fromstring(self.io, parser=strict_parser))
    else:
        if not hasattr(doc, 'text_content'):
            raise XMLSyntaxError("no text parsed from document", 0, 0, 0)

    return doc
def parse_xml(content: str, transport, base_url=None, settings=None):
    """Parse an XML string and return the root Element.

    The parser strips comments, does not resolve entities, and recovers
    from malformed input unless ``settings.strict`` is set. Depending on
    the settings, documents carrying a doctype or declaring entities are
    rejected.

    :param content: The XML string
    :type content: str
    :param transport: The transport instance to load imported documents
    :type transport: zeep.transports.Transport
    :param base_url: The base url of the document, used to make relative
      lookups absolute.
    :type base_url: str
    :param settings: A zeep.settings.Settings object containing parse
      settings. A default ``Settings()`` is used when omitted.
    :type settings: zeep.settings.Settings
    :returns: The document root
    :rtype: lxml.etree._Element
    :raises DTDForbidden: if the document has a doctype and
      ``settings.forbid_dtd`` is set
    :raises EntitiesForbidden: if a DTD declares an entity and
      ``settings.forbid_entities`` is set
    :raises XMLSyntaxError: if lxml reports a syntax error in ``content``

    """
    settings = settings or Settings()

    xml_parser = XMLParser(
        remove_comments=True,
        resolve_entities=False,
        recover=not settings.strict,
        huge_tree=settings.xml_huge_tree,
    )
    xml_parser.resolvers.add(ImportResolver(transport))

    try:
        root = fromstring(content, parser=xml_parser, base_url=base_url)
        info = root.getroottree().docinfo

        if info.doctype and settings.forbid_dtd:
            raise DTDForbidden(info.doctype, info.system_url, info.public_id)

        if settings.forbid_entities:
            candidates = (info.internalDTD, info.externalDTD)
            for dtd in (d for d in candidates if d is not None):
                # The first declared entity is enough to reject the document.
                for entity in dtd.iterentities():
                    raise EntitiesForbidden(entity.name, entity.content)

        return root
    except etree.XMLSyntaxError as exc:
        raise XMLSyntaxError(
            "Invalid XML content received (%s)" % exc.msg, content=content
        )
def get_facility(self, url, username=u'', password=u''):
    """
    Fetch the FLM at ``url`` and return it wrapped in a ``FacilityParser``.

    :param url: Location of the FLM. A value without ``://`` is treated as
        relative and joined onto ``self.sitelist_url``.
    :param username: Passed through to ``self.request``.
    :param password: Passed through to ``self.request``.
    :returns: ``FacilityParser(res.raw)`` for the fetched response.
    :raises FlmxParseError: if ``FacilityParser`` raises one; re-raised
        with the URL added to the message.
    :raises XMLSyntaxError: if ``FacilityParser`` raises one; logged and
        re-raised with the URL added to the message.

    Whatever ``res.raise_for_status()`` raises for a non-200 response
    propagates to the caller unchanged.
    """
    if u'://' not in url:
        # Assume URL is relative
        url = urljoin(self.sitelist_url, url)

    res = self.request(url, username=username, password=password)

    # If there's a problem with obtaining an FLM
    if res.status_code != 200:
        # status_code is compared against the integer 200 above, so gluing it
        # onto a str with ``+`` raised TypeError and hid the real HTTP error.
        # Letting the logger format it yields the same message text.
        _logger.warning('Could not access %s: HTTP Response code %s',
                        url, res.status_code)
        # This reraises a HTTPError stored by the requests API
        res.raise_for_status()

    try:
        _logger.info('Parsing FLM at ' + url)
        return FacilityParser(res.raw)
    except FlmxParseError as e:
        raise FlmxParseError(u"Problem parsing FLM at " + url + u". Error message: " + e.msg)
    except XMLSyntaxError as e:
        msg = u"FLM at " + url + u" failed validation. Error message: " + e.msg
        _logger.warning(msg)
        # NOTE(review): if XMLSyntaxError here is lxml.etree.XMLSyntaxError,
        # its constructor also requires code/line/column arguments - confirm
        # which class is imported before relying on this re-raise.
        raise XMLSyntaxError(msg)
def test_get_xml_field_ExpatError_returns_empty_dict():
    """
    ``_get_xml_field`` yields ``{}`` when ``lxml_to_dict.parse`` raises
    an ``XMLSyntaxError``.
    """
    # Inject dummy values into XMLSyntaxError constructor
    parse_failure = XMLSyntaxError(*range(5))

    with patch.object(lxml_to_dict, "parse", side_effect=parse_failure):
        actual = api_xml._get_xml_field('any_xml', 'myfield')
        assert_equal(actual, {})
config_mock.config["environment"]["rabbit"]["exchange_nok"]) assert rabbit_mock().publish_message.call_args[0][2] == ( "NOK.test_org.FLOW.ARCHIVED".lower()) # Should still return "202" assert result.status_code == 202 assert result.json() == { "message": "Processing 1 event(s) in the background." } # Check that it didn't delete the S3 object assert s3_client().delete_object.call_count == 0 @patch.object(PremisEvents, '__init__', side_effect=XMLSyntaxError("Document is empty, line 1, column 1", 1, 1, 1, "<string>")) def test_handle_event_xml_error(premis_events_mock): result = client.post("/event", data='') assert result.status_code == 400 assert result.json() == { "detail": "NOK: Document is empty, line 1, column 1 (<string>, line 1)" } @patch.object(PremisEvents, '__init__', side_effect=InvalidPremisEventException("Invalid event")) def test_handle_event_invalid_premis_event(premis_events_mock): result = client.post("/event", data='') assert result.status_code == 400 assert result.json() == {"detail": "NOK: Invalid event"}