def test_type_error(self) -> None:
    # A non-bytes argument must be rejected with a ``TypeError`` whose args
    # hold exactly the explanatory message below.
    not_bytes = 1  # type: ignore
    expected_args = ("Value to be parsed as XML must be bytes.", )

    with self.assertRaises(TypeError) as raised:
        parse_untrusted_xml(not_bytes)

    self.assertSequenceEqual(raised.exception.args, expected_args)
def test_attack_quadratic_blowup(self) -> None:
    # The quadratic-blowup attack fixture must be refused as a forbidden
    # XML feature.
    attack_path = 'test_data/xml/attacks/quadratic-blowup-entity-expansion.xml'
    attack_bytes = read_test_file_bytes(attack_path)
    expected_args = ("XML uses or contains a forbidden feature.", )

    with self.assertRaises(XmlFeatureForbidden) as raised:
        parse_untrusted_xml(attack_bytes)

    self.assertSequenceEqual(raised.exception.args, expected_args)
def test_attack_external_entity_expansion_remote(self) -> None:
    # The remote external-entity-expansion attack fixture must be refused
    # as a forbidden XML feature.
    attack_path = 'test_data/xml/attacks/external-entity-expansion-remote.xml'
    attack_bytes = read_test_file_bytes(attack_path)
    expected_args = ("XML uses or contains a forbidden feature.", )

    with self.assertRaises(XmlFeatureForbidden) as raised:
        parse_untrusted_xml(attack_bytes)

    self.assertSequenceEqual(raised.exception.args, expected_args)
def test_bytes_text(self) -> None:
    # Bytes that are not XML at all must surface as an ``XmlSyntaxError``
    # with the message checked below.
    not_xml = b'not xml'  # type: ignore
    expected_args = (
        "XML syntax error. Start tag expected, '<' not found, line 1, column 1.",
    )

    with self.assertRaises(XmlSyntaxError) as raised:
        parse_untrusted_xml(not_xml)

    self.assertSequenceEqual(raised.exception.args, expected_args)
def test_attack_billion_laughs_2(self) -> None:
    # This "billion laughs" fixture is expected to be reported as a syntax
    # error (entity reference loop), not as a forbidden feature.
    attack_bytes = read_test_file_bytes('test_data/xml/attacks/billion-laughs-2.xml')
    expected_args = (
        "XML syntax error. Detected an entity reference loop, line 1, column 4.",
    )

    with self.assertRaises(XmlSyntaxError) as raised:
        parse_untrusted_xml(attack_bytes)

    self.assertSequenceEqual(raised.exception.args, expected_args)
def test_clean_dte_xml_ok_3(self) -> None:
    # Clean the "bad" DTE fixture #3 (root tag is the un-namespaced 'DTE'),
    # then check that the cleaned document validates and that the rewritten
    # bytes differ from the original exactly as listed in the expected diff.
    original_bytes = self.dte_bad_xml_3_xml_bytes
    doc = xml_utils.parse_untrusted_xml(original_bytes)
    root_tag = doc.getroottree().getroot().tag
    self.assertEqual(root_tag, 'DTE')

    # Before cleaning, schema validation is expected to fail.
    expected_error_args = (
        "Element 'DTE': No matching global declaration available for the validation root., "
        "line 2",
    )
    with self.assertRaises(xml_utils.XmlSchemaDocValidationError) as raised:
        validate_dte_xml(doc)
    self.assertSequenceEqual(raised.exception.args, expected_error_args)

    cleaned_doc, was_modified = clean_dte_xml(
        doc,
        set_missing_xmlns=True,
        remove_doc_personalizado=True,
    )
    self.assertTrue(was_modified)

    # This will not raise.
    validate_dte_xml(cleaned_doc)

    # Serialize the cleaned document and make sure it survives a round trip.
    with io.BytesIO() as buffer:
        xml_utils.write_xml_doc(cleaned_doc, buffer)
        rewritten_bytes = buffer.getvalue()

    rewritten_doc = xml_utils.parse_untrusted_xml(rewritten_bytes)
    validate_dte_xml(rewritten_doc)

    expected_diff_lines = (
        b'--- \n',
        b'+++ \n',
        b'@@ -1,5 +1,5 @@\n',
        b'-<?xml version="1.0" encoding="windows-1252"?>',
        b'-<DTE version="1.0">',
        b"+<?xml version='1.0' encoding='WINDOWS-1252'?>",
        b'+<DTE xmlns="http://www.sii.cl/SiiDte" version="1.0">',
        b' <Documento ID="DTE-33-2336600">',
        b' <Encabezado>',
        b' <IdDoc>',
    )

    actual_diff_lines = list(
        difflib.diff_bytes(
            difflib.unified_diff,
            a=original_bytes.splitlines(),
            b=rewritten_bytes.splitlines(),
        )
    )
    self.assertSequenceEqual(actual_diff_lines, expected_diff_lines)
def test_validate_dte_xml_ok_dte_2(self) -> None:
    # The clean DTE fixture #2 must pass validation, and its root element
    # must carry the namespaced 'DTE' tag.
    doc = xml_utils.parse_untrusted_xml(self.dte_clean_xml_2_xml_bytes)

    validate_dte_xml(doc)

    root_tag = doc.getroottree().getroot().tag
    expected_tag = '{%s}DTE' % DTE_XMLNS
    self.assertEqual(root_tag, expected_tag)
def test_parse_dte_xml_ok_2(self) -> None:
    # Parse the clean DTE fixture #2 and compare every extracted field
    # against the expected values.
    doc = xml_utils.parse_untrusted_xml(self.dte_clean_xml_2_xml_bytes)
    actual_fields = dict(parse_dte_xml(doc).as_dict())

    expected_firma_dt = tz_utils.convert_naive_dt_to_tz_aware(
        dt=datetime(2019, 3, 28, 13, 59, 52),
        tz=DteDataL2.DATETIME_FIELDS_TZ,
    )
    expected_fields = {
        'emisor_rut': Rut('76399752-9'),
        'tipo_dte': cl_sii.dte.constants.TipoDteEnum.FACTURA_ELECTRONICA,
        'folio': 25568,
        'fecha_emision_date': date(2019, 3, 29),
        'receptor_rut': Rut('96874030-K'),
        'monto_total': 230992,
        'emisor_razon_social': 'COMERCIALIZADORA INNOVA MOBEL SPA',
        'receptor_razon_social': 'EMPRESAS LA POLAR S.A.',
        'fecha_vencimiento_date': None,
        'firma_documento_dt': expected_firma_dt,
        'signature_value': self._TEST_DTE_2_SIGNATURE_VALUE,
        'signature_x509_cert_der': self.dte_clean_xml_2_cert_der,
        'emisor_giro': 'COMERCIALIZACION DE PRODUCTOS PARA EL HOGAR',
        'emisor_email': '*****@*****.**',
        'receptor_email': None,
    }

    self.assertDictEqual(actual_fields, expected_fields)
def test_parse_dte_xml_ok_1b(self) -> None:
    # Parse the clean DTE fixture "1b" and compare every extracted field
    # against the expected values (signature and certificate are those of
    # fixture #1).
    doc = xml_utils.parse_untrusted_xml(self.dte_clean_xml_1b_xml_bytes)
    actual_fields = dict(parse_dte_xml(doc).as_dict())

    expected_firma_dt = tz_utils.convert_naive_dt_to_tz_aware(
        dt=datetime(2019, 4, 1, 1, 36, 40),
        tz=DteDataL2.DATETIME_FIELDS_TZ,
    )
    expected_fields = {
        'emisor_rut': Rut('76354771-K'),
        'tipo_dte': cl_sii.dte.constants.TipoDteEnum.FACTURA_ELECTRONICA,
        'folio': 170,
        'fecha_emision_date': date(2019, 4, 1),
        'receptor_rut': Rut('96790240-3'),
        'monto_total': 2996301,
        'emisor_razon_social': 'INGENIERIA ENACON SPA',
        'receptor_razon_social': 'MINERA LOS PELAMBRES',
        'fecha_vencimiento_date': None,
        'firma_documento_dt': expected_firma_dt,
        'signature_value': self._TEST_DTE_1_SIGNATURE_VALUE,
        'signature_x509_cert_der': self.dte_clean_xml_1_cert_der,
        'emisor_giro': 'Ingenieria y Construccion',
        'emisor_email': None,
        'receptor_email': None,
    }

    self.assertDictEqual(actual_fields, expected_fields)
def _set_dte_xml_missing_xmlns(xml_doc: XmlElement) -> Tuple[XmlElement, bool]:
    """
    Make sure the root 'DTE' element of ``xml_doc`` declares the DTE namespace.

    If the root tag is already the namespaced 'DTE' tag, ``xml_doc`` is
    returned untouched. If it is the plain 'DTE' tag, an ``xmlns`` attribute
    is set on the root element of ``xml_doc`` (the input document is mutated),
    the document is serialized and the resulting bytes are parsed again; the
    newly parsed document is returned.

    :returns: a 2-tuple ``(xml_doc, modified)``, where ``modified`` tells
        whether the namespace declaration had to be added
    :raises Exception: if the root tag is neither the plain nor the
        namespaced 'DTE' tag

    """
    # source: name of the XML element without namespace.
    #   cl_sii/data/ref/factura_electronica/schemas-xml/DTE_v10.xsd#L22 (f57a326)
    #   cl_sii/data/ref/factura_electronica/schemas-xml/EnvioDTE_v10.xsd#L92 (f57a326)
    simple_tag = 'DTE'
    namespace = DTE_XMLNS
    namespaced_tag = '{{{}}}{}'.format(namespace, simple_tag)

    # Tag of 'DTE' should be ...
    assert namespaced_tag == '{http://www.sii.cl/SiiDte}DTE'

    root = xml_doc.getroottree().getroot()
    actual_tag = root.tag

    # Nothing to do: the namespace is already in place.
    if actual_tag == namespaced_tag:
        return xml_doc, False

    if actual_tag != simple_tag:
        exc_msg = "XML root element tag does not match the expected simple or namespaced name."
        raise Exception(exc_msg, simple_tag, namespaced_tag, actual_tag)

    root.set('xmlns', namespace)

    # Round-trip through bytes and hand back the re-parsed document
    # (presumably so that the new declaration is reflected in the element tags).
    with io.BytesIO() as buffer:
        xml_utils.write_xml_doc(xml_doc, buffer)
        serialized = buffer.getvalue()

    return xml_utils.parse_untrusted_xml(serialized), True
def clean_dte_xml_file(input_file_path: str, output_file_path: str) -> Iterable[bytes]:
    """
    Clean the DTE XML file at ``input_file_path`` and save it to ``output_file_path``.

    The input is parsed, cleaned (missing ``xmlns`` set, "doc personalizado"
    removed) and validated; only then is the output file written. The output
    file is read back afterwards so the diff reflects the bytes on disk.

    :returns: the lines of a unified diff (as bytes) between the original
        file content and the rewritten one; the lines are produced lazily
    :raises: whatever the parsing, cleaning or validation helpers raise

    """
    with open(input_file_path, mode='rb') as input_file:
        original_bytes = input_file.read()

    cleaned_doc, _ = cl_sii.dte.parse.clean_dte_xml(
        xml_utils.parse_untrusted_xml(original_bytes),
        set_missing_xmlns=True,
        remove_doc_personalizado=True,
    )

    # TODO: add exception with a nice message for the caller.
    cl_sii.dte.parse.validate_dte_xml(cleaned_doc)

    with open(output_file_path, 'w+b') as output_file:
        xml_utils.write_xml_doc(cleaned_doc, output_file)

    with open(output_file_path, mode='rb') as output_file:
        rewritten_bytes = output_file.read()

    # note: another way to compute the difference in a similar format is
    #   `diff -Naur $input_file_path $output_file_path`
    return difflib.diff_bytes(
        difflib.unified_diff,
        a=original_bytes.splitlines(),
        b=rewritten_bytes.splitlines(),
    )
def test_parse_dte_xml_ok_3(self) -> None:
    # Parse the clean DTE fixture #3 and compare every extracted field
    # against the expected values.
    doc = xml_utils.parse_untrusted_xml(self.dte_clean_xml_3_xml_bytes)
    actual_fields = dict(parse_dte_xml(doc).as_dict())

    expected_firma_dt = tz_utils.convert_naive_dt_to_tz_aware(
        dt=datetime(2019, 8, 9, 9, 41, 9),
        tz=DteDataL2.DATETIME_FIELDS_TZ,
    )
    expected_fields = {
        'emisor_rut': Rut('60910000-1'),
        'tipo_dte': cl_sii.dte.constants.TipoDteEnum.FACTURA_ELECTRONICA,
        'folio': 2336600,
        'fecha_emision_date': date(2019, 8, 8),
        'receptor_rut': Rut('76555835-2'),
        'monto_total': 10642,
        'emisor_razon_social': 'Universidad de Chile',
        'receptor_razon_social': 'FYNPAL SPA',
        'fecha_vencimiento_date': date(2019, 8, 8),
        'firma_documento_dt': expected_firma_dt,
        'signature_value': self._TEST_DTE_3_SIGNATURE_VALUE,
        'signature_x509_cert_der': self.dte_clean_xml_3_cert_der,
        'emisor_giro': 'Corporación Educacional y Servicios Profesionales',
        'emisor_email': None,
        'receptor_email': None,
    }

    self.assertDictEqual(actual_fields, expected_fields)
def test_parse_dte_xml_fail_3(self) -> None:
    # Parsing the "bad" DTE fixture #3 must fail with a ``ValueError``
    # carrying the message checked below.
    doc = xml_utils.parse_untrusted_xml(self.dte_bad_xml_3_xml_bytes)
    expected_args = ("Top level XML element 'Document' is required.", )

    with self.assertRaises(ValueError) as raised:
        parse_dte_xml(doc)

    self.assertSequenceEqual(raised.exception.args, expected_args)
def test_parse_untrusted_xml_valid(self) -> None:
    # A small well-formed document must parse into an ``XmlElement`` and
    # serialize back to exactly the same bytes.
    xml_bytes = b''.join((
        b'<root>\n',
        b' <element key="value">text</element>\n',
        b' <element>text</element>tail\n',
        b' <empty-element/>\n',
        b'</root>',
    ))

    parsed = parse_untrusted_xml(xml_bytes)

    self.assertIsInstance(parsed, XmlElement)
    serialized = lxml.etree.tostring(parsed, pretty_print=False)
    self.assertEqual(serialized, xml_bytes)
def test_validate_dte_xml_fail_dte_3(self) -> None:
    # The "bad" DTE fixture #3 has an un-namespaced 'DTE' root tag and must
    # fail schema validation with the message checked below.
    doc = xml_utils.parse_untrusted_xml(self.dte_bad_xml_3_xml_bytes)
    root_tag = doc.getroottree().getroot().tag
    self.assertEqual(root_tag, 'DTE')

    expected_args = (
        "Element 'DTE': No matching global declaration available for the validation root., "
        "line 2",
    )
    with self.assertRaises(xml_utils.XmlSchemaDocValidationError) as raised:
        validate_dte_xml(doc)

    self.assertSequenceEqual(raised.exception.args, expected_args)
def test_clean_dte_xml_ok_2(self) -> None:
    # Clean the "bad" DTE fixture #2 (root tag is the un-namespaced 'DTE'),
    # then check that the cleaned document validates and that the rewritten
    # bytes differ from the original exactly as listed in the expected diff.
    original_bytes = self.dte_bad_xml_2_xml_bytes
    doc = xml_utils.parse_untrusted_xml(original_bytes)
    root_tag = doc.getroottree().getroot().tag
    self.assertEqual(root_tag, 'DTE')

    # Before cleaning, schema validation is expected to fail.
    expected_error_args = (
        "Element 'DTE': No matching global declaration available for the validation root., "
        "line 2",
    )
    with self.assertRaises(xml_utils.XmlSchemaDocValidationError) as raised:
        validate_dte_xml(doc)
    self.assertSequenceEqual(raised.exception.args, expected_error_args)

    cleaned_doc, was_modified = clean_dte_xml(
        doc,
        set_missing_xmlns=True,
        remove_doc_personalizado=True,
    )
    self.assertTrue(was_modified)

    # This will not raise.
    validate_dte_xml(cleaned_doc)

    # Serialize the cleaned document and make sure it survives a round trip.
    with io.BytesIO() as buffer:
        xml_utils.write_xml_doc(cleaned_doc, buffer)
        rewritten_bytes = buffer.getvalue()

    rewritten_doc = xml_utils.parse_untrusted_xml(rewritten_bytes)
    validate_dte_xml(rewritten_doc)

    expected_diff_lines = (
        b'--- \n',
        b'+++ \n',
        b'@@ -1,5 +1,5 @@\n',
        b'-<?xml version="1.0" encoding="ISO-8859-1"?>',
        b'-<DTE version="1.0">',
        b"+<?xml version='1.0' encoding='ISO-8859-1'?>",
        b'+<DTE xmlns="http://www.sii.cl/SiiDte" version="1.0">',
        b' <!-- O Win32 Chrome 73 central VERSION: v20190227 -->',
        b' <Documento ID="MiPE76399752-6048">',
        b' <Encabezado>',
        b'@@ -64,13 +64,13 @@\n',
        b' </Documento>',
        b' <Signature xmlns="http://www.w3.org/2000/09/xmldsig#">',
        b' <SignedInfo>',
        b'-<CanonicalizationMethod Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315" />',  # noqa: E501
        b'-<SignatureMethod Algorithm="http://www.w3.org/2000/09/xmldsig#rsa-sha1" />',
        b'+<CanonicalizationMethod Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/>',  # noqa: E501
        b'+<SignatureMethod Algorithm="http://www.w3.org/2000/09/xmldsig#rsa-sha1"/>',
        b' <Reference URI="#MiPE76399752-6048">',
        b' <Transforms>',
        b'-<Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315" />',
        b'+<Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/>',
        b' </Transforms>',
        b'-<DigestMethod Algorithm="http://www.w3.org/2000/09/xmldsig#sha1" />',
        b'+<DigestMethod Algorithm="http://www.w3.org/2000/09/xmldsig#sha1"/>',
        b' <DigestValue>tk/D3mfO/KtdWyFXYZHe7dtYijg=</DigestValue>',
        b' </Reference>',
        b' </SignedInfo>',
    )

    actual_diff_lines = list(
        difflib.diff_bytes(
            difflib.unified_diff,
            a=original_bytes.splitlines(),
            b=rewritten_bytes.splitlines(),
        )
    )
    self.assertSequenceEqual(actual_diff_lines, expected_diff_lines)