def test_process_changes_added_end(self):
    """If no valid sibling can be guessed for an added element, it must
    be appended at the end of its parent."""
    amendment = etree.fromstring("""
    <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys><preamble></preamble>
        <changeset>
            <change operation="added" label="1234-5">
                <paragraph label="1234-5">An added paragraph</paragraph>
            </change>
        </changeset>
    </notice>""")
    base = etree.fromstring("""
    <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys>
        <preamble></preamble>
        <part label="1234">
            <content>
                <paragraph label="1234-1">An existing paragraph</paragraph>
                <paragraph label="1234-3">Another existing paragraph</paragraph>
            </content>
        </part>
    </regulation>""")
    result = process_changes(base, amendment)
    added = result.find('.//{eregs}paragraph[@label="1234-5"]')
    # Two existing siblings precede it, so the new paragraph sits at index 2.
    self.assertEqual(added.getparent().index(added), 2)
def test_is_pointer_tag(self):
    """ Check that is_pointer_tag works properly. """
    pointer_snippets = [
        """<html url_name="blah"/>""",
        """<html url_name="blah"></html>""",
        """<html url_name="blah"> </html>""",
        """<problem url_name="blah"/>""",
        """<course org="HogwartsX" course="Mathemagics" url_name="3.14159"/>""",
    ]
    non_pointer_snippets = [
        """<html url_name="blah" also="this"/>""",
        """<html url_name="blah">some text</html>""",
        """<problem url_name="blah"><sub>tree</sub></problem>""",
        """<course org="HogwartsX" course="Mathemagics" url_name="3.14159">
        <chapter>3</chapter>
        </course>
        """,
    ]
    # A pointer tag has only a url_name (plus org/course for courses) and
    # no content; anything with extra attributes, text or children is not.
    for snippet in pointer_snippets:
        print("should be True for {0}".format(snippet))
        self.assertTrue(is_pointer_tag(etree.fromstring(snippet)))
    for snippet in non_pointer_snippets:
        print("should be False for {0}".format(snippet))
        self.assertFalse(is_pointer_tag(etree.fromstring(snippet)))
def test_define_and_retrieve(self):
    """A defined domain is retrievable by name and reports parseable XML."""
    connection = self.get_openAuth_curry_func()("qemu:///system")
    # Fresh connection: no domains yet.
    self.assertEqual(connection.listDomainsID(), [])
    connection.defineXML(get_vm_xml())
    domain = connection.lookupByName("testname")
    # fromstring() raises if the domain descriptor is malformed XML.
    etree.fromstring(domain.XMLDesc(0))
def test_process_changes_moved_after(self):
    """A "moved" change with an `after` attribute places the element
    directly behind that sibling in the new parent."""
    amendment = etree.fromstring("""
    <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys><preamble></preamble>
        <changeset>
            <change operation="moved" label="1234-1" parent="1234-Subpart-B" after="1234-2"></change>
        </changeset>
    </notice>""")
    base = etree.fromstring("""
    <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys>
        <preamble></preamble>
        <part label="1234">
            <content>
                <subpart label="1234-Subpart-A">
                    <content>
                        <paragraph label="1234-1">An existing paragraph</paragraph>
                    </content>
                </subpart>
                <subpart label="1234-Subpart-B">
                    <content>
                        <paragraph label="1234-2">Another existing paragraph</paragraph>
                        <paragraph label="1234-3">One more existing paragraph</paragraph>
                    </content>
                </subpart>
            </content>
        </part>
    </regulation>""")
    result = process_changes(base, amendment)
    moved = result.find('.//{eregs}paragraph[@label="1234-1"]')
    # New grandparent is subpart B; the element sits right after 1234-2.
    self.assertEqual(moved.getparent().getparent().get('label'),
                     '1234-Subpart-B')
    self.assertEqual(moved.getparent().index(moved), 1)
def test_metadata_import_export(self):
    """Two checks:
        - unknown metadata is preserved across import-export
        - inherited metadata doesn't leak to children.
    """
    system = self.get_system()
    v = 'March 20 17:00'
    url_name = 'test1'
    # "unicorn" is an attribute no descriptor declares -- it must survive
    # the round trip untouched.
    start_xml = '''
    <course org="{org}" course="{course}"
            due="{due}" url_name="{url_name}" unicorn="purple">
        <chapter url="hi" url_name="ch" display_name="CH">
            <html url_name="h" display_name="H">Two houses, ...</html>
        </chapter>
    </course>'''.format(due=v, org=ORG, course=COURSE, url_name=url_name)
    descriptor = system.process_xml(start_xml)
    compute_inherited_metadata(descriptor)

    print(descriptor, descriptor._model_data)
    self.assertEqual(descriptor.lms.due, Date().from_json(v))

    # Check that the child inherits due correctly
    child = descriptor.get_children()[0]
    self.assertEqual(child.lms.due, Date().from_json(v))
    self.assertEqual(child._inheritable_metadata, child._inherited_metadata)
    # Exactly two inherited fields: the default start and the course due.
    self.assertEqual(2, len(child._inherited_metadata))
    self.assertEqual('1970-01-01T00:00:00Z', child._inherited_metadata['start'])
    self.assertEqual(v, child._inherited_metadata['due'])

    # Now export and check things
    resource_fs = MemoryFS()
    exported_xml = descriptor.export_to_xml(resource_fs)

    # Check that the exported xml is just a pointer
    print("Exported xml:", exported_xml)
    pointer = etree.fromstring(exported_xml)
    self.assertTrue(is_pointer_tag(pointer))
    # but it's a special case course pointer
    self.assertEqual(pointer.attrib['course'], COURSE)
    self.assertEqual(pointer.attrib['org'], ORG)

    # Does the course still have unicorns?
    with resource_fs.open('course/{url_name}.xml'.format(url_name=url_name)) as f:
        course_xml = etree.fromstring(f.read())

    self.assertEqual(course_xml.attrib['unicorn'], 'purple')

    # the course and org tags should be _only_ in the pointer
    self.assertTrue('course' not in course_xml.attrib)
    self.assertTrue('org' not in course_xml.attrib)

    # did we successfully strip the url_name from the definition contents?
    self.assertTrue('url_name' not in course_xml.attrib)

    # Does the chapter tag now have a due attribute?
    # hardcoded path to child
    with resource_fs.open('chapter/ch.xml') as f:
        chapter_xml = etree.fromstring(f.read())
    self.assertEqual(chapter_xml.tag, 'chapter')
    # Inherited metadata must NOT leak into the child's serialized form.
    self.assertFalse('due' in chapter_xml.attrib)
def get_dom(self):
    """
    Lazily parse the HTTP response body into a DOM.

    :return: The DOM instance, or None if parsing failed with a
             syntax error (the failure is logged, not raised).
    """
    if self._dom is None:
        # Start parsing, using a parser without target so we get the DOM
        # instance as result of our call to fromstring
        parser = etree.HTMLParser(recover=True)
        http_resp = self.get_http_response()
        resp_body = http_resp.get_body()
        try:
            self._dom = etree.fromstring(resp_body, parser)
        except ValueError:
            # Sometimes we get XMLs in the response. lxml fails to parse
            # them when an encoding header is specified and the text is
            # unicode. So we better make an exception and convert it to
            # string. Note that yet the parsed elems will be unicode.
            resp_body = resp_body.encode(http_resp.charset,
                                         'xmlcharrefreplace')
            parser = etree.HTMLParser(recover=True,
                                      encoding=http_resp.charset)
            self._dom = etree.fromstring(resp_body, parser)
        except etree.XMLSyntaxError as syntax_error:
            # Bug fix: log the caught exception instance; previously the
            # exception *class* was interpolated, hiding the real error.
            msg = 'An error occurred while parsing "%s",'\
                  ' original exception: "%s"'
            om.out.debug(msg % (http_resp.get_url(), syntax_error))
    return self._dom
def _read_opf(self):
    """Read, repair and parse the OPF package document.

    Applies progressively more aggressive cleanups for invalid markup,
    logging a warning at each fallback stage.

    :return: the cleaned OPF root element
    :raises OEBError: if the document namespace is not a known OPF one
    """
    data = self.oeb.container.read(None)
    data = self.oeb.decode(data)
    data = XMLDECL_RE.sub('', data)
    # Normalize the legacy OEB 1.0 namespace to the canonical OPF1 one.
    data = re.sub(r'http://openebook.org/namespaces/oeb-package/1.0(/*)',
                  OPF1_NS, data)
    try:
        opf = etree.fromstring(data)
    except etree.XMLSyntaxError:
        # Fallback 1: HTML named entities are invalid in XML -- replace them.
        data = xml_replace_entities(clean_xml_chars(data), encoding=None)
        try:
            opf = etree.fromstring(data)
            self.logger.warn('OPF contains invalid HTML named entities')
        except etree.XMLSyntaxError:
            # Fallback 2: drop the (frequently malformed) <tours> section and
            # declare the dc: prefix that some producers omit.
            data = re.sub(r'(?is)<tours>.+</tours>', '', data)
            data = data.replace('<dc-metadata>',
                '<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core">')
            try:
                opf = etree.fromstring(data)
                self.logger.warn('OPF contains invalid tours section')
            except etree.XMLSyntaxError:
                # Last resort: lxml's recovering parser.
                from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
                opf = etree.fromstring(data, parser=RECOVER_PARSER)
                self.logger.warn('OPF contains invalid markup, trying to parse it anyway')
    ns = namespace(opf.tag)
    if ns not in ('', OPF1_NS, OPF2_NS):
        raise OEBError('Invalid namespace %r for OPF document' % ns)
    opf = self._clean_opf(opf)
    return opf
def main():
    """Assemble a combined OVAL document from per-check files and dump it.

    Python 2 script. argv: [1] directory holding the checks config,
    [2] product name. RUNTIME_OVAL_VERSION overrides the schema version
    from the config file. Exits 1 on usage/config errors, 0 on success.
    """
    if len(sys.argv) < 4:
        print "Provide a directory name, which contains the checks."
        sys.exit(1)
    # Get header with schema version
    oval_config = sys.argv[1] + "/" + conf_file
    product = sys.argv[2]
    oval_schema_version = None
    runtime_oval_schema_version = os.getenv('RUNTIME_OVAL_VERSION', None)
    if os.path.isfile(oval_config):
        (config_oval_schema_version, multi_platform) = parse_conf_file(
            oval_config, product)
        # Environment override wins when it differs from the config value.
        if runtime_oval_schema_version is not None and \
                runtime_oval_schema_version != config_oval_schema_version:
            oval_schema_version = runtime_oval_schema_version
        else:
            oval_schema_version = config_oval_schema_version
        header = _header(oval_schema_version)
    else:
        print 'The directory specified does not contain the %s file!' % conf_file
        sys.exit(1)
    body = checks(product)
    # parse new file(string) as an ElementTree, so we can reorder elements
    # appropriately
    corrected_tree = ET.fromstring(header + body + footer)
    tree = add_platforms(corrected_tree, multi_platform)
    definitions = ET.Element("definitions")
    tests = ET.Element("tests")
    objects = ET.Element("objects")
    states = ET.Element("states")
    variables = ET.Element("variables")
    # Bucket every def-group child by its OVAL element category suffix.
    for childnode in tree.findall("./{http://oval.mitre.org/XMLSchema/oval-definitions-5}def-group/*"):
        if childnode.tag is ET.Comment:
            continue
        if childnode.tag.endswith("definition"):
            append(definitions, childnode)
        if childnode.tag.endswith("_test"):
            append(tests, childnode)
        if childnode.tag.endswith("_object"):
            append(objects, childnode)
        if childnode.tag.endswith("_state"):
            append(states, childnode)
        if childnode.tag.endswith("_variable"):
            append(variables, childnode)
    # Rebuild an empty document and re-attach the sections in canonical order.
    tree = ET.fromstring(header + footer)
    tree.append(definitions)
    tree.append(tests)
    tree.append(objects)
    tree.append(states)
    # Only emit <variables> when at least one variable was collected.
    if list(variables):
        tree.append(variables)
    ET.dump(tree)
    sys.exit(0)
def _parse(self, http_resp):
    """
    Parse the HTTP response body with an event-driven (target) parser.

    :param http_resp: the HTTP response whose body should be parsed;
                      parse events are delivered to self.
    """
    # Start parsing!
    parser = etree.HTMLParser(target=self, recover=True)
    resp_body = http_resp.body
    try:
        # Note: Given that the parser has target != None, this call does not
        # return a DOM instance!
        etree.fromstring(resp_body, parser)
    except ValueError:
        # Sometimes we get XMLs in the response. lxml fails to parse them
        # when an encoding header is specified and the text is unicode. So
        # we better make an exception and convert it to string. Note that
        # yet the parsed elems will be unicode.
        resp_body = resp_body.encode(http_resp.charset,
                                     'xmlcharrefreplace')
        parser = etree.HTMLParser(target=self, recover=True,
                                  encoding=http_resp.charset)
        etree.fromstring(resp_body, parser)
    except etree.XMLSyntaxError as syntax_error:
        # Bug fix: log the caught exception instance; previously the
        # exception *class* was interpolated, hiding the real error.
        msg = 'An error occurred while parsing "%s",'\
              ' original exception: "%s"'
        om.out.debug(msg % (http_resp.get_url(), syntax_error))
def get_collection_items(url, hurry=False):
    """Fetch an iTunes collection page and return its track items.

    Tries the default store language first; if that yields no usable XML,
    retries with lang=2 (podcast layout). Returns None when the page
    cannot be fetched or contains nothing usable.
    (Python 2: uses plistlib.readPlistFromString.)
    """
    try:
        xml = get_page(url, hurry=hurry)
        lang = 1
    except ValueError:
        # If there's a bad URL, skip this link
        return None
    # Get the tracklisting for this collection
    if xml:
        root = etree.fromstring(xml)
        items = root.xpath('.//itms:TrackList',
                           namespaces={'itms':'http://www.apple.com/itms/'})
    else:
        try:
            print("Trying lang=2 instead...")
            xml = get_page(url,2,hurry=hurry)
            lang = 2
        except ValueError:
            # If there's a bad URL, skip this link
            return None
        if xml:
            root = etree.fromstring(xml)
            # lang=2 pages are a bare plist, so the root itself is the item.
            items = root.xpath('.')
        else:
            return None
    plist = plistlib.readPlistFromString(etree.tostring(items[0]))
    if lang == 2:
        # Get rid of all non-podcast-episode plist elements from the list
        # of items to be returned.
        tobereturned = plist.get('items')
        tobereturned_norubbish = []
        for i in tobereturned:
            if i['type'] == 'podcast-episode':
                tobereturned_norubbish.append(i)
        return tobereturned_norubbish
    else:
        return plist.get('items')
def test_loadState(self):
    ''' simple load from file: after loading settings and state, the
    weekly constraint has one hour left and both constraints are active.
    '''
    self.l.loadSettings(xml.fromstring(self.CONSTRAINLIST))
    # Called for its side effects; the return value was previously bound
    # to an unused local (`xmls`), now dropped.
    self.l.loadState(xml.fromstring(self.STATELIST2))
    self.assertEqual(self.l.getConstrain(name='weekly').getTimeLeft(),
                     timedelta(hours=1))
    self.assertTrue(self.l.getConstrain(name='weekly').isActive())
    self.assertTrue(self.l.getConstrain(name='base').isActive())
def html5_pagelist_macro(macro_tag, fullpagesource):
    """ generate the html output for the pagelist macro

    Validates the comma-separated tags in the element's data-tags
    attribute, searches for matching pages and appends a <ul> of links to
    macro_tag. On validation or search failure an error <p> is appended
    instead. Always returns None (the result is the side effect).
    """
    from lxml import etree
    from ductus.resource.ductmodels import tag_value_attribute_validator
    from ductus.index import search_pages

    tags = macro_tag.get("data-tags", '')
    try:
        parsed_tags = tags.split(',')
        for tag in parsed_tags:
            tag_value_attribute_validator(tag)
    except Exception:
        # Bug fix: report the error and stop. Previously the error element
        # was assigned to rv but then overwritten, so invalid tags were
        # silently searched anyway and the message never rendered.
        macro_tag.append(etree.fromstring('<p>Invalid tag search</p>'))
        return

    try:
        pages = search_pages(tags=parsed_tags)
    except Exception:
        # Bug fix: same fall-through problem for search failures.
        macro_tag.append(etree.fromstring('<p>Search failed</p>'))
        return

    rv = etree.Element('ul')
    rv.set("class", "search_results")
    for page in pages:
        li = etree.SubElement(rv, 'li')
        a = etree.SubElement(li, 'a', href=page['path'])
        a.text = page['absolute_pagename']
    return macro_tag.append(rv)
def load_data(self):
    """Fetch and parse this person's DBLP profile page (Python 2).

    Stores the raw XML on self.xml and the parsed fields on self.data.
    Raises ValueError on request failure or on a second parse failure.
    """
    try:
        resp = requests.get(DBLP_PERSON_URL.format(urlpt=self.urlpt),
                            timeout=1000)
    except Exception as e:
        print "time out!"
        raise ValueError
    # TODO error handling
    xml = resp.content
    self.xml = xml
    try:
        root = etree.fromstring(xml)
    except etree.XMLSyntaxError as e:
        # NOTE(review): the retry re-parses the *same* bytes after a sleep;
        # presumably meant to dodge a transient resource problem -- confirm.
        time.sleep(2)
        try:
            root = etree.fromstring(xml)
        except etree.XMLSyntaxError as e:
            print "error again!"
            raise ValueError
    data = {
        'name': root.attrib['name'],
        # dblpkey elements without a @type are publication keys.
        'publications': [Publication(k) for k in root.xpath(
            '/dblpperson/dblpkey[not(@type)]/text()')],
        'homepages': root.xpath(
            '/dblpperson/dblpkey[@type="person record"]/text()'),
        'homonyms': root.xpath('/dblpperson/homonym/text()')
    }
    self.data = data
def _get_module(self, request): self.selected = [] # get the values of the enumeration xml_doc_tree_str = request.session['xmlDocTree'] xml_doc_tree = etree.fromstring(xml_doc_tree_str) namespaces = common.get_namespaces(BytesIO(str(xml_doc_tree_str))) # get the element where the module is attached xsd_element = xml_doc_tree.xpath(request.GET['xsd_xpath'], namespaces=namespaces)[0] xsd_element_type = xsd_element.attrib['type'] # remove ns prefix if present if ':' in xsd_element_type: xsd_element_type = xsd_element_type.split(':')[1] xpath_type = "./{0}simpleType[@name='{1}']".format(LXML_SCHEMA_NAMESPACE, xsd_element_type) elementType = xml_doc_tree.find(xpath_type) enumeration_list = elementType.findall('./{0}restriction/{0}enumeration'.format(LXML_SCHEMA_NAMESPACE)) for enumeration in enumeration_list: self.options[enumeration.attrib['value']] = enumeration.attrib['value'] if 'data' in request.GET: data = request.GET['data'] # get XML to reload reload_data = etree.fromstring("<root>" + data + "</root>") for child in reload_data: self.selected.append(child.text.strip()) return CheckboxesModule.get_module(self, request)
def test_overdragenZaak_Du01_geweigerd(self): """ 1. Verzoek overdragenZaak_Di01: STP -> ZS 2. Antwoord Bv03: ZS -> STP 3. Asynchroon antwoord: overdragenZaakDu01: ZS -> STP 4. Antwoord Bv03: STP -> ZS """ # Step 1 & 2 context = { 'referentienummer': self.genereerID(10), 'zaakidentificatie': self.zaak.zaakidentificatie, } response = self._do_request('OntvangAsynchroon', 'overdragenZaak_Di01.xml', context) response_root = etree.fromstring(response.content) response_berichtcode = response_root.xpath('//stuf:stuurgegevens/stuf:berichtcode', namespaces=self.nsmap)[0].text self.assertEqual(response_berichtcode, BerichtcodeChoices.bv03, response.content) # Step 3 & 4 response = self.consumer.overdragenZaak( self.zaak, False, context['referentienummer'], settings.ZAAKMAGAZIJN_SYSTEEM, melding='melding') response_root = etree.fromstring(response.content) response_berichtcode = response_root.xpath('//stuf:stuurgegevens/stuf:berichtcode', namespaces=self.nsmap)[0].text self.assertEqual(response_berichtcode, BerichtcodeChoices.bv03)
def test_api_list_ordering(self):
    '''API document lists should be ordered by date added'''
    self._login()
    name = 'Pride-and-Prejudice_Jane-Austen.epub'
    self._upload(name)
    name2 = 'alice-fromAdobe.epub'
    self._upload(name2)
    self.client.logout()
    response = self.client.get('/api/documents/', {
        'api_key': self.userpref.get_api_key()})
    self._validate_page(response)
    page = etree.fromstring(response.content)
    # Uploaded first -> listed first.
    assert 'Pride' in page.xpath(
        '//xhtml:li[1]/xhtml:span[@class="document-title"]/text()',
        namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'})[0]
    assert 'Alice' in page.xpath(
        '//xhtml:li[2]/xhtml:span[@class="document-title"]/text()',
        namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'})[0]
    # Delete the books and add in the opposite order
    [ua.delete() for ua in library_models.UserArchive.objects.all()]
    [d.delete() for d in library_models.EpubArchive.objects.all()]
    self._login()
    self._upload(name2)
    self._upload(name)
    self.client.logout()
    response = self.client.get('/api/documents/', {
        'api_key': self.userpref.get_api_key()})
    self._validate_page(response)
    page = etree.fromstring(response.content)
    # Reversed upload order must reverse the listing.
    assert 'Alice' in page.xpath(
        '//xhtml:li[1]/xhtml:span[@class="document-title"]/text()',
        namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'})[0]
    assert 'Pride' in page.xpath(
        '//xhtml:li[2]/xhtml:span[@class="document-title"]/text()',
        namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'})[0]
def process_include(tree, do_python=False):
    '''
    Include XML or python file.  For python files, wrap inside
    <script><![CDATA[ ... ]]></script>

    (Python 2 code: print statements and `except Exception, err` syntax.)
    '''
    tag = './/edxinclude'
    cmd = 'edXinclude'
    # Python includes use the edxincludepy tag / edXincludepy directive.
    if do_python:
        tag += 'py'
        cmd += "py"
    for include in tree.findall(tag):
        incfn = include.text
        if incfn is None:
            print "Error: %s must specify file to include!" % cmd
            print "See xhtml source line %s" % getattr(include,'sourceline','<unavailable>')
            # NOTE(review): bare `raise` with no active exception does not
            # re-raise anything here -- it produces a TypeError/RuntimeError.
            # Probably intended to raise an explicit exception; confirm.
            raise
        incfn = incfn.strip()
        try:
            incdata = open(incfn).read()
        except Exception, err:
            print "Error %s: cannot open include file %s to read" % (err,incfn)
            print "See xhtml source line %s" % getattr(include,'sourceline','<unavailable>')
            raise
        try:
            if do_python:
                # Wrap python source in CDATA so it survives XML parsing.
                incxml = etree.fromstring('<script><![CDATA[\n%s\n]]></script>' % incdata)
            else:
                incxml = etree.fromstring(incdata)
        except Exception, err:
            print "Error %s parsing XML for include file %s" % (err,incfn)
            print "See xhtml source line %s" % getattr(include,'sourceline','<unavailable>')
            raise
def compare_xml_strings(doc1, doc2):
    """
    Simple helper function to compare two XML strings.

    Both documents are canonicalized (C14N) and diffed line by line with
    surrounding whitespace stripped.

    :type doc1: str
    :type doc2: str
    :raises AssertionError: if the canonicalized documents differ; the
        message contains a unified diff.
    """
    # Compat py2k and py3k: encode unicode input to bytes. Bug fix: the
    # bare `except:` is narrowed -- bytes input raises AttributeError on
    # py3 (no .encode), and encoding itself can raise UnicodeError.
    try:
        doc1 = doc1.encode()
        doc2 = doc2.encode()
    except (AttributeError, UnicodeError):
        pass
    obj1 = etree.fromstring(doc1).getroottree()
    obj2 = etree.fromstring(doc2).getroottree()

    buf = io.BytesIO()
    obj1.write_c14n(buf)
    buf.seek(0, 0)
    str1 = buf.read().decode()
    str1 = [_i.strip() for _i in str1.splitlines()]

    buf = io.BytesIO()
    obj2.write_c14n(buf)
    buf.seek(0, 0)
    str2 = buf.read().decode()
    str2 = [_i.strip() for _i in str2.splitlines()]

    unified_diff = difflib.unified_diff(str1, str2)
    err_msg = "\n".join(unified_diff)
    if err_msg:  # pragma: no cover
        raise AssertionError("Strings are not equal.\n" + err_msg)
def get_epub_info(fname):
    """Return basic Dublin Core metadata from an epub file.

    :param fname: path to the .epub file
    :return: dict mapping 'title', 'language', 'creator', 'date' and
             'identifier' to the first matching value in the OPF metadata
    """
    ns = {
        'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
        'pkg': 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/'
    }
    # Bug fixes: don't shadow the `zip` builtin, and close the archive
    # deterministically with a context manager.
    with zipfile.ZipFile(fname) as archive:
        # find the contents metafile
        txt = archive.read('META-INF/container.xml')
        tree = etree.fromstring(txt)
        cfname = tree.xpath('n:rootfiles/n:rootfile/@full-path',
                            namespaces=ns)[0]
        # grab the metadata block from the contents metafile
        cf = archive.read(cfname)
    tree = etree.fromstring(cf)
    p = tree.xpath('/pkg:package/pkg:metadata', namespaces=ns)[0]
    # repackage the data
    res = {}
    for s in ['title', 'language', 'creator', 'date', 'identifier']:
        res[s] = p.xpath('dc:%s/text()' % (s), namespaces=ns)[0]
    return res
def test_complex_type_array_parsexml():
    """An unbounded sequence element parses into a plain Python list."""
    schema_doc = etree.fromstring("""
        <?xml version="1.0"?>
        <schema xmlns="http://www.w3.org/2001/XMLSchema"
                xmlns:tns="http://tests.python-zeep.org/"
                targetNamespace="http://tests.python-zeep.org/"
                elementFormDefault="qualified">
          <element name="container">
            <complexType>
              <sequence>
                <element minOccurs="0" maxOccurs="unbounded" name="foo" type="string" />
              </sequence>
            </complexType>
          </element>
        </schema>
    """.strip())
    schema = xsd.Schema(schema_doc)
    container_element = schema.get_element(
        '{http://tests.python-zeep.org/}container')
    payload = etree.fromstring("""
        <Address xmlns="http://tests.python-zeep.org/">
          <foo>bar</foo>
          <foo>zoo</foo>
        </Address>
    """)
    parsed = container_element.parse(payload, None)
    # Repeated <foo> children collect into a list, in document order.
    assert parsed.foo == ['bar', 'zoo']
def test_preprocess_xml(self, content):
    """preprocess_xml applies the configured macros: the node matched by
    the macro's XPath has its GPH child replaced by the macro markup.
    (`content` is a mock injected by the patch decorator.)

    NOTE(review): this compares serialized trees, so the literal
    whitespace inside the XML strings is significant -- the layout below
    was reconstructed and should be confirmed against the original file.
    """
    xml = etree.fromstring("""
    <CFRGRANULE>
      <PART>
        <APPENDIX>
          <TAG>Other Text</TAG>
          <GPH DEEP="453" SPAN="2">
            <GID>ABCD.0123</GID>
          </GPH>
        </APPENDIX>
      </PART>
    </CFRGRANULE>""")
    content.Macros.return_value = [
        ("//GID[./text()='ABCD.0123']/..", """
          <HD SOURCE="HD1">Some Title</HD>
          <GPH DEEP="453" SPAN="2">
            <GID>EFGH.0123</GID>
          </GPH>""")]
    reg_text.preprocess_xml(xml)
    should_be = etree.fromstring("""
    <CFRGRANULE>
      <PART>
        <APPENDIX>
          <TAG>Other Text</TAG>
          <HD SOURCE="HD1">Some Title</HD>
          <GPH DEEP="453" SPAN="2">
            <GID>EFGH.0123</GID>
          </GPH></APPENDIX>
      </PART>
    </CFRGRANULE>""")
    self.assertEqual(etree.tostring(xml), etree.tostring(should_be))
def remove_spaces(data):
    """Strip spaces (literal and %20) from .htm file names in *data* and
    rewrite the TOC and OPF inventory to match.

    :param data: dict mapping archive file names to file contents;
                 mutated in place.
    """
    # Spaces need to be consistent with splits and the TOC.
    # "Foo Bar.htm" and "Foo%20Bar.htm" are seen as different.
    def fnsan(fn):
        return fn.replace(" ", "").replace("%20", "")

    # rename actual files
    # Bug fix: iterate over a snapshot of the keys. The loop both inserts
    # and deletes entries, and mutating a dict while iterating it raises
    # RuntimeError on Python 3 (and is unreliable on Python 2).
    for file_name in list(data):
        if file_name.endswith(".htm") and file_name != fnsan(file_name):
            data[fnsan(file_name)] = data[file_name]
            del data[file_name]

    # update TOC
    file_name = "toc.ncx"
    root = etree.fromstring(data[file_name])
    for element in root.findall('.//*'):
        if element.get("src"):
            element.set("src", fnsan(element.get("src")))
    data[file_name] = "<?xml version='1.0' encoding='utf-8'?>\n" + etree.tostring(root)

    # update inventory
    file_name = "content.opf"
    root = etree.fromstring(data[file_name])
    for element in root.findall('.//*'):
        if element.get("href"):
            element.set("href", fnsan(element.get("href")))
    data[file_name] = "<?xml version='1.0' encoding='utf-8'?>\n" + etree.tostring(root)
def test_serialization(self):
    """Round-trip a RadioInput through toXml/fromXml and compare fields."""
    source_xml = '''<input type="radio" token="username" searchWhenChanged="True">
        <label>Select name</label>
        <default>Nate</default>
        <choice value="*">Any</choice>
        <choice value="nagrin">Nate</choice>
        <choice value="amrit">Amrit</choice>
        <populatingSearch earliest="-40m" latest="-10m" fieldForValue="foo" fieldForLabel="bar">search foo bar</populatingSearch>
    </input>'''
    original = RadioInput()
    original.fromXml(et.fromstring(source_xml))
    # Serialize and deserialize again.
    roundtripped = RadioInput()
    roundtripped.fromXml(et.fromstring(et.tostring(original.toXml())))
    # Every serialized field must survive the round trip unchanged.
    for attr in ('search', 'earliest', 'latest', 'searchWhenChanged',
                 'staticFields', 'searchFields', 'checked', 'name',
                 'savedSearch'):
        self.assertEqual(getattr(original, attr),
                         getattr(roundtripped, attr))
def test_process_analysis_no_existing(self):
    """When the regulation has no analysis yet, all analysis sections from
    the notice are carried over into the result."""
    notice_xml = etree.fromstring("""
    <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys>
            <date>2015-11-17</date>
        </fdsys>
        <preamble>
            <documentNumber>2015-12345</documentNumber>
        </preamble>
        <changeset></changeset>
        <analysis label="1234-Analysis">
            <analysisSection target="1234-2" notice="2015-12345" date="2015-11-17">An existing analysis</analysisSection>
            <analysisSection target="1234-3" notice="2015-12345" date="2015-11-17">An unchanging analysis</analysisSection>
        </analysis>
    </notice>""")
    # Regulation deliberately has no <analysis> element.
    regulation_xml = etree.fromstring("""
    <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xsi:schemaLocation="eregs ../../eregs.xsd">
        <part label="1234"></part>
    </regulation>""")
    result = process_analysis(regulation_xml, notice_xml)
    analysis = result.find('.//{eregs}analysis')
    self.assertTrue(analysis is not None)
    # Both sections from the notice should have been copied over.
    sections = analysis.findall('{eregs}analysisSection')
    self.assertEquals(len(sections), 2)
def test_process_changes_added(self):
    """An "added" change inserts the new paragraph after its label-wise
    predecessor and preserves its text."""
    amendment = etree.fromstring("""
    <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys><preamble></preamble>
        <changeset>
            <change operation="added" label="1234-2">
                <paragraph label="1234-2">An added paragraph</paragraph>
            </change>
        </changeset>
    </notice>""")
    base = etree.fromstring("""
    <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys>
        <preamble></preamble>
        <part label="1234">
            <content>
                <paragraph label="1234-1">An existing paragraph</paragraph>
            </content>
        </part>
    </regulation>""")
    result = process_changes(base, amendment)
    added = result.find('.//{eregs}paragraph[@label="1234-2"]')
    self.assertNotEqual(added, None)
    self.assertEqual("An added paragraph", added.text)
    # It follows the single existing sibling, so its index is 1.
    self.assertEqual(added.getparent().index(added), 1)
def test_process_changes_modified_xpath(self):
    """A "modified" change with a subpath replaces only the targeted
    child (the part's title), leaving a single title in the result."""
    notice_xml = etree.fromstring("""
    <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys><preamble></preamble>
        <changeset>
            <change operation="modified" label="1234" subpath='title'>
                <title>Modified Title</title>
            </change>
        </changeset>
    </notice>""")
    original_xml = etree.fromstring("""
    <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys>
        <preamble></preamble>
        <part label="1234">
            <title>Test Title</title>
            <content>
                <paragraph label="1234-1">An existing paragraph</paragraph>
            </content>
        </part>
    </regulation>""")
    new_xml = process_changes(original_xml, notice_xml)
    # The old title must be replaced, not duplicated.
    mod_title = new_xml.findall('.//{eregs}title')
    self.assertEqual(len(mod_title), 1)
    self.assertNotEqual(mod_title[0], None)
    self.assertEqual("Modified Title", mod_title[0].text)
def test_process_changes_moved_xpath(self):
    """A "moved" change with a subpath relocates only the targeted child
    (subpart A's title) into the new parent, removing it from the old."""
    notice_xml = etree.fromstring("""
    <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys><preamble></preamble>
        <changeset>
            <change operation="moved" label="1234-Subpart-A" subpath='title' parent="1234-Subpart-B"></change>
        </changeset>
    </notice>""")
    original_xml = etree.fromstring("""
    <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys>
        <preamble></preamble>
        <part label="1234">
            <content>
                <subpart label="1234-Subpart-A">
                    <title>Test Title</title>
                    <content>
                        <paragraph label="1234-1">An existing paragraph</paragraph>
                    </content>
                </subpart>
                <subpart label="1234-Subpart-B">
                    <content>
                        <paragraph label="1234-2">Another existing paragraph</paragraph>
                    </content>
                </subpart>
            </content>
        </part>
    </regulation>""")
    new_xml = process_changes(original_xml, notice_xml)
    # The (only) title now lives directly under subpart B ...
    moved_title = new_xml.find('.//{eregs}title')
    self.assertEqual(moved_title.getparent().get('label'), '1234-Subpart-B')
    # ... and no longer exists under subpart A.
    old_title = new_xml.find('.//{eregs}subpart[@label="1234-Subpart-A"]/{eregs}title')
    self.assertEqual(old_title, None)
def apply_notices(cfr_part, version, notices):
    """Sequentially apply notice changesets to a regulation version.

    Each applied notice is written as a new regulation file (named after
    the notice) next to the original regulation file, and becomes the
    base for the next notice.

    :param cfr_part: CFR part identifier used to locate files
    :param version: the starting regulation version
    :param notices: notice identifiers, applied in order
    """
    regulation_file = find_file(os.path.join(cfr_part, version))
    # huge_tree allows very large regulation documents; one parser
    # instance is reused for every document (hoisted out of the loop).
    parser = etree.XMLParser(huge_tree=True)
    with open(regulation_file, 'r') as f:
        left_reg_xml = f.read()
    left_xml_tree = etree.fromstring(left_reg_xml, parser)

    prev_notice = version
    prev_tree = left_xml_tree
    for notice in notices:
        print('Applying notice {} to version {}'.format(notice, prev_notice))
        notice_file = find_file(os.path.join(cfr_part, notice),
                                is_notice=True)
        with open(notice_file, 'r') as f:
            notice_string = f.read()
        notice_xml = etree.fromstring(notice_string, parser)

        # Process the notice changeset
        new_xml_tree = process_changes(prev_tree, notice_xml)

        # Write the new xml tree
        new_xml_string = etree.tostring(new_xml_tree,
                                        pretty_print=True,
                                        xml_declaration=True,
                                        encoding='UTF-8')
        new_path = os.path.join(
            os.path.dirname(regulation_file),
            os.path.basename(notice_file))
        with open(new_path, 'w') as f:
            print("Writing regulation to {}".format(new_path))
            f.write(new_xml_string)

        prev_notice = notice
        prev_tree = new_xml_tree
def test_process_changes_change_target_text(self):
    """A "changeTarget" change retargets only the <ref> elements whose
    text matches the change text; other refs to the old target remain."""
    notice_xml = etree.fromstring("""
    <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys><preamble></preamble>
        <changeset>
            <change operation="changeTarget" oldTarget="1234-1" newTarget="1234-3">reference to 1234-1</change>
        </changeset>
    </notice>""")
    original_xml = etree.fromstring("""
    <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xsi:schemaLocation="eregs ../../eregs.xsd">
        <fdsys></fdsys>
        <preamble></preamble>
        <part label="1234">
            <content>
                <subpart label="1234-Subpart-A">
                    <content>
                        <paragraph label="1234-1">An existing paragraph</paragraph>
                    </content>
                </subpart>
                <subpart label="1234-Subpart-B">
                    <content>
                        <paragraph label="1234-2">Another existing paragraph with a <ref target="1234-1" reftype="internal">reference to 1234-1</ref></paragraph>
                        <paragraph label="1234-3">One more existing paragraph with <ref target="1234-1" reftype="internal">another reference to 1234-1</ref></paragraph>
                    </content>
                </subpart>
            </content>
        </part>
    </regulation>""")
    new_xml = process_changes(original_xml, notice_xml)
    # Only the ref whose text matches "reference to 1234-1" is retargeted;
    # the ref with different text keeps its old target.
    old_refs = new_xml.findall('.//{eregs}ref[@target="1234-1"]')
    new_refs = new_xml.findall('.//{eregs}ref[@target="1234-3"]')
    self.assertEqual(len(old_refs), 1)
    self.assertEqual(len(new_refs), 1)
def thread_title(data):
    """Rewrite TOC navPoint labels to carry the full thread titles.

    :param data: dict mapping archive file names to file contents;
                 "toc.ncx" is rewritten in place.
    """
    #
    # Thread titles in the TOC are wonky.
    # They are just eg "Thread 20:" rather than "Thread 20: Midori III", because the
    # HTML is "<h1>Thread 20:</h1> <p><b>Midori III</b></p>"
    #
    titles = {}
    # figure out the full title for each file name
    for file_name in data:
        if file_name.endswith(".htm"):
            root = etree.fromstring(data[file_name])
            h1 = root.find('.//{http://www.w3.org/1999/xhtml}h1')
            b = root.find('.//{http://www.w3.org/1999/xhtml}b')
            if h1 is not None and h1.text and b is not None and b.text:
                titles[file_name] = h1.text + " " + b.text
    # update toc
    file_name = "toc.ncx"
    root = etree.fromstring(data[file_name])
    for element in root.findall('.//{http://www.daisy.org/z3986/2005/ncx/}navPoint'):
        # navPoint children by position: [0] navLabel (whose [0] holds the
        # text element), [1] content element carrying the src attribute.
        fn = element[1].get("src")
        if fn in titles:
            element[0][0].text = titles[fn]
    data[file_name] = "<?xml version='1.0' encoding='utf-8'?>\n" + etree.tostring(root)
def getScore(htmlcode):
    """Return the review score text scraped from the product page."""
    page = etree.fromstring(htmlcode, etree.HTMLParser())
    raw = str(page.xpath("//p[@class='d-review__average']/strong/text()")[0])
    # Strip literal "\n" sequences, real newlines and the trailing 点 mark.
    return raw.replace('\\n', '').replace('\n', '').replace('点', '')
if not args.published and not offline: token = acquireToken(curl) time.sleep(3) USERNAME = raw_input("Username: "******"data/schedule_" + conference + ".xml") as f: buf = f.read() schedule = etree.fromstring(buf).getroottree() elif args.published: schedule = etree.fromstring( requests.get( "http://events.ccc.de/congress/2017/Fahrplan/schedule.xml" ).content) else: # download current (probably not published version) from frab buf = download(curl, conference) schedule = etree.parse(buf) count = 0 count_missing = 0 max_time = 0
def parse_schema_file(self, file_name):
    """Read an XML schema file from disk and parse it.

    :param file_name: path to the schema file
    :return: the result of self.parse_schema on the parsed root element
    """
    # Bug fix: close the file handle deterministically; the original
    # open(...).read() leaked it until garbage collection.
    with open(file_name, 'rb') as schema_file:
        elt = etree.fromstring(schema_file.read(), parser=PARSER)
    return self.parse_schema(elt)
def xml_to_xslt(xml, template, context=None):
    """Render a Django template into an XSLT stylesheet and apply it.

    :param xml: lxml element/tree to transform
    :param template: Django template name that renders to an XSLT document
    :param context: optional dict of template context variables
    :return: the transformation result
    """
    # Bug fix: the original used a mutable default argument (context={}),
    # which is shared across calls and can leak state between callers.
    if context is None:
        context = {}
    t = loader.get_template(template)
    c = Context(context)
    rendered = t.render(c)
    xslt = etree.XSLT(etree.fromstring(rendered))
    return xslt(xml)
def _update(self, provider, update):
    """Poll the ingest API and return newly published items.

    Validates the provider config, issues the request (resuming from the
    stored minDateTime/sequenceNumber when present), parses the feed and
    records the new resume markers in update['private'].

    :raises SuperdeskIngestError: when username/password/idList are missing
    :raises IngestApiError: on request/parse failure or missing markers
    :return: a one-element list containing the parsed items
    """
    try:
        config = provider['config']
        user = config['username']
        password = config['password']
        id_list = config['idList']
        # Whitespace-only values count as missing.
        if not user.strip() or not password.strip() or not id_list.strip():
            raise KeyError
    except KeyError:
        raise SuperdeskIngestError.notConfiguredError(
            Exception('username, password and idList are needed'))
    # we remove spaces and empty values from id_list to do a clean list
    id_list = ','.join(
        [id_.strip() for id_ in id_list.split(',') if id_.strip()])
    params = {
        'idList': id_list,
        'idListType': 'products',
        'format': '5',
        'maxItems': '25',
        'sortOrder': 'chronological'
    }
    try:
        min_date_time = provider['private']['min_date_time']
        sequence_number = provider['private']['sequence_number']
    except KeyError:
        # First poll: no resume markers stored yet.
        pass
    else:
        params['minDateTime'] = min_date_time
        params['sequenceNumber'] = sequence_number
    try:
        r = requests.get(URL, auth=(user, password), params=params)
    except Exception:
        raise IngestApiError.apiRequestError(
            Exception('error while doing the request'))
    try:
        root_elt = etree.fromstring(r.content)
    except Exception:
        raise IngestApiError.apiRequestError(
            Exception('error while doing the request'))
    parser = self.get_feed_parser(provider)
    items = parser.parse(root_elt, provider)
    # Persist where this poll ended so the next one resumes from there.
    try:
        min_date_time = root_elt.xpath(
            '//iptc:timestamp[@role="minDateTime"]/text()',
            namespaces=NS)[0].strip()
        sequence_number = root_elt.xpath('//iptc:transmitId/text()',
                                         namespaces=NS)[0].strip()
    except IndexError:
        raise IngestApiError.apiRequestError(
            Exception('missing minDateTime or transmitId'))
    else:
        update.setdefault('private', {})
        update['private']['min_date_time'] = min_date_time
        update['private']['sequence_number'] = sequence_number
    return [items]
def getTitle(text):
    """Return the text of the first element whose id starts with 'title'."""
    doc = etree.fromstring(text, etree.HTMLParser())
    matches = doc.xpath('//*[starts-with(@id, "title")]/text()')
    return matches[0]
def getRuntime(text):
    """Return the runtime (digits only) from the '収録時間' table cell."""
    doc = etree.fromstring(text, etree.HTMLParser())
    cell = doc.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
    digits = re.search(r"\d+", str(cell))
    return digits.group()
def verify_bytes(self, document):
    """Parse the serialized XML *document* and verify the resulting tree."""
    root = fromstring(document)
    return self.verify(root)
def getCover(htmlcode):
    """Return the cover image URL scraped from the fixed page layout."""
    doc = etree.fromstring(htmlcode, etree.HTMLParser())
    src_attrs = doc.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')
    return str(src_attrs).strip(" ['']")
def _get_canonical_signed_info(self, element):
    """Locate the ds:SignedInfo node beneath the signature path, canonicalize
    it with the configured canonicalizer, and return the canonical bytes
    re-parsed as a standalone element.
    """
    path = self.signature_path + "/ds:SignedInfo"
    signed_info = element.find(path, namespaces=vars(NS))
    canonical_signed_info = self.canonicalizer.canonicalize(signed_info)
    # Re-parse so the caller receives an element of the canonical form,
    # detached from the original document.
    return fromstring(canonical_signed_info)
def getLabel(a):
    """Return the series label that follows the '系列:' paragraph."""
    doc = etree.fromstring(a, etree.HTMLParser())
    texts = doc.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')
    return str(texts).strip(" ['']")
def getCover_small(htmlcode):
    """Return the small thumbnail URL from the '#waterfall' grid."""
    doc = etree.fromstring(htmlcode, etree.HTMLParser())
    srcs = doc.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')
    return str(srcs).strip(" ['']")
def parse(self):
    """Parse the configured DICOM file (a DEXA body-composition scan).

    Extracts study/device metadata, patient info, body composition, bone
    mineral density (BMD) and visceral fat readings into
    ``self.parsed_result`` / ``self.json_result`` and enqueues a work tuple
    on the global FIFO queue.  Files that are not DICOM are moved to
    ``self.error_directory`` (or deleted if the move fails).
    """
    global global_fifo_q
    file = self.input_dicom_file
    if is_dicom(file):
        print("It is indeed DICOM!")
        dcm_file = pydicom.dcmread(file)
        parsed_result = {}
        # --- study / device metadata -----------------------------------
        testType = dcm_file.StudyDescription
        studyDate = dcm_file.StudyDate
        studyTime = dcm_file.StudyTime
        deviceSerialNumber = dcm_file.DeviceSerialNumber
        institutionName = dcm_file.InstitutionName
        manufacturer = dcm_file.Manufacturer
        manufacturerModelName = dcm_file.ManufacturerModelName
        entranceDose = dcm_file.EntranceDoseInmGy
        studyInstanceUID = dcm_file.StudyInstanceUID
        seriesInstanceUID = dcm_file.SeriesInstanceUID
        parsed_result["testInfo"] = {
            "testType": testType,
            "studyDate": studyDate,
            "studyTime": studyTime,
            "deviceSerialNumber": deviceSerialNumber,
            # NOTE: the "institutuionName" typo is kept on purpose —
            # downstream consumers read this exact key.
            "institutuionName": institutionName,
            "manufacturerModelName": manufacturerModelName,
            "manufacturer": manufacturer,
            "entranceDoseinmGy": entranceDose,
            "studyInstanceUID": studyInstanceUID,
            "seriesInstanceUID": seriesInstanceUID
        }
        # --- patient info ----------------------------------------------
        user_firstName = dcm_file.PatientName.given_name
        user_lastName = dcm_file.PatientName.family_name
        ethnic_group = dcm_file.EthnicGroup
        user_birthdate = dcm_file.PatientBirthDate
        user_sex = dcm_file.PatientSex
        user_id = dcm_file.PatientID
        # BUG FIX: original used `user_id is ''` — identity comparison with a
        # string literal is implementation-dependent; compare by value.
        if user_id == '':
            # Synthesize a placeholder email-style id when PatientID is blank.
            user_id = (str(user_firstName) + "." + str(user_lastName) + "."
                       + str(user_birthdate) + "@noemail.unk")
        user_age = dcm_file.PatientAge
        user_Size = dcm_file.PatientSize
        user_Weight = dcm_file.PatientWeight
        parsed_result["userInfo"] = {
            "firstName": user_firstName,
            "lastName": user_lastName,
            "email": user_id,
            "ethnicGroup": ethnic_group,
            "birthDate": user_birthdate,
            "userSex": user_sex,
            "userAge": user_age,
            "userSize": user_Size,
            "userWeight": user_Weight
        }
        # --- readings: the scanner embeds XML in ImageComments ---------
        xml_string = dcm_file.ImageComments
        xml_root = etree.fromstring(xml_string)
        parsed_result["bodyComposition"] = {}
        for leaf in xml_root.iter('COMP_ROI'):
            regionName = lowerCamelCase(leaf.attrib['region'])
            parsed_result["bodyComposition"][regionName] = {}
            for reading in leaf.iter():
                # skip attributes that don't have a value
                if 'units' not in reading.attrib:
                    continue
                # Normalize the value (% or lbs)
                key = lowerCamelCase(reading.tag)
                units = reading.attrib['units'].strip()
                if units == '%':
                    value = normalizePercentageValue(float(reading.text))
                else:
                    value = normalizeWeightValue(float(reading.text), units)
                # save the reading
                parsed_result["bodyComposition"][regionName][key] = value
        parsed_result["BMD"] = {}
        for leaf in xml_root.iter('ROI'):
            regionName = lowerCamelCase(leaf.attrib['region'])
            parsed_result["BMD"][regionName] = {}
            for reading in leaf.iter():
                # BUG FIX: original used `reading.text is '-'`; value
                # comparison is the correct way to skip empty cells.
                if reading.text is None or reading.text == '-':
                    continue
                key = lowerCamelCase(reading.tag)
                value = float(reading.text)
                parsed_result["BMD"][regionName][key] = value
        parsed_result["visceralFat"] = {}
        for leaf in xml_root.iter('VAT_MASS'):
            regionName = lowerCamelCase('Estimated Visceral Adipose Tissue')
            parsed_result["visceralFat"][regionName] = {}
            for reading in leaf.iter():
                # skip attributes that don't have a value
                if 'units' not in reading.attrib:
                    continue
                # Normalize the value (% or lbs)
                key = lowerCamelCase(reading.tag)
                units = reading.attrib['units'].strip()
                if units == '%':
                    value = normalizePercentageValue(float(reading.text))
                else:
                    value = normalizeWeightValue(float(reading.text), units)
                # save the reading
                parsed_result["visceralFat"][regionName][key] = value
        # convert it all to JSON and Save
        self.json_result = json.dumps(parsed_result)
        print(self.json_result)
        self.parsed_result = parsed_result
        # Hand off to the worker queue for downstream processing.
        data_to_process = (user_id, self.json_result, studyDate, studyTime,
                           file, user_firstName, user_lastName,
                           user_birthdate, studyInstanceUID)
        global_fifo_q.put(data_to_process)
    else:
        print("It's probably not DICOM")
        print("Trying to move NON-DICOM file to:" + self.error_directory)
        # Best effort: quarantine the file; delete it if the move fails.
        try:
            shutil.move(file, self.error_directory)
        except shutil.Error as e:
            print('Error: %s' % e)
            os.remove(file)
        except IOError as e:
            print('Error: %s' % e.strerror)
            os.remove(file)
def getRelease(a):
    """Return the release date text following the '发行时间:' span."""
    doc = etree.fromstring(a, etree.HTMLParser())
    texts = doc.xpath('//span[contains(text(),"发行时间:")]/../text()')
    return str(texts).strip(" ['']")
def _introspection_handler(self, data, service, path, methods):
    """
    Parses the result of the dbus method 'Introspect'.

    It will recursively load information for newly received paths
    and methods, by calling '_call_introspection'.

    =============== ================
    key             description
    =============== ================
    data            The result of the dbus method call 'Introspect'
    service         The dbus service that was introspected
    path            The path we introspected
    methods         A dictionary used internally to build up a result.
    =============== ================
    """
    # strip xml header
    if data.startswith("<?xml"):
        data = data.split("\n", 1)[1]
    # Transform received XML data into a python object.
    res = etree.fromstring(data)
    # Check for a xml-node containing dbus-method information.
    #
    # It looks like this:
    #   <node name="/org/gosa/notify">
    #     <interface name="org.gosa">
    #       <method name="notify_all">
    #         <arg direction="in" type="s" name="title" />
    #         ...
    #       </method>
    #     </interface>
    #     ...
    #   </node>
    if res.tag == "node" and res.get('name'):
        # Get the path name this method is registered to (e.g. /org/gosa/notify)
        path = res.get('name')
        # add all found methods to the list of known ones
        for entry in res:
            if entry.tag == "interface" and entry.get("name") == service:
                for method in entry.iterchildren():
                    # Skip method names that start with _ or : (anonymous methods)
                    m_name = method.get('name')
                    if m_name.startswith('_') or m_name.startswith(':'):
                        continue
                    # Mark dbus method with a 'dbus' prefix to be able to
                    # distinguish between client methods and proxied dbus methods
                    m_name = "dbus_" + m_name
                    # Check if this method name is already registered.
                    if m_name in methods:
                        raise DBusProxyException(
                            "Duplicate dbus method found '%s'! See (%s, %s)" % (
                                m_name, path, methods[m_name]['path']))
                    # Append the new method to the list of known ones.
                    methods[m_name] = {}
                    methods[m_name]['path'] = path
                    methods[m_name]['service'] = service
                    methods[m_name]['args'] = ()
                    # Extract method parameters (only "in" arguments matter
                    # for building the call signature).
                    for arg in method.iterchildren():
                        if arg.tag == "arg" and arg.get(
                                "direction") == "in":
                            argument = (arg.get('name'), arg.get('type'))
                            methods[m_name]['args'] += (argument, )
    # Check for a xml-node which introduces new paths
    #
    # It will look like this:
    #   <node>
    #     <node name="inventory"/>
    #     <node name="notify"/>
    #     <node name="service"/>
    #     <node name="wol"/>
    #   </node>
    #
    # Request information about registered services by calling 'Introspect'
    # for each path again
    else:
        for entry in res:
            if entry.tag == "node":
                sname = entry.get('name')
                self._call_introspection(service, os.path.join(path, sname),
                                         methods)
    return methods
def getStudio(a):
    """Return the studio name(s) following the '制作商: ' paragraph, space-joined."""
    doc = etree.fromstring(a, etree.HTMLParser())
    names = doc.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')
    return str(names).strip(" ['']").replace("', '", ' ')
def egress(self, envelope, http_headers, operation, binding_options):
    """Serialize the outgoing envelope, run the ampersand replacement, and
    return a freshly parsed copy together with the untouched headers.

    NOTE(review): `replace('&', '&')` is a no-op as written — it looks like
    it was meant to unescape '&amp;'; confirm against the original intent.
    """
    serialized = etree.tostring(envelope).decode()
    serialized = serialized.replace('&', '&')
    reparsed = etree.fromstring(serialized)
    return reparsed, http_headers
def add_001_field(marcxml_string, recid):
    """Append a MARC 001 controlfield holding *recid* to the record and
    return the re-serialized XML.
    """
    record = fromstring(marcxml_string)
    field = SubElement(record, 'controlfield', {'tag': '001'})
    field.text = str(recid)
    return tostring(record)
def test_getCapabilities(self):
    """The hypervisor's capabilities document must be well-formed XML."""
    connection = self.get_openAuth_curry_func()('qemu:///system')
    capabilities_xml = connection.getCapabilities()
    etree.fromstring(capabilities_xml)
def _xml_json_serialize(self, xml_string: str) -> OrderedDict:
    """Parse *xml_string* and convert the tree to an OrderedDict via the
    configured xmljson serializer.
    """
    root = etree.fromstring(xml_string)
    return self.xmljson_serializer.data(root)
def parse_page(page_content, dataset_file):
    """Parse one <page> XML blob from a wiki dump into revision dicts.

    Revisions are filtered to the [timestamp_from, timestamp_to] window from
    the config; deleted contributors are skipped.  On XML parse failure the
    raw content is dumped to the configured error-dump directory and the
    exception is re-raised.

    :param page_content: serialized <page> element (string/bytes).
    :param dataset_file: path of the dump file, recorded in each revision.
    :return: list of revision dicts.
    """
    ts_min = pd.to_datetime(cfg.get("preprocessing", "timestamp_from"), utc=True)
    ts_max = pd.to_datetime(cfg.get("preprocessing", "timestamp_to"), utc=True)
    revisions = []
    try:
        # huge_tree: pages can exceed lxml's default size limits.
        xml_parser = etree.XMLParser(huge_tree=True)
        xml_content = etree.fromstring(page_content, parser=xml_parser)
    except Exception as e:
        print(dataset_file)
        print(len(page_content))
        # BUG FIX: `pd.datetime` was deprecated and removed from pandas;
        # pd.Timestamp.now() produces the same "YYYY-MM-DD HH:MM:SS" prefix.
        ts_now_str = str(pd.Timestamp.now())[:-7].replace(" ", "_")
        dump_filename = "error_dump-[{file}]{ts_now}.txt".format(
            file=os.path.basename(dataset_file), ts_now=ts_now_str)
        dump_filename = os.path.join(cfg.get("directory", "error_dumps"),
                                     dump_filename)
        print("PRE Dump filename: {fn}".format(fn=dump_filename))
        with open(dump_filename, "w") as error_dump:
            error_dump.write(page_content)
        print("POST Dump filename: {fn}".format(fn=dump_filename))
        print(e)
        # NOTE: an unreachable `return None` after this raise was removed.
        raise
    title = xml_content.find("title").text
    ns = xml_content.find("ns").text
    page_id = xml_content.find("id").text
    for revision in xml_content.findall("revision"):
        try:
            ts_raw = revision.find("timestamp").text
            timestamp = pd.to_datetime(ts_raw, utc=True)
            if not (ts_min <= timestamp <= ts_max):
                continue
            user = revision.find("contributor")
            # Skip revisions whose contributor was suppressed.
            if "deleted" in user.attrib:
                continue
            if user.find("username") is None:
                # Anonymous edit: identified only by IP.
                user_name = ""
                # BUG FIX: np.NaN alias was removed in NumPy 2.0; np.nan is
                # the same float('nan') value.
                user_id = np.nan
                user_ip = user.find("ip").text
            else:
                user_name = user.find("username").text
                user_id = user.find("id").text
                user_ip = ""
            if revision.find("comment") is None:
                # No edit comment: fall back to the revision text as a note.
                if revision.find("text") is None:
                    note = ""
                else:
                    note = revision.find("text").text
                comment = ""
            else:
                note = ""
                comment = revision.find("comment").text
            model = revision.find("model").text
        except Exception as e:
            print(dataset_file)
            print(e)
            print(etree.dump(revision))
            raise
        revision_data = {
            "page_title": title,
            "page_ns": ns,
            "page_id": page_id,
            "timestamp": timestamp,
            "user_name": user_name,
            "user_id": user_id,
            "user_ip": user_ip,
            "comment": comment,
            "model": model,
            "note": note,
            "dataset_file": os.path.basename(dataset_file)
        }
        revisions.append(revision_data)
    return revisions
def _xml_strip(self, xml):
    """Re-serialize *xml* with inter-element (blank) text removed."""
    blank_stripper = etree.XMLParser(remove_blank_text=True)
    stripped_tree = etree.fromstring(xml, parser=blank_stripper)
    return etree.tostring(stripped_tree)
def pdf_to_bboxes(pdf_index, fn, top_margin=0, bottom_margin=100):
    # Get the bounding boxes of text runs in the PDF.
    # Each text run is returned as a dict.
    #
    # pdf_index/fn identify the PDF; top_margin and bottom_margin are
    # percentages of the page height — words above top_margin or below
    # bottom_margin are skipped.  This is a generator of word dicts.
    box_index = 0
    pdfdict = {
        "index": pdf_index,
        "file": fn,
    }
    # pdftotext -bbox emits XHTML with per-word bounding boxes on stdout.
    xml = subprocess.check_output(["pdftotext", "-bbox", fn, "/dev/stdout"])
    # This avoids PCDATA errors: strip control bytes that are illegal in XML.
    codes_to_avoid = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21,
        22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    ]
    cleaned_xml = bytes([x for x in xml if x not in codes_to_avoid])
    dom = etree.fromstring(cleaned_xml)
    for i, page in enumerate(
            dom.findall(".//{http://www.w3.org/1999/xhtml}page")):
        pagedict = {
            "number": i + 1,
            "width": float(page.get("width")),
            "height": float(page.get("height"))
        }
        for word in page.findall("{http://www.w3.org/1999/xhtml}word"):
            # Skip words inside the excluded top margin band.
            if float(word.get("yMax")) < (top_margin / 100.0) * float(
                    page.get("height")):
                continue
            # Skip words inside the excluded bottom margin band.
            if float(word.get("yMin")) > (bottom_margin / 100.0) * float(
                    page.get("height")):
                continue
            yield {
                "index": box_index,
                "pdf": pdfdict,
                "page": pagedict,
                "x": float(word.get("xMin")),
                "y": float(word.get("yMin")),
                "width": float(word.get("xMax")) - float(word.get("xMin")),
                "height": float(word.get("yMax")) - float(word.get("yMin")),
                "text": word.text,
            }
            # box_index counts only the words actually yielded.
            box_index += 1
def get_xml(*args, **kwargs):
    """Fetch a resource via ``get()`` and parse the response body as XML."""
    body = get(*args, **kwargs)
    return etree.fromstring(body)
def test_00_test_mass_mailing_shortener(self):
    """End-to-end check of the mass-mailing link shortener.

    Sends a mailing to three contacts and verifies that http(s) links in the
    rendered bodies are rewritten to '/r/' tracker URLs (per-recipient where
    the target is personalised), while mailto: links are left untouched.
    """
    # Fixture: one mailing list with three contacts.
    mailing_list_A = self.env['mail.mass_mailing.list'].create({
        'name': 'A',
    })
    self.env['mail.mass_mailing.contact'].create({
        'name': 'User 1',
        'email': '*****@*****.**',
        'list_ids': [(4, mailing_list_A.id)]
    })
    self.env['mail.mass_mailing.contact'].create({
        'name': 'User 2',
        'email': '*****@*****.**',
        'list_ids': [(4, mailing_list_A.id)]
    })
    self.env['mail.mass_mailing.contact'].create({
        'name': 'User 3',
        'email': '*****@*****.**',
        'list_ids': [(4, mailing_list_A.id)]
    })
    # The body exercises: a personalised URL (url0), plain URLs (url1),
    # template-built URLs (url2/url3) and a mailto link (url4).
    mass_mailing = self.env['mail.mass_mailing'].create({
        "reply_to_mode": "email",
        "reply_to": "Administrator <*****@*****.**>",
        "mailing_model_id": self.env.ref(
            'mass_mailing.model_mail_mass_mailing_list').id,
        "mailing_domain": "[('list_ids', 'in', [%d])]" % mailing_list_A.id,
        "contact_list_ids": [[6, False, [mailing_list_A.id]]],
        "mass_mailing_campaign_id": False,
        "name": "sdf",
        "body_html": """
Hi,
% set url = "www.odoo.com"
% set httpurl = "https://www.odoo.eu"
Website0: <a id="url0" href="https://www.odoo.tz/my/${object.name}">https://www.odoo.tz/my/${object.name}</h1>
Website1: <a id="url1" href="https://www.odoo.be">https://www.odoo.be</h1>
Website2: <a id="url2" href="https://${url}">https://${url}</h1>
Website3: <a id="url3" href="${httpurl}">${httpurl}</h1>
Email: <a id="url4" href="mailto:[email protected]">[email protected]</h1>
""",
        "schedule_date": False,
        "state": "draft",
        "keep_archives": True,
    })
    # Queue and process the mailing synchronously.
    mass_mailing.put_in_queue()
    mass_mailing._process_mass_mailing_queue()
    sent_mails = self.env['mail.mail'].search(
        [('mailing_id', '=', mass_mailing.id)])
    sent_messages = sent_mails.mapped('mail_message_id')
    self.assertEqual(mailing_list_A.contact_nbr, len(sent_messages),
                     'Some message has not been sent')
    # --- first recipient: http(s) links shortened, mailto untouched -----
    xbody = etree.fromstring(sent_messages[0].body)
    after_url0 = self.getHrefFor(xbody, 'url0')
    after_url1 = self.getHrefFor(xbody, 'url1')
    after_url2 = self.getHrefFor(xbody, 'url2')
    after_url3 = self.getHrefFor(xbody, 'url3')
    after_url4 = self.getHrefFor(xbody, 'url4')
    self.assertTrue('/r/' in after_url0,
                    'URL0 should be shortened: %s' % after_url0)
    self.assertTrue('/r/' in after_url1,
                    'URL1 should be shortened: %s' % after_url1)
    self.assertTrue('/r/' in after_url2,
                    'URL2 should be shortened: %s' % after_url2)
    self.assertTrue('/r/' in after_url3,
                    'URL3 should be shortened: %s' % after_url3)
    self.assertEqual(after_url4, "mailto:[email protected]",
                     'mailto: has been converted')
    # Short links must resolve back to the original targets.
    short0 = self.shorturl_to_link(after_url0)
    short1 = self.shorturl_to_link(after_url1)
    short2 = self.shorturl_to_link(after_url2)
    short3 = self.shorturl_to_link(after_url3)
    self.assertTrue("https://www.odoo.tz/my/User" in short0.url,
                    'URL mismatch')
    self.assertEqual(short1.url, "https://www.odoo.be", 'URL mismatch')
    self.assertEqual(short2.url, "https://www.odoo.com", 'URL mismatch')
    self.assertEqual(short3.url, "https://www.odoo.eu", 'URL mismatch')
    # --- second recipient: same checks --------------------------------
    _xbody = etree.fromstring(sent_messages[1].body)
    _after_url0 = self.getHrefFor(_xbody, 'url0')
    _after_url1 = self.getHrefFor(_xbody, 'url1')
    _after_url2 = self.getHrefFor(_xbody, 'url2')
    _after_url3 = self.getHrefFor(_xbody, 'url3')
    _after_url4 = self.getHrefFor(_xbody, 'url4')
    self.assertTrue('/r/' in _after_url0,
                    'URL0 should be shortened: %s' % _after_url0)
    self.assertTrue('/r/' in _after_url1,
                    'URL1 should be shortened: %s' % _after_url1)
    self.assertTrue('/r/' in _after_url2,
                    'URL2 should be shortened: %s' % _after_url2)
    self.assertTrue('/r/' in _after_url3,
                    'URL3 should be shortened: %s' % _after_url3)
    self.assertEqual(_after_url4, "mailto:[email protected]",
                     'mailto: has been converted')
    _short0 = self.shorturl_to_link(_after_url0)
    _short1 = self.shorturl_to_link(_after_url1)
    _short2 = self.shorturl_to_link(_after_url2)
    _short3 = self.shorturl_to_link(_after_url3)
    self.assertTrue("https://www.odoo.tz/my/User" in _short0.url,
                    'URL mismatch')
    self.assertEqual(_short1.url, "https://www.odoo.be", 'URL mismatch')
    self.assertEqual(_short2.url, "https://www.odoo.com", 'URL mismatch')
    self.assertEqual(_short3.url, "https://www.odoo.eu", 'URL mismatch')
    # The personalised link (url0) gets a distinct short URL per recipient;
    # the static links share one short URL.
    self.assertNotEqual(short0.url, _short0.url)
    self.assertEqual(short1.url, _short1.url)
    self.assertEqual(short2.url, _short2.url)
    self.assertEqual(short3.url, _short3.url)
def extract():
    """Fetch the ISTY timetable XML over HTTP basic auth and return a dict of
    weeks, each holding its dates and a list of course entries ('matieres').

    Consecutive entries for the same course on the same day are merged into
    a single entry spanning both time slots.
    """
    rq = urllib.request.Request(
        'http://chronos.iut-velizy.uvsq.fr/EDTISTY/g112050.xml')
    rq.add_header("Authorization", "Basic ZXR1aXN0eTppc3R5")  # user:pass base64 encoded
    site = urllib.request.urlopen(rq)
    xmlResult = site.read().decode('utf8')
    tree = etree.fromstring(xmlResult)
    semaines = {}
    # Index weeks by their 'alleventweeks' key; the last 10 chars of the
    # description hold the week's start date (DD/MM/YYYY).
    for semaine in tree.xpath("/timetable/span"):
        semaines[semaine.findtext('alleventweeks')] = {}
        semaines[semaine.findtext('alleventweeks')]["date"] = \
            semaine.findtext('description')[-10:]
        # End date = start day + 7, same month/year string.
        semaines[semaine.findtext('alleventweeks')]["date_fin"] = \
            str(int(semaine.findtext('description')[-10:][:2]) + 7) + \
            semaine.findtext('description')[-10:][2:]
        semaines[semaine.findtext('alleventweeks')]["num"] = \
            semaine.findtext('title')
        semaines[semaine.findtext('alleventweeks')]["matieres"] = []
    # NOTE(review): 'result' is never used afterwards — looks like leftover
    # from an earlier XML-building version; confirm before removing.
    result = "<matieres>\n"
    for event in tree.xpath("/timetable/event"):
        # Day of the event = week start + day offset.
        dateSemaine = datetime.datetime.strptime(
            semaines[event.findtext('rawweeks')]["date"], "%d/%m/%Y")
        jour = dateSemaine + datetime.timedelta(days=int(event.findtext('day')))
        resources = event.find("resources")
        # Each resource may be absent; fall back to "" on AttributeError.
        try:
            matiere = mat_code(resources.find("module").findtext("item"))
        except AttributeError:
            matiere = ""
        try:
            salle = resources.find("room").findtext("item")
        except AttributeError:
            salle = ""
        try:
            prof = resources.find("staff").findtext("item")
        except AttributeError:
            prof = ""
        mat = {}
        # Reformat ISO YYYY-MM-DD into DD/MM/YYYY.
        mat["date"] = str(jour)[:10].split("-")
        mat["date"] = mat["date"][2] + "/" + mat["date"][1] + "/" + mat["date"][0]
        mat["debut"] = event.findtext('starttime')
        mat["fin"] = event.findtext('endtime')
        mat["nom"] = matiere
        mat["prof"] = prof
        mat["salle"] = salle
        semaines[event.findtext('rawweeks')]["matieres"].append(mat)
    # Merge back-to-back slots of the same named course on the same day:
    # extend the earlier entry's end time and delete the later one
    # (indices collected in reverse so deletion is safe).
    for sem in semaines:
        to_delete = []
        for i in range(1, len(semaines[sem]["matieres"])):
            if semaines[sem]["matieres"][i - 1]["date"] == semaines[sem]["matieres"][i]["date"] \
                    and semaines[sem]["matieres"][i - 1]["nom"] == semaines[sem]["matieres"][i]["nom"] \
                    and semaines[sem]["matieres"][i]["nom"] != "":
                semaines[sem]["matieres"][i - 1]["fin"] = \
                    semaines[sem]["matieres"][i]["fin"]
                to_delete.insert(0, i)
        for i in to_delete:
            del(semaines[sem]["matieres"][i])
    return semaines
def test_reference_uris_and_custom_key_info(self):
    """Sign SAML responses using explicit reference URIs (single and multiple),
    verify the result, then exercise detached signing and a custom KeyInfo.
    """
    with open(os.path.join(os.path.dirname(__file__), "example.pem"),
              "rb") as fh:
        crt = fh.read()
    with open(os.path.join(os.path.dirname(__file__), "example.key"),
              "rb") as fh:
        key = fh.read()
    # Both ID and Id formats. XPath 1 doesn't have case insensitive attribute search
    for d in [
        '''<samlp:Response xmlns:samlp="urn:oasis:names:tc:SAML:2.0:protocol" xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion" ID="responseId">
 <saml:Assertion xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" ID="assertionId">
  <ds:Signature xmlns:ds="http://www.w3.org/2000/09/xmldsig#" Id="placeholder" />
 </saml:Assertion>
</samlp:Response>''',
        '''<samlp:Response xmlns:samlp="urn:oasis:names:tc:SAML:2.0:protocol" xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion" Id="responseId">
 <saml:Assertion xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" Id="assertionId">
  <ds:Signature xmlns:ds="http://www.w3.org/2000/09/xmldsig#" Id="placeholder" />
 </saml:Assertion>
 <saml:Assertion xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" Id="assertion2">
 </saml:Assertion>
</samlp:Response>'''
    ]:
        data = etree.fromstring(d)
        # A list of URIs when the document carries two assertions,
        # a single URI otherwise.
        reference_uri = ["assertionId", "assertion2"
                         ] if "assertion2" in d else "assertionId"
        signed_root = XMLSigner().sign(data,
                                       reference_uri=reference_uri,
                                       key=key, cert=crt)
        signed_data_root = XMLVerifier().verify(
            etree.tostring(signed_root), x509_cert=crt,
            expect_references=True)[1]
        ref = signed_root.xpath(
            '/samlp:Response/saml:Assertion/ds:Signature/ds:SignedInfo/ds:Reference',
            namespaces={
                "ds": "http://www.w3.org/2000/09/xmldsig#",
                "saml": "urn:oasis:names:tc:SAML:2.0:assertion",
                "samlp": "urn:oasis:names:tc:SAML:2.0:protocol"
            })
        # The Reference URI is '#assertionId' — drop the leading '#'.
        self.assertEqual("assertionId", ref[0].attrib['URI'][1:])
        self.assertEqual(
            "{urn:oasis:names:tc:SAML:2.0:assertion}Assertion",
            signed_data_root.tag)
        # Also test with detached signing
        XMLSigner(method=methods.detached).sign(data,
                                                reference_uri=reference_uri,
                                                key=key, cert=crt)
    # Test setting custom key info
    custom_key_info = etree.fromstring('''
<wsse:SecurityTokenReference xmlns:wsse="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-1.0.xsd">
 <wsse:Reference ValueType="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-x509-token-profile-1.0#X509v3" URI="#uuid-639b8970-7644-4f9e-9bc4-9c2e367808fc-1"/>
</wsse:SecurityTokenReference>''')
    XMLSigner().sign(data, reference_uri=reference_uri, key=key, cert=crt,
                     key_info=custom_key_info)
def decode_caf(self):
    """Decode the base64-encoded CAF file (ISO-8859-1 text) and parse it as XML."""
    decoded = base64.b64decode(self.caf_file).decode('ISO-8859-1')
    return etree.fromstring(decoded)
def resolver(uri):
    """Return the captured document `d`, converting a stdlib Element into an
    lxml element via round-trip serialization when necessary.
    """
    if not isinstance(d, stdlibElementTree.Element):
        return d
    serialized = stdlibElementTree.tostring(d)
    return etree.fromstring(serialized)
def main(args):
    """Render an HTML fragment into the Pleiades page template.

    Reads the fragment (from *args.input_filename* or stdin), merges template
    replacements from an optional metadata file, applies Bootstrap sidebar
    fixups when a 'sidebar' div is present, fills the template, and writes
    the result (to *args.output_filename* or stdout).

    NOTE(review): Python-2-era code (`unicode`, codecs-wrapped std streams).
    """
    logger = logging.getLogger(sys._getframe().f_code.co_name)
    infn = args.input_filename
    tplfn = args.template_filename
    metafn = args.metadata_filename
    outfn = args.output_filename
    with codecs.open(tplfn, 'r', 'utf-8') as f:
        tpl = f.read()
    # set up standard replacements for the template and read any specifics
    # from the specified metadata file
    replacements = {
        u'title': u'',
        u'creator': u'',
        u'description': u'',
        u'date': u'',
        u'datehuman': u'',
        u'subjects': u'',
        u'dctype': u'Text',
        u'language': u'en-US',
        u'citation': u'',
        u'url': u'',
        u'ogtype': u'article',
        u'ogimage': u'http://pleiades.stoa.org/images/pleiades-social-logo/image',
        u'depth': u'./'
    }
    if metafn is not None:
        with codecs.open(metafn, 'r', 'utf-8') as f:
            meta = f.readlines()
        # Metadata ends at the first blank line.
        for i, line in enumerate(meta):
            if line.strip() == u'':
                break
        if i > 0:
            meta = meta[:i]
        # Each metadata line is "key: value".
        for line in meta:
            k, v = line.split(u':')
            k = k.strip().lower()
            v = v.strip()
            if v == u'':
                # An empty 'date' defaults to today (ISO date part only).
                if k == u'date':
                    v = unicode(
                        datetime.datetime.now().isoformat().split(u'T')[0])
            if v != u'':
                if k == u'url':
                    # Relative URLs: compute the '../' depth prefix and
                    # anchor the URL at the Pleiades host.
                    if not v.startswith('http'):
                        for i in range(0, len(v.split('/')) - 1):
                            replacements[
                                'depth'] = '../' + replacements['depth']
                        v = u'http://pleiades.stoa.org/{0}'.format(v)
                replacements[k] = v
    if replacements['datehuman'] == u'':
        replacements['datehuman'] = dateparser.parse(
            replacements['date']).strftime(u'%d %B %Y')
    if replacements['citation'] == u'':
        replacements[
            'citation'] = u'{creator}. "{title}." Pleiades, {datehuman}. {url}.'.format(
                **replacements)
    # read in the input file
    if infn is None:
        sys.stdin = codecs.getreader('utf-8')(sys.stdin)
        html = sys.stdin.read()
    else:
        with codecs.open(infn, 'r', 'utf-8') as f:
            html = f.read()
    # do fixups on html content (e.g., style bulkups)
    if u'sidebar' in html:
        try:
            doc = etree.fromstring(html)
        except etree.XMLSyntaxError:
            logger.warning('BUSTED HTML:\n\n----------' + html +
                           '\n-----------\n\n')
            raise
        # verify we have a sidebar div
        sidebars = doc.xpath("//div[contains(@class, 'sidebar')]")
        if len(sidebars) == 0:
            pass
        elif len(sidebars) > 1:
            logger.warning('detected multiple sidebar divs in html; ignoring')
        else:
            # wrap the content in another row
            html = u'<div class="sbholder">\n' + html + \
                u'\n</div> <!-- end sidebar holder -->\n'
            doc = etree.fromstring(html)
            holder = doc.xpath("//div[contains(@class, 'sbholder')]")[0]
            sidebars = doc.xpath("//div[contains(@class, 'sidebar')]")
            # Move each sidebar to the end of the holder and tag it with the
            # Bootstrap sidebar class.
            for sidebar in sidebars:
                parent = sidebar.getparent()
                sidebar.attrib['class'] += u' {0}'.format(BOOTSTRAP_SIDEBAR)
                holder.append(sidebar)
            # Tag the first remaining child as the sidebar's sibling column.
            sib = holder[0]
            try:
                sib.attrib['class'] += u' {0}'.format(BOOTSTRAP_SIDEBAR_SIB)
            except KeyError:
                sib.attrib['class'] = BOOTSTRAP_SIDEBAR_SIB
            html = etree.tounicode(doc, method='xml')
    # fill in the template
    replacements[u'content'] = html
    try:
        html = tpl.format(**replacements)
    except KeyError:
        raise
    # write out the result
    if outfn is None:
        sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
        sys.stdout.writelines(html)
    else:
        with codecs.open(outfn, 'w', 'utf-8') as f:
            f.writelines(html)