def get_article_by_search_wap(keyword, wap_dict):
    r"""Turn WAP search result items into account/article dictionaries.

    Every entry of ``wap_dict['items']`` is converted to bytes; each
    occurrence of the keyword wrapped between the marker bytes
    ``\xee\x90\x8a`` and ``\xee\x90\x8b`` is replaced by the bare keyword,
    and the result is parsed as XML.

    Returns a list with one ``{'gzh': {...}, 'article': {...}}`` dict per
    item, filled from the children of the first ``<display>`` element.
    """
    results = []
    for raw_item in wap_dict['items']:
        payload = str_to_bytes(raw_item)
        wrapped_keyword = b'\xee\x90\x8a' + str_to_bytes(keyword) + b'\xee\x90\x8b'
        payload = payload.replace(wrapped_keyword, str_to_bytes(keyword))
        display = XML(payload).find('.//display')

        def text_of(tag, node=display):
            # Text of the named direct child of <display>.
            return node.find(tag).text

        account = {
            'profile_url': text_of('encGzhUrl'),
            'open_id': text_of('openid'),
            'isv': text_of('isV'),
            'wechat_name': text_of('sourcename'),
            'wechat_id': text_of('username'),
            'headimage': text_of('headimage'),
            'qrcode': text_of('encQrcodeUrl'),
        }
        article = {
            'title': text_of('title'),
            'url': text_of('url'),  # encArticleUrl
            'main_img': text_of('imglink'),
            'abstract': text_of('content168'),
            'time': text_of('lastModified'),
        }
        results.append({'gzh': account, 'article': article})
    return results
def testListMetadataFormatsWithRepositoryIdentifier(self):
    """With repository identifier 'example.org', a bare id is rejected and
    the 'oai:example.org:'-prefixed id lists its metadata formats."""
    self.init('example.org')

    def bodyFor(identifier):
        # Issue one ListMetadataFormats request and return the HTTP body.
        response = self.listMetadataFormats.listMetadataFormats(
            arguments={
                'verb': ['ListMetadataFormats'],
                'identifier': [identifier],
            },
            **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        return body

    body = bodyFor('id0')
    errorNodes = xpath(
        XML(body.encode()),
        '/oai:OAI-PMH/oai:error[@code="idDoesNotExist"]')
    self.assertTrue(errorNodes, body)

    body = bodyFor('oai:example.org:id0')
    prefixes = xpath(
        XML(body.encode()),
        '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat/oai:metadataPrefix/text()')
    self.assertEqual(['oai_dc'], prefixes)
def parse(self, response):
    """Yield a ``RawData`` and a ``ProductPackage`` for every ``<Reference>``.

    The response body is parsed as XML; each field is the first text node
    at the given child path, or ``None`` when the path yields nothing.
    """
    document = XML(response.body)
    for reference in document.xpath('//Reference'):

        def text_at(path, default=None, node=reference):
            # First text result for ``path`` below this reference.
            return first(node.xpath(path), default)

        cat_no = text_at('./Order_Code/text()')
        raw_fields = {
            "brand": self.brand,
            "cat_no": cat_no,
            "cas": text_at('./CAS_Registry_Number/text()'),
            "en_name": text_at('./Reference_Standard/text()'),
            "info2": text_at('./Storage/text()'),
            "info3": text_at('./Quantity_per_vial/text()'),
            "info4": text_at('./Price/text()'),
            "prd_url": f"https://crs.edqm.eu/db/4DCGI/View={text_at('./Order_Code/text()', '')}",
        }
        yield RawData(**raw_fields)

        price = text_at('./Price/text()')
        package = text_at('./Quantity_per_vial/text()')
        if price:
            # Strip the currency sign; a falsy price is passed through as is.
            price = price.replace('€', '')
        yield ProductPackage(
            brand=self.brand,
            cat_no=cat_no,
            package=package,
            price=price,
            currency='EUR',
        )
def iter_transactions(self, coming):
    """Yield transactions page after page, following the XML pagination.

    ``coming`` is forwarded to ``_iter_transactions`` for each page and is
    switched to False as soon as a yielded transaction is not ``_coming``.
    Iteration stops when there is no part URL, no ``dataBody`` element, or
    the ``suite`` header field is not 1.
    """
    page_url = self.get_part_url()
    if page_url is None:
        # There are no transactions in this kind of account
        return

    while True:
        envelope = XML(self.browser.readurl(page_url))
        try:
            body = self.parser.select(envelope, '//dataBody', 1, 'xpath')
        except BrokenPageError:
            # No transactions.
            return

        stream = StringIO(unicode(body.text).encode('iso-8859-1'))
        page = self.browser.get_document(stream)
        for transaction in self._iter_transactions(page, coming):
            if not transaction._coming:
                coming = False
            yield transaction

        header = envelope.xpath('//dataHeader')[0]
        if int(header.find('suite').text) != 1:
            return

        # Build the URL of the next page from the header fields and the
        # sign/src parameters of the current URL.
        parts = urlparse(page_url)
        query = parse_qs(parts.query)
        page_url = self.browser.buildurl(
            parts.path,
            n10_nrowcolor=0,
            operationNumberPG=header.find('operationNumber').text,
            operationTypePG=header.find('operationType').text,
            pageNumberPG=header.find('pageNumber').text,
            idecrit=header.find('idecrit').text or '',
            sign=query['sign'][0],
            src=query['src'][0])
def test_many_and_one(self):
    """Check rendering of ``One`` and ``Many`` selections over a dictionary,
    followed by the hidden and readonly display modes of the ``Many`` field.

    Uses ``assertEqual``; the ``assertEquals`` alias is deprecated and was
    removed from ``unittest`` in Python 3.12.
    """
    lang = Dict(dict(nb="Norwegian", en="English", dk="Danish"))
    form = Form("http://example.com/submit", "send")

    # Single selection: one selected item, two unselected.
    model = dict(a=One(lang))
    manip = Manip(model)
    manip.a = "en"
    form["x"] = ModelData(manip)
    xml = form.create()
    validate_form(XML(xml))
    self.assertEqual(xml.count("</one>"), 1)
    self.assertEqual(xml.count("</item>"), 2)
    self.assertEqual(xml.count("</sel_item>"), 1)

    # Multiple selection: two selected items, one unselected.
    model = dict(a=Many(lang))
    manip = Manip(model)
    manip.a = ["en", "nb"]
    form["x"] = ModelData(manip)
    xml = form.create()
    validate_form(XML(xml))
    self.assertEqual(xml.count("</many>"), 1)
    self.assertEqual(xml.count("</item>"), 1)
    self.assertEqual(xml.count("</sel_item>"), 2)

    # Hidden: one <hidden> element per selected value.
    form["x"].display["a"] = {"hidden": True}
    xml = form.create()
    validate_form(XML(xml))
    self.assertEqual(xml.count("</hidden>"), 2)

    # Readonly is expected to be rejected for this field.
    form["x"].display["a"] = {"readonly": True}
    self.assertRaises(ValueError, form.create)
def make_view(self):
    """Create or update the extension form view that carries the workflow
    container for ``self.model``.

    The arch template is chosen by whether the inherited view's arch
    contains a ``<header>`` tag; the element returned by
    ``make_wkf_contain()`` is inserted as its first child.  The view is
    created and stored in ``view_id`` when none exists yet, otherwise the
    existing view is overwritten.

    Returns ``True``.
    """
    self.ensure_one()
    view_obj = self.env['ir.ui.view']

    have_header = '<header>' in self.model_view_id.arch
    # Choose the template text first and parse once.  The previous
    # ``cond and XML(a) or XML(b)`` form relied on the truth value of the
    # parsed element, which lxml defines as "has children": a header
    # template without child elements silently fell back to the no-header
    # template.
    template = arch_template_header if have_header else arch_template_no_header
    arch = XML(template)

    wkf_contain = self.make_wkf_contain()
    arch.insert(0, wkf_contain)

    view_data = {
        'name': '%s.WKF.form.view' % self.model,
        'type': 'form',
        'model': self.model,
        'inherit_id': self.model_view_id.id,
        'mode': 'extension',
        'arch': tostring(arch),
        'priority': 99999,
    }

    # update or create view
    view = self.view_id
    if not view:
        view = view_obj.create(view_data)
        self.write({'view_id': view.id})
    else:
        view.write(view_data)

    return True
def process_saml_md_about_sps(saml_md: bytes):
    """Normalise SAML metadata and return it as an element tree.

    The metadata is parsed, run through the two packaged XSLT stylesheets
    (``REMOVE_NAMESPACE_PREFIXES_XSL_FILE_PATH`` and then
    ``REMOVE_KEY_WHITESPACE_XSL_FILE_PATH``), serialised as exclusive C14N
    without comments and parsed again.

    NOTE(review): the initial ``XML(saml_md)`` uses the default parser,
    not the entity-disabled one below - confirm this is intended.
    """
    source_tree = XML(saml_md)
    localparser = XMLParser(
        remove_blank_text=True,
        resolve_entities=False,
        remove_comments=False)

    def load_transform(relative_path):
        # Parse a packaged stylesheet and compile it into an XSLT callable.
        resource = files('SPF_SAML_metadata_processor').joinpath(relative_path)
        with resource.open('rb') as stylesheet_file:
            stylesheet = parse(stylesheet_file, parser=localparser)
        return XSLT(stylesheet)

    strip_prefixes = load_transform(REMOVE_NAMESPACE_PREFIXES_XSL_FILE_PATH)
    without_prefixes = strip_prefixes(source_tree)

    strip_key_whitespace = load_transform(REMOVE_KEY_WHITESPACE_XSL_FILE_PATH)
    cleaned = strip_key_whitespace(without_prefixes)

    canonical = BytesIO()
    cleaned.write_c14n(canonical, exclusive=True, with_comments=False)

    return XML(canonical.getvalue(), localparser).getroottree()
def get_article_by_search_wap(keyword, wap_dict):
    r"""Build article/account records from WAP search result items.

    For each entry of ``wap_dict['items']`` the keyword enclosed in the
    marker bytes ``\xee\x90\x8a`` / ``\xee\x90\x8b`` is replaced by the
    plain keyword, the entry is parsed as XML, and the text of selected
    children of its first ``<display>`` element is collected.

    Returns a list of ``{'gzh': {...}, 'article': {...}}`` dictionaries.
    """
    # (result key, child tag of <display>) pairs, in output order.
    gzh_tags = (
        ('profile_url', 'encGzhUrl'),
        ('open_id', 'openid'),
        ('isv', 'isV'),
        ('wechat_name', 'sourcename'),
        ('wechat_id', 'username'),
        ('headimage', 'headimage'),
        ('qrcode', 'encQrcodeUrl'),
    )
    article_tags = (
        ('title', 'title'),
        ('url', 'url'),  # encArticleUrl
        ('main_img', 'imglink'),
        ('abstract', 'content168'),
        ('time', 'lastModified'),
    )

    def collect(display, pairs):
        return dict((key, display.find(tag).text) for key, tag in pairs)

    datas = []
    for entry in wap_dict['items']:
        entry_bytes = str_to_bytes(entry)
        highlighted = b'\xee\x90\x8a' + str_to_bytes(keyword) + b'\xee\x90\x8b'
        cleaned = entry_bytes.replace(highlighted, str_to_bytes(keyword))
        display = XML(cleaned).find('.//display')
        datas.append({
            'gzh': collect(display, gzh_tags),
            'article': collect(display, article_tags),
        })
    return datas
def validateFootnote(modelXbrl, footnote):
    """Validate the HTML content of a footnote (EFM 6.05.34 checks).

    The footnote is copied under a fresh ``<body>`` element, validated
    against ``edbodyDTD`` and scanned for javascript or external
    references in ``a/@href`` and ``img/@src``, for graphics references
    that are not ``.jpg``/``.gif`` or whose file content does not match,
    and for nested tables.  Every finding is reported through
    ``modelXbrl.error``; nothing is returned.
    """
    loadDTD(modelXbrl)
    checkedGraphicsFiles = set()  # only check any graphics file reference once per footnote
    xlinkLabel = footnote.get("{http://www.w3.org/1999/xlink}label")

    def report(code, message, **details):
        # All findings share the same model object and footnote label.
        modelXbrl.error(code, message,
                        modelObject=footnote, xlinkLabel=xlinkLabel, **details)

    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            report("EFM.6.05.34.dtdError",
                   _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                   error=', '.join(e.message for e in edbodyDTD.error_log.filter_from_errors()))
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                if ((attrTag == "href" and eltTag == "a") or
                        (attrTag == "src" and eltTag == "img")):
                    if "javascript:" in attrValue:
                        report("EFM.6.05.34.activeContent",
                               _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                               attribute=attrTag, element=eltTag)
                    elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                        pass  # links into the EDGAR archive are allowed
                    elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                        report("EFM.6.05.34.externalReference",
                               _("Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"),
                               attribute=attrTag, element=eltTag, value=attrValue)
                    if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                        if attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                            report("EFM.6.05.34.graphicFileType",
                                   _("Footnote %(xlinkLabel)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                   attribute=attrValue, element=eltTag)
                        else:  # test file contents
                            try:
                                if validateGraphicFile(footnote, attrValue) != attrValue.lower()[-3:]:
                                    report("EFM.6.05.34.graphicFileContent",
                                           _("Footnote %(xlinkLabel)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                           attribute=attrValue, element=eltTag)
                            except IOError as ioError:
                                report("EFM.6.05.34.graphicFileError",
                                       _("Footnote %(xlinkLabel)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                       attribute=attrValue, element=eltTag, error=ioError)
                        checkedGraphicsFiles.add(attrValue)
            if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                report("EFM.6.05.34.nestedTable",
                       _("Footnote %(xlinkLabel)s has nested <table> elements."))
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the exception that was actually caught.  The DTD error log
        # used here before is unrelated to a syntax/decoding failure and
        # was passed as a raw log object rather than a message.
        report("EFM.6.05.34",
               _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
               error=err)
def do_search(param, sort, page=1, rows=100):
    """Run a Solr query and wrap the outcome in a ``web.storage``.

    On success the storage carries the facet counts, the ``result``
    element (``docs``) and its ``numFound`` count, with ``error=None``.
    When the reply is empty, starts with ``<html`` or is not well-formed
    XML, ``docs`` is ``[]`` and ``error`` holds the unquoted text matched
    by ``re_pre``, or the raw reply when nothing matches.
    """
    (reply, solr_select, q_list) = run_solr_query(param, rows, page, sort)

    root = None
    # An empty/missing reply is as unusable as an HTML error page.
    is_bad = not reply or reply.startswith('<html')
    if not is_bad:
        try:
            root = XML(reply)
        except XMLSyntaxError:
            is_bad = True

    # ``param.get('q', 'None')`` yielded the truthy *string* 'None' when
    # ``q`` was absent, so the flag could only be False for an empty ``q``.
    # Use the plain lookup, as the spellcheck-aware do_search variant does.
    is_advanced = bool(param.get('q'))

    if is_bad:
        m = re_pre.search(reply) if reply else None
        return web.storage(
            facet_counts=None,
            docs=[],
            is_advanced=is_advanced,
            num_found=None,
            solr_select=solr_select,
            q_list=q_list,
            error=(web.htmlunquote(m.group(1)) if m else reply),
        )

    docs = root.find('result')
    return web.storage(
        facet_counts=read_facets(root),
        docs=docs,
        is_advanced=is_advanced,
        num_found=(int(docs.attrib['numFound']) if docs is not None else None),
        solr_select=solr_select,
        q_list=q_list,
        error=None,
    )
def test_read_only_and_hidden_and_xmlns(self):
    """Check readonly and hidden rendering of an ``Int`` field and the
    effect of ``form.xmlns`` on the generated namespace declaration.

    Uses ``assertEqual``; the ``assertEquals`` alias is deprecated and was
    removed from ``unittest`` in Python 3.12.
    """
    model = dict(a=Int())
    manip = Manip(model)
    form = Form("http://example.com/submit", "send")
    form["x"] = ModelData(manip)

    # test read only
    form["x"].display["a"] = {"readonly": True}
    xml = form.create()
    validate_form(XML(xml))
    self.assertEqual(xml.count("</readonly>"), 1)
    self.assertEqual(xml.count("xmlns="), 1)

    # test hidden
    form["x"].display["a"] = {"hidden": True}
    xml = form.create()
    validate_form(XML(xml))
    self.assertEqual(xml.count("</hidden>"), 1)

    # test disable namespace
    form.xmlns = None
    xml = form.create()
    self.assertEqual(xml.count("xmlns="), 0)

    form.xmlns = "XXXX"
    xml = form.create()
    self.assertEqual(xml.count("xmlns=\"XXXX\""), 1)
def getVersion(self, currentTag):
    """Return the text of the first ``currentTag`` element found in the
    XML document downloaded from ``self.URL``.

    An ``AttributeError`` results when the document has no such element,
    because ``find`` then returns ``None``.
    """
    response = urlopen(self.URL)
    try:
        update = response.read()
    finally:
        # Release the connection even when reading fails; it was never
        # closed before.
        response.close()
    root = XML(update)
    cur_version = root.find(".//" + currentTag)
    return cur_version.text
def fields_view_get(self, view_id=None, view_type='form', toolbar=False, submenu=False):
    """Return the view definition; on form views, users outside the sales
    manager group get ``no_create`` options on the source, campaign and
    medium fields."""
    res = super(Lead, self).fields_view_get(view_id, view_type, toolbar, submenu)
    if view_type != 'form':
        return res

    doc = XML(res['arch'])
    if not self.user_has_groups('sales_team.group_sale_manager'):
        for field_name in ('source_id', 'campaign_id', 'medium_id'):
            matches = doc.xpath("//field[@name='{0}']".format(field_name))
            if not matches:
                continue
            node = matches[0]
            node.set(
                "options",
                "{'no_create': True, 'no_create_edit': True}")
            if field_name in res['fields']:
                setup_modifiers(node, res['fields'][field_name])
    res['arch'] = tostring(doc, encoding='unicode')
    return res
def getMessage(self, messageTag):
    """Return the text of the first ``messageTag`` element found in the
    XML document downloaded from ``self.URL``.

    An ``AttributeError`` results when the document has no such element,
    because ``find`` then returns ``None``.
    """
    response = urlopen(self.URL)
    try:
        update = response.read()
    finally:
        # Release the connection even when reading fails; it was never
        # closed before.
        response.close()
    root = XML(update)
    mess = root.find(".//" + messageTag)
    return mess.text
class RefSeq(object):
    """Read-only accessors over a parsed ``GBSet`` XML document."""

    def __init__(self, xml):
        self._root = XML(xml)

    def cds_start_end(self):
        """Return ``_feature_se`` of the single CDS feature.

        Asserts that exactly one CDS feature is present.
        """
        cds_features = self._root.xpath(
            '/GBSet/GBSeq/GBSeq_feature-table/GBFeature[GBFeature_key/text()="CDS"]')
        assert len(cds_features) == 1, "expected exactly one CDS GBFeature_key node"
        start, end = _feature_se(cds_features[0])
        return start, end

    def cds_start(self):
        """Return the first component of ``cds_start_end()``."""
        start, _end = self.cds_start_end()
        return start

    def chr(self):
        """Return the text of the first "chromosome" qualifier value of the
        source feature."""
        values = self._root.xpath(
            '/GBSet/GBSeq/GBSeq_feature-table'
            '/GBFeature[GBFeature_key/text()="source"]'
            '/GBFeature_quals'
            '/GBQualifier[GBQualifier_name/text()="chromosome"]'
            '/GBQualifier_value')
        return values[0].text

    def exons(self):
        """Return ``_feature_se`` for every exon feature, in document order."""
        return [
            _feature_se(exon)
            for exon in self._root.xpath(
                '/GBSet/GBSeq/GBSeq_feature-table/GBFeature[GBFeature_key="exon"]')
        ]

    def seq(self):
        """Return the text of the first ``GBSeq_sequence`` element."""
        sequences = self._root.xpath('/GBSet/GBSeq/GBSeq_sequence')
        return sequences[0].text
def iter_transactions(self):
    """Yield every transaction, walking through the paginated XML feed.

    Stops when there is no part URL, when a page has no ``dataBody``
    element, or when the ``suite`` header field is not 1.
    """
    next_url = self.get_part_url()
    if next_url is None:
        # There are no transactions in this kind of account
        return

    while True:
        feed = XML(self.browser.readurl(next_url))
        try:
            data_body = self.parser.select(feed, '//dataBody', 1, 'xpath')
        except BrokenPageError:
            # No transactions.
            return

        encoded = unicode(data_body.text).encode('iso-8859-1')
        document = self.browser.get_document(StringIO(encoded))
        for transaction in self._iter_transactions(document):
            yield transaction

        data_header = feed.xpath('//dataHeader')[0]
        if int(data_header.find('suite').text) != 1:
            return

        # The next page is addressed with the header fields plus the
        # sign/src parameters taken from the current URL.
        current = urlparse(next_url)
        params = parse_qs(current.query)
        next_url = self.browser.buildurl(
            current.path,
            n10_nrowcolor=0,
            operationNumberPG=data_header.find('operationNumber').text,
            operationTypePG=data_header.find('operationType').text,
            pageNumberPG=data_header.find('pageNumber').text,
            idecrit=data_header.find('idecrit').text or '',
            sign=params['sign'][0],
            src=params['src'][0])
def testListMetadataFormatsWithIdentifierAndSomeDeletes(self):
    """The listed metadata formats of a record stay ['oai_dc', 'rdf'] after
    the record is deleted in the 'oai_dc' prefix."""
    self.init()
    self.oaijazz.addOaiRecord(
        identifier="id1", setSpecs=[], metadataPrefixes=['rdf', 'oai_dc'])

    def listedPrefixes():
        # Request the formats for id1 and extract the metadataPrefix texts.
        response = self.listMetadataFormats.listMetadataFormats(
            arguments={
                'verb': ['ListMetadataFormats'],
                'identifier': ['id1'],
            },
            **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        return xpath(
            XML(body.encode()),
            '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat/oai:metadataPrefix/text()')

    self.assertEqual(['oai_dc', 'rdf'], listedPrefixes())

    self.oaijazz.deleteOaiRecordInPrefixes(
        identifier="id1", metadataPrefixes=['oai_dc'])

    self.assertEqual(['oai_dc', 'rdf'], listedPrefixes())
def createReturnValue(data, parse):
    """Split raw response data into ``(statusAndHeaders, body)``.

    When ``parse`` is true the body is converted according to the
    Content-Type header: lenient HTML parsing for 'html', XML for 'xml',
    ``json.loads`` for 'json' (a 'JSONDecodeError in: ...' string when
    decoding fails).  Without a Content-Type header a non-blank body is
    parsed as XML, falling back to lenient HTML; if both fail the body is
    printed and the exception re-raised.
    """
    statusAndHeaders, body = _parseData(data)
    contentType = statusAndHeaders['Headers'].get('Content-Type')
    if not parse:
        return statusAndHeaders, body

    if contentType is not None:
        if 'html' in contentType:
            body = HTML(body, HTMLParser(recover=True))
        elif 'xml' in contentType:
            body = XML(body)
        elif 'json' in contentType:
            try:
                body = loads(body.decode())
            except JSONDecodeError:
                body = 'JSONDecodeError in: ' + body.decode()
    elif body.strip() != b'':
        try:
            body = XML(body)
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed by the fallback.
        except Exception:
            try:
                body = HTML(body, HTMLParser(recover=True))
            except Exception:
                print("Exception parsing:")
                print(body)
                raise
    return statusAndHeaders, body
def parse_gir(gir_path):
    """Extract everything from a gir file"""
    print("Parsing {}".format(gir_path))
    parser = XMLParser(encoding="utf-8", recover=True)
    # Read the raw bytes inside a context manager: the handle is closed
    # deterministically, and decoding is left to the parser (configured
    # for UTF-8) instead of the platform's default text encoding.
    with open(gir_path, "rb") as gir_file:
        content = gir_file.read()
    root = XML(content, parser)
    # First <namespace> child of the root; IndexError when there is none.
    namespace = root.findall("{%s}namespace" % XMLNS)[0]
    namespace_content = extract_namespace(namespace)
    return namespace_content
def Index(self):
    '''Build local data structures'''
    try:
        tree = XML(self.data)
    except XMLSyntaxError:
        # Leave label/entries untouched when the data is not parseable.
        logger.error("Failed to parse %s"%(self.name))
    else:
        self.label = tree.attrib[self.__identifier__]
        self.entries = tree.getchildren()
def parse_response(response_string):
    """Parse the response from an API method.

    Returns the parsed root element; raises ``UCSM_XML_API_Error`` when
    the root carries an ``errorCode`` attribute.
    """
    doc = XML(response_string)
    error_code = doc.get("errorCode")
    if error_code is None:
        return doc
    raise UCSM_XML_API_Error(doc.get("errorDescr"), error_code)
def parse_gir(gir_path):
    """Extract everything from a gir file"""
    print("Parsing {}".format(gir_path))
    parser = XMLParser(encoding='utf-8', recover=True)
    # Binary read under ``with``: no leaked file handle, and the bytes
    # reach the UTF-8 parser without first being decoded with the
    # platform's default text encoding.
    with open(gir_path, 'rb') as gir_file:
        content = gir_file.read()
    root = XML(content, parser)
    # First <namespace> child of the root; IndexError when there is none.
    namespace = root.findall('{%s}namespace' % XMLNS)[0]
    return extract_namespace(namespace)
def iter_transactions(self):
    """Yield the transactions of all pages of the XML feed.

    On a deferred card page, card transactions are retyped as deferred
    card transactions; until a card summary has been seen they are also
    flagged as coming and, when a debit date was found on the page, dated
    to it.  Pagination follows the ``dataHeader`` fields and stops when
    ``suite`` is not 1 or a page has no ``dataBody``.
    """
    page_url = self.get_part_url()
    if page_url is None:
        # There are no transactions in this kind of account
        return

    is_deferred_card = bool(self.doc.xpath(u'//div[contains(text(), "Différé")]'))
    has_summary = False

    if is_deferred_card:
        coming_debit_date = None
        # get coming debit date for deferred_card
        find_date = Regexp(
            CleanText(u'//option[contains(text(), "détail des factures à débiter le")]'),
            r'(\d{2}/\d{2}/\d{4})',
            default=NotAvailable)
        date_string = find_date(self.doc)
        if date_string:
            coming_debit_date = parse_d(date_string)

    while True:
        feed = XML(self.browser.open(page_url).content)
        bodies = feed.xpath('//dataBody')
        if not bodies:
            return

        encoded = unicode(bodies[0].text).encode('iso-8859-1')
        page = fromstring(encoded)

        for transaction in self._iter_transactions(page):
            if transaction.type == Transaction.TYPE_CARD_SUMMARY:
                has_summary = True
            if is_deferred_card and transaction.type is Transaction.TYPE_CARD:
                transaction.type = Transaction.TYPE_DEFERRED_CARD
                if not has_summary:
                    if coming_debit_date:
                        transaction.date = coming_debit_date
                    transaction._coming = True
            yield transaction

        header = feed.xpath('//dataHeader')[0]
        if int(header.find('suite').text) != 1:
            return

        current = urlparse(page_url)
        query = parse_qs(current.query)
        args = {
            'n10_nrowcolor': 0,
            'operationNumberPG': header.find('operationNumber').text,
            'operationTypePG': header.find('operationType').text,
            'pageNumberPG': header.find('pageNumber').text,
            'idecrit': header.find('idecrit').text or '',
            'sign': query['sign'][0],
            'src': query['src'][0],
        }
        page_url = '%s?%s' % (current.path, urlencode(args))
def test_merge_siblings():
    """Adjacent same-tag siblings are merged; separated ones are not."""
    # (input document, expected serialisation after merge_siblings)
    cases = (
        ('<doc><a>a</a><a>b</a><a>c</a><b>d</b><a>e</a></doc>',
         b'<doc><a>abc</a><b>d</b><a>e</a></doc>'),
        ('<doc><a>a<a>b</a></a><a>c</a><b>d</b><a>e</a></doc>',
         b'<doc><a>a<a>b</a>c</a><b>d</b><a>e</a></doc>'),
        ('<doc><a>a</a><a>b<a>c</a></a><b>d</b><a>e</a></doc>',
         b'<doc><a>ab<a>c</a></a><b>d</b><a>e</a></doc>'),
    )
    for source, expected in cases:
        merged = merge_siblings(XML(source))
        assert tostring(merged) == expected
def getMessage(self, messageTag):
    """Return the text of the first ``messageTag`` element found in the
    XML document downloaded from ``self.URL``.

    An ``AttributeError`` results when the document has no such element,
    because ``find`` then returns ``None``.
    """
    from urllib.request import urlopen
    from lxml.etree import XML

    # The response is a context manager; closing it releases the
    # connection, which was previously left open.
    with urlopen(self.URL) as response:
        update = response.read()
    root = XML(update)
    mess = root.find(".//" + messageTag)
    return mess.text
def Index(self):
    """Build local data structures."""
    try:
        parsed = XML(self.data)
    except XMLSyntaxError:
        # Keep the previous state when the data cannot be parsed.
        logger.error("Failed to parse %s" % (self.name))
        return

    self.xdata = parsed
    self.entries = parsed.getchildren()
    identifier = self.__identifier__
    if identifier is not None:
        # The label is the root attribute named by __identifier__.
        self.label = parsed.attrib[identifier]
def getVersion(self, currentTag):
    """Return the text of the first ``currentTag`` element found in the
    XML document downloaded from ``self.URL``.

    An ``AttributeError`` results when the document has no such element,
    because ``find`` then returns ``None``.
    """
    from urllib.request import urlopen
    from lxml.etree import XML

    # The response is a context manager; closing it releases the
    # connection, which was previously left open.
    with urlopen(self.URL) as response:
        update = response.read()
    root = XML(update)
    cur_version = root.find(".//" + currentTag)
    return cur_version.text
def validateFootnote(modelXbrl, footnote):
    """Validate the HTML content of a footnote (EFM 6.05.34).

    The footnote is copied under a fresh ``<body>`` element, validated
    against ``edbodyDTD`` and scanned for javascript or external
    references in ``a/@href`` and ``img/@src``.  Every finding is reported
    through ``modelXbrl.error``; nothing is returned.
    """
    loadDTD(modelXbrl)
    xlinkLabel = footnote.get("{http://www.w3.org/1999/xlink}label")

    def report(message, **details):
        # All findings share the code, model object and footnote label.
        modelXbrl.error("EFM.6.05.34", message,
                        modelObject=footnote, xlinkLabel=xlinkLabel, **details)

    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            report(
                _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                error=', '.join(
                    e.message
                    for e in edbodyDTD.error_log.filter_from_errors()))
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                isLink = attrTag == "href" and eltTag == "a"
                isImage = attrTag == "src" and eltTag == "img"
                if not (isLink or isImage):
                    continue
                if "javascript:" in attrValue:
                    report(
                        _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                        attribute=attrTag, element=eltTag)
                elif attrValue.startswith(
                        "http://www.sec.gov/Archives/edgar/data/"
                ) and eltTag == "a":
                    pass  # links into the EDGAR archive are allowed
                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                    report(
                        _("Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"),
                        attribute=attrTag, element=eltTag, value=attrValue)
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the exception that was actually caught.  The DTD error log
        # used here before is unrelated to a syntax/decoding failure and
        # was passed as a raw log object rather than a message.
        report(
            _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
            error=err)
def test_xml_list(self):
    """The generated listing reports a VAT amount of 94.50 for the client
    with VAT number 0477472701."""
    full_list = self._prepare_listing()
    full_list.create_xml()

    ns = {"ns2": "http://www.minfin.fgov.be/ClientListingConsignment"}
    xml = XML(base64.b64decode(full_list.file_save))
    amounts = xml.xpath(
        '//ns2:Client[ns2:CompanyVATNumber[text() = "0477472701"]]/ns2:VATAmount',
        namespaces=ns,
    )
    xml_vat_amount = amounts[0].text

    self.assertEqual("94.50", xml_vat_amount)
def parse_result(self, string, bs=False):
    """Extract the tokens of every ``<sentence>`` element in ``string``.

    A token is the text between an ideographic space (U+3000) and the
    following parenthesised word, as matched by the pattern below.

    Returns one flat list of tokens, or a list of per-sentence token lists
    when ``bs`` is true.  A sentence without text contributes no tokens.
    """
    # Same pattern value as before, written once: the regex part is a raw
    # string so ``\(`` and ``\w`` are no longer invalid string escapes.
    token_re = re.compile("\u3000" r"([^(]+)\(\w+\)")

    root = XML(string)
    per_sentence = [
        # ``text`` is None for an empty element; treat it as no tokens
        # instead of failing with a TypeError.
        token_re.findall(sent.text or "")
        for sent in root.xpath(".//sentence")
    ]
    if bs:
        return per_sentence
    return list(itertools.chain.from_iterable(per_sentence))
def test_single_vj_excluded_date(self):
    """The single vehicle journey of the fixture has an empty days bitmask
    for the week starting 2018-03-05."""
    monday_of_desired_week = datetime.date(2018, 3, 5)

    fixture_path = dirname(__file__) + "/single_vj.xml"
    with open(fixture_path, mode='rb') as fixture:
        root = XML(fixture.read())

    # The fixture must contain exactly one VehicleJourney element.
    [vjelem] = root.xpath("//tx:VehicleJourney", namespaces=NAMESPACES)

    parsed = parse_single_vj_elem(vjelem, monday_of_desired_week)
    # Seven fields are returned; only the days bitmask is checked here.
    (_privatecode, _jpref_id, _vjcode, _other_vjcode,
     _linecode, days_bitmask, _departuretime) = parsed

    self.assertEqual(days_bitmask, 0)
def process_update(self, xml):
    """Replace ``self.messages`` with one ``ChatMessage`` per ``log/msg``
    element of the given XML, built from its author and text children."""
    document = XML(xml)
    self.messages = [
        ChatMessage(entry.findtext("author"), entry.findtext("text"))
        for entry in document.findall("log/msg")
    ]
def load_file(self, filename):
    """Load an XML file and copy element texts into the form widgets.

    For every element that has text, the first child widget whose object
    name equals the element tag is filled, trying in order: QLineEdit,
    QPlainTextEdit, QComboBox, QCheckBox and SpellTextEdit.

    A missing file resets the form to an unsaved "untitled.xml"; an XML
    syntax error is shown in a critical message box.
    """
    try:
        with open(filename, 'r') as f:
            self.root = XML(f.read())
        self.reset_all_fields()

        for element in self.root.iter():
            if element.text is None:
                # Nothing to show for this element.
                continue
            name = str(element.tag)
            text = str(element.text)

            widget = self.findChild(QtWidgets.QLineEdit, name)
            if widget is not None:
                widget.setText(text)
                continue

            widget = self.findChild(QtWidgets.QPlainTextEdit, name)
            if widget is not None:
                widget.setPlainText(text)
                continue

            widget = self.findChild(QtWidgets.QComboBox, name)
            if widget is not None:
                # Prefer an existing entry, then a numeric index, then
                # free text.
                index = widget.findText(element.text, QtCore.Qt.MatchFixedString)
                if index >= 0:
                    widget.setCurrentIndex(index)
                elif text.isdigit():
                    widget.setCurrentIndex(int(element.text))
                else:
                    widget.setCurrentText(text)
                continue

            widget = self.findChild(QtWidgets.QCheckBox, name)
            if widget is not None:
                widget.setChecked(text == 'True')
                continue

            widget = self.findChild(SpellTextEdit, name)
            if widget is not None:
                # Previously called setChecked(), copied from the checkbox
                # branch.  NOTE(review): assumes SpellTextEdit is a
                # Qt text-edit subclass providing setPlainText - confirm.
                widget.setPlainText(text)

        self.filename = filename
        self.get_photo(self.PHOTOPATH.text())
        self.setWindowTitleSaved()
    except FileNotFoundError:
        # Warning: File not found!
        self.filename = "untitled.xml"
        self.setWindowTitleUnsaved()
        self.file_dirty = False
    except XMLSyntaxError:
        QtWidgets.QMessageBox.critical(
            self, "XML Read Failed",
            "Cannot read xml file %s. \n\nMake sure the xml file is not blank "
            % filename)
def test_hidden_and_readonly(self):
    """A string field rendered hidden, then readonly, yields a form that
    is valid against the RelaxNG schema."""
    m = data.Manip(dict(x=String()))
    m.x = "test"
    form = Form("http://example.com/myform", "Submit")
    form["x"] = ModelData(m)

    for display_mode in ("hidden", "readonly"):
        form["x"].display["x"] = {display_mode: True}
        rendered = form.create()
        schema = RelaxNG(parse(RNG))
        schema.assertValid(XML(rendered))
def parse(self, xmlContent):
    """Render the children of the first ``<xmoto>`` element as text.

    Each child contributes ``"<TAG> = "`` (tag upper-cased) followed by
    the result of ``getGroupContent`` for that child.
    """
    from lxml.etree import XML

    xmoto_node = XML(xmlContent).xpath("//xmoto")[0]
    pieces = []
    for group in xmoto_node:
        pieces.append("%s = " % group.tag.upper())
        pieces.append(self.getGroupContent(group))
    return "".join(pieces)
def validateFootnote(modelXbrl, footnote):
    """Validate the HTML content of a footnote (EFM 6.05.34).

    Copies the footnote under a fresh ``<body>`` element, validates it
    against ``edbodyDTD`` and checks ``a/@href`` and ``img/@src`` values
    for javascript and for external references.  Findings are reported
    through ``modelXbrl.error``; nothing is returned.
    """
    loadDTD(modelXbrl)
    label = footnote.get("{http://www.w3.org/1999/xlink}label")
    edgarArchive = "http://www.sec.gov/Archives/edgar/data/"
    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            modelXbrl.error(
                "EFM.6.05.34",
                _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                modelObject=footnote,
                xlinkLabel=label,
                error=", ".join(e.message for e in edbodyDTD.error_log.filter_from_errors()),
            )
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                if (attrTag, eltTag) not in (("href", "a"), ("src", "img")):
                    continue
                if "javascript:" in attrValue:
                    modelXbrl.error(
                        "EFM.6.05.34",
                        _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                        modelObject=footnote,
                        xlinkLabel=label,
                        attribute=attrTag,
                        element=eltTag,
                    )
                elif attrValue.startswith(edgarArchive) and eltTag == "a":
                    pass  # links into the EDGAR archive are allowed
                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                    modelXbrl.error(
                        "EFM.6.05.34",
                        _(
                            "Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"
                        ),
                        modelObject=footnote,
                        xlinkLabel=label,
                        attribute=attrTag,
                        element=eltTag,
                        value=attrValue,
                    )
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the caught exception itself; the DTD error log passed
        # here before is unrelated to a syntax or decoding failure and was
        # a raw log object rather than a message.
        modelXbrl.error(
            "EFM.6.05.34",
            _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
            modelObject=footnote,
            xlinkLabel=label,
            error=err,
        )
def get_federation_clarin_sps(id_fed_name: str,federation_md_file_path: str):
    """Return the set of ``entityID`` values of the CLARIN SPs found in a
    federation metadata file.

    Entities are selected by the 'clarin-member' entity category, except
    for the "SURFconext" federation, where every EntityDescriptor counts.
    """
    logging.info("Selecting SP entity IDs from {}...".format(id_fed_name))

    with open(federation_md_file_path, "rb") as metadata_file:
        federation_md = XML(metadata_file.read())

    if id_fed_name == "SURFconext":
        # SURFconext's feed is already only showing CLARIN SPs
        selector = "//md:EntityDescriptor"
    else:
        selector = "//md:EntityDescriptor[md:Extensions/mdattr:EntityAttributes/saml:Attribute[\
            @Name='http://macedir.org/entity-category']/saml:AttributeValue[text() =\
            'http://clarin.eu/category/clarin-member']]"

    return {
        descriptor.get('entityID')
        for descriptor in federation_md.xpath(
            selector, namespaces=NAMESPACE_PREFIX_MAP)
    }
def parser(xml_tree):
    """Return one row per ``PubmedArticle`` in the given XML.

    A row is the list of PMID texts, followed by the journal title texts,
    the stripped newline-joined publication type texts, and the result of
    ``get_factor`` for the lower-cased first journal title.
    """
    doc = XML(xml_tree)
    logger.info('return quantity %s' % len(doc.xpath('/PubmedArticleSet/PubmedArticle')))

    rows = []
    for article in doc.xpath('/PubmedArticleSet/PubmedArticle'):
        row = article.xpath('./MedlineCitation/PMID/text()')
        titles = article.xpath('.//Journal/Title/text()')
        publication_types = article.xpath('.//PublicationTypeList//text()')

        row += titles
        row.append('\n'.join(publication_types).strip())
        row.append(get_factor(titles[0].lower()))
        rows.append(row)
    return rows
def testGetRecordDeletedInRequestedPrefix(self):
    """A record deleted in prefix A only is reported as deleted for A,
    still served for B, and cannot be disseminated for unknown prefix C."""
    oaijazz = OaiJazz(self.tempdir + '/jazz')
    storage = MultiSequentialStorage(self.tempdir + "/seq-store")
    oairecord = OaiRecord()

    class MyStorage(object):
        def getData(self, identifier, name):
            return 'data'

    oaigetrecord = be(
        (OaiGetRecord(repository=OaiRepository()),
            (oaijazz, ),
            (oairecord,
                (MyStorage(), )
            )
        ))
    oaijazz.addOaiRecord(identifier='id:0', metadataPrefixes=['A', 'B'])
    oaijazz.deleteOaiRecordInPrefixes(identifier='id:0', metadataPrefixes=['A'])

    def bodyFor(prefix):
        # Issue a GetRecord request for id:0 and return the HTTP body.
        response = oaigetrecord.getRecord(
            arguments={
                'verb': ['GetRecord'],
                'metadataPrefix': [prefix],
                'identifier': ['id:0'],
            },
            **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        return body

    body = bodyFor('A')
    status = xpathFirst(
        XML(body.encode()),
        '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/@status')
    self.assertEqual('deleted', status, body)

    body = bodyFor('B')
    self.assertEqual(
        "data", xpathFirst(XML(body.encode()), '//oai:metadata/text()'))

    body = bodyFor('C')
    self.assertEqual(
        'cannotDisseminateFormat',
        xpathFirst(XML(body.encode()), '/oai:OAI-PMH/oai:error/@code'))
def parse_error(xml_str):
    """Extract the error details from an error response.

    The error element is the root itself when it is ``<error>``, or the
    first ``<error>`` descendant of a ``<customers>`` root.

    Returns a dict with keys ``id``, ``code``, ``aux_code`` and
    ``message``.  Raises ``Exception`` when no error element is present.
    """
    doc = XML(xml_str)
    if doc.tag == "error":
        elem = doc
    elif doc.tag == "customers":
        elem = doc.find(".//error")
    else:
        elem = None

    # A <customers> document without an <error> used to fail further down
    # with an AttributeError on None; report it like any other missing
    # error element.
    if elem is None:
        raise Exception("Can't find error element in '%s'" % xml_str)

    client_log.debug(elem)
    return {
        "id": elem.attrib["id"],
        "code": elem.attrib["code"],
        "aux_code": elem.attrib["auxCode"],
        "message": elem.text,
    }
def handleRequest(self, Body='', **kwargs):
    # Handle one update request: emit the HTTP response header, dump the
    # request's <ucp:updateRequest> element to a numbered file in
    # self._dumpdir and build the XML answer.  (Python 2 syntax.)
    #
    # NOTE(review): `answer` is assigned on every path but is not yielded
    # anywhere in the code visible here - confirm whether the end of the
    # function is missing from this view.
    yield '\r\n'.join(['HTTP/1.0 200 OK', 'Content-Type: text/xml; charset=utf-8\r\n', ''])
    try:
        updateRequest = xpathFirst(XML(Body), '/ucp:updateRequest')
        recordId = xpathFirst(updateRequest, 'ucp:recordIdentifier/text()')
        # File-name-safe form of the record identifier.
        normalizedRecordId = notWordCharRE.sub('_', recordId)
        self._number +=1
        if self._number <= self._maxCountNumber:
            filename = '%05d_%s.updateRequest' %(self._number, normalizedRecordId)
            with open(join(self._dumpdir, filename), 'w') as f:
                print recordId
                stdout.flush()
                f.write(tostring(updateRequest))
            answer = RESPONSE_XML % { "operationStatus": "success", "diagnostics": ""}
        else:
            # Limit reached: raise the threshold by self._maxCount and
            # answer this request with a failure.
            self._maxCountNumber = self._number + self._maxCount
            print 'Reached maxCount'
            answer = RESPONSE_XML % { "operationStatus": "fail", "diagnostics": DIAGNOSTIC_XML % {'uri': '', 'message': '', 'details': escapeXml("Enough is enough")}}
    except Exception, e:
        # Any failure becomes a "fail" answer carrying the traceback.
        answer = RESPONSE_XML % { "operationStatus": "fail", "diagnostics": DIAGNOSTIC_XML % {'uri': '', 'message': '', 'details': escapeXml(format_exc())}}
class XMLFileBacked(FileBacked):
    """
    This object is a coherent cache for an XML file to be used as a
    part of DirectoryBacked.
    """
    __identifier__ = 'name'

    def __init__(self, filename):
        # Defaults used until Index() has parsed the file successfully.
        self.label = "dummy"
        self.entries = []
        FileBacked.__init__(self, filename)

    def Index(self):
        """Build local data structures."""
        try:
            parsed = XML(self.data)
        except XMLSyntaxError:
            logger.error("Failed to parse %s" % (self.name))
            return

        self.xdata = parsed
        self.entries = parsed.getchildren()
        identifier = self.__identifier__
        if identifier is not None:
            # The label is the root attribute named by __identifier__.
            self.label = parsed.attrib[identifier]

    def __iter__(self):
        return iter(self.entries)

    def __str__(self):
        serialized = lxml.etree.tostring(self.xdata)
        return "%s: %s" % (self.name, serialized)
def parse_error(xml_str):
    """Extract the error details from an error response.

    The error element is the root itself when it is ``<error>``, or the
    first ``<error>`` descendant of a ``<customers>`` root.

    Returns a dict with keys ``id``, ``code``, ``aux_code`` and
    ``message``.  Raises ``Exception`` when no error element is present.
    """
    doc = XML(xml_str)

    elem = None
    if doc.tag == 'error':
        elem = doc
    elif doc.tag == 'customers':
        # ``find`` returns None when no <error> descendant exists.
        elem = doc.find('.//error')

    # Previously a <customers> document without <error> crashed below with
    # an AttributeError on None instead of this explicit error.
    if elem is None:
        raise Exception("Can't find error element in '%s'" % xml_str)

    client_log.debug(elem)
    error = {}
    error['id'] = elem.attrib['id']
    error['code'] = elem.attrib['code']
    error['aux_code'] = elem.attrib['auxCode']
    error['message'] = elem.text
    return error
def _add_children(self, bus, service):
    '''Add the child nodes found by introspection'''
    self._children = {}
    introspection = XML(self._object.Introspect())

    # add all child nodes
    for node in introspection.findall('node'):
        name = node.get('name')
        if name == "/":
            continue

        parent_path = self.object.object_path
        # root is reported as /, don't make it //
        separator = "" if parent_path == "/" else "/"
        self._children[name] = Dbusitem(
            bus, service, parent_path + separator + name)
def do_search(param, sort, page=1, rows=100, spellcheck_count=None):
    """Run a Solr query and wrap the outcome in a ``web.storage``.

    On success the storage carries facet counts, the ``result`` element
    (``docs``), its ``numFound`` count and a map of spellcheck suggestions.
    When the reply is empty, starts with ``<html`` or is not well-formed
    XML, ``docs`` is ``[]`` and ``error`` holds the unquoted text matched
    by ``re_pre``, or the raw reply when nothing matches.
    """
    (reply, solr_select, q_list) = run_solr_query(
        param, rows, page, sort, spellcheck_count)

    # ``root`` stays None for every kind of unusable reply.
    root = None
    if reply and not reply.startswith('<html'):
        try:
            root = XML(reply)
        except XMLSyntaxError:
            pass

    if root is None:
        m = re_pre.search(reply)
        return web.storage(
            facet_counts = None,
            docs = [],
            is_advanced = bool(param.get('q')),
            num_found = None,
            solr_select = solr_select,
            q_list = q_list,
            error = (web.htmlunquote(m.group(1)) if m else reply),
        )

    spell_map = {}
    spellcheck = root.find("lst[@name='spellcheck']")
    if spellcheck is not None and len(spellcheck):
        for suggestion in spellcheck.find("lst[@name='suggestions']"):
            assert suggestion.tag == 'lst'
            word = suggestion.attrib['name']
            # Keep the first entry per word and skip the excluded names.
            if word in spell_map or word in ('sqrt', 'edition_count'):
                continue
            spell_map[word] = [
                alternative.text
                for alternative in suggestion.find("arr[@name='suggestion']")
            ]

    docs = root.find('result')
    return web.storage(
        facet_counts = read_facets(root),
        docs = docs,
        is_advanced = bool(param.get('q')),
        num_found = (int(docs.attrib['numFound']) if docs is not None else None),
        solr_select = solr_select,
        q_list = q_list,
        error = None,
        spellcheck = spell_map,
    )
def home(request):
    """Serve the Reed course catalogue as a downloadable RDF/XML file.

    Downloads the course XML, adds one blank node per ``course`` element
    (typed as a course, with its title and subject as literals) to a graph
    and returns the pretty-xml serialisation as an attachment named
    ``courses.rdf``.

    Args:
        request: the incoming request (not inspected).

    Returns:
        HttpResponse with content type ``application/rdf+xml``.
    """
    data = urlopen('http://www.cs.washington.edu/research/xmldatasets/data/courses/reed.xml')
    try:
        payload = data.read()
    finally:
        # release the connection even when the download fails part-way
        data.close()
    xml = XML(payload)

    store = Graph()
    subject = URIRef('http://rdf.ify.dk/#subject')
    title = URIRef('http://rdf.ify.dk/#title')
    course = URIRef('http://rdf.ify.dk/#course')
    for i in xml.iterfind('course'):
        c = BNode()
        store.add((c, RDF.type, course))
        store.add((c, title, Literal(i.find('title').text)))
        store.add((c, subject, Literal(i.find('subj').text)))

    r = HttpResponse(store.serialize(format='pretty-xml'), content_type='application/rdf+xml')
    r['Content-Disposition'] = 'attachment; filename="courses.rdf"'
    return r
def test_implied_slowpath_connectors(self):
    """Two VNFs without fpaths yield two connectors and one destination."""
    meta = {
        'vnfs': [
            {'type': vnf_type, 'fpaths': []}
            for vnf_type in ('ltemmesim', 'ltegwsim')
        ],
    }
    builder = vnf.ConnectionBuilder()
    rendered = XML(VNF.render(meta))
    builder.implied_slowpath_connectors(rendered)
    #print(tostring(rendered, pretty_print=True))
    connector_count = len(rendered.xpath('//connector'))
    self.assertEqual(2, connector_count)
    destination_count = len(rendered.xpath('//connector/destination'))
    self.assertEqual(1, destination_count)
def _mock_manager_3foobar_warnings(self, *args, **kwargs):
    """Mock manager call that always raises an RPCError carrying three warnings.

    The canned reply holds three ``rpc-error`` elements of severity
    ``warning`` followed by ``<ok/>``; each one is wrapped in an RPCError
    and passed as ``errs`` to the RPCError that is raised.
    """
    cmd = """ <load-configuration action="set" format="text"> <configuration-set> delete interfaces ge-0/0/0 delete protocols ospf delete policy-options prefix-list foo </configuration-set> </load-configuration> """
    rsp_string = """ <rpc-reply xmlns="urn:ietf:params:xml:ns:netconf:base:1.0" xmlns:junos="http://xml.juniper.net/junos/16.1R4/junos" xmlns:nc="urn:ietf:params:xml:ns:netconf:base:1.0" message-id="urn:uuid:1f3dfa00-3434-414a-8aa8-0073590c5812"> <load-configuration-results> <rpc-error> <error-severity>warning</error-severity> <error-message> foo boom </error-message> </rpc-error> <rpc-error> <error-severity>warning</error-severity> <error-message> boom bar </error-message> </rpc-error> <rpc-error> <error-severity>warning</error-severity> <error-message> foo bar </error-message> </rpc-error> <ok/> </load-configuration-results> </rpc-reply> """
    reply = XML(rsp_string)
    error_path = './/' + qualify('rpc-error')
    warnings_found = [RPCError(node) for node in reply.findall(error_path)]
    raise RPCError(reply, errs=warnings_found)
def _normalizeRdfDescription(self, descriptionNode):
    """Return descriptionNode in the form of an rdf:Description element.

    The node is first re-parsed from its serialised form and has its
    namespaces cleaned up. A node whose tag is canonical, or already
    rdf:Description, is returned as is. Any other node is replaced by a new
    rdf:Description with the same attributes and nsmap, an rdf:type child
    whose rdf:resource is the old tag with its braces removed, and deep
    copies of the original children.
    """
    node = XML(lxmltostringUtf8(descriptionNode).strip())
    cleanup_namespaces(node)
    if node.tag in CANONICAL_DESCRIPTION_TAGS:
        return node

    rdfDescriptionTag = '{%(rdf)s}Description' % namespaces
    if node.tag == rdfDescriptionTag:
        return node

    # "{namespace}local" -> "namespacelocal"
    typeUri = node.tag.replace('{', '').replace('}', '')
    normalized = Element(
        rdfDescriptionTag,
        attrib=node.attrib,
        nsmap=node.nsmap,
    )
    typeAttributes = {'{%(rdf)s}resource' % namespaces: typeUri}
    SubElement(normalized, '{%(rdf)s}type' % namespaces, attrib=typeAttributes)
    for child in node:
        normalized.append(deepcopy(child))
    return normalized
def load(self, xml):
    """Replace the current launch elements with those parsed from xml.

    The first child of the parsed root becomes self.group. Its last child
    is removed and its first child is skipped; every remaining element is
    listed in the listbox and recorded in self.atriData, self.dataElement
    and self.names.
    """
    self.list.delete(0, END)
    if len(self.dataElement) > 0:
        self.deleteAll()

    self.root = XML(xml)
    self.group = self.root.getchildren()[0]
    self.nsField.delete(0, END)
    self.nsField.insert(END, self.group.get('ns'))

    # drop the trailing child of the group before reading the elements
    trailing = self.group.getchildren()[-1]
    trailing.getparent().remove(trailing)

    camera_fields = ('videoDevice', 'cameraFrameId', 'pixelFormat',
                     'imageWidth', 'imageHeight')
    hokuyo_fields = ('port', 'frameId')

    for element in self.group.getchildren()[1:]:
        arri = dict()
        if element.tag == 'include':
            name = element.getchildren()[0].get('name')
            self.list.insert(END, name)
            arri['elType'] = 'Openni'
            arri['value'] = element.getchildren()[0].get('value')
        else:
            name = element.get('name')
            self.list.insert(END, name)
            if element.get('type') == 'usb_cam_node':
                params = element.getchildren()
                arri['elType'] = 'Camera'
                arri['name'] = element.get('name')
                arri['output'] = element.get('output')
                arri['respawn'] = element.get('respawn')
                for position, key in enumerate(camera_fields):
                    arri[key] = params[position].get('value')
            elif element.get('type') == 'hokuyo_node':
                params = element.getchildren()
                arri['elType'] = 'Hokuyo'
                arri['name'] = element.get('name')
                arri['output'] = element.get('output')
                for position, key in enumerate(hokuyo_fields):
                    arri[key] = params[position].get('value')
        self.atriData.append(arri)
        self.dataElement.append(element)
        self.names.append(name)
def install(project_path, server_xml_location, server_xml, server_name, lib_path, lib_name, app_name, mount=None):
    """Register a library in a project's server XML file.

    For every ``server`` element whose ``docname`` equals ``server_name``,
    the following are added when no matching element exists yet, each
    preceded by an "added by moya-pm" comment:

    * ``<import location=lib_path>`` at the top of the server,
    * ``<install name=app_name lib=lib_name>`` at the end,
    * ``<mount app=app_name url=mount>`` at the end (only when both
      ``mount`` and ``app_name`` are given).

    The file is rewritten through ``open_atomic_write`` whether or not
    anything changed.

    Returns:
        bool: True if at least one element was added.
    """
    from lxml.etree import XML, Comment, parse

    def has_child(node, tag, **attribs):
        # True if node has a child `tag` whose attributes all match
        return any(
            all(el.get(k, None) == v for k, v in attribs.items())
            for el in node.findall(tag)
        )

    def make_tag(markup):
        # Build a fresh element per insertion: an lxml element has a single
        # parent, so a shared instance would be moved between servers.
        tag = XML(markup)
        tag.tail = "\n"
        return tag

    def get_comment():
        return Comment('added by moya-pm')

    changes = 0
    with fsopendir(project_path) as project_fs:
        with project_fs.opendir(server_xml_location) as server_fs:
            with server_fs.open(server_xml, 'rb') as server_xml_file:
                root = parse(server_xml_file)

            server_el = "{{http://moyaproject.com}}server[@docname='{}']".format(server_name)
            for server in root.findall(server_el):
                if not has_child(server, "{http://moyaproject.com}import", location=lib_path):
                    # inserting the comment second puts it before the import
                    server.insert(0, make_tag('<import location="{lib_path}"/>'.format(lib_path=lib_path)))
                    server.insert(0, get_comment())
                    changes += 1

                if not has_child(server, "{http://moyaproject.com}install", lib=lib_name):
                    server.append(get_comment())
                    server.append(make_tag('<install name="{app_name}" lib="{lib_name}" />'.format(app_name=app_name, lib_name=lib_name)))
                    changes += 1

                if mount is not None and app_name is not None:
                    # The mount tag stores the application in its `app`
                    # attribute, so that is the attribute to test; checking
                    # `app_name` never matched and duplicated the mount.
                    if not has_child(server, "{http://moyaproject.com}mount", app=app_name):
                        server.append(get_comment())
                        server.append(make_tag('<mount app="{app_name}" url="{mount}" />'.format(app_name=app_name, mount=mount)))
                        changes += 1

            with open_atomic_write(server_fs, server_xml, 'wb') as server_xml_file:
                root.write(server_xml_file)
    return bool(changes)
def __init__(self, root, menu, edit, delete, info, icon):
    """Build the launch-element list panel and its empty launch document.

    Args:
        root: parent widget that the panel's Frame is created in.
        menu: passed to self.menu() once the panel state is initialised.
        edit: stored as self.editB.
        delete: stored as self.deleteB.
        info: stored as self.info.
        icon: forwarded to GUIWizard.__init__.
    """
    GUIWizard.__init__(self, icon)
    self.editB = edit
    self.deleteB = delete
    self.info = info
    # Start from an empty <launch><group ns=""/></launch> document.
    # Note that self.root is this XML element, not the `root` widget.
    self.root = Element('launch')
    self.group = SubElement(self.root, 'group', {'ns': ''})
    self.addSerialNode()
    self.master = Frame(root)
    self._isLoaded = False
    self.list = Listbox(self.master, selectmode=SINGLE, width=30)
    # double-clicking a list entry triggers clickEvent
    self.list.bind('<Double-Button-1>', self.clickEvent)
    # test = Button(self.master, text='test', command=self.printTest)
    title = Label(self.master, text='List of launch elements')
    title.grid(sticky=W)
    self.list.grid(sticky=W)
    # test.grid(row=1, column=1)
    # Bookkeeping lists, all starting empty.
    self.dataElement = []
    self.atriData = []
    self.names = []
    self.menu(menu)
    self.master.grid(row=2, column=1)
def test_setroot(self):
    """_setroot() replaces the root element of an existing tree."""
    from lxml.etree import XML

    tree = XML('<a />').getroottree()
    donor_tree = XML('<b />').getroottree()
    replacement = donor_tree.getroot()
    tree._setroot(replacement)
    new_root = tree.getroot()
    self.assertEqual('b', new_root.tag)
def saveTargetDocument(modelXbrl, targetDocumentFilename, targetDocumentSchemaRefs, outputZip=None, filingFiles=None):
    """Build an extracted XBRL instance from modelXbrl and save it.

    The target URL is the normalized ``targetDocumentFilename`` with
    ``_extracted`` inserted before the extension. Role/arcrole refs,
    contexts, units, facts (recursing into tuples) and footnote links are
    copied into a new instance, which is then saved.

    Args:
        modelXbrl: source model whose facts and relationships are copied.
        targetDocumentFilename: file name the target URL is derived from.
        targetDocumentSchemaRefs: schema refs for the new instance.
        outputZip: forwarded to ``saveInstance``.
        filingFiles: optional set; when not None, local (non-http,
            non-absolute) ``href``/``src`` values of ``<a>``/``<img>``
            elements found in text-block facts and footnotes are added.
    """
    targetUrl = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(targetDocumentFilename, modelXbrl.modelDocument.filepath)
    targetUrlParts = targetUrl.rpartition(".")
    targetUrl = targetUrlParts[0] + "_extracted." + targetUrlParts[2]
    modelXbrl.modelManager.showStatus(_("Extracting instance ") + os.path.basename(targetUrl))
    targetInstance = ModelXbrl.create(modelXbrl.modelManager,
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True)
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance) # need dimension defaults
    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement, roleRefElt.qname,
                     attributes=roleRefElt.items())

    # contexts
    for context in modelXbrl.contexts.values():
        targetInstance.createContext(context.entityIdentifier[0],
                                     context.entityIdentifier[1],
                                     'instant' if context.isInstantPeriod else
                                     'duration' if context.isStartEndPeriod
                                     else 'forever',
                                     context.startDatetime,
                                     context.endDatetime,
                                     None,
                                     context.qnameDims, [], [],
                                     id=context.id)
    for unit in modelXbrl.units.values():
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    def collectLocalReferences(elt):
        # Record href/src targets of <a>/<img> that are neither http URLs
        # nor absolute paths.
        if elt.tag in ("a", "img"):
            for attrTag, attrValue in elt.items():
                if attrTag in ("href", "src") and not isHttpUrl(attrValue) and not os.path.isabs(attrValue):
                    filingFiles.add(attrValue)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}
    def createFacts(facts, parent):
        for fact in facts:
            if fact.isItem:
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newFact
                # `is not None` so that an empty set supplied by the caller
                # is still filled; nil facts have no text to scan.
                if (filingFiles is not None and text is not None and
                        fact.concept is not None and fact.concept.isTextBlock):
                    # check for img and other filing references
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            # iter() visits nested elements too, matching
                            # the footnote scan below
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)).iter():
                                collectLocalReferences(elt)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass # text that is not well-formed XML has nothing to scan
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)
    # footnote links
    footnoteIdCount = {}
    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes & relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and # fully specified roles
            arcrole != "XBRL-footnotes" and
            any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)
    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        linkChild.attributes[HREF] = \
                            "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1: # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, attributes=attributes)
                    copyIxFootnoteHtml(linkChild, newChild, withText=True)
                    if filingFiles is not None and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            collectLocalReferences(elt)

    targetInstance.saveInstance(overrideFilepath=targetUrl, outputZip=outputZip)
    modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)
def createTargetInstance(modelXbrl, targetUrl, targetDocumentSchemaRefs, filingFiles, baseXmlLang=None, defaultXmlLang=None):
    """Create (without saving) an XBRL instance populated from modelXbrl.

    Role/arcrole refs, contexts and units (both in objectIndex order),
    facts (recursing into tuples) and footnote links are copied into a new
    instance created at ``targetUrl``.

    Args:
        modelXbrl: source model whose facts and relationships are copied.
        targetUrl: URL of the new instance.
        targetDocumentSchemaRefs: schema refs for the new instance.
        filingFiles: set, or None; when not None, elements found in
            text-block facts and footnotes are passed to
            ``addLocallyReferencedFile`` together with this set.
        baseXmlLang: when truthy, set as ``xml:lang`` on the root element.
        defaultXmlLang: language that facts/footnotes are compared against;
            defaults to ``baseXmlLang`` when that is given.

    Returns:
        The new target instance.
    """
    targetInstance = ModelXbrl.create(modelXbrl.modelManager,
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True,
                                      discover=False) # don't attempt to load DTS
    if baseXmlLang:
        targetInstance.modelDocument.xmlRootElement.set("{http://www.w3.org/XML/1998/namespace}lang", baseXmlLang)
        if defaultXmlLang is None:
            defaultXmlLang = baseXmlLang # allows facts/footnotes to override baseXmlLang
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance) # need dimension defaults
    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement, roleRefElt.qname,
                     attributes=roleRefElt.items())

    # contexts
    for context in sorted(modelXbrl.contexts.values(), key=lambda c: c.objectIndex): # contexts may come from multiple IXDS files
        targetInstance.createContext(context.entityIdentifier[0],
                                     context.entityIdentifier[1],
                                     'instant' if context.isInstantPeriod else
                                     'duration' if context.isStartEndPeriod
                                     else 'forever',
                                     context.startDatetime,
                                     context.endDatetime,
                                     None,
                                     context.qnameDims, [], [],
                                     id=context.id)
    for unit in sorted(modelXbrl.units.values(), key=lambda u: u.objectIndex): # units may come from multiple IXDS files
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}
    def createFacts(facts, parent):
        for fact in facts:
            if fact.isItem: # HF does not de-duplicate, which is currently-desired behavior
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                    if fact.concept is not None and fact.concept.baseXsdType in ("string", "normalizedString"): # default
                        xmlLang = fact.xmlLang
                        if xmlLang is not None and xmlLang != defaultXmlLang:
                            attrs["{http://www.w3.org/XML/1998/namespace}lang"] = xmlLang
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                # if fact.isFraction, create numerator and denominator
                newFactForOldObjId[fact.objectIndex] = newFact
                # A nil fact has text None, which CDATApattern.findall()
                # would reject with TypeError, so it is skipped here.
                if (filingFiles is not None and text is not None and
                        fact.concept is not None and fact.concept.isTextBlock):
                    # check for img and other filing references so that referenced files are included in the zip.
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)).iter():
                                addLocallyReferencedFile(elt, filingFiles)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass # TODO: Why ignore UnicodeDecodeError?
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)
    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes and relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    footnoteIdCount = {}
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and # fully specified roles
            arcrole != "XBRL-footnotes" and
            any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)
    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        linkChild.attributes[HREF] = \
                            "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1: # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, attributes=attributes)
                    xmlLang = linkChild.xmlLang
                    if xmlLang is not None and xmlLang != defaultXmlLang: # default
                        newChild.set("{http://www.w3.org/XML/1998/namespace}lang", xmlLang)
                    copyIxFootnoteHtml(linkChild, newChild, targetModelDocument=targetInstance.modelDocument, withText=True)
                    # Same test as for facts: an empty set must still be
                    # filled, so compare against None, not truthiness.
                    if filingFiles is not None and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addLocallyReferencedFile(elt, filingFiles)
    return targetInstance
def createRdfNode(aboutUri):
    """Create an rdf:RDF tree holding one empty rdf:Description.

    Returns a 2-tuple: the element tree, and the rdf:Description element
    whose rdf:about attribute is aboutUri.
    """
    # NOTE(review): aboutUri is interpolated into the markup without XML
    # escaping; confirm that callers pass an already-escaped value.
    markup = """<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"> <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/"></rdf:Description></rdf:RDF>""" % aboutUri
    tree = XML(markup).getroottree()
    description = tree.getroot().getchildren()[0]
    return tree, description
def validateTextBlockFacts(modelXbrl):
    """Validate the XML/HTML content of every non-nil text-block fact.

    For each fact whose concept is a text block and whose value matches
    XMLpattern, the following are reported through modelXbrl.error:
    named entities not in xhtmlEntities, text that fails to parse or fails
    edbodyDTD validation, javascript or external references in <a href> /
    <img src>, graphics references that are not .jpg/.gif or whose file
    content does not match, and nested <table> elements.

    NOTE(review): the nesting below was reconstructed from a source whose
    whitespace had been collapsed; confirm the placement of the graphics
    check and of checkedGraphicsFiles.clear() against the original file.
    """
    #handler = TextBlockHandler(modelXbrl)
    loadDTD(modelXbrl)
    checkedGraphicsFiles = set() # only check any graphics file reference once per fact
    for f1 in modelXbrl.facts:
        # build keys table for 6.5.14
        concept = f1.concept
        if f1.xsiNil != "true" and \
           concept is not None and \
           concept.isTextBlock and \
           XMLpattern.match(f1.value):
            #handler.fact = f1
            # test encoded entity tags
            for match in namedEntityPattern.finditer(f1.value):
                entity = match.group()
                if not entity in xhtmlEntities:
                    modelXbrl.error(("EFM.6.05.16", "GFM.1.2.15"),
                        _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                        modelObject=f1, fact=f1.qname, contextID=f1.contextID, entity=entity, error=entity)
            # test html
            # the whole value is checked first, then each CDATA section found in it
            for xmltext in [f1.value] + CDATApattern.findall(f1.value):
                ''' try: xml.sax.parseString( "<?xml version='1.0' encoding='utf-8' ?>\n<body>\n{0}\n</body>\n".format( removeEntities(xmltext)).encode('utf-8'),handler,handler) except (xml.sax.SAXParseException, xml.sax.SAXException, UnicodeDecodeError) as err: # ignore errors which are not errors (e.g., entity codes checked previously if not err.endswith("undefined entity"): handler.modelXbrl.error(("EFM.6.05.15", "GFM.1.02.14"), _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"), modelObject=f1, fact=f1.qname, contextID=f1.contextID, error=err) '''
                # entities were checked above, so they are removed before parsing
                xmlBodyWithoutEntities = "<body>\n{0}\n</body>\n".format(removeEntities(xmltext))
                try:
                    textblockXml = XML(xmlBodyWithoutEntities)
                    if not edbodyDTD.validate( textblockXml ):
                        errors = edbodyDTD.error_log.filter_from_errors()
                        # invalid children / unknown attributes are reported under 6.05.16
                        htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE")
                                        for e in errors)
                        modelXbrl.error("EFM.6.05.16" if htmlError else ("EFM.6.05.15.dtdError", "GFM.1.02.14"),
                            _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                            modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                            error=', '.join(e.message for e in errors))
                    for elt in textblockXml.iter():
                        eltTag = elt.tag
                        for attrTag, attrValue in elt.items():
                            if ((attrTag == "href" and eltTag == "a") or
                                (attrTag == "src" and eltTag == "img")):
                                if "javascript:" in attrValue:
                                    modelXbrl.error("EFM.6.05.16.activeContent",
                                        _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                        attribute=attrTag, element=eltTag)
                                elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                                    # links into the SEC EDGAR archive are allowed
                                    pass
                                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                                    modelXbrl.error("EFM.6.05.16.externalReference",
                                        _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                        attribute=attrTag, element=eltTag)
                                if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                                    # check the file extension first, then the file contents
                                    if attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                                        modelXbrl.error("EFM.6.05.16.graphicFileType",
                                            _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                            modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                            attribute=attrValue, element=eltTag)
                                    else: # test file contents
                                        try:
                                            # the detected type is compared with the 3-letter extension
                                            if validateGraphicFile(f1, attrValue) != attrValue.lower()[-3:]:
                                                modelXbrl.error("EFM.6.05.16.graphicFileContent",
                                                    _("Fact %(fact)s of context %(contextID)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                                    modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                                    attribute=attrValue, element=eltTag)
                                        except IOError as err:
                                            modelXbrl.error("EFM.6.05.16.graphicFileError",
                                                _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                                modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                                attribute=attrValue, element=eltTag, error=err)
                                    checkedGraphicsFiles.add(attrValue)
                        # a <table> with a <table> ancestor is a nested table
                        if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                            modelXbrl.error("EFM.6.05.16.nestedTable",
                                _("Fact %(fact)s of context %(contextID)s has nested <table> elements."),
                                modelObject=f1, fact=f1.qname, contextID=f1.contextID)
                except (XMLSyntaxError, UnicodeDecodeError) as err:
                    #if not err.endswith("undefined entity"):
                    modelXbrl.error(("EFM.6.05.15", "GFM.1.02.14"),
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                        modelObject=f1, fact=f1.qname, contextID=f1.contextID, error=err)
            # forget the graphics files seen for this fact
            checkedGraphicsFiles.clear()
def validateTextBlockFacts(modelXbrl):
    """Validate the XML/HTML content of every non-nil text-block fact.

    For each fact whose concept is a text block and whose value matches
    XMLpattern, the following are reported through modelXbrl.error:
    entities not in xhtmlEntities, text that fails to parse or fails
    edbodyDTD validation, and javascript or external references in
    <a href> / <img src>.

    Args:
        modelXbrl: model whose facts are checked and that receives errors.
    """
    # handler = TextBlockHandler(modelXbrl)
    loadDTD(modelXbrl)
    for f1 in modelXbrl.facts:
        # build keys table for 6.5.14
        concept = f1.concept
        if f1.xsiNil != "true" and concept is not None and concept.isTextBlock and XMLpattern.match(f1.value):
            # test encoded entity tags
            for match in entityPattern.finditer(f1.value):
                entity = match.group()
                if entity not in xhtmlEntities:
                    modelXbrl.error(
                        ("EFM.6.05.16", "GFM.1.2.15"),
                        _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                        modelObject=f1,
                        fact=f1.qname,
                        contextID=f1.contextID,
                        entity=entity,
                    )
            # test html: the whole value first, then each CDATA section in it
            for xmltext in [f1.value] + CDATApattern.findall(f1.value):
                try:
                    # entities were checked above, so strip them before parsing
                    textblockXml = XML("<body>\n{0}\n</body>\n".format(removeEntities(xmltext)))
                    if not edbodyDTD.validate(textblockXml):
                        errors = edbodyDTD.error_log.filter_from_errors()
                        htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE") for e in errors)
                        modelXbrl.error(
                            "EFM.6.05.16" if htmlError else ("EFM.6.05.15", "GFM.1.02.14"),
                            _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                            modelObject=f1,
                            fact=f1.qname,
                            contextID=f1.contextID,
                            error=", ".join(e.message for e in errors),
                        )
                    for elt in textblockXml.iter():
                        eltTag = elt.tag
                        for attrTag, attrValue in elt.items():
                            if (attrTag == "href" and eltTag == "a") or (attrTag == "src" and eltTag == "img"):
                                if "javascript:" in attrValue:
                                    # "%(contextID)s": the conversion type was
                                    # missing, which garbled the message
                                    modelXbrl.error(
                                        "EFM.6.05.16",
                                        _(
                                            "Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"
                                        ),
                                        modelObject=f1,
                                        fact=f1.qname,
                                        contextID=f1.contextID,
                                        attribute=attrTag,
                                        element=eltTag,
                                    )
                                elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                                    # links into the SEC EDGAR archive are allowed
                                    pass
                                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                                    modelXbrl.error(
                                        "EFM.6.05.16",
                                        _(
                                            "Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"
                                        ),
                                        modelObject=f1,
                                        fact=f1.qname,
                                        contextID=f1.contextID,
                                        attribute=attrTag,
                                        element=eltTag,
                                    )
                except (XMLSyntaxError, UnicodeDecodeError) as err:
                    # if not err.endswith("undefined entity"):
                    modelXbrl.error(
                        ("EFM.6.05.15", "GFM.1.02.14"),
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                        modelObject=f1,
                        fact=f1.qname,
                        contextID=f1.contextID,
                        error=err,
                    )