def storeRecordWithID(record, ID, collectedRecords):
    """Store *record* under *ID* and write it out in the configured formats.

    Depending on config.format, writes an XML file, a JSON file, or — when
    no format is configured — prints the record to stdout.  The record is
    also added to *collectedRecords* for later batch processing.
    """
    global config
    sys.stdout.write("ID: " + str(ID) + u"… ")
    collectedRecords[ID] = record

    # Write XML file for record.
    if 'xml' in config.format:
        filePath = pathForID(ID, 'xml')
        # Context manager closes the file even if the write fails
        # (the original leaked the handle on exceptions).
        with open(filePath, 'w') as XMLFile:
            XMLString = ET.tounicode(record).encode('UTF-8')
            XMLFile.write(XMLString)
        sys.stdout.write(' ./' + filePath)

    # Convert to JSON and write file.
    if 'json' in config.format:
        JSONInternal = elem_to_internal(record, strip=1)
        if len(JSONInternal) == 1:
            # Unwrap the single top-level element.  NOTE(review): relies on
            # dict.values() being indexable (Python 2); use list(...)[0]
            # under Python 3.
            JSONInternal = JSONInternal.values()[0]
        JSONInternal['_id'] = ID
        filePath = pathForID(ID, 'json')
        with open(filePath, "w") as JSONFile:
            JSONFile.write(simplejson.dumps(JSONInternal))
        sys.stdout.write(' ./' + filePath)

    # If no format is given, print the record.
    if len(config.format) == 0:
        print(ET.tounicode(record))
        print("")
def testFilteringStyle(self):
    # Feed a real-world HTML fragment full of inline style attributes through
    # filter_style() and print the result for manual inspection; the fragment
    # must be kept byte-for-byte as captured.
    html = u""" <div style="color: #ffff00;"><p style="float: left;"><img src="http://cdnimg.visualizeus.com/thumbs/3e/37/hairstyles-3e37929b6847d0216b0aabe296ed9a76_h.jpg?ts=93246" alt="" width="248" height="400" style="width: 500px; color: blue;"><a href="http://www.hairstyles123.com/hairstylepics/faces/hairstyles_for_heart_shaped_faces/heart_shaped_faces_hairstyle_4.jpg" class="clb cboxElement"><img src="http://www.hairstyles123.com/hairstylepics/faces/hairstyles_for_heart_shaped_faces/heart_shaped_faces_hairstyle_4.jpg" alt="" width="400" height="544"></a><a href="http://womeninfashion.net/wp-content/uploads/2013/11/heart-face-shape-hairstyles-jennifer-aniston.jpg" class="clb cboxElement"><img src="http://womeninfashion.net/wp-content/uploads/2013/11/heart-face-shape-hairstyles-jennifer-aniston.jpg" alt="" width="420" height="560"></a><a href="http://thisgirlscity.com/wp-content/uploads/2013/07/reese.jpg" class="clb cboxElement"><img src="http://thisgirlscity.com/wp-content/uploads/2013/07/reese.jpg" alt="" width="420" height="560"></a><a href="http://images.beautyriot.com/photos/200/hairstyles_heart_shape_face-200.jpg" class="clb cboxElement"><img src="http://images.beautyriot.com/photos/200/hairstyles_heart_shape_face-200.jpg" alt="" width="200" height="272"></a></p> <p><a href="http://www.youbeauty.com/p/482031/thumbnail/entry_id/0_hmc2pi25/width/0/height/0/quality/90" class="clb cboxElement"><img src="http://www.youbeauty.com/p/482031/thumbnail/entry_id/0_hmc2pi25/width/0/height/0/quality/90" alt="" width="200" height="290"></a><a href="http://www.hairstyles123.com/hairstylepics/faces/hairstyles_for_heart_shaped_faces/heart_shaped_faces_hairstyle_11.jpg" class="clb cboxElement"><img src="http://www.hairstyles123.com/hairstylepics/faces/hairstyles_for_heart_shaped_faces/heart_shaped_faces_hairstyle_11.jpg" alt="" width="520" height="780"></a><a 
href="http://www.allure.com/images/hair-ideas/2012/05/heart-face-shape-hairstyles-reese-witherspoon.jpg" class="clb cboxElement"><img src="http://www.allure.com/images/hair-ideas/2012/05/heart-face-shape-hairstyles-reese-witherspoon.jpg" alt="" width="420" height="560"></a><a href="http://slodive.com/wp-content/uploads/2012/03/hairstyles-for-heart-shaped-faces/heart-shape-glasses.jpg" class="clb cboxElement"><img src="http://slodive.com/wp-content/uploads/2012/03/hairstyles-for-heart-shaped-faces/heart-shape-glasses.jpg" alt=""></a><a href="http://www.beautifulhairstyle.net/wp-content/uploads/2014/02/Long-Hairstyles-For-Heart-Shaped-Faces.jpg" class="clb cboxElement"><img src="http://www.beautifulhairstyle.net/wp-content/uploads/2014/02/Long-Hairstyles-For-Heart-Shaped-Faces.jpg" alt=""></a><a href="http://beautyhairtotoe.com/wp-content/uploads/2013/08/rby-heart-shaped-reese-marked-mdn.jpg" class="clb cboxElement"><img src="http://beautyhairtotoe.com/wp-content/uploads/2013/08/rby-heart-shaped-reese-marked-mdn.jpg" alt=""></a><a href="http://www.prettydesigns.com/wp-content/uploads/2013/09/Hairstyle-for-Oval-shaped-Women.jpg" class="clb cboxElement"><img src="http://www.prettydesigns.com/wp-content/uploads/2013/09/Hairstyle-for-Oval-shaped-Women.jpg" alt="" width="550" height="775"></a><a href="http://www.hairnext.com/wp-content/uploads/2014/05/Heart-Shaped-Face-Best-Short-Bangs-Hairstyle-For-Fine-Hair.jpg" class="clb cboxElement"><img src="http://www.hairnext.com/wp-content/uploads/2014/05/Heart-Shaped-Face-Best-Short-Bangs-Hairstyle-For-Fine-Hair.jpg" alt="Heart Shaped Face Best Short Bangs Hairstyle For Fine Hair"></a><a href="http://www.hairnext.com/wp-content/uploads/2014/05/Short-Bob-Side-Swept-For-Long-Face-Shape.jpg" class="clb cboxElement"><img src="http://www.hairnext.com/wp-content/uploads/2014/05/Short-Bob-Side-Swept-For-Long-Face-Shape.jpg" alt="Short Bob Side Swept For Long Face Shape"></a></p> <p> <img 
src="http://www.hairnext.com/wp-content/uploads/2014/05/Short-blonde-Curly-hairstyle.jpg" alt="Short blonde Curly hairstyle:"></p></div> """
    # No assertion: output is inspected manually.
    print etree.tounicode(filter_style(fromstring(html)))
def htmltopost(self, html, pagetime):
    """Build a post.Post from the body cell of a forum post.

    All XPath queries are *relative* to ``html`` and climb up/sideways in
    the table that frames the post to pick out number, id, title, poster
    and timestamp.

    :param html: lxml element for the post body cell.
    :param pagetime: reference time used by misc.parseitemtime to resolve
        relative time strings ("Today", "Yesterday", ...).
    :return: populated post.Post instance.
    """
    self.removecomments(html)
    # Serialized post content, kept as HTML.
    c = etree.tounicode(html, method='html', pretty_print=True)
    postnumber = 0
    # The post-number link lives in the first row's second cell.
    postnumbernode = html.xpath(r"../../../tr[1]/td[2]/a[last()]")
    if postnumbernode:
        postnumber = int(etree.tounicode(postnumbernode[-1], method="text"))
        postlinknode = postnumbernode[-1].attrib['href']
        # The post id is the 'p' query parameter of that link.
        parsed = urlparse(postlinknode)
        postid = int(parse_qs(parsed.query)['p'][0])
    titlenode = html.xpath(r"../div[@class='smallfont']/strong")
    title = etree.tounicode(titlenode[-1], method="text").strip()
    posternode = html.xpath(r"../../td[1]/div/a[starts-with(@class,'bigusername')]")
    poster = etree.tounicode(posternode[-1], method="text").strip()
    timenode = html.xpath(r"../../../tr[1]/td[1]")
    timestring = etree.tounicode(timenode[-1], method="text").strip()
    ts = misc.parseitemtime(pagetime, timestring)
    # NOTE(review): postid is only bound when postnumbernode matched; a post
    # without that link raises NameError here — confirm it cannot happen.
    p = post.Post(content=c, postnumber=postnumber, title=title,
                  postername=poster, postid=postid, ts=ts)
    print(postnumber, postid, poster, title)
    return p
def main():
    """Command-line entry point: extend the vcard of a LOM into an xcard.

    Reads the input (file path or URL), runs the extraction stylesheet then
    the duplication stylesheet, and writes the result either as raw XML or
    serialized through rdflib in the requested RDF format.
    """
    extensions = {(URL_MLR_EXT, 'vcard_uuid'): utils.vcard_uuid}
    converterExtract = XMLTransform(STYLESHEET_EXTRACT, extensions)
    converterDup = XMLTransform(STYLESHEET_DUP)
    parser = argparse.ArgumentParser(
        description='Extend the vcard of a lom into a xcard')
    # BUGFIX: the help text previously read "trix'" (missing opening quote).
    parser.add_argument('-f', '--format', default='rawxml',
                        help="output format: one of 'rawxml', 'xml', 'n3',"
                             " 'turtle', 'nt', 'pretty-xml', 'trix'")
    parser.add_argument('-o', '--output', help="Output file",
                        type=argparse.FileType('w'), default=sys.stdout)
    parser.add_argument('infile', help="input file or url", nargs="?")
    converterExtract.populate_argparser(parser)
    args = parser.parse_args()
    converterExtract.set_options_from_dict(vars(args))
    # URLs are fetched with urlopen, local paths with plain open().
    if (urlparse(args.infile).scheme):
        opener = urlopen
    else:
        opener = open
    with opener(args.infile) as infile:
        xml = converterExtract.convertfile(infile)
        if xml:
            xml = converterDup.convertxml(xml)
        if xml:
            if args.format == "rawxml":
                args.output.write(etree.tounicode(xml, pretty_print=True).encode('utf-8'))
            else:
                rdf = Graph().parse(data=etree.tounicode(xml), format="xml")
                if rdf:
                    args.output.write(rdf.serialize(format=args.format,
                                                    encoding='utf-8'))
    args.output.close()
def create_xml(self, p):
    """Append the searched item *p* (a dict) to log.xml, creating the log file on demand."""
    path = "log.xml"
    # Bootstrap an empty log with just the root element when it is missing.
    if not os.path.isfile(path):
        blank_root = etree.Element("searchedItems")
        with open(path, mode="w", encoding="utf-8") as f:
            f.write(etree.tounicode(blank_root, pretty_print=True))
    root = etree.parse(path)
    # One <item id="..."> element per searched item.
    item = etree.SubElement(root.getroot(), "item", attrib={"id": p["id"]})
    while p:
        print(etree.tounicode(root, pretty_print=True))
        key, value = p.popitem()
        if "buyNow" in key or "bid" in key:
            # BuyNow/Bid prices are grouped under a shared <price> element.
            if item.find("price") is None:
                price = etree.Element("price")
                item.append(price)
            node = etree.Element(key)
            price.append(node)
            node.text = value
        else:
            # Everything else hangs directly off <item>.
            node = etree.Element(key)
            item.append(node)
            node.text = value
    with open(path, mode="w", encoding="utf-8") as f:
        f.write(etree.tounicode(root, pretty_print=True))
def serialized(self, stripped=True):
    """Serialize the definition node to unicode.

    By default the stripped variant of the node is used; pass
    ``stripped=False`` to serialize the raw node instead.
    """
    target = self.node_stripped() if stripped else self.node
    return etree.tounicode(target)
def get_single_content(element, data_type):
    """Return the processed content of given element"""
    # String-like XPath results (plain strings or lxml smart strings) pass
    # straight through untouched.
    string_like = (basestring, etree._ElementStringResult,
                   etree._ElementUnicodeResult)
    if isinstance(element, string_like):
        return element
    if data_type == 'text':
        # Plain-text extraction of the element's content.
        return etree.tounicode(element, method='text').strip()
    elif data_type == 'html':
        return etree.tounicode(element, pretty_print=True).strip()
def main():
    """Fetch all records from the configured SRU endpoint in chunks.

    Builds the base searchRetrieve URL from the global config, pages through
    the result set config.chunksize records at a time, transforms each
    record with the configured XSLs and stores it via storeRecordWithID().
    """
    global config
    loadXSLs()
    # Static part of the query; startRecord is appended per chunk below.
    SRUBaseURL = config.url + '?' \
        + 'operation=searchRetrieve' \
        + '&' + 'version=1.1' \
        + '&' + 'recordPacking=xml' \
        + '&' + 'recordSchema=' + urllib.quote(config.schema) \
        + '&' + 'maximumRecords=' + str(config.chunksize) \
        + '&' + 'query=' + urllib.quote(config.query)
    recordCount = 1
    done = False
    while not done:
        firstRecord = recordCount
        SRUURL = SRUBaseURL + '&' + 'startRecord=' + str(recordCount)
        print SRUURL
        SRUResponse = urllib.urlopen(SRUURL).read()
        XML = ET.fromstring(SRUResponse)
        records = XML.findall('.//{http://www.loc.gov/zing/srw/}recordData/*')
        numberOfRecords = XML.findall('.//{http://www.loc.gov/zing/srw/}numberOfRecords')
        resultCount = 0
        if len(numberOfRecords) > 0:
            resultCount = int(numberOfRecords[0].text)
        print u"Loaded " + str(len(records)) + " records: " + str(recordCount) + "-" + str(min(recordCount + config.chunksize, resultCount)) + " of " + str(resultCount)
        collectedRecords = {}
        for record in records:
            ID = recordID(record, recordCount)
            # Transform record through each configured XSL in sequence.
            for XSL in config.XSLs:
                record = XSL(record).getroot()
            if record is None:
                print u"Record transformation failed for ID »" + ID + u"«"
                # NOTE(review): record is None here, so tounicode(record)
                # raises — this probably meant the pre-transform record.
                print ET.tounicode(record)
            else:
                storeRecordWithID(record, ID, collectedRecords)
            recordCount += 1
        storeBatches(collectedRecords, firstRecord)
        # Stop when the server returns no records or we passed the total.
        done = (len(records) == 0 or recordCount > resultCount)
def full_description(self, url):
    """Fetch *url* and scrape the image/description/tech blocks of its item.

    Returns a dict with 'image', 'desc' and 'tech' keys, or {} when the
    page does not have the expected structure.
    """
    tree = etree.parse(retrieve(url=url), self.parser)
    desc_list = tree.xpath("//div[@id='item-full']")
    # NOTE(review): assert is stripped under -O; consider raising instead.
    assert len(desc_list), _(u'No elements found.')
    for item in desc_list:
        try:
            # Returns on the first item that has all three sub-blocks.
            return {
                'image': item.xpath(".//div[@class='item-pic']//img/@src")[0],
                'desc': tounicode(item.xpath(".//div[@id='item-details']/node()")[0]),
                'tech': tounicode(item.xpath(".//div[@id='item-tech']/node()")[0]),
            }
        except IndexError:
            # Expected sub-elements missing; report and fall through.
            print 'Bad structure in %s !' % url
    return {}
def comment_stripped_text(self):
    """
    Return a version of the plain text with any square-bracketed
    comments removed.
    """
    serialized = etree.tounicode(self.node)
    node = self.node
    # Only bother re-parsing when a comment element is actually present.
    if '<cm' in serialized:
        stripped = COMMENT_STRIPPER.edit(serialized)
        try:
            node = etree.XML(stripped)
        except etree.XMLSyntaxError:
            # Stripping produced invalid XML — fall back to the raw node.
            node = self.node
    return etree.tounicode(node, method='text') or ''
def success(self, template_name, transaction_number):
    """Render the post-sale success page for *transaction_number*.

    Looks up the cached transaction values, fills the template's elements
    by CSS id, lets the mailer decorate the e-mail notice, and returns the
    rendered XHTML document (DTD prepended).
    """
    # First, divert to the 404 page if the
    # transaction_number is not found.
    if transaction_number not in g.success_data:
        return abort(
            status_code=404,
            comment='Transaction number expired or invalid.',
        )
    # Retrieve from success cache. Copy it since we will mutate it.
    values = g.success_data[transaction_number].copy()
    # Apply the generic commerce notice.
    sale_template = SaleTemplate(template_name)
    success_xml = sale_template.success_xml()
    self._apply_commerce_notice(success_xml)
    # Grab the mailer, since it's an object and not a string.
    mailer = values.pop('mailer_instance')
    # Apply remaining text values to the template.
    for key, value in values.items():
        for e in CSSSelector('#' + key)(success_xml):
            e.text = value
    # Allow the mailer to manipulate the page.
    for e in CSSSelector('#simplsale-email-notice')(success_xml):
        mailer.apply_notice(e)
    # Render.
    return XHTML11_DTD + tounicode(success_xml, method='html')
def tag_words_in(cls, elem, tag='w'):
    """Return a copy of *elem* with every whitespace-delimited word wrapped
    in a <tag> element (default <w>).

    Words are first bracketed with {tag}...{/tag} placeholders via re.sub
    on text/tail nodes, then the placeholders are rewritten to real tags
    and the result is re-parsed, so the new markup nests correctly.
    """
    w = Dict(PATTERN=re.compile("([^\s]+)"),
             REPLACE=r'{%s}\1{/%s}' % (tag, tag),
             OMIT_ELEMS=[])

    def tag_words(e):
        # Wrap words in e.text, then recurse into children (skipping
        # omitted tags) and wrap each child's tail text as well.
        e.text = re.sub(w.PATTERN, w.REPLACE, e.text or '')
        for ch in e:
            if ch.tag not in w.OMIT_ELEMS:
                tag_words(ch)
                ch.tail = re.sub(w.PATTERN, w.REPLACE, ch.tail or '')

    # Work on a serialized copy so the caller's element stays untouched.
    new_elem = XML.fromstring(etree.tounicode(elem))
    tag_words(new_elem)
    s = etree.tounicode(new_elem)
    # Turn the placeholders into real start/end tags, then re-parse.
    s = s.replace('{%s}' % tag, '<%s>' % tag).replace('{/%s}' % tag, '</%s>' % tag)
    new_elem = XML.fromstring(s)
    return new_elem
def __init__(self, datapath):
    """Class for operating on a single WMS source.

    Wraps the NetCDF handler for *datapath* in a webtest TestApp, performs
    a GetCapabilities request and parses it with owslib.  If parsing fails,
    the capabilities XML is pretty-printed for debugging and the exception
    is re-raised.
    """
    self.datapath = datapath
    self.handler = webtest.TestApp(NetCDFHandler(datapath))
    self.path_info = '/' + datapath + '.wms'
    # Find this directory
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # Baseline pydap WMS configuration applied to every request.
    self.base_env = {
        'pydap.config': {
            'pydap.responses.wms.fill_method': 'contourf',
            'pydap.responses.wms.paletted': True,
            'pydap.responses.wms.allow_eval': True,
            'pydap.responses.wms.colorfile': dir_path + '/colors.json',
            'pydap.responses.wms.styles_file': dir_path + '/styles.json',
            'pydap.responses.wms.max_age': 600,
            'pydap.responses.wms.s_maxage': 93600,
            'pydap.responses.wms.max_image_size': 16777216,
            'pydap.responses.wms.localCache': True,
            'pydap.responses.wms.redis': False,
            'pydap.responses.wms.redis.host': 'localhost',
            'pydap.responses.wms.redis.port': 6379,
            'pydap.responses.wms.redis.db': 0,
            'pydap.responses.wms.redis.redis_expiration_time': 604800,
            'pydap.responses.wms.redis.distributed_lock': True
        }
    }
    # Default query parameters for GetMap / GetCapabilities requests.
    self.base_query_map = {'SERVICE': 'WMS',
                           'REQUEST': 'GetMap',
                           'VERSION': '1.3.0',
                           'STYLES': '',
                           'FORMAT': 'image/png',
                           'TRANSPARENT': 'TRUE',
                           'HEIGHT': 512,
                           'WIDTH': 512,
                           'BBOX': '-180.0,-90.0,180.0,90.0',
                           'CRS': 'EPSG:4326'}
    self.base_query_cap = {'SERVICE': 'WMS',
                           'REQUEST': 'GetCapabilities',
                           'VERSION': '1.3.0'}
    env = self.base_env.copy()
    env['QUERY_STRING'] = urllib.parse.urlencode(self.base_query_cap)
    response = self.get(params=self.base_query_cap, extra_environ=env,
                        status=200)
    self.xml = response.normal_body
    try:
        self.wms = WebMapService(self.path_info, xml=self.xml,
                                 version='1.3.0')
    except Exception:
        # Was a bare "except:"; narrowed so KeyboardInterrupt/SystemExit
        # still propagate.  Dump the offending XML before re-raising.
        print('PATH_INFO', self.path_info)
        parser = etree.XMLParser(remove_blank_text=True)
        file_obj = io.BytesIO(self.xml)
        tree = etree.parse(file_obj, parser)
        x_str = etree.tounicode(tree, pretty_print=True)
        print('XML', x_str)
        raise
def evaluateXPath(path, element):
    """Evaluate XPath *path* against *element*, normalizing the result.

    Prefers the PyXML xml.xpath implementation when it is importable;
    otherwise falls back to lxml, converting lxml results back into
    minidom-style nodes.  Boolean results are returned as-is (not wrapped
    in a list); everything else comes back as a list.
    """
    try:
        import xml.dom
        from xml.xpath import Evaluate
        result = Evaluate(path, element)
        if hasattr(result, '__iter__'):
            # Replace attribute nodes by their string values.
            for i in range(len(result)):
                if isinstance(result[i], xml.dom.Node) and result[i].nodeType == xml.dom.Node.ATTRIBUTE_NODE:
                    result[i] = result[i].value
        elif type(result) == bool:
            return result
        else:
            result = [result]
        return result
    except ImportError:
        # Implementation for etree
        from lxml.etree import XPath, fromstring, tounicode
        # returns a list of _ElementStringResult
        buf = toPrettyXML(element)
        elist = XPath(path).evaluate(fromstring(buf))
        nodelist = list()
        # if is iterable
        if hasattr(elist, '__iter__'):
            for eelement in elist:
                # either the returnlist is a stringlist or a element list
                if isinstance(eelement, basestring):
                    nodelist.append(eelement)
                else:
                    # Round-trip lxml element through a string into minidom.
                    nodelist.append(parseXMLString(tounicode(eelement)).documentElement)
        elif type(elist) == bool:
            return elist
        else:
            nodelist.append(elist)
        return nodelist
def __init__(self, tracks, filename=None):
    """Build an XSPF playlist HTTP response for *tracks*.

    :param tracks: iterable of track objects (title via the parent
        instance, length in seconds, file URL).
    :param filename: download filename; defaults to 'playlist.xspf'.
    """
    if filename is None:
        filename = 'playlist.xspf'
    # XSPF default namespace.
    NSMAP = {
        None: 'http://xspf.org/ns/0/',
    }
    playlist = etree.Element('playlist', nsmap=NSMAP, attrib={
        'version': '1',
    })
    track_list = etree.SubElement(playlist, 'trackList')
    for track in tracks:
        elem = etree.SubElement(track_list, 'track')
        title = etree.SubElement(elem, 'title')
        title.text = track.get_parent_instance().title
        duration = etree.SubElement(elem, 'duration')
        # XSPF durations are expressed in milliseconds.
        duration.text = unicode(track.length * 1000)
        location = etree.SubElement(elem, 'location')
        # Forces plain http — presumably to match the serving host; confirm.
        location.text = track.file.url().replace('https:', 'http:')
    super(XSPFResponse, self).__init__(
        etree.tounicode(playlist),
        content_type='application/xspf+xml',
    )
    self['Content-Disposition'] = 'attachment; filename=%s' % filename
def Element(cls, s, *args):
    """given a string s and string *args, return an Element."""
    # Serialize any Element arguments so they can be %-interpolated as text.
    sargs = [etree.tounicode(a) if type(a) == etree._Element else a
             for a in args]
    t = etree.tounicode(s) if type(s) == etree._Element else s
    if sargs:
        return XML.fromstring(t % tuple(sargs))
    return XML.fromstring(t)
def serialized(self): """ Return the node serialized in string form. (Wrapper for etree.tounicode()) """ return etree.tounicode(self.node)
def test_all_basic_feed_with_one_item(self):
    # Fetch the Atom feed, pretty-print it, and compare against the expected
    # document (timestamps and item id are interpolated from the fixture).
    response = self.app.get('/results/all-basic.atom')
    root = etree.XML(response.content)
    xml_pretty = etree.tounicode(root, pretty_print=True)
    result_event = ResultEvent.objects.first()
    expected = '''<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-gb">
  <title>Election results from example.com</title>
  <link href="http://example.com/" rel="alternate"/>
  <link href="http://example.com/results/all-basic.atom" rel="self"/>
  <id>http://example.com/</id>
  <updated>{updated}</updated>
  <entry>
    <title>Tessa Jowell (Labour Party) won in Member of Parliament for Dulwich and West Norwood</title>
    <link href="http://example.com/#{item_id}" rel="alternate"/>
    <published>{updated}</published>
    <updated>{updated}</updated>
    <author>
      <name>john</name>
    </author>
    <id>http://example.com/#{item_id}</id>
    <summary type="html">A example.com volunteer recorded at {space_separated} that Tessa Jowell (Labour Party) won the ballot in Member of Parliament for Dulwich and West Norwood, quoting the source 'Seen on the BBC news').</summary>
  </entry>
</feed>
'''.format(
        updated=rfc3339_date(result_event.created),
        space_separated=result_event.created.strftime("%Y-%m-%d %H:%M:%S"),
        item_id=result_event.id,
    )
    self.compare_xml(expected, xml_pretty)
def remove_range(cls, elem, end_elem, delete_end=True):
    """delete everything from elem to end_elem, including elem.
    if delete_end==True, also including end_elem; otherwise, leave it."""
    # Walk forward through siblings, removing each, until we reach end_elem
    # itself or an element whose subtree contains it.
    while elem is not None and elem != end_elem and end_elem not in elem.xpath("descendant::*"):
        parent = elem.getparent()
        nxt = elem.getnext()
        parent.remove(elem)
        if DEBUG == True:
            print(etree.tounicode(elem))
        elem = nxt
    if elem == end_elem:
        # Hit the end marker directly.
        if delete_end == True:
            cls.remove(end_elem, leave_tail=True)
    elif elem is None:
        # Ran off the end of this sibling list: continue the deletion at the
        # parent's following sibling.
        if parent.tail not in [None, '']:
            parent.tail = ''
        cls.remove_range(parent.getnext(), end_elem)
        XML.remove_if_empty(parent)
    elif end_elem in elem.xpath("descendant::*"):
        # end_elem lies inside the current element: clear its leading text
        # and descend into its first child.
        if DEBUG == True:
            print(elem.text)
        elem.text = ''
        cls.remove_range(elem.getchildren()[0], end_elem)
        XML.remove_if_empty(elem)
    else:
        print("LOGIC ERROR", file=sys.stderr)
def trans_entry(data):
    """Translate a dictionary-entry XML blob into an HTML page (UTF-8 bytes).

    Builds a header from the headword and part-of-speech list, attaches
    optional pronunciation-audio metadata, then appends the rendered assets
    and body.
    """
    r = []
    meta = {}
    try:
        root = et.fromstring(data)
        head = root.find('Head')
        title = _get_text_nr(head.find('HWD/BASE'))
        poslist = head.findall('POS')
        if poslist:
            title += ' ({0})'.format(
                ', '.join(_get_text_nr(pos) for pos in poslist))
    except Exception:
        # Best effort: a malformed head yields an empty title.  (Was a bare
        # "except:", which also swallowed KeyboardInterrupt/SystemExit.)
        title = ""
    try:
        pron_gb = head.find('Audio[@resource="GB_HWD_PRON"]')
        if pron_gb is not None:
            meta['gb_pron'] = pron_gb.get('topic').split('/')[-1]
        pron_us = head.find('Audio[@resource="US_HWD_PRON"]')
        if pron_us is not None:
            meta['us_pron'] = pron_us.get('topic').split('/')[-1]
    except Exception:
        # Pronunciation metadata is optional; ignore any lookup failure.
        pass
    r.append(_build_header(['entry'], title=title, meta=meta))
    r.append(et.tounicode(_trans_assets(root), pretty_print=True,
                          method='html'))
    r.append(body2html(root))
    r.append('</body></html>')
    return enc_utf8(''.join(r))
def transform_misused_divs_into_paragraphs(doc):
    """Turn all divs that don't have children block level elements into p's

    Since we can't change the tree as we iterate over it, we must do this
    before we process our document.

    The idea is that we process all divs and if the div does not contain
    another list of divs, then we replace it with a p tag instead appending
    it's contents/children to it.
    """
    for elem in doc.iter(tag='div'):
        if any(child.tag == 'div' for child in elem.getchildren()):
            # Contains nested divs — leave it alone.
            continue
        # Leaf div: serialize it, rewrite the outer div tags into <p>, and
        # swap the rebuilt node in for the old one.
        LNODE.log(elem, 1, 'Turning leaf <div> into <p>')
        serialized = tounicode(elem).strip()
        opened = re.sub(r'^<\s*div', '<p', serialized)
        rebuilt = re.sub(r'div>$', 'p>', opened)
        elem.getparent().replace(elem, fromstring(rebuilt))
    return doc
def build_section_by_section(sxs, fr_start_page, previous_label):
    """Given a list of xml nodes in the section by section analysis,
    pull out hierarchical data into a structure. Previous label is
    carried along to merge analyses of the same section."""
    structures = []
    while len(sxs):    # while sxs: is deprecated
        cfr_part = previous_label.split('-')[0]
        # Split off one (title, text, sub-sections) group; the remainder of
        # sxs is what is left to process on the next iteration.
        title, text_els, sub_sections, sxs = split_into_ttsr(sxs, cfr_part)
        page = find_page(title, title.sourceline, fr_start_page)
        paragraph_xmls = [deepcopy(el) for el in text_els
                          if el.tag == 'P' or el.tag == 'FP']
        footnotes = []
        for p_idx, paragraph_xml in enumerate(paragraph_xmls):
            spaces_then_remove(paragraph_xml, 'PRTPAGE')
            spaces_then_remove(paragraph_xml, 'FTREF')
            swap_emphasis_tags(paragraph_xml)
            # Anything inside a SU can also be ignored
            for su in paragraph_xml.xpath('./SU'):
                su_text = etree.tounicode(su)
                # Record where the footnote reference sat in the paragraph
                # text before the SU node is removed.
                footnotes.append({
                    'paragraph': p_idx,
                    'reference': su.text,
                    'offset': body_to_string(paragraph_xml).find(su_text)})
                # Re-attach the SU's tail text to the previous sibling (or
                # the parent's text) before dropping the node.
                if su.tail and su.getprevious() is not None:
                    su.getprevious().tail = (su.getprevious().tail or '')
                    su.getprevious().tail += su.tail
                elif su.tail:
                    su.getparent().text = (su.getparent().text or '')
                    su.getparent().text += su.tail
                su.getparent().remove(su)
        paragraphs = [body_to_string(el) for el in paragraph_xmls]
        label_for_children = previous_label
        labels = parse_into_labels(title.text, cfr_part)
        if labels:
            label_for_children = labels[-1]
        # recursively build children. Be sure to give them the proper label
        children = build_section_by_section(sub_sections, page,
                                            label_for_children)
        next_structure = {
            'page': page,
            'title': add_spaces_to_title(title.text),
            'paragraphs': paragraphs,
            'children': children,
            'footnote_refs': footnotes,
        }
        if (labels and    # No label => subheader
                # Concatenate if repeat label or backtrack
                not all(label == previous_label
                        or is_backtrack(previous_label, label)
                        for label in labels)):
            previous_label = labels[-1]
            next_structure['labels'] = labels
        structures.append(next_structure)
    return structures
def _le_xml(self, arquivo):
    """Load XML from *arquivo* into self._xml.

    Accepts an already-parsed element (re-serialized), an XML string, a
    file path, or a file-like object.  Returns True on success, False when
    *arquivo* is None.
    """
    if arquivo is None:
        return False
    if not isinstance(arquivo, basestring):
        # Element or tree: serialize it so the string branch below runs.
        arquivo = etree.tounicode(arquivo)
    # NOTE(review): always true at this point (None already returned above).
    if arquivo is not None:
        if isinstance(arquivo, basestring):
            if isinstance(arquivo, str):
                # Python 2 byte string: normalize to unicode.
                arquivo = unicode(arquivo.encode('utf-8'))
            if '<' in arquivo:
                # Inline XML document.
                self._xml = etree.fromstring(tira_abertura(arquivo).encode('utf-8'))
            else:
                # Otherwise treat the string as a file path.
                arq = open(arquivo)
                txt = b''.join(arq.readlines())
                txt = unicode(txt.decode('utf-8'))
                txt = tira_abertura(txt)
                arq.close()
                self._xml = etree.fromstring(txt)
        else:
            # File-like object: let lxml parse it directly.
            self._xml = etree.parse(arquivo)
        return True
    return False
def write(self): """ Print converted rules """ print(etree.tounicode(self.new_rules, pretty_print=True)) print("<!-- Failed convert. Please, handle it manually-->\n") print("\n".join(self.fails))
def check(proxy):
    """Check whether *proxy* still works by fetching a geo-IP page through it.

    Returns the cleaned HTML of the geo-info sidebar, or the Chinese
    "proxy expired" message when the request fails or yields no data.
    """
    # url = 'https://ip.cn/'
    # url = 'https://httpbin.org/ip'
    url = 'https://geoiptool.com/zh/'
    proxies = {
        'http': 'http://{}'.format(proxy),
        'https': 'http://{}'.format(proxy)
    }
    try:
        # verify=False disables TLS verification — presumably to tolerate
        # the target's certificate; confirm this is intentional.
        res = requests.get(url, proxies=proxies, verify=False).text
        data = etree.HTML(res)
    except Exception as e:
        print(e)
        return '当前代理已经失效'
    else:
        # NOTE(review): url is hard-coded above, so only the geoiptool
        # branch below is reachable; the ip.cn branch is dead code.
        if url == 'https://ip.cn/':
            result = data.xpath('//div[@id="result"]')[0]
            content = html.tostring(result)
            return content
        elif (url == 'https://geoiptool.com/zh/' and data):
            content = data.xpath('//div[contains(@class, "sidebar-data")]')[0] if data.xpath('//div[contains(@class, "sidebar-data")]') else None
            if content is None:
                return '当前代理已经失效'
            content = etree.tounicode(content)
            # Strip images and responsive-hide classes from the snippet.
            content = re.sub(r'<img.*?>', '', content)
            content = re.sub(r'hidden-xs hidden-sm', '', content)
            return content
        else:
            return '当前代理已经失效'
def html_to_plaintext(text):
    """try to get readable plaintext from the G+ html. Lxml doesn't seem to do <br> elements properly."""
    # Replace <br /> with spaces up front, since lxml's text extraction
    # otherwise drops the break entirely.
    flattened = text.replace("<br />", " ")
    tree = etree.parse(StringIO(flattened), etree.HTMLParser())
    return etree.tounicode(tree.getroot(), method="text")
def test_parentheses_cleanup(original, new_text):
    """Helper function to verify that the XML is transformed as expected"""
    # Wrap the input in a <P> inside a PART, run the preprocessor, then
    # check that the serialized paragraph matches the expected text.
    with XMLBuilder("PART") as ctx:
        ctx.child_from_string(u"<P>{0}</P>".format(original))
    preprocessors.parentheses_cleanup(ctx.xml)
    assert etree.tounicode(ctx.xml[0]) == "<P>{0}</P>".format(new_text)
def as_stringio(self): """Returns a ``StringIO`` representation of the ``ResultDocument`` instance. """ buf = etree.tounicode(self._document, pretty_print=True) return StringIO(buf)
def cross_references(self):
    """
    Return a list of CrossReference objects representing any
    cross-references found in the definition.
    """
    try:
        # Cached after the first computation.
        return self._xrefs
    except AttributeError:
        self._xrefs = [CrossReference(xref_node) for xref_node in
                       self.node_stripped().findall('.//xr')]
        if self._xrefs:
            # Add a 'type' attribute to each cross-reference,
            # determined by the preceding text
            for xref in self._xrefs:
                xref.type = None  # default value
            # Split definitions into sections, one section per xref,
            # with the xref at the end of the section. The 'sections'
            # list should then be aligned with the self._xrefs list.
            serialized = etree.tounicode(self.node_stripped())
            sections = []
            for section in serialized.split('</xr>'):
                section = XREF_STRIPPER.edit(section.lower())
                sections.append(section)
            # Classify each xref by the marker phrase preceding it.
            for section, xref in zip(sections, self._xrefs):
                if EQUALS_XREF.search(section):
                    xref.type = 'equals'
                elif 'see <xr' in section:
                    xref.type = 'see'
                elif 'also <xr' in section or 'cf. <xr' in section:
                    xref.type = 'cf'
                elif 'opp. <xr' in section:
                    xref.type = 'opposite'
        return self._xrefs
def tostring(self, root=None, doctype=None, pretty_print=True):
    """return the content of the XML document as a unicode string"""
    # Fall back to the document's own root / doctype when not overridden.
    target = self.root if root is None else root
    effective_doctype = doctype or self.info.doctype
    return etree.tounicode(target, doctype=effective_doctype,
                           pretty_print=pretty_print)
def parse_nodes(self, xml):
    """Derive a flat list of nodes from this xml chunk. This does nothing
    to determine node depth"""
    nodes = []
    for child in xml.getchildren():
        # First matcher that claims this child wins.
        matcher = next((m for m in self.MATCHERS if m.matches(child)), None)
        if matcher is None:
            logger.warning("No tag match\n%s", etree.tounicode(child))
        else:
            nodes.extend(matcher.derive_nodes(child, processor=self))
    # Trailing stars don't matter; slightly more efficient to ignore them
    while nodes and nodes[-1].label[0] in mtypes.stars:
        nodes = nodes[:-1]
    return nodes
def get_mo_fail_response(text, ack, status_code):
    """
    Builds xml return in case of error (status_code <> 200)
    :param text: error description
    :param ack: message's arrival acknowledgement
    :param status_code: request status code
    :return: xml
    """
    body = etree.Element('smsmo_response', ack=str(ack))
    # Empty placeholder children expected by the response schema.
    for child_tag in ("message_id", "source", "large_account",
                      "request_datetime"):
        etree.SubElement(body, child_tag)
    description = etree.SubElement(body, "description", code=str(status_code))
    description.text = str(text)
    return etree.tounicode(body)
def build_yang_response(self, root, request, yang_options=None, custom_rpc=False):
    """Convert *root* into a YANG-modelled XML response for *request*.

    On any failure, marks self.rpc_response as an error carrying a BadMsg
    node and returns None instead of raising.
    """
    try:
        self.custom_rpc = custom_rpc
        yang_xml = self.to_yang_xml(root, request, yang_options, custom_rpc)
        log.info('yang-xml',
                 yang_xml=etree.tounicode(yang_xml, pretty_print=True))
        return self.build_xml_response(request, yang_xml, custom_rpc)
    except Exception as e:
        # Record the failure on the rpc response rather than propagating.
        log.exception('error-building-yang-response', request=request,
                      xml=etree.tostring(root))
        self.rpc_response.is_error = True
        self.rpc_response.node = ncerror.BadMsg(request)
        return
def xls2xml(xls_name):
    """Convert the first sheet of *xls_name* into student1.xml.

    Column 0 becomes the (integer) key, the remaining columns are stored as
    a stringified list; the table is embedded as JSON text inside a
    <students> element.
    """
    with xlrd.open_workbook(xls_name) as wb:
        ws = wb.sheet_by_index(0)
        table = OrderedDict()
        for i in range(ws.nrows):
            key = int(ws.row_values(i)[0])
            value = str(ws.row_values(i)[1:])
            table[key] = value
    # The XML prolog promises UTF-8, so open the file with that encoding
    # explicitly instead of relying on the platform default (which broke
    # the non-ASCII comment on non-UTF-8 locales).
    with open("student1.xml", 'w', encoding='utf-8') as f:
        root = etree.Element("root")
        e_root = etree.ElementTree(root)
        e_students = etree.SubElement(root, 'students')
        e_students.text = '\n' + str(
            json.dumps(table, indent=4, ensure_ascii=False)) + '\n'
        e_students.append(
            etree.Comment('\n 学生信息表\n "id" : [名字,数学,语文,英语]\n'))
        f.write('<?xml version="1.0" encoding="UTF-8"?>'
                + etree.tounicode(e_root.getroot()))
def test_parse_colletion_metdata_without_print_style(tmpdir, litezip_valid_litezip):
    """Metadata parsing must tolerate a collection.xml lacking print-style."""
    working_dir = tmpdir.mkdir('col')
    collection_file = working_dir.join('collection.xml')
    # Copy over and modify the collection.xml file.
    with (litezip_valid_litezip / 'collection.xml').open() as origin:
        xml = etree.parse(origin)
    # Drop the print-style parameter from the parsed document.
    elm = xml.xpath('//col:param[@name="print-style"]',
                    namespaces=COLLECTION_NSMAP)[0]
    elm.getparent().remove(elm)
    collection_file.write(etree.tounicode(xml).encode('utf8'))
    assert 'print-style' not in collection_file.read()
    # Test the parser doesn't error when a print-style is missing.
    # given a Collection object,
    model = parse_collection(Path(working_dir))
    # parse the metadata into a CollectionMetadata,
    md = parse_collection_metadata(model)
    assert md.print_style is None
def to_svg(self):
    '''
    Returns:
        unicode : SVG XML source with up-to-date electrode channel lists.
    '''
    xml_root = etree.parse(self.svg_filepath)
    # Only electrodes whose channel list changed need rewriting.
    changed = self.diff_electrode_channels()
    evaluator = XPathEvaluator(xml_root, namespaces=INKSCAPE_NSMAP)
    for electrode_id, (original_channels, updated_channels) in changed.iterrows():
        matches = evaluator.evaluate('//svg:path[@id="%s"]' % electrode_id)
        for path_element in matches:
            path_element.attrib['data-channels'] = ','.join(map(str, updated_channels))
    return etree.tounicode(xml_root)
def test_mets_dnx(): """Test basic construction of METS DNX""" ie_dc_dict = {"dc:title": "test title"} mets = mdf.build_mets( ie_dmd_dict=ie_dc_dict, pres_master_dir=os.path.join( os.path.dirname(os.path.realpath(__file__)), 'data', 'test_batch_1', 'pm'), modified_master_dir=os.path.join( os.path.dirname(os.path.realpath(__file__)), 'data', 'test_batch_1', 'mm'), input_dir=os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'test_batch_1'), generalIECharacteristics=[{ 'submissionReason': 'bornDigitalContent', 'IEEntityType': 'periodicIE' }], ) print(ET.tounicode(mets, pretty_print=True))
def xls_xml(file_name):
    """Read the 'city' sheet of the .xls *file_name* and write it to city.xml.

    Column 0 of each row becomes the key, the remaining columns the value;
    the collected dict is embedded (via str()) as the text of a <citys>
    element.
    """
    # Read rows from the xls file.
    data = {}
    excel = xlrd.open_workbook(file_name)
    table = excel.sheet_by_name('city')
    nrows = table.nrows
    for i in range(nrows):
        key = str(int(table.row_values(i)[0]))
        value = str(table.row_values(i)[1:])
        data[key] = value
    # Write to the xml file; the context manager closes the handle even if
    # serialization fails (the original leaked it on exceptions).
    with codecs.open('city.xml', 'w', 'utf-8') as output:
        root = etree.Element('root')
        citys_xml = etree.ElementTree(root)
        citys = etree.SubElement(root, 'citys')
        citys.append(etree.Comment('城市信息'))
        citys.text = str(data)
        output.write(etree.tounicode(citys_xml.getroot()))
def get_body(self, configs, channel_id, msisdn, interface, subscription_type):
    """
    Builds xml request body
    :param configs: Cache object application.settings['config']
    :param channel_id: Channel id number
    :param msisdn: Telephone number
    :param interface: Interface number (optional; omitted when falsy)
    :param subscription_type: Subscription type number (optional)
    :return: xml
    """
    body = etree.Element('tangram_request',
                         company_id=str(configs['company_id']),
                         service_id=str(configs['service_id']),
                         user=str(configs['user']))
    # interface is an optional attribute of the root element.
    if interface:
        body.attrib['interface'] = str(interface)
    provisioning = etree.SubElement(body, "provisioning")
    operation = etree.SubElement(provisioning, "operation",
                                 code=str(configs['operation_code']))
    operation.text = configs['operation_description']
    channel = etree.SubElement(provisioning, "channel_id")
    channel.text = str(channel_id)
    destination = etree.SubElement(provisioning, "destination")
    destination.text = str(msisdn)
    # subscription_type is an optional attribute of <destination>.
    if subscription_type:
        destination.attrib['subscription_type'] = str(subscription_type)
    notification = etree.SubElement(provisioning, "notification",
                                    type=str(configs['notification_type']),
                                    calltype=str(
                                        configs['notification_calltype']))
    notification.text = configs['notification_callback']
    # Unix timestamp of when the request was built.
    request_datetime = etree.SubElement(provisioning, "request_datetime")
    request_datetime.text = str(int(time.time()))
    return etree.tounicode(body)
def xls2xml(filename, outfile):
    """Convert the first sheet of an .xls workbook to a simple XML report.

    Each row contributes one OrderedDict entry: column 0 (an integer id,
    stringified) maps to the stringified list of the remaining columns.  The
    dict is serialised as pretty-printed JSON into the text of a <students>
    element, preceded by an XML declaration.

    :param filename: path of the .xls workbook to read
    :param outfile: path of the XML file to write
    """
    with xlrd.open_workbook(filename) as excel:
        # table = excel.sheet_by_name('student')
        table = excel.sheet_by_index(0)
        data = OrderedDict()
        for i in range(table.nrows):
            row = table.row_values(i)  # fetch the row once instead of twice
            data[str(int(row[0]))] = str(row[1:])

    root = etree.Element('root')
    students_xml = etree.ElementTree(root)
    students = etree.SubElement(root, 'students')
    students.append(etree.Comment('\n\t学生信息表\n\t"d" :[名字, 数学, 语文, 英语]\n'))
    # Fix: the original assigned students.text twice; the first (plain-text)
    # assignment was dead code, immediately overwritten by the JSON dump.
    students.text = '\n' + str(json.dumps(data, indent=4, ensure_ascii=False)) + '\n'
    # Context manager guarantees the handle is closed even if the write fails.
    with codecs.open(outfile, 'w', 'utf-8') as output:
        output.write('<?xml version="1.0" encoding="UTF-8"?>\n'
                     + etree.tounicode(students_xml.getroot()))
def __add_certainty(self, text, certainty):
    """Append *certainty* to the TEI header's uncertainty classCode list,
    creating the annotation list first when the document lacks one."""
    # Same query is needed before and after list creation — define it once.
    query = (
        '//default:teiHeader'
        '//default:classCode[@scheme="http://providedh.eu/uncertainty/ns/1.0"]'
    )
    tree = etree.fromstring(text)
    nodes = tree.xpath(query, namespaces=NAMESPACES)
    if not nodes:
        tree = self.__create_annotation_list(tree)
        nodes = tree.xpath(query, namespaces=NAMESPACES)
    nodes[0].append(certainty)
    return etree.tounicode(tree)
def __add_annotator(self, text, annotator):
    """Append *annotator* to the 'PROVIDEDH Annotators' listPerson in the
    TEI header, creating the list first when the document lacks one."""
    # Same query is needed before and after list creation — define it once.
    query = (
        '//default:teiHeader'
        '//default:listPerson[@type="PROVIDEDH Annotators"]'
    )
    tree = etree.fromstring(text)
    nodes = tree.xpath(query, namespaces=NAMESPACES)
    if not nodes:
        tree = self.__create_list_person(tree)
        nodes = tree.xpath(query, namespaces=NAMESPACES)
    nodes[0].append(annotator)
    return etree.tounicode(tree)
def send_notification(self, data, *params):
    """Wrap *data* in a NETCONF <notification> element (with the current
    eventTime) and send it to every open session on every running socket."""
    notification = etree.Element(
        "{{{}}}notification".format(NSMAP['ncEvent']))
    notification.append(util.leaf_elm(
        'eventTime', date_time_string(datetime.datetime.now())))
    notification.append(data)
    payload = etree.tounicode(notification, pretty_print=True)
    logger.debug("notification msg is:\n%s", str(payload))
    for socket in self.server.sockets:
        # Skip sockets explicitly marked as not running.
        if socket.running is False:
            continue
        for session in socket.sessions:
            # Skip sessions explicitly marked as not open.
            if session.session_open is False:
                continue
            logger.debug(
                "Sending to client, session id: %d, ip:%s, port:%d",
                session.session_id, socket.client_addr[0],
                socket.client_addr[1])
            session.send_message(payload)
    return
def xls_xml(file_name):
    """Read the 'student' sheet of an .xls workbook and dump it to
    ``students.xml``.

    Each row contributes one dict entry: column 0 (an integer id,
    stringified) maps to the stringified list of the remaining columns.  The
    whole dict's ``str()`` representation becomes the text of a <students>
    element under <root>.

    :param file_name: path of the .xls workbook to read
    """
    data = {}
    excel = xlrd.open_workbook(file_name)
    table = excel.sheet_by_name('student')
    # print(table.row_values(0))
    for i in range(table.nrows):
        row = table.row_values(i)  # fetch the row once instead of twice
        data[str(int(row[0]))] = str(row[1:])

    root = etree.Element('root')
    students_xml = etree.ElementTree(root)
    students = etree.SubElement(root, 'students')
    students.append(etree.Comment('学生信息表\n"id": [名字,数学,语文,英语]'))
    students.text = str(data)
    # Context manager guarantees the handle is closed even if serialisation
    # or the write raises (the bare open()/close() pair leaked on error).
    with codecs.open('students.xml', 'w', 'utf-8') as output:
        output.write(etree.tounicode(students_xml.getroot()))
def getProvince(self):
    """Fetch the site's landing page and return the URL of every province.

    Also appends each href to ``self.hrefs`` as a side effect.

    :return: [[href, province_detail_name], ...]
    """
    url = 'http://www.cc10000.cn/0/'
    options = {
        'method': 'get',
        'url': url,
        'headers': self.headers,
        'timeout': _time_out
    }
    response = Request.basic(options, resend_times=4)
    selector = etree.HTML(response.text)
    content = etree.tounicode(selector.xpath('//body/div[6]')[0])
    # Fix: raw string — '\d' inside a plain literal is an invalid escape
    # sequence (DeprecationWarning, and SyntaxWarning in newer CPython).
    href_and_name = re.findall(r'href="(/\d.*?)">(.*?)<', content)
    # Keep only provinces, substituting the detailed province name for the
    # short one via the parallel config lists.
    seq = [[
        i[0], config.ROOT_DETAIL_NAMES[config.ROOT_SHORT_NAMES.index(i[1])]
    ] for i in href_and_name if i[1] in config.ROOT_SHORT_NAMES]
    self.hrefs.extend([index[0] for index in seq])
    return seq
def replace_id_and_version(model, id, version):
    """Does an inplace replacement of the given model's id and version

    :param model: module
    :type model: :class:`litezip.Collection` or :class:`litezip.Module`
    :param id: id
    :type id: str
    :param version: major and minor version tuple
    :type version: tuple of int
    """
    # Parse the current content, rewrite the two metadata elements, and
    # serialise it back over the same file.
    with model.file.open('rb') as fb:
        xml = etree.parse(fb)

    def _first(query):
        # First node matching an md:-namespaced xpath in the parsed doc.
        return xml.xpath(query, namespaces=COLLECTION_NSMAP)[0]

    _first('//md:content-id').text = id
    _first('//md:version').text = convert_version_to_legacy_version(version)

    with model.file.open('wb') as fb:
        fb.write(etree.tounicode(xml).encode('utf8'))
def log(self, node, action, description):
    """Write out our log info based on the node and event specified.

    We only log this information if we're are DEBUG loglevel
    """
    # Guard clause: nothing to do unless logging is active.
    if not self._active:
        return
    content = tounicode(node)
    hasher = md5()
    try:
        hasher.update(content.encode('utf-8', errors="replace"))
    except Exception as exc:
        LOG.error("Cannot hash the current node." + str(exc))
    # Short (8-hex-char) fingerprint identifying this node in the output.
    hash_id = hasher.hexdigest()[0:8]
    print("{0} :: {1}\n{2}".format(
        hash_id,
        description,
        # Single line, truncated preview of the node's markup.
        content.replace("\n", "")[0:202],
    ))
def get_clinical_document(access_token, hie_profile):
    """get member's clinical data from HIXNY (CDA XML), convert to FHIR (JSON),
    return both.

    :param access_token: OAuth bearer token for the HIE API call
    :param hie_profile: profile supplying ``mrn`` and ``data_requestor``
    :return: dict with 'cda_content' (unicode CDA XML) and 'fhir_content'
        (FHIR JSON text); both None when the response carries no
        ClinicalDocument element.
    """
    # NOTE(review): the body is built by raw %-interpolation; mrn and
    # data_requestor are not XML-escaped — confirm they can never contain
    # markup characters.
    request_xml = """
    <GETDOCUMENTPAYLOAD>
    <MRN>%s</MRN>
    <DATAREQUESTOR>%s</DATAREQUESTOR>
    </GETDOCUMENTPAYLOAD>
    """ % (
        hie_profile.mrn,
        hie_profile.data_requestor,
    )
    print(request_xml)
    # NOTE(review): verify=False disables TLS certificate validation —
    # flagged for security review; presumably required for an internal
    # endpoint, confirm.
    response = requests.post(
        settings.HIE_GETDOCUMENT_API_URI,
        verify=False,
        headers={
            'Content-Type': 'application/xml',
            'Authorization': "Bearer %s" % (access_token)
        },
        data=request_xml,
    )
    response_xml = etree.XML(response.content)
    print(response_xml)
    # Locate the HL7-namespaced ClinicalDocument inside the response
    # envelope; absent element means no clinical data was returned.
    cda_element = response_xml.find("{%(hl7)s}ClinicalDocument" % NAMESPACES)
    if cda_element is not None:
        cda_content = etree.tounicode(cda_element)
        fhir_content = cda2fhir(cda_content).decode('utf-8')
        result = {
            'cda_content': cda_content,
            'fhir_content': fhir_content,
        }
    else:
        result = {
            'cda_content': None,
            'fhir_content': None,
        }
    return result
def ocrdata():
    """Import OCR text lines posted as (optionally gzipped) JSON into the
    PAGE-XML stored for each page.

    Expected payload shape:
        {"index": <TextEquiv index>,
         "ocrdata": {book_name: {page_name: {line_id: text, ...}, ...}, ...}}

    For every referenced TextLine, the TextEquiv at the given index (created
    if missing) gets its Unicode child set to the posted text.  Each page's
    ``no_lines_ocr`` counter is recomputed and its serialised XML updated.

    :return: summary string with the number of imported lines
    """
    # Transparently handle gzip-compressed request bodies.
    if "Content-Encoding" in request.headers and \
            request.headers["Content-Encoding"] == "gzip":
        data = json.loads(gzip.decompress(request.data).decode("utf-8"))
    else:
        data = request.get_json()
    cnt = 0
    for bname, bdict in data["ocrdata"].items():
        b = Book.query.filter_by(name=bname).one()
        for pname, pdict in bdict.items():
            p = Page.query.filter_by(book_id=b.id, name=pname).one()
            root = etree.fromstring(p.data)
            # PAGE-XML uses a default namespace; bind it to the 'ns' prefix
            # so xpath/find can address it.
            ns = {"ns": root.nsmap[None]}
            for lid, text in pdict.items():
                linexml = root.find('.//ns:TextLine[@id="' + lid + '"]',
                                    namespaces=ns)
                if linexml is None:
                    # Unknown line id for this page: skip silently.
                    continue
                textequivxml = linexml.find(
                    './ns:TextEquiv[@index="{}"]'.format(data["index"]),
                    namespaces=ns)
                if textequivxml is None:
                    # No TextEquiv at the requested index yet — create one.
                    textequivxml = etree.SubElement(
                        linexml, "{{{}}}TextEquiv".format(ns["ns"]),
                        attrib={"index": str(data["index"])})
                unicodexml = textequivxml.find('./ns:Unicode', namespaces=ns)
                if unicodexml is None:
                    unicodexml = etree.SubElement(
                        textequivxml, "{{{}}}Unicode".format(ns["ns"]))
                unicodexml.text = text
                cnt += 1
            # Recount lines that carry OCR text (any TextEquiv with index>0)
            # and persist the updated XML back onto the page row.
            p.no_lines_ocr = int(
                root.xpath(
                    'count(//ns:TextLine'
                    '[count(./ns:TextEquiv'
                    '[@index>0])>0])',
                    namespaces=ns))
            p.data = etree.tounicode(root.getroottree())
    db_session.commit()
    return "Imported {} lines.".format(cnt)
def storeBatches(collectedRecords, firstRecord):
    """Write the collected records out in batch form (Python 2 code).

    Depending on the formats listed in the global ``config.format``, writes
    an XML batch file, a JSON batch file and/or a CouchDB bulk-docs JSON
    file, each named via pathForBatch(firstRecord, ...).

    :param collectedRecords: dict mapping record ID -> lxml record element
    :param firstRecord: index of the first record; used to name the batch
    """
    global config
    if len(collectedRecords) > 0:
        if 'xml-batch' in config.format:
            # Wrap all records in a single <records/> container element.
            XMLContainer = ET.XML('<records/>')
            for (ID, record) in collectedRecords.iteritems():
                XMLContainer.append(record)
            filePath = pathForBatch(firstRecord, 'xml')
            XMLFile = open(filePath, 'w')
            XMLString = ET.tounicode(XMLContainer).encode('UTF-8')
            XMLFile.write(XMLString)
            XMLFile.close()
            print u"XML-Batch: " + str(
                len(collectedRecords)) + u" records to »" + filePath + u"«"
        if 'json-batch' in config.format or 'couchdb-batch' in config.format:
            # Shared conversion step for both JSON output flavours.
            JSONContainer = []
            for (ID, record) in collectedRecords.iteritems():
                JSONInternal = elem_to_internal(record, strip=1)
                # Unwrap single-rooted dicts so the record's own fields sit
                # at the top level, then tag the record with its ID.
                if len(JSONInternal) == 1:
                    JSONInternal = JSONInternal.values()[0]
                JSONInternal['_id'] = ID
                JSONContainer += [JSONInternal]
            if 'json-batch' in config.format:
                filePath = pathForBatch(firstRecord, 'json')
                JSONFile = open(filePath, "w")
                JSONFile.write(simplejson.dumps(JSONContainer))
                JSONFile.close()
                print u"JSON-Batch: " + str(
                    len(collectedRecords)) + u" records to »" + filePath + u"«"
            if 'couchdb-batch' in config.format:
                filePath = pathForBatch(firstRecord, 'couch.json')
                # CouchDB's bulk-docs endpoint expects {"docs": [...]}.
                JSONContainer = {'docs': JSONContainer}
                JSONFile = open(filePath, "w")
                JSONFile.write(simplejson.dumps(JSONContainer))
                JSONFile.close()
                print u"CouchDB JSON-Batch: " + str(
                    len(collectedRecords)) + u" records to »" + filePath + u"«"
def detail_page(self, response):
    """Parse one service-of-process announcement page from the Puer
    Intermediate People's Court site and extract its fields.

    NOTE(review): as captured here the block ends inside the try with no
    except clause and without returning or storing defendant_origin_list —
    the function appears truncated; confirm against the original spider
    before relying on this documentation.

    :param response: the fetched detail-page response (provides .url/.text)
    """
    # Initialise the record fields with defaults.
    _id = ''
    _id_ = ''
    ann_type = '送达公告'
    announcer = '普洱市中级人民法院'
    defendant = ''
    defendant_origin = ''
    ann_date = ''
    ann_content = ''
    ann_html = ''
    content_url = response.url
    pdf_url = ''
    case_no = ''
    source = '普洱市中级人民法院'
    try:
        html = self.xml_xpath(response, 0)
        content_text = html.xpath('//div[@class="sswy_article_m"]//text()')
        ann_html = etree.tounicode(html.xpath('//div[@class="sswy_article_m"]')[0])
        # print(ann_html)
        # Collapse non-breaking spaces, CRLFs and all whitespace runs.
        content_p = ''.join(content_text).replace('\xa0', '')
        content_p = content_p.replace('\r\n', '')
        ann_content = ''.join(content_p.split())
        # The last date-like token (..年..月..日/号) is the announce date.
        ann_date = re.findall(r'(.{4}[年].{1,2}[月].{1,3}[日号])', ann_content)[-1]
        ann_date = self.parse_time(ann_date)
        # Everything before the first '本院' (mapped to ':') is the header.
        text = ann_content.replace('本院', ':')
        text = text.split(':')[0] + ":"
        # NOTE: the inner parentheses here are fullwidth literals （ ）,
        # matched as characters — they are not a regex group.
        case = re.findall(r'((.{4}).*?)号', text)
        print(len(case))
        print(text)
        if len(case) > 0:
            print("*******")
            # Defendant name sits between the case number's 号 and the colon.
            defendant_origin_list = re.findall(r'号(.*?):', text)[0]
            # print(defendant_origin_list)
        elif '公告' in text:
            defendant_origin_list = re.findall(r'公告(.*?):', text)[0]
def extractExamples(directory):
    """Collect and render every doc:example-tagged element from the .rml
    files in *directory*.

    Returns {tag_or_example_name: [example_info_dict, ...]} where each info
    dict carries the source file/line, the element itself, input/expected
    URLs and (after rendering) highlighted example code.

    NOTE(review): the loop nesting was reconstructed from a collapsed
    source line — both phases are taken to run per input file (so `root`
    passed to removeDocAttributes is the current file's tree); confirm
    against the original indentation.
    """
    examples = {}
    for filename in os.listdir(directory):
        if not filename.endswith('.rml'):
            continue
        rmlFile = open(os.path.join(directory, filename), 'rb')
        root = etree.parse(rmlFile).getroot()
        # Elements carrying a doc:example attribute anywhere in the tree.
        elements = root.xpath('//@doc:example/parent::*',
                              namespaces={'doc': EXAMPLE_NS})

        # Phase 1: Collect all elements
        for elem in elements:
            demoTag = elem.get(EXAMPLE_ATTR_NAME) or elem.tag
            elemExamples = examples.setdefault(demoTag, [])
            elemExamples.append({
                'filename': filename,
                'line': elem.sourceline,
                'element': elem,
                'rmlurl': INPUT_URL % filename,
                'pdfurl': EXPECTED_URL % (filename[:-4] + '.pdf')
            })

        # Phase 2: Render all elements
        removeDocAttributes(root)
        for dirExamples in examples.values():
            for example in dirExamples:
                xml = etree.tounicode(example['element']).strip()
                # Strip the doc: namespace declaration lxml serialises in.
                xml = re.sub(
                    ' ?xmlns:doc="http://namespaces.zope.org/rml/doc"', '',
                    xml)
                xml = dedent(xml)
                xml = enforceColumns(xml, 80)
                xml = highlightRML(xml)
                example['code'] = xml
        rmlFile.close()
    return examples
def test_make_instructions():
    """make_instructions turns a PUT/DELETE/MOVE token stream into the
    matching EREGS_INSTRUCTIONS document."""
    token_stream = [
        tokens.Paragraph.make(part='111'),
        tokens.Verb(tokens.Verb.PUT, active=True),
        tokens.Paragraph.make(part='222'),
        tokens.Paragraph.make(part='333'),
        tokens.Paragraph.make(part='444'),
        tokens.Verb(tokens.Verb.DELETE, active=True),
        tokens.Paragraph.make(part='555'),
        tokens.Verb(tokens.Verb.MOVE, active=True),
        tokens.Paragraph.make(part='666'),
        tokens.Paragraph.make(part='777'),
    ]
    # Expected document, built with the same XML builder.
    with XMLBuilder("EREGS_INSTRUCTIONS") as ctx:
        ctx.PUT(label=222)
        ctx.PUT(label=333)
        ctx.PUT(label=444)
        ctx.DELETE(label=555)
        ctx.MOVE(label=666, destination=777)
    expected = ctx.xml_str
    actual = etree.tounicode(amdparser.make_instructions(token_stream))
    assert expected == actual
def test_digtial_original_dnx_single_file():
    """Test that the digitalOriginal value is being properly translated
    from a boolean input to a lower-case string of 'true' or 'false' for a
    single-file METS"""
    fixture = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'data', 'test_batch_1', 'pm', 'presmaster.jpg')
    mets = mdf.build_single_file_mets(
        ie_dmd_dict={"dc:title": "test title"},
        filepath=fixture,
        generalIECharacteristics=[{
            'submissionReason': 'bornDigitalContent',
            'IEEntityType': 'periodicIE',
        }],
        digital_original=True)
    # The flag lands in the generalRepCharacteristics DNX section as a
    # lower-case string.
    grc = mets.findall('.//section[@id="generalRepCharacteristics"]')[0]
    digital_original = grc.findall('.//key[@id="DigitalOriginal"]')[0]
    assert (digital_original.text == 'true')
    print(ET.tounicode(mets, pretty_print=True))
def parse_by_br(self, response):
    """Split the nodes matched by each selector in
    self.querySelectorList_br on <br> tags and return the cleaned text
    fragments."""
    cleaned = []
    html = etree.HTML(text=response.text)
    for selector in self.querySelectorList_br:
        for node in html.xpath(selector):
            markup = etree.tounicode(node)
            # A node may serialise <br> either unclosed or self-closed.
            if '<br>' in markup:
                fragments = markup.split('<br>')
            elif '<br/>' in markup:
                fragments = markup.split('<br/>')
            else:
                print('text_l.split by br error, maybe not found br')
                fragments = []
            for fragment in fragments:
                # Strip surrounding whitespace/newlines, then drop any
                # residual markup.
                fragment = fragment.strip().replace('\n', '').replace('\r', '')
                cleaned.append(re.sub('<.*?>', '', fragment))
    return cleaned
def _ogc_filter_to_expression(prop):
    """Recursively translate an OGC Filter element tree into a CQL-style
    boolean expression string.

    :param prop: an objectified OGC filter element (And/Or/PropertyIs*)
    :return: the equivalent expression string
    :raises AssertionError: for any unsupported element; the message is the
        pretty-printed offending element
    """
    # Dispatch on substring of the (namespaced) tag name.
    # NOTE(review): substring matching means e.g. a
    # PropertyIsGreaterThanOrEqualTo tag would also match the
    # 'PropertyIsGreaterThan' branch — confirm such tags never reach here.
    if 'And' in prop.tag:
        return ' and '.join(map(_ogc_filter_to_expression, prop.iterchildren()))
    elif 'Or' in prop.tag:
        return ' or '.join(map(_ogc_filter_to_expression, prop.iterchildren()))
    elif 'PropertyIsGreaterThan' in prop.tag:
        return _compile_bin_op('>', prop.iterchildren())
    elif 'PropertyIsLessThan' in prop.tag:
        return _compile_bin_op('<', prop.iterchildren())
    elif 'PropertyIsEqualTo' in prop.tag:
        return _compile_bin_op('=', prop.iterchildren())
    elif 'PropertyIsNotEqualTo' in prop.tag:
        return _compile_bin_op('!=', prop.iterchildren())
    elif 'PropertyIsBetween' in prop.tag:
        # Between is rewritten as: name > lower AND name < upper.
        name = prop.PropertyName
        cql_lo = _compile_bin_op('>', [name, prop.LowerBoundary.Literal])
        cql_hi = _compile_bin_op('<', [name, prop.UpperBoundary.Literal])
        # NOTE(review): 'and ' lacks a leading space — unless
        # _compile_bin_op emits a trailing space this yields "...1and ...";
        # probably should be ' and ' to match the And-join above. Confirm.
        return cql_lo + 'and ' + cql_hi
    raise AssertionError(etree.tounicode(prop, pretty_print=True))
def send_rpc_reply(self, rpc_reply, origmsg):
    """Wrap *rpc_reply* in an <rpc-reply> element mirroring the original
    message and send it over this session.

    :param rpc_reply: an Element appended as the reply's child, or — when
        appending raises AttributeError — an iterable of elements extended
        into the reply.
    :param origmsg: the incoming <rpc> message; its attributes (notably
        message-id) and namespace map are copied onto the reply.
    """
    reply = etree.Element(qmap('nc') + "rpc-reply", attrib=origmsg.attrib,
                          nsmap=origmsg.nsmap)
    # Fix: removed the leftover debugging prints ('step 10'..'step 13')
    # and commented-out code that polluted stdout on every reply; the
    # logger.debug path below is the intended diagnostic channel.
    try:
        # Single element: attach it as the reply's child.
        reply.append(rpc_reply)
    except AttributeError:
        # Not a single element — treat it as an iterable of children.
        reply.extend(rpc_reply)
    ucode = etree.tounicode(reply, pretty_print=True)
    if self.debug:
        logger.debug("%s: Sending RPC-Reply: %s", str(self), str(ucode))
    self.send_message(ucode)
def xslt(request):
    """Administration view (Python 2): apply the selected XSLT transformer
    to the posted XML and render both raw and pretty-printed results."""
    transform_result = ''
    transform_result_pretty = ''
    if request.method == 'POST':
        form = forms.TransformForm(request.POST)
        if form.is_valid():
            transformer = transformers.get(form.cleaned_data['transformer'])
            record_tree = etree.fromstring(
                form.cleaned_data['xml'].encode('utf-8'))
            transformed = transformer(record_tree, abstract='0')
            transform_result = unicode(transformed)
            transform_result_pretty = etree.tounicode(transformed,
                                                      pretty_print=True)
    else:
        # GET: show an empty form, no results yet.
        form = forms.TransformForm()
    context = {
        'form': form,
        'transform_result': transform_result,
        'transform_result_pretty': transform_result_pretty,
    }
    return render(request, 'transformers_pool/administration/xslt.html',
                  context)
def write_to_file(verses, f):
    """Write the plain-text form of *verses* to the open file object *f*.

    Each verse element is serialised, stripped of note/term markup and of
    <add> elements, flattened to text and entity-cleaned; non-empty verses
    are written one per line (no trailing newline after the last), empty
    verses produce a bare newline.

    :param verses: sequence of verse elements; only the first is passed to
        get_text_format, whose "verses" list is what actually gets written
    :param f: writable text file object
    """
    if len(verses) > 0:
        # Formatting metadata is derived from the first verse only —
        # presumably all verses share one text format; confirm against
        # get_text_format.
        tf = get_text_format(verses[0])
        for i, v in enumerate(tf["verses"]):
            last_verse = i == len(tf["verses"]) - 1
            verse = ET.tounicode(v)
            # Round-trip through a string so note/term nodes can be removed
            # textually before re-parsing.
            verse_wo_terms = get_rid_of_notes(verse)
            verse = ET.fromstring(verse_wo_terms)
            verse_wo_add = remove_nodes(verse, "add", NS_TI["ti"])
            verse = stringify_children(verse_wo_add)
            verse = clean_entities(verse)
            if len(verse.strip()) > 0:
                # Debug trace of each emitted verse.
                print('"'+verse+'"')
                f.write("{verse}{eol}".format(
                    verse=verse, eol="\n" if not last_verse else ""))
            else:
                f.write("\n")
def dump_to_xml(self, tree_name='Item'):
    """Replace this object's entry in the XML file at the module-level
    ``file_path`` and rewrite the file, pretty-printed (Python 2 code).

    Any existing <tree_name login="..."> node matching self.login is
    removed, then a fresh <Item> branch with one child element per instance
    attribute (text = unicode of its value) is appended.

    :param tree_name: tag name of the per-object nodes to purge first
    """
    tree = etree.parse(file_path)
    root = tree.getroot()
    # Drop any stale node for this login.
    # NOTE(review): the xpath is built by string interpolation — a login
    # containing quotes would break or subvert the query; confirm logins
    # are sanitised upstream.
    for bad in root.xpath("//%s[@login=\'%s\']" % (tree_name, self.login)):
        bad.getparent().remove(bad)
    item_branch = etree.Element("Item", login=self.login)
    # One child element per attribute name in the instance dict.
    for curr in self.__dict__:
        elem = etree.Element(curr)
        elem.text = unicode(self.__dict__[curr])
        item_branch.append(elem)
    root.append(item_branch)
    # Serialise and re-parse so the final write comes out fully
    # pretty-printed (lxml won't reindent pre-existing text nodes in place).
    xml = etree.tounicode(root, pretty_print=True)
    root = etree.fromstring(xml)
    et = etree.ElementTree(root)
    et.write(file_path, pretty_print=True, encoding="UTF-8")