Ejemplos de tounicode en Python, ejemplos de lxml.etree.tounicode en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: sru-loader.py Proyecto: ssp/sru-loader

def storeRecordWithID (record, ID, collectedRecords):
	"""Remember a record and write it out in every configured format.

	The record is stored in ``collectedRecords`` under ``ID``. Depending on
	the entries of the module-level ``config.format`` it is then written as
	an XML file and/or a JSON file (paths come from ``pathForID``). When
	``config.format`` is empty the serialized record is printed instead.
	Progress information is written to stdout.

	record -- element to store (serialized with ``ET.tounicode``)
	ID -- identifier used as dictionary key, file name source and JSON ``_id``
	collectedRecords -- dictionary that is updated in place
	"""
	global config

	sys.stdout.write("ID: " + str(ID) + u"… ")

	collectedRecords[ID] = record

	# Write XML file for record.
	if 'xml' in config.format:
		filePath = pathForID(ID, 'xml')
		# Serialize before opening so a failure does not leave an empty file.
		XMLString = ET.tounicode(record).encode('UTF-8')
		# The with-block closes the file even when the write raises.
		with open(filePath, 'w') as XMLFile:
			XMLFile.write(XMLString)
		sys.stdout.write(' ./' + filePath)

	# Convert to JSON and write file.
	if 'json' in config.format:
		JSONInternal = elem_to_internal(record, strip=1)
		if len(JSONInternal) == 1:
			# Unwrap the single top-level entry; list() keeps this working
			# whether values() returns a list or a view.
			JSONInternal = list(JSONInternal.values())[0]
		JSONInternal['_id'] = ID
		filePath = pathForID(ID, 'json')
		JSONString = simplejson.dumps(JSONInternal)
		with open(filePath, "w") as JSONFile:
			JSONFile.write(JSONString)
		sys.stdout.write(' ./' + filePath)

	# If no format is given, print the record.
	if len(config.format) == 0:
		print(ET.tounicode(record))

	print("")

Ejemplo n.º 2

0

Mostrar archivo

Archivo: tests.py Proyecto: konrado0/vosqa

 def testFilteringStyle(self):
     # Fixture: an HTML fragment whose <div>, <p> and <img> tags carry inline
     # ``style`` attributes.
     html = u"""
         <div style="color: #ffff00;"><p style="float: left;"><img src="http://cdnimg.visualizeus.com/thumbs/3e/37/hairstyles-3e37929b6847d0216b0aabe296ed9a76_h.jpg?ts=93246" alt="" width="248" height="400" style="width: 500px; color: blue;"><a href="http://www.hairstyles123.com/hairstylepics/faces/hairstyles_for_heart_shaped_faces/heart_shaped_faces_hairstyle_4.jpg" class="clb cboxElement"><img src="http://www.hairstyles123.com/hairstylepics/faces/hairstyles_for_heart_shaped_faces/heart_shaped_faces_hairstyle_4.jpg" alt="" width="400" height="544"></a><a href="http://womeninfashion.net/wp-content/uploads/2013/11/heart-face-shape-hairstyles-jennifer-aniston.jpg" class="clb cboxElement"><img src="http://womeninfashion.net/wp-content/uploads/2013/11/heart-face-shape-hairstyles-jennifer-aniston.jpg" alt="" width="420" height="560"></a><a href="http://thisgirlscity.com/wp-content/uploads/2013/07/reese.jpg" class="clb cboxElement"><img src="http://thisgirlscity.com/wp-content/uploads/2013/07/reese.jpg" alt="" width="420" height="560"></a><a href="http://images.beautyriot.com/photos/200/hairstyles_heart_shape_face-200.jpg" class="clb cboxElement"><img src="http://images.beautyriot.com/photos/200/hairstyles_heart_shape_face-200.jpg" alt="" width="200" height="272"></a></p>
         <p><a href="http://www.youbeauty.com/p/482031/thumbnail/entry_id/0_hmc2pi25/width/0/height/0/quality/90" class="clb cboxElement"><img src="http://www.youbeauty.com/p/482031/thumbnail/entry_id/0_hmc2pi25/width/0/height/0/quality/90" alt="" width="200" height="290"></a><a href="http://www.hairstyles123.com/hairstylepics/faces/hairstyles_for_heart_shaped_faces/heart_shaped_faces_hairstyle_11.jpg" class="clb cboxElement"><img src="http://www.hairstyles123.com/hairstylepics/faces/hairstyles_for_heart_shaped_faces/heart_shaped_faces_hairstyle_11.jpg" alt="" width="520" height="780"></a><a href="http://www.allure.com/images/hair-ideas/2012/05/heart-face-shape-hairstyles-reese-witherspoon.jpg" class="clb cboxElement"><img src="http://www.allure.com/images/hair-ideas/2012/05/heart-face-shape-hairstyles-reese-witherspoon.jpg" alt="" width="420" height="560"></a><a href="http://slodive.com/wp-content/uploads/2012/03/hairstyles-for-heart-shaped-faces/heart-shape-glasses.jpg" class="clb cboxElement"><img src="http://slodive.com/wp-content/uploads/2012/03/hairstyles-for-heart-shaped-faces/heart-shape-glasses.jpg" alt=""></a><a href="http://www.beautifulhairstyle.net/wp-content/uploads/2014/02/Long-Hairstyles-For-Heart-Shaped-Faces.jpg" class="clb cboxElement"><img src="http://www.beautifulhairstyle.net/wp-content/uploads/2014/02/Long-Hairstyles-For-Heart-Shaped-Faces.jpg" alt=""></a><a href="http://beautyhairtotoe.com/wp-content/uploads/2013/08/rby-heart-shaped-reese-marked-mdn.jpg" class="clb cboxElement"><img src="http://beautyhairtotoe.com/wp-content/uploads/2013/08/rby-heart-shaped-reese-marked-mdn.jpg" alt=""></a><a href="http://www.prettydesigns.com/wp-content/uploads/2013/09/Hairstyle-for-Oval-shaped-Women.jpg" class="clb cboxElement"><img src="http://www.prettydesigns.com/wp-content/uploads/2013/09/Hairstyle-for-Oval-shaped-Women.jpg" alt="" width="550" height="775"></a><a 
href="http://www.hairnext.com/wp-content/uploads/2014/05/Heart-Shaped-Face-Best-Short-Bangs-Hairstyle-For-Fine-Hair.jpg" class="clb cboxElement"><img src="http://www.hairnext.com/wp-content/uploads/2014/05/Heart-Shaped-Face-Best-Short-Bangs-Hairstyle-For-Fine-Hair.jpg" alt="Heart Shaped Face Best Short Bangs Hairstyle For Fine Hair"></a><a href="http://www.hairnext.com/wp-content/uploads/2014/05/Short-Bob-Side-Swept-For-Long-Face-Shape.jpg" class="clb cboxElement"><img src="http://www.hairnext.com/wp-content/uploads/2014/05/Short-Bob-Side-Swept-For-Long-Face-Shape.jpg" alt="Short Bob Side Swept  For Long Face Shape"></a></p>
         <p>&nbsp; <img src="http://www.hairnext.com/wp-content/uploads/2014/05/Short-blonde-Curly-hairstyle.jpg" alt="Short blonde Curly hairstyle:"></p></div>
         """
     # Parse the fragment, run it through filter_style and print the
     # serialized result.
     # NOTE(review): nothing is asserted here, so this test can only fail if
     # parsing, filtering or serializing raises -- consider adding an
     # assertion on the output.
     print etree.tounicode(filter_style(fromstring(html)))

Ejemplo n.º 3

0

Mostrar archivo

Archivo: thread.py Proyecto: chipsahoy/puzzles-and-other-code

    def htmltopost(self, html, pagetime):
        """Build a ``post.Post`` from the element holding one post's body.

        The post content is the HTML serialization of ``html`` (after
        ``self.removecomments``). Number, id, title, poster name and
        timestamp are read from nodes located with XPath expressions
        relative to ``html`` (they climb to parent/ancestor nodes).

        html -- lxml element of the post body
        pagetime -- passed to ``misc.parseitemtime`` together with the
            post's time string to compute the timestamp

        Returns the new ``post.Post``. Raises IndexError when the title,
        poster or time node cannot be found.
        """
        self.removecomments(html)
        c = etree.tounicode(html, method='html', pretty_print=True)

        # Defaults for posts without a post-number link. ``postid`` used to
        # be assigned only inside the branch below, so a missing link ended
        # in an UnboundLocalError when the Post was built.
        postnumber = 0
        postid = 0
        postnumbernode = html.xpath(r"../../../tr[1]/td[2]/a[last()]")
        if postnumbernode:
            link = postnumbernode[-1]
            postnumber = int(etree.tounicode(link, method="text"))
            # The post id is the ``p`` query parameter of the link target.
            parsed = urlparse(link.attrib['href'])
            postid = int(parse_qs(parsed.query)['p'][0])

        titlenode = html.xpath(r"../div[@class='smallfont']/strong")
        title = etree.tounicode(titlenode[-1], method="text").strip()

        posternode = html.xpath(r"../../td[1]/div/a[starts-with(@class,'bigusername')]")
        poster = etree.tounicode(posternode[-1], method="text").strip()

        timenode = html.xpath(r"../../../tr[1]/td[1]")
        timestring = etree.tounicode(timenode[-1], method="text").strip()
        ts = misc.parseitemtime(pagetime, timestring)

        p = post.Post(content=c, postnumber=postnumber, title=title, postername=poster,
                      postid=postid, ts=ts)
        print(postnumber, postid, poster, title)
        return p

Ejemplo n.º 4

0

Mostrar archivo

Archivo: extractvcard.py Proyecto: encolpe/lom2mlr

def main():
    """Command-line entry point: extend the vcard of a LOM record into an xcard.

    Reads the input file or URL given on the command line, runs it through
    the extraction stylesheet and then the de-duplication stylesheet, and
    writes the result to the chosen output (stdout by default). With
    ``--format rawxml`` the XML is written pretty-printed; any other format
    is produced by parsing the XML as RDF and serializing the graph.
    """
    # Local import keeps this fix self-contained.
    from contextlib import closing

    extensions = {(URL_MLR_EXT, 'vcard_uuid'): utils.vcard_uuid}
    converterExtract = XMLTransform(STYLESHEET_EXTRACT, extensions)
    converterDup = XMLTransform(STYLESHEET_DUP)
    parser = argparse.ArgumentParser(
        description='Extend the vcard of a lom into a xcard')
    parser.add_argument('-f', '--format', default='rawxml',
                        help="output format: one of 'rawxml', 'xml', 'n3',"
                             " 'turtle', 'nt', 'pretty-xml', 'trix'")
    parser.add_argument('-o', '--output', help="Output file",
                        type=argparse.FileType('w'), default=sys.stdout)
    parser.add_argument('infile', help="input file or url", nargs="?")
    converterExtract.populate_argparser(parser)
    args = parser.parse_args()
    converterExtract.set_options_from_dict(vars(args))

    # Anything with a URL scheme is fetched, everything else is opened as a
    # local file.
    if urlparse(args.infile).scheme:
        opener = urlopen
    else:
        opener = open

    # closing() works for both openers, including urlopen results that are
    # not context managers themselves.
    with closing(opener(args.infile)) as infile:
        xml = converterExtract.convertfile(infile)
    if xml:
        xml = converterDup.convertxml(xml)
    if xml:
        if args.format == "rawxml":
            args.output.write(etree.tounicode(xml, pretty_print=True).encode('utf-8'))
        else:
            rdf = Graph().parse(data=etree.tounicode(xml), format="xml")
            if rdf:
                args.output.write(rdf.serialize(format=args.format, encoding='utf-8'))
    # Leave the process-wide stdout open; only close files we opened.
    if args.output is not sys.stdout:
        args.output.close()

Ejemplo n.º 5

0

Mostrar archivo

Archivo: system.py Proyecto: moudlajs/auct-web

 def create_xml(self, p):
     """Append the entries of ``p`` as a new ``<item>`` element to ``log.xml``.

     The file is created with an empty ``<searchedItems>`` root when it
     does not exist yet. ``p["id"]`` becomes the ``id`` attribute of the new
     item; every key/value pair of ``p`` (including ``id``) becomes a child
     element whose text is the value. Keys containing ``buyNow`` or ``bid``
     are nested inside a shared ``<price>`` child instead.

     Note that ``p`` is consumed: it is emptied with ``popitem()``. The
     whole tree is printed before each entry is processed.
     """
     path = "log.xml"
     # Create a file holding only the root element when there is none yet.
     if not os.path.isfile(path):
         root = etree.Element("searchedItems")
         with open(path, mode="w", encoding="utf-8") as f:
             f.write(etree.tounicode(root, pretty_print=True))

     root = etree.parse(path)
     # The new item is a child of the document root.
     item = etree.SubElement(root.getroot(), "item", attrib={"id": p["id"]})

     while p:
         print(etree.tounicode(root, pretty_print=True))
         key, value = p.popitem()
         if "buyNow" in key or "bid" in key:
             # BuyNow and Bid prices are nested inside <price>. Bind the
             # result of find() so an already existing <price> child is
             # reused; before, ``price`` was unbound in that case.
             price = item.find("price")
             if price is None:
                 price = etree.SubElement(item, "price")
             parent = price
         else:
             # All other entries are direct children of the item.
             parent = item
         subitem = etree.SubElement(parent, key)
         subitem.text = value

     with open(path, mode="w", encoding="utf-8") as f:
         f.write(etree.tounicode(root, pretty_print=True))

Ejemplo n.º 6

0

Mostrar archivo

Archivo: definition.py Proyecto: necrop/pylib3.2

 def serialized(self, stripped=True):
     """
     Return the definition node serialized as a unicode string.

     With ``stripped`` true (the default) the node returned by
     ``self.node_stripped()`` is serialized, otherwise ``self.node``.
     """
     source = self.node_stripped() if stripped else self.node
     return etree.tounicode(source)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: utils.py Proyecto: katakumpo/django-scraper

def get_single_content(element, data_type):
    """Return the content of ``element`` processed according to ``data_type``.

    Plain strings and lxml string results are returned unchanged. For an
    element, ``'text'`` yields its stripped text serialization and ``'html'``
    its stripped, pretty-printed markup. Any other ``data_type`` yields None.
    """
    already_text = (
        isinstance(element, basestring)
        or isinstance(element, etree._ElementStringResult)
        or isinstance(element, etree._ElementUnicodeResult)
    )
    if already_text:
        return element

    if data_type == 'html':
        return etree.tounicode(element, pretty_print=True).strip()
    if data_type == 'text':
        return etree.tounicode(element, method='text').strip()
    return None

Ejemplo n.º 8

0

Mostrar archivo

Archivo: sru-loader.py Proyecto: ssp/sru-loader

def main ():
	"""Download all records for the configured SRU query and store them.

	Records are requested in chunks of ``config.chunksize`` starting at
	record 1. Each record is passed through every stylesheet in
	``config.XSLs`` and handed to ``storeRecordWithID``; after each chunk
	``storeBatches`` is called with the records collected in that chunk.
	The loop ends when a response contains no records or the reported
	number of records has been passed.
	"""
	global config

	loadXSLs()

	SRUBaseURL = (config.url + '?'
		+ 'operation=searchRetrieve'
		+ '&' + 'version=1.1'
		+ '&' + 'recordPacking=xml'
		+ '&' + 'recordSchema=' + urllib.quote(config.schema)
		+ '&' + 'maximumRecords=' + str(config.chunksize)
		+ '&' + 'query=' + urllib.quote(config.query))

	recordCount = 1
	done = False

	while not done:
		firstRecord = recordCount
		SRUURL = SRUBaseURL + '&' + 'startRecord=' + str(recordCount)
		print(SRUURL)
		SRUResponse = urllib.urlopen(SRUURL).read()

		XML = ET.fromstring(SRUResponse)
		records = XML.findall('.//{http://www.loc.gov/zing/srw/}recordData/*')
		numberOfRecords = XML.findall('.//{http://www.loc.gov/zing/srw/}numberOfRecords')
		resultCount = 0
		if len(numberOfRecords) > 0:
			resultCount = int(numberOfRecords[0].text)

		# A chunk starting at recordCount ends at recordCount + chunksize - 1
		# (the old message reported one record too many).
		lastRecord = min(recordCount + config.chunksize - 1, resultCount)
		print(u"Loaded " + str(len(records)) + " records: " + str(recordCount) + "-" + str(lastRecord) + " of " + str(resultCount))

		collectedRecords = {}
		for record in records:
			ID = recordID(record, recordCount)

			# Transform record. Stop at the first stylesheet that yields no
			# root element instead of feeding None into the next one.
			transformed = record
			for XSL in config.XSLs:
				transformed = XSL(transformed).getroot()
				if transformed is None:
					break

			if transformed is None:
				print(u"Record transformation failed for ID »" + ID + u"«")
				# Show the untransformed record; serializing the None result
				# (as was done before) raises a TypeError.
				print(ET.tounicode(record))
			else:
				storeRecordWithID(transformed, ID, collectedRecords)

			recordCount += 1

		storeBatches(collectedRecords, firstRecord)

		done = (len(records) == 0 or recordCount > resultCount)

Ejemplo n.º 9

0

Mostrar archivo

Archivo: pull.py Proyecto: Bloodlettinger/dishop

 def full_description(self, url):
     """Fetch ``url`` and extract image, description and tech details.

     Looks at the first ``div#item-full`` block of the page and returns a
     dict with the keys ``image``, ``desc`` and ``tech``. When one of the
     expected parts is missing a message is printed and an empty dict is
     returned. An AssertionError is raised when the page has no
     ``div#item-full`` at all.
     """
     document = etree.parse(retrieve(url=url), self.parser)
     blocks = document.xpath("//div[@id='item-full']")
     assert len(blocks), _(u'No elements found.')
     for block in blocks:
         # Both paths below return, so only the first block is ever used.
         try:
             image = block.xpath(".//div[@class='item-pic']//img/@src")[0]
             desc = tounicode(block.xpath(".//div[@id='item-details']/node()")[0])
             tech = tounicode(block.xpath(".//div[@id='item-tech']/node()")[0])
             return {
                 'image': image,
                 'desc': desc,
                 'tech': tech,
                 }
         except IndexError:
             print('Bad structure in %s !' % url)
             return {}

Ejemplo n.º 10

0

Mostrar archivo

Archivo: quotationtext.py Proyecto: necrop/pylib3.2

 def comment_stripped_text(self):
     """
     Return the plain text of the node with ``<cm`` comment markup
     removed by COMMENT_STRIPPER.

     Falls back to the text of the unmodified node when the node holds
     no comment markup or the stripped markup is not well-formed XML.
     """
     markup = etree.tounicode(self.node)
     text_source = self.node
     if '<cm' in markup:
         cleaned = COMMENT_STRIPPER.edit(markup)
         try:
             text_source = etree.XML(cleaned)
         except etree.XMLSyntaxError:
             # Keep using the original node.
             pass
     return etree.tounicode(text_source, method='text') or ''

Ejemplo n.º 11

0

Mostrar archivo

Archivo: sale.py Proyecto: gldnspud/simplsale

 def success(self, template_name, transaction_number):
     """Render the success page of a sale template for one transaction.

     Answers with a 404 when ``transaction_number`` is not in the success
     cache. Otherwise the cached values are written into the template
     elements whose id equals the value's key, the cached mailer gets the
     chance to modify the e-mail notice elements, and the page is returned
     as HTML prefixed with the XHTML 1.1 doctype.
     """
     # Unknown transaction numbers are diverted to the 404 page.
     if transaction_number not in g.success_data:
         return abort(
             status_code = 404, 
             comment = 'Transaction number expired or invalid.',
             )
     # Work on a copy of the cache entry because it is mutated below.
     fields = g.success_data[transaction_number].copy()
     # Load the page and apply the generic commerce notice.
     page = SaleTemplate(template_name).success_xml()
     self._apply_commerce_notice(page)
     # The mailer is an object, not text, so take it out of the fields.
     mailer = fields.pop('mailer_instance')
     # Every remaining field becomes the text of the element with that id.
     for element_id, text in fields.items():
         select = CSSSelector('#' + element_id)
         for target in select(page):
             target.text = text
     # Let the mailer manipulate the page.
     select_notice = CSSSelector('#simplsale-email-notice')
     for target in select_notice(page):
         mailer.apply_notice(target)
     # Render.
     return XHTML11_DTD + tounicode(page, method='html')

Ejemplo n.º 12

0

Mostrar archivo

Archivo: xml.py Proyecto: BlackEarth/bxml

    def tag_words_in(cls, elem, tag='w'):
        """Return a copy of ``elem`` with every word wrapped in a ``<tag>`` element.

        A word is a run of non-whitespace characters in the text or tail of
        any element. The input element is not modified: it is serialized
        and re-parsed first. Words are marked with ``{tag}...{/tag}``
        placeholders, which are turned into real markup on the serialized
        string before it is parsed again.
        """
        # Raw string: "\s" in a plain string literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        word = re.compile(r"([^\s]+)")
        placeholder = r'{%s}\1{/%s}' % (tag, tag)
        # Tags whose content is left untouched (their tails are still tagged).
        omit_elems = []

        def tag_words(e):
            e.text = word.sub(placeholder, e.text or '')
            for ch in e:
                if ch.tag not in omit_elems:
                    tag_words(ch)
                ch.tail = word.sub(placeholder, ch.tail or '')

        new_elem = XML.fromstring(etree.tounicode(elem))
        tag_words(new_elem)
        s = etree.tounicode(new_elem)
        s = s.replace('{%s}' % tag, '<%s>' % tag).replace('{/%s}' % tag, '</%s>' % tag)
        return XML.fromstring(s)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: test_wms.py Proyecto: jblarsen/pydap.responses.wms

    def __init__(self, datapath):
        """Set up a test client for a single WMS source.

        Wraps a ``NetCDFHandler`` for ``datapath`` in a ``webtest.TestApp``,
        prepares the base WSGI environment and the base GetMap and
        GetCapabilities queries, requests the capabilities document and
        parses it with ``WebMapService``. When parsing fails the path and
        the pretty-printed document are printed and the error is re-raised.
        """
        self.datapath = datapath
        self.handler = webtest.TestApp(NetCDFHandler(datapath))
        self.path_info = '/' + datapath + '.wms'

        # Directory of this module; the colour and style files are read
        # from it.
        here = os.path.dirname(os.path.realpath(__file__))

        wms_config = {
            'pydap.responses.wms.fill_method': 'contourf',
            'pydap.responses.wms.paletted': True,
            'pydap.responses.wms.allow_eval': True,
            'pydap.responses.wms.colorfile': here + '/colors.json',
            'pydap.responses.wms.styles_file': here + '/styles.json',
            'pydap.responses.wms.max_age': 600,
            'pydap.responses.wms.s_maxage': 93600,
            'pydap.responses.wms.max_image_size': 16777216,
            'pydap.responses.wms.localCache': True,
            'pydap.responses.wms.redis': False,
            'pydap.responses.wms.redis.host': 'localhost',
            'pydap.responses.wms.redis.port': 6379,
            'pydap.responses.wms.redis.db': 0,
            'pydap.responses.wms.redis.redis_expiration_time': 604800,
            'pydap.responses.wms.redis.distributed_lock': True,
        }
        self.base_env = {'pydap.config': wms_config}

        self.base_query_map = {
            'SERVICE': 'WMS',
            'REQUEST': 'GetMap',
            'VERSION': '1.3.0',
            'STYLES': '',
            'FORMAT': 'image/png',
            'TRANSPARENT': 'TRUE',
            'HEIGHT': 512,
            'WIDTH': 512,
            'BBOX': '-180.0,-90.0,180.0,90.0',
            'CRS': 'EPSG:4326',
        }
        self.base_query_cap = {
            'SERVICE': 'WMS',
            'REQUEST': 'GetCapabilities',
            'VERSION': '1.3.0',
        }

        # Fetch the capabilities document once, up front.
        capabilities_env = self.base_env.copy()
        capabilities_env['QUERY_STRING'] = urllib.parse.urlencode(
            self.base_query_cap)
        response = self.get(
            params=self.base_query_cap,
            extra_environ=capabilities_env,
            status=200,
        )
        self.xml = response.normal_body
        try:
            self.wms = WebMapService(
                self.path_info, xml=self.xml, version='1.3.0')
        except:
            # Dump what was received to help diagnosing, then re-raise.
            print('PATH_INFO', self.path_info)
            blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
            document = etree.parse(io.BytesIO(self.xml), blank_stripping_parser)
            print('XML', etree.tounicode(document, pretty_print=True))
            raise

Ejemplo n.º 14

0

Mostrar archivo

Archivo: XmlTools.py Proyecto: radicke-atix-de/comoonics-cluster-suite

def evaluateXPath(path, element):
    """Evaluate the XPath expression ``path`` against ``element``.

    Uses ``xml.xpath`` when it can be imported and falls back to lxml
    otherwise. Boolean results are returned as they are; any other result
    is returned as a list. With ``xml.xpath`` attribute nodes in the result
    are replaced by their values; with lxml, string results are kept and
    elements are converted with ``parseXMLString`` to their
    ``documentElement``.
    """
    try:
        import xml.dom
        from xml.xpath import Evaluate
        matches = Evaluate(path, element)
        if hasattr(matches, '__iter__'):
            for index, match in enumerate(matches):
                if isinstance(match, xml.dom.Node) and match.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
                    matches[index] = match.value
            return matches
        if isinstance(matches, bool):
            return matches
        return [matches]
    except ImportError:
        # Implementation for etree: the element is serialized and parsed
        # again by lxml before the expression is evaluated.
        from lxml.etree import XPath, fromstring, tounicode
        serialized = toPrettyXML(element)
        compiled = XPath(path)
        found = compiled.evaluate(fromstring(serialized))
        if hasattr(found, '__iter__'):
            # Entries are either strings or lxml elements.
            return [
                entry if isinstance(entry, basestring)
                else parseXMLString(tounicode(entry)).documentElement
                for entry in found
            ]
        if isinstance(found, bool):
            return found
        return [found]

Ejemplo n.º 15

0

Mostrar archivo

Archivo: http.py Proyecto: lamby/musicdb

    def __init__(self, tracks, filename=None):
        """Build an XSPF playlist response for ``tracks``.

        Each track contributes a ``<track>`` entry with title, duration
        (``track.length * 1000``) and location (the file URL with
        ``https:`` replaced by ``http:``). The response is offered as an
        attachment named ``filename`` (``playlist.xspf`` when None).
        """
        if filename is None:
            filename = 'playlist.xspf'

        root = etree.Element(
            'playlist',
            nsmap={None: 'http://xspf.org/ns/0/'},
            attrib={'version': '1'},
        )
        entries = etree.SubElement(root, 'trackList')

        for track in tracks:
            entry = etree.SubElement(entries, 'track')
            etree.SubElement(entry, 'title').text = \
                track.get_parent_instance().title
            etree.SubElement(entry, 'duration').text = \
                unicode(track.length * 1000)
            etree.SubElement(entry, 'location').text = \
                track.file.url().replace('https:', 'http:')

        super(XSPFResponse, self).__init__(
            etree.tounicode(root),
            content_type='application/xspf+xml',
        )

        self['Content-Disposition'] = 'attachment; filename=%s' % filename

Ejemplo n.º 16

0

Mostrar archivo

Archivo: xml.py Proyecto: BlackEarth/bxml

 def Element(cls, s, *args):
     """Given a string s and string *args, return an Element.

     ``s`` is parsed with ``XML.fromstring``. When ``args`` are given they
     are first interpolated into ``s`` with the ``%`` operator. Elements
     passed as ``s`` or in ``args`` are serialized to markup first.
     """
     def as_markup(value):
         # isinstance() also covers subclasses of _Element, which the
         # former exact type comparison passed through unserialized.
         if isinstance(value, etree._Element):
             return etree.tounicode(value)
         return value

     t = as_markup(s)
     # Without args no interpolation happens, so literal "%" in s is safe.
     if len(args) == 0:
         return XML.fromstring(t)
     return XML.fromstring(t % tuple(as_markup(arg) for arg in args))

Ejemplo n.º 17

0

Mostrar archivo

Archivo: entrycomponent.py Proyecto: necrop/pylib3.2

    def serialized(self):
        """
        Serialize ``self.node`` to a unicode string.

        (Delegates to etree.tounicode() with its default options.)
        """
        node = self.node
        return etree.tounicode(node)

Ejemplo n.º 18

0

Mostrar archivo

Archivo: tests.py Proyecto: ConocimientoAbierto/policrowd

    def test_all_basic_feed_with_one_item(self):
        response = self.app.get('/results/all-basic.atom')
        root = etree.XML(response.content)
        xml_pretty = etree.tounicode(root, pretty_print=True)

        result_event = ResultEvent.objects.first()
        expected = '''<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-gb">
  <title>Election results from example.com</title>
  <link href="http://example.com/" rel="alternate"/>
  <link href="http://example.com/results/all-basic.atom" rel="self"/>
  <id>http://example.com/</id>
  <updated>{updated}</updated>
  <entry>
    <title>Tessa Jowell (Labour Party) won in Member of Parliament for Dulwich and West Norwood</title>
    <link href="http://example.com/#{item_id}" rel="alternate"/>
    <published>{updated}</published>
    <updated>{updated}</updated>
    <author>
      <name>john</name>
    </author>
    <id>http://example.com/#{item_id}</id>
    <summary type="html">A example.com volunteer recorded at {space_separated} that Tessa Jowell (Labour Party) won the ballot in Member of Parliament for Dulwich and West Norwood, quoting the source 'Seen on the BBC news').</summary>
  </entry>
</feed>
'''.format(
    updated=rfc3339_date(result_event.created),
    space_separated=result_event.created.strftime("%Y-%m-%d %H:%M:%S"),
    item_id=result_event.id,
)
        self.compare_xml(expected, xml_pretty)

Ejemplo n.º 19

0

Mostrar archivo

Archivo: xml.py Proyecto: BlackEarth/bxml

 def remove_range(cls, elem, end_elem, delete_end=True):
     """Delete everything from elem to end_elem, including elem.

     If delete_end is True, end_elem is removed as well (its tail is
     kept); otherwise it is left in place. Siblings are removed one by one
     until end_elem or an element containing it is reached. When the
     siblings run out first, removal continues after the parent; when an
     element contains end_elem, removal continues with its first child.

     Calling this with ``elem`` set to None does nothing.
     """
     if elem is None:
         # Nothing (left) to remove, e.g. the recursion ran past the last
         # sibling. Without this guard ``parent`` below is unbound.
         return
     while elem is not None and elem != end_elem and end_elem not in elem.xpath("descendant::*"):
         parent = elem.getparent()
         nxt = elem.getnext()
         parent.remove(elem)
         if DEBUG == True:
             print(etree.tounicode(elem))
         elem = nxt
     if elem == end_elem:
         if delete_end == True:
             cls.remove(end_elem, leave_tail=True)
     elif elem is None:
         # Ran out of siblings: continue behind the parent. delete_end is
         # passed on so the caller's choice is honoured at every level
         # (it used to fall back to the default here).
         if parent.tail not in [None, '']:
             parent.tail = ''
         cls.remove_range(parent.getnext(), end_elem, delete_end)
         XML.remove_if_empty(parent)
     elif end_elem in elem.xpath("descendant::*"):
         # end_elem lies inside elem: drop the leading text and descend.
         if DEBUG == True:
             print(elem.text)
         elem.text = ''
         cls.remove_range(elem.getchildren()[0], end_elem, delete_end)
         XML.remove_if_empty(elem)
     else:
         print("LOGIC ERROR", file=sys.stderr)
Ejemplo n.º 20

0

Mostrar archivo

Archivo: transform.py Proyecto: AlecChou/ldoce5viewer

def trans_entry(data):
    """Render one dictionary entry as a UTF-8 encoded HTML page.

    ``data`` is the entry's XML. The page consists of the header built by
    ``_build_header`` (with the title and pronunciation metadata), the
    serialized assets, the body produced by ``body2html`` and the closing
    tags. Title and pronunciation metadata are best effort: when they
    cannot be read, the title is empty and the metadata is left out.
    """
    r = []
    meta = {}

    # Parse outside the best-effort blocks: nothing below can work without
    # the tree, and a parse error used to be swallowed here only to come
    # back as an UnboundLocalError on ``root`` further down.
    root = et.fromstring(data)
    head = root.find('Head')

    try:
        title = _get_text_nr(head.find('HWD/BASE'))
        poslist = head.findall('POS')
        if poslist:
            title += ' ({0})'.format(
                    ', '.join(_get_text_nr(pos) for pos in poslist))
    except Exception:
        # Best effort, e.g. when the entry has no Head element.
        title = ""

    try:
        pron_gb = head.find('Audio[@resource="GB_HWD_PRON"]')
        if pron_gb is not None:
            meta['gb_pron'] = pron_gb.get('topic').split('/')[-1]
        pron_us = head.find('Audio[@resource="US_HWD_PRON"]')
        if pron_us is not None:
            meta['us_pron'] = pron_us.get('topic').split('/')[-1]
    except Exception:
        # Pronunciation metadata is optional.
        pass

    r.append(_build_header(['entry'], title=title, meta=meta))

    r.append(et.tounicode(_trans_assets(root),
        pretty_print=True, method='html'))

    r.append(body2html(root))
    r.append('</body></html>')
    return enc_utf8(''.join(r))

Ejemplo n.º 21

0

Mostrar archivo

Archivo: readable.py Proyecto: RedTuna/breadability

def transform_misused_divs_into_paragraphs(doc):
    """Replace every div without a direct div child by a p element.

    Such a div is treated as a leaf holding content: it is serialized, the
    opening and closing ``div`` of the markup are rewritten to ``p`` with
    regular expressions, and the re-parsed result takes the place of the
    div in its parent.

    This is meant to run before the document is processed any further.
    Returns ``doc``.

    """
    for div in doc.iter(tag='div'):
        if any(child.tag == 'div' for child in div.getchildren()):
            # Holds another div, so it is a container rather than a leaf.
            continue

        LNODE.log(div, 1, 'Turning leaf <div> into <p>')
        markup = tounicode(div).strip()
        # Only the first and the last tag of the markup are rewritten.
        markup = re.sub(r'^<\s*div', '<p', markup)
        markup = re.sub(r'div>$', 'p>', markup)
        div.getparent().replace(div, fromstring(markup))
    return doc

Ejemplo n.º 22

0

Mostrar archivo

Archivo: sxs.py Proyecto: eregs/regulations-parser

def build_section_by_section(sxs, fr_start_page, previous_label):
    """Given a list of xml nodes in the section by section analysis, pull
    out hierarchical data into a structure. Previous label is carried along to
    merge analyses of the same section.

    Each pass of the loop splits one title, its text elements and its
    sub-sections off the front of ``sxs`` (via ``split_into_ttsr``) and
    turns them into a dict with the keys ``page``, ``title``,
    ``paragraphs``, ``children`` and ``footnote_refs``, plus ``labels``
    when the title introduces a new label. Children are built by calling
    this function recursively on the sub-sections.

    Returns the list of these dicts in document order."""
    structures = []
    while len(sxs):  # while sxs: is deprecated
        # The CFR part is the first dash-separated component of the label.
        cfr_part = previous_label.split('-')[0]
        title, text_els, sub_sections, sxs = split_into_ttsr(sxs, cfr_part)

        page = find_page(title, title.sourceline, fr_start_page)
        # Work on copies of the P/FP elements; they are modified below.
        paragraph_xmls = [deepcopy(el) for el in text_els
                          if el.tag == 'P' or el.tag == 'FP']
        footnotes = []
        for p_idx, paragraph_xml in enumerate(paragraph_xmls):
            spaces_then_remove(paragraph_xml, 'PRTPAGE')
            spaces_then_remove(paragraph_xml, 'FTREF')
            swap_emphasis_tags(paragraph_xml)
            # Anything inside a SU can also be ignored
            for su in paragraph_xml.xpath('./SU'):
                su_text = etree.tounicode(su)
                # The offset is looked up while the SU is still part of the
                # paragraph, i.e. before it is removed further down.
                footnotes.append({
                    'paragraph': p_idx,
                    'reference': su.text,
                    'offset': body_to_string(paragraph_xml).find(su_text)})
                # Keep the text following the SU: attach it to the previous
                # sibling's tail or, without one, to the parent's text.
                if su.tail and su.getprevious() is not None:
                    su.getprevious().tail = (su.getprevious().tail or '')
                    su.getprevious().tail += su.tail
                elif su.tail:
                    su.getparent().text = (su.getparent().text or '')
                    su.getparent().text += su.tail
                su.getparent().remove(su)

        paragraphs = [body_to_string(el) for el in paragraph_xmls]
        # Children inherit the last label parsed from this title, or the
        # previous label when the title yields none.
        label_for_children = previous_label
        labels = parse_into_labels(title.text, cfr_part)
        if labels:
            label_for_children = labels[-1]

        # recursively build children. Be sure to give them the proper label
        children = build_section_by_section(sub_sections, page,
                                            label_for_children)

        next_structure = {
            'page': page,
            'title': add_spaces_to_title(title.text),
            'paragraphs': paragraphs,
            'children': children,
            'footnote_refs': footnotes
        }

        # 'labels' is only set (and previous_label only advanced) when at
        # least one label is neither a repeat of nor a backtrack from the
        # previous label.
        if (labels and  # No label => subheader
                # Concatenate if repeat label or backtrack
                not all(label == previous_label or
                        is_backtrack(previous_label, label)
                        for label in labels)):
            previous_label = labels[-1]
            next_structure['labels'] = labels
        structures.append(next_structure)

    return structures

Ejemplo n.º 23

0

Mostrar archivo

Archivo: base.py Proyecto: daniel-hartmann/PySPED

    def _le_xml(self, arquivo):
        """Load XML into ``self._xml`` from an element, a string or a file.

        ``arquivo`` may be an lxml element/tree (it is serialized first), a
        string containing XML (recognized by a ``<`` in it) or the path of
        a UTF-8 encoded file. The text is passed through ``tira_abertura``
        before it is parsed.

        Returns False when ``arquivo`` is None, True otherwise.
        """
        if arquivo is None:
            return False

        # Anything that is not a string is serialized to a unicode string,
        # so from here on ``arquivo`` is always text.
        if not isinstance(arquivo, basestring):
            arquivo = etree.tounicode(arquivo)

        # NOTE(review): encode-then-unicode on a byte string looks like a
        # round trip that only works for ASCII content -- kept as it was.
        if isinstance(arquivo, str):
            arquivo = unicode(arquivo.encode('utf-8'))

        if '<' in arquivo:
            self._xml = etree.fromstring(tira_abertura(arquivo).encode('utf-8'))
        else:
            # The with-block closes the file even when reading fails.
            with open(arquivo) as arq:
                txt = b''.join(arq.readlines())
            txt = unicode(txt.decode('utf-8'))
            txt = tira_abertura(txt)
            self._xml = etree.fromstring(txt)
        return True

Ejemplo n.º 24

0

Mostrar archivo

Archivo: IISRewriterConv.py Proyecto: m-messiah/IISRewriterConv

 def write(self):
     """
         Print the converted rules, followed by a marker comment and
         the entries of ``self.fails``, one per line.
     """
     converted = etree.tounicode(self.new_rules, pretty_print=True)
     print(converted)
     print("<!-- Failed convert. Please, handle it manually-->\n")
     failed = "\n".join(self.fails)
     print(failed)

Ejemplo n.º 25

0

Mostrar archivo

Archivo: views.py Proyecto: AlphaRgb/flask

def check(proxy, timeout=10):
    """Check a proxy by fetching a geo-IP page through it.

    proxy -- ``host:port`` of the proxy, used for both http and https
    timeout -- seconds to wait for the request before giving up

    Returns the markup of the page's result block on success. When the
    request fails, times out or the block is missing, a fixed failure
    message is returned instead.
    """
    failure = '当前代理已经失效'
    # Endpoint used for the check; the 'https://ip.cn/' branch below is
    # kept for switching back to that service.
    url = 'https://geoiptool.com/zh/'
    proxies = {
        'http': 'http://{}'.format(proxy),
        'https': 'http://{}'.format(proxy)
    }
    try:
        # NOTE(security): verify=False disables TLS certificate checks.
        # Without a timeout a dead proxy can block this call forever.
        res = requests.get(url, proxies=proxies, verify=False,
                           timeout=timeout).text
        data = etree.HTML(res)
    except Exception as e:
        print(e)
        return failure

    # Compare with None explicitly; truth-testing an element is deprecated.
    if data is None:
        return failure

    if url == 'https://ip.cn/':
        result = data.xpath('//div[@id="result"]')[0]
        return html.tostring(result)

    if url == 'https://geoiptool.com/zh/':
        # Evaluate the expression once and reuse the result.
        blocks = data.xpath('//div[contains(@class, "sidebar-data")]')
        if not blocks:
            return failure
        content = etree.tounicode(blocks[0])
        # Drop images and the classes hiding the block on small screens.
        content = re.sub(r'<img.*?>', '', content)
        content = re.sub(r'hidden-xs hidden-sm', '', content)
        return content

    return failure

Ejemplo n.º 26

0

Mostrar archivo

Archivo: feedplus.py Proyecto: timfoster/FeedPlus

def html_to_plaintext(text):
        """Extract readable plaintext from G+ html.

        ``<br />`` tags are swapped for spaces before parsing because lxml
        doesn't seem to do <br> elements properly.
        """
        spaced = text.replace("<br />", " ")
        html_parser = etree.HTMLParser()
        document = etree.parse(StringIO(spaced), html_parser)
        root = document.getroot()
        return etree.tounicode(root, method="text")

Ejemplo n.º 27

0

Mostrar archivo

Archivo: preprocessors_tests.py Proyecto: eregs/regulations-parser

def test_parentheses_cleanup(original, new_text):
    """Check that ``parentheses_cleanup`` rewrites a ``<P>`` as expected.

    A ``PART`` document holding one paragraph with ``original`` is built,
    the preprocessor is run over it, and the serialized first child is
    compared with a paragraph containing ``new_text``.
    """
    paragraph = u"<P>{0}</P>".format(original)
    with XMLBuilder("PART") as ctx:
        ctx.child_from_string(paragraph)
    preprocessors.parentheses_cleanup(ctx.xml)
    actual = etree.tounicode(ctx.xml[0])
    expected = "<P>{0}</P>".format(new_text)
    assert actual == expected

Ejemplo n.º 28

0

Mostrar archivo

Archivo: results.py Proyecto: STIXProject/stix-ramrod

    def as_stringio(self):
        """Return the ``ResultDocument`` content wrapped in a ``StringIO``.

        The underlying ``_document`` is serialized to pretty-printed
        unicode text first.
        """
        return StringIO(etree.tounicode(self._document, pretty_print=True))

Ejemplo n.º 29

0

Mostrar archivo

Archivo: definition.py Proyecto: necrop/pylib3.2

 def cross_references(self):
     """
     Return the list of CrossReference objects for every ``xr`` element
     found in the stripped definition node.

     The list is built on first use and stored on ``self._xrefs``; later
     calls return the stored list. Each cross-reference gets a ``type``
     attribute (``None`` unless the text before it matches a known cue).
     """
     try:
         return self._xrefs
     except AttributeError:
         pass  # first call: build the list below

     xr_nodes = self.node_stripped().findall('.//xr')
     self._xrefs = [CrossReference(node) for node in xr_nodes]
     if not self._xrefs:
         return self._xrefs

     # Every cross-reference starts out untyped.
     for ref in self._xrefs:
         ref.type = None

     # Cut the serialized definition after each closing </xr> so that
     # chunk i ends with cross-reference i; the chunk list is then zipped
     # with self._xrefs.
     markup = etree.tounicode(self.node_stripped())
     chunks = [XREF_STRIPPER.edit(piece.lower())
               for piece in markup.split('</xr>')]

     for chunk, ref in zip(chunks, self._xrefs):
         if EQUALS_XREF.search(chunk):
             ref.type = 'equals'
         elif 'see <xr' in chunk:
             ref.type = 'see'
         elif 'also <xr' in chunk or 'cf. <xr' in chunk:
             ref.type = 'cf'
         elif 'opp. <xr' in chunk:
             ref.type = 'opposite'
     return self._xrefs

Ejemplo n.º 30

0

Mostrar archivo

Archivo: xml.py Proyecto: BlackEarth/bxml

 def tostring(self, root=None, doctype=None, pretty_print=True):
     """Serialize the XML document to a unicode string.

     ``root`` defaults to ``self.root`` and ``doctype`` falls back to
     ``self.info.doctype`` when it is not given (or is falsy).
     """
     element = self.root if root is None else root
     chosen_doctype = doctype or self.info.doctype
     return etree.tounicode(
         element, doctype=chosen_doctype, pretty_print=pretty_print
     )

Ejemplo n.º 31

0

Mostrar archivo

    def parse_nodes(self, xml):
        """Derive a flat list of nodes from this xml chunk. This does nothing
        to determine node depth.

        :param xml: element whose direct children are each offered to the
            matchers in ``self.MATCHERS``; the first matcher that accepts a
            child derives its nodes.
        :return: list of derived nodes with trailing star-labelled nodes
            removed. Children with no matching matcher are logged and
            skipped.
        """
        nodes = []

        # ``list(xml)`` is the documented replacement for the deprecated
        # ``getchildren()`` and takes the same snapshot of the children.
        for child in list(xml):
            matching = (m for m in self.MATCHERS if m.matches(child))

            tag_matcher = next(matching, None)
            if tag_matcher:
                nodes.extend(tag_matcher.derive_nodes(child, processor=self))
            else:
                logger.warning("No tag match\n%s", etree.tounicode(child))

        # Trailing stars don't matter; slightly more efficient to ignore them.
        # ``nodes`` is local to this call, so popping in place is safe and
        # avoids copying the list on every iteration.
        while nodes and nodes[-1].label[0] in mtypes.stars:
            nodes.pop()

        return nodes

Ejemplo n.º 32

0

Mostrar archivo

Archivo: partner.py Proyecto: lzefyrus/oldprojectsreference

def get_mo_fail_response(text, ack, status_code):
    """
    Builds xml return in case of error (status_code <> 200)

    :param text: error description
    :param ack: message's arrival acknowledgement
    :param status_code: request status code
    :return: xml
    """

    body = etree.Element('smsmo_response', ack=str(ack))
    # These children are emitted empty, so no reference to them is kept.
    for tag in ("message_id", "source", "large_account", "request_datetime"):
        etree.SubElement(body, tag)
    description = etree.SubElement(body, "description", code=str(status_code))
    description.text = str(text)

    return etree.tounicode(body)

Ejemplo n.º 33

0

Mostrar archivo

Archivo: rpc_response.py Proyecto: netsia/voltha_doc

 def build_yang_response(self,
                         root,
                         request,
                         yang_options=None,
                         custom_rpc=False):
     """Convert ``root`` to YANG XML and build the XML response from it.

     The generated YANG XML is logged pretty-printed before the response
     is built. If anything raises, the failure is logged, the stored
     ``rpc_response`` is flagged as an error carrying a ``BadMsg`` node,
     and ``None`` is returned.
     """
     try:
         self.custom_rpc = custom_rpc
         yang_xml = self.to_yang_xml(root, request, yang_options,
                                     custom_rpc)
         pretty_xml = etree.tounicode(yang_xml, pretty_print=True)
         log.info('yang-xml', yang_xml=pretty_xml)
         return self.build_xml_response(request, yang_xml, custom_rpc)
     except Exception:
         log.exception('error-building-yang-response',
                       request=request,
                       xml=etree.tostring(root))
         self.rpc_response.is_error = True
         self.rpc_response.node = ncerror.BadMsg(request)
         return None

Ejemplo n.º 34

0

Mostrar archivo

def xls2xml(xls_name):
    """Convert the first sheet of a workbook into ``student1.xml``.

    Each row becomes one entry: the first cell (as an int) is the key and
    the string form of the remaining cells is the value. The table is
    dumped as indented JSON into the text of a ``students`` element, which
    is followed by an explanatory comment.

    :param xls_name: path of the workbook to read
    """
    with xlrd.open_workbook(xls_name) as wb:
        ws = wb.sheet_by_index(0)
    table = OrderedDict()
    for i in range(ws.nrows):
        row = ws.row_values(i)  # read the row once instead of twice
        table[int(row[0])] = str(row[1:])

    root = etree.Element("root")
    e_students = etree.SubElement(root, 'students')
    e_students.text = (
        '\n' + json.dumps(table, indent=4, ensure_ascii=False) + '\n')
    e_students.append(
        etree.Comment('\n    学生信息表\n    "id" : [名字，数学，语文，英语]\n'))
    document = ('<?xml version="1.0" encoding="UTF-8"?>' +
                etree.tounicode(root))

    # The declaration promises UTF-8 and the content is non-ASCII, so the
    # file encoding must not depend on the platform default.
    with open("student1.xml", 'w', encoding='utf-8') as f:
        f.write(document)

Ejemplo n.º 35

0

Mostrar archivo

Archivo: test_collection.py Proyecto: BryanEliDimas/cnx-press

def test_parse_colletion_metdata_without_print_style(tmpdir,
                                                     litezip_valid_litezip):
    """Parsing collection metadata must work when print-style is absent.

    A copy of the valid ``collection.xml`` with its ``print-style`` param
    removed is written to a temporary directory, then parsed.
    """
    working_dir = tmpdir.mkdir('col')
    collection_file = working_dir.join('collection.xml')
    source_path = litezip_valid_litezip / 'collection.xml'

    # Copy over and modify the collection.xml file.
    with source_path.open() as origin:
        xml = etree.parse(origin)
        print_style_params = xml.xpath('//col:param[@name="print-style"]',
                                       namespaces=COLLECTION_NSMAP)
        elm = print_style_params[0]
        elm.getparent().remove(elm)
        serialized = etree.tounicode(xml)
        collection_file.write(serialized.encode('utf8'))
    assert 'print-style' not in collection_file.read()

    # Given a Collection object parsed from the modified directory,
    # the metadata parser must not error and must report no print style.
    model = parse_collection(Path(working_dir))
    md = parse_collection_metadata(model)
    assert md.print_style is None

Ejemplo n.º 36

0

Mostrar archivo

    def to_svg(self):
        '''
        Serialize the SVG file with refreshed ``data-channels`` attributes.

        Returns:

            unicode : SVG XML source with up-to-date electrode channel lists.
        '''
        document = etree.parse(self.svg_filepath)

        # Rows describe electrodes whose channel list was modified; each
        # row unpacks into the original and the new channels.
        changed = self.diff_electrode_channels()

        evaluator = XPathEvaluator(document, namespaces=INKSCAPE_NSMAP)
        for electrode_id, row in changed.iterrows():
            orig_i, new_i = row
            paths = evaluator.evaluate('//svg:path[@id="%s"]' % electrode_id)
            # Rewrite the attribute on every matching `svg:path` element.
            for path in paths:
                path.attrib['data-channels'] = ','.join(map(str, new_i))
        return etree.tounicode(document)

Ejemplo n.º 37

0

Mostrar archivo

def test_mets_dnx():
    """Build a METS DNX from the ``test_batch_1`` fixture and print it."""
    here = os.path.dirname(os.path.realpath(__file__))
    batch_dir = os.path.join(here, 'data', 'test_batch_1')
    characteristics = [{
        'submissionReason': 'bornDigitalContent',
        'IEEntityType': 'periodicIE'
    }]
    mets = mdf.build_mets(
        ie_dmd_dict={"dc:title": "test title"},
        pres_master_dir=os.path.join(batch_dir, 'pm'),
        modified_master_dir=os.path.join(batch_dir, 'mm'),
        input_dir=batch_dir,
        generalIECharacteristics=characteristics,
    )
    print(ET.tounicode(mets, pretty_print=True))

Ejemplo n.º 38

0

Mostrar archivo

Archivo: xls2xml.py Proyecto: YeomanYe/show-me-the-code

def xls_xml(file_name):
    """Dump the ``city`` sheet of a workbook into ``city.xml``.

    Each row contributes one entry: the first cell (converted through
    ``int`` to ``str``) is the key and the string form of the remaining
    cells is the value. The string form of the resulting dict becomes the
    text of a ``citys`` element.

    :param file_name: path of the workbook to read
    """
    # Read from the xls file
    data = {}
    excel = xlrd.open_workbook(file_name)
    table = excel.sheet_by_name('city')
    for i in range(table.nrows):
        row = table.row_values(i)
        data[str(int(row[0]))] = str(row[1:])

    # Build the document, then write it to the xml file
    root = etree.Element('root')
    citys = etree.SubElement(root, 'citys')
    citys.append(etree.Comment('城市信息'))
    citys.text = str(data)
    # ``with`` closes the file even when the write raises.
    with codecs.open('city.xml', 'w', 'utf-8') as output:
        output.write(etree.tounicode(root))

Ejemplo n.º 39

0

Mostrar archivo

    def get_body(self, configs, channel_id, msisdn, interface,
                 subscription_type):
        """
        Assembles the ``tangram_request`` xml request body

        :param configs: Cache object application.settings['config']
        :param channel_id: Channel id number
        :param msisdn: Telephone number
        :param interface: Interface number
        :param subscription_type: Subscription type number
        :return: xml
        """

        body = etree.Element(
            'tangram_request',
            company_id=str(configs['company_id']),
            service_id=str(configs['service_id']),
            user=str(configs['user']),
        )
        # Optional attribute: only present for a truthy interface.
        if interface:
            body.set('interface', str(interface))

        provisioning = etree.SubElement(body, "provisioning")

        operation_code = str(configs['operation_code'])
        operation = etree.SubElement(
            provisioning, "operation", code=operation_code)
        operation.text = configs['operation_description']

        channel = etree.SubElement(provisioning, "channel_id")
        channel.text = str(channel_id)

        destination = etree.SubElement(provisioning, "destination")
        destination.text = str(msisdn)
        # Optional attribute: only present for a truthy subscription type.
        if subscription_type:
            destination.set('subscription_type', str(subscription_type))

        notification = etree.SubElement(
            provisioning,
            "notification",
            type=str(configs['notification_type']),
            calltype=str(configs['notification_calltype']),
        )
        notification.text = configs['notification_callback']

        # Request time as whole seconds since the epoch.
        request_datetime = etree.SubElement(provisioning, "request_datetime")
        request_datetime.text = str(int(time.time()))

        return etree.tounicode(body)

Ejemplo n.º 40

0

Mostrar archivo

Archivo: excel2xml.py Proyecto: xiaqunfeng/show-me-the-code

def xls2xml(filename, outfile):
    """Convert the first sheet of a workbook into an XML file.

    Each row contributes one entry: the first cell (converted through
    ``int`` to ``str``) is the key and the string form of the remaining
    cells is the value. The table is dumped as indented JSON into the text
    of a ``students`` element that also carries an explanatory comment.

    :param filename: path of the workbook to read
    :param outfile: path of the XML file to write (UTF-8)
    """
    with xlrd.open_workbook(filename) as excel:
        table = excel.sheet_by_index(0)

    data = OrderedDict()
    for i in range(table.nrows):
        row = table.row_values(i)
        data[str(int(row[0]))] = str(row[1:])

    root = etree.Element('root')
    students = etree.SubElement(root, 'students')
    students.append(etree.Comment('\n\t学生信息表\n\t"d" :[名字, 数学, 语文, 英语]\n'))
    # The text is assigned exactly once; the earlier placeholder assignment
    # was overwritten immediately and has been dropped.
    students.text = '\n' + json.dumps(data, indent=4, ensure_ascii=False) + '\n'
    document = ('<?xml version="1.0" encoding="UTF-8"?>\n' +
                etree.tounicode(root))

    # ``with`` closes the file even when the write raises.
    with codecs.open(outfile, 'w', 'utf-8') as output:
        output.write(document)

Ejemplo n.º 41

0

Mostrar archivo

Archivo: annotator.py Proyecto: exploration-space/collaborative-platform

    def __add_certainty(self, text, certainty):
        """Append ``certainty`` to the uncertainty ``classCode`` of ``text``.

        ``text`` is parsed as XML. When the header has no matching
        ``classCode`` element, the annotation list is created first and the
        lookup is repeated. Returns the updated document as unicode text.
        """
        query = (
            '//default:teiHeader'
            '//default:classCode[@scheme="http://providedh.eu/uncertainty/ns/1.0"]'
        )
        tree = etree.fromstring(text)

        certainties = tree.xpath(query, namespaces=NAMESPACES)
        if not certainties:
            tree = self.__create_annotation_list(tree)
            certainties = tree.xpath(query, namespaces=NAMESPACES)

        target = certainties[0]
        target.append(certainty)

        return etree.tounicode(tree)

Ejemplo n.º 42

0

Mostrar archivo

Archivo: annotator.py Proyecto: exploration-space/collaborative-platform

    def __add_annotator(self, text, annotator):
        """Append ``annotator`` to the annotators ``listPerson`` of ``text``.

        ``text`` is parsed as XML. When the header has no matching
        ``listPerson`` element, the list is created first and the lookup is
        repeated. Returns the updated document as unicode text.
        """
        query = (
            '//default:teiHeader'
            '//default:listPerson[@type="PROVIDEDH Annotators"]'
        )
        tree = etree.fromstring(text)

        list_person = tree.xpath(query, namespaces=NAMESPACES)
        if not list_person:
            tree = self.__create_list_person(tree)
            list_person = tree.xpath(query, namespaces=NAMESPACES)

        target = list_person[0]
        target.append(annotator)

        return etree.tounicode(tree)

Ejemplo n.º 43

0

Mostrar archivo

Archivo: ncServer.py Proyecto: kicoliu/netconf

 def send_notification(self, data, *params):
     """Send a notification carrying ``data`` to every open session.

     The message is a ``notification`` element holding an ``eventTime``
     leaf for the current time followed by ``data``. Sockets whose
     ``running`` is ``False`` and sessions whose ``session_open`` is
     ``False`` are skipped.
     """
     msg = etree.Element("{{{}}}notification".format(NSMAP['ncEvent']))
     timestamp = date_time_string(datetime.datetime.now())
     msg.append(util.leaf_elm('eventTime', timestamp))
     msg.append(data)

     msg_unicode = etree.tounicode(msg, pretty_print=True)
     logger.debug("notification msg is:\n%s", str(msg_unicode))

     for sock in self.server.sockets:
         if sock.running is not False:
             for session in sock.sessions:
                 if session.session_open is not False:
                     logger.debug(
                         "Sending to client, session id: %d, ip:%s, port:%d",
                         session.session_id, sock.client_addr[0],
                         sock.client_addr[1])
                     session.send_message(msg_unicode)
     return

Ejemplo n.º 44

0

Mostrar archivo

Archivo: xls2xml.py Proyecto: Rajpratik71/Python-Show-Me-the-Code-Version

def xls_xml(file_name):
	"""Dump the ``student`` sheet of a workbook into ``students.xml``.

	Each row contributes one entry: the first cell (converted through
	``int`` to ``str``) is the key and the string form of the remaining
	cells is the value. The string form of the resulting dict becomes the
	text of a ``students`` element.

	:param file_name: path of the workbook to read
	"""
	data = {}

	excel = xlrd.open_workbook(file_name)
	table = excel.sheet_by_name('student')
	for i in range(table.nrows):
		row = table.row_values(i)
		data[str(int(row[0]))] = str(row[1:])

	root = etree.Element('root')
	students = etree.SubElement(root, 'students')
	students.append(etree.Comment('学生信息表\n\"id\": [名字，数学，语文，英语]'))
	students.text = str(data)
	# ``with`` closes the file even when the write raises.
	with codecs.open('students.xml', 'w', 'utf-8') as output:
		output.write(etree.tounicode(root))

Ejemplo n.º 45

0

Mostrar archivo

Archivo: phone_book.py Proyecto: yuelinsoft/uf-spider

 def getProvince(self):
     """Collect the URL of every province.

     The hrefs found are also appended to ``self.hrefs``.

     :return: [[href, province_detail_name], [] ...]
     """
     url = 'http://www.cc10000.cn/0/'
     options = {
         'method': 'get',
         'url': url,
         'headers': self.headers,
         'timeout': _time_out
     }
     response = Request.basic(options, resend_times=4)
     selector = etree.HTML(response.text)
     content = etree.tounicode(selector.xpath('//body/div[6]')[0])
     # Raw string: ``\d`` inside a plain literal is an invalid escape
     # sequence, which newer Python versions warn about at compile time.
     href_and_name = re.findall(r'href="(/\d.*?)">(.*?)<', content)
     # Keep provinces only, replacing the short province name with the
     # detailed one found at the same position in the config lists.
     seq = [[
         href,
         config.ROOT_DETAIL_NAMES[config.ROOT_SHORT_NAMES.index(short_name)]
     ] for href, short_name in href_and_name
         if short_name in config.ROOT_SHORT_NAMES]
     self.hrefs.extend([entry[0] for entry in seq])
     return seq

Ejemplo n.º 46

0

Mostrar archivo

Archivo: utils.py Proyecto: openstax/cnx-press

def replace_id_and_version(model, id, version):
    """Rewrite the model's file in place with the given id and version.

    :param model: module
    :type model: :class:`litezip.Collection` or :class:`litezip.Module`
    :param id: id
    :type id: str
    :param version: major and minor version tuple
    :type version: tuple of int

    """
    def first_match(path):
        # First element matching ``path`` in the parsed document.
        return xml.xpath(path, namespaces=COLLECTION_NSMAP)[0]

    with model.file.open('rb') as source:
        xml = etree.parse(source)

    content_id = first_match('//md:content-id')
    content_id.text = id

    version_elm = first_match('//md:version')
    version_elm.text = convert_version_to_legacy_version(version)

    # Write the modified document back over the original file.
    with model.file.open('wb') as target:
        serialized = etree.tounicode(xml)
        target.write(serialized.encode('utf8'))

Ejemplo n.º 47

0

Mostrar archivo

Archivo: logconfig.py Proyecto: RedTuna/breadability

    def log(self, node, action, description):
        """Print a short, hash-tagged summary of ``node`` for an event.

        Nothing is printed unless ``self._active`` is truthy. The output
        holds the first 8 hex digits of the MD5 of the serialized node, the
        description, and the first 202 characters of the serialized node
        with newlines removed.

        """
        if not self._active:
            return

        serialized = tounicode(node)
        digest = md5()
        try:
            digest.update(serialized.encode('utf-8', errors="replace"))
        except Exception as exc:
            LOG.error("Cannot hash the current node." + str(exc))
        hash_id = digest.hexdigest()[0:8]
        preview = serialized.replace("\n", "")[0:202]
        print("{0} :: {1}\n{2}".format(hash_id, description, preview))

Ejemplo n.º 48

0

Mostrar archivo

def get_clinical_document(access_token, hie_profile):
    """get member's clinical data from HIXNY (CDA XML), convert to FHIR (JSON), return both.

    :param access_token: bearer token sent in the ``Authorization`` header
    :param hie_profile: object providing ``mrn`` and ``data_requestor``
    :return: dict with ``cda_content`` and ``fhir_content``; both are
        ``None`` when the response holds no ``ClinicalDocument`` element
    """
    from xml.sax.saxutils import escape

    # Escape the interpolated values so that characters such as ``&`` or
    # ``<`` cannot produce a malformed (or injected) XML payload.
    request_xml = """
        <GETDOCUMENTPAYLOAD>
            <MRN>%s</MRN>
            <DATAREQUESTOR>%s</DATAREQUESTOR>
        </GETDOCUMENTPAYLOAD>
        """ % (
        escape(str(hie_profile.mrn)),
        escape(str(hie_profile.data_requestor)),
    )
    print(request_xml)

    # NOTE(review): ``verify=False`` disables TLS certificate verification
    # for this call -- confirm that this is intended.
    response = requests.post(
        settings.HIE_GETDOCUMENT_API_URI,
        verify=False,
        headers={
            'Content-Type': 'application/xml',
            'Authorization': "Bearer %s" % (access_token)
        },
        data=request_xml,
    )
    response_xml = etree.XML(response.content)
    print(response_xml)

    cda_element = response_xml.find("{%(hl7)s}ClinicalDocument" % NAMESPACES)
    if cda_element is not None:
        cda_content = etree.tounicode(cda_element)
        fhir_content = cda2fhir(cda_content).decode('utf-8')
        result = {
            'cda_content': cda_content,
            'fhir_content': fhir_content,
        }
    else:
        result = {
            'cda_content': None,
            'fhir_content': None,
        }

    return result

Ejemplo n.º 49

0

Mostrar archivo

def ocrdata():
    """Import OCR line texts from the request payload into stored pages.

    The payload (gzip-compressed JSON when ``Content-Encoding`` is
    ``gzip``, plain JSON otherwise) maps book name -> page name -> line id
    -> text. For every line found in the page XML, the ``TextEquiv`` with
    the payload's ``index`` and its ``Unicode`` child are created when
    missing, and the text is stored. Each touched page gets its OCR line
    count and serialized XML updated before the session is committed.
    """
    if request.headers.get("Content-Encoding") == "gzip":
        payload = json.loads(gzip.decompress(request.data).decode("utf-8"))
    else:
        payload = request.get_json()

    imported = 0
    for book_name, pages in payload["ocrdata"].items():
        book = Book.query.filter_by(name=book_name).one()
        for page_name, lines in pages.items():
            page = Page.query.filter_by(book_id=book.id, name=page_name).one()
            root = etree.fromstring(page.data)
            ns = {"ns": root.nsmap[None]}
            for line_id, text in lines.items():
                line_path = './/ns:TextLine[@id="' + line_id + '"]'
                line = root.find(line_path, namespaces=ns)
                if line is None:
                    # Lines missing from the page XML are ignored.
                    continue

                equiv_path = './ns:TextEquiv[@index="{}"]'.format(
                    payload["index"])
                equiv = line.find(equiv_path, namespaces=ns)
                if equiv is None:
                    equiv = etree.SubElement(
                        line,
                        "{{{}}}TextEquiv".format(ns["ns"]),
                        attrib={"index": str(payload["index"])})

                unicode_node = equiv.find('./ns:Unicode', namespaces=ns)
                if unicode_node is None:
                    unicode_node = etree.SubElement(
                        equiv, "{{{}}}Unicode".format(ns["ns"]))
                unicode_node.text = text
                imported += 1

            # Count the lines having at least one TextEquiv with index > 0.
            ocr_line_count = root.xpath(
                'count(//ns:TextLine'
                '[count(./ns:TextEquiv'
                '[@index>0])>0])',
                namespaces=ns)
            page.no_lines_ocr = int(ocr_line_count)
            page.data = etree.tounicode(root.getroottree())
    db_session.commit()
    return "Imported {} lines.".format(imported)

Ejemplo n.º 50

0

Mostrar archivo

def storeBatches(collectedRecords, firstRecord):
    global config

    if len(collectedRecords) > 0:
        if 'xml-batch' in config.format:
            XMLContainer = ET.XML('<records/>')
            for (ID, record) in collectedRecords.iteritems():
                XMLContainer.append(record)
            filePath = pathForBatch(firstRecord, 'xml')
            XMLFile = open(filePath, 'w')
            XMLString = ET.tounicode(XMLContainer).encode('UTF-8')
            XMLFile.write(XMLString)
            XMLFile.close()
            print u"XML-Batch: " + str(
                len(collectedRecords)) + u" records to »" + filePath + u"«"

        if 'json-batch' in config.format or 'couchdb-batch' in config.format:
            JSONContainer = []
            for (ID, record) in collectedRecords.iteritems():
                JSONInternal = elem_to_internal(record, strip=1)
                if len(JSONInternal) == 1:
                    JSONInternal = JSONInternal.values()[0]
                JSONInternal['_id'] = ID
                JSONContainer += [JSONInternal]

            if 'json-batch' in config.format:
                filePath = pathForBatch(firstRecord, 'json')
                JSONFile = open(filePath, "w")
                JSONFile.write(simplejson.dumps(JSONContainer))
                JSONFile.close()
                print u"JSON-Batch: " + str(
                    len(collectedRecords)) + u" records to »" + filePath + u"«"

            if 'couchdb-batch' in config.format:
                filePath = pathForBatch(firstRecord, 'couch.json')
                JSONContainer = {'docs': JSONContainer}
                JSONFile = open(filePath, "w")
                JSONFile.write(simplejson.dumps(JSONContainer))
                JSONFile.close()
                print u"CouchDB JSON-Batch: " + str(
                    len(collectedRecords)) + u" records to »" + filePath + u"«"

Ejemplo n.º 51

0

Mostrar archivo

Archivo: fygg_yn_pe.py Proyecto: chenliy/youshu

    def detail_page(self, response):
        """Extract announcement fields from a detail page response.

        NOTE(review): this snippet ends inside the ``try`` block, so the
        error handling and the return value are not visible here.
        """
        # Initialize the fields
        _id = ''
        _id_ = ''
        ann_type = '送达公告'
        announcer = '普洱市中级人民法院'
        defendant = ''
        defendant_origin = ''
        ann_date = ''
        ann_content = ''
        ann_html = ''
        content_url = response.url
        pdf_url = ''
        case_no = ''
        source = '普洱市中级人民法院'

        try:
            html = self.xml_xpath(response, 0)
            content_text = html.xpath('//div[@class="sswy_article_m"]//text()')

            # Serialized markup of the first article container.
            ann_html = etree.tounicode(html.xpath('//div[@class="sswy_article_m"]')[0])
            # print(ann_html)
            # Join the text nodes, then drop non-breaking spaces, CRLF pairs
            # and all remaining whitespace.
            content_p = ''.join(content_text).replace('\xa0', '')
            content_p = content_p.replace('\r\n', '')
            ann_content = ''.join(content_p.split())

            # Take the last date-like match in the content.
            ann_date = re.findall(r'(.{4}[年].{1,2}[月].{1,3}[日号])', ann_content)[-1]

            ann_date = self.parse_time(ann_date)

            # Replace '本院' with a full-width colon, keep only the text before
            # the first full-width colon, and terminate it with an ASCII colon.
            text = ann_content.replace('本院', '：')
            text = text.split('：')[0] + ":"
            case = re.findall(r'(（.{4}）.*?)号', text)
            print(len(case))
            print(text)
            if len(case) > 0:
                print("*******")
                defendant_origin_list = re.findall(r'号(.*?):', text)[0]
                # print(defendant_origin_list)
            elif '公告' in text:
                defendant_origin_list = re.findall(r'公告(.*?):', text)[0]

Ejemplo n.º 52

0

Mostrar archivo

Archivo: reference.py Proyecto: vorujack/z3c.rml

def extractExamples(directory):
    """Collect the documented examples from every ``.rml`` file.

    Elements carrying a ``doc:example`` attribute are grouped by the value
    of ``EXAMPLE_ATTR_NAME`` (or by their tag when that is empty). Each
    example records its file name, source line, element, input/expected
    URLs and, under ``code``, the rendered snippet.

    :param directory: directory scanned (non-recursively) for ``.rml`` files
    :return: dict mapping demo tag to a list of example dicts
    """
    examples = {}
    for filename in os.listdir(directory):
        if not filename.endswith('.rml'):
            continue
        # The tree is fully parsed inside the ``with`` block, so the handle
        # is released even when parsing or rendering fails.
        with open(os.path.join(directory, filename), 'rb') as rmlFile:
            root = etree.parse(rmlFile).getroot()
        elements = root.xpath('//@doc:example/parent::*',
                              namespaces={'doc': EXAMPLE_NS})
        # Phase 1: Collect all elements
        fileExamples = []
        for elem in elements:
            demoTag = elem.get(EXAMPLE_ATTR_NAME) or elem.tag
            example = {
                'filename': filename,
                'line': elem.sourceline,
                'element': elem,
                'rmlurl': INPUT_URL % filename,
                'pdfurl': EXPECTED_URL % (filename[:-4] + '.pdf')
            }
            examples.setdefault(demoTag, []).append(example)
            fileExamples.append(example)
        # Phase 2: Render the elements of this file only. Examples from
        # earlier files were rendered in their own iteration; rendering them
        # again for each later file only repeated the same work.
        removeDocAttributes(root)
        for example in fileExamples:
            xml = etree.tounicode(example['element']).strip()
            xml = re.sub(
                ' ?xmlns:doc="http://namespaces.zope.org/rml/doc"', '',
                xml)
            xml = dedent(xml)
            xml = enforceColumns(xml, 80)
            xml = highlightRML(xml)
            example['code'] = xml

    return examples

Ejemplo n.º 53

0

Mostrar archivo

def test_make_instructions():
    """``make_instructions`` must turn the token stream into the expected
    ``EREGS_INSTRUCTIONS`` XML."""
    put = tokens.Verb(tokens.Verb.PUT, active=True)
    delete = tokens.Verb(tokens.Verb.DELETE, active=True)
    move = tokens.Verb(tokens.Verb.MOVE, active=True)

    def paragraph(part):
        # Paragraph token for the given part label.
        return tokens.Paragraph.make(part=part)

    tokenized = [
        paragraph('111'),
        put,
        paragraph('222'),
        paragraph('333'),
        paragraph('444'),
        delete,
        paragraph('555'),
        move,
        paragraph('666'),
        paragraph('777'),
    ]
    with XMLBuilder("EREGS_INSTRUCTIONS") as ctx:
        for label in (222, 333, 444):
            ctx.PUT(label=label)
        ctx.DELETE(label=555)
        ctx.MOVE(label=666, destination=777)
    expected = ctx.xml_str
    actual = etree.tounicode(amdparser.make_instructions(tokenized))
    assert expected == actual

Ejemplo n.º 54

0

Mostrar archivo

def test_digtial_original_dnx_single_file():
    """Check that a boolean ``digital_original`` ends up as the lower-case
    string 'true' in the DigitalOriginal key of a single-file METS."""
    here = os.path.dirname(os.path.realpath(__file__))
    master_path = os.path.join(here, 'data', 'test_batch_1', 'pm',
                               'presmaster.jpg')
    characteristics = [{
        'submissionReason': 'bornDigitalContent',
        'IEEntityType': 'periodicIE'
    }]
    mets = mdf.build_single_file_mets(
        ie_dmd_dict={"dc:title": "test title"},
        filepath=master_path,
        generalIECharacteristics=characteristics,
        digital_original=True)

    sections = mets.findall('.//section[@id="generalRepCharacteristics"]')
    grc = sections[0]
    keys = grc.findall('.//key[@id="DigitalOriginal"]')
    do = keys[0]
    assert (do.text == 'true')
    print(ET.tounicode(mets, pretty_print=True))

Ejemplo n.º 55

0

Mostrar archivo

 def parse_by_br(self, response):
     """Split the selected elements on ``<br>`` tags into plain text pieces.

     Every element matched by the XPath expressions in
     ``self.querySelectorList_br`` is serialized and split on ``<br>`` (or
     on ``<br/>`` when no ``<br>`` is present). Each piece is stripped, has
     newlines and carriage returns removed, and loses its remaining tags.
     Elements without either separator contribute nothing.
     """
     html = etree.HTML(text=response.text)
     collected = []
     for selector in self.querySelectorList_br:
         for node in html.xpath(selector):
             markup = etree.tounicode(node)
             for separator in ('<br>', '<br/>'):
                 if separator in markup:
                     pieces = markup.split(separator)
                     break
             else:
                 print('text_l.split by br error, maybe not found br')
                 pieces = []
             collected.extend(
                 re.sub('<.*?>', '',
                        piece.strip().replace('\n', '').replace('\r', ''))
                 for piece in pieces)
     return collected

Ejemplo n.º 56

0

Mostrar archivo

def _ogc_filter_to_expression(prop):
    """Translate an OGC filter element into an expression string.

    The element tag is tested, in order, for the logical combinators, the
    binary comparisons and ``PropertyIsBetween``. An ``AssertionError``
    carrying the serialized element is raised when nothing matches.
    """
    tag = prop.tag

    # Logical combinators: translate every child and join the results.
    if 'And' in tag:
        return ' and '.join(
            _ogc_filter_to_expression(child) for child in prop.iterchildren())
    if 'Or' in tag:
        return ' or '.join(
            _ogc_filter_to_expression(child) for child in prop.iterchildren())

    # Binary comparisons, tested in this exact order.
    comparisons = (
        ('PropertyIsGreaterThan', '>'),
        ('PropertyIsLessThan', '<'),
        ('PropertyIsEqualTo', '='),
        ('PropertyIsNotEqualTo', '!='),
    )
    for marker, operator in comparisons:
        if marker in tag:
            return _compile_bin_op(operator, prop.iterchildren())

    if 'PropertyIsBetween' in tag:
        name = prop.PropertyName
        lower = _compile_bin_op('>', [name, prop.LowerBoundary.Literal])
        upper = _compile_bin_op('<', [name, prop.UpperBoundary.Literal])
        return lower + 'and ' + upper

    raise AssertionError(etree.tounicode(prop, pretty_print=True))

Ejemplo n.º 57

0

Mostrar archivo

Archivo: server.py Proyecto: bonald/vim_cfg

 def send_rpc_reply(self, rpc_reply, origmsg):
     """Wrap ``rpc_reply`` in an ``rpc-reply`` element and send it.

     The wrapper copies the attributes and namespace map of ``origmsg``.
     ``rpc_reply`` is appended; when that raises ``AttributeError`` the
     wrapper is extended with it instead. Progress is printed along the
     way and the serialized reply is passed to ``self.send_message``.
     """
     reply_tag = qmap('nc') + "rpc-reply"
     reply = etree.Element(reply_tag,
                           attrib=origmsg.attrib,
                           nsmap=origmsg.nsmap)
     print('step 10')
     print(etree.tostring(reply, pretty_print=True))
     try:
         reply.append(rpc_reply)
         print('step 11')
         print(reply)
     except AttributeError:
         reply.extend(rpc_reply)
         print('stpe 12')
         print(reply)

     serialized = etree.tounicode(reply, pretty_print=True)
     if self.debug:
         logger.debug("%s: Sending RPC-Reply: %s", str(self),
                      str(serialized))
     print('step 13')
     print(serialized)
     self.send_message(serialized)

Ejemplo n.º 58

0

Mostrar archivo

def xslt(request):
    """Render the XSLT administration page, applying a transform on POST.

    For a POST request the submitted ``TransformForm`` is validated; when
    valid, the transformer named by the form's ``transformer`` field is
    looked up, applied to the parsed ``xml`` field with ``abstract='0'``,
    and the result is exposed both as plain text and pretty-printed.
    For any other method an unbound form is rendered and both results are
    empty strings.
    """
    plain_output = ''
    pretty_output = ''

    if request.method != 'POST':
        form = forms.TransformForm()
    else:
        form = forms.TransformForm(request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            transformer = transformers.get(cleaned['transformer'])
            # The parser is given UTF-8 bytes rather than the text itself.
            source_tree = etree.fromstring(cleaned['xml'].encode('utf-8'))
            result = transformer(source_tree, abstract='0')
            plain_output = unicode(result)
            pretty_output = etree.tounicode(result, pretty_print=True)

    context = {
        'form': form,
        'transform_result': plain_output,
        'transform_result_pretty': pretty_output,
    }
    return render(
        request, 'transformers_pool/administration/xslt.html', context)

Ejemplo n.º 59

0

Mostrar archivo

            def write_to_file(verses, f):
                """Write the cleaned text of each verse to ``f``.

                Nothing happens when ``verses`` is empty.  Otherwise the
                verse nodes are taken from
                ``get_text_format(verses[0])["verses"]``.  Each node is
                serialized, passed through ``get_rid_of_notes``, re-parsed,
                stripped of ``add`` nodes, flattened with
                ``stringify_children`` and passed through ``clean_entities``.

                A non-blank result is echoed to stdout in double quotes and
                written to ``f``, followed by a newline unless it is the
                last verse.  A blank result is written as a bare newline.
                """
                if len(verses) == 0:
                    return

                verse_nodes = get_text_format(verses[0])["verses"]
                final_index = len(verse_nodes) - 1

                for index, node in enumerate(verse_nodes):
                    serialized = ET.tounicode(node)
                    reparsed = ET.fromstring(get_rid_of_notes(serialized))
                    pruned = remove_nodes(reparsed, "add", NS_TI["ti"])
                    text = clean_entities(stringify_children(pruned))

                    if not text.strip():
                        # Blank verses still produce a line in the output.
                        f.write("\n")
                        continue

                    print('"' + text + '"')
                    terminator = "" if index == final_index else "\n"
                    f.write("{verse}{eol}".format(verse=text,
                                                  eol=terminator))

Ejemplo n.º 60

0

Mostrar archivo

    def dump_to_xml(self, tree_name='Item'):
        """Replace this object's entry in the XML file at ``file_path``.

        The file is parsed, every ``tree_name`` element whose ``login``
        attribute equals ``self.login`` is removed, and a new ``tree_name``
        element carrying that login is appended to the root with one child
        element per instance attribute.  The tree is then written back to
        the same path as UTF-8.

        ``file_path`` is not defined in this method; presumably it is a
        module-level path -- verify against the rest of the file.

        :param tree_name: tag of the per-object elements; used both for
            removing the old entry and for creating the new one.
        """
        tree = etree.parse(file_path)
        root = tree.getroot()

        # Pass the login as an XPath variable instead of interpolating it
        # into the expression, so a quote in the login cannot break or
        # alter the query.
        stale = root.xpath("//%s[@login=$login]" % tree_name,
                           login=self.login)
        for bad in stale:
            bad.getparent().remove(bad)

        # Create the entry under the same tag that was just removed, so a
        # non-default tree_name replaces its own entries instead of
        # accumulating "Item" elements.
        item_branch = etree.Element(tree_name, login=self.login)
        for name, value in self.__dict__.items():
            elem = etree.Element(name)
            elem.text = unicode(value)
            item_branch.append(elem)
        root.append(item_branch)

        # Serialize and re-parse before writing; kept from the original code.
        xml = etree.tounicode(root, pretty_print=True)

        root = etree.fromstring(xml)
        et = etree.ElementTree(root)
        et.write(file_path, pretty_print=True, encoding="UTF-8")