Python fromstring Examples

Programming Language: Python

Namespace/Package Name: lxml.html.clean

Method/Function: fromstring

Examples at hotexamples.com: 5

Python fromstring - 5 examples found. These are the top-rated real-world Python examples of lxml.html.clean.fromstring extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: feedproc.py Project: ap-Codkelden/point-core

    def process_text(self, text):
        """Clean and parse *text*, then return it processed by ``html2md``.

        The text is passed through ``clean_html`` and ``fromstring``; the
        resulting tree is handed to ``self._parse_element`` and then
        serialized (``method="xml"``, unicode) as the input of ``html2md``.

        If the markup cannot be parsed, ``xhtmlim(text)`` is returned
        instead.
        """
        try:
            tree = fromstring(clean_html(text))
        except (etree.XMLSyntaxError, etree.ParserError):
            # lxml.html reports an empty document with ParserError, which
            # is not an XMLSyntaxError; both must reach the fallback.
            return xhtmlim(text)

        self._parse_element(tree)
        return html2md(etree.tostring(tree, method="xml", encoding=unicode))

Example #2

Show file

File: feedproc.py Project: ap-Codkelden/point-core

    def process_text(self, text):
        """Clean and parse *text*, drop its ``tags`` elements, run ``html2md``.

        Every element carrying the CSS class ``tags`` is removed with
        ``drop_tree()`` before the tree is handed to ``self._parse_element``
        and serialized (``method="xml"``, unicode) for ``html2md``.

        If the markup cannot be parsed, ``xhtmlim(text)`` is returned
        instead.
        """
        try:
            tree = fromstring(clean_html(text))
        except (etree.XMLSyntaxError, etree.ParserError):
            # lxml.html reports an empty document with ParserError, which
            # is not an XMLSyntaxError; both must reach the fallback.
            return xhtmlim(text)

        for el in tree.find_class('tags'):
            el.drop_tree()

        self._parse_element(tree)
        return html2md(etree.tostring(tree, method="xml", encoding=unicode))

Example #3

Show file

File: node.py Project: konrado0/vosqa

 def process_node_body(self, html):
     """Clean *html* and bind every ``<img>`` in it to an ``Image`` object.

     The markup is passed through ``html_cleaner.clean_html``, ``autolink``
     and ``filter_style``.  Each ``<img>`` is then handled as follows:

     * no ``src`` (missing or empty): the element is removed;
     * ``src`` under ``djsettings.MEDIA_URL`` (directly, or after replacing
       a leading ``djsettings.APP_URL`` with ``/``): the ``Image`` is looked
       up first by the id encoded in the element id (when it starts with
       ``self.image_id_prefix``), then by file path; a hit is appended to
       ``self.existing_images``;
     * otherwise, or when both lookups fail: the file is fetched with
       ``download_image_file``, saved as a new ``Image`` and appended to
       ``self.new_images``.

     Returns the document serialized with ``etree.tounicode(method="html")``.

     Raises ``ValidationError`` on any failure; every image in
     ``self.new_images`` is deleted before the error propagates.
     """
     try:
         html = autolink(html_cleaner.clean_html(html))

         # To stay on the safe side escape all % characters
         html = html.replace(u"%", u"%%")
         doc = filter_style(fromstring(html))

         # Iterate over a snapshot: elements may be removed from the tree.
         for el in list(doc.iter(u'img')):
             # A missing src attribute is treated like an empty one.
             src = (el.get(u"src") or u"").replace(u"%%", u"%")
             if not src:
                 el.getparent().remove(el)
                 # Nothing left to look up or download for this element.
                 continue

             # Site-relative form of src, used only to recognise our own
             # media files.  src itself is kept as-is for downloading.
             media_src = src
             if (not media_src.startswith(djsettings.MEDIA_URL)
                     and media_src.startswith(djsettings.APP_URL)):
                 # str.replace returns a new string; keep the result.
                 media_src = media_src.replace(djsettings.APP_URL, u"/", 1)

             if media_src.startswith(djsettings.MEDIA_URL):
                 elem_id = el.get(u"id")
                 try:
                     if elem_id and elem_id.startswith(self.image_id_prefix):
                         image_id = int(elem_id.replace(self.image_id_prefix, ""))
                         image = Image.objects.get(id=image_id)
                         self.existing_images.append(image)
                         self.set_image_attributes(el, image)
                         continue
                 except Exception:
                     # Best effort: fall through to the lookup by path.
                     logger.error(u'Malformed id (%s)found on our own img url %s' % (elem_id, src))

                 try:
                     image = Image.objects.get(image=media_src.replace(djsettings.MEDIA_URL, u""))
                     self.set_image_attributes(el, image)
                     self.existing_images.append(image)
                     continue
                 except Exception:
                     # Best effort: fall through and download the image.
                     logger.error(u'Unable to locate img stored under url %s in Image table' % src)
                     if src.startswith(u"/"):
                         src = src.replace(u"/", djsettings.APP_URL, 1)

             image_file = download_image_file(src)
             image = Image(image=image_file, upload_url=src)
             image.save()
             self.new_images.append(image)
             self.set_image_attributes(el, image)

         return etree.tounicode(doc, method="html")
     except ValidationError:
         for image in self.new_images:
             image.delete()
         raise
     except Exception:
         logger.exception(u'Unhandled exception while parsing "%s" body' % html)
         for image in self.new_images:
             image.delete()
         raise ValidationError(_(u"Unexpected error happened :("))

Example #4

Show file

File: feedproc.py Project: ap-Codkelden/point-core

    def process_entry(self, entry):
        """Extend the parent's processed entry with a ``'tags'`` list.

        ``entry['summary']`` is parsed as HTML.  For every element with
        class ``tag`` found inside an element with class ``tags``, the
        stripped text of that element is appended to ``out['tags']``.

        Returns the dict produced by the parent ``process_entry`` with the
        ``'tags'`` key set (an empty list when no tags are found).
        """
        out = super(LorFeedProcessor, self).process_entry(entry)

        out['tags'] = []

        tree = fromstring(entry['summary'])

        for el in tree.find_class('tags'):
            for t in el.find_class('tag'):
                # .text is None when the element has no leading text node;
                # skip it instead of failing on None.strip().
                if t.text is not None:
                    out['tags'].append(t.text.strip())

        return out

Example #5

Show file

File: feedproc.py Project: ap-Codkelden/point-core

    def process_text(self, text):
        """Clean and parse *text*, cut it at the first ``cutid`` anchor.

        Direct children of the parsed tree are scanned in order.  From the
        first ``<a>`` whose ``name`` attribute starts with ``cutid``
        (case-insensitive) onwards, every direct child is removed with
        ``drop_tree()``.  The remaining tree is handed to
        ``self._parse_element`` and serialized (``method="xml"``, unicode)
        as the input of ``html2md``.

        If the markup cannot be parsed, ``xhtmlim(text)`` is returned
        instead.
        """
        try:
            tree = fromstring(clean_html(text))
        except (etree.XMLSyntaxError, etree.ParserError):
            # lxml.html reports an empty document with ParserError, which
            # is not an XMLSyntaxError; both must reach the fallback.
            return xhtmlim(text)

        rm = False
        # Iterate over a snapshot because children are dropped in the loop.
        for el in list(tree.iterchildren()):
            # Anchors without a name attribute yield None from get().
            name = el.get('name') or ''
            if el.tag.lower() == 'a' and name.lower().startswith('cutid'):
                rm = True

            if rm:
                el.drop_tree()

        self._parse_element(tree)
        return html2md(etree.tostring(tree, method="xml", encoding=unicode))