def to_html(self):
    # Assemble the full question page; the built-up list is the value callers
    # want, so return it (the original returned the two sub-lists and threw
    # the assembled page away).
    html = []
    html_examples = self.get_examples_html()
    html_options = self.get_options_html()
    message = "<div>Your friend <b>likes</b> these 3 restaurants:</div>"
    option_message = "<div>Among the following 3 restaurants, which would you recommend most:</div>"
    html.append(message)
    html.extend(html_examples)
    html.append(option_message)
    html.extend(html_options)
    return html
from time import sleep

from progressbar import ETA, Bar, Percentage, ProgressBar


def store_htmls_in_list():
    """Fetch the HTML of every URL found in the text, with a progress bar."""
    urls = get_urls_from_text()
    widgets = ['Getting link html: ', Percentage(), Bar(), ETA(), ' ']
    pbar = ProgressBar(widgets=widgets, maxval=len(urls)).start()
    html = []
    for i, l_url in enumerate(urls):
        # Python 2-era normalisation; under Python 3 the str can pass through.
        url = l_url.encode("ascii")
        html.append(get_html_from_website(url))
        sleep(0.001)
        pbar.update(i)
    pbar.finish()
    return html
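# The two helpers used above aren't shown in this snippet. Below is a minimal
# sketch of what they might look like, assuming a plain-text link file and
# urllib; the names match the calls above but the bodies are assumptions.
import re
import urllib.request


def get_urls_from_text(path='links.txt'):
    # Hypothetical: pull anything URL-shaped out of a local text file.
    with open(path) as f:
        return re.findall(r'https?://\S+', f.read())


def get_html_from_website(url):
    # Hypothetical: fetch a page and return its decoded HTML.
    if isinstance(url, bytes):  # tolerate the .encode("ascii") above
        url = url.decode('ascii')
    with urllib.request.urlopen(url) as resp:
        return resp.read().decode('utf-8', errors='replace')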
def render_many(self, documents, **kwargs):
    html = []

    with tempfile.TemporaryDirectory() as tmpdir:
        # render individual documents
        for i, doc in enumerate(documents):
            self.media_url = f'doc-{i}/'
            doc_html = super(PDFRenderer, self).render(doc, **kwargs)
            self.save_attachments(doc_html, doc, f'doc-{i}/media/', tmpdir)
            html.append(doc_html)

        # combine and embed the HTML into the PDF container
        html = render_to_string('indigo_api/akn/export/pdf.html', {
            'documents': list(zip(documents, html)),
        })

        return self.to_pdf(html, tmpdir, documents=documents)
def adddivstodoc(self, thtml):
    """Put divs around headers."""
    import copy
    from lxml import etree, html

    oldbody = html.fromstring('<body>\n' + thtml + '\n</body>')
    newbody = html.fromstring('<html>\n</html>')
    activediv = None
    for child in oldbody.iter():
        if child.getparent() == oldbody:
            if child.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                # Close the previous header's div and open a new one.
                if activediv is not None:
                    newbody.append(activediv)
                    activediv = None
                activediv = etree.fromstring('<div class="indent%s"></div>'
                                             % child.tag)
                activediv.append(copy.deepcopy(child))
            elif activediv is not None:
                activediv.append(copy.deepcopy(child))
            else:
                newbody.append(copy.deepcopy(child))
    if activediv is not None:
        newbody.append(activediv)

    # encoding='unicode' keeps this a str so the line-based cleanup works
    # under Python 3; note `html` is rebound from the module to a list here.
    htmlout = etree.tostring(newbody, pretty_print=True, encoding='unicode')
    html = htmlout.split('\n')
    if html[0] == '<html>':
        html.pop(0)
    while html[-1] == '':
        html.pop()
    lastelem = html.pop()
    lastelem = lastelem.replace('</html>', '')
    if lastelem:
        html.append(lastelem)
    return '\n'.join(html)
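# Illustrative call only: `self` is unused in adddivstodoc, so for a quick
# test it can be invoked unbound with None; the sample fragment is made up.
sample = '<h1>Intro</h1>\n<p>Some text.</p>\n<h2>Details</h2>\n<p>More text.</p>'
print(adddivstodoc(None, sample))
# Each header and its following siblings come back wrapped in
# <div class="indenth1"> / <div class="indenth2"> blocks.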
def process_response(self, request, response):
    if (request.META.get('HTTP_X_PJAX')
            and response.status_code == 200
            and 'html' in response.get('content-type', '').lower()):
        # TODO(Kumar) cache this.
        with statsd.timer('pjax.parse'):
            tree = lxml.html.document_fromstring(response.content)
            # HTML is encoded as ascii with entity refs for non-ascii.
            html = []
            found_pjax = False
            for elem in tree.cssselect('title,%s' % settings.PJAX_SELECTOR):
                if elem.tag == 'title':
                    # Inject a <title> for jquery-pjax
                    html.append(lxml.html.tostring(elem, encoding=None))
                else:
                    found_pjax = True
                    if elem.text:
                        html.append(elem.text.encode('ascii',
                                                     'xmlcharrefreplace'))
                    for ch in elem.iterchildren():
                        html.append(lxml.html.tostring(ch, encoding=None))
            if not found_pjax:
                msg = ('pjax response for %s does not contain selector %r'
                       % (request.path, settings.PJAX_SELECTOR))
                if settings.DEBUG:
                    # Tell the developer the template is bad.
                    raise ValueError(msg)
                else:
                    pjax_log.error(msg)
                    return response
            response.content = ''.join(html)
    return response
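# For context, a hedged sketch of the settings this middleware reads; the
# selector value and the dotted path are assumptions, not the project's own.
# settings.py
PJAX_SELECTOR = '#page'  # element whose children are returned to jquery-pjax
MIDDLEWARE_CLASSES = (
    'myapp.middleware.PjaxMiddleware',  # hypothetical location of the class
)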
def diff_prettyHtml(self, diffs):
    """Convert a diff array into a pretty HTML report.

    Args:
        diffs: Array of diff tuples.

    Returns:
        HTML representation.
    """
    html = []
    for (op, data) in diffs:
        # Escape the text so it is safe to embed in HTML.
        text = (data.replace("&", "&amp;").replace("<", "&lt;")
                    .replace(">", "&gt;").replace("\n", "<br>"))
        if op == self.DIFF_INSERT:
            html.append("<ins style=\"background:#e6ffe6;\">%s</ins>" % text)
        elif op == self.DIFF_DELETE:
            html.append("<del style=\"background:#ffe6e6;\">%s</del>" % text)
        elif op == self.DIFF_EQUAL:
            html.append("<span>%s</span>" % text)
    return "".join(html)
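# This matches diff-match-patch's diff_prettyHtml. A usage sketch with the
# Python port of that library (pip install diff-match-patch):
from diff_match_patch import diff_match_patch

dmp = diff_match_patch()
diffs = dmp.diff_main('The quick brown fox', 'The slow brown dog')
dmp.diff_cleanupSemantic(diffs)  # merge char-level edits into readable runs
print(dmp.diff_prettyHtml(diffs))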
def hierarchy_level(node):
    # Count how many 'topicref' ancestors (including the node itself) exist.
    current = node
    count = 0
    while current is not None:
        css_class = current.attrib.get('class', '')
        if 'topicref' in css_class:
            count += 1
        current = current.getparent()
    return count


with open(full_fn, 'rb') as fp:
    root = lxml.html.fromstring(fp.read())

selector = CSSSelector('li.topicref')

html = lxml.etree.Element('html')
html.append(lxml.etree.Element('head'))
body = lxml.etree.Element('body')
html.append(body)

for topicref in selector(root):
    first_link = topicref.find('a')
    topic_href = first_link.attrib['href']
    topic_title = first_link.text
    topic_level = hierarchy_level(topicref)
    article_node = article_from_href(topic_href)
    article_node.attrib['level'] = str(topic_level)
    body.append(article_node)

with open(target_fn, 'wb') as fp:
    fp.write(lxml.html.tostring(html))
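# article_from_href is called above but never defined in the fragment. A
# minimal sketch of what it might do, assuming the href points at a local
# HTML file; the name is kept, the implementation is an assumption.
def article_from_href(href):
    with open(href, 'rb') as f:
        doc = lxml.html.fromstring(f.read())
    article = lxml.etree.Element('article')
    # list() because append() reparents nodes out of the tree being iterated
    for child in list(doc.body):
        article.append(child)
    return article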
threads = tree.xpath("//div[contains(@class, 'thread')]")

# Make a list of the user names available to select from, and grab the
# thread belonging to the user we care about.
for thread in threads:
    names.append(thread.text)
    if thread.text == "REPLACEDUSERNAMES":
        courtenaythread = thread

html = []
messages = courtenaythread.xpath(
    "//div[text()[contains(., 'REPLACEDUSERNAMES')]]/*")
for message in messages:
    if message.keys() == ['class']:
        # A message header: sender followed by timestamp.
        html.append("<hr>")
        html.append("Sent by")
        html.append(message[0][0].text)
        html.append("<br>")
        html.append("Date and Time")
        html.append(message[0][1].text)
    else:
        # The message body itself.
        html.append("<br>")
        html.append("Message:")
        html.append(message.text)
import scraperwiki
import lxml.html

companies = [
    'AgrIcola+Daniella',
    'Agricola+El+Consuelo',
    'Agricola+El+Rosal',
    # ... (more companies elided in the fragment) ...
    'Custom+Pak',
]

# WARNING LETTERS: http://google2.fda.gov/search?client=FDAgov&site=FDAgov-WarningLetters-ICECI&output=xml_no_dtd&proxystylesheet=FDAgov&ie=UTF-8&oe=UTF-8&as_q=&num=100&btnG=Search&as_epq=COMPANY+NAME&as_oq=&as_eq=&restrictBox=FDAgov-WarningLetters-ICECI&lr=&as_ft=i&as_filetype=&as_occt=any&as_dt=i&as_sitesearch=&sort=
# ENFORCEMENT REPORTS: http://google2.fda.gov/search?client=FDAgov&site=FDAgov-EnforcementReports-Safety&output=xml_no_dtd&proxystylesheet=FDAgov&ie=UTF-8&oe=UTF-8&as_q=++&num=100&btnG=Search&as_epq=COMPANY+NAME&as_oq=&as_eq=&restrictBox=FDAgov-EnforcementReports-Safety&lr=&as_ft=i&as_filetype=&as_occt=any&as_dt=i&as_sitesearch=&sort=

# The company name is spliced between the two halves of each search URL.
SearchURL1First = "http://google2.fda.gov/search?client=FDAgov&site=FDAgov-WarningLetters-ICECI&output=xml_no_dtd&proxystylesheet=FDAgov&ie=UTF-8&oe=UTF-8&as_q=&num=100&btnG=Search&as_epq="
SearchURL1Second = "&as_oq=&as_eq=&restrictBox=FDAgov-WarningLetters-ICECI&lr=&as_ft=i&as_filetype=&as_occt=any&as_dt=i&as_sitesearch=&sort="
SearchURL2First = "http://google2.fda.gov/search?client=FDAgov&site=FDAgov-EnforcementReports-Safety&output=xml_no_dtd&proxystylesheet=FDAgov&ie=UTF-8&oe=UTF-8&as_q=++&num=100&btnG=Search&as_epq="
SearchURL2Second = "&as_oq=&as_eq=&restrictBox=FDAgov-EnforcementReports-Safety&lr=&as_ft=i&as_filetype=&as_occt=any&as_dt=i&as_sitesearch=&sort="

html = []
root = []
for z in companies:
    html.append(scraperwiki.scrape(SearchURL1First + z + SearchURL1Second))
    html.append(scraperwiki.scrape(SearchURL2First + z + SearchURL2Second))

for x in html:
    root.append(lxml.html.fromstring(x))

for y in root:
    for el in y.cssselect("p.g a"):
        data = {'URL': el.attrib['href']}
        scraperwiki.sqlite.save(unique_keys=['URL'], data=data)
def parse_html(content):
    parser = lxml.etree.HTMLParser()
    tree = lxml.etree.parse(content, parser)
    return tree


tree = parse_html(file)
names = []
threads = tree.xpath("//div[contains(@class, 'thread')]")
conversations = {}

# Build a conversation per thread, keyed by the thread's user names.
for thread in threads:
    html = []
    for message in thread:
        if message.keys() == ['class']:
            # A message header: sender and timestamp.
            html.append("<br> Sent by: " + message[0][0].text)
            html.append("Date and Time: " + message[0][1].text)
        else:
            # The message body itself.
            html.append("Message:")
            html.append(message.text)
    conversations[thread.text] = html

courtneyconvos = {}
courtneykey = []


def find_message_content(content, fileout):
    # (The fragment ends mid-function here.)
    for key in conversations.keys():
        for i in conversations.get(key):
def render_log(self, source, formatter, prev_url, next_url):
    html = [
        DOC_HEADER % dict(title='',
                          styledefs=formatter.get_style_defs('body'),
                          encoding='utf-8'),
    ]
    # "Zurück"/"Weiter" are German for back/next.
    if prev_url:
        html.append(u'<a href="%s">Zurück</a>' % (prev_url,))
    if next_url:
        html.append(u'<a href="%s">Weiter</a>' % (next_url,))
    html.append(highlight(source, IrcLogsLexer(), formatter))
    if prev_url:
        html.append(u'<a href="%s">Zurück</a>' % (prev_url,))
    if next_url:
        html.append(u'<a href="%s">Weiter</a>' % (next_url,))
    html.append(DOC_FOOTER)
    return ''.join(html)
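# Hedged usage sketch with Pygments: DOC_HEADER/DOC_FOOTER and the enclosing
# class aren't shown, so `renderer` and `log_text` are hypothetical stand-ins.
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import IrcLogsLexer

formatter = HtmlFormatter()
page = renderer.render_log(log_text, formatter,
                           prev_url=None, next_url='log-0002.html')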