예제 #1
0
파일: describe.py 프로젝트: a1043332/sc_api
def document_collection_recursive(resource, path, root_discovery, discovery):
    """Write the HTML page for ``resource`` and recurse into its sub-collections.

    The page produced by ``document_collection`` is written to
    ``FLAGS.dest`` under the file name ``path + "html"``. Every public,
    callable attribute of ``resource`` that carries an ``__is_resource__``
    marker is then called to obtain a nested collection, which is documented
    the same way with ``path`` extended by ``name + "."``.

    Args:
        resource: Object whose attributes are scanned for nested collections.
        path: File-name prefix for this collection's page.
        root_discovery: Passed through unchanged to ``document_collection``.
        discovery: Mapping for this collection; when it is non-empty its
            ``"resources"`` entry is used to look up each nested collection.
    """
    html = document_collection(resource, path, root_discovery, discovery)

    # On Python 2 the text must be encoded before it is written.
    if sys.version_info.major < 3:
        html = html.encode("utf-8")

    # The context manager closes the file even if the write fails.
    with open(os.path.join(FLAGS.dest, path + "html"), "w") as f:
        f.write(html)

    for name in dir(resource):
        if name.startswith("_"):
            continue

        attr = getattr(resource, name)
        if not (
            callable(attr)
            and hasattr(attr, "__is_resource__")
            and discovery != {}
        ):
            continue

        # Only the part of the attribute name before the first underscore is
        # used as the key into the discovery mapping.
        dname = name.rsplit("_")[0]
        collection = attr()
        document_collection_recursive(
            collection,
            path + name + ".",
            root_discovery,
            discovery["resources"].get(dname, {}),
        )
예제 #2
0
def html_result(html: str,
                extraheaders: TYPE_WSGI_RESPONSE_HEADERS = None) \
        -> WSGI_TUPLE_TYPE:
    """
    Builds the ``(contenttype, extraheaders, data)`` tuple for an HTML page,
    with the page text encoded as UTF-8 bytes. A missing or empty
    ``extraheaders`` becomes a new empty list.
    """
    headers = extraheaders if extraheaders else []
    content_type = 'text/html; charset=utf-8'
    body = html.encode("utf-8")
    return content_type, headers, body
예제 #3
0
def clean_html(html_str):
    """Strip markup from an HTML byte string and return the remaining text.

    Line breaks and tabs are collapsed to single spaces, HTML entities are
    unescaped, comments, scripts and style blocks are removed, and every
    remaining tag is replaced by two spaces.

    NOTE(review): the input is decoded as UTF-8, so it is presumably a
    Python 2 byte string, and ``htmlp`` presumably is an HTML parser
    instance defined elsewhere in the module -- confirm.
    """
    text = re.sub("[\n\r\t]+", " ", html_str)

    # Entities are unescaped on the decoded text, then the result goes back
    # to UTF-8 bytes for the regex passes below.
    text = htmlp.unescape(text.decode("utf-8")).encode("utf-8")

    # Drop, in order: HTML comments, javascript, css.
    for pattern in ("(<!--.*?-->)",
                    '(?i)<script.*?</script>',
                    '(?i)<style.*?</style>'):
        text = re.sub(pattern, "", text)

    # Whatever tags are left are replaced by two spaces each.
    return re.sub("<.*?>", "  ", text)
예제 #4
0
파일: views.py 프로젝트: moonkracker/notes
def render_to_pdf(template_src, context_dict=None):
    """Render a template to PDF and return it as an HTTP response.

    Args:
        template_src: Template name handed to ``get_template``.
        context_dict: Context passed to the template's ``render``; an empty
            dict is used when omitted.

    Returns:
        An ``HttpResponse`` holding the PDF bytes with content type
        ``application/pdf``, or a status-400 response with the text
        "Error Rendering PDF" when the PDF document reports an error.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if context_dict is None:
        context_dict = {}

    template = get_template(template_src)
    html = template.render(context_dict)

    result = BytesIO()
    # The HTML is passed on as windows-1251 bytes; with the default strict
    # error handling, a character outside that code page raises
    # UnicodeEncodeError here.
    pdf = pisa.pisaDocument(BytesIO(html.encode("windows-1251")),
                            result,
                            link_callback=link_callback)
    if pdf.err:
        return HttpResponse("Error Rendering PDF", status=400)
    return HttpResponse(result.getvalue(), content_type='application/pdf')
예제 #5
0
파일: psblog.py 프로젝트: rwos/psblog
def write_out(filename, html):
    """Write a file, plus a gzipped copy with a ``.gz`` suffix, to the out_dir.

    Args:
        filename: Name appended directly to ``config.out_dir``.
        html: Content to write. Text is encoded as UTF-8 for the gzipped
            copy; a byte string is written to the gzipped copy unchanged.
    """
    writefile(config.out_dir+filename, html)

    # Byte strings go out as they are and only text is encoded. This replaces
    # the earlier approach of encoding unconditionally and falling back to the
    # raw value on UnicodeDecodeError.
    if isinstance(html, bytes):
        payload = html
    else:
        payload = html.encode("utf8")

    f = gzip.open(config.out_dir+filename+".gz", 'wb')
    try:
        f.write(payload)
    finally:
        # Close the archive even when the write fails.
        f.close()
예제 #6
0
def get_thumbnail_url(doc, pagenumber, small):
    """Return a URL to a thumbnail image for one page of a document, or None.

    ``small`` is a boolean choosing between the "small" and "normal"
    DocumentCloud thumbnail; it has no effect on Markdown documents.
    """
    # Documents hosted on DocumentCloud: the DocumentCloud API could supply
    # the thumbnail URL, but in the interests of speed it is built here.
    dc_id = get_documentcloud_document_id(doc)
    if dc_id:
        size_label = "small" if small else "normal"
        return "https://assets.documentcloud.org/documents/%s/pages/%s-p%d-%s.gif" % (
            dc_id[0], dc_id[1], pagenumber, size_label)

    # Markdown documents are rendered locally, which needs both external
    # programs to be installed. Anything else has no thumbnail.
    can_render_markdown = (
        doc.get("format") == "markdown"
        and os.path.exists("/usr/bin/htmldoc")
        and os.path.exists("/usr/bin/pdftoppm")
    )
    if not can_render_markdown:
        return None

    # Fetch the Markdown text; give up when nothing comes back.
    markdown_source = get_document_text(doc, pagenumber)
    if not markdown_source:
        return None

    import base64
    import subprocess

    # Markdown -> HTML.
    rendered_html = CommonMark.commonmark(markdown_source)

    # HTML -> PDF.
    # TODO: Possible security issue if the Markdown source can generate HTML that
    # causes htmldoc to perform network requests or possibly unsafe operations.
    htmldoc_command = [
        "/usr/bin/htmldoc", "--quiet", "--continuous",
        "--size", "4.5x5.8in",  # smaller page magnifies the text
        "--top", "0", "--right", "1cm", "--bottom", "1cm", "--left", "1cm",  # margins
        "-t", "pdf14", "-",
    ]
    pdf_bytes = subprocess.check_output(
        htmldoc_command, input=rendered_html.encode("utf8"))

    # PDF -> PNG.
    pdftoppm_command = ["/usr/bin/pdftoppm", "-singlefile", "-r", "60", "-png"]
    png_bytes = subprocess.check_output(pdftoppm_command, input=pdf_bytes)

    # A data: URL lets the image be displayed directly without storing or
    # hosting it anywhere.
    encoded_png = base64.b64encode(png_bytes).decode("ascii")
    return "data:image/png;base64," + encoded_png
예제 #7
0
def html_to_data_uri(html, js_callback=None):
    """Turn an HTML string into a base64 ``data:text/html`` URI.

    There are two calling conventions:

    * From Python, with no ``js_callback``: the URI is returned.
    * From Javascript, with ``js_callback``: inter-process messaging is
      asynchronous, so nothing can be returned; the URI is delivered by
      calling ``js_callback`` instead.
    """
    encoded = base64.b64encode(html.encode("utf-8", "replace"))
    uri = "data:text/html;base64,{data}".format(
        data=encoded.decode("utf-8", "replace"))

    if not js_callback:
        return uri

    browser = js_callback.GetFrame().GetBrowser()
    js_print(browser, "Python",
             "html_to_data_uri",
             "Called from Javascript. Will call Javascript callback now.")
    js_callback.Call(uri)
예제 #8
0
파일: base.py 프로젝트: joram/trip-planner
	def get_soup(self, url):
		"""Return a BeautifulSoup document for ``url``, using an on-disk cache.

		The cache file path is ``self.data_dir`` joined with ``url`` after
		``self.base_url`` has been removed from it; a path ending in "/" is
		given the file name "index.html". If that file exists it is parsed
		and returned. Otherwise the method sleeps for ``self.wait`` seconds,
		downloads the page, stores it at that path (creating the directory
		when needed) and parses the downloaded text.

		NOTE(review): if the remainder left after removing ``base_url``
		starts with "/", ``os.path.join`` discards ``data_dir`` -- confirm
		that ``base_url`` ends with a slash.
		"""
		filepath = os.path.join(self.data_dir, url.replace(self.base_url, ""))
		if filepath.endswith("/"):
			filepath += "index.html"

		if os.path.exists(filepath):
			if self.debug:
				print("visited url")
			# The context manager closes the cache file once it is read.
			with open(filepath) as f:
				html = f.read()
			return BeautifulSoup(html)

		if self.debug:
			print("new url")
		# Pause before every real request.
		time.sleep(self.wait)
		html = requests.get(url).text

		directory = os.path.dirname(filepath)
		if not os.path.exists(directory):
			os.makedirs(directory)
		# The handle is closed even if the write fails.
		with open(filepath, "w") as f:
			f.write(html.encode("utf8"))

		return BeautifulSoup(html)
예제 #9
0
def respond_with_html(request: Request, code: int, html: str) -> None:
    """
    Encodes the HTML from a str to UTF-8 bytes and hands the result to
    `respond_with_html_bytes` together with the request and status code.
    """
    html_bytes = html.encode("utf-8")
    respond_with_html_bytes(request, code, html_bytes)
예제 #10
0
def format_display_name(account, **options):
    """Return the encoded display name of ``account``.

    ``account.display_name.presence`` is used when truthy, otherwise
    ``account.username``. When the optional keyword ``custom_emojify`` is
    truthy the encoded text is additionally passed through
    ``encode_custom_emojis`` with ``account.emojis``.
    """
    html = encode(account.display_name.presence or account.username)
    # ``custom_emojify`` is an optional flag, so omitting it must not raise
    # KeyError.
    if options.get("custom_emojify"):
        html = encode_custom_emojis(html, account.emojis)
    # NOTE(review): looks like a port of Ruby code that marked the result
    # html_safe; here the encoded string is returned as is -- confirm.
    return html
예제 #11
0
def format_spoiler(status):
    """Return the encoded spoiler text of ``status`` with custom emojis applied.

    The text goes through ``encode`` first and the result is then passed to
    ``encode_custom_emojis`` together with ``status.emojis``.
    """
    encoded_spoiler = encode(status.spoiler_text)
    return encode_custom_emojis(encoded_spoiler, status.emojis)
예제 #12
0
#     end

#     result << encode(chars[last_index..-1].join)

#     result.flatten.join
#   end

  # Builds the link markup for a URL entity. The link opens in a new tab
  # with rel "nofollow noopener", prefixed with "me" when options[:me] is
  # set. If the URL cannot be parsed, the encoded URL text is returned
  # instead of a link.
  def link_to_url(entity, options = {})
    parsed_url = Addressable::URI.parse(entity[:url])

    rel = 'nofollow noopener'
    rel = "me #{rel}" if options[:me]
    attributes = { target: '_blank', rel: rel }

    Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), parsed_url, attributes)
  rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError
    encode(entity[:url])
  end

  # Builds the markup for a mention entity. Without a list of linkable
  # accounts the screen name is handed to link_to_account. Otherwise the
  # first account in the list matching the screen name is rendered with
  # mention_html, and a plain "@name" string is returned when none matches.
  def link_to_mention(entity, linkable_accounts)
    screen_name = entity[:screen_name]

    return link_to_account(screen_name) if !linkable_accounts

    matched = linkable_accounts.find do |candidate|
      TagManager.instance.same_acct?(candidate.acct, screen_name)
    end
    return "@#{screen_name}" unless matched

    mention_html(matched)
  end

  def link_to_account(acct)
    username, domain = acct.split('@')

    domain  = nil if TagManager.instance.local_domain?(domain)