Esempio n. 1
0
    def sanitize_html(rendered):
        """Sanitize an HTML fragment with lxml's ``Cleaner``.

        The cleaner is configured with ``safe_attrs_only=False`` and
        ``forms=False``, and with a host whitelist for embedded content
        (video, slide and map/document providers).

        Args:
            rendered: HTML markup to clean. Falsy values (``None``, empty
                string) are returned untouched without invoking the cleaner.

        Returns:
            The cleaned markup, with a single bare ``<div>...</div>``
            wrapper removed when the cleaned output is enclosed in one.
        """
        if not rendered:
            return rendered

        cleaner = Cleaner(
            safe_attrs_only=False,
            host_whitelist=(
                'www.youtube.com', 'player.vimeo.com',
                'embed.ted.com', 'prezi.com',
                'www.google.com', 'www.slideshare.net',
                'maps.google.com', 'docs.google.com',
            ),
            forms=False,
        )
        cleaned = cleaner.clean_html(rendered)

        # Remove the div wrapper if there is one.  Only unwrap when BOTH the
        # opening and the closing tag are present: slicing on the prefix
        # alone would chop six arbitrary characters off output that starts
        # with <div> but ends with something else (e.g. trailing whitespace).
        opening, closing = '<div>', '</div>'
        if cleaned.startswith(opening) and cleaned.endswith(closing):
            cleaned = cleaned[len(opening):-len(closing)]
        return cleaned
Esempio n. 2
0
    def sanitize_html(rendered):
        """Sanitize an HTML fragment with lxml's ``Cleaner``.

        The cleaner is configured with ``safe_attrs_only=False`` and
        ``forms=False``, and with a host whitelist of three video/talk
        embedding hosts.

        Args:
            rendered: HTML markup to clean. Falsy values (``None``, empty
                string) are returned untouched without invoking the cleaner.

        Returns:
            The cleaned markup, with a single bare ``<div>...</div>``
            wrapper removed when the cleaned output is enclosed in one.
        """
        if not rendered:
            return rendered

        cleaner = Cleaner(
            safe_attrs_only=False,
            host_whitelist=(
                'www.youtube.com',
                'player.vimeo.com',
                'embed.ted.com',
            ),
            forms=False,
        )
        cleaned = cleaner.clean_html(rendered)

        # Remove the div wrapper if there is one.  Require the closing tag as
        # well as the opening one; otherwise the slice would silently eat
        # the last six characters of output that merely starts with <div>.
        opening, closing = '<div>', '</div>'
        if cleaned.startswith(opening) and cleaned.endswith(closing):
            cleaned = cleaned[len(opening):-len(closing)]
        return cleaned
Esempio n. 3
0
    def sanitize_html(rendered):
        """Sanitize an HTML fragment with lxml's ``Cleaner``.

        The cleaner is configured with ``safe_attrs_only=False`` and
        ``forms=False``, and with a host whitelist for embedded content
        (video, slide and map/document providers).

        Args:
            rendered: HTML markup to clean. Falsy values (``None``, empty
                string) are returned untouched without invoking the cleaner.

        Returns:
            The cleaned markup, with a single bare ``<div>...</div>``
            wrapper removed when the cleaned output is enclosed in one.
        """
        if not rendered:
            return rendered

        cleaner = Cleaner(
            safe_attrs_only=False,
            host_whitelist=(
                "www.youtube.com",
                "player.vimeo.com",
                "embed.ted.com",
                "prezi.com",
                "www.google.com",
                "www.slideshare.net",
                "maps.google.com",
                "docs.google.com",
            ),
            forms=False,
        )
        cleaned = cleaner.clean_html(rendered)

        # Remove the div wrapper if there is one.  Both tags must be present
        # before unwrapping: a prefix-only check would let the slice strip
        # six unrelated trailing characters when the output does not end
        # with </div>.
        opening, closing = "<div>", "</div>"
        if cleaned.startswith(opening) and cleaned.endswith(closing):
            cleaned = cleaned[len(opening):-len(closing)]
        return cleaned
Esempio n. 4
0
    def rendered_body(self):
        """Render this page's body to sanitized HTML.

        The markdown source is assembled from the page body (with metadata
        removed) followed by optional "Incoming Links", "Suggested Pages"
        and "Other Posts" sections.  YAML/schema blocks are stripped, the
        text is converted to HTML, a table of contents is added, embedded
        images are wrapped in an ``img-container`` paragraph,
        ``self.rendered_data`` is prepended, and the result is passed
        through lxml's ``Cleaner``.

        Returns:
            The sanitized HTML.  A falsy intermediate result is returned
            as-is without being cleaned.
        """
        # body
        body_parts = [PageOperationMixin.remove_metadata(self.body)]

        # incoming links, one sub-section per relation
        if self.inlinks:
            lines = [u"# Incoming Links"]
            for rel, links in self.inlinks.items():
                # keys have the form "<itemtype>/<property>"
                itemtype, prop = rel.split("/")
                humane_rel = schema.humane_property(itemtype, prop, True)
                lines.append(u"## %s" % humane_rel)

                # remove dups and sort
                lines.extend(u"* [[%s]]" % title
                             for title in sorted(set(links)))
            body_parts.append(u"\n".join(lines))

        # related links
        related_links = self.related_links_by_score
        if related_links:
            # Materialize before slicing: on Python 3 ``items()`` is a view
            # and cannot be sliced; on Python 2 this is just a list copy.
            top_related = list(related_links.items())[:10]
            lines = [u"# Suggested Pages"]
            lines.extend(
                u"* {{.score::%.3f}} [[%s]]\n{.noli}" % (score, title)
                for title, score in top_related
            )
            body_parts.append(u"\n".join(lines))

        # other posts
        if self.older_title or self.newer_title:
            lines = [u"# Other Posts"]
            if self.newer_title:
                lines.append(u"* {{.newer::newer}} [[%s]]\n{.noli}" % self.newer_title)
            if self.older_title:
                lines.append(u"* {{.older::older}} [[%s]]\n{.noli}" % self.older_title)
            body_parts.append(u"\n".join(lines))

        # remove yaml/schema block
        joined = u"\n".join(body_parts)
        joined = re.sub(PageOperationMixin.re_yaml_schema, u"\n", joined)

        # render to html
        rendered = md.convert(joined)

        # add table of contents
        rendered = TocGenerator(rendered).add_toc()

        # add class for embedded image
        rendered = PageOperationMixin.re_img.sub(u'<p class="img-container"><img \\1/></p>', rendered)

        # add structured data block
        rendered = self.rendered_data + rendered

        # sanitize
        if rendered:
            cleaner = Cleaner(
                safe_attrs_only=False,
                host_whitelist=("www.youtube.com", "player.vimeo.com"),
            )
            rendered = cleaner.clean_html(rendered)

            # Remove the div wrapper if there is one.  Both the opening and
            # the closing tag must be present; checking the prefix alone
            # would let the slice drop six unrelated trailing characters.
            opening, closing = "<div>", "</div>"
            if rendered.startswith(opening) and rendered.endswith(closing):
                rendered = rendered[len(opening):-len(closing)]

        return rendered