Code example #1
def get_safe_comment_html(html_doc):
    cleaner = Cleaner(tags=COMMENT_ALLOWED_TAGS,
                      attributes=COMMENT_ALLOWED_ATTRIBUTES,
                      styles=COMMENT_ALLOWED_STYLES,
                      protocols=COMMENT_ALLOWED_PROTOCOLS,
                      strip=False)
    return cleaner.clean(html_doc)
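
A minimal usage sketch; the COMMENT_ALLOWED_* constants below are assumptions standing in for whatever whitelists the project actually defines:

COMMENT_ALLOWED_TAGS = ['b', 'em', 'i', 'strong', 'a']  # hypothetical
COMMENT_ALLOWED_ATTRIBUTES = {'a': ['href', 'title']}   # hypothetical
COMMENT_ALLOWED_STYLES = []                             # hypothetical
COMMENT_ALLOWED_PROTOCOLS = ['http', 'https']           # hypothetical

# With strip=False, disallowed markup is escaped rather than removed:
# get_safe_comment_html('<script>x</script><b>hi</b>')
# -> '&lt;script&gt;x&lt;/script&gt;<b>hi</b>'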
Code example #2
def clean_all_tags(html_doc):
    cleaner = Cleaner(tags=[],
                      attributes={},
                      styles=[],
                      protocols=[],
                      strip=True)
    return cleaner.clean(html_doc)
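
With an empty whitelist and strip=True, every tag is removed and only text survives. Note that bleach keeps the text inside stripped tags, so this removes markup, not content:

# clean_all_tags('<p>Hello <b>world</b><script>evil()</script></p>')
# -> 'Hello worldevil()'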
Code example #3
def render_markdown(text):
    """Render Markdown text to XHTML.

    The Markdown text will be sanitized to prevent injecting custom HTML
    or dangerous links. It will also enable a few plugins for code
    highlighting and sane lists.

    It's rendered to XHTML in order to allow the element tree to be easily
    parsed for code review and change description diffing.

    Args:
        text (bytes or unicode):
            The Markdown text to render.

            If this is a byte string, it must represent UTF-8-encoded text.

    Returns:
        unicode:
        The Markdown-rendered XHTML.
    """
    html = markdown(force_text(text), **MARKDOWN_KWARGS)

    # Create a bleach HTML cleaner, and override settings on the html5lib
    # serializer it contains to ensure we use self-closing HTML tags, like
    # <br/>. This is needed so that we can parse the resulting HTML in
    # Djblets for things like Markdown diffing.
    cleaner = Cleaner(tags=markdown_tags, attributes=markdown_attrs)
    cleaner.serializer.use_trailing_solidus = True

    return cleaner.clean(html)
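
use_trailing_solidus is an option on the html5lib serializer that Cleaner carries; enabling it makes void elements serialize XHTML-style. A rough sketch of the effect (exact spacing also depends on the serializer's space_before_trailing_solidus option):

# Default serialization:           '<p>line one<br>line two</p>'
# With use_trailing_solidus=True:  '<p>line one<br/>line two</p>'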
Code example #4
 def __init__(self, supported_tags=None):
     if supported_tags is None:
         supported_tags = DEFAULT_TAG_LIST
     self.cleaner = Cleaner(tags=supported_tags,
                            attributes=ALLOWED_ATTRIBUTES)
Code example #5
def search(request):
    def highlighter(matchobj):
        return '<span class="highlight">' + matchobj.group(0) + '</span>'

    cleaner = Cleaner(tags=[], attributes={}, strip=True)
    search_term = request.GET.get('q', '')
    posts, cursor = Post.objects.get_post_list(request.user,
                                               search_term=search_term,
                                               include_child_posts=True,
                                               sort_by='last_activity')

    for post in posts:
        snippet = cleaner.clean(post.html)
        # Escape the user-supplied term so regex metacharacters can't
        # break or abuse the pattern.
        snippet = re.sub(re.escape(search_term),
                         highlighter,
                         snippet,
                         flags=re.IGNORECASE)
        post.snippet = snippet

    return render(request,
                  "search.html",
                  context={
                      "search_term": search_term,
                      "results": {
                          "posts": posts
                      }
                  })
Code example #6
File: differ.py Project: ei8fdb/hypha-1
def compare(answer_a, answer_b, should_bleach=True):
    if should_bleach:
        cleaner = Cleaner(tags=['h4'], attributes={}, strip=True)
        answer_a = re.sub('(<li[^>]*>)', r'\1● ', answer_a)
        answer_b = re.sub('(<li[^>]*>)', r'\1● ', answer_b)
        answer_a = cleaner.clean(answer_a)
        answer_b = cleaner.clean(answer_b)

    diff = SequenceMatcher(None, answer_a, answer_b)
    from_diff = []
    to_diff = []
    for opcode, a0, a1, b0, b1 in diff.get_opcodes():
        if opcode == 'equal':
            from_diff.append(mark_safe(diff.a[a0:a1]))
            to_diff.append(mark_safe(diff.b[b0:b1]))
        elif opcode == 'insert':
            from_diff.append(mark_safe(diff.a[a0:a1]))
            to_diff.append(wrap_with_span(diff.b[b0:b1], 'added'))
        elif opcode == 'delete':
            from_diff.append(wrap_with_span(diff.a[a0:a1], 'deleted'))
            to_diff.append(mark_safe(diff.b[b0:b1]))
        elif opcode == 'replace':
            from_diff.append(wrap_with_span(diff.a[a0:a1], 'deleted'))
            to_diff.append(wrap_with_span(diff.b[b0:b1], 'added'))

    from_display = ''.join(from_diff)
    to_display = ''.join(to_diff)
    from_display = re.sub(r'([●○]|[0-9]{1,2}[\)\.])', r'<br>\1', from_display)
    to_display = re.sub(r'([●○]|[0-9]{1,2}[\)\.])', r'<br>\1', to_display)
    from_display = re.sub(r'(\.\n)', r'\1<br><br>', from_display)
    to_display = re.sub(r'(\.\n)', r'\1<br><br>', to_display)
    from_display = mark_safe(from_display)
    to_display = mark_safe(to_display)

    return (from_display, to_display)
Code example #7
def do_html_title_cleanup(s, result):
    class PflFilter(Filter):
        def __iter__(self):
            for token in Filter.__iter__(self):
                if token['type'] in ['StartTag', 'EmptyTag'] and token['data']:
                    if token['name'] == 'img':
                        for attr, value in token['data'].items():
                            token['data'][attr] = image_rewrite(
                                urljoin(
                                    result['meta']['original_object_urls']
                                    ['html'], token['data'][attr]),
                                result['meta']['_id'])
                yield token

    ATTRS = {'*': allow_src}
    TAGS = []
    cleaner = Cleaner(tags=TAGS,
                      attributes=ATTRS,
                      filters=[PflFilter],
                      strip=True)
    try:
        return cleaner.clean(s).replace(
            '<img ', '<img class="img-responsive" ').replace('&amp;nbsp;', '')
    except TypeError:
        return u''
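
allow_src here is an attribute-filter callable; bleach accepts callables in the attributes map and invokes them once per attribute. A plausible sketch, assumed rather than taken from this project:

def allow_src(tag, name, value):
    # Keep only src attributes (the PflFilter above rewrites their values).
    return name == 'src'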
Code example #8
def new_flag_api(service, user, post):
    cursor = get_cursor()
    query = "SELECT * FROM posts WHERE id = %s AND \"user\" = %s AND service = %s"
    params = (post, user, service)
    cursor.execute(query, params)
    results = cursor.fetchall()
    if len(results) == 0:
        return "", 404

    cursor2 = get_cursor()
    query2 = "SELECT * FROM booru_flags WHERE id = %s AND \"user\" = %s AND service = %s"
    params2 = (post, user, service)
    cursor2.execute(query2, params2)
    results2 = cursor2.fetchall()
    if len(results2) > 0:
        # conflict; flag already exists
        return "", 409

    scrub = Cleaner(tags=[])
    columns = ['id', '"user"', 'service']
    params = (
        scrub.clean(post),
        scrub.clean(user),
        scrub.clean(service)
    )
    data = ['%s'] * len(params)
    query = "INSERT INTO booru_flags ({fields}) VALUES ({values})".format(
        fields=','.join(columns),
        values=','.join(data)
    )
    cursor3 = get_cursor()
    cursor3.execute(query, params)

    return "", 200
Code example #9
File: account.py Project: ujichagallery/Kemono2
def create_account(username, password, favorites):
    account_id = None
    password_hash = bcrypt.hashpw(get_base_password_hash(password),
                                  bcrypt.gensalt()).decode('utf-8')
    account_create_lock.acquire()
    try:
        if is_username_taken(username):
            return False

        scrub = Cleaner(tags=[])

        cursor = get_cursor()
        query = "insert into account (username, password_hash) values (%s, %s) returning id"
        cursor.execute(query, (
            scrub.clean(username),
            password_hash,
        ))
        account_id = cursor.fetchone()['id']
    finally:
        account_create_lock.release()

    if favorites is not None:
        for favorite in favorites:
            artist = get_artist(favorite['service'], favorite['artist_id'])
            if artist is None:
                continue
            add_favorite_artist(account_id, favorite['service'],
                                favorite['artist_id'])

    return True
Code example #10
File: __init__.py Project: ashray-00/Computer-Vision
def clean(text,
          tags=ALLOWED_TAGS,
          attributes=ALLOWED_ATTRIBUTES,
          styles=ALLOWED_STYLES,
          protocols=ALLOWED_PROTOCOLS,
          strip=False,
          strip_comments=True):
    """Clean an HTML fragment of malicious content and return it

    This function is a security-focused function whose sole purpose is to
    remove malicious content from a string such that it can be displayed as
    content in a web page.

    This function is not designed to be used to transform content for
    non-web-page contexts.

    Example::

        import bleach

        better_text = bleach.clean(yucky_text)


    .. Note::

       If you're cleaning a lot of text and passing the same argument values or
       you want more configurability, consider using a
       :py:class:`bleach.sanitizer.Cleaner` instance.

    :arg str text: the text to clean

    :arg list tags: allowed list of tags; defaults to
        ``bleach.sanitizer.ALLOWED_TAGS``

    :arg dict attributes: allowed attributes; can be a callable, list or dict;
        defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

    :arg list styles: allowed list of css styles; defaults to
        ``bleach.sanitizer.ALLOWED_STYLES``

    :arg list protocols: allowed list of protocols for links; defaults
        to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

    :arg bool strip: whether or not to strip disallowed elements

    :arg bool strip_comments: whether or not to strip HTML comments

    :returns: cleaned text as unicode

    """
    cleaner = Cleaner(
        tags=tags,
        attributes=attributes,
        styles=styles,
        protocols=protocols,
        strip=strip,
        strip_comments=strip_comments,
    )
    return cleaner.clean(text)
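
The docstring's note about reuse is worth making concrete; a small sketch contrasting the one-shot helper with a reusable Cleaner (fragments is a hypothetical iterable of HTML strings):

import bleach
from bleach.sanitizer import Cleaner

# One-shot: builds a fresh Cleaner on every call.
bleach.clean('<b>bold</b><script>x</script>')
# -> '<b>bold</b>&lt;script&gt;x&lt;/script&gt;'

# Reusable: build once, clean many fragments.
cleaner = Cleaner(tags=['b'])
cleaned = [cleaner.clean(fragment) for fragment in fragments]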
Code example #11
    def test_basics(self):
        TAGS = ["span", "br"]
        ATTRS = {"span": ["style"]}

        cleaner = Cleaner(tags=TAGS, attributes=ATTRS)

        assert (cleaner.clean('a <br/><span style="color:red">test</span>') ==
                'a <br><span style="">test</span>')
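
The empty style="" in the expected output is deliberate bleach behavior: the style attribute is allowed on span, but no CSS properties were whitelisted, so the value is emptied. With the styles parameter these examples' bleach version supports, whitelisted properties survive; roughly:

# cleaner = Cleaner(tags=['span'], attributes={'span': ['style']}, styles=['color'])
# cleaner.clean('<span style="color:red">test</span>')
# -> '<span style="color: red;">test</span>'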
Code example #12
def sanitize_html(html_text):
    cleaner = Cleaner(tags=ALLOWED_HTML_TAGS,
                      attributes=ALLOWED_HTML_ATTRIBUTES,
                      styles=ALLOWED_HTML_STYLES,
                      protocols=ALLOWED_HTML_PROTOCOLS,
                      strip=True,
                      strip_comments=True)
    return cleaner.clean(html_text)
Code example #13
    def test_basics(self):
        TAGS = ['span', 'br']
        ATTRS = {'span': ['style']}

        cleaner = Cleaner(tags=TAGS, attributes=ATTRS)

        assert (cleaner.clean('a <br/><span style="color:red">test</span>') ==
                'a <br><span style="">test</span>')
Code example #14
def md_bleach_imgless(text):
    cleaner = Cleaner(tags=[
        'p', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'em', 'strong', 'a',
        'ul', 'ol', 'li', 'blockquote', 'pre', 'code', 'hr', 'del'
    ],
                      attributes={'a': ['href']},
                      protocols=['http', 'https'],
                      strip=True)
    text = cleaner.clean(text)
    return mark_safe(text)
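
Because img is absent from the whitelist and strip=True, images disappear entirely (an img tag is void, so there is no inner text to keep). A quick check with hypothetical input:

# md_bleach_imgless('<p>hi <img src="cat.png"> there</p>')
# -> '<p>hi  there</p>'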
Code example #15
File: test_clean.py Project: mozilla/bleach
    def test_basics(self):
        TAGS = ['span', 'br']
        ATTRS = {'span': ['style']}

        cleaner = Cleaner(tags=TAGS, attributes=ATTRS)

        assert (
            cleaner.clean('a <br/><span style="color:red">test</span>') ==
            'a <br><span style="">test</span>'
        )
Code example #16
File: __init__.py Project: BarnetteME1/DnD-stuff
def clean(text, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES,
          styles=ALLOWED_STYLES, protocols=ALLOWED_PROTOCOLS, strip=False,
          strip_comments=True):
    """Clean an HTML fragment of malicious content and return it

    This function is a security-focused function whose sole purpose is to
    remove malicious content from a string such that it can be displayed as
    content in a web page.

    This function is not designed to be used to transform content for
    non-web-page contexts.

    Example::

        import bleach

        better_text = bleach.clean(yucky_text)


    .. Note::

       If you're cleaning a lot of text and passing the same argument values or
       you want more configurability, consider using a
       :py:class:`bleach.sanitizer.Cleaner` instance.

    :arg str text: the text to clean

    :arg list tags: allowed list of tags; defaults to
        ``bleach.sanitizer.ALLOWED_TAGS``

    :arg dict attributes: allowed attributes; can be a callable, list or dict;
        defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

    :arg list styles: allowed list of css styles; defaults to
        ``bleach.sanitizer.ALLOWED_STYLES``

    :arg list protocols: allowed list of protocols for links; defaults
        to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

    :arg bool strip: whether or not to strip disallowed elements

    :arg bool strip_comments: whether or not to strip HTML comments

    :returns: cleaned text as unicode

    """
    cleaner = Cleaner(
        tags=tags,
        attributes=attributes,
        styles=styles,
        protocols=protocols,
        strip=strip,
        strip_comments=strip_comments,
    )
    return cleaner.clean(text)
Code example #17
def clean(text):
    cleaner = Cleaner(
        tags=bleach.sanitizer.ALLOWED_TAGS + ['p', 'br'],
        attributes=bleach.sanitizer.ALLOWED_ATTRIBUTES,
        styles=bleach.sanitizer.ALLOWED_STYLES,
        protocols=bleach.sanitizer.ALLOWED_PROTOCOLS,
        strip=True,
        strip_comments=True,
        filters=[EmptyLinkFilter],
    )
    return cleaner.clean(text)
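
EmptyLinkFilter is not defined in this snippet. A plausible sketch of such a filter, written against the same html5lib Filter token API the other examples here use; this is an assumption about its behavior, not the project's actual code:

from html5lib.filters.base import Filter  # bleach 3+ also ships Filter via bleach.html5lib_shim

class EmptyLinkFilter(Filter):
    """Drop <a> tags that carry no href, keeping their inner text.

    Simplified sketch: does not handle nested links.
    """
    def __iter__(self):
        dropping = False
        for token in Filter.__iter__(self):
            if token['type'] == 'StartTag' and token['name'] == 'a':
                # Attribute dicts are keyed by (namespace, name) tuples.
                if (None, 'href') not in (token['data'] or {}):
                    dropping = True
                    continue  # swallow the start tag
            elif token['type'] == 'EndTag' and token['name'] == 'a' and dropping:
                dropping = False
                continue  # swallow the matching end tag
            yield token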
Code example #18
def html_cleanup(s):
    ATTRS = {}
    TAGS = []
    cleaner = Cleaner(tags=TAGS,
                      attributes=ATTRS,
                      filters=[Filter],
                      strip=True)
    try:
        return cleaner.clean(s).replace('&amp;nbsp;', '')
    except TypeError:
        return u''
Code example #19
def pgmarkdown(value, allow_images=False, allow_relative_links=False):
    tags = list(_markdown_tags)  # copy so 'img' isn't appended to the shared list
    filters = []

    if allow_images:
        tags.append('img')
    if not allow_relative_links:
        filters.append(RelativeLinkFilter)

    cleaner = Cleaner(tags=tags, attributes=_markdown_attrs, filters=filters)

    return cleaner.clean(markdown.markdown(value))
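
RelativeLinkFilter is likewise external to this snippet. One plausible reading, sketched as an assumption rather than the project's real code, is a filter that strips href values lacking a scheme:

from html5lib.filters.base import Filter

class RelativeLinkFilter(Filter):
    """Remove href attributes whose value looks like a relative URL (sketch)."""
    def __iter__(self):
        for token in Filter.__iter__(self):
            if token['type'] == 'StartTag' and token['name'] == 'a' and token['data']:
                href = token['data'].get((None, 'href'), '')
                if href and '://' not in href and not href.startswith('mailto:'):
                    del token['data'][(None, 'href')]
            yield token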
Code example #20
File: util.py Project: hsoft/tn2
def sanitize_comment(text):
    ALLOWED_TAGS = ['b', 'i', 'u', 's', 'p', 'br', 'img', 'a', 'em', 'strong', 'ul', 'ol', 'li']
    ALLOWED_ATTRS = {
        'img': ['alt', 'src', 'width', 'height'],
        'a': ['href'],
    }
    cleaner = Cleaner(
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRS,
        filters=[partial(LinkifyFilter, callbacks=[nofollow, target_blank])]
    )
    return cleaner.clean(text)
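
LinkifyFilter and both callbacks ship with bleach itself; the partial just pins the callbacks before Cleaner instantiates the filter. The imports this snippet assumes, plus the rough effect:

from functools import partial
from bleach.sanitizer import Cleaner
from bleach.callbacks import nofollow, target_blank
from bleach.linkifier import LinkifyFilter

# sanitize_comment('see http://example.com')
# -> roughly: 'see <a href="http://example.com" rel="nofollow" target="_blank">http://example.com</a>'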
Code example #21
def html_cleanup_with_structure(s):
    ATTRS = {'*': allow_src}
    TAGS = [
        'img', 'a', 'p', 'div', 'script', 'html', 'body', 'head', 'title',
        'style'
    ]
    cleaner = Cleaner(tags=TAGS,
                      attributes=ATTRS,
                      filters=[Filter],
                      strip=True)
    try:
        return cleaner.clean(s).replace('&amp;nbsp;', '')
    except TypeError:
        return u''
Code example #22
def do_html_cleanup(s, result):
    ATTRS = {'*': allow_src}
    TAGS = ['img', 'a', 'p', 'div']
    cleaner = Cleaner(tags=TAGS,
                      attributes=ATTRS,
                      filters=[Filter],
                      strip=True)
    try:
        return cleaner.clean(s).replace(
            '<img ',
            '<img style="border:0;display:block;outline:none;text-decoration:none;width:100%;" '
        ).replace('&amp;nbsp;', '')
    except TypeError:
        return u''
Code example #23
File: export_wiki.py Project: SasView/scripts
def sanitise_html(content):
    attributes = dict(bleach.sanitizer.ALLOWED_ATTRIBUTES)  # copy, not the shared default
    attributes.update({
        'img': ['alt', 'src'],
    })
    cleaner = Cleaner(tags=bleach.sanitizer.ALLOWED_TAGS + [
        'pre', 'table', 'tr', 'td', 'th', 'tt', 'dl', 'dt', 'dd', "a", "h1",
        "h2", "h3", "strong", "em", "p", "ul", "ol", "li", "br", "sub", "sup",
        "hr", "img"
    ],
                      attributes=attributes,
                      strip=True,
                      strip_comments=True)
    return cleaner.clean(content)
Code example #24
 def clean_content(self):
     content = self.cleaned_data['content']
     cleaner = Cleaner(tags=[
         'p', 'u', 's', 'i', 'b', 'a', 'sub', 'sup', 'img', 'div', 'ul',
         'li', 'ol', 'em', 'strong', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
         'pre', 'address', 'caption'
     ],
                       attributes={
                           'a': ['href', 'target'],
                           'img': ['src', 'style', 'alt'],
                       },
                       styles=['height', 'width'],
                       protocols=['http', 'https', 'mailto'])
     content = cleaner.clean(content)
     return content
Code example #25
class HtmlParser:
	auto_paragraphs = True
	add_nofollow = True
	cleaner = None

	def __init__(self, supported_tags=None):
		if supported_tags is None:
			supported_tags = DEFAULT_TAG_LIST
		self.cleaner = Cleaner(tags=supported_tags, attributes=ALLOWED_ATTRIBUTES)

	def parse(self, text):
		filters = [AddRequiredAttributesFilter, ClassFilter, alphabeticalattributes.Filter]
		if self.auto_paragraphs:
			filters.append(AutoParagraphFilter)
		if self.add_nofollow:
			filters.append(AddNofollowFilter)
		self.cleaner.filters = filters
		output = self.cleaner.clean(text)
		self.output = output

	def get_output(self):
		return self.output

	def get_attributes(self):
		return {}

	@property
	def supported_tags(self):
		return self.cleaner.tags
Code example #26
File: capa_module.py Project: yf/edx-platform
    def index_dictionary(self):
        """
        Return dictionary prepared with module content and type for indexing.
        """
        xblock_body = super(ProblemBlock, self).index_dictionary()

        # Make optioninput's options index friendly by replacing the actual tag with the values
        capa_content = re.sub(
            r'<optioninput options="\(([^"]+)\)".*?>\s*|\S*<\/optioninput>',
            r'\1', self.data)

        # Removing solutions and hints, as well as script and style
        capa_content = re.sub(
            re.compile(
                r"""
                    <solution>.*?</solution> |
                    <script>.*?</script> |
                    <style>.*?</style> |
                    <[a-z]*hint.*?>.*?</[a-z]*hint>
                """, re.DOTALL | re.VERBOSE), "", capa_content)
        capa_content = re.sub(r"(\s|&nbsp;|//)+", " ",
                              Cleaner(tags=[], strip=True).clean(capa_content))

        capa_body = {
            "capa_content": capa_content,
            "display_name": self.display_name,
        }
        if "content" in xblock_body:
            xblock_body["content"].update(capa_body)
        else:
            xblock_body["content"] = capa_body
        xblock_body["content_type"] = self.INDEX_CONTENT_TYPE
        xblock_body["problem_types"] = list(self.problem_types)
        return xblock_body
Code example #27
File: parser.py Project: LinuxOSsk/Shakal-NG
class HtmlParser:
	auto_paragraphs = True
	add_nofollow = True
	cleaner = None

	def __init__(self, supported_tags=None):
		if supported_tags is None:
			supported_tags = DEFAULT_TAG_LIST
		self.cleaner = Cleaner(tags=supported_tags, attributes=ALLOWED_ATTRIBUTES)

	def parse(self, text):
		filters = [AddRequiredAttributesFilter, ClassFilter, alphabeticalattributes.Filter]
		if self.auto_paragraphs:
			filters.append(AutoParagraphFilter)
		if self.add_nofollow:
			filters.append(AddNofollowFilter)
		self.cleaner.filters = filters
		output = self.cleaner.clean(text)
		self.output = output

	def get_output(self):
		return self.output

	def get_attributes(self):
		return {}

	@property
	def supported_tags(self):
		return self.cleaner.tags
Code example #28
def get_topThreeArticles():
    db = psycopg2.connect(database=DBNAME)
    c = db.cursor()
    c.execute('''SELECT articles.title article, count(log.path) totalViews FROM
          articles JOIN log ON articles.slug=substring(log.path, 10) and
          log.status LIKE '%200%' GROUP BY articles.title ORDER BY
          count(log.path) desc; ''')
    results = c.fetchall()
    cleaner = Cleaner()
    cleanResults = []
    for r in results:
        articles = r[0]
        totalViews = r[1]
        articles = cleaner.clean(articles)
        cleanResults.append((articles, totalViews))
    db.close()
    return cleanResults
Code example #29
def clean_html(html: str) -> str:
    text = unescape(html)  # decode HTML entities

    if not hasattr(_THREAD_LOCALS, "cleaner"):
        _THREAD_LOCALS.cleaner = Cleaner(tags=ALLOWED_TAGS, strip=True)

    sanitized: str = _THREAD_LOCALS.cleaner.clean(text)
    return sanitized.strip()
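
The threading.local indirection exists because a Cleaner builds its parser and serializer once and reuses them across clean() calls, so sharing one instance between threads could race; each thread lazily gets its own. A sketch of the module-level setup this snippet assumes:

import threading
from html import unescape
from bleach.sanitizer import Cleaner

ALLOWED_TAGS = ['b', 'em', 'i', 'strong']  # assumed whitelist
_THREAD_LOCALS = threading.local()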
Code example #30
def test_filters():
    # Create a Filter that changes all the attr values to "moo"
    class MooFilter(Filter):
        def __iter__(self):
            for token in Filter.__iter__(self):
                if token['type'] in ['StartTag', 'EmptyTag'] and token['data']:
                    for attr, value in token['data'].items():
                        token['data'][attr] = 'moo'

                yield token

    ATTRS = {'img': ['rel', 'src']}
    TAGS = ['img']

    cleaner = Cleaner(tags=TAGS, attributes=ATTRS, filters=[MooFilter])

    dirty = 'this is cute! <img src="http://example.com/puppy.jpg" rel="nofollow">'
    assert (cleaner.clean(dirty) == 'this is cute! <img rel="moo" src="moo">')
Code example #31
File: test_clean.py Project: yujitakahashi/bleach
    def test_filters(self):
        # Create a Filter that changes all the attr values to "moo"
        class MooFilter(Filter):
            def __iter__(self):
                for token in Filter.__iter__(self):
                    if token["type"] in ["StartTag", "EmptyTag"] and token["data"]:
                        for attr, value in token["data"].items():
                            token["data"][attr] = "moo"

                    yield token

        ATTRS = {"img": ["rel", "src"]}
        TAGS = ["img"]

        cleaner = Cleaner(tags=TAGS, attributes=ATTRS, filters=[MooFilter])

        dirty = 'this is cute! <img src="http://example.com/puppy.jpg" rel="nofollow">'
        assert cleaner.clean(dirty) == 'this is cute! <img rel="moo" src="moo">'
Code example #32
def sanitize_html(text):
    cleaner = Cleaner(tags=['a', 'abbr', 'b', 'blockquote', 'br', 'caption', 'code',
                            'col', 'colgroup', 'dd', 'del', 'div', 'dl', 'dt', 'em',
                            'figcaption', 'figure', 'h1', 'h2', 'h3', 'h4', 'h5',
                            'h6', 'hr', 'i', 'img', 'ins', 'li', 'mark', 'ol', 'p',
                            'pre', 's', 'span', 'strong', 'sub', 'sup', 'table',
                            'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'u', 'ul'],
                      attributes={'*': ['class', 'id'],
                                  'abbr': ['title'],
                                  'a': ['alt', 'href', 'title'],
                                  'img': ['alt', 'src', 'title']},
                      styles=[],
                      protocols=['http', 'https', 'mailto'],
                      strip=False,
                      strip_comments=True,
                      filters=None)
    sanitized = cleaner.clean(text)
    return sanitized
Code example #33
File: account.py Project: OpenYiff/Kemono2
def create_account(username: str,
                   password: str,
                   favorites: Optional[List[Dict]] = None) -> bool:
    account_id = None
    password_hash = bcrypt.hashpw(get_base_password_hash(password),
                                  bcrypt.gensalt()).decode('utf-8')
    account_create_lock.acquire()
    try:
        if is_username_taken(username):
            return False

        scrub = Cleaner(tags=[])

        cursor = get_cursor()
        query = """
            INSERT INTO account (username, password_hash)
            VALUES (%s, %s)
            RETURNING id
        """
        cursor.execute(query, (
            scrub.clean(username),
            password_hash,
        ))
        account_id = cursor.fetchone()['id']
        if account_id == 1:
            cursor = get_cursor()
            query = """
                UPDATE account
                SET role = 'administrator'
                WHERE id = 1
            """
            cursor.execute(query)
    finally:
        account_create_lock.release()

    if favorites is not None:
        for favorite in favorites:
            artist = get_artist(favorite['service'], favorite['artist_id'])
            if artist is None:
                continue
            add_favorite_artist(account_id, favorite['service'],
                                favorite['artist_id'])

    return True
Code example #34
File: test_clean.py Project: mozilla/bleach
    def test_filters(self):
        # Create a Filter that changes all the attr values to "moo"
        class MooFilter(Filter):
            def __iter__(self):
                for token in Filter.__iter__(self):
                    if token['type'] in ['StartTag', 'EmptyTag'] and token['data']:
                        for attr, value in token['data'].items():
                            token['data'][attr] = 'moo'

                    yield token

        ATTRS = {
            'img': ['rel', 'src']
        }
        TAGS = ['img']

        cleaner = Cleaner(tags=TAGS, attributes=ATTRS, filters=[MooFilter])

        dirty = 'this is cute! <img src="http://example.com/puppy.jpg" rel="nofollow">'
        assert (
            cleaner.clean(dirty) ==
            'this is cute! <img rel="moo" src="moo">'
        )
Code example #35
File: parser.py Project: LinuxOSsk/Shakal-NG
	def __init__(self, supported_tags=None):
		if supported_tags is None:
			supported_tags = DEFAULT_TAG_LIST
		self.cleaner = Cleaner(tags=supported_tags, attributes=ALLOWED_ATTRIBUTES)
Code example #36
    def node(comment):
        ##print("building node for comment id={0}...".format(comment.get('number', comment['id'])))
        # preload its comments (a separate API call)
        child_comments = [ ]
        if comment.get('comments') and comment.get('comments') > 0:
            get_children_url = comment['comments_url']
            resp = requests.get( get_children_url, headers=GH_GET_HEADERS, timeout=10)
            # N.B. Timeout is in seconds, and watches for *any* new data within that time (vs. whole response)
            try:
                resp.raise_for_status()
                try:
                    child_comments = resp.json()
                except:
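                    # very old requests versions exposed .json as an attribute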
                    child_comments = resp.json
            except:
                # We need logging in the web app!
                try:
                    import sys
                    sys.stderr.write('Error: got a {c} from {u}\n'.format(c=resp.status_code,
                                                                        u=get_children_url))
                except:
                    pass # well that sucks, we failed to even write to stderr

        metadata = parse_comment_metadata(comment['body'])
        ##print(metadata)

        # Examine the comment metadata (if any) to get the best display name
        # and URL for its author. Guests should appear here as the name and
        # email address they entered when creating a comment, rather than the
        # 'opentreeapi' bot user.
        #
        # Default values are what we can fetch from the issues API
        author_display_name = comment['user']['login']
        author_link = comment['user']['html_url']
        # Now let's try for something more friendly...
        if metadata:
            meta_author_info = metadata.get('Author', None)
            if meta_author_info:
                # Try to parse this from a Markdown hyperlink. Typical values include:
                #   u'opentreeapi'
                #   u'11'
                #   u'[Jim Allman](https://github.com/jimallman)'
                #   u'[John Smith](mailto:[email protected])'
                regex = re.compile(r'\[(.*)\]\((.*)\)')
                markdown_fields = regex.findall(meta_author_info)
                if len(markdown_fields) > 0:
                    # look for parts of a markdown link
                    author_display_name, author_link = markdown_fields[0]
                else:
                    # it's not a markdown link, just a bare name or numeric userid
                    if meta_author_info.isdigit():
                        # ignore ugly userid (login is better)
                        pass
                    else:
                        author_display_name = meta_author_info

        # Is this node for an issue (thread starter) or a comment (reply)?
        issue_node = 'number' in comment

        # Is the current user logged in? If so, what is their GitHub ID (login)?
        current_user_id = auth.user and auth.user.github_login or None

        # Cook up some reasonably strong regular expressions to detect bare
        # URLs and wrap them in hyperlinks. Adapted from 
        # http://stackoverflow.com/questions/1071191/detect-urls-in-a-string-and-wrap-with-a-href-tag
        link_regex = re.compile(  r'''
                             (?x)( # verbose identify URLs within text
                      (http|https) # make sure we find a resource type
                               :// # ...needs to be followed by colon-slash-slash
                    (\w+[:.]?){2,} # at least two domain groups, e.g. (gnosis.)(cx)
                              (/?| # could be just the domain name (maybe w/ slash)
                        [^ \n\r"]+ # or stuff then space, newline, tab, quote
                            [\w/]) # resource name ends in alphanumeric or slash
             (?=([\s\.,>)'"\]]|$)) # assert: followed by white or clause ending OR end of line
                                 ) # end of match group
                                   ''')
        # link_replace = r'<a href="\1" />\1</a>'
        # let's try this do-nothing version
        link_replace = r'\1'
        # NOTE the funky constructor required to use this below

        # Define a consistent cleaner to sanitize user input. We need a few
        # elements that are common in our markdown but missing from the Bleach
        # whitelist.
        # N.B. HTML comments are stripped by default. Non-allowed tags will appear
        # "naked" in output, so we can identify any bad actors.
        common_feedback_tags = [u'p', u'br',
                                u'h1', u'h2', u'h3', u'h4', u'h5', u'h6',
                                ]
        ot_markdown_tags = list(set( bleach.sanitizer.ALLOWED_TAGS + common_feedback_tags))
        ot_cleaner = Cleaner(tags=ot_markdown_tags)

        try:   # TODO: if not comment.deleted:
            # N.B. some missing information (e.g. supporting URL) will appear here as a string like "None"
            supporting_reference_url = metadata.get('Supporting reference', None)
            has_supporting_reference_url = supporting_reference_url and (supporting_reference_url != u'None')
            # Prepare a sanitized rendering of this user-submitted markup
            rendered_comment_markdown = markdown(
                get_visible_comment_body(comment['body'] or ''),
                extras={'link-patterns':None},
                link_patterns=[(link_regex, link_replace)]).encode('utf-8')
            safe_comment_markup = XML(
                ot_cleaner.clean(rendered_comment_markdown),
                sanitize=False)  # gluon's sanitize will break on Unicode!
            markup = LI(
                    DIV(##T('posted by %(first_name)s %(last_name)s',comment.created_by),
                    # not sure why this doesn't work... db.auth record is not a mapping!?
                    ('title' in comment) and DIV( comment['title'], A(T('on GitHub'), _href=comment['html_url'], _target='_blank'), _class='topic-title') or '',
                    DIV( safe_comment_markup, _class=(issue_node and 'body issue-body' or 'body comment-body')),
                    DIV( A(T('Supporting reference (opens in a new window)'), _href=supporting_reference_url, _target='_blank'), _class='body issue-supporting-reference' ) if has_supporting_reference_url else '',
                    DIV(
                        A(T(author_display_name), _href=author_link, _target='_blank'),
                        # SPAN(' [local expertise]',_class='badge') if comment.claimed_expertise else '',
                        SPAN(' ',metadata.get('Feedback type'),' ',_class='badge') if metadata.get('Feedback type') else '',
                        T(' - %s',prettydate(utc_to_local(datetime.strptime(comment['created_at'], GH_DATETIME_FORMAT)),T)),
                        SPAN(
                            issue_node and A(T(child_comments and 'Hide comments' or 'Show/add comments'),_class='toggle',_href='#') or '',
                            issue_node and comment['user']['login'] == current_user_id and SPAN(' | ') or '',
                            A(T('Delete'),_class='delete',_href='#') if comment['user']['login'] == current_user_id else '',
                        _class='controls'),
                    _class='byline'),
                    _id='r%s' % comment.get('number', comment['id']),
                    _class='msg-wrapper'),
                # child messages (toggle hides/shows these)
                issue_node and SUL(*[node(comment) for comment in child_comments], _style=("" if child_comments else "display: none;")) or '',
                issue_node and DIV(_class='reply', _style=("" if child_comments else "display: none;")) or '',
                _class=(issue_node and 'issue' or 'comment'))
            return markup
        except:
            import sys
            print "Unexpected error:", sys.exc_info()[0]
            raise
Code example #37
File: test_linkify.py Project: jvanasco/bleach
def test_linkify_filter(text, expected):
    cleaner = Cleaner(filters=[LinkifyFilter])
    assert cleaner.clean(text) == expected
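
The test is evidently parametrized over (text, expected) pairs. One plausible pair, based on bleach's default linkify behavior of adding rel="nofollow" (LinkifyFilter here rides inside an otherwise-default Cleaner):

# ('a http://example.com link',
#  'a <a href="http://example.com" rel="nofollow">http://example.com</a> link')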