Python Cleaner 예제들, bleach.Cleaner Python 예제들

예제 #1

0

파일 보기

파일: register.py 프로젝트: ZoomTen/SheepyArt

def do_register():
    """Handle the registration page: render the form or create a new user.

    On a non-POST request the registration form is rendered. On POST, a
    validated form creates a ``User`` plus that user's default "Favorites"
    ``CollectionMeta`` and redirects to the login view. Validation errors
    and database integrity errors are flashed and the form is re-rendered.

    Returns:
        The rendered ``register.haml`` template, or a redirect to
        ``login.do_login`` after a successful registration.
    """
    form = RegistrationForm()

    # Plain page load: nothing was submitted, just show the form.
    if request.method != "POST":
        return render_template("register.haml", form=form)

    if not form.validate_on_submit():
        # Show every validation error on the page.
        for errors in form.errors.values():
            for err in errors:
                flash(err, 'error')
        return render_template("register.haml", form=form)

    # Sanitize the free-text fields before storing them.
    scrub = Cleaner()
    username = scrub.clean(form.username.data)
    dispname = scrub.clean(form.dispname.data)

    new_user = User(username=username,
                    dispname=dispname,
                    email=form.email.data,
                    password=hash.generate_password_hash(
                        form.password.data),
                    dob=form.dob.data,
                    gender=int(form.gender.data),
                    country=int(form.country.data))

    # flush() already sends the INSERT to the database, so constraint
    # violations can surface there as well as on commit(); both must be
    # inside the try block.
    try:
        db.session.add(new_user)
        # Flush so that new_user.id is populated for the collection below.
        db.session.flush()

        # Add default favorites collection
        user_faves = CollectionMeta(title='Favorites',
                                    user_id=new_user.id,
                                    use_as_favorites=True)
        db.session.add(user_faves)
        db.session.commit()
    except IntegrityError as e:
        db.session.rollback()
        # Keep the database error details in the server log instead of
        # showing them to the visitor.
        app.logger.warning(
            f"Registration failed for '{username}': {e.__cause__}"
        )
        flash('Registration failed. Please check your details and try again.',
              'error')
        return render_template("register.haml", form=form)

    # LOG: User registration
    app.logger.info(
        f"User '{new_user.username}' successfully registered as ID '{new_user.id}'"
    )

    # Registration success
    flash(f'Account created for {form.username.data}!', 'success')
    return redirect(url_for('login.do_login'))

예제 #2

0

파일 보기

파일: markdown.py 프로젝트: xenonca/contentdb

def render_markdown(source):
    """Convert Markdown ``source`` to HTML and return the sanitized result.

    The converted HTML is cleaned with a bleach ``Cleaner`` configured from
    ALLOWED_TAGS, ALLOWED_ATTRIBUTES and ALLOWED_PROTOCOLS, plus a
    ``LinkifyFilter`` that uses bleach's default linkify callbacks.
    """
    raw_html = md.convert(source)

    linkify = partial(LinkifyFilter,
                      callbacks=bleach.linkifier.DEFAULT_CALLBACKS)
    sanitizer = Cleaner(
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        protocols=ALLOWED_PROTOCOLS,
        filters=[linkify],
    )
    return sanitizer.clean(raw_html)

예제 #3

0

파일 보기

 def html(self, node):
     """Return this page's content rendered to HTML and sanitized.

     If building the cleaner or cleaning raises ``TypeError``, a warning is
     logged and the output of ``render_content`` is returned instead.
     """
     rendered = build_html_output(self.content, node=node)
     try:
         whitelist = settings.WIKI_WHITELIST
         linkify_nofollow = partial(LinkifyFilter, callbacks=[nofollow])
         sanitizer = Cleaner(
             tags=whitelist['tags'],
             attributes=whitelist['attributes'],
             styles=whitelist['styles'],
             filters=[linkify_nofollow],
         )
         return sanitizer.clean(rendered)
     except TypeError:
         logger.warning('Returning unlinkified content.')
         return render_content(self.content, node=node)

예제 #4

0

파일 보기

def md_to_html(md):
    """Render Markdown text to HTML and sanitize the result with bleach.

    Only the tags and per-tag attributes listed below are passed to the
    cleaner as allowed; ``LinkifyFilter`` is added to the filter chain.
    """
    rendered = markdown.markdown(md)

    permitted_tags = [
        "h1", "h2", "h3", "h4", "h5", "h6",
        "b", "i", "strong", "em", "tt", "del", "abbr",
        "p", "br",
        "span", "div", "blockquote", "code", "pre", "hr",
        "ul", "dl", "ol", "li", "dd", "dt",
        "img",
        "a",
        "sub", "sup",
    ]
    permitted_attributes = {
        "img": ["src", "alt", "title"],
        "a": ["href", "alt", "title"],
        "abbr": ["title"],
    }

    sanitizer = Cleaner(
        tags=permitted_tags,
        attributes=permitted_attributes,
        filters=[LinkifyFilter],
    )
    return sanitizer.clean(rendered)

예제 #5

0

파일 보기

def sanitize_html(source):
    """Sanitize ``source`` with bleach for the Confluence storage format.

    Tags, attributes and styles are restricted to ALLOWED_TAGS,
    ALLOWED_ATTRS and ALLOWED_STYLES; the cleaner is created with
    ``strip=True`` and ``strip_comments=True`` and runs ``RemovalFilter``.

    Afterwards EMPTY_TAG_REGEX matches are rewritten as self-closing tags,
    a workaround for https://github.com/mozilla/bleach/issues/28 in common
    cases.
    """
    sanitizer = Cleaner(
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRS,
        styles=ALLOWED_STYLES,
        filters=[RemovalFilter],
        strip=True,
        strip_comments=True,
    )
    cleaned = sanitizer.clean(source)
    return EMPTY_TAG_REGEX.sub(r'<\1/>', cleaned)

예제 #6

0

파일 보기

def md_nourl(s):
    """Render Markdown ``s`` to HTML without auto-linking.

    URLs and email addresses are not converted into links. HTML tags that
    are not in ``markdown_allowed_tags`` are escaped.
    """
    sanitizer = Cleaner(tags=markdown_allowed_tags)
    rendered = markdown.markdown(s)
    return sanitizer.clean(rendered)

예제 #7

0

파일 보기

def md(s):
    """Render Markdown ``s`` to HTML, linkifying URLs and email addresses.

    HTML tags that are not in ``markdown_allowed_tags`` are escaped.
    """
    linkify_with_email = partial(LinkifyFilter, parse_email=True)
    sanitizer = Cleaner(tags=markdown_allowed_tags,
                        filters=[linkify_with_email])
    rendered = markdown.markdown(s)
    return sanitizer.clean(rendered)

예제 #8

0

파일 보기

파일: markdown.py 프로젝트: wjd52/puzzlord

    "ol",
    "p",
    "pre",
    "q",
    "s",
    "small",
    "span",
    "sub",
    "sup",
    "strike",
    "strong",
    "table",
    "tbody",
    "td",
    "th",
    "thead",
    "tfoot",
    "tr",
    "u",
    "ul",
]

# Module-level bleach cleaner, created once and used by the `markdown`
# template filter. SAFE_TAGS is passed as the allowed tag list.
# LinkifyFilter converts raw URLs in text into links
cleaner = Cleaner(tags=SAFE_TAGS, filters=[LinkifyFilter])


@register.filter
def markdown(text):
    """Template filter: render Markdown ``text`` to sanitized, safe HTML.

    The text is converted with the "extra" extension, cleaned by the
    module-level ``cleaner`` and then marked safe for template output.
    """
    rendered = convert_markdown(text, extensions=["extra"])
    sanitized = cleaner.clean(rendered)
    return mark_safe(sanitized)

예제 #9

0

파일 보기

# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

# SOURCE: https://github.com/mozilla/bleach
# SOURCE: https://bleach.readthedocs.io/en/latest/linkify.html#using-bleach-linkifier-linkifyfilter

from functools import partial

# pip install bleach
from bleach import Cleaner
from bleach.linkifier import LinkifyFilter

html = '<pre>http://example.com</pre>'

# 1) Sanitizing only: <pre> is allowed and the URL is left as plain text.
cleaner = Cleaner(tags=['pre'])
cleaned = cleaner.clean(html)
print(cleaned)
# <pre>http://example.com</pre>

# 2) Sanitizing plus LinkifyFilter: the URL inside <pre> becomes a link.
cleaner = Cleaner(tags=['pre'], filters=[LinkifyFilter])
cleaned = cleaner.clean(html)
print(cleaned)
# <pre><a href="http://example.com" rel="nofollow">http://example.com</a></pre>

print('\n' + '-' * 100 + '\n')

# 3) skip_tags (list) - tags whose contents should not be linkified.
# Passing ['pre'] makes LinkifyFilter leave the contents of <pre> alone.
linkify_skip_pre = partial(LinkifyFilter, skip_tags=['pre'])
cleaner = Cleaner(tags=['pre'], filters=[linkify_skip_pre])
cleaned = cleaner.clean(html)
print(cleaned)
# should print the input unchanged: <pre>http://example.com</pre>

예제 #10

0

파일 보기

파일: cli.py 프로젝트: OneGov/onegov.agency

    def _import(request, app):
        """Import agencies, people and memberships from an Excel workbook.

        Reads the 'Organisationen' sheet to create agencies and the
        'Personen' sheet to create people with their memberships.

        ``clear``, ``file``, ``skip_root``, ``skip_download``, ``visualize``
        and ``dry_run`` are taken from the enclosing scope (not visible
        here; presumably options of the surrounding CLI command).

        :param request: not used by this function.
        :param app: application object providing ``session()`` and
            ``es_indexer``.
        :raises ValueError: if a membership entry matches neither the
            current nor the legacy serialization format.
        """
        EXPORT_FIELDS = {
            'academic_title': 'person.academic_title',
            'address': 'person.address',
            'direct_number': 'person.phone_direct',
            'firstname': 'person.first_name',
            'lastname': 'person.last_name',
            'occupation': 'person.profession',
            'phone': 'person.phone',
            'political_party': 'person.political_party',
            'postfix': 'membership.addition',
            'role': 'membership.title',
            'start': 'membership.since',
            'title': 'person.title',
            'year': 'person.born',
        }

        class LinkFilter(Filter):
            """ Uses the href rather than the content of an a-tag. """
            def __iter__(self):
                in_link = False
                for token in Filter.__iter__(self):
                    if token.get('name') == 'a':
                        if token['type'] == 'StartTag':
                            href = token['data'].get((None, 'href'))
                            if href is None:
                                # Anchor without a target: drop the tag but
                                # let its text through instead of failing
                                # with a KeyError.
                                continue
                            in_link = True
                            yield {'type': 'Characters',
                                   'data': href.replace('mailto:', '')}
                        elif token['type'] == 'EndTag':
                            in_link = False
                    elif token['type'] == 'Characters':
                        # Text inside a link was replaced by its href above.
                        if not in_link:
                            yield token
                    else:
                        yield token

        # The allowed attributes must be a list: with a plain string bleach
        # would do a substring test against 'href'.
        cleaner = Cleaner(tags=['a', 'p', 'br'],
                          attributes={'a': ['href']},
                          strip=True,
                          filters=[LinkFilter, whitespace_filter])

        session = app.session()

        if clear:
            click.secho("Deleting all agencies", fg='yellow')
            for root in AgencyCollection(session).roots:
                session.delete(root)
            click.secho("Deleting all people", fg='yellow')
            for person in PersonCollection(session).query():
                session.delete(person)

        workbook = open_workbook(file)

        click.secho("Importing agencies", fg='green')
        agencies = ExtendedAgencyCollection(session)
        people = ExtendedPersonCollection(session)
        sheet = workbook.sheet_by_name('Organisationen')
        ids = {}  # external id -> id of the created agency
        parents = {}  # external id of a child -> its parent agency
        alphabetical = []  # ids of agencies whose relationships get sorted
        # Row 0 is skipped (presumably the header row).
        for row in range(1, sheet.nrows):
            if skip_root and row == 1:
                continue

            # Process the indexer queue every 50 rows.
            if row and (row % 50 == 0):
                app.es_indexer.process()

            # We use our own, internal IDs which are auto-incremented
            external_id = int(sheet.cell_value(row, 0))

            # Remove the HTML code from the portrait, prepend the description
            portrait = '\n'.join(
                (sheet.cell_value(row, 3).strip(),
                 html_to_text(cleaner.clean(sheet.cell_value(row, 4)))))
            portrait = portrait.replace('\n\n', '\n').strip()

            # Re-map the export fields
            export_fields = sheet.cell_value(row, 7) or 'role,title'
            export_fields = export_fields.split(',')
            export_fields = [EXPORT_FIELDS[field] for field in export_fields]

            agency = agencies.add(
                parent=parents.get(external_id),
                title=sheet.cell_value(row, 2).strip(),
                portrait=portrait,
                export_fields=export_fields,
                is_hidden_from_public=sheet.cell_value(row, 8) == 'private',
                order=external_id,
            )
            ids[external_id] = agency.id

            # Download and add the organigram
            if not skip_download:
                organigram_url = sheet.cell_value(row, 6)
                if organigram_url:
                    response = get(organigram_url)
                    response.raise_for_status()
                    agency.organigram_file = BytesIO(response.content)

            if sheet.cell_value(row, 5):
                alphabetical.append(agency.id)

            # Remember this agency as the parent of each listed child id.
            for child in sheet.cell_value(row, 1).split(','):
                if child:
                    child = int(child)
                    parents[child] = agency

        # Renumber the order values so that siblings are numbered 0..n-1
        def defrag_ordering(agency):
            for order, child in enumerate(agency.children):
                child.order = order
                defrag_ordering(child)

        for order, root in enumerate(agencies.roots):
            root.order = order
            defrag_ordering(root)

        click.secho("Importing people and memberships", fg='green')
        sheet = workbook.sheet_by_name('Personen')
        for row in range(1, sheet.nrows):
            if row and (row % 50 == 0):
                app.es_indexer.process()

            notes = '\n'.join((sheet.cell_value(row, 13).strip(),
                               sheet.cell_value(row, 14).strip())).strip()

            person = people.add(
                academic_title=sheet.cell_value(row, 0).strip(),
                profession=sheet.cell_value(row, 1).strip(),
                function=(sheet.cell_value(row, 17).strip()
                          if sheet.ncols > 17 else ''),
                first_name=sheet.cell_value(row, 2).strip(),
                last_name=sheet.cell_value(row, 3).strip(),
                political_party=sheet.cell_value(row, 4).strip(),
                born=sheet.cell_value(row, 5).strip(),
                email=sheet.cell_value(row, 6).strip(),
                address=sheet.cell_value(row, 7).strip(),
                phone=sheet.cell_value(row, 8).strip(),
                phone_direct=sheet.cell_value(row, 9).strip(),
                salutation=sheet.cell_value(row, 10).strip(),
                website=sheet.cell_value(row, 12).strip(),
                is_hidden_from_public=sheet.cell_value(row, 15) == 'private',
                notes=notes,
            )
            # Memberships are serialized as '//'-separated entries, each a
            # sequence of parenthesized fields.
            memberships = sheet.cell_value(row, 16).split('//')
            for membership in memberships:
                if membership:
                    matched = re.match(
                        r'^\((\d*)\)\((.*)\)\((.*)\)\((.*)\)'
                        r'\((.*)\)\((.*)\)\((\d*)\)\((\d*)\)$', membership)
                    if matched:
                        values = matched.groups()
                    else:
                        # old version before order_within_person existed
                        matched = re.match(
                            r'^\((\d*)\)\((.*)\)\((.*)\)\((.*)\)'
                            r'\((.*)\)\((.*)\)\((\d*)\)$', membership)
                        if not matched:
                            # Fail with a message naming the bad entry
                            # instead of an AttributeError on None.
                            raise ValueError(
                                f'Invalid membership format: {membership!r}')
                        values = list(matched.groups())
                        values.append('0')
                    person.memberships.append(
                        ExtendedAgencyMembership(
                            agency_id=ids[int(values[0])],
                            title=values[1] or "",
                            since=values[2] or None,
                            prefix=values[3],
                            addition=values[4],
                            note=values[5],
                            order_within_agency=int(values[6]),
                            order_within_person=int(values[7]),
                        ))

        # Order the memberships alphabetically, if desired
        for id_ in alphabetical:
            agencies.by_id(id_).sort_relationships()

        # Show a tree view of what we imported
        if visualize:
            click.secho("Imported data:", fg='green')

            def show(agency, level):
                text = f'{agency.title}\n'
                for membership in agency.memberships:
                    person = membership.person
                    text += f'* {membership.title}: {person.title}\n'
                click.echo(indent(text.strip(), level * '  '))

                for child in agency.children:
                    show(child, level + 1)

            for root in agencies.roots:
                show(root, 1)

        # Abort the transaction if requested
        if dry_run:
            transaction.abort()
            click.secho("Aborting transaction", fg='yellow')

예제 #11

0

파일 보기

파일: gestalt.py 프로젝트: ILikePizza555/gestalt

#! /usr/bin/env python3

from bleach import Cleaner
from bleach.linkifier import LinkifyFilter
from sanic import Sanic, response
import socketio
import os

# Directory served under the "/static" URL prefix (see app.static below).
STATIC_ASSETS_PATH = "./static"
# Host and port constants; their use is not visible in this part of the file
# (presumably passed to the server start-up call - TODO confirm).
LISTEN_HOST = "127.0.0.1"
LISTEN_PORT = 9001

# Shared bleach cleaner. No tags or attributes are passed, so bleach's
# defaults apply; LinkifyFilter is added to the filter chain.
inputCleaner = Cleaner(filters=[LinkifyFilter])

# The Sanic application, with the static assets directory registered.
app = Sanic(name="gestalt")
app.static("/static", STATIC_ASSETS_PATH)


@app.route('/')
async def app_view(request):
    """Serve the chat page (``chat.html``) for the site root."""
    chat_page = await response.file("chat.html")
    return chat_page


# Socket.IO server created with async_mode='sanic' and attached to the
# Sanic app defined above.
sio = socketio.AsyncServer(async_mode='sanic')
sio.attach(app)


@sio.event()
async def message(sid: str, data: dict):
    sanitizedText = inputCleaner.clean(data["text"])
    await sio.emit("message", {

예제 #12

0

파일 보기

 def parse_html_content(self, html, **kwargs):
     """Clean ``html`` with a bleach ``Cleaner`` created with ``strip=True``.

     Extra keyword arguments are forwarded to ``Cleaner``. Returns ``None``
     when ``html`` is ``None``.
     """
     if html is not None:
         sanitizer = Cleaner(strip=True, **kwargs)
         return sanitizer.clean(html)
     return None