def do_register():
    form = RegistrationForm()

    # Check the request method: run this branch only when the form is
    # being submitted
    if request.method == "POST":
        if form.validate_on_submit():
            # Sanitize some fields
            scrub = Cleaner()
            username = scrub.clean(form.username.data)
            dispname = scrub.clean(form.dispname.data)

            # Add the new user data
            new_user = User(username=username,
                            dispname=dispname,
                            email=form.email.data,
                            password=hash.generate_password_hash(
                                form.password.data),
                            dob=form.dob.data,
                            gender=int(form.gender.data),
                            country=int(form.country.data))
            db.session.add(new_user)
            db.session.flush()

            # Add the default favorites collection
            user_faves = CollectionMeta(title='Favorites',
                                        user_id=new_user.id,
                                        use_as_favorites=True)
            db.session.add(user_faves)

            # Catch commit errors (e.g. integrity violations)
            try:
                db.session.commit()
            except IntegrityError as e:
                db.session.rollback()
                # FIXME: register: don't have debug stuff printing out
                flash(f'Registration failed: {e.__cause__}', 'error')
                return render_template("register.haml", form=form)

            # LOG: User registration
            app.logger.info(
                f"User '{new_user.username}' successfully registered "
                f"as ID '{new_user.id}'"
            )

            # Registration success
            flash(f'Account created for {form.username.data}!', 'success')
            return redirect(url_for('login.do_login'))

        # If validation failed, flash all the form errors on the page
        for field, errors in form.errors.items():
            for err in errors:
                flash(err, 'error')
        return render_template("register.haml", form=form)

    # Run this when we're only loading the registration page
    else:
        return render_template("register.haml", form=form)
def render_markdown(source):
    html = md.convert(source)
    cleaner = Cleaner(tags=ALLOWED_TAGS,
                      attributes=ALLOWED_ATTRIBUTES,
                      protocols=ALLOWED_PROTOCOLS,
                      filters=[
                          partial(LinkifyFilter,
                                  callbacks=bleach.linkifier.DEFAULT_CALLBACKS)
                      ])
    return cleaner.clean(html)
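# A minimal sketch of the module-level setup render_markdown() above relies
# on. The snippet itself does not show these definitions, so every name and
# value here is a placeholder assumption, not the project's real whitelist.
from functools import partial

import bleach
import markdown
from bleach import Cleaner
from bleach.linkifier import LinkifyFilter

md = markdown.Markdown(extensions=["extra"])  # assumed converter instance
ALLOWED_TAGS = ["p", "a", "em", "strong", "code", "pre", "ul", "ol", "li"]
ALLOWED_ATTRIBUTES = {"a": ["href", "title"]}
ALLOWED_PROTOCOLS = ["http", "https", "mailto"]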
def html(self, node):
    """The cleaned HTML of the page"""
    html_output = build_html_output(self.content, node=node)
    try:
        cleaner = Cleaner(
            tags=settings.WIKI_WHITELIST['tags'],
            attributes=settings.WIKI_WHITELIST['attributes'],
            styles=settings.WIKI_WHITELIST['styles'],
            filters=[partial(LinkifyFilter, callbacks=[nofollow])]
        )
        return cleaner.clean(html_output)
    except TypeError:
        logger.warning('Returning unlinkified content.')
        return render_content(self.content, node=node)
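# The nofollow callback referenced above is presumably bleach's stock
# callback, which adds rel="nofollow" to generated links (an assumption;
# the project may define its own):
# from bleach.callbacks import nofollow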
def md_to_html(md):
    """Converts Markdown to HTML and sanitizes it"""
    html = markdown.markdown(md)
    cleaner = Cleaner(
        tags=[
            "h1", "h2", "h3", "h4", "h5", "h6",
            "b", "i", "strong", "em", "tt", "del", "abbr",
            "p", "br", "span", "div", "blockquote", "code", "pre", "hr",
            "ul", "dl", "ol", "li", "dd", "dt",
            "img", "a", "sub", "sup",
        ],
        attributes={
            "img": ["src", "alt", "title"],
            "a": ["href", "alt", "title"],
            "abbr": ["title"],
        },
        filters=[LinkifyFilter],
    )
    return cleaner.clean(html)
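# Example of md_to_html() in action: Markdown is converted first, then any
# tag outside the whitelist is escaped rather than stripped (bleach's
# default behavior). The output shown is approximate.
print(md_to_html("**bold** and <script>alert(1)</script>"))
# <p><strong>bold</strong> and &lt;script&gt;alert(1)&lt;/script&gt;</p>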
def sanitize_html(source):
    """Uses bleach to sanitize HTML of any tags and attributes that are
    invalid in Confluence storage format.

    Uses a regex to work around https://github.com/mozilla/bleach/issues/28
    in common cases.
    """
    html = Cleaner(tags=ALLOWED_TAGS,
                   attributes=ALLOWED_ATTRS,
                   styles=ALLOWED_STYLES,
                   filters=[RemovalFilter],
                   strip=True,
                   strip_comments=True).clean(source)
    return EMPTY_TAG_REGEX.sub(r'<\1/>', html)
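# EMPTY_TAG_REGEX is defined elsewhere in that module. A hypothetical
# definition consistent with the substitution r'<\1/>' above would collapse
# the '<br></br>'-style pairs bleach emits for void elements (see the
# linked issue) back into self-closing form:
import re
EMPTY_TAG_REGEX = re.compile(r'<(\w+)\s*></\1>')  # assumption, not the original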
def md_nourl(s):
    """Transform Markdown into HTML.

    URLs and email addresses are not converted into links automatically.
    Disallowed HTML tags will be escaped.
    """
    cleaner = Cleaner(tags=markdown_allowed_tags)
    return cleaner.clean(markdown.markdown(s))
def md(s):
    """Transform Markdown into HTML.

    Disallowed HTML tags will be escaped.
    """
    cleaner = Cleaner(tags=markdown_allowed_tags,
                      filters=[partial(LinkifyFilter, parse_email=True)])
    return cleaner.clean(markdown.markdown(s))
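# Hedged sketch contrasting md() and md_nourl() above; the whitelist is a
# placeholder and the rendered output is approximate.
markdown_allowed_tags = ["p", "a", "em", "strong"]

print(md("Contact admin@example.com"))
# <p>Contact <a href="mailto:admin@example.com">admin@example.com</a></p>
print(md_nourl("Contact admin@example.com"))
# <p>Contact admin@example.com</p>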
"ol", "p", "pre", "q", "s", "small", "span", "sub", "sup", "strike", "strong", "table", "tbody", "td", "th", "thead", "tfoot", "tr", "u", "ul", ] # LinkifyFilter converts raw URLs in text into links cleaner = Cleaner(tags=SAFE_TAGS, filters=[LinkifyFilter]) @register.filter def markdown(text): return mark_safe( cleaner.clean(convert_markdown(text, extensions=["extra"])))
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

# SOURCE: https://github.com/mozilla/bleach
# SOURCE: https://bleach.readthedocs.io/en/latest/linkify.html#using-bleach-linkifier-linkifyfilter

from functools import partial

# pip install bleach
from bleach import Cleaner
from bleach.linkifier import LinkifyFilter

html = '<pre>http://example.com</pre>'

cleaner = Cleaner(tags=['pre'])
print(cleaner.clean(html))
# <pre>http://example.com</pre>

cleaner = Cleaner(tags=['pre'], filters=[LinkifyFilter])
print(cleaner.clean(html))
# <pre><a href="http://example.com" rel="nofollow">http://example.com</a></pre>

print('\n' + '-' * 100 + '\n')

# skip_tags (list) – list of tags that you don't want to linkify
# the contents of; for example, you could set this to ['pre']
# to skip linkifying contents of pre tags
cleaner = Cleaner(tags=['pre'],
                  filters=[partial(LinkifyFilter, skip_tags=['pre'])])
print(cleaner.clean(html))
# <pre>http://example.com</pre>
def _import(request, app):
    EXPORT_FIELDS = {
        'academic_title': 'person.academic_title',
        'address': 'person.address',
        'direct_number': 'person.phone_direct',
        'firstname': 'person.first_name',
        'lastname': 'person.last_name',
        'occupation': 'person.profession',
        'phone': 'person.phone',
        'political_party': 'person.political_party',
        'postfix': 'membership.addition',
        'role': 'membership.title',
        'start': 'membership.since',
        'title': 'person.title',
        'year': 'person.born',
    }

    class LinkFilter(Filter):
        """ Uses the href rather than the content of an a-tag. """

        def __iter__(self):
            in_link = False
            for token in Filter.__iter__(self):
                if token.get('name') == 'a':
                    if token['type'] == 'StartTag':
                        in_link = True
                        data = token['data'][(None, 'href')]
                        data = data.replace('mailto:', '')
                        yield {'type': 'Characters', 'data': data}
                    elif token['type'] == 'EndTag':
                        in_link = False
                elif token['type'] == 'Characters':
                    if not in_link:
                        yield token
                else:
                    yield token

    cleaner = Cleaner(tags=['a', 'p', 'br'],
                      attributes={'a': 'href'},
                      strip=True,
                      filters=[LinkFilter, whitespace_filter])

    session = app.session()

    if clear:
        click.secho("Deleting all agencies", fg='yellow')
        for root in AgencyCollection(session).roots:
            session.delete(root)
        click.secho("Deleting all people", fg='yellow')
        for person in PersonCollection(session).query():
            session.delete(person)

    workbook = open_workbook(file)

    click.secho("Importing agencies", fg='green')
    agencies = ExtendedAgencyCollection(session)
    people = ExtendedPersonCollection(session)
    sheet = workbook.sheet_by_name('Organisationen')
    ids = {}
    parents = {}
    alphabetical = []
    for row in range(1, sheet.nrows):
        if skip_root and row == 1:
            continue
        if row and (row % 50 == 0):
            app.es_indexer.process()

        # We use our own, internal IDs which are auto-incremented
        external_id = int(sheet.cell_value(row, 0))

        # Remove the HTML code from the portrait, prepend the description
        portrait = '\n'.join((
            sheet.cell_value(row, 3).strip(),
            html_to_text(cleaner.clean(sheet.cell_value(row, 4)))
        ))
        portrait = portrait.replace('\n\n', '\n').strip()

        # Re-map the export fields
        export_fields = sheet.cell_value(row, 7) or 'role,title'
        export_fields = export_fields.split(',')
        export_fields = [EXPORT_FIELDS[field] for field in export_fields]

        agency = agencies.add(
            parent=parents.get(external_id),
            title=sheet.cell_value(row, 2).strip(),
            portrait=portrait,
            export_fields=export_fields,
            is_hidden_from_public=sheet.cell_value(row, 8) == 'private',
            order=external_id,
        )
        ids[external_id] = agency.id

        # Download and add the organigram
        if not skip_download:
            organigram_url = sheet.cell_value(row, 6)
            if organigram_url:
                response = get(organigram_url)
                response.raise_for_status()
                agency.organigram_file = BytesIO(response.content)

        if sheet.cell_value(row, 5):
            alphabetical.append(agency.id)

        for child in sheet.cell_value(row, 1).split(','):
            if child:
                child = int(child)
                parents[child] = agency

    # Make sure the ordering values are nice and coherent
    def defrag_ordering(agency):
        for order, child in enumerate(agency.children):
            child.order = order
            defrag_ordering(child)

    for order, root in enumerate(agencies.roots):
        root.order = order
        defrag_ordering(root)

    click.secho("Importing people and memberships", fg='green')
    sheet = workbook.sheet_by_name('Personen')
    for row in range(1, sheet.nrows):
        if row and (row % 50 == 0):
            app.es_indexer.process()

        notes = '\n'.join((
            sheet.cell_value(row, 13).strip(),
            sheet.cell_value(row, 14).strip()
        )).strip()

        person = people.add(
            academic_title=sheet.cell_value(row, 0).strip(),
            profession=sheet.cell_value(row, 1).strip(),
            function=(sheet.cell_value(row, 17).strip()
                      if sheet.ncols > 17 else ''),
            first_name=sheet.cell_value(row, 2).strip(),
            last_name=sheet.cell_value(row, 3).strip(),
            political_party=sheet.cell_value(row, 4).strip(),
            born=sheet.cell_value(row, 5).strip(),
            email=sheet.cell_value(row, 6).strip(),
            address=sheet.cell_value(row, 7).strip(),
            phone=sheet.cell_value(row, 8).strip(),
            phone_direct=sheet.cell_value(row, 9).strip(),
            salutation=sheet.cell_value(row, 10).strip(),
            website=sheet.cell_value(row, 12).strip(),
            is_hidden_from_public=sheet.cell_value(row, 15) == 'private',
            notes=notes,
        )

        memberships = sheet.cell_value(row, 16).split('//')
        for membership in memberships:
            if membership:
                matched = re.match(
                    r'^\((\d*)\)\((.*)\)\((.*)\)\((.*)\)'
                    r'\((.*)\)\((.*)\)\((\d*)\)\((\d*)\)$',
                    membership)
                if matched:
                    values = matched.groups()
                else:
                    # old version before order_within_person existed
                    matched = re.match(
                        r'^\((\d*)\)\((.*)\)\((.*)\)\((.*)\)'
                        r'\((.*)\)\((.*)\)\((\d*)\)$',
                        membership)
                    values = list(matched.groups())
                    values.append('0')
                person.memberships.append(
                    ExtendedAgencyMembership(
                        agency_id=ids[int(values[0])],
                        title=values[1] or "",
                        since=values[2] or None,
                        prefix=values[3],
                        addition=values[4],
                        note=values[5],
                        order_within_agency=int(values[6]),
                        order_within_person=int(values[7]),
                    ))

    # Order the memberships alphabetically, if desired
    for id_ in alphabetical:
        agencies.by_id(id_).sort_relationships()

    # Show a tree view of what we imported
    if visualize:
        click.secho("Imported data:", fg='green')

        def show(agency, level):
            text = f'{agency.title}\n'
            for membership in agency.memberships:
                person = membership.person
                text += f'* {membership.title}: {person.title}\n'
            click.echo(indent(text.strip(), level * ' '))
            for child in agency.children:
                show(child, level + 1)

        for root in agencies.roots:
            show(root, 1)

    # Abort the transaction if requested
    if dry_run:
        transaction.abort()
        click.secho("Aborting transaction", fg='yellow')
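# The Filter base class used by LinkFilter above is not imported in this
# excerpt; it is presumably html5lib's filter base (an assumption):
# from html5lib.filters.base import Filter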
#!/usr/bin/env python3

from bleach import Cleaner
from bleach.linkifier import LinkifyFilter
from sanic import Sanic, response
import socketio
import os

STATIC_ASSETS_PATH = "./static"
LISTEN_HOST = "127.0.0.1"
LISTEN_PORT = 9001

inputCleaner = Cleaner(filters=[LinkifyFilter])

app = Sanic(name="gestalt")
app.static("/static", STATIC_ASSETS_PATH)


@app.route('/')
async def app_view(request):
    return await response.file("chat.html")


sio = socketio.AsyncServer(async_mode='sanic')
sio.attach(app)


@sio.event()
async def message(sid: str, data: dict):
    sanitizedText = inputCleaner.clean(data["text"])
    await sio.emit("message", {
def parse_html_content(self, html, **kwargs):
    if html is None:
        return None
    cleaner = Cleaner(**kwargs, strip=True)
    return cleaner.clean(html)
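# Example call for parse_html_content() above: keyword arguments are passed
# straight through to bleach.Cleaner, with strip=True always applied. The
# receiver name "obj" is illustrative.
cleaned = obj.parse_html_content("<p><span>hi</span></p>", tags=["p"])
# The disallowed <span> is stripped rather than escaped, leaving '<p>hi</p>'.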