def clean_comment(self):
    """Run the submitted ``comment`` value through the clean-up pipeline.

    The value stored under ``self.cleaned_data['comment']`` is passed, in
    order, through ``clean_html``, ``autolink_html`` and ``autolink_email``.
    After every step the result is written back under the same key, so
    ``cleaned_data`` always holds the output of the last completed step.

    Returns:
        The final value of ``self.cleaned_data['comment']``.

    Raises:
        KeyError: if ``cleaned_data`` has no ``'comment'`` entry.
    """
    data = self.cleaned_data
    # Order matters: the markup is cleaned first, then the two auto-linkers run.
    for transform in (clean_html, autolink_html, autolink_email):
        data['comment'] = transform(data['comment'])
    return data['comment']
def html_clean(body):
    """Clean an HTML fragment, then auto-link what it contains.

    Args:
        body: the HTML text to process. Falsy values (``''``, ``None``) are
            returned unchanged without being parsed.

    Returns:
        The result of passing ``body`` through ``clean_html``,
        ``autolink_html`` and ``autolink_email``, in that order; or ``body``
        itself when it is empty.
    """
    # Nothing to clean. Assuming clean_html is lxml's (as the link below
    # suggests), parsing an empty document raises ParserError, so bail out
    # before reaching it.
    if not body:
        return body

    # validate HTML content
    # Additional options at http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
    body = clean_html(body)
    body = autolink_html(body)

    # emails too
    body = autolink_email(body)
    return body
def clean_body(self):
    """Clean the submitted ``body`` field and auto-link what it contains.

    Reads ``self.cleaned_data['body']`` (defaulting to ``''`` when the key is
    absent) and passes it through ``clean_html``, ``autolink_html`` and
    ``autolink_email``, in that order.

    Returns:
        The processed body, or the original falsy value (``''``/``None``)
        when there is nothing to process.
    """
    body = self.cleaned_data.get('body', '')

    # An absent or empty body must not reach the HTML parser. Assuming
    # clean_html is lxml's (as the link below suggests), an empty document
    # raises ParserError there.
    if not body:
        return body

    # validate HTML content
    # Additional options at http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
    body = clean_html(body)
    body = autolink_html(body)

    # emails too
    body = autolink_email(body)
    return body
def clean_description(self):
    """Clean the submitted ``description`` field and store the result back.

    Reads ``self.cleaned_data['description']`` (defaulting to ``''`` when the
    key is absent). A non-empty value is passed through ``clean_html``,
    ``autolink_html`` and ``autolink_email``, in that order; an empty value
    skips all three steps.

    Returns:
        The value that was written to ``self.cleaned_data['description']``.
    """
    cleaned = self.cleaned_data
    description = cleaned.get('description', '')

    if description:
        # validate HTML content, then link URLs and finally e-mail addresses
        # Additional options at http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
        description = autolink_email(autolink_html(clean_html(description)))

    cleaned['description'] = description
    return description
def clean_comment(self):
    """Clean the ``comment`` entry of ``self.cleaned_data`` in place.

    The entry is rewritten three times: with the output of ``clean_html``,
    then of ``autolink_html``, then of ``autolink_email``. Each step takes
    the value the previous step stored.

    Returns:
        The value left in ``self.cleaned_data['comment']`` after the last step.

    Raises:
        KeyError: if ``cleaned_data`` has no ``'comment'`` entry.
    """
    field = 'comment'
    cleaned = self.cleaned_data

    # Clean the markup before any links are generated from it.
    cleaned[field] = clean_html(cleaned[field])
    cleaned[field] = autolink_html(cleaned[field])
    cleaned[field] = autolink_email(cleaned[field])

    return cleaned[field]
def _decode_text_part(part):
    """Return the text of a non-multipart message part, or None if it has no payload.

    The payload is transfer-decoded by ``get_payload(decode=True)`` and then
    converted to text using the charset declared on the part itself.
    """
    payload = part.get_payload(decode=True)
    if payload is None or not isinstance(payload, bytes):
        return payload

    charset = part.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset)
    except (LookupError, UnicodeDecodeError):
        # Unknown or mislabelled charset: keep the message usable instead of
        # failing, replacing whatever cannot be decoded.
        return payload.decode('utf-8', 'replace')


def _strip_quoted_reply(body):
    """Remove quoted reply text, as added by common mail clients, from *body*."""
    # http://stackoverflow.com/questions/278788/parse-email-content-from-quoted-reply may be a better way
    quote_markers = (
        # gmail puts their quotes in <div class="gmail_quote">
        r'<div(?:.*)gmail_quote(?:.*)>',
        # thunderbird uses <blockquote type="cite">
        r'<blockquote(?:.*)cite(?:.*)>',
        # outlook is just a pain
        r'<(?:.*)style(?:.*)border-top: #B5C4DF(?:.*)>',
    )
    # Keep only what precedes the first occurrence of each marker.
    for marker in quote_markers:
        body = re.split(marker, body)[0]

    # takes any block of end-of-message >-prefix lines, plus the one line preceding it
    quoting_text = r'<br/>\n*(.*)<br/>\n*(>(.*)<br/>\n*)+[(?:<br/>)\n]*$'
    return re.sub(quoting_text, '', body)


def parse_body(msg):
    """Extract the author's own text from an e-mail message as cleaned HTML.

    For a multipart message every part is visited; the last ``text/html``
    part is preferred, falling back to the last ``text/plain`` part with its
    newlines turned into ``<br/>``. For a single-part message the payload is
    used with the same newline conversion. Quoted reply text is then stripped
    and the result is passed through ``clean_html``, ``autolink_html`` and
    ``autolink_email``.

    Args:
        msg: a parsed message offering the ``email.message.Message`` API
            (``is_multipart``, ``walk``, ``get_payload``, ``get_content_type``,
            ``get_content_charset``).

    Returns:
        The processed body text.

    Raises:
        BounceError: if the message has no usable text part, or nothing is
            left once the quoted reply text has been removed.
    """
    if msg.is_multipart():
        html = None
        txt = None
        # walk() visits nested multiparts at any depth; later parts override
        # earlier ones of the same type.
        for part in msg.walk():
            content_type = part.get_content_type()
            if content_type == 'text/html':
                html = _decode_text_part(part)
            elif content_type == 'text/plain':
                txt = _decode_text_part(part)

        if html:
            body = html
        elif txt:
            body = txt.replace("\n", "<br/>\n")
        else:
            body = None
    else:
        # NOTE(review): the newline conversion is applied whatever the
        # content type is, so a single-part text/html message gets it too.
        body = _decode_text_part(msg)
        if body:
            body = body.replace("\n", "<br/>\n")

    # strip out reply text (skipped when there is no body to search)
    if body:
        body = _strip_quoted_reply(body)

    if not body:
        raise BounceError("I wasn't able to understand the email you sent; it was in a format that is not supported.")

    # validate HTML content
    # Additional options at http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
    body = clean_html(body)
    body = autolink_html(body)
    body = autolink_email(body)

    # TODO: strip out in-reference-to text in replies?

    return body