Ejemplo n.º 1
0
 def clean_comment(self):
     """Run the ``comment`` value through the HTML filters and return it.

     The value stored under ``self.cleaned_data['comment']`` is passed
     through clean_html, autolink_html and autolink_email in that order;
     the result of every step is written back to ``cleaned_data``.
     """
     data = self.cleaned_data
     for transform in (clean_html, autolink_html, autolink_email):
         data['comment'] = transform(data['comment'])
     return data['comment']
Ejemplo n.º 2
0
def html_clean(body):
    """Pass *body* through clean_html, autolink_html and autolink_email.

    The three filters are applied in that order and the final result is
    returned.  Cleaning options are described at
    http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
    """
    cleaned = clean_html(body)
    with_url_links = autolink_html(cleaned)
    # email addresses are linked in a separate pass
    with_all_links = autolink_email(with_url_links)
    return with_all_links
Ejemplo n.º 3
0
def html_clean(body):
    """Return *body* after HTML cleaning and URL / email auto-linking.

    Equivalent to applying clean_html first, then autolink_html, then
    autolink_email.  See
    http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html for the
    available cleaning options.
    """
    # innermost call runs first: clean, then link URLs, then link emails
    return autolink_email(autolink_html(clean_html(body)))
Ejemplo n.º 4
0
 def clean_body(self):
     """Clean the ``body`` value and turn URLs / email addresses into links.

     Reads ``self.cleaned_data['body']`` (a missing key is treated as an
     empty string) and returns it after clean_html, autolink_html and
     autolink_email have been applied in that order.

     A falsy value (missing, empty or None) is returned unchanged without
     being handed to the cleaner.
     """
     body = self.cleaned_data.get('body', '')

     if not body:
         # Nothing to sanitize.  NOTE(review): assuming clean_html is
         # lxml.html.clean's (as the link below suggests), its parser
         # rejects an empty document, so the empty case must not reach it.
         return body

     # validate HTML content
     # Additional options at http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
     body = clean_html(body)
     body = autolink_html(body)

     # emails too
     body = autolink_email(body)

     return body
Ejemplo n.º 5
0
    def clean_body(self):
        """Return the ``body`` value cleaned and with links made clickable.

        The value is looked up in ``self.cleaned_data`` (defaulting to ''
        when the key is absent) and passed through clean_html,
        autolink_html and autolink_email, in that order.

        When there is no body to process (missing, empty or None) the value
        is returned as-is and none of the filters is called.
        """
        body = self.cleaned_data.get('body', '')
        if not body:
            # Skip the filters for an empty value.  NOTE(review): presumably
            # clean_html is lxml's, whose parser raises on an empty
            # document -- confirm against the file's imports.
            return body

        # validate HTML content
        # Additional options at http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
        body = clean_html(body)
        body = autolink_html(body)

        # emails too
        body = autolink_email(body)

        return body
Ejemplo n.º 6
0
 def clean_description(self):
     """Clean the ``description`` value, if one was supplied.

     A falsy description (missing key, empty string, None) is returned
     as-is and ``cleaned_data`` is left untouched.  Otherwise the text goes
     through clean_html, autolink_html and autolink_email, is stored back
     under ``cleaned_data['description']`` and returned.
     """
     description = self.cleaned_data.get('description', '')
     if not description:
         return description

     # HTML cleaning options:
     # http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
     cleaned = clean_html(description)
     linked = autolink_html(cleaned)
     # email addresses get linked as well
     linked = autolink_email(linked)

     self.cleaned_data['description'] = linked
     return linked
Ejemplo n.º 7
0
 def clean_comment(self):
     """Filter the ``comment`` entry of ``cleaned_data`` in place.

     Applies clean_html, then autolink_html, then autolink_email to
     ``self.cleaned_data['comment']``, storing the result after each step,
     and returns the final value.
     """
     key = 'comment'
     cleaned = self.cleaned_data
     cleaned[key] = clean_html(cleaned[key])
     cleaned[key] = autolink_html(cleaned[key])
     cleaned[key] = autolink_email(cleaned[key])
     return cleaned[key]
Ejemplo n.º 8
0
def _decode_part(part, fallback_charset=None):
    """Return the text of one non-multipart message part, or None.

    ``get_payload(decode=True)`` undoes the transfer encoding; the raw bytes
    are then decoded with the part's own charset, falling back to
    *fallback_charset*.  Decoding is best-effort: a missing, unknown or
    wrong charset degrades to UTF-8 with replacement characters instead of
    raising.  None is returned when the part has no payload.
    """
    raw = part.get_payload(decode=True)
    if not isinstance(raw, bytes):
        # None (no payload) or already text: nothing to decode.
        return raw

    charset = part.get_content_charset() or fallback_charset
    if charset:
        try:
            return codecs.getdecoder(charset)(raw)[0]
        except (LookupError, TypeError, ValueError):
            # Unknown codec, non-text codec, or bytes that do not match the
            # declared charset -- fall through to the lossy default below.
            pass
    return raw.decode('utf-8', 'replace')


def parse_body(msg):
    """Extract the newly written part of an email as cleaned HTML.

    For a multipart message every nested part is visited (at any depth);
    the last ``text/html`` part is preferred, otherwise the last
    ``text/plain`` part is used with newlines converted to ``<br/>``.  A
    non-multipart message is always treated as plain text.  Each part is
    decoded with its own charset before any text processing happens.

    Quoted reply text (Gmail, Thunderbird, Outlook, and ">"-prefixed plain
    text quoting) is then stripped, and what remains is passed through
    clean_html, autolink_html and autolink_email.

    Args:
        msg: an object exposing the ``email.message.Message`` API
            (is_multipart, walk, get_payload, get_content_type,
            get_content_charset).

    Returns:
        The cleaned HTML body.

    Raises:
        BounceError: if the message contains no usable text, or nothing is
            left once the quoted reply has been removed.
    """
    unsupported = "I wasn't able to understand the email you sent; it was in a format that is not supported."

    # Multipart containers rarely carry a charset themselves; it is only
    # used as a fallback for parts that do not declare one.
    fallback_charset = msg.get_content_charset()

    body = None
    if msg.is_multipart():
        html = None
        txt = None

        # walk() yields the container itself and every nested part, so
        # bodies buried several multipart levels deep are found as well.
        for part in msg.walk():
            content_type = part.get_content_type()
            if content_type == 'text/html':
                html = _decode_part(part, fallback_charset)
            elif content_type == 'text/plain':
                txt = _decode_part(part, fallback_charset)

        if html:
            body = html
        elif txt:
            body = txt.replace("\n", "<br/>\n")
    else:
        body = _decode_part(msg)
        if body:
            body = body.replace("\n", "<br/>\n")

    # Bail out before the regexes below: they cannot operate on None.
    if not body:
        raise BounceError(unsupported)

    # strip out reply text
    # http://stackoverflow.com/questions/278788/parse-email-content-from-quoted-reply may be a better way
    quoting_gmail = r'<div(?:.*)gmail_quote(?:.*)>'     # gmail puts their quotes in <div class="gmail_quote">
    body = re.split(quoting_gmail, body)[0]

    quoting_thunderbird = r'<blockquote(?:.*)cite(?:.*)>'   # thunderbird uses <blockquote type="cite">
    body = re.split(quoting_thunderbird, body)[0]

    quoting_outlook = r'<(?:.*)style(?:.*)border-top: #B5C4DF(?:.*)>'   # outlook is just a pain
    body = re.split(quoting_outlook, body)[0]

    # takes any block of end-of-message >-prefix lines, plus the one line
    # preceding it; the trailing group absorbs blank lines after the quote
    # (it is a real alternation now, not a character class).
    quoting_text = r'<br/>\n*(.*)<br/>\n*(>(.*)<br/>\n*)+(?:<br/>|\n)*$'
    body = re.sub(quoting_text, '', body)

    # The message may have consisted of nothing but quoted text.
    if not body:
        raise BounceError(unsupported)

    # validate HTML content
    # Additional options at http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
    body = clean_html(body)
    body = autolink_html(body)
    body = autolink_email(body)

    # TODO: strip out in-reference-to text in replies?

    return body