Esempio n. 1
0
 def sanitize_html(answer):
     """
     Clean up a student response so it can be shown as HTML.

     The response is autolinked, passed through ``WhiteListCleaner.clean_html``,
     stripped of a leading ``<p>`` and a trailing ``</p>``, and has its
     newlines replaced by ``<br/>``.  If any of those steps raises, the
     response is returned as it stood when the error happened.

     answer - any string
     return - the cleaned string (or the uncleaned response on failure)
     """
     allowed_tags = (
         'embed',
         'iframe',
         'a',
         'img',
         'br',
     )
     try:
         answer = autolink_html(answer)
         sanitizer = WhiteListCleaner(
             style=True,
             links=True,
             add_nofollow=False,
             page_structure=True,
             safe_attrs_only=True,
             whitelist_tags=allowed_tags,
         )
         cleaned = sanitizer.clean_html(answer)
         cleaned = re.sub(r'^<p>', '', cleaned)
         cleaned = re.sub(r'</p>$', '', cleaned)
         return re.sub("\n", "<br/>", cleaned)
     except Exception:
         # NOTE(review): this fallback hands back markup that was never
         # cleaned -- confirm that failing open is acceptable here.
         return answer
Esempio n. 2
0
 def clean_comment(self):
     """
     Clean the submitted ``comment`` field.

     The value is passed, in order, through ``clean_html``, ``autolink_html``
     and ``autolink_email``; every intermediate result is written back to
     ``cleaned_data['comment']``.  Returns the final value.
     """
     fields = self.cleaned_data
     for transform in (clean_html, autolink_html, autolink_email):
         fields['comment'] = transform(fields['comment'])
     return fields['comment']
Esempio n. 3
0
 def clean_content(self):
     """ Apply the usual HTML cleanup to the ``content`` field.

     The value goes through ``clean_html`` and then ``autolink_html``; the
     result is stored in ``cleaned_data['content']`` and returned.
     Open question: should the markup field always be forced to "html"?
     """
     fields = self.cleaned_data
     for transform in (clean_html, autolink_html):
         fields['content'] = transform(fields['content'])
     return fields['content']
Esempio n. 4
0
 def clean_content(self):
     """ Run the standard HTML cleanup over the ``content`` field.

     ``clean_html`` is applied first, ``autolink_html`` second; the field in
     ``cleaned_data`` is updated after each step and its final value returned.
     Open question: should the markup field always be forced to "html"?
     """
     key = 'content'
     store = self.cleaned_data
     store[key] = clean_html(store[key])
     store[key] = autolink_html(store[key])
     return store[key]
def sanitize(html):
    """
    Clean *html* and link up URLs, email addresses and @handles.

    A falsy *html* is returned untouched.  Otherwise the markup is cleaned
    with a ``Cleaner`` restricted to ``_safe_tags`` / ``_safe_attrs`` and
    autolinked.  The result is then split into tags and text; text that is
    not inside an ``<a ...>`` element gets email addresses wrapped in
    ``mailto:`` links and remaining ``@name`` tokens wrapped in twitter.com
    links.
    """
    if not html:
        return html
    cleaner = Cleaner(allow_tags=_safe_tags, safe_attrs_only=True, safe_attrs=_safe_attrs, remove_unknown_tags=False)
    html = autolink_html(cleaner.clean_html(html))

    pieces = []
    inside_anchor = False
    for chunk in re.split('(<.*?>)', html):
        if not chunk:
            continue

        if chunk[0] == '<' or inside_anchor:
            # Tags, and anything between <a...> and </a...>, pass through as-is.
            pieces.append(chunk)
            if chunk[0:2].lower() == '<a':
                inside_anchor = True
            elif chunk[0:3].lower() == '</a':
                inside_anchor = False
            continue

        with_mail_links = re.sub("([a-zA-Z0-9_\\-+\\.\']*[a-zA-Z0-9]@[0-9a-zA-Z\\-\\.]+\\.[a-zA-Z]{2,})", '<a href="mailto:\\1">\\1</a>', chunk)

        # The email pass introduced new tags; split again and run the handle
        # pattern over every fragment of the result.
        pieces.append(''.join(
            re.sub("(?<![a-zA-Z0-9])@([0-9a-zA-Z_]{1,15})", '<a href="https://twitter.com/\\1">@\\1</a>', fragment)
            for fragment in re.split('(<.*?>)', with_mail_links)
        ))

    return ''.join(pieces)
Esempio n. 6
0
def sanitize_html(html_text: str) -> str:
    """Return *html_text* cleaned by ``clean_html`` with URLs turned into anchors.

    When the result contains exactly two ``<`` characters, any ``<p>`` /
    ``</p>`` tags in it are removed.
    """
    linked = str(autolink_html(clean_html(html_text)))
    # clean_html builds HTML elements, so plain text comes back wrapped in a
    # <p> tag.  Drop that wrapper so users who typed no HTML are not confused.
    if linked.count("<") != 2:
        return linked
    return re.sub("</?p>", "", linked)
Esempio n. 7
0
def clean_up_html(html, method='html'):
    """
    Autolink and clean *html*, demote headings and open links in a new tab.

    html   -- markup to process
    method -- serialisation method handed to ``lxml.html.tostring``
    return -- the serialised tree, encoded as UTF-8

    Every ``<h1>`` in the tree becomes an ``<h2>`` and every ``<a>`` gets
    ``target="_blank"``.
    """
    html = autolink_html(html, link_regexes=_link_regexes)
    html = lxml.html.fromstring(cleaner.clean_html(html))
    # iter() walks the whole tree, root included.  findall('h1') only matched
    # direct children of the root, so nested headings were left as <h1>.
    for h1 in list(html.iter('h1')):
        h1.tag = 'h2'
    for a in html.cssselect('a'):
        a.attrib['target'] = '_blank'
    return lxml.html.tostring(html, encoding='utf-8', method=method)
Esempio n. 8
0
def cleanup_chat_text(html):
    """
    Autolink chat text and make every link open in a new tab.

    The text is autolinked using ``_link_regexes``, parsed with lxml, each
    ``<a>`` element is given ``target="_blank"``, and the tree is serialised
    back to a UTF-8 decoded string.
    """
    linked = autolink_html(html, link_regexes=_link_regexes)
    tree = lxml.html.fromstring(linked)
    for anchor in tree.cssselect('a'):
        anchor.set('target', '_blank')

    serialized = lxml.html.tostring(tree)
    return serialized.decode('utf-8')
Esempio n. 9
0
def clean_input(comment):
    """
    Clean a comment and convert its newlines to ``<br/>``.

    Comments that do not contain the text ``href`` are autolinked first
    (skipping existing ``<a>`` elements).  The markup is then cleaned with a
    ``Cleaner`` limited to ``ALLOWED_TAGS`` that adds ``nofollow`` to links.
    """
    if 'href' in comment:
        markup = comment
    else:
        markup = autolink_html(comment, avoid_elements=['a'])

    sanitizer = Cleaner(add_nofollow=True, allow_tags=ALLOWED_TAGS,
                        remove_unknown_tags=False)
    return sanitizer.clean_html(markup).replace('\n', '<br/>')
Esempio n. 10
0
def clean_input(comment):
    """
    Return *comment* cleaned for display, with newlines as ``<br/>``.

    Autolinking (which leaves existing ``<a>`` elements alone) only happens
    when the comment has no ``href`` text in it.  Cleaning uses a ``Cleaner``
    restricted to ``ALLOWED_TAGS`` with ``add_nofollow`` enabled.
    """
    needs_autolink = 'href' not in comment
    markup = autolink_html(comment, avoid_elements=['a']) if needs_autolink else comment

    sanitizer = Cleaner(add_nofollow=True, allow_tags=ALLOWED_TAGS,
                        remove_unknown_tags=False)
    cleaned = sanitizer.clean_html(markup)
    return cleaned.replace('\n', '<br/>')
Esempio n. 11
0
    def clean_body(self):
        """
        Clean and autolink the ``body`` field (missing body counts as '').

        The result is stored in ``cleaned_data['body']`` and returned.
        """
        raw_body = self.cleaned_data.get('body', '')

        # Validate the HTML content.  Further cleaner options are described at
        # http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
        self.cleaned_data['body'] = autolink_html(clean_html(raw_body))
        return self.cleaned_data['body']
Esempio n. 12
0
    def clean_body(self):
        """
        Return the ``body`` field after HTML cleaning and autolinking.

        A missing field is treated as an empty string; the processed value is
        also written back to ``cleaned_data['body']``.
        """
        text = self.cleaned_data.get('body', '')

        # Validate the HTML content.  Further cleaner options are described at
        # http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
        for transform in (clean_html, autolink_html):
            text = transform(text)

        self.cleaned_data['body'] = text
        return self.cleaned_data['body']
Esempio n. 13
0
def html_clean(body):
  """
  Clean *body* and link up the URLs and email addresses in it.

  Applies ``clean_html``, ``autolink_html`` and ``autolink_email`` in that
  order.  Further cleaner options are described at
  http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
  """
  for transform in (clean_html, autolink_html, autolink_email):
    body = transform(body)
  return body
Esempio n. 14
0
def html_clean(body):
    """
    Return *body* cleaned, with URLs and then email addresses autolinked.

    Further cleaner options are described at
    http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
    """
    validated = clean_html(body)
    with_links = autolink_html(validated)
    return autolink_email(with_links)
Esempio n. 15
0
 def sanitize_html(answer):
     """
     Autolink and clean *answer*, falling back to the input on failure.

     The response is autolinked and cleaned with a ``Cleaner`` whose host
     whitelist is ``open_ended_image_submission.TRUSTED_IMAGE_DOMAINS``; a
     leading ``<p>`` and a trailing ``</p>`` are then stripped.

     answer - any string
     return - the cleaned string; if any step raises an ``Exception`` the
              response is returned as it stood at that point
     """
     try:
         answer = autolink_html(answer)
         cleaner = Cleaner(style=True, links=True, add_nofollow=False, page_structure=True, safe_attrs_only=True,
                           host_whitelist=open_ended_image_submission.TRUSTED_IMAGE_DOMAINS,
                           whitelist_tags={'embed', 'iframe', 'a', 'img'})
         cleaned = cleaner.clean_html(answer)
         cleaned = re.sub(r'</p>$', '', re.sub(r'^<p>', '', cleaned))
     except Exception:
         # Best-effort fallback.  ``Exception`` (rather than a bare except)
         # lets KeyboardInterrupt and SystemExit propagate.
         # NOTE(review): the fallback value has not been cleaned.
         cleaned = answer
     return cleaned
Esempio n. 16
0
 def sanitize_html(answer):
     """
     Return *answer* autolinked and cleaned, or the input if cleaning fails.

     Cleaning uses a ``Cleaner`` whose host whitelist is
     ``open_ended_image_submission.TRUSTED_IMAGE_DOMAINS``.  Afterwards a
     leading ``<p>`` and a trailing ``</p>`` are removed.

     answer - any string
     return - the cleaned string; when a step raises an ``Exception`` the
              response is returned as it stood at that point
     """
     trusted_hosts = open_ended_image_submission.TRUSTED_IMAGE_DOMAINS
     try:
         answer = autolink_html(answer)
         cleaner = Cleaner(style=True, links=True, add_nofollow=False, page_structure=True, safe_attrs_only=True,
                           host_whitelist=trusted_hosts,
                           whitelist_tags={'embed', 'iframe', 'a', 'img'})
         cleaned = cleaner.clean_html(answer)
         cleaned = re.sub(r'^<p>', '', cleaned)
         cleaned = re.sub(r'</p>$', '', cleaned)
     except Exception:
         # ``Exception`` instead of a bare except, so KeyboardInterrupt and
         # SystemExit are no longer swallowed.
         # NOTE(review): the fallback value has not been cleaned.
         cleaned = answer
     return cleaned
Esempio n. 17
0
def send_mail(subject=None, txtMessage=None, htmlMessage=None,
              fromemail=None, recipients=None, shortname=None,
              priority=None, context=None, use_template=True,
              lang='en'):
    """
    Queue an email by creating an ``Email`` record.

    subject      -- subject line
    txtMessage   -- plain-text body; when empty the HTML body is reused and
                    ``context['do_text_conversion']`` is set to True
    htmlMessage  -- HTML body; when empty it is derived from ``txtMessage``
                    (newlines to ``<br/>``, cleaned, autolinked)
    fromemail    -- sender address
    recipients   -- iterable of addresses, stored comma-joined
    shortname    -- optional short name, stored lower-cased
    priority     -- accepted but not used by this function
    context      -- template context dict; it is modified in place.  A new
                    dict is created per call when omitted.
    use_template -- render both bodies through the email templates
    lang         -- language code stored on the record

    Legacy calling convention: when ``htmlMessage`` is given without
    ``recipients``, ``htmlMessage`` is taken to be the sender and
    ``fromemail`` the recipients.
    """
    # A ``{}`` default would be shared between calls, and this function
    # writes to the context, so flags would leak from one email to the next.
    if context is None:
        context = {}

    # try to be backwards-compatible
    if htmlMessage and not recipients:
        recipients = fromemail
        fromemail = htmlMessage
        htmlMessage = None

    if not htmlMessage:
        htmlMessage = txtMessage.replace("\n", "<br/>")
        htmlMessage = clean_html(htmlMessage)
        htmlMessage = autolink_html(htmlMessage)

    if not txtMessage:
        txtMessage = htmlMessage
        context['do_text_conversion'] = True
        # TODO: do a fancy strip tags thing

    subject = force_unicode(subject)
    txtMessage = force_unicode(txtMessage)
    htmlMessage = force_unicode(htmlMessage)

    if use_template:
        if not context.get('do_text_conversion', None):
            context['do_text_conversion'] = False

        context['body'] = htmlMessage
        htmlMessage = loader.get_template("email_template.html").render(Context(context))

        context['body'] = txtMessage
        txtMessage = loader.get_template("email_template.txt").render(Context(context))

    recips = ",".join(recipients)

    # Only a truthy shortname is normalised; a falsy one is stored unchanged.
    if shortname:
        shortname = shortname.lower()
    Email.objects.create(recipients=recips,
                         shortName=shortname,
                         sender=fromemail,
                         subject=subject,
                         textMessage=txtMessage,
                         htmlMessage=htmlMessage,
                         lang=lang)
Esempio n. 18
0
  def fmt_part(self, part):
    """
    Render one text part as an ``('html', markup)`` tuple.

    The part's data is HTML-escaped and wrapped in ``<p class="TYPE">``.
    ``pgpbeginsigned`` parts get a "Get PGP key and Verify" submit button in
    front; for those and ``pgpsignature`` parts a hidden ``gpg_key_id`` input
    is appended when the text contains ``key ID <hex>``.  The paragraph is
    autolinked before being returned.
    """
    kind = part['type']
    body = escape_html(part['data'])
    if kind == 'pgpbeginsigned':
      body = ('<input type="submit" name="gpg_recvkey"'
              ' value="Get PGP key and Verify">' + body)
    if kind in ('pgpsignature', 'pgpbeginsigned'):
      found = re.search('key ID ([0-9A-Fa-f]+)', body)
      if found:
        body += ('<input type="hidden" name="gpg_key_id" value="0x%s">'
                 ) % found.group(1)

    paragraph = '<p class="%s">%s</p>' % (kind, body)
    return ('html', autolink_html(paragraph))
Esempio n. 19
0
  def fmt_part(self, part):
    """
    Turn a text part into an ``('html', markup)`` tuple for display.

    The escaped part data is placed inside ``<p class="TYPE">``.  Parts of
    type ``pgpbeginsigned`` are prefixed with a key-retrieval submit button;
    those and ``pgpsignature`` parts also get a hidden ``gpg_key_id`` field
    when a ``key ID <hex>`` string is present.  The markup is autolinked.
    """
    part_type = part['type']
    content = escape_html(part['data'])
    signed_start = part_type == 'pgpbeginsigned'
    if signed_start:
      content = ('<input type="submit" name="gpg_recvkey"'
                 ' value="Get PGP key and Verify">' + content)
    if signed_start or part_type == 'pgpsignature':
      key_match = re.search('key ID ([0-9A-Fa-f]+)', content)
      if key_match:
        hidden = '<input type="hidden" name="gpg_key_id" value="0x%s">'
        content += hidden % key_match.group(1)

    return ('html', autolink_html('<p class="%s">%s</p>' % (part_type, content)))
Esempio n. 20
0
 def save(self, force_insert=False, force_update=False):
     """
     Clean the body, attach the parent group, save, and emit ``post_save``.

     ``self.body`` is run through ``clean_html`` and ``autolink_html`` (more
     cleaner options: http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html),
     ``parent_group`` is set to the ``BaseGroup`` whose id is ``object_id``,
     and after saving a ``post_save`` signal is sent with ``Topic`` as sender
     and the freshly loaded ``GroupTopic`` as instance.
     """
     for transform in (clean_html, autolink_html):
         self.body = transform(self.body)

     self.parent_group = BaseGroup.objects.get(id=self.object_id)

     super(GroupTopic, self).save(force_insert, force_update)
     reloaded = GroupTopic.objects.get(id=self.id)
     post_save.send(sender=Topic, instance=reloaded)
Esempio n. 21
0
    def save(self, force_insert=False, force_update=False):
        """
        Attach the parent group, clean any non-blank content, then save.

        ``parent_group`` is set to the ``BaseGroup`` whose id is
        ``object_id``.  Content that is non-empty after stripping whitespace
        is run through ``clean_html`` and ``autolink_html`` (more cleaner
        options: http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html).
        """
        self.parent_group = BaseGroup.objects.get(id=self.object_id)

        has_text = self.content and self.content.strip()
        if has_text:
            for transform in (clean_html, autolink_html):
                self.content = transform(self.content)

        super(Whiteboard, self).save(force_insert, force_update)
Esempio n. 22
0
 def save(self, force_insert=False, force_update=False):
     """
     Save the whiteboard after setting its group and cleaning its content.

     The ``BaseGroup`` with id ``object_id`` becomes ``parent_group``.  When
     the content holds more than whitespace it is passed through
     ``clean_html`` and then ``autolink_html`` (more cleaner options:
     http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html).
     """
     owner = BaseGroup.objects.get(id=self.object_id)
     self.parent_group = owner

     if self.content and self.content.strip():
         for transform in (clean_html, autolink_html):
             self.content = transform(self.content)

     super(Whiteboard, self).save(force_insert, force_update)
Esempio n. 23
0
def find_links_in_message(text, name, avatar):
    link_list = [i for i in iterlinks(autolink_html(text))]
    extracted_links = list()
    for link in link_list:
        extract_dict = extract_link(link)
        if extract_dict:
            extracted_links.append(extract_dict)

    txt = autolink_html(cleaner.clean_html(text))
    txt = re.sub(ur'(#[а-яА-ЯёЁA-Za-z0-9-]+)', add_user_link, txt)
    txt = re.sub(ur'(\$[а-яА-ЯёЁA-Za-z0-9-]+)', add_room_link, txt)
    user_info = render_template(
        'user_message.html',
        avatar=avatar,
        name=name,
        time=datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S Z"),
        sid=md5(name + datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S Z")).hexdigest(),
        txt=Markup(txt),
        extracted_links=extracted_links
    )

    return user_info
    def post_to_wordpress(self, url, blog, author, hour, format=True):
        """
        Publish this item as a scheduled ("future") WordPress post via wp-cli.

        Requires wp-cli (https://github.com/wp-cli/wp-cli) and lxml.

        url    -- base URL of the WordPress site; when falsy, the ``url``
                  attribute of the ``SITES`` config section is used
        blog   -- appended to the site URL (``<url>/<blog>``) for ``--url``
        author -- user handed to ``wp user get``; the ``ID`` of the returned
                  user becomes the post author
        hour   -- object with ``tm_hour`` / ``tm_min`` giving the time of day
        format -- when true, ``self.prepare_formatting()`` is called first

        Raises ImportError when lxml is not installed.

        The wp-cli commands are run as argument lists without a shell, so the
        title, content and author cannot be interpreted as shell syntax.
        NOTE(review): the configured paths are therefore no longer subject to
        shell expansion (e.g. ``$VAR``) -- confirm no deployment relies on it.
        """
        import json
        import shlex

        if format:
            self.prepare_formatting()
        path_to_wordpress = config_get_section_attribute('SITES', 'path_to_docroot', required=True)
        path_to_wpcli = config_get_section_attribute('SITES', 'path_to_wpcli', required=True)
        if not url:
            wordpress_url = config_get_section_attribute('SITES', 'url', required=True)
        else:
            wordpress_url = url

        # The configured wp-cli entry may itself be a command with arguments.
        wpcli = shlex.split(path_to_wpcli)
        path_option = '--path={}'.format(path_to_wordpress)

        # Get the user information first
        lookup = wpcli + [path_option, 'user', 'get', str(author), '--format=json']
        to_call = subprocess.Popen(lookup, stdout=subprocess.PIPE)
        result, _ = to_call.communicate()
        user = json.loads(result.decode())

        # Now clean up the html: add links where missing and remove errant tags
        try:
            from lxml.html.clean import Cleaner
            from lxml.html.clean import autolink_html
        except ImportError:
            click.secho('We need lxml!', fg='red')
            # Continuing would only fail later with a NameError.
            raise

        content = self.get_html()
        cleaner = Cleaner(remove_tags=['p', 'div'])  # Moodle's editor has loads of lonely p and div tags
        content = cleaner.clean_html(content)
        content = autolink_html(content)
        # No shell is involved any more, so apostrophes need no escaping.
        content = content.replace('\r', ' ')

        # Zero-padded to the YYYY-MM-DD HH:MM:SS shape.
        date_as_string = '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:00'.format(
            self.date.year, self.date.month, self.date.day, hour.tm_hour, hour.tm_min)

        create = wpcli + [
            'post', 'create',
            path_option,
            '--post_type=post',
            '--post_title={}'.format(self.get_subject()),
            '--post_content={}'.format(content),
            '--post_author={}'.format(user['ID']),
            '--post_status=future',
            '--post_date={}'.format(date_as_string),
            '--url={}/{}'.format(wordpress_url, blog),
        ]
        subprocess.call(create)
Esempio n. 25
0
 def clean_description(self):
     """
     Clean the ``description`` field and link up URLs and email addresses.

     A missing or empty description is returned as-is without touching
     ``cleaned_data``.  Otherwise the text goes through ``clean_html``,
     ``autolink_html`` and ``autolink_email`` (more cleaner options:
     http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html), is stored
     back under ``cleaned_data['description']`` and returned.
     """
     body = self.cleaned_data.get('description', '')
     if not body:
         return body

     for transform in (clean_html, autolink_html, autolink_email):
         body = transform(body)

     self.cleaned_data['description'] = body
     return body
Esempio n. 26
0
 def save(self, force_insert=False, force_update=False):
     """
     Derive the slug, attach the parent group, clean the description, save.

     The slug is ``slugify(self.title)``.  ``parent_group`` is set to the
     ``BaseGroup`` whose id is ``object_id`` (the generic foreign key could
     arguably be dropped altogether).  The description is passed through
     ``clean_html`` and ``autolink_html``; more cleaner options:
     http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
     """
     self.slug = slugify(self.title)

     self.parent_group = BaseGroup.objects.get(id=self.object_id)

     for transform in (clean_html, autolink_html):
         self.description = transform(self.description)

     super(Event, self).save(force_insert, force_update)
Esempio n. 27
0
    def display_message(self, email, tree, raw=False, sep='', fd=None):
        """
        Output a message either raw or as buffered HTML fragments.

        raw=True: every line of ``email.get_file()`` is decoded (UTF-8, then
        ISO-8859-1) and passed to ``self.say``.

        raw=False: ``('html', markup)`` tuples are appended to
        ``self.buffered_html``: a header block (From/Subject/To/Cc), then the
        text parts (via ``fmt_part``) or, when there are none, the autolinked
        HTML parts, then a list of attachments.  A part whose data equals the
        previously shown part's data is skipped.

        ``sep`` is accepted but not used here.
        """
        if raw:
            for line in email.get_file().readlines():
                try:
                    line = line.decode('utf-8')
                except UnicodeDecodeError:
                    try:
                        line = line.decode('iso-8859-1')
                    except:
                        line = '(MAILPILE DECODING FAILED)\n'
                self.say(line, newline='', fd=fd)
            return

        def emit(markup):
            self.buffered_html.append(('html', markup))

        emit('<div class=headers>')
        for hdr in ('From', 'Subject', 'To', 'Cc'):
            value = email.get(hdr, '')
            if not value:
                continue
            emit('<b>%s:</b> %s<br>' % (hdr, escape_html(value)))
        emit('</div><br>')

        previous = '<bogus>'
        if tree['text_parts']:
            emit('<div class="message plain">')
            for part in tree['text_parts']:
                if part['data'] != previous:
                    self.buffered_html.append(self.fmt_part(part))
                    previous = part['data']
        else:
            emit('<div class="message html">')
            for part in tree['html_parts']:
                if part['data'] != previous:
                    emit(autolink_html(part['data']))
                    previous = part['data']

        if tree['attachments']:
            emit('</div><div class="attachments"><ul>')
            for att in tree['attachments']:
                # NOTE(review): attachment fields are interpolated as given;
                # confirm they are escaped before they reach this point.
                desc = (
                    '<a href="./att:%(count)s">Attachment: %(filename)s</a> '
                    '(%(mimetype)s, %(length)s bytes)') % att
                emit('<li>%s</li>' % desc)
            emit('</ul>')
        emit('</div>')
Esempio n. 28
0
def clean_input(comment):
    """
    Clean a comment, converting newlines to ``<br/>``.

    Comments without the text ``href`` are autolinked first (existing ``<a>``
    elements are skipped).  Cleaning uses a ``Cleaner`` limited to
    ``ALLOWED_TAGS`` with ``add_nofollow`` enabled.

    Raises ``logic.ValidationError`` when the failure's type name is
    ``ParserError``.  Any other exception is logged at debug level and the
    function returns None.
    """
    try:
        if 'href' in comment:
            markup = comment
        else:
            markup = autolink_html(comment, avoid_elements=['a'])

        sanitizer = Cleaner(add_nofollow=True,
                            allow_tags=ALLOWED_TAGS,
                            remove_unknown_tags=False)
        return sanitizer.clean_html(markup).replace('\n', '<br/>')
    except Exception as e:
        error_name = type(e).__name__
        if error_name == "ParserError":
            raise logic.ValidationError("Comment text is required")

        message = "An exception of type {0} occurred. Arguments:\n{1!r}".format(error_name, e.args)
        log.debug(message)
        return None
Esempio n. 29
0
  def display_message(self, email, tree, raw=False, sep='', fd=None):
    """
    Show a message as raw text or queue it as HTML fragments.

    With ``raw`` set, each line read from ``email.get_file()`` is decoded as
    UTF-8 (falling back to ISO-8859-1) and sent to ``self.say``.

    Otherwise ``('html', markup)`` tuples are appended to
    ``self.buffered_html``: the From/Subject/To/Cc headers, the text parts
    formatted by ``fmt_part`` (or the autolinked HTML parts if there are no
    text parts) and finally the attachment list.  A part is skipped when its
    data equals that of the part shown just before it.

    ``sep`` is accepted but not used here.
    """
    if raw:
      for line in email.get_file().readlines():
        try:
          line = line.decode('utf-8')
        except UnicodeDecodeError:
          try:
            line = line.decode('iso-8859-1')
          except:
            line = '(MAILPILE DECODING FAILED)\n'
        self.say(line, newline='', fd=fd)
      return

    def queue(markup):
      self.buffered_html.append(('html', markup))

    queue('<div class=headers>')
    for hdr in ('From', 'Subject', 'To', 'Cc'):
      value = email.get(hdr, '')
      if not value:
        continue
      queue('<b>%s:</b> %s<br>' % (hdr, escape_html(value)))
    queue('</div><br>')

    shown = '<bogus>'
    if tree['text_parts']:
      queue('<div class="message plain">')
      for part in tree['text_parts']:
        if part['data'] != shown:
          self.buffered_html.append(self.fmt_part(part))
          shown = part['data']
    else:
      queue('<div class="message html">')
      for part in tree['html_parts']:
        if part['data'] != shown:
          queue(autolink_html(part['data']))
          shown = part['data']

    if tree['attachments']:
      queue('</div><div class="attachments"><ul>')
      for att in tree['attachments']:
        # NOTE(review): attachment fields are interpolated as given; confirm
        # they are escaped before they reach this point.
        desc = ('<a href="./att:%(count)s">Attachment: %(filename)s</a> '
                '(%(mimetype)s, %(length)s bytes)') % att
        queue('<li>%s</li>' % desc)
      queue('</ul>')
    queue('</div>')
Esempio n. 30
0
def sanitize(html):
    """
    Return *html* cleaned, with URLs, email addresses and @handles linked.

    Falsy input is returned unchanged.  The markup is first cleaned by a
    ``Cleaner`` limited to ``_safe_tags`` / ``_safe_attrs`` and autolinked.
    It is then walked tag by tag: text outside ``<a ...>`` elements has email
    addresses turned into ``mailto:`` links, after which any remaining
    ``@name`` tokens become twitter.com links.
    """
    if not html:
        return html
    cleaner = Cleaner(allow_tags=_safe_tags,
                      safe_attrs_only=True,
                      safe_attrs=_safe_attrs,
                      remove_unknown_tags=False)
    html = autolink_html(cleaner.clean_html(html))

    result = []
    in_anchor = False
    for token in re.split('(<.*?>)', html):
        if not token:
            continue

        if token[0] == '<' or in_anchor:
            # Copy tags, and everything between <a...> and </a...>, verbatim.
            result.append(token)
            if token[0:2].lower() == '<a':
                in_anchor = True
            elif token[0:3].lower() == '</a':
                in_anchor = False
            continue

        token = re.sub(
            "([a-zA-Z0-9_\\-+\\.\']*[a-zA-Z0-9]@[0-9a-zA-Z\\-\\.]+\\.[a-zA-Z]{2,})",
            '<a href="mailto:\\1">\\1</a>', token)

        # Email linking added tags of its own; split again and apply the
        # handle pattern to each fragment.
        for fragment in re.split('(<.*?>)', token):
            result.append(re.sub("(?<![a-zA-Z0-9])@([0-9a-zA-Z_]{1,15})",
                                 '<a href="https://twitter.com/\\1">@\\1</a>',
                                 fragment))

    return ''.join(result)
Esempio n. 31
0
 def sanitize_html(answer):
     """
     Sanitize the HTML of a student response.

     Steps: autolink, clean with ``WhiteListCleaner``, drop a leading ``<p>``
     and a trailing ``</p>``, turn newlines into ``<br/>``.  Should any step
     raise, the response is returned in whatever state it had reached.

     answer - any string
     return - the cleaned string (or the uncleaned response on failure)
     """
     try:
         answer = autolink_html(answer)
         options = dict(
             style=True,
             links=True,
             add_nofollow=False,
             page_structure=True,
             safe_attrs_only=True,
             whitelist_tags=('embed', 'iframe', 'a', 'img', 'br',),
         )
         cleaned = WhiteListCleaner(**options).clean_html(answer)
         cleaned = re.sub(r'^<p>', '', cleaned)
         cleaned = re.sub(r'</p>$', '', cleaned)
         cleaned = re.sub("\n","<br/>", cleaned)
     except Exception:
         # NOTE(review): this fallback returns markup that was never cleaned
         # -- confirm that failing open is acceptable here.
         return answer
     return cleaned
Esempio n. 32
0
def send_mail(subject=None, txtMessage=None, htmlMessage=None,
              fromemail=None, recipients=None, shortname=None,
              priority=None, context=None, use_template=True,
              lang='en', cc=None, bcc=None,
              content_object=None, reply_to=None):
    """
    Queue an email by creating an ``Email`` record.

    subject        -- subject line
    txtMessage     -- plain-text body; when empty the HTML body is reused and
                      ``context['do_text_conversion']`` is set to True
    htmlMessage    -- HTML body; when empty it is derived from ``txtMessage``
                      (newlines to ``<br/>``, cleaned, autolinked)
    fromemail      -- sender address
    recipients     -- iterable of addresses, stored comma-joined
    shortname      -- optional short name, stored lower-cased
    priority       -- accepted but not used by this function
    context        -- template context dict; it is modified in place.  A new
                      dict is created per call when omitted.
    use_template   -- render both bodies through the full templates;
                      otherwise only the text body is rendered, through
                      ``email_template_clean.txt``
    lang           -- language code stored on the record
    cc, bcc        -- optional iterables of addresses, stored comma-joined
    content_object -- optional object attached to the record after creation
    reply_to       -- stored on the record

    Legacy calling convention: when ``htmlMessage`` is given and
    ``recipients`` is None, ``htmlMessage`` is taken to be the sender and
    ``fromemail`` the recipients.
    """
    # A ``{}`` default would be shared between calls, and this function
    # writes to the context, so flags would leak from one email to the next.
    if context is None:
        context = {}

    # try to be backwards-compatible
    if htmlMessage and recipients is None:
        recipients = fromemail
        fromemail = htmlMessage
        htmlMessage = None

    if not htmlMessage:
        htmlMessage = txtMessage.replace("\n", "<br/>")
        htmlMessage = clean_html(htmlMessage)
        htmlMessage = autolink_html(htmlMessage)

    if not txtMessage:
        txtMessage = htmlMessage
        context['do_text_conversion'] = True
        # TODO: do a fancy strip tags thing

    subject = force_unicode(subject)
    txtMessage = force_unicode(txtMessage)
    htmlMessage = force_unicode(htmlMessage)

    if not context.get('do_text_conversion', None):
        context['do_text_conversion'] = False
    if use_template:
        context['body'] = htmlMessage
        htmlMessage = loader.get_template("email_template.html").render(Context(context))

        context['body'] = txtMessage
        txtMessage = loader.get_template("email_template.txt").render(Context(context))

    else:
        context['body'] = txtMessage
        txtMessage = loader.get_template("email_template_clean.txt").render(Context(context))

    recips = ",".join(recipients)
    cc_string = ",".join(cc) if cc else None
    bcc_string = ",".join(bcc) if bcc else None

    # NOTE(review): the two message-id literals below look redacted.  They
    # contain no format specifiers, so the ``%`` formatting raises TypeError
    # as written; restore the real patterns before relying on this code.
    if content_object:
        type_id = ContentType.objects.get_for_model(content_object)
        message_id = '*****@*****.**' % (int(round(time.time())), type_id.id, content_object.id)
    else:
        message_id = '*****@*****.**' % int(round(time.time()))

    # Only a truthy shortname is normalised; a falsy one is stored unchanged.
    if shortname:
        shortname = shortname.lower()
    e = Email.objects.create(recipients=recips,
                             shortName=shortname,
                             sender=fromemail,
                             subject=subject,
                             textMessage=txtMessage,
                             htmlMessage=htmlMessage,
                             lang=lang,
                             cc=cc_string,
                             bcc=bcc_string,
                             reply_to=reply_to,
                             message_id=message_id)

    if content_object:
        e.content_object = content_object
        e.save()
Esempio n. 33
0
def send_mail(subject=None,
              txtMessage=None,
              htmlMessage=None,
              fromemail=None,
              recipients=None,
              shortname=None,
              priority=None,
              context=None,
              use_template=True,
              lang='en',
              cc=None,
              bcc=None):
    """
    Queue an email by creating an ``Email`` record.

    subject      -- subject line
    txtMessage   -- plain-text body; when empty the HTML body is reused and
                    ``context['do_text_conversion']`` is set to True
    htmlMessage  -- HTML body; when empty it is derived from ``txtMessage``
                    (newlines to ``<br/>``, cleaned, autolinked)
    fromemail    -- sender address
    recipients   -- iterable of addresses, stored comma-joined
    shortname    -- optional short name, stored lower-cased
    priority     -- accepted but not used by this function
    context      -- template context dict; it is modified in place.  A new
                    dict is created per call when omitted.
    use_template -- render both bodies through the full templates; otherwise
                    only the text body is rendered, through
                    ``email_template_clean.txt``
    lang         -- language code stored on the record
    cc, bcc      -- optional iterables of addresses, stored comma-joined

    Legacy calling convention: when ``htmlMessage`` is given and
    ``recipients`` is None, ``htmlMessage`` is taken to be the sender and
    ``fromemail`` the recipients.
    """
    # A ``{}`` default would be shared between calls, and this function
    # writes to the context, so flags would leak from one email to the next.
    if context is None:
        context = {}

    # try to be backwards-compatible
    if htmlMessage and recipients is None:
        recipients = fromemail
        fromemail = htmlMessage
        htmlMessage = None

    if not htmlMessage:
        htmlMessage = txtMessage.replace("\n", "<br/>")
        htmlMessage = clean_html(htmlMessage)
        htmlMessage = autolink_html(htmlMessage)

    if not txtMessage:
        txtMessage = htmlMessage
        context['do_text_conversion'] = True
        # TODO: do a fancy strip tags thing

    subject = force_unicode(subject)
    txtMessage = force_unicode(txtMessage)
    htmlMessage = force_unicode(htmlMessage)

    if not context.get('do_text_conversion', None):
        context['do_text_conversion'] = False
    if use_template:
        context['body'] = htmlMessage
        htmlMessage = loader.get_template("email_template.html").render(
            Context(context))

        context['body'] = txtMessage
        txtMessage = loader.get_template("email_template.txt").render(
            Context(context))

    else:
        context['body'] = txtMessage
        txtMessage = loader.get_template("email_template_clean.txt").render(
            Context(context))

    recips = ",".join(recipients)
    cc_string = ",".join(cc) if cc else None
    bcc_string = ",".join(bcc) if bcc else None

    # Only a truthy shortname is normalised; a falsy one is stored unchanged.
    if shortname:
        shortname = shortname.lower()
    Email.objects.create(recipients=recips,
                         shortName=shortname,
                         sender=fromemail,
                         subject=subject,
                         textMessage=txtMessage,
                         htmlMessage=htmlMessage,
                         lang=lang,
                         cc=cc_string,
                         bcc=bcc_string)
Esempio n. 34
0
def cleanhtml(html='', cleaner=None):
    """
    Parse *html* with ``soupparser``, clean it, autolink it and serialise it.

    cleaner -- object providing ``clean_html``; the module-level ``sanitizer``
               is used when this is falsy
    """
    document = soupparser.fromstring(html)
    active_cleaner = cleaner if cleaner else sanitizer
    linked = autolink_html(active_cleaner.clean_html(document))
    return lxml.html.tostring(linked)
Esempio n. 35
0
def clean_html(html, host_whitelist=()):
    """
    Clean *html* with a ``Bleacher`` and autolink the result.

    The input is wrapped in ``<body>...</body>`` before cleaning.
    ``host_whitelist`` is passed straight to the ``Bleacher`` constructor.
    """
    bleacher = Bleacher(host_whitelist=host_whitelist)
    wrapped = '<body>' + html + '</body>'
    return clean.autolink_html(bleacher.clean_html(wrapped))
Esempio n. 36
0
def autolink(html):
    """
    Autolink *html* and rewrite the anchors matched by ``autolink_reg`` so
    that they carry ``target="_blank"``.  Falsy input is returned unchanged.
    """
    if not html:
        return html
    return autolink_reg.sub(r'<a \1 target="_blank">', autolink_html(html))
Esempio n. 37
0
def clean_html(html, host_whitelist=()):
    """
    Return *html* cleaned by a ``Bleacher`` and then autolinked.

    The markup is enclosed in a ``<body>`` element for cleaning, and
    ``host_whitelist`` is forwarded to the ``Bleacher`` constructor.
    """
    sanitizer = Bleacher(host_whitelist=host_whitelist)
    document = '<body>' + html + '</body>'
    cleaned = sanitizer.clean_html(document)
    return clean.autolink_html(cleaned)
Esempio n. 38
0
 def clean_comment(self):
     """
     Clean and autolink the ``comment`` field.

     ``clean_html`` runs first and ``autolink_html`` second; the field in
     ``cleaned_data`` is updated after each step and its final value returned.
     """
     fields = self.cleaned_data
     for transform in (clean_html, autolink_html):
         fields['comment'] = transform(fields['comment'])
     return fields['comment']
Esempio n. 39
0
def cleanhtml(html='', cleaner=None):
    """
    Clean, autolink and serialise *html*.

    The input first passes through ``remove_control_chars`` and is parsed
    with ``soupparser``.  ``cleaner`` must provide ``clean_html``; the
    module-level ``sanitizer`` is used when it is falsy.
    """
    document = soupparser.fromstring(remove_control_chars(html))
    active_cleaner = cleaner or sanitizer
    cleaned = active_cleaner.clean_html(document)
    linked = autolink_html(cleaned)
    return lxml.html.tostring(linked)
Esempio n. 40
0
def cleanhtml(html=''):
    """
    Parse *html* with ``soupparser``, clean it with the module-level
    ``sanitizer``, autolink it and return the serialised result.
    """
    document = soupparser.fromstring(html)
    linked = autolink_html(sanitizer.clean_html(document))
    return lxml.html.tostring(linked)
Esempio n. 41
0
 def render(content):
     """Return *content* after passing it through ``autolink_html``."""
     linked = autolink_html(content)
     return linked
Esempio n. 42
0
def parse_body(msg):
    """
    Extract a cleaned HTML body from an email message, minus quoted replies.

    For multipart messages a ``text/html`` part is preferred over
    ``text/plain`` (one level of nested multipart containers is searched);
    plain text has ``<br/>`` inserted before each newline.  The body is
    decoded with the message's declared charset when possible.  Quoted reply
    text in the Gmail, Thunderbird, Outlook and ``>``-prefixed styles is
    stripped, and the remainder is cleaned and has its URLs and email
    addresses linked.

    msg -- object with the ``email.message.Message`` interface used below
    return -- the processed body
    raises BounceError -- when no body can be found, or when nothing is left
                          after the quoted text has been removed
    """
    unsupported = "I wasn't able to understand the email you sent; it was in a format that is not supported."
    body = None

    if msg.is_multipart():
        html = None
        txt = None

        for part in msg.get_payload():
            if part.is_multipart():
                for part2 in part.get_payload():
                    if part2.get_content_type() == 'text/html':
                        html = part2.get_payload(decode=True)
                    elif part2.get_content_type() == 'text/plain':
                        txt = part2.get_payload(decode=True)

            if part.get_content_type() == 'text/html':
                html = part.get_payload(decode=True)
            elif part.get_content_type() == 'text/plain':
                txt = part.get_payload(decode=True)

        if html:
            body = html
        elif txt:
            body = txt.replace("\n", "<br/>\n")

    else:
        body = msg.get_payload(decode=True)
        if body:
            body = body.replace("\n", "<br/>\n")

    # Without this guard a missing body reached re.split() below and failed
    # with a TypeError before the intended BounceError could be raised.
    if not body:
        raise BounceError(unsupported)

    try:
        decoder = codecs.getdecoder(msg.get_content_charset())
        body = decoder(body)[0]
    except Exception:
        # Best effort: an unknown or missing charset leaves the body as-is.
        pass

    # strip out reply text
    # http://stackoverflow.com/questions/278788/parse-email-content-from-quoted-reply may be a better way
    quoting_gmail = r'<div(?:.*)gmail_quote(?:.*)>'     # gmail puts their quotes in <div class="gmail_quote">
    body = re.split(quoting_gmail, body)[0]

    quoting_thunderbird = r'<blockquote(?:.*)cite(?:.*)>'   # thunderbird uses <blockquote type="cite">
    body = re.split(quoting_thunderbird, body)[0]

    quoting_outlook = r'<(?:.*)style(?:.*)border-top: #B5C4DF(?:.*)>'   # outlook is just a pain
    body = re.split(quoting_outlook, body)[0]

    quoting_text = r'<br/>\n*(.*)<br/>\n*(>(.*)<br/>\n*)+[(?:<br/>)\n]*$'        # takes any block of end-of-message >-prefix lines, plus the one line preceding it
    body = re.sub(quoting_text, '', body)

    # The whole message may have been quoted text.
    if not body:
        raise BounceError(unsupported)

    # validate HTML content
    # Additional options at http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html
    body = clean_html(body)
    body = autolink_html(body)
    body = autolink_email(body)

    # TODO: strip out in-reference-to text in replies?

    return body