def test_guess_minimum_encoding(self):
    """textutils - guess_minimum_encoding."""
    # Each entry pairs an input text with the (encoded text, charset name)
    # tuple that guess_minimum_encoding is expected to return for it.
    expectations = (
        ('patata', ('patata', 'ascii')),
        ('àèéìòù', ('\xe0\xe8\xe9\xec\xf2\xf9', 'latin1')),
        ('Ιθάκη', ('Ιθάκη', 'utf8')),
    )
    for text, expected in expectations:
        self.assertEqual(guess_minimum_encoding(text), expected)
def forge_email(fromaddr, toaddr, subject, content, html_content='',
                html_images=None, usebcc=False, header=None, footer=None,
                html_header=None, html_footer=None, ln=CFG_SITE_LANG,
                charset=None, replytoaddr="", attachments=None, bccaddr=""):
    """Prepare email. Add header and footer if needed.

    @param fromaddr: [string] sender
    @param toaddr: [string or list-of-strings] list of receivers (if string,
        then receivers are separated by ',')
    @param subject: [string] subject of the email
    @param content: [string] content of the email
    @param html_content: [string] html version of the email
    @param html_images: [dict] dictionary of image id, image path
    @param usebcc: [bool] True for using Bcc in place of To
    @param header: [string] None for the default header
    @param footer: [string] None for the default footer
    @param html_header: [string] None for the default HTML header
    @param html_footer: [string] None for the default HTML footer
    @param ln: language
    @param charset: [string] the content charset. By default is None which
        means to try to encode the email as ascii, then latin1 then utf-8.
    @param replytoaddr: [string or list-of-strings] to be used for the
        reply-to header of the email (if string, then receivers are
        separated by ',')
    @param attachments: list of paths of files to be attached.
        Alternatively, every element of the list could be a tuple:
        (filename, mimetype)
    @param bccaddr: [string or list-of-strings] to be used for BCC header
        of the email (if string, then receivers are separated by ',')
    @return: forged email as a string

    Errors while reading an inline image propagate to the caller. A failure
    while preparing an attachment is reported through register_exception and
    that attachment is skipped.
    """
    if html_images is None:
        html_images = {}

    # Wrap the plain-text body with the given (or default) header and footer.
    if header is None:
        content = email_header(ln) + content
    else:
        content = header + content
    if footer is None:
        content += email_footer(ln)
    else:
        content += footer

    if charset is None:
        (content, content_charset) = guess_minimum_encoding(content)
    else:
        content_charset = charset

    subject = get_mail_header(subject)
    fromaddr = get_mail_header(fromaddr)
    toaddr = get_mail_header(toaddr)
    replytoaddr = get_mail_header(replytoaddr)
    bccaddr = get_mail_header(bccaddr)

    toaddr = remove_temporary_emails(toaddr)

    if html_content:
        if html_header is None:
            html_content = email_html_header(ln) + html_content
        else:
            html_content = html_header + html_content
        if html_footer is None:
            html_content += email_html_footer(ln)
        else:
            html_content += html_footer

        if charset is None:
            (html_content, html_content_charset) = \
                guess_minimum_encoding(html_content)
        else:
            html_content_charset = charset

        msg_root = MIMEMultipart('alternative')
        msg_root.preamble = 'This is a multi-part message in MIME format.'

        msg_text = MIMEText(content, _charset=content_charset)
        msg_root.attach(msg_text)

        msg_text = MIMEText(html_content, 'html',
                            _charset=html_content_charset)
        if not html_images:
            # No image? Attach the HTML to the root
            msg_root.attach(msg_text)
        else:
            # Image(s)? Attach the HTML and image(s) as children of a
            # "related" block
            msg_related = MIMEMultipart('related')
            msg_related.attach(msg_text)
            for image_id, image_path in html_images.items():
                # Close the file as soon as its bytes have been read.
                with open(image_path, 'rb') as image_file:
                    msg_image = MIMEImage(image_file.read())
                msg_image.add_header('Content-ID', '<%s>' % image_id)
                msg_image.add_header('Content-Disposition', 'attachment',
                                     filename=os.path.split(image_path)[1])
                msg_related.attach(msg_image)
            msg_root.attach(msg_related)
    else:
        msg_root = MIMEText(content, _charset=content_charset)

    if attachments:
        from invenio.bibdocfile import _mimes, guess_format_from_url
        # Re-parent what was built so far under a container that can also
        # carry the attached files.
        old_msg_root = msg_root
        msg_root = MIMEMultipart()
        msg_root.attach(old_msg_root)
        for attachment in attachments:
            try:
                # Reset for every attachment: a bare path carries no mimetype,
                # and the value of the previous iteration must not leak in.
                mime = None
                if isinstance(attachment, (list, tuple)):
                    attachment, mime = attachment
                if mime is None:
                    ## Automatic guessing of mimetype
                    mime = _mimes.guess_type(attachment)[0]
                if mime is None:
                    ext = guess_format_from_url(attachment)
                    mime = _mimes.guess_type("foo" + ext)[0]
                if not mime:
                    mime = 'application/octet-stream'
                part = MIMEBase(*mime.split('/', 1))
                with open(attachment, 'rb') as attached_file:
                    part.set_payload(attached_file.read())
                Encoders.encode_base64(part)
                part.add_header('Content-Disposition',
                                'attachment; filename="%s"'
                                % os.path.basename(attachment))
                msg_root.attach(part)
            except Exception:
                # Best effort: report the problem and go on with the other
                # attachments. The 1-tuple keeps the formatting safe when
                # `attachment` is still an (unpackable) tuple.
                register_exception(alert_admin=True,
                                   prefix="Can't attach %s" % (attachment,))

    msg_root['From'] = fromaddr
    if replytoaddr:
        msg_root['Reply-To'] = replytoaddr
    if usebcc:
        # Assigning to an existing header name appends a second header rather
        # than replacing the first, so the full Bcc value is built up front
        # and assigned exactly once.
        bcc_value = toaddr
        if bccaddr:
            bcc_value = "%s,%s" % (toaddr, bccaddr)
        msg_root['Bcc'] = bcc_value
        msg_root['To'] = 'Undisclosed.Recipients:'
    else:
        msg_root['To'] = toaddr
        if bccaddr:
            msg_root['Bcc'] = bccaddr
    msg_root['Date'] = formatdate(localtime=True)
    msg_root['Subject'] = subject
    msg_root['User-Agent'] = 'Invenio %s at %s' % (CFG_VERSION, CFG_SITE_URL)
    return msg_root.as_string()
def test_alalc(self):
    # Pass the sample through guess_minimum_encoding, decode it back with the
    # charset that call reports, and compare the transliteration of the
    # resulting unicode text with the expected romanised form.
    sample = "眾鳥高飛盡"
    raw_text, charset = guess_minimum_encoding(sample)
    decoded = unicode(raw_text.decode(charset))
    romanised = transliterate_ala_lc(decoded)
    self.assertEqual("Zhong Niao Gao Fei Jin ", romanised)
def forge_email(fromaddr, toaddr, subject, content, html_content='',
                html_images=None, usebcc=False, header=None, footer=None,
                html_header=None, html_footer=None, ln=CFG_SITE_LANG,
                charset=None, replytoaddr=""):
    """Prepare email. Add header and footer if needed.

    @param fromaddr: [string] sender
    @param toaddr: [string or list-of-strings] list of receivers (if string,
        then receivers are separated by ',')
    @param subject: [string] subject of the email
    @param content: [string] content of the email
    @param html_content: [string] html version of the email
    @param html_images: [dict] dictionary of image id, image path
    @param usebcc: [bool] True for using Bcc in place of To
    @param header: [string] None for the default header
    @param footer: [string] None for the default footer
    @param html_header: [string] None for the default HTML header
    @param html_footer: [string] None for the default HTML footer
    @param ln: language
    @param charset: [string] the content charset. By default is None which
        means to try to encode the email as ascii, then latin1 then utf-8.
    @param replytoaddr: [string or list-of-strings] to be used for the
        reply-to header of the email (if string, then receivers are
        separated by ',')
    @return: forged email as a string

    Errors while reading an inline image propagate to the caller.
    """
    # `str` everywhere, plus `unicode` on Python 2. An exact `type(x) is str`
    # test would treat a unicode address string as a sequence and comma-join
    # its individual characters.
    text_types = (str, type(u''))

    def _join_addresses(addresses):
        # A single string is used as given; any other iterable is comma-joined.
        if isinstance(addresses, text_types):
            return addresses
        return ','.join(addresses)

    def _mime_header(value):
        # Pure-ASCII values are used directly; anything else is wrapped in a
        # UTF-8 Header object.
        try:
            return value.encode('ascii')
        except (UnicodeEncodeError, UnicodeDecodeError):
            return Header(value, 'utf-8')

    def _set_addressing(message):
        # Sender, optional reply-to and the To/Bcc pair, in that order.
        message['From'] = fromaddr
        if replytoaddr:
            message['Reply-To'] = replytoaddr
        if usebcc:
            message['Bcc'] = toaddr
            message['To'] = 'Undisclosed.Recipients:'
        else:
            message['To'] = toaddr

    if html_images is None:
        html_images = {}

    # Wrap the plain-text body with the given (or default) header and footer.
    if header is None:
        content = email_header(ln) + content
    else:
        content = header + content
    if footer is None:
        content += email_footer(ln)
    else:
        content += footer

    if charset is None:
        (content, content_charset) = guess_minimum_encoding(content)
    else:
        content_charset = charset

    subject = _mime_header(subject)
    fromaddr = _mime_header(fromaddr)
    toaddr = _mime_header(_join_addresses(toaddr))
    replytoaddr = _mime_header(_join_addresses(replytoaddr))

    if html_content:
        if html_header is None:
            html_content = email_html_header(ln) + html_content
        else:
            html_content = html_header + html_content
        if html_footer is None:
            html_content += email_html_footer(ln)
        else:
            html_content += html_footer

        if charset is None:
            (html_content, html_content_charset) = \
                guess_minimum_encoding(html_content)
        else:
            html_content_charset = charset

        msg_root = MIMEMultipart('alternative')
        # Subject goes first here and last in the plain-text branch; both
        # orders are kept so the serialised message does not change.
        msg_root['Subject'] = subject
        _set_addressing(msg_root)
        msg_root.preamble = 'This is a multi-part message in MIME format.'

        msg_text = MIMEText(content, _charset=content_charset)
        msg_root.attach(msg_text)

        msg_text = MIMEText(html_content, 'html',
                            _charset=html_content_charset)
        if not html_images:
            # No image? Attach the HTML to the root
            msg_root.attach(msg_text)
        else:
            # Image(s)? Attach the HTML and image(s) as children of a
            # "related" block
            msg_related = MIMEMultipart('related')
            msg_related.attach(msg_text)
            for image_id, image_path in html_images.items():
                # Close the file as soon as its bytes have been read.
                with open(image_path, 'rb') as image_file:
                    msg_image = MIMEImage(image_file.read())
                msg_image.add_header('Content-ID', '<%s>' % image_id)
                msg_image.add_header('Content-Disposition', 'attachment',
                                     filename=os.path.split(image_path)[1])
                msg_related.attach(msg_image)
            msg_root.attach(msg_related)
    else:
        msg_root = MIMEText(content, _charset=content_charset)
        _set_addressing(msg_root)
        msg_root['Subject'] = subject

    msg_root['User-Agent'] = 'Invenio %s at %s' % (CFG_VERSION, CFG_SITE_URL)
    return msg_root.as_string()
def test_guess_minimum_encoding(self):
    """textutils - guess_minimum_encoding."""
    # Inputs and the (encoded text, charset name) results expected for them,
    # kept in two parallel tuples and checked pairwise in order.
    samples = ('patata', 'àèéìòù', 'Ιθάκη')
    outcomes = (
        ('patata', 'ascii'),
        ('\xe0\xe8\xe9\xec\xf2\xf9', 'latin1'),
        ('Ιθάκη', 'utf8'),
    )
    for sample, outcome in zip(samples, outcomes):
        self.assertEqual(guess_minimum_encoding(sample), outcome)
def forge_email(
    fromaddr,
    toaddr,
    subject,
    content,
    html_content="",
    html_images=None,
    usebcc=False,
    header=None,
    footer=None,
    html_header=None,
    html_footer=None,
    ln=CFG_SITE_LANG,
    charset=None,
):
    """Prepare email. Add header and footer if needed.

    @param fromaddr: [string] sender
    @param toaddr: [string or list-of-strings] list of receivers (if string,
        then receivers are separated by ',')
    @param subject: [string] subject of the email
    @param content: [string] content of the email
    @param html_content: [string] html version of the email
    @param html_images: [dict] dictionary of image id, image path
    @param usebcc: [bool] True for using Bcc in place of To
    @param header: [string] None for the default header
    @param footer: [string] None for the default footer
    @param html_header: [string] None for the default HTML header
    @param html_footer: [string] None for the default HTML footer
    @param ln: language
    @param charset: [string] the content charset. By default is None which
        means to try to encode the email as ascii, then latin1 then utf-8.
    @return: forged email as a string

    Errors while reading an inline image propagate to the caller.
    """
    # `str` everywhere, plus `unicode` on Python 2. An exact `type(x) is str`
    # test would treat a unicode address string as a sequence and comma-join
    # its individual characters.
    text_types = (str, type(u""))

    def _mime_header(value):
        # Pure-ASCII values are used directly; anything else is wrapped in a
        # UTF-8 Header object.
        try:
            return value.encode("ascii")
        except (UnicodeEncodeError, UnicodeDecodeError):
            return Header(value, "utf-8")

    def _set_addressing(message):
        # Sender followed by the To/Bcc pair, in that order.
        message["From"] = fromaddr
        if usebcc:
            message["Bcc"] = toaddr
            message["To"] = "Undisclosed.Recipients:"
        else:
            message["To"] = toaddr

    if html_images is None:
        html_images = {}

    # Wrap the plain-text body with the given (or default) header and footer.
    if header is None:
        content = email_header(ln) + content
    else:
        content = header + content
    if footer is None:
        content += email_footer(ln)
    else:
        content += footer

    if charset is None:
        (content, content_charset) = guess_minimum_encoding(content)
    else:
        content_charset = charset

    subject = _mime_header(subject)
    fromaddr = _mime_header(fromaddr)
    if not isinstance(toaddr, text_types):
        toaddr = ",".join(toaddr)
    toaddr = _mime_header(toaddr)

    if html_content:
        if html_header is None:
            html_content = email_html_header(ln) + html_content
        else:
            html_content = html_header + html_content
        if html_footer is None:
            html_content += email_html_footer(ln)
        else:
            html_content += html_footer

        if charset is None:
            (html_content, html_content_charset) = guess_minimum_encoding(
                html_content
            )
        else:
            html_content_charset = charset

        msg_root = MIMEMultipart("alternative")
        # Subject goes first here and last in the plain-text branch; both
        # orders are kept so the serialised message does not change.
        msg_root["Subject"] = subject
        _set_addressing(msg_root)
        msg_root.preamble = "This is a multi-part message in MIME format."

        msg_text = MIMEText(content, _charset=content_charset)
        msg_root.attach(msg_text)

        msg_text = MIMEText(html_content, "html", _charset=html_content_charset)
        if not html_images:
            # No image? Attach the HTML to the root
            msg_root.attach(msg_text)
        else:
            # Image(s)? Attach the HTML and image(s) as children of a
            # "related" block
            msg_related = MIMEMultipart("related")
            msg_related.attach(msg_text)
            for image_id, image_path in html_images.items():
                # Close the file as soon as its bytes have been read.
                with open(image_path, "rb") as image_file:
                    msg_image = MIMEImage(image_file.read())
                msg_image.add_header("Content-ID", "<%s>" % image_id)
                msg_image.add_header(
                    "Content-Disposition",
                    "attachment",
                    filename=os.path.split(image_path)[1],
                )
                msg_related.attach(msg_image)
            msg_root.attach(msg_related)
    else:
        msg_root = MIMEText(content, _charset=content_charset)
        _set_addressing(msg_root)
        msg_root["Subject"] = subject

    msg_root["User-Agent"] = "Invenio %s at %s" % (CFG_VERSION, CFG_SITE_URL)
    return msg_root.as_string()
def forge_email(fromaddr, toaddr, subject, content, html_content='',
                html_images=None, usebcc=False, header=None, footer=None,
                html_header=None, html_footer=None, ln=CFG_SITE_LANG,
                charset=None):
    """Prepare email. Add header and footer if needed.

    @param fromaddr: [string] sender
    @param toaddr: [string or list-of-strings] list of receivers (if string,
        then receivers are separated by ',')
    @param subject: [string] subject of the email
    @param content: [string] content of the email
    @param html_content: [string] html version of the email
    @param html_images: [dict] dictionary of image id, image path
    @param usebcc: [bool] True for using Bcc in place of To
    @param header: [string] None for the default header
    @param footer: [string] None for the default footer
    @param html_header: [string] None for the default HTML header
    @param html_footer: [string] None for the default HTML footer
    @param ln: language
    @param charset: [string] the content charset. By default is None which
        means to try to encode the email as ascii, then latin1 then utf-8.
    @return: forged email as a string

    Errors while reading an inline image propagate to the caller.
    """
    # `str` everywhere, plus `unicode` on Python 2. An exact `type(x) is str`
    # test would treat a unicode address string as a sequence and comma-join
    # its individual characters.
    text_types = (str, type(u''))

    def _mime_header(value):
        # Pure-ASCII values are used directly; anything else is wrapped in a
        # UTF-8 Header object.
        try:
            return value.encode('ascii')
        except (UnicodeEncodeError, UnicodeDecodeError):
            return Header(value, 'utf-8')

    def _set_addressing(message):
        # Sender followed by the To/Bcc pair, in that order.
        message['From'] = fromaddr
        if usebcc:
            message['Bcc'] = toaddr
            message['To'] = 'Undisclosed.Recipients:'
        else:
            message['To'] = toaddr

    if html_images is None:
        html_images = {}

    # Wrap the plain-text body with the given (or default) header and footer.
    if header is None:
        content = email_header(ln) + content
    else:
        content = header + content
    if footer is None:
        content += email_footer(ln)
    else:
        content += footer

    if charset is None:
        (content, content_charset) = guess_minimum_encoding(content)
    else:
        content_charset = charset

    subject = _mime_header(subject)
    fromaddr = _mime_header(fromaddr)
    if not isinstance(toaddr, text_types):
        toaddr = ','.join(toaddr)
    toaddr = _mime_header(toaddr)

    if html_content:
        if html_header is None:
            html_content = email_html_header(ln) + html_content
        else:
            html_content = html_header + html_content
        if html_footer is None:
            html_content += email_html_footer(ln)
        else:
            html_content += html_footer

        if charset is None:
            (html_content, html_content_charset) = \
                guess_minimum_encoding(html_content)
        else:
            html_content_charset = charset

        msg_root = MIMEMultipart('alternative')
        # Subject goes first here and last in the plain-text branch; both
        # orders are kept so the serialised message does not change.
        msg_root['Subject'] = subject
        _set_addressing(msg_root)
        msg_root.preamble = 'This is a multi-part message in MIME format.'

        msg_text = MIMEText(content, _charset=content_charset)
        msg_root.attach(msg_text)

        msg_text = MIMEText(html_content, 'html',
                            _charset=html_content_charset)
        if not html_images:
            # No image? Attach the HTML to the root
            msg_root.attach(msg_text)
        else:
            # Image(s)? Attach the HTML and image(s) as children of a
            # "related" block
            msg_related = MIMEMultipart('related')
            msg_related.attach(msg_text)
            for image_id, image_path in html_images.items():
                # Close the file as soon as its bytes have been read.
                with open(image_path, 'rb') as image_file:
                    msg_image = MIMEImage(image_file.read())
                msg_image.add_header('Content-ID', '<%s>' % image_id)
                msg_image.add_header('Content-Disposition', 'attachment',
                                     filename=os.path.split(image_path)[1])
                msg_related.attach(msg_image)
            msg_root.attach(msg_related)
    else:
        msg_root = MIMEText(content, _charset=content_charset)
        _set_addressing(msg_root)
        msg_root['Subject'] = subject

    msg_root['User-Agent'] = 'Invenio %s at %s' % (CFG_VERSION, CFG_SITE_URL)
    return msg_root.as_string()
def test_alalc(self):
    # The sample is encoded by guess_minimum_encoding and decoded again with
    # the charset it reports; the unicode result is what gets transliterated.
    source_text = "眾鳥高飛盡"
    expected = "Zhong Niao Gao Fei Jin "
    guessed = guess_minimum_encoding(source_text)
    payload, codec = guessed
    as_unicode = unicode(payload.decode(codec))
    self.assertEqual(expected, transliterate_ala_lc(as_unicode))
def forge_email(fromaddr, toaddr, subject, content, html_content='',
                html_images=None, usebcc=False, header=None, footer=None,
                html_header=None, html_footer=None, ln=CFG_SITE_LANG,
                charset=None, replytoaddr="", attachments=None, bccaddr=""):
    """Prepare email. Add header and footer if needed.

    @param fromaddr: [string] sender
    @param toaddr: [string or list-of-strings] list of receivers (if string,
        then receivers are separated by ',')
    @param subject: [string] subject of the email
    @param content: [string] content of the email
    @param html_content: [string] html version of the email
    @param html_images: [dict] dictionary of image id, image path
    @param usebcc: [bool] True for using Bcc in place of To
    @param header: [string] None for the default header
    @param footer: [string] None for the default footer
    @param html_header: [string] None for the default HTML header
    @param html_footer: [string] None for the default HTML footer
    @param ln: language
    @param charset: [string] the content charset. By default is None which
        means to try to encode the email as ascii, then latin1 then utf-8.
    @param replytoaddr: [string or list-of-strings] to be used for the
        reply-to header of the email (if string, then receivers are
        separated by ',')
    @param attachments: list of paths of files to be attached.
        Alternatively, every element of the list could be a tuple:
        (filename, mimetype)
    @param bccaddr: [string or list-of-strings] to be used for BCC header
        of the email (if string, then receivers are separated by ',')
    @return: forged email as a string

    Errors while reading an inline image propagate to the caller. A failure
    while preparing an attachment is reported through register_exception and
    that attachment is skipped.
    """
    if html_images is None:
        html_images = {}

    # Wrap the plain-text body with the given (or default) header and footer.
    if header is None:
        content = email_header(ln) + content
    else:
        content = header + content
    if footer is None:
        content += email_footer(ln)
    else:
        content += footer

    if charset is None:
        (content, content_charset) = guess_minimum_encoding(content)
    else:
        content_charset = charset

    subject = get_mail_header(subject)
    fromaddr = get_mail_header(fromaddr)
    toaddr = get_mail_header(toaddr)
    replytoaddr = get_mail_header(replytoaddr)
    bccaddr = get_mail_header(bccaddr)

    toaddr = remove_temporary_emails(toaddr)

    if html_content:
        if html_header is None:
            html_content = email_html_header(ln) + html_content
        else:
            html_content = html_header + html_content
        if html_footer is None:
            html_content += email_html_footer(ln)
        else:
            html_content += html_footer

        if charset is None:
            (html_content, html_content_charset) = \
                guess_minimum_encoding(html_content)
        else:
            html_content_charset = charset

        msg_root = MIMEMultipart('alternative')
        msg_root.preamble = 'This is a multi-part message in MIME format.'

        msg_text = MIMEText(content, _charset=content_charset)
        msg_root.attach(msg_text)

        msg_text = MIMEText(html_content, 'html',
                            _charset=html_content_charset)
        if not html_images:
            # No image? Attach the HTML to the root
            msg_root.attach(msg_text)
        else:
            # Image(s)? Attach the HTML and image(s) as children of a
            # "related" block
            msg_related = MIMEMultipart('related')
            msg_related.attach(msg_text)
            for image_id, image_path in html_images.items():
                # Close the file as soon as its bytes have been read.
                with open(image_path, 'rb') as image_file:
                    msg_image = MIMEImage(image_file.read())
                msg_image.add_header('Content-ID', '<%s>' % image_id)
                msg_image.add_header('Content-Disposition', 'attachment',
                                     filename=os.path.split(image_path)[1])
                msg_related.attach(msg_image)
            msg_root.attach(msg_related)
    else:
        msg_root = MIMEText(content, _charset=content_charset)

    if attachments:
        from invenio.bibdocfile import _mimes, guess_format_from_url
        # Re-parent what was built so far under a container that can also
        # carry the attached files.
        old_msg_root = msg_root
        msg_root = MIMEMultipart()
        msg_root.attach(old_msg_root)
        for attachment in attachments:
            try:
                # Reset for every attachment: a bare path carries no mimetype,
                # and the value of the previous iteration must not leak in.
                mime = None
                if isinstance(attachment, (list, tuple)):
                    attachment, mime = attachment
                if mime is None:
                    ## Automatic guessing of mimetype
                    mime = _mimes.guess_type(attachment)[0]
                if mime is None:
                    ext = guess_format_from_url(attachment)
                    mime = _mimes.guess_type("foo" + ext)[0]
                if not mime:
                    mime = 'application/octet-stream'
                part = MIMEBase(*mime.split('/', 1))
                with open(attachment, 'rb') as attached_file:
                    part.set_payload(attached_file.read())
                Encoders.encode_base64(part)
                part.add_header('Content-Disposition',
                                'attachment; filename="%s"'
                                % os.path.basename(attachment))
                msg_root.attach(part)
            except Exception:
                # Best effort: report the problem and go on with the other
                # attachments. The 1-tuple keeps the formatting safe when
                # `attachment` is still an (unpackable) tuple.
                register_exception(alert_admin=True,
                                   prefix="Can't attach %s" % (attachment,))

    msg_root['From'] = fromaddr
    if replytoaddr:
        msg_root['Reply-To'] = replytoaddr
    if usebcc:
        # Assigning to an existing header name appends a second header rather
        # than replacing the first, so the full Bcc value is built up front
        # and assigned exactly once.
        bcc_value = toaddr
        if bccaddr:
            bcc_value = "%s,%s" % (toaddr, bccaddr)
        msg_root['Bcc'] = bcc_value
        msg_root['To'] = 'Undisclosed.Recipients:'
    else:
        msg_root['To'] = toaddr
        if bccaddr:
            msg_root['Bcc'] = bccaddr
    msg_root['Date'] = formatdate(localtime=True)
    msg_root['Subject'] = subject
    msg_root['User-Agent'] = 'Invenio %s at %s' % (CFG_VERSION, CFG_SITE_URL)
    return msg_root.as_string()
def test_guess_minimum_encoding(self):
    """textutils - guess_minimum_encoding."""
    # Each entry pairs an input text with the (encoded text, charset name)
    # tuple that guess_minimum_encoding is expected to return for it.
    cases = [
        ("patata", ("patata", "ascii")),
        ("àèéìòù", ("\xe0\xe8\xe9\xec\xf2\xf9", "latin1")),
        ("Ιθάκη", ("Ιθάκη", "utf8")),
    ]
    for given, wanted in cases:
        self.assertEqual(guess_minimum_encoding(given), wanted)