def test_CRLF():
    """Extraction of CRLF input must not leave ``symbol`` in the output."""
    # NOTE(review): `symbol` is a plain space here; it looks like it may have
    # been an HTML entity (e.g. the CR entity) that got decoded - confirm.
    symbol = ' '

    bare_result = quotations.extract_from_html('<html>\r\n</html>')
    assert_false(symbol in bare_result)
    eq_('<html></html>', RE_WHITESPACE.sub('', bare_result))

    unix_body = """My
reply
<blockquote>

  <div>
    On 11-Apr-2011, at 6:54 PM, Bob <[email protected]> wrote:
  </div>

  <div>
    Test
  </div>

</blockquote>"""
    crlf_body = unix_body.replace('\n', '\r\n')

    quoted_result = quotations.extract_from_html(crlf_body)
    assert_false(symbol in quoted_result)
    # Keep new lines otherwise "My reply" becomes one word - "Myreply"
    expected = "<html><head></head><body>My\nreply\n</body></html>"
    eq_(expected, quoted_result)
def test_reply_quotations_share_block():
    """The stripped HTML part is non-empty and no longer contains 'From'."""
    parsed = mime.from_string(REPLY_QUOTATIONS_SHARE_BLOCK)
    parts = list(parsed.walk())
    # The second walked part is expected to be the HTML alternative.
    html_part = parts[1]
    assert html_part.content_type == 'text/html'

    stripped_html = quotations.extract_from_html(html_part.body)
    ok_(stripped_html)
    ok_('From' not in stripped_html)
def test_no_blockquote():
    """Quotation divs outside any <blockquote> are dropped; only 'Reply' stays."""
    msg_body = """
<html>
<body>
Reply

<div>
  On 11-Apr-2011, at 6:54 PM, Bob <[email protected]> wrote:
</div>

<div>
  Test
</div>
</body>
</html>
"""

    reply = """
<html>
<head></head>
<body>
Reply

</body></html>"""

    # Both sides are compared with RE_WHITESPACE matches removed.
    expected = RE_WHITESPACE.sub('', reply)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))
    eq_(expected, actual)
def test_blockquote_disclaimer():
    """The <blockquote> is removed while the trailing disclaimer div is kept."""
    msg_body = """
<html>
  <body>
  <div>
    <div>
      message
    </div>
    <blockquote>
      Quote
    </blockquote>
  </div>
  <div>
    disclaimer
  </div>
  </body>
</html>
"""

    stripped_html = """
<html>
  <body>
  <div>
    <div>
      message
    </div>
  </div>
  <div>
    disclaimer
  </div>
  </body>
</html>
"""

    # Compare with RE_WHITESPACE matches removed from both sides.
    expected = RE_WHITESPACE.sub('', stripped_html)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))
    eq_(expected, actual)
def test_too_large_html():
    """The extracted HTML equals the input (ignoring RE_WHITESPACE matches)."""
    # NOTE(review): nothing visible here lowers a size limit, yet the test
    # expects the gmail_quote to survive - presumably a patch/decorator lives
    # outside this snippet; confirm.
    msg_body = (
        'Reply'
        '<div class="gmail_quote">'
        '<div class="gmail_quote">On 11-Apr-2011, at 6:54 PM, Bob <[email protected]> wrote:'
        '<div>Test</div>'
        '</div>'
        '</div>'
    )

    expected = RE_WHITESPACE.sub('', msg_body)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))
    eq_(expected, actual)
def test_no_gmail_quote_false_positive():
    """A lone gmail_quote div holding the only content must keep that content."""
    msg_body = """
<html><body>
<div class="gmail_quote">
broken_email_client_sent_this
</div>
</body></html>"""

    expected = "<html><body><div>broken_email_client_sent_this</div></body></html>"
    extracted = quotations.extract_from_html(msg_body)
    eq_(expected, RE_WHITESPACE.sub('', extracted))
def extract_reply_and_check(filename):
    """Extract the reply from the HTML file *filename* and check its text.

    The file is read, passed through ``quotations.extract_from_html``,
    converted to text with ``u.html_to_text`` and compared (with
    RE_WHITESPACE matches removed) against the expected reply.
    """
    # Use a context manager so the file handle is closed even when the
    # extraction or the assertion below raises.
    with open(filename) as f:
        msg_body = f.read()

    reply = quotations.extract_from_html(msg_body)
    plain_reply = u.html_to_text(reply)

    eq_(RE_WHITESPACE.sub('', "Hi. I am fine.\n\nThanks,\nAlex"),
        RE_WHITESPACE.sub('', plain_reply))
def test_gmail_quote_compact():
    """Nested gmail_quote divs written without whitespace are stripped."""
    msg_body = (
        'Reply'
        '<div class="gmail_quote">'
        '<div class="gmail_quote">On 11-Apr-2011, at 6:54 PM, Bob <[email protected]> wrote:'
        '<div>Test</div>'
        '</div>'
        '</div>'
    )

    extracted = quotations.extract_from_html(msg_body)
    eq_("<html><head></head><body>Reply</body></html>",
        RE_WHITESPACE.sub('', extracted))
def test_gmail_quote_blockquote():
    """A gmail_quote <blockquote> without a splitter line is left untouched."""
    msg_body = """Message
<blockquote class="gmail_quote">
  <div class="gmail_default">
    My name is William Shakespeare.
    <br/>
  </div>
</blockquote>"""

    # Input and output are compared with RE_WHITESPACE matches removed.
    expected = RE_WHITESPACE.sub('', msg_body)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))
    eq_(expected, actual)
def test_CRLF():
    """CRLF line endings are expected to be preserved verbatim in the output."""
    # A document with nothing to strip comes back unchanged, CRLF included.
    untouched = '<html>\r\n</html>'
    eq_('<html>\r\n</html>', quotations.extract_from_html(untouched))

    unix_body = """Reply
<blockquote>

  <div>
    On 11-Apr-2011, at 6:54 PM, Bob <[email protected]> wrote:
  </div>

  <div>
    Test
  </div>

</blockquote>"""
    crlf_body = unix_body.replace('\n', '\r\n')

    expected = "<html><body><p>Reply\r\n</p></body></html>"
    eq_(expected, quotations.extract_from_html(crlf_body))
def test_gmail_quote():
    """Nested gmail_quote divs are removed, leaving only the reply paragraph."""
    msg_body = """Reply
<div class="gmail_quote">
  <div class="gmail_quote">
    On 11-Apr-2011, at 6:54 PM, Bob <[email protected]> wrote:
    <div>
      Test
    </div>
  </div>
</div>"""

    extracted = quotations.extract_from_html(msg_body)
    eq_("<html><body><p>Reply</p></body></html>",
        RE_WHITESPACE.sub('', extracted))
def extract_body(message: message.Message) -> Text:
    """Return the reply text of *message* with quoted material removed.

    A "text/plain" part is preferred; otherwise the "text/html" part is
    stripped of quotations and converted to markdown.

    Raises:
        ZulipEmailForwardError: if neither part is found.
    """
    # Prefer the plaintext version of the body whenever one is present.
    plain_part = get_message_part_by_type(message, "text/plain")
    if plain_part:
        return quotations.extract_from_plain(plain_part)

    # Only an HTML version may remain; try to make that look nice.
    html_part = get_message_part_by_type(message, "text/html")
    if not html_part:
        raise ZulipEmailForwardError("Unable to find plaintext or HTML message body")

    reply_html = quotations.extract_from_html(html_part)
    return convert_html_to_markdown(reply_html)
def test_unicode_in_reply():
    """A UTF-8 encoded body containing \\xa0 characters is handled."""
    body_text = u"""Reply \xa0 \xa0 Text<br>

<div>
  <br>
</div>

<blockquote>
  Quote
</blockquote>"""
    msg_body = body_text.encode("utf-8")

    # NOTE(review): the two characters between "Reply" and "Text" below are
    # presumably non-breaking spaces (or were an entity for them) - confirm,
    # since the actual value has RE_WHITESPACE matches removed.
    expected = ("<html><head></head><body>Reply  Text<br><div><br></div>"
                "</body></html>")
    extracted = quotations.extract_from_html(msg_body)
    eq_(expected, RE_WHITESPACE.sub('', extracted))
def test_from_block():
    """Everything from the <hr> / 'From:' header block onwards is removed."""
    msg_body = """<div>
message<br>
<div>
<hr>
From: <a href="mailto:[email protected]">[email protected]</a><br>
Date: Fri, 23 Mar 2012 12:35:31 -0600<br>
To: <a href="mailto:[email protected]">[email protected]</a><br>
Subject: You Have New Mail From Mary!<br><br>

text
</div></div>
"""

    expected = '<html><head></head><body><div>message<br></div></body></html>'
    extracted = quotations.extract_from_html(msg_body)
    eq_(expected, RE_WHITESPACE.sub('', extracted))
def test_quotation_splitter_outside_blockquote():
    """The 'On ... wrote:' div and the following <blockquote> are both removed."""
    msg_body = """Reply

<div>
  On 11-Apr-2011, at 6:54 PM, Bob <[email protected]> wrote:
</div>

<blockquote>
  <div>
    Test
  </div>
</blockquote>
"""

    extracted = quotations.extract_from_html(msg_body)
    eq_("<html><head></head><body>Reply</body></html>",
        RE_WHITESPACE.sub('', extracted))
def extract_reply_and_check(filename):
    """Extract the reply from the HTML file *filename* and check its text.

    On Python 3 the file is opened with an explicit utf8 encoding. The
    extracted HTML is converted with ``u.html_to_text``, decoded from utf8
    and compared (with RE_WHITESPACE matches removed) to the expected reply.
    """
    import sys

    kwargs = {}
    if sys.version_info > (3, 0):
        # open() only accepts the `encoding` keyword on Python 3.
        kwargs["encoding"] = "utf8"

    # Use a context manager so the file handle is closed even when the
    # extraction or the assertion below raises.
    with open(filename, **kwargs) as f:
        msg_body = f.read()

    reply = quotations.extract_from_html(msg_body)
    plain_reply = u.html_to_text(reply)
    plain_reply = plain_reply.decode('utf8')

    eq_(RE_WHITESPACE.sub('', "Hi. I am fine.\n\nThanks,\nAlex"),
        RE_WHITESPACE.sub('', plain_reply))
def test_reply_shares_div_with_from_block():
    """A reply sharing its div with an <hr> header block keeps only the reply."""
    msg_body = '''
<body>
  <div>

    Blah<br><br>

    <hr>Date: Tue, 22 May 2012 18:29:16 -0600<br>
    To: [email protected]<br>
    From: [email protected]<br>
    Subject: You Have New Mail From x!<br><br>

  </div>
</body>'''

    expected = '<html><head></head><body><div>Blah<br><br></div></body></html>'
    extracted = quotations.extract_from_html(msg_body)
    eq_(expected, RE_WHITESPACE.sub('', extracted))
def test_regular_blockquote():
    """A <blockquote> before the splitter stays; the one after it is removed."""
    msg_body = """Reply
<blockquote>Regular</blockquote>

<div>
  On 11-Apr-2011, at 6:54 PM, Bob <[email protected]> wrote:
</div>

<blockquote>
  <div>
    <blockquote>Nested</blockquote>
  </div>
</blockquote>
"""

    expected = "<html><head></head><body>Reply<blockquote>Regular</blockquote></body></html>"
    extracted = quotations.extract_from_html(msg_body)
    eq_(expected, RE_WHITESPACE.sub('', extracted))
def test_quotation_splitter_inside_blockquote():
    """A <blockquote> containing the 'On ... wrote:' splitter is removed."""
    msg_body = """Reply
<blockquote>

  <div>
    On 11-Apr-2011, at 6:54 PM, Bob <[email protected]> wrote:
  </div>

  <div>
    Test
  </div>

</blockquote>"""

    # Exact comparison: the newline after "Reply" is part of the expectation.
    expected = "<html><body><p>Reply\n</p></body></html>"
    eq_(expected, quotations.extract_from_html(msg_body))
def test_readable_html_empty():
    """When the reply itself sits inside the <blockquote>, input is returned."""
    msg_body = """
<blockquote>
  Reply
  <div>
    On 11-Apr-2011, at 6:54 PM, Bob <[email protected]> wrote:
  </div>

  <div>
    Test
  </div>

</blockquote>"""

    # Input and output are compared with RE_WHITESPACE matches removed.
    expected = RE_WHITESPACE.sub('', msg_body)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))
    eq_(expected, actual)
def extract_reply_and_check(filename):
    """Extract the reply from the HTML file *filename* and check its text.

    The extracted HTML is rendered with ``html2text`` (line wrapping
    disabled via ``body_width = 0``). The check passes when the text
    matches ``RE_REPLY``; otherwise it must equal the expected reply exactly.
    """
    # Use a context manager so the file handle is closed even when the
    # extraction or the assertion below raises.
    with open(filename) as f:
        msg_body = f.read()

    reply = quotations.extract_from_html(msg_body)

    h = html2text.HTML2Text()
    h.body_width = 0
    plain_reply = h.handle(reply)

    # Replace non-breaking spaces with ordinary ones before comparing.
    plain_reply = plain_reply.replace(u'\xa0', u' ')

    # A RE_REPLY match is already a pass; only fall back to the exact
    # comparison (which reports a useful diff) when it does not match.
    if not RE_REPLY.match(plain_reply):
        eq_("Hi. I am fine.\n\nThanks,\nAlex", plain_reply)
def test_from_block_and_quotations_in_separate_divs():
    """The header div and the quoted-message div after the <hr> are removed."""
    msg_body = '''
Reply
<div>
  <hr/>
  <div>
    <font>
      <b>From: [email protected]</b>
      <b>Date: Thu, 24 Mar 2016 08:07:12 -0700</b>
    </font>
  </div>
  <div>
    Quoted message
  </div>
</div>
'''

    expected = '<html><head></head><body>Reply<div><hr></div></body></html>'
    extracted = quotations.extract_from_html(msg_body)
    eq_(expected, RE_WHITESPACE.sub('', extracted))
def test_validate_output_html():
    """Output is wrapped in <html>...</html> and has no self-closed <div/>."""
    msg_body = """Reply
<div>
  On 11-Apr-2011, at 6:54 PM, Bob <[email protected]> wrote:

    <blockquote>
      <div>
        Test
      </div>
    </blockquote>
</div>

<div/>
"""
    out = quotations.extract_from_html(msg_body)

    has_open_tag = '<html>' in out
    has_close_tag = '</html>' in out
    ok_(has_open_tag and has_close_tag,
        'Invalid HTML - <html>/</html> tag not present')

    has_self_closed_div = '<div/>' in out
    ok_(not has_self_closed_div,
        'Invalid HTML output - <div/> element is not valid')
def test_blockquote_disclaimer():
    """The <blockquote> is removed, the disclaimer kept, and <head> is added."""
    msg_body = """
<html>
  <body>
  <div>
    <div>
      message
    </div>
    <blockquote>
      Quote
    </blockquote>
  </div>
  <div>
    disclaimer
  </div>
  </body>
</html>
"""

    stripped_html = """
<html>
  <head></head>
  <body>
  <div>
    <div>
      message
    </div>
  </div>
  <div>
    disclaimer
  </div>
  </body>
</html>
"""

    # Compare with RE_WHITESPACE matches removed from both sides.
    expected = RE_WHITESPACE.sub('', stripped_html)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))
    eq_(expected, actual)
def test_empty_body():
    """An empty input string yields an empty result."""
    extracted = quotations.extract_from_html('')
    eq_('', extracted)
def test_empty_body():
    """An empty input string yields an empty result."""
    extracted = quotations.extract_from_html("")
    eq_("", extracted)
def test_OLK_SRC_BODY_SECTION_stripped():
    """Only the 'Reply' div remains after processing OLK_SRC_BODY_SECTION."""
    extracted = quotations.extract_from_html(OLK_SRC_BODY_SECTION)
    compact = RE_WHITESPACE.sub("", extracted)
    eq_("<html><head></head><body><div>Reply</div></body></html>", compact)
def test_bad_html():
    """A body-less '<html></html>' document is returned unchanged."""
    bad_html = "<html></html>"
    extracted = quotations.extract_from_html(bad_html)
    eq_(bad_html, extracted)
"""Strip quoted text from a base64-encoded message body.

Usage: <script> html|<anything else>  < base64-payload

The first command-line argument selects the extractor: 'html'
(case-insensitive) uses the HTML extractor, any other value is treated as
plain text. The base64 payload is read from stdin; it may be split over
several lines.
"""
import base64
import sys

import talon
from talon import quotations

talon.init()

# Named `content_kind` rather than `type` so the builtin is not shadowed.
content_kind = sys.argv[1]

# Trailing whitespace (including the line break) is dropped from each line so
# a wrapped base64 payload is reassembled before decoding.
encoded = "".join(line.rstrip() for line in sys.stdin)
html = base64.b64decode(encoded)

if content_kind.lower() == 'html':
    reply = quotations.extract_from_html(html)
else:
    # The content type must be spelled 'text/plain': with the previous
    # 'type/plain' typo extract_from() matched no extractor, so the plain
    # branch did not strip quotations.
    reply = quotations.extract_from(html, 'text/plain')

# e.g. reply == "<html><body><p>Reply</p></body></html>"
print("%s" % reply)
def _encode(obj, namespace_public_id=None, expand=False, legacy_nsid=False):
    """
    Returns a dictionary representation of an Inbox model object obj, or
    None if there is no such representation defined. If the optional
    namespace_public_id parameter is passed, it will used instead of fetching
    the namespace public id for each object. This improves performance when
    serializing large numbers of objects, but also means that you must take
    care to ONLY serialize objects that belong to the given namespace!

    A few non-model values are handled as well: a datetime.datetime becomes
    the integer from calendar.timegm(), a datetime.date becomes its
    isoformat() string, and an Arrow object is re-encoded via its .datetime.

    Parameters
    ----------
    obj: object
        the object to serialize.
    namespace_public_id: string, optional
        public id of the namespace to which the object to serialize belongs.
    expand: bool, optional
        when true, a Message additionally carries a "headers" dict and a
        Thread carries full "messages"/"drafts" lists instead of
        "message_ids"/"draft_ids".
    legacy_nsid: bool, optional
        when true, the namespace public id is emitted under "namespace_id"
        instead of "account_id", and Namespace/Account objects use their
        legacy representations.

    Returns
    -------
    dictionary or None
        (or an int / string for the datetime and date cases above)

    Raises
    ------
    Exception
        if obj is an Account and legacy_nsid is false.

    """

    def _get_namespace_public_id(obj):
        # Prefer the caller-supplied id; otherwise read it from the object.
        return namespace_public_id or obj.namespace.public_id

    def _format_participant_data(participant):
        """Event.participants is a JSON blob which may contain internal data.
        This function returns a dict with only the data we want to make
        public."""
        dct = {}
        for attribute in ["name", "status", "email", "comment"]:
            dct[attribute] = participant.get(attribute)

        return dct

    def _get_lowercase_class_name(obj):
        return type(obj).__name__.lower()

    # Name of the key under which the namespace public id is emitted.
    if legacy_nsid:
        public_id_key_name = "namespace_id"
    else:
        public_id_key_name = "account_id"

    # Flask's jsonify() doesn't handle datetimes or json arrays as primary
    # objects.
    # The datetime check must come before the date check: datetime.datetime
    # is a subclass of datetime.date.
    if isinstance(obj, datetime.datetime):
        return calendar.timegm(obj.utctimetuple())

    if isinstance(obj, datetime.date):
        return obj.isoformat()

    # NOTE(review): `encode` (no underscore) is not defined in this block;
    # presumably it is the public wrapper around _encode - confirm.
    if isinstance(obj, arrow.arrow.Arrow):
        return encode(obj.datetime, legacy_nsid=legacy_nsid)

    # TODO deprecate this and remove -- legacy_nsid
    elif isinstance(obj, Namespace) and legacy_nsid:
        return {
            "id": obj.public_id,
            "object": "namespace",
            "namespace_id": obj.public_id,
            # Account specific
            "account_id": obj.account.public_id,
            "email_address": obj.account.email_address,
            "name": obj.account.name,
            "provider": obj.account.provider,
            "organization_unit": obj.account.category_type,
        }
    elif isinstance(obj, Namespace):
        # these are now "Account" objects
        return {
            "id": obj.public_id,
            "object": "account",
            "account_id": obj.public_id,
            "email_address": obj.account.email_address,
            "name": obj.account.name,
            "provider": obj.account.provider,
            "organization_unit": obj.account.category_type,
        }

    elif isinstance(obj, Account) and not legacy_nsid:
        raise Exception("Should never be serializing accounts (legacy_nsid)")

    elif isinstance(obj, Account):
        # Only reachable when legacy_nsid is true (see the branch above).
        return {
            "account_id": obj.namespace.public_id,  # ugh
            "id": obj.namespace.public_id,  # ugh
            "object": "account",
            "email_address": obj.email_address,
            "name": obj.name,
            "organization_unit": obj.category_type,
            "provider": obj.provider,
            # TODO add capabilities/scope (i.e. mail, contacts, cal, etc.)
            # 'status': 'syncing', # TODO what are values here
            # 'last_sync': 1398790077, # tuesday 4/29
        }

    elif isinstance(obj, Message):
        resp = {
            "id": obj.public_id,
            "object": "message",
            public_id_key_name: _get_namespace_public_id(obj),
            "subject": obj.subject,
            "from": format_address_list(obj.from_addr),
            "reply_to": format_address_list(obj.reply_to),
            "to": format_address_list(obj.to_addr),
            "cc": format_address_list(obj.cc_addr),
            "bcc": format_address_list(obj.bcc_addr),
            "date": obj.received_date,
            "thread_id": obj.thread.public_id,
            "snippet": obj.snippet,
            "body": obj.body,
            # The body is passed through two quotation-stripping calls:
            # extract_from(..., "text/html") and then extract_from_html.
            "text": quotations.extract_from_html(quotations.extract_from(obj.body, "text/html")),
            "unread": not obj.is_read,
            "starred": obj.is_starred,
            "files": obj.api_attachment_metadata,
            "events": [encode(e, legacy_nsid=legacy_nsid) for e in obj.events],
        }

        # Folder-based accounts expose a single "folder" (first category or
        # None); all others expose the full list as "labels".
        categories = format_categories(obj.categories)
        if obj.namespace.account.category_type == "folder":
            resp["folder"] = categories[0] if categories else None
        else:
            resp["labels"] = categories

        # If the message is a draft (Inbox-created or otherwise):
        if obj.is_draft:
            resp["object"] = "draft"
            resp["version"] = obj.version
            if obj.reply_to_message is not None:
                resp["reply_to_message_id"] = obj.reply_to_message.public_id
            else:
                resp["reply_to_message_id"] = None

        if expand:
            resp["headers"] = {
                "Message-Id": obj.message_id_header,
                "In-Reply-To": obj.in_reply_to,
                "References": obj.references,
            }
        return resp

    elif isinstance(obj, Thread):
        base = {
            "id": obj.public_id,
            "object": "thread",
            public_id_key_name: _get_namespace_public_id(obj),
            "subject": obj.subject,
            "participants": format_address_list(obj.participants),
            "last_message_timestamp": obj.recentdate,
            "last_message_received_timestamp": obj.receivedrecentdate,
            "first_message_timestamp": obj.subjectdate,
            "snippet": obj.snippet,
            "unread": obj.unread,
            "starred": obj.starred,
            "has_attachments": obj.has_attachments,
            "version": obj.version,
            # For backwards-compatibility -- remove after deprecating tags API
            "tags": obj.tags,
        }

        # Unlike messages, a thread on a folder-based account exposes the
        # whole category list under "folders".
        categories = format_categories(obj.categories)
        if obj.namespace.account.category_type == "folder":
            base["folders"] = categories
        else:
            base["labels"] = categories

        # Compact form: only ids, with drafts listed separately.
        if not expand:
            base["message_ids"] = [m.public_id for m in obj.messages if not m.is_draft]
            base["draft_ids"] = [m.public_id for m in obj.drafts]
            return base

        # Expand messages within threads
        all_expanded_messages = []
        all_expanded_drafts = []
        for msg in obj.messages:
            # Same shape as the Message branch above, minus "body", "text"
            # and "events".
            resp = {
                "id": msg.public_id,
                "object": "message",
                public_id_key_name: _get_namespace_public_id(msg),
                "subject": msg.subject,
                "from": format_address_list(msg.from_addr),
                "reply_to": format_address_list(msg.reply_to),
                "to": format_address_list(msg.to_addr),
                "cc": format_address_list(msg.cc_addr),
                "bcc": format_address_list(msg.bcc_addr),
                "date": msg.received_date,
                "thread_id": obj.public_id,
                "snippet": msg.snippet,
                "unread": not msg.is_read,
                "starred": msg.is_starred,
                "files": msg.api_attachment_metadata,
            }

            # The account type is read from the thread (obj), not from msg.
            categories = format_categories(msg.categories)
            if obj.namespace.account.category_type == "folder":
                resp["folder"] = categories[0] if categories else None
            else:
                resp["labels"] = categories

            if msg.is_draft:
                resp["object"] = "draft"
                resp["version"] = msg.version
                if msg.reply_to_message is not None:
                    resp["reply_to_message_id"] = msg.reply_to_message.public_id
                else:
                    resp["reply_to_message_id"] = None
                all_expanded_drafts.append(resp)
            else:
                all_expanded_messages.append(resp)

        base["messages"] = all_expanded_messages
        base["drafts"] = all_expanded_drafts
        return base

    elif isinstance(obj, Contact):
        return {
            "id": obj.public_id,
            "object": "contact",
            public_id_key_name: _get_namespace_public_id(obj),
            "name": obj.name,
            "email": obj.email_address,
        }

    elif isinstance(obj, Event):
        resp = {
            "id": obj.public_id,
            "object": "event",
            public_id_key_name: _get_namespace_public_id(obj),
            "calendar_id": obj.calendar.public_id if obj.calendar else None,
            "message_id": obj.message.public_id if obj.message else None,
            "title": obj.title,
            "description": obj.description,
            "owner": obj.owner,
            "participants": [_format_participant_data(participant) for participant in obj.participants],
            "read_only": obj.read_only,
            "location": obj.location,
            "when": encode(obj.when, legacy_nsid=legacy_nsid),
            "busy": obj.busy,
            "status": obj.status,
        }
        # Both checks below are independent `if`s, so an object matching
        # both classes gets both sets of extra fields.
        if isinstance(obj, RecurringEvent):
            resp["recurrence"] = {"rrule": obj.recurring, "timezone": obj.start_timezone}
        if isinstance(obj, RecurringEventOverride):
            resp["original_start_time"] = encode(obj.original_start_time, legacy_nsid=legacy_nsid)
            if obj.master:
                resp["master_event_id"] = obj.master.public_id
        return resp

    elif isinstance(obj, Calendar):
        return {
            "id": obj.public_id,
            "object": "calendar",
            public_id_key_name: _get_namespace_public_id(obj),
            "name": obj.name,
            "description": obj.description,
            "read_only": obj.read_only,
        }

    elif isinstance(obj, When):
        # Get time dictionary e.g. 'start_time': x, 'end_time': y or 'date': z
        times = obj.get_time_dict()
        # NOTE(review): if get_time_dict() returns a plain dict, iteritems()
        # only exists on Python 2 - confirm the target interpreter.
        resp = {k: encode(v, legacy_nsid=legacy_nsid) for k, v in times.iteritems()}
        resp["object"] = _get_lowercase_class_name(obj)
        return resp

    elif isinstance(obj, Block):  # ie: Attachments/Files
        resp = {
            "id": obj.public_id,
            "object": "file",
            public_id_key_name: _get_namespace_public_id(obj),
            "content_type": obj.content_type,
            "size": obj.size,
            "filename": obj.filename,
        }
        if len(obj.parts):
            # if obj is actually a message attachment (and not merely an
            # uploaded file), set additional properties
            resp.update({"message_ids": [p.message.public_id for p in obj.parts]})

        return resp

    elif isinstance(obj, Category):
        # 'object' is set to 'folder' or 'label'
        resp = {
            "id": obj.public_id,
            "object": obj.type,
            public_id_key_name: _get_namespace_public_id(obj),
            "name": obj.name,
            "display_name": obj.api_display_name,
        }
        return resp
    # Any other type falls through and implicitly returns None.
def test_gmail_forwarded_msg():
    """A Gmail 'Forwarded message' block is not treated as a quotation."""
    msg_body = """<div dir="ltr"><br><div class="gmail_quote">---------- Forwarded message ----------<br>From: <b class="gmail_sendername">Bob</b> <span dir="ltr"><<a href="mailto:[email protected]">[email protected]</a>></span><br>Date: Fri, Feb 11, 2010 at 5:59 PM<br>Subject: Bob WFH today<br>To: Mary <<a href="mailto:[email protected]">[email protected]</a>><br><br><br><div dir="ltr">eom</div>
</div><br></div>"""

    extracted = quotations.extract_from_html(msg_body)

    # Input and output are compared with RE_WHITESPACE matches removed.
    expected = RE_WHITESPACE.sub('', msg_body)
    actual = RE_WHITESPACE.sub('', extracted)
    eq_(expected, actual)
def trim_html_with_talon(html):
    """Return *html* as processed by ``quotations.extract_from_html``."""
    trimmed = quotations.extract_from_html(html)
    return trimmed
def getConversationBody(conversation):
    """Return the lower-cased, tag-stripped body of *conversation*.

    The body is read from ``conversation.json()['conversation_message']
    ['body']``, passed through ``strip_tags`` and ``str.lower``, then through
    ``quotations.extract_from_html``. The result is logged at WARNING level
    and returned.
    """
    raw_body = conversation.json()['conversation_message']['body']
    user_question = strip_tags(raw_body).lower()
    # NOTE(review): tags are stripped before the HTML quotation extractor
    # runs - confirm this order is intended.
    user_question = quotations.extract_from_html(user_question)
    # logging.warn is a deprecated alias; logging.warning is the supported name.
    logging.warning(user_question)
    return user_question
def _encode(obj, namespace_public_id=None, expand=False, legacy_nsid=False):
    """
    Returns a dictionary representation of an Inbox model object obj, or
    None if there is no such representation defined. If the optional
    namespace_public_id parameter is passed, it will used instead of fetching
    the namespace public id for each object. This improves performance when
    serializing large numbers of objects, but also means that you must take
    care to ONLY serialize objects that belong to the given namespace!

    A few non-model values are handled as well: a datetime.datetime becomes
    the integer from calendar.timegm(), a datetime.date becomes its
    isoformat() string, and an Arrow object is re-encoded via its .datetime.

    Parameters
    ----------
    obj: object
        the object to serialize.
    namespace_public_id: string, optional
        public id of the namespace to which the object to serialize belongs.
    expand: bool, optional
        when true, a Message additionally carries a 'headers' dict and a
        Thread carries full 'messages'/'drafts' lists instead of
        'message_ids'/'draft_ids'.
    legacy_nsid: bool, optional
        when true, the namespace public id is emitted under 'namespace_id'
        instead of 'account_id', and Namespace/Account objects use their
        legacy representations.

    Returns
    -------
    dictionary or None
        (or an int / string for the datetime and date cases above)

    Raises
    ------
    Exception
        if obj is an Account and legacy_nsid is false.

    """
    def _get_namespace_public_id(obj):
        # Prefer the caller-supplied id; otherwise read it from the object.
        return namespace_public_id or obj.namespace.public_id

    def _format_participant_data(participant):
        """Event.participants is a JSON blob which may contain internal data.
        This function returns a dict with only the data we want to make
        public."""
        dct = {}
        for attribute in ['name', 'status', 'email', 'comment']:
            dct[attribute] = participant.get(attribute)

        return dct

    def _get_lowercase_class_name(obj):
        return type(obj).__name__.lower()

    # Name of the key under which the namespace public id is emitted.
    if legacy_nsid:
        public_id_key_name = 'namespace_id'
    else:
        public_id_key_name = 'account_id'

    # Flask's jsonify() doesn't handle datetimes or json arrays as primary
    # objects.
    # The datetime check must come before the date check: datetime.datetime
    # is a subclass of datetime.date.
    if isinstance(obj, datetime.datetime):
        return calendar.timegm(obj.utctimetuple())

    if isinstance(obj, datetime.date):
        return obj.isoformat()

    # NOTE(review): `encode` (no underscore) is not defined in this block;
    # presumably it is the public wrapper around _encode - confirm.
    if isinstance(obj, arrow.arrow.Arrow):
        return encode(obj.datetime, legacy_nsid=legacy_nsid)

    # TODO deprecate this and remove -- legacy_nsid
    elif isinstance(obj, Namespace) and legacy_nsid:
        return {
            'id': obj.public_id,
            'object': 'namespace',
            'namespace_id': obj.public_id,

            # Account specific
            'account_id': obj.account.public_id,
            'email_address': obj.account.email_address,
            'name': obj.account.name,
            'provider': obj.account.provider,
            'organization_unit': obj.account.category_type
        }
    elif isinstance(obj, Namespace):
        # these are now "Account" objects
        return {
            'id': obj.public_id,
            'object': 'account',
            'account_id': obj.public_id,

            'email_address': obj.account.email_address,
            'name': obj.account.name,
            'provider': obj.account.provider,
            'organization_unit': obj.account.category_type
        }

    elif isinstance(obj, Account) and not legacy_nsid:
        raise Exception("Should never be serializing accounts (legacy_nsid)")

    elif isinstance(obj, Account):
        # Only reachable when legacy_nsid is true (see the branch above).
        return {
            'account_id': obj.namespace.public_id,  # ugh
            'id': obj.namespace.public_id,  # ugh
            'object': 'account',
            'email_address': obj.email_address,
            'name': obj.name,
            'organization_unit': obj.category_type,
            'provider': obj.provider,

            # TODO add capabilities/scope (i.e. mail, contacts, cal, etc.)
            # 'status': 'syncing', # TODO what are values here
            # 'last_sync': 1398790077, # tuesday 4/29
        }

    elif isinstance(obj, Message):
        resp = {
            'id': obj.public_id,
            'object': 'message',
            public_id_key_name: _get_namespace_public_id(obj),
            'subject': obj.subject,
            'from': format_address_list(obj.from_addr),
            'reply_to': format_address_list(obj.reply_to),
            'to': format_address_list(obj.to_addr),
            'cc': format_address_list(obj.cc_addr),
            'bcc': format_address_list(obj.bcc_addr),
            'date': obj.received_date,
            'thread_id': obj.thread.public_id,
            'snippet': obj.snippet,
            'body': obj.body,
            # The body is passed through two quotation-stripping calls:
            # extract_from(..., 'text/html') and then extract_from_html.
            'text': quotations.extract_from_html(quotations.extract_from(obj.body, 'text/html')),
            'unread': not obj.is_read,
            'starred': obj.is_starred,
            'files': obj.api_attachment_metadata,
            'events': [encode(e, legacy_nsid=legacy_nsid) for e in obj.events]
        }

        # Folder-based accounts expose a single 'folder' (first category or
        # None); all others expose the full list as 'labels'.
        categories = format_categories(obj.categories)
        if obj.namespace.account.category_type == 'folder':
            resp['folder'] = categories[0] if categories else None
        else:
            resp['labels'] = categories

        # If the message is a draft (Inbox-created or otherwise):
        if obj.is_draft:
            resp['object'] = 'draft'
            resp['version'] = obj.version
            if obj.reply_to_message is not None:
                resp['reply_to_message_id'] = obj.reply_to_message.public_id
            else:
                resp['reply_to_message_id'] = None

        if expand:
            resp['headers'] = {
                'Message-Id': obj.message_id_header,
                'In-Reply-To': obj.in_reply_to,
                'References': obj.references
            }
        return resp

    elif isinstance(obj, Thread):
        base = {
            'id': obj.public_id,
            'object': 'thread',
            public_id_key_name: _get_namespace_public_id(obj),
            'subject': obj.subject,
            'participants': format_address_list(obj.participants),
            'last_message_timestamp': obj.recentdate,
            'last_message_received_timestamp': obj.receivedrecentdate,
            'first_message_timestamp': obj.subjectdate,
            'snippet': obj.snippet,
            'unread': obj.unread,
            'starred': obj.starred,
            'has_attachments': obj.has_attachments,
            'version': obj.version,
            # For backwards-compatibility -- remove after deprecating tags API
            'tags': obj.tags
        }

        # Unlike messages, a thread on a folder-based account exposes the
        # whole category list under 'folders'.
        categories = format_categories(obj.categories)
        if obj.namespace.account.category_type == 'folder':
            base['folders'] = categories
        else:
            base['labels'] = categories

        # Compact form: only ids, with drafts listed separately.
        if not expand:
            base['message_ids'] = \
                [m.public_id for m in obj.messages if not m.is_draft]
            base['draft_ids'] = [m.public_id for m in obj.drafts]
            return base

        # Expand messages within threads
        all_expanded_messages = []
        all_expanded_drafts = []
        for msg in obj.messages:
            # Same shape as the Message branch above, minus 'body', 'text'
            # and 'events'.
            resp = {
                'id': msg.public_id,
                'object': 'message',
                public_id_key_name: _get_namespace_public_id(msg),
                'subject': msg.subject,
                'from': format_address_list(msg.from_addr),
                'reply_to': format_address_list(msg.reply_to),
                'to': format_address_list(msg.to_addr),
                'cc': format_address_list(msg.cc_addr),
                'bcc': format_address_list(msg.bcc_addr),
                'date': msg.received_date,
                'thread_id': obj.public_id,
                'snippet': msg.snippet,
                'unread': not msg.is_read,
                'starred': msg.is_starred,
                'files': msg.api_attachment_metadata
            }

            # The account type is read from the thread (obj), not from msg.
            categories = format_categories(msg.categories)
            if obj.namespace.account.category_type == 'folder':
                resp['folder'] = categories[0] if categories else None
            else:
                resp['labels'] = categories

            if msg.is_draft:
                resp['object'] = 'draft'
                resp['version'] = msg.version
                if msg.reply_to_message is not None:
                    resp['reply_to_message_id'] = \
                        msg.reply_to_message.public_id
                else:
                    resp['reply_to_message_id'] = None
                all_expanded_drafts.append(resp)
            else:
                all_expanded_messages.append(resp)

        base['messages'] = all_expanded_messages
        base['drafts'] = all_expanded_drafts
        return base

    elif isinstance(obj, Contact):
        return {
            'id': obj.public_id,
            'object': 'contact',
            public_id_key_name: _get_namespace_public_id(obj),
            'name': obj.name,
            'email': obj.email_address
        }

    elif isinstance(obj, Event):
        resp = {
            'id': obj.public_id,
            'object': 'event',
            public_id_key_name: _get_namespace_public_id(obj),
            'calendar_id': obj.calendar.public_id if obj.calendar else None,
            'message_id': obj.message.public_id if obj.message else None,
            'title': obj.title,
            'description': obj.description,
            'owner': obj.owner,
            'participants': [_format_participant_data(participant)
                             for participant in obj.participants],
            'read_only': obj.read_only,
            'location': obj.location,
            'when': encode(obj.when, legacy_nsid=legacy_nsid),
            'busy': obj.busy,
            'status': obj.status,
        }
        # Both checks below are independent `if`s, so an object matching
        # both classes gets both sets of extra fields.
        if isinstance(obj, RecurringEvent):
            resp['recurrence'] = {
                'rrule': obj.recurring,
                'timezone': obj.start_timezone
            }
        if isinstance(obj, RecurringEventOverride):
            resp['original_start_time'] = encode(obj.original_start_time,
                                                 legacy_nsid=legacy_nsid)
            if obj.master:
                resp['master_event_id'] = obj.master.public_id
        return resp

    elif isinstance(obj, Calendar):
        return {
            'id': obj.public_id,
            'object': 'calendar',
            public_id_key_name: _get_namespace_public_id(obj),
            'name': obj.name,
            'description': obj.description,
            'read_only': obj.read_only,
        }

    elif isinstance(obj, When):
        # Get time dictionary e.g. 'start_time': x, 'end_time': y or 'date': z
        times = obj.get_time_dict()
        # NOTE(review): if get_time_dict() returns a plain dict, iteritems()
        # only exists on Python 2 - confirm the target interpreter.
        resp = {k: encode(v, legacy_nsid=legacy_nsid) for
                k, v in times.iteritems()}
        resp['object'] = _get_lowercase_class_name(obj)
        return resp

    elif isinstance(obj, Block):  # ie: Attachments/Files
        resp = {
            'id': obj.public_id,
            'object': 'file',
            public_id_key_name: _get_namespace_public_id(obj),
            'content_type': obj.content_type,
            'size': obj.size,
            'filename': obj.filename,
        }
        if len(obj.parts):
            # if obj is actually a message attachment (and not merely an
            # uploaded file), set additional properties
            resp.update({
                'message_ids': [p.message.public_id for p in obj.parts]
            })

        return resp

    elif isinstance(obj, Category):
        # 'object' is set to 'folder' or 'label'
        resp = {
            'id': obj.public_id,
            'object': obj.type,
            public_id_key_name: _get_namespace_public_id(obj),
            'name': obj.name,
            'display_name': obj.api_display_name
        }
        return resp
    # Any other type falls through and implicitly returns None.
def test_reply_separated_by_hr():
    """Only the 'Hi' / 'there' reply remains from REPLY_SEPARATED_BY_HR."""
    extracted = quotations.extract_from_html(REPLY_SEPARATED_BY_HR)
    compact = RE_WHITESPACE.sub('', extracted)
    expected = '<html><head></head><body><div>Hi<div>there</div></div></body></html>'
    eq_(expected, compact)
def test_OLK_SRC_BODY_SECTION_stripped():
    """Only the 'Reply' div remains after processing OLK_SRC_BODY_SECTION."""
    extracted = quotations.extract_from_html(OLK_SRC_BODY_SECTION)
    compact = RE_WHITESPACE.sub('', extracted)
    eq_('<html><head></head><body><div>Reply</div></body></html>', compact)
def test_OLK_SRC_BODY_SECTION_stripped():
    """Only the 'Reply' div remains (this variant expects no <head> element)."""
    extracted = quotations.extract_from_html(OLK_SRC_BODY_SECTION)
    compact = RE_WHITESPACE.sub('', extracted)
    eq_('<html><body><div>Reply</div></body></html>', compact)
def test_reply_separated_by_hr():
    """Only the 'Hi' / 'there' reply remains (no <head> element expected)."""
    extracted = quotations.extract_from_html(REPLY_SEPARATED_BY_HR)
    compact = RE_WHITESPACE.sub('', extracted)
    expected = '<html><body><div>Hi<div>there</div></div></body></html>'
    eq_(expected, compact)
def test_malformed_html():
    """Input consisting only of closing tags is returned unchanged."""
    malformed = '</body></html>'
    extracted = quotations.extract_from_html('</body></html>')
    eq_(malformed, extracted)