Python extract_from_htmlの例、talon.quotations.extract_from_html Pythonの例

コード例 #1

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_CRLF():
    """Verify carriage returns never surface as the '&#13;' entity.

    Covers a bare ``<html>`` document containing a CRLF pair, and a reply
    followed by a blockquote whose line feeds were all rewritten to CRLF.
    """
    cr_entity = '&#13;'

    bare_result = quotations.extract_from_html('<html>\r\n</html>')
    assert_false(cr_entity in bare_result)
    eq_('<html></html>', RE_WHITESPACE.sub('', bare_result))

    unix_body = """My
reply
<blockquote>

  <div>
    On 11-Apr-2011, at 6:54 PM, Bob &lt;[email protected]&gt; wrote:
  </div>

  <div>
    Test
  </div>

</blockquote>"""
    crlf_body = unix_body.replace('\n', '\r\n')
    reply_result = quotations.extract_from_html(crlf_body)
    assert_false(cr_entity in reply_result)
    # Line breaks must survive; dropping them would fuse "My reply" into
    # the single word "Myreply".
    expected = "<html><head></head><body>My\nreply\n</body></html>"
    eq_(expected, reply_result)

コード例 #2

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: CatalinBraescu/talon

def test_reply_quotations_share_block():
    """The fixture's HTML part yields a non-empty result without 'From'."""
    parsed = mime.from_string(REPLY_QUOTATIONS_SHARE_BLOCK)
    parts = list(parsed.walk())
    html_part = parts[1]
    assert html_part.content_type == 'text/html'

    result = quotations.extract_from_html(html_part.body)
    ok_(result)
    ok_('From' not in result)

コード例 #3

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_no_blockquote():
    """Only 'Reply' remains when the quoted divs are not in a blockquote.

    Both sides are compared with all whitespace removed.
    """
    source_html = """
<html>
<body>
Reply

<div>
  On 11-Apr-2011, at 6:54 PM, Bob &lt;[email protected]&gt; wrote:
</div>

<div>
  Test
</div>
</body>
</html>
"""

    expected_html = """
<html>
<head></head>
<body>
Reply

</body></html>"""
    expected = RE_WHITESPACE.sub('', expected_html)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #4

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: clara-labs/talon

def test_blockquote_disclaimer():
    """The blockquote is dropped while the trailing disclaimer div is kept.

    Expected and actual markup are compared with whitespace removed.
    """
    source_html = """
<html>
  <body>
  <div>
    <div>
      message
    </div>
    <blockquote>
      Quote
    </blockquote>
  </div>
  <div>
    disclaimer
  </div>
  </body>
</html>
"""

    expected_html = """
<html>
  <body>
  <div>
    <div>
      message
    </div>
  </div>
  <div>
    disclaimer
  </div>
  </body>
</html>
"""
    expected = RE_WHITESPACE.sub('', expected_html)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #5

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_too_large_html():
    """Extraction output must equal the input once whitespace is removed.

    NOTE(review): the name suggests a size limit is lowered elsewhere (for
    example by a decorator not visible here) -- confirm against the caller.
    """
    original = (
        'Reply'
        '<div class="gmail_quote">'
        '<div class="gmail_quote">On 11-Apr-2011, at 6:54 PM, Bob &lt;[email protected]&gt; wrote:'
        '<div>Test</div>'
        '</div>'
        '</div>'
    )
    expected = RE_WHITESPACE.sub('', original)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(original))
    eq_(expected, actual)

コード例 #6

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: JordanReiter/talon

def test_no_gmail_quote_false_positive():
    """A lone gmail_quote div keeps its text; only the class is gone."""
    source_html = """
    <html><body>
    <div class="gmail_quote">
      broken_email_client_sent_this
    </div>
    </body></html>"""
    expected = "<html><body><div>broken_email_client_sent_this</div></body></html>"
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #7

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: dichen001/talon

def extract_reply_and_check(filename):
    """Extract the reply from an HTML fixture and compare it to the expected text.

    Reads ``filename``, runs it through ``quotations.extract_from_html``,
    converts the result to text with ``u.html_to_text`` and asserts that it
    matches "Hi. I am fine. / Thanks, / Alex" once whitespace is removed.

    :param filename: path of the HTML message fixture to read.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename) as f:
        msg_body = f.read()

    reply = quotations.extract_from_html(msg_body)
    plain_reply = u.html_to_text(reply)

    eq_(RE_WHITESPACE.sub('', "Hi. I am fine.\n\nThanks,\nAlex"),
        RE_WHITESPACE.sub('', plain_reply))

コード例 #8

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_gmail_quote_compact():
    """Nested gmail_quote divs written without whitespace reduce to 'Reply'."""
    source_html = (
        'Reply'
        '<div class="gmail_quote">'
        '<div class="gmail_quote">On 11-Apr-2011, at 6:54 PM, Bob &lt;[email protected]&gt; wrote:'
        '<div>Test</div>'
        '</div>'
        '</div>'
    )
    expected = "<html><head></head><body>Reply</body></html>"
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #9

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_gmail_quote_blockquote():
    """Output must equal the input once whitespace is removed."""
    source_html = """Message
<blockquote class="gmail_quote">
  <div class="gmail_default">
    My name is William Shakespeare.
    <br/>
  </div>
</blockquote>"""
    expected = RE_WHITESPACE.sub('', source_html)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #10

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: clara-labs/talon

def test_CRLF():
    """Check that CR characters pass through instead of becoming '&#13;'.

    A bare document must come back unchanged, and a CRLF-delimited reply
    followed by a blockquote must keep its CRLF inside the resulting <p>.
    """
    bare_html = '<html>\r\n</html>'
    eq_('<html>\r\n</html>', quotations.extract_from_html(bare_html))

    unix_body = """Reply
<blockquote>

  <div>
    On 11-Apr-2011, at 6:54 PM, Bob &lt;[email protected]&gt; wrote:
  </div>

  <div>
    Test
  </div>

</blockquote>"""
    crlf_body = unix_body.replace('\n', '\r\n')
    expected = "<html><body><p>Reply\r\n</p></body></html>"
    eq_(expected, quotations.extract_from_html(crlf_body))

コード例 #11

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: clara-labs/talon

def test_gmail_quote():
    """Nested gmail_quote divs are removed, leaving 'Reply' inside a <p>."""
    source_html = """Reply
<div class="gmail_quote">
  <div class="gmail_quote">
    On 11-Apr-2011, at 6:54 PM, Bob &lt;[email protected]&gt; wrote:
    <div>
      Test
    </div>
  </div>
</div>"""
    expected = "<html><body><p>Reply</p></body></html>"
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #12

0

ファイルを表示

ファイル: email_mirror.py プロジェクト: joydeep1701/zulip

def extract_body(message: message.Message) -> Text:
    """Return the reply text of *message* with quoted content stripped.

    The text/plain part is preferred. When only a text/html part exists, its
    extracted reply is converted to Markdown. Raises ZulipEmailForwardError
    when neither part is found.
    """
    plain_part = get_message_part_by_type(message, "text/plain")
    if plain_part:
        return quotations.extract_from_plain(plain_part)

    # No plaintext available: fall back to the HTML part and tidy it up.
    html_part = get_message_part_by_type(message, "text/html")
    if not html_part:
        raise ZulipEmailForwardError("Unable to find plaintext or HTML message body")

    stripped_html = quotations.extract_from_html(html_part)
    return convert_html_to_markdown(stripped_html)

コード例 #13

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_unicode_in_reply():
    """UTF-8 encoded input with non-breaking spaces yields '&#160;' entities."""
    encoded_body = u"""Reply \xa0 \xa0 Text<br>

<div>
  <br>
</div>

<blockquote>
  Quote
</blockquote>""".encode("utf-8")

    expected = ("<html><head></head><body>Reply&#160;&#160;Text<br><div><br></div>"
                "</body></html>")
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(encoded_body))
    eq_(expected, actual)

コード例 #14

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_from_block():
    """The div holding the <hr> and From/Date/To/Subject lines is removed."""
    source_html = """<div>
message<br>
<div>
<hr>
From: <a href="mailto:[email protected]">[email protected]</a><br>
Date: Fri, 23 Mar 2012 12:35:31 -0600<br>
To: <a href="mailto:[email protected]">[email protected]</a><br>
Subject: You Have New Mail From Mary!<br><br>

text
</div></div>
"""
    expected = '<html><head></head><body><div>message<br></div></body></html>'
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #15

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_quotation_splitter_outside_blockquote():
    """An 'On ... wrote:' div placed before the blockquote is removed too."""
    source_html = """Reply

<div>
  On 11-Apr-2011, at 6:54 PM, Bob &lt;[email protected]&gt; wrote:
</div>

<blockquote>
  <div>
    Test
  </div>
</blockquote>
"""
    expected = "<html><head></head><body>Reply</body></html>"
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #16

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def extract_reply_and_check(filename):
    """Extract the reply from an HTML fixture and compare it to the expected text.

    Reads ``filename`` (as UTF-8 on Python 3), runs it through
    ``quotations.extract_from_html``, converts the result with
    ``u.html_to_text``, decodes it as UTF-8 and asserts that it matches
    "Hi. I am fine. / Thanks, / Alex" once whitespace is removed.

    :param filename: path of the HTML message fixture to read.
    """
    import sys
    kwargs = {}
    # The ``encoding`` keyword is only passed to open() on Python 3.
    if sys.version_info > (3, 0):
        kwargs["encoding"] = "utf8"

    # The context manager closes the handle even if read() raises.
    with open(filename, **kwargs) as f:
        msg_body = f.read()

    reply = quotations.extract_from_html(msg_body)
    plain_reply = u.html_to_text(reply)
    plain_reply = plain_reply.decode('utf8')

    eq_(RE_WHITESPACE.sub('', "Hi. I am fine.\n\nThanks,\nAlex"),
        RE_WHITESPACE.sub('', plain_reply))

コード例 #17

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_reply_shares_div_with_from_block():
    """Reply text survives when it shares a div with the <hr> header block."""
    source_html = '''
<body>
  <div>

    Blah<br><br>

    <hr>Date: Tue, 22 May 2012 18:29:16 -0600<br>
    To: [email protected]<br>
    From: [email protected]<br>
    Subject: You Have New Mail From x!<br><br>

  </div>
</body>'''
    expected = '<html><head></head><body><div>Blah<br><br></div></body></html>'
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #18

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_regular_blockquote():
    """The blockquote before the 'On ... wrote:' div is kept; the later one goes."""
    source_html = """Reply
<blockquote>Regular</blockquote>

<div>
  On 11-Apr-2011, at 6:54 PM, Bob &lt;[email protected]&gt; wrote:
</div>

<blockquote>
  <div>
    <blockquote>Nested</blockquote>
  </div>
</blockquote>
"""
    expected = "<html><head></head><body>Reply<blockquote>Regular</blockquote></body></html>"
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #19

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: clara-labs/talon

def test_quotation_splitter_inside_blockquote():
    """A blockquote containing the 'On ... wrote:' line is removed entirely.

    The comparison is exact, so the newline after 'Reply' must be preserved.
    """
    source_html = """Reply
<blockquote>

  <div>
    On 11-Apr-2011, at 6:54 PM, Bob &lt;[email protected]&gt; wrote:
  </div>

  <div>
    Test
  </div>

</blockquote>"""

    expected = "<html><body><p>Reply\n</p></body></html>"
    eq_(expected, quotations.extract_from_html(source_html))

コード例 #20

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_readable_html_empty():
    """Output must equal the input once whitespace is removed.

    NOTE(review): the name hints that a helper is stubbed to return an empty
    value elsewhere (for example by a decorator not visible here) -- confirm.
    """
    source_html = """
<blockquote>
  Reply
  <div>
    On 11-Apr-2011, at 6:54 PM, Bob &lt;[email protected]&gt; wrote:
  </div>

  <div>
    Test
  </div>

</blockquote>"""

    expected = RE_WHITESPACE.sub('', source_html)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #21

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: clara-labs/talon

def extract_reply_and_check(filename):
    """Extract the reply from an HTML fixture and check the resulting text.

    Reads ``filename``, runs it through ``quotations.extract_from_html`` and
    renders the result with html2text (line wrapping disabled). The check
    passes when the text matches ``RE_REPLY``; otherwise it must equal
    "Hi. I am fine.\\n\\nThanks,\\nAlex" exactly.

    :param filename: path of the HTML message fixture to read.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename) as f:
        msg_body = f.read()

    reply = quotations.extract_from_html(msg_body)

    h = html2text.HTML2Text()
    h.body_width = 0
    plain_reply = h.handle(reply)

    # Replace non-breaking spaces (&nbsp;) with regular spaces.
    plain_reply = plain_reply.replace(u'\xa0', u' ')

    # A regex match is accepted as-is; only a mismatch falls through to the
    # strict comparison, which then reports the difference.
    if not RE_REPLY.match(plain_reply):
        eq_("Hi. I am fine.\n\nThanks,\nAlex", plain_reply)

コード例 #22

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_from_block_and_quotations_in_separate_divs():
    """Header div and quoted-message div are both removed; the <hr> stays."""
    source_html = '''
Reply
<div>
  <hr/>
  <div>
    <font>
      <b>From: [email protected]</b>
      <b>Date: Thu, 24 Mar 2016 08:07:12 -0700</b>
    </font>
  </div>
  <div>
    Quoted message
  </div>
</div>
'''
    expected = '<html><head></head><body>Reply<div><hr></div></body></html>'
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #23

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_validate_output_html():
    """Output is wrapped in <html> tags and contains no self-closed <div/>."""
    source_html = """Reply
<div>
  On 11-Apr-2011, at 6:54 PM, Bob &lt;[email protected]&gt; wrote:

    <blockquote>
      <div>
        Test
      </div>
    </blockquote>
</div>

<div/>
"""
    result = quotations.extract_from_html(source_html)

    has_open_tag = '<html>' in result
    has_close_tag = '</html>' in result
    ok_(has_open_tag and has_close_tag,
        'Invalid HTML - <html>/</html> tag not present')

    has_self_closed_div = '<div/>' in result
    ok_(not has_self_closed_div,
        'Invalid HTML output - <div/> element is not valid')

コード例 #24

0

ファイルを表示

def test_blockquote_disclaimer():
    """The blockquote is dropped while the trailing disclaimer div is kept.

    The expected markup includes an empty <head>; both sides are compared
    with whitespace removed.
    """
    source_html = """
<html>
  <body>
  <div>
    <div>
      message
    </div>
    <blockquote>
      Quote
    </blockquote>
  </div>
  <div>
    disclaimer
  </div>
  </body>
</html>
"""

    expected_html = """
<html>
  <head></head>
  <body>
  <div>
    <div>
      message
    </div>
  </div>
  <div>
    disclaimer
  </div>
  </body>
</html>
"""
    expected = RE_WHITESPACE.sub('', expected_html)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(source_html))
    eq_(expected, actual)

コード例 #25

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_empty_body():
    """An empty input string must produce an empty output string."""
    result = quotations.extract_from_html('')
    eq_('', result)

コード例 #26

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: cleverly-ai/talon

def test_empty_body():
    """Extracting from an empty string returns an empty string."""
    extracted = quotations.extract_from_html("")
    eq_("", extracted)

コード例 #27

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: cleverly-ai/talon

def test_OLK_SRC_BODY_SECTION_stripped():
    """The OLK_SRC_BODY_SECTION fixture reduces to a single 'Reply' div."""
    expected = "<html><head></head><body><div>Reply</div></body></html>"
    extracted = quotations.extract_from_html(OLK_SRC_BODY_SECTION)
    eq_(expected, RE_WHITESPACE.sub("", extracted))

コード例 #28

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_bad_html():
    """A document with no body content is returned unchanged."""
    empty_document = "<html></html>"
    result = quotations.extract_from_html(empty_document)
    eq_(empty_document, result)

コード例 #29

0

ファイルを表示

import sys
import talon
import base64
from talon import quotations

talon.init()

# The first CLI argument selects the extractor: 'html' (case-insensitive)
# uses the HTML extractor; anything else is treated as plain text.
body_kind = sys.argv[1]

# stdin carries the base64-encoded message, possibly wrapped over several
# lines; trailing whitespace is stripped from each line before decoding.
encoded = "".join(line.rstrip() for line in sys.stdin)
html = base64.b64decode(encoded)

if body_kind.lower() == 'html':
    reply = quotations.extract_from_html(html)
else:
    # The content type must be the MIME type 'text/plain' so that
    # extract_from() selects the plain-text extractor.
    reply = quotations.extract_from(html, 'text/plain')

# e.g. reply == "<html><body><p>Reply</p></body></html>"
print("%s" % reply)

コード例 #30

0

ファイルを表示

ファイル: kellogs.py プロジェクト: steffenmllr/sync-engine

def _encode(obj, namespace_public_id=None, expand=False, legacy_nsid=False):
    """
    Returns a dictionary representation of an Inbox model object obj, or
    None if there is no such representation defined. If the optional
    namespace_public_id parameter is passed, it will used instead of fetching
    the namespace public id for each object. This improves performance when
    serializing large numbers of objects, but also means that you must take
    care to ONLY serialize objects that belong to the given namespace!

    Dispatch is a chain of isinstance() checks; when no check matches, the
    function falls off the end and implicitly returns None. Nested values are
    serialized through ``encode`` (a name not defined in this function;
    presumably the module's public wrapper -- confirm).

    Parameters
    ----------
    namespace_public_id: string, optional
        public id of the namespace to which the object to serialize belongs.
    expand: bool, optional
        For a Message, adds a "headers" dict. For a Thread, embeds full
        "messages"/"drafts" lists instead of "message_ids"/"draft_ids".
    legacy_nsid: bool, optional
        When true, the owning-namespace key is named "namespace_id" instead
        of "account_id", Namespace objects are emitted as "namespace"
        objects, and Account objects may be serialized.

    Returns
    -------
    dictionary or None
        datetime.datetime inputs return an integer timestamp and
        datetime.date inputs return an ISO-format string instead.

    Raises
    ------
    Exception
        If obj is an Account and legacy_nsid is false.

    """

    def _get_namespace_public_id(obj):
        # Prefer the caller-supplied id; otherwise look it up on the object.
        return namespace_public_id or obj.namespace.public_id

    def _format_participant_data(participant):
        """Event.participants is a JSON blob which may contain internal data.
        This function returns a dict with only the data we want to make
        public."""
        dct = {}
        for attribute in ["name", "status", "email", "comment"]:
            dct[attribute] = participant.get(attribute)

        return dct

    def _get_lowercase_class_name(obj):
        return type(obj).__name__.lower()

    # Name of the key under which the owning namespace's public id is emitted.
    if legacy_nsid:
        public_id_key_name = "namespace_id"
    else:
        public_id_key_name = "account_id"

    # Flask's jsonify() doesn't handle datetimes or json arrays as primary
    # objects.
    # datetime.datetime is a subclass of datetime.date, so it is tested first.
    if isinstance(obj, datetime.datetime):
        return calendar.timegm(obj.utctimetuple())

    if isinstance(obj, datetime.date):
        return obj.isoformat()

    if isinstance(obj, arrow.arrow.Arrow):
        return encode(obj.datetime, legacy_nsid=legacy_nsid)

    # TODO deprecate this and remove -- legacy_nsid
    elif isinstance(obj, Namespace) and legacy_nsid:
        return {
            "id": obj.public_id,
            "object": "namespace",
            "namespace_id": obj.public_id,
            # Account specific
            "account_id": obj.account.public_id,
            "email_address": obj.account.email_address,
            "name": obj.account.name,
            "provider": obj.account.provider,
            "organization_unit": obj.account.category_type,
        }
    elif isinstance(obj, Namespace):  # these are now "Account" objects
        return {
            "id": obj.public_id,
            "object": "account",
            "account_id": obj.public_id,
            "email_address": obj.account.email_address,
            "name": obj.account.name,
            "provider": obj.account.provider,
            "organization_unit": obj.account.category_type,
        }

    elif isinstance(obj, Account) and not legacy_nsid:
        raise Exception("Should never be serializing accounts (legacy_nsid)")

    elif isinstance(obj, Account):
        return {
            "account_id": obj.namespace.public_id,  # ugh
            "id": obj.namespace.public_id,  # ugh
            "object": "account",
            "email_address": obj.email_address,
            "name": obj.name,
            "organization_unit": obj.category_type,
            "provider": obj.provider,
            # TODO add capabilities/scope (i.e. mail, contacts, cal, etc.)
            # 'status':  'syncing',  # TODO what are values here
            # 'last_sync':  1398790077,  # tuesday 4/29
        }

    elif isinstance(obj, Message):
        resp = {
            "id": obj.public_id,
            "object": "message",
            public_id_key_name: _get_namespace_public_id(obj),
            "subject": obj.subject,
            "from": format_address_list(obj.from_addr),
            "reply_to": format_address_list(obj.reply_to),
            "to": format_address_list(obj.to_addr),
            "cc": format_address_list(obj.cc_addr),
            "bcc": format_address_list(obj.bcc_addr),
            "date": obj.received_date,
            "thread_id": obj.thread.public_id,
            "snippet": obj.snippet,
            "body": obj.body,
            # NOTE(review): the result of extract_from(..., "text/html") is
            # passed through extract_from_html again -- looks like a double
            # extraction; confirm this is intended.
            "text": quotations.extract_from_html(quotations.extract_from(obj.body, "text/html")),
            "unread": not obj.is_read,
            "starred": obj.is_starred,
            "files": obj.api_attachment_metadata,
            "events": [encode(e, legacy_nsid=legacy_nsid) for e in obj.events],
        }

        # "folder" accounts expose a single folder (the first category, or
        # None); all other accounts expose the full list under "labels".
        categories = format_categories(obj.categories)
        if obj.namespace.account.category_type == "folder":
            resp["folder"] = categories[0] if categories else None
        else:
            resp["labels"] = categories

        # If the message is a draft (Inbox-created or otherwise):
        if obj.is_draft:
            resp["object"] = "draft"
            resp["version"] = obj.version
            if obj.reply_to_message is not None:
                resp["reply_to_message_id"] = obj.reply_to_message.public_id
            else:
                resp["reply_to_message_id"] = None

        if expand:
            resp["headers"] = {
                "Message-Id": obj.message_id_header,
                "In-Reply-To": obj.in_reply_to,
                "References": obj.references,
            }

        return resp

    elif isinstance(obj, Thread):
        base = {
            "id": obj.public_id,
            "object": "thread",
            public_id_key_name: _get_namespace_public_id(obj),
            "subject": obj.subject,
            "participants": format_address_list(obj.participants),
            "last_message_timestamp": obj.recentdate,
            "last_message_received_timestamp": obj.receivedrecentdate,
            "first_message_timestamp": obj.subjectdate,
            "snippet": obj.snippet,
            "unread": obj.unread,
            "starred": obj.starred,
            "has_attachments": obj.has_attachments,
            "version": obj.version,
            # For backwards-compatibility -- remove after deprecating tags API
            "tags": obj.tags,
        }

        # Unlike a single message, a thread in a "folder" account lists all
        # of its categories under "folders".
        categories = format_categories(obj.categories)
        if obj.namespace.account.category_type == "folder":
            base["folders"] = categories
        else:
            base["labels"] = categories

        # Compact form: only ids, with drafts reported separately.
        if not expand:
            base["message_ids"] = [m.public_id for m in obj.messages if not m.is_draft]
            base["draft_ids"] = [m.public_id for m in obj.drafts]
            return base

        # Expand messages within threads
        all_expanded_messages = []
        all_expanded_drafts = []
        for msg in obj.messages:
            # The expanded per-message dict has no "body", "text" or "events"
            # keys, unlike the standalone Message representation.
            resp = {
                "id": msg.public_id,
                "object": "message",
                public_id_key_name: _get_namespace_public_id(msg),
                "subject": msg.subject,
                "from": format_address_list(msg.from_addr),
                "reply_to": format_address_list(msg.reply_to),
                "to": format_address_list(msg.to_addr),
                "cc": format_address_list(msg.cc_addr),
                "bcc": format_address_list(msg.bcc_addr),
                "date": msg.received_date,
                "thread_id": obj.public_id,
                "snippet": msg.snippet,
                "unread": not msg.is_read,
                "starred": msg.is_starred,
                "files": msg.api_attachment_metadata,
            }
            categories = format_categories(msg.categories)
            # The account type is read from the thread (obj), not from msg.
            if obj.namespace.account.category_type == "folder":
                resp["folder"] = categories[0] if categories else None
            else:
                resp["labels"] = categories

            if msg.is_draft:
                resp["object"] = "draft"
                resp["version"] = msg.version
                if msg.reply_to_message is not None:
                    resp["reply_to_message_id"] = msg.reply_to_message.public_id
                else:
                    resp["reply_to_message_id"] = None
                all_expanded_drafts.append(resp)
            else:
                all_expanded_messages.append(resp)

        base["messages"] = all_expanded_messages
        base["drafts"] = all_expanded_drafts
        return base

    elif isinstance(obj, Contact):
        return {
            "id": obj.public_id,
            "object": "contact",
            public_id_key_name: _get_namespace_public_id(obj),
            "name": obj.name,
            "email": obj.email_address,
        }

    elif isinstance(obj, Event):
        resp = {
            "id": obj.public_id,
            "object": "event",
            public_id_key_name: _get_namespace_public_id(obj),
            "calendar_id": obj.calendar.public_id if obj.calendar else None,
            "message_id": obj.message.public_id if obj.message else None,
            "title": obj.title,
            "description": obj.description,
            "owner": obj.owner,
            "participants": [_format_participant_data(participant) for participant in obj.participants],
            "read_only": obj.read_only,
            "location": obj.location,
            "when": encode(obj.when, legacy_nsid=legacy_nsid),
            "busy": obj.busy,
            "status": obj.status,
        }
        # These two checks are independent `if`s, so an object matching both
        # classes receives both sets of extra keys.
        if isinstance(obj, RecurringEvent):
            resp["recurrence"] = {"rrule": obj.recurring, "timezone": obj.start_timezone}
        if isinstance(obj, RecurringEventOverride):
            resp["original_start_time"] = encode(obj.original_start_time, legacy_nsid=legacy_nsid)
            if obj.master:
                resp["master_event_id"] = obj.master.public_id
        return resp

    elif isinstance(obj, Calendar):
        return {
            "id": obj.public_id,
            "object": "calendar",
            public_id_key_name: _get_namespace_public_id(obj),
            "name": obj.name,
            "description": obj.description,
            "read_only": obj.read_only,
        }

    elif isinstance(obj, When):
        # Get time dictionary e.g. 'start_time': x, 'end_time': y or 'date': z
        times = obj.get_time_dict()
        # iteritems() is the Python 2 dict API.
        resp = {k: encode(v, legacy_nsid=legacy_nsid) for k, v in times.iteritems()}
        resp["object"] = _get_lowercase_class_name(obj)
        return resp

    elif isinstance(obj, Block):  # ie: Attachments/Files
        resp = {
            "id": obj.public_id,
            "object": "file",
            public_id_key_name: _get_namespace_public_id(obj),
            "content_type": obj.content_type,
            "size": obj.size,
            "filename": obj.filename,
        }
        if len(obj.parts):
            # if obj is actually a message attachment (and not merely an
            # uploaded file), set additional properties
            resp.update({"message_ids": [p.message.public_id for p in obj.parts]})

        return resp

    elif isinstance(obj, Category):
        # 'object' is set to 'folder' or 'label'
        resp = {
            "id": obj.public_id,
            "object": obj.type,
            public_id_key_name: _get_namespace_public_id(obj),
            "name": obj.name,
            "display_name": obj.api_display_name,
        }
        return resp

コード例 #31

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: dichen001/talon

def test_gmail_forwarded_msg():
    """A Gmail forwarded message must come back unchanged modulo whitespace."""
    forwarded_html = """<div dir="ltr"><br><div class="gmail_quote">---------- Forwarded message ----------<br>From: <b class="gmail_sendername">Bob</b> <span dir="ltr">&lt;<a href="mailto:[email protected]">[email protected]</a>&gt;</span><br>Date: Fri, Feb 11, 2010 at 5:59 PM<br>Subject: Bob WFH today<br>To: Mary &lt;<a href="mailto:[email protected]">[email protected]</a>&gt;<br><br><br><div dir="ltr">eom</div>
</div><br></div>"""
    result = quotations.extract_from_html(forwarded_html)
    expected = RE_WHITESPACE.sub('', forwarded_html)
    actual = RE_WHITESPACE.sub('', result)
    eq_(expected, actual)

コード例 #32

0

ファイルを表示

def trim_html_with_talon(html):
    """Return *html* as processed by ``quotations.extract_from_html``."""
    trimmed = quotations.extract_from_html(html)
    return trimmed

コード例 #33

0

ファイルを表示

def getConversationBody(conversation):
    """Return the lower-cased, tag-stripped body of a conversation message.

    Reads ``conversation.json()['conversation_message']['body']``, removes
    markup with ``strip_tags``, lower-cases the text, passes it through
    ``quotations.extract_from_html`` and logs the result at WARNING level.

    :param conversation: object whose ``json()`` returns the payload dict.
    :returns: the processed message text.
    """
    payload = conversation.json()
    raw_body = payload['conversation_message']['body']
    user_question = strip_tags(raw_body).lower()
    user_question = quotations.extract_from_html(user_question)
    # logging.warn is a deprecated alias; logging.warning is the supported name.
    logging.warning(user_question)
    return user_question

コード例 #34

0

ファイルを表示

ファイル: kellogs.py プロジェクト: steffenmllr/sync-engine

def _encode(obj, namespace_public_id=None, expand=False, legacy_nsid=False):
    """
    Returns a dictionary representation of an Inbox model object obj, or
    None if there is no such representation defined. If the optional
    namespace_public_id parameter is passed, it will used instead of fetching
    the namespace public id for each object. This improves performance when
    serializing large numbers of objects, but also means that you must take
    care to ONLY serialize objects that belong to the given namespace!

    Parameters
    ----------
    namespace_public_id: string, optional
        public id of the namespace to which the object to serialize belongs.

    Returns
    -------
    dictionary or None

    """
    def _get_namespace_public_id(obj):
        return namespace_public_id or obj.namespace.public_id

    def _format_participant_data(participant):
        """Event.participants is a JSON blob which may contain internal data.
        This function returns a dict with only the data we want to make
        public."""
        dct = {}
        for attribute in ['name', 'status', 'email', 'comment']:
            dct[attribute] = participant.get(attribute)

        return dct

    def _get_lowercase_class_name(obj):
        return type(obj).__name__.lower()

    if legacy_nsid:
        public_id_key_name = 'namespace_id'
    else:
        public_id_key_name = 'account_id'

    # Flask's jsonify() doesn't handle datetimes or json arrays as primary
    # objects.
    if isinstance(obj, datetime.datetime):
        return calendar.timegm(obj.utctimetuple())

    if isinstance(obj, datetime.date):
        return obj.isoformat()

    if isinstance(obj, arrow.arrow.Arrow):
        return encode(obj.datetime, legacy_nsid=legacy_nsid)

    # TODO deprecate this and remove -- legacy_nsid
    elif isinstance(obj, Namespace) and legacy_nsid:
        return {
            'id': obj.public_id,
            'object': 'namespace',
            'namespace_id': obj.public_id,

            # Account specific
            'account_id': obj.account.public_id,
            'email_address': obj.account.email_address,
            'name': obj.account.name,
            'provider': obj.account.provider,
            'organization_unit': obj.account.category_type
        }
    elif isinstance(obj, Namespace):  # these are now "Account" objects
        return {
            'id': obj.public_id,
            'object': 'account',
            'account_id': obj.public_id,

            'email_address': obj.account.email_address,
            'name': obj.account.name,
            'provider': obj.account.provider,
            'organization_unit': obj.account.category_type
        }

    elif isinstance(obj, Account) and not legacy_nsid:
        raise Exception("Should never be serializing accounts (legacy_nsid)")

    elif isinstance(obj, Account):
        return {
            'account_id': obj.namespace.public_id,  # ugh
            'id': obj.namespace.public_id,  # ugh
            'object': 'account',
            'email_address': obj.email_address,
            'name': obj.name,
            'organization_unit': obj.category_type,

            'provider': obj.provider,

            # TODO add capabilities/scope (i.e. mail, contacts, cal, etc.)

            # 'status':  'syncing',  # TODO what are values here
            # 'last_sync':  1398790077,  # tuesday 4/29
        }

    elif isinstance(obj, Message):
        resp = {
            'id': obj.public_id,
            'object': 'message',
            public_id_key_name: _get_namespace_public_id(obj),
            'subject': obj.subject,
            'from': format_address_list(obj.from_addr),
            'reply_to': format_address_list(obj.reply_to),
            'to': format_address_list(obj.to_addr),
            'cc': format_address_list(obj.cc_addr),
            'bcc': format_address_list(obj.bcc_addr),
            'date': obj.received_date,
            'thread_id': obj.thread.public_id,
            'snippet': obj.snippet,
            'body': obj.body,
            'text': quotations.extract_from_html(quotations.extract_from(obj.body, 'text/html')),
            'unread': not obj.is_read,
            'starred': obj.is_starred,
            'files': obj.api_attachment_metadata,
            'events': [encode(e, legacy_nsid=legacy_nsid) for e in obj.events]
        }

        categories = format_categories(obj.categories)
        if obj.namespace.account.category_type == 'folder':
            resp['folder'] = categories[0] if categories else None
        else:
            resp['labels'] = categories

        # If the message is a draft (Inbox-created or otherwise):
        if obj.is_draft:
            resp['object'] = 'draft'
            resp['version'] = obj.version
            if obj.reply_to_message is not None:
                resp['reply_to_message_id'] = obj.reply_to_message.public_id
            else:
                resp['reply_to_message_id'] = None

        if expand:
            resp['headers'] = {
                'Message-Id': obj.message_id_header,
                'In-Reply-To': obj.in_reply_to,
                'References': obj.references
            }

        return resp

    elif isinstance(obj, Thread):
        base = {
            'id': obj.public_id,
            'object': 'thread',
            public_id_key_name: _get_namespace_public_id(obj),
            'subject': obj.subject,
            'participants': format_address_list(obj.participants),
            'last_message_timestamp': obj.recentdate,
            'last_message_received_timestamp': obj.receivedrecentdate,
            'first_message_timestamp': obj.subjectdate,
            'snippet': obj.snippet,
            'unread': obj.unread,
            'starred': obj.starred,
            'has_attachments': obj.has_attachments,
            'version': obj.version,
            # For backwards-compatibility -- remove after deprecating tags API
            'tags': obj.tags
        }

        categories = format_categories(obj.categories)
        if obj.namespace.account.category_type == 'folder':
            base['folders'] = categories
        else:
            base['labels'] = categories

        if not expand:
            base['message_ids'] = \
                [m.public_id for m in obj.messages if not m.is_draft]
            base['draft_ids'] = [m.public_id for m in obj.drafts]
            return base

        # Expand messages within threads
        all_expanded_messages = []
        all_expanded_drafts = []
        for msg in obj.messages:
            resp = {
                'id': msg.public_id,
                'object': 'message',
                public_id_key_name: _get_namespace_public_id(msg),
                'subject': msg.subject,
                'from': format_address_list(msg.from_addr),
                'reply_to': format_address_list(msg.reply_to),
                'to': format_address_list(msg.to_addr),
                'cc': format_address_list(msg.cc_addr),
                'bcc': format_address_list(msg.bcc_addr),
                'date': msg.received_date,
                'thread_id': obj.public_id,
                'snippet': msg.snippet,
                'unread': not msg.is_read,
                'starred': msg.is_starred,
                'files': msg.api_attachment_metadata
            }
            categories = format_categories(msg.categories)
            if obj.namespace.account.category_type == 'folder':
                resp['folder'] = categories[0] if categories else None
            else:
                resp['labels'] = categories

            if msg.is_draft:
                resp['object'] = 'draft'
                resp['version'] = msg.version
                if msg.reply_to_message is not None:
                    resp['reply_to_message_id'] = \
                        msg.reply_to_message.public_id
                else:
                    resp['reply_to_message_id'] = None
                all_expanded_drafts.append(resp)
            else:
                all_expanded_messages.append(resp)

        base['messages'] = all_expanded_messages
        base['drafts'] = all_expanded_drafts
        return base

    elif isinstance(obj, Contact):
        return {
            'id': obj.public_id,
            'object': 'contact',
            public_id_key_name: _get_namespace_public_id(obj),
            'name': obj.name,
            'email': obj.email_address
        }

    elif isinstance(obj, Event):
        resp = {
            'id': obj.public_id,
            'object': 'event',
            public_id_key_name: _get_namespace_public_id(obj),
            'calendar_id': obj.calendar.public_id if obj.calendar else None,
            'message_id': obj.message.public_id if obj.message else None,
            'title': obj.title,
            'description': obj.description,
            'owner': obj.owner,
            'participants': [_format_participant_data(participant)
                             for participant in obj.participants],
            'read_only': obj.read_only,
            'location': obj.location,
            'when': encode(obj.when, legacy_nsid=legacy_nsid),
            'busy': obj.busy,
            'status': obj.status,
        }
        if isinstance(obj, RecurringEvent):
            resp['recurrence'] = {
                'rrule': obj.recurring,
                'timezone': obj.start_timezone
            }
        if isinstance(obj, RecurringEventOverride):
            resp['original_start_time'] = encode(obj.original_start_time,
                                                 legacy_nsid=legacy_nsid)
            if obj.master:
                resp['master_event_id'] = obj.master.public_id
        return resp

    elif isinstance(obj, Calendar):
        return {
            'id': obj.public_id,
            'object': 'calendar',
            public_id_key_name: _get_namespace_public_id(obj),
            'name': obj.name,
            'description': obj.description,
            'read_only': obj.read_only,
        }

    elif isinstance(obj, When):
        # Get time dictionary e.g. 'start_time': x, 'end_time': y or 'date': z
        times = obj.get_time_dict()
        resp = {k: encode(v, legacy_nsid=legacy_nsid) for
                                         k, v in times.iteritems()}
        resp['object'] = _get_lowercase_class_name(obj)
        return resp

    elif isinstance(obj, Block):  # ie: Attachments/Files
        resp = {
            'id': obj.public_id,
            'object': 'file',
            public_id_key_name: _get_namespace_public_id(obj),
            'content_type': obj.content_type,
            'size': obj.size,
            'filename': obj.filename,
        }
        if len(obj.parts):
            # if obj is actually a message attachment (and not merely an
            # uploaded file), set additional properties
            resp.update({
                'message_ids': [p.message.public_id for p in obj.parts]
            })

        return resp

    elif isinstance(obj, Category):
        # 'object' is set to 'folder' or 'label'
        resp = {
            'id': obj.public_id,
            'object': obj.type,
            public_id_key_name: _get_namespace_public_id(obj),
            'name': obj.name,
            'display_name': obj.api_display_name
        }
        return resp

コード例 #35

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_reply_separated_by_hr():
    """Check extract_from_html output for the REPLY_SEPARATED_BY_HR fixture.

    Whitespace is removed from the extracted markup before comparing it
    with the expected document.
    """
    expected = ('<html><head></head><body>'
                '<div>Hi<div>there</div></div>'
                '</body></html>')
    extracted = quotations.extract_from_html(REPLY_SEPARATED_BY_HR)
    compact = RE_WHITESPACE.sub('', extracted)
    eq_(expected, compact)

コード例 #36

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_OLK_SRC_BODY_SECTION_stripped():
    """Check extract_from_html output for the OLK_SRC_BODY_SECTION fixture.

    Only the reply <div> is expected to remain once whitespace has been
    removed from the extracted markup.
    """
    expected = '<html><head></head><body><div>Reply</div></body></html>'
    extracted = quotations.extract_from_html(OLK_SRC_BODY_SECTION)
    compact = RE_WHITESPACE.sub('', extracted)
    eq_(expected, compact)

コード例 #37

0

ファイルを表示

def test_bad_html():
    """An empty ``<html></html>`` document is expected back unchanged."""
    markup = "<html></html>"
    result = quotations.extract_from_html(markup)
    eq_(markup, result)

コード例 #38

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: dichen001/talon

def test_OLK_SRC_BODY_SECTION_stripped():
    """Check extract_from_html output for the OLK_SRC_BODY_SECTION fixture.

    The extracted markup is compared, with whitespace removed, against a
    document that holds only the reply <div>.
    """
    expected = '<html><body><div>Reply</div></body></html>'
    extracted = quotations.extract_from_html(OLK_SRC_BODY_SECTION)
    compact = RE_WHITESPACE.sub('', extracted)
    eq_(expected, compact)

コード例 #39

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: guruhq/talon

def test_gmail_forwarded_msg():
    """A Gmail forwarded message is expected to come back unchanged.

    Input and output are compared after whitespace has been removed from
    both, so only the markup itself has to match.
    """
    forwarded_html = """<div dir="ltr"><br><div class="gmail_quote">---------- Forwarded message ----------<br>From: <b class="gmail_sendername">Bob</b> <span dir="ltr">&lt;<a href="mailto:[email protected]">[email protected]</a>&gt;</span><br>Date: Fri, Feb 11, 2010 at 5:59 PM<br>Subject: Bob WFH today<br>To: Mary &lt;<a href="mailto:[email protected]">[email protected]</a>&gt;<br><br><br><div dir="ltr">eom</div>
</div><br></div>"""
    result = quotations.extract_from_html(forwarded_html)
    expected_compact = RE_WHITESPACE.sub('', forwarded_html)
    actual_compact = RE_WHITESPACE.sub('', result)
    eq_(expected_compact, actual_compact)

コード例 #40

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: dichen001/talon

def test_reply_separated_by_hr():
    """Check extract_from_html output for the REPLY_SEPARATED_BY_HR fixture.

    The extracted markup is stripped of whitespace and compared with the
    expected nested-<div> document.
    """
    expected = '<html><body><div>Hi<div>there</div></div></body></html>'
    extracted = quotations.extract_from_html(REPLY_SEPARATED_BY_HR)
    compact = RE_WHITESPACE.sub('', extracted)
    eq_(expected, compact)

コード例 #41

0

ファイルを表示

ファイル: quotations_test.py プロジェクト: JordanReiter/talon

def test_malformed_html():
    """Input made only of closing tags is expected back unchanged."""
    closing_tags_only = '</body></html>'
    result = quotations.extract_from_html(closing_tags_only)
    eq_(closing_tags_only, result)

コード例 #42

0

ファイルを表示

ファイル: html_quotations_test.py プロジェクト: dichen001/talon

def test_empty_body():
    """An empty input string is expected to yield an empty string."""
    result = quotations.extract_from_html('')
    eq_('', result)

コード例 #43

0

ファイルを表示

def test_malformed_html():
    """Input made only of closing tags is expected back unchanged."""
    closing_tags_only = '</body></html>'
    result = quotations.extract_from_html(closing_tags_only)
    eq_(closing_tags_only, result)