def test_preprocess():
    # Checks quotations.preprocess() against four bodies, always with a
    # newline delimiter: two that must be rewritten and two that must come
    # back unchanged.
    delimiter = '\n'

    # Case 1: the link is rewritten, and the
    # 'On <date> <person> wrote:' pattern is moved to start on a new line.
    with_link = ('Hello\n'
                 'See <http://google.com\n'
                 '> for more\n'
                 'information On Nov 30, 2011, at 12:47 PM, Somebody <\n'
                 '416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4\n'
                 '@example.com>'
                 'wrote:\n'
                 '\n'
                 '> Hi')
    expected = ('Hello\n'
                'See @@http://google.com\n'
                '@@ for more\n'
                'information\n'
                ' On Nov 30, 2011, at 12:47 PM, Somebody <\n'
                '416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4\n'
                '@example.com>'
                'wrote:\n'
                '\n'
                '> Hi')
    actual = quotations.preprocess(with_link, delimiter)
    eq_(expected, actual)

    # Case 2: this body is expected to come back unchanged.
    # NOTE(review): presumably because every link sits on a '>'-quoted line;
    # the literal must keep each '>' at column 0 -- confirm the line breaks.
    quoted_links = """
> <http://teemcl.mailgun.org/u/**aD1mZmZiNGU5ODQwMDNkZWZlMTExNm**
> MxNjQ4Y2RmOTNlMCZyPXNlcmdleS5v**YnlraG92JTQwbWFpbGd1bmhxLmNvbS**
> Z0PSUyQSZkPWUwY2U<http://example.org/u/aD1mZmZiNGU5ODQwMDNkZWZlMTExNmMxNjQ4Y>
"""
    actual = quotations.preprocess(quoted_links, delimiter)
    eq_(quoted_links, actual)

    # Case 3: 'On <date> <person> wrote' shouldn't be spread across too many
    # lines, so this body is expected to come back unchanged as well.
    spread_out = ('Hello\n'
                  'How are you? On Nov 30, 2011, at 12:47 PM,\n '
                  'Example <\n'
                  '416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4\n'
                  '@example.org>'
                  'wrote:\n'
                  '\n'
                  '> Hi')
    actual = quotations.preprocess(spread_out, delimiter)
    eq_(spread_out, actual)

    # Case 4: two 'On ... wrote:' lines; only the one that does not already
    # begin its line is expected to be pushed onto a new line.
    two_headers = ('Hello On Nov 30, smb wrote:\n'
                   'Hi\n'
                   'On Nov 29, smb wrote:\n'
                   'hi')
    expected = ('Hello\n'
                ' On Nov 30, smb wrote:\n'
                'Hi\n'
                'On Nov 29, smb wrote:\n'
                'hi')
    actual = quotations.preprocess(two_headers, delimiter)
    eq_(expected, actual)
def split_emails(msg_body):
    """
    Split a plain text email chain into sections of lines.

    The body is preprocessed with the delimiter reported by
    ``get_delimiter``, broken into lines, and labelled by
    ``mark_message_lines``. The index of every line labelled ``'s'`` is
    passed to ``partition`` as a boundary.

    :param msg_body: plain text email chain
    :return: the result of ``partition`` for the preprocessed lines and the
        collected boundary indices
    """
    prepared_body = quotations.preprocess(msg_body, get_delimiter(msg_body))
    body_lines = prepared_body.splitlines()

    # Collect the position of each 's' marker; these are the points at which
    # the line list gets partitioned.
    boundaries = []
    for position, marker in enumerate(mark_message_lines(body_lines)):
        if marker == 's':
            boundaries.append(position)

    return partition(body_lines, boundaries)