def test_thread_unrelated(self):
    """Thread two messages that carry no references to each other.

    Each message is expected under its own subject key, and the second
    one is expected to have no children.
    """
    first = jwzthreading.Message(None)
    first.subject = 'First'
    first.message_id = 'First'

    second = jwzthreading.Message(None)
    second.subject = 'Second'
    second.message_id = 'Second'

    threads = jwzthreading.thread([first, second])

    self.assertEqual(threads['First'].message, first)
    second_root = threads['Second']
    self.assertEqual(second_root.children, [])
    self.assertEqual(second_root.message, second)
def test_thread_two_reverse(self):
    """Thread a message and its reply when the reply is listed first.

    The reply is expected to be the single child of the message it
    references, regardless of the input order.
    """
    parent = jwzthreading.Message(None)
    parent.subject = 'First'
    parent.message_id = 'First'

    reply = jwzthreading.Message(None)
    reply.subject = 'Second'
    reply.message_id = 'Second'
    reply.references = ['First']

    # The child is deliberately passed ahead of its parent.
    threads = jwzthreading.thread([reply, parent])

    root = threads['First']
    self.assertEqual(root.message, parent)
    self.assertEqual(len(root.children), 1)
    self.assertEqual(root.children[0].message, reply)
def test_thread_two(self):
    """Thread a message followed by a reply that references it.

    The reply is expected to be the single child of the first message.
    """
    parent = jwzthreading.Message(None)
    parent.subject = 'First'
    parent.message_id = 'First'

    reply = jwzthreading.Message(None)
    reply.subject = 'Second'
    reply.message_id = 'Second'
    reply.references = ['First']

    threads = jwzthreading.thread([parent, reply])

    root = threads['First']
    self.assertEqual(root.message, parent)
    self.assertEqual(len(root.children), 1)
    self.assertEqual(root.children[0].message, reply)
def test_thread_two_missing_parent(self):
    """Thread two messages that both reference a parent not in the input.

    The entry under their shared subject is expected to hold no message
    and to have both messages as children, the first one listed first.
    """
    first_child = jwzthreading.Message(None)
    first_child.subject = 'Child'
    first_child.message_id = 'First'
    first_child.references = ['parent']

    second_child = jwzthreading.Message(None)
    second_child.subject = 'Child'
    second_child.message_id = 'Second'
    second_child.references = ['parent']

    threads = jwzthreading.thread([first_child, second_child])

    root = threads['Child']
    self.assertEqual(root.message, None)
    self.assertEqual(len(root.children), 2)
    self.assertEqual(root.children[0].message, first_child)
def test_thread_lying_message(self):
    """Thread a three-message chain next to messages with bogus references.

    'First' -> 'Second' -> 'Third' is expected to come out as a straight
    chain even though 'Lying before' lists those ids in a misleading
    order in its own references.
    """
    dummy_parent = jwzthreading.Message(None)
    dummy_parent.subject = 'Dummy parent'
    dummy_parent.message_id = 'Dummy parent'

    lying_before = jwzthreading.Message(None)
    lying_before.subject = 'Lying before'
    lying_before.message_id = 'Lying before'
    lying_before.references = ['Dummy parent', 'Second', 'First', 'Third']

    first = jwzthreading.Message(None)
    first.subject = 'First'
    first.message_id = 'First'

    second = jwzthreading.Message(None)
    second.subject = 'Second'
    second.message_id = 'Second'
    second.references = ['First']

    third = jwzthreading.Message(None)
    third.subject = 'Third'
    third.message_id = 'Third'
    third.references = ['First', 'Second']

    lying_after = jwzthreading.Message(None)
    lying_after.subject = 'Lying after'
    lying_after.message_id = 'Lying after'
    # Disabled in the source; this message is threaded without references:
    # lying_after.references = ['Dummy parent', 'Third', 'Second', 'First']

    threads = jwzthreading.thread(
        [dummy_parent, lying_before, first, second, third, lying_after])

    root = threads['First']
    self.assertEqual(root.message, first)
    self.assertEqual(len(root.children), 1)

    child = root.children[0]
    self.assertEqual(child.message, second)
    self.assertEqual(len(child.children), 1)
    self.assertEqual(child.children[0].message, third)
def test_basic_message(self):
    """Build a Message from parsed headers and check what it extracted.

    The subject is expected to be taken from the Subject header, and the
    sorted references are expected to be the de-duplicated ids from the
    References header plus the In-Reply-To id, without angle brackets.
    """
    text = """\
        Subject: random
        Message-ID: <message1>
        References: <ref1> <ref2> <ref1>
        In-Reply-To: <reply>

        Body."""
    msg = message_from_string(textwrap.dedent(text))
    m = jwzthreading.Message(msg)
    self.assertTrue(repr(m))
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(m.subject, 'random')
    self.assertEqual(sorted(m.references), ['ref1', 'ref2', 'reply'])

    # Verify that repr() works
    repr(m)
def test_thread_single(self):
    """Thread one lone message and expect it under its own subject key."""
    only = jwzthreading.Message(None)
    only.subject = 'Single'
    only.message_id = 'Single'

    threads = jwzthreading.thread([only])

    self.assertEqual(threads['Single'].message, only)
def test_prune_promote(self):
    """Prune a container whose only child holds a message.

    The parent is created without a message being assigned here, and
    pruning it is expected to return a list containing just the child.
    """
    p = jwzthreading.Container()
    c1 = jwzthreading.Container()
    c1.message = jwzthreading.Message()
    p.add_child(c1)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(jwzthreading.prune_container(p), [c1])
def thread_mails(emails):
    """Thread ``emails`` with jwzthreading and sync each mail's parent.

    Every mail is wrapped in a ``jwzthreading.Message`` built from its
    imported blob, the wrappers are threaded, each resulting container is
    printed with ``jwzthreading.print_container``, and the container tree
    is then walked to call ``set_parent`` on mails whose stored parent
    differs from the one the threading algorithm picked.

    :param emails: iterable of mail objects; each must provide
        ``imported_blob``, ``subject``, ``parent``, ``message_id``, ``id``
        and ``set_parent`` as used below.
    :return: ``None``; the result is the side effect on the mails.
    """
    emails_for_threading = []
    for mail in emails:
        blob = AbstractMailbox.guess_encoding(mail.imported_blob)
        email_for_threading = jwzthreading.Message(
            email.message_from_string(blob))
        # Store our email subject, jwzthreading does not decode the
        # subject itself.
        email_for_threading.subject = mail.subject.first_original().value
        # Store our email object pointer instead of the raw message text.
        email_for_threading.message = mail
        emails_for_threading.append(email_for_threading)

    threaded_emails = jwzthreading.thread(emails_for_threading)

    # Output
    for container in threaded_emails:
        jwzthreading.print_container(container, 0, True)

    def update_threading(threaded_emails, debug=False):
        """Recursively reparent the mails held by ``threaded_emails``.

        ``debug`` is not read here; it is only forwarded to the recursive
        calls on each container's children.
        """
        log.debug("\n\nEntering update_threading() for %ld mails:",
                  len(threaded_emails))
        for container in threaded_emails:
            message = container['message']
            message_string = "%s %s %d " % (
                message.subject, message.message_id,
                message.message.id) if message else "null "
            log.debug(
                "Processing: %s container: %s parent: %s children :%s",
                message_string, container, container.parent,
                container.children)

            if not message:
                # Nothing to reparent on a dummy container, but its
                # children still have to be visited.
                log.debug(
                    "Current message ID: None, was a dummy container")
                update_threading(container.children, debug=debug)
                continue

            current_parent = message.message.parent
            if current_parent:
                db_parent_message_id = current_parent.message_id
            else:
                db_parent_message_id = None

            # Resolve the parent chosen by the algorithm. Both values stay
            # None unless the parent container holds a real message, so
            # nothing can leak in from a previous loop iteration.
            algorithm_parent_message_id = None
            new_parent = None
            if container.parent:
                parent_message = container.parent['message']
                if parent_message:
                    # jwzthreading strips the <>, re-add them
                    algorithm_parent_message_id = (
                        u"<" + parent_message.message_id + u">")
                    new_parent = parent_message.message
                else:
                    log.warning(
                        "Parent was a dummy container, we may need "
                        "to handle this case better, as we just "
                        "potentially lost sibling relationships")

            log.debug("Current parent from database: %r",
                      db_parent_message_id)
            log.debug("Current parent from algorithm: %r",
                      algorithm_parent_message_id)
            log.debug("References: %r", message.references)

            if algorithm_parent_message_id != db_parent_message_id:
                # Only touch parents that are absent or are emails; other
                # kinds of parent are left alone.
                if current_parent is None or isinstance(
                        current_parent, Email):
                    log.debug("UPDATING PARENT for :%r",
                              message.message.message_id)
                    log.debug(repr(new_parent))
                    message.message.set_parent(new_parent)
                else:
                    log.debug(
                        "Skipped reparenting: the current parent "
                        "isn't an email, the threading algorithm only "
                        "considers mails")
            update_threading(container.children, debug=debug)

    update_threading(threaded_emails, debug=False)