def thread_mails(emails):
    """Thread ``emails`` with jwzthreading and sync stored parents to it.

    Each mail's ``imported_blob`` is parsed and handed to jwzthreading.
    The resulting thread tree is printed, then walked: every mail whose
    stored parent differs from the parent computed by the algorithm gets
    ``set_parent()`` called with the computed one.

    :param emails: iterable of mail objects; the code reads
        ``imported_blob``, ``subject``, ``message_id``, ``parent`` and
        calls ``set_parent()`` on them.
    """
    emails_for_threading = []
    for mail in emails:
        email_for_threading = jwzthreading.make_message(
            email.message_from_string(mail.imported_blob))
        # jwzthreading does not decode the subject itself, store ours.
        email_for_threading.subject = mail.subject.first_original().value
        # Store our email object instead of the raw message text.
        email_for_threading.message = mail
        emails_for_threading.append(email_for_threading)

    threaded_emails = jwzthreading.thread(emails_for_threading)

    # Output.  sorted() does not require items() to return a list.
    for _subject, container in sorted(threaded_emails.items()):
        jwzthreading.print_container(container, 0, True)

    def update_threading(threaded_emails, parent=None, debug=False):
        """Recursively align stored parents with the threading result.

        :param threaded_emails: containers to process at this level.
        :param parent: closest ancestor container holding a real message
            (dummy containers are skipped over), or None at the roots.
        :param debug: when true, print a trace of the decisions taken.
        """
        if debug:
            print("\n\nEntering update_threading() for %s mails:"
                  % len(threaded_emails))
        for container in threaded_emails:
            threaded_message = container.message
            if debug:
                # Dummy containers carry no message, and ``subject`` was
                # replaced by a plain value above: describe the message
                # only when there is one, and print the subject as stored.
                if threaded_message:
                    print("\nProcessing: "
                          + repr(threaded_message.subject) + " "
                          + repr(threaded_message.message_id) + " "
                          + repr(threaded_message.message.id))
                print("container: " + repr(container))
                print("parent: " + repr(container.parent))
                print("children: " + repr(container.children))

            if not threaded_message:
                if debug:
                    print("Current message ID: None, was a dummy container")
                # Children of a dummy container inherit the ancestor we
                # were given, so real messages still get a real parent.
                update_threading(container.children, parent, debug=debug)
                continue

            db_mail = threaded_message.message
            current_parent = db_mail.parent
            db_parent_message_id = (
                current_parent.message_id if current_parent else None)

            algorithm_parent_message_id = None
            if parent:
                if parent.message:
                    # jwzthreading strips the <>, re-add them
                    algorithm_parent_message_id = unicode(
                        "<" + parent.message.message_id + ">")
                elif debug:
                    print("Parent was a dummy container, we may need "
                          "to handle this case better, as we just "
                          "potentially lost sibbling relationships")

            if debug:
                print("Current parent from database: "
                      + repr(db_parent_message_id))
                print("Current parent from algorithm: "
                      + repr(algorithm_parent_message_id))
                print("References: " + repr(threaded_message.references))

            if algorithm_parent_message_id != db_parent_message_id:
                # Only reparent when the current parent is absent or is
                # itself an email: the algorithm only considers mails.
                if current_parent is None or isinstance(current_parent,
                                                        Email):
                    if debug:
                        print("UPDATING PARENT for :"
                              + repr(db_mail.message_id))
                    new_parent = (parent.message.message
                                  if algorithm_parent_message_id else None)
                    if debug:
                        print(repr(new_parent))
                    db_mail.set_parent(new_parent)
                elif debug:
                    print("Skipped reparenting: the current parent "
                          "isn't an email, the threading algorithm only "
                          "considers mails")
            update_threading(container.children, container, debug=debug)

    update_threading(threaded_emails.values(), debug=False)
def thread_mails(emails):
    """Thread ``emails`` with jwzthreading and sync stored parents to it.

    Each mail's ``imported_blob`` is passed through
    ``AbstractMailbox.guess_encoding``, parsed, and handed to jwzthreading.
    The resulting containers are printed, then walked: every mail whose
    stored parent differs from the parent computed by the algorithm gets
    ``set_parent()`` called with the computed one.

    :param emails: iterable of mail objects; the code reads
        ``imported_blob``, ``subject``, ``message_id``, ``parent`` and
        calls ``set_parent()`` on them.
    """
    emails_for_threading = []
    for mail in emails:
        blob = AbstractMailbox.guess_encoding(mail.imported_blob)
        email_for_threading = jwzthreading.Message(
            email.message_from_string(blob))
        # jwzthreading does not decode the subject itself, store ours.
        email_for_threading.subject = mail.subject.first_original().value
        # Store our email object instead of the raw message text.
        email_for_threading.message = mail
        emails_for_threading.append(email_for_threading)

    threaded_emails = jwzthreading.thread(emails_for_threading)

    # Output
    for container in threaded_emails:
        jwzthreading.print_container(container, 0, True)

    def update_threading(threaded_emails, debug=False):
        """Recursively align stored parents with the threading result.

        :param threaded_emails: containers to process at this level.
        :param debug: kept for backward compatibility; it is only
            forwarded to the recursive calls, tracing goes through ``log``.
        """
        log.debug("\n\nEntering update_threading() for %ld mails:"
                  % len(threaded_emails))
        for container in threaded_emails:
            message = container['message']
            # %s rather than %d for the id, so that a mail without an id
            # cannot make this eagerly formatted debug line raise.
            message_string = ("%s %s %s " % (
                message.subject, message.message_id, message.message.id)
                if message else "null ")
            log.debug(
                "Processing: %s container: %s parent: %s children :%s"
                % (message_string, container, container.parent,
                   container.children))

            if message:
                db_mail = message.message
                current_parent = db_mail.parent
                db_parent_message_id = (
                    current_parent.message_id if current_parent else None)

                # Reset for every container so that a value left over
                # from a previous iteration can never be picked up.
                parent_message = None
                algorithm_parent_message_id = None
                if container.parent:
                    parent_message = container.parent['message']
                    if parent_message:
                        # jwzthreading strips the <>, re-add them
                        algorithm_parent_message_id = (
                            u"<" + parent_message.message_id + u">")
                    else:
                        log.warning(
                            "Parent was a dummy container, we may need "
                            "to handle this case better, as we just "
                            "potentially lost sibling relationships")

                log.debug("Current parent from database: "
                          + repr(db_parent_message_id))
                log.debug("Current parent from algorithm: "
                          + repr(algorithm_parent_message_id))
                log.debug("References: " + repr(message.references))

                if algorithm_parent_message_id != db_parent_message_id:
                    # Only reparent when the current parent is absent or
                    # is itself an email: the algorithm only knows mails.
                    if current_parent is None or isinstance(
                            current_parent, Email):
                        log.debug("UPDATING PARENT for :"
                                  + repr(db_mail.message_id))
                        new_parent = (
                            parent_message.message
                            if algorithm_parent_message_id else None)
                        log.debug(repr(new_parent))
                        db_mail.set_parent(new_parent)
                    else:
                        log.debug(
                            "Skipped reparenting: the current parent "
                            "isn't an email, the threading algorithm only "
                            "considers mails")
            else:
                log.debug(
                    "Current message ID: None, was a dummy container")

            # Children are processed whether or not this container
            # holds a real message.
            update_threading(container.children, debug=debug)

    update_threading(threaded_emails, debug=False)
def thread_mails(emails):
    """Thread ``emails`` with jwzthreading and sync stored parents to it.

    Every mail's ``imported_blob`` is parsed into a jwzthreading message,
    the messages are threaded, the resulting tree is printed, and finally
    the tree is walked so that each mail whose stored parent disagrees
    with the computed one is reparented through ``set_parent()``.

    :param emails: iterable of mail objects; the code reads
        ``imported_blob``, ``subject``, ``message_id``, ``parent`` and
        calls ``set_parent()`` on them.
    """
    emails_for_threading = []
    for mail in emails:
        email_for_threading = jwzthreading.make_message(
            email.message_from_string(mail.imported_blob))
        # Store our email subject, jwzthreading does not decode it itself
        email_for_threading.subject = mail.subject.first_original().value
        # Store our email object pointer instead of the raw message text
        email_for_threading.message = mail
        emails_for_threading.append(email_for_threading)

    threaded_emails = jwzthreading.thread(emails_for_threading)

    # Output.  sorted() works whether items() yields a list or a view.
    for _subj, container in sorted(threaded_emails.items()):
        jwzthreading.print_container(container, 0, True)

    def update_threading(threaded_emails, parent=None, debug=False):
        """Walk the containers and reparent mails where needed.

        :param threaded_emails: containers to process at this level.
        :param parent: nearest ancestor container that holds a real
            message (dummy containers pass their own ``parent`` down),
            or None for the roots.
        :param debug: when true, print a trace of what is being done.
        """
        if debug:
            print("\n\nEntering update_threading() for %s mails:"
                  % len(threaded_emails))
        for container in threaded_emails:
            node_message = container.message
            if debug:
                if node_message:
                    # Guarded: a dummy container has no message, and the
                    # subject stored above is already the plain value.
                    print("\nProcessing: "
                          + repr(node_message.subject) + " "
                          + repr(node_message.message_id) + " "
                          + repr(node_message.message.id))
                print("container: " + repr(container))
                print("parent: " + repr(container.parent))
                print("children: " + repr(container.children))

            if node_message:
                stored_mail = node_message.message
                current_parent = stored_mail.parent
                if current_parent:
                    db_parent_message_id = current_parent.message_id
                else:
                    db_parent_message_id = None

                algorithm_parent_message_id = None
                if parent:
                    if parent.message:
                        # jwzthreading strips the <>, re-add them
                        algorithm_parent_message_id = unicode(
                            "<" + parent.message.message_id + ">")
                    elif debug:
                        print("Parent was a dummy container, we may need "
                              "to handle this case better, as we just "
                              "potentially lost sibbling relationships")

                if debug:
                    print("Current parent from database: "
                          + repr(db_parent_message_id))
                    print("Current parent from algorithm: "
                          + repr(algorithm_parent_message_id))
                    print("References: " + repr(node_message.references))

                if algorithm_parent_message_id != db_parent_message_id:
                    # The algorithm only considers mails, so leave a
                    # non-email parent untouched.
                    if current_parent is None or isinstance(
                            current_parent, Email):
                        if debug:
                            print("UPDATING PARENT for :"
                                  + repr(stored_mail.message_id))
                        if algorithm_parent_message_id:
                            new_parent = parent.message.message
                        else:
                            new_parent = None
                        if debug:
                            print(repr(new_parent))
                        stored_mail.set_parent(new_parent)
                    elif debug:
                        print("Skipped reparenting: the current parent "
                              "isn't an email, the threading algorithm "
                              "only considers mails")
                update_threading(container.children, container, debug=debug)
            else:
                if debug:
                    print("Current message ID: None, was a dummy container")
                # Skip the dummy: its children keep the ancestor we got.
                update_threading(container.children, parent, debug=debug)

    update_threading(threaded_emails.values(), debug=False)
def thread_mails(emails):
    """Thread ``emails`` with jwzthreading and sync stored parents to it.

    Each mail's ``full_message`` is parsed into a jwzthreading message,
    the messages are threaded, the resulting tree is printed, and the
    tree is then walked to call ``post.set_parent()`` on mails whose
    stored parent disagrees with the computed one.

    :param emails: iterable of mail objects; the code reads
        ``full_message``, ``subject``, ``message_id``, ``source_id`` and
        ``post`` on them.
    """
    print('Threading...')
    emails_for_threading = []
    for mail in emails:
        email_for_threading = jwzthreading.make_message(email.message_from_string(mail.full_message))
        # Store our email subject, jwzthreading does not decode subject itself
        email_for_threading.subject = mail.subject
        # Store our email object pointer instead of the raw message text
        email_for_threading.message = mail
        emails_for_threading.append(email_for_threading)

    threaded_emails = jwzthreading.thread(emails_for_threading)

    # Output: print every thread, ordered by the key of threaded_emails.
    # NOTE(review): calling .sort() on items() requires items() to return
    # a list (Python 2 dict behaviour) -- confirm before porting.
    L = threaded_emails.items()
    L.sort()
    for subj, container in L:
        jwzthreading.print_container(container, 0, True)

    def update_threading(threaded_emails, parent=None):
        """Recursively align stored parents with the threading result.

        :param threaded_emails: containers to process at this level.
        :param parent: container passed down by the caller: the container
            itself for children of a real message, the caller's own
            ``parent`` for children of a dummy container, None at the roots.
        """
        for container in threaded_emails:
            #jwzthreading.print_container(container)
            #print (repr(container))
            ##print "parent: "+repr(container.parent)
            ##print "children: "+repr(container.children)
            ##print("\nProcessing: " + repr(container.message.subject) + " " + repr(container.message.message_id))
            if(container.message):
                # container.message is the jwzthreading message; its
                # .message attribute is the mail object stored above.
                current_parent = container.message.message.post.parent
                if(current_parent):
                    db_parent_message_id = current_parent.content.message_id
                else:
                    db_parent_message_id = None

                if parent:
                    if parent.message:
                        #jwzthreading strips the <>, re-add them
                        algorithm_parent_message_id = unicode("<"+parent.message.message_id+">")
                    else:
                        # Parent was a dummy container, we may need to handle this case better
                        # we just potentially lost sibling relationships
                        algorithm_parent_message_id = None
                else:
                    algorithm_parent_message_id = None
                #print("Current parent from algorithm: " + repr(algorithm_parent_message_id))
                #print("References: " + repr(container.message.references))
                if algorithm_parent_message_id != db_parent_message_id:
                    # Don't reparent if the current parent isn't an email, the threading algorithm only considers mails
                    if current_parent == None or isinstance(current_parent.content, Email):
                        #print("UPDATING PARENT for :" + repr(container.message.message.message_id))
                        new_parent = parent.message.message.post if algorithm_parent_message_id else None
                        #print repr(new_parent)
                        container.message.message.post.set_parent(new_parent)
                # NOTE(review): this was recovered from text whose
                # indentation had been lost; placing this check at the
                # same level as the comparison above (so it also runs when
                # the two message ids agree) is inferred -- please confirm.
                if current_parent and current_parent.content.source_id != container.message.message.source_id:
                    #This is to correct past mistakes in the database, remove it once everyone ran it benoitg 2013-11-20
                    print("UPDATING PARENT, BAD ORIGINAL SOURCE" + repr(current_parent.content.source_id) + " " + repr(container.message.message.source_id))
                    new_parent = parent.message.message.post if algorithm_parent_message_id else None
                    #print repr(new_parent)
                    container.message.message.post.set_parent(new_parent)
                # Children of a real message get this container as parent.
                update_threading(container.children, container)
            else:
                #print "Current message ID: None, was a dummy container"
                # Dummy container: children keep the parent we were given.
                update_threading(container.children, parent)

    update_threading(threaded_emails.values())