def getEmail():
    """Fetch one unseen inbox message, parse it as a thread and store it.

    Logs in to the Gmail IMAP server, searches the inbox for UNSEEN
    messages and downloads the last id returned by that search in full
    (RFC822). The raw message is handed to ``MailParser``; the resulting
    thread's messages are replaced by the output of ``nlp.compute_tf_idf``
    and the thread is passed to ``es.store_thread``.

    Returns:
        None. Returns early, after closing the connection, when the search
        yields no unseen messages.

    Raises:
        imaplib.IMAP4.error: if the inbox cannot be selected, or on any
            other IMAP protocol failure.
    """
    # NOTE(security): credentials are hard-coded in source control. They
    # should be loaded from configuration or the environment instead.
    user = "******"
    pwd = "wikitrocks"

    # Connect to the Gmail IMAP server.
    m = imaplib.IMAP4_SSL("imap.gmail.com")
    try:
        m.login(user, pwd)
        # Another mailbox (see m.list()) could be selected here instead.
        status, _ = m.select("inbox")
        if status != "OK":
            # select() reports failure through its status instead of
            # raising; fail here rather than on the next command.
            raise imaplib.IMAP4.error("could not select mailbox 'inbox'")
        try:
            # Any IMAP search criteria could be used in place of UNSEEN.
            _, items = m.search(None, "UNSEEN")
            unseen_ids = items[0].split()
            if not unseen_ids:
                return
            # "(RFC822)" asks for the complete message, not just headers.
            _, data = m.fetch(unseen_ids[-1], "(RFC822)")
            raw_email = data[0][1]
        finally:
            # Always leave the selected mailbox, including on the early
            # return above and when search/fetch raise.
            m.close()
    finally:
        m.logout()

    # Parse email
    parser = MailParser(raw_email)
    results = parser.parse_thread()
    question = results.question
    print(results.json())

    # Run NLP
    # NOTE(review): the return value was never used; the call is retained
    # in case it has side effects -- confirm and drop if it is pure.
    nlp.keywords_from_text(question.content)
    results.messages = nlp.compute_tf_idf(question, results.messages)

    es.store_thread(results)
    print("All saved")
def result_email(file_name):
    """Parse the mail thread stored in *file_name*, store it and print it.

    The file is read as text and parsed with ``MailParser``. The thread's
    messages are replaced by the result of ``nlp.compute_tf_idf`` computed
    against the thread's question, the thread is handed to
    ``es.store_thread``, and every resulting message is printed as a
    ``jsonpickle`` encoding.
    """
    with io.open(file_name) as handle:
        raw_text = handle.read()

    thread = MailParser(raw_text).parse_thread()
    root = thread.question

    # The keyword result is not used here; the call itself is preserved.
    _ = nlp.keywords_from_text(root.content)

    ranked = nlp.compute_tf_idf(root, thread.messages)
    thread.messages = ranked
    es.store_thread(thread)

    for entry in ranked:
        encoded = jsonpickle.encode(entry)
        print(encoded)
def parse_messages(self, message):
    """Split the text of a mail thread into individual ``Message`` objects.

    The text is cut at every ``On ... wrote:`` or ``From: ...`` header
    line. Each header is combined with the text that follows it and passed
    to ``self.parse_meta_v3`` to obtain the sender name, address and time.
    Text before the first header has no header of its own and is ignored.

    Args:
        message: Full text of the thread.

    Returns:
        A list of ``Message`` objects in reverse order of appearance in
        the text. Segments containing a forwarded-message marker, and
        segments for which ``parse_meta_v3`` returns ``None``, are skipped.
    """
    header_re = re.compile(r'On.+wrote:|From:.+')
    headers = header_re.findall(message)
    # re.split() yields one more piece than there are headers: the text
    # before the first header, then the text following each header. Drop
    # the leading piece so headers[k] lines up with the body it introduces;
    # indexing the headers by the position of a filtered piece list raised
    # IndexError or paired bodies with the wrong header.
    bodies = [piece.strip() for piece in header_re.split(message)[1:]]

    header_prefixes = ('From', 'To', 'Sent', 'Date', 'Subject', 'Cc',
                       'Received')
    messages = []
    for header, body in zip(headers, bodies):
        if "Begin forwarded message:" in body or "Forwarded message" in body:
            continue

        meta = self.parse_meta_v3(header + "\n" + body)
        # NOTE(review): segments without metadata are skipped entirely;
        # confirm this matches the intended handling.
        if meta is None:
            continue

        parsed = Message()
        parsed.sender_name = meta["name"]
        parsed.sender_address = meta["email"]
        parsed.time = meta["time"]

        # An empty or missing name must never match a line below.
        sender = (parsed.sender_name or "").lower()

        kept = []
        for line in body.splitlines():
            stripped = line.strip()
            if not stripped:
                continue
            if line.startswith(header_prefixes):
                # Leftover header line: discard everything collected so
                # far so the content starts after the header block.
                kept = []
                continue
            kept.append(line)
            if stripped.lower() in sender:
                # A line made up of (part of) the sender's name is treated
                # as the signature; stop collecting after it.
                break

        parsed.content = "\n".join(kept)
        parsed.keywords = nlp.keywords_from_text(parsed.content)
        messages.append(parsed)

    messages.reverse()
    return messages