def __init__(self, data):
    """
    Parse a raw e-mail and populate this object's attributes.

    Cribbed heavily from
    https://www.ianlewis.org/en/parsing-email-attachments-python

    Sets ``raw``, ``hash`` (SHA-512 hex digest of ``data``), ``sender``
    (lower-cased address from the ``From`` header), ``subject`` and
    ``body``, then delegates to ``_set_recipients``, ``_set_time`` and
    ``_set_attachments``.

    :param data: the complete message as bytes
    """
    Loggable.__init__(self)

    self.raw = data
    self.attachments = []
    self.recipients = []

    message = BytesParser(policy=policy.default).parsebytes(self.raw)

    self.hash = hashlib.sha512(data).hexdigest()

    _, sender_address = parseaddr(str(message["From"]))
    self.sender = sender_address.lower()

    subject_text = str(message["Subject"])
    self.subject = subject_text.replace("\r\n", "")

    # Prefer plain text and strip everything south of the signature. Note
    # that it is unclear what happens here for an HTML-only email.
    preferred_part = message.get_body(
        preferencelist=('plain', 'related', 'html')
    )
    rendered_part = str(preferred_part)
    above_signature = re.sub(
        r"\r?\n\r?\n-- \r?\n.*", "", rendered_part, flags=re.DOTALL
    )
    # The first blank-line-separated chunk is dropped -- presumably the
    # part's own header block, since str(part) renders headers too.
    chunks = above_signature.split("\n\n")
    self.body = "\n\n".join(chunks[1:])

    self._set_recipients(message)
    self._set_time(message)
    self._set_attachments(message)

    self.logger.info('Consuming email: "{}"'.format(self.subject))
def __init__(self, data, verbosity=1):
    """
    Parse a raw e-mail and extract its single attachment.

    Cribbed heavily from
    https://www.ianlewis.org/en/parsing-email-attachments-python

    :param data: the complete message as bytes
    :param verbosity: stored unchanged on ``self.verbosity``
    :raises InvalidMessageError: if the message carries no attachment, or
        more than one
    """
    self.verbosity = verbosity

    self.subject = None
    self.time = None
    self.attachment = None

    message = BytesParser(policy=policy.default).parsebytes(data)
    self.subject = str(message["Subject"]).replace("\r\n", "")
    self.body = str(message.get_body())

    self.check_subject()
    self.check_body()

    self._set_time(message)

    Log.info(
        'Importing email: "{}"'.format(self.subject), Log.COMPONENT_MAIL)

    attachments = []
    for part in message.walk():
        # get_content_disposition() returns the lower-cased disposition
        # without parameters, or None when the header is absent.
        if part.get_content_disposition() != "attachment":
            continue

        # decode=True honours the part's Content-Transfer-Encoding, so
        # attachments that are not base64-encoded are handled as well
        # (the previous unconditional b64decode() failed on those).
        attachments.append(Attachment(
            part.get_payload(decode=True),
            content_type=part.get_content_type()))

    if not attachments:
        raise InvalidMessageError(
            "There don't appear to be any attachments to this message")

    if len(attachments) > 1:
        raise InvalidMessageError(
            "There's more than one attachment to this message. It cannot "
            "be indexed automatically."
        )

    self.attachment = attachments[0]
def extract_contents(message_object, email_name):
    """Parse a raw e-mail and return its main fields as a JSON string.

    :param message_object: the complete message as bytes
    :param email_name: value stored under the ``id`` key
    :return: JSON document with the keys ``id``, ``from``, ``to``, ``cc``,
        ``subject``, ``date`` and ``body``; ``body`` is the text/plain
        content, or an empty string when the message has no such part
    """
    parsed_message = BytesParser(policy=policy.default) \
        .parsebytes(message_object)

    # preferencelist must be a sequence of subtypes; a bare string is
    # matched by substring, which only works by accident.
    plain_part = parsed_message.get_body(preferencelist=('plain',))
    body_text = plain_part.get_content() if plain_part is not None else ''

    email_object = {
        'id': email_name,
        'from': get_address_from_email(parsed_message, 'From'),
        'to': get_address_from_email(parsed_message, 'To'),
        'cc': get_address_from_email(parsed_message, 'CC'),
        'subject': str(parsed_message['subject']),
        'date': get_date_field_from_email(parsed_message),
        'body': body_text,
    }
    print('email object {}'.format(email_object))
    return json.dumps(email_object)
def process_mailbox(self):
    """Synchronise the history container with the selected IMAP mailbox.

    Searches all message UIDs, drops history entries whose message is no
    longer on the server, then downloads every message whose UID is greater
    than the last one stored in the history. Each new message is turned
    into a history element, its body and attachments are processed, and it
    is marked as read. Progress is reported through ``post_progress``
    (50 after the clean-up, then in equal steps while downloading).
    """
    rv, data = self.imap.uid('search', None, "ALL")
    if rv != 'OK':
        dbgprint("No messages found!")
        return

    uids = data[0].split()

    # delete removed messages
    self.remove_deleted_msgs_from_history(uids)
    self.post_progress(50)

    # get last history uid
    lastmessage = 0
    if self.histcontainer.get_nr_elements('email') > 0:
        lastmessage = self.histcontainer.get_last_element('email').get_uid()
    dbgprint("last element uid: " + str(lastmessage))

    # An empty mailbox has no "last UID"; nothing to download.
    if not uids:
        return

    # loop over all messages and download new ones
    progressstep = 0
    pending_span = int(uids[-1]) - lastmessage
    if pending_span > 0:
        progressstep = 30 / pending_span
    progressactual = 50

    for uid in uids:
        if int(uid) <= lastmessage:
            continue
        rv, fetched = self.imap.uid('fetch', uid, '(RFC822)')
        if rv != 'OK':
            # uid is whatever the server returned (bytes with imaplib), so
            # it cannot be concatenated to a str directly.
            dbgprint("ERROR getting message " + str(int(uid)))
            continue

        raw_message = fetched[0][1]
        # Two parses on purpose: the history helpers receive the legacy
        # Message object, while get_body() needs the modern policy.
        msg = email.message_from_bytes(raw_message)
        msg2 = BytesParser(policy=policy.default).parsebytes(raw_message)
        body = msg2.get_body(preferencelist=('plain', 'html'))

        el = self.histcontainer.make_element_from_message(int(uid), msg)
        self.process_body(str(body), el)
        self.process_attachments(msg, el)
        self.histcontainer.add_element(el)
        self.mark_msg_as_read(int(uid))

        progressactual += progressstep
        self.post_progress(progressactual)
def read_mail_ru(login, pw, del_mail=0):
    """Download every message of a pop.mail.ru mailbox and save it to disk.

    For each message two files are written through ``write_file``: a
    ``.txt`` with the From/Subject/Date headers and the preferred
    (plain, then html) body, and a ``.bin`` with the raw bytes.

    :param login: mailbox login; also used as the output folder name
    :param pw: mailbox password (surrounding whitespace is stripped)
    :param del_mail: when non-zero, each message is deleted from the server
        after it has been saved
    :return: -1 when connecting, logging in or listing fails; otherwise None
    """
    today = get_today()
    server = "pop.mail.ru"  # "pop.att.yahoo.com"
    try:
        # In principle port 995 does not have to be given explicitly.
        box = poplib.POP3_SSL(server, 995)
        # Never echo the password: console output ends up in logs.
        print('ok pop3 login=%s' % login)
        box.user(login)
        box.pass_(pw.strip())
        print('ok login')
        response, lst, octets = box.list()
    except (poplib.error_proto, OSError):
        print('err pop3')
        return -1

    s = today + ' ' + login + ' messages: ' + n2s(
        len(lst)) + ' ' + b2s(response) + '\n'
    write_file('mail_log', s, 2)
    print(s)

    for msgnum, msgsize in [i.split() for i in lst]:
        n = int(msgnum)
        print(n, int(msgsize))
        (resp, lines, octets) = box.retr(n)
        bb = b'\n'.join(lines) + b'\n'
        msg = BytesParser(policy=policy.default).parsebytes(bb)

        body_part = msg.get_body(preferencelist=('plain', 'html'))
        # %-formatting tolerates a missing header (None), unlike str + None.
        ss = ''.join([
            'from: %s\n' % msg['from'],  # '[email protected]'
            'subject: %s\n' % msg['subject'],
            'date: %s\n' % msg['date'],
            '--------------------------\n',
            body_part.get_content() if body_part is not None else '',
            '\n==================================================\n',
        ])

        sm = get_mail(msg['from'])
        f = login + '\\' + str(n) + '_' + today + '(' + sm + ')'
        # Decoded simple messages; TODO: strip html tags (value, input, script?).
        write_file(f + '.txt', ss, 2)
        write_file(f + '.bin', bb, 2)  # also keep the message as received
        if del_mail != 0:
            box.dele(n)  # delete the message from the server if requested
    box.quit()
def _find_verify_url(self, send_time: datetime = None):
    """
    Find the verification URL in the mailbox.

    Walks the ids from ``self._mail_ids()``, keeps the messages whose
    ``From`` header contains ``self.from_email`` and, for each of them,
    searches the preferred (plain, then html) body with
    ``self._find_link_by_pattern`` and ``self.pattern``.

    :param send_time: optional lower bound; when given, only messages whose
        ``Date`` header is at or after it are inspected
    :return: the first truthy link found, or None when no message yields one
    """
    for _id in self._mail_ids():
        _, data = self.mail.fetch(str(_id), "(RFC822)")
        raw_message = data[0][1]
        try:
            msg = email.message_from_string(raw_message)
        except TypeError:
            msg = email.message_from_bytes(raw_message)

        # A message without a From header cannot be from the expected sender.
        sender = msg["from"]
        if sender is None or str(sender).find(self.from_email) == -1:
            continue

        # parsedate_tz() returns None for a missing or unparseable date.
        date_tuple = email.utils.parsedate_tz(msg["date"])
        msg_date = None
        if date_tuple is not None:
            msg_date = datetime.datetime.fromtimestamp(
                email.utils.mktime_tz(date_tuple))
        print("_find_verify_url")
        print(msg_date)

        # With a lower bound, an undated or older message is skipped.
        if send_time and (msg_date is None or send_time > msg_date):
            continue

        body_msg = BytesParser(policy=policy.default).parsebytes(
            raw_message)
        body = body_msg.get_body(preferencelist=("plain", "html"))
        if body is None:
            continue

        verify_url = self._find_link_by_pattern(
            body=body.get_content().splitlines(), pattern=self.pattern)
        if verify_url:
            return verify_url
    return None
def handle_DATA(self, server, session, envelope):
    """Turn an incoming e-mail into a ticket comment or a new ticket.

    The reply text is extracted from the text/plain body with
    ``EmailReplyParser``. If ``self.get_ticket`` resolves the message to an
    existing ticket, a ``Comment`` is added to it; otherwise a new
    ``Ticket`` is created in the inbox addressed by the envelope
    recipients. In both cases the author is linked to the inbox.

    :return: an SMTP status line: ``'250 OK'`` on success, ``'450 ...'``
        when the inbox has the relevant feature disabled, ``'550 ...'``
        when the message has no text/plain body
    """
    mail_from = envelope.mail_from

    message = BytesParser(policy=policy.default).parsebytes(
        envelope.content)
    body = message.get_body(preferencelist=('plain', ))
    # Compare against None explicitly: a message object's truth value is
    # its header count, not whether a body part was found. Without a
    # text/plain part there is no reply text to store, so reject the
    # message instead of failing later on an unbound variable.
    if body is None:
        return '550 Message has no text/plain body'

    content = body.get_content()
    reply = EmailReplyParser.parse_reply(content)

    author, _ = User.objects.get_or_create(email=mail_from)
    ticket, message_id = self.get_ticket(message)

    if ticket:
        if not ticket.inbox.enable_reply_by_email:
            return '450 Reply by email is disabled for the inbox'
        Comment.objects.create(
            ticket=ticket,
            author=author,
            is_reply=ticket.reply_message_id == message_id,
            content=reply)
        UserInbox.objects.get_or_create(user=author, inbox=ticket.inbox)
    else:
        # NOTE(review): .get() presumably raises when no inbox (or more
        # than one) matches the recipients -- confirm how callers want
        # that reported.
        inbox = Inbox.objects.get(email__in=envelope.rcpt_tos, )
        if not inbox.enable_create_new_ticket_by_email:
            return '450 Creation of ticket by email is disabled for the inbox'
        Ticket.objects.create(author=author,
                              inbox=inbox,
                              title=message["Subject"],
                              content=reply)
        UserInbox.objects.get_or_create(user=author, inbox=inbox)

    return '250 OK'
tds = footer.find_all('td') if len(tds) == 1: td = tds[0] questions = [x.text.strip() for x in td.select('.question')] answers = [x.text.strip() for x in td.select('.answer')] return dict(zip(questions, answers)) else: return {} if __name__ == '__main__': with open('scratch/example_email.txt', 'rb') as fp: whole_email = BytesParser(policy=default).parse(fp) body = whole_email.get_body() if body['content-type'].subtype == 'html': html_str = body.get_content() soup = BeautifulSoup(html_str, 'html.parser') order_id = get_order_id(soup) purchase = get_purchase_details(soup) client_email = get_client_email(soup) additional_q_and_a = get_additional_q_and_a(soup) # print outs for testing purposes:
def getData(self):
    """Extract the authentication details from the e-mail at ``self.uri``.

    The message body (plain preferred, then html) is parsed with
    BeautifulSoup. Two extraction strategies are tried in order; the first
    one whose result reports ``isClean()`` is returned. Both look at the
    siblings following the element with id ``demo``; the second strategy
    reads the AUA name and the response code from the siblings following
    ``<body>`` instead.

    :return: an ``AadhaarAuthenticationMail`` instance, or None when
        neither strategy produced a clean result
    """

    def unquoted_text_after(nodes, marker):
        # Text of the node that directly follows the first node matching
        # `marker`, with one pair of surrounding double quotes removed.
        remaining = iter(nodes)
        for node in remaining:
            if re.search(marker, str(node)):
                follower = next(remaining, None)
                if follower is None:
                    return None
                value = follower.text.strip()
                if value[:1] == '"' and value[-1:] == '"':
                    value = value[1:-1].strip()
                return value
        return None

    def first_match(nodes, pattern):
        # First substring matching `pattern` in any node's markup.
        compiled = re.compile(pattern)
        for node in nodes:
            hit = re.search(compiled, str(node))
            if hit:
                return hit.group(0)
        return None

    def response_code(nodes):
        # Text of the first node mentioning 'Response code', minus its
        # first 14 characters.
        for node in nodes:
            if re.search('Response code', str(node)):
                return node.text.strip()[14:].strip()
        return None

    def authentication_outcome(nodes):
        # 'Success' / 'Failure' according to the first node mentioning
        # either word; 'success' wins when a node mentions both.
        for node in nodes:
            markup = str(node)
            if re.search('success', markup):
                return 'Success'
            if re.search('fail', markup):
                return 'Failure'
        return None

    def extract(soup, use_body_siblings):
        record = AadhaarAuthenticationMail()
        # next_siblings is a one-shot generator, so materialise it before
        # scanning it several times.
        after_demo = list(soup.find(id='demo').next_siblings)
        if use_body_siblings:
            after_body = list(soup.body.next_siblings)
        else:
            after_body = after_demo
        record.Auth_Modality = unquoted_text_after(after_demo, 'using')
        record.Date = first_match(after_demo, r'[0-9]{2}/[0-9]{2}/[0-9]{4}')
        record.Time = first_match(after_demo, r'[0-9]{2}:[0-9]{2}:[0-9]{2}')
        record.AUA_Name = unquoted_text_after(after_body, 'deployed by')
        record.UIDAI_Response_Code = response_code(after_body)
        record.Authentication_Response = authentication_outcome(after_demo)
        return record

    with open(self.uri, 'rb') as handle:
        message = BytesParser(policy=policy.default).parse(handle)
    body_part = message.get_body(preferencelist=('plain', 'html'))
    soup = BeautifulSoup(body_part.get_content(), 'html.parser')

    attempts = (
        ('Data extracted method1: ', False),
        ('Data extracted method2: ', True),
    )
    for banner, use_body_siblings in attempts:
        candidate = extract(soup, use_body_siblings)
        print(banner, vars(candidate))
        if candidate.isClean():
            return candidate
        print("Data Not clean")
    return None
# Captures the address between the angle brackets at the end of a From header.
eMailQuery = re.compile(r'<(.+)>$')


def removeTraitors(traitorAddress, element):
    """Remove a repeat sender from one app entry.

    If ``traitorAddress`` is among ``element['senders']``, the entry's
    ``count`` is decremented by one and every occurrence of the address is
    removed from ``senders``. The entry is modified in place and returned.
    """
    if traitorAddress in element['senders']:
        element['count'] = element['count'] - 1
        # Compare by value: two equal address strings are generally
        # distinct objects, so an identity test would remove nothing.
        element['senders'] = [
            x for x in element['senders'] if x != traitorAddress
        ]
    return element


for mail in filelist:
    with open(mail, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)
    # preferencelist must be a sequence of subtypes, not a bare string.
    parsed = msg.get_body(preferencelist=('plain',))
    if parsed is None:
        continue
    text = parsed.get_content()
    sender = re.search(eMailQuery, msg['From'])
    if sender is None:
        continue
    if sender.groups()[0] in addresses:
        # A second mail from the same address: undo its earlier requests.
        print('XXXXXX ---- We have a traitor: ', sender.groups()[0])
        for key, value in apps.items():
            value = removeTraitors(sender.groups()[0], value)
        continue
    else:
        addresses.append(sender.groups()[0])
    info = re.search(query, text)
from imaginary import magic_html_parser

# In a real program you'd get the filename from the arguments.
# The context manager closes the file as soon as parsing is done.
with open('outgoing.msg', 'rb') as fp:
    msg = BytesParser(policy=policy.default).parse(fp)

# Now the header items can be accessed as a dictionary, and any non-ASCII will
# be converted to unicode:
print('To:', msg['to'])
print('From:', msg['from'])
print('Subject:', msg['subject'])

# If we want to print a preview of the message content, we can extract whatever
# the least formatted payload is and print the first three lines.  Of course,
# if the message has no plain text part printing the first three lines of html
# is probably useless, but this is just a conceptual example.
simplest = msg.get_body(preferencelist=('plain', 'html'))
print()
print(''.join(simplest.get_content().splitlines(keepends=True)[:3]))

ans = input("View full message?")
# Slice instead of indexing so that just pressing Enter (empty answer)
# counts as "yes" rather than raising IndexError.
if ans.lower()[:1] == 'n':
    sys.exit()

# We can extract the richest alternative in order to display it:
richest = msg.get_body()
partfiles = {}
if richest['content-type'].maintype == 'text':
    if richest['content-type'].subtype == 'plain':
        for line in richest.get_content().splitlines():
            print(line)
        sys.exit()
def get_text_with_eml(self) -> str:
    """Return the text/plain content of one ``.eml`` file in the working directory.

    The file is the third entry (index 2) of ``glob.glob('*.eml')``, so at
    least three ``.eml`` files must be present; glob does not guarantee any
    particular order.

    :return: the plain-text body, or an empty string when the message has
        no text/plain part
    :raises IndexError: if fewer than three ``.eml`` files exist
    """
    file_list = glob.glob('*.eml')  # returns list of files
    with open(file_list[2], 'rb') as fp:  # select a specific email file from the list
        msg = BytesParser(policy=policy.default).parse(fp)

    # preferencelist must be a sequence of subtypes; a bare string is
    # matched by substring, which only works by accident.
    plain_part = msg.get_body(preferencelist=('plain',))
    if plain_part is None:
        return ''
    return plain_part.get_content()
#从字节串生成 EmailMessage 消息类 msg = BytesParser(policy=default).parsebytes(mail_bytes) ''' 下面这种只是多了一个由字节解码为字符的过程,无意义 mail_str = b'\r\n'.join(mail_body).decode( 'utf_8' ) msg = Parser(policy=default).parsestr( mail_str ) ''' print('邮件主题->> {}'.format(msg['Subject'])) print('日期->> {}'.format(msg['Date'].datetime)) #返回的是 发件人名称<电子邮件地址> 形式 print('发件人->> {}'.format(msg['From'])) print('主类型->> {}'.format(msg['Content-Type'])) #得到MIME段 text = msg.get_body(preferencelist=('related', 'html', 'plain')) print(text.get_content()) ''' #返回的是 Address类的元组: #(Address(display_name='zhujidong', username='******', domain='163.com'),) print( msg['To'].addresses ) #访问元组的一个元素的值 print( msg['From'].addresses[0].addr_spec ) print( msg['To'].addresses[0].display_name ) print( msg['To'].addresses[0].username ) print( msg['To'].addresses[0].domain ) #深度优先顺序遍历信息对象树的所有部分和子部分 for part in msg.walk(): print(part.get_content_type())
def parseMails():
    """Tally app requests from the e-mail files listed in ``filelist``.

    For every mail with a text/plain body and a ``<address>`` sender:

    * the sender's request counter in ``addresses`` is incremented; once it
      has reached ``requestlimit`` the sender is reported as greedy,
      ``removeGreedy`` is applied to every entry of ``apps`` and the mail
      is skipped;
    * if ``appInfoQuery`` matches the body, the matching entry of ``apps``
      (keyed by ``ComponentInfo``) is created or its ``count``/``senders``
      updated, and ``requestDate`` is raised to the mail's date when newer;
    * otherwise entries whose key occurs literally in the body are counted,
      and the mail is printed and appended to ``failedmail.txt``.
    """
    for mail in filelist:
        with open(mail, 'rb') as fp:
            # Parse the raw bytes into a message object
            msg = BytesParser(policy=policy.default).parse(fp)
        # preferencelist must be a sequence of subtypes, not a bare string.
        parsed = msg.get_body(preferencelist=('plain',))
        # Skip if body is empty
        if parsed is None:
            continue
        emailBody = parsed.get_content()

        # Check if sender exists (a missing From header counts as none)
        sender = re.search(eMailQuery, msg['From'] or '')
        if sender is None:
            continue
        address = sender.groups()[0]

        # check if sender crossed limit
        if address not in addresses:
            addresses[address] = 1
        elif addresses[address] == requestlimit:
            print('XXXXXX ---- We have a greedy one: ', address)
            for value in apps.values():
                removeGreedy(address, value)
            continue
        else:
            addresses[address] += 1

        appInfo = re.search(appInfoQuery, emailBody)

        # AppInfo could not automatically be extracted
        if appInfo is None:
            # Search for String appearance of existing ComponentInfos in E-Mail body
            for key in apps:
                if key in emailBody:
                    apps[key]['count'] += 1
                    apps[key]['senders'].append(address)
            # NOTE(review): the mail is reported as unhandled even when a
            # key matched above -- the original `continue` only affected
            # the inner loop. Behaviour kept; confirm the intent.
            print('\n/// The following message could not be handled:\n',
                  sender, emailBody, '\n')
            with open('failedmail.txt', 'a', encoding='utf-8') as fileTwo:
                fileTwo.write('\n----------------------------\n')
                fileTwo.write(emailBody)
        else:
            tempDict = appInfo.groupdict()
            component = tempDict['ComponentInfo']
            if component in apps:
                apps[component]['count'] = apps[component]['count'] + 1
                apps[component]['senders'].append(address)
            else:
                tempDict['count'] = 1
                tempDict['senders'] = [address]
                apps[component] = tempDict

            # Update date of last request; parsedate() returns None for a
            # missing or malformed Date header, which mktime() cannot take.
            dateTuple = parsedate(msg['Date'])
            if dateTuple is not None:
                requestDate = mktime(dateTuple)
                entry = apps[component]
                if ('requestDate' not in entry
                        or entry['requestDate'] < requestDate):
                    entry['requestDate'] = requestDate
# The message arrives on stdin; read all of it before stdin is replaced.
raw = sys.stdin.buffer.read()

# When stdin is a pipe, re-attach fd 0 to the terminal so that later
# interactive input still works.
if not os.isatty(0):
    try:
        # os.open() reports failure by raising OSError; it never returns
        # a negative descriptor.
        fd = os.open('/dev/tty', os.O_RDONLY)
    except OSError:
        sys.stderr.write('Unable to open an input tty.\n')
        sys.exit(-1)
    else:
        os.dup2(fd, 0)
        os.close(fd)

msg = BytesParser(policy=policy.default).parsebytes(raw)

# We can extract the richest alternative in order to display it:
richest = msg.get_body()
partfiles = {}
if richest['content-type'].maintype == 'text':
    if richest['content-type'].subtype == 'plain':
        for line in richest.get_content().splitlines():
            print(line)
        sys.exit()
    elif richest['content-type'].subtype == 'html':
        body = richest
    else:
        print("Don't know how to display {}".format(richest.get_content_type()))
        sys.exit()
elif richest['content-type'].content_type == 'multipart/related':
    # preferencelist must be a sequence of subtypes, not a bare string.
    body = richest.get_body(preferencelist=('html',))
    for part in richest.iter_attachments():
        fn = part.get_filename()
def main():
    """Scan the monitored IMAP inbox and notify recipients about important letters.

    Steps, in order:

    1. Make sure the cache folder (EML_PATH) and the history folder
       (EML_PATH_READY) exist.
    2. List the ``.eml`` files already present in the cache folder.
    3. Log in to CONTROLLED_EMAIL_SERVER, select INBOX and fetch every
       message without resetting its "Unseen" flag.
    4. For each message whose sender contains an entry of
       CONTROLLED_EMAIL_ADDRESSES_SENDERS: save the raw message to the
       cache and, unless it was cached already, notify the recipients in
       RECIPIENTS_FULL through ``send_email`` and the chat through
       ``send_telegram``.
    5. Delete cached ``.eml`` files whose letter is no longer in INBOX and
       move the matching ``.zip`` file to the history folder.
    6. Print statistics.

    Console output and notification texts are in Russian. The process
    exits with status -1 when a folder cannot be created, the login raises,
    or a cache file cannot be removed or moved.
    """
    try:
        Path(EML_PATH).mkdir(parents=True, exist_ok=True)
    except:
        # English: 'Error creating folder: ...'
        print(f'Ошибка создания папки: {FLR}{EML_PATH}')
        sys.exit(-1)

    try:
        Path(EML_PATH_READY).mkdir(parents=True, exist_ok=True)
    except:
        # English: 'Error creating folder: ...'
        print(f'Ошибка создания папки: {FLR}{EML_PATH_READY}')
        sys.exit(-1)

    letters_on_the_server_list = []
    letters_on_the_cache_list = []

    # Create the list of files already in the cache
    for (_, _, filenames) in os.walk(EML_PATH):
        for i in filenames:
            filename, file_extension = os.path.splitext(i)
            if '.eml' in file_extension:
                letters_on_the_cache_list.append(filename)
        # Only the top level of the cache folder is inspected
        break

    # Begin...
    # Connect to the IMAP4 server
    mail = imaplib.IMAP4_SSL(CONTROLLED_EMAIL_SERVER)
    try:
        r, data = mail.login(CONTROLLED_EMAIL_ADDRESSES,
                             CONTROLLED_EMAIL_ADDRESSES_PASSWORD)
        if r != "OK":
            # NOTE(review): this branch only prints; the scan continues.
            str_e = str(data)
            str_e = str_e.strip("b'").strip("'")
            # English: 'Error login : ...'
            print(f'{FLR}Ошибка подключения : {str_e}')
    except (imaplib.IMAP4.error, OSError) as e:
        str_e = str(e)
        str_e = str_e.strip("b'").strip("'")
        # English: 'Error login : ...'
        print(f'{FLR}Ошибка подключения : {str_e}')
        sys.exit(-1)

    # Get the list of catalogs "INBOX", "Sent", etc. (result is not used)
    mail.list()

    # Go to the INBOX folder
    _, select_data = mail.select('INBOX')
    select_data[0].decode('utf-8')

    # Get the ids of the letters, separated by spaces
    _, data = mail.search(None, 'ALL')
    ids = data[0]
    id_list = ids.split()

    count = len(id_list)
    print(f'{SR}')
    print('{:-<80}'.format(''))
    # English: 'Start scan : ...'
    print(
        f'Начало сканирования: {FLC}{"{:%d.%m.%Y %H:%M:%S}".format(datetime.now())}'
    )
    # English: 'Total letters : ...'
    print(f'Всего писем : {FLG}{count}')
    print('{:-<80}'.format(''))

    count_found = 0
    if count > 0:
        # Analysis of the available letters
        for item in id_list:
            email_id = item.decode('utf-8').strip()
            if email_id == '':
                continue

            # Get a letter; BODY.PEEK leaves the "Unseen" flag untouched
            _, data = mail.fetch(email_id, '(BODY.PEEK[])')

            # Raw message content
            raw_email = data[0][1]

            # Parse the contents of the letter
            msg = email.message_from_bytes(raw_email,
                                           _class=email.message.EmailMessage)

            # Get the date of the letter as 'dd.mm.yyyy HH:MM:SS'
            str_date = ''
            if msg['Date'] is not None:
                timestamp = email.utils.parsedate_tz(msg['Date'])
                year, month, day, hour, minute, second = timestamp[:6]
                str_date = '{0:02d}.'.format(day)
                str_date += '{0:02d}.'.format(month)
                str_date += '{0:04d} '.format(year)
                str_date += '{0:02d}:'.format(hour)
                str_date += '{0:02d}:'.format(minute)
                str_date += '{0:02d}'.format(second)

            # Get the sender address, decoding RFC 2047 encoded words
            msg_from_decoded = ''
            if msg['From'] is not None:
                str_from = str(msg["From"])
                if '=?' in str_from.strip():
                    msg_from_decoded = str(make_header(
                        decode_header(str_from)))
                else:
                    msg_from_decoded = str_from
                msg_from_decoded = (msg_from_decoded.replace("\n", "").replace(
                    "\r", "").replace("\t", "").strip())

            # Get the decoded letter subject
            subj = ""
            if msg["Subject"]:
                str_subj = str(msg["Subject"])
                if '=?' in str_subj.strip():
                    subj = str(make_header(decode_header(str_subj)))
                else:
                    subj = str_subj
                subj = (subj.replace("\n",
                                     "").replace("\r",
                                                 "").replace("\t",
                                                             "").strip())

            # Analysis of the letter data
            is_important_letter = False
            for control_email in CONTROLLED_EMAIL_ADDRESSES_SENDERS:
                # Is the letter important? Check whether the sender's
                # address, or a fragment of it, is in the list of monitored
                # senders CONTROLLED_EMAIL_ADDRESSES_SENDERS.
                if control_email in msg_from_decoded:
                    # If yes, set the letter importance flag
                    is_important_letter = True
                    break

            if is_important_letter:
                # The letter is IMPORTANT

                # Date and time of discovery
                date_time_discovery = '{:%d.%m.%Y %H:%M:%S}'.format(
                    datetime.now())

                count_found += 1

                # Set the flag "Notifications have already been sent"
                is_notifications_have_already_been_sent = True

                # Identifier of the letter: its date plus the sender's
                # domain, made safe for use as a file name
                str_domain = msg_from_decoded.split('@')[-1].strip('>').strip()
                message_id = f'{str_date.replace(":", ".")}@{str_domain}'
                message_id = sanitize_filename(message_id)

                print(f'From : {FLG}{msg_from_decoded}')
                print(f'Date : {FLG}{str_date}')
                print(f'Subject : {FLG}{subj}')

                # Add the letter ID to the list of "letters on the server"
                letters_on_the_server_list.append(message_id)

                # Save the original letter in .EML format to the cache
                # folder (if it is not already there)
                eml_file = EML_PATH + message_id + '.eml'
                if not Path(eml_file).is_file():
                    # Unset the flag "Notifications have already been sent"
                    is_notifications_have_already_been_sent = False

                    # Save the original letter in the cache folder
                    with open(eml_file, 'wb+') as file:
                        file.write(raw_email)

                    # Add the letter ID to the list of "letters in the cache"
                    letters_on_the_cache_list.append(message_id)

                if is_notifications_have_already_been_sent:
                    # If notifications have already been sent, do not
                    # notify the recipients again.
                    # English: 'Skipped : ... / Cause : Notifications for
                    # this email have already been sent'
                    print(f'\nПропускается : {FLY}{message_id}{FR}\n'
                          f'Причина : {FLY}Уведомления по этому '
                          f'письму ранее уже отправлялись')
                    print('{:-<80}'.format(''))
                    continue

                warning_msg = f'From : {msg_from_decoded}\n'
                warning_msg += f'Date : {str_date}\n'
                warning_msg += f'Subject: {subj}\n'

                email_msg = warning_msg

                if Path(eml_file).is_file():
                    # Read the original letter back from the cached file
                    with open(eml_file, 'rb+') as file:
                        eml_msg = BytesParser(
                            policy=policy.default).parse(file)

                    # Convert the letter body to readable text
                    eml_text_part = ''
                    eml_text_part_b = eml_msg.get_body()
                    if eml_text_part_b is not None:
                        eml_text_part = eml_text_part_b.get_content()

                    if eml_text_part is not None:
                        # Turn <br> into newlines, drop every other tag
                        eml_text_part = re.sub(r'<br.*?>', '\n', eml_text_part)
                        eml_text_part = re.sub(r'<.*?>', '', eml_text_part)

                        # Limit the length of the text (only a fragment is
                        # shown in the Telegram chat)
                        eml_text_part = str(eml_text_part)[:142].strip()

                    if eml_text_part is not None and eml_text_part != '':
                        warning_msg += '{:-<8}\n'.format('')
                        # English: 'Summary :'
                        warning_msg += 'Краткое содержание:\n'
                        warning_msg += '{:-<8}\n'.format('')
                        warning_msg += eml_text_part + '...\n'
                        warning_msg += '{:-<8}\n'.format('')

                # English: 'This event applies to all!' /
                # 'See the full text of the letter in your email.'
                warning_msg += 'Это событие касается всех!\n'
                warning_msg += 'Полный текст письма смотрите в своей почте.'

                # Set the flag "Send full telegram notification"
                is_send_full_telegram_notification = True

                # Reset the list of recipients for a full Telegram
                # notification
                recepints_for_full_telegram_notification_list = []

                # Reset the list of recipients for an incomplete Telegram
                # notification
                recepints_for_incomplete_telegram_notification_list = []

                # Send personal email notifications to the recipients
                for recipient_data in RECIPIENTS_FULL.items():
                    # Email of the recipient (the dictionary key)
                    recepient_email = recipient_data[0]

                    # Name of the recipient
                    recepient_name = recipient_data[1][0]
                    if recepient_name.strip() == '':
                        # The recipient's name is missing; use a placeholder
                        # (English: 'Unknown')
                        recepient_name = 'Вася Пупкин'

                    # The list of sender addresses, or fragments of them,
                    # prohibited for this recipient
                    prohibited_email_list = recipient_data[1][1]

                    # Is this letter's sender in the list of addresses
                    # prohibited for this recipient?
                    is_prohibited = False
                    prohibited_part = ''
                    for prohibited_email in prohibited_email_list:
                        if prohibited_email in msg_from_decoded:
                            is_prohibited = True
                            prohibited_part = prohibited_email
                            break

                    # Recipient list for send_email():
                    # to_list[0] - email of the recipient
                    # to_list[1] - name of the recipient
                    to_list = [recepient_email, recepient_name]

                    if not is_prohibited:
                        # An email notification MAY be sent to this
                        # recipient for this letter.

                        # Send the email notification to the recipient
                        send_email(email_msg,
                                   to_list,
                                   attached_file=eml_file,
                                   date_time=date_time_discovery,
                                   subject=subj)

                        # Add the recipient name to the list of recipients
                        # of the full notification in the Telegram chat
                        recepints_for_full_telegram_notification_list.append(
                            to_list[1])
                    else:
                        # It is FORBIDDEN to send an email notification to
                        # this recipient for this letter.

                        # Add the recipient name to the list of recipients
                        # of the incomplete notification in the Telegram chat
                        recepints_for_incomplete_telegram_notification_list.append(
                            to_list[1])

                        # Unset the flag "Send full telegram notification"
                        is_send_full_telegram_notification = False

                        # English: 'Skipped letter : ... / Cause : Incoming
                        # letter ... is in the list of prohibited for this
                        # recipient (...)'
                        print(
                            f'\nПропускается письмо: {FLR}{to_list[0]} ({to_list[1]}){FR}\n'
                            f'Причина : Входящее письмо {FLG}{msg_from_decoded}{FR} '
                            f'находится в списке запрещённых для '
                            f'этого получателя ({FLR}{prohibited_part}{FR})')
                        print('{:-<80}'.format(''))

                # Send the general notification to the Telegram chat
                if is_send_full_telegram_notification:
                    # Send the full notification to the Telegram chat
                    send_telegram(warning_msg, date_time=date_time_discovery)
                else:
                    # Build the incomplete notification: first the names of
                    # the recipients that were notified by email
                    warning_msg = ''
                    for name in recepints_for_full_telegram_notification_list:
                        warning_msg += name + '\n'
                    warning_msg += '{:-<8}\n'.format('')
                    # English: 'This event is only for recipients listed
                    # above!' / 'E-mail notification has been sent to all
                    # of you.' / 'The full text of the letter can be viewed
                    # in your email.'
                    warning_msg += 'Это событие только для получателей, перечисленных выше!\n'
                    warning_msg += 'Всем вам отправлено уведомление по e-mail.\n'
                    warning_msg += 'Полный текст письма можно посмотреть в своей почте.\n'
                    warning_msg += '\n{:-<8}\n'.format('')

                    # Add the names of the recipients whom this letter does
                    # not concern
                    for name in recepints_for_incomplete_telegram_notification_list:
                        warning_msg += name + '\n'
                    warning_msg += '{:-<8}\n'.format('')
                    # English: 'This event has nothing to do with you!'
                    warning_msg += 'Это событие не имеет к вам ' \
                                   'никакого отношения!'

                    # Send the incomplete notification to the Telegram chat
                    send_telegram(warning_msg, date_time=date_time_discovery)

    mail.close()

    # Disconnect from the IMAP4 server
    mail.logout()

    # Clean up the cache as needed
    for id_on_the_cache in letters_on_the_cache_list:
        if id_on_the_cache not in letters_on_the_server_list:
            # The letter is in the cache, but it is no longer in the
            # "INBOX" folder on the IMAP4 server.

            # EML file to be removed from the cache
            f_eml_src = EML_PATH + id_on_the_cache + '.eml'

            # ZIP file to be moved to the history folder
            f_zip_src = EML_PATH + id_on_the_cache + '.zip'
            f_zip_dst = EML_PATH_READY + id_on_the_cache + '.zip'

            try:
                # Remove the EML file from the cache
                # English: 'Remove : ...'
                print(f'Удаление : {f_eml_src}')
                if os.path.isfile(f_eml_src):
                    os.remove(f_eml_src)
            except OSError as e:
                # English: 'Error deleting file ...'
                print(f'Ошибка удаления файла {FLR}{f_eml_src}')
                print(f'{FLR}{e.filename}{FR}: {FLR}{e.strerror}')
                sys.exit(-1)

            try:
                # Move the ZIP file to the history folder (ready)
                # English: 'Move : ... to ...'
                print(f'Перемещение : {f_zip_src} в {f_zip_dst}')
                if os.path.isfile(f_zip_src):
                    os.rename(f_zip_src, f_zip_dst)
            except OSError as e:
                # English: 'Error moving file ... to ...'
                print(
                    f'Ошибка перемещения файла {FLR}{f_zip_src}{FR} в {FLR}{f_zip_dst}'
                )
                print(f'{FLR}{e.filename}{FR}: {FLR}{e.strerror}')
                sys.exit(-1)

    # Print statistics
    if count_found > 0:
        print('\n{:-<80}'.format(''))
        # English: 'Total important : ...' / 'End of scan : ...'
        print(f'Из них важных : {FLG}{count_found}')
        print(
            f'Конец сканирования : {FLC}{"{:%d.%m.%Y %H:%M:%S}".format(datetime.now())}'
        )
        print('{:-<80}\n'.format(''))
    print(f'{SR}')
def upload_file_():
    """Store an uploaded document, extract its entities and record it in the DB.

    Reads ``file``, ``pname`` and ``lang`` from the current request.  ``.docx``,
    ``.txt`` and ``.msg`` uploads are converted to an HTML page next to the
    upload; anything else is treated as a PDF, with an OCR pass when the PDF is
    not searchable.  One row describing the file is inserted into
    ``Project_Files``.

    Returns:
        The rendered ``addfiles.html`` page carrying a status message.
    """
    # Local import: only the .msg branch needs it.
    import subprocess

    try:
        print("Here in uploader")
        file = request.files['file']
        pname = request.form['pname']
        print(file)
        print(pname)
        if request.method == 'POST':
            lang = request.form['lang']
            if file.filename == '':
                print("file name is empty")
                # 'addfiles.html' is used as a template name everywhere else in
                # this function, so render it instead of passing it to url_for().
                return render_template('addfiles.html', email=session['user'], projectList=session['projectList'], message='No selected file')
            if file and allowed_file(file.filename):
                filename = secure_filename(file.filename)
                # The file is stored under the sanitised name, so every later
                # path, URL and DB value must use that same name.
                file.filename = filename
                print("file", file.filename)
                upload_dir = app.config['UPLOAD_PATH_PDF']
                file.save(os.path.join(upload_dir, filename))
                formatted_date = datetime.datetime.now().strftime('%Y-%m-%d')
                project = pname.strip("'")
                base_url = "http://george.runmy.tech:5000/static/web/"
                sql = 'INSERT INTO Project_Files (FileName,ProjectName,ProjectUserID,UploadDate,UploadPath,Nodes,Edges,FileEntities,URL) VALUES (%s,%s, %s ,%s,%s,%s,%s,%s,%s)'

                def build_extractor(source_path, url):
                    # Every branch builds the extractor with the same configuration.
                    return EntityExtractor(lang, source_path, project, url, app.config['GOOGLE_API_KEY'], app.config['NLP_API_KEY'], app.config["DATABASEIP"], app.config["DB_USER"], app.config["DB_PASSWORD"], app.config["DATABASE"], filename)

                def write_paragraph_page(text, html_path, **paragraph_parser):
                    # One <p> per blank-line separated paragraph, wrapped in <html><body>.
                    soup = BeautifulSoup("<html></html>")
                    htmltag = soup.find('html')
                    body = soup.new_tag("body")
                    for para in text.split("\n\n"):
                        ptag = BeautifulSoup("<p></p>", **paragraph_parser).find('p')
                        ptag.insert(0, NavigableString(para))
                        body.append(ptag)
                    htmltag.append(body)
                    with open(html_path, "wb+") as filewriter:
                        filewriter.write(soup.prettify("utf-8"))

                entityExtractor_ = None
                document_url = None
                if ".docx" in filename:
                    with open(upload_dir + filename, "rb") as docx_file:
                        result = mammoth.convert_to_html(docx_file)
                    html = result.value  # The generated HTML
                    temp = filename.replace(".docx", "")
                    with open(upload_dir + temp + ".html", "w") as html_file:
                        html_file.write(html)
                    document_url = base_url + temp + ".html"
                    entityExtractor_ = build_extractor(upload_dir + filename, document_url)
                    entityExtractor_.getEntityDocxJson()
                elif ".txt" in filename:
                    temp = filename.replace(".txt", "")
                    with open(upload_dir + filename, "r") as myfile:
                        data = myfile.read()
                    write_paragraph_page(data, upload_dir + temp + ".html")
                    document_url = base_url + temp + ".html"
                    entityExtractor_ = build_extractor(upload_dir + filename, document_url)
                    entityExtractor_.getEntityTxtJson()
                elif ".msg" in filename:
                    # Argument list + cwd instead of a shell string: the file
                    # name never reaches a shell.
                    subprocess.run(['msgconvert', filename], cwd=upload_dir, check=True)
                    with open(upload_dir + filename + '.eml', 'rb') as fp:
                        msg = BytesParser(policy=policy.default).parse(fp)
                    data = msg.get_body(preferencelist=('plain',)).get_content()
                    print(data)
                    temp = filename.replace(".msg", "")
                    write_paragraph_page(data, upload_dir + temp + ".html", features="lxml")
                    with open(upload_dir + temp + ".txt", "w+") as filewriter:
                        print("Converting .msg into Text")
                        filewriter.write(data)
                        print("Converted .msg into Text")
                    document_url = base_url + temp + ".html"
                    entityExtractor_ = build_extractor(upload_dir + temp + ".txt", document_url)
                    print("Stucked")
                    entityExtractor_.getEntityTxtJson()
                else:
                    temp = filename.replace(".pdf", "")
                    document_url = base_url + "viewer.html?file=" + filename
                    entityExtractor_ = build_extractor(upload_dir + filename, document_url)
                    if entityExtractor_.isSearchablePDF():
                        entityExtractor_.getEntityPDFJson()
                    else:
                        # OCR
                        print("Have to do OCR")
                        OCR.pdf_splitter(upload_dir + filename, upload_dir + temp + ".txt", app.config['OCR_API_KEY'])
                        entityExtractor_ = build_extractor(upload_dir + temp + ".txt", document_url)
                        entityExtractor_.getEntityTxtJson()
                print(entityExtractor_.getEntities())
                print("some stuff")
                args = (filename, project, session['user'].strip("'"), formatted_date.strip("'"), upload_dir, entityExtractor_.getNodesList(), entityExtractor_.getEdgeList(), entityExtractor_.getEntities(), document_url)
                del entityExtractor_
                db = pymysql.connect(app.config["DATABASEIP"], app.config["DB_USER"], app.config["DB_PASSWORD"], app.config["DATABASE"])
                try:
                    cur = db.cursor()
                    cur.execute(sql, args)
                    db.commit()
                finally:
                    # Close the connection even when the insert fails.
                    db.close()
                return render_template('addfiles.html', email=session['user'], projectList=session['projectList'], message="File is successfully uploaded and processed")
            else:
                return render_template('addfiles.html', email=session['user'], projectList=session['projectList'], message='File Extension not allowed')
    except Exception as e:
        print("Error is here soooooooooo" + str(e))
        # format_exc() works on every Python 3 version; the etype= keyword of
        # format_exception() was removed in 3.10.
        print(traceback.format_exc())
        return render_template('addfiles.html', projectList=session['projectList'], email=session['user'], message='Exception in file processing')
def get_eml_body(eml_file):
    """Return the plain-text body of the ``.eml`` file at *eml_file*.

    Args:
        eml_file: Path of the message file to read.

    Returns:
        The decoded text of the message's text/plain body, or an empty string
        when the message has no text/plain body part.
    """
    with open(eml_file, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)
    # preferencelist must be a sequence of subtype names; ('plain') is just the
    # string 'plain' and only worked through substring matching.
    body = msg.get_body(preferencelist=('plain',))
    if body is None:
        return ''
    return body.get_content()
def process_students(imap_conn):
    """Read the unseen INBOX messages and turn each one into a student record.

    The first three non-empty text chunks of every message body are taken as
    group, name and repository name.

    Args:
        imap_conn: Connection object offering ``select`` and ``uid``.

    Returns:
        A list of dicts with the keys ``group``, ``raw_group``, ``name``,
        ``github``, ``email`` and ``uid``; ``None`` when the mailbox cannot be
        opened, the search fails or a message cannot be fetched.
    """
    rv, data = imap_conn.select("INBOX")
    if rv != 'OK':
        print("ERROR: Unable to open mailbox ", rv)
        # Nothing can be searched without a selected mailbox.
        return None

    rv, data = imap_conn.uid('search', None, "(UNSEEN)")
    if rv != 'OK':
        print("No messages found!")
        return None

    # Cyrillic look-alikes (U+041C, U+0412, U+0417, U+041A) -> Latin M, V, Z, K.
    to_latin = str.maketrans('\u041c\u0412\u0417\u041a', 'MVZK')

    # list of students
    students = []
    for uid in data[0].split():
        rv, fetched = imap_conn.uid('fetch', uid, '(RFC822)')
        if rv != 'OK':
            print("ERROR getting message {}".format(uid))
            return None

        # see https://docs.python.org/3/library/email.examples.html for an email processing example
        msg = BytesParser(policy=policy.default).parsebytes(fetched[0][1])
        subject = msg['subject']
        print('Message {}: {}'.format(uid, subject))
        print('Raw Date: {}'.format(msg['Date']))

        # Now convert to local date-time
        date_tuple = email.utils.parsedate_tz(msg['Date'])
        if date_tuple:
            local_date = datetime.datetime.fromtimestamp(
                email.utils.mktime_tz(date_tuple))
            print("Local Date:",
                  local_date.strftime("%a, %d %b %Y %H:%M:%S"))

        # Use the least formatted body part available.
        simplest = msg.get_body(preferencelist=('plain', 'html'))
        if simplest is None:
            print(
                "Error! Unable to parse email body. There should be at least 3 lines of text in the email."
            )
            print("")
            continue
        simplest_text = simplest.get_content()

        soup = BeautifulSoup(simplest_text, features="lxml")
        # kill all script and style elements
        for script in soup(["script", "style"]):
            script.extract()
        text = soup.get_text(separator='\n')
        # break into lines and remove leading and trailing space on each
        lines = (line.strip() for line in text.splitlines())
        # break multi-headlines into a line each; the separator is a run of
        # two spaces so that multi-word names stay in one chunk
        chunks = (phrase.strip() for line in lines
                  for phrase in line.split("  "))
        # drop blank lines
        text_chunks = [chunk for chunk in chunks if chunk]

        if len(text_chunks) >= 3:
            print("Group: {}".format(text_chunks[0]))
            print("Name: {}".format(text_chunks[1]))
            print("Repo name: {}".format(text_chunks[2]))
            # make uppercase, swap look-alike letters to English, then
            # remove all invalid characters
            group = text_chunks[0].upper().translate(to_latin)
            group = ''.join([c for c in group if c in '0123456789MVZK'])
            # normalize unicode string, e.g. substitute non-breaking space
            # ('\xa0') with normal space; see https://stackoverflow.com/a/34669482
            name = unicodedata.normalize("NFKC", text_chunks[1])
            students.append({
                'group': "'{}'".format(group),
                'raw_group': text_chunks[0],
                'name': name,
                'github': text_chunks[2],
                'email': msg['from'],
                'uid': uid
            })
        else:
            print(
                "Error! Unable to parse email body. There should be at least 3 lines of text in the email."
            )
        print("")
    return students
def process(self, projectFile_):
    """Convert a project file for viewing and run entity extraction on it.

    ``.docx``, ``.txt`` and ``.msg`` files get an HTML rendition written next
    to the upload; any other file is treated as a PDF, with an OCR pass when
    the PDF is not searchable.

    Args:
        projectFile_: Object providing ``FileName``, ``UploadPath``,
            ``ProjectName`` and ``lang``.
    """
    # Local import: only the .msg branch needs it.
    import subprocess

    upload_path = projectFile_.UploadPath
    file_name = projectFile_.FileName
    project_name = projectFile_.ProjectName.strip("'")

    def write_paragraph_page(text, html_path, **paragraph_parser):
        # One <p> per blank-line separated paragraph, wrapped in <html><body>.
        soup = BeautifulSoup("<html></html>")
        htmltag = soup.find('html')
        body = soup.new_tag("body")
        for para in text.split("\n\n"):
            ptag = BeautifulSoup("<p></p>", **paragraph_parser).find('p')
            ptag.insert(0, NavigableString(para))
            body.append(ptag)
        htmltag.append(body)
        with open(html_path, "wb+") as filewriter:
            filewriter.write(soup.prettify("utf-8"))

    entityExtractor_ = None
    document_url = None
    if ".docx" in file_name:
        with open(upload_path + file_name, "rb") as docx_file:
            result = mammoth.convert_to_html(docx_file)
        html = result.value  # The generated HTML
        temp = file_name.replace(".docx", "")
        # Write the document once; a second write duplicates the whole page.
        with open(upload_path + temp + ".html", "w") as html_file:
            html_file.write(html)
        document_url = self.uploadurl + temp + ".html"
        title = self.extractTitle(upload_path + temp + ".html",
                                  projectFile_.lang)
        print("Here in File Processor")
        print(self.uploadurl)
        print(document_url)
        entityExtractor_ = EntityAndRelationBuilder(
            projectFile_.lang, upload_path + file_name, project_name,
            document_url, file_name, title)
        entityExtractor_.getEntityDocxJson()
    elif ".txt" in file_name:
        temp = file_name.replace(".txt", "")
        with open(upload_path + file_name, "r") as myfile:
            data = myfile.read()
        print(data)
        write_paragraph_page(data, upload_path + temp + ".html")
        document_url = self.uploadurl + temp + ".html"
        title = self.extractTitle(upload_path + temp + ".html",
                                  projectFile_.lang)
        print("Language is:" + projectFile_.lang)
        entityExtractor_ = EntityAndRelationBuilder(
            projectFile_.lang, upload_path + file_name, project_name,
            document_url, file_name, title)
        entityExtractor_.getEntityTxtJson()
    elif ".msg" in file_name:
        # Argument list + cwd instead of a shell string: the file name never
        # reaches a shell.
        subprocess.run(['msgconvert', file_name], cwd=upload_path, check=True)
        eml_path = upload_path + file_name + '.eml'
        with open(eml_path, 'rb') as fp:
            msg = BytesParser(policy=policy.default).parse(fp)
        data = msg.get_body(preferencelist=('plain',)).get_content()
        with open(eml_path, 'r') as fhp:
            headers = Parser().parse(fhp)
        print(headers["to"])
        print(headers["from"])
        print(headers["subject"])
        print(data)
        # NOTE(review): FileName ends in ".msg" in this branch, so this replace
        # presumably leaves it unchanged -- confirm the intended base name.
        temp = file_name.replace(".msg.eml", "")
        write_paragraph_page(data, upload_path + temp + ".html",
                             features="lxml")
        with open(upload_path + temp + ".txt", "w+") as filewriter:
            print("Converting .msg into Text")
            filewriter.write(data)
            print("Converted .msg into Text")
        filename = upload_path + temp + ".txt"
        document_url = self.uploadurl + temp + ".html"
        entityExtractor_ = EmailRelationExtractor(
            projectFile_.lang, filename, project_name, document_url,
            file_name, headers["to"], headers["from"], headers["subject"])
        print("Stucked")
        entityExtractor_.getEntityTxtJson()
    else:
        temp = file_name.replace(".pdf", "")
        title = ""
        document_url = self.uploadurl + "viewer.html?file=" + file_name
        entityExtractor_ = EntityAndRelationBuilder(
            projectFile_.lang, upload_path + file_name, project_name,
            document_url, file_name, title)
        if entityExtractor_.isSearchablePDF():
            entityExtractor_.getEntityPDFJson()
        else:
            # OCR
            print("Have to do OCR")
            OCR_FileName = upload_path + temp + ".txt"
            OCR.pdf_splitter(upload_path + file_name, OCR_FileName,
                             self.ocrapikey)
            title = self.extractTitleText(OCR_FileName, projectFile_.lang)
            entityExtractor_ = EntityAndRelationBuilder(
                projectFile_.lang, OCR_FileName, project_name, document_url,
                file_name, title)
            entityExtractor_.getEntityTxtJson()
def get_email(text):
    """Return the address shown in the ``Email: <a ...>address</a>`` link of *text*.

    Returns ``0`` when *text* contains no such link.
    """
    try:
        # First isolate everything between "Email: " and the closing "/a>",
        # then take what sits between the last ">" ... "<" pair inside it.
        anchor = re.search(r"(?<=Email: )(.*)(?=\/a>)", text).group(1)
        return re.search(r"(?<=>)(.*)(?=<)", anchor).group(1)
    except (AttributeError, TypeError):
        # AttributeError: a search found nothing; TypeError: text is not a string.
        return 0


path = './mails/'
eml_files = glob.glob(path + '*.eml')
data = []
for eml_file in eml_files:
    # The with-block closes the file; no explicit close() is needed.
    with open(eml_file, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)
    text = str(msg.get_body(preferencelist=('html',)))
    # collecting findings
    name = get_name(text)
    email = get_email(text)
    if name != 0 and email != 0:
        data.append([name, email])

dataframe = pd.DataFrame(data, columns=["Names", "Emails"]).drop_duplicates()
dataframe.to_csv("data.csv", index=False)
f.close() print('Attachment found: ', part.get_filename()) to = msg['to'] fromEmail = msg['from'] cc = msg['cc'] subject = msg['subject'] header = '<div style="background:white;"><b>From</b>: ' + fromEmail + '<br>' header += '<b>To</b>: ' + to + '<br>' if cc != None: header += '<b>CC</b>: ' + cc + '<br>' header += '<b>Subject</b>: ' + subject + '<br>' if len(attachmentNames) > 0: header += '<b>Attachment file name(s)</b>: ' + ', '.join( attachmentNames) + '<br>' header += '<br><hr><br></div>' simplest = msg.get_body(preferencelist=('html', 'plain')).get_content() simplest = header + '\n' + simplest pdfkit.from_string(simplest, outputPath + '/' + fileName + '.pdf', options=options) try: plainText = msg.get_body(preferencelist=('plain')).get_content() r.extract_keywords_from_text(plainText) keywords = r.get_ranked_phrases()[:10] doc = nlp(plainText) persons = dict( Counter([x.text for x in doc.ents if x.label_ == 'PERSON'])) orgs = dict( Counter([x.text for x in doc.ents if x.label_ == 'ORG'])) norp = dict( Counter([x.text for x in doc.ents if x.label_ == 'NORP']))
import email
from email import policy
from email.parser import BytesParser
import glob

# file_list = glob.glob('*.eml')  # returns list of files
file_name = "original_msg.eml"

# Parse one specific e-mail file and print its plain-text content.
with open(file_name, 'rb') as fp:
    msg = BytesParser(policy=policy.default).parse(fp)

# preferencelist must be a sequence of subtype names; ('plain') is just the
# string 'plain' and only worked through substring matching.
text = msg.get_body(preferencelist=('plain',)).get_content()
# footer = msg.get_all()
print(text)  # print the email content
async def __run(self):
    """Forward one mail read from STDIN to the REST API of the recipient's hood.

    The process exits with status 0 when the API answers 201 Created and with
    status 1 in every other case (unknown recipient, missing or unparsable
    sender, sender not subscribed, no text body, API error).
    """
    # extract email from the recipient
    email_name = args.recipient.lower()
    try:
        email = await Email.objects.get(name=email_name)
    except NoMatch:
        logger.error('No recipient with this name')
        exit(1)

    # read mail from STDIN and parse to EmailMessage object
    message = BytesParser(policy=default).parsebytes(stdin.buffer.read())

    # The Sender header wins over From when both are present.
    sender = message.get('sender') or message.get('from')
    if not sender:
        logger.error('No Sender or From header')
        exit(1)
    sender = parseaddr(sender)[1]
    if not sender:
        logger.error('Could not parse sender')
        exit(1)

    maybe_subscriber = await EmailSubscribers.objects.filter(email=sender).all()
    if (len(maybe_subscriber) != 1
            or maybe_subscriber[0].hood.id != email.hood.id):
        logger.error('Not a subscriber')
        exit(1)

    # extract relevant data from mail
    body = message.get_body(preferencelist=('plain', 'html'))
    if body is None:
        # Mails without a text/plain or text/html part carry nothing to post.
        logger.error('No text body found in mail')
        exit(1)
    # Strip anything that looks like a markup tag.
    text = sub(r'<[^>]*>', '', body.get_content())

    response = post(
        '%s/api/hoods/%d/email/messages/' % (config['root_url'], email.hood.pk),
        json={
            'text': text,
            'secret': email.secret
        },
    )
    if response.status_code == status.HTTP_201_CREATED:
        exit(0)
    elif response.status_code == status.HTTP_451_UNAVAILABLE_FOR_LEGAL_REASONS:
        logger.error('Message wasn\'t accepted: %s' % text)
    elif response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY:
        logger.error('Malformed request: %s' % response.json())
    elif response.status_code == status.HTTP_401_UNAUTHORIZED:
        logger.error(
            'Wrong API secret. kibicara_mda seems to be misconfigured')
    else:
        logger.error('REST-API failed with response status %d' %
                     response.status_code)
    exit(1)
# SELECT Unsafe Login. Please contact [email protected] for help
# IMAP requires a tag in front of every command so that responses can be
# matched asynchronously, hence the call to imap._new_tag().
# The data sent is a byte string (hence the b prefix) and must end with \r\n,
# otherwise the server keeps waiting for the end of the command.
imap.send(
    b'%s ID ("name" "zgzxxbot" "version" "1.0" "vendor" "J.D.zhu")\r\n'
    % imap._new_tag()
)
try:
    # The default mailbox is INBOX; the result carries the number of messages.
    print(imap.select())
    # response is a list; its first element is the space-separated message numbers.
    status, response = imap.search(None, '(UNSEEN)')
    unread_msg_nums = response[0].split()
    if unread_msg_nums:
        # BODY[] is equivalent to RFC822, so the whole message is returned.
        _, response = imap.fetch(unread_msg_nums[0], '(UID BODY[])')
        # Build an EmailMessage from the byte string; a BODY[HEADER] response
        # could be turned into a message instance the same way.
        msg = BytesParser(policy=default).parsebytes(response[0][1])
        print(msg['Subject'])
        print(msg['Date'])
        # Returned in the form: sender name <e-mail address>
        print(msg['From'])
        print(msg['To'])
        print(msg['Content-Type'])
        # Extract the plain-text content (the preference list must be a
        # sequence of subtype names, not a bare string).
        plain_body = msg.get_body(('plain',))
        if plain_body is not None:
            print(plain_body.get_content())
    else:
        # Indexing unread_msg_nums[0] would raise IndexError on an empty result.
        print('No unread messages')
finally:
    # Always end the session, even when one of the steps above failed.
    imap.logout()
def send_email(message,
               recipient,
               attached_file=None,
               date_time=None,
               subject=None):
    """
    Send an e-mail notification to the recipient.

    The mail has a plain and an HTML alternative.  When *attached_file* is an
    existing EML file its text is quoted in the body and a ZIP archive of it
    (created next to it when missing) is attached.  Connection and delivery
    errors are printed, not raised.

    :param message: str, notification text placed in the body
    :param recipient: list, address at index 0 and display name at index 1
    :param attached_file: str, path of the original EML file
    :param date_time: str, discovery time; the current time when omitted
    :param subject: str, subject of the original message
    :return: None
    """
    msg_mime = MIMEMultipart('alternative')
    msg_mime['From'] = SMTP_SENDER
    msg_mime['To'] = recipient[1] + ' <' + recipient[0] + '>'
    if subject:
        msg_mime['Subject'] = 'Обнаружено важное письмо: [ ' + str(
            subject).strip() + ' ]'
    else:
        msg_mime['Subject'] = 'Обнаружено важное письмо!'

    body_text = '%BODY%'
    body_html = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<html>
<body marginwidth="0" \
marginheight="0" leftmargin="0" topmargin="0" style="background-color:#F6F6F6; \
font-family:Arial,serif; margin:0; padding:0; min-width: 100%; \
-webkit-text-size-adjust:none; -ms-text-size-adjust:none;">
<div style="width: auto; color:#000; background-color: #F4F5F7; \
padding: 50px; display: inline-block;">
%BODY%
</div>
</body>
</html>
"""

    if date_time:
        today = date_time
    else:
        today = '{:%d.%m.%Y %H:%M:%S}'.format(datetime.now())
    today = 'Дата и время обнаружения: ' + today
    hl = '{:-<8}'.format('')

    has_attachment = bool(attached_file) and Path(attached_file).is_file()

    eml_text = ''
    if has_attachment:
        # Read-only access is enough for parsing.
        with open(attached_file, 'rb') as file:
            eml_msg = BytesParser(policy=policy.default).parse(file)
        # Ask for a text part explicitly: the default preference may return a
        # multipart/related container, which has no content to extract.
        eml_text_b = eml_msg.get_body(preferencelist=('html', 'plain'))
        if eml_text_b is not None:
            eml_text = eml_text_b.get_content()
        if eml_text:
            # Turn line-break tags into newlines, then drop all other tags.
            eml_text = re.sub(r'<br.*?>', '\n', eml_text)
            eml_text = re.sub(r'<.*?>', '', eml_text)
            eml_text = str(eml_text).strip()

    # Plain part
    b_txt = ''
    b_txt += '\nОбнаружено важное письмо!\n'
    b_txt += today + '\n'
    b_txt += hl + '\n'
    b_txt += message.rstrip('\n') + '\n'
    b_txt += hl + '\n'
    b_txt += 'Текст оригинального сообщения:\n'
    b_txt += hl + '\n'
    b_txt += eml_text + '\n'
    body_text = body_text.replace('%BODY%', b_txt)

    # HTML part
    b_html = ''
    b_html += '<br><b>Обнаружено важное письмо!</b><br>'
    b_html += '<pre style="white-space: pre-wrap; word-wrap: break-word;">'
    b_html += today + '\n'
    b_html += hl + '\n'
    b_html += message.rstrip('\n') + '\n'
    b_html += hl + '\n'
    b_html += 'Текст оригинального сообщения:\n'
    b_html += hl + '\n'
    b_html += eml_text + '\n'
    b_html += '</pre>'
    body_html = body_html.replace('%BODY%', b_html)

    msg_mime.attach(MIMEText(body_text, 'plain'))
    msg_mime.attach(MIMEText(body_html, 'html'))

    if has_attachment:
        eml_zipfile = Path(attached_file).with_suffix('.zip')
        # Reuse an archive left by an earlier call; create it otherwise.
        if not eml_zipfile.is_file():
            with zipfile.ZipFile(eml_zipfile, 'w',
                                 zipfile.ZIP_DEFLATED) as zipf:
                zipf.write(attached_file, basename(attached_file))
        with open(eml_zipfile, 'rb') as file:
            eml_part = MIMEApplication(file.read(),
                                       Name=basename(eml_zipfile))
        print(
            f'\nОтправляется письмо: {FLG}{recipient[0]} ({recipient[1]}){FR}\n'
            f'Вложение : {FLG}{basename(eml_zipfile)} ({basename(attached_file)})'
        )
        print('{:-<80}'.format(''))
        eml_part[
            'Content-Disposition'] = 'attachment; filename="%s"' % basename(
                eml_zipfile)
        msg_mime.attach(eml_part)

    summary_message = msg_mime.as_string()
    server = None
    context = None
    # Try to connect to the SMTP server and send the e-mail
    try:
        if SMTP_SSL or SMTP_STARTTLS:
            # SSL and STARTTLS both need a secure SSL context
            context = ssl.create_default_context()
        if SMTP_SSL:
            server = smtplib.SMTP_SSL(host=SMTP_SERVER,
                                      port=SMTP_PORT,
                                      context=context)
        else:
            # Plain connection, optionally upgraded below
            server = smtplib.SMTP(SMTP_SERVER, SMTP_PORT)
        server.ehlo()
        if SMTP_STARTTLS:
            # Secure the connection
            server.starttls(context=context)
            server.ehlo()
        server.login(SMTP_SENDER, SMTP_PASSWORD)
        server.sendmail(SMTP_SENDER, recipient[0], summary_message)
    except Exception as e:
        # Print any error messages to stdout
        print(f'{FLR}{e}')
    finally:
        # server is still None when the connection itself failed.
        if server is not None:
            try:
                server.quit()
            except smtplib.SMTPException as e:
                print(f'{FLR}{e}')
def get_message_contents(self, request, pk=None):
    """Return a preview of the message *pk*.

    When the message belongs to another MailScanner host the request is
    forwarded to that host's API with the requesting user's token and its JSON
    answer is relayed.  Otherwise the local queue file is parsed and its
    simplest and richest body parts are described.

    Returns:
        A ``Response`` with the preview data, or an empty 404 ``Response`` when
        the remote host reports 404 or the local queue file does not exist.
    """
    message = get_object_or_404(self.get_queryset(), pk=pk)

    if message.mailscanner_hostname != settings.APP_HOSTNAME:
        token = Token.objects.get(user=request.user)
        host = MailScannerHost.objects.get(
            hostname=message.mailscanner_hostname)
        protocol = 'https' if host.use_tls else 'http'
        url = '{0}://{1}/api/messages/{2}/contents/'.format(
            protocol, host.hostname, pk)
        headers = {
            'Content-Type': 'application/json',
            'Authorization': 'Token {0}'.format(token.key)
        }
        result = requests.get(url, headers=headers)
        if result.status_code == 404:
            return Response({}, status.HTTP_404_NOT_FOUND)
        return Response(result.json())

    if not message.queue_file_exists():
        return Response({}, status.HTTP_404_NOT_FOUND)

    with open(message.file_path(), 'rb') as fp:
        m = BytesParser(policy=policy.default).parse(fp)

    preview = {'message_id': message.id, 'mailq_id': message.mailq_id}
    data = {'message': preview}

    simplest = m.get_body(preferencelist=('plain', 'html'))
    richest = m.get_body()
    if simplest is None or richest is None:
        # No usable body part at all.
        preview['rich_version'] = _('Preview unavailable')
        return Response(data)

    # The message accessors fall back to the default type when a part carries
    # no Content-Type header, unlike indexing the header directly.
    preview['simple_type'] = "{0}/{1}".format(
        simplest.get_content_maintype(), simplest.get_content_subtype())
    preview['rich_type'] = "{0}/{1}".format(
        richest.get_content_maintype(), richest.get_content_subtype())

    # NOTE(review): the message part object itself is stored below, not its
    # decoded content -- confirm that the serializer is meant to receive it.
    if simplest.get_content_subtype() == 'html':
        preview['simple_version'] = ''
    else:
        preview['simple_version'] = simplest

    if richest.get_content_subtype() == 'html':
        preview['rich_version'] = richest
    elif richest.get_content_type() == 'multipart/related':
        html_part = richest.get_body(preferencelist=('html',))
        if html_part is None:
            preview['rich_version'] = _('Preview unavailable')
        else:
            # Only literal <script> / </script> tags are rewritten here;
            # NOTE(review): this is not a general HTML sanitiser.
            preview['rich_version'] = html_part.get_content().replace(
                '<script>', '>script<').replace('</script>', '>/script<')
        preview['attachments'] = [
            part.get_filename() for part in richest.iter_attachments()
        ]
    else:
        preview['rich_version'] = _('Preview unavailable')
    return Response(data)
def readmails():
    """Classify every stored mail and write a ``processed-data.json`` beside it.

    Mail folders are numbered from 1 inside ``MAIL_``; folders that already
    contain ``processed-data.json`` are skipped.  For senders listed in
    ``KNOWN`` a category and the addresses found in the subject or body are
    recorded in addition to sender, subject and date.  A failure on one mail is
    logged and does not stop the run.
    """
    import eml_parser, datetime, re
    from email import policy
    from email.parser import BytesParser

    def json_serial(obj):
        # Lets json.dumps() serialise datetime values.
        if isinstance(obj, datetime.datetime):
            serial = obj.isoformat()
            return serial

    def find_ips(source):
        # Dotted-quad candidates in order of appearance.
        return re.findall(r'[0-9]+(?:\.[0-9]+){3}', source)

    KNOWN = [
        '*****@*****.**', '*****@*****.**', '*****@*****.**',
        '*****@*****.**', '*****@*****.**', '*****@*****.**',
        '*****@*****.**', '*****@*****.**', '*****@*****.**',
        '*****@*****.**', '*****@*****.**', '*****@*****.**',
        '*****@*****.**', '*****@*****.**', '*****@*****.**',
        '*****@*****.**'
    ]
    ep = eml_parser.eml_parser
    end = len(os.listdir(MAIL_))
    for _ in range(1, end + 1):
        mail_dir = MAIL_ + '/' + str(_)
        if os.path.exists(mail_dir + '/processed-data.json'):
            continue
        try:
            if LOG:
                log.write(f'[+] Reading: {_}\n')
            if CNS:
                print(f'[+] Reading: {_}')
            with open(mail_dir + '/main-content.elm', 'rb') as f:
                raw_email = f.read()
            parsed_eml = ep.decode_email_b(raw_email)
            parsed_json = json.loads(
                json.dumps(parsed_eml, default=json_serial))
            FROM = parsed_json['header']['from']
            SUBJECT = parsed_json['header']['subject']
            DATE = parsed_json['header']['date']
            CATEGORY, OUR_ADDRESS, VICTIM_ADDRESS = '', '', ''
            if FROM in KNOWN:
                with open(mail_dir + '/main-content.elm', 'rb') as f:
                    msg = BytesParser(policy=policy.default).parse(f)
                # Reset per mail so a failed extraction cannot reuse the body
                # text of the previously processed mail.
                TEXT = ''
                try:
                    TEXT = msg.get_body(
                        preferencelist=('plain',)).get_content()
                except Exception:
                    # No readable plain-text body; continue with empty text.
                    pass
                if FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    if 'phishing' in TEXT:
                        CATEGORY = 'phishing'
                    OUR_ADDRESS = find_ips(TEXT)[1]
                    VICTIM_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
                    if 'botnet' in TEXT:
                        CATEGORY = 'botnet'
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
                    if 'Netscan' in SUBJECT:
                        CATEGORY = 'scan'
                # NOTE(review): this tests whether FROM is a substring of the
                # host name, not the reverse -- confirm the intended direction.
                elif FROM in 'p2p.markmonitor.com':
                    OUR_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
                    print(OUR_ADDRESS)
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
                    if 'ssh' in SUBJECT:
                        CATEGORY = 'ssh'
                elif FROM == '*****@*****.**':
                    if 'DOS' in TEXT:
                        CATEGORY = 'DOS'
                    OUR_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    if 'DOS' in TEXT:
                        CATEGORY = 'DOS'
                    OUR_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    CATEGORY = 'wp-admin'
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(TEXT)[0]
                    if 'scan' in TEXT:
                        CATEGORY = 'scan'
                elif FROM == '*****@*****.**':
                    CATEGORY = 'scan'
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
            else:
                if LOG:
                    log.write('[-] Unknown mail\n')
                if CNS:
                    print('[-] Unknown mail')
                if CNS:
                    print('-' * 20)
            outdata = {}
            outdata['FROM'] = FROM
            outdata['SUBJECT'] = SUBJECT
            outdata['DATE'] = DATE
            if CATEGORY != '':
                outdata['CATEGORY'] = CATEGORY
            if OUR_ADDRESS != '':
                outdata['OUR_ADDRESS'] = OUR_ADDRESS
            if VICTIM_ADDRESS != '':
                outdata['VICTIM_ADDRESS'] = VICTIM_ADDRESS
            print(outdata)
            # Same folder as the existence check above, so a processed mail is
            # actually skipped on the next run.
            with open(mail_dir + '/processed-data.json', 'w') as out_file:
                out_file.write(json.dumps(outdata))
            print('-' * 20)
        except Exception as e:
            if LOG:
                log.write(f'[-] Error: {e}\n')
            if CNS:
                print(f'[-] Error: {_}')
    if CNS:
        print(f'[+] Reading all mail successfully')
    if LOG:
        log.write(f'[+] Reading all mail successfully\n')
def display_eml(eml_filepath):
    """Parse an ``.eml`` file into a dict of headers, text and attachments.

    Args:
        eml_filepath: Path of the message file.

    Returns:
        A dict with the sender, recipient, date and subject headers, the
        plain-text body (``None`` when the message has no text/plain body) and
        a list of ``(parameters, part)`` tuples, one per part whose
        Content-Disposition is ``attachment``.  ``parameters`` maps each
        disposition parameter name to its unquoted value.
    """
    # TODO: needs tuning
    with open(eml_filepath, 'rb') as eml_file:
        msg = BytesParser(policy=policy.default).parse(eml_file)

    plain_part = msg.get_body(preferencelist=('plain',))
    text = plain_part.get_content() if plain_part is not None else None

    found = []
    for part in msg.walk():
        if 'content-disposition' not in part:
            continue
        cdisp = [x.strip() for x in part['content-disposition'].split(';')]
        if cdisp[0].lower() != 'attachment':
            continue
        parsed = {}
        for kv in cdisp[1:]:
            if not kv:
                # A trailing ';' leaves an empty parameter behind.
                continue
            # Split on the first '=' only: the value may itself contain '='.
            key, _sep, val = kv.partition('=')
            if val.startswith('"'):
                val = val.strip('"')
            elif val.startswith("'"):
                val = val.strip("'")
            parsed[key] = val
        found.append((parsed, part))

    return {
        "Odesílatel": msg.get('From'),
        "Příjemce": msg.get('To'),
        "Datum": msg.get('Date'),
        "Předmět": msg.get('Subject'),
        "Text zprávy": text,
        "Přílohy": found
    }
class EmlParser():
    """Read-only accessor for the parts of a single parsed e-mail file."""

    def __init__(self, fileName):
        """Parse the bytes that ``readFile(fileName)`` returns into a message."""
        self.message = BytesParser(policy=policy.default).parsebytes(
            readFile(fileName))

    def getId(self):
        """Return ``getHashOfItem`` applied to the parsed message."""
        return getHashOfItem(self.message)

    def _iter_attachments(self):
        """Yield ``(filename, part)`` for every attachment naming a file.

        A part qualifies when its Content-Disposition starts with
        ``attachment``; one pair is yielded per ``filename=`` parameter, with
        surrounding quotes removed from the value.
        """
        for part in self.message.walk():
            if 'content-disposition' not in part:
                continue
            cdisp = [x.strip() for x in part['content-disposition'].split(';')]
            if cdisp[0].lower() != 'attachment':
                continue
            for kv in cdisp[1:]:
                if not kv.startswith('filename='):
                    continue
                _, _, val = kv.partition('=')
                if val.startswith('"'):
                    val = val.strip('"')
                elif val.startswith("'"):
                    val = val.strip("'")
                yield val, part

    def getAttachmentData(self, name):
        """Return the decoded payload of the first attachment called ``name``.

        Returns ``None`` when no attachment carries that file name.
        """
        for filename, part in self._iter_attachments():
            if name == filename:
                return part.get_payload(decode=True)
        return None

    def getAttachmentNames(self):
        """Return the file names of all attachments, in message order."""
        return [filename for filename, _ in self._iter_attachments()]

    def getPayloadHtml(self):
        """Return the HTML body as text, or ``''`` when there is none."""
        body = self.message.get_body(preferencelist=('html',))
        if body:
            return self._decode_body(body.get_payload(decode=True))
        return ''

    def getPayloadPlain(self):
        """Return the plain-text body as text, or ``''`` when there is none."""
        body = self.message.get_body(preferencelist=('plain',))
        if body:
            return self._decode_body(body.get_payload(decode=True))
        return ''

    def getSender(self):
        """Return ``extractEmails`` applied to the From header text."""
        return extractEmails(str(self.message['from']))

    def getReceivers(self):
        """Return ``extractEmails`` applied to the To header text."""
        return extractEmails(str(self.message['to']))

    def getSubject(self):
        """Return the Subject header with encoded words decoded."""
        return self._decode_entry(self.message['Subject'])

    def getDate(self):
        """Return the Date header formatted as ``"<date> <time>"``."""
        dt = parse(self.message['Date'])
        return str(dt.date()) + " " + str(dt.time())

    def _decode_entry(self, entry):
        """Decode a header value into text; ``None`` becomes ``''``."""
        if entry is None:
            return ''
        pieces = []
        for value, encoding in decode_header(entry):
            if isinstance(value, str):
                pieces.append(value)
                continue
            # decode_header() reports the unencoded chunks of a mixed header
            # as bytes with charset None, so a default codec is required.
            try:
                pieces.append(value.decode(encoding or 'ascii', errors='replace'))
            except LookupError:
                # Charset label unknown to Python: keep whatever is readable.
                pieces.append(value.decode('ascii', errors='replace'))
        return ''.join(pieces)

    def _decode_body(self, entry):
        """Decode body bytes as UTF-8, falling back to Latin-1."""
        try:
            return entry.decode('utf-8')
        except UnicodeDecodeError:
            return entry.decode('latin-1')
def process_email(raw_email):
    """Store an incoming reply e-mail as a ``Message`` on its thread.

    The plain-text body becomes the message content. The thread is found via
    an 80-character alphanumeric token embedded in one of the To addresses
    as ``<local>+<token>@<domain>``.

    Args:
        raw_email: The complete RFC 5322 message as bytes.

    Raises:
        NoTokenFoundException: No To address carries a token.
        Whatever ``process_old_token`` raises when the token is rejected by
        both ``process_new_token`` and ``process_old_token``.
    """
    msg = BytesParser(policy=policy.default).parsebytes(raw_email)

    # A message may lack a plain-text part or carry an empty one; treat both
    # as empty content instead of failing on None.
    body = msg.get_body(preferencelist=('plain',))
    payload = body.get_payload(decode=True) if body is not None else None
    if payload:
        charset = body.get_content_charset()
        if not charset:
            # chardet reports encoding None when it cannot decide.
            charset = chardet.detect(payload)['encoding'] or 'utf-8'
        try:
            content = payload.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # Unknown or wrong charset label: keep as much text as possible.
            content = payload.decode('utf-8', errors='replace')
    else:
        content = ''

    # Raw string so that "\+" is a regex escape, not an invalid str escape.
    regex = re.compile(r'^[^+@]+\+(?P<token>[a-zA-Z0-9]{80})@[^@]+$')
    m = None
    for addr in msg.get('To', '').split(','):
        m = regex.match(addr.strip())
        if m:
            break
    if not m:
        raise NoTokenFoundException
    token = m.group('token')

    try:
        in_reply_to, author = process_new_token(token)
    except InvalidTokenException:
        in_reply_to, author = process_old_token(token)

    subject = msg.get('Subject', '')
    Message.objects.create(thread=in_reply_to.thread,
                           in_reply_to=in_reply_to,
                           author=author,
                           subject=subject,
                           content=content)