def Receive(self, index):
    """Fetch and print the newest messages from the POP3 mailbox.

    Opens an SSL POP3 session using ``self.emailInfo`` (keys
    ``pop3_server``, ``email`` and ``pwd``), retrieves messages counting
    down from ``index`` and passes each parsed message to
    ``self.print_info``.

    Args:
        index: Message number to start from. At most six messages
            (``index``, ``index - 1``, ...) are read, and reading never
            goes below message 1.
    """
    self.server = poplib.POP3_SSL(self.emailInfo["pop3_server"])
    try:
        # Authenticate.
        self.server.user(self.emailInfo["email"])
        self.server.pass_(self.emailInfo["pwd"])

        parser = BytesParser()
        # One loop covers both "more than six" and "six or fewer" cases:
        # stop six messages back, but never past the first message.
        stop = max(index - 6, 0)
        for i in range(index, stop, -1):
            resp, lines, octets = self.server.retr(i)
            # ``lines`` holds every raw line of the message; joining them
            # rebuilds the original message text.
            msg_content = b'\r\n'.join(lines)
            self.print_info(parser.parsebytes(msg_content))
            # A message could be deleted from the server by its index:
            # self.server.dele(i)
    finally:
        # Close the connection even when a fetch or print fails.
        self.server.quit()
async def populate_with_test_data(storage):
    """Fill ``storage`` with the sample incoming and outgoing messages.

    Every entry of ``incoming_messages`` and ``outgoing_messages`` is
    parsed and passed to ``storage.store_mail`` for the test account.
    """
    account = account_manager.get_account_for_address("*****@*****.**")
    print(account)

    def _parse(raw_message):
        # Each message is parsed with a fresh parser using the modern policy.
        return BytesParser(policy=policy.default).parsebytes(raw_message)

    for raw_message, sender in incoming_messages:
        parsed = _parse(raw_message)
        to_list = [parse_email("Test <*****@*****.**>")]
        await storage.store_mail(account, parsed, sender, to_list, incoming=True)

    for raw_message, to_list in outgoing_messages:
        sender = "*****@*****.**"
        parsed = _parse(raw_message)
        await storage.store_mail(account, parsed, sender, to_list, incoming=False)
def receive(self):
    """Fetch every message in INBOX, print a summary and return them.

    For each message the decoded subject, sender and body are printed.
    The body is obtained through ``get_body`` and decoded as gb2312.

    Returns:
        list: One ``email.message.Message`` per message found by the
        ``ALL`` search.
    """
    # Select the inbox (the default mailbox).
    self.conn.select('INBOX')
    _status, id_data = self.conn.search(None, 'ALL')
    # The search reply is one space-separated bytes string of message ids.
    mail_ids = id_data[0].split()

    maillist = []
    for mail_id in mail_ids:
        # Fetch the complete raw message by its id.
        _status, data = self.conn.fetch(mail_id, '(RFC822)')
        # The message is parsed exactly once; the original parsed it a
        # second time with BytesParser and discarded that result.
        message = email.message_from_bytes(data[0][1])
        maillist.append(message)

        subject = email.header.make_header(
            email.header.decode_header(message['SUBJECT']))
        mail_from = email.header.make_header(
            email.header.decode_header(message['From']))
        print("邮件的subject是:%s" % subject)
        print("邮件的发件人是:%s" % mail_from)

        # NOTE(review): gb2312 is hard-coded; utf-8 or GB18030 may be needed
        # for other messages -- confirm against real traffic.
        body = str(get_body(message), encoding='gb2312')
        print("邮件内容是:%s" % body)
    return maillist
def runReceive(self):
    """Download all POP3 messages, newest first, and list the new ones.

    Connects through ``ReceiveMail``, parses every message and, for each
    item of the parsed info not already in ``self.isReceived``, parses the
    message content and adds the info to the ``'emaillist'`` list widget.
    Failures while handling one message are printed and do not stop the
    loop. The POP3 session is closed and the loading indicator hidden when
    the loop ends, including when a fetch fails.
    """
    myPop = ReceiveMail()
    self.popServer = myPop.connect()
    try:
        self.emailNum = myPop.GetEmailNum()
        parser = BytesParser()
        # Walk the mailbox from the highest message number down to 1.
        for i in range(self.emailNum, 0, -1):
            resp, lines, octets = self.popServer.retr(i)
            msg = parser.parsebytes(b'\r\n'.join(lines))
            try:
                # Basic information about the message.
                currentEmailInfo = myPop.parseEmailInfo(msg)
                for item in currentEmailInfo:
                    # Only handle entries that are not already listed.
                    # NOTE(review): nesting reconstructed from a flattened
                    # source -- confirm addQList belongs inside this check.
                    if item not in self.isReceived:
                        # self.isReceived.update(currentEmailInfo)
                        myPop.parseEmailContent(msg)
                        self.addQList(currentEmailInfo, 'emaillist')
            except Exception as e:
                print(str(e))
    finally:
        try:
            myPop.quit()
        finally:
            self.loading.hide()
def message_from_bytes(s, *args, **kws):
    """Build a Message object model from a bytes string.

    Any extra positional and keyword arguments are forwarded unchanged to
    the ``BytesParser`` constructor.
    """
    from email.parser import BytesParser

    parser = BytesParser(*args, **kws)
    return parser.parsebytes(s)
def get_meta_data():
    """Read the To, From and Subject headers of the file ``bytes_message``.

    Returns:
        tuple: ``(to, frm, subject)`` header values; a value is ``None``
        when the header is absent.
    """
    with open('bytes_message', 'rb') as fp:
        # Only headers are needed, so the body is not parsed.
        headers = BytesParser(policy=default).parse(fp, headersonly=True)
    return headers['to'], headers['from'], headers['subject']
def _login_btn_clicked(self):
    """Log in to Gmail over POP3 and open one window per message.

    Reads the credentials from ``self.input_User`` / ``self.input_Pass``,
    downloads every message in the mailbox, parses it and passes it to
    ``self.new_window``. The POP3 session is closed when done.
    """
    # NOTE(review): these module-level names are kept for compatibility,
    # but assigning ``bytes`` here rebinds the builtin name at module scope.
    global response, headerLines, bytes

    username = self.input_User.get()
    password = self.input_Pass.get()
    # Only the user name is echoed; the password must not reach stdout.
    print(username)

    # Connect to the Gmail POP3 service.
    M = poplib.POP3_SSL('pop.gmail.com')
    try:
        M.user(username)
        M.pass_(password)
        # Number of messages in the mailbox.
        numero = len(M.list()[1])
        parser = BytesParser()
        for i in range(numero):
            # POP3 message numbers are 1-based.
            response, headerLines, bytes = M.retr(i + 1)
            # Join the raw lines into one bytes string and parse it.
            mensaje = b'\n'.join(headerLines)
            parsed = parser.parsebytes(mensaje)
            # Show the message in a new window.
            self.new_window(parsed)
    finally:
        M.quit()
def query_S3(bucket, objkey):
    """Return the plain-text body of an e-mail stored in an S3 bucket.

    Args:
        bucket: Name of the S3 bucket.
        objkey: Key of the object holding the raw e-mail.

    Returns:
        str: The text of the message's plain-text part, or ``''`` when the
        object is not found or the message has no readable plain part.
    """
    s3 = boto3.resource('s3')
    s3_bucket = s3.Bucket(bucket)

    body = b""
    for obj in s3_bucket.objects.all():
        if obj.key == objkey:
            body = obj.get()['Body'].read()
            break

    msg = BytesParser(policy=policy.SMTP).parsebytes(body)

    # Extract the plain-text version of the e-mail.
    plain = ''
    try:
        # A one-element tuple: a bare string would be matched by substring.
        part = msg.get_body(preferencelist=('plain',))
        if part is not None:
            plain = part.get_content()
    except Exception:
        # Best effort: an undecodable part is treated like a missing one.
        part = None
    if part is None:
        print(
            'Incoming message does not have an plain text part - skipping this part.'
        )
    return plain
def from_FILE_to_raw_email(file_name):
    """Parse the e-mail file ``file_name`` into the global ``raw_email``.

    Prints a progress line, waits one second, then stores the parsed
    message in the module-level ``raw_email`` variable.
    """
    global raw_email

    announcement = "".join(("Coping from ", file_name, " raw e-mail content."))
    print(announcement)
    time.sleep(1)

    parser = BytesParser(policy=default)
    with open(file_name, 'rb') as source:
        raw_email = parser.parse(source)
def make_person_schema(mailFile, outputDir, person_db):
    """Record the people named in one mail's headers in ``person_db``.

    The sender and the mailbox owner (taken from ``Delivered-To``) are
    stored with ``update_db``; the owner is then linked to every ``To``
    and ``Cc`` entry through ``link_people``.

    Args:
        mailFile: Binary file object containing the message.
        outputDir: Not used by this function.
        person_db: Database object passed to ``update_db``/``link_people``.
    """
    msg = BytesParser().parse(mailFile)

    # The sender goes into the database first.
    sender_name, sender_addr = get_info_from_mail_field(msg['from'])
    update_db(person_db, Person(sender_name, sender_addr))

    # Then the mailbox owner, with any alternative addresses found.
    owner_name, owner_addr = get_info_from_mail_field(msg['Delivered-To'])
    me = Person(owner_name, owner_addr)
    for header_name in ('X-Original-To', 'Resent-From'):
        _, alias = get_info_from_mail_field(msg[header_name])
        if alias:
            me.addEmail(alias)
    update_db(person_db, me)

    # Finally the To and Cc relations (link_people excludes ourself).
    for header_name in ('to', 'cc'):
        link_people(person_db, me, msg.get_all(header_name, []))
def process_email(raw_email):
    """Store an e-mail reply as a ``Message`` in the matching thread.

    The recipient address must have the form ``local+<token>@domain``
    where ``<token>`` is 80 alphanumeric characters: 32 for the thread
    token, 32 for the correspondent token and 16 for a key derived from
    ``settings.SECRET_KEY`` and both tokens.

    Args:
        raw_email: The complete raw message as bytes.

    Raises:
        NoTokenFoundException: No ``To`` address carries a token.
        InvalidTokenException: The thread or correspondent lookup failed.
        InvalidKeyException: The key part of the token does not match.
    """
    import hmac

    msg = BytesParser(policy=policy.default).parsebytes(raw_email)
    body = msg.get_body(preferencelist=['plain'])
    content = body.get_payload(decode=True)
    charset = body.get_content_charset()
    if not charset:
        # chardet can report no encoding at all; fall back to UTF-8 then.
        charset = chardet.detect(content)['encoding'] or 'utf-8'
    content = content.decode(charset)

    regex = re.compile(r'^[^+@]+\+(?P<token>[a-zA-Z0-9]{80})@[^@]+$')
    m = None
    for addr in msg.get('To', '').split(','):
        m = regex.match(addr.strip())
        if m:
            break
    if not m:
        raise NoTokenFoundException

    token = m.group('token')
    key = token[64:]
    try:
        thread = MessageThread.objects.get(token=token[:32])
        sender = MessageCorrespondent.objects.get(token=token[32:64])
    except models.DoesNotExist:
        raise InvalidTokenException

    expected = hexdigest_sha256(settings.SECRET_KEY, thread.token,
                                sender.token)[:16]
    # Constant-time comparison so the key cannot be probed via timing.
    # assumes hexdigest_sha256 returns an ASCII str -- TODO confirm
    if not hmac.compare_digest(key, expected):
        raise InvalidKeyException

    Message.objects.create(thread=thread,
                           from_email=sender.email,
                           content=content)
def process_maildir(config):
    """Process every message in ``~/Maildir`` and refile it.

    Each message is re-parsed with the modern e-mail policy and passed to
    ``process_msg``. Unless ``args.dry_run`` is set, the message is then
    added to ``archive`` (when ``process_msg`` returned a true value) or
    to ``error`` (otherwise) and removed from the inbox.

    Args:
        config: Passed through to ``process_msg``.
    """
    inbox = mailbox.Maildir("~/Maildir")
    for key in inbox.iterkeys():
        try:
            message = inbox[key]
        except email.errors.MessageParseError:
            continue
        msg = BytesParser(policy=policy.default).parsebytes(message.as_bytes())
        replied = process_msg(config=config, msg=msg)

        if args.dry_run:
            continue

        # Refile to archive or error, then delete the incoming message.
        if replied:
            print("OK", message['subject'], message['from'])
            destination = archive
        else:
            print("NOT OK", message['subject'])
            destination = error

        destination.lock()
        try:
            destination.add(message)
        finally:
            # Release the lock even if adding the message fails.
            destination.unlock()

        inbox.lock()
        try:
            inbox.discard(key)
        finally:
            inbox.unlock()
def decode_email(msg_str, pos, key_map):
    """Parse a raw message into a dict of headers, body and attachments.

    Header entries come from ``parse_parts``. ``'Size'`` is the length of
    ``msg_str``; ``'Attachments'`` lists part file names when there are
    any. The body is stored under ``'text/plain'`` when plain text was
    found, otherwise under ``'text/html'`` when HTML was found.

    Args:
        msg_str: Raw message bytes.
        pos: Not used by this function.
        key_map: Header selection passed to ``parse_parts``.
    """
    message = BytesParser().parsebytes(msg_str)

    # Header fields selected by key_map.
    parts = parse_parts(message, key_map)
    parts['Size'] = len(msg_str)

    attachment_names = []
    plain_body = ''
    html_body = ''
    for part in message.walk():
        plain_body += decode_part(part, 'text/plain')
        if plain_body:
            # Plain text wins: forget any HTML collected so far.
            html_body = ""
        else:
            html_body += decode_part(part, 'text/html')

        name = part.get_filename()
        if name:
            attachment_names.append(name)

    if attachment_names:
        parts['Attachments'] = attachment_names

    if plain_body:
        parts['text/plain'] = plain_body
    elif html_body:
        parts['text/html'] = html_body
    return parts
def fetch_and_parse(uids):
    """Fetch the messages with the given UIDs and parse them into dicts.

    Each dict has the keys ``Date`` (a ``datetime`` or ``None`` when the
    header is absent), ``From``, ``To``, ``Delivered-To``, ``Message-ID``,
    ``Subject``, ``plain`` and ``html``. Messages whose fetch does not
    reply ``OK`` are skipped.

    Args:
        uids: Iterable of message UIDs to fetch from ``imap_server``.

    Returns:
        list: One dict per successfully fetched message.
    """
    from email.utils import parsedate_to_datetime

    result = []
    parser = BytesParser(policy=default)
    for uid in uids:
        reply, email_data = imap_server.uid('fetch', uid, '(RFC822)')
        if reply != 'OK':
            continue

        message = parser.parsebytes(email_data[0][1])
        email_dict = {}

        # Parse the date with the RFC 5322 aware helper; a fixed strptime
        # format rejects valid variants such as a trailing "(UTC)" comment.
        date_header = message['Date']
        email_dict['Date'] = (parsedate_to_datetime(str(date_header))
                              if date_header is not None else None)

        for header in ['From', 'To', 'Delivered-To', 'Message-ID', 'Subject']:
            email_dict[header] = message[header]

        email_dict['plain'] = None
        email_dict['html'] = None
        for part in message.walk():
            # Text attachments are not message bodies.
            if part.is_attachment():
                continue
            content_type = part.get_content_type()
            if content_type == 'text/html':
                email_dict['html'] = part.get_content()
            elif content_type == 'text/plain':
                email_dict['plain'] = part.get_content()
        result.append(email_dict)
    return result
def get_content(num):
    """Fetch message ``num`` and pass its text parts to a transfer parser.

    Every non-multipart part is decoded as GBK. ``time_formate`` applied to
    the message date selects the handler: ``'1'`` calls
    ``get_transfer_v1`` and ``'2'`` calls ``get_transfer_v2``.

    Args:
        num: Message number passed to ``raw_conn.fetch``.
    """
    print(num)
    _status, data = raw_conn.fetch(num, '(RFC822)')
    email_date = get_date(email_list[int(count)])
    try:
        msg = BytesParser().parsebytes(data[0][1])
        for part in msg.walk():
            if part.is_multipart():
                continue
            content = part.get_payload(decode=True).decode('GBK')
            # NOTE(review): nesting reconstructed from a flattened source --
            # confirm the dispatch below runs once per part.
            temp = time_formate(email_date)
            print(temp)
            if temp == '1':
                print(temp)
                get_transfer_v1(content)
            elif temp == '2':
                print(temp)
                get_transfer_v2(content)
    except TypeError:
        print('empty-email')
    except UnicodeDecodeError:
        print('hahah')
def _get_email_content(uid, data): content = dict(text=None, html=None, attachments=[]) email = BytesParser(policy=policy.default).parsebytes(data) for part in email.walk(): if part.is_multipart(): continue if part.is_attachment(): content['attachments'].append(_read_attachment(part, uid)) continue if part.get_content_type() == 'text/plain': content['text'] = _read_text(part) continue if part.get_content_type() == 'text/html': content['html'] = _read_html(part, uid) continue if content['html'] and not content['text']: tmp = open(content['html'], 'r') content['text'] = tmp.read() tmp.close() return content
def emails_between(persons, base_dir="C:/PythonProjects/"):
    """Print every mail sent from one flagged address to another.

    For each key of ``persons`` the directory ``base_dir/<key>`` is walked
    and each file is read as an e-mail. A line is printed for every
    To/Cc/Bcc recipient that is flagged, provided the sender is flagged
    too.

    Args:
        persons: Mapping of directory name to an iterable of e-mail
            addresses; all addresses together form the flagged set.
        base_dir: Directory that contains one sub-directory per key.
    """
    red_flag = {address for addresses in persons.values()
                for address in addresses}
    date_pattern = re.compile(r"[\d]{1,2} [ADFJMNOS]\w* [\d]{4}")

    for key in persons:
        directory = os.path.join(base_dir, key)
        for subdir, _dirs, files in os.walk(directory):
            for file_name in files:
                with open(os.path.join(subdir, file_name), 'rb') as fp:
                    # Only headers are inspected, so skip the body.
                    headers = BytesParser(policy=default).parse(
                        fp, headersonly=True)

                sender = '{}'.format(headers['from'])
                if sender not in red_flag:
                    continue

                email_dated = '{}'.format(headers['date'])
                found = date_pattern.search(email_dated)
                # Fall back to the raw header text when no "day month year"
                # date is present instead of failing on the whole run.
                date = found.group(0) if found else email_dated
                subject_line = '{}'.format(headers['subject'])

                for field in ('to', 'cc', 'bcc'):
                    for el in '{}'.format(headers[field]).split(','):
                        if el.strip() in red_flag:
                            print('[', date, ']', sender, ' -> ', el,
                                  '\n\tSubject:', subject_line)
def processEmail(emailBytes):
    """Turn raw e-mail bytes into a ``(subject, text)`` pair.

    The bytes are decoded as text. When the first line contains
    ``Subject:`` everything after its first eight characters becomes the
    subject. The text is then flattened to one line, with links removed
    and whitespace normalised.

    Args:
        emailBytes: Raw message bytes.

    Returns:
        tuple: ``(subject, text)``. ``subject`` is ``''`` when the first
        line has no ``Subject:``; in that case the first line is left out
        of ``text``.
    """
    # NOTE(review): the original also MIME-parsed the message but then
    # overwrote that result with the raw decoded text, so the parse had no
    # effect and is omitted. Confirm the parsed plain body was not intended.
    text = emailBytes.decode()

    lines = text.split('\n')
    subject = lines[0][8:] if 'Subject:' in lines[0] else ''

    if subject != '':
        text = ' '.join(lines)
    else:
        text = ' '.join(lines[1:])

    text = re.sub(r'https?://\S+', '', text, flags=re.MULTILINE)  # remove links
    text = re.sub(r' +|\t+|\\n', ' ', text)  # remove unnecessary spaces
    text = re.sub(r'\s([,?.!"](?:\s|$))', r'\1', text)  # remove spaces before punctuation
    return subject, text
def resolve_domain(self, request_string):
    """Resolve the host named in a raw HTTP request to an address.

    The request line is dropped, the remaining bytes are parsed as
    headers, and the host part of the ``Host`` header is logged and looked
    up with an ``A`` query.

    Returns:
        The address of the first answer, or ``None`` when anything fails
        (the failure is logged).
    """
    try:
        _, header_block = request_string.split(b'\r\n', 1)
        headers = BytesParser().parsebytes(header_block)
        # Drop an optional ":port" suffix.
        hostname = headers["host"].split(":")[0]
        peer = self.transport.getPeer()
        query_record = {
            'server': 'http_proxy_server',
            'action': 'query',
            'ip': peer.host,
            'port': peer.port,
            'payload': hostname,
        }
        _q_s.logs.info(["servers", query_record])
        answers = dsnquery(hostname, 'A')
        return answers[0].address
    except Exception as e:
        error_record = {
            'server': 'http_proxy_server',
            'error': 'resolve_domain',
            "type": "error -> " + repr(e),
        }
        _q_s.logs.error(["errors", error_record])
    return None
def _parse_data(data: bytes) -> dict: data = data.split(b'|', 2) data_dict = { 'from': data[0].decode('utf-8'), 'to': data[1].decode('utf-8') } path, headers = data[2].split(b'\r\n', 1) payload = BytesParser().parsebytes(headers) host = payload['host'] path_part = path.split(b' ')[1].decode('utf-8') url = f'http://{host}{path_part}' if url.lower().endswith( ('.png', '.ico', '.jpeg', '.jpg', '.gif', '.svg')): data_dict['image'] = url else: data_dict['url'] = url if 'cookie' in payload: data_dict['cookie'] = payload['cookie'] post_data = data[2].split(b'\r\n\r\n') if len(post_data) == 2: if post_data[1].strip(): data_dict['post'] = post_data[1].decode('utf-8') return data_dict
def _read_multipart_field(fp, boundary, parser=BytesParser(_class=HTTPMessage)):
    """
    Read one part of a multipart/form-data message.

    ``fp`` must be positioned at the start of the part's header block.

    Returns a tuple ``(headers, data)``: ``headers`` is the object produced
    by ``parser`` for the header block and ``data`` is an
    ``ExpandableOutput`` holding the part's content.

    Raises ``RequestParseError`` when the input ends before the boundary
    is found.

    Note that this currently cannot handle nested multipart sections.
    """
    data = ExpandableOutput()
    headers = parser.parse(DelimitedInput(fp, b'\r\n\r\n'), headersonly=True)
    body_stream = DelimitedInput(fp, b'\r\n--' + boundary)

    # XXX: handle base64 encoding etc
    while True:
        chunk = body_stream.read(CHUNK_SIZE)
        if chunk == b'':
            break
        data.write(chunk)
    data.flush()

    # Input exhausted without reaching the closing boundary?
    if not body_stream.delimiter_found:
        raise RequestParseError("Incomplete data (expected boundary)")

    return headers, data
def parse_body(body):
    """Extract sender, recipient, subject, date and plain text of an e-mail.

    The header values are also printed.

    Args:
        body: Raw message bytes.

    Returns:
        dict: Keys ``from``, ``to``, ``subject``, ``date`` (header values,
        ``None`` when absent) and ``text`` (the plain-text part, or ``''``
        when the message has no readable plain-text part).
    """
    msg = BytesParser(policy=policy.SMTP).parsebytes(body)

    print("This is the message: ", msg.keys())
    print("From : ", msg['From'])
    print("Date: ", msg['Date'])
    print("To: ", msg['To'])
    print("Subject : ", msg['Subject'])

    plain = ''
    try:
        # A one-element tuple: a bare string would be matched by substring.
        part = msg.get_body(preferencelist=('plain',))
        if part is not None:
            plain = part.get_content()
    except Exception:
        # Best effort: an undecodable part is treated like a missing one.
        part = None
    if part is None:
        print('Incoming message does not have an plain text part - skipping this part.')

    return {
        'from': msg['From'],
        'to': msg['To'],
        'subject': msg['Subject'],
        'date': msg['Date'],
        'text': plain
    }
def fillUp(self):
    """Save the body of the 100 newest mails as HTML files under ``Data/``.

    For every message the payload is chosen as follows: nested multipart
    containers are descended through their first part; from the resulting
    list the last part is used unless it has a file name, in which case
    the first part is used. The bytes are decoded with the default codec,
    then with windows-1251 as a fallback. When that fails too, a short
    notice containing the subject is written instead.
    """
    self.connect()
    # Search returns UIDs rather than sequence numbers.
    _result, data = self.mail.uid('search', None, "ALL")
    id_list = data[0].split()
    parser = BytesParser()

    for latest_email_uid in id_list[-100:]:
        _result, data = self.mail.uid('fetch', latest_email_uid, '(RFC822)')
        # Raw text of the whole mail, including headers and all payloads.
        raw_email = data[0][1]
        email_message = parser.parsebytes(raw_email)

        try:
            message_juice = email_message.get_payload(decode=False)
            # Descend through nested multipart containers.
            while (isinstance(message_juice, list) and isinstance(
                    message_juice[0].get_payload(decode=False), list)):
                message_juice = message_juice[0].get_payload(decode=False)

            if isinstance(message_juice, list):
                if message_juice[-1].get_filename() is None:
                    chosen = message_juice[-1]
                else:
                    chosen = message_juice[0]
                html_message_juice = chosen.get_payload(decode=True)
            else:
                html_message_juice = email_message.get_payload(decode=True)

            try:
                page = html_message_juice.decode()
            except Exception:
                page = html_message_juice.decode('windows-1251')
        except Exception:
            subject = email_message['Subject']
            # A missing Subject must not break the fallback notice.
            page = ("This email could not be processed see what happened \n\nSubject: "
                    + ("" if subject is None else str(subject)))

        # Decoding happens before the file is opened, so a decode failure
        # can no longer leave an open, truncated file behind.
        with open("Data/"+str(latest_email_uid)+".html", "w", encoding="utf8") as ssd:
            ssd.write(page)
def parse(self, response):
    """
    Parse the response body as an e-mail and yield at most one ``Meeting``.

    Nothing is yielded when no start time can be found in the e-mail text.
    """
    msg = BytesParser(policy=default).parsebytes(response.body)
    detail_text = self._parse_email_text(msg)
    start = self._parse_start(detail_text)
    if start:
        fields = dict(
            title="Commission",
            description="",
            classification=COMMISSION,
            start=start,
            end=None,
            all_day=False,
            time_notes="Confirm details with agency",
            location=self.location,
            links=[],
            source=response.url,
        )
        meeting = Meeting(**fields)

        meeting["status"] = self._get_status(meeting, text=detail_text)
        meeting["id"] = self._get_id(meeting)

        yield meeting
def _encode_parts(self, header_data, msg_data, encoder):
    """Re-encode every non-multipart part whose payload is not ASCII.

    The parsed message is then passed to ``self.parse_msg``.

    :type header_data: :py:obj:`bytes`
    :type msg_data: :py:obj:`bytes`
    """
    self.headers = None
    self.message = None

    raw_message = header_data + msg_data
    msg = (BytesParser().parsebytes(raw_message) if six.PY3
           else Parser().parsestr(raw_message))

    for part in msg.walk():
        if part.is_multipart():
            continue
        payload = part.get_payload()
        try:
            payload.encode('ascii')
        except UnicodeError:
            # Drop the stale header before the encoder is applied.
            del part['Content-Transfer-Encoding']
            encoder(part)

    self.parse_msg(msg)
def get_mail_content(self, file_name):
    """Return the plain-text body of the e-mail stored in ``file_name``.

    Args:
        file_name: Path of the raw e-mail file.

    Returns:
        str: Content of the message's plain-text part.

    Raises:
        AttributeError: When the message has no plain-text part
            (``get_body`` returns ``None``).
    """
    # The context manager closes the file; no explicit close is needed.
    with open(file_name, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)
    # A one-element tuple: a bare string would be matched by substring.
    return msg.get_body(preferencelist=('plain',)).get_content()
def message_from_binary_file(fp, *args, **kws):
    """Build a Message object model from the contents of a binary file.

    Any extra positional and keyword arguments are forwarded unchanged to
    the ``BytesParser`` constructor.
    """
    from email.parser import BytesParser

    parser = BytesParser(*args, **kws)
    return parser.parse(fp)
def parseweekmail(el, pl, st):
    '''
    Collect senders of mails addressed to the weekly-report group whose
    subject is not a project weekly report.

    Mails are read from number ``el`` down to 1; reading stops at the first
    mail dated before ``st``.

    :param el number of mails in the mailbox
    :param pl poplib server object
    :param st start date from which weekly reports are parsed
    :return list of sender mail addresses
    '''
    sender_list = []
    parser = BytesParser(EmailMessage)
    for index in range(el, 0, -1):
        lines = pl.retr(index)[1]
        msg = parser.parsebytes(b'\r\n'.join(lines))

        # Is the mail from this week, and is it addressed to the group?
        mail_date = parsedate_to_datetime(msg.get('Date', "")).date()
        mail_receiver = parseaddr(msg.get('To', ""))[1]
        mail_cc = parseaddr(msg.get('Cc', ""))[1]
        if mail_date < st:
            break

        mail_subject = decode_str(msg.get('Subject', ""))
        to_group = mail_receiver == WEEKLY_GROUP or WEEKLY_GROUP in mail_cc
        # NOTE(review): the original tested the split-on-'(' clause twice
        # with identical text; one copy may have been meant for a different
        # bracket character -- confirm.
        is_project_report = (
            mail_subject.startswith('项目周报')
            or decode_str(mail_subject).split('(')[0].endswith('项目周报'))
        if to_group and not is_project_report:
            sender_list.append(parseaddr(msg.get('From', ""))[1])
    return sender_list
def get_email(num, conn):
    """Fetch message ``num`` and return its sender, subject, body and files.

    Args:
        num: Message number passed to ``conn.fetch``.
        conn: IMAP connection object.

    Returns:
        dict: ``From`` and ``Subject`` (decoded with ``decode_str``),
        ``File`` (a list of ``UploadedFile`` objects, one per part that
        has a file name) and, when a ``text/plain`` part exists, ``Body``.
    """
    result = {}
    typ, content = conn.fetch(num, '(RFC822)')
    msg = BytesParser().parsebytes(content[0][1])

    result["From"] = decode_str(msg.get("From"), "From")
    result["Subject"] = decode_str(msg.get('Subject'), "Subject")
    result["File"] = []

    for part in msg.walk():
        if part.get_content_type() == "text/plain":
            body = part.get_payload(decode=True)
            charset = part.get_content_charset()
            if charset is None:
                # No charset declared: decode as UTF-8 (a superset of the
                # US-ASCII default) instead of failing on decode(None).
                result["Body"] = body.decode("utf-8", errors="replace")
            else:
                result["Body"] = body.decode(charset)

        fileName = part.get_filename()
        if fileName is not None:
            attachment = part.get_payload(decode=True)
            name = decode_str(fileName, "File")
            content_type = part.get_content_type()
            new_file = ContentFile(attachment)
            file_obj = UploadedFile(new_file, name, content_type,
                                    new_file.size, None, None)
            result["File"].append(file_obj)
    return result
def __init__(self, mail_data, mysql_creds, threshold, sensitivity, account,
             logger, mail_id, spam_folder):
    """Parse a raw mail and prepare the state used for spam scoring.

    Opens a MySQL connection, parses ``mail_data``, extracts the message
    text, runs the whitelist and blacklist checks and collects every URL
    found in the message text.

    Args:
        mail_data: Raw mail bytes.
        mysql_creds: Mapping with the keys ``mysql_username``,
            ``mysql_password``, ``mysql_database`` and ``mysql_host``.
        threshold: Stored as ``self.threshold``.
        sensitivity: Stored as ``self.sensitivity``.
        account: Stored as ``self.account``.
        logger: Stored as ``self.log``.
        mail_id: Stored as ``self.mail_id``.
        spam_folder: Stored as ``self.spam_folder``.
    """
    # NOTE(review): these two patterns carry PHP-style "/.../s" delimiters;
    # confirm how the code that uses them expects them to be written.
    self.JS_IMPORT_REGEX = r'/<script.*(?:src="(.*)").*>/s'
    self.JS_EXTRACT_REGEX = r'/<script.*>(.*?)<\/script>/s'
    # Backslashes are doubled so the literal has no invalid escape
    # sequences; the resulting pattern string is unchanged.
    self.URL_REGEX = "http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|[^\x00-\x7F]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    self.parser = BytesParser()
    self.sensitivity = sensitivity
    self.threshold = threshold
    self.log = logger
    self.spam_folder = spam_folder
    self.mysql_db = mysql.connector.connect(
        user=mysql_creds["mysql_username"],
        password=mysql_creds["mysql_password"],
        database=mysql_creds["mysql_database"],
        host=mysql_creds["mysql_host"])
    self.account = account
    self.spam_points = 0
    self.js_code = {}
    self.urls_in_document = []
    self.documents = {}
    self.mail_id = mail_id
    self.whitelisted = False
    self.blacklisted = False
    self.parsed_mail = self.parser.parsebytes(mail_data)
    self.header_data = dict(self.parsed_mail)
    self.message = ""
    self.extract_message()
    self._spam = -1
    self.check_whitelist()
    self.check_blacklisted()
    self.urls = [url.strip()
                 for url in re.findall(self.URL_REGEX, self.message)]