def parseweekmail(el, pl, st):
    '''
    Collect the senders of recent weekly-group mails that are not project reports.

    Messages are read newest first (index ``el`` down to 1). Scanning stops
    at the first message whose date is earlier than ``st``.

    :param el: number of messages in the mailbox (highest message index)
    :param pl: poplib server object; only ``retr(index)`` is called
    :param st: start date of the period; compared against each mail's date
    :return: list of sender addresses
    '''
    report_title = '项目周报'
    sender_list = []
    for index in range(el, 0, -1):
        lines = pl.retr(index)[1]
        msg = BytesParser(EmailMessage).parsebytes(b'\r\n'.join(lines))

        # A missing or malformed Date header makes parsedate_to_datetime raise
        # (TypeError or ValueError depending on the Python version). Skip that
        # mail instead of aborting the whole scan.
        try:
            mail_date = parsedate_to_datetime(msg.get('Date', "")).date()
        except (TypeError, ValueError):
            continue
        if mail_date < st:
            break

        # Only the first address of To / Cc is inspected.
        mail_receiver = parseaddr(msg.get('To', ""))[1]
        mail_cc = parseaddr(msg.get('Cc', ""))[1]
        mail_subject = decode_str(msg.get('Subject', ""))

        to_weekly_group = (mail_receiver == WEEKLY_GROUP
                           or WEEKLY_GROUP in mail_cc)
        # A project report either starts with the title, or the text before
        # the first '(' ends with it.
        is_project_report = (
            mail_subject.startswith(report_title)
            or decode_str(mail_subject).split('(')[0].endswith(report_title))
        if to_weekly_group and not is_project_report:
            sender_list.append(parseaddr(msg.get('From', ""))[1])
    return sender_list
def get_email(num, conn):
    """Fetch message ``num`` from ``conn`` and return its key fields.

    :param num: message identifier passed to ``conn.fetch``
    :param conn: connection object exposing ``fetch(num, '(RFC822)')``
    :return: dict with ``"From"``, ``"Subject"``, ``"File"`` (list of
        ``UploadedFile`` objects, one per part that has a filename) and,
        when a ``text/plain`` part exists, ``"Body"`` (the last such part wins)
    """
    result = {}
    typ, content = conn.fetch(num, '(RFC822)')
    msg = BytesParser().parsebytes(content[0][1])

    result["From"] = decode_str(msg.get("From"), "From")
    result["Subject"] = decode_str(msg.get('Subject'), "Subject")
    result["File"] = []

    for part in msg.walk():
        if part.get_content_type() == "text/plain":
            body = part.get_payload(decode=True)
            # A part without a charset parameter reports None, which
            # bytes.decode() rejects; fall back to UTF-8 in that case.
            charset = part.get_content_charset() or "utf-8"
            result["Body"] = body.decode(charset)

        file_name = part.get_filename()
        if file_name is not None:
            attachment = part.get_payload(decode=True)
            new_file = ContentFile(attachment)
            file_obj = UploadedFile(new_file,
                                    decode_str(file_name, "File"),
                                    part.get_content_type(),
                                    new_file.size, None, None)
            result["File"].append(file_obj)
    return result
def process_email(raw_email):
    """Store an incoming reply e-mail as a ``Message``.

    The reply token is taken from the first ``To`` address of the form
    ``<local>+<80 alphanumerics>@<domain>``. It is resolved with
    ``process_new_token`` and, if that raises ``InvalidTokenException``,
    with ``process_old_token``.

    :param raw_email: the complete message as bytes
    :raises NoTokenFoundException: when no ``To`` address carries a token
    :raises AttributeError: when the message has no ``text/plain`` body
        (``get_body`` returns ``None``)
    """
    msg = BytesParser(policy=policy.default).parsebytes(raw_email)
    body = msg.get_body(preferencelist=['plain'])
    content = body.get_payload(decode=True)
    charset = body.get_content_charset()
    if not charset:
        # chardet may report no encoding at all; default to UTF-8 then.
        charset = chardet.detect(content)['encoding'] or 'utf-8'
    content = content.decode(charset)

    # Raw string: '\+' is an invalid escape sequence in a normal literal.
    regex = re.compile(r'^[^+@]+\+(?P<token>[a-zA-Z0-9]{80})@[^@]+$')
    m = None
    for addr in msg.get('To', '').split(','):
        m = regex.match(addr.strip())
        if m:
            break
    if not m:
        raise NoTokenFoundException

    token = m.group('token')
    try:
        in_reply_to, author = process_new_token(token)
    except InvalidTokenException:
        in_reply_to, author = process_old_token(token)

    subject = msg.get('Subject', '')
    Message.objects.create(thread=in_reply_to.thread,
                           in_reply_to=in_reply_to,
                           author=author,
                           subject=subject,
                           content=content)
def email_recv(request):
    """Receive a posted reply e-mail and store it as an answer ``Message``.

    The request must carry the shared ``key`` POST field and an ``email``
    file upload. The target is encoded in a ``To``/``Cc`` address of the form
    ``<name>+<dest:12><token:60><key:12>@<domain>``, where name and domain
    come from ``settings.REPLY_EMAIL``.

    :return: 501 when replies are not configured, 400 when the upload is
        missing or has no plain-text body, empty 200 response on success
    :raises PermissionDenied: wrong shared key or wrong per-message key
    :raises Http404: no matching address, user or message
    """
    if not hasattr(settings, 'REPLY_EMAIL') \
            or not hasattr(settings, 'REPLY_KEY'):
        return HttpResponse(status=501)  # Not Implemented

    # Default to '' so a missing field is rejected instead of raising
    # AttributeError on None.
    key = request.POST.get('key', '').strip()
    if key != settings.REPLY_KEY:
        raise PermissionDenied

    if 'email' not in request.FILES:
        return HttpResponse(status=400)  # Bad Request

    msg = BytesParser(policy=policy.default).parsebytes(
        request.FILES['email'].read())
    body = msg.get_body(preferencelist=('plain', ))
    if body is None:
        return HttpResponse(status=400)  # Bad Request: nothing to store
    content = body.get_payload(decode=True)
    try:
        content = content.decode(body.get_content_charset())
    except (LookupError, TypeError, UnicodeDecodeError):
        # Declared charset missing, unknown or wrong: guess instead.
        encoding = chardet.detect(content)['encoding'] or 'utf-8'
        content = content.decode(encoding)

    name, _, domain = settings.REPLY_EMAIL.partition('@')
    # Escape the configured parts so characters such as '.' match literally.
    p = re.compile(
        r'^%s\+(?P<dest>[a-z0-9]{12})(?P<token>[a-z0-9]{60})'
        r'(?P<key>[a-z0-9]{12})@%s$' % (re.escape(name), re.escape(domain)))

    candidates = [addr.strip()
                  for header in (msg.get('To'), msg.get('Cc')) if header
                  for addr in header.split(',')]
    m = None
    for _mto in candidates:
        m = p.match(_mto)
        if m:
            break
    if not m:  # no one matches
        raise Http404

    author = get_object_or_404(User, profile__email_token=m.group('dest'))
    message = get_object_or_404(Message, token=m.group('token'))
    key = hexdigest_sha256(settings.SECRET_KEY, message.token,
                           author.pk)[0:12]
    if key != m.group('key'):
        raise PermissionDenied

    answer = Message(conversation=message.conversation,
                     author=author,
                     content=content)
    answer.save()
    return HttpResponse()
def process_email(raw_email):
    """Store an incoming e-mail as a ``Message`` of the thread named by its token.

    The first ``To`` address of the form ``<local>+<80 alphanumerics>@<domain>``
    supplies the token: characters 0-31 identify the ``MessageThread``,
    32-63 the ``MessageCorrespondent``, and 64-79 are a key that must equal
    the first 16 characters of
    ``hexdigest_sha256(SECRET_KEY, thread.token, sender.token)``.

    :param raw_email: the complete message as bytes
    :raises NoTokenFoundException: no ``To`` address carries a token
    :raises InvalidTokenException: thread or correspondent not found
    :raises InvalidKeyException: the key part does not match
    :raises AttributeError: when the message has no ``text/plain`` body
        (``get_body`` returns ``None``)
    """
    msg = BytesParser(policy=policy.default).parsebytes(raw_email)
    body = msg.get_body(preferencelist=['plain'])
    content = body.get_payload(decode=True)
    charset = body.get_content_charset()
    if not charset:
        # chardet may report no encoding at all; default to UTF-8 then.
        charset = chardet.detect(content)['encoding'] or 'utf-8'
    content = content.decode(charset)

    # Raw string: '\+' is an invalid escape sequence in a normal literal.
    regex = re.compile(r'^[^+@]+\+(?P<token>[a-zA-Z0-9]{80})@[^@]+$')
    m = None
    for addr in msg.get('To', '').split(','):
        m = regex.match(addr.strip())
        if m:
            break
    if not m:
        raise NoTokenFoundException

    token = m.group('token')
    key = token[64:]
    try:
        thread = MessageThread.objects.get(token=token[:32])
        sender = MessageCorrespondent.objects.get(token=token[32:64])
    # NOTE(review): the original caught `models.DoesNotExist`; assuming these
    # are Django models, the per-model exception is what `.get()` raises.
    except (MessageThread.DoesNotExist, MessageCorrespondent.DoesNotExist):
        raise InvalidTokenException
    if key != hexdigest_sha256(settings.SECRET_KEY, thread.token,
                               sender.token)[:16]:
        raise InvalidKeyException

    Message.objects.create(thread=thread,
                           from_email=sender.email,
                           content=content)
def decode_eml(dir, filename):  # './xxx/'
    """Print the headers and parts of an ``.eml`` file and save its attachments.

    :param dir: directory prefix, concatenated as-is with ``filename``
        (so it must end with a path separator)
    :param filename: name of the ``.eml`` file inside ``dir``

    Parts whose Content-Type carries a ``filename`` parameter are written
    into ``dir``; if that fails the data is written to ``tmp`` in the
    current directory. All other leaf parts are printed as text.
    """
    import os  # local import: only used to sanitise attachment names

    path = dir + filename
    print(
        '-------------------------------------------------------------------')
    print('Decoding: ' + dir + filename + "\n")

    # First pass: decoded top-level headers. The handle is closed before the
    # file is opened again (the original leaked it).
    with open(path, 'rb') as fp:  # b => bytes
        msg = BytesParser(policy=policy.default).parse(fp)
    # Default to '' so a missing header does not break the concatenation.
    print('From: ' + str(msg.get('From', '')))
    print('To: ' + str(msg.get('To', '')))
    print('Subject: ' + str(msg.get('Subject', '')) + '\n')

    # Second pass: walk the MIME parts.
    with open(path, 'r') as fp:
        msg = email.message_from_file(fp)

    for par in msg.walk():  # for every MIME part
        if par.is_multipart():
            continue
        print('content_type: ' + str(par.get('Content-Type', '')))
        # NOTE(review): get_param() reads the Content-Type header; a filename
        # given only in Content-Disposition is not seen here — confirm intent.
        name = par.get_param('filename')
        data = par.get_payload(decode=True)
        if name:
            h = Header(name)  # decode unusual file names
            fname = str(decode_header(h)[0][0], encoding='utf-8')
            print('附件:', fname + '\n')
            # The name comes from the mail itself: drop any directory part so
            # it cannot escape `dir`.
            safe_name = os.path.basename(fname)
            try:
                # Attachments are usually binary, hence 'wb'.
                with open(dir + safe_name, 'wb') as f:
                    f.write(data)
            except (OSError, ValueError):
                print('error: 附件名含非法字符,存为tmp')
                with open('tmp', 'wb') as f:
                    f.write(data)
        else:
            # Printing is display-only, so undecodable bytes are replaced
            # rather than aborting the dump.
            print('文本内容: ',
                  str(data or b'', encoding='utf-8', errors='replace') + '\n')

    print(
        '--------------------------------End--------------------------------\n'
    )
def get_email(num, conn):
    """Fetch message ``num`` and return the text of its first non-multipart part.

    Also prints ``num`` with the decoded Subject, X-Sender and Date headers.

    :param num: message identifier passed to ``conn.fetch``
    :param conn: connection object exposing ``fetch(num, '(RFC822)')``
    :return: decoded payload of the first leaf part, or ``None`` if the
        message has no leaf part
    """
    typ, content = conn.fetch(num, '(RFC822)')
    msg = BytesParser().parsebytes(content[0][1])
    sub = msg.get('Subject')
    sender = msg.get('X-Sender')
    date = msg.get('Date')

    for part in msg.walk():
        if part.is_multipart():
            continue
        print(num, decode_str(sub), decode_str(sender), decode_str(date))
        payload = part.get_payload(decode=True)
        # Honour the charset the part declares; UTF-8 is only the fallback
        # (the original always assumed UTF-8).
        charset = part.get_content_charset('utf-8')
        try:
            return payload.decode(charset)
        except LookupError:
            # Declared charset is unknown to Python.
            return payload.decode('utf-8')
    return None
def handshake(self):
    """Answer a WebSocket opening handshake read from ``self.request``.

    Reads up to 1024 bytes, parses the header lines after the request line,
    and, if the client asks to upgrade to ``websocket``, sends the response
    produced by ``self.build_response(key)``.

    :return: the result of ``self.request.send`` when a response was sent,
        otherwise ``None`` (no header block, not a websocket upgrade, or no
        ``Sec-WebSocket-Key`` header)
    """
    data = self.request.recv(1024).strip()

    # Everything after the first CRLF is the header block. Without a CRLF
    # there are no headers (the original raised IndexError here).
    _request_line, separator, raw_headers = data.partition(b'\r\n')
    if not separator:
        return None

    headers = BytesParser().parsebytes(raw_headers)
    # The Upgrade token is case-insensitive, so "WebSocket" must be accepted.
    upgrade = str(headers.get('Upgrade', '')).strip().lower()
    if upgrade != 'websocket':
        return None

    key = headers.get('Sec-Websocket-Key')
    if key is None:
        return None

    response = self.build_response(key)
    return self.request.send(response.encode('ascii'))
def get_subject(num):
    """Fetch message ``num`` via the module-level ``raw_conn`` and return its subject.

    The decoded subject is printed and returned. If parsing or decoding
    raises ``TypeError`` or ``UnicodeDecodeError``, a short marker is printed
    instead and ``None`` is returned.
    """
    _status, payload = raw_conn.fetch(num, '(RFC822)')
    try:
        message = BytesParser().parsebytes(payload[0][1])
        subject = decode_str(message.get('subject'))
        print(subject)
    except TypeError:
        # presumably the fetch returned no message body — confirm
        print('empty-email')
        return None
    except UnicodeDecodeError:
        print('hahah')
        return None
    return subject
def get_date(num):
    """Fetch message ``num`` via the module-level ``raw_conn`` and return its raw Date header.

    Returns the header value as stored (``None`` when absent). If parsing
    raises ``TypeError`` or ``UnicodeDecodeError``, a short marker is printed
    and ``None`` is returned.
    """
    _status, payload = raw_conn.fetch(num, '(RFC822)')
    try:
        return BytesParser().parsebytes(payload[0][1]).get('Date')
    except TypeError:
        # presumably the fetch returned no message body — confirm
        print('empty-email')
    except UnicodeDecodeError:
        print('hahah')
    return None
def display_eml(eml_filepath):
    """Parse an ``.eml`` file and return its main headers, text and attachments.

    :param eml_filepath: path of the ``.eml`` file to read
    :return: dict with the keys ``"Odesílatel"`` (From), ``"Příjemce"`` (To),
        ``"Datum"`` (Date), ``"Předmět"`` (Subject), ``"Text zprávy"`` (content
        of the plain-text body, or ``None`` if there is none) and ``"Přílohy"``
        (list of ``(params, part)`` tuples, one per part whose
        Content-Disposition is ``attachment``; ``params`` maps the
        disposition parameters such as ``filename`` to their values)
    """
    with open(eml_filepath, 'rb') as eml_file:
        msg = BytesParser(policy=policy.default).parse(eml_file)

    # ('plain',) must be a real tuple: with a bare string the subtype lookup
    # degrades to substring matching.
    plain_body = msg.get_body(preferencelist=('plain',))
    text = plain_body.get_content() if plain_body is not None else None

    found = []
    for part in msg.walk():
        if 'content-disposition' not in part:
            continue
        cdisp = [x.strip() for x in part['content-disposition'].split(';')]
        if cdisp[0].lower() != 'attachment':
            continue
        parsed = {}
        for kv in cdisp[1:]:
            # Split on the first '=' only: values (e.g. file names) may
            # contain '=' themselves. Fragments without '=' are skipped.
            key, sep, val = kv.partition('=')
            if not sep:
                continue
            if val.startswith('"'):
                val = val.strip('"')
            elif val.startswith("'"):
                val = val.strip("'")
            parsed[key] = val
        found.append((parsed, part))

    return {
        "Odesílatel": msg.get('From'),
        "Příjemce": msg.get('To'),
        "Datum": msg.get('Date'),
        "Předmět": msg.get('Subject'),
        "Text zprávy": text,
        "Přílohy": found,
    }
def get(self):
    '''
    Receive mail and return its content and attachment information as JSON.

    The IMAP4 search criterion is read from the ``option`` query argument;
    without it, unread mail (``UNSEEN``) is searched. See
    https://www.atmarkit.co.jp/fnetwork/rensai/netpro09/imap4-searchoption.html
    for the accepted criteria.

    :return: JSON ``{'status': 'OK', 'result': [...]}`` with one entry per
        message, or ``{'status': 'ERROR'}`` on any failure
    '''
    result = []

    # Default to unread mail when no search criterion is given.
    search_option = request.args.get('option') or 'UNSEEN'

    # Connect to the mail server.
    cli = imaplib.IMAP4_SSL(DOMAIN)
    try:
        cli.login(USER_ID, PASSWORD)
        # Select the mailbox (INBOX by default).
        cli.select()

        status, data = cli.search(None, search_option)
        if status != 'OK':
            print('受信エラー')
            return jsonify({'status': 'ERROR'})

        for num in data[0].split():
            status, fetched = cli.fetch(num, '(RFC822)')
            msg = BytesParser(policy=policy.default).parsebytes(fetched[0][1])

            date_str = msg.get('Date', failobj='')
            # A missing or malformed Date must not fail the whole request:
            # parsedate_to_datetime raises for such input.
            try:
                date_time = parsedate_to_datetime(date_str)
            except (TypeError, ValueError):
                date_time = None
            if date_time:
                # Convert to Japan Standard Time (UTC+9).
                date_time = date_time.astimezone(
                    datetime.timezone(datetime.timedelta(hours=9)))

            body, format_, charset = CommonUtil.get_main_content(self, msg)
            result.append({
                'msg_id': msg.get('Message-Id', failobj=''),
                'header': CommonUtil.get_header_text(self, msg),
                'from': msg.get('From', failobj=''),
                'to': msg.get('To', failobj=''),
                'cc': msg.get('Cc', failobj=''),
                'subject': msg.get('Subject', failobj=''),
                'date': date_time.strftime('%Y/%m/%d') if date_time else '',
                'time': date_time.strftime('%H:%M:%S') if date_time else '',
                'format': format_,
                'charset': charset,
                'body': body,
                'attachments': CommonUtil.get_attachments(self, msg, num),
            })

        return jsonify({'status': 'OK', 'result': result})
    except Exception as error:
        print(error)
        return jsonify({'status': 'ERROR'})
    finally:
        # close() is only valid once a mailbox is selected; if login/select
        # failed it raises, which must not prevent the logout.
        try:
            cli.close()
        except imaplib.IMAP4.error:
            pass
        cli.logout()
def get_date(num):
    """Fetch message ``num`` via the module-level ``conn``; print and return its raw Date header."""
    _status, payload = conn.fetch(num, '(RFC822)')
    message = BytesParser().parsebytes(payload[0][1])
    date_header = message.get('Date')
    print(date_header)
    return date_header
def read(fp):
    """Deserialize an OOPS from an RFC822 format message.

    Headers carry the report metadata. The body is scanned line by line:
    lines shaped like ``<start>-<end>[@<db_id>] <statement>`` become timeline
    entries, other lines containing ``=`` become URL-unquoted request
    variables, and everything from the first line starting with
    ``traceback`` (any case) or ``== EXTRA DATA ==`` onwards is the
    traceback text.

    :param fp: binary file object positioned at the start of the message
    :return: dict with the keys ``id``, ``type``, ``value``, ``time``,
        ``topic``, ``tb_text``, ``username``, ``url``, ``duration``,
        ``req_vars``, ``timeline``, ``branch_nick`` and ``revno``, plus
        ``informational`` / ``reporter`` when those headers are present
    """
    msg = BytesParser().parse(fp, headersonly=True)
    oops_id = msg.get('oops-id')
    exc_type = msg.get('exception-type')
    exc_value = msg.get('exception-value')
    datestr = msg.get('date')
    date = iso8601.parse_date(datestr) if datestr is not None else None
    topic = msg.get('topic')
    if topic is None:
        topic = msg.get('page-id')
    username = msg.get('user')
    url = msg.get('url')
    try:
        duration = float(msg.get('duration', '-1'))
    except ValueError:
        duration = float(-1)
    informational = msg.get('informational')
    branch_nick = msg.get('branch')
    revno = msg.get('revision')
    reporter = msg.get('oops-reporter')

    # Explicitly use an iterator so we can process the file sequentially.
    lines = iter(msg.get_payload().splitlines(True))

    statement_pat = re.compile(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)')

    def is_req_var(line):
        return "=" in line and not statement_pat.match(line)

    def is_traceback(line):
        return line.lower().startswith('traceback') or line.startswith(
            '== EXTRA DATA ==')

    req_vars = []
    statements = []
    # Stays empty unless a traceback marker is found. The original reused the
    # last line read here, so a body without a traceback leaked its final
    # line into tb_text.
    first_tb_line = ''
    for raw_line in lines:
        line = raw_line.strip()
        if line == '':
            continue
        match = statement_pat.match(line)
        if match is not None:
            start, end, db_id, statement = match.groups()
            if db_id is not None:
                db_id = intern(db_id)  # This string is repeated lots.
            statements.append([int(start), int(end), db_id, statement])
        elif is_req_var(line):
            key, value = line.split('=', 1)
            req_vars.append([unquote(key), unquote(value)])
        elif is_traceback(line):
            first_tb_line = raw_line
            break
    req_vars = dict(req_vars)

    # The rest is traceback.
    tb_text = ''.join([first_tb_line] + list(lines))

    result = dict(id=oops_id, type=exc_type, value=exc_value, time=date,
                  topic=topic, tb_text=tb_text, username=username, url=url,
                  duration=duration, req_vars=req_vars, timeline=statements,
                  branch_nick=branch_nick, revno=revno)
    if informational is not None:
        result['informational'] = informational
    if reporter is not None:
        result['reporter'] = reporter
    return result
def get_subject(num):
    """Fetch message ``num`` via the module-level ``conn``; print and return its decoded subject."""
    _status, payload = conn.fetch(num, '(RFC822)')
    message = BytesParser().parsebytes(payload[0][1])
    subject = decode_str(message.get('subject'))
    print(subject)
    return subject
def get_from(num):
    """Fetch message ``num`` via the module-level ``conn`` and return its decoded From header."""
    _status, payload = conn.fetch(num, '(RFC822)')
    message = BytesParser().parsebytes(payload[0][1])
    return decode_str(message.get('From'))
print("Attachements count:", len(mail['attachments'])) for att in mail['attachments']: print(" - Att:", att['type'], att['filename']) print('---\n\n\n') '''for mail in db.search(Message.type=='mail' and Message.status=='error')[:1]: msg = BytesParser(policy=policy.default).parsebytes(base64.b64decode(mail['data'])) print("Subject:", msg['Subject']) '''try: print("From:", msg['From']) print("Tos:", msg.get('To')) except email.errors.HeaderParseError: print('Missing header as error')''' print("-BODY") #body = msg.get_body() #handle_part(body) print("-ATTACHMENTS") for att in msg.iter_attachments(): handle_part(att)''' print("---- Mailbox ----") '''mailboxes = list(await mailstore.get_mailboxes())
def process_probe(row):
    """Turn one raw HTTP response probe into a flat feature dict.

    :param row: mapping with ``"data"`` (raw response bytes) and ``"type"``
        (probe type, used as the prefix of every key)
    :return: ``{}`` when the data does not start with ``HTTP/`` or cannot be
        split into status line, headers and body; otherwise a dict with
        ``<type>:status_code``, ``<type>:status_text``, ``<type>:header_keys``,
        one ``<type>:header:<name>`` per header and ``<type>:dom_tree``
    """
    payload = row["data"]
    if not payload.startswith(b"HTTP/"):
        return {}

    # TODO: do some kind of content analysis
    normalized = payload.replace(b"\r\n\r\n", b"\n\n", 1)
    try:
        # Separate the head from the content, then the status line from
        # the header lines.
        head, content = normalized.split(b"\n\n", 1)
        request_line, headers_alone = head.split(b"\r\n", 1)
    except ValueError:
        return {}

    # Status line: "<protocol>/<version> <code> <text>". Fields stay None
    # when the line has fewer than three space-separated parts.
    protocol = status_code = status_text = version = None
    try:
        protocol, status_code, status_text = request_line.split(b" ", 2)
        protocol, version = protocol.split(b"/", 1)
    except ValueError:
        pass

    headers = BytesParser().parsebytes(headers_alone)
    content_type = headers.get("Content-Type", "")
    transfer_encoding = [
        token.strip()
        for token in headers.get("Transfer-Encoding", "").split(",")
    ]

    # Resolve the declared charset to one Python knows, defaulting to UTF-8.
    # The result is not used further down in this function.
    charset = "utf-8"
    marker = "charset="
    if marker in content_type:
        charset = content_type[content_type.find(marker) + len(marker):]
    if charset == "undef":
        charset = "utf-8"
    try:
        codecs.lookup(charset)
    except LookupError:
        charset = "utf-8"

    if "chunked" in transfer_encoding:
        # the content is chunked and needs to be merged
        content = merge_chunks(content)

    # Parse the HTML; an unparsable document leaves the tag tree empty.
    tag_tree = ""
    try:
        tag_tree = tag_recursive(html.fromstring(content))
    except ParserError:
        pass

    probe_type = row["type"]

    def key(suffix):
        return "{}:{}".format(probe_type, suffix)

    data = {}
    # TODO: IIS server is dick and may return decimals in status_code :shrug:
    try:
        data[key("status_code")] = float(status_code)
    except ValueError:
        data[key("status_code")] = -1
    except TypeError:
        # status_code is None when the status line could not be split
        data[key("status_code")] = None
    data[key("status_text")] = status_text
    data[key("header_keys")] = headers.keys()
    for header in headers:
        data["{}:header:{}".format(probe_type, header)] = headers[header]
    data[key("dom_tree")] = tag_tree
    return data
import os
import re
import email
from email import policy
from email.parser import Parser
from email.parser import BytesParser

# Search templates: replace %your_search_template% with the text to look for.
# Compiled once, outside the per-file loop.
SUBJECT_RE = re.compile(r' %your_search_template% .*\w+', re.I)
NAME_RE = re.compile(r' %your_search_template% .*\w+', re.I)
CONTENT_RE = re.compile(r' %your_search_template% .*\w+', re.I)


def _first(matches):
    """Return the first match, or '' when the template did not match."""
    return matches[0] if matches else ''


# For every *.eml file in the current directory: parse it and print one
# ';'-separated line built from the subject and the plain-text body.
for f_name in os.listdir('.'):
    if not f_name.endswith('.eml'):
        continue
    with open(f_name, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)

    # ('plain',) must be a real tuple; a mail without a plain-text body or
    # without a Subject yields empty text instead of a crash.
    body = msg.get_body(preferencelist=('plain',))
    text = body.get_content() if body is not None else ''
    fullsubject = msg.get('Subject', '')

    # get data by templates
    subj = SUBJECT_RE.findall(fullsubject)
    name = NAME_RE.findall(text)
    cont = CONTENT_RE.findall(text)
    print(_first(name) + ';' + _first(cont) + ';' + _first(subj) + ';')
    # The output is meant to be post-processed, e.g. with
    # "| awk '{print $1, $3, $n}' >> output.csv"
async def __run(self):
    """Deliver the mail read from STDIN to the hood of the addressed recipient.

    Looks up the recipient named by ``args.recipient``, checks that the
    mail's sender is the single subscriber with that address and belongs to
    the same hood, strips tags from the mail body and posts the text to the
    REST API. The process exits with status 0 when the API answers
    201 Created and with status 1 in every other case.
    """
    # The recipient name comes from the command line.
    recipient_name = args.recipient.lower()
    try:
        recipient = await Email.objects.get(name=recipient_name)
    except NoMatch:
        logger.error('No recipient with this name')
        exit(1)

    # Read the mail from STDIN and parse it into an EmailMessage object.
    message = BytesParser(policy=default).parsebytes(stdin.buffer.read())

    # Prefer the Sender header, fall back to From.
    raw_sender = message.get('sender') or message.get('from')
    if not raw_sender:
        logger.error('No Sender of From header')
        exit(1)
    sender = parseaddr(raw_sender)[1]
    if not sender:
        logger.error('Could not parse sender')
        exit(1)

    subscribers = await EmailSubscribers.objects.filter(email=sender).all()
    is_subscriber = (len(subscribers) == 1
                     and subscribers[0].hood.id == recipient.hood.id)
    if not is_subscriber:
        logger.error('Not a subscriber')
        exit(1)

    # Extract the text (plain preferred over html) and drop anything that
    # looks like a tag.
    body = message.get_body(preferencelist=('plain', 'html'))
    text = sub(r'<[^>]*>', '', body.get_content())

    endpoint = '%s/api/hoods/%d/email/messages/' % (config['root_url'],
                                                    recipient.hood.pk)
    response = post(endpoint, json={'text': text, 'secret': recipient.secret})

    code = response.status_code
    if code == status.HTTP_201_CREATED:
        exit(0)
    elif code == status.HTTP_451_UNAVAILABLE_FOR_LEGAL_REASONS:
        logger.error('Message was\'t accepted: %s' % text)
    elif code == status.HTTP_422_UNPROCESSABLE_ENTITY:
        logger.error('Malformed request: %s' % response.json())
    elif code == status.HTTP_401_UNAUTHORIZED:
        logger.error(
            'Wrong API secret. kibicara_mda seems to be misconfigured')
    else:
        logger.error('REST-API failed with response status %d' % code)
    exit(1)