def _get_mails(self, folder: Folder) -> iter:
    """Yield every mail of ``folder`` as an ``EML`` carrying its downloaded stream.

    The folder is listed page by page through the ``mbox:listMessages``
    function of ``https://mail.163.com/js6/s``; every mail id found in a page
    is downloaded with ``self.__download_eml``.

    :param folder: folder to enumerate; only ``folder.folderid`` is read here.
    :return: generator of ``EML`` objects with ``io_stream`` and
        ``stream_length`` filled in.
    """
    if self.sid is None:
        # No session id: the listing request cannot be built, so stop here
        # instead of querying with sid=None.
        self._logger.error("Invalid cookie")
        return

    url = "https://mail.163.com/js6/s"
    querystring = {"sid": self.sid, "func": "mbox:listMessages"}
    page_size = 50
    # Give up after this many consecutive failed pages so a persistent
    # failure cannot keep the loop spinning forever.
    max_errors = 5
    re_mailid = re.compile("'id':'(.+?)',")
    re_total = re.compile(r"'total':(\d+)")
    payload_template = (
        '<?xml version="1.0"?><object><int name="fid">{fid}</int>'
        '<string name="order">date</string><boolean name="desc">true</boolean>'
        '<int name="limit">{limit}</int><int name="start">{start}</int><boolean name="skipLockedFolders">'
        'false</boolean><string name="topFlag">top</string><boolean name="returnTag">'
        'true</boolean><boolean name="returnTotal">true</boolean></object>'
    )
    # NOTE(review): Content-Length is hard-coded although the payload length
    # varies; confirm how self._ha.getstring treats this header.
    headers = f'''
        Accept: text/javascript
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9,en;q=0.8
        Cache-Control: no-cache
        Connection: keep-alive
        Content-Length: 539
        Content-type: application/x-www-form-urlencoded
        Host: mail.163.com
        Origin: https://mail.163.com
        Pragma: no-cache
        Referer: https://mail.163.com/js6/main.jsp?sid={self.sid}&df=mail163_letter
        Sec-Fetch-Dest: empty
        Sec-Fetch-Mode: cors
        Sec-Fetch-Site: same-origin
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36
        '''

    get_mail_num = 0
    consecutive_errors = 0
    while True:
        payload_data = payload_template.format(
            fid=folder.folderid, limit=page_size, start=get_mail_num)
        # Advance first: a page that fails is skipped, not requested again.
        get_mail_num += page_size
        payload = 'var=' + urllib.parse.quote_plus(payload_data).replace('+', '%20')
        try:
            re_text = self._ha.getstring(url,
                                         req_data=payload,
                                         headers=headers,
                                         params=querystring)
            for mail_id in re_mailid.findall(re_text):
                eml = EML(self._clientid, self.task, self._userid, mail_id,
                          folder, self.task.apptype)
                eml_info = self.__download_eml(mail_id)
                eml.io_stream = eml_info[0]
                eml.stream_length = eml_info[1]
                yield eml

            total_res = re_total.search(re_text)
            if total_res is None:
                self._logger.error("Cant get all email, something wrong")
                break
            if int(total_res.group(1)) <= get_mail_num:
                # Every mail reported by the server has been requested.
                break
            consecutive_errors = 0
        except Exception:
            self._logger.error(
                f"Get email info error, err:{traceback.format_exc()}")
            consecutive_errors += 1
            if consecutive_errors >= max_errors:
                break
def __get_mails(self, mails_info: dict, folder: Folder):
    """Build and yield one ``EML`` per entry of ``mails_info['mailMap']``.

    For every entry the method reads ``messageId``, ``msId``, ``newMail``,
    ``sender``, ``sendDate`` and ``subject``, then fetches the mail stream
    through ``self.__get_mail_streams``. A mail that raises is logged and
    skipped; the remaining mails are still processed.

    :param mails_info: listing result; only its ``'mailMap'`` entry is used.
    :param folder: folder the mails belong to, passed on to ``EML``.
    :return: generator of ``EML`` objects.
    """
    mails = mails_info.get('mailMap')
    if not mails:
        return

    for mail in mails.values():
        # Bound before the try so the error log below can always format it,
        # even when reading the id itself is what failed.
        mailid = None
        try:
            mailid = mail.get('messageId')
            msid = mail.get('msId')
            eml = EML(self._clientid, self.task, self._userid, mailid, folder,
                      self.task.apptype)
            eml.state = 0 if mail.get('newMail') else 1
            eml.provider = mail.get('sender')
            sendtime = mail.get('sendDate')
            if sendtime is not None and sendtime != '':
                # presumably a millisecond timestamp, hence the // 1000
                eml.sendtime = datetime.datetime.fromtimestamp(sendtime // 1000)
            eml.subject = mail.get('subject')
            eml_info = self.__get_mail_streams(mailid, msid)
            eml.io_stream = eml_info[0]
            eml.stream_length = eml_info[1]
            time.sleep(5)  # throttle so mails are not fetched too frequently
            yield eml
        except Exception:
            self._logger.error(
                f'Get {mailid} email error, err:{traceback.format_exc()}')
def _get_mails(self, folder: Folder) -> iter:
    """Yield the mails of ``folder`` listed by appmail.mail.10086.cn.

    One ``mbox:listMessages`` request is sent (``start`` 1, ``total`` 2000);
    every listed entry with a ``mid`` is fetched through ``self._get_mail``
    and yielded as an ``EML``. Any exception is logged and ends the generator.

    :param folder: folder to enumerate; only ``folder.folderid`` is read here.
    :return: generator of ``EML`` objects with ``io_stream`` and
        ``stream_length`` filled in.
    """
    sid = self._get_cookie_sid()
    if sid is None:
        # Without a sid the listing URL is invalid; do not send the request.
        self._logger.error("Cannot get sid from cookie!")
        return
    cguid = self._get_cookie_cguid()

    url = 'https://appmail.mail.10086.cn/s?func=mbox:listMessages&sid={}&&comefrom=54&cguid={}'.format(
        sid, cguid)
    payload = ('<object>\n <int name="fid">{}</int>\n <string name="order">receiveDate</string>\n '
               '<string name="desc">1</string>\n <int name="start">1</int>\n '
               '<int name="total">2000</int>\n <string name="topFlag">top</string>\n '
               '<int name="sessionEnable">2</int>\n</object>').format(folder.folderid)
    payload = payload.encode('ascii')
    headers = {
        'accept': "*/*",
        'accept-encoding': "gzip, deflate, br",
        'accept-language': "zh-CN,zh;q=0.9,en;q=0.8",
        'cache-control': "no-cache,no-cache",
        'content-length': "{}".format(len(payload)),
        'Content-Type': "text/xml",
        'cookie': self.task.cookie,
        'origin': "https://appmail.mail.10086.cn",
        'pragma': "no-cache",
        'referer': 'https://appmail.mail.10086.cn/m2015/html/index.html?sid={}&tab=mailbox_1&resource=indexLogin&cguid={}'
        .format(sid, cguid),
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/70.0.3538.102 Safari/537.36",
    }

    try:
        # Bounded wait: without a timeout a stalled server blocks forever.
        response = requests.request("POST", url, data=payload,
                                    headers=headers, timeout=60)
        response.encoding = 'utf-8'
        all_mail_list = response.text
        if 'code' not in all_mail_list or 'S_OK' not in all_mail_list:
            self._logger.error("Get mail list failed, unexpected response")
            return

        # The reply uses single quotes; swap them so json can parse it.
        mail_list = json.loads(all_mail_list.replace("'", '"')).get('var')
        if not mail_list:
            return

        for mail in mail_list:
            if mail.get('mid') is None:
                continue
            mail_data = self._get_mail(mail, sid)
            if mail_data is None:
                continue
            eml = EML(self._clientid, self.task, self._userid, mail['mid'],
                      folder, self.task.apptype)
            eml.io_stream = mail_data[0]
            eml.stream_length = mail_data[1]
            yield eml
    except Exception as err:
        self._logger.error("Get mail error, err: {}".format(err))
        return
def _get_mails(self, folder: Folder) -> iter:
    """Yield every mail of ``folder`` from webmail.vip.163.com as an ``EML``.

    The folder is listed 50 mails per request through ``mbox:listMessages``;
    every mail id found in a page is downloaded with ``self.__download_eml``.
    Listing stops once the reported ``total`` has been covered, when the
    reply carries no ``total``, or after repeated failures.

    :param folder: folder to enumerate; only ``folder.folderid`` is read here.
    :return: generator of ``EML`` objects with ``io_stream`` and
        ``stream_length`` filled in.
    """
    sid = self._get_sid()
    if sid is None:
        # No session id: stop instead of querying with sid=None.
        self._logger.error("Invalid cookie")
        return

    url = "http://webmail.vip.163.com/js6/s"
    querystring = {
        "sid": sid,
        "func": "mbox:listMessages",
        "LeftNavfolder1Click": "1",
        "mbox_folder_enter": folder.folderid,
    }
    headers = {
        'Accept': "text/javascript",
        'Accept-Encoding': "gzip, deflate",
        'Accept-Language': "zh-CN,zh;q=0.9,en;q=0.8",
        'Cache-Control': "no-cache",
        'Content-Length': "539",
        'Content-type': "application/x-www-form-urlencoded",
        'Cookie': self.task.cookie.encode('utf-8'),
        'Host': "webmail.vip.163.com",
        'Origin': "http://webmail.vip.163.com",
        'Pragma': "no-cache",
        'Proxy-Connection': "keep-alive",
        'Referer': "http://webmail.vip.163.com/js6/main.jsp?sid={}&df=mailvip".format(sid),
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
    }
    page_size = 50
    # Give up after this many consecutive failed pages so a persistent
    # failure cannot keep the loop spinning forever.
    max_errors = 5
    re_mailid = re.compile("'id':'(.+?)',")
    re_total = re.compile(r"'total':(\d+)")
    payload_template = (
        '<?xml version="1.0"?><object><int name="fid">{fid}</int>'
        '<string name="order">date</string><boolean name="desc">true</boolean>'
        '<int name="limit">{limit}</int><int name="start">{start}</int><boolean name="skipLockedFolders">false'
        '</boolean><string name="topFlag">top</string><boolean name="returnTag">true</boolean>'
        '<boolean name="returnTotal">true</boolean></object>'
    )

    get_mail_num = 0
    consecutive_errors = 0
    while True:
        payload_data = payload_template.format(
            fid=folder.folderid, limit=page_size, start=get_mail_num)
        # Advance first: a page that fails is skipped, not requested again.
        get_mail_num += page_size
        payload = 'var=' + urllib.parse.quote_plus(payload_data).replace('+', '%20')
        try:
            response = requests.request("POST",
                                        url,
                                        data=payload,
                                        headers=headers,
                                        params=querystring,
                                        timeout=60)
            re_text = response.text
            for mail_id in re_mailid.findall(re_text):
                eml = EML(self._clientid, self.task, self._userid, mail_id,
                          folder, self.task.apptype)
                eml_info = self.__download_eml(mail_id)
                eml.io_stream = eml_info[0]
                eml.stream_length = eml_info[1]
                yield eml

            total_res = re_total.search(re_text)
            if total_res is None:
                # A reply without 'total' can never satisfy the stop
                # condition below, so end the listing explicitly.
                self._logger.error("Cant get all email, something wrong")
                break
            if int(total_res.group(1)) <= get_mail_num:
                break
            consecutive_errors = 0
        except Exception:
            self._logger.error(
                f"Get email info error, err:{traceback.format_exc()}")
            consecutive_errors += 1
            if consecutive_errors >= max_errors:
                break
def _get_mails(self, folder: Folder) -> iter:
    """
    Yield every mail of ``folder`` from mail.tom.com as an ``EML``.

    The first ``queryfolder.action`` reply supplies ``totalPage``; each page's
    ``pageList`` is then walked and every mail is downloaded through
    ``self.__getemlfile``. Mails without a ``messageid`` or without downloaded
    data are logged and skipped. Any exception is logged and ends the
    generator.

    :param folder: folder to enumerate; only ``folder.name`` is read here.
    :return: generator of ``EML`` objects with ``io_stream`` and
        ``stream_length`` filled in.
    """
    self._headers['Cookie'] = self.task.cookie
    url = 'https://mail.tom.com/webmail/query/queryfolder.action'
    headers = """
        Accept: application/json, text/javascript, */*; q=0.01
        Accept-Encoding: gzip, deflate
        Accept-Language: en-US,en;q=0.9
        Cache-Control: no-cache
        Connection: keep-alive
        Content-Type: application/x-www-form-urlencoded
        Origin: https://mail.tom.com
        Pragma: no-cache
        Referer: https://mail.tom.com/webmail/main/index.action
        Sec-Fetch-Dest: empty
        Sec-Fetch-Mode: cors
        Sec-Fetch-Site: same-origin
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36
        X-Requested-With: XMLHttpRequest"""

    def fetch_page(page_no):
        # The form field is "currentPage"; it must be joined with "&".
        # NOTE(review): folder.name is sent unescaped — confirm whether
        # self._ha.getstring url-encodes req_data.
        reqdata = f"folderName={folder.name}&currentPage={page_no}"
        html = self._ha.getstring(url,
                                  req_data=reqdata,
                                  timeout=60,
                                  headers=headers)
        return json.loads(html)

    try:
        first_page = fetch_page(1)
        total_pages = int(first_page['result']['totalPage'])
        for page_no in range(1, total_pages + 1):
            # Page 1 was already fetched to learn the page count.
            page = first_page if page_no == 1 else fetch_page(page_no)
            for mail in page['result']['pageList']:
                maildata = None
                if mail.get('messageid') is not None:
                    # Download only mails that can be identified.
                    maildata = self.__getemlfile(mail)
                if maildata is None:
                    self._logger.info("Skip invalid email: {}".format(
                        mail.get("subject")))
                    continue
                eml = EML(self._clientid, self.task, self._userid,
                          mail['messageid'], folder, self.task.apptype)
                eml.io_stream = maildata[0]
                eml.stream_length = maildata[1]
                yield eml
    except Exception:
        self._logger.error("Downloading all mails error, err: {}".format(
            traceback.format_exc()))
        return
def _get_mails(self, folder: Folder) -> iter:
    """Yield every mail of ``folder`` from m0.mail.sina.com.cn as an ``EML``.

    Pages are requested from ``wa.php?a=list_mail`` starting at page 1; the
    ``pagenum`` field of each reply gives the number of pages. Every mail in
    ``maillist`` is downloaded through ``self.__download_mails``.

    :param folder: folder to enumerate; only ``folder.folderid`` is read here.
    :return: generator of ``EML`` objects with send time, subject, provider,
        owner, ``io_stream`` and ``stream_length`` filled in.
    """
    mail_url = 'http://m0.mail.sina.com.cn/wa.php?a=list_mail'
    headers = {
        'Accept': "*/*",
        'Accept-Encoding': "gzip, deflate",
        'Accept-Language': "zh-CN,zh;q=0.9,en;q=0.8",
        'Cache-Control': "no-cache",
        'Content-Length': "98",
        'Content-type': "application/x-www-form-urlencoded;charset=UTF-8",
        'Cookie': self.task.cookie,
        'Host': "m0.mail.sina.com.cn",
        'Origin': "http://m0.mail.sina.com.cn",
        'Pragma': "no-cache",
        'Proxy-Connection': "keep-alive",
        'Referer': "http://m0.mail.sina.com.cn/classic/index.php",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
    }
    max_errors = 5
    error_times = 0
    current_page = 1
    all_pagenum = 1000  # placeholder until the first reply reports 'pagenum'

    while True:
        try:
            # Strictly greater: the page numbered all_pagenum is the last
            # page and must still be requested.
            if current_page > all_pagenum:
                break
            payload_data = {
                'fid': '{}'.format(folder.folderid),
                'order': 'htime',
                'sorttype': 'desc',
                'type': '0',
                'pageno': '{}'.format(current_page),
                'tag': '-1',
                'webmail': '1',
            }
            response = requests.request("POST",
                                        mail_url,
                                        data=payload_data,
                                        headers=headers,
                                        timeout=60)
            mail_data_json = json.loads(response.text)
            if not mail_data_json.get('result'):
                break
            if mail_data_json.get('errno') != 0:
                break
            mail_data = mail_data_json.get('data')
            all_pagenum = mail_data.get('pagenum')
            maillist = mail_data.get('maillist')
            if not maillist:
                self._logger.info("No mail in mail list")
                break

            for mail_info in maillist:
                mailid = mail_info[0]
                eml = EML(self._clientid, self.task, self._userid, mailid,
                          folder, self.task.apptype)
                eml.sendtime = datetime.datetime.fromtimestamp(mail_info[4])
                eml.subject = mail_info[3]
                eml.provider = mail_info[1]
                eml.owner = mail_info[2]
                eml_info = self.__download_mails(folder.folderid, mailid)
                eml.io_stream = eml_info[0]
                eml.stream_length = eml_info[1]
                time.sleep(5)  # throttle so mails are not fetched too frequently
                yield eml

            if current_page == all_pagenum:
                break
            current_page += 1
        except Exception:
            # NOTE: the failed page is requested again on the next pass, so
            # mails yielded before the failure are yielded a second time.
            if error_times >= max_errors:
                break
            self._logger.error(
                f"Get mail or download mail error, err:{traceback.format_exc()}"
            )
            error_times += 1
def parse_mail(self, message, fo: Folder):
    """Parse a JSON mail listing and yield one ``EML`` per usable item.

    ``message`` is decoded as JSON and the items under
    ``Body -> ResponseMessages -> Items -> RootFolder -> Items`` are walked.
    Items lacking ``ItemId.Id``, ``Subject`` or ``IsRead`` are skipped.
    Nothing is yielded when the message cannot be decoded or holds no
    response messages.

    :param message: JSON text of the listing response.
    :param fo: folder the mails belong to, passed on to ``EML``.
    :return: generator of ``EML`` objects with subject, send time, provider,
        state and ``stream_length`` filled in (no mail body is downloaded).
    """
    try:
        resp_msgs = json.loads(message)["Body"]["ResponseMessages"]["Items"]
    except Exception:
        return
    if not resp_msgs:
        # Nothing to parse: end the generator without emitting a value.
        return

    for msg in resp_msgs:
        for item in msg["RootFolder"]["Items"]:
            if 'ItemId' not in item or 'Subject' not in item or 'IsRead' not in item:
                continue
            if 'Id' not in item['ItemId']:
                continue

            rcv_time = None
            if 'DateTimeReceived' in item:
                # Expected form: 2019-03-09T00:52:47+08:00
                # NOTE(review): only a literal "+08:00" offset parses, and 8
                # hours are then added on top — confirm this is intended.
                rcv_time = datetime.datetime.strptime(
                    item["DateTimeReceived"],
                    '%Y-%m-%dT%H:%M:%S+08:00') + datetime.timedelta(hours=8)

            m = EML(self._clientid, self.task, self._userid,
                    item["ItemId"]["Id"], fo, self.task.apptype)
            m.subject = item["Subject"]
            m.sendtime = rcv_time
            m.provider = 'outlook.live.com'
            m.state = 1 if item["IsRead"] else 0
            m.stream_length = item.get('Size', 0)
            yield m
def _get_mails(self, folder: Folder) -> iter:
    """Yield every mail of ``folder`` from mail.qq.com as an ``EML``.

    ``cgi-bin/mail_list`` is requested page by page starting at page 0. Each
    mail id found in the returned HTML is downloaded with
    ``self.download_mail``. Paging stops at the first page whose HTML does
    not contain both the "下一页" marker and "nextpage".

    :param folder: folder to enumerate; only ``folder.folderid`` is read here.
    :return: generator of ``EML`` objects with ``io_stream`` and
        ``stream_length`` filled in.
    """
    url = "https://mail.qq.com/cgi-bin/mail_list"
    headers = {
        'accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        'accept-encoding': "gzip, deflate, br",
        'accept-language': "zh-CN,zh;q=0.9,en;q=0.8",
        'cache-control': "no-cache,no-cache",
        'cookie': self.task.cookie,
        'pragma': "no-cache",
        'referer': "https://mail.qq.com/cgi-bin/frame_html?sid=LHMRQd34yoCnbiJ_&r=c8d8fa46b4a22664132d900770e7f25f",
        'upgrade-insecure-requests': "1",
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
    }
    # Same pattern as before, written as a raw string without the
    # unnecessary escapes, and compiled once instead of per page.
    re_mail_info = re.compile(r'<nobr t="6" mailid="(.+?)"')

    pagenum = 0
    while True:
        querystring = {
            "sid": self._sid,
            "folderid": f"{folder.folderid}",
            "folderkey": f"{folder.folderid}",
            "page": f"{pagenum}",
            "s": "inbox",
            "topmails": "0",
            "showinboxtop": "1",
            "ver": "952875.0",
            "cachemod": "maillist",
            "cacheage": "7200",
            "r": "",
        }
        response = requests.request("GET",
                                    url,
                                    headers=headers,
                                    params=querystring)
        res_text = response.text
        for mail_id in re_mail_info.findall(res_text):
            eml = EML(self._clientid, self.task, self._userid, mail_id, folder,
                      self.task.apptype)
            eml_info = self.download_mail(mail_id)
            eml.io_stream = eml_info[0]
            eml.stream_length = eml_info[1]
            time.sleep(1)
            yield eml

        if "下一页" not in res_text or "nextpage" not in res_text:
            # No next-page marker in the HTML: this was the last page.
            break
        pagenum += 1