Beispiel #1
0
    def _get_mails(self, folder: Folder) -> iter:
        """Yield every mail listed in ``folder`` on mail.163.com as an ``EML``.

        The listing is requested page by page (50 messages per request) with
        the ``mbox:listMessages`` function.  Every message id found in a page
        is passed to ``__download_eml`` and the result is attached to a new
        ``EML`` object.  Paging stops once the ``total`` reported by the
        server has been covered, when ``total`` is missing from a response,
        or after several failed pages in a row.

        :param folder: folder to list; its ``folderid`` is sent as ``fid``.
        :return: generator of ``EML`` objects with ``io_stream`` and
            ``stream_length`` set.
        """
        if self.sid is None:
            # Without a session id every request below would be sent with
            # sid=None, so stop right away.
            self._logger.error("Invalid cookie")
            return

        page_size = 50
        max_errors = 5  # consecutive failed pages tolerated before giving up
        error_times = 0
        get_mail_num = 0
        url = "https://mail.163.com/js6/s"
        querystring = {"sid": self.sid, "func": "mbox:listMessages"}
        re_mailid = re.compile("'id':'(.+?)',")
        re_total = re.compile(r"'total':(\d+)")
        while True:
            payload_data = '<?xml version="1.0"?><object><int name="fid">{}</int>' \
                           '<string name="order">date</string><boolean name="desc">true</boolean>' \
                           '<int name="limit">{}</int><int name="start">{}</int><boolean name="skipLockedFolders">' \
                           'false</boolean><string name="topFlag">top</string><boolean name="returnTag">' \
                           'true</boolean><boolean name="returnTotal">true</boolean></object>'.format(
                               folder.folderid, page_size, get_mail_num)
            # The offset is advanced before the request, so a page that fails
            # is skipped rather than retried.
            get_mail_num += page_size
            payload_url = urllib.parse.quote_plus(payload_data).replace('+', '%20')
            payload = 'var=' + payload_url
            # Content-Length follows the real body size: the body grows with
            # the folder id and the start offset.  The payload is pure ASCII
            # after URL quoting, so len() equals the byte count.
            headers = f'''
                Accept: text/javascript
                Accept-Encoding: gzip, deflate, br
                Accept-Language: zh-CN,zh;q=0.9,en;q=0.8
                Cache-Control: no-cache
                Connection: keep-alive
                Content-Length: {len(payload)}
                Content-type: application/x-www-form-urlencoded
                Host: mail.163.com
                Origin: https://mail.163.com
                Pragma: no-cache
                Referer: https://mail.163.com/js6/main.jsp?sid={self.sid}&df=mail163_letter
                Sec-Fetch-Dest: empty
                Sec-Fetch-Mode: cors
                Sec-Fetch-Site: same-origin
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36
            '''

            try:
                re_text = self._ha.getstring(url, req_data=payload, headers=headers, params=querystring)
                for mail_id in re_mailid.findall(re_text):
                    eml = EML(self._clientid, self.task, self._userid, mail_id, folder, self.task.apptype)
                    eml_info = self.__download_eml(mail_id)
                    eml.io_stream = eml_info[0]
                    eml.stream_length = eml_info[1]
                    yield eml
                # The page went through, so earlier failures no longer count.
                error_times = 0
                total_res = re_total.search(re_text)
                if total_res is None:
                    self._logger.error("Cant get all email, something wrong")
                    break
                if int(total_res.group(1)) <= get_mail_num:
                    break
            except Exception:
                self._logger.error(f"Get email info error, err:{traceback.format_exc()}")
                # A dead session or unreachable host fails on every page;
                # without this bound the loop would never terminate.
                error_times += 1
                if error_times >= max_errors:
                    break
Beispiel #2
0
 def __get_mails(self, mails_info: dict, folder: Folder):
     """Yield one ``EML`` per entry of ``mails_info['mailMap']``.

     Each entry is read for ``messageId``, ``msId``, ``newMail``,
     ``sender``, ``sendDate`` and ``subject``; the mail content is obtained
     from ``__get_mail_streams``.  An entry that raises is logged and
     skipped, the remaining entries are still processed.

     :param mails_info: dict holding the ``mailMap`` mapping.
     :param folder: folder the mails belong to, handed on to ``EML``.
     :return: generator of populated ``EML`` objects.
     """
     mails = mails_info.get('mailMap')
     if not mails:
         return
     for mail in mails.values():
         # Bound before the try block so the error log below never reads an
         # unbound name (first entry) or the id of the previous entry.
         mailid = None
         try:
             mailid = mail.get('messageId')
             msid = mail.get('msId')
             eml = EML(self._clientid, self.task, self._userid, mailid, folder, self.task.apptype)
             # state is 0 when the entry is flagged ``newMail``, 1 otherwise
             eml.state = 0 if mail.get('newMail') else 1
             eml.provider = mail.get('sender')
             sendtime = mail.get('sendDate')
             if sendtime is not None and sendtime != '':
                 # presumably a millisecond epoch, hence the division - TODO confirm
                 eml.sendtime = datetime.datetime.fromtimestamp(sendtime // 1000)
             eml.subject = mail.get('subject')
             eml_info = self.__get_mail_streams(mailid, msid)
             eml.io_stream = eml_info[0]
             eml.stream_length = eml_info[1]
             time.sleep(5)  # throttle so mails are not fetched too quickly
             yield eml
         except Exception:
             self._logger.error(f'Get {mailid} email error, err:{traceback.format_exc()}')
Beispiel #3
0
    def _get_mails(self, folder: Folder) -> iter:
        """Yield the mails of ``folder`` on appmail.mail.10086.cn as ``EML``.

        A single ``mbox:listMessages`` request is posted for the folder.
        When the response text contains both ``code`` and ``S_OK`` it is
        parsed, and every entry under ``var`` that has a ``mid`` is fetched
        through ``_get_mail`` and yielded.  Any exception is logged and ends
        the generator.

        :param folder: folder to list; its ``folderid`` is sent as ``fid``.
        :return: generator of ``EML`` objects with ``io_stream`` and
            ``stream_length`` set.
        """
        sid = self._get_cookie_sid()
        if sid is None:
            # The sid is part of the URL and the referer; a request without
            # it cannot succeed, so stop instead of sending "sid=None".
            self._logger.error("Cannot get sid from cookie!")
            return
        cguid = self._get_cookie_cguid()
        url = 'https://appmail.mail.10086.cn/s?func=mbox:listMessages&sid={}&&comefrom=54&cguid={}'.format(
            sid, cguid)
        payload = '<object>\n    <int name="fid">{}</int>\n    <string name="order">receiveDate</string>\n    ' \
                  '<string name="desc">1</string>\n    <int name="start">1</int>\n    ' \
                  '<int name="total">2000</int>\n    <string name="topFlag">top</string>\n    ' \
                  '<int name="sessionEnable">2</int>\n</object>'.format(folder.folderid)
        payload = payload.encode('ascii')
        headers = {
            'accept': "*/*",
            'accept-encoding': "gzip, deflate, br",
            'accept-language': "zh-CN,zh;q=0.9,en;q=0.8",
            'cache-control': "no-cache,no-cache",
            'content-length': "{}".format(len(payload)),
            'Content-Type': "text/xml",
            'cookie': self.task.cookie,
            'origin': "https://appmail.mail.10086.cn",
            'pragma': "no-cache",
            'referer':
            'https://appmail.mail.10086.cn/m2015/html/index.html?sid={}&tab=mailbox_1&resource=indexLogin&cguid={}'
            .format(sid, cguid),
            'user-agent':
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/70.0.3538.102 Safari/537.36",
        }

        try:
            response = requests.request("POST",
                                        url,
                                        data=payload,
                                        headers=headers)
            response.encoding = 'utf-8'
            all_mail_list = response.text
            if 'code' not in all_mail_list or 'S_OK' not in all_mail_list:
                return
            # Single quotes are swapped for double quotes before JSON
            # parsing.  NOTE(review): this also rewrites apostrophes inside
            # values, which can make json.loads fail for such responses.
            mail_list_data = json.loads(all_mail_list.replace("'", '"'))
            for mail in mail_list_data.get('var') or []:
                if mail.get('mid') is None:
                    continue
                mail_data = self._get_mail(mail, sid)
                if mail_data is None:
                    continue
                eml = EML(self._clientid, self.task, self._userid,
                          mail['mid'], folder, self.task.apptype)
                eml.io_stream = mail_data[0]
                eml.stream_length = mail_data[1]
                yield eml
        except Exception as err:
            self._logger.error("Get mail error, err: {}".format(err))
            return
Beispiel #4
0
    def _get_mails(self, folder: Folder) -> iter:
        """Yield every mail listed in ``folder`` on webmail.vip.163.com.

        The listing is requested page by page (50 messages per request) with
        the ``mbox:listMessages`` function.  Every message id found in a page
        is passed to ``__download_eml`` and the result is attached to a new
        ``EML`` object.  Paging stops once the ``total`` reported by the
        server has been covered, when ``total`` is missing from a response,
        or after several failed pages in a row.

        :param folder: folder to list; its ``folderid`` is sent as ``fid``
            and as ``mbox_folder_enter``.
        :return: generator of ``EML`` objects with ``io_stream`` and
            ``stream_length`` set.
        """
        sid = self._get_sid()
        if sid is None:
            # Without a session id every request below would be sent with
            # sid=None, so stop right away.
            self._logger.error("Invalid cookie")
            return

        page_size = 50
        max_errors = 5  # consecutive failed pages tolerated before giving up
        error_times = 0
        get_mail_num = 0
        url = "http://webmail.vip.163.com/js6/s"
        querystring = {
            "sid": sid,
            "func": "mbox:listMessages",
            "LeftNavfolder1Click": "1",
            "mbox_folder_enter": folder.folderid
        }
        # Everything except Content-Length is the same for every page.
        headers = {
            'Accept': "text/javascript",
            'Accept-Encoding': "gzip, deflate",
            'Accept-Language': "zh-CN,zh;q=0.9,en;q=0.8",
            'Cache-Control': "no-cache",
            'Content-type': "application/x-www-form-urlencoded",
            'Cookie': self.task.cookie.encode('utf-8'),
            'Host': "webmail.vip.163.com",
            'Origin': "http://webmail.vip.163.com",
            'Pragma': "no-cache",
            'Proxy-Connection': "keep-alive",
            'Referer':
            "http://webmail.vip.163.com/js6/main.jsp?sid={}&df=mailvip".
            format(sid),
            'User-Agent':
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
        }
        re_mailid = re.compile("'id':'(.+?)',")
        re_total = re.compile(r"'total':(\d+)")
        while True:
            payload_data = '<?xml version="1.0"?><object><int name="fid">{}</int>' \
                           '<string name="order">date</string><boolean name="desc">true</boolean>' \
                           '<int name="limit">{}</int><int name="start">{}</int><boolean name="skipLockedFolders">false' \
                           '</boolean><string name="topFlag">top</string><boolean name="returnTag">true</boolean>' \
                           '<boolean name="returnTotal">true</boolean></object>'.format(
                               folder.folderid, page_size, get_mail_num)
            # The offset is advanced before the request, so a page that fails
            # is skipped rather than retried.
            get_mail_num += page_size
            payload_url = urllib.parse.quote_plus(payload_data).replace(
                '+', '%20')
            payload = 'var=' + payload_url
            # The body length depends on the folder id and the offset, so it
            # is computed instead of being hard-coded.
            headers['Content-Length'] = str(len(payload))
            try:
                response = requests.request("POST",
                                            url,
                                            data=payload,
                                            headers=headers,
                                            params=querystring)
                re_text = response.text
                for mail_id in re_mailid.findall(re_text):
                    eml = EML(self._clientid, self.task, self._userid, mail_id,
                              folder, self.task.apptype)
                    eml_info = self.__download_eml(mail_id)
                    eml.io_stream = eml_info[0]
                    eml.stream_length = eml_info[1]
                    yield eml
                # The page went through, so earlier failures no longer count.
                error_times = 0
                total_res = re_total.search(re_text)
                if total_res is None:
                    # Without a total there is no way to know when to stop;
                    # keep going and the loop would never end.
                    self._logger.error("Cant get all email, something wrong")
                    break
                if int(total_res.group(1)) <= get_mail_num:
                    break
            except Exception:
                self._logger.error(
                    f"Get email info error, err:{traceback.format_exc()}")
                # A dead session or unreachable host fails on every page;
                # without this bound the loop would never terminate.
                error_times += 1
                if error_times >= max_errors:
                    break
Beispiel #5
0
 def _get_mails(self, folder: Folder) -> iter:
     """
     Walk every result page of ``folder`` on mail.tom.com and yield its mails.

     The folder is queried page by page through ``queryfolder.action``; the
     page count is taken from ``totalPage`` of the first response, which is
     also used as page 1 instead of being requested a second time.  Entries
     without a ``messageid`` and entries for which ``__getemlfile`` returns
     ``None`` are logged and skipped.  Any exception is logged and ends the
     generator.

     :param folder: folder to list; its ``name`` is sent as ``folderName``.
     :return: generator of ``EML`` objects with ``io_stream`` and
         ``stream_length`` set.
     """
     # Not read again in this method; presumably used by other requests of
     # this object - TODO confirm.
     self._headers['Cookie'] = self.task.cookie
     url = 'https://mail.tom.com/webmail/query/queryfolder.action'
     # One header block shared by all page requests.
     headers = """
         Accept: application/json, text/javascript, */*; q=0.01
         Accept-Encoding: gzip, deflate
         Accept-Language: en-US,en;q=0.9
         Cache-Control: no-cache
         Connection: keep-alive
         Content-Type: application/x-www-form-urlencoded
         Origin: https://mail.tom.com
         Pragma: no-cache
         Referer: https://mail.tom.com/webmail/main/index.action
         Sec-Fetch-Dest: empty
         Sec-Fetch-Mode: cors
         Sec-Fetch-Site: same-origin
         User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36
         X-Requested-With: XMLHttpRequest"""
     try:
         page = 1
         total_pages = 1  # provisional until the first response is parsed
         while page <= total_pages:
             html = self._ha.getstring(
                 url,
                 req_data=f"folderName={folder.name}&currentPage={page}",
                 timeout=60,
                 headers=headers)
             result = json.loads(html)['result']
             if page == 1:
                 total_pages = int(result['totalPage'])
                 if total_pages < 1:
                     break
             for mail in result['pageList']:
                 # Check the id first so nothing is downloaded for an entry
                 # that would be thrown away anyway.
                 maildata = None
                 if mail.get('messageid') is not None:
                     maildata = self.__getemlfile(mail)
                 if maildata is None:
                     self._logger.info("Skip invalid email: {}".format(
                         mail.get("subject")))
                     continue
                 eml = EML(self._clientid, self.task, self._userid,
                           mail['messageid'], folder, self.task.apptype)
                 eml.io_stream = maildata[0]
                 eml.stream_length = maildata[1]
                 yield eml
             page += 1
     except Exception:
         self._logger.error("Downloading all mails error, err: {}".format(
             traceback.format_exc()))
         return
Beispiel #6
0
 def _get_mails(self, folder: Folder) -> iter:
     """Yield every mail of ``folder`` on m0.mail.sina.com.cn as an ``EML``.

     Pages are requested from ``wa.php?a=list_mail`` starting at page 1.
     The page count is taken from ``data.pagenum`` of each response and all
     pages up to and including that number are fetched.  Listing stops when
     a response has a falsy ``result``, a non-zero ``errno`` or an empty
     mail list.  A failing page is retried; after five logged failures the
     next failure ends the generator.

     :param folder: folder to list; its ``folderid`` is sent as ``fid``.
     :return: generator of ``EML`` objects with metadata, ``io_stream`` and
         ``stream_length`` set.
     """
     error_times = 0
     current_page = 1
     all_pagenum = 1000  # placeholder until the first response reports pagenum
     mail_url = 'http://m0.mail.sina.com.cn/wa.php?a=list_mail'
     headers = {
         'Accept': "*/*",
         'Accept-Encoding': "gzip, deflate",
         'Accept-Language': "zh-CN,zh;q=0.9,en;q=0.8",
         'Cache-Control': "no-cache",
         'Content-Length': "98",
         'Content-type': "application/x-www-form-urlencoded;charset=UTF-8",
         'Cookie': self.task.cookie,
         'Host': "m0.mail.sina.com.cn",
         'Origin': "http://m0.mail.sina.com.cn",
         'Pragma': "no-cache",
         'Proxy-Connection': "keep-alive",
         'Referer': "http://m0.mail.sina.com.cn/classic/index.php",
         'User-Agent':
         "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
     }
     while True:
         try:
             # Strictly greater: the page numbered ``all_pagenum`` is the
             # last one and still has to be fetched.
             if current_page > all_pagenum:
                 break
             payload_data = {
                 'fid': '{}'.format(folder.folderid),
                 'order': 'htime',
                 'sorttype': 'desc',
                 'type': '0',
                 'pageno': '{}'.format(current_page),
                 'tag': '-1',
                 'webmail': '1'
             }
             response = requests.request("POST",
                                         mail_url,
                                         data=payload_data,
                                         headers=headers)
             mail_data_json = json.loads(response.text)
             if not mail_data_json.get('result'):
                 break
             if mail_data_json.get('errno') != 0:
                 break
             mail_data = mail_data_json.get('data')
             all_pagenum = mail_data.get('pagenum')
             maillist = mail_data.get('maillist')
             if not maillist:
                 # Covers a missing list as well as an empty one.
                 self._logger.info("No mail in mail list")
                 break
             for mail_info in maillist:
                 # Positional record: [0] id, [1] provider, [2] owner,
                 # [3] subject, [4] send timestamp.
                 mailid = mail_info[0]
                 eml = EML(self._clientid, self.task, self._userid, mailid,
                           folder, self.task.apptype)
                 eml.sendtime = datetime.datetime.fromtimestamp(
                     mail_info[4])
                 eml.subject = mail_info[3]
                 eml.provider = mail_info[1]
                 eml.owner = mail_info[2]
                 eml_info = self.__download_mails(folder.folderid, mailid)
                 eml.io_stream = eml_info[0]
                 eml.stream_length = eml_info[1]
                 time.sleep(5)  # throttle so mails are not fetched too quickly
                 yield eml
             current_page += 1
         except Exception:
             if error_times >= 5:
                 break
             self._logger.error(
                 f"Get mail or download mail error, err:{traceback.format_exc()}"
             )
             error_times += 1
             # NOTE(review): the same page is requested again, so mails
             # already yielded from it before the failure are yielded twice.
             continue
Beispiel #7
0
    def parse_mail(self, message, fo: Folder):
        """Turn a JSON mail-listing response into ``EML`` objects.

        ``message`` is parsed as JSON and ``Body.ResponseMessages.Items`` is
        walked; every entry of each message's ``RootFolder.Items`` that has
        ``ItemId.Id``, ``Subject`` and ``IsRead`` produces one ``EML``.
        Nothing is yielded when the message cannot be parsed, lacks that
        structure, or contains no response messages.

        Only metadata is set here (subject, send time, provider, state and
        ``stream_length``); no mail content is downloaded by this method.

        :param message: JSON text of the response.
        :param fo: folder the mails belong to, handed on to ``EML``.
        :return: generator of ``EML`` objects.
        """
        try:
            sj = json.loads(message)
            resp_msgs = sj["Body"]["ResponseMessages"]["Items"]
        except Exception:
            return
        if not resp_msgs:
            # Nothing to report; in particular do not emit a stray None.
            return

        for msg in resp_msgs:
            for item in msg["RootFolder"]["Items"]:
                if 'ItemId' not in item or 'Subject' not in item \
                        or 'IsRead' not in item:
                    continue
                if 'Id' not in item['ItemId']:
                    continue
                mail_id = item["ItemId"]["Id"]

                rcv_time = None
                if 'DateTimeReceived' in item:
                    # Expected form: 2019-03-09T00:52:47+08:00
                    # NOTE(review): only a literal "+08:00" offset parses;
                    # any other offset raises ValueError out of this
                    # generator.  Eight hours are added on top of the parsed
                    # wall-clock time - confirm the intended time zone.
                    rcv_time = datetime.datetime.strptime(
                        item["DateTimeReceived"],
                        '%Y-%m-%dT%H:%M:%S+08:00') + datetime.timedelta(
                            hours=8)
                size = item["Size"] if 'Size' in item else 0

                # The sender is not extracted from the item; the provider is
                # a fixed value.
                m = EML(self._clientid, self.task, self._userid, mail_id, fo,
                        self.task.apptype)
                m.subject = item["Subject"]
                m.sendtime = rcv_time
                m.provider = 'outlook.live.com'
                # state is 1 for read mails, 0 for unread ones
                if item["IsRead"]:
                    m.state = 1
                else:
                    m.state = 0
                m.stream_length = size
                yield m
Beispiel #8
0
    def _get_mails(self, folder: Folder) -> iter:
        """Yield every mail of ``folder`` on mail.qq.com as an ``EML``.

        The ``mail_list`` page is requested with an increasing ``page``
        number starting at 0.  Every ``mailid`` found in the returned HTML is
        passed to ``download_mail`` and the result is attached to a new
        ``EML``.  Paging ends with the first page whose HTML does not
        contain both next-page markers.  Request errors are not caught here
        and propagate to the caller.

        :param folder: folder to list; its ``folderid`` is sent as
            ``folderid`` and ``folderkey``.
        :return: generator of ``EML`` objects with ``io_stream`` and
            ``stream_length`` set.
        """
        url = "https://mail.qq.com/cgi-bin/mail_list"
        headers = {
            'accept':
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            'accept-encoding': "gzip, deflate, br",
            'accept-language': "zh-CN,zh;q=0.9,en;q=0.8",
            'cache-control': "no-cache,no-cache",
            'cookie': self.task.cookie,
            'pragma': "no-cache",
            # NOTE(review): the referer carries a fixed sid/r pair rather
            # than self._sid - confirm the server does not validate it.
            'referer':
            "https://mail.qq.com/cgi-bin/frame_html?sid=LHMRQd34yoCnbiJ_&r=c8d8fa46b4a22664132d900770e7f25f",
            'upgrade-insecure-requests': "1",
            'user-agent':
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
        }
        # Raw string: same pattern as before without invalid escape
        # sequences such as "\<" and "\=" in a normal string literal.
        re_mail_info = re.compile(r'<nobr t="6" mailid="(.+?)"')
        pagenum = 0
        while True:
            querystring = {
                "sid": self._sid,
                "folderid": f"{folder.folderid}",
                "folderkey": f"{folder.folderid}",
                "page": f"{pagenum}",
                "s": "inbox",
                "topmails": "0",
                "showinboxtop": "1",
                "ver": "952875.0",
                "cachemod": "maillist",
                "cacheage": "7200",
                "r": ""
            }
            response = requests.request("GET",
                                        url,
                                        headers=headers,
                                        params=querystring)
            res_text = response.text
            for mailone in re_mail_info.findall(res_text):
                eml = EML(self._clientid, self.task, self._userid, mailone,
                          folder, self.task.apptype)
                eml_info = self.download_mail(mailone)
                eml.io_stream = eml_info[0]
                eml.stream_length = eml_info[1]
                time.sleep(1)
                yield eml
            # "下一页" is the "next page" link text; without both markers
            # this was the last page.
            if "下一页" not in res_text or "nextpage" not in res_text:
                break
            pagenum += 1