コード例 #1
0
    def _get_profile(self) -> iter:
        try:
            url = 'https://outlook.live.com/owa/0/sessiondata.ashx?app=Mail'
            headers = """
Host: outlook.live.com
Connection: keep-alive
Content-Length: 0
Pragma: no-cache
Cache-Control: no-cache
Origin: https://outlook.live.com
x-js-clienttype: 2
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36
Accept: */*
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: cors
Referer: https://outlook.live.com/
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
"""
            html = self._ha.getstring(url, req_data='', headers=headers)
            self._userid = helper_str.substring(html, '"UserEmailAddress":"',
                                                '"')
            res = PROFILE(self._clientid, self.task, self.task.apptype,
                          self._userid)
            res.nickname = helper_str.substring(html, '"UserDisplayName":"',
                                                '"')
            yield res
        except Exception:
            self._logger.error('Got profile fail: {}'.format(
                traceback.format_exc()))
コード例 #2
0
    def _get_profile(self) -> iter:
        """Yield the 163 mailbox profile parsed from the main page.

        If ``self._html`` is not cached yet, the main page is requested with
        the current ``self.sid`` and cached only when it contains the
        "已发送" marker text. The user id is taken from the ``uid:'...'``
        fragment and the nickname from ``'true_name':'...'`` (optional).

        Nothing is yielded when the sid is missing, the page could not be
        obtained, or the uid is not present in the page; each case is logged.
        """
        self.__before_download()
        if self._html is None:
            if self.sid is None:
                # Without a sid the main page cannot be requested at all.
                self._logger.error("Invalid cookie")
                return
            url = "https://mail.163.com/js6/main.jsp"

            querystring = {"sid": self.sid, "df": "mail163_letter"}
            headers = '''
                Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
                Accept-Encoding: gzip, deflate, br
                Accept-Language: zh-CN,zh;q=0.9,en;q=0.8
                Cache-Control: no-cache
                Connection: keep-alive
                Host: mail.163.com
                Pragma: no-cache
                Upgrade-Insecure-Requests: 1
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
            '''
            res_text = self._ha.getstring(url, headers=headers, params=querystring)
            # Guard against an empty/None response before the substring test.
            if res_text and "已发送" in res_text:
                self._html = res_text
        if self._html is None:
            self._logger.error(
                "Get profile info error, err:main page not available")
            return
        try:
            uid = re.search(r"uid:'(.+?)',suid", self._html)
            if uid is None:
                self._logger.error(
                    "Get profile info error, err:uid not found in main page")
                return
            self._userid = uid.group(1)
            u_name = re.search(r"'true_name':'(.+?)'", self._html)
            p_data = PROFILE(self._clientid, self.task, self.task.apptype, self._userid)
            if u_name:
                p_data.nickname = u_name.group(1)
            yield p_data
        except Exception:
            self._logger.error(f"Get profile info error, err:{traceback.format_exc()}")
コード例 #3
0
ファイル: spider163vip.py プロジェクト: Octoberr/sspywork
 def _get_profile(self) -> iter:
     """Yield the 163 VIP mailbox profile parsed from the main page.

     If ``self._html`` is not cached yet, the main page is requested with
     the sid returned by ``self._get_sid()`` and cached only when it
     contains the "已发送" marker text. The user id is taken from the
     ``uid:'...'`` fragment; the nickname (``'true_name':'...'``) is
     optional and no longer causes the whole profile to be dropped when
     it is absent.

     Nothing is yielded when the sid is missing, the page could not be
     obtained, or the uid is not present in the page; each case is logged.
     """
     self.__before_download()
     if self._html is None:
         sid = self._get_sid()
         if sid is None:
             # Without a sid the URL below would be built with "sid=None".
             self._logger.error("Invalid cookie")
             return
         url = f"http://webmail.vip.163.com/js6/main.jsp?sid={sid}&df=mailvip"
         headers = {
             'Accept':
             "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
             'Accept-Encoding':
             "gzip, deflate",
             'Accept-Language':
             "zh-CN,zh;q=0.9,en;q=0.8",
             'Cache-Control':
             "no-cache",
             'Cookie':
             self.task.cookie.encode('utf-8'),
             'Host':
             "webmail.vip.163.com",
             'Pragma':
             "no-cache",
             'Proxy-Connection':
             "keep-alive",
             # NOTE(review): the Referer carries a hard-coded sid; kept as-is.
             'Referer':
             "http://webmail.vip.163.com/js6/main.jsp?sid=BAsnkgKKqetoFpUtUVKKteyfONhRRwgX&df=mailvip",
             'Upgrade-Insecure-Requests':
             "1",
             'User-Agent':
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
         }
         response = requests.get(url, headers=headers)
         res_text = response.text
         if "已发送" in res_text:
             self._html = res_text
     if self._html is None:
         self._logger.error(
             "Get profile info error, err:main page not available")
         return
     try:
         uid = re.search(r"uid:'(.+?)',suid", self._html)
         if uid is None:
             self._logger.error(
                 "Get profile info error, err:uid not found in main page")
             return
         self._userid = uid.group(1)
         u_name = re.search(r"'true_name':'(.+?)'", self._html)
         p_data = PROFILE(self._clientid, self.task, self.task.apptype,
                          self._userid)
         # The nickname is optional: a missing match must not lose the profile.
         if u_name:
             p_data.nickname = u_name.group(1)
         if self.task.account is None:
             # The de-duplication step currently requires a non-empty account.
             self.task.account = self._userid
         yield p_data
     except Exception:
         self._logger.error(
             f"Get profile info error, err:{traceback.format_exc()}")
コード例 #4
0
 def _get_profile(self) -> iter:
     """Yield the Sina mailbox profile parsed from the classic index page.

     If ``self._html`` is not cached yet, the index page is requested with
     the task cookie and cached only when it contains the three marker
     texts checked below. The user id is taken from the ``"uid":"..."``
     fragment, falling back to ``self.uname_str`` when it is absent; the
     nickname comes from ``"username":"..."`` when present.

     Any exception is logged and the generator ends without yielding.
     """
     try:
         if self._html is None:
             url = "http://m0.mail.sina.com.cn/classic/index.php"
             headers = {
                 'Accept':
                 "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                 'Accept-Encoding':
                 "gzip, deflate",
                 'Accept-Language':
                 "zh-CN,zh;q=0.9,en;q=0.8",
                 'Cache-Control':
                 "no-cache",
                 'Cookie':
                 self.task.cookie,
                 'Host':
                 "m0.mail.sina.com.cn",
                 'Pragma':
                 "no-cache",
                 'Proxy-Connection':
                 "keep-alive",
                 'Upgrade-Insecure-Requests':
                 "1",
                 'User-Agent':
                 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
             }
             response = requests.request("GET", url, headers=headers)
             res_text = response.text
             if "您好" in res_text and "设置" in res_text and "收件夹" in res_text:
                 # Report through the logger instead of dumping the whole
                 # page to stdout.
                 self._logger.info("登陆成功, cookie 有效")
                 self._html = res_text
         if self._html is None:
             self._logger.error(
                 "Get profile error, err:mailbox page not available")
             return
         # Raw strings avoid invalid-escape warnings. The username pattern
         # needs a capture group: the previous "******" form did not even
         # compile ("multiple repeat"), so no profile was ever produced.
         userid = re.search(r'"uid":"(.*?@sina\.com)"', self._html)
         username = re.search(r'"username":"(.*?)"', self._html)
         if not userid:
             self._userid = self.uname_str
         else:
             self._userid = userid.group(1)
         pdata = PROFILE(self._clientid, self.task, self.task.apptype,
                         self._userid)
         if username:
             pdata.nickname = username.group(1)
         yield pdata
     except Exception:
         self._logger.error(
             f"Get profile error, err:{traceback.format_exc()}")
コード例 #5
0
    def login_test(self) -> bool:
        """Check whether the task's credentials can log in.

        The login routine is chosen from ``self.task.tokentype`` (password,
        cookie or SMS). An ``Unknown`` token type falls back to the cookie
        login when the task carries a non-empty cookie.

        :return: the result of the selected login routine, or ``False`` when
            that routine (or the follow-up handling) raised.
        :raises Exception: when no login routine can be selected.
        """
        token_kind = self.task.tokentype
        if token_kind == ETokenType.Pwd:
            do_login = self._pwd_login
        elif token_kind == ETokenType.Cookie:
            do_login = self._cookie_login
        elif token_kind == ETokenType.Sms:
            do_login = self._sms_login
        elif token_kind == ETokenType.Unknown:
            cookie = self.task.cookie
            if cookie is None or cookie == "":
                raise Exception("Unknown tokentype, unknown login way")
            do_login = self._cookie_login
        else:
            raise Exception("Wrong login tasktype to test login")

        succeeded = False
        try:
            succeeded = do_login()
            if succeeded:
                self._restore_resources()
            elif self.task.tokentype == ETokenType.Pwd:
                # A failed password login still outputs a bare profile.
                failed_profile = PROFILE(self._clientid, self.task,
                                         self.task.apptype, self.task.account)
                self._output_data_and_log_output_data(failed_profile)
        except Exception:
            self._logger.error("Login test error, err:{}".format(
                traceback.format_exc()))
        return succeeded
コード例 #6
0
ファイル: spider189.py プロジェクト: Octoberr/sspywork
 def _get_profile(self):
     """Yield the 189 mailbox profile from the ``showAccount.do`` endpoint.

     The endpoint is POSTed with the task cookie; on HTTP 200 the JSON
     field ``accountEmail`` is suffixed with ``@189.cn`` and used as both
     ``self._userid`` and the profile e-mail. ``self.task.account`` is
     filled with the user id when it is still ``None``.

     Nothing is yielded on a non-200 status, a missing ``accountEmail`` or
     any exception; each case is logged as an error.
     """
     url = 'https://webmail30.189.cn/w2/option/showAccount.do'
     now_time = datetime.datetime.now(pytz.timezone('Asia/Shanghai')).timestamp()
     # A single Content-Type is declared: the body is form-encoded, so the
     # conflicting multipart "content-type" entry was dropped.
     headers = {
         'Accept': "application/json, text/javascript, */*; q=0.01",
         'Accept-Encoding': "gzip, deflate, br",
         'Accept-Language': "zh-CN,zh;q=0.9,en;q=0.8",
         'Cache-Control': "no-cache",
         'Connection': "keep-alive",
         'Content-Length': "27",
         'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
         'Cookie': self.task.cookie,
         'csId': "3cf18c9d43a2c30e59805d3c3d6ee122",
         'Host': "webmail30.189.cn",
         'Origin': "https://webmail30.189.cn",
         'Pragma': "no-cache",
         'Referer': f"https://webmail30.189.cn/w2/logon/signOn.do?t={int(now_time*1000)}",
         'Sec-Fetch-Mode': "cors",
         'Sec-Fetch-Site': "same-origin",
         'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36",
         'X-Requested-With': "XMLHttpRequest"
     }
     data = {'noCache': f'0.2735603709650913{random.randint(0, 9)}'}
     try:
         response = requests.post(url, headers=headers, data=data)
         if response.status_code != 200:
             self._logger.error(
                 f"Get profile error, err:unexpected status code {response.status_code}")
             return
         res_json = json.loads(response.text)
         userid = res_json.get('accountEmail')
         if not userid:
             self._logger.error(
                 "Get profile error, err:accountEmail missing in response")
             return
         self._userid = userid + '@189.cn'
         p_data = PROFILE(self._clientid, self.task, self.task.apptype, self._userid)
         if self.task.account is None:
             # The de-duplication step currently requires a non-empty account.
             self.task.account = self._userid
         p_data.email = self._userid
         # The response also carries a base64 avatar ('smtpOpenUrl');
         # it is deliberately not collected for now.
         yield p_data
     except Exception:
         # "except Exception" (not a bare except) so that closing the
         # generator at the yield is not reported as a failure.
         self._logger.error(f"Get profile error, err:{traceback.format_exc()}")
コード例 #7
0
 def __get_profile(self, sqlpath: str):
     """Yield the account owner's profile read from the ``users`` table.

     Selects the rows whose ``self`` column equals 1 and uses the first
     one. Its ``id`` becomes ``self._userid``; ``phone`` and a nickname
     built from ``last_name`` + ``first_name`` (or ``username`` when both
     are missing) are copied onto the profile when present.

     :param sqlpath: database path handed to ``self._select_data``.
     :return: generator yielding at most one profile; nothing is yielded
         (and an error is logged) when no usable row exists.
     """
     sql = '''
     select * from users where self=?
     '''
     rows = self._select_data(sqlpath, sql, (1, ))
     if len(rows) == 0:
         self._logger.error("No profile in db")
         return
     own_row = rows[0]
     own_id = own_row.get('id')
     if own_id is None:
         self._logger.error("No profile in db")
         return
     self._userid = own_id
     p_data = PROFILE(self._clientid, self.task, self.task.apptype,
                      self._userid)
     phone = own_row.pop('phone')
     if not (phone is None or phone == ''):
         p_data.phone = self._output_format_str(phone)
     p_data.account = self._phone
     # Name parts are concatenated onto a bytes accumulator, last name first.
     nickname = b''
     for name_key in ('last_name', 'first_name'):
         if own_row.get(name_key) is not None:
             nickname += own_row.pop(name_key)
     # Fall back to the username only when no name part was found.
     if nickname == b'' and own_row.get('username') is not None:
         nickname += own_row.pop('username')
     if nickname != b'':
         p_data.nickname = self._output_format_str(nickname)
     yield p_data
コード例 #8
0
ファイル: spider10086.py プロジェクト: Octoberr/sspywork
    def _get_profile(self):
        """Yield the 139 mailbox profile from the ``QueryUserInfo`` service.

        The user number and sid are read from the cookie (both required),
        then the address-book service is POSTed with an XML payload. When
        the response reports ``ResultCode == '0'`` and
        ``ResultMsg == 'successful'``, the first ``UserInfo`` entry is mapped
        onto a profile whose user id is ``<usernumber>@139.com``.

        Nothing is yielded when the cookie values are missing, the response
        cannot be parsed, or the service reports a failure.
        """
        usernumber = self._get_cookie_usernumber()
        if usernumber is None:
            self._logger.error("Cannot get user number from cookie!")
            return
        sid = self._get_cookie_sid()
        if sid is None:
            self._logger.error("Cannot get sid from cookie!")
            return
        cguid = self._get_cookie_cguid()
        userinfo_url = 'https://smsrebuild1.mail.10086.cn/addrsvr/QueryUserInfo' \
                    '?sid={}&formattype=json&cguid={}'.format(sid, cguid)
        payload = '<QueryUserInfo><UserNumber>86{}</UserNumber></QueryUserInfo>'.format(
            usernumber)
        payload = payload.encode('ascii')
        headers = {
            'accept': "*/*",
            'accept-encoding': "gzip, deflate, br",
            'accept-language': "zh-CN,zh;q=0.9",
            'cache-control': "no-cache",
            'cookie': self.task.cookie,
            'pragma': "no-cache",
            'referer': "https://smsrebuild1.mail.10086.cn//proxy.htm",
            'origin': 'https://smsrebuild1.mail.10086.cn',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent':
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/70.0.3538.102 Safari/537.36",
        }
        try:
            response_text = requests.request("POST",
                                             userinfo_url,
                                             data=payload,
                                             headers=headers).text
            # The JSON document follows a "QueryUserInfoResp=" prefix.
            matched = re.search(r'QueryUserInfoResp=(.*)', response_text)
            if matched is None:
                self._logger.error(
                    "Download 10086 profile error,err:QueryUserInfoResp not found in response")
                return
            userinfo_json = json.loads(matched.group(1))
            if userinfo_json["ResultCode"] == '0' and userinfo_json[
                    "ResultMsg"] == 'successful':
                userinfo_list = userinfo_json.get('UserInfo')
                if not userinfo_list:
                    self._logger.error(
                        "Download 10086 profile error,err:empty UserInfo in response")
                    return
                userinfo = userinfo_list[0]
                self._userid = usernumber + '@139.com'
                profile = PROFILE(self._clientid, self.task, self.task.apptype,
                                  self._userid)
                profile.nickname = userinfo.get('c')
                gender_dict = {
                    '0': EGender.Male,
                    '1': EGender.Female,
                    '2': EGender.Unknown
                }
                # An unrecognised gender code maps to Unknown instead of
                # raising KeyError and losing the whole profile.
                profile.gender = gender_dict.get(
                    userinfo['f'], EGender.Unknown) if 'f' in userinfo else None
                # The address is only set when all four parts are present.
                if 'g' in userinfo and 'h' in userinfo and 'j' in userinfo and 'k' in userinfo:
                    profile.address = userinfo['g'] + userinfo['h'] + userinfo[
                        'j'] + userinfo['k']
                profile.phone = userinfo.get('p')
                profile.birthday = userinfo.get('o')
                profile.email = userinfo.get('y')
                yield profile
        except Exception:
            # "except Exception" (not a bare except) so that closing the
            # generator at the yield is not reported as a failure.
            self._logger.error(
                f"Download 10086 profile error,err:{traceback.format_exc()}")
コード例 #9
0
ファイル: spidertom.py プロジェクト: Octoberr/sspywork
 def _get_profile(self):
     """Yield the TOM mailbox profile.

     The index page is fetched first and passed to ``self.__get_userid``;
     then ``getUserProfile.action`` is requested with a millisecond
     timestamp. An empty/None reply yields a bare profile; otherwise the
     ``result`` object of the JSON reply fills birthday, nickname, gender,
     region, address and phone. An empty ``result`` yields nothing.

     Any exception is logged and the generator ends without yielding.
     """
     self._headers['Cookie'] = self.task.cookie
     # Both requests send the same raw header block.
     browser_headers = """
         Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
         Accept-Encoding: gzip, deflate
         Accept-Language: en-US,en;q=0.9
         Cache-Control: no-cache
         Connection: keep-alive
         Pragma: no-cache
         Sec-Fetch-Dest: document
         Sec-Fetch-Mode: navigate
         Sec-Fetch-Site: none
         Sec-Fetch-User: ?1
         Upgrade-Insecure-Requests: 1
         User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"""
     try:
         index_page = self._ha.getstring(self._indexurl,
                                         headers=browser_headers)
         self.__get_userid(index_page)

         shanghai_now = datetime.now(pytz.timezone('Asia/Shanghai'))
         unixtime = int(shanghai_now.timestamp() * 1000)
         pindex = 'https://mail.tom.com/webmail/preference/getUserProfile.action'
         formdata = {'_ts': str(unixtime)}
         reply = self._ha.getstring(pindex,
                                    req_data='',
                                    json=formdata,
                                    headers=browser_headers)

         if reply is None or reply == "":
             # No profile details available: emit a profile with the id only.
             yield PROFILE(self._clientid, self.task, self.task.apptype,
                           self._userid)
             return

         prodata = json.loads(reply).get('result', {})
         if len(prodata) == 0:
             return
         profile = PROFILE(self._clientid, self.task, self.task.apptype,
                           self._userid)
         profile.birthday = prodata.get('birthday')
         profile.nickname = prodata.get('name')
         gender = prodata.get('gender')
         # Only the codes '2' and '3' are mapped; anything else leaves
         # the gender untouched.
         if gender == '2':
             profile.gender = EGender.Male
         elif gender == '3':
             profile.gender = EGender.Female
         profile.region = prodata.get('province')
         profile.address = prodata.get('city')
         profile.phone = prodata.get('mobileNumber')
         yield profile
     except Exception:
         self._logger.error("Get profile error, err: {}".format(
             traceback.format_exc()))
コード例 #10
0
    def _read_datas(self, dbpath) -> iter:
        """Read chat data from the ``msg`` table and yield the parsed objects.

        For every row with non-empty content a chat-log entry is built; the
        resource files of picture/audio messages are yielded as they are
        found. After all rows are processed the generator yields, in order:
        the chat-log container (if it has entries), the owner profile (if a
        user id is known) and the contact list (if any contact was seen).

        The conversation id is split on ``:``; its last part is taken as the
        owner's user id (when not known yet) and the part before it as the
        contact/session id.

        :param dbpath: database path handed to ``self._select_data``.
        """
        try:
            sql = '''SELECT * FROM msg
            '''
            res_lines = self._select_data(dbpath, sql)
            if len(res_lines) == 0:
                return
            contacts_all = {}
            logs_all = ICHATLOG(self._clientid, self.task, self.task.apptype)
            for line in res_lines:
                # Skip rows without content. The previous "is None and == ''"
                # test could never be true.
                content = line.get('content')
                if content is None or content == '':
                    continue
                try:
                    conversionid = line.get('conversation_id').split(':')
                    # split() never returns an empty list, so require the two
                    # parts that are indexed below.
                    if len(conversionid) < 2:
                        continue
                    if self._userid == '':
                        self._userid = conversionid[-1]
                    contact_id = conversionid[-2]
                    contacts_all[contact_id] = 1
                    messagetype: EResourceType = self._resources_type.get(
                        line['type'])
                    sessionid = conversionid[-2]
                    messageid = line['msg_uuid']
                    senderid = line['sender']

                    # created_time is divided by 1000 before conversion
                    # (presumably milliseconds since the epoch).
                    cr_time = time.strftime(
                        '%Y-%m-%d %H:%M:%S',
                        time.localtime(line['created_time'] / 1000))
                    log_one = ICHATLOG_ONE(self.task, self.task.apptype,
                                           self._userid, messagetype.value,
                                           sessionid, 1, messageid, senderid,
                                           cr_time)
                    if messagetype in (EResourceType.Picture,
                                       EResourceType.Audio):
                        for res_file in self._get_resources(line, messagetype):
                            log_one.append_resource(res_file)
                            yield res_file
                    try:
                        json_text = json.loads(content)
                    except Exception as err:
                        self._logger.error(
                            "Content data is not json, err:{}".format(err))
                        continue
                    # Plain text message body.
                    if json_text.get('text') is not None:
                        log_one.content = json_text.get('text')
                    logs_all.append_innerdata(log_one)
                except Exception:
                    self._logger.error(
                        "Parse one data line from db error: {} {}".format(
                            self.uname_str, traceback.format_exc()))
            if logs_all.innerdata_len > 0:
                yield logs_all
            if self._userid != '':
                p_data = PROFILE(self._clientid, self.task, self.task.apptype,
                                 self._userid)
                p_data.account = self._phone
                p_data.phone = self._phone
                yield p_data
            if len(contacts_all) != 0:
                ct_all = CONTACT(self._clientid, self.task, self.task.apptype)
                for ct in contacts_all:
                    ct_one = CONTACT_ONE(self._userid, ct, self.task,
                                         self.task.apptype)
                    ct_one.isfriend = 1
                    ct_one.isdeleted = 0
                    ct_all.append_innerdata(ct_one)
                yield ct_all
        except Exception:
            self._logger.error("Read datas from db error: {} {}".format(
                self.uname_str, traceback.format_exc()))
コード例 #11
0
ファイル: spiderqq.py プロジェクト: Octoberr/sspywork
    def _get_profile(self) -> iter:
        """Yield the QQ mailbox profile parsed from the logged-in index page.

        If ``self._html`` is not cached yet, the login URL is requested with
        the task cookie. The page is cached (and ``self._sid`` extracted from
        it) only when it contains all three marker texts checked below.
        The user id comes from the ``useraddr`` span; the nickname from the
        ``useralias`` element when present.

        Nothing is yielded when the page or the user id cannot be obtained;
        each case is logged as an error.
        """
        if self._html is None:
            url = "https://mail.qq.com/cgi-bin/login"

            querystring = {
                "vt": "passport",
                "vm": "wpt",
                "ft": "loginpage",
                "target": ""
            }

            payload = ""
            headers = {
                'accept':
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                'accept-encoding':
                "gzip, deflate, br",
                'accept-language':
                "zh-CN,zh;q=0.9,en;q=0.8",
                'cache-control':
                "no-cache,no-cache",
                'cookie':
                self.task.cookie,
                'pragma':
                "no-cache",
                'referer':
                "https://mail.qq.com/",
                'upgrade-insecure-requests':
                "1",
                'user-agent':
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
            }

            try:
                response = requests.request("GET",
                                            url,
                                            data=payload,
                                            headers=headers,
                                            params=querystring)

                index_txt = response.text
                # All three markers must be present in the page. The third
                # one used to be a bare (always truthy) string literal.
                if ("设置" in index_txt and "退出" in index_txt
                        and "关联其他QQ邮箱" in index_txt):
                    self._logger.info("登陆成功,cookie有效")
                    self._html = index_txt
                    sid = re.search(r"sid=(.+?)&", self._html)
                    if sid is not None:
                        self._sid = sid.group(1)
                    else:
                        self._logger.error(
                            "Get profile error, err:sid not found in index page")
            except Exception:
                self._logger.error(
                    f"Get profile error, err:{traceback.format_exc()}")
        if self._html is None:
            self._logger.error(
                "Get profile data error, err:index page not available")
            return
        try:
            s_userid = re.search(
                r'<span id="useraddr" .*?>(.+?)</span>', self._html)
            if s_userid is None:
                self._logger.error(
                    "Get profile data error, err:useraddr not found in index page")
                return
            self._userid = s_userid.group(1)
            s_username = re.search(
                r'<b id="useralias">(.+?)</b>', self._html)
            p_data = PROFILE(self._clientid, self.task, self.task.apptype,
                             self._userid)
            # The nickname is optional: a missing match must not lose the profile.
            if s_username is not None:
                p_data.nickname = s_username.group(1)
            yield p_data
        except Exception:
            self._logger.error(
                f"Get profile data error, err:{traceback.format_exc()}")