# Example 1 (scrape marker; commented out so the file parses)
class SpiderTaoBao(SpiderShoppingBase):
    """Cookie-based spider for taobao.com.

    Uses a cookie supplied by the task (no password login) to scrape the
    logged-in user's profile, purchase history, and to log the session out.
    """

    def __init__(self, task, appcfg, clientid):
        super(SpiderTaoBao, self).__init__(task, appcfg, clientid)
        # Timestamp in Taobao's local timezone (Asia/Shanghai).
        self.time = datetime.datetime.now(
            pytz.timezone('Asia/Shanghai')).strftime('%Y-%m-%d %H:%M:%S')
        self._ha = HttpAccess()
        self.userid = ''
        self.cookie = self.task.cookie

    def _cookie_login(self):
        """Check that the task cookie still authenticates.

        Loads the account-security page and scrapes the account name; on
        success stores '<account>-taobao' in ``self.userid``.

        Returns:
            bool: True when the account name could be scraped, else False.
        """
        self._ha._managedCookie.add_cookies("taobao.com", self.cookie)

        try:
            response = self._ha.getstring(
                'https://member1.taobao.com/member/fresh/account_security.htm')
            soup1 = BeautifulSoup(response, 'html.parser')
            account = soup1.find_all(
                "span", {"class": "default grid-msg "})[0].get_text()
            if account:
                self.userid = account + '-taobao'
                return True
            else:
                return False
        except Exception:
            # Any network/parse failure means the cookie is not usable.
            return False

    # NOTE(review): a ~75-line commented-out password-login flow
    # (_needcode / pwd_login) was removed here. It was dead code, and one of
    # its long data lines had lost its leading '#' in a line wrap, which made
    # this module unparseable. Recover it from version control if password
    # login is ever reimplemented.

    def _get_profile(self):
        """Yield the user's PROFILE (and avatar RESOURCES, if any).

        Scrapes the base-info page and the account-security page. Each
        optional field is guarded individually so one missing element does
        not lose the rest. Generator; failures are logged and swallowed.
        """
        try:
            url = 'https://i.taobao.com/user/baseInfoSet.htm'
            headers = """
accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
accept-encoding: gzip, deflate, br
accept-language: zh-CN,zh;q=0.9
cookie: {cookie}
upgrade-insecure-requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
""".format(cookie=self.cookie)
            r = self._ha.getstring(url, headers=headers)
            soup = BeautifulSoup(r, 'html.parser')
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            detail = {}

            # Avatar may be absent; don't let that abort the whole profile.
            try:
                photourl = soup.select_one('.pf-avatar img.png')['src']
            except Exception:
                photourl = None
            try:
                res.nickname = soup.select_one('#J_uniqueName')['value']
            except Exception:
                pass
            try:
                detail['fullname'] = soup.select_one('#J_realname')['value']
            except Exception:
                pass
            try:
                # Checked radio button: '0' = male, '1' = female.
                gender = soup.select_one(
                    '.except [checked="checked"]')['value']
                if gender == '0':
                    res.gender = EGender.Male
                elif gender == '1':
                    res.gender = EGender.Female
                else:
                    res.gender = EGender.Unknown
            except Exception:
                pass
            try:
                year = soup.select_one(
                    '#J_Year [selected="selected"]')['value']
                month = soup.select_one(
                    '#J_Month [selected="selected"]')['value']
                data = soup.select_one(
                    '#J_Date [selected="selected"]')['value']
                res.birthday = year + '-' + month + '-' + data
            except Exception:
                pass

            response = self._ha.getstring(
                'https://member1.taobao.com/member/fresh/account_security.htm')
            soup1 = BeautifulSoup(response, 'html.parser')
            try:
                res.account = soup1.find_all(
                    "span", {"class": "default grid-msg "})[0].get_text()
            except Exception:
                pass
            try:
                res.email = soup1.find_all(
                    "span", {"class": "default grid-msg "})[1].get_text()
            except Exception:
                pass
            try:
                res.phone = soup1.find("span", {
                    "class": "default grid-msg"
                }).get_text().strip()
            except Exception:
                pass

            # Resolve home/current addresses from the district table.
            try:
                liveDivisionCode = soup.select_one(
                    'input#liveDivisionCode')['value']
                if liveDivisionCode:
                    detail['hometown'] = self._get_address(liveDivisionCode)
            except Exception:
                pass

            try:
                divisionCode = soup.select_one('input#divisionCode')['value']
                if divisionCode:
                    res.address = self._get_address(divisionCode)
            except Exception:
                pass
            # Serialize whatever detail we collected, even when the
            # divisionCode lookup above failed.
            res.detail = json.dumps(detail)
            if photourl:
                photourl = 'https:' + photourl
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)

                resp_stream: ResponseIO = self._ha.get_response_stream(
                    photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            self._logger.error('{} got profile fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        """Yield one ISHOPPING_ONE per purchase order.

        Pages through the asyncBought JSON endpoint (15 orders per page)
        until a page comes back with no 'mainOrders'. Generator; failures
        are logged and swallowed.
        """
        try:
            self._ha._managedCookie.add_cookies("taobao.com", self.cookie)
            url = 'https://buyertrade.taobao.com/trade/itemlist/asyncBought.htm?action=itemlist/BoughtQueryAction&event_submit_do_query=1&_input_charset=utf8'
            i = 0
            while True:
                i += 1
                formdata = """dateBegin=0&dateEnd=0&options=0&pageNum={i}&pageSize=15&queryOrder=desc&prePageNo={j}""".format(
                    i=i, j=i - 1)
                headers = """
accept: application/json, text/javascript, */*; q=0.01
accept-encoding: gzip, deflate, br
accept-language: zh-CN,zh;q=0.9
cache-control: no-cache
content-type: application/x-www-form-urlencoded; charset=UTF-8
origin: https://buyertrade.taobao.com
pragma: no-cache
referer: https://buyertrade.taobao.com/trade/itemlist/list_bought_items.htm?spm=a1z02.1.a2109.d1000368.1c2d782dHeADbf&nekot=1470211439694
user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
x-requested-with: XMLHttpRequest
"""
                # cookie: {cookie}
                response = self._ha.getstring(url,
                                              headers=headers,
                                              req_data=formdata)
                html = json.loads(response)
                mainorders = html.get('mainOrders')
                if mainorders:
                    for order in mainorders:
                        try:
                            dic = {}
                            orderid = order.get('id')
                            dic['id'] = order.get('id')
                            dic['shopname'] = order.get('seller').get(
                                'shopName')

                            ordertime = order.get('orderInfo').get(
                                'createTime')
                            dic['actualFee'] = order.get('payInfo').get(
                                'actualFee')
                            dic['status'] = order.get('statusInfo').get('text')
                            goods = []
                            for item in order['subOrders']:
                                di = {}
                                try:
                                    di['title'] = item.get('itemInfo').get(
                                        'title')
                                except Exception:
                                    pass
                                try:
                                    di['quantity'] = item['quantity']
                                except Exception:
                                    pass
                                try:
                                    di['skuText'] = item['itemInfo']['skuText']
                                except Exception:
                                    pass
                                try:
                                    di['priceInfo'] = item['priceInfo']
                                except Exception:
                                    pass
                                goods.append(di)
                            dic['goods'] = goods
                            res_one = ISHOPPING_ONE(self.task,
                                                    self._appcfg._apptype,
                                                    self.userid, orderid)
                            res_one.ordertime = ordertime
                            res_one.append_orders(dic)
                            res_one.host = 'www.taobao.com'
                            yield res_one
                        except Exception:
                            # One malformed order must not stop the page.
                            pass
                    # Be polite between pages.
                    time.sleep(1)
                else:
                    break
        except Exception:
            self._logger.error('{} got order fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_address(self, adressid):
        """Resolve a Taobao district code to a human-readable address.

        Downloads the DistrictSelector JS table and walks parent links
        upward (district -> city -> province), prepending each name.
        Returns '' / [] when the id is missing or the root code '1'.
        """
        table = self._ha.getstring(
            'https://www.taobao.com/home/js/sys/districtselector.js?t=20140318.js'
        )
        patdz = re.compile(
            r'TB.form.DistrictSelector._tb_ds_data=(.*?);TB.form.Di')
        dzdata = patdz.findall(table)[0]
        jsdata = json.loads(dzdata)
        res = []
        if adressid is not None and adressid != '1':
            dz1 = jsdata[adressid]
            res = dz1[0]
            if dz1[1] != '1':
                dz2 = jsdata[dz1[1]]
                res = dz2[0] + res
                if dz2[1] != '1':
                    dz3 = jsdata[dz2[1]]
                    res = dz3[0] + res
        return res

    def _logout(self):
        """Request the logout URL and verify the session is really gone.

        Returns:
            bool: True when the cookie no longer authenticates after the
            logout request; False on error or when the session survives.
        """
        res = False
        try:
            url = 'https://login.taobao.com/member/logout.jhtml?spm=a1z02.1.754894437.7.7016782dPtkeCQ&f=top&out=true&redirectURL=https%3A%2F%2Fi.taobao.com%2Fmy_taobao.htm%3Fspm%3Da2e15.8261149.754894437.3.555929b48sljpe%26ad_id%3D%26am_id%3D%26cm_id%3D%26pm_id%3D1501036000a02c5c3739%26nekot%3DdGI4NTgzMzYzXzAw1553481160507'
            self._ha.getstring(url,
                               headers="""
Host: login.taobao.com
Connection: keep-alive
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Referer: https://i.taobao.com/my_taobao.htm?spm=a2e15.8261149.754894437.3.555929b48sljpe&ad_id=&am_id=&cm_id=&pm_id=1501036000a02c5c3739&nekot=dGI4NTgzMzYzXzAw1553481160507
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
""")
            # Logout succeeded only if the cookie can no longer log in.
            # (The original `if not res: res = True` returned True even when
            # _cookie_login still succeeded.)
            res = not self._cookie_login()
        except Exception:
            self._logger.error('login out fail:{}'.format(
                traceback.format_exc()))
        return res
# Example 2 (scrape marker; commented out so the file parses)
class SpiderTongCheng(SpiderTravelBase):
    """Cookie-based spider for ly.com (Tongcheng travel).

    Uses a cookie supplied by the task to scrape the member profile and
    the travel-order history (scenic spots, flights, hotels).
    """

    def __init__(self, task, appcfg, clientid):
        super(SpiderTongCheng, self).__init__(task, appcfg, clientid)
        self.cookie = self.task.cookie
        self.ha = HttpAccess()
        if self.cookie:
            self.ha._managedCookie.add_cookies('ly.com', self.cookie)

    def _cookie_login(self):
        """Check that the task cookie still authenticates.

        Loads the member-information page; sets ``self.userid`` from the
        cookie's userid field when present, otherwise from the phone
        number on the page. Returns True on success, False otherwise.
        """
        url = 'https://member.ly.com/information'
        headers = """
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Host: member.ly.com
Pragma: no-cache
Referer: https://member.ly.com/order
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
        html = self.ha.getstring(url,
                                 headers=headers).replace('\r', '').replace(
                                     '\n', '').replace('\t', '')
        html = re.sub(r'\s{2,}', '', html)
        soup = BeautifulSoup(html, 'lxml')
        try:
            phone = soup.select_one("#tel").get_text('-')
            phone = phone.split('-')[1]
            userid = substring(self.cookie, 'userid=', '&')
            if userid:
                self.userid = userid + '-tongcheng'
                return True
            elif phone:
                self.userid = phone + '-tongcheng'
                return True
            else:
                return False
        except Exception:
            # Network/parse failure: treat the cookie as invalid.
            return False

    def _get_profile(self):
        """Yield the user's PROFILE (and avatar RESOURCES, if any).

        Each optional field is guarded individually so one missing element
        does not lose the rest. Generator; failures are logged and
        swallowed.
        """
        try:
            url = 'https://member.ly.com/information'
            headers = """
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Host: member.ly.com
Pragma: no-cache
Referer: https://member.ly.com/order
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers).replace(
                '\r', '').replace('\n', '').replace('\t', '')
            html = re.sub(r'\s{2,}', '', html)
            soup = BeautifulSoup(html, 'lxml')
            detail = {}
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            try:
                res.nickname = soup.select_one('#txtsmallName')['value']
            except Exception:
                pass
            try:
                detail['realname'] = soup.select_one('#txtName')['value']
            except Exception:
                pass
            try:
                email = soup.select_one("#email").get_text('-')
                # Fixed typo: was `res.emali` (silently set a dead attribute).
                res.email = email.split('-')[1]
            except Exception:
                pass
            try:
                phone = soup.select_one("#tel").get_text('-')
                res.phone = phone.split('-')[1]
            except Exception:
                pass
            try:
                # Count the name="sex" radio inputs before the checked one:
                # one radio seen means the first (male) is checked, two means
                # the second (female) is checked.
                text = substring(html, 'class="sex1"',
                                 'checked="checked"')
                sexnum = re.findall(r'name="sex"', text)
                if len(sexnum) == 1:
                    res.gender = EGender.Male
                elif len(sexnum) == 2:
                    res.gender = EGender.Female
            except Exception:
                pass
            try:
                detail['profession'] = soup.select_one('#ddlZhiye').get_text()
            except Exception:
                pass
            try:
                # Fixed typo: was `res.bithday` (dead attribute); sibling
                # spiders set `res.birthday`.
                res.birthday = soup.select_one('#hfYear')['value'] + '-' + soup.select_one('#hfMonth')['value'] + '-' + \
                           soup.select_one('#hfDay')['value']
            except Exception:
                pass
            try:
                detail['QQ'] = soup.select_one('#txtQQ')['value']
            except Exception:
                pass

            res.append_details(detail)
            # Avatar may be absent; don't let that abort the whole profile.
            try:
                photourl = soup.select_one('#contentHead img.png')['src']
            except Exception:
                photourl = None
            if photourl:
                photourl = 'https:' + photourl
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)

                resp_stream: ResponseIO = self.ha.get_response_stream(photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            self._logger.error('{} got profile fail {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        """Yield one ITRAVELORDER_ONE per travel order.

        Pages through the order-list JSON endpoint (10 orders per page)
        until OrderListCount is exhausted, fetching per-order detail via
        ``_order_detail``. Generator; failures are logged and swallowed.
        """
        try:
            page = 0
            while True:
                page += 1
                url = 'https://member.ly.com/orderajax/default?OrderFilter=0&DateType=0&PageIndex={}'.format(
                    page)
                headers = """
Accept: */*
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Host: member.ly.com
Pragma: no-cache
Referer: https://member.ly.com/order
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
X-Requested-With: XMLHttpRequest"""
                html = self.ha.getstring(url, headers=headers)
                jshtml = json.loads(html)
                OrderDetailList = jshtml['ReturnValue']['OrderDetailList']
                if OrderDetailList:
                    for OrderDetail in OrderDetailList:
                        try:
                            orderid = OrderDetail['OrderId']
                            order = {}
                            order['title'] = OrderDetail['ProductName']
                            order['FirstDesc'] = OrderDetail['FirstDesc']
                            order['price'] = OrderDetail['ProductPrice']
                            order['status'] = OrderDetail['ChieseOrderStatus']
                            order['SerialId'] = OrderDetail['SerialId']
                            order['ExtendData'] = OrderDetail['ExtendData']
                            OrderDetailUrl = OrderDetail['OrderDetailUrl']
                            ordertime, detail = self._order_detail(
                                OrderDetailUrl)
                            if detail:
                                order['detail'] = detail
                            res_one = ITRAVELORDER_ONE(self.task,
                                                       self._appcfg._apptype,
                                                       self.userid, orderid)
                            res_one.append_orders(order)
                            res_one.ordertime = ordertime
                            res_one.host = 'www.ly.com'
                            yield res_one
                        except Exception:
                            # One malformed order must not stop the page.
                            pass
                OrderListCount = jshtml['ReturnValue']['OrderListCount']
                if OrderListCount <= 10 * page:
                    break
        except Exception:
            self._logger.error('{} got order fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _order_detail(self, orderurl):
        """Fetch an order-detail page and extract its creation time.

        Returns:
            tuple[str, dict]: (ordertime, detail dict) — both may be
            empty when the order type is not recognized.
        """
        orderurl = 'https:' + orderurl
        headers = """
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Host: member.ly.com
Pragma: no-cache
Referer: https://member.ly.com/order
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
        html = self.ha.getstring(orderurl,
                                 headers=headers).replace('\n', '').replace(
                                     '\t', '')
        soup = BeautifulSoup(html, 'lxml')
        # Train-ticket pages are fully JS-encrypted and not handled yet.

        # Scenic-spot tickets
        dic = {}
        ordertime = ''
        if re.findall(r'Scenery', orderurl):
            ordertime = substring(html, '创建时间:', ' <')
            dic['Contacts'] = soup.select_one('.infor_box table').get_text(' ')

        # Flights
        elif re.findall(r'Flight', orderurl):
            ordertime = soup.select_one('.orderTime span').get_text('')
            dic['Passenger'] = soup.select_one('.no_bottom.infoLine').get_text(
                ' ')
            dic['Contacts'] = soup.select_one('.contactPerson').get_text(' ')

        # Hotels
        elif re.findall(r'hotel', orderurl):
            ordertime = soup.select_one('.time-point').get_text(' ')
            dic['checkinPerson'] = soup.select_one(
                '.checkin-info.part').get_text(' ')

        return ordertime, dic
# Example 3 (scrape marker; commented out so the file parses)
class SpiderMafengwo(SpiderTravelBase):
    """Spider for mafengwo.cn: registration probe, cookie login, profile and
    travel-order scraping. All HTTP traffic goes through ``self.ha``."""

    def __init__(self, task, appcfg, clientid):
        super(SpiderMafengwo, self).__init__(task, appcfg, clientid)
        self.ha = HttpAccess()
        if self.task.cookie:
            self.ha._managedCookie.add_cookies('mafengwo.cn', self.task.cookie)

    def _check_registration(self):
        """
        Check whether the task's phone number is registered on mafengwo and
        write the outcome back via ``_write_task_back``.
        """
        t = time.strftime('%Y-%m-%d %H:%M:%S')
        try:
            phone = self.task.phone
            # Step 1: load the registration page to harvest the anti-CSRF token.
            url = 'https://passport.mafengwo.cn/regist.html'
            headers = """
Host: passport.mafengwo.cn
Connection: keep-alive
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Referer: http://www.mafengwo.cn/?mfw_chid=3546
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
"""
            html = self.ha.getstring(url, headers=headers)
            token = substring(html, 'name="token" value="', '"')

            # Step 2: submit the phone number; an "alert-danger" box in the
            # response means the account already exists.
            url = 'https://passport.mafengwo.cn/regist'
            headers = """
Host: passport.mafengwo.cn
Connection: keep-alive
Content-Length: 59
Cache-Control: max-age=0
Origin: https://passport.mafengwo.cn
Upgrade-Insecure-Requests: 1
Content-Type: application/x-www-form-urlencoded
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Referer: https://passport.mafengwo.cn/regist.html
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
"""
            data = f"token={token}&passport={phone}"
            html = self.ha.getstring(url, headers=headers, req_data=data)
            isreg = re.findall(r'<div class="alert alert-danger">', html)
            if isreg:
                self._write_task_back(ECommandStatus.Succeed, 'Registered', t,
                                      EBackResult.Registerd)
            else:
                self._write_task_back(ECommandStatus.Succeed, 'Not Registered',
                                      t, EBackResult.UnRegisterd)

        except Exception:
            self._logger.error('Check registration fail: {}'.format(
                traceback.format_exc()))
            self._write_task_back(ECommandStatus.Failed,
                                  'Check registration fail', t,
                                  EBackResult.CheckRegisterdFail)
        return

    def _cookie_login(self):
        """Validate the stored cookie by reading the account-security page.

        On success ``self.userid`` is set to ``'<UID>-mafengwo'`` and True is
        returned; False otherwise.
        """
        try:
            url = 'https://passport.mafengwo.cn/setting/security/'
            headers = """
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Host: passport.mafengwo.cn
Pragma: no-cache
Referer: https://passport.mafengwo.cn/setting/
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers)
            userid = substring(html, '"UID":', ',')
            if userid:
                self.userid = userid + '-mafengwo'
                return True
            else:
                return False
        except Exception:
            self._logger.error('Mafengwo cookie login error: {}'.format(
                traceback.format_exc()))
            return False

    def _get_profile(self):
        """Yield the account PROFILE (nickname, gender, city, birthday, email,
        phone, avatar) scraped from the mafengwo settings pages; the avatar is
        additionally yielded as a RESOURCES item."""
        try:
            url = 'https://passport.mafengwo.cn/setting/'
            headers = """
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Host: passport.mafengwo.cn
Pragma: no-cache
Referer: https://www.mafengwo.cn
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers)
            soup = BeautifulSoup(html, 'lxml')
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            res.nickname = soup.select_one('[name="name"]')['value']
            # Gender flag on the settings form: 1 = male, 0 = female,
            # 2 = undisclosed.
            sex = soup.select_one('[checked="true"]')['value']
            if sex == '1':
                res.gender = EGender.Male
            elif sex == '0':
                res.gender = EGender.Female
            else:
                res.gender = EGender.Unknown
            res.city = soup.select_one('[name="city"]')['value']
            res.birthday = soup.select_one('[name="birthday"]')['value']
            detail = {}
            detail['introduce'] = soup.select_one('[name="intro"]').get_text()
            if detail['introduce']:
                res.append_details(detail)

            # The security page carries the (partially masked) email and phone.
            url = 'https://passport.mafengwo.cn/setting/security/'
            headers = """
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Host: passport.mafengwo.cn
Pragma: no-cache
Referer: https://passport.mafengwo.cn/setting/
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers).replace('\n', '')
            html = re.sub(r'\s{2,}', '', html)
            soup = BeautifulSoup(html, 'lxml')
            email = soup.select('.userpass dd')[1].get_text('-')
            res.email = email.split('-')[0]
            phone = soup.select('.userpass dd')[2].get_text('-')
            res.phone = phone.split('-')[0]

            # Avatar comes from the JSONP head-info pagelet; compute the
            # millisecond timestamp once and reuse it for both placeholders.
            ts = int(
                datetime.now(pytz.timezone('Asia/Shanghai')).timestamp() *
                1000)
            url = ('https://pagelet.mafengwo.cn/user/apps/pagelet/pageViewHeadInfo'
                   '?callback=jQuery181042165802873390845_{}'
                   '&params=%7B%22type%22%3A1%7D&_={}').format(ts, ts)
            headers = """
Accept: */*
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Host: pagelet.mafengwo.cn
Pragma: no-cache
Referer: https://passport.mafengwo.cn/setting/
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers).replace('\\', '')
            photourl = substring(html, '<img.png src="', '"')
            if photourl:
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)
                # BUGFIX: this class stores its HttpAccess as self.ha;
                # the previous self._ha attribute never existed and raised
                # AttributeError here.
                resp_stream: ResponseIO = self.ha.get_response_stream(
                    photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            # BUGFIX: log the formatted traceback, not the traceback module.
            self._logger.error('{} got profile fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        """Page through the order center (10 orders per page) and yield one
        ITRAVELORDER_ONE per order; stops on the first empty page."""
        try:
            start = -10
            while True:
                start += 10
                url = f'https://www.mafengwo.cn/order_center/?status=0&start={start}'
                headers = """
accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
accept-encoding: gzip, deflate, br
accept-language: zh-CN,zh;q=0.9
cache-control: no-cache
pragma: no-cache
referer: https://www.mafengwo.cn/order_center/?status=0&start=0
upgrade-insecure-requests: 1
user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
                html = self.ha.getstring(url, headers=headers).replace(
                    '\r', '').replace('\n', '').replace('\t', '')
                html = re.sub(r'\s{2,}', ' ', html)
                soup = BeautifulSoup(html, 'lxml')
                tables = soup.select('.order-item')
                for table in tables:
                    try:
                        order = {}
                        orderid = table.attrs.get('data-id')
                        order['orderid'] = orderid
                        ordertime = table.select_one('.time').get_text()
                        # Supplier and phone are optional; keep the order even
                        # when either is absent.
                        try:
                            order['supplier'] = table.select_one(
                                '.supplier').get_text()
                        except Exception:
                            pass
                        try:
                            order['telphone'] = table.select_one(
                                '.telphone').get_text()
                        except Exception:
                            pass
                        order['pro-detail'] = table.select_one(
                            '.pro-detail').get_text(' ')
                        order['td-date'] = table.select_one(
                            '.td-date').get_text()
                        order['price'] = table.select_one(
                            '.td-cost').get_text()
                        order['status'] = table.select_one(
                            '.td-status').get_text()

                        # Detail page is best-effort: keep the summary order
                        # even if the drill-down fails.
                        try:
                            orderurl = table.select_one('caption a')['href']
                            if orderurl:
                                detail = self._order_detail(orderurl)
                                order['detail'] = detail
                        except Exception:
                            pass
                        res_one = ITRAVELORDER_ONE(self.task,
                                                   self._appcfg._apptype,
                                                   self.userid, orderid)
                        res_one.append_orders(order)
                        res_one.ordertime = ordertime.split(':', 1)[1]
                        res_one.host = "www.mafengwo.cn"
                        yield res_one
                    except Exception:
                        self._logger.error(
                            'Mafengwo one order get fail: {}'.format(
                                traceback.format_exc()))

                if not tables:
                    break
        except Exception:
            self._logger.error('{} get order fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _order_detail(self, orderurl):
        """Fetch one order-detail page and return its text content."""
        orderurl = 'https://www.mafengwo.cn' + orderurl
        headers = """
Host: www.mafengwo.cn
Connection: keep-alive
Pragma: no-cache
Cache-Control: no-cache
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
"""
        html = self.ha.getstring(orderurl,
                                 headers=headers).replace('\r', '').replace(
                                     '\n', '').replace('\t', '')
        html = re.sub(r'\s{2,}', ' ', html)
        soup = BeautifulSoup(html, 'lxml')
        order = soup.select_one('.order-detail').get_text(' ')
        return order

    def logout(self):
        """Log the session out; returns True when the cookie no longer
        authenticates (i.e. ``_cookie_login`` fails afterwards)."""
        res = False
        try:
            url = 'https://passport.mafengwo.cn/logout.html'
            # BUGFIX: use self.ha — self._ha is never defined on this class.
            html = self.ha.getstring(url,
                                     headers="""
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Host: www.mafengwo.cn
            Pragma: no-cache
            Proxy-Connection: keep-alive
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"""
                                     )

            if not self._cookie_login():
                res = True
        except Exception:
            self._logger.error('log out fail: {}'.format(
                traceback.format_exc()))

        return res
Esempio n. 4
0
class SpiderSuning(SpiderShoppingBase):
    """Spider for suning.com: cookie login, registration probe, order and
    profile scraping. All HTTP traffic goes through ``self._ha``."""

    def __init__(self, task, appcfg, clientid):
        super(SpiderSuning, self).__init__(task, appcfg, clientid)
        self._ha = HttpAccess()
        self.userid = ""
        self.time = datetime.datetime.now(
            pytz.timezone("Asia/Shanghai")).strftime("%Y-%m-%d %H:%M:%S")
        if self.task.cookie:
            self._ha._managedCookie.add_cookies("suning.com", self.task.cookie)

    def _cookie_login(self):
        """
        Verify the stored cookie by loading the member page; on success the
        member number becomes ``self.userid`` ('<number>-suning') and True is
        returned.
        """
        res = False
        url = "http://my.suning.com/person.do"
        headers = """
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate
            Accept-Language: zh-CN,zh;q=0.9
            Host: my.suning.com
            Proxy-Connection: keep-alive
            Referer: http://my.suning.com/person.do
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
        """
        try:
            r = self._ha.getstring(url, headers=headers, timeout=10)
            soup = BeautifulSoup(r, "lxml")
            # "您的会员编号为:<id>," — the member number shown on the page.
            patuserid = re.compile(r"您的会员编号为:(.*?),", re.S)
            userid = patuserid.findall(str(soup))[0]
            if userid:
                self.userid = userid + "-suning"
                res = True
        except Exception:  # narrowed from a bare except
            self._logger.error(
                f"Cookie login error, err:{traceback.format_exc()}")
        return res

    def _check_registration(self):
        """
        Check whether the task's phone number is registered on suning by
        attempting a login with a throw-away password and inspecting the
        error code; writes the outcome back via ``_write_task_back``.
        """
        t = time.strftime("%Y-%m-%d %H:%M:%S")
        try:
            headers = """
                Accept: application/json, text/javascript, */*; q=0.01
                Content-Type: application/x-www-form-urlencoded; charset=UTF-8
                Origin: http://passport.suning.com
                Referer: http://passport.suning.com/ids/login
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
                X-Requested-With: XMLHttpRequest
            """
            url = "http://passport.suning.com/ids/login"
            # NOTE: the detect/dfpToken blobs are opaque device fingerprints
            # captured from a real browser session.
            # BUGFIX: the literal was previously broken across two physical
            # lines (a syntax error); it is now one implicit concatenation.
            postdata = (
                f"jsonViewType=true&username={self.task.phone}&password=&password2=Ujsa1wIs9Jnzn%2Fc%2BqT%2FyQldPMMWVrWviEorr1ku8VnnZGydpUB55QyQZso%2B1%2BZYP97u1MIlXMoBbCTkKRMURME7dMO%2BGIuA6RwVOmFCawDE%2FMYMtuO1PmhgwRlxurrcKF8uBep9Sf8D4dgTv7w%2F8rYqrI3cxUTWmpedBArbxQ6Y%3D&loginTheme=defaultTheme&service=&rememberMe=true&client=app&sceneId=logonImg&detect=mmds_3nZnnnuzF3MnnnuzT3Znnniz83Znnn3zP3MnnnnzL3nZnnnnzS3ZnnnnzN3Znnnvz13Mnnn0zq3Ynnn9zE3nZnnnYzR3Znnn-z43mnnnrzr3YnnnczU3u3nncz-3nvinnczU3MnnnczU3bCnnczr3Mnnnfz43Znnnhz433ZnnnEz43MnnnXzf3Znnn.zf3Znnn1zO3Ynnnkzh3nmnnnVzB3YnnnPzB3ZnnnJzB3MnnnbzB3MnnnbzB3nq3nnFzh3ZnnnFzO3YnnnKzR3mnnnpzf3Znnnlzf3nQnnnlzt3fnnnlz43n3nnpz43Kinnpzc3znnnKzc3nMnnnFzr3YnnnTz-3o3nnJz-3CnnnLz_3MnnnNzY3nZnnnCzY3Mnnn1GY3ZnnnuGY3ZnnnA7Y3Mnnn67Y3iYnnnO7Y3VnnnO7Y3FnnnO7Z3CnnnR7M3Ynnnt753nZnnn-7o3Mnnn_7I3-nnnY7I35nnn97I3Mnnno7I3nXpnno7I3MnnnY7Z3Znnnt7U3Znnn67t3Mnnnw7R3nZnnn17h3YnnnA7h393nns7h3znnnW7h3Mnnnw7h3nMnnn.7B3QnnnA7B3mnnnT763ZnnnoGX3Ynnn1Gs33Mnnnizk3MnnnZzd3Ynnntza3Mnnnhzj3LAnnuCni3Ynnn7C3imnnnGCiiPnnnzC7i9nnneCCiMnnn5Coi3Znnn5CMiYnnn5CZiynnn5CYiCnnn5C_i5nnn5CUinmnnneCfiZnnnCCBiMnnnzCEi_3nnzC6iT3nnzCgi36nnnzCgiMnnnzCXiYnnnzCqiJCnnGCqiMnnnuCdinMnnn~zQiYYnngzLiMnnn2zViMnnn2zViZnnn2zSinl3nnXzaiennnXzaiMnnnqzdiZnnnqzNiMnnnqzNinxYa2E~~j2tjE7R.fE2EjY.Pj7Y2PRPYf.EaYaz~zf6~Eju7.xa2jPxRttxDYYR~auzz~ajx8Pnn~C_36enniCuinXoncCviZ23nk0UiMgnn4C3iMjnnk0viQConMCnirMnn4zgiMG3nwztifINnyzMi853npz43353nhzj3cY3nqzNixa22txaEEjPERP2EPutxxn3iuv7GzC0xjxtxa22tx~utxaxtxGBI3.uZinnn1znn.u9innnTvnn.uz3nnnnnnn.usinnnC5nn.uc3nnn6inn.uW3nnnnvnn.uminnnT7nn.uJnnnnnnnn.usinnn67nn.uT3nnnx2jPxs7mnnnnnvnnniu3nnnnnSgnndnnnnnnnh2nni5nnnnnn4nnnEqnnnnnnJqnnfHInnnnnUMnnAI3nnnnni7nnunnnnnnninnnKt7nnnnnlt7niz1nnnnne53nxPERxMTOnyyxLK03GCniMnnnGCiiMnnnGCGiZnnnvCCiZnnniC0i3Mnnn3CeiYnnn3CIiZnnnnCoimnnnyz5iZnnnyz9inYnnnyzMibCnnTzMi9nnnNzMiYnnntzMiZnnnPGmi3ZnnnmG5iZnnnQ75iMnnnB75iZnnnY75iMnnn97oingnnn97IiMnnn970iYnnn970iMnnn97Ci.3nn57CinZnnni70imnnnTvIiZnnnSv9iZnnnkvMiMnnnsvYi3ZnnnWvYi8"
                "3nnWv_ikvnnWv_iN4nnzGUiEnnnGGUiio3nn7GUiYnnnvGUifnnnuGriCnnnvGci6nnnzGti3ZnnnoGfimnnnYGfiYnnnRGRiMnnn2GRiZnnn1Gti3ZnnnVG-iMnnnPGMiYnnnKGIiZnnnyGziZnnnizii3mnnnvzniZnnn7zl3Znnn7zp3Qnnn7zK3Mnnnvt~EzLzt~u2zF_._796d0c53-7e52-42b9-a978-a8944ba6c172_._&dfpToken=THP7fd1696fcef06aX5E3e4d3&terminal=PC&loginChannel=208000103001")
            response = self._ha.getstring(url,
                                          headers=headers,
                                          req_data=postdata)
            if '"errorCode":"badPassword.msg1"' in response:
                self._write_task_back(ECommandStatus.Succeed, "Registered", t,
                                      EBackResult.Registerd)
            elif '"errorCode":"needVerifyCode"' in response:
                self._write_task_back(
                    ECommandStatus.Failed,
                    "Need VerifyCode!",
                    t,
                    EBackResult.CheckRegisterdFail,
                )
            else:
                self._write_task_back(ECommandStatus.Succeed, "Not Registered",
                                      t, EBackResult.UnRegisterd)

        except Exception:
            self._logger.error("Check registration fail: {}".format(
                traceback.format_exc()))
            self._write_task_back(
                ECommandStatus.Failed,
                "Check registration fail",
                t,
                EBackResult.CheckRegisterdFail,
            )
        return

    def _get_orders(self):
        """
        Page through queryOrderList.do and yield one ISHOPPING_ONE per order;
        stops when a page no longer contains a table-list, or on a request
        failure.
        """
        headers = """
            Accept: text/html, */*; q=0.01
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: order.suning.com
            Pragma: no-cache
            sec-ch-ua: "Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"
            sec-ch-ua-mobile: ?0
            Sec-Fetch-Dest: empty
            Sec-Fetch-Mode: cors
            Sec-Fetch-Site: same-origin
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36
            X-Requested-With: XMLHttpRequest
        """
        thistime = time.strftime("%Y-%m-%d")
        patorderlist = re.compile(r'<div class="table-list">')
        page = 1
        while True:
            try:
                url = f"https://order.suning.com/order/queryOrderList.do?transStatus=&pageNumber={page}&condition=&startDate=2009-01-01&endDate={thistime}&orderType="
                html = self._ha.getstring(url, headers=headers, timeout=10)
                orderlist = patorderlist.search(html)
                if orderlist:
                    soup = BeautifulSoup(html, "lxml")
                    orders = soup.select(".table-list .table-box")
                    for order in orders:
                        try:
                            dic1 = {}
                            patid = re.compile(r'id="table_box_(.*?)"', re.S)
                            orderid = patid.findall(str(order))[0]
                            # The page only shows a date; normalise it to a
                            # full timestamp.
                            ordertime = (
                                order.select_one(".item span").get_text() +
                                " " + "00:00:00")
                            dic1["shop"] = order.select(
                                ".item span")[1].get_text()
                            dic1["rowspan"] = order.select_one(
                                ".total-price").get("rowspan")
                            dic1["price"] = order.select_one(
                                ".total-price span").get_text()
                            dic1["含运费"] = order.select_one(
                                ".total-price em").get_text()
                            dic1["status"] = order.select_one(
                                ".state .opt-item").get_text()
                            dic1["contact"] = (order.select_one(
                                ".tax-tip").get_text(" ").replace(
                                    "\n", "").replace("\r", ""))
                            dic = []
                            o = order.select("table .order-info")
                            for item in o:
                                di = {}
                                di["title"] = item.select_one(
                                    '[name="pname_"]')["title"]
                                di["price"] = item.select_one(
                                    ".price span").get_text()
                                di["amount"] = (item.select_one(
                                    ".amount").get_text().strip())
                                dic.append(di)
                                dic1["goods"] = dic

                            res_one = ISHOPPING_ONE(self.task,
                                                    self._appcfg._apptype,
                                                    self.userid, orderid)
                            res_one.ordertime = ordertime
                            res_one.append_orders(dic1)
                            res_one.host = "www.suning.com"

                            yield res_one
                        except Exception:  # narrowed from a bare except
                            self._logger.error(
                                f"Parser order error\nerr:\n{traceback.format_exc()}"
                            )
                            continue
                    time.sleep(1)
                    page += 1
                else:
                    break
            except Exception:
                self._logger.error("{} got order fail: {}".format(
                    self.userid, traceback.format_exc()))
                # BUGFIX: without this break a persistent failure re-requested
                # the same page forever (infinite loop).
                break

    def _get_profile(self):
        """Yield a PROFILE carrying the account nickname fetched from the
        member-info JSON endpoint."""
        try:
            url = "http://my.suning.com/msi2pc/memberInfo.do"
            headers = """
            Accept: application/json, text/javascript, */*; q=0.01
            Accept-Encoding: gzip, deflate
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: my.suning.com
            Pragma: no-cache
            Referer: http://my.suning.com/
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36
            X-Requested-With: XMLHttpRequest
            """
            r = self._ha.getstring(url, headers=headers, timeout=10)
            rd = json.loads(r)
            nickname = rd.get("nickName")
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            res.nickname = nickname
            yield res
        except Exception:
            self._logger.error("{} got profile fail: {}".format(
                self.userid, traceback.format_exc()))
Esempio n. 5
0
class Seebug(AutoPluginBase):
    """Plugin that scrapes exploit entries from cn.0day.today into ExpDB
    records, using a headless Chrome to pass the site's consent page."""

    tasktype = EAutoType.EXPDB

    def __init__(self):
        AutoPluginBase.__init__(self)
        self.ha = HttpAccess()
        self._get_cookie()

    def _get_cookie(self):
        """Drive headless Chrome through the 0day.today consent page and
        register the resulting session cookies on ``self.ha``."""
        try:
            chrome_options = ChromeOptions()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            chrome_options.add_argument('blink-settings=imagesEnabled=false')
            # chrome_options.add_argument('--no-sandbox')
            driver = webdriver.Chrome(chrome_options=chrome_options)
            success = False
            check_num = 1
            while True:
                try:
                    driver.get('https://cn.0day.today/')
                    # The interstitial can be slow; back off a little longer
                    # on each retry (max 3 attempts).
                    time.sleep(5 * check_num)
                    driver.find_element_by_css_selector(
                        'body > div > div.agree > div:nth-child(9) > div:nth-child(3) > form > input').click()
                    success = True
                    break
                except Exception:
                    check_num += 1
                    if check_num == 4:
                        break
            if success:
                cookies = driver.get_cookies()
                l_cookie = ''
                for cookie in cookies:
                    l_cookie = l_cookie + cookie['name'] + '=' + cookie['value'] + '; '
                # BUGFIX: the add_cookies call was previously gated behind a
                # flag that was never set, so the harvested cookies were
                # silently discarded even on success.
                self._logger.info('Got cookie success!')
                self.ha._managedCookie.add_cookies('0day.today', l_cookie)
            else:
                self._logger.info('Got cookie fail!')
            driver.close()
        except Exception:
            self._logger.error('Got cookie fail: {}'.format(traceback.format_exc()))

    def get_bug(self):
        """Walk every platform category and page of cn.0day.today, emitting
        ExpDB records plus their descriptions and PoC bodies.

        Stops entirely at the first already-seen exploit id (incremental
        crawl) and refreshes the cookie up to 3 times when the consent /
        Cloudflare page comes back instead of content.
        """
        failnum = 0
        while True:
            url = 'https://cn.0day.today/platforms'
            headers = """
            Host: cn.0day.today
            Connection: keep-alive
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
            Accept-Language: zh-CN,zh;q=0.9
            """
            html = self.ha.getstring(url, headers=headers)
            if "value='是的我同意'" in html or 'Just a moment...' in html:
                failnum += 1
                if failnum > 3:
                    self._logger.error('Requsts fail over 3 times!')
                    return
                self._logger.info('Cookie lose efficacy!')
                self._get_cookie()
            else:
                break
        soup = BeautifulSoup(html, 'lxml')
        tables = soup.select('.category_title a')
        for a in tables:
            href = a.attrs['href']
            if href == '/platforms' or href == '/webapps':
                continue
            url0 = 'https://cn.0day.today' + href
            page = 0
            last_url = None
            while True:
                page += 1
                url = url0 + '/' + str(page)
                html = self.ha.getstring(url, headers=f"""
        accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
        accept-language: zh-CN,zh;q=0.9
        cache-control: no-cache
        pragma: no-cache
        referer: {url0}
        upgrade-insecure-requests: 1
        user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36""")
                soup = BeautifulSoup(html, 'lxml')
                exploits = soup.select('.ExploitTableContent')
                for exploit in exploits:
                    d_href = exploit.select_one('h3 a').attrs['href']
                    id = d_href.split('/')[-1]
                    # Incremental crawl: stop at the first id already stored.
                    if self.is_data_unique(str(id) + '0day'):
                        return
                    name = exploit.select_one('h3 a').get_text()

                    # Only free exploits are fetched; paid ones are skipped.
                    if '你可以免费使用此漏洞利用' in str(exploit):
                        detail, referer = self.get_description(href, url)
                        if detail:
                            description = f'datasource: 0day\nid: {id}\nname: {name}\nurl:{url}\n'
                            self.write_text_string(description, detail, 'iscan_expdb_doc')
                    else:
                        continue

                    # Site shows DD-MM-YYYY; reorder to YYYY-MM-DD.
                    date = exploit.select_one('.td a').get_text()
                    date_d = date.split('-')[0]
                    date_y = date.split('-')[-1]
                    date = date_y + date.replace(date_d, '').replace(date_y, '') + date_d
                    # NOTE(review): 0 when the "verified" badge is present,
                    # 1 otherwise — confirm this matches the ExpDB convention.
                    verified = soup.select_one('.tips_verified_')
                    if verified:
                        verified = 0
                    else:
                        verified = 1
                    level_t = substring(str(exploit), "class='tips_risk_color_", "'>安全风险级别")
                    if level_t in ['0', '1']:
                        level = 1
                    elif level_t == '2':
                        level = 2
                    else:
                        level = 3
                    res = ExpDB(name, '0day', id, date, verified)
                    res.level = level
                    # BUGFIX: the detail URL used to be assigned to `url`,
                    # clobbering the listing-page URL and breaking the
                    # last_url pagination check below.
                    res, poc, detail_url = self.get_detail(id, referer, res)
                    # BUGFIX: datasource label was 'seebug'; this crawler's
                    # records are tagged '0day' everywhere else.
                    description = f'datasource: 0day\nid: {id}\nname: {name}\nurl:{detail_url}\n'
                    self.write_text_string(description, poc, 'iscan_expdb_exp')
                    self.write_text(res, 'iscan_expdb')
                    self.store_data_unique(str(id) + '0day')

                if not last_url:
                    last_url = 'https://cn.0day.today' + soup.select('.pages a')[-1].attrs['href']
                if last_url == url:
                    break

    def get_description(self, href, referer):
        """Fetch the exploit page and extract its description section.

        Returns ``(description, detail_url)``; description is '' when the
        section is missing or on failure, so callers can always tuple-unpack.
        """
        d_url = 'https://cn.0day.today/exploit' + href
        try:
            html = self.ha.getstring(d_url, headers=f"""
    accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
    accept-language: zh-CN,zh;q=0.9
    cache-control: no-cache
    pragma: no-cache
    referer: {referer}
    upgrade-insecure-requests: 1
    user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36""")
            description = ''
            if "<div class='td'>描述</div>" in html:
                try:
                    description = re.findall(r"<div class='td'>描述</div>.*?>(.*?)</div>", html, re.S)[0]
                except Exception:
                    self._logger.error('Get description fail: {}'.format(traceback.format_exc()))
            return description, d_url
        except Exception:
            self._logger.error(f'Description fail:{traceback.format_exc()}')
            # BUGFIX: previously fell through returning None, which crashed
            # the caller's tuple unpacking.
            return '', d_url

    def get_detail(self, id, referer, res):
        """Fill ``res`` (an ExpDB) with author/target/code metadata from the
        exploit page and return ``(res, poc_text, exploit_url)``."""
        try:
            e_url = 'https://cn.0day.today/exploit/' + id
            e_html = self.ha.getstring(e_url, headers=f"""
    accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
    accept-language: zh-CN,zh;q=0.9
    cache-control: no-cache
    pragma: no-cache
    referer: {referer}
    upgrade-insecure-requests: 1
    user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36""")
            e_soup = BeautifulSoup(e_html, 'lxml')
            l1 = e_soup.select(
                "[style='float:left; width:150px; overflow:hidden; margin:5px 0px 0px 0px;']")
            author = {}
            author['name'] = l1[0].get_text()
            tags = []
            target = {}
            target['type'] = l1[1].get_text()
            target['platform'] = l1[2].get_text()
            tags.append(self.tag_mapping(target['type']))
            l3 = e_soup.select("[style='float:left; margin:5px 0px 0px 0px;']")
            code = []
            code.append({'code_type': '0day-ID', 'code': l3[0].get_text()})
            try:
                # BUGFIX: build a fresh dict here — the original mutated and
                # re-appended the same dict, losing the 0day-ID entry.
                code.append({'code_type': 'cve', 'code': l3[1].get_text(' ')})
            except Exception:
                # No CVE line on the page; keep just the 0day-ID entry.
                pass
            res.tags = tags
            res.target = target
            res.author = author
            res.code = code
            poc = e_soup.select_one('pre').get_text()
            return res, poc, e_url
        except Exception:
            self._logger.error(f'ID: {id} get detail fail:{traceback.format_exc()}')

    def start(self):
        """Plugin entry point."""
        self.get_bug()
Esempio n. 6
0
class Seebug(AutoPluginBase):
    """Crawler for the seebug.org vulnerability database.

    Walks the paginated vulnerability list and scrapes each entry's detail
    page into an ExpDB record; long texts (details, PoC) are written out
    as separate documents via ``write_text_string``.
    """

    tasktype = EAutoType.EXPDB

    def __init__(self):
        AutoPluginBase.__init__(self)
        self.ha = HttpAccess()
        # seebug.org sits behind a JS challenge; obtain clearance cookies first.
        self._get_cookie()

    def _get_cookie(self):
        """Drive headless Chrome through seebug's JS challenge and copy its cookies."""
        try:
            ic = False
            chrome_options = ChromeOptions()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            chrome_options.add_argument('blink-settings=imagesEnabled=false')
            # chrome_options.add_argument('--no-sandbox')
            driver = webdriver.Chrome(chrome_options=chrome_options)
            driver.get('https://www.seebug.org/')
            time.sleep(2)  # let the challenge script run and set its cookies
            cookies = driver.get_cookies()
            l_cookie = ''
            for cookie in cookies:
                l_cookie = l_cookie + cookie['name'] + '=' + cookie['value'] + '; '
                if cookie['name'] == '__jsl_clearance':
                    # __jsl_clearance is the anti-bot clearance cookie we need
                    ic = True
            self.cookie = l_cookie
            # print(self.cookie)
            self.ha._managedCookie.add_cookies('.seebug.org', self.cookie)
            if ic:
                self._logger.info('Got cookie success!')
            # BUGFIX: quit() instead of close() — close() only closes the
            # window and leaks the chromedriver process.
            driver.quit()
        except Exception:
            self._logger.error('Got cookie fail: {}'.format(traceback.format_exc()))

    def get_bug(self):
        """Walk every page of the vulnerability list and scrape each table row."""
        page = 0
        max_page = None  # learned from the page-jump form on the first page
        while True:
            page += 1
            fail_time = 0
            # Retry loop: the site rate-limits; back off up to 5 times
            # before giving up on this page.
            while True:
                url = f'https://www.seebug.org/vuldb/vulnerabilities?page={page}'
                html = self.ha.getstring(url, headers="""
                Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
                Accept-Encoding: gzip, deflate, br
                Accept-Language: zh-CN,zh;q=0.9
                Cache-Control: no-cache
                Connection: keep-alive
                Host: www.seebug.org
                Pragma: no-cache
                Upgrade-Insecure-Requests: 1
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36""")
                # Mojibake banner (GBK decoded as Latin-1) meaning the request
                # frequency was too high — must stay byte-identical.
                if 'Äú·ÃÎÊƵÂÊÌ«¸ß£¬ÇëÉÔºòÔÙÊÔ¡£' in html:
                    print(f'Äú·ÃÎÊƵÂÊÌ«¸ß£¬ÇëÉÔºòÔÙÊÔ¡£{fail_time} * 5s')
                    fail_time += 1
                    if fail_time == 5:
                        print('Fail time outnumber 5!')
                        break
                    time.sleep(5)
                else:
                    break
            soup = BeautifulSoup(html, 'lxml')
            tbody = soup.select('tbody tr')
            if not max_page:
                # The jump form's input carries the total page count.
                max_page = soup.select_one('#J-jump-form input').attrs['max']
            for tr in tbody:
                self.bug_detail(tr, url)
            if int(max_page) <= page:
                break

    def bug_detail(self, tr, referer):
        """Scrape one list row *tr* plus its detail page into an ExpDB record.

        :param tr: BeautifulSoup <tr> element from the vulnerability list
        :param referer: list-page URL sent as the Referer header
        """
        try:

            b_url = 'https://www.seebug.org' + tr.select_one('td a').attrs['href']
            name = tr.select_one('.vul-title-wrapper a').get_text()
            datasource = 'seebug'
            id = tr.select_one('td a').get_text()
            date_published = tr.select_one('.text-center.datetime.hidden-sm.hidden-xs').get_text()
            tooltip = tr.select_one('[data-toggle="tooltip"]').attrs['data-original-title']
            # Severity tooltip (mojibake): high / medium / everything else low.
            if tooltip == '¸ßΣ':
                level = 3
            elif tooltip == 'ÖÐΣ':
                level = 2
            else:
                level = 1
            fail_time = 0
            res = ExpDB(name, datasource, id, date_published, 0)
            # Same rate-limit back-off as get_bug, with a longer random sleep.
            while True:
                b_html = self.ha.getstring(b_url, headers=f"""
                            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
                            Accept-Encoding: gzip, deflate, br
                            Accept-Language: zh-CN,zh;q=0.9
                            Cache-Control: no-cache
                            Connection: keep-alive
                            Host: www.seebug.org
                            Pragma: no-cache
                            Referer: {referer}
                            Upgrade-Insecure-Requests: 1
                            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36""")
                if 'Äú·ÃÎÊƵÂÊÌ«¸ß£¬ÇëÉÔºòÔÙÊÔ¡£' in b_html:
                    fail_time += 1
                    print(f'Äú·ÃÎÊƵÂÊÌ«¸ß£¬ÇëÉÔºòÔÙÊÔ¡£{fail_time} * 45s')
                    if fail_time == 5:
                        print('Fail time outnumber 5!')
                        break
                    t = random.randint(30, 60)
                    time.sleep(t)
                else:
                    break
            b_soup = BeautifulSoup(b_html, 'lxml')
            # Best-effort field extraction below: each optional field is
            # wrapped in its own except so one missing element does not
            # abort the whole record. Narrowed from bare excepts so
            # KeyboardInterrupt/SystemExit still propagate.
            try:
                file_data = b_soup.select_one('#j-md-detail').get_text()
                # Mojibake sentinel meaning the detail text is login-gated.
                if 'µÇ¼ºó²é¿´' not in file_data:
                    description = f'datasource: seebug\nid: {id}\nname: {name}\nurl:{b_url}\n'
                    self.write_text_string(description, file_data, 'iscan_expdb_doc')
            except Exception:
                pass
            tags = []
            try:
                tag_type = b_soup.select('.bug-msg .col-md-4')[1].select_one('dd').get_text()
                tags.append(tag_type)
                res.tags = tags
            except Exception:
                pass
            target = []
            try:
                tar = {}
                ta_type = b_soup.select_one('.hover-scroll a').get_text().replace('\n', '')
                ta_type = re.sub(r'\s{2,}', '', ta_type)
                tar['type'] = ta_type
                try:
                    version = b_soup.select_one('.hover-scroll').get_text()
                    version = substring(version, '(', ')')
                    tar['version'] = {'list': version}
                except Exception:
                    pass
                target.append(tar)
                res.target = target
            except Exception:
                pass

            cve_id = b_soup.select('.bug-msg .col-md-4')[2].select_one('dd').get_text()
            code = []
            # '²¹³ä' (mojibake for a "pending/supplement" placeholder) means
            # no CVE has been assigned yet.
            if '²¹³ä' not in cve_id:
                cve = {}
                cve['code_type'] = 'cve'
                cve['code'] = cve_id.replace('\n', '')
                code.append(cve)
                res.code = code
            author = {}
            author['name'] = b_soup.select('.bug-msg .col-md-4')[1].select('dd')[3].get_text()
            author['name'] = re.sub(r'\s{2,}', '', author['name'])
            # BUGFIX: attribute assignment, matching res.tags/res.target/
            # res.code above — ExpDB is not subscriptable.
            res.author = author
            try:
                poc = b_soup.select_one('#J-poc').get_text()
                description = f'datasource: seebug\nid: {id}\nname: {name}\nurl:{b_url}\n'
                self.write_text_string(description, poc, 'iscan_expdb_exp')
            except Exception:
                pass

            print(name, datasource, id, date_published, tooltip, level, tags, code, author)
            time.sleep(3)  # throttle between detail pages
        except Exception:
            self._logger.error('Got bug detail fail: {}'.format(traceback.format_exc()))

    def start(self):
        """Plugin entry point: crawl the whole vulnerability list."""
        self.get_bug()
Esempio n. 7
0
class SpiderTuniu(SpiderTravelBase):
    def __init__(self, task, appcfg, clientid):
        """Set up the Tuniu spider; preload the task cookie if one was given."""
        super(SpiderTuniu, self).__init__(task, appcfg, clientid)
        self.ha = HttpAccess()
        self.cookie = self.task.cookie
        if self.cookie:
            self.ha._managedCookie.add_cookies('tuniu.com', self.cookie)

    def _check_registration(self):
        """
        查询手机号是否注册了途牛
        :param account:
        :return:
        """
        t = time.strftime('%Y-%m-%d %H:%M:%S')
        try:
            url = "https://passport.tuniu.com/register"
            html = self._ha.getstring(url,
                                      headers="""
accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
accept-encoding: gzip, deflate, br
accept-language: zh-CN,zh;q=0.9
cache-control: no-cache
pragma: no-cache
upgrade-insecure-requests: 1
user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"""
                                      )

            headers = """
Accept: */*
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Origin: https://passport.tuniu.com
Referer: https://passport.tuniu.com/register
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
X-Requested-With: XMLHttpRequest"""
            url = 'https://passport.tuniu.com/register/isPhoneAvailable'
            postdata = f"intlCode=0086&tel={self.task.phone}"
            html = self._ha.getstring(url, headers=headers, req_data=postdata)
            if '"errno":-1,' in html:
                self._write_task_back(ECommandStatus.Succeed, 'Registered', t,
                                      EBackResult.Registerd)
            else:
                self._write_task_back(ECommandStatus.Succeed, 'Not Registered',
                                      t, EBackResult.UnRegisterd)

        except Exception:
            self._logger.error('Check registration fail: {}'.format(
                traceback.format_exc()))
            self._write_task_back(ECommandStatus.Failed,
                                  'Check registration fail', t,
                                  EBackResult.CheckRegisterdFail)
        return

    def _cookie_login(self):
        url = 'https://i.tuniu.com/usercenter/usercommonajax/japi'
        headers = """
Accept: application/json, text/javascript, */*; q=0.01
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Content-Length: 76
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Host: i.tuniu.com
Origin: https://i.tuniu.com
Pragma: no-cache
Referer: https://i.tuniu.com/userinfoconfirm
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
X-Requested-With: XMLHttpRequest"""
        postdata = 'serviceName=MOB.MEMBERS.InnerController.getUserInfo&serviceParamsJson=%7B%7D'
        try:
            html = self.ha.getstring(url, headers=headers, req_data=postdata)
            jshtml = json.loads(html)
            userid = jshtml['data']['data']['userId']
            if userid:
                self.userid = str(userid) + '-tuniu'
                return True
            else:
                return False
        except:
            return False

    def _get_profile(self):
        try:
            url = 'https://i.tuniu.com/usercenter/usercommonajax/japi'
            headers = """
Accept: application/json, text/javascript, */*; q=0.01
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Content-Length: 76
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Host: i.tuniu.com
Origin: https://i.tuniu.com
Pragma: no-cache
Referer: https://i.tuniu.com/userinfoconfirm
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
X-Requested-With: XMLHttpRequest"""
            postdata = 'serviceName=MOB.MEMBERS.InnerController.getUserInfo&serviceParamsJson=%7B%7D'
            html = self.ha.getstring(url, headers=headers, req_data=postdata)
            jshtml = json.loads(html)
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            userid = jshtml['data']['data']['userId']
            res.nickname = jshtml['data']['data']['nickName']
            res.phone = jshtml['data']['data']['tel']
            res.birthday = jshtml['data']['data']['birthday']
            res.email = jshtml['data']['data']['email']
            res.address = jshtml['data']['data']['additionalAddress']
            sex = jshtml['data']['data']['sex']
            if sex == 1:
                res.gender = EGender.Male
            elif sex == 0:
                res.gender = EGender.Female
            else:
                res.gender = EGender.Unknown
            detail = jshtml['data']['data']
            res.append_details(detail)
            photourl = jshtml['data']['data']['largeAvatarUrl']
            if photourl:
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)

                resp_stream: ResponseIO = self.ha.get_response_stream(photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            self._logger.error('{} .got profile fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        try:
            page = 0
            while True:
                page += 1
                url = 'https://i.tuniu.com/usercenter/usercommonajax/japi/getOrderList?serviceName=MOB.MEMBER.InnerOrderController.getOrderList&serviceParamsJson=%7B%22type%22%3A0%2C%22page%22%3A{}%2C%22status%22%3A0%2C%22size%22%3A5%7D&_={}'.format(
                    page,
                    int(
                        datetime.now(
                            pytz.timezone('Asia/Shanghai')).timestamp() *
                        1000))
                headers = """
Accept: application/json, text/javascript, */*; q=0.01
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Host: i.tuniu.com
Pragma: no-cache
Referer: https://i.tuniu.com/list/
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
X-Requested-With: XMLHttpRequest"""
                html = self.ha.getstring(url, headers=headers)
                jshtml = json.loads(html)
                orderList = jshtml['data']['data']['orderList']
                if orderList:
                    for order in orderList:
                        try:
                            orderid = order['orderId']
                            ordertime = order['orderTime']
                            res_one = ITRAVELORDER_ONE(self.task,
                                                       self._appcfg._apptype,
                                                       self.userid, orderid)
                            res_one.append_orders(order)
                            res_one.ordertime = ordertime
                            res_one.host = 'www.tuniu.com'
                            yield res_one
                        except:
                            pass
                totalpage = jshtml['data']['data']['totalPage']
                if totalpage <= page:
                    break
        except Exception:
            self._logger.error('{} got order fail: {}'.format(
                self.userid, traceback.format_exc()))