def __init__(self, task, appcfg, clientid):
    super(SpiderSuning, self).__init__(task, appcfg, clientid)
    self._ha = HttpAccess()
    self.userid = ""
    self.time = datetime.datetime.now(
        pytz.timezone("Asia/Shanghai")).strftime("%Y-%m-%d %H:%M:%S")
    if self.task.cookie:
        self._ha._managedCookie.add_cookies("suning.com", self.task.cookie)
class TwitterSpider(object):

    def __init__(self):
        self._ha = HttpAccess()
        cookie = '''personalization_id="v1_pmt2sntu/a8PCORtco8eVg=="; guest_id=v1%3A156194413511829253; ct0=6bb1b8031784f711388377a485cd5bf9; _ga=GA1.2.1901222848.1561944140; _gid=GA1.2.214369962.1561944140; ads_prefs="HBERAAA="; kdt=l6Dkc64O0CPl4qCVtYAuXjXrtxkII2VUjRNqMOfT; remember_checked_on=1; _twitter_sess=BAh7CiIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6Rmxhc2g6OkZsYXNo%250ASGFzaHsABjoKQHVzZWR7ADoPY3JlYXRlZF9hdGwrCGsaIKtrAToMY3NyZl9p%250AZCIlNzIyZDlhODdlMTVjYjU3MTRkYTBlY2Y4NGQ5MDQzMjQ6B2lkIiVjZTgw%250ANDQ0ZmIyOTAyY2U0MjQ0NjI4ZTFmNjU0MjgwOToJdXNlcmwrCQHglUis2NwN--8ae08c231e9599f1c5868e6518664b987d936c79; twid="u=998911451833163777"; auth_token=3378bb7bfd90d8bfb9de00b7fb7110633a256852; csrf_same_site_set=1; lang=en; csrf_same_site=1; _gat=1'''
        self._ha._managedCookie.add_cookies('twitter.com', cookie)

    def get_first_page_info(self):
        """
        Get the information on the first page.
        :return:
        """
        url = 'https://twitter.com/Google/followers'
        headers = '''
        accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
        accept-encoding: gzip, deflate, br
        accept-language: zh-CN,zh;q=0.9,en;q=0.8
        cache-control: no-cache
        pragma: no-cache
        referer: https://twitter.com/login
        upgrade-insecure-requests: 1
        user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36
        '''
        restring = self._ha.getstring(url, headers=headers)
        # print(restring)
        soup = BeautifulSoup(restring, 'lxml')
        all_divs = soup.find_all(
            'div',
            attrs={
                'class': 'user-actions btn-group not-following not-muting '
            })
        pass

    def search(self):
        url = "https://twitter.com/GEMoving"
        headers = '''
        accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
        accept-encoding: gzip, deflate, br
        accept-language: zh-CN,zh;q=0.9,en;q=0.8
        cache-control: no-cache
        cookie: guest_id=v1%3A155921858819307425; _ga=GA1.2.999014813.1559218592; tfw_exp=0; kdt=0XTnTydp2g3vpbnSflAWv3kKb1zxBLeoftN3fQgd; remember_checked_on=0; csrf_same_site_set=1; csrf_same_site=1; personalization_id="v1_bHstwWEgjYsaq0IVqrj60Q=="; external_referer=padhuUp37zjgzgv1mFWxJ12Ozwit7owX|0|8e8t2xd8A2w%3D; ads_prefs="HBERAAA="; ct0=4fc617a6fb32b387a3d51da28a37910f; _gid=GA1.2.84942800.1562234004; twid="u=998911451833163777"; auth_token=f75059e44a60f7e68d723ded8571c6938c50c23b; _twitter_sess=BAh7CiIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6Rmxhc2g6OkZsYXNo%250ASGFzaHsABjoKQHVzZWR7ADoPY3JlYXRlZF9hdGwrCKsZZ7xrAToMY3NyZl9p%250AZCIlNjAzZjE0NTU0ZjM1NWZlNjZhMmU1YWIyZDEzMzUxOTg6B2lkIiUxODU2%250AZWRhNWM0NjNhNzAyODM3YjJiMTFkNjlmOGY3NzoJdXNlcmwrCQHglUis2NwN--ba48cd0cf7d091fda283e2eb1f9e75fb2d4cd73c; lang=en; _gat=1
        pragma: no-cache
        referer: https://twitter.com/search?q=%E9%82%93%E7%B4%AB%E6%A3%8B&src=typd
        upgrade-insecure-requests: 1
        user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36
        '''
        restring = self._ha.getstring(url, headers=headers)
        soup = BeautifulSoup(restring, 'lxml')
        all_divs = soup.find_all(
            'div',
            attrs={
                'class': 'user-actions btn-group not-following not-muting '
            })
        pass
def __init__(self, task: IscoutTask):
    ScoutPlugBase.__init__(self)
    self.task = task
    self._ha: HttpAccess = HttpAccess()
    self._host: str = '.instagram.com'
    self._login()
    if Instagram._cookie:
        self._ha._managedCookie.add_cookies(self._host, Instagram._cookie)
def __init__(self, task: IscoutTask):
    ScoutPlugBase.__init__(self)
    self.task = task
    self._ha: HttpAccess = HttpAccess()
    self._host: str = '.linkedin.com'
    self._login()
    if LinkedIn._cookie:
        for key, value in LinkedIn._cookie.items():
            self._ha._managedCookie.add_cookies(key, value)
    self._first_page()
class TestCaseHttpAccess(unittest.TestCase):
    """"""

    def __init__(self, methodName="runTest"):
        unittest.TestCase.__init__(self, methodName=methodName)
        warnings.simplefilter('ignore', ResourceWarning)
        self._ha: HttpAccess = None

    @classmethod
    def setUpClass(cls):
        # print('Previous condition for all')
        pass

    @classmethod
    def tearDownClass(cls):
        # print('Post condition for all')
        pass

    def setUp(self):
        # print('Previous condition for each')
        if not isinstance(self._ha, HttpAccess):
            self._ha = HttpAccess()

    def tearDown(self):
        # print('Post condition for each')
        pass

    # Test case names must start with "test", otherwise they are not executed.
    @unittest.skip('')
    def test_skip(self):
        pass

    def test_instruct(self):
        self.assertIsNotNone(self._ha)

    def test_getstring(self):
        self.assertIsNotNone(self._ha)
        url = 'https://www.baidu.com'
        html = self._ha.getstring(url,
                                  headers='''
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        Cache-Control: no-cache
        Connection: keep-alive
        Host: www.baidu.com
        Pragma: no-cache
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36''')
        self.assertFalse(html is None or html == '')
def __init__(self,
             task: Task,
             appcfg: AppCfg,
             clientid: str,
             logger_name_ext: str = ""):
    if not isinstance(task, Task):
        raise Exception("Task is invalid.")
    if not isinstance(appcfg, AppCfg):
        raise Exception("AppConfig is invalid.")
    if not isinstance(clientid, str) or clientid == "":
        raise Exception("Invalid clientid")

    self.task = task
    self._clientid: str = clientid
    self._appcfg = appcfg

    # Logger and plugin name
    self._name = type(self).__name__
    loggername = f"{self._name}_{self.task.batchid}"
    if logger_name_ext is not None and logger_name_ext != "":
        loggername += "_{}".format(logger_name_ext)
    self._logger: MsLogger = MsLogManager.get_logger(loggername)

    # HTTP access object
    self._ha: HttpAccess = HttpAccess()

    # Common fields holding the logged-in account's information,
    # needed all over the plugin
    self._userid: str = None  # the site's unique identifier for the user
    self._account: str = self.task.account  # account name usable for login
    self._username: str = None  # user nickname
    self._globaltelcode: str = self.task.globaltelcode  # international dialing code
    self._phone: str = self.task.phone  # phone number
    self._url: str = self.task.url
    self._host: str = self.task.host
    self._cookie: str = self.task.cookie

    # State objects
    self._errorcount: int = 0
    self.is_running: bool = False
    self.running_task = []

    # Verification codes are treated as valid for 900 seconds; 15 minutes
    # is plenty, since most sites cap code validity at about 10 minutes.
    self._effective_time = 900

    # self._outputtgfile = OutputManage()
    self._sqlfunc = DbManager

    # Thread-running flag
    self._running = True
    # Stop flag; defaults to not stopped (False = keep downloading, True = stop)
    self._stop_sign = False
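A minimal sketch of how a concrete spider typically builds on this base __init__, mirroring the subclass pattern used throughout this section; SpiderExampleBase and the 'example.com' domain are hypothetical placeholders, and the framework is assumed to supply task, appcfg and clientid.

class SpiderExample(SpiderExampleBase):  # hypothetical subclass for illustration

    def __init__(self, task, appcfg, clientid):
        # The base __init__ validates task/appcfg/clientid, creates the
        # logger and the shared HttpAccess instance, and copies the common
        # account fields (_account, _phone, _cookie, ...).
        super(SpiderExample, self).__init__(task, appcfg, clientid)
        # Cookies are registered per domain on the managed cookie jar,
        # exactly as the real subclasses in this section do.
        if self._cookie:
            self._ha._managedCookie.add_cookies('example.com', self._cookie)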
def __init__(self, task: IscoutTask):
    ScoutPlugBase.__init__(self)
    self.task = task
    self._ha: HttpAccess = HttpAccess()
    self.basic_url = "https://www.instagram.com/"
    self.headers = """
    accept: */*
    accept-encoding: gzip, deflate, br
    accept-language: zh-CN,zh;q=0.9
    user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36
    """
    # Search-box API
    self.searchBox_api = 'https://www.instagram.com/web/search/topsearch/?context=blended&query={}&include_reel=true'
    # User-data API
    self.userData_api = 'https://www.instagram.com/{}/?__a=1'
    self.source = "instagram"
    self.reason = "instagram identity attribution"
class SpiderTongCheng(SpiderTravelBase):

    def __init__(self, task, appcfg, clientid):
        super(SpiderTongCheng, self).__init__(task, appcfg, clientid)
        self.cookie = self.task.cookie
        self.ha = HttpAccess()
        if self.cookie:
            self.ha._managedCookie.add_cookies('ly.com', self.cookie)

    def _cookie_login(self):
        url = 'https://member.ly.com/information'
        headers = """
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        Cache-Control: no-cache
        Connection: keep-alive
        Host: member.ly.com
        Pragma: no-cache
        Referer: https://member.ly.com/order
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
        html = self.ha.getstring(url, headers=headers).replace('\r', '').replace(
            '\n', '').replace('\t', '')
        html = re.sub(r'\s{2,}', '', html)
        soup = BeautifulSoup(html, 'lxml')
        try:
            phone = soup.select_one("#tel").get_text('-')
            phone = phone.split('-')[1]
            userid = substring(self.cookie, 'userid=', '&')
            if userid:
                self.userid = userid + '-tongcheng'
                return True
            elif phone:
                self.userid = phone + '-tongcheng'
                return True
            else:
                return False
        except:
            return False

    def _get_profile(self):
        try:
            url = 'https://member.ly.com/information'
            headers = """
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: member.ly.com
            Pragma: no-cache
            Referer: https://member.ly.com/order
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers).replace(
                '\r', '').replace('\n', '').replace('\t', '')
            html = re.sub(r'\s{2,}', '', html)
            soup = BeautifulSoup(html, 'lxml')
            detail = {}
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            try:
                res.nickname = soup.select_one('#txtsmallName')['value']
            except:
                pass
            try:
                detail['realname'] = soup.select_one('#txtName')['value']
            except:
                pass
            try:
                email = soup.select_one("#email").get_text('-')
                res.email = email.split('-')[1]
            except:
                pass
            try:
                phone = soup.select_one("#tel").get_text('-')
                res.phone = phone.split('-')[1]
            except:
                pass
            try:
                text = substring(html, 'class="sex1"', 'checked="checked"')
                sexnum = re.findall(r'name="sex"', text)
                if len(sexnum) == 1:
                    res.gender = EGender.Male
                elif len(sexnum) == 2:
                    res.gender = EGender.Female
            except:
                pass
            try:
                detail['profession'] = soup.select_one('#ddlZhiye').get_text()
            except:
                pass
            try:
                res.birthday = soup.select_one('#hfYear')['value'] + '-' + \
                    soup.select_one('#hfMonth')['value'] + '-' + \
                    soup.select_one('#hfDay')['value']
            except:
                pass
            try:
                detail['QQ'] = soup.select_one('#txtQQ')['value']
            except:
                pass
            res.append_details(detail)
            photourl = soup.select_one('#contentHead img.png')['src']
            if photourl:
                photourl = 'https:' + photourl
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)
                resp_stream: ResponseIO = self.ha.get_response_stream(photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            self._logger.error('{} got profile fail {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        try:
            page = 0
            while True:
                page += 1
                url = 'https://member.ly.com/orderajax/default?OrderFilter=0&DateType=0&PageIndex={}'.format(
                    page)
                headers = """
                Accept: */*
                Accept-Encoding: gzip, deflate, br
                Accept-Language: zh-CN,zh;q=0.9
                Cache-Control: no-cache
                Connection: keep-alive
                Host: member.ly.com
                Pragma: no-cache
                Referer: https://member.ly.com/order
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
                X-Requested-With: XMLHttpRequest"""
                html = self.ha.getstring(url, headers=headers)
                jshtml = json.loads(html)
                OrderDetailList = jshtml['ReturnValue']['OrderDetailList']
                if OrderDetailList:
                    for OrderDetail in OrderDetailList:
                        try:
                            orderid = OrderDetail['OrderId']
                            order = {}
                            order['title'] = OrderDetail['ProductName']
                            order['FirstDesc'] = OrderDetail['FirstDesc']
                            order['price'] = OrderDetail['ProductPrice']
                            order['status'] = OrderDetail['ChieseOrderStatus']
                            order['SerialId'] = OrderDetail['SerialId']
                            order['ExtendData'] = OrderDetail['ExtendData']
                            OrderDetailUrl = OrderDetail['OrderDetailUrl']
                            ordertime, detail = self._order_detail(
                                OrderDetailUrl)
                            if detail:
                                order['detail'] = detail
                            res_one = ITRAVELORDER_ONE(self.task,
                                                       self._appcfg._apptype,
                                                       self.userid, orderid)
                            res_one.append_orders(order)
                            res_one.ordertime = ordertime
                            res_one.host = 'www.ly.com'
                            yield res_one
                        except:
                            pass
                OrderListCount = jshtml['ReturnValue']['OrderListCount']
                if OrderListCount <= 10 * page:
                    break
        except Exception:
            self._logger.error('{} got order fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _order_detail(self, orderurl):
        orderurl = 'https:' + orderurl
        headers = """
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        Cache-Control: no-cache
        Connection: keep-alive
        Host: member.ly.com
        Pragma: no-cache
        Referer: https://member.ly.com/order
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
        html = self.ha.getstring(orderurl, headers=headers).replace('\n', '').replace(
            '\t', '')
        soup = BeautifulSoup(html, 'lxml')
        # Train tickets are all JS-encrypted; not captured for now.
        dic = {}
        ordertime = ''
        # Scenic-spot tickets
        if re.findall(r'Scenery', orderurl):
            ordertime = substring(html, '创建时间:', ' <')
            dic['Contacts'] = soup.select_one('.infor_box table').get_text(' ')
        # Flights
        elif re.findall(r'Flight', orderurl):
            ordertime = soup.select_one('.orderTime span').get_text('')
            dic['Passenger'] = soup.select_one('.no_bottom.infoLine').get_text(' ')
            dic['Contacts'] = soup.select_one('.contactPerson').get_text(' ')
        # Hotels
        elif re.findall(r'hotel', orderurl):
            ordertime = soup.select_one('.time-point').get_text(' ')
            dic['checkinPerson'] = soup.select_one(
                '.checkin-info.part').get_text(' ')
        return ordertime, dic
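The substring() helper used throughout these spiders is not defined in this section. Judging from call sites such as substring(self.cookie, 'userid=', '&'), it plausibly returns the text between the first occurrence of the start marker and the next occurrence of the end marker; a minimal sketch under that assumption:

def substring(text: str, start: str, end: str):
    """Assumed semantics: the text between the first `start` and the
    following `end`; returns None when either marker is missing, which
    matches the `if userid:`-style guards at the call sites."""
    if not text:
        return None
    begin = text.find(start)
    if begin < 0:
        return None
    begin += len(start)
    stop = text.find(end, begin)
    if stop < 0:
        return None
    return text[begin:stop]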
class SpiderTuniu(SpiderTravelBase):

    def __init__(self, task, appcfg, clientid):
        super(SpiderTuniu, self).__init__(task, appcfg, clientid)
        self.cookie = self.task.cookie
        self.ha = HttpAccess()
        if self.cookie:
            self.ha._managedCookie.add_cookies('tuniu.com', self.cookie)

    def _check_registration(self):
        """
        Check whether the phone number is registered with Tuniu.
        :param account:
        :return:
        """
        t = time.strftime('%Y-%m-%d %H:%M:%S')
        try:
            url = "https://passport.tuniu.com/register"
            html = self._ha.getstring(url,
                                      headers="""
            accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            accept-encoding: gzip, deflate, br
            accept-language: zh-CN,zh;q=0.9
            cache-control: no-cache
            pragma: no-cache
            upgrade-insecure-requests: 1
            user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36""")
            headers = """
            Accept: */*
            Content-Type: application/x-www-form-urlencoded; charset=UTF-8
            Origin: https://passport.tuniu.com
            Referer: https://passport.tuniu.com/register
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
            X-Requested-With: XMLHttpRequest"""
            url = 'https://passport.tuniu.com/register/isPhoneAvailable'
            postdata = f"intlCode=0086&tel={self.task.phone}"
            html = self._ha.getstring(url, headers=headers, req_data=postdata)
            if '"errno":-1,' in html:
                self._write_task_back(ECommandStatus.Succeed, 'Registered', t,
                                      EBackResult.Registerd)
            else:
                self._write_task_back(ECommandStatus.Succeed, 'Not Registered',
                                      t, EBackResult.UnRegisterd)
        except Exception:
            self._logger.error('Check registration fail: {}'.format(
                traceback.format_exc()))
            self._write_task_back(ECommandStatus.Failed,
                                  'Check registration fail', t,
                                  EBackResult.CheckRegisterdFail)
        return

    def _cookie_login(self):
        url = 'https://i.tuniu.com/usercenter/usercommonajax/japi'
        headers = """
        Accept: application/json, text/javascript, */*; q=0.01
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        Cache-Control: no-cache
        Connection: keep-alive
        Content-Length: 76
        Content-Type: application/x-www-form-urlencoded; charset=UTF-8
        Host: i.tuniu.com
        Origin: https://i.tuniu.com
        Pragma: no-cache
        Referer: https://i.tuniu.com/userinfoconfirm
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
        X-Requested-With: XMLHttpRequest"""
        postdata = 'serviceName=MOB.MEMBERS.InnerController.getUserInfo&serviceParamsJson=%7B%7D'
        try:
            html = self.ha.getstring(url, headers=headers, req_data=postdata)
            jshtml = json.loads(html)
            userid = jshtml['data']['data']['userId']
            if userid:
                self.userid = str(userid) + '-tuniu'
                return True
            else:
                return False
        except:
            return False

    def _get_profile(self):
        try:
            url = 'https://i.tuniu.com/usercenter/usercommonajax/japi'
            headers = """
            Accept: application/json, text/javascript, */*; q=0.01
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Content-Length: 76
            Content-Type: application/x-www-form-urlencoded; charset=UTF-8
            Host: i.tuniu.com
            Origin: https://i.tuniu.com
            Pragma: no-cache
            Referer: https://i.tuniu.com/userinfoconfirm
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
            X-Requested-With: XMLHttpRequest"""
            postdata = 'serviceName=MOB.MEMBERS.InnerController.getUserInfo&serviceParamsJson=%7B%7D'
            html = self.ha.getstring(url, headers=headers, req_data=postdata)
            jshtml = json.loads(html)
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            userid = jshtml['data']['data']['userId']
            res.nickname = jshtml['data']['data']['nickName']
            res.phone = jshtml['data']['data']['tel']
            res.birthday = jshtml['data']['data']['birthday']
            res.email = jshtml['data']['data']['email']
            res.address = jshtml['data']['data']['additionalAddress']
            sex = jshtml['data']['data']['sex']
            if sex == 1:
                res.gender = EGender.Male
            elif sex == 0:
                res.gender = EGender.Female
            else:
                res.gender = EGender.Unknown
            detail = jshtml['data']['data']
            res.append_details(detail)
            photourl = jshtml['data']['data']['largeAvatarUrl']
            if photourl:
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)
                resp_stream: ResponseIO = self.ha.get_response_stream(photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            self._logger.error('{} got profile fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        try:
            page = 0
            while True:
                page += 1
                url = 'https://i.tuniu.com/usercenter/usercommonajax/japi/getOrderList?serviceName=MOB.MEMBER.InnerOrderController.getOrderList&serviceParamsJson=%7B%22type%22%3A0%2C%22page%22%3A{}%2C%22status%22%3A0%2C%22size%22%3A5%7D&_={}'.format(
                    page,
                    int(
                        datetime.now(
                            pytz.timezone('Asia/Shanghai')).timestamp() * 1000))
                headers = """
                Accept: application/json, text/javascript, */*; q=0.01
                Accept-Encoding: gzip, deflate, br
                Accept-Language: zh-CN,zh;q=0.9
                Cache-Control: no-cache
                Connection: keep-alive
                Host: i.tuniu.com
                Pragma: no-cache
                Referer: https://i.tuniu.com/list/
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
                X-Requested-With: XMLHttpRequest"""
                html = self.ha.getstring(url, headers=headers)
                jshtml = json.loads(html)
                orderList = jshtml['data']['data']['orderList']
                if orderList:
                    for order in orderList:
                        try:
                            orderid = order['orderId']
                            ordertime = order['orderTime']
                            res_one = ITRAVELORDER_ONE(self.task,
                                                       self._appcfg._apptype,
                                                       self.userid, orderid)
                            res_one.append_orders(order)
                            res_one.ordertime = ordertime
                            res_one.host = 'www.tuniu.com'
                            yield res_one
                        except:
                            pass
                totalpage = jshtml['data']['data']['totalPage']
                if totalpage <= page:
                    break
        except Exception:
            self._logger.error('{} got order fail: {}'.format(
                self.userid, traceback.format_exc()))
class SpiderTaoBao(SpiderShoppingBase):

    def __init__(self, task, appcfg, clientid):
        super(SpiderTaoBao, self).__init__(task, appcfg, clientid)
        self.time = datetime.datetime.now(
            pytz.timezone('Asia/Shanghai')).strftime('%Y-%m-%d %H:%M:%S')
        self._ha = HttpAccess()
        self.userid = ''
        self.cookie = self.task.cookie

    def _cookie_login(self):
        self._ha._managedCookie.add_cookies("taobao.com", self.cookie)
        try:
            response = self._ha.getstring(
                'https://member1.taobao.com/member/fresh/account_security.htm')
            soup1 = BeautifulSoup(response, 'html.parser')
            account = soup1.find_all(
                "span", {"class": "default grid-msg "})[0].get_text()
            if account:
                self.userid = account + '-taobao'
                return True
            else:
                return False
        except:
            return False

    # def _needcode(self):
    #     # Check whether a captcha is required; usually it is not. If it is,
    #     # stop here -- captcha handling is still unsolved.
    #     r = self._ha.getstring('https://login.taobao.com/member/request_nick_check.do?_input_charset=utf-8')
    #     pat = re.compile(r'"needcode":false')
    #     isneed = pat.findall(r)
    #     if isneed:
    #         res = True
    #     else:
    #         res = False
    #     return res
    #
    # def pwd_login(self):
    #     if self._needcode():
    #         # Log in manually once to obtain the account and the encrypted
    #         # 256-bit password. um_token and ua can be reused afterwards.
    #         try:
    #             # Get um
    #             url = 'https://ynuf.alipay.com/service/um.json'
    #             data = '''data=ENCODE~~V01~~eyJ4diI6IjMuMy43IiwieHQiOiJDMTUzOTc1ODg3MzI3OTkwODY1NDAzMjI5MTUzOTc1ODg3MzI3OTI5NSIsImV0ZiI6InUiLCJ4YSI6InRhb2Jhb19sb2dpbiIsInNpdGVJZCI6IiIsInVpZCI6IiIsImVtbCI6IkFBIiwiZXRpZCI6IiIsImVzaWQiOiIiLCJ0eXBlIjoicGMiLCJuY2UiOnRydWUsInBsYXQiOiJXaW4zMiIsIm5hY24iOiJNb3ppbGxhIiwibmFuIjoiTmV0c2NhcGUiLCJubGciOiJ6aC1DTiIsInN3IjoxNDQwLCJzaCI6OTAwLCJzYXciOjE0NDAsInNhaCI6ODYwLCJic3ciOjE0MTUsImJzaCI6OTE5LCJlbG9jIjoiaHR0cHMlM0ElMkYlMkZsb2dpbi50YW9iYW8uY29tJTJGbWVtYmVyJTJGbG9naW4uamh0bWwiLCJldHoiOjQ4MCwiZXR0IjoxNTM5NzU4ODczNDYxLCJlY24iOiJiNmUzNGRlZDBhMGQxMWFkOWJhM2Q5MjI0MmIyZWExZThhMmU5MTYxIiwiZWNhIjoiRk1sTkZHUkJ2alVDQVdYTWU5ZGN6QU5CIiwiZXJkIjoiZGVmYXVsdCxjb21tdW5pY2F0aW9ucyxhOTY4MWU4MTYwMzk5ZGVmMjkwN2IzM2JlMDFjZDU1ZDVmY2Q0NTUyYWE0MmNjZGYxZDc0MzljNmNlM2VkNDVkIiwiY2FjaGVpZCI6ImE2MTU1OGRkMDk0ZGJjNDciLCJ4aCI6IiIsImlwcyI6IjE5Mi4xNjguNDAuMjciLCJlcGwiOjMsImVwIjoiMmZiZjRhMGQzNDIxNGQ0ZmRlNmNjOGEyMjg5N2QxMTVhNzY2NzgxMSIsImVwbHMiOiJDMzcwYzMwN2Y0YWNhNzg1ODQ5M2RmZTMyMjI1NGU1Y2I0MzhiZTk0NCxOMGZjZDZlMThmZjZkZjc0Zjk4YTY5OGI3ZjZiNmQ4MzhhNmMxMWU2OSIsImVzbCI6ZmFsc2V9'''
    #             r0 = self._ha.getstring(url, req_data=data)
    #             patum = re.compile(r'{"tn":"(.*?)"')
    #             um = patum.findall(r0)[0]
    #
    #             url = 'https://login.taobao.com/member/login.jhtml?redirectURL=http%3A%2F%2Fuland.taobao.com%2Fsem%2Ftbsearch%3Fkeyword%3D%25E7%25BD%2591%25E6%25B7%2598%25E5%25AE%259D%26refpid%3Dmm_26632258_3504122_32538762%26clk1%3D96c17d51a8de3455444def907818d976%26upsid%3D96c17d51a8de3455444def907818d976'
    #             data = """TPL_username={account}&TPL_password=&ncoSig=&ncoSessionid=&ncoToken=8374672d18e483bd0f6f39b0638cf4f717e652a3&slideCodeShow=false&useMobile=false&lang=zh_CN&loginsite=0&newlogin=0&TPL_redirect_url=https%3A%2F%2Fi.taobao.com%2Fmy_taobao.htm%3Fspm%3Da2e15.8261149.1997525045.1.513d29b4SpO5fP&from=tbTop&fc=default&style=&css_style=&keyLogin=false&qrLogin=true&newMini=false&newMini2=false&tid=&loginType=3&minititle=&minipara=&pstrong=&sign=&need_sign=&isIgnore=&full_redirect=&sub_jump=&popid=&callback=&guf=&not_duplite_str=&need_user_id=&poy=&gvfdcname=10&gvfdcre=68747470733A2F2F756C616E642E74616F62616F2E636F6D2F73656D2F74627365617263683F7265667069643D6D6D5F32363633323235385F333530343132325F3332353338373632266B6579776F72643D26636C6B313D37333763303966343036323835646335356337353734373936303632366633362675707369643D3733376330396634303632383564633535633735373437393630363236663336&from_encoding=&sub=true&TPL_password_2={password}&loginASR=1&loginASRSuc=1&allp=&oslanguage=zh-CN&sr=1440*900&osVer=&naviVer=chrome%7C70.035284&osACN=Mozilla&osAV=5.0+%28Windows+NT+10.0%3B+Win64%3B+x64%29+AppleWebKit%2F537.36+%28KHTML%2C+like+Gecko%29+Chrome%2F70.0.3528.4+Safari%2F537.36&osPF=Win32&miserHardInfo=&appkey=00000000&nickLoginLink=&mobileLoginLink=https%3A%2F%2Flogin.taobao.com%2Fmember%2Flogin.jhtml%3Ffrom%3Dtaobaoindex%26f%3Dtop%26style%3D%26sub%3Dtrue%26redirect_url%3Dhttps%3A%2F%2Fi.taobao.com%2Fmy_taobao.htm%3Fspm%3Da2e15.8261149.1997525045.1.513d29b4SpO5fP%26useMobile%3Dtrue&showAssistantLink=&um_token={um}&ua=112%23y7ZAac4WDEN%2B4mHzkW7CXzLl83YWtnWIfYqlxuD9pkxezGSBy82xHdU1%2Bz9HuXkMfR26HWpFZNWOI4DAmypKUNAFwuPH73TKN9emJlAiVVAQ1PxFlwKtsUH2e1JUVzTAlhrXT3HKWgp97V6117sF07h%2B80F1DbfQOC9S%2BcI0LOuAZ9EhTbhtawmxPLUgc7ub3BPq6XoCuCKcHPW2ab%2Fm07lh4UXqZAHrs4SsVffYA8usbPYVhkr9qNcciJ7oQ9tKSydH16TUwGpoLupH6oZ73pNyazmIkh6ngIefwMzYow%2Fw1%2FrQuUvVNxvF2%2BBd0ZxNkGGL8smu0EuIN4tVkqotEe6vLvwlyfhtDBLx6w3r%2Bn1GcfNVfVcDcCfNryZRhVyJceksSl%2Bz3yuxMuvikeUYKCqr9nN%2B5R5VHeVf83cA7e6XP5ApLtdMNhMdPdk8crONeCpmo1F9F6695Sajqz0KXIfDZbh5vnjvfIU4bvMPZt48%2BSN8boo9M%2BPfJX1%2B%2Bpy4edvCrktGxHvLBLyg1d3pH1t7qFSxN6VmvpggiSOD1EOchJl1ayWIOIi9i4OGEPZY12XkWZM%2F0U0ZCTPuV8oeoD1FAeyNaDWtDO7pBs0ZZtW7lKC5wQx9vfV68F%2B0cgH24SToFWlHCsBX5WF9l0SsozHR%2F7xqN2xOZ%2FwDc62bh9LzmIXop7l%2Bsi5lpcR5u2nOzroru4xSgyH5pDQ2AcIumWGEuTyF4V808dkWo0ng9QApgco8KtNUfLQzZpCGh%2BZWtnON1vPOb3SexR5fLREAb1mM%2Fc7Uc9FDpP%2F17MMZpnL5krOZA4l%2B7fysXJ4KiqEDhPnz7cMZrqCw%2F%2FMxiR3sKfm%2F%2Fc7%2BTAv4Yy3g5WX5QcXcYm5qvRKPMl1VpYetEoIcx%2FK9v26IgW3PWh5u0%2F3mN87FlXNOw%2Bw09BTeD67nz4NITOPlhlYPH6XyGIfxnzwsJKzViMWPvgxfwlTCEUiGlafbi8Oy%2FDLyFEL3kj06%2FB%2B4WLdskNlinhO5TfulxWaZbCjcDR%2FSVMovkXl7B6rT4O1GeOq6qSN1gI%2Bi5fv0U4Qo9xz%2BcQg9A95Go0XwHaFUo7f5QkeeP%2F3hdd%2Bu5aj5IhBnl1D7lZzlUt1QQC2Fz%2B8uHk3X%2Beduq37gWATJBAU07MhWd%2Fq0Ou7lz9KjiVbarWJb0vQqyM40SJlsF6OWjaQ%2FVyRNMwi5afotZrV99yuInrrSVoeZ%2BawQxsj7eLZ%2F1fySG%2BhBgmGcsUeMEIbgw8PsUGnu9vopAfDD2S8Zsxo5FiFV318D3%2BefR8EXItfuDcy0VpymDa9PKOiZxL3CN%2F4Ih66elaec61KGI9kzeFJQvHxJo3%2BE31wxDjqvGGfWzSKpiP8LJI9L2OrH%2FnCpPtb1O0nFRxXO0y0cf%2BKAXLybh88M9sRc%2BFoSjzP0UpDzAMGWwnPmCNuGNeBGmA47O%2BjSpQvPQZoHyARO1ck2pZgFN%2FUIK9dbB64vBoOambxtKL%2BSPX7b%2B4Uqkgn%2BS71UarxKI9Z9%2F%2Flz0szGzOK2O9BR1JRo0vqMI9pOOwC8U%2FVmfZ5SqC1VgduP983JSiEhof7LYnnlBhcIcyuuqqJ%2F4iUVzTXb%2BwDFs%2F2e%2BPjDf%2FQM%2Bq%2F6cclZYrZjuxqjI1e0FKcaPRuFBVcL%2BVx4Pf4GQ4u2JXluJv2jdihrAevHxu3Zx1HIFPuqOCIYBoDF4zqcKgBWiymhrN%2FsRpe%2B""".format(account=self.account, password=self.password, um=um)
    #             headers = """
    #             accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
    #             accept-encoding: gzip, deflate
    #             accept-language: zh-CN,zh;q=0.9
    #             Origin: https://login.taobao.com
    #             Upgrade-Insecure-Requests: 1
    #             Content-Type: application/x-www-form-urlencoded
    #             Connection: keep-alive
    #             Host: login.taobao.com
    #             Referer: https://login.taobao.com/member/login.jhtml?spm=a2e15.8261149.754894437.1.118f29b4TQ4p9O&f=top&redirectURL=http%3A%2F%2Fuland.taobao.com%2Fsem%2Ftbsearch%3Fkeyword%3D%25E7%25BD%2591%25E6%25B7%2598%25E5%25AE%259D%26refpid%3Dmm_26632258_3504122_32538762%26clk1%3D96c17d51a8de3455444def907818d976%26upsid%3D96c17d51a8de3455444def907818d976
    #             User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
    #             """
    #             r = self._ha.getstring(url, req_data=data, headers=headers, encoding='gb2312')
    #             pattoken = re.compile(r'token=(.*?)&')
    #             token = pattoken.findall(r)[0]
    #
    #             # Get st via the token
    #             url1 = 'https://passport.alibaba.com/mini_apply_st.js?site=0&token={token}&callback=callback'.format(token=token)
    #             headers1 = """
    #             Accept: */*
    #             Accept-Encoding: gzip, deflate, br
    #             Accept-Language: zh-CN,zh;q=0.9
    #             User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
    #             Referer: https://login.taobao.com/member/login.jhtml?redirectURL=http%3A%2F%2Fuland.taobao.com%2Fsem%2Ftbsearch%3Fkeyword%3D%25E7%25BD%2591%25E6%25B7%2598%25E5%25AE%259D%26refpid%3Dmm_26632258_3504122_32538762%26clk1%3D96c17d51a8de3455444def907818d976%26upsid%3D96c17d51a8de3455444def907818d976
    #             Host: passport.alibaba.com
    #             Connection: keep-alive
    #             """
    #             r1 = self._ha.getstring(url1, headers=headers1)
    #             patst = re.compile(r'st":"(.*?)"')
    #             st = patst.findall(r1)[0]
    #
    #             # Simulate login via st
    #             url2 = 'https://login.taobao.com/member/vst.htm?st={st}&TPL_username={account}'.format(st=st, account=self.account)
    #             headers2 = """
    #             accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
    #             accept-encoding: gzip, deflate, br
    #             accept-language: zh-CN,zh;q=0.9
    #             User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
    #             Upgrade-Insecure-Requests: 1
    #             Connection: keep-alive
    #             Host: login.taobao.com
    #             """
    #             r2 = self._ha.getstring(url2, headers=headers2)
    #         except Exception as ex:
    #             self._logger.error("Download error: %s" % ex)
    #             return False
    #         newcookie = self._ha._managedCookie.get_cookie_for_domain('https://www.taobao.com')
    #         # print(newcookie)
    #         self.cookie = newcookie
    #         self.task.cookie = newcookie
    #         return True

    def _get_profile(self):
        # Personal info
        try:
            url = 'https://i.taobao.com/user/baseInfoSet.htm'
            headers = """
            accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            accept-encoding: gzip, deflate, br
            accept-language: zh-CN,zh;q=0.9
            cookie: {cookie}
            upgrade-insecure-requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
            """.format(cookie=self.cookie)
            r = self._ha.getstring(url, headers=headers)
            # print(r)
            soup = BeautifulSoup(r, 'html.parser')
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            detail = {}
            photourl = soup.select_one('.pf-avatar img.png')['src']
            try:
                res.nickname = soup.select_one('#J_uniqueName')['value']
            except:
                pass
            try:
                detail['fullname'] = soup.select_one('#J_realname')['value']
            except:
                pass
            try:
                gender = soup.select_one('.except [checked="checked"]')['value']
                if gender == '0':
                    res.gender = EGender.Male
                elif gender == '1':
                    res.gender = EGender.Female
                else:
                    res.gender = EGender.Unknown
            except:
                pass
            try:
                year = soup.select_one('#J_Year [selected="selected"]')['value']
                month = soup.select_one('#J_Month [selected="selected"]')['value']
                data = soup.select_one('#J_Date [selected="selected"]')['value']
                res.birthday = year + '-' + month + '-' + data
            except:
                pass
            response = self._ha.getstring(
                'https://member1.taobao.com/member/fresh/account_security.htm')
            soup1 = BeautifulSoup(response, 'html.parser')
            try:
                res.account = soup1.find_all(
                    "span", {"class": "default grid-msg "})[0].get_text()
            except:
                pass
            try:
                res.email = soup1.find_all(
                    "span", {"class": "default grid-msg "})[1].get_text()
            except:
                pass
            try:
                res.phone = soup1.find("span", {
                    "class": "default grid-msg"
                }).get_text().strip()
            except:
                pass
            # Look the address up in the district table
            try:
                liveDivisionCode = soup.select_one(
                    'input#liveDivisionCode')['value']
                if liveDivisionCode:
                    detail['hometown'] = self._get_address(liveDivisionCode)
            except:
                pass
            try:
                divisionCode = soup.select_one('input#divisionCode')['value']
                if divisionCode:
                    res.address = self._get_address(divisionCode)
                res.detail = json.dumps(detail)
            except:
                pass
            if photourl:
                photourl = 'https:' + photourl
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)
                resp_stream: ResponseIO = self._ha.get_response_stream(photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            self._logger.error('{} got profile fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        try:
            self._ha._managedCookie.add_cookies("taobao.com", self.cookie)
            url = 'https://buyertrade.taobao.com/trade/itemlist/asyncBought.htm?action=itemlist/BoughtQueryAction&event_submit_do_query=1&_input_charset=utf8'
            i = 0
            while True:
                i += 1
                formdata = """dateBegin=0&dateEnd=0&options=0&pageNum={i}&pageSize=15&queryOrder=desc&prePageNo={j}""".format(
                    i=i, j=i - 1)
                headers = """
                accept: application/json, text/javascript, */*; q=0.01
                accept-encoding: gzip, deflate, br
                accept-language: zh-CN,zh;q=0.9
                cache-control: no-cache
                content-type: application/x-www-form-urlencoded; charset=UTF-8
                origin: https://buyertrade.taobao.com
                pragma: no-cache
                referer: https://buyertrade.taobao.com/trade/itemlist/list_bought_items.htm?spm=a1z02.1.a2109.d1000368.1c2d782dHeADbf&nekot=1470211439694
                user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
                x-requested-with: XMLHttpRequest
                """
                # cookie: {cookie}
                response = self._ha.getstring(url,
                                              headers=headers,
                                              req_data=formdata)
                html = json.loads(response)
                mainorders = html.get('mainOrders')
                if mainorders:
                    # print("Fetching page {0:d}.".format(i))
                    for order in mainorders:
                        try:
                            dic = {}
                            orderid = order.get('id')
                            dic['id'] = order.get('id')
                            dic['shopname'] = order.get('seller').get('shopName')
                            ordertime = order.get('orderInfo').get('createTime')
                            dic['actualFee'] = order.get('payInfo').get('actualFee')
                            dic['status'] = order.get('statusInfo').get('text')
                            goods = []
                            for item in order['subOrders']:
                                di = {}
                                try:
                                    di['title'] = item.get('itemInfo').get('title')
                                except:
                                    pass
                                try:
                                    di['quantity'] = item['quantity']
                                except:
                                    pass
                                try:
                                    di['skuText'] = item['itemInfo']['skuText']
                                except:
                                    pass
                                try:
                                    di['priceInfo'] = item['priceInfo']
                                except:
                                    pass
                                goods.append(di)
                            dic['goods'] = goods
                            res_one = ISHOPPING_ONE(self.task,
                                                    self._appcfg._apptype,
                                                    self.userid, orderid)
                            res_one.ordertime = ordertime
                            res_one.append_orders(dic)
                            res_one.host = 'www.taobao.com'
                            yield res_one
                        except:
                            pass
                    time.sleep(1)
                else:
                    break
        except Exception:
            self._logger.error('{} got order fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_address(self, adressid):
        table = self._ha.getstring(
            'https://www.taobao.com/home/js/sys/districtselector.js?t=20140318.js')
        patdz = re.compile(
            r'TB.form.DistrictSelector._tb_ds_data=(.*?);TB.form.Di')
        dzdata = patdz.findall(table)[0]
        jsdata = json.loads(dzdata)
        res = []
        if adressid is not None and adressid != '1':
            dz1 = jsdata[adressid]
            res = dz1[0]
            if dz1[1] != '1':
                dz2 = jsdata[dz1[1]]
                res = dz2[0] + res
                if dz2[1] != '1':
                    dz3 = jsdata[dz2[1]]
                    res = dz3[0] + res
        return res

    def _logout(self):
        res = False
        try:
            url = 'https://login.taobao.com/member/logout.jhtml?spm=a1z02.1.754894437.7.7016782dPtkeCQ&f=top&out=true&redirectURL=https%3A%2F%2Fi.taobao.com%2Fmy_taobao.htm%3Fspm%3Da2e15.8261149.754894437.3.555929b48sljpe%26ad_id%3D%26am_id%3D%26cm_id%3D%26pm_id%3D1501036000a02c5c3739%26nekot%3DdGI4NTgzMzYzXzAw1553481160507'
            html = self._ha.getstring(url,
                                      headers="""
            Host: login.taobao.com
            Connection: keep-alive
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Referer: https://i.taobao.com/my_taobao.htm?spm=a2e15.8261149.754894437.3.555929b48sljpe&ad_id=&am_id=&cm_id=&pm_id=1501036000a02c5c3739&nekot=dGI4NTgzMzYzXzAw1553481160507
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            """)
            res = self._cookie_login()
            if not res:
                res = True
        except Exception:
            self._logger.error('login out fail:{}'.format(
                traceback.format_exc()))
        return res
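_get_address above resolves a Taobao division code by walking the table from districtselector.js upward: each entry maps a code to [name, parent_code], and parent names are prepended until the root id '1' is reached. A self-contained sketch of that walk over made-up sample data:

# Illustrative subset only; the real table is parsed out of
# https://www.taobao.com/home/js/sys/districtselector.js
jsdata = {
    '110000': ['Beijing', '1'],
    '110100': ['Beijing City', '110000'],
    '110101': ['Dongcheng District', '110100'],
}


def resolve_address(table: dict, code: str) -> str:
    """Prepend each parent's name until the root id '1', as _get_address does."""
    res = ''
    while code and code != '1':
        name, parent = table[code]
        res = name + res
        code = parent
    return res


print(resolve_address(jsdata, '110101'))
# -> 'BeijingBeijing CityDongcheng District'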
class SpiderMafengwo(SpiderTravelBase):

    def __init__(self, task, appcfg, clientid):
        super(SpiderMafengwo, self).__init__(task, appcfg, clientid)
        self.ha = HttpAccess()
        if self.task.cookie:
            self.ha._managedCookie.add_cookies('mafengwo.cn', self.task.cookie)

    def _check_registration(self):
        """
        Check whether the phone number is registered with Mafengwo.
        :param account:
        :return:
        """
        t = time.strftime('%Y-%m-%d %H:%M:%S')
        try:
            phone = self.task.phone
            url = 'https://passport.mafengwo.cn/regist.html'
            headers = """
            Host: passport.mafengwo.cn
            Connection: keep-alive
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Referer: http://www.mafengwo.cn/?mfw_chid=3546
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            """
            html = self.ha.getstring(url, headers=headers)
            token = substring(html, 'name="token" value="', '"')
            url = 'https://passport.mafengwo.cn/regist'
            headers = """
            Host: passport.mafengwo.cn
            Connection: keep-alive
            Content-Length: 59
            Cache-Control: max-age=0
            Origin: https://passport.mafengwo.cn
            Upgrade-Insecure-Requests: 1
            Content-Type: application/x-www-form-urlencoded
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Referer: https://passport.mafengwo.cn/regist.html
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            """
            data = f"token={token}&passport={phone}"
            html = self.ha.getstring(url, headers=headers, req_data=data)
            isreg = re.findall(r'<div class="alert alert-danger">', html)
            if isreg:
                self._write_task_back(ECommandStatus.Succeed, 'Registered', t,
                                      EBackResult.Registerd)
            else:
                self._write_task_back(ECommandStatus.Succeed, 'Not Registered',
                                      t, EBackResult.UnRegisterd)
        except Exception:
            self._logger.error('Check registration fail: {}'.format(
                traceback.format_exc()))
            self._write_task_back(ECommandStatus.Failed,
                                  'Check registration fail', t,
                                  EBackResult.CheckRegisterdFail)
        return

    def _cookie_login(self):
        try:
            url = 'https://passport.mafengwo.cn/setting/security/'
            headers = """
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: passport.mafengwo.cn
            Pragma: no-cache
            Referer: https://passport.mafengwo.cn/setting/
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers)
            userid = substring(html, '"UID":', ',')
            if userid:
                self.userid = userid + '-mafengwo'
                return True
            else:
                return False
        except Exception:
            self._logger.error('Mafengwo cookie login error: {}'.format(
                traceback.format_exc()))
            return False

    def _get_profile(self):
        try:
            url = 'https://passport.mafengwo.cn/setting/'
            headers = """
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: passport.mafengwo.cn
            Pragma: no-cache
            Referer: https://www.mafengwo.cn
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers)
            soup = BeautifulSoup(html, 'lxml')
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            res.nickname = soup.select_one('[name="name"]')['value']
            # Gender flag: 1 = male, 0 = female, 2 = private
            sex = soup.select_one('[checked="true"]')['value']
            if sex == '1':
                res.gender = EGender.Male
            elif sex == '0':
                res.gender = EGender.Female
            else:
                res.gender = EGender.Unknown
            res.city = soup.select_one('[name="city"]')['value']
            res.birthday = soup.select_one('[name="birthday"]')['value']
            detail = {}
            detail['introduce'] = soup.select_one('[name="intro"]').get_text()
            if detail['introduce']:
                res.append_details(detail)
            url = 'https://passport.mafengwo.cn/setting/security/'
            headers = """
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: passport.mafengwo.cn
            Pragma: no-cache
            Referer: https://passport.mafengwo.cn/setting/
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers).replace('\n', '')
            html = re.sub(r'\s{2,}', '', html)
            soup = BeautifulSoup(html, 'lxml')
            userid = substring(html, '"UID":', ',')
            email = soup.select('.userpass dd')[1].get_text('-')
            res.email = email.split('-')[0]
            phone = soup.select('.userpass dd')[2].get_text('-')
            res.phone = phone.split('-')[0]
            url = 'https://pagelet.mafengwo.cn/user/apps/pagelet/pageViewHeadInfo?callback=jQuery181042165802873390845_{}&params=%7B%22type%22%3A1%7D&_={}'.format(
                int(
                    datetime.now(pytz.timezone('Asia/Shanghai')).timestamp() *
                    1000),
                int(
                    datetime.now(pytz.timezone('Asia/Shanghai')).timestamp() *
                    1000))
            headers = """
            Accept: */*
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: pagelet.mafengwo.cn
            Pragma: no-cache
            Referer: https://passport.mafengwo.cn/setting/
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers).replace('\\', '')
            photourl = substring(html, '<img.png src="', '"')
            if photourl:
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)
                resp_stream: ResponseIO = self._ha.get_response_stream(photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            self._logger.error('{} got profile fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        try:
            start = -10
            while True:
                start += 10
                url = f'https://www.mafengwo.cn/order_center/?status=0&start={start}'
                headers = """
                accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
                accept-encoding: gzip, deflate, br
                accept-language: zh-CN,zh;q=0.9
                cache-control: no-cache
                pragma: no-cache
                referer: https://www.mafengwo.cn/order_center/?status=0&start=0
                upgrade-insecure-requests: 1
                user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
                html = self.ha.getstring(url, headers=headers).replace(
                    '\r', '').replace('\n', '').replace('\t', '')
                html = re.sub(r'\s{2,}', ' ', html)
                soup = BeautifulSoup(html, 'lxml')
                tables = soup.select('.order-item')
                for table in tables:
                    try:
                        order = {}
                        orderid = table.attrs.get('data-id')
                        order['orderid'] = orderid
                        ordertime = table.select_one('.time').get_text()
                        try:
                            order['supplier'] = table.select_one(
                                '.supplier').get_text()
                        except:
                            pass
                        try:
                            order['telphone'] = table.select_one(
                                '.telphone').get_text()
                        except:
                            pass
                        order['pro-detail'] = table.select_one(
                            '.pro-detail').get_text(' ')
                        order['td-date'] = table.select_one(
                            '.td-date').get_text()
                        order['price'] = table.select_one(
                            '.td-cost').get_text()
                        order['status'] = table.select_one(
                            '.td-status').get_text()
                        try:
                            orderurl = table.select_one('caption a')['href']
                            if orderurl:
                                detail = self._order_detail(orderurl)
                                order['detail'] = detail
                        except:
                            pass
                        res_one = ITRAVELORDER_ONE(self.task,
                                                   self._appcfg._apptype,
                                                   self.userid, orderid)
                        res_one.append_orders(order)
                        res_one.ordertime = ordertime.split(':', 1)[1]
                        res_one.host = "www.mafengwo.cn"
                        yield res_one
                    except Exception:
                        self._logger.error(
                            'Mafengwo one order get fail: {}'.format(
                                traceback.format_exc()))
                if not tables:
                    break
        except Exception:
            self._logger.error('{} get order fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _order_detail(self, orderurl):
        orderurl = 'https://www.mafengwo.cn' + orderurl
        headers = """
        Host: www.mafengwo.cn
        Connection: keep-alive
        Pragma: no-cache
        Cache-Control: no-cache
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        """
        html = self.ha.getstring(orderurl, headers=headers).replace(
            '\r', '').replace('\n', '').replace('\t', '')
        # html = ha.get_response(orderurl, headers=headers)
        html = re.sub(r'\s{2,}', ' ', html)
        soup = BeautifulSoup(html, 'lxml')
        order = soup.select_one('.order-detail').get_text(' ')
        return order

    def logout(self):
        res = False
        try:
            url = 'https://passport.mafengwo.cn/logout.html'
            html = self._ha.getstring(url,
                                      headers="""
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Host: www.mafengwo.cn
            Pragma: no-cache
            Proxy-Connection: keep-alive
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36""")
            if not self._cookie_login():
                res = True
        except Exception:
            self._logger.error('log out fail: {}'.format(
                traceback.format_exc()))
        return res
class Seebug(AutoPluginBase):

    tasktype = EAutoType.EXPDB

    def __init__(self):
        AutoPluginBase.__init__(self)
        self.ha = HttpAccess()
        self._get_cookie()

    def _get_cookie(self):
        try:
            ic = False
            chrome_options = ChromeOptions()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            chrome_options.add_argument('blink-settings=imagesEnabled=false')
            # chrome_options.add_argument('--no-sandbox')
            driver = webdriver.Chrome(chrome_options=chrome_options)
            driver.get('https://www.seebug.org/')
            time.sleep(2)
            cookies = driver.get_cookies()
            l_cookie = ''
            for cookie in cookies:
                l_cookie = l_cookie + cookie['name'] + '=' + cookie['value'] + '; '
                if cookie['name'] == '__jsl_clearance':
                    ic = True
            self.cookie = l_cookie
            # print(self.cookie)
            self.ha._managedCookie.add_cookies('.seebug.org', self.cookie)
            if ic:
                self._logger.info('Got cookie success!')
            driver.close()
        except Exception:
            self._logger.error('Got cookie fail: {}'.format(
                traceback.format_exc()))

    def get_bug(self):
        page = 0
        max_page = None
        while True:
            page += 1
            fail_time = 0
            while True:
                url = f'https://www.seebug.org/vuldb/vulnerabilities?page={page}'
                html = self.ha.getstring(url,
                                         headers="""
                Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
                Accept-Encoding: gzip, deflate, br
                Accept-Language: zh-CN,zh;q=0.9
                Cache-Control: no-cache
                Connection: keep-alive
                Host: www.seebug.org
                Pragma: no-cache
                Upgrade-Insecure-Requests: 1
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36""")
                # "您访问频率太高,请稍候再试。" = rate-limit notice ("you are
                # visiting too frequently, please retry later")
                if '您访问频率太高,请稍候再试。' in html:
                    print(f'您访问频率太高,请稍候再试。{fail_time} * 5s')
                    fail_time += 1
                    if fail_time == 5:
                        print('Fail time outnumber 5!')
                        break
                    time.sleep(5)
                else:
                    break
            soup = BeautifulSoup(html, 'lxml')
            tbody = soup.select('tbody tr')
            if not max_page:
                max_page = soup.select_one('#J-jump-form input').attrs['max']
            for tr in tbody:
                self.bug_detail(tr, url)
            if int(max_page) <= page:
                break

    def bug_detail(self, tr, referer):
        try:
            b_url = 'https://www.seebug.org' + tr.select_one('td a').attrs['href']
            name = tr.select_one('.vul-title-wrapper a').get_text()
            datasource = 'seebug'
            id = tr.select_one('td a').get_text()
            date_published = tr.select_one(
                '.text-center.datetime.hidden-sm.hidden-xs').get_text()
            tooltip = tr.select_one(
                '[data-toggle="tooltip"]').attrs['data-original-title']
            # Severity tooltip: 高危 = high, 中危 = medium
            if tooltip == '高危':
                level = 3
            elif tooltip == '中危':
                level = 2
            else:
                level = 1
            fail_time = 0
            res = ExpDB(name, datasource, id, date_published, 0)
            while True:
                b_html = self.ha.getstring(b_url,
                                           headers=f"""
                Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
                Accept-Encoding: gzip, deflate, br
                Accept-Language: zh-CN,zh;q=0.9
                Cache-Control: no-cache
                Connection: keep-alive
                Host: www.seebug.org
                Pragma: no-cache
                Referer: {referer}
                Upgrade-Insecure-Requests: 1
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36""")
                if '您访问频率太高,请稍候再试。' in b_html:
                    fail_time += 1
                    print(f'您访问频率太高,请稍候再试。{fail_time} * 45s')
                    if fail_time == 5:
                        print('Fail time outnumber 5!')
                        break
                    t = random.randint(30, 60)
                    time.sleep(t)
                else:
                    break
            b_soup = BeautifulSoup(b_html, 'lxml')
            try:
                file_data = b_soup.select_one('#j-md-detail').get_text()
                # "登录后查看" = "log in to view"; skip detail bodies hidden
                # behind the login wall
                if '登录后查看' not in file_data:
                    description = f'datasource: seebug\nid: {id}\nname: {name}\nurl:{b_url}\n'
                    self.write_text_string(description, file_data,
                                           'iscan_expdb_doc')
            except:
                pass
            tags = []
            try:
                tag_type = b_soup.select('.bug-msg .col-md-4')[1].select_one(
                    'dd').get_text()
                tags.append(tag_type)
                res.tags = tags
            except:
                pass
            target = []
            try:
                tar = {}
                ta_type = b_soup.select_one(
                    '.hover-scroll a').get_text().replace('\n', '')
                ta_type = re.sub(r'\s{2,}', '', ta_type)
                tar['type'] = ta_type
                try:
                    version = b_soup.select_one('.hover-scroll').get_text()
                    version = substring(version, '(', ')')
                    tar['version'] = {'list': version}
                except:
                    pass
                target.append(tar)
                res.target = target
            except:
                pass
            cve_id = b_soup.select('.bug-msg .col-md-4')[2].select_one(
                'dd').get_text()
            code = []
            # "补充" marks a placeholder CVE entry awaiting completion
            if '补充' not in cve_id:
                cve = {}
                cve['code_type'] = 'cve'
                cve['code'] = cve_id.replace('\n', '')
                code.append(cve)
            res.code = code
            author = {}
            author['name'] = b_soup.select('.bug-msg .col-md-4')[1].select(
                'dd')[3].get_text()
            author['name'] = re.sub(r'\s{2,}', '', author['name'])
            res.author = author
            try:
                poc = b_soup.select_one('#J-poc').get_text()
                description = f'datasource: seebug\nid: {id}\nname: {name}\nurl:{b_url}\n'
                self.write_text_string(description, poc, 'iscan_expdb_exp')
            except:
                pass
            print(name, datasource, id, date_published, tooltip, level, tags,
                  code, author)
            time.sleep(3)
        except Exception:
            self._logger.error('Got bug detail fail: {}'.format(
                traceback.format_exc()))

    def start(self):
        self.get_bug()
# Assumed imports for this plugin (stdlib plus pytz); AutoPluginBase, EAutoType,
# HttpAccess and substring come from the project's own packages, whose import
# paths are not shown in this excerpt.
import json
import queue
import threading
import time
import traceback
from datetime import datetime

import pytz


class Exploit(AutoPluginBase):
    tasktype = EAutoType.EXPDB

    # exploit-db serves its index as a DataTables JSON endpoint; these headers
    # mimic the browser's XHR request.
    _JSON_HEADERS = """
    accept: application/json, text/javascript, */*; q=0.01
    accept-encoding: gzip, deflate, br
    accept-language: zh-CN,zh;q=0.9
    cache-control: no-cache
    pragma: no-cache
    referer: https://www.exploit-db.com/
    user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36
    x-requested-with: XMLHttpRequest"""

    def __init__(self):
        AutoPluginBase.__init__(self)
        self.ha = HttpAccess(0.5)
        # Session cookie captured from a browser; it expires and needs refreshing.
        self.ha._managedCookie.add_cookies(
            '.exploit-db.com',
            '_ga=GA1.3.659193532.1562029495; _gid=GA1.3.817890876.1562029495; _gat=1; XSRF-TOKEN=eyJpdiI6IkRQd3M1RHljcnhEM2hTVzhiMlcyV2c9PSIsInZhbHVlIjoiNzAwQVdWWFR3ck9oenBHWUd2a2NENTdrNXRqeWtOMU9iWk9pRGRxdVRFcGJyMmc0Q3gwQzBjbVg2bEdDWU5HTSIsIm1hYyI6IjFlMjRkMTcxMmFhODE1NzRmMDc0YWJlZTUzZTRlYTlmZjMyYTU1NDZjMjE5NjdkOTkzMGFjNDZlMzBhMWVjMjIifQ%3D%3D; exploit_database_session=eyJpdiI6IkE0QmJ3alZ3S0RFd3FHcnhQOTQzVGc9PSIsInZhbHVlIjoiYkpDNnh5azdtcmlOYms5cW5sMW9DUnhXYmhmSk9iZlVQa3pxOUVwUEpxQXhUeThMZUF3ZVBpMGRURzNcL1grNHAiLCJtYWMiOiIyODAzMmJjNjUzMWYyYzA0NmY0ZTYwMzFhYjg1YWUyOTc0OTMzNzBhYmYyZDc2MTkwZDYzYWY5Y2M5ZDhhMDI1In0%3D'
        )
        self.page_queue = queue.Queue()
        self.detailsuffix = 'iscan_expdb'  # feedback record that carries a file body
        self.scriptsuffix = 'iscan_expdb_exp'
        self.sourcecodesuffix = 'iscan_expdb_app'

    def _build_query_url(self, draw, start):
        """Build the DataTables query URL for one page (120 records per page)."""
        t = int(datetime.now(pytz.timezone('Asia/Shanghai')).timestamp() * 1000)
        return f'https://www.exploit-db.com/?draw={draw}&columns%5B0%5D%5Bdata%5D=date_published&columns%5B0%5D%5Bname%5D=date_published&columns%5B0%5D%5Bsearchable%5D=true&columns%5B0%5D%5Borderable%5D=true&columns%5B0%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B0%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B1%5D%5Bdata%5D=download&columns%5B1%5D%5Bname%5D=download&columns%5B1%5D%5Bsearchable%5D=false&columns%5B1%5D%5Borderable%5D=false&columns%5B1%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B1%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B2%5D%5Bdata%5D=application_md5&columns%5B2%5D%5Bname%5D=application_md5&columns%5B2%5D%5Bsearchable%5D=true&columns%5B2%5D%5Borderable%5D=false&columns%5B2%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B2%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B3%5D%5Bdata%5D=verified&columns%5B3%5D%5Bname%5D=verified&columns%5B3%5D%5Bsearchable%5D=true&columns%5B3%5D%5Borderable%5D=false&columns%5B3%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B3%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B4%5D%5Bdata%5D=description&columns%5B4%5D%5Bname%5D=description&columns%5B4%5D%5Bsearchable%5D=true&columns%5B4%5D%5Borderable%5D=false&columns%5B4%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B4%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B5%5D%5Bdata%5D=type_id&columns%5B5%5D%5Bname%5D=type_id&columns%5B5%5D%5Bsearchable%5D=true&columns%5B5%5D%5Borderable%5D=false&columns%5B5%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B5%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B6%5D%5Bdata%5D=platform_id&columns%5B6%5D%5Bname%5D=platform_id&columns%5B6%5D%5Bsearchable%5D=true&columns%5B6%5D%5Borderable%5D=false&columns%5B6%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B6%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B7%5D%5Bdata%5D=author_id&columns%5B7%5D%5Bname%5D=author_id&columns%5B7%5D%5Bsearchable%5D=false&columns%5B7%5D%5Borderable%5D=false&columns%5B7%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B7%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B8%5D%5Bdata%5D=code&columns%5B8%5D%5Bname%5D=code.code&columns%5B8%5D%5Bsearchable%5D=true&columns%5B8%5D%5Borderable%5D=true&columns%5B8%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B8%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B9%5D%5Bdata%5D=id&columns%5B9%5D%5Bname%5D=id&columns%5B9%5D%5Bsearchable%5D=false&columns%5B9%5D%5Borderable%5D=true&columns%5B9%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B9%5D%5Bsearch%5D%5Bregex%5D=false&order%5B0%5D%5Bcolumn%5D=9&order%5B0%5D%5Bdir%5D=desc&start={start}&length=120&search%5Bvalue%5D=&search%5Bregex%5D=false&author=&port=&type=&tag=&platform=&_={t}'

    def get_url(self):
        """Query page 1 to learn the total record count, then enqueue every page."""
        html = self.ha.getstring(self._build_query_url(1, 0), headers=self._JSON_HEADERS)
        jshtml = json.loads(html)
        total = jshtml['recordsTotal']
        max_page = int(total / 120) + 1
        for draw in range(1, max_page + 1):
            start = (draw - 1) * 120
            self.page_queue.put((draw, start))
        print('Got all pages! Start download!')

    def get_onepage(self, draw, start):
        html = self.ha.getstring(self._build_query_url(draw, start), headers=self._JSON_HEADERS)
        jshtml = json.loads(html)
        if not jshtml['data']:
            return
        for data in jshtml['data']:
            id = data['id']
            # Results are ordered by id descending, so the first already-stored
            # id means the rest of this page was crawled before.
            if self.is_expdbdata_unique(str(id) + 'exploit'):
                return
            name = data['description'][1]  # index 1 holds the human-readable title
            datasource = 'exploitdb'
            date_published = data['date_published']
            verified = data['verified']
            description = data['description']
            tags = []
            if data['tags']:
                tags.append(self.tag_mapping(data['tags'][0]['title']))
            # Pull a version number out of titles like "Foo 1.2.3 - RCE".
            version = {'list': []}
            ver = substring(name, ' ', ' - ')
            if ver and '.' in ver:
                version['list'].append(ver)
            target = [{
                'type': data['type']['name'],
                'platform': data['platform_id'],
                'version': version,
            }]
            author = {'name': data['author']['name']}
            code = []
            if data['code']:
                for dat in data['code']:
                    code.append({'code_type': dat['code_type'], 'code': dat['code']})
            app = []
            if data['application_md5'] and data['application_path']:
                ap = {}
                application_md5 = data['application_md5']
                ap['name'] = substring(application_md5, '<a href="/apps/', '"')
                ap['url'] = 'https://www.exploit-db.com/apps/' + ap['name']
                ap['path'] = self.download_app(ap['url'], id, ap['name'])
                app.append(ap)
            exp = {
                'name': str(id) + '.txt',
                'url': 'https://www.exploit-db.com/download/' + str(id),
            }
            exp['path'] = self.download_exploit(exp['url'], id, exp['name'])
            exploit = [exp]
            res = {
                'name': name,
                'datasource': datasource,
                'id': str(id),
                'date_published': date_published,
                'verified': verified,
                'description': description,
                'tags': tags,
                'target': target,
                'author': author,
                'code': code,
                'app': app,
                'exploit': exploit,
            }
            self.write_text(res, 'iscan_expdb')
            self.store_expdbdata_unique(str(id) + 'exploit')

    def _download(self, url, id, name, suffix, label):
        """Download one file with up to 3 retries; returns the output filename
        or None. download_exploit/download_app only differ in suffix and label."""
        failnum = 0
        while True:
            try:
                headers = f"""
                accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
                accept-encoding: gzip, deflate, br
                accept-language: zh-CN,zh;q=0.9
                cache-control: no-cache
                pragma: no-cache
                referer: https://www.exploit-db.com/exploits/{id}
                upgrade-insecure-requests: 1
                user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"""
                stream = self.ha.get_response_stream(url, headers=headers)
                description = f'datasource: exploitdb\nid: {id}\nname: {name}\nurl:{url}\n'
                data = stream.read()
                outname = self.write_text_binary(description, data, suffix)
                print(f"Output data, filename:{outname}")
                if failnum != 0:
                    self._logger.info(f'ID: {id} try download {label} again success!')
                return outname
            except Exception:
                failnum += 1
                self._logger.error(
                    f'ID: {id} download {label} error: {traceback.format_exc()}\ntry download again!')
                if failnum > 3:
                    self._logger.info(f'ID: {id} download {label} error over 3 times!')
                    return None
                time.sleep(3)

    def download_exploit(self, url, id, name):
        return self._download(url, id, name, 'iscan_expdb_exp', 'expdb_exp')

    def download_app(self, url, id, name):
        return self._download(url, id, name, 'iscan_expdb_app', 'expdb_app')

    def run(self):
        """Worker thread: drain the page queue; exit after 5 consecutive empty polls."""
        got = False
        none_count = 0
        draw = None
        while True:
            try:
                got = False
                draw, start = self.page_queue.get(timeout=3)
                got = True
                self.get_onepage(draw, start)
            except queue.Empty:
                none_count += 1
                if none_count >= 5:
                    break
                time.sleep(5)
            except Exception:
                print("Fetch error: page={} error={}".format(draw, traceback.format_exc()))
            finally:
                if got:
                    self.page_queue.task_done()

    def __del__(self):
        print("EXPdb mission accomplished")

    def start(self):
        self.get_url()
        ths = [
            threading.Thread(target=self.run, name=f'thread{i}')
            for i in range(7)
        ]
        for el in ths:
            el.start()
        for el in ths:
            el.join()
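# ----------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original source): driving the
# Exploit plugin standalone. It assumes the framework only needs the object
# constructed and start() called, and that the hard-coded exploit-db session
# cookie in __init__ has been refreshed (the one above has long expired).
if __name__ == '__main__':
    plugin = Exploit()
    plugin.start()  # get_url() enqueues every page, 7 worker threads drain them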
class SpiderSuning(SpiderShoppingBase):

    def __init__(self, task, appcfg, clientid):
        super(SpiderSuning, self).__init__(task, appcfg, clientid)
        self._ha = HttpAccess()
        self.userid = ""
        self.time = datetime.datetime.now(
            pytz.timezone("Asia/Shanghai")).strftime("%Y-%m-%d %H:%M:%S")
        if self.task.cookie:
            self._ha._managedCookie.add_cookies("suning.com", self.task.cookie)

    def _cookie_login(self):
        """Check that the stored cookie still logs in; on success fill self.userid."""
        res = False
        url = "http://my.suning.com/person.do"
        headers = """
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Accept-Encoding: gzip, deflate
        Accept-Language: zh-CN,zh;q=0.9
        Host: my.suning.com
        Proxy-Connection: keep-alive
        Referer: http://my.suning.com/person.do
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
        """
        try:
            r = self._ha.getstring(url, headers=headers, timeout=10)
            soup = BeautifulSoup(r, "lxml")
            # The page shows "您的会员编号为:<id>," ("your member number is ...").
            patuserid = re.compile(r"您的会员编号为:(.*?),", re.S)
            userid = patuserid.findall(str(soup))[0]
            if userid:
                self.userid = userid + "-suning"
                res = True
        except Exception:
            self._logger.error(
                f"Cookie login error, err:{traceback.format_exc()}")
        return res

    def _check_registration(self):
        """Check whether the phone number is registered with Suning: submit a
        login attempt and interpret a 'badPassword' error as 'account exists'."""
        t = time.strftime("%Y-%m-%d %H:%M:%S")
        try:
            headers = """
            Accept: application/json, text/javascript, */*; q=0.01
            Content-Type: application/x-www-form-urlencoded; charset=UTF-8
            Origin: http://passport.suning.com
            Referer: http://passport.suning.com/ids/login
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
            X-Requested-With: XMLHttpRequest
            """
            url = "http://passport.suning.com/ids/login"
            postdata = f"jsonViewType=true&username={self.task.phone}&password=&password2=Ujsa1wIs9Jnzn%2Fc%2BqT%2FyQldPMMWVrWviEorr1ku8VnnZGydpUB55QyQZso%2B1%2BZYP97u1MIlXMoBbCTkKRMURME7dMO%2BGIuA6RwVOmFCawDE%2FMYMtuO1PmhgwRlxurrcKF8uBep9Sf8D4dgTv7w%2F8rYqrI3cxUTWmpedBArbxQ6Y%3D&loginTheme=defaultTheme&service=&rememberMe=true&client=app&sceneId=logonImg&detect=mmds_3nZnnnuzF3MnnnuzT3Znnniz83Znnn3zP3MnnnnzL3nZnnnnzS3ZnnnnzN3Znnnvz13Mnnn0zq3Ynnn9zE3nZnnnYzR3Znnn-z43mnnnrzr3YnnnczU3u3nncz-3nvinnczU3MnnnczU3bCnnczr3Mnnnfz43Znnnhz433ZnnnEz43MnnnXzf3Znnn.zf3Znnn1zO3Ynnnkzh3nmnnnVzB3YnnnPzB3ZnnnJzB3MnnnbzB3MnnnbzB3nq3nnFzh3ZnnnFzO3YnnnKzR3mnnnpzf3Znnnlzf3nQnnnlzt3fnnnlz43n3nnpz43Kinnpzc3znnnKzc3nMnnnFzr3YnnnTz-3o3nnJz-3CnnnLz_3MnnnNzY3nZnnnCzY3Mnnn1GY3ZnnnuGY3ZnnnA7Y3Mnnn67Y3iYnnnO7Y3VnnnO7Y3FnnnO7Z3CnnnR7M3Ynnnt753nZnnn-7o3Mnnn_7I3-nnnY7I35nnn97I3Mnnno7I3nXpnno7I3MnnnY7Z3Znnnt7U3Znnn67t3Mnnnw7R3nZnnn17h3YnnnA7h393nns7h3znnnW7h3Mnnnw7h3nMnnn.7B3QnnnA7B3mnnnT763ZnnnoGX3Ynnn1Gs33Mnnnizk3MnnnZzd3Ynnntza3Mnnnhzj3LAnnuCni3Ynnn7C3imnnnGCiiPnnnzC7i9nnneCCiMnnn5Coi3Znnn5CMiYnnn5CZiynnn5CYiCnnn5C_i5nnn5CUinmnnneCfiZnnnCCBiMnnnzCEi_3nnzC6iT3nnzCgi36nnnzCgiMnnnzCXiYnnnzCqiJCnnGCqiMnnnuCdinMnnn~zQiYYnngzLiMnnn2zViMnnn2zViZnnn2zSinl3nnXzaiennnXzaiMnnnqzdiZnnnqzNiMnnnqzNinxYa2E~~j2tjE7R.fE2EjY.Pj7Y2PRPYf.EaYaz~zf6~Eju7.xa2jPxRttxDYYR~auzz~ajx8Pnn~C_36enniCuinXoncCviZ23nk0UiMgnn4C3iMjnnk0viQConMCnirMnn4zgiMG3nwztifINnyzMi853npz43353nhzj3cY3nqzNixa22txaEEjPERP2EPutxxn3iuv7GzC0xjxtxa22tx~utxaxtxGBI3.uZinnn1znn.u9innnTvnn.uz3nnnnnnn.usinnnC5nn.uc3nnn6inn.uW3nnnnvnn.uminnnT7nn.uJnnnnnnnn.usinnn67nn.uT3nnnx2jPxs7mnnnnnvnnniu3nnnnnSgnndnnnnnnnh2nni5nnnnnn4nnnEqnnnnnnJqnnfHInnnnnUMnnAI3nnnnni7nnunnnnnnninnnKt7nnnnnlt7niz1nnnnne53nxPERxMTOnyyxLK03GCniMnnnGCiiMnnnGCGiZnnnvCCiZnnniC0i3Mnnn3CeiYnnn3CIiZnnnnCoimnnnyz5iZnnnyz9inYnnnyzMibCnnTzMi9nnnNzMiYnnntzMiZnnnPGmi3ZnnnmG5iZnnnQ75iMnnnB75iZnnnY75iMnnn97oingnnn97IiMnnn970iYnnn970iMnnn97Ci.3nn57CinZnnni70imnnnTvIiZnnnSv9iZnnnkvMiMnnnsvYi3ZnnnWvYi83nnWv_ikvnnWv_iN4nnzGUiEnnnGGUiio3nn7GUiYnnnvGUifnnnuGriCnnnvGci6nnnzGti3ZnnnoGfimnnnYGfiYnnnRGRiMnnn2GRiZnnn1Gti3ZnnnVG-iMnnnPGMiYnnnKGIiZnnnyGziZnnnizii3mnnnvzniZnnn7zl3Znnn7zp3Qnnn7zK3Mnnnvt~EzLzt~u2zF_._796d0c53-7e52-42b9-a978-a8944ba6c172_._&dfpToken=THP7fd1696fcef06aX5E3e4d3&terminal=PC&loginChannel=208000103001"
            response = self._ha.getstring(url, headers=headers, req_data=postdata)
            if '"errorCode":"badPassword.msg1"' in response:
                self._write_task_back(ECommandStatus.Succeed, "Registered", t,
                                      EBackResult.Registerd)
            elif '"errorCode":"needVerifyCode"' in response:
                self._write_task_back(
                    ECommandStatus.Failed,
                    "Need VerifyCode!",
                    t,
                    EBackResult.CheckRegisterdFail,
                )
            else:
                self._write_task_back(ECommandStatus.Succeed, "Not Registered", t,
                                      EBackResult.UnRegisterd)
        except Exception:
            self._logger.error("Check registration fail: {}".format(
                traceback.format_exc()))
            self._write_task_back(
                ECommandStatus.Failed,
                "Check registration fail",
                t,
                EBackResult.CheckRegisterdFail,
            )
        return

    def _get_orders(self):
        """Fetch the order history, yielding one ISHOPPING_ONE per order."""
        headers = """
        Accept: text/html, */*; q=0.01
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        Cache-Control: no-cache
        Connection: keep-alive
        Host: order.suning.com
        Pragma: no-cache
        sec-ch-ua: "Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"
        sec-ch-ua-mobile: ?0
        Sec-Fetch-Dest: empty
        Sec-Fetch-Mode: cors
        Sec-Fetch-Site: same-origin
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36
        X-Requested-With: XMLHttpRequest
        """
        thistime = time.strftime("%Y-%m-%d")
        patorderlist = re.compile(r'<div class="table-list">')
        page = 1
        while True:
            try:
                url = f"https://order.suning.com/order/queryOrderList.do?transStatus=&pageNumber={page}&condition=&startDate=2009-01-01&endDate={thistime}&orderType="
                html = self._ha.getstring(url, headers=headers, timeout=10)
                orderlist = patorderlist.search(html)
                if orderlist:
                    soup = BeautifulSoup(html, "lxml")
                    orders = soup.select(".table-list .table-box")
                    for order in orders:
                        try:
                            dic1 = {}
                            patid = re.compile(r'id="table_box_(.*?)"', re.S)
                            orderid = patid.findall(str(order))[0]
                            ordertime = (order.select_one(".item span").get_text()
                                         + " " + "00:00:00")
                            dic1["shop"] = order.select(".item span")[1].get_text()
                            dic1["rowspan"] = order.select_one(".total-price").get("rowspan")
                            dic1["price"] = order.select_one(".total-price span").get_text()
                            # key "含运费" = "shipping included"
                            dic1["含运费"] = order.select_one(".total-price em").get_text()
                            dic1["status"] = order.select_one(".state .opt-item").get_text()
                            dic1["contact"] = (order.select_one(".tax-tip")
                                               .get_text(" ")
                                               .replace("\n", "").replace("\r", ""))
                            dic = []
                            for item in order.select("table .order-info"):
                                di = {
                                    "title": item.select_one('[name="pname_"]')["title"],
                                    "price": item.select_one(".price span").get_text(),
                                    "amount": item.select_one(".amount").get_text().strip(),
                                }
                                dic.append(di)
                            dic1["goods"] = dic
                            res_one = ISHOPPING_ONE(self.task, self._appcfg._apptype,
                                                    self.userid, orderid)
                            res_one.ordertime = ordertime
                            res_one.append_orders(dic1)
                            res_one.host = "www.suning.com"
                            yield res_one
                        except Exception:
                            self._logger.error(
                                f"Parser order error\nerr:\n{traceback.format_exc()}")
                            continue
                    time.sleep(1)
                    page += 1
                else:
                    break
            except Exception:
                self._logger.error("{} got order fail: {}".format(
                    self.userid, traceback.format_exc()))
                break  # avoid spinning forever on a persistently failing page

    def _get_profile(self):
        """Fetch the member profile (currently just the nickname)."""
        try:
            url = "http://my.suning.com/msi2pc/memberInfo.do"
            headers = """
            Accept: application/json, text/javascript, */*; q=0.01
            Accept-Encoding: gzip, deflate
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: my.suning.com
            Pragma: no-cache
            Referer: http://my.suning.com/
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36
            X-Requested-With: XMLHttpRequest
            """
            r = self._ha.getstring(url, headers=headers, timeout=10)
            rd = json.loads(r)
            nickname = rd.get("nickName")
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            res.nickname = nickname
            yield res
        except Exception:
            self._logger.error("{} got profile fail: {}".format(
                self.userid, traceback.format_exc()))
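# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): the call pattern the
# scheduler presumably follows for this spider. task, appcfg and clientid are
# assumed to be supplied by the framework and are not constructed here.
def _demo_suning(task, appcfg, clientid):
    spider = SpiderSuning(task, appcfg, clientid)
    if spider._cookie_login():               # validates task.cookie, fills userid
        for order in spider._get_orders():   # generator: one ISHOPPING_ONE per order
            print(order)
        for profile in spider._get_profile():
            print(profile)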
class Seebug(AutoPluginBase):
    """Crawler for cn.0day.today (the class name 'Seebug' is historical)."""

    tasktype = EAutoType.EXPDB

    def __init__(self):
        AutoPluginBase.__init__(self)
        self.ha = HttpAccess()
        self._get_cookie()

    @staticmethod
    def _page_headers(referer):
        """Common browser-like headers; only the referer varies between requests."""
        return f"""
        accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
        accept-language: zh-CN,zh;q=0.9
        cache-control: no-cache
        pragma: no-cache
        referer: {referer}
        upgrade-insecure-requests: 1
        user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"""

    def _get_cookie(self):
        """Click through the 0day.today consent page with headless Chrome and
        harvest the resulting cookies into self.ha."""
        try:
            ic = False  # never set to True, so the success log below is effectively disabled
            chrome_options = ChromeOptions()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            chrome_options.add_argument('blink-settings=imagesEnabled=false')
            # chrome_options.add_argument('--no-sandbox')
            driver = webdriver.Chrome(chrome_options=chrome_options)
            success = False
            check_num = 1
            while True:
                try:
                    driver.get('https://cn.0day.today/')
                    time.sleep(5 * check_num)  # wait a little longer on each retry
                    # The consent form's agree button.
                    driver.find_element_by_css_selector(
                        'body > div > div.agree > div:nth-child(9) > div:nth-child(3) > form > input'
                    ).click()
                    success = True
                    break
                except Exception:
                    check_num += 1
                    if check_num == 4:
                        break
            if success:
                cookies = driver.get_cookies()
                l_cookie = ''
                for cookie in cookies:
                    l_cookie = l_cookie + cookie['name'] + '=' + cookie['value'] + '; '
                if ic:
                    self._logger.info('Got cookie success!')
                self.ha._managedCookie.add_cookies('0day.today', l_cookie)
            else:
                self._logger.info('Got cookie fail!')
            driver.close()
        except Exception:
            self._logger.error('Got cookie fail: {}'.format(traceback.format_exc()))

    def get_bug(self):
        # Re-acquire the cookie until the platform index no longer shows the
        # consent page ("是的我同意" = "yes, I agree") or the Cloudflare interstitial.
        failnum = 0
        while True:
            url = 'https://cn.0day.today/platforms'
            headers = """
            Host: cn.0day.today
            Connection: keep-alive
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
            Accept-Language: zh-CN,zh;q=0.9
            """
            html = self.ha.getstring(url, headers=headers)
            if "value='是的我同意'" in html or 'Just a moment...' in html:
                failnum += 1
                if failnum > 3:
                    self._logger.error('Requests fail over 3 times!')
                    return
                self._logger.info('Cookie expired!')
                self._get_cookie()
            else:
                break
        soup = BeautifulSoup(html, 'lxml')
        tables = soup.select('.category_title a')
        for a in tables:
            href = a.attrs['href']
            if href == '/platforms' or href == '/webapps':
                continue
            url0 = 'https://cn.0day.today' + href
            page = 0
            last_url = None
            while True:
                page += 1
                url = url0 + '/' + str(page)
                html = self.ha.getstring(url, headers=self._page_headers(url0))
                soup = BeautifulSoup(html, 'lxml')
                exploits = soup.select('.ExploitTableContent')
                for exploit in exploits:
                    d_href = exploit.select_one('h3 a').attrs['href']
                    id = d_href.split('/')[-1]
                    # Listings are newest first: the first already-stored id
                    # means everything older was crawled before.
                    if self.is_data_unique(str(id) + '0day'):
                        return
                    name = exploit.select_one('h3 a').get_text()
                    # "你可以免费使用此漏洞利用" = "you may use this exploit for
                    # free"; paid exploits are skipped entirely.
                    if '你可以免费使用此漏洞利用' in str(exploit):
                        detail, referer = self.get_description(href, url)
                        if detail:
                            description = f'datasource: 0day\nid: {id}\nname: {name}\nurl:{url}\n'
                            self.write_text_string(description, detail, 'iscan_expdb_doc')
                    else:
                        continue
                    # Reorder "DD-MM-YYYY" into "YYYY-MM-DD".
                    date = exploit.select_one('.td a').get_text()
                    date_d = date.split('-')[0]
                    date_y = date.split('-')[-1]
                    date = date_y + date.replace(date_d, '').replace(date_y, '') + date_d
                    # Note: looked up on the whole page, not the individual row.
                    verified = soup.select_one('.tips_verified_')
                    if verified:
                        verified = 0
                    else:
                        verified = 1
                    # "安全风险级别" = "security risk level"; map it onto 1..3.
                    level_t = substring(str(exploit), "class='tips_risk_color_",
                                        "'>安全风险级别")
                    if level_t in ['0', '1']:
                        level = 1
                    elif level_t == '2':
                        level = 2
                    else:
                        level = 3
                    res = ExpDB(name, '0day', id, date, verified)
                    res.level = level
                    res, poc, url = self.get_detail(id, referer, res)
                    description = f'datasource: 0day\nid: {id}\nname: {name}\nurl:{url}\n'
                    self.write_text_string(description, poc, 'iscan_expdb_exp')
                    self.write_text(res, 'iscan_expdb')
                    self.store_data_unique(str(id) + '0day')
                if not last_url:
                    last_url = 'https://cn.0day.today' + soup.select('.pages a')[-1].attrs['href']
                if last_url == url:
                    break

    def get_description(self, href, referer):
        """Fetch the exploit's description page; returns (description, url)."""
        try:
            d_url = 'https://cn.0day.today/exploit' + href
            html = self.ha.getstring(d_url, headers=self._page_headers(referer))
            description = ''
            # "描述" = the "description" section header on the page.
            if "<div class='td'>描述</div>" in html:
                try:
                    description = re.findall(
                        r"<div class='td'>描述</div>.*?>(.*?)</div>", html, re.S)[0]
                except Exception:
                    self._logger.error('Get description fail: {}'.format(
                        traceback.format_exc()))
            return description, d_url
        except Exception:
            self._logger.error(f'Description fail:{traceback.format_exc()}')

    def get_detail(self, id, referer, res):
        """Fetch the exploit detail page; fills res and returns (res, poc, url)."""
        try:
            e_url = 'https://cn.0day.today/exploit/' + id
            e_html = self.ha.getstring(e_url, headers=self._page_headers(referer))
            e_soup = BeautifulSoup(e_html, 'lxml')
            l1 = e_soup.select(
                "[style='float:left; width:150px; overflow:hidden; margin:5px 0px 0px 0px;']")
            author = {'name': l1[0].get_text()}
            target = {'type': l1[1].get_text(), 'platform': l1[2].get_text()}
            tags = [self.tag_mapping(target['type'])]
            l3 = e_soup.select("[style='float:left; margin:5px 0px 0px 0px;']")
            code = [{'code_type': '0day-ID', 'code': l3[0].get_text()}]
            try:
                # Separate dict so the CVE entry does not overwrite the 0day-ID entry.
                code.append({'code_type': 'cve', 'code': l3[1].get_text(' ')})
            except Exception:
                pass
            res.tags = tags
            res.target = target
            res.author = author
            res.code = code
            poc = e_soup.select_one('pre').get_text()
            return res, poc, e_url
        except Exception:
            self._logger.error(f'ID: {id} get detail fail:{traceback.format_exc()}')

    def start(self):
        self.get_bug()
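# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): running the
# cn.0day.today crawler standalone. A local chromedriver binary is required,
# since _get_cookie() bootstraps the consent cookie with headless Chrome.
def _demo_0day():
    crawler = Seebug()
    crawler.start()  # walks every platform category until a stored id is hit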
def __init__(self):
    ScoutPlugBase.__init__(self)
    self.ha = HttpAccess()
    self.proxy_List = []
    self.is_first = True
    self.cookie = 'CGIC=InZ0ZXh0L2h0bWwsYXBwbGljYXRpb24veGh0bWwreG1sLGFwcGxpY2F0aW9uL3htbDtxPTAuOSxpbWFnZS93ZWJwLGltYWdlL2FwbmcsKi8qO3E9MC44LGFwcGxpY2F0aW9uL3NpZ25lZC1leGNoYW5nZTt2PWIz; HSID=AJAuKs_OF4zjl_QiH; SSID=AOt5d7crf8ErXjqt5; APISID=ywI9lJQCGtyX_UgX/AVPHvH0i8rRjLpy3q; SAPISID=qPfZ04-88QBXo551/Aw4J_YiM8bTsz4m9H; CONSENT=YES+CN.zh-CN+; SEARCH_SAMESITE=CgQI3o0B; _gcl_au=1.1.423014040.1568171077; NID=188=gkZ2tXD8e7EWvfFwVQ9g0g5Ny1h2S1gY62TzUODI6ypQxzhcyqwfRgkZxnuA9c1kHGpLn9XDvmuJN4_kBEPJYoLSmv_USmqai_6IYBQ9RxxBYr5HXlDo60sFVnUMFqU9L9Im9jeVWz8KjdzAWdMaXDwAlUN7VZbILOZ8qm_0etxVhmAyGNvBzBI9C3ZvCG3M6OV6Gc3e0QoTqtsq87egQ8cjl-riBP-A9hPp8v5_SCohQA; SID=oge9GlNhzXILiY9MW_P2brb9HPE0noiUCwKqq4Z-w4IFcUYyBLodJCIIfQl8R8IqgQclJQ.; 1P_JAR=2019-09-20-06; SIDCC=AN0-TYu9Bf7Mrwg4TJmxordC1vli8GeqPk_MiHeE-3rahVe8d0bVfvQcfHx9GiSL3t_0JBmJQd0'
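# Note (not part of the original source): this __init__ belongs to a
# ScoutPlugBase subclass whose class header is not included in this excerpt.
# It only stores a captured Google session cookie; judging by the pattern the
# other spiders here use, the subclass presumably registers it before making
# requests with something like:
#     self.ha._managedCookie.add_cookies('google.com', self.cookie)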
def __init__(self, task: IscoutTask, loggername: str = None):
    ScoutPlugBase.__init__(self, loggername=loggername)
    if not isinstance(task, IscoutTask):
        raise Exception("Invalid IscoutTask")
    self._dtools = dtools
    self.task: IscoutTask = task
    self._ha: HttpAccess = HttpAccess()
    self._userid: str = None  # the site's unique identifier for this user
    self._account: str = None  # account name that can be used to log in
    self._username: str = None  # user nickname
    self._globaltelcode: str = None  # international dialing code
    self._phone: str = None  # phone number
    self.phone: str = None
    self._url: str = None
    self._host: str = None
    self._cookie: str = None
    self.is_new_facebook = False  # whether this account sees the new facebook UI

    # web-needed fields
    self._pc = None
    self._rev = None
    self.lsd = None
    self._req = helper_num.MakeNumber(FbBase._msgr_charsets, 20)
    self.fb_dtsg = None
    self.fb_dtsg_ag = None
    self.jazoest = None
    self._spin_r = None
    self._spin_t = None
    self._spin_b = None
    self.hsi = None
    self._s = None
    self.docid = None
    self.ajaxpipe_token = None  # the new UI no longer has this parameter
    self.quickling_ver = None
    self.docid_profile = None
    self.docid_contact = None
    self.docid_group = None
    self.homepage = None

    # cache of every resource JS script from the init pages, used to look up the various docids
    self._jspages: dict = {}
    self._jspages_listpage = None
    self._jspages_itemurls: dict = {}
    self._jspages_ok: bool = False
    self._jspages_locker = threading.Lock()

    # sms login contract fields...
    self.hash_ = None
    self.sms_redir = None

    # data
    self.is_messenger_only_user: bool = False  # whether this is a messenger-only user
    self.is_deactived_allowed_on_messenger: bool = False  # deactivated on facebook but still allowed on messenger?

    # The fields below have no multithreading concurrency issues for now;
    # add a lock later if that changes.
    # self._contacts: dict = {}   # friends, keyed by friend userid
    # self._chatlogs: dict = {}   # chat logs, keyed by friend userid
    # self._groups: dict = {}     # groups, keyed by group id
    # self._resources: dict = {}  # resources; dedup against db for incremental download
    self._exist_msgtypes: dict = {}  # message types seen so far, for debugging
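# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): the constructor above
# rejects anything that is not an IscoutTask, so callers must pass a real task.
# 'FbBase' is inferred from the FbBase._msgr_charsets reference inside __init__;
# 'scheduler_task' is a hypothetical stand-in for the framework-supplied task.
def _demo_facebook(scheduler_task: IscoutTask):
    fb = FbBase(scheduler_task)  # raises if scheduler_task is not an IscoutTask
    return fb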
def setUp(self):
    # Runs before each test: lazily create the shared HttpAccess instance.
    # getattr guards the first run, when self._ha does not exist yet.
    if not isinstance(getattr(self, '_ha', None), HttpAccess):
        self._ha = HttpAccess()
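# Illustrative sketch (not part of the original source): a minimal
# unittest.TestCase wired to the setUp above; the class and test names here
# are assumptions, only the fixture pattern is taken from the original.
import unittest

class HttpAccessFixtureTest(unittest.TestCase):
    def setUp(self):
        # lazily create the shared HttpAccess instance before each test
        if not isinstance(getattr(self, '_ha', None), HttpAccess):
            self._ha = HttpAccess()

    def test_fixture_ready(self):
        # setUp guarantees the fixture exists before each test method runs
        self.assertIsInstance(self._ha, HttpAccess)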