Example #1
0
 def _set_headers(self):
     """Build the default request headers and store them on ``self.headers``.

     The User-Agent value is obtained from ``get_random_phone_ua()`` each
     time this method runs.
     """
     html_accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
     header_pairs = (
         ('Connection', 'keep-alive'),
         ('Upgrade-Insecure-Requests', '1'),
         ('User-Agent', get_random_phone_ua()),
         ('Accept', html_accept),
         ('Accept-Encoding', 'gzip, deflate'),
         ('Accept-Language', 'zh-CN,zh;q=0.9'),
     )
     self.headers = dict(header_pairs)
Example #2
0
 def _set_headers(self):
     """Assemble the XHR-style form request headers into ``self.headers``.

     The User-Agent value is obtained from ``get_random_phone_ua()`` each
     time this method runs.
     """
     built = {}
     built['accept-encoding'] = 'gzip, deflate, br'
     built['accept-language'] = 'zh-CN,zh;q=0.9'
     built['user-agent'] = get_random_phone_ua()
     built['content-type'] = 'application/x-www-form-urlencoded'
     built['accept'] = '*/*'
     # The 'authority': 'm-goods.kaola.com' entry is left disabled (not sent).
     built['x-requested-with'] = 'XMLHttpRequest'
     self.headers = built
Example #3
0
 def _get_phone_headers():
     """Return a fresh dict of page-request headers.

     The 'user-agent' value comes from ``get_random_phone_ua()``, so it is
     re-evaluated on every call.
     """
     phone_headers = {
         'cache-control': 'max-age=0',
         'upgrade-insecure-requests': '1',
     }
     phone_headers['user-agent'] = get_random_phone_ua()
     phone_headers.update({
         'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
         'accept-encoding': 'gzip, deflate, br',
         'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
     })
     return phone_headers
Example #4
0
 def _set_headers(self):
     """Build the request headers for app.quanmama.com into ``self.headers``.

     The User-Agent value is obtained from ``get_random_phone_ua()`` each
     time this method runs.
     """
     self.headers = {
         'Accept': '*/*',
         'Connection': 'keep-alive',
         'Content-Type': 'application/x-www-form-urlencoded',
         'Accept-Encoding': 'br, gzip, deflate',
         'Host': 'app.quanmama.com',
         'User-Agent': get_random_phone_ua(),
         # No 'Content-Length' here: a fixed value ('885' previously) is only
         # correct for a body of exactly that size. The HTTP client derives
         # the right length from the actual request body.
         'Accept-Language': 'zh-Hans-CN;q=1, en-CN;q=0.9',
     }
 def _get_headers():
     """Return a fresh dict of page-request headers.

     The 'User-Agent' value comes from ``get_random_phone_ua()``, so it is
     re-evaluated on every call.
     """
     html_accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
     return dict([
         ('Connection', 'keep-alive'),
         ('Cache-Control', 'max-age=0'),
         ('Upgrade-Insecure-Requests', '1'),
         ('User-Agent', get_random_phone_ua()),
         ('Accept', html_accept),
         ('Accept-Encoding', 'gzip, deflate, br'),
         ('Accept-Language', 'zh-CN,zh;q=0.9'),
     ])
Example #6
0
 def _set_headers(self):
     """Assemble the form-request headers for home.mi.com into ``self.headers``.

     The User-Agent value is obtained from ``get_random_phone_ua()`` each
     time this method runs; 'DToken' is sent as an empty string.
     """
     mi_headers = {'Origin': 'https://home.mi.com'}
     mi_headers['Accept-Encoding'] = 'gzip, deflate, br'
     mi_headers['Accept-Language'] = 'zh-CN,zh;q=0.9'
     mi_headers['User-Agent'] = get_random_phone_ua()
     mi_headers['Content-Type'] = 'application/x-www-form-urlencoded'
     mi_headers['Accept'] = '*/*'
     # A 'Referer' entry (https://home.mi.com/detail?gid=101421) is left
     # disabled and is not sent.
     mi_headers['DToken'] = ''
     mi_headers['Connection'] = 'keep-alive'
     self.headers = mi_headers
Example #7
0
 def _set_headers(self):
     """Assemble the XHR-style form request headers into ``self.headers``.

     The User-Agent value is obtained from ``get_random_phone_ua()`` each
     time this method runs. No 'cookie' or 'referer' header is set here.
     """
     header_pairs = (
         ('accept-encoding', 'gzip, deflate, br'),
         ('accept-language', 'zh-CN,zh;q=0.9'),
         ('user-agent', get_random_phone_ua()),
         ('content-type', 'application/x-www-form-urlencoded'),
         ('accept', '*/*'),
         ('authority', 'm-goods.kaola.com'),
         ('x-requested-with', 'XMLHttpRequest'),
     )
     self.headers = dict(header_pairs)
Example #8
0
 def __init__(self):
     """Initialise ``self.headers`` with page-request headers for www.douyin.com.

     The 'user-agent' value is obtained from ``get_random_phone_ua()`` at
     construction time. No 'cookie' header is set here.
     """
     html_accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
     default_headers = {}
     default_headers['accept-encoding'] = 'gzip, deflate, br'
     default_headers['accept-language'] = 'zh-CN,zh;q=0.9'
     default_headers['upgrade-insecure-requests'] = '1'
     default_headers['user-agent'] = get_random_phone_ua()
     default_headers['accept'] = html_accept
     default_headers['cache-control'] = 'max-age=0'
     default_headers['authority'] = 'www.douyin.com'
     self.headers = default_headers
Example #9
0
    async def _search(self, search_key) -> list:
        '''
        Tianyancha search.

        Requests https://m.tianyancha.com/search with ``search_key`` as the
        ``key`` query parameter and extracts fields from the response body.

        :param search_key: the key to search for (converted with ``str``)
        :return: ``[]`` when the response body is empty or when one of the
            assertions below fails.
            NOTE(review): the visible code has no ``return`` on the success
            path and never fills ``search_list`` -- the snippet looks
            truncated; confirm against the full source.
        '''
        headers = {
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': get_random_phone_ua(),
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Referer': 'https://m.tianyancha.com/',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }
        params = (('key', str(search_key)), )
        url = 'https://m.tianyancha.com/search'
        # NOTE(review): this call is not awaited inside an ``async def``;
        # presumably it is a blocking request -- confirm.
        body = Requests.get_url_body(url=url,
                                     headers=headers,
                                     params=params,
                                     cookies=None)
        # print(body)
        # An empty body is treated as "no results".
        if body == '':
            return []

        # Assigned but not used anywhere in the visible code.
        search_list = []
        try:
            # div.new-border-bottom
            # NOTE(review): '::text' + extract_first() yields a single text
            # value rather than markup, yet search_res is re-parsed with
            # Selector below -- verify that these selectors can match.
            search_res = Selector(text=body).css(
                'div.search_result_container ::text').extract_first() or ''
            company_name = Selector(
                text=search_res).css('div.new-border-bottom a span text ::text'
                                     ).extract_first() or ''
            assert company_name != '', 'company_name为空值!'
            # Rebinds ``url`` (previously the request URL) to the first
            # result link's href.
            url = Selector(text=search_res).css(
                'div.new-border-bottom a ::attr("href")').extract_first() or ''
            assert url != '', 'url为空值!'
            legal_person = Selector(text=search_res).css(
                'a.legalPersonName ::text').extract_first() or ''
            legal_person_url = Selector(text=search_res).css(
                'a.legalPersonName ::attr("href")').extract_first() or ''
            # Prefix the site origin only when an href was found.
            legal_person_url = 'https://m.tianyancha.com' + legal_person_url if legal_person_url != '' else ''

        except AssertionError as e:
            # A missing company name or url is reported and treated as
            # "no results".
            print(e)
            return []
Example #10
0
    def _get_one_page_comment_info(self, goods_id, page_num) -> list:
        """
        Fetch one page of comment info for a product.

        :param goods_id: product id; used in the Referer URL and sent as the
            ``sku`` query parameter
        :param page_num: page number, sent as the ``page`` query parameter
        :return: the value found under ``result -> comments`` in the decoded
            response, or ``[]`` when the response contains no parenthesised
            payload or those keys are absent
        :raises AssertionError: when the response body is an empty string
        """
        headers = {
            'Referer':
            'https://item.m.jd.com/product/{}.html'.format(goods_id),
            'User-Agent': get_random_phone_ua(),
        }
        params = (
            ('sorttype', '5'),
            ('pagesize', '10'),
            ('sceneval', '2'),
            ('score', '3'),  # positive reviews only
            ('sku', str(goods_id)),
            ('page', str(page_num)),
        )
        url = 'https://wq.jd.com/commodity/comment/getcommentlist'
        body = Requests.get_url_body(
            url=url,
            headers=headers,
            params=params,
            ip_pool_type=self.ip_pool_type,
        )
        # Raise explicitly instead of using ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type and message
        # are unchanged for callers.
        if body == '':
            raise AssertionError('body不为空值!')

        # The payload is the text between the first '(' and the last ')' on
        # a line (presumably a JSONP-style wrapper -- confirm). A raw string
        # avoids the invalid-escape warning of '\('.
        wrapped = re.search(r'\((.*)\)', body)
        data = []
        if wrapped is not None:
            data = json_2_dict(
                json_str=wrapped.group(1),
                default_res={}).get('result', {}).get('comments', [])

        # '+' marks a page that yielded comments, '-' an empty page.
        self.lg.info('[{}] page_num: {}'.format(
            '+' if data != [] else '-',
            page_num,
        ))

        return data
Example #11
0
@author = super_fazai
@File    : gd_map_spider.py
@connect : [email protected]
'''

from pprint import pprint
from fzutils.internet_utils import get_random_phone_ua
from fzutils.spider.fz_requests import Requests
from fzutils.ip_pools import tri_ip_pool
from fzutils.common_utils import json_2_dict

# Amap (Gaode map): single-page shop search
headers = dict([
    ('accept-encoding', 'gzip, deflate, br'),
    ('accept-language', 'zh-CN,zh;q=0.9,en;q=0.8'),
    ('user-agent', get_random_phone_ua()),
    ('accept', 'application/json'),
    ('referer', 'https://m.amap.com/search/view/keywords=%E8%A1%A3%E6%9C%8D'),
    ('authority', 'm.amap.com'),
    ('x-requested-with', 'XMLHttpRequest'),
])

params = (
    ('pagenum', '2'),
    ('user_loc', '120.153576,30.287459'),
    ('geoobj', '120.089203|30.177242|120.217949|30.397676'),
    ('city', '杭州'),
    ('keywords', '衣服'),
    ('cluster_state', '5'),
    ('client_network_class', '4'),
    # ('uuid', '2a21e0af-009d-4a1a-a63e-ee5c6dec2488'),