Example #1
    def get_all_shop(self, url):
        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'max-age=0',
            # 'Connection': 'keep-alive',
            'Host': 'www.dianping.com',
            'Cookie': 'navCtgScroll=0; _lxsdk_cuid=167c944a374c8-0367edd4469fcb-5d1e331c-15f900-167c944a374c8; _lxsdk=167c944a374c8-0367edd4469fcb-5d1e331c-15f900-167c944a374c8; _hc.v=f1b4e908-09e1-96ed-8f92-b4bb681c5966.1545269980; thirdtoken=abd3d7c7-e2e4-4b41-b231-dded6297e0cc; uamo=15638829723; dper=7d7f2fc7cdd242974d2b259b2c75ca7e8648c47620fec8ef82796aa30d5be07d9a6ce08a4b544e50ed734b716dee6a45712672cd403d7b3baeb8b546d0f33f8ad8b86955c52b409b69a75ced67f864780bdda60d0f96880f9b5b5392ce8b8e44; ll=7fd06e815b796be3df069dec7836c3df; ua=%E5%88%AB%E9%82%A3; ctu=0e996ce0644db9ede257ad23843b309df390781b7ecdd57481956f9ef8a41718; s_ViewType=10; aburl=1; cy=1; cye=shanghai; cityInfo=%7B%22cityId%22%3A1%2C%22cityEnName%22%3A%22shanghai%22%2C%22cityName%22%3A%22%E4%B8%8A%E6%B5%B7%22%7D; _lxsdk_s=167cf52b312-e53-0cc-661%7C%7C21',
            # 'Referer': 'http://www.dianping.com/search/keyword/1/10_%E5%98%89%E9%87%8C%E4%B8%AD%E5%BF%83/g101',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36'
        }

        response = requests.get(url, headers=headers, proxies={'http': self.ip_one})
        data = etree.HTML(response.content.decode())
        shop_url_list = data.xpath('.//div[@id="shop-all-list"]/ul/li/div[2]/div[1]/a[1]/@href')
        print('Shop URL list:', shop_url_list)
        if shop_url_list:
            for shop_url in shop_url_list:
                print('Found shop URL:', shop_url)
                if shop_url not in self.set_list:
                    with open('url_shop_two.txt', 'a') as f:
                        f.write(shop_url + '\n')
                    self.set_list.append(shop_url)
                    # self.get_shop_page(shop_url, url)
                    # time.sleep(1)

            next_page_url_list = data.xpath('.//div[@class="page"]/a[@class="next"]/@href')
            if next_page_url_list:
                print('Following the next page link')
                self.get_all_shop(next_page_url_list[0])
        else:
            # No shop list was returned: the proxy is probably blocked, so switch IP and retry this page.
            try:
                self.ip_one = res_ip()
            except Exception:
                time.sleep(3)
                self.ip_one = res_ip()
            self.get_all_shop(url)
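
Note: every example on this page calls an external res_ip() helper that returns a fresh proxy address; its implementation is not shown. Below is a minimal sketch of what such a helper might look like, assuming a proxy-pool HTTP API that returns one ip:port per line (the endpoint URL is a placeholder, not a real service):

    import requests

    def res_ip():
        # Placeholder proxy-pool endpoint -- not a real service.
        api = 'http://proxy-pool.example.com/get'
        resp = requests.get(api, timeout=5)
        resp.raise_for_status()
        # Expect one 'ip:port' per line; taking [0] of an empty list raises
        # IndexError, which the callers in these examples catch before retrying.
        lines = [line.strip() for line in resp.text.splitlines() if line.strip()]
        return lines[0]
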
Example #2
    def __init__(self):

        self.headers_one = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Host': 'www.baidu.com',
            'Proxy-Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'
        }

        self.headers_two = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Host': 'www.baidu.com',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'
        }
        self.headers_three = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Host': 'www.xiaohongshu.com',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'
        }

        self.start_url = ''
        # Comment API endpoint template
        self.commnet_port_url = ''
        # # Open the JSON output files
        # self.news_jsonfile = open('./sina_newsfile.json', 'wb')
        # self.comment_jsonfile = open('./sina_commentfile.json', 'wb')
        # Start date, Y-m-d
        self.start_time = '2018-11-13'
        # End date, Y-m-d
        self.end_time = '2018-11-20'
        # Flag marking whether the crawler should keep working
        self.is_work = True
        # Proxy IP list
        self.proxies_list = [
            '115.195.75.189:4217',
        ]
        self.ip = res_ip()
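
This __init__ only prepares the header dicts and the proxy; a brief usage sketch of how they would typically be combined in a request (the URL and the stand-in variables below are illustrative, not part of the original code):

    import requests

    headers_one = {'User-Agent': 'Mozilla/5.0'}   # stand-in for self.headers_one
    proxy = '115.195.75.189:4217'                 # stand-in for self.ip / a proxies_list entry

    response = requests.get(
        'https://www.baidu.com/',                 # hypothetical start_url
        headers=headers_one,
        proxies={'http': proxy},                  # same 'ip:port' style the examples use
        timeout=10,
    )
    print(response.status_code)
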
Example #3
    def __init__(self):

        self.headers_one = {

        }

        self.start_url = ''
        # Comment API endpoint template
        self.commnet_port_url = ''
        # # Open the JSON output files
        # self.news_jsonfile = open('./sina_newsfile.json', 'wb')
        # self.comment_jsonfile = open('./sina_commentfile.json', 'wb')
        # Start date, Y-m-d
        self.start_time = '2018-11-13'
        # End date, Y-m-d
        self.end_time = '2018-11-20'
        # Flag marking whether the crawler should keep working
        self.is_work = True

        # Proxy IP
        ip = res_ip()
        self.ip_one = ip
        self.ip_two = ip
        print('Using IP: {}'.format(ip))
        self.user_agent = [
            'Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124',
            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
            'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
        ]

        # Deduplication list
        self.set_list = []
        # URL of the CSS file that positions the shop-page glyphs
        self.shop_css_url = ''
        # CSS-coordinate dictionary for shop pages
        self.shop_css_dict = {}

        # URL of the CSS file that positions the comment glyphs
        self.css_url_first = ''
        # CSS-coordinate dictionary for comments
        self.css_list = {}
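
The user_agent list above is presumably rotated per request; a minimal sketch of that pattern (fetch() and USER_AGENTS are illustrative names and not part of the original code):

    import random
    import requests

    USER_AGENTS = [
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    ]

    def fetch(url, proxy):
        # Pick a different User-Agent for each request to look less like a single bot.
        headers = {'User-Agent': random.choice(USER_AGENTS)}
        return requests.get(url, headers=headers, proxies={'http': proxy}, timeout=10)
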
Example #4
    def get_comment(self, shop_id):
        headers = {
            'Cookie': '__mta=188618391.1545359031056.1545359031056.1545359031056.1; _lxsdk_cuid=167c944a374c8-0367edd4469fcb-5d1e331c-15f900-167c944a374c8; _lxsdk=167c944a374c8-0367edd4469fcb-5d1e331c-15f900-167c944a374c8; _hc.v=f1b4e908-09e1-96ed-8f92-b4bb681c5966.1545269980; dper=7d7f2fc7cdd242974d2b259b2c75ca7e8648c47620fec8ef82796aa30d5be07d9a6ce08a4b544e50ed734b716dee6a45712672cd403d7b3baeb8b546d0f33f8ad8b86955c52b409b69a75ced67f864780bdda60d0f96880f9b5b5392ce8b8e44; ua=%E5%88%AB%E9%82%A3; ctu=0e996ce0644db9ede257ad23843b309df390781b7ecdd57481956f9ef8a41718; s_ViewType=10; aburl=1; cy=1; cye=shanghai; cityInfo=%7B%22cityId%22%3A1%2C%22cityEnName%22%3A%22shanghai%22%2C%22cityName%22%3A%22%E4%B8%8A%E6%B5%B7%22%7D; ll=7fd06e815b796be3df069dec7836c3df; m_flash2=1; cityid=1; pvhistory=6L+U5ZuePjo8L2Vycm9yL2Vycm9yX3BhZ2U+OjwxNTQ1NzEzNzMwMTIxXV9b; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; _lxsdk_s={}'.format(uuid.uuid4()),
            'Host': 'www.dianping.com',
            'Referer': 'http://www.dianping.com/search/keyword/1/10_%E5%98%89%E9%87%8C%E4%B8%AD%E5%BF%83/g101r812',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36'
        }

        for i in range(1, 10000):
            time.sleep(1)
            url = 'http://www.dianping.com/shop/{}/review_all/p{}'.format(shop_id, str(i))
            # url = 'http://www.dianping.com/shop/14184826/review_all/p{}'.format(str(i))
            response = requests.get(url, headers=headers, allow_redirects=False, proxies={'http': self.ip_one})
            status_code = response.status_code
            if status_code == 200:
                css_url = re.search('//s3plus.meituan.net/.*/svgtextcss/.*css', response.text)[0]  # regex out the URL of the CSS file used for glyph positioning
                print(css_url)
                css_url = 'http:' + css_url
                if css_url != self.css_url_first:  # if the CSS file changed, fetch it and rebuild the glyph dictionary
                    logging.info('New CSS stylesheet URL: {}'.format(css_url))
                    css_pojie = CssPojie(css_url)
                    item_svg = css_pojie.get_css_page()
                    logging.info('New CSS glyph dictionary: {}'.format(str(item_svg)))
                    self.css_list = item_svg
                    self.css_url_first = css_url
                # print(response.text)
                time.sleep(3)
                response.encoding = 'utf-8'
                html = etree.HTML(response.text)
                comment = html.xpath('.//div[@class="reviews-items"]/ul/li')
                if comment:  # check whether a review list was returned; empty means no reviews or the last page has been passed
                    for i in comment:
                        item = {}
                        if i.xpath('./div/div[@class="review-words Hide"]/text()'):

                            xx = i.xpath('./div/div[@class="review-words Hide"]/text() | ./div/div[@class="review-words Hide"]/span/@class')
                        else:
                            xx = i.xpath('./div/div[@class="review-truncated-words"]/text() | ./div/div[@class="review-truncated-words"]/span/@class | .//div/div[@class="review-words"]/text() | .//div/div[@class="review-words"]/span/@class')

                        con = self.change_css_index(xx)
                        con = con.strip()
                        user_name = i.xpath('.//div[@class="dper-info"]/a/text()')[0]
                        grade = i.xpath('.//div[@class="review-rank"]/span/@class')[0]
                        grade = str(re.search(r'\d{2}', grade).group(0)).replace('0', '')
                        other_info = i.xpath('.//span[@class="score"]//text()')
                        con_t = ''
                        for j in other_info:
                            con_t = con_t + j.strip() + ' '
                        other_info = con_t
                        date_all = i.xpath('.//div[@class="misc-info clearfix"]/span[1]/text()')[0]
                        date = date_all.split(' ')[0].strip()
                        comment_time = date_all.split(' ')[1]
                        shop_name = i.xpath('.//div[@class="misc-info clearfix"]/span[2]/text()')[0]
                        try:
                            likes = i.xpath('.//em[@class="col-exp"]/text()')[0]
                        except IndexError:
                            likes = ''
                        if i.xpath('.//img[@class="user-rank-rst "]/@src | .//img[@class="user-rank-rst user-rank-rst-high"]/@src'):
                            lv = i.xpath('.//img[@class="user-rank-rst "]/@src | .//img[@class="user-rank-rst user-rank-rst-high"]/@src')[0]
                            lv = lv.split('/')[-1]
                            lv = re.search(r'\d{1,2}', lv).group(0)
                        else:
                            lv = ''
                        if i.xpath('.//div[@class="dper-info"]/span/@class'):
                            vip = i.xpath('.//div[@class="dper-info"]/span/@class')[0]
                            if vip == 'vip-gray':
                                is_vip = '否'
                            else:
                                is_vip = '是'
                        else:
                            is_vip = '否'
                        item['shop_name'] = shop_name
                        item['user_name'] = user_name.strip()
                        item['user_lv'] = lv
                        item['is_vip'] = is_vip
                        item['grade'] = grade
                        item['other_info'] = other_info
                        item['date'] = date
                        item['time'] = comment_time
                        item['likes'] = likes
                        shop_url = 'http://www.dianping.com/shop/' + shop_id
                        item['shop_url'] = shop_url
                        item['comment_url'] = url
                        item['content'] = con
                        print(item)
                        self.write_comment_jsonfile(item)
                else:
                    print('No reviews retrieved')
                    break
            else:
                try:
                    self.ip_one = res_ip()
                except Exception:
                    time.sleep(2)
                    self.ip_one = res_ip()
                logging.info('The IP used for crawling reviews may be blocked; switching to: {}'.format(str(self.ip_one)))
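
The change_css_index() helper is not included in this example. Judging by the XPath above, it receives a mixed list of plain-text fragments and <span> class names and rebuilds the obfuscated review text by looking each class name up in the css-class → character dictionary (self.css_list). A minimal standalone sketch of that idea (the exact mapping logic lives in CssPojie and is an assumption here):

    def change_css_index(fragments, css_map):
        # Rebuild obfuscated text from a mixed list of plain-text fragments and
        # <span> class names, using a css-class -> character dictionary.
        parts = []
        for frag in fragments:
            frag = frag.strip()
            if not frag:
                continue
            # Class names injected by the anti-scraping CSS map to a single
            # character; anything not found in the map is kept as literal text.
            parts.append(css_map.get(frag, frag))
        return ''.join(parts)

    # e.g. change_css_index(['味道很', 'abc12'], {'abc12': '好'}) -> '味道很好'
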
Example #5
    def get_shop_page(self, url, up_url):
        user_agent = [
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.1',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/536.6',
            'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/536.6',
            'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.1',
        ]

        Cookies = [
            '_lxsdk_s=b458c64cc9ef92adce92c199a6b7%7C%7C4; __mta=214784936.1512623224439.1512623224439.1512623226558.2; client-id=8d050bd8-51b6-46fc-be96-cb6871d91fe7; ci=89; webloc_geo=31.778615%2C119.958931%2Cwgs84; _lxsdk=1602f5eb154c8-05c63a41825c168-1b451e24-13c680-1602f5eb154c8; uuid={}',
            '_lxsdk_s=55618265813fc43043ce562acf29%7C%7C2; ci=89; __mta=252597174.1512623399814.1512623399814.1512623399814.1; _lxsdk=1602f6161c1c8-034ae66c3842d88-1b451e24-13c680-1602f6161c170; client-id=8ca2fe5f-ba96-41ae-b919-f8d3665e2f10; webloc_geo=31.778620%2C119.958935%2Cwgs84; uuid={}',
            '_lxsdk_s=4ffb12d5a90c2361745e6d0993a2%7C%7C2; ci=89; __mta=40613533.1512623465821.1512623465821.1512623465821.1; client-id=a31f1c79-3287-4ef1-8bd3-56bfb431a7da; webloc_geo=31.778616%2C119.958936%2Cwgs84; _lxsdk=1602f6262117d-0f173d7fde38d18-1b451e24-13c680-1602f626212c8; uuid={}',
            '_lxsdk_s=4ba654e682eaf4d69efb90ae9cf1%7C%7C2; __mta=152351622.1512623525333.1512623525333.1512623525333.1; ci=89; client-id=e8101f58-a778-4332-81c2-75ba410b2bb3; webloc_geo=31.778611%2C119.958934%2Cwgs84; _lxsdk=1602f634c448d-0c74adf6d00e2e8-1b451e24-13c680-1602f634c45c8; uuid={}',
            '_lxsdk_s=f6511ca715103c33b7bb4aea1e9f%7C%7C2; __mta=40629917.1512623573270.1512623573270.1512623573270.1; ci=89; client-id=c69db65e-b5d1-4fe6-aa7a-340e3608aeb6; webloc_geo=31.778617%2C119.958930%2Cwgs84; _lxsdk=1602f64072fc8-04cf33034885b08-1b451e24-13c680-1602f640730c8; uuid={}',
            '_lxsdk_s=38327d7cfda78d0cfc4810838c32%7C%7C2; __mta=150795529.1512623625315.1512623625315.1512623625315.1; ci=89; client-id=221306b1-bfb7-4cfa-ab3f-2e4a85089864; webloc_geo=31.778610%2C119.958933%2Cwgs84; _lxsdk=1602f64d24531-0623a27320ba75-1b451e24-13c680-1602f64d246c8; uuid={}',
            '_lxsdk_s=c2f111bde916cad4d653e07a35c9%7C%7C2; __mta=42950426.1512623702441.1512623702441.1512623702441.1; _lxsdk=1602f66000dc8-0bb60c750cf17b8-1b451e24-13c680-1602f66000d7f; ci=89; client-id=38cc3a86-fe57-491b-8564-c9b1a251cd4a; webloc_geo=31.778617%2C119.958931%2Cwgs84; uuid={}',
            '_lxsdk_s=fd711080efee1adf6de57c1d4c88%7C%7C2; ci=89; webloc_geo=31.778612%2C119.958939%2Cwgs84; __mta=218070313.1512623752950.1512623752950.1512623752950.1; _lxsdk=1602f66c53e35-0b363d23b0e435-1b451e24-13c680-1602f66c53fc8; client-id=1a4a2af3-4201-4e61-bf26-92016709444b; uuid={}',
            '_lxsdk_s=810677247f21b53cd75826e60cb8%7C%7C2; __mta=146594315.1512623869611.1512623869611.1512623869611.1; _lxsdk=1602f688ce80-074a1ec97ff043-1b451e24-13c680-1602f688ce9c8; ci=89; client-id=e60488fa-51fe-4d12-99a3-257a5a0853cc; webloc_geo=31.778609%2C119.958928%2Cwgs84; uuid={}',
            '_lxsdk_s=c44c26606c13c651ab5a20447428%7C%7C2; __mta=89516452.1512623906613.1512623906613.1512623906613.1; _lxsdk=1602f691da0c8-024ebcb15833bc-1b451e24-13c680-1602f691da04b; ci=89; client-id=a6846506-a50a-4612-8106-ff31978802a8; webloc_geo=31.778607%2C119.958929%2Cwgs84; uuid={}',
            '_lxsdk_s=071dfe26ce64d6e46d03323d2f71%7C%7C2; __mta=210099498.1512623985254.1512623985254.1512623985254.1; ci=89; client-id=f2cea0fa-0d83-479a-a863-f2b7156fb392; webloc_geo=31.778621%2C119.958925%2Cwgs84; _lxsdk=1602f6a4f88c8-0ce1602d9c121a-1b451e24-13c680-1602f6a4f88c8; uuid={}',
            '_lxsdk_s=833017bd50026c6c57b327f1f51a%7C%7C2; __mta=244702778.1512624031536.1512624031536.1512624031536.1; ci=89; client-id=ec7106e4-9dda-4ce0-aa8d-c12288812644; webloc_geo=31.778622%2C119.958921%2Cwgs84; _lxsdk=1602f6b0459c8-011b3f8b51c9328-1b451e24-13c680-1602f6b045ac8; uuid={}',
            '_lxsdk_s=9a68aaba42beb1ebdfc3e726d94e%7C%7C2; ci=89; webloc_geo=31.778629%2C119.958912%2Cwgs84; __mta=45406747.1512624277952.1512624277952.1512624277952.1; _lxsdk=1602f6ec7b84-095c2ecf9438458-1b451e24-13c680-1602f6ec7b9c8; uuid=33acafd1-4a03-4f0f-9572-1ccbfd570efc; client-id={}',
            '_lxsdk_s=c1508df449b0616ba37c5d9166f2%7C%7C2; __mta=216814121.1512624317347.1512624317347.1512624317347.1; ci=89; client-id=5d8e3ff2-5370-4c08-855e-9d26e5187040; webloc_geo=31.778633%2C119.958912%2Cwgs84; _lxsdk=1602f6f6187a2-029abec138ea508-1b451e24-13c680-1602f6f6188c8; uuid={}',
            '_lxsdk_s=7a249ecc9a048cbb826ea3b1f4e7%7C%7C2; __mta=256753588.1512624355644.1512624355644.1512624355644.1; _lxsdk=1602f6ff822c8-01f025c7119f968-1b451e24-13c680-1602f6ff823c8; ci=89; client-id=33339c7c-4dd0-4da8-8617-add389e72438; webloc_geo=31.778634%2C119.958912%2Cwgs84; uuid={}',
            '_lxsdk_s=93f6847b5f37e552ae083beb48c2%7C%7C2; __mta=218397606.1512624389136.1512624389136.1512624389136.1; ci=89; client-id=21540b20-0240-48fa-ae1e-c1ce4dba892f; webloc_geo=31.778631%2C119.958925%2Cwgs84; _lxsdk=1602f707a3d3-0d7db668e6d9498-1b451e24-13c680-1602f707a3ec8; uuid={}',
            '_lxsdk_s=0ffdd4e2dddd57d3c05606eb82f3%7C%7C2; __mta=141367821.1512624428164.1512624428164.1512624428164.1; ci=89; client-id=d6e8de67-48b8-4262-b3b3-47851c46db5c; webloc_geo=31.778620%2C119.958977%2Cwgs84; _lxsdk=1602f7112b9c8-09d6e858772c1d8-1b451e24-13c680-1602f7112bac8; uuid={}',
            ]

        cook = random.choice(Cookies)
        cook = cook.format(uuid.uuid4())
        headers_three = {
            # 'Cookie': 'Cookie: cy=1; cye=shanghai; _lxsdk_cuid=167ce86cb5cc8-0ef0bb253f622a-c343567-15f900-167ce86cb5cc8; _lxsdk=167ce86cb5cc8-0ef0bb253f622a-c343567-15f900-167ce86cb5cc8; _hc.v=6c8d00bc-7fd5-adef-2b8f-8ea0623850c7.1545358200; s_ViewType=10; ua=%E3%80%81%E5%94%90%E5%AE%8B_8205; ctu=baaf5442d52416965af0cf11004f094d95add57d4366fa42d38f98e316f2c8cc; uamo=13912816467; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; _lxsdk_s={}'.format(random.choice(cookies_list)),
            'Cookie': cook,
            'Host': 'www.dianping.com',
            'Referer': up_url,
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': '{}'.format(random.choice(user_agent))
        }

        print('Requesting shop page:', url)
        try:
            response = requests.get(url, headers=headers_three, proxies={'http': self.ip_two})
            print(response.status_code)
            data = response.content.decode()

            # print(data)
            if '页面不存在' in data or '页面无法访问' in data:  # "page does not exist" / "page is not accessible"
                try:
                    self.ip_two = res_ip()
                except IndexError:
                    time.sleep(2)
                    self.ip_two = res_ip()
                print('IP blocked, switching IP......')
                print('New IP: {}'.format(self.ip_two))
                logging.warning('The IP used for crawling shop info was blocked; new IP: {}'.format(self.ip_two))
                self.get_shop_page(url, up_url)
            else:
                css_url = re.search(r'//s3plus.meituan.net/.*/svgtextcss/[a-z0-9]*\.css', response.text)[0]  # regex out the URL of the CSS file used for glyph positioning
                print(css_url)
                css_url = 'http:' + css_url
                if css_url != self.shop_css_url:  # if the CSS file changed, fetch it and rebuild the glyph dictionary
                    logging.info('New CSS stylesheet URL: {}'.format(css_url))
                    css_pojie = CssPojie(css_url)
                    item_svg = css_pojie.get_css_page()
                    logging.info('New CSS glyph dictionary: {}'.format(str(item_svg)))
                    self.shop_css_dict = item_svg
                    self.shop_css_url = css_url
                shop_item = {}
                data = etree.HTML(data)
                tel = data.xpath('.//div[@id="basic-info"]/p/text() | .//div[@id="basic-info"]/p/d/@class')
                tel = self.change_shop_css_index(tel)
                # Shop name
                shop_name = data.xpath('.//h1[@class="shop-name"]/text()')[0]
                shop_item['shop_name'] = shop_name
                # Star rating
                star_level = data.xpath('.//div[@class="brief-info"]/span[1]/@title')[0]
                shop_item['star_level'] = star_level
                shop_item['tel'] = tel
                # Average price per person
                per_capita = data.xpath('.//span[@id="avgPriceTitle"]/text() | .//span[@id="avgPriceTitle"]/child::*/@class')
                per_capita = self.change_shop_css_index(per_capita)
                shop_item['per_capita'] = per_capita
                # Taste score
                taste = data.xpath('.//span[@id="comment_score"]/span[1]/text() | .//span[@id="comment_score"]/span[1]/child::*/@class')
                taste = self.change_shop_css_index(taste)
                shop_item['taste'] = taste
                # Environment score
                environment = data.xpath('.//span[@id="comment_score"]/span[2]/text() | .//span[@id="comment_score"]/span[2]/child::*/@class')
                environment = self.change_shop_css_index(environment)
                shop_item['environment'] = environment
                # Service score
                serve = data.xpath('.//span[@id="comment_score"]/span[3]/text() | .//span[@id="comment_score"]/span[3]/child::*/@class')
                serve = self.change_shop_css_index(serve)
                shop_item['serve'] = serve
                # Address
                address = data.xpath('.//span[@id="address"]/text() | .//span[@id="address"]/child::*/@class')
                address = self.change_shop_css_index(address)
                shop_item['address'] = address
                # Review count
                comment_count = data.xpath('.//span[@id="reviewCount"]/text() | .//span[@id="reviewCount"]/child::*/@class')
                comment_count = self.change_shop_css_index(comment_count)
                shop_item['comment_count'] = comment_count

                shop_item['url'] = url
                print(shop_item)
                self.write_news_jsonfile(shop_item)

        except requests.exceptions.ProxyError:
            print('Problem with the proxy IP')
            self.ip_two = res_ip()
            self.get_shop_page(url, up_url)
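
write_news_jsonfile() (and the similar write_comment_jsonfile() in Example #4) is not shown in these examples; a plausible minimal version that appends one JSON object per line (the filename is an assumption):

    import json

    def write_news_jsonfile(item):
        # Append one shop record per line (JSON Lines); ensure_ascii=False keeps
        # the Chinese field values human-readable. The filename is a placeholder.
        with open('shop_info.json', 'a', encoding='utf-8') as f:
            f.write(json.dumps(item, ensure_ascii=False) + '\n')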