Code Example #1
File: util.py  Project: uiwe/meituan
def get_food_list(category, poiInfos):
    item_list = []
    for poi in poiInfos:
        item = MeituanItem()
        item.pk_id = str(uuid.uuid1())
        item.dish_type = category
        item.restaurant_name = poi['title']
        item.location = poi['address']
        item.price = 0 if poi['avgPrice'] is None else int(poi['avgPrice'])
        item.star = float(poi['avgScore'])
        item.img_url = poi['frontImg']
        item.comment_num = int(poi['allCommentNum'])
        item_list.append(item)
    return item_list
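Note: every snippet on this page assumes a MeituanItem defined in the project's own items.py, which is not shown here. Example #1 sets attributes directly, so its MeituanItem is presumably a plain model class; examples #2 through #10 use dict-style access, which matches a standard scrapy.Item. A minimal sketch of such an item, with field names inferred from the usage in examples #2-#6 (a hypothetical reconstruction, not any project's actual items.py):

# Hypothetical items.py sketch; field names inferred from examples #2-#6.
import scrapy


class MeituanItem(scrapy.Item):
    content = scrapy.Field()      # raw page content stored by the spider
    meituantype = scrapy.Field()  # shop id or record type ('shop', 'deal', ...)
    dt = scrapy.Field()           # crawl date, defined elsewhere in those spiders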
Code Example #2
 def parse_comment(self, response):
     content = response.text
     data = re.findall('<weak class="username">(.*?)</weak>', content)
     if data:
         item = MeituanItem()
         item['content'] = content
         shop_id = response.meta['shop_id']
         item['meituantype'] = shop_id
         item['dt'] = dt
         yield item
         # a full page of 15 comments suggests there is a following page
         if len(data) == 15:
             url = response.url
             page = response.meta['page'] + 1
             # rebuild the URL with the incremented page number
             url_list = url.split('page_')
             comment_url = url_list[0] + 'page_' + str(page)
             shop_id = response.meta['shop_id']
             yield Request(comment_url,
                           callback=self.parse_comment,
                           dont_filter=True,
                           headers=header,
                           meta={
                               'shop_id': shop_id,
                               'page': page
                           })
Code Example #3
 def parse_comment1(self, response):
     content = response.body
     try:
         content_json = json.loads(content)
         item = MeituanItem()
         item['content'] = content
         shop_id = response.meta['shop_id']
         item['meituantype'] = shop_id
         yield item
         page_sign = response.meta['page_sign']
         if page_sign:
             total = content_json.get('content_json')
             page_ = divmod(int(total), 30)
             if page_[0]:
                 pages = page_[0] + 1
                 for i in range(1, pages):
                     offset = i * 30
                     print(offset)
                     url = response.url
                     url = url.split('&offset=')
                     comment_url = url[0] + '&offset=%s' % offset
                     print(comment_url)
                     yield Request(comment_url,
                                   callback=self.parse_comment,
                                   dont_filter=True,
                                   headers=header,
                                   meta={
                                       'shop_id': shop_id,
                                       'page_sign': 0
                                   })
     except:
         pass
Code Example #4
 def parse_shop(self, response):
     content = response.body
     item = MeituanItem()
     item['content'] = content
     item['meituantype'] = 'shop'
     item['dt'] = dt
     yield item
Code Example #5
 def parse(self, response):
     self.offset += 20
     result = json.loads(response.text)
     # pull out hotel info and ids; the id is used to build the photo-request URL
     for i in result['data']['searchresult']:
         item = MeituanItem()
         item['name'] = i['name']
         item['poiid'] = i['poiid']
         # price ("from X yuan")
         item['originalPrice'] = i['originalPrice']
         # address
         item['addr'] = i['addr']
         # number of comments
         item['commentsCountDesc'] = re.search(
             r'\d+', i['commentsCountDesc']).group()
         # rating
         item['avgScore'] = i['avgScore']
         # business district
         item['areaName'] = i['areaName']
         # tags
         item['poiAttrTagList'] = i['poiAttrTagList']
         meta = {'name': i['name']}
         yield item
         # endpoint that returns the hotel's images
         get_img_url = 'https://ihotel.meituan.com/group/v1/poi/' + str(
             i['poiid']
         ) + '/imgs?utm_medium=touch&version_name=999.9&classified=true&X-FOR-WITH=s99Eh6sbSKFk2CtgH69UM3vUAY6t0ETpasHKECThU0OOv6duCovPaszE2v8xYNb0y5tFbd0R1Bz7HxqoWbDCnlQzCaz1U%2FWG2Is1UD6ycN%2Fv5sM9vSw%2BtMe6IAQAogdarTlC3CQPD6IfxiaVmV005g%3D%3D'
         yield scrapy.Request(get_img_url,
                              callback=self.parse_img,
                              meta=meta)
     next_page_url = 'https://ihotel.meituan.com/hbsearch/HotelSearch?utm_medium=pc&version_name=999.9&cateId=20&attr_28=129&uuid=4C4844EA1E383289F62BCF8C630947BC4DA3121E283B39A9EF152D2C34203C23%401533035734323&cityId=96&offset=' + str(
         self.offset
     ) + '&limit=20&startDay=20180731&endDay=20180731&q=&sort=defaults&X-FOR-WITH=14%2FqUl622CaRIB0XobpocXLZRvKEUIWbrzBk%2BTGfRG79yI5uZ60MWS%2BwGukD4LfoDQ%2BVr%2BQFEJQOTlTggX031A4wTNWNW1rEcwxOSyFjHbQK6x1dLtpcn9xIOIfd%2F%2BdCWXRVaKJX7Sc9TSdBPzjiJg%3D%3D'
     yield scrapy.Request(next_page_url, callback=self.parse)
Code Example #6
 def parse_deal(self, response):
     content = response.body
     try:
         content_json = json.loads(content)
         item = MeituanItem()
         item['content'] = content
         item['meituantype'] = 'deal'
         item['dt'] = dt
         yield item
     except:
         pass
Code Example #7
    def parse(self, response):

        div_list = response.xpath("//div[@class='alphabet-city-area']//div")

        for div in div_list:
            cities = div.xpath("./span[2]//a")
            for city in cities:
                item = MeituanItem()
                item["city"] = city.xpath("./text()").extract_first()

                city_url = city.xpath("./@href").extract_first()

                city_url = urljoin(response.url, city_url)

                yield scrapy.Request(url=city_url + "/meishi/",
                                     callback=self.parse_url,
                                     meta={"item": item})
Code Example #8
    def parse_shop(self, response):
        try:
            res_json = json.loads(response.text)
            result_list = res_json["data"]["searchresult"]
            for result in result_list:
                # build a fresh item for each shop instead of reusing one instance
                item = MeituanItem()
                shop_id = result["poiid"]
                title = result["name"]
                city = response.meta["city"]
                phone = result["phone"]

                item["shop_id"] = shop_id
                item["title"] = title
                item["city"] = city
                item["phone"] = phone
                yield item
        except:
            print("Response contained no data; content was:", response.text)
Code Example #9
    def parse(self, response):
        movies = response.xpath('//a[@class="reco-movieinfo__cover"]')
        titles = movies.xpath('@title').extract()
        rates = movies.xpath("string(strong)").extract()
        # combine covers that expose src directly with lazy-loaded data-src images
        img_url1 = movies.xpath('img/@src').extract()[0:5]
        img_url2 = movies.xpath('img/@data-src').extract()
        img_urls = img_url1 + img_url2

        for title, rate, img_url in zip(titles, rates, img_urls):
            item = MeituanItem()
            item['title'] = title
            item['rate'] = rate
            item['source'] = 'meituan'
            item['img_url'] = img_url
            yield item
Code Example #10
File: citys.py  Project: jiehao321/meituan
    def parse(self, response):
        response_str = response.body_as_unicode()
        detail_info = re.search(
            r'"detailInfo":\{"poiId":(\d+),"name":"(.*?)","avgScore":(.*?),"address":"(.*?)","phone":"(.*?)","openTime":"(.*?)","extraInfos":\[(.*?)\],"hasFoodSafeInfo":(.*?),"longitude":(.*?),"latitude":(.*?),"avgPrice":(\d+),"brandId":(\d+),"brandName":"(.*?)",".*?photos":{"frontImgUrl":"(.*?)","albumImgUrls":(.*?)},"recommended":(.*?),"crumbNav":(.*?),"prefer',
            response_str)
        if detail_info:
            poiId = detail_info.group(1)
            name = detail_info.group(2)
            avgScore = detail_info.group(3)
            address = detail_info.group(4)
            phone = detail_info.group(5)
            openTime = detail_info.group(6)
            extraInfos = detail_info.group(7)
            hasFoodSafeInfo = detail_info.group(8)
            longitude = detail_info.group(9)
            latitude = detail_info.group(10)
            avgPrice = detail_info.group(11)
            brandId = detail_info.group(12)
            brandName = detail_info.group(13)
            frontImgUrl = detail_info.group(14)
            albumImgUrls = detail_info.group(15)
            # parse the extraInfos fragment into a single text string
            if extraInfos:
                items = json.loads("[" + extraInfos + "]")
                extraInfos = ''
                for item_1 in items:
                    extraInfos = item_1.get('text') + '  ' + extraInfos
            # recommended dishes: the loop below keeps only the last dish in the list
            recommended = json.loads(detail_info.group(16))
            global recommend_name
            global recommend_price
            global recommend_img
            for item_1 in list(recommended):
                # recommend_id = item['id']  # recommended dish id
                recommend_name = item_1['name']  # dish name
                recommend_price = item_1['price']  # dish price
                recommend_img = item_1['frontImgUrl']  # dish image

            # pull area and food type out of the breadcrumb navigation
            crumbNav = json.loads(detail_info.group(17))
            area = crumbNav[0].get('title')[:-2]
            food_type = crumbNav[2].get('title')[len(area):]
            item = MeituanItem()
            item['area'] = area
            item['food_type'] = food_type
            item['poiId'] = poiId
            item['name'] = name
            item['avgScore'] = avgScore
            item['address'] = address
            item['phone'] = phone
            item['openTime'] = openTime
            item['extraInfos'] = extraInfos
            item['hasFoodSafeInfo'] = hasFoodSafeInfo
            item['longitude'] = longitude
            item['latitude'] = latitude
            item['avgPrice'] = avgPrice
            item['brandId'] = brandId
            item['brandName'] = brandName
            item['frontImgUrl'] = frontImgUrl
            item['albumImgUrls'] = albumImgUrls
            item['recommend_name'] = recommend_name
            item['recommend_price'] = recommend_price
            item['recommend_img'] = recommend_img
            yield item