Ejemplo n.º 1
0
    def parse_list(self, response):
        """Yield one ``DianpingItem`` per shop entry of a listing page.

        Rows are the ``<li>`` children of ``//div[@id="shop-all-list"]/ul``;
        every field is read with an XPath relative to its row.

        A fresh item is created for each row.  Populating a single item
        created before the loop would make every yielded result the very
        same object, overwritten again by each later row.

        Rows without a shop name or shop link are skipped instead of raising
        ``IndexError`` and aborting the rest of the page.  ``commentnum``,
        ``avgcost``, ``taste``, ``envi`` and ``service`` fall back to ``'0'``;
        ``shoplevel``, ``foodtype`` and ``loc`` are ``None`` when absent.
        """
        rows = Selector(response).xpath('//div[@id="shop-all-list"]/ul/li')

        for row in rows:
            shopname = row.xpath('div[2]/div[1]/a[1]/h4/text()').extract_first()
            href = row.xpath('div[2]/div[1]/a[1]/@href').extract_first()
            if shopname is None or href is None:
                # Not a regular shop entry; nothing useful to emit.
                continue
            print(shopname)

            item = DianpingItem()
            item['shopname'] = shopname
            # The href is concatenated as-is: wrapping it in str() fails on
            # non-ASCII text under Python 2's default codec.
            item['shopurl'] = 'http://www.dianping.com' + href
            item['shoplevel'] = row.xpath(
                'div[2]/div[2]/span/@title').extract_first()
            item['commentnum'] = row.xpath(
                'div[2]/div[2]/a[1]/b/text()').extract_first(default='0')

            avgcost = row.xpath('div[2]/div[2]/a[2]/b/text()').extract_first()
            if avgcost is None:
                item['avgcost'] = '0'
            else:
                # Keep only the digits.  Joining characters works on unicode
                # text and yields a string on both Python 2 and Python 3,
                # unlike filter(str.isdigit, str(...)).
                item['avgcost'] = ''.join(ch for ch in avgcost if ch.isdigit())

            item['taste'] = row.xpath(
                'div[2]/span/span[1]/b/text()').extract_first(default='0')
            item['envi'] = row.xpath(
                'div[2]/span/span[2]/b/text()').extract_first(default='0')
            item['service'] = row.xpath(
                'div[2]/span/span[3]/b/text()').extract_first(default='0')

            item['foodtype'] = row.xpath(
                'div[2]/div[3]/a[1]/span/text()').extract_first()
            item['loc'] = row.xpath(
                'div[2]/div[3]/a[2]/span/text()').extract_first()

            yield item
Ejemplo n.º 2
0
    def parse_0(self, response):
        """Extract and print the shop entries of a listing page.

        Rows are the ``<li>`` children of ``//div[@id="shop-all-list"]/ul``.
        For each row the photo URL, shop name and shop URL are printed and a
        ``DianpingItem`` is filled in.

        NOTE(review): the populated item is never yielded or returned, so
        this method only has the printing side effect and returns ``None``.
        That is kept unchanged here; confirm whether the items were meant to
        be emitted.

        Rows without a shop name or shop link are skipped instead of raising
        ``IndexError``; other missing nodes produce ``None`` (or ``'0'`` for
        ``commentnum`` and ``avgcost``).
        """
        rows = Selector(response).xpath('//div[@id="shop-all-list"]/ul/li')

        for row in rows:
            photourl = row.xpath('div[1]/a[1]/img/@data-src').extract_first()
            shopname = row.xpath('div[2]/div[1]/a[1]/h4/text()').extract_first()
            href = row.xpath('div[2]/div[1]/a[1]/@href').extract_first()
            if shopname is None or href is None:
                # Not a regular shop entry; skip it rather than crash.
                continue

            item = DianpingItem()

            item['photourl'] = photourl
            print(photourl)

            item['shopname'] = shopname
            print(shopname)

            # The href is concatenated as-is: wrapping it in str() fails on
            # non-ASCII text under Python 2's default codec.
            shopurl = 'http://www.dianping.com' + href
            item['shopurl'] = shopurl
            print(shopurl)

            item['shoplevel'] = row.xpath(
                'div[2]/div[2]/span/@title').extract_first()
            item['commentnum'] = row.xpath(
                'div[2]/div[2]/a[1]/b/text()').extract_first(default='0')

            avgcost = row.xpath('div[2]/div[2]/a[2]/b/text()').extract_first()
            if avgcost is None:
                item['avgcost'] = '0'
            else:
                # Keep only the digits; works on unicode text and returns a
                # string on both Python 2 and Python 3.
                item['avgcost'] = ''.join(ch for ch in avgcost if ch.isdigit())

            item['foodtype'] = row.xpath(
                'div[2]/div[3]/a[1]/span/text()').extract_first()
            item['loc'] = row.xpath(
                'div[2]/div[3]/a[2]/span/text()').extract_first()
Ejemplo n.º 3
0
    def parse_info(self, response):
        """Yield a single ``DianpingItem`` built from a shop detail page.

        Fields are read from ``//div[@id="basic-info"]``, from
        ``//div[@class="breadcrumb"]`` and from the
        ``//ul[@class="comment-list J-list"]`` comment list.

        ``extract_first()`` returns ``None`` when an XPath matches nothing.
        Such values are stored as ``None`` instead of being sliced (which
        raises ``TypeError``), and missing or empty comment texts are left
        out of ``item['comments']`` instead of being appended as ``None``.
        """
        def first(node, path):
            # First match of `path` under `node`, or None when nothing matches.
            return node.xpath(path).extract_first()

        def cut(text, start=None, stop=None):
            # Slice that passes a missing (None) value through untouched.
            if text is None:
                return None
            return text[start:stop]

        print('Here is response!!')
        print(response)
        item = DianpingItem()
        selector = Selector(response)

        div = selector.xpath('//div[@id="basic-info"]')
        short_div = selector.xpath('//div[@class="breadcrumb"]')

        item['pic'] = first(selector, '//a[@class="J_main-photo"]/img/@src')

        # Fixed offsets: 13 leading and 9 trailing characters are dropped,
        # presumably whitespace padding around the link text - TODO confirm.
        item['foodtype'] = cut(first(short_div, 'a[3]/text()'), 13, -9)
        item['loc'] = cut(first(short_div, 'a[2]/text()'), 13, -9)

        shopname = first(div, 'h1/text()')
        # One character is dropped at each end of the heading text.
        item['shopname'] = cut(shopname, 1, -1)
        print(shopname)

        shopurl = response.url
        item['shopurl'] = shopurl
        # Everything after the first 29 characters of the URL; presumably
        # that prefix is 'http://www.dianping.com/shop/' - TODO confirm.
        item['ID'] = shopurl[29:]

        # The last two characters of the class attribute become "X.Y".
        shoplevelstr = first(div, 'div[1]/span[1]/@class')
        if shoplevelstr is not None and len(shoplevelstr) >= 2:
            item['shoplevel'] = shoplevelstr[-2] + '.' + shoplevelstr[-1]
        else:
            item['shoplevel'] = None

        item['avgcost'] = first(div, 'div[1]/span[3]/text()')

        # The first three characters of each rating text are dropped,
        # presumably a label prefix - TODO confirm.
        item['taste'] = cut(first(div, 'div[1]/span[4]/span[1]/text()'), 3)
        item['envi'] = cut(first(div, 'div[1]/span[4]/span[2]/text()'), 3)
        item['service'] = cut(first(div, 'div[1]/span[4]/span[3]/text()'), 3)

        item['street_address'] = first(div, 'div[2]/span[2]/@title')
        item['tel'] = first(div, 'p/span[2]/text()')

        comments = []
        for entry in selector.xpath('//ul[@class="comment-list J-list"]/li'):
            # The class of the first nested div decides where the text lives.
            flag = first(entry, 'div/div/@class')
            if flag == 'photos':
                context = first(entry, 'div/p[2]/text()')
            elif flag == 'info J-info-short':
                context = first(entry, 'div/div[2]/p/text()')
            else:
                continue
            # Truthiness rejects both '' and None; `!= ''` let None through.
            if context:
                print(context)
                comments.append(context)
        item['comments'] = comments

        yield item
Ejemplo n.º 4
0
    def parse_list(self, response):
        """Yield one loaded ``DianpingItem`` per shop entry of a listing page.

        Rows are the ``<li>`` children of ``//div[@id="shop-all-list"]/ul``.
        Each row gets its own ``ItemLoader`` bound to that row's selector, so
        the CSS queries only see the row's own markup.  A single loader bound
        to the whole response would collect every match on the page again on
        each iteration and keep yielding the same accumulated item.

        Positional CSS selectors use ``:nth-of-type(n)`` (1-based).  Bracket
        forms such as ``span[0]`` are attribute selectors in CSS and are not
        valid with a numeric name.  The mapping assumes the bracket indices
        were meant as 0-based positions - TODO confirm against the page.
        """
        rows = Selector(response).xpath('//div[@id="shop-all-list"]/ul/li')

        for row in rows:
            loader = ItemLoader(item=DianpingItem(), selector=row)

            loader.add_css("shop_name", ".txt .tit a h4::text")
            loader.add_css("shop_url", ".txt .tit a::attr(href)")
            loader.add_css("shop_lev", ".txt .comment span::attr(title)")
            loader.add_css("comment_num", ".txt .comment .review-num b::text")
            loader.add_css("avg_cost", ".txt .comment .mean-price b::text")

            loader.add_css(
                "shop_taste", ".txt .comment-list span:nth-of-type(1) b::text")
            loader.add_css(
                "shop_env", ".txt .comment-list span:nth-of-type(2) b::text")
            loader.add_css(
                "shop_service", ".txt .comment-list span:nth-of-type(3) b::text")

            # ".txt" needs its leading dot: a bare "txt" is a tag-name selector.
            loader.add_css(
                "shop_tag", ".txt .tag-addr a:nth-of-type(1) span::text")
            loader.add_css(
                "shop_tag_addr", ".txt .tag-addr a:nth-of-type(2) span::text")
            loader.add_css(
                "shop_addr", ".txt .tag-addr a:nth-of-type(3) span::text")

            yield loader.load_item()