Python DoubanBookItem Examples

Programming Language: Python

Namespace/Package Name: douban.items

Class/Type: DoubanBookItem

Examples at hotexamples.com: 4

Python DoubanBookItem - 4 examples found. These are the top-rated real-world Python examples of douban.items.DoubanBookItem extracted from open source projects. You can rate examples to help us improve the quality of the examples.

Frequently Used Methods

Show Hide

DoubanBookItem(4)

Frequently Used Methods

DoubanBookItem (4)

Example #1

Show file

    def parse(self, response):
        """Create the parsing rules: extract one book item per list entry.

        Every ``<li>`` under ``div.bd`` is read with XPaths relative to that
        node. Each field receives the first matching value, or ``None`` when
        the XPath matches nothing.

        Args:
            response: Response object exposing ``xpath()``.

        Yields:
            DoubanBookItem: a separate item instance for each list entry.
        """
        ul_xpath = "//div[@class='bd']/ul/li"
        info_xpath = "./div[@class='info']"

        # Item field name -> XPath evaluated relative to one <li> node.
        field_xpaths = (
            ('book_name', "%s/div[@class='title']/a/text()" % info_xpath),
            ('subtitle', "%s/div[@class='title']/p/text()" % info_xpath),
            ('price', "%s//div[@class='action-buttons']/span/text()" % info_xpath),
            ('author', "%s//a[@class='author-item']/text()" % info_xpath),
            ('category',
             "%s//span[@class='category']/span[@class='labeled-text']/span/text()" % info_xpath),
            ('average', "%s//span[@class='rating-average']/text()" % info_xpath),
            ('evaluate', "%s//span[@class='rating-amount']/a/span/text()" % info_xpath),
            ('desc', "%s/div[@class='article-desc-brief']/text()" % info_xpath),
            ('cover', "./div[1]/a/img/@src"),
        )

        for entry in response.xpath(ul_xpath):
            # Build a fresh item per entry. Reusing one instance across
            # iterations would yield the same object every time, so any
            # consumer that keeps a reference would see its fields
            # overwritten by the next row.
            book_item = DoubanBookItem()
            for field, xpath in field_xpaths:
                book_item[field] = entry.xpath(xpath).extract_first()
            yield book_item

        next_link = response.xpath(".//li[@class='next']/a/@href").extract_first()
        if next_link:
            # NOTE(review): the absolute URL is built here but no request is
            # issued in the visible code -- the snippet appears to be
            # truncated; confirm against the full spider.
            next_link = "%s%s" % (self.start_urls[0], next_link)

Example #2

Show file

 def parse_next(self, response):
     """Yield one book item per ``<tr class="item">`` row of the response.

     A field whose XPath matches nothing is set to ``None`` instead of
     raising ``IndexError``, so one incomplete row no longer aborts the
     generator and drops the remaining rows of the page.

     Args:
         response: Response object exposing ``xpath()``.

     Yields:
         DoubanBookItem: item with ``name``, ``price`` and ``ratings`` set.
     """
     def first_or_none(node, path):
         # First extracted match for ``path`` under ``node``, else None.
         matches = node.xpath(path).extract()
         return matches[0] if matches else None

     for row in response.xpath('//tr[@class="item"]'):
         book = DoubanBookItem()
         book['name'] = first_or_none(row, 'td[2]/div[1]/a/@title')
         book['price'] = first_or_none(row, 'td[2]/p/text()')
         book['ratings'] = first_or_none(row, 'td[2]/div[2]/span[2]/text()')
         yield book

Example #3

Show file

File: DoubanBooks.py Project: harbourlga/dataanalysis

    def parse(self, response):
        """Yield one item per book row, then a request for the next page.

        Text fields (``title``, ``title2``, ``info``, ``rate``, ``hot``) hold
        the first match of their XPath, or ``''`` when nothing matches. The
        original code applied this fallback to ``title2`` only; the other
        fields raised ``IndexError`` on a row lacking the node, which stopped
        the generator for the whole page. ``img_url`` keeps the full list of
        matches, as before.

        Args:
            response: Response passed to ``Selector``.

        Yields:
            DoubanBookItem: one per ``<tr class="item">`` row.
            Request: for the first ``span.next`` link, when one exists,
                handled again by this method.
        """
        def first_text(node, path):
            # First extracted match for ``path`` under ``node``, else ''.
            matches = node.xpath(path).extract()
            return matches[0] if matches else ''

        selector = Selector(response)

        # The text fields are read from the top-aligned cell without a
        # ``width`` attribute; the cover image from the one that has it.
        text_cell = 'td[@valign="top"  and not(@width)]'
        star_block = text_cell + '/div[@class="star clearfix"]'

        for eachbook in selector.xpath('//tr[@class="item"]'):
            item = DoubanBookItem()
            item['title'] = first_text(
                eachbook, text_cell + '/div[@class="pl2"]/a/text()')
            item['title2'] = first_text(
                eachbook, text_cell + '/div[@class="pl2"]/span/text()')
            item['info'] = first_text(
                eachbook, text_cell + '/p[@class="pl"]/text()')
            item['rate'] = first_text(
                eachbook, star_block + '/span[@class="rating_nums"]/text()')
            item['hot'] = first_text(
                eachbook, star_block + '/span[@class="pl"]/text()')
            # Deliberately left as a list (possibly empty), matching the
            # original behaviour.
            item['img_url'] = eachbook.xpath(
                'td[@valign="top"  and @width]/a[@class="nbg"]/img/@src'
            ).extract()

            yield item

        nextlink = selector.xpath('//span[@class="next"]/a/@href').extract()
        if nextlink:
            # NOTE(review): the href is passed to Request unchanged, so it is
            # presumably an absolute URL -- confirm against the target page.
            yield Request(nextlink[0], callback=self.parse)

Example #4

Show file

File: douban_spider.py Project: xyzhelloworld/douban-spider

    def parse_item(self, response):
        """Build a single book item from a book detail page.

        Extraction is best-effort: if any lookup fails, the error and the
        partially filled item are logged at DEBUG level and the item is
        returned with whatever fields were set before the failure.

        Args:
            response: Response passed to ``Selector``.

        Returns:
            DoubanBookItem: the (possibly partially) populated item.
        """
        sel = Selector(response)
        item = DoubanBookItem()

        # Materialise a list: on Python 3 ``filter()`` returns a lazy
        # iterator that cannot be indexed, so every ``infos[n]`` below would
        # raise TypeError and be swallowed by the except clause.
        infos = [info
                 for info in sel.xpath('//div[@id="info"]/text()').extract()
                 if not info.startswith('\n')]

        # Sample ``infos`` content recorded by the original author:
        #   [0] publisher name
        #   [1] 2015-7-1        (publication date)
        #   [2] 192             (page count)
        #   [3] 48.00 yuan      (price)
        #   [4] paperback       (binding; not stored)
        #   [5] 9787540471699   (ISBN)

        try:
            item['bookname'] = sel.xpath(
                '//span[@property="v:itemreviewed"]/text()').extract()[0]
            item['bookauthor'] = sel.xpath(
                '//a[@class=""]/text()').extract()[0]
            item['bookpress'] = infos[0]
            item['pressdate'] = infos[1]
            item['bookpages'] = infos[2]
            item['bookprice'] = infos[3]
            item['bookisbn'] = infos[5]
            item['bookcode'] = sel.xpath(
                '//strong[@property="v:average"]/text()').extract()[0]

            # Keep only the part of the price before the first '.'.
            item['bookprice'] = item['bookprice'].split('.')[0]
            item['bookcode'] = item['bookcode'].strip()
        except Exception as e:
            # Deliberately broad: a page with an unexpected layout must not
            # abort the crawl; the failure is recorded for later inspection.
            log.msg('Error Info: %s' % e, level=log.DEBUG)
            log.msg('Item: %s' % item, level=log.DEBUG)

        return item