def parse(self, response):
    """Parse an Amazon product-review page.

    Yields one item dict per review found on the page, then a follow-up
    Request for the next review page (or the next product when this
    product has no further pages).
    """
    logger.info('fetch : ' + response.url)

    # Product id is the last path segment of the URL, query string stripped.
    prdid = response.url.split('?')[0].split('/')[-1]

    review = f_xpath(response, '//table[@id="productReviews"]/tr/td')
    if review is None:
        # No review table on this page: schedule the next product and stop.
        # BUG FIX: the original fell through after this yield and crashed
        # below on xpath_extract(review, ...) with review == None.
        yield scrapy.http.Request(url=next_product_url(),
                                  callback=self.parse)
        return

    rids = xpath_extract(review, './a/@name')
    details = xpath(review, './div')

    # Guard against mismatched counts between anchors and detail divs.
    length = min(len(rids), len(details))
    for i in xrange(length):
        rdetail = details[i]
        divs = xpath(rdetail, './div')
        # max of len( divs ) is 7, ( 0 - 6 )
        # 0 : number of helpful review
        # 1 : star, helpful text, date
        # 2 : reviewer, reviewer from
        # 3 : from
        # 4 : free product
        # 5 : reviewText
        # 6 : helpful?

        d = self.empty_item()
        d['prdid'] = prdid
        d['rid'] = rids[i]
        d['text'] = ' '.join(
            xpath_extract(rdetail, './div[@class="reviewText"]/text()'))

        # Consume the sub-divs front-to-back, dispatching on marker text;
        # order varies per review, so each branch matches by content.
        while len(divs) > 0:
            div = divs[0]
            divs = divs[1:]
            text = div.extract()

            if 'people found the following review helpful' in text:
                d['num_help_review'] = self.parse_num_help_review(div)
                continue
            if 'out of' in text and 'stars' in text and '</nobr>' in text:
                d['star'], d['help_text'], d['date'] = \
                    self.parse_star_help_date(div)
                continue
            if 'By' in text and 'See all my reviews' in text:
                d['reviewer'], d['reviewer_from'] = \
                    self.parse_reviewer_from(div)
                continue
            if 'This review is from' in text:
                # "This review is from" is always the last field we need.
                d['rfrom'] = self.parse_from(div)
                break

        yield d

    # Follow pagination; fall back to the next product when exhausted.
    next_url = self.next_page(response) or next_product_url()
    # see http://doc.scrapy.org/en/latest/topics/request-response.html
    yield scrapy.http.Request(url=next_url, callback=self.parse,
                              dont_filter=True)
def _parse_desc(self, p):
    """Return the offer description under *p* as one whitespace-trimmed string."""
    container = f_xpath(p, './div[@class="offer-descriptions"]')
    fragments = xpath_extract(
        container, './/div[@class="offer-description"]/text()')
    return ' '.join(fragment.strip() for fragment in fragments)
def parse(self, response):
    """Parse an Amazon product-review page.

    Yields one item dict per review found on the page, then a follow-up
    Request for the next review page (or the next product when this
    product has no further pages).
    """
    logger.info('fetch : ' + response.url)

    # Product id is the last path segment of the URL, query string stripped.
    prdid = response.url.split('?')[0].split('/')[-1]

    review = f_xpath(response, '//table[@id="productReviews"]/tr/td')
    if review is None:
        # No review table on this page: schedule the next product and stop.
        # BUG FIX: the original fell through after this yield and crashed
        # below on xpath_extract(review, ...) with review == None.
        yield scrapy.http.Request(url=next_product_url(),
                                  callback=self.parse)
        return

    rids = xpath_extract(review, './a/@name')
    details = xpath(review, './div')

    # Guard against mismatched counts between anchors and detail divs.
    length = min(len(rids), len(details))
    for i in xrange(length):
        rdetail = details[i]
        divs = xpath(rdetail, './div')
        # max of len( divs ) is 7, ( 0 - 6 )
        # 0 : number of helpful review
        # 1 : star, helpful text, date
        # 2 : reviewer, reviewer from
        # 3 : from
        # 4 : free product
        # 5 : reviewText
        # 6 : helpful?

        d = self.empty_item()
        d['prdid'] = prdid
        d['rid'] = rids[i]
        d['text'] = ' '.join(
            xpath_extract(rdetail, './div[@class="reviewText"]/text()'))

        # Consume the sub-divs front-to-back, dispatching on marker text;
        # order varies per review, so each branch matches by content.
        while len(divs) > 0:
            div = divs[0]
            divs = divs[1:]
            text = div.extract()

            if 'people found the following review helpful' in text:
                d['num_help_review'] = self.parse_num_help_review(div)
                continue
            if 'out of' in text and 'stars' in text and '</nobr>' in text:
                d['star'], d['help_text'], d['date'] = \
                    self.parse_star_help_date(div)
                continue
            if 'By' in text and 'See all my reviews' in text:
                d['reviewer'], d['reviewer_from'] = \
                    self.parse_reviewer_from(div)
                continue
            if 'This review is from' in text:
                # "This review is from" is always the last field we need.
                d['rfrom'] = self.parse_from(div)
                break

        yield d

    # Follow pagination; fall back to the next product when exhausted.
    next_url = self.next_page(response) or next_product_url()
    # see http://doc.scrapy.org/en/latest/topics/request-response.html
    yield scrapy.http.Request(url=next_url, callback=self.parse,
                              dont_filter=True)