Code example #1
0
    def parse_one_sale(self, response):
        """Parse a single sale detail page and yield one FindnsaveSaleItem.

        Expects ``response.meta`` to carry ``'id'`` and ``'th_img'`` set by
        the requesting callback.  Yields nothing when the offer wrapper or
        the sale name cannot be located on the page.
        """
        sale = f_xpath( response, '//div[contains(@class, "offer-description-wrapper") ' + \
                                   ' and contains(@class, "clearfix")]' )
        if not sale:
            return

        # NOTE(review): starts/expiration and the large image are parsed but
        # never stored on the item below -- presumably intended for a future
        # field or a removed output path; kept in case the helpers have
        # side effects. Confirm whether they can be dropped entirely.
        starts, expiration = self._parse_date( response )
        pct_off = self._parse_callout( sale )
        lg_img = self._parse_large_img( sale )

        sr = f_xpath( sale, './div[@class="offer-right"]' )
        name = fx_extract( sr, './h1[@itemprop="name"]/text()' )
        if name is None:
            logger.debug( 'not crawl name in : ' + response.url )
            return

        p_c, p_p, p_r, p_u = self._parse_price( sr )
        desc = self._parse_desc( sr )
        retailer, category, brand = self._parse_retailer_category_brand( sr )

        d = FindnsaveSaleItem()
        d['area'] = 'newyork'
        d['id'] = response.meta[ 'id' ]
        d['name'] = escape(name)
        d['priceCurrency'] = p_c
        d['price'] = p_p
        d['priceRegular'] = p_r
        # NOTE(review): key spelled 'priceUtilDate' (likely meant
        # 'priceUntilDate') -- kept as-is because downstream consumers
        # key on the item field name.
        d['priceUtilDate'] = p_u
        d['priceOff'] = pct_off
        d['retailer'] = escape(retailer)
        d['category'] = escape(category)
        d['brand'] = escape(brand)
        d['desc'] = escape(desc)

        yield d

        # Lazy %-style args: the message is only formatted if INFO is enabled.
        logger.info( 'crawl : `%s` OK', name )
Code example #2
0
    def parse(self, response):
        """Parse one brand-index page: yield a FindnsaveBrandItem per brand
        link, then follow the pagination link (if any) back into ``parse``.
        """
        logger.info('fetch : ' + response.url)
        # NOTE(review): if the <ul> is absent, f_xpath presumably returns a
        # selector-like object; a None return would make .xpath() raise --
        # confirm f_xpath's contract.
        brands = f_xpath( response, '//ul[contains(@class, "brands") ' + \
                                    ' and contains(@class, "columnize")' + \
                                    ' and contains(@class, "clearfix")]' ).xpath( './li' )

        for li in brands:
            anchor = f_xpath(li, './a')
            if not anchor:
                continue

            href = fx_extract(anchor, './@href')
            name = fx_extract(anchor, './text()')

            # href is expected to look like '/<section>/<nameid>/<id>/'.
            # Skip entries where href is None (AttributeError) or does not
            # split into exactly three parts (ValueError) -- a bare except
            # here previously hid unrelated bugs.  'brand_id' also avoids
            # shadowing the builtin 'id'.
            try:
                _section, brand_nameid, brand_id = href.strip('/').split('/')
            except (AttributeError, ValueError):
                continue

            d = FindnsaveBrandItem()
            d['id'] = brand_id
            d['name'] = escape(name)
            d['nameid'] = brand_nameid
            d['uri'] = href

            yield d

        next_url = self.brand_next_page(response)
        if next_url is None:
            return

        yield scrapy.http.Request(url=next_url,
                                  callback=self.parse,
                                  dont_filter=True)
Code example #3
0
    def parse(self, response):
        """Parse one brand-index page: yield a FindnsaveBrandItem for each
        brand link, then request the next page (if any) with this callback.
        """
        logger.info( 'fetch : ' + response.url )
        brands = f_xpath( response, '//ul[contains(@class, "brands") ' + \
                                    ' and contains(@class, "columnize")' + \
                                    ' and contains(@class, "clearfix")]' ).xpath( './li' )

        for entry in brands:
            link = f_xpath( entry, './a' )
            if not link:
                continue

            href = fx_extract( link, './@href' )
            name = fx_extract( link, './text()' )

            # href is expected to be '/<section>/<nameid>/<id>/'.  Catch only
            # the two failure modes -- href is None (AttributeError) or the
            # wrong number of path parts (ValueError) -- instead of a bare
            # except that would mask unrelated errors.  'brand_id' avoids
            # shadowing the builtin 'id'.
            try:
                _section, brand_nameid, brand_id = href.strip( '/' ).split( '/' )
            except (AttributeError, ValueError):
                continue

            d = FindnsaveBrandItem()
            d['id'] = brand_id
            d['name'] = escape(name)
            d['nameid'] = brand_nameid
            d['uri'] = href

            yield d

        next_url = self.brand_next_page( response )
        if next_url is None:
            return

        yield scrapy.http.Request( url = next_url, callback = self.parse,
                                   dont_filter = True )