Exemple #1
0
    def fetch_detail_from_listing_page(self, response):
        """Build a ``DetailItem`` from an offer-listing page response.

        The ASIN is read from ``response.meta['asin']``; image, title, star
        rating, review count and prices are scraped with CSS selectors.

        ``star``, ``reviews``, ``amazon_price`` and ``seller_price`` fall back
        to ``0`` when their node is absent.  The image and title nodes are
        required: a missing node (or a title with fewer than three
        ``:``-separated parts) raises ``IndexError``.

        :param response: response object exposing ``css()`` and ``meta``.
        :return: the populated ``DetailItem``.
        """
        item = DetailItem()
        item['asin'] = response.meta['asin']

        image_src = response.css('#olpProductImage img::attr(src)')[0].extract()
        # Swap the thumbnail size token for the larger rendition.
        item['image'] = image_src.strip().replace('_SS160', '_SS320')

        page_title = response.css('title::text')[0].extract()
        # The title is taken from the third ':'-separated segment.
        item['title'] = page_title.split(':')[2].strip()

        # Only a missing node (empty selector list -> IndexError) is an
        # expected failure here; anything else should surface.
        try:
            star_text = response.css('.a-icon-star span::text')[0].extract()
            item['star'] = star_text.split(' ')[0].strip()
        except IndexError:
            item['star'] = 0

        try:
            reviews_text = response.css(
                '.a-size-small > .a-link-normal::text')[0].extract()
            item['reviews'] = reviews_text.strip().split(' ')[0]
        except IndexError:
            item['reviews'] = 0

        item['amazon_price'] = 0
        item['seller_price'] = 0
        for row in response.css(".olpOffer[role=\"row\"] "):
            # Rows whose seller name is rendered as an image feed
            # 'amazon_price' (presumably Amazon's own logo -- confirm against
            # the page markup); all other rows feed 'seller_price'.
            has_seller_image = bool(row.css(".olpSellerName > img"))
            field = 'amazon_price' if has_seller_image else 'seller_price'

            # Keep the first price found for each kind of offer.
            if item[field] != 0:
                continue
            try:
                price_text = row.css('.olpOfferPrice::text')[0].extract()
                item[field] = price_text.strip().lstrip('$')
            except IndexError:
                # No price node in this row; a later row may still supply it.
                item[field] = 0
        return item
Exemple #2
0
    def parse(self, response):
        """Parse an offer-listing page and yield at most one ``DetailItem``.

        Pages without a ``#olpProductImage`` node (404 or unsupported ASIN)
        are reported on stdout and produce nothing.  Otherwise the item is
        built, appended to ``self.product_pool`` and yielded.

        ``star``, ``reviews``, ``amazon_price`` and ``seller_price`` fall back
        to ``0`` when their node is absent.  Any other failure while building
        the item is printed together with the ASIN and nothing is yielded, so
        a half-populated (or unbound) item never leaves this callback.

        :param response: response object exposing ``css()`` and ``meta``
            (``meta`` must carry ``'asin'`` and ``'cid'``).
        """
        # 404 or unsupported asin
        if not response.css('#olpProductImage'):
            print(response.meta['cid'], ':', response.meta['asin'])
            return

        try:
            item = DetailItem()
            item['asin'] = response.meta['asin']

            image_src = response.css(
                '#olpProductImage img::attr(src)')[0].extract()
            # Swap the thumbnail size token for the larger rendition.
            item['image'] = image_src.strip().replace('_SS160', '_SS320')

            page_title = response.css('title::text')[0].extract()
            # The title is taken from the third ':'-separated segment.
            item['title'] = page_title.split(':')[2].strip()

            # A missing optional node shows up as IndexError on the empty
            # selector list; only that case gets the 0 fallback.
            try:
                star_text = response.css(
                    '.a-icon-star span::text')[0].extract()
                item['star'] = star_text.split(' ')[0].strip()
            except IndexError:
                item['star'] = 0

            try:
                reviews_text = response.css(
                    '.a-size-small > .a-link-normal::text')[0].extract()
                item['reviews'] = reviews_text.strip().split(' ')[0]
            except IndexError:
                item['reviews'] = 0

            item['amazon_price'] = 0
            item['seller_price'] = 0
            for row in response.css(".olpOffer[role=\"row\"] "):
                # Rows whose seller name is rendered as an image feed
                # 'amazon_price' (presumably Amazon's own logo -- confirm
                # against the page markup); other rows feed 'seller_price'.
                has_seller_image = bool(row.css(".olpSellerName > img"))
                field = 'amazon_price' if has_seller_image else 'seller_price'

                # Keep the first price found for each kind of offer.
                if item[field] != 0:
                    continue
                try:
                    price_text = row.css('.olpOfferPrice::text')[0].extract()
                    item[field] = price_text.strip().lstrip('$')
                except IndexError:
                    # No price node in this row; a later row may supply it.
                    item[field] = 0

            self.product_pool.append(item)
        except Exception as err:
            # Callback boundary: report the failure and skip this page rather
            # than yielding an incomplete item.
            print(err)
            print(response.meta['asin'])
            return

        yield item
Exemple #3
0
    def fetch_detail_from_review_page(self, response):
        """Build a ``DetailItem`` from a product review page response.

        The ASIN is read from ``response.meta['asin']``; image, title, star
        rating, review count and price are scraped from the page.
        ``seller_price`` is always ``0`` and the scraped ``.arp-price`` value
        is stored as ``amazon_price``.

        Every scraped node is required: a missing node, or a product-info
        block without an "N.N out of" rating, raises ``IndexError``.

        :param response: response object exposing ``css()`` and ``meta``.
        :return: the populated ``DetailItem``.
        """
        info = response.css('#cm_cr-product_info')[0].extract()

        item = DetailItem()
        item['asin'] = response.meta['asin']

        image_src = response.css('.product-image img::attr(src)')[0].extract()
        # Swap the thumbnail size token for the larger rendition.
        item['image'] = image_src.strip().replace('S60', 'S320')
        item['title'] = response.css(
            '.product-title >h1>a::text')[0].extract().strip()

        # The dot is escaped so only a literal decimal rating such as "4.5"
        # matches; an unescaped '.' would accept any character there.
        item['star'] = re.findall(r"([0-9]\.[0-9]) out of", info)[0]

        # Get the total number of reviews.
        review_count_text = response.css(
            '.AverageCustomerReviews .totalReviewCount::text')[0].extract().strip()
        # NOTE(review): Helper.get_num_split_comma presumably strips thousands
        # separators -- confirm against its definition.
        item['reviews'] = Helper.get_num_split_comma(review_count_text)

        item['seller_price'] = 0
        price_text = response.css('.arp-price::text')[0].extract()
        item['amazon_price'] = price_text.strip().lstrip('$')
        return item