def fetch_detail_from_listing_page(self, response):
    """Build a ``DetailItem`` from an offer-listing page response.

    The ASIN is read from ``response.meta['asin']``; image, title, star
    rating, review count and prices are scraped from the page with CSS
    selectors.

    Args:
        response: Response object exposing ``.css()`` and ``.meta``.

    Returns:
        The populated ``DetailItem``. ``star`` and ``reviews`` fall back to
        ``0`` when their element is missing; ``amazon_price`` and
        ``seller_price`` stay ``0`` when no matching offer row has a price.

    Raises:
        IndexError: If the product image or the title element is missing,
            or the title has fewer than three ``:``-separated parts.
    """
    item = DetailItem()
    item['asin'] = response.meta['asin']

    image_src = response.css('#olpProductImage img::attr(src)')[0].extract()
    # Swap the thumbnail size token for the larger variant.
    item['image'] = image_src.strip().replace('_SS160', '_SS320')

    page_title = response.css('title::text')[0].extract()
    # The product name is taken from the third colon-separated segment.
    item['title'] = page_title.split(':')[2].strip()

    # Indexing an empty selector result raises IndexError; that is the only
    # failure these fallbacks are meant to absorb.
    try:
        star_text = response.css('.a-icon-star span::text')[0].extract()
        item['star'] = star_text.split(' ')[0].strip()
    except IndexError:
        item['star'] = 0

    try:
        reviews_text = response.css(
            '.a-size-small > .a-link-normal::text')[0].extract()
        item['reviews'] = reviews_text.strip().split(' ')[0]
    except IndexError:
        item['reviews'] = 0

    item['amazon_price'] = 0
    item['seller_price'] = 0
    for row in response.css(".olpOffer[role=\"row\"] "):
        # Rows whose seller name is rendered as an image fill amazon_price;
        # every other row fills seller_price.
        # NOTE(review): presumably only Amazon's own offer uses a logo image.
        if row.css(".olpSellerName > img"):
            price_key = 'amazon_price'
        else:
            price_key = 'seller_price'

        # Only the first priced row of each kind is recorded.
        if item[price_key] != 0:
            continue
        try:
            price_text = row.css('.olpOfferPrice::text')[0].extract()
            item[price_key] = price_text.strip().lstrip('$')
        except IndexError:
            item[price_key] = 0

    return item
def parse(self, response):
    """Parse an offer-listing page and yield its ``DetailItem``.

    Pages without a ``#olpProductImage`` element (404 or unsupported ASIN)
    are reported with ``print`` and produce nothing. On success the item is
    appended to ``self.product_pool`` and then yielded. If extraction fails,
    the error and the ASIN are printed and nothing is yielded, so a
    half-populated item never reaches downstream consumers.

    Args:
        response: Response object exposing ``.css()`` and ``.meta`` (with
            ``'asin'`` and ``'cid'`` entries).

    Yields:
        The fully populated ``DetailItem`` for the page.
    """
    # 404 or unsupported ASIN: the offer-listing markup is absent.
    if not response.css('#olpProductImage'):
        print(response.meta['cid'], ':', response.meta['asin'])
        return

    try:
        item = DetailItem()
        item['asin'] = response.meta['asin']

        image_src = response.css(
            '#olpProductImage img::attr(src)')[0].extract()
        # Swap the thumbnail size token for the larger variant.
        item['image'] = image_src.strip().replace('_SS160', '_SS320')

        page_title = response.css('title::text')[0].extract()
        # The product name is taken from the third colon-separated segment.
        item['title'] = page_title.split(':')[2].strip()

        # A missing element surfaces as IndexError on the [0] lookup; fall
        # back to 0 for these optional fields.
        try:
            star_text = response.css('.a-icon-star span::text')[0].extract()
            item['star'] = star_text.split(' ')[0].strip()
        except IndexError:
            item['star'] = 0

        try:
            reviews_text = response.css(
                '.a-size-small > .a-link-normal::text')[0].extract()
            item['reviews'] = reviews_text.strip().split(' ')[0]
        except IndexError:
            item['reviews'] = 0

        item['amazon_price'] = 0
        item['seller_price'] = 0
        for row in response.css(".olpOffer[role=\"row\"] "):
            # Rows whose seller name is rendered as an image fill
            # amazon_price; every other row fills seller_price.
            if row.css(".olpSellerName > img"):
                price_key = 'amazon_price'
            else:
                price_key = 'seller_price'

            # Only the first priced row of each kind is recorded.
            if item[price_key] != 0:
                continue
            try:
                price_text = row.css('.olpOfferPrice::text')[0].extract()
                item[price_key] = price_text.strip().lstrip('$')
            except IndexError:
                item[price_key] = 0

        self.product_pool.append(item)
    except Exception as err:
        # Boundary handler: report the failure and skip this page instead of
        # yielding an unbound or partially filled item.
        print(err)
        print(response.meta['asin'])
        return

    yield item
def fetch_detail_from_review_page(self, response):
    """Build a ``DetailItem`` from a product review page response.

    The ASIN is read from ``response.meta['asin']``; image, title, star
    rating, review count and price are scraped from the page.
    ``seller_price`` is always ``0`` and the page's ``.arp-price`` value is
    stored as ``amazon_price``.

    Args:
        response: Response object exposing ``.css()`` and ``.meta``.

    Returns:
        The populated ``DetailItem``.

    Raises:
        IndexError: If any of the required page elements is missing or no
            "<d>.<d> out of" rating text is found in the product info block.
    """
    info = response.css('#cm_cr-product_info')[0].extract()

    item = DetailItem()
    item['asin'] = response.meta['asin']

    image_src = response.css('.product-image img::attr(src)')[0].extract()
    # Swap the thumbnail size token for the larger variant.
    item['image'] = image_src.strip().replace('S60', 'S320')

    item['title'] = response.css(
        '.product-title >h1>a::text')[0].extract().strip()

    # The dot is escaped so only a literal decimal point matches; an
    # unescaped "." would also accept strings such as "125 out of".
    item['star'] = re.findall(r"([0-9]\.[0-9]) out of", info)[0]

    # Get the total number of reviews.
    review_count_text = response.css(
        '.AverageCustomerReviews .totalReviewCount::text')[0].extract().strip()
    item['reviews'] = Helper.get_num_split_comma(review_count_text)

    item['seller_price'] = 0
    price_text = response.css('.arp-price::text')[0].extract()
    item['amazon_price'] = price_text.strip().lstrip('$')
    return item