class Model_Scraper_Product_Offer1(Model_Scraper_Standard):

    def __init__(self, region):
        super(Model_Scraper_Product_Offer1, self).__init__(region)
        self.region = region
        self.processor = Service_Functions().getProcessor(
            'Product_Offer', region)

    def process(self, asin):
        self.processOffer = Model_Scraper_Standard(self.region)
        content = self.processOffer.processOffer(self.region, asin)
        if content:
            return content

    # def scrapeInventory(self, data):
    #     if (data == '' or data is None):
    #         return Model_Static_Scrape_Status.FAILED
    #     url = "http://www.amazon." + self.region + "/gp/aws/cart/add.html"
    #     fields = []
    #     session_id = None

    def scrape(self, asin):
        content = self.process(asin)
        if content:
            # Parse the response here; the parsed data then drives the
            # inventory scrape.
            print(content)
            data = self.processor.process(content.encode('utf-8'))
            if data:
                print(data)
                # Compute inventory from the parsed data.
                # inventory = self.scrapeInventory(data)
                # print(inventory)
            pageCount = self.processor.getPageCount(content)
            if pageCount > 1:
                for i in range(2, int(pageCount) + 1):
                    index = str((i - 1) * 10)
                    # The original hardcoded "com" here; self.region keeps the
                    # pagination URL on the same marketplace as page one.
                    pageUrl = ("http://www.amazon." + self.region
                               + "/gp/offer-listing/" + asin
                               + "/ref=olpOffersSuppressed?ie=UTF8&f_new=true"
                               + "&overridePriceSuppression=1&startIndex=" + index)
                    pageContent = self.processPageOffer(pageUrl)
                    if pageContent:
                        print(pageContent)
                        pageResult = self.processor.process(
                            pageContent.encode('utf-8'))
                        if pageResult:
                            print(pageResult)
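
# Usage sketch (hypothetical): Model_Scraper_Standard, Service_Functions and
# the 'Product_Offer' processor are assumed to be provided elsewhere in this
# package, and the ASIN below is a placeholder, not a real product.
#
#     offer_scraper = Model_Scraper_Product_Offer1('com')
#     offer_scraper.scrape('B00EXAMPLE')  # fetches the offer listing, then
#                                         # walks and prints each offer page
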
class Model_Scraper_Keywords(Model_Scraper_Standard):

    def __init__(self, region):
        super(Model_Scraper_Keywords, self).__init__(region)
        self.region = region
        self.processor = Service_Functions().getProcessor('Keywords', region)

    def scraper(self, keywords):
        self.process = Model_Scraper_Standard(self.region)
        url = ("https://www.amazon." + self.region
               + "/gp/search?keywords=" + keywords + "&page=1")
        # Hide the browser window (run under a virtual display):
        # with Display(backend="xvfb", size=(1440, 900)):
        print(url)
        try:
            content = self.process.processkeywords(url)
        except Exception as err:
            # Without this return, a failed fetch would leave `content`
            # unbound and crash the check below.
            print(err)
            return False
        try:
            if content:
                # Parse the response here.
                data = []
                result = self.processor.process(content.encode('utf-8'), 1)
                if result:
                    data.append(result)
                pagecount = int(self.processor.getPageCount(content))
                if pagecount > 5:
                    pagecount = 5
                if pagecount > 1:
                    for i in range(2, pagecount + 1):
                        pageurl = ("https://www.amazon." + self.region
                                   + "/gp/search?keywords=" + keywords
                                   + "&page=" + str(i))
                        print(pageurl)
                        pagecontent = self.process.processkeywords(pageurl)
                        if pagecontent:
                            pageresult = self.processor.process(
                                pagecontent.encode('utf-8'), i)
                            data.append(pageresult)
                return data
            elif content is None:
                return None
            else:
                return False
        except Exception:
            return False
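
# Usage sketch (hypothetical): the caller is expected to pass URL-safe
# keywords, since scraper() does no encoding of its own. The result is a list
# of at most five per-page results, None when the fetch returned nothing, or
# False on error.
#
#     keyword_scraper = Model_Scraper_Keywords('com')
#     pages = keyword_scraper.scraper('usb+cable')
#     if pages:
#         for page in pages:
#             print(page)
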
class Model_Scraper_Seller_Product(Model_Scraper_Standard):

    def __init__(self, region):
        super(Model_Scraper_Seller_Product, self).__init__(region)
        self.region = region
        self.processor = Service_Functions().getProcessor(
            'Seller_Product', region)

    def scrape(self, merchantId):
        if not merchantId:
            return False
        url = "https://www.amazon." + self.region + "/s?merchant=" + merchantId
        print(url)
        content = Model_Scraper_Standard(self.region).processSellerProduct(url)
        if content:
            result = self.processor.process(content)
            if result:
                data = [result]
                pagecount = int(self.processor.getPageCount(content))
                pagecount = 1  # testing override: limit to a single page
                if pagecount > 1:
                    if pagecount > 50:
                        pagecount = 50  # testing; the original cap was 50
                    for i in range(2, pagecount + 1):
                        pageurl = ("https://www.amazon." + self.region
                                   + "/s?merchant=" + merchantId
                                   + "&page=" + str(i))
                        print(pageurl)
                        pageContent = Model_Scraper_Standard(
                            self.region).processSellerProduct(pageurl)
                        if not pageContent:
                            continue
                        pageResult = self.processor.process(pageContent)
                        if pageResult:
                            data.append(pageResult)
                return data
            return Model_Static_DownloadQueue_Status().SCRAPED_NO_DATA
        return Model_Static_DownloadQueue_Status().FAILED
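
# Usage sketch (hypothetical): the merchant ID below is a placeholder, and
# Model_Static_DownloadQueue_Status is assumed to come from the surrounding
# package. scrape() returns a list of per-page results on success,
# SCRAPED_NO_DATA when the page parsed empty, or FAILED when the download
# itself failed.
#
#     seller_scraper = Model_Scraper_Seller_Product('com')
#     listing = seller_scraper.scrape('A1EXAMPLEMERCHANT')
#     if isinstance(listing, list):
#         print(len(listing), 'page(s) of products')
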