def getAmazonKeywordsUploadData(self, region, keywords):
    """Collect the stored rank, ad and search data for one keyword.

    Runs three SELECT statements built by ``Model_Mapper_Amazon`` and maps
    the returned rows onto the matching key lists from ``Model_Keys``.

    Args:
        region: Region value handed to the SQL builders.
        keywords: Keyword value handed to the SQL builders.

    Returns:
        A dict holding any of the keys ``'rank'``, ``'ad'`` and
        ``'keywords'`` (a section without rows is left out), or ``False``
        when none of the three queries produced a section.
    """
    data = {}
    amazon = Model_Mapper_Amazon()

    # Connect to the database.
    db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                              'scraper', 'utf8')
    conn = db.connects()
    try:
        mapper = Model_Mapper_Mysql(conn)

        # Rank rows for the keyword.
        sql = amazon.getAmazonProductKeywordsRank_select_sql_joint(
            region, keywords)
        rank_keys = Model_Keys.rankdata_key
        rank_rows = [dict(zip(rank_keys, row)) for row in mapper.select(sql)]
        if rank_rows:
            data['rank'] = {
                'region': region,
                'keywords': keywords,
                'list': rank_rows,
            }

        # Ad rows for the keyword.
        sql = amazon.getAmazonProductKeywordsAd_select_sql_joint(
            region, keywords)
        ad_keys = Model_Keys.addata_key
        ad_rows = [dict(zip(ad_keys, row)) for row in mapper.select(sql)]
        if ad_rows:
            data['ad'] = {
                'region': region,
                'keywords': keywords,
                'list': ad_rows,
            }

        # Search-keyword record: only the first returned row is uploaded,
        # so only that row is converted.
        sql = amazon.getKeywords(region, keywords)
        keyword_rows = mapper.select(sql)
        if keyword_rows:
            data['keywords'] = dict(
                zip(Model_Keys.keywords_key, keyword_rows[0]))
    finally:
        # Release the connection even when a query raises; the original
        # never closed it.
        conn.close()

    if data:
        return data
    return False
def scrapeTopReviewer(self, downloadQueue):
    """Scrape a range of top-reviewer pages and store the result.

    ``downloadQueue[4]`` carries the page range, either ``"begin:end"`` or
    a single ``"end"`` (begin then defaults to 1). ``downloadQueue[2]`` is
    translated to a region through ``Model_Static_Region().getText``.
    The range is clamped to 1..1000 before scraping.

    Args:
        downloadQueue: Indexable queue record; indexes 2 and 4 are read.

    Returns:
        ``Model_Static_DownloadQueue_Status.SCRAPED`` when data was stored,
        ``SCRAPED_NO_DATA`` when the scraper returned ``""``, ``FAILED``
        for any other falsy scraper result. When an exception is raised it
        is printed and ``None`` is returned implicitly.
    """
    try:
        value = downloadQueue[4]
        try:
            value = value.split(":")
        except Exception as err:
            # Best effort, as in the original: a value that cannot be
            # split is reported and then used as it is.
            print(err)

        if len(value) == 2:
            begin = int(value[0])
            end = int(value[1])
        else:
            begin = 1
            end = int(value[0])

        region = Model_Static_Region().getText(downloadQueue[2])
        self.scraper = Model_Scraper_TopReviewer(region)

        # Clamp the requested page range.
        begin = max(begin, 1)
        end = min(end, 1000)
        data = self.scraper.scrape(begin, end + 1)

        if data:
            amazon = Model_Mapper_Amazon()
            # Connect only when there is something to store, and always
            # release the connection afterwards (the original opened it
            # unconditionally and never closed it).
            db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                                      'scraper', 'utf8')
            conn = db.connects()
            try:
                mapper = Model_Mapper_Mysql(conn)

                # Ten ranks per page: remove the stored ranks covered by
                # the scraped pages before inserting the fresh rows.
                rankBegin = begin * 10 - 9
                rankEnd = end * 10
                sql = amazon.TopReviewer_delete_sql_joint(
                    region, rankBegin, rankEnd)
                mapper.delete(sql)

                for items in data:
                    for item in items:
                        sql = amazon.TopReviewer_insert_sql_joint(
                            region, item)
                        mapper.insert(sql)
            finally:
                conn.close()
            return Model_Static_DownloadQueue_Status.SCRAPED
        elif data == "":
            return Model_Static_DownloadQueue_Status.SCRAPED_NO_DATA
        else:
            return Model_Static_DownloadQueue_Status.FAILED
    except Exception as err:
        print(err)
def __init__(self):
    """Connect to the database and create the mapper helpers.

    Sets ``self.db`` (the connector), ``self.mapper`` (a
    ``Model_Mapper_Mysql`` wrapping the opened connection) and
    ``self.amazon`` (a ``Model_Mapper_Amazon`` instance).
    """
    # NOTE(review): connection settings, including the password, are
    # hard-coded here.
    connector = Model_Mapper_Connect(
        'localhost', 3306, 'root', '123123', 'All_Scraper', 'utf8')
    self.db = connector
    self.mapper = Model_Mapper_Mysql(connector.connects())
    self.amazon = Model_Mapper_Amazon()
def open_file(self, region, asin):
    """Parse a saved product page, print the generated SQL and exit.

    Reads ``../../../Downloader/Amazon_Data/<asin>.html``, runs it through
    ``Model_Processor_Product_Base_Com().process`` and builds a statement
    with ``Model_Mapper_Amazon().product_sql_joint``. The statement is
    printed and the interpreter is then terminated with ``sys.exit()``.

    Args:
        region: Region value passed to the SQL builder.
        asin: Product ASIN; also the base name of the HTML file.
    """
    # ``with`` closes the file even if reading fails.
    with open('../../../Downloader/Amazon_Data/'+asin+'.html', 'r') as page:
        html = page.read()

    com = Model_Processor_Product_Base_Com()
    data = com.process(html)

    amazon = Model_Mapper_Amazon()
    sql = amazon.product_sql_joint(region, asin, data)
    # Parenthesised form prints the same text under the Python 2 print
    # statement and the Python 3 print function.
    print(sql)
    sys.exit()

    # NOTE(review): everything below is unreachable while the sys.exit()
    # call above is in place; it is kept because it looks like the intended
    # follow-up step (insert the statement) -- confirm before enabling.
    db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                              'scraper', 'utf8')
    conn = db.connects()
    try:
        mapper = Model_Mapper_Mysql(conn)
        mapper.insert(sql)
    finally:
        conn.close()
def getAmazonTopReviewerUploadData(self, region, begin, end):
    """Load the stored top-reviewer rows for a page range.

    Pages hold ten ranks each, so pages ``begin``..``end`` cover ranks
    ``begin * 10 - 9`` .. ``end * 10``.

    Args:
        region: Region value passed to the SQL builder.
        begin: First page; an int or a numeric string.
        end: Last page; an int or a numeric string.

    Returns:
        A list of dicts (one per row, keyed by
        ``Model_Keys.topreviewer_key``) or ``False`` when no row matched.

    Raises:
        ValueError: If ``begin`` or ``end`` is not numeric.
    """
    # Callers obtain the bounds from ``"begin:end".split(":")`` and pass
    # them on as strings; without this coercion ``begin * 10 - 9`` raises
    # TypeError and ``end * 10`` repeats the string ten times.
    begin = int(begin)
    end = int(end)
    rankBegin = begin * 10 - 9
    rankEnd = end * 10

    amazon = Model_Mapper_Amazon()
    # Connect to the database.
    db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                              'scraper', 'utf8')
    conn = db.connects()
    try:
        mapper = Model_Mapper_Mysql(conn)
        sql = amazon.TopReviewerUpload_select_sql_joint(
            region, rankBegin, rankEnd)
        keys = Model_Keys.topreviewer_key
        reviewers = [dict(zip(keys, row)) for row in mapper.select(sql)]
    finally:
        # The original never released the connection.
        conn.close()

    if reviewers:
        return reviewers
    return False
def processMobileUploadQueues(self, mobileuploadQueues, region):
    """Upload mobile upload-queue entries and delete the accepted ones.

    For every queue record the upload payload is assembled, sent through
    ``Service_Upload().upload(data, 'UploadQueue')`` and, for each id the
    service answers with ``True``, the matching queue row is deleted.

    Only PRODUCT queues produce upload data here; the handlers for the
    other queue types were commented out in the original and are not
    reinstated, so those entries are uploaded with ``"data": False``.

    NOTE(review): the source lost its indentation; the upload step is
    reconstructed as running once per queue entry -- confirm.

    Args:
        mobileuploadQueues: Iterable of indexable records; indexes 0 (id),
            1 (region), 2 (type) and 3 (value) are read.
        region: Region value stored under ``'region'`` in the payload.
    """
    data = {'region': region}
    for mobileuploadQueue in mobileuploadQueues:
        queueRegion = Model_Static_Region().getText(mobileuploadQueue[1])
        queueValue = mobileuploadQueue[3]
        result = False

        if str(mobileuploadQueue[2]).isdigit():
            queue_type = mobileuploadQueue[2]
            # Product page.
            if queue_type == Model_Static_DownloadQueue_Type.PRODUCT:
                result = self.getProductService(
                ).getAmazonProductUploadData(queueRegion, queueValue, None)

        # Assemble the payload for this entry.
        data['region_data'] = {
            mobileuploadQueue[0]: {
                "data": result,
                'region': mobileuploadQueue[1],
                'type': mobileuploadQueue[2],
                'value': mobileuploadQueue[3],
            }
        }

        uploadService = Service_Upload()
        result = uploadService.upload(data, 'UploadQueue')
        if not result:
            continue

        amazon = Model_Mapper_Amazon()
        # Connect to the database.
        db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                                  'scraper', 'utf8')
        conn = db.connects()
        try:
            mapper = Model_Mapper_Mysql(conn)
            # Act on the per-id answer of the cloud server (the original
            # comment marks this evaluation as still to be improved).
            for upload_queue_id, subresult in result.items():
                # Equality is kept on purpose: only True / 1 count as
                # accepted, other truthy answers do not.
                if subresult == True:  # noqa: E712
                    sql = amazon.MobileUploadQueue_select_sql_joint(
                        str(upload_queue_id))
                    uploadQueue = mapper.select(sql)
                    if uploadQueue:
                        sql = amazon.MobileUploadQueue_delete_sql_joint(
                            str(upload_queue_id))
                        mapper.delete(sql)
        finally:
            # Close even when a statement raises.
            conn.close()
def uploadDownloadQueues(self, downloadQueues, region):
    """Upload scraped download-queue entries and record the outcome.

    For every queue record the data belonging to its type is gathered
    (only when the record's status is SCRAPED), uploaded through
    ``Service_Upload().upload`` and the upload status / upload count of
    each id in the service's answer is updated in the database.

    NOTE(review): the source lost its indentation; the upload step is
    reconstructed as running once per queue entry -- confirm.

    Args:
        downloadQueues: Iterable of indexable records; indexes 0 (id),
            1 (ac_download_queue_id), 2 (region), 3 (type), 4 (value) and
            5 (status) are read.
        region: Region value stored under ``'region'`` in the payload.
    """
    data = {'region': region}
    amazon = Model_Mapper_Amazon()
    queue_types = Model_Static_DownloadQueue_Type

    # Connect to the database.
    db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                              'scraper', 'utf8')
    conn = db.connects()
    try:
        mapper = Model_Mapper_Mysql(conn)
        for downloadQueue in downloadQueues:
            queueRegion = Model_Static_Region().getText(downloadQueue[2])
            queueValue = downloadQueue[4]
            result = False

            if downloadQueue[5] == Model_Static_DownloadQueue_Status.SCRAPED:
                queue_type = downloadQueue[3]
                # Product page.
                if (queue_type == queue_types.PRODUCT or
                        queue_type == queue_types.PRODUCT_INFO):
                    result = self.getProductService(
                    ).getAmazonProductUploadData(
                        queueRegion, queueValue, None)
                # Keywords (first pages) or first-page keyword info.
                elif (queue_type == queue_types.KEYWORDS or
                        queue_type == queue_types.KEYWORDS_INFO):
                    result = self.getKeywordsService(
                    ).getAmazonKeywordsUploadData(queueRegion, queueValue)
                # Offer page (used for the stock calculation).
                elif queue_type == queue_types.PRODUCT_OFFER:
                    result = self.getProductService(
                    ).getAmazonProductUploadData(
                        queueRegion, queueValue, "OFFER")
                elif queue_type == queue_types.PRODUCT_REVIEW:
                    pass
                elif queue_type == queue_types.SELLER:
                    result = self.getSellerService(
                    ).getAmazonSellerUploadData(queueRegion, queueValue)
                elif queue_type == queue_types.SELLER_PRODUCT:
                    result = self.getSellerService(
                    ).getAmazonSellerUploadData(
                        queueRegion, queueValue, True)
                elif queue_type == queue_types.TOP_REVIEWER:
                    # The value is "begin:end" or just "end". Convert to
                    # int: the receiver multiplies the bounds, which with
                    # the raw strings raised TypeError or produced a
                    # repeated string instead of a rank.
                    page_range = queueValue.split(":")
                    if len(page_range) == 2:
                        begin = int(page_range[0])
                        end = int(page_range[1])
                    else:
                        begin = 1
                        end = int(page_range[0])
                    result = self.getCustomerService(
                    ).getAmazonTopReviewerUploadData(
                        queueRegion, begin, end)

            # Assemble the payload for this entry.
            data['region_data'] = {
                downloadQueue[0]: {
                    "data": result,
                    'region': str(downloadQueue[2]),
                    'type': str(downloadQueue[3]),
                    'value': downloadQueue[4],
                    "ac_download_queue_id": str(downloadQueue[1]),
                    "status": str(downloadQueue[5])
                }
            }

            uploadService = Service_Upload()
            result = uploadService.upload(data)
            if not result:
                continue

            # Act on the per-id answer of the cloud server (the original
            # comment marks this evaluation as still to be improved).
            for download_queue_id, subresult in result.items():
                sql = amazon.DownloadQueue_select_sql_joint(
                    download_queue_id)
                rows = mapper.select(sql)
                if len(rows) > 0:
                    queue_id = rows[0][0]
                    # Column 9 is read as the upload counter.
                    count = rows[0][9] + 1

                    # Stay PENDING until the upload succeeded or the
                    # counter exceeds two attempts.
                    upload_status = \
                        Model_Static_DownloadQueue_UploadStatus().PENDING
                    if subresult:
                        upload_status = \
                            Model_Static_DownloadQueue_UploadStatus(
                            ).UPLOADED
                    elif count > 2:
                        upload_status = \
                            Model_Static_DownloadQueue_UploadStatus().FAILED

                    sql = amazon.DownloadQueue_update_sql_joint(
                        upload_status, count, queue_id)
                    mapper.update(sql)
    finally:
        # Close even when gathering, uploading or updating raises.
        conn.close()
def scrapeProduct(self, downloadQueue):
    """Scrape a seller's product listing and store it.

    ``downloadQueue[2]`` is the region (kept on ``self.region`` and
    translated with ``Model_Static_Region().getText``) and
    ``downloadQueue[4]`` the merchant id. Every scraped item is written to
    the seller-product table, then to the product table and, when it has
    an image and none is stored yet, to the product-image table.

    NOTE(review): the source lost its indentation; the product and image
    steps are reconstructed as nested under the successful seller-product
    write -- confirm.

    Args:
        downloadQueue: Indexable queue record; indexes 2 and 4 are read.

    Returns:
        ``Model_Static_DownloadQueue_Status.SCRAPED`` when the last
        processed item was written successfully, ``FAILED`` when the
        scraper returned nothing or the last write reported failure. When
        an exception is raised it is printed and ``None`` is returned
        implicitly.
    """
    try:
        self.region = downloadQueue[2]
        reg = Model_Static_Region().getText(self.region)
        merchant_id = downloadQueue[4]
        self.scraper = Model_Scraper_Seller_Product(reg)
        results = self.scraper.scrape(merchant_id)
        if not results:
            return Model_Static_DownloadQueue_Status.FAILED

        amazon = Model_Mapper_Amazon()
        # Connect to the database.
        db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                                  'scraper', 'utf8')
        conn = db.connects()
        # Defined up front so that a result made only of empty pages
        # yields FAILED instead of an unbound-name error.
        result = False
        try:
            mapper = Model_Mapper_Mysql(conn)

            # Per the original comments: store the seller record before
            # its products, then clear the shop's stored product ranks
            # before they are rewritten.
            self.getAmazonSellerMapper().save_productsseller(
                reg, merchant_id)
            rank = 1
            self.getAmazonSellerMapper().save_updaterank(reg, merchant_id)

            for items in results:
                for item in items:
                    asin = item['asin']

                    # Seller-product row: update when present, else insert.
                    sql = amazon.sellerproduct_select_sql_joint(
                        reg, asin, merchant_id)
                    if mapper.select(sql):
                        sql = amazon.sellerproducts_update_sql_joint(
                            reg, asin, merchant_id, rank, item)
                        result = mapper.update(sql)
                    else:
                        sql = amazon.sellerproducts_insert_sql_joint(
                            reg, merchant_id, rank, item)
                        result = mapper.insert(sql)
                    if not result:
                        continue
                    rank += 1

                    # Product row: update when present, else insert.
                    sql = amazon.product_select_sql_joint(reg, asin)
                    if mapper.select(sql):
                        sql = amazon.products_update_sql_joint(
                            reg, asin, item)
                        result = mapper.update(sql)
                    else:
                        sql = amazon.products_insert_sql_joint(
                            reg, asin, item)
                        result = mapper.insert(sql)

                    # Image row: insert only when none is stored yet. The
                    # lookup must not overwrite ``result``: previously an
                    # empty lookup (image not stored yet) made the whole
                    # scrape report FAILED although everything was saved.
                    if result and item['image']:
                        sql = amazon.product_image_select_sql_joint(
                            reg, asin)
                        if not mapper.select(sql):
                            sql = amazon.product_image_insert_sql_joint(
                                reg, asin, item)
                            mapper.insert(sql)
        finally:
            # Close even when a statement raises.
            conn.close()

        if result:
            return Model_Static_DownloadQueue_Status.SCRAPED
        return Model_Static_DownloadQueue_Status.FAILED
    except Exception as err:
        print(err)