예제 #1
0
 def getAmazonKeywordsUploadData(self, region, keywords):
     """Collect the stored rank, ad and search-keyword rows for upload.

     Runs three SELECT statements built by ``Model_Mapper_Amazon`` and maps
     every returned row onto the matching key sequence from ``Model_Keys``.

     Args:
         region: Region value handed to the SQL builders unchanged.
         keywords: Keyword value handed to the SQL builders unchanged.

     Returns:
         A dict holding any of the keys ``'rank'``, ``'ad'`` and
         ``'keywords'`` (only sections that produced rows are present), or
         ``False`` when no section produced a row.
     """
     amazon = Model_Mapper_Amazon()
     # Connect to the database.
     db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                               'scraper', 'utf8')
     conn = db.connects()
     try:
         mapper = Model_Mapper_Mysql(conn)
         data = {}

         # Rank keywords rows.
         sql = amazon.getAmazonProductKeywordsRank_select_sql_joint(
             region, keywords)
         rank_keys = Model_Keys.rankdata_key
         rank_list = [dict(zip(rank_keys, row)) for row in mapper.select(sql)]
         if rank_list:
             data['rank'] = {
                 'region': region,
                 'keywords': keywords,
                 'list': rank_list,
             }

         # Ad keywords rows.
         sql = amazon.getAmazonProductKeywordsAd_select_sql_joint(
             region, keywords)
         ad_keys = Model_Keys.addata_key
         ad_list = [dict(zip(ad_keys, row)) for row in mapper.select(sql)]
         if ad_list:
             data['ad'] = {
                 'region': region,
                 'keywords': keywords,
                 'list': ad_list,
             }

         # Search keyword row; only the first returned row is uploaded.
         sql = amazon.getKeywords(region, keywords)
         keyword_rows = mapper.select(sql)
         if keyword_rows:
             data['keywords'] = dict(
                 zip(Model_Keys.keywords_key, keyword_rows[0]))

         return data or False
     finally:
         # The connection used to be left open on every call; always release
         # it, including when one of the selects raises.
         conn.close()
예제 #2
0
    def scrapeTopReviewer(self, downloadQueue):
        """Scrape a range of top-reviewer pages and replace the stored rows.

        ``downloadQueue[4]`` carries the page range, either ``"begin:end"``
        or a single ``"end"`` (begin then defaults to 1), and
        ``downloadQueue[2]`` is the region value translated through
        ``Model_Static_Region().getText``. The range is clamped to 1..1000
        before scraping.

        Returns:
            ``SCRAPED`` once the old rank range was deleted and the scraped
            items inserted, ``SCRAPED_NO_DATA`` when the scraper returned
            ``""``, ``FAILED`` for any other empty result, and ``None`` when
            an exception was caught (it is printed, not re-raised).
        """
        try:
            value = downloadQueue[4]
            try:
                value = value.split(":")
            except Exception as err:
                # The value could not be split; report it and continue with
                # the value unchanged, exactly as before.
                print(err)
            if len(value) == 2:
                begin = int(value[0])
                end = int(value[1])
            else:
                begin = 1
                end = int(value[0])
            region = Model_Static_Region().getText(downloadQueue[2])
            self.scraper = Model_Scraper_TopReviewer(region)
            begin = max(begin, 1)
            end = min(end, 1000)
            data = self.scraper.scrape(begin, end + 1)

            # Nothing to store: decide the status without touching the
            # database (a connection used to be opened and leaked here).
            if not data:
                if data == "":
                    return Model_Static_DownloadQueue_Status.SCRAPED_NO_DATA
                return Model_Static_DownloadQueue_Status.FAILED

            amazon = Model_Mapper_Amazon()
            # Connect to the database.
            db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                                      'scraper', 'utf8')
            conn = db.connects()
            try:
                mapper = Model_Mapper_Mysql(conn)
                # Each page maps to ten ranks: page 1 -> 1..10, and so on.
                rankBegin = begin * 10 - 9
                rankEnd = end * 10
                sql = amazon.TopReviewer_delete_sql_joint(
                    region, rankBegin, rankEnd)
                mapper.delete(sql)
                for items in data:
                    for item in items:
                        sql = amazon.TopReviewer_insert_sql_joint(region, item)
                        mapper.insert(sql)
            finally:
                # Release the connection even when a statement fails.
                conn.close()
            return Model_Static_DownloadQueue_Status.SCRAPED
        except Exception as err:
            print(err)
예제 #3
0
    def __init__(self):
        """Open the database connection and create the mapper helpers.

        Sets ``self.db`` (the ``Model_Mapper_Connect`` instance),
        ``self.mapper`` (a ``Model_Mapper_Mysql`` wrapping the connection
        returned by ``self.db.connects()``) and ``self.amazon`` (a
        ``Model_Mapper_Amazon`` instance).
        """
        self.db = Model_Mapper_Connect(
            'localhost', 3306, 'root', '123123', 'All_Scraper', 'utf8')
        self.mapper = Model_Mapper_Mysql(self.db.connects())
        self.amazon = Model_Mapper_Amazon()
예제 #4
0
파일: Mysql.py 프로젝트: iotwlw/All_Scraper
 def open_file(self, region, asin):
     """Parse a downloaded product page and print the SQL built from it.

     Reads ``../../../Downloader/Amazon_Data/<asin>.html``, runs it through
     ``Model_Processor_Product_Base_Com().process`` and prints the statement
     returned by ``Model_Mapper_Amazon().product_sql_joint``.

     Args:
         region: Region value handed to the SQL builder.
         asin: Product identifier; also the base name of the HTML file.

     Raises:
         SystemExit: always, right after printing the SQL (see note below).
     """
     # ``with`` closes the file even when reading fails.
     with open('../../../Downloader/Amazon_Data/'+asin+'.html', 'r') as html_file:
         html = html_file.read()
     com = Model_Processor_Product_Base_Com()
     data = com.process(html)
     amazon = Model_Mapper_Amazon()
     sql = amazon.product_sql_joint(region, asin, data)
     # Parenthesised form prints the same text on Python 2 and also parses
     # on Python 3, matching the print(...) style used elsewhere.
     print(sql)
     # NOTE(review): this exit looks like a debugging leftover; while it is
     # here the insert below is unreachable. Kept because removing it would
     # start writing to the database - confirm the intent before deleting.
     sys.exit()
     # Connect to the database.
     db = Model_Mapper_Connect('localhost', 3306, 'root', '123123', 'scraper', 'utf8')
     conn = db.connects()
     try:
         mapper = Model_Mapper_Mysql(conn)
         mapper.insert(sql)
     finally:
         conn.close()
예제 #5
0
 def getAmazonTopReviewerUploadData(self, region, begin, end):
     """Load the stored top-reviewer rows for a page range.

     Args:
         region: Region value handed to the SQL builder.
         begin: First page of the range; an int or a numeric string.
         end: Last page of the range; an int or a numeric string.

     Returns:
         A list of dicts, one per row, keyed by
         ``Model_Keys.topreviewer_key``, or ``False`` when no row matched.

     Raises:
         ValueError: if ``begin`` or ``end`` cannot be converted to int.
     """
     # Callers may pass the pieces of a split "begin:end" string. Without
     # the conversion ``begin * 10 - 9`` raises TypeError and ``end * 10``
     # silently repeats the string instead of multiplying.
     rankBegin = int(begin) * 10 - 9
     rankEnd = int(end) * 10

     amazon = Model_Mapper_Amazon()
     # Connect to the database.
     db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                               'scraper', 'utf8')
     conn = db.connects()
     try:
         mapper = Model_Mapper_Mysql(conn)
         sql = amazon.TopReviewerUpload_select_sql_joint(
             region, rankBegin, rankEnd)
         topreviewer_key = Model_Keys.topreviewer_key
         reviewers = [dict(zip(topreviewer_key, row))
                      for row in mapper.select(sql)]
         return reviewers or False
     finally:
         # The connection used to be left open on every call.
         conn.close()
예제 #6
0
파일: Queue.py 프로젝트: iotwlw/All_Scraper
    def processMobileUploadQueues(self, mobileuploadQueues, region):
        """Upload each mobile upload-queue entry and delete confirmed ones.

        For every entry the upload payload is built (product data only for
        the ``PRODUCT`` type, ``False`` otherwise), sent through
        ``Service_Upload().upload(data, 'UploadQueue')`` and, for every id
        the response reports as ``True``, the matching upload-queue row is
        deleted if it still exists.

        Args:
            mobileuploadQueues: Iterable of rows; index 0 is used as the
                payload key, 1 as region, 2 as type and 3 as value.
            region: Value stored under ``'region'`` in the payload.
        """
        data = {'region': region}
        for mobileuploadQueue in mobileuploadQueues:
            queueRegion = Model_Static_Region().getText(mobileuploadQueue[1])
            queueValue = mobileuploadQueue[3]
            queue_type = mobileuploadQueue[2]
            upload_data = False
            # Only the product-page type is handled; the branches for the
            # other queue types (keywords, offer, seller, top reviewer) were
            # already disabled and contribute no data.
            if (str(queue_type).isdigit()
                    and queue_type == Model_Static_DownloadQueue_Type.PRODUCT):
                upload_data = self.getProductService(
                ).getAmazonProductUploadData(queueRegion, queueValue, None)

            # Assemble the payload for this single queue entry.
            data['region_data'] = {
                mobileuploadQueue[0]: {
                    "data": upload_data,
                    'region': mobileuploadQueue[1],
                    'type': mobileuploadQueue[2],
                    'value': mobileuploadQueue[3],
                }
            }
            uploadService = Service_Upload()
            response = uploadService.upload(data, 'UploadQueue')
            if not response:
                continue

            amazon = Model_Mapper_Amazon()
            # Connect to the database.
            db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                                      'scraper', 'utf8')
            conn = db.connects()
            try:
                mapper = Model_Mapper_Mysql(conn)
                # Act on the per-id verdicts returned by the cloud server.
                for upload_queue_id, subresult in response.items():
                    # Equality (not truthiness) is kept on purpose so that
                    # only True/1 verdicts trigger a delete, as before.
                    if subresult == True:  # noqa: E712
                        sql = amazon.MobileUploadQueue_select_sql_joint(
                            str(upload_queue_id))
                        if mapper.select(sql):
                            sql = amazon.MobileUploadQueue_delete_sql_joint(
                                str(upload_queue_id))
                            mapper.delete(sql)
            finally:
                # Previously skipped when a select/delete raised.
                conn.close()
예제 #7
0
파일: Queue.py 프로젝트: iotwlw/All_Scraper
    def uploadDownloadQueues(self, downloadQueues, region):
        """Upload scraped download-queue entries and record the outcome.

        For every entry the payload is built and sent through
        ``Service_Upload().upload(data)``. For each id in the response the
        stored row is re-read, the counter read from column 9 is incremented
        (presumably the upload attempt count - confirm against the schema),
        and the upload status is set to ``UPLOADED`` on success, ``FAILED``
        once the counter exceeds 2, and ``PENDING`` otherwise.

        Args:
            downloadQueues: Iterable of rows; index 0 is used as the payload
                key, 1 as ``ac_download_queue_id``, 2 as region, 3 as type,
                4 as value and 5 as scrape status.
            region: Value stored under ``'region'`` in the payload.
        """
        data = {'region': region}
        amazon = Model_Mapper_Amazon()
        # Connect to the database.
        db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                                  'scraper', 'utf8')
        conn = db.connects()
        try:
            mapper = Model_Mapper_Mysql(conn)
            for downloadQueue in downloadQueues:
                upload_data = self._collectDownloadQueueUploadData(
                    downloadQueue)

                # Assemble the payload for this single queue entry.
                data['region_data'] = {
                    downloadQueue[0]: {
                        "data": upload_data,
                        'region': str(downloadQueue[2]),
                        'type': str(downloadQueue[3]),
                        'value': downloadQueue[4],
                        "ac_download_queue_id": str(downloadQueue[1]),
                        "status": str(downloadQueue[5])
                    }
                }
                uploadService = Service_Upload()
                response = uploadService.upload(data)
                if not response:
                    continue

                # Act on the per-id verdicts returned by the cloud server.
                for download_queue_id, subresult in response.items():
                    sql = amazon.DownloadQueue_select_sql_joint(
                        download_queue_id)
                    # Separate name: the loop variable used to be rebound
                    # to the query result here.
                    rows = mapper.select(sql)
                    if len(rows) == 0:
                        continue
                    row_id = rows[0][0]
                    count = rows[0][9] + 1
                    upload_status = Model_Static_DownloadQueue_UploadStatus()
                    if subresult:
                        status = upload_status.UPLOADED
                    elif count > 2:
                        status = upload_status.FAILED
                    else:
                        status = upload_status.PENDING
                    sql = amazon.DownloadQueue_update_sql_joint(
                        status, count, row_id)
                    mapper.update(sql)
        finally:
            # Previously skipped whenever anything above raised.
            conn.close()

    def _collectDownloadQueueUploadData(self, downloadQueue):
        """Return the upload data for one download-queue row, or False."""
        queueRegion = Model_Static_Region().getText(downloadQueue[2])
        queueValue = downloadQueue[4]
        if downloadQueue[5] != Model_Static_DownloadQueue_Status.SCRAPED:
            return False

        queue_type = downloadQueue[3]
        types = Model_Static_DownloadQueue_Type
        # Product page.
        if queue_type == types.PRODUCT or queue_type == types.PRODUCT_INFO:
            return self.getProductService().getAmazonProductUploadData(
                queueRegion, queueValue, None)
        # Keywords (first pages or first page only).
        if queue_type == types.KEYWORDS or queue_type == types.KEYWORDS_INFO:
            return self.getKeywordsService().getAmazonKeywordsUploadData(
                queueRegion, queueValue)
        # Offer page (used for stock calculation).
        if queue_type == types.PRODUCT_OFFER:
            return self.getProductService().getAmazonProductUploadData(
                queueRegion, queueValue, "OFFER")
        if queue_type == types.PRODUCT_REVIEW:
            return False
        if queue_type == types.SELLER:
            return self.getSellerService().getAmazonSellerUploadData(
                queueRegion, queueValue)
        if queue_type == types.SELLER_PRODUCT:
            return self.getSellerService().getAmazonSellerUploadData(
                queueRegion, queueValue, True)
        if queue_type == types.TOP_REVIEWER:
            parts = queueValue.split(":")
            # The range pieces are strings; convert them like
            # scrapeTopReviewer does so the callee can do page arithmetic.
            try:
                if len(parts) == 2:
                    begin = int(parts[0])
                    end = int(parts[1])
                else:
                    begin = 1
                    end = int(parts[0])
            except ValueError as err:
                # Malformed range: report it and upload no data for the row.
                print(err)
                return False
            return self.getCustomerService().getAmazonTopReviewerUploadData(
                queueRegion, begin, end)
        return False
예제 #8
0
    def scrapeProduct(self, downloadQueue):
        """Scrape a seller's product listing and store every scraped item.

        ``downloadQueue[2]`` is the region value (also stored on
        ``self.region``) and ``downloadQueue[4]`` the merchant id. For each
        scraped item the seller-product row and the product row are updated
        or inserted, and an image row is inserted when the item has an image
        and none is stored yet.

        Returns:
            ``SCRAPED`` when at least one product row was written,
            ``FAILED`` when the scraper returned nothing or no product row
            could be written, and ``None`` when an exception was caught (it
            is printed, not re-raised).
        """
        try:
            self.region = downloadQueue[2]
            reg = Model_Static_Region().getText(self.region)
            merchant_id = downloadQueue[4]
            self.scraper = Model_Scraper_Seller_Product(reg)
            results = self.scraper.scrape(merchant_id)
            if not results:
                return Model_Static_DownloadQueue_Status.FAILED

            amazon = Model_Mapper_Amazon()
            # Connect to the database.
            db = Model_Mapper_Connect('localhost', 3306, 'root', '123123',
                                      'scraper', 'utf8')
            conn = db.connects()
            try:
                mapper = Model_Mapper_Mysql(conn)

                # The seller row must exist before its products are stored.
                self.getAmazonSellerMapper().save_productsseller(
                    reg, merchant_id)
                # Clear the shop's existing product ranks before re-ranking.
                self.getAmazonSellerMapper().save_updaterank(reg, merchant_id)

                rank = 1
                saved_any = False
                # The close/return used to sit inside the inner loop, so only
                # the first item was ever stored; walk every item instead.
                for items in results:
                    for item in items:
                        asin = item['asin']

                        # Seller-product row: update if present, else insert.
                        sql = amazon.sellerproduct_select_sql_joint(
                            reg, asin, merchant_id)
                        if mapper.select(sql):
                            sql = amazon.sellerproducts_update_sql_joint(
                                reg, asin, merchant_id, rank, item)
                            ranked = mapper.update(sql)
                        else:
                            sql = amazon.sellerproducts_insert_sql_joint(
                                reg, merchant_id, rank, item)
                            ranked = mapper.insert(sql)
                        if ranked:
                            rank += 1

                        # Product row: update if present, else insert.
                        sql = amazon.product_select_sql_joint(reg, asin)
                        if mapper.select(sql):
                            sql = amazon.products_update_sql_joint(
                                reg, asin, item)
                            stored = mapper.update(sql)
                        else:
                            sql = amazon.products_insert_sql_joint(
                                reg, asin, item)
                            stored = mapper.insert(sql)
                        if not stored:
                            continue
                        # Track the product write itself; the status used to
                        # be derived from the image lookup by accident.
                        saved_any = True

                        # Image row: insert only when none is stored yet.
                        if item['image']:
                            sql = amazon.product_image_select_sql_joint(
                                reg, asin)
                            if not mapper.select(sql):
                                sql = amazon.product_image_insert_sql_joint(
                                    reg, asin, item)
                                mapper.insert(sql)
            finally:
                # Close exactly once, also when a statement fails.
                conn.close()

            if saved_any:
                return Model_Static_DownloadQueue_Status.SCRAPED
            return Model_Static_DownloadQueue_Status.FAILED
        except Exception as err:
            print(err)