コード例 #1
0
class Model_Scraper_Keywords_First(Model_Scraper_Standard):
    def __init__(self, region):
        super(Model_Scraper_Keywords_First, self).__init__(region)
        self.region = region
        self.processor = Service_Functions().getProcessor(
            'Keywords_First', region)

    def scraper(self, keywords):
        self.process = Model_Scraper_Standard(self.region)
        url = "https://www.amazon." + self.region + "/gp/search?keywords=" + keywords + "&page=1"
        print(url)
        try:
            content = self.process.processkeywords(url)
        except Exception as err:
            print(err)
        try:
            if (content):
                # 这边写解析代码
                result = self.processor.process(content)
                if (result):
                    return result
            elif (content == None):
                return None
            else:
                return False
        except:
            return False
コード例 #2
0
ファイル: keywords.py プロジェクト: iotwlw/All_Scraper
class Model_Scraper_Keywords(Model_Scraper_Standard):
    def __init__(self, region):
        super(Model_Scraper_Keywords, self).__init__(region)
        self.region = region
        self.processor = Service_Functions().getProcessor('Keywords', region)

    def scraper(self, keywords):
        self.process = Model_Scraper_Standard(self.region)
        url = "https://www.amazon."+self.region+"/gp/search?keywords="+keywords+"&page=1"
        # 不显示浏览器
        # with Display(backend="xvfb", size=(1440, 900)):
        print (url)
        try:
            content = self.process.processkeywords(url)
        except Exception as err:
            print (err)
        try:
            if (content):
                # 这边写解析代码
                data = []
                result = self.processor.process(content.encode('utf-8'), 1)
                if (result):
                    # print (result)
                    data.append(result)
                    pagecount = int(self.processor.getPageCount(content))
                    if (pagecount > 5):
                        pagecount = 5
                    # pagecount = 1
                    if (pagecount > 1):
                        for i in range(2, pagecount + 1):
                            pageurl = "https://www.amazon." + self.region + "/gp/search?keywords=" + keywords + "&page=" + str(i)
                            print (pageurl)
                            pagecontent = self.process.processkeywords(pageurl)
                            if (pagecontent):
                                pageresult = self.processor.process(pagecontent.encode('utf-8'), i)
                                # print (pageresult)
                                data.append(pageresult)
                    return data
            elif (content == None):
                return None
            else:
                return False
        except:
            return False