Esempio n. 1
0
    def __init__(self,
                 keywordList,
                 extkey=None,
                 se='baidu',
                 pages=2,
                 MONGODB_SERVER='127.0.0.1',
                 MONGODB_PORT=27017,
                 MONGODB_DB='BGY_ZC_META',
                 *args,
                 **kwargs):
        """Configure the spider from a comma-separated keyword list.

        Args:
            keywordList: Comma-separated search keywords. Each entry is
                stripped of surrounding whitespace; blank entries are skipped.
            extkey: Optional extra term. When given, it is stripped and
                appended (after a single space) to every keyword.
            se: Search engine name. Its lower-cased form is used to index
                ``SearchEngineResultSelectors`` and
                ``SearchEngineResultDateSelectors``.
            pages: Number of result pages requested per keyword; coerced
                with ``int``.
            MONGODB_SERVER: Value written to ``settings['MONGODB_SERVER']``.
            MONGODB_PORT: Coerced with ``int`` and written to
                ``settings['MONGODB_PORT']``.
            MONGODB_DB: Value written to ``settings['MONGODB_DB']``. If it
                contains ``'WX'`` or ``'WB'``, ``settings['DOWNLOAD_DELAY']``
                is set to 5.
            *args: Forwarded to the base-class initializer.
            **kwargs: Forwarded to the base-class initializer.
        """
        super(keywordSpider, self).__init__(*args, **kwargs)
        self.keywordList = keywordList.lower()
        self.searchEngine = se.lower()
        # NOTE(review): presumably these tables are dicts keyed by engine
        # name, so an unknown engine would raise KeyError here - confirm.
        self.selector = SearchEngineResultSelectors[self.searchEngine]
        self.dateSelector = SearchEngineResultDateSelectors[self.searchEngine]

        if 'WX' in MONGODB_DB or 'WB' in MONGODB_DB:
            settings['DOWNLOAD_DELAY'] = 5

        settings['MONGODB_SERVER'] = MONGODB_SERVER
        settings['MONGODB_PORT'] = int(MONGODB_PORT)
        settings['MONGODB_DB'] = MONGODB_DB

        # Loop-invariant pieces are computed once instead of per keyword.
        suffix = '' if extkey is None else ' ' + extkey.strip()
        page_count = int(pages)

        # The keywords and `se` are passed to searResultPages exactly as the
        # caller supplied them; only the stored attributes are lower-cased.
        for raw_key in keywordList.split(','):
            key = raw_key.strip()
            if not key:
                continue
            self.start_urls.extend(
                searResultPages(key + suffix, se, page_count))
Esempio n. 2
0
 def __init__(self, keyword, se = 'bing', pages = 50,  *args, **kwargs):
     """Set up the spider for one keyword and seed the first result page.

     Only the first URL produced by ``searResultPages`` is added to
     ``start_urls``; the remaining URLs are not consumed here.

     Args:
         keyword: Search keyword. Stored lower-cased in ``self.keyword``,
             but passed to ``searResultPages`` as given.
         se: Search engine name. Its lower-cased form is stored in
             ``self.searchEngine`` and used to index
             ``SearchEngineResultSelectors``.
         pages: Number of result pages requested; coerced with ``int``.
         *args: Forwarded to the base-class initializer.
         **kwargs: Forwarded to the base-class initializer.

     Raises:
         StopIteration: If ``searResultPages`` yields no URL at all.
     """
     super(keywordSpider, self).__init__(*args, **kwargs)
     self.keyword = keyword.lower()
     self.searchEngine = se.lower()
     self.selector = SearchEngineResultSelectors[self.searchEngine]
     pageUrls = searResultPages(keyword, se, int(pages))
     # The built-in next() works on Python 2.6+ and Python 3; the iterator's
     # own .next() method exists only on Python 2.
     currUrl = next(pageUrls)
     self.start_urls.append(currUrl)
Esempio n. 3
0
 def __init__(self, keyword, se = 'bing', pages = 50,  *args, **kwargs):
     """Initialise the spider for a single keyword and queue every result page.

     Args:
         keyword: Search keyword. Kept lower-cased on ``self.keyword``; the
             original value is what gets handed to ``searResultPages``.
         se: Search engine name. Lower-cased for ``self.searchEngine`` and
             for the ``SearchEngineResultSelectors`` lookup.
         pages: How many result pages to request; converted with ``int``.
         *args: Passed through to the base-class initializer.
         **kwargs: Passed through to the base-class initializer.
     """
     super(keywordSpider, self).__init__(*args, **kwargs)
     self.keyword = keyword.lower()

     engine = se.lower()
     self.searchEngine = engine
     self.selector = SearchEngineResultSelectors[engine]

     page_count = int(pages)
     # Each URL is echoed to stdout and then queued, one at a time, so the
     # output interleaves with iteration exactly as it is produced.
     for page_url in searResultPages(keyword, se, page_count):
         print(page_url)
         self.start_urls.append(page_url)
Esempio n. 4
0
 def __init__(self, keyword, se='amazon', pages=2, *args, **kwargs):
     """Start a Chrome driver and queue every result page for one keyword.

     The driver is created first. If any later initialisation step fails,
     the driver is shut down before the exception propagates, so a failed
     construction does not leave a browser process behind.

     Args:
         keyword: Search keyword. Stored lower-cased in ``self.keyword``,
             but passed to ``searResultPages`` as given.
         se: Search engine name. Its lower-cased form is stored in
             ``self.searchEngine`` and used to index
             ``SearchEngineResultSelectors``.
         pages: Number of result pages requested; coerced with ``int``.
         *args: Forwarded to the base-class initializer.
         **kwargs: Forwarded to the base-class initializer.
     """
     # NOTE(review): assumes `webdriver` is selenium.webdriver, whose driver
     # objects expose quit() - confirm against the file's imports.
     self.driver = webdriver.Chrome('/usr/local/bin/chromedriver')
     try:
         super(keywordSpider, self).__init__(*args, **kwargs)
         self.keyword = keyword.lower()
         self.searchEngine = se.lower()
         self.selector = SearchEngineResultSelectors[self.searchEngine]
         for url in searResultPages(keyword, se, int(pages)):
             print(url)
             self.start_urls.append(url)
     except Exception:
         # Release the browser before re-raising; nothing else holds a
         # reference to a half-constructed spider's driver.
         self.driver.quit()
         raise