def _run(self, kw):
    """Search for ``kw`` with the configured browser and store the results.

    The browser class is chosen from ``self._BROWSER``. After
    ``browser.search()`` completes, ``browser.data`` is handed to
    ``self._append_data``.

    Args:
        kw: Keyword forwarded to the browser as its ``kw`` argument.

    Raises:
        ValueError: If ``self._BROWSER`` is not one of ``'bing'``,
            ``'stackoverflow'``, ``'stackoverflow-doc'`` or ``'wordpress'``.
    """
    source = self._OTHER_KWARGS.get('source')

    # Only pass a driver along when one has been configured.
    browser_kwargs = {'driver': self._DRIVER} if self._DRIVER else {}

    # Same as browser_kwargs, plus the optional ``source`` when it is set.
    extra_kwargs = dict(browser_kwargs)
    if source:
        extra_kwargs['source'] = source

    if self._BROWSER == 'bing':
        # NOTE(review): ``source`` is not forwarded to BrowseBing, unlike the
        # other browsers below -- confirm that this is intentional.
        browser = BrowseBing(kw=kw, max_page=self._MAX_PAGES,
                             method=self._SCRAPE_METHOD, **browser_kwargs)
    elif self._BROWSER == 'stackoverflow':
        browser = BrowseStackOverFlow(kw=kw, max_page=self._MAX_PAGES,
                                      method=self._SCRAPE_METHOD,
                                      **extra_kwargs)
    elif self._BROWSER == 'stackoverflow-doc':
        browser = BrowseStackOverFlowDocumentation(
            kw=kw, max_page=self._MAX_PAGES,
            method=self._SCRAPE_METHOD, **extra_kwargs)
    elif self._BROWSER == 'wordpress':
        browser = BrowseWordPress(kw=kw, max_page=self._MAX_PAGES,
                                  base_url=self._BASE_URL,
                                  method=self._SCRAPE_METHOD, **extra_kwargs)
    else:
        # Fail with a clear message instead of hitting an unbound ``browser``.
        raise ValueError(
            "Unsupported browser: {!r}".format(self._BROWSER))

    browser.search()
    # The keyword is a lazy logging argument, so the message needs a placeholder.
    logger.debug("Gathered the data for keyword %s", kw)
    self._append_data(browser.data)
def test_browse_with_bing():
    """Search Bing with the ``requests`` method and check the result layout."""
    bing = BrowseBing(kw="Ravi RT Merugu", max_page=1, method="requests")
    try:
        bing.search()
        result = bing.data
        assert isinstance(result, dict)
        assert "results" in result
        assert "related_keywords" in result
    finally:
        # Release the browser even when search() or an assertion fails.
        bing.close()
def test_browser_no_nextpage():
    """Check that ``next_url`` is ``None`` for the gibberish keyword below."""
    bing = BrowseBing(
        kw="XxXXXXXXxxxxxbas dans dsand msad asd amd ansd am dna smda sdn asdmas dm",
        max_page=1)
    try:
        bing.search()
        result = bing.data
        assert result['next_url'] is None
    finally:
        # Release the browser even when search() or the assertion fails.
        bing.close()
def test_browse_with_bing():
    """Search Bing with the default method and check counts and result layout."""
    max_page = 1
    bing = BrowseBing(kw="Ravi RT Merugu", max_page=max_page)
    try:
        bing.search()
        result = bing.data
        assert bing.data['results_count'] != 0
        # No more results than one full page per requested page.
        assert bing.data['results_count'] <= DEFAULT_MAX_RESULTS_PER_PAGE * max_page
        assert "selenium-htmlunit" == bing.shift_method()
        assert isinstance(result, dict)
        assert "results" in result
        assert "related_keywords" in result
    finally:
        # Release the browser even when search() or an assertion fails.
        bing.close()
# Manual smoke check: run one Bing search with source="en-in" and
# sanity-check the returned data.
import sys

# Add the parent directory to the import path before importing ``trawler``.
sys.path.append('../')

from trawler.settings import DEFAULT_MAX_PAGES, DEFAULT_MAX_RESULTS_PER_PAGE
from trawler.browsers import BrowseBing

if __name__ == "__main__":
    page_limit = 1
    browser = BrowseBing(kw="Ravi RT Merugu", max_page=page_limit, source="en-in")
    browser.search()

    data = browser.data
    print(data, "+++++++++")

    # At least one result, and no more than one full page per requested page.
    assert browser.data['results_count'] != 0
    assert browser.data['results_count'] <= page_limit * DEFAULT_MAX_RESULTS_PER_PAGE

    assert browser.shift_method() == "selenium-htmlunit"

    # Shape of the gathered data.
    assert type(data) is dict
    assert "results" in data
    assert "related_keywords" in data
    # The browser is not closed here; the close() call is disabled in this script.
def test_browser_implamentation_error():
    """An unsupported scrape method must raise BrowerScrapeMethodNotImplemented."""
    with pytest.raises(BrowerScrapeMethodNotImplemented) as excinfo:
        bing = BrowseBing(kw="Hello", max_page=1, method="chromejjj")
        bing.search()
        # Only reached when nothing raised; pytest.raises then fails the test.
        bing.close()
    # ``excinfo`` is pytest's ExceptionInfo wrapper; the raised exception
    # itself is ``excinfo.value``, so check the message on that.
    assert "Not implemented" in str(excinfo.value)