Exemple #1
0
    def _run(self, kw):
        """Search for one keyword with the configured browser and store its data.

        Instantiates the browser class selected by ``self._BROWSER``, calls its
        ``search()`` method and hands ``browser.data`` to ``self._append_data``.

        Args:
            kw: Keyword forwarded to the browser class as ``kw``.

        Raises:
            ValueError: If ``self._BROWSER`` is not one of ``'bing'``,
                ``'stackoverflow'``, ``'stackoverflow-doc'`` or ``'wordpress'``.
        """
        # A driver is only forwarded when one has been configured.
        browser_kwargs = {'driver': self._DRIVER} if self._DRIVER else {}

        # Same as browser_kwargs, plus 'source' when it is set (truthy).
        extra_kwargs = dict(browser_kwargs)
        source = self._OTHER_KWARGS.get('source')
        if source:
            extra_kwargs['source'] = source

        if self._BROWSER == 'bing':
            # NOTE(review): 'source' is not forwarded to BrowseBing here, only
            # the driver is -- confirm this is intentional.
            browser = BrowseBing(kw=kw, max_page=self._MAX_PAGES, method=self._SCRAPE_METHOD,
                                 **browser_kwargs)
        elif self._BROWSER == 'stackoverflow':
            browser = BrowseStackOverFlow(kw=kw, max_page=self._MAX_PAGES, method=self._SCRAPE_METHOD,
                                          **extra_kwargs)
        elif self._BROWSER == 'stackoverflow-doc':
            browser = BrowseStackOverFlowDocumentation(kw=kw, max_page=self._MAX_PAGES,
                                                       method=self._SCRAPE_METHOD, **extra_kwargs)
        elif self._BROWSER == 'wordpress':
            browser = BrowseWordPress(kw=kw, max_page=self._MAX_PAGES, base_url=self._BASE_URL,
                                      method=self._SCRAPE_METHOD, **extra_kwargs)
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # `browser` further down.
            raise ValueError("Unsupported browser: %r" % (self._BROWSER,))

        browser.search()
        # The keyword is passed as a lazy %-argument; without the placeholder
        # the logging module reports a formatting error for this record.
        logger.debug("Gathered the data for keyword %s", kw)
        self._append_data(browser.data)
def test_browse_with_bing():
    """Search Bing with ``method="requests"`` and check the shape of the data.

    The data must be a plain ``dict`` containing the ``results`` and
    ``related_keywords`` keys.
    """
    bing = BrowseBing(kw="Ravi RT Merugu", max_page=1, method="requests")
    try:
        bing.search()
        result = bing.data
        assert type(result) is dict
        assert "results" in result
        assert "related_keywords" in result
    finally:
        # Always release the browser, even when the search or an assertion fails.
        bing.close()
def test_browse_with_bing():
    """Search Bing with the default method and check counts and data shape.

    Checks that at least one result was counted, that the count does not
    exceed ``DEFAULT_MAX_RESULTS_PER_PAGE * max_page``, that ``shift_method()``
    returns ``"selenium-htmlunit"``, and that the data is a plain ``dict``
    with the ``results`` and ``related_keywords`` keys.
    """
    max_page = 1
    bing = BrowseBing(kw="Ravi RT Merugu", max_page=max_page)
    try:
        bing.search()
        result = bing.data
        assert result['results_count'] != 0
        assert result['results_count'] <= DEFAULT_MAX_RESULTS_PER_PAGE * max_page
        assert "selenium-htmlunit" == bing.shift_method()
        assert type(result) is dict
        assert "results" in result
        assert "related_keywords" in result
    finally:
        # Always release the browser, even when the search or an assertion fails.
        bing.close()
def test_browser_no_nextpage():
    """Search a long gibberish keyword and expect ``next_url`` to be ``None``."""
    bing = BrowseBing(
        kw=
        "XxXXXXXXxxxxxbas dans dsand msad asd amd ansd am dna smda sdn asdmas dm",
        max_page=1)
    try:
        bing.search()
        result = bing.data
        assert result['next_url'] is None
    finally:
        # Always release the browser, even when the search or the assertion fails.
        bing.close()
Exemple #5
0
import sys
sys.path.append('../')
from trawler.settings import DEFAULT_MAX_PAGES, DEFAULT_MAX_RESULTS_PER_PAGE
from trawler.browsers import BrowseBing

if __name__ == "__main__":
    # Manual smoke run: fetch one page of Bing results for a fixed keyword
    # with source="en-in", print the data and sanity-check it.
    page_limit = 1
    browser = BrowseBing(kw="Ravi RT Merugu", source="en-in", max_page=page_limit)
    browser.search()
    data = browser.data
    print(data, "+++++++++")

    # At least one result, and no more than the per-page cap times the pages.
    assert browser.data['results_count'] != 0
    assert browser.data['results_count'] <= DEFAULT_MAX_RESULTS_PER_PAGE * page_limit

    # shift_method() is expected to report "selenium-htmlunit".
    assert browser.shift_method() == "selenium-htmlunit"

    # The data is a plain dict carrying both expected keys.
    assert type(data) is dict
    for expected_key in ("results", "related_keywords"):
        assert expected_key in data
    # NOTE(review): the browser is never closed here (the close() call was
    # disabled in the original) -- confirm that is intentional.
def test_browser_implamentation_error():
    """An unknown scrape method must raise ``BrowerScrapeMethodNotImplemented``.

    The raised exception's message is expected to contain "Not implemented".
    """
    with pytest.raises(BrowerScrapeMethodNotImplemented) as excinfo:
        bing = BrowseBing(kw="Hello", max_page=1, method="chromejjj")
        bing.search()
        # Only reached when nothing was raised (the test then fails anyway);
        # the browser is still closed in that case.
        bing.close()
    # Inspect the exception itself: str(excinfo) is the representation of the
    # ExceptionInfo wrapper, not the exception message.
    assert "Not implemented" in str(excinfo.value)