def test_paginate(self):
    """Walk the paginated news results for a query and collect every link.

    As a proof of concept, the current page marker and each result's
    title and link are printed while the links are gathered in a list.

    NOTE(review): ``start``/``end`` in the parameters suggest two pages,
    yet the final assertion expects more than 200 links -- confirm which
    of the two matches the pagination behaviour of the client in use.
    """
    query = {
        "q": "coca cola",
        "tbm": "nws",
        "api_key": os.getenv("API_KEY"),
        # requested result window
        "start": 0,
        "end": 20
    }
    collected_links = []
    # pagination() is iterated directly; every item is one page of results.
    for result_page in GoogleSearch(query).pagination():
        current = result_page['serpapi_pagination']['current']
        print(f"Current page: {current}")
        for entry in result_page["news_results"]:
            headline = entry['title']
            link = entry['link']
            print(f"Title: {headline}\nLink: {link}\n")
            collected_links.append(link)
    # Only a lower bound is checked on the number of collected links.
    self.assertGreater(len(collected_links), 200)
 def test_paginate(self):
     """Check the ``next`` links exposed while paginating a search.

     Calls ``pagination(0, 20, 10)`` (presumably start, end and page
     size -- confirm against the client's signature), collects each
     page's ``serpapi_pagination.next`` link and verifies that exactly
     two pages were produced with the expected ``start`` offsets.
     """
     search = GoogleSearch({"q": "Coffee", "location": "Austin,Texas"})
     next_links = [
         page['serpapi_pagination']['next']
         for page in search.pagination(0, 20, 10)
     ]
     self.assertEqual(len(next_links), 2)
     # assertIn reports the offending URL when the check fails, unlike
     # assertTrue on a pre-computed boolean.
     self.assertIn("start=10", next_links[0])
     # NOTE(review): the second link is expected to carry start=21 rather
     # than start=20 -- confirm this matches the pagination implementation.
     self.assertIn("start=21", next_links[1])
Beispiel #3
0
    def test_paginate_page_size(self):
        """Paginate news results with an explicit page size and check totals.

        With ``end = 80`` and ``page_size = 20`` the test expects 4 pages
        holding ``page_size`` results each, ``end`` results overall.
        Titles that repeat an earlier result are reported on stdout but do
        not fail the test.
        """
        start = 0
        end = 80
        page_size = 20
        expected_pages = (end - start) // page_size

        # start, end and num travel in the search parameters instead of
        # being handed to pagination() as arguments.
        params = {
            "q": "coca cola",
            "tbm": "nws",
            "api_key": os.getenv("API_KEY"),
            "start": start,
            "end": end,
            "num": page_size
        }
        titles = []
        search = GoogleSearch(params)
        pages = search.pagination()
        page_count = 0
        count = 0
        for page in pages:
            page_count += 1
            results = page["news_results"]
            for news_result in results:
                count += 1
                current_title = news_result['title']
                # Report every earlier occurrence, using 1-based positions.
                for index, seen in enumerate(titles, start=1):
                    if seen == current_title:
                        print(f"{count} duplicated title: {seen} "
                              f"at index: {index}")
                titles.append(current_title)

            # Compare this page's own size with the requested page size;
            # checking the parity of the running total would accept any
            # even count.
            self.assertEqual(
                len(results), page_size,
                f"page {page_count} does not contain {page_size} elements")

        # check number of pages match
        self.assertEqual(page_count, expected_pages)
        self.assertEqual(len(titles), end, "number of search results")