def googlesearch(name, location=False):
    """
    Look up *name* via a SerpAPI Google Search and return the registered
    domain of the first organic result.

    Parameters
    ----------
    name : str
        Query string to search for.
    location : str or bool, optional
        SerpAPI location string (e.g. ``"Austin,Texas"``). The default
        ``False`` performs a location-less search.

    Returns
    -------
    str or None
        ``"<domain>.<suffix>"`` (e.g. ``"example.com"``) of the top
        organic result, or ``None`` when the lookup fails or there are
        no organic results.
    """
    # The base plan for SerpAPI is rate limited to 1k calls per hour.
    # We intentionally slow this down to avoid hitting the rate limit.
    if not serp_api_fast:
        sleep(2.5)

    # Build the parameter dict once instead of duplicating the
    # GoogleSearch construction in two branches.
    params = {"q": name, "api_key": serp_api_key}
    if location:
        params["location"] = location
    client = GoogleSearch(params)

    result = client.get_json()
    try:
        # IndexError covers an empty organic_results list ([0] raises
        # IndexError, not KeyError) — the original handler missed it.
        link = result['organic_results'][0]['link']
    except (KeyError, IndexError):
        print("Unable to lookup record from SerpAPI.")
        return None
    tldr = tldextract.extract(link)
    return '{}.{}'.format(tldr.domain, tldr.suffix)
 def test_get_json(self):
     """Smoke-test get_json(): success status, no error, core metadata present."""
     client = GoogleSearch({"q": "Coffee", "location": "Austin,Texas"})
     payload = client.get_json()
     metadata = payload["search_metadata"]
     self.assertEqual(metadata["status"], "Success")
     self.assertIsNone(payload.get("error"))
     self.assertIsNotNone(metadata["google_url"])
     self.assertIsNotNone(metadata["id"])
     self.assertIsNotNone(payload['local_results']['places'][0])
Exemple #3
0
 def test_get_json(self):
     """Verify a located search succeeds and returns metadata plus local places."""
     client = GoogleSearch({"q": "Coffee", "location": "Austin,Texas"})
     payload = client.get_json()
     metadata = payload["search_metadata"]
     self.assertEqual(metadata["status"], "Success")
     self.assertIsNotNone(metadata["google_url"])
     self.assertIsNotNone(metadata["id"])
     self.assertIsNotNone(payload['local_results']['places'][0])
Exemple #4
0
 def test_get_json(self):
     """Run a Google Scholar search, then fetch its raw HTML from the archive."""
     client = GoogleSearch({"q": "Coffee", "engine": "google_scholar"})
     payload = client.get_json()
     print(payload['search_metadata'])
     archive_id = payload['search_metadata']['id']
     # retrieve search from the archive - blocker
     print(archive_id + ": get search from archive")
     html_dump = client.get_search_archive(archive_id, 'html')
     print(html_dump)
Exemple #5
0
 def test_search_google_images(self):
     """Iterate Google Images results and report each original-image link.

     The only expected failure per item is a missing "original" key, so
     the handler is narrowed from a bare ``except:`` (which also swallowed
     KeyboardInterrupt/SystemExit and any print failure) to ``KeyError``.
     """
     search = GoogleSearch({"q": "coffe", "tbm": "isch"})
     for image_result in search.get_json()['images_results']:
         try:
             link = image_result["original"]
         except KeyError:
             # Some image results lack an "original" link; skip them.
             print("link is not found.")
         else:
             print("link is found: " + link)
             # uncomment the line below to down the original image
             # wget.download(link, '.')
Exemple #6
0
 def test_search_google_shopping(self):
     """Print position and title for each Google Shopping result."""
     client = GoogleSearch({
         "q": "coffe",  # search search
         "tbm": "shop",  # news
         "tbs": "p_ord:rv",  # last 24h
         "num": 100
     })
     payload = client.get_json()
     for item in payload['shopping_results']:
         print(str(item['position']) + " - " + item['title'])
Exemple #7
0
 def test_search_by_location(self):
     """Resolve each city to its canonical location, then search there."""
     for city in ["new york", "paris", "berlin"]:
         # First call: resolve the free-form city name to SerpAPI's
         # canonical location string.
         canonical = GoogleSearch({}).get_location(city, 1)[0]["canonical_name"]
         query = GoogleSearch({
             "q": "best coffee shop",  # search search
             "location": canonical,
             "num": 10,
             "start": 0
         })
         payload = query.get_json()
         best = payload['organic_results'][0]["title"]
         print("top coffee result for " + canonical + " is: " + best)
Exemple #8
0
 def test_search_google_news(self):
     """Page through three pages of last-24h news results, printing each."""
     client = GoogleSearch({
         "q": "coffe",  # search search
         "tbm": "nws",  # news
         "tbs": "qdr:d",  # last 24h
         "num": 10
     })
     for page in range(3):
         # Advance the result offset by mutating the shared params dict.
         client.params_dict["start"] = page * 10
         payload = client.get_json()
         for item in payload['news_results']:
             print(str(item['position'] + page * 10) + " - " + item['title'])
Exemple #9
0
 def test_search_google_shopping(self):
     """Print shopping results, warning when the section is absent."""
     client = GoogleSearch({
         "q": "coffe",  # search search
         "tbm": "shop",  # news
         "tbs": "p_ord:rv",  # last 24h
         "num": 100
     })
     payload = client.get_json()
     # Guard clause: shopping_results is sometimes missing from the
     # response even with tbm=shop.
     if 'shopping_results' not in payload:
         print(
             "WARNING: oops shopping_results is missing from search result with tbm=shop"
         )
         return
     for item in payload['shopping_results']:
         print(str(item['position']) + " - " + item['title'])
Exemple #10
0
def reg_scrape(year):
    """
    Scrape Michigan voter records for people born in *year* via a
    site-restricted Google search (SerpAPI).

    Parameters
    ----------
    year : int or str
        Birth year interpolated into the search query.

    Returns
    -------
    dict
        Maps ``"LAST_FIRST"`` keys to dicts with ``'first'``, ``'last'``,
        ``'zipcode'`` and ``'birth_year'``. Snippets that don't match the
        expected "<Last>, <First> was born in <year>" pattern, or that
        contain no ZIP code, are skipped.
    """
    collected_voters = {}
    search = GoogleSearch({
        "q": f"site:https://michiganvoters.info was born in {year}",
        "location": "Detroit,Michigan",
        "api_key": "GET_A_KEY_FROM_HERE:https://serpapi.com/manage-api-key"
    })
    results = search.get_json()
    google_results = results['organic_results']
    for voter in google_results:
        snippet = voter['snippet']
        name_match = snippet.split(' was born in ')
        # Guard: without the marker phrase, name_match has one element and
        # name_match[1] below would raise IndexError.
        if len(name_match) < 2:
            continue
        birth_year = name_match[1].split(' and')[0]
        full_name = name_match[0].split(', ')
        # Guard: a name without "Last, First" structure can't be split.
        if len(full_name) < 2:
            continue
        first_name = full_name[1]
        last_name = full_name[0]
        zip_match = re.search(zip_regex, snippet, re.MULTILINE)
        if zip_match is not None:  # PEP 8: compare to None with "is"
            zipcode = str(zip_match.group(0)).strip(' U')
            # Keep only the last token of a multi-part first name.
            if ' ' in first_name:
                first_name = first_name.split(' ')[1]
            collected_voters[f"{last_name}_{first_name}"] = {
                'first': first_name,
                'last': last_name,
                'zipcode': zipcode,
                'birth_year': birth_year,
            }
    return collected_voters
Exemple #11
0
 def test_get_json(self):
     """A Google Scholar search should yield at least one titled result."""
     client = GoogleSearch({"q": "Coffee", "engine": "google_scholar"})
     payload = client.get_json()
     self.assertIsNotNone(payload["organic_results"][0]["title"])