from time import sleep

import tldextract
from serpapi import GoogleSearch


def googlesearch(name, location=False):
    """Perform a Google Search lookup via SerpAPI and return the top result's domain."""
    # The base plan for SerpAPI is rate limited to 1k calls per hour.
    # We intentionally slow this down to avoid hitting the rate limit.
    # serp_api_key and serp_api_fast are module-level configuration (see below).
    if not serp_api_fast:
        sleep(2.5)
    params = {"q": name, "api_key": serp_api_key}
    if location:
        params["location"] = location
    client = GoogleSearch(params)
    result = client.get_json()
    try:
        # Reduce the first organic result's link to its registered domain.
        domain = result['organic_results'][0]['link']
        tldr = tldextract.extract(domain)
        return '{}.{}'.format(tldr.domain, tldr.suffix)
    except (KeyError, IndexError):
        # KeyError: no organic_results in the response; IndexError: empty result list.
        print("Unable to lookup record from SerpAPI.")
        return None
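A minimal driver sketch for googlesearch above; the key placeholder, the serp_api_fast value, and the example queries are assumptions for illustration, not part of the original module.

# Hypothetical module-level configuration assumed by googlesearch above.
serp_api_key = "YOUR_SERPAPI_KEY"  # assumed placeholder; get a key at https://serpapi.com/manage-api-key
serp_api_fast = False              # assumed: keep the 2.5s throttle on the base plan

if __name__ == "__main__":
    # Look up the top-ranked domain for a name, optionally scoped to a location.
    print(googlesearch("Blue Bottle Coffee"))
    print(googlesearch("Blue Bottle Coffee", "Oakland, California"))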
def test_get_json(self):
    search = GoogleSearch({"q": "Coffee", "location": "Austin,Texas"})
    data = search.get_json()
    self.assertEqual(data["search_metadata"]["status"], "Success")
    self.assertIsNone(data.get("error"))
    self.assertIsNotNone(data["search_metadata"]["google_url"])
    self.assertIsNotNone(data["search_metadata"]["id"])
    self.assertIsNotNone(data['local_results']['places'][0])
def test_get_json(self):
    search = GoogleSearch({"q": "Coffee", "location": "Austin,Texas"})
    data = search.get_json()
    self.assertEqual(data["search_metadata"]["status"], "Success")
    self.assertIsNotNone(data["search_metadata"]["google_url"])
    self.assertIsNotNone(data["search_metadata"]["id"])
    # pp = pprint.PrettyPrinter(indent=2)
    # pp.pprint(data['local_results'])
    self.assertIsNotNone(data['local_results']['places'][0])
def test_get_json(self):
    search = GoogleSearch({"q": "Coffee", "engine": "google_scholar"})
    data = search.get_json()
    print(data['search_metadata'])
    search_id = data['search_metadata']['id']
    # Retrieve the search from the archive as raw HTML.
    print(search_id + ": get search from archive")
    raw_html = search.get_search_archive(search_id, 'html')
    print(raw_html)
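The test above also carried a commented-out status check against a search_archived variable that was never assigned; a sketch of the JSON variant it implies, assuming the default format of get_search_archive is JSON:

# Retrieve the same archived search as JSON and report its status.
search_archived = search.get_search_archive(search_id)
print(search_id + ": status = " + search_archived['search_metadata']['status'])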
def test_search_google_images(self):
    search = GoogleSearch({"q": "coffee", "tbm": "isch"})  # tbm=isch selects Google Images
    for image_result in search.get_json()['images_results']:
        try:
            link = image_result["original"]
            print("link is found: " + link)
            # uncomment the line below to download the original image
            # wget.download(link, '.')
        except KeyError:
            print("link is not found.")
def test_search_google_shopping(self):
    search = GoogleSearch({
        "q": "coffee",        # search query
        "tbm": "shop",        # shopping vertical
        "tbs": "p_ord:rv",    # order results by review
        "num": 100
    })
    data = search.get_json()
    for shopping_result in data['shopping_results']:
        print(str(shopping_result['position']) + " - " + shopping_result['title'])
def test_search_by_location(self):
    for city in ["new york", "paris", "berlin"]:
        # Resolve the city to its canonical location name before searching.
        location = GoogleSearch({}).get_location(city, 1)[0]["canonical_name"]
        search = GoogleSearch({
            "q": "best coffee shop",  # search query
            "location": location,
            "num": 10,
            "start": 0
        })
        data = search.get_json()
        top_result = data['organic_results'][0]["title"]
        print("top coffee result for " + location + " is: " + top_result)
def test_search_google_news(self):
    search = GoogleSearch({
        "q": "coffee",      # search query
        "tbm": "nws",       # news vertical
        "tbs": "qdr:d",     # results from the last 24h
        "num": 10
    })
    # Page through the first three pages of results, 10 per page.
    for offset in [0, 1, 2]:
        search.params_dict["start"] = offset * 10
        data = search.get_json()
        for news_result in data['news_results']:
            print(str(news_result['position'] + offset * 10) + " - " + news_result['title'])
def test_search_google_shopping(self):
    search = GoogleSearch({
        "q": "coffee",        # search query
        "tbm": "shop",        # shopping vertical
        "tbs": "p_ord:rv",    # order results by review
        "num": 100
    })
    data = search.get_json()
    if 'shopping_results' in data:
        for shopping_result in data['shopping_results']:
            print(str(shopping_result['position']) + " - " + shopping_result['title'])
    else:
        print("WARNING: shopping_results is missing from search result with tbm=shop")
import re

from serpapi import GoogleSearch


def reg_scrape(year):
    """Collect names, zip codes, and birth years from Google results for michiganvoters.info."""
    collected_voters = {}
    search = GoogleSearch({
        "q": f"site:https://michiganvoters.info was born in {year}",
        "location": "Detroit,Michigan",
        "api_key": "GET_A_KEY_FROM_HERE:https://serpapi.com/manage-api-key"
    })
    results = search.get_json()
    google_results = results['organic_results']
    for voter in google_results:
        # Snippets follow the pattern "LAST, FIRST was born in YYYY and ...".
        snippet = voter['snippet']
        name_match = snippet.split(' was born in ')
        birth_year = name_match[1].split(' and')[0]
        full_name = name_match[0].split(', ')
        first_name = full_name[1]
        last_name = full_name[0]
        # zip_regex is assumed to be defined at module scope.
        zipcode = None  # stays None when no zip code appears in the snippet
        zip_match = re.search(zip_regex, snippet, re.MULTILINE)
        if zip_match is not None:
            zipcode = str(zip_match.group(0)).strip(' U')
        # Keep only the second token when the first name contains a space.
        if ' ' in first_name:
            first_name = first_name.split(' ')[1]
        collected_voters[f"{last_name}_{first_name}"] = {
            'first': first_name,
            'last': last_name,
            'zipcode': zipcode,
            'birth_year': birth_year,
        }
    return collected_voters
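reg_scrape relies on a zip_regex that is not defined in the snippet; one possible definition and a call, both assumptions for illustration:

# Assumed pattern: a 5-digit US zip code anywhere in the snippet text.
zip_regex = r"\b\d{5}\b"

voters = reg_scrape(1950)  # hypothetical year
for key, record in voters.items():
    print(key, record['zipcode'], record['birth_year'])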
def test_get_json(self):
    search = GoogleSearch({"q": "Coffee", "engine": "google_scholar"})
    data = search.get_json()
    self.assertIsNotNone(data["organic_results"][0]["title"])