import csv
import time

from serpapi.google_search_results import GoogleSearchResults


def import_locations():
    params = {
        "hl": "en",
        "gl": "us",
        "google_domain": "google.com",
        "api_key": "550793bf576080db90a6392a62372dc5134e77a6a91953e5447e31dbd774ecbc"
    }
    with open('hirings.csv', mode='r') as csv_file:
        hirings = list(csv.DictReader(csv_file, delimiter=';'))
    with open('7-usa.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=';')
        # Header matches the four values written per row below (the
        # original header also listed an unused 'id' column).
        writer.writerow(['company', 'status', 'longitude', 'latitude'])
        for h in hirings:
            print(f"Searching for ... {h['company']}")
            params['q'] = h['company']
            client = GoogleSearchResults(params)
            # 'local_map' avoids shadowing the built-in map().
            local_map = client.get_dict().get('local_map')
            if local_map and local_map.get('gps_coordinates'):
                long, lat = (local_map['gps_coordinates']['longitude'],
                             local_map['gps_coordinates']['latitude'])
                print(h['company'], h['status'], long, lat)
                writer.writerow([h['company'], h['status'], long, lat])
            time.sleep(1)

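# For reference, a minimal sketch of the 'local_map' payload shape that
# import_locations() consumes: SerpApi's Google results nest the map's
# "gps_coordinates" with "latitude" and "longitude" keys. The values
# below are illustrative only, not from the original code.
sample_local_map = {
    "gps_coordinates": {"latitude": 30.2672, "longitude": -97.7431},
}
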
def test_get_json(self):
    client = GoogleSearchResults({
        "q": "Coffee",
        "engine": "google_scholar"
    })
    data = client.get_json()
    self.assertIsNotNone(data["organic_results"][0]["title"])

def test_get_html(self):
    client = GoogleSearchResults({
        "q": "Coffee",
        "location": "Austin,Texas"
    })
    data = client.get_html()
    self.assertGreater(len(data), 10)

def search(keyword, date_start, date_end):
    """
    Find specific search results from Google.

    Uses the already collated article data to seek a wider breadth of
    information for the specific company within a specific time range.

    The result from GoogleSearchResults is a dictionary with the
    following fields, in order:
    1. search_metadata - includes a "created_at" field, useful for
       filtering results by date and time
    2. search_parameters
    3. search_information
    4. organic_results - the most relevant field; use "title" and
       "displayed_link" to extract text data
    """
    client = GoogleSearchResults({
        "q": keyword,
        "location": "Austin,Texas",
        "api_key": "demo",
        # Restrict results to the requested range via Google's
        # custom-date-range (tbs=cdr) parameter; the original passed an
        # unused "created_at" request parameter and never used
        # date_start/date_end.
        "tbs": "cdr:1,cd_min:{},cd_max:{}".format(date_start, date_end)
    })
    result = client.get_dict()
    return result

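# Usage sketch for search() above; the keyword and date bounds are
# illustrative values, not from the original code. Per the docstring,
# "organic_results" is the most relevant field, with "title" and
# "displayed_link" carrying the text data.
result = search("Q1 earnings", "1/1/2019", "3/31/2019")
for item in result.get("organic_results", []):
    print(item.get("title"), "-", item.get("displayed_link"))
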
def GetAnswer(return_text):
    params = {
        "api_key": "609d4beab3d238843bcf22588195a5b6db5ae8490a089215eac71985ba37f654",
        "engine": "google",
        "q": return_text,
        "location": "Los Angeles, CA, California, United States",
        "google_domain": "google.com",
        "gl": "us",
        "hl": "en",
    }
    client = GoogleSearchResults(params)
    results = client.get_dict()
    # print(results['knowledge_graph'])
    # Fall through progressively less specific fields; each branch breaks
    # out of the loop (the original omitted the final break, which made
    # the loop spin forever once the first two lookups failed).
    while True:
        try:
            useful_output = results['knowledge_graph']
            answer = useful_output['description']
            break
        except KeyError:
            try:
                useful_output = results['answer_box']
                answer = useful_output['type']
                break
            except KeyError:
                try:
                    useful_output = results['knowledge_graph']
                    answer = useful_output['title']
                except KeyError:
                    # This chain can go on and on to increase the
                    # accuracy of the answer.
                    answer = "I don't know, try asking me something else"
                break
    print(answer)
    return answer

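# The nested try/except chain above can also be expressed as an ordered
# list of (section, field) probes; a minimal sketch with the same
# fallback order, assuming the same results dict shape. Extending the
# chain then means appending one tuple instead of nesting another
# try/except.
def extract_answer(results):
    probes = [
        ("knowledge_graph", "description"),
        ("answer_box", "type"),
        ("knowledge_graph", "title"),
    ]
    for section, field in probes:
        value = results.get(section, {}).get(field)
        if value:
            return value
    return "I don't know, try asking me something else"
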
def test_get_dictionary(self):
    client = GoogleSearchResults({
        "q": "Coffee",
        "location": "Austin,Texas"
    })
    data = client.get_dictionary()
    self.assertIsNotNone(data.get('local_results'))

def test_get_json(self):
    client = GoogleSearchResults({"q": "Coffee", "location": "Austin,Texas"})
    data = client.get_json()
    self.assertEqual(data["search_metadata"]["status"], "Success")
    self.assertIsNotNone(data["search_metadata"]["google_url"])
    self.assertIsNotNone(data["search_metadata"]["id"])
    # pp = pprint.PrettyPrinter(indent=2)
    # pp.pprint(data['local_results'])
    self.assertIsNotNone(data['local_results']['places'][0])

def test_search_google_images(self):
    client = GoogleSearchResults({"q": "coffee", "tbm": "isch"})
    for image_result in client.get_json()['images_results']:
        link = image_result["original"]
        try:
            print("link: " + link)
            # wget.download(link, '.')
        except Exception:
            pass

def test_get_json(self):
    client = GoogleSearchResults({
        "q": "Coffee",
        "location": "Austin,Texas"
    })
    data = client.get_json()
    # pp = pprint.PrettyPrinter(indent=2)
    # pp.pprint(data['local_results'])
    self.assertIsNotNone(data['local_results']['places'][0])

def search(self, query: str) -> list:
    key = self.get_credentials()
    params = {'engine': 'google_scholar', 'q': query, 'api_key': key}
    client = GoogleSearchResults(params)
    res = client.get_dict()
    filtered_res = self.get_filtered_results(res['organic_results'])
    return filtered_res

def test_search_google_shopping(self):
    client = GoogleSearchResults({
        "q": "coffee",        # search query
        "tbm": "shop",        # Google Shopping results
        "tbs": "p_ord:rv",    # sort by review
        "num": 100
    })
    data = client.get_json()
    for shopping_result in data['shopping_results']:
        print(
            str(shopping_result['position']) + " - " +
            shopping_result['title'])

def test_search_by_location(self):
    for city in ["new york", "paris", "berlin"]:
        location = GoogleSearchResults({}).get_location(
            city, 1)[0]["canonical_name"]
        client = GoogleSearchResults({
            "q": "best coffee shop",  # search query
            "location": location,
            "num": 10,
            "start": 0
        })
        data = client.get_json()
        top_result = data['organic_results'][0]["title"]
        print("top coffee result for " + location + " is: " + top_result)

def test_search_google_news(self):
    client = GoogleSearchResults({
        "q": "coffee",     # search query
        "tbm": "nws",      # news results
        "tbs": "qdr:d",    # last 24 hours
        "num": 10
    })
    for offset in [0, 1, 2]:
        client.params_dict["start"] = offset * 10
        data = client.get_json()
        for news_result in data['news_results']:
            print(
                str(news_result['position'] + offset * 10) + " - " +
                news_result['title'])

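# A small generator sketch generalizing the offset pagination above;
# "max_pages" and "page_size" are hypothetical parameters, not part of
# the original test, and "client" is a GoogleSearchResults instance.
def iter_news_results(client, max_pages=3, page_size=10):
    for page in range(max_pages):
        client.params_dict["start"] = page * page_size
        data = client.get_json()
        for news_result in data.get('news_results', []):
            yield news_result
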
def pictureUrl(name):
    # The original built the query as "'<name> basketball + '", leaving
    # stray quotes and a dangling plus sign in the search string.
    nameKeyword = name + " basketball"
    params = {
        "api_key": "5953e1e6264c6019fa48feea446a6ff68616adc5789be4e1fed149f11a89b020",
        "engine": "google",
        "ijn": "0",
        "q": nameKeyword,
        "google_domain": "google.com",
        "tbm": "isch",
    }
    client = GoogleSearchResults(params)
    results = client.get_dict()
    url = results["images_results"][0]["original"]
    return url

def visual(q):
    params = {
        "q": q,
        "tbm": "isch",
        "ijn": "0",
        "api_key": "872d5dde1c1b4ad04b1b288b3cc26683597993dd25a9eff1777c5c20a52b7d50"
    }
    client = GoogleSearchResults(params)
    results = client.get_dict()
    images_results = results['images_results']
    # Return the sixth image result's original-size URL.
    link = images_results[5]["original"]
    return link

def fetch_serp_api_data(domain, keywords):
    results = []
    for keyword in keywords:
        keyword_text = keyword['keyword']
        print("Getting shopping results for: %s with keyword: %s..."
              % (domain, keyword_text))
        params = {
            "api_key": "bd9c6284ab9a91ffe763e700cfc6f8b0035b7db18b59591a2a6498723b79ad24",
            "engine": "google",
            "q": keyword_text,
            "google_domain": "google.com",
        }
        client = GoogleSearchResults(params)
        result = client.get_dict()
        results.append(result)
        time.sleep(1)
    return results

def top_results(keyword):
    """Return a list of (position, link) tuples for the keyword's results."""
    query_parameters = {
        "q": keyword,
        "hl": "en",
        "gl": "us",
        "google_domain": "google.com",
        "num": N_RESULTS,
    }
    response = GoogleSearchResults(query_parameters).get_dict()
    return [(result["position"], result["link"])
            for result in response["organic_results"]]

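# top_results() reads a module-level N_RESULTS constant that the snippet
# above does not define; a minimal usage sketch (the value 10 and the
# keyword are assumptions, not from the original code).
N_RESULTS = 10

for position, link in top_results("pythagorean theorem"):
    print(position, link)
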
def test_get_search_archive(self):
    client = GoogleSearchResults({
        "q": "Coffee",
        "location": "Austin,Texas"
    })
    search_result = client.get_dictionary()
    search_id = search_result.get("search_metadata").get("id")
    archived_search_result = GoogleSearchResults({}).get_search_archive(
        search_id, 'json')
    self.assertEqual(
        archived_search_result.get("search_metadata").get("id"), search_id)
    GoogleSearchResults({}).get_search_archive(search_id, 'html')

def query_serp(query: str, site: str, dates: list, num_results: int,
               paper_name: str) -> list:
    """
    Build a dict for the serp query using make_params() and query serpAPI.

    :param query: query string for Google News
    :param site: site root url (eg. www.google.com)
    :param dates: list of dates
    :param num_results: number of results
    :param paper_name: name of paper
    :return: list of query dicts containing query information and a sites
        list for each query
    """
    all_sites = []
    total_sites_count = 0
    for d in dates:
        try:
            # Get query dict and params dict
            query_r, params = make_params(query=query,
                                          site=site,
                                          date_start=d[0],
                                          date_end=d[1],
                                          num_results=num_results,
                                          paper=paper_name)
            # serpAPI query
            client = GoogleSearchResults(params)
            results = client.get_dict()
            news_results = results.get('news_results')
            count = 0
            sites_date = []
            # Loop until the results run out or an error is reported.
            # (The original condition "... or ('error' not in results)"
            # kept looping on empty pages and relied on a KeyError to
            # escape via the outer except.)
            while news_results and 'error' not in results:
                sites = [news['link'] for news in news_results]
                sites_date.extend(sites)
                count += len(sites)
                params['start'] = count
                client = GoogleSearchResults(params)
                results = client.get_dict()
                news_results = results.get('news_results')
                print('Date Range: {}-{}\tTotal Sites: {}'.format(
                    d[0], d[1], len(sites_date)))
            # add list of sites to query dict
            query_r['sites'] = sites_date
            all_sites.append(query_r)
            total_sites_count += len(sites_date)
        except Exception as e:
            print(e)
            print(d)
            continue
    print('Total Sites: {}'.format(total_sites_count))
    return all_sites

def test_async(self):
    # store searches
    search_queue = Queue()
    # Serp API client
    client = GoogleSearchResults({
        "location": "Austin,Texas",
        "async": True
    })
    # loop through companies
    for company in ['amd', 'nvidia', 'intel']:
        print("execute async search: q = " + company)
        client.params_dict["q"] = company
        search = client.get_dict()
        print("add search to the queue where id: " +
              search['search_metadata']['id'])
        # add search to the search_queue
        search_queue.put(search)
    print("wait until all search statuses are cached or success")
    # Create regular client
    client = GoogleSearchResults({"async": True})
    while not search_queue.empty():
        search = search_queue.get()
        search_id = search['search_metadata']['id']
        # retrieve search from the archive - blocker
        print(search_id + ": get search from archive")
        search_archived = client.get_search_archive(search_id)
        print(search_id + ": status = " +
              search_archived['search_metadata']['status'])
        # check status
        if re.search('Cached|Success',
                     search_archived['search_metadata']['status']):
            print(search_id + ": search done with q = " +
                  search_archived['search_parameters']['q'])
        else:
            # requeue in search_queue
            print(search_id + ": requeue search")
            search_queue.put(search)
            # wait 1s
            time.sleep(1)
    print('all searches completed')

def query_serp(query, site, dates, num_results, paper_name):
    all_sites = []
    total_sites_count = 0
    for d in dates:
        try:
            query_r, params = make_params(query=query,
                                          site=site,
                                          date_start=d[0],
                                          date_end=d[1],
                                          num_results=num_results,
                                          paper=paper_name)
            client = GoogleSearchResults(params)
            results = client.get_dict()
            news_results = results.get('news_results')
            count = 0
            sites_date = []
            # Same loop-condition fix as the documented query_serp above.
            while news_results and 'error' not in results:
                sites = [news['link'] for news in news_results]
                sites_date.extend(sites)
                count += len(sites)
                params['start'] = count
                client = GoogleSearchResults(params)
                results = client.get_dict()
                news_results = results.get('news_results')
                print('Date Range: {}-{}\tTotal Sites: {}'.format(
                    d[0], d[1], len(sites_date)))
            query_r['sites'] = sites_date
            all_sites.append(query_r)
            total_sites_count += len(sites_date)
        except Exception as e:
            print(e)
            print(d)
            continue
    print('Total Sites: {}'.format(total_sites_count))
    return all_sites

def query_serp_count(query, site, dates, num_results, paper_name):
    siteCounts = []
    totalSites = 0
    for i, d in enumerate(dates):
        try:
            # Get query dict and params dict
            query_r, params = make_params(query=query,
                                          site=site,
                                          date_start=d[0],
                                          date_end=d[1],
                                          num_results=num_results,
                                          paper=paper_name)
            # serpAPI query
            client = GoogleSearchResults(params)
            results = client.get_dict()
            news_results = results.get('news_results')
            count = 0
            # Same loop-condition fix as query_serp above.
            while news_results and 'error' not in results:
                count += len(news_results)
                params['start'] = count
                client = GoogleSearchResults(params)
                results = client.get_dict()
                news_results = results.get('news_results')
                print('Date Range: {}-{}\tTotal Sites: {}'.format(
                    d[0], d[1], count))
            query_r['site_count'] = count
            siteCounts.append(query_r)
            totalSites += count
        except Exception as e:
            print(e)
            print(d)
            continue
    print('\nTotal Sites: {}'.format(totalSites))
    return siteCounts

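# The query_serp() and query_serp_count() functions above all repeat the
# same paginate-until-empty loop; a minimal shared sketch (a hypothetical
# helper, not in the original code) that yields each page's news_results
# until the API stops returning them or reports an error.
def iter_news_pages(params):
    fetched = 0
    while True:
        results = GoogleSearchResults(params).get_dict()
        news_results = results.get('news_results')
        if not news_results or 'error' in results:
            return
        yield news_results
        fetched += len(news_results)
        params['start'] = fetched
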
def test_get_account(self):
    client = GoogleSearchResults({})
    account = client.get_account()
    self.assertIsNotNone(account.get("account_id"))
    self.assertEqual(account.get("api_key"), GoogleSearchResults.SERP_API_KEY)

def main():
    GoogleSearchResults.SERP_API_KEY = "3d42b081b7807b6f9c6a37504613b07a02f5f9b00b6aed88731ff9483e161bc2"
    months = {'January': 1, 'February': 2, 'March': 3, 'April': 4,
              'May': 5, 'June': 6, 'July': 7, 'August': 8,
              'September': 9, 'October': 10, 'November': 11, 'December': 12}
    platform_altnames = {'PS3': 'PlayStation 3', 'PS4': 'PlayStation 4',
                         'PSP': 'PlayStation Portable'}
    sales = pd.read_csv("games_w_keywords.csv")
    sales.reset_index(drop=True, inplace=True)
    release_date = []
    new_sales = pd.DataFrame(columns=sales.columns)
    for g, game in enumerate(sales['Name']):
        try:
            page = wptools.page(game)
            if 'released' in page.get_parse().infobox:
                date_str = page.get_parse().infobox['released']
            else:
                date_str = page.get_parse().infobox['release']
        except Exception:
            # Wiki page not found or a non-game wiki page detected; fall
            # back to a Google search restricted to wikipedia.org.
            try:
                google_qry = game + ' site:wikipedia.org'
                client = GoogleSearchResults({
                    "q": google_qry,
                    "serp_api_key": GoogleSearchResults.SERP_API_KEY})
                site = client.get_dict()['organic_results'][0]['link']
                wiki_qry = site.split('/')[-1]
                page = wptools.page(wiki_qry)
                if 'released' in page.get_parse().infobox:
                    date_str = page.get_parse().infobox['released']
                else:
                    date_str = page.get_parse().infobox['release']
            except Exception:
                # No results found, or the wiki infobox has no 'release'
                # section.
                continue
        date_str_split = date_str.split("|")
        # Preprocess to narrow dates down to the right platform and region.
        for i, q in enumerate(date_str_split):
            if sales['Platform'][g] in ''.join(date_str_split[:i]):
                date_str_split = date_str_split[i:]
                break
            elif sales['Platform'][g] in platform_altnames:
                if platform_altnames[sales['Platform'][g]] in ''.join(date_str_split[:i]):
                    date_str_split = date_str_split[i:]
                    break
        for i, q in enumerate(date_str_split):
            if 'NA' in date_str_split[i - 1] or 'WW' in date_str_split[i - 1]:
                date_str_split = [date_str_split[i]]
                break
        for i, q in enumerate(date_str_split):
            if 'JP' in date_str_split[i - 1]:
                continue
            day, month, year = '', '', ''
            date = date_str_split[i].split()
            for mem in date:
                mem = re.sub('[(){}<>,]', '', mem)
                if mem in months:
                    month = str(months[mem])
                if mem.isnumeric():
                    if int(mem) < 32:
                        day = str(mem)
                    else:
                        year = str(mem)
            try:
                int(year)
            except Exception:
                continue
            if (day != '' and month != '' and
                    int(year) == int(sales['Year_of_Release'][g])):
                break
        try:
            int(year)
        except Exception:
            continue
        if (day == '' or month == '' or year == '' or
                int(year) != int(sales['Year_of_Release'][g])):
            continue
        if month == '12' and int(year) == 2016:
            continue
        rd = month + '/' + day + '/' + year
        release_date.append(rd)
        new_sales = new_sales.append(sales.iloc[[g]])
    new_sales['release date'] = release_date
    # to_csv creates the output file itself; the original also opened it
    # with open('games_w_rldates.csv', "w+") and never closed the handle.
    new_sales.to_csv('games_w_rldates.csv', index=False)

from serpapi.google_search_results import GoogleSearchResults

params = {
    "q": "Pythagorean Theorem",
    "hl": "en",
    "gl": "us",
    "google_domain": "google.com",
    "api_key": "secret_api_key"
}

client = GoogleSearchResults(params)
results = client.get_dict()

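# A minimal follow-up sketch for the basic example above, iterating the
# standard "organic_results" field; "position", "title", and "link" are
# the per-result keys used elsewhere in this collection.
for result in results.get("organic_results", []):
    print(result["position"], result["title"], result["link"])
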
# Fragment: assumes csv_file is an open file of queries and data is a
# list collected for later writing.
for line in csv_file:
    new = line.strip()
    print("new:" + new)
    params = {
        "api_key": "5b4695dcbf33b844d77818346074c2268857ec9c42f24202d18cce2fd1709ba3",
        "engine": "google",
        "q": new,
        "location": "London, England, United Kingdom",
        "google_domain": "google.co.uk",
        "gl": "uk",
        "hl": "en",
        "no_cache": "true",
        "device": "mobile",
    }
    client = GoogleSearchResults(params)
    # results = client.get_dict()
    time.sleep(2)
    json_results = client.get_json()
    try:
        for position in json_results['ads']:
            data.append(new + ":" + position['displayed_link'] + ":" +
                        str(position['position']))
    except KeyError:
        # The original try block had no except clause; no ads were
        # returned for this query.
        pass
    # print(json_results)
    # with open('fileName.csv', "wb") as csv_file:
    #     writer = csv.writer(csv_file, delimiter=":")
    #     writer.writerow(data)

def test_get_location(self):
    client = GoogleSearchResults({"q": None, "async": True})
    location_list = client.get_location("Austin", 3)
    self.assertIsNotNone(location_list[0].get("id"))
    pp = pprint.PrettyPrinter(indent=2)
    pp.pprint(location_list)

def _search(params):
    client = GoogleSearchResults(params)
    return client.get_dict()

from serpapi.google_search_results import GoogleSearchResults
import pandas as pd
import csv

url = ''  # Insert URL string here from GitHub

# Add your SerpApi key between the quotes below.
api_key = 'API_KEY_GOES_HERE'

# Assumed placeholders; these names were undefined in the original snippet.
location = 'Austin, Texas'
engine = 'google'

df1 = pd.read_csv(url)  # Dataset is now stored in a pandas DataFrame
data_frame_size = len(df1)  # was undefined in the original

# The while loop putting items into a queue
query_queue = []
i = 0
while i < data_frame_size:
    query = df1.iloc[i][0]
    query_queue.append(query)
    i += 1

for search_query in query_queue:
    query_params = {
        "location": location,
        "engine": engine,
        "api_key": api_key,
        "q": search_query
    }
    search_client = GoogleSearchResults(query_params)
    search_results = search_client.get_dict()
    for link in search_results['organic_results']:
        print(link['link'])