def query_serp(query: str, site: str, dates: list, num_results: int,
               paper_name: str) -> list:
    """
    Make dict for serp query using make_params()
    query serpAPI
    :param query: query string for google news
    :param site: site root url (eg. www.google.com)
    :param dates: list of dates
    :param num_results: number of results
    :param paper_name: name of paper
    :return: list of query dicts containing query information and sites list for the query
    """
    all_sites = []
    total_sites_count = 0

    for d in dates:
        try:
            # Get query dict and params dict
            query_r, params = make_params(query=query,
                                          site=site,
                                          date_start=d[0],
                                          date_end=d[1],
                                          num_results=num_results,
                                          paper=paper_name)
            # serpAPI query
            client = GoogleSearchResults(params)
            results = client.get_dict()
            news_results = results.get('news_results')

            count = 0
            sites_date = []
            # Page through until the results run out or an error is returned
            while news_results and 'error' not in results:
                sites = [news['link'] for news in news_results]
                sites_date.extend(sites)
                count += len(sites)

                params['start'] = count
                client = GoogleSearchResults(params)
                results = client.get_dict()
                news_results = results.get('news_results')

            print('Date Range: {}-{}\tTotal Sites: {}'.format(
                d[0], d[1], len(sites_date)))

            # add list of sites to query dict
            query_r['sites'] = sites_date
            all_sites.append(query_r)
            total_sites_count += len(sites_date)
        except Exception as e:
            print(e)
            print(d)
            continue
    print('Total Sites: {}'.format(total_sites_count))
    return all_sites
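
The make_params() helper used above is not shown on this page (examples #7 and #8 below call it too). A minimal sketch of what it could look like, assuming the metadata fields in query_r and the use of Google's tbs=cdr custom date-range parameter; both are assumptions, not confirmed by the snippet:

def make_params(query, site, date_start, date_end, num_results, paper):
    # Hypothetical reconstruction; field names are assumptions
    query_r = {
        'query': query,
        'site': site,
        'paper': paper,
        'date_start': date_start,
        'date_end': date_end,
    }
    params = {
        'engine': 'google',
        'tbm': 'nws',  # restrict to Google News results
        'q': '{} site:{}'.format(query, site),
        'num': num_results,
        # Custom date range; cd_min/cd_max take MM/DD/YYYY strings
        'tbs': 'cdr:1,cd_min:{},cd_max:{}'.format(date_start, date_end),
        'api_key': 'YOUR_API_KEY',
    }
    return query_r, params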
Example #2
def import_locations():
    params = {
        "hl": "en",
        "gl": "us",
        "google_domain": "google.com",
        "api_key": "550793bf576080db90a6392a62372dc5134e77a6a91953e5447e31dbd774ecbc"
    }
    with open('hirings.csv', mode='r') as csv_file:
        hirings = list(csv.DictReader(csv_file, delimiter=';'))
    with open('7-usa.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=';')
        writer.writerow(['id', 'company', 'status', 'longitude', 'latitude'])
        for h in hirings:
            print(f"Searching for ... {h['company']}")
            params['q'] = h['company']
            client = GoogleSearchResults(params)
            local_map = client.get_dict().get('local_map')
            if local_map and local_map.get('gps_coordinates'):
                coords = local_map['gps_coordinates']
                long, lat = coords['longitude'], coords['latitude']
                print(h['company'], h['status'], long, lat)
                writer.writerow([h['company'], h['status'], long, lat])
                time.sleep(1)
Example #3
def GetAnswer(return_text):
    params = {
      "api_key": "609d4beab3d238843bcf22588195a5b6db5ae8490a089215eac71985ba37f654",
      "engine": "google",
      "q": return_text,
      "location": "Los Angeles, CA, California, United States",
      "google_domain": "google.com",
      "gl": "us",
      "hl": "en",
    }
    client = GoogleSearchResults(params)
    results = client.get_dict()
    #print(results['knowledge_graph'])
    # Fall back through progressively less specific fields until one matches
    try:
        answer = results['knowledge_graph']['description']
    except KeyError:
        try:
            answer = results['answer_box']['type']
        except KeyError:
            try:
                answer = results['knowledge_graph']['title']
            except KeyError:
                # This chain can be extended to improve the accuracy of the answer
                answer = "I don't know, try asking me something else"
    print(answer)
    return answer   
Example #4
def search(keyword, date_start, date_end):
    """
	Function to find specific search result from Google.
	Using the already collated article data to seek wider breadth of information
	for the specific company at a specific time range

	Result from GoogleSearchResults returns a dictionary with the following fields in order:

	1. search_metadata
		- Includes "created_at" field useful for filtering results of specific date time
	2. search_parameters
	3. search_information
	4. organic_results
		- most relevant field
		- use "title" and "displayed_link" for extracting text data
	"""

    client = GoogleSearchResults({
        "q": keyword,
        "location": "Austin,Texas",
        "api_key": "demo",
        "created_at": "2019-04-05"
    })
    result = client.get_dict()

    return result
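
A short usage sketch for the fields the docstring singles out; the keyword and dates here are made up for illustration:

result = search("Tesla", "2019-01-01", "2019-04-05")
print(result['search_metadata']['created_at'])
for item in result.get('organic_results', []):
    # "title" and "displayed_link" are the fields the docstring
    # recommends for extracting text data
    print(item.get('title'), item.get('displayed_link'))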
Example #5
    def test_get_dict(self):
        client = GoogleSearchResults({
            "q": "Coffee",
            "location": "Austin,Texas"
        })
        data = client.get_dict()
        self.assertIsNotNone(data.get('local_results'))
Example #6
    def search(self, query: str) -> list:
        key = self.get_credentials()

        params = {'engine': 'google_scholar', 'q': query, 'api_key': key}

        client = GoogleSearchResults(params)
        res = client.get_dict()

        filtered_res = self.get_filtered_results(res['organic_results'])
        return filtered_res
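
get_credentials() and get_filtered_results() belong to the surrounding class and are not shown here. A minimal sketch of what get_filtered_results() might do, assuming it strips each Google Scholar hit down to a title/link pair (the exact fields kept are an assumption):

    def get_filtered_results(self, organic_results):
        # Hypothetical helper: keep only the fields downstream code needs
        return [
            {'title': r.get('title'), 'link': r.get('link')}
            for r in organic_results
        ]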
Example #7
def query_serp(query, site, dates, num_results, paper_name):
    all_sites = []
    total_sites_count = 0

    for d in dates:
        try:
            query_r, params = make_params(query=query,
                                          site=site,
                                          date_start=d[0],
                                          date_end=d[1],
                                          num_results=num_results,
                                          paper=paper_name)
            client = GoogleSearchResults(params)
            results = client.get_dict()
            news_results = results.get('news_results')

            count = 0
            sites_date = []
            while news_results and 'error' not in results:
                sites = [news['link'] for news in news_results]
                sites_date.extend(sites)
                count += len(sites)

                params['start'] = count
                client = GoogleSearchResults(params)
                results = client.get_dict()
                news_results = results.get('news_results')

            print('Date Range: {}-{}\tTotal Sites: {}'.format(
                d[0], d[1], len(sites_date)))

            query_r['sites'] = sites_date
            all_sites.append(query_r)
            total_sites_count += len(sites_date)
        except Exception as e:
            print(e)
            print(d)
            continue
    print('Total Sites: {}'.format(total_sites_count))
    return all_sites
Example #8
def query_serp_count(query, site, dates, num_results, paper_name):
    siteCounts = []
    totalSites = 0
    for i, d in enumerate(dates):
        try:
            # Get query dict and params dict
            query_r, params = make_params(query=query,
                                          site=site,
                                          date_start=d[0],
                                          date_end=d[1],
                                          num_results=num_results,
                                          paper=paper_name)
            # serpAPI query
            client = GoogleSearchResults(params)
            results = client.get_dict()

            news_results = results.get('news_results')

            count = 0
            while news_results and 'error' not in results:
                count += len(news_results)
                params['start'] = count
                client = GoogleSearchResults(params)
                results = client.get_dict()
                news_results = results.get('news_results')

            print('Date Range: {}-{}\tTotal Sites: {}'.format(
                d[0], d[1], count))
            query_r['site_count'] = count
            siteCounts.append(query_r)
            totalSites += count
        except Exception as e:
            print(e)
            print(d)
            continue
    print('\nTotal Sites: {}'.format(totalSites))
    return siteCounts
Example #9
def pictureUrl(name):
    nameKeyword = "'" + name + " basketball'"
    params = {
        "api_key": "5953e1e6264c6019fa48feea446a6ff68616adc5789be4e1fed149f11a89b020",
        "engine": "google",
        "ijn": "0",
        "q": nameKeyword,
        "google_domain": "google.com",
        "tbm": "isch",
    }
    client = GoogleSearchResults(params)
    results = client.get_dict()
    url = results["images_results"][0]["original"]
    return url
Example #10

    def test_async(self):
        # store searches
        search_queue = Queue()

        # Serp API client
        client = GoogleSearchResults({
            "location": "Austin,Texas",
            "async": True
        })

        # loop through companies
        for company in ['amd', 'nvidia', 'intel']:
            print("execute async search: q = " + company)
            client.params_dict["q"] = company
            search = client.get_dict()
            print("add search to the queue where id: " +
                  search['search_metadata']['id'])
            # add search to the search_queue
            search_queue.put(search)

        print("wait until all search statuses are cached or success")

        # Create regular client
        client = GoogleSearchResults({"async": True})
        while not search_queue.empty():
            search = search_queue.get()
            search_id = search['search_metadata']['id']

            # retrieve search from the archive - blocker
            print(search_id + ": get search from archive")
            search_archived = client.get_search_archive(search_id)
            print(search_id + ": status = " +
                  search_archived['search_metadata']['status'])

            # check status
            if re.search('Cached|Success',
                         search_archived['search_metadata']['status']):
                print(search_id + ": search done with q = " +
                      search_archived['search_parameters']['q'])
            else:
                # requeue search_queue
                print(search_id + ": requeue search")
                search_queue.put(search)

                # wait 1s
                time.sleep(1)

        print('all searches completed')
Example #11
def visual(q):
    params = {
        "q": q,
        "tbm": "isch",
        "ijn": "0",
        "api_key": "872d5dde1c1b4ad04b1b288b3cc26683597993dd25a9eff1777c5c20a52b7d50"
    }
    client = GoogleSearchResults(params)
    results = client.get_dict()
    images_results = results['images_results']
    link = images_results[5]["original"]
    return link
Example #12
def fetch_serp_api_data(domain, keywords):
    results = []
    for keyword in keywords:
        keyword_text = keyword['keyword']
        print("Getting shopping results for: %s with keyword: %s..." %
              (domain, keyword_text))
        params = {
            "api_key": "bd9c6284ab9a91ffe763e700cfc6f8b0035b7db18b59591a2a6498723b79ad24",
            "engine": "google",
            "q": keyword_text,
            "google_domain": "google.com",
        }

        client = GoogleSearchResults(params)
        result = client.get_dict()
        results.append(result)
        time.sleep(1)
    return results
Example #13
def _search(params):
    client = GoogleSearchResults(params)
    return client.get_dict()
Example #14
from serpapi.google_search_results import GoogleSearchResults

params = {
    "q": "Pythagorean Theorem",
    "hl": "en",
    "gl": "us",
    "google_domain": "google.com",
    "api_key": "secret_api_key"
}

client = GoogleSearchResults(params)
results = client.get_dict()
Example #15
url = ''  # Insert URL string here from GitHub

from serpapi.google_search_results import GoogleSearchResults
import pandas as pd
import csv

# Add your SerP API key between the '' below
api_key = 'API_KEY_GOES_HERE'

df1 = pd.read_csv(url)  # Dataset is now stored in a Pandas DataFrame

# The while loop putting each query from the first column into a queue
data_frame_size = len(df1)
query_queue = []
i = 0
while i < data_frame_size:
    query = df1.iloc[i, 0]
    query_queue.append(query)
    i += 1

# Search settings; adjust these placeholders to your use case
location = 'Austin,Texas'
engine = 'google'

for search_query in query_queue:
    query_params = {
        "location": location,
        "engine": engine,
        "api_key": api_key,
        "q": search_query
    }
    search_client = GoogleSearchResults(query_params)
    search_results = search_client.get_dict()
    for link in search_results['organic_results']:
        print(link['link'])
Example #16
def main():
  GoogleSearchResults.SERP_API_KEY = "3d42b081b7807b6f9c6a37504613b07a02f5f9b00b6aed88731ff9483e161bc2"
  months = {'January':1, 'February':2, 'March':3, 'April':4, 'May':5, 'June':6, 'July':7,'August':8,'September':9, 'October':10, 'November':11,'December':12}
  platform_altnames = {'PS3':'PlayStation 3', 'PS4':'PlayStation 4', 'PSP':'PlayStation Portable'}

  sales = pd.read_csv("games_w_keywords.csv")
  sales.reset_index(drop=True, inplace=True)
  release_date = []
  new_sales = pd.DataFrame(columns=sales.columns)
  for g, game in enumerate(sales['Name']):
    try:
      page = wptools.page(game)
      infobox = page.get_parse().infobox  # parse once, reuse below
      if 'released' in infobox:
        date_str = infobox['released']
      else:
        date_str = infobox['release']
    except Exception:  #wiki page not found or non-game wiki page detected
      try:
        google_qry = game + ' site:wikipedia.org'
        client = GoogleSearchResults({"q": google_qry, "serp_api_key": GoogleSearchResults.SERP_API_KEY})
        site = client.get_dict()['organic_results'][0]['link']
        wiki_qry = site.split('/')[-1]
        page = wptools.page(wiki_qry)
        infobox = page.get_parse().infobox
        if 'released' in infobox:
          date_str = infobox['released']
        else:
          date_str = infobox['release']
      except Exception:  #no results found or wiki infobox has no 'release' section
        continue
    date_str_split = date_str.split("|")

    #preprocess to narrow down dates to right platform and right region
    for i, q in enumerate(date_str_split):
      if sales['Platform'][g] in ''.join(date_str_split[:i]):
          date_str_split = date_str_split[i:]
          break
      elif sales['Platform'][g] in platform_altnames:
        if platform_altnames[sales['Platform'][g]] in ''.join(date_str_split[:i]):
          date_str_split = date_str_split[i:]
          break

    for i, q in enumerate(date_str_split):
      if 'NA' in date_str_split[i-1] or 'WW' in date_str_split[i-1]:
          date_str_split = [date_str_split[i]]
          break

    for i, q in enumerate(date_str_split):
      if 'JP' in date_str_split[i-1]:
        continue
      day, month, year = '', '', ''
      date = date_str_split[i].split()
      for mem in date:
          mem = re.sub('[(){}<>,]', '', mem)
          if mem in months:
              month = str(months[mem])
          if mem.isnumeric():
              if int(mem) < 32:
                  day = str(mem)
              else:
                  year = str(mem)
      try:
        int(year)
      except Exception:
        continue
      if (day != '' and month != '' and int(year) == int(sales['Year_of_Release'][g])):
          break
    try:
      int(year)
    except Exception:
      continue
    if day == '' or month == '' or year == '' or int(year) != int(sales['Year_of_Release'][g]):
      continue
    if month == '12' and int(year) == 2016:
      continue
    rd = month + '/' + day + '/' + year
    release_date.append(rd)
    new_sales = pd.concat([new_sales, sales.iloc[[g]]])  # DataFrame.append was removed in pandas 2.0
  
  new_sales['release date'] = release_date
  new_sales.to_csv('games_w_rldates.csv', index=False)