def get_results(item, location):
    params = {
        "q": item,
        "tbm": "shop",
        "location": location,
        "hl": "en",
        "gl": "us",
        "api_key": "286dc1ea151c8c789b1babc2c6e89694919c91e5edb1908278d4c771c5fdcf68",
        "num": 30
    }
    client = GoogleSearch(params)
    results = client.get_dict()
    results = results["shopping_results"]
    item_list = []
    for result in results:
        item_list.append(
            ItemData(result.get("title"), result.get("link"),
                     result.get("price"), result.get("snippet"),
                     result.get("source")))
    return item_list
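# A minimal usage sketch for get_results, assuming ItemData is a simple
# container (e.g. a dataclass) whose attribute names match the fields passed
# above; the query and location strings are illustrative only.
deals = get_results("espresso machine", "Austin, Texas, United States")
for deal in deals[:5]:
    print(deal.title, deal.price, deal.source)  # attribute names are assumed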
def print_form():
    global CONTEXT
    if request.method == "GET":
        # serve HTML page
        return render_template("index.html")
    else:
        # handle text from submitted form
        CONTEXT["photos"].clear()
        text_book = request.json["text_book"]
        slider_val = int(request.json["slider_val"])

        # split text into sections
        text_book_sentences = text_book.split('.')
        text_book_sentences = text_book_sentences[:-1]  # get rid of last empty string (after last sentence)
        num_sentences = len(text_book_sentences)
        text_book_sections = []
        for idx in range(0, num_sentences, slider_val):
            if idx + slider_val < num_sentences:
                text_book_sections.append(". ".join(
                    text_book_sentences[idx:(idx + slider_val)]))
            else:
                text_book_sections.append(". ".join(text_book_sentences[idx:]))

        # summarize each section
        url = "https://textanalysis-text-summarization.p.rapidapi.com/text-summarizer"
        summaries = []
        for section in text_book_sections:
            payload = {"url": "", "text": section, "sentnum": 1}
            headers = {
                'content-type': "application/json",
                'x-rapidapi-key': "3370a90c6bmsh4469eda97977206p1dbffdjsne99d3fc5a7b0",
                'x-rapidapi-host': "textanalysis-text-summarization.p.rapidapi.com"
            }
            summary = json.loads(
                requests.request("POST", url, data=json.dumps(payload),
                                 headers=headers).text)
            summaries.append(summary["sentences"][0])
            print(summary["sentences"])

        # perform image lookup
        for idx, summary in enumerate(summaries):
            # make call to image API
            params = {
                "q": summary,
                "tbm": "isch",
                "ijn": "0",
                "api_key": NEW_API_KEY
            }
            search = GoogleSearch(params)
            results = search.get_dict()
            images_results = results['images_results']
            if images_results and ("original" in images_results[0]):
                link = images_results[0]["original"]
                print(link)
                CONTEXT["photos"][text_book_sections[idx]] = link
        return redirect(url_for('view_results'))
def retrieve_paper(doi):
    """query google scholar api for the article"""
    params = {"engine": "google_scholar", "q": doi, "api_key": api_key}
    search = GoogleSearch(params)
    results = search.get_dict()
    # now we need to parse through the huge json returned
    # to actually find the pdf link
    pdflink = results["organic_results"][0]["resources"][0]["link"]
    return pdflink
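# Illustrative call to retrieve_paper; the DOI and the module-level api_key are
# placeholders. The nested lookup above raises KeyError/IndexError when a
# result has no "resources" entry, so a caller may want to guard for that.
try:
    pdf_url = retrieve_paper("10.1038/nature14539")
    print("PDF link:", pdf_url)
except (KeyError, IndexError):
    print("No downloadable PDF found for this DOI")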
def search_request(query):
    params = {
        "q": query,
        "tbm": "isch",
        "ijn": 0,
        "api_key": API_KEY,
    }
    search = GoogleSearch(params)
    results = search.get_dict()
    return results['images_results']
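# Sketch of consuming search_request, assuming the usual SerpApi image payload
# where each entry carries an "original" full-resolution URL; the query string
# and output filename are placeholders.
import requests

images = search_request("red panda")
if images and images[0].get("original"):
    response = requests.get(images[0]["original"], timeout=30)
    with open("first_result.jpg", "wb") as fh:
        fh.write(response.content)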
def test_async():
    # store searches
    search_queue = Queue()
    # Serp API search
    search = GoogleSearch({"location": "Austin,Texas", "async": True})
    json_q = load_json("./dataset/Questions_with_Ans.json")
    # json_q = load_json("./dataset/question.json")
    ll = list(map(lambda x: x["Question"], json_q))
    # loop through the questions
    for question in ll:
        print("execute async search: q = " + question)
        search.params_dict["q"] = question
        data = search.get_dict()
        print("add search to the queue where id: " + data['search_metadata']['id'])
        # add search to the search_queue
        search_queue.put(data)

    print("wait until all search statuses are cached or success")
    # Create regular search
    search = GoogleSearch({"async": True})
    while not search_queue.empty():
        data = search_queue.get()
        search_id = data['search_metadata']['id']

        # retrieve search from the archive - blocker
        print(search_id + ": get search from archive")
        search_archived = search.get_search_archive(search_id)
        print(search_id + ": status = " +
              search_archived['search_metadata']['status'])

        # check status
        if re.search('Cached|Success',
                     search_archived['search_metadata']['status']):
            print(search_id + ": search done with q = " +
                  search_archived['search_parameters']['q'])
            print(search_archived["organic_results"])
        else:
            # not ready yet: requeue the original search data, not the client object
            print(search_id + ": requeue search")
            search_queue.put(data)
            # wait 1s
            time.sleep(1)

    # search is over.
    print('all searches completed')
def test_async(self):
    # store searches
    search_queue = Queue()
    # Serp API search
    search = GoogleSearch({"location": "Austin,Texas", "async": True})
    # loop through companies
    for company in ['amd', 'nvidia', 'intel']:
        print("execute async search: q = " + company)
        search.params_dict["q"] = company
        data = search.get_dict()
        if data is None:
            print("oops data is empty for: " + company)
            continue
        print("add search to the queue where id: " +
              data['search_metadata']['id'])
        # add search to the search_queue
        search_queue.put(data)

    print("wait until all search statuses are cached or success")
    # Create regular search
    search = GoogleSearch({"async": True})
    while not search_queue.empty():
        data = search_queue.get()
        search_id = data['search_metadata']['id']

        # retrieve search from the archive - blocker
        print(search_id + ": get search from archive")
        search_archived = search.get_search_archive(search_id)
        print(search_id + ": status = " +
              search_archived['search_metadata']['status'])

        # check status
        if re.search('Cached|Success',
                     search_archived['search_metadata']['status']):
            print(search_id + ": search done with q = " +
                  search_archived['search_parameters']['q'])
        else:
            # not ready yet: requeue the original search data, not the client object
            print(search_id + ": requeue search")
            search_queue.put(data)
            # wait 1s
            time.sleep(1)

    # search is over.
    print('all searches completed')
def parseFood(keyword):
    query = keyword.replace(" ", "+")
    if query.find('recipe') == -1:
        query += '+recipe'
    params = {
        "api_key": "9ef20b0d5060890669f34fae37eeb3fe2d0528f3557f84db54715d7a67373827",
        "engine": "google",
        "q": query,
        "google_domain": "google.com",
        "hl": "id"
    }
    search = GoogleSearch(params)
    results = search.get_dict()
    return jsonify(results)
def parseShopping(keyword):
    params = {
        "api_key": "9ef20b0d5060890669f34fae37eeb3fe2d0528f3557f84db54715d7a67373827",
        "engine": "google",
        "q": keyword.replace(" ", "+"),
        "location": "Indonesia",
        "google_domain": "google.co.id",
        "gl": "id",
        "hl": "id",
        "tbm": "shop"
    }
    search = GoogleSearch(params)
    results = search.get_dict()
    return jsonify(results)
def generateLinks(age, gender, student, salary, city, state, country):
    links = {}
    toSearch = ""
    state = "ontario"  # NOTE: overrides the state argument with a hard-coded value
    if gender == "M" or gender == "F":
        toSearch = toSearch + gender + " "
    else:
        toSearch = toSearch + "LGBTQ "
    toSearch = toSearch + "scholarship "
    if student == 'true':
        toSearch = toSearch + "student "
    if salary < 48535:
        toSearch = toSearch + "low income "
    elif salary < 97069:
        toSearch = toSearch + "middle income "
    toSearch = toSearch + country
    search = GoogleSearch({
        "q": toSearch,
        "location": city + ',' + state,
        "api_key": "157a826ffcd18b1592accedc793f1059857ee66c91b004dfd295b6a9b28cadfc"
    })
    results = search.get_dict()
    print("-------------------------")
    organic_results = results['organic_results']
    link = "searchLink: " + results['search_metadata']['google_url']
    print("\n\n" + link)
    count = 1
    finalString = ""
    for x in organic_results[:3]:
        finalString = finalString + x["link"] + ","
        count += 1
    return finalString
def pulling_data(job_titles, cities):
    job_results = []
    for job in job_titles:
        for city in cities:
            params = {
                "engine": "google_jobs",
                "q": f'{job} {city}',
                "hl": "en",
                'num': 100,
                'start': 10,
                "api_key": None,
            }
            # loop through 10 pages of results
            for x in range(10):
                params['start'] = 10 * x
                search = GoogleSearch(params)
                # json data
                results = search.get_dict()
                # validate_response is either True or False, depending on what jobs_results_validation returns
                validate_response = jobs_results_validation(results)
                print(validate_response)
                # if the jobs_results key is found in the json data, this is True and we enter the if statement;
                # otherwise continue with the for loop to fetch more pages
                if validate_response:
                    job_postings = results['jobs_results']
                    print(type(job_postings))
                    # each page returns a list of postings, so loop through each one to check that the columns we want are there
                    for job_post in job_postings:
                        response = columns_validation(job_post)
                        if response:
                            print(job_post)
                            job_results.append(job_post)
                        else:
                            print('response was false')
    # list of dictionaries
    print(len(job_results))
    return job_results
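# Hypothetical usage of pulling_data. jobs_results_validation and
# columns_validation are assumed to be defined elsewhere in the project, and a
# real SerpApi key must replace the api_key placeholder above.
postings = pulling_data(["data analyst", "data engineer"], ["Chicago", "Denver"])
print(f"collected {len(postings)} job postings")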
def get_image_results_for_query(query: str, num_images: int = 100):
    results = []
    for page_num in tqdm(range(math.ceil(num_images / 100))):
        params = {
            "api_key": os.getenv("SERPAPI_KEY"),
            "engine": "google",
            "q": query,
            "google_domain": "google.com",
            "tbs": "il:cl",
            "hl": "en",
            "tbm": "isch",
            "ijn": page_num
        }  # tbs is licence, ijn is page
        search = GoogleSearch(params)
        result = search.get_dict()
        with contextlib.suppress(KeyError):
            results += result['images_results']
    return results
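# Example call, assuming SERPAPI_KEY is exported in the environment; the
# "il:cl" tbs filter above restricts results to Creative Commons licences.
cc_images = get_image_results_for_query("hummingbird", num_images=200)
print(len(cc_images), "image results")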
def search(q, linked):
    print("search: %s" % q)
    # run search
    parameter = {"q": q, "api_key": os.getenv("API_KEY")}
    client = GoogleSearch(parameter)
    results = client.get_dict()
    # basic error handling
    if "error" in results:
        print("ERROR: " + results["error"])
        sys.exit(1)
    # analyze results
    queue = []
    if 'knowledge_graph' not in results:
        return queue
    for link in results['knowledge_graph']['people_also_search_for']:
        name = link['name'].replace('.', '')
        if name in linked:
            continue
        linked.append(name)
        queue.append(name)
    return queue
def make_url_request_using_cache(job_query):
    try:
        results = []
        for i in list(range(0, 110, 10)):  # fetch up to 110 results (11 pages of 10) from the API
            params = {
                "engine": "google_jobs",
                "q": job_query,
                "hl": "en",
                "api_key": "a463df1e2c78e577d9220ceeba3d0f6cc418db1a445ed7520d0fc6b0c62ab95a",
                "start": i
            }
            client = GoogleSearch(params)
            result = client.get_dict()
            result = result['jobs_results']
            for job in result:
                dic = {}
                dic['title'] = job['title']
                dic['company_name'] = job['company_name']
                dic['location'] = job['location']
                results.append(dic)
        return results
    except:
        return False  # if the search fails to finish, return False
def search_on_google(query):
    params['q'] = query
    search = GoogleSearch(params)
    results = search.get_dict()
    return results
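# search_on_google mutates a module-level params dict that is not shown in the
# snippet above; a plausible minimal definition (placeholder values) could be:
import os

params = {
    "engine": "google",
    "hl": "en",
    "api_key": os.getenv("SERPAPI_KEY"),  # assumed to be set in the environment
}
print(search_on_google("serpapi python client")["search_metadata"]["status"])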
# download latest SEC report: 13F as XML if available
links = []
for name in linked:
    # xml file name
    fn = name.replace(' ', '_') + '_report.xml'
    if os.path.exists(fn):
        print("skip: " + fn + " already downloaded")
        continue
    print("search 13F: " + name)
    parameter = {
        "q": name + " 13f-hr site:sec.gov",
        "api_key": os.getenv("API_KEY")
    }
    client = GoogleSearch(parameter)
    results = client.get_dict()
    holding_link = None
    if 'organic_results' not in results:
        print("FAIL: no results found for " + name)
        break
    for result in results["organic_results"]:
        if result["link"][-3:] == "xml":
            holding_link = result["link"].replace('primary_doc.xml', 'infotable.xml')
            break
    if holding_link is None:
        print("FAIL: no SEC report for: " + name)
        break
    print("download: " + holding_link)
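    # The snippet above stops at printing the link. A sketch of the likely next
    # step, assuming the requests library is imported: fetch the infotable XML
    # and save it under fn (SEC asks automated clients to send a descriptive
    # User-Agent; the value here is a placeholder).
    response = requests.get(holding_link,
                            headers={"User-Agent": "research-script example@example.com"},
                            timeout=30)
    with open(fn, "wb") as xml_file:
        xml_file.write(response.content)
    links.append(holding_link)  # links is declared above; assumed to collect downloaded report URLs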
def test_get_dict(self):
    search = GoogleSearch({"q": "Coffee", "location": "Austin,Texas"})
    data = search.get_dict()
    self.assertIsNotNone(data.get('local_results'))
start = time.time()
articles = []
cnt = 0
step = 100
while True:
    params = {
        "engine": "google_scholar_author",
        "author_id": author_id,
        "api_key": api_key,
        "start": str(cnt),
        "num": step
    }
    search = GoogleSearch(params)
    author_results = search.get_dict()
    articles.extend(author_results['articles'])
    if len(author_results['articles']) < step:
        break
    else:
        cnt += step

# AUTHOR IS READY TO BE WRITTEN IN DATABASE
author_dict = author_check(author_id=author_id, results=author_results)
# conn = connect()
# write(conn,author_dict,table='google_authors')
# citations_table = []
print(len(articles))
articles = []
cnt = 0
step = 100
while True:
    params = {
        "engine": "google_scholar_author",
        "author_id": author_id,
        "api_key": api_key,
        "start": str(cnt),
        "num": step
    }
    search = GoogleSearch(params)
    results = search.get_dict()
    articles.extend(results['articles'])
    if len(results['articles']) < step:
        break
    else:
        cnt += step

# WE NOW HAVE THE RESULTS DICT, AND ALL ARTICLES
# NAME
if 'name' not in results['author'].keys():
    family_name, given_name, credit_name = None, None, None
else:
    name_to_list = results['author']['name'].split(' ')
    family_name = name_to_list[-1]
def searchQuery(q):
    params = {
        "q": q,
        'location': 'Evanston,Illinois',
        "hl": "en",
        "gl": "us",
        "device": "desktop",
        "api_key": "6c8dcdb60b2416c130acea4c86a035763e5869fe9e5eb44db66077985f512697"
    }
    client = GoogleSearch(params)
    results = client.get_dict()
    if 'answer_box' in results:
        print('ab')
        res = results['answer_box']
        if 'result' in res:
            output = res['result']
        elif 'snippet' in res:
            output = res['snippet']
        elif 'definition' in res:
            output = res['definition']
        else:
            output = results['answer_box']
        pyperclip.copy(str(output))
        pyperclip.paste()
    elif 'knowledge_graph' in results:
        print('kg')
        res = results['knowledge_graph']
        if 'title' in res:
            output = res['title']
        elif 'snippet' in res:
            output = res['snippet']
        else:
            output = results['knowledge_graph']
        link = res['link']
        pyperclip.copy(str(output) + '\n' + str(link))
        pyperclip.paste()
    else:
        print('links')
        found = False
        links = ''
        output = ''
        for i in results:
            if type(results[i]) == list:
                for k in range(len(results[i])):
                    if 'snippet' in results[i][k]:
                        output = results[i][k]['snippet'] + 'LINK AT: ' + results[i][k]['link']
                        found = True
                        links = getLinks(results[i])
                        break
                if found:
                    break
        if not found:
            output = "NO VALID ANSWERS"
        pyperclip.copy(str(output) + '\n' + '\n' + links)
        pyperclip.paste()
    openChat()