def get_results(item, location):
    params = {
        "q": item,
        "tbm": "shop",
        "location": location,
        "hl": "en",
        "gl": "us",
        "api_key":
        "286dc1ea151c8c789b1babc2c6e89694919c91e5edb1908278d4c771c5fdcf68",
        "num": 30
    }

    client = GoogleSearch(params)
    results = client.get_dict()
    results = results["shopping_results"]

    item_list = []

    for result in results:
        item_list.append(
            ItemData(result.get("title"), result.get("link"),
                     result.get("price"), result.get("snippet"),
                     result.get("source")))

    return item_list
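The snippet above assumes the SerpApi Python client and an ItemData container that are not shown. A minimal sketch of the assumed imports and a hypothetical ItemData, matching the positional arguments used above:

from dataclasses import dataclass

from serpapi import GoogleSearch  # pip install google-search-results


@dataclass
class ItemData:
    # hypothetical container for one shopping result
    title: str
    link: str
    price: str
    snippet: str
    source: str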
Example #2
def print_form():
    global CONTEXT
    if request.method == "GET":
        # serve HTML page
        return render_template("index.html")
    else:
        # handle text from submitted form
        CONTEXT["photos"].clear()
        text_book = request.json["text_book"]
        slider_val = int(request.json["slider_val"])
        # split text into sections
        text_book_sentences = text_book.split('.')
        text_book_sentences = text_book_sentences[:-1]  # drop the empty string after the last sentence
        num_sentences = len(text_book_sentences)
        text_book_sections = []
        for idx in range(0, num_sentences, slider_val):
            if idx + slider_val < num_sentences:
                text_book_sections.append(". ".join(
                    text_book_sentences[idx:(idx + slider_val)]))
            else:
                text_book_sections.append(". ".join(text_book_sentences[idx:]))
        # summarize each section into one sentence
        url = "https://textanalysis-text-summarization.p.rapidapi.com/text-summarizer"
        summaries = []
        for section in text_book_sections:
            payload = {"url": "", "text": section, "sentnum": 1}
            headers = {
                'content-type': "application/json",
                'x-rapidapi-key': "3370a90c6bmsh4469eda97977206p1dbffdjsne99d3fc5a7b0",
                'x-rapidapi-host': "textanalysis-text-summarization.p.rapidapi.com"
            }
            response = requests.post(url, json=payload, headers=headers)
            summary = response.json()
            summaries.append(summary["sentences"][0])
            print(summary["sentences"])
        # perform image lookup
        for idx, summary in enumerate(summaries):
            # make call to image API
            params = {
                "q": summary,
                "tbm": "isch",
                "ijn": "0",
                "api_key": NEW_API_KEY
            }
            search = GoogleSearch(params)
            results = search.get_dict()
            images_results = results['images_results']
            if images_results and ("original" in images_results[0]):
                link = images_results[0]["original"]
                print(link)
                CONTEXT["photos"][text_book_sections[idx]] = link
        return redirect(url_for('view_results'))
Example #3
def retrieve_paper(doi):
    """query google scholar api for the article"""
    params = {"engine": "google_scholar", "q": doi, "api_key": api_key}
    search = GoogleSearch(params)
    results = search.get_dict()

    # now we need to parse through the huge json returned
    # to actually find the pdf link
    pdflink = results["organic_results"][0]["resources"][0]["link"]
    return pdflink
Example #4
def search_request(query):
    params = {
        "q": query,
        "tbm": "isch",
        "ijn": 0,
        "api_key": API_KEY,
        
    }
    search = GoogleSearch(params)
    results = search.get_dict()
    return results['images_results']
Example #5
def test_async():
    # store searches
    search_queue = Queue()

    # Serp API search
    search = GoogleSearch({"location": "Austin,Texas", "async": True})

    json_q = load_json("./dataset/Questions_with_Ans.json")
    # json_q = load_json("./dataset/question.json")

    ll = list(map(lambda x: x["Question"], json_q))

    # loop through questions
    for question in ll:
        print("execute async search: q = " + question)
        search.params_dict["q"] = question
        data = search.get_dict()
        print("add search to the queue where id: " +
              data['search_metadata']['id'])
        # add search to the search_queue
        search_queue.put(data)

    print("wait until all search statuses are cached or success")

    # Create regular search
    search = GoogleSearch({"async": True})
    while not search_queue.empty():
        data = search_queue.get()
        search_id = data['search_metadata']['id']

        # retrieve search from the archive - blocker
        print(search_id + ": get search from archive")
        search_archived = search.get_search_archive(search_id)
        print(search_id + ": status = " +
              search_archived['search_metadata']['status'])

        # check status
        if re.search('Cached|Success',
                     search_archived['search_metadata']['status']):
            print(search_id + ": search done with q = " +
                  search_archived['search_parameters']['q'])
            print(search_archived["organic_results"])
        else:
            # requeue the search result so it is polled again
            print(search_id + ": requeue search")
            search_queue.put(data)
            # wait 1s
            time.sleep(1)
    # search is over.
    print('all searches completed')
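test_async relies on a load_json helper that is not shown; a minimal sketch, assuming the dataset file holds a JSON array of objects with a "Question" field:

import json


def load_json(path):
    # parse a JSON file from disk and return the resulting object
    with open(path, encoding="utf-8") as f:
        return json.load(f)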
Example #6
    def test_async(self):
        # store searches
        search_queue = Queue()

        # Serp API search
        search = GoogleSearch({"location": "Austin,Texas", "async": True})

        # loop through companies
        for company in ['amd', 'nvidia', 'intel']:
            print("execute async search: q = " + company)
            search.params_dict["q"] = company
            data = search.get_dict()
            if data is None:
                print("oops data is empty for: " + company)
                continue
            print("add search to the queue where id: " +
                  data['search_metadata']['id'])
            # add search to the search_queue
            search_queue.put(data)

        print("wait until all search statuses are cached or success")

        # Create regular search
        search = GoogleSearch({"async": True})
        while not search_queue.empty():
            data = search_queue.get()
            search_id = data['search_metadata']['id']

            # retrieve search from the archive - blocker
            print(search_id + ": get search from archive")
            search_archived = search.get_search_archive(search_id)
            print(search_id + ": status = " +
                  search_archived['search_metadata']['status'])

            # check status
            if re.search('Cached|Success',
                         search_archived['search_metadata']['status']):
                print(search_id + ": search done with q = " +
                      search_archived['search_parameters']['q'])
            else:
                # requeue the search result so it is polled again
                print(search_id + ": requeue search")
                search_queue.put(data)
                # wait 1s
                time.sleep(1)
        # search is over.
        print('all searches completed')
Example #7
def parseFood(keyword):
    query = keyword.replace(" ", "+")
    if 'recipe' not in query:
        query += '+recipe'
    params = {
        "api_key":
        "9ef20b0d5060890669f34fae37eeb3fe2d0528f3557f84db54715d7a67373827",
        "engine": "google",
        "q": query,
        "google_domain": "google.com",
        "hl": "id"
    }

    search = GoogleSearch(params)
    results = search.get_dict()

    return jsonify(results)
Example #8
def parseShopping(keyword):
    params = {
        "api_key":
        "9ef20b0d5060890669f34fae37eeb3fe2d0528f3557f84db54715d7a67373827",
        "engine": "google",
        "q": keyword.replace(" ", "+"),
        "location": "Indonesia",
        "google_domain": "google.co.id",
        "gl": "id",
        "hl": "id",
        "tbm": "shop"
    }

    search = GoogleSearch(params)
    results = search.get_dict()

    return jsonify(results)
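Both parseFood and parseShopping are written as Flask view helpers; a minimal sketch of the imports they assume:

from flask import jsonify

from serpapi import GoogleSearch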
Example #9
def generateLinks(age, gender, student, salary, city, state, country):
    links = {}

    toSearch = ""

    state = "ontario"

    if gender == "M" or gender == "F":
        toSearch = toSearch + gender + " "
    else:
        toSearch = toSearch + "LGBTQ "

    toSearch = toSearch + "scholarship "

    if student == 'true':
        toSearch = toSearch + "student "

    if salary < 48535:
        toSearch = toSearch + "low income "
    elif salary < 97069:
        toSearch = toSearch + "middle income "

    toSearch = toSearch + country

    search = GoogleSearch({
        "q": toSearch,
        "location": city + ',' + state,
        "api_key": "157a826ffcd18b1592accedc793f1059857ee66c91b004dfd295b6a9b28cadfc"
    })
    results = search.get_dict()
    print("-------------------------")
    organic_results = results['organic_results']
    link = "searchLink: " + results['search_metadata']['google_url']

    print("\n\n" + link)
    # join the top three result links into a comma-separated string
    finalString = ",".join(x["link"] for x in organic_results[:3])

    return finalString
Example #10
def pulling_data(job_titles, cities):
    job_results = []
    for job in job_titles:
        for city in cities:
            params = {
                "engine": "google_jobs",
                "q": f'{job} {city}',
                "hl": "en",
                'num': 100,
                'start': 10,
                "api_key": None,
            }

            #looping through 10 pages
            for x in range(10):
                params['start'] = 10 * x
                search = GoogleSearch(params)
                #json data
                results = search.get_dict()

                #val is going to be either True or False, depending on what jobs_results_validation returns
                validate_response = jobs_results_validation(results)
                print(validate_response)
                #if the jobs_results key is found in the json data, this will return true and we'll enter the if statement. Otherwise continue with the for loop to get more pages
                if validate_response:
                    job_postings = results['jobs_results']
                    print(type(job_postings))
                    #we get up to 10 postings per page in a list, so we loop through each job posting to check that the columns we want are there
                    for job_post in job_postings:
                        response = columns_validation(job_post)
                        if response:
                            print(job_post)
                            job_results.append(job_post)
                        else:
                            print('response was false')

    #list of dictionaries
    print(len(job_results))
    return job_results
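pulling_data calls two validators that are not shown; minimal sketches, assuming they only check for the presence of the expected keys (the column set is an assumption):

def jobs_results_validation(results):
    # hypothetical: True when the response actually contains job postings
    return 'jobs_results' in results


def columns_validation(job_post):
    # hypothetical: True when every column we want to keep is present
    wanted = ('title', 'company_name', 'location')  # assumed column set
    return all(key in job_post for key in wanted)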
Example #11
def get_image_results_for_query(query: str, num_images: int = 100):
    results = []

    for page_num in tqdm(range(math.ceil(num_images / 100))):
        params = {
            "api_key": os.getenv("SERPAPI_KEY"),
            "engine": "google",
            "q": query,
            "google_domain": "google.com",
            "tbs": "il:cl",
            "hl": "en",
            "tbm": "isch",
            "ijn": page_num
        }

        # tbs is licence, ijn is page
        search = GoogleSearch(params)
        result = search.get_dict()
        with contextlib.suppress(KeyError):
            results += result['images_results']

    return results
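A usage sketch for the paginated helper above, assuming SERPAPI_KEY is set in the environment; asking for 250 images fetches ceil(250 / 100) = 3 pages:

images = get_image_results_for_query("golden retriever", num_images=250)
print(len(images))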
Example #12
def search(q, linked):
    print("search: %s" % q)
    # run search
    parameter = {"q": q, "api_key": os.getenv("API_KEY")}
    client = GoogleSearch(parameter)
    results = client.get_dict()

    # basic error handling
    if "error" in results:
        print("ERROR: " + results["error"])
        sys.exit(1)

    # analyze results
    queue = []
    if 'knowledge_graph' not in results:
        return queue
    for link in results['knowledge_graph']['people_also_search_for']:
        name = link['name'].replace('.', '')
        if name in linked:
            continue
        linked.append(name)
        queue.append(name)
    return queue
Example #13
def make_url_request_using_cache(job_query):
    try:
        results = []
        for i in list(range(0, 110, 10)):  # page through up to ~110 results from the API
            params = {
                "engine": "google_jobs",
                "q": job_query,
                "hl": "en",
                "api_key":
                "a463df1e2c78e577d9220ceeba3d0f6cc418db1a445ed7520d0fc6b0c62ab95a",
                "start": i
            }
            client = GoogleSearch(params)
            result = client.get_dict()
            result = result['jobs_results']
            for job in result:
                dic = {}
                dic['title'] = job['title']
                dic['company_name'] = job['company_name']
                dic['location'] = job['location']
                results.append(dic)
        return results
    except Exception:
        return False  # if the search fails to finish, return False
Example #14
def search_on_google(query):
    params['q'] = query
    search = GoogleSearch(params)
    results = search.get_dict()
    return results
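search_on_google mutates a module-level params dict that is not shown; a minimal sketch, assuming only the API key is preset (q is filled in per call):

import os

params = {"api_key": os.getenv("API_KEY")}  # assumed: key preset, q added by search_on_google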
Example #15
# download the latest SEC report: 13F as XML if available
links = []
for name in linked:
    # xml file name
    fn = name.replace(' ', '_') + '_report.xml'
    if os.path.exists(fn):
        print("skip: " + fn + " already downloaded")
        continue

    print("search 13F: " + name)
    parameter = {
        "q": name + " 13f-hr site:sec.gov",
        "api_key": os.getenv("API_KEY")
    }
    client = GoogleSearch(parameter)
    results = client.get_dict()
    holding_link = None
    if 'organic_results' not in results:
        print("FAIL: no results found for " + name)
        break

    for result in results["organic_results"]:
        if result["link"][-3:] == "xml":
            holding_link = result["link"].replace('primary_doc.xml',
                                                  'infotable.xml')
            break
    if holding_link is None:
        print("FAIL: no SEC report for: " + name)
        break

    print("download: " + holding_link)
Example #16
    def test_get_dict(self):
        search = GoogleSearch({"q": "Coffee", "location": "Austin,Texas"})
        data = search.get_dict()
        self.assertIsNotNone(data.get('local_results'))
Example #17
    start = time.time()
    articles = []
    cnt = 0
    step = 100

    while True:
        params = {
            "engine": "google_scholar_author",
            "author_id": author_id,
            "api_key": api_key,
            "start": str(cnt),
            "num": step
        }

        search = GoogleSearch(params)
        author_results = search.get_dict()
        articles.extend(author_results['articles'])
        if len(author_results['articles']) < step:
            break
        else:
            cnt += step

    # AUTHOR IS READY TO BE WRITTEN IN DATABASE
    author_dict = author_check(author_id=author_id, results=author_results)

    # conn = connect()
    # write(conn,author_dict,table='google_authors')
    #

    citations_table = []
    print(len(articles))
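author_check is not shown; a minimal sketch, assuming it just flattens the author block of the last response into a row-shaped dict:

def author_check(author_id, results):
    # hypothetical: pull the author metadata out of the response
    author = results.get('author', {})
    return {
        'author_id': author_id,
        'name': author.get('name'),
        'affiliations': author.get('affiliations'),
    }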
Example #18
articles = []
cnt = 0
step = 100

while True:
    params = {
        "engine": "google_scholar_author",
        "author_id": author_id,
        "api_key": api_key,
        "start": str(cnt),
        "num": step
    }

    search = GoogleSearch(params)
    results = search.get_dict()
    articles.extend(results['articles'])

    if len(results['articles']) < step:
        break
    else:
        cnt += step

# WE NOW HAVE THE RESULTS DICT, AND ALL ARTICLES

# NAME
if 'name' not in results['author']:
    family_name, given_name, credit_name = None, None, None
else:
    name_to_list = results['author']['name'].split(' ')
    family_name = name_to_list[-1]
Example #19
def searchQuery(q):
    params = {
        "q": q,
        "location": "Evanston,Illinois",
        "hl": "en",
        "gl": "us",
        "device": "desktop",
        "api_key": "6c8dcdb60b2416c130acea4c86a035763e5869fe9e5eb44db66077985f512697"
    }

    client = GoogleSearch(params)
    results = client.get_dict()
    if 'answer_box' in results:
        print('ab')
        res = results['answer_box']
        if 'result' in res:
            output = res['result']
        elif 'snippet' in res:
            output = res['snippet']
        elif 'definition' in res:
            output = res['definition']
        else:
            output = results['answer_box']
        pyperclip.copy(str(output))
        pyperclip.paste()
    elif 'knowledge_graph' in results:
        print('kg')
        res = results['knowledge_graph']
        if 'title' in res:
            output = res['title']
        elif 'snippet' in res:
            output = res['snippet']
        else:
            output = results['knowledge_graph']
        link = res['link']
        pyperclip.copy(str(output) + '\n' + str(link))
        pyperclip.paste()
    else:
        print('links')
        found = False
        links = ''
        output = ''
        for i in results:
            if isinstance(results[i], list):
                for k in range(len(results[i])):
                    if 'snippet' in results[i][k]:
                        output = results[i][k]['snippet'] + ' LINK AT: ' + results[i][k]['link']
                        found = True
                        links = getLinks(results[i])
                        break
                if found:
                    break
        if not found:
            output = "NO VALID ANSWERS"

        pyperclip.copy(str(output) + '\n' + '\n' + links)
        pyperclip.paste()

    openChat()
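searchQuery depends on two helpers that are not shown; a minimal sketch of a hypothetical getLinks, assuming it gathers the links of a result list (openChat is UI-specific and omitted):

def getLinks(result_list):
    # hypothetical: newline-separated links from a list of result dicts
    return '\n'.join(r['link'] for r in result_list if 'link' in r)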