Ejemplo n.º 1
0
 def test_get_json(self):
     """Run a Google Scholar search for "Coffee" and re-fetch it by id.

     The search is executed with ``get_json()``; its ``search_metadata``
     is printed, then the same search is requested again through
     ``get_search_archive`` using the id from that metadata. The second
     argument ``'html'`` is passed through to the client (presumably the
     output format -- confirm against the client library) and whatever
     comes back is printed.
     """
     client = GoogleSearch({"q": "Coffee", "engine": "google_scholar"})
     response = client.get_json()

     metadata = response['search_metadata']
     print(metadata)
     search_id = metadata['id']

     # Look the finished search up again by id; this call blocks.
     print(search_id + ": get search from archive")
     archived_html = client.get_search_archive(search_id, 'html')
     print(archived_html)
def test_async():
    """Submit every dataset question as an async search, then poll for results.

    Phase 1 loads ``./dataset/Questions_with_Ans.json`` via ``load_json``,
    takes the ``"Question"`` field of each entry, submits it as an async
    search and stores the submission response in a FIFO queue.

    Phase 2 drains the queue: each search is looked up in the archive by id.
    When its status matches ``Cached`` or ``Success`` the organic results
    are printed; otherwise the submission response is put back on the queue
    and the loop sleeps one second before continuing.

    NOTE(review): a search that never reaches Cached/Success is retried
    forever; there is no retry limit here.
    """
    # Submission responses still waiting for a finished result.
    search_queue = Queue()

    # Serp API search, reused for every question by overwriting "q".
    search = GoogleSearch({"location": "Austin,Texas", "async": True})

    json_q = load_json("./dataset/Questions_with_Ans.json")
    questions = [entry["Question"] for entry in json_q]

    for question in questions:
        print("execute async search: q = " + question)
        search.params_dict["q"] = question
        data = search.get_dict()
        print("add search to the queue where id: " +
              data['search_metadata']['id'])
        search_queue.put(data)

    print("wait until all search statuses are cached or success")

    # Separate client used only for archive lookups.
    search = GoogleSearch({"async": True})
    while not search_queue.empty():
        data = search_queue.get()
        search_id = data['search_metadata']['id']

        # retrieve search from the archive - blocker
        print(search_id + ": get search from archive")
        search_archived = search.get_search_archive(search_id)
        status = search_archived['search_metadata']['status']
        print(search_id + ": status = " + status)

        if re.search('Cached|Success', status):
            print(search_id + ": search done with q = " +
                  search_archived['search_parameters']['q'])
            print(search_archived["organic_results"])
        else:
            print(search_id + ": requeue search")
            # Requeue the submission response (not the client object):
            # the loop reads data['search_metadata']['id'] from each item.
            search_queue.put(data)
            # wait 1s before polling again
            time.sleep(1)

    print('all searches completed')
Ejemplo n.º 3
0
def search_async(q_list):
    """Run an async search for each query and store results in QUERY_RESULT.

    Args:
        q_list: iterable of query strings; each is submitted as the ``"q"``
            parameter of a search built by ``build_search(is_async=True)``.

    Side effects:
        For every search whose archived status matches ``Cached`` or
        ``Success``, ``QUERY_RESULT`` receives the search's
        ``organic_results`` under a key made of the LAST FIVE characters
        of the query string. Progress messages are printed.

    Searches that are not finished yet are put back on the queue and
    retried after a one second pause.

    NOTE(review): a search that never reaches Cached/Success is retried
    forever; there is no retry limit here.
    """
    search_queue = Queue()
    search = build_search(is_async=True)
    show_msg = False  # flip to True for per-search progress output

    for q in q_list:
        search.params_dict["q"] = q
        data = search.get_dict()

        # Keep the submission response so the search can be polled by id.
        search_queue.put(data)

        if show_msg:
            print("execute async search: q = " + q)
            print("add search to the queue where id: " +
                  data['search_metadata']['id'])

    print("wait until all search statuses are cached or success")

    # Separate client used only for archive lookups.
    search = GoogleSearch({"async": True})
    while not search_queue.empty():
        data = search_queue.get()
        search_id = data['search_metadata']['id']

        # retrieve search from the archive - blocker
        search_archived = search.get_search_archive(search_id)
        status = search_archived['search_metadata']['status']
        if show_msg:
            print(search_id + ": get search from archive")
            print(search_id + ": status = " + status)

        if re.search('Cached|Success', status):
            query = search_archived['search_parameters']['q']
            if show_msg:
                print(search_id + ": search done with q = " + query)
            # NOTE(review): results are keyed by the query's last five
            # characters, so queries sharing a suffix overwrite each other
            # -- confirm this is intended by the callers.
            QUERY_RESULT[query[-5:]] = search_archived["organic_results"]
        else:
            print(search_id + ": requeue search")
            # Requeue the submission response (not the client object):
            # the loop reads data['search_metadata']['id'] from each item.
            search_queue.put(data)
            # wait 1s before polling again
            time.sleep(1)

    print('all searches completed')
Ejemplo n.º 4
0
    def test_async(self):
        """Submit three async searches and poll the archive until all finish.

        Phase 1 submits one async search per company name ('amd', 'nvidia',
        'intel') and queues each submission response; a ``None`` response
        is reported and skipped.

        Phase 2 drains the queue: each search is looked up in the archive
        by id. When its status matches ``Cached`` or ``Success`` it is
        reported as done; otherwise the submission response is put back on
        the queue and the loop sleeps one second before continuing.

        NOTE(review): a search that never reaches Cached/Success is
        retried forever; there is no retry limit here.
        """
        # Submission responses still waiting for a finished result.
        search_queue = Queue()

        # Serp API search, reused for every company by overwriting "q".
        search = GoogleSearch({"location": "Austin,Texas", "async": True})

        for company in ['amd', 'nvidia', 'intel']:
            print("execute async search: q = " + company)
            search.params_dict["q"] = company
            data = search.get_dict()
            # Skip only when there is no response; a valid response must
            # reach the queue.
            if data is None:
                print("oops data is empty for: " + company)
                continue
            print("add search to the queue where id: " +
                  data['search_metadata']['id'])
            search_queue.put(data)

        print("wait until all search statuses are cached or success")

        # Separate client used only for archive lookups.
        search = GoogleSearch({"async": True})
        while not search_queue.empty():
            data = search_queue.get()
            search_id = data['search_metadata']['id']

            # retrieve search from the archive - blocker
            print(search_id + ": get search from archive")
            search_archived = search.get_search_archive(search_id)
            status = search_archived['search_metadata']['status']
            print(search_id + ": status = " + status)

            if re.search('Cached|Success', status):
                print(search_id + ": search done with q = " +
                      search_archived['search_parameters']['q'])
            else:
                print(search_id + ": requeue search")
                # Requeue the submission response (not the client object):
                # the loop reads data['search_metadata']['id'] from each
                # item.
                search_queue.put(data)
                # wait 1s before polling again
                time.sleep(1)

        print('all searches completed')