def test_asynchrous(self):
    """Assert that web.asynchronous() runs a function call in the background.

    The wrapped lambda sleeps for the given time and then evaluates to 1
    (``time.sleep()`` returns None, so ``None or 1`` is 1). The test polls
    ``done`` and then checks that ``value`` equals 1.
    """
    v = web.asynchronous(lambda t: time.sleep(t) or 1, 0.2)
    # Bound the wait: a request that never completes should fail this test
    # instead of hanging the whole test run.
    deadline = time.time() + 10.0
    while not v.done and time.time() < deadline:
        time.sleep(0.1)
    self.assertTrue(v.done, "asynchronous call did not finish in time")
    self.assertEqual(v.value, 1)
    # The parenthesized single-argument form prints the same text on
    # Python 2 and Python 3.
    print("pattern.web.asynchronous()")
def generar_consulta_bing(q):
    """Collect the result URLs Bing returns for every query in ``q``.

    Each query is submitted through ``asynchronous(engine.search, ...)``
    asking for 20 results starting at 1. The function waits until the
    request reports ``done``, re-raises any error stored on the request,
    and gathers the ``url`` of every result.

    Returns a flat list of URLs, in query order.
    """
    # NOTE(review): reload(sys) + setdefaultencoding() changes the default
    # encoding for the whole process and looks Python 2-only; kept unchanged
    # because callers may depend on it.
    reload(sys)
    sys.setdefaultencoding('utf8')
    # NOTE(review): the license key is hard-coded in the source.
    engine_bing = Bing(
        license="TNMHm68dvf440pSPdnU+2LqxeQi7J2xszPZLBiPYsmI",
        throttle=0.5,
        language=None
    )
    urls = []
    for query in q:
        pending = asynchronous(
            engine_bing.search, query,
            start=1, count=20, type=SEARCH, timeout=10
        )
        # Wait for the background search to finish.
        while True:
            if pending.done:
                break
            time.sleep(0.01)
        # An error occurred in engine.search(), raise it.
        if pending.error:
            raise pending.error
        # Retrieve the list of search results.
        urls.extend(hit.url for hit in pending.value)
    return urls
def websearch(query):
    """Search the web for ``query`` with the configured library and engine.

    Reads ``web_results_limit``, ``search_library_active``,
    ``search_engine_active`` and ``<engine>_per_page`` from the module-level
    ``config``. Whole pages are fetched, so more than ``web_results_limit``
    results can be collected (up to the next multiple of the page size).

    Returns a list of ``{'title': ..., 'description': ...}`` dicts; the list
    is empty when the configured library is not one of 'pattern',
    'requests' or 'xgoogle'.
    """
    limit = config['web_results_limit']
    search_library = config['search_library_active']
    search_engine = config['search_engine_active']
    ret = []
    # Bing=50 per page, Google=10 - go figure!
    per_page = config[search_engine + '_per_page']
    pages = int(math.ceil(limit / float(per_page)))

    if search_library == 'pattern':
        # NOTE(review): any engine other than 'bing'/'google' leaves `engine`
        # unbound, so the loop below fails with NameError.
        if search_engine == 'bing':
            # NOTE(review): the Bing license key is hard-coded in the source.
            engine = Bing(license='cvzWROzO9Vaxqu0k33+y6h++ts+a4PLQfvA7HlyJyXM=', language="en")
        elif search_engine == 'google':
            engine = Google(license=config[config['use_whose_key'] + '_google_key'], language="en")
        for page in range(pages):
            # Turns out start = starting page and count is results per page.
            # Could probably do some logic to make sure count is right: if
            # limit was 130, on page 3 count should be 30, whereas this code
            # fetches 50 for a total of 150. Working in whole blocks for now.
            request = asynchronous(engine.search, clean_query(query), start=page+1,
                                   count=per_page, type=SEARCH, timeout=10, throttle=0.5)
            while not request.done:
                time.sleep(0.01)
            # NOTE(review): request.error is not inspected here; a page with
            # no value is skipped.
            if request.value is not None:
                for result in request.value:
                    ret.append({'title': result.title, 'description': result.text})

    elif search_library == 'requests':
        for page in range(pages):
            offset = per_page * page
            params = {'$format': 'json', '$top': per_page, '$skip': offset}
            results = bing.search('web', clean_query(query), params)()['d']['results'][0]['Web']
            for result in results:
                ret.append({'title': result['Title'], 'description': result['Description']})

    elif search_library == 'xgoogle':
        for page in range(pages):
            try:
                # inject some delay
                time.sleep(0.04)
                gs = GoogleSearch(clean_query(query))
                gs.page = page+1
                gs.results_per_page = per_page
                results = gs.get_results()
                for res in results:
                    ret.append({'title': res.title.encode("utf8"),
                                'description': res.desc.encode("utf8")})
            except SearchError as e:
                # Best effort: report the failed page and carry on with the next.
                print("Search failed: %s" % e)

    # Hand the collected results back to the caller; the visible code built
    # this list but never returned it.
    return ret
def generar_consulta_bing(q):
    """Collect the result URLs Bing returns for every query in ``q``.

    Each query is submitted through ``asynchronous(engine.search, ...)``
    asking for 10 results starting at 1. The function waits until the
    request reports ``done``, re-raises any error stored on the request,
    and gathers the ``url`` of every result.

    Returns a flat list of URLs, in query order.
    """
    # NOTE(review): reload(sys) + setdefaultencoding() changes the default
    # encoding for the whole process and looks Python 2-only; kept unchanged
    # because callers may depend on it.
    reload(sys)
    sys.setdefaultencoding('utf8')
    # NOTE(review): the license key is hard-coded in the source.
    engine_bing = Bing(
        license="TNMHm68dvf440pSPdnU+2LqxeQi7J2xszPZLBiPYsmI",
        language="en"
    )
    urls = []
    for query in q:
        pending = asynchronous(
            engine_bing.search, query,
            start=1, count=10, type=SEARCH, timeout=10
        )
        # Wait for the background search to finish.
        while True:
            if pending.done:
                break
            time.sleep(0.01)
        # An error occurred in engine.search(), raise it.
        if pending.error:
            raise pending.error
        # Retrieve the list of search results.
        urls.extend(hit.url for hit in pending.value)
    return urls
# NOTE(review): the next four comment lines describe Yahoo, but the engine
# created below is Bing -- confirm which engine (and license URL) is intended.
# Yahoo can retrieve up to a 1000 results (10x100) for a query.
# You should obtain your own license key at:
# https://developer.apps.yahoo.com/wsregapp/
# Otherwise you will be sharing the default license with all users of this module.
engine = Bing(license=None)

# Quote a query to match it exactly:
q = "\"is more important than\""

# When you execute a query, the script will halt until all results are downloaded.
# In applications with an event loop (e.g. a GUI or an interactive animation)
# it is more useful if the app keeps on running while the search is executed in the background.
# This can be achieved with the asynchronous() command.
# It takes any function and the function's arguments and keyword arguments:
request = asynchronous(engine.search, q, start=1, count=100, type=SEARCH, timeout=10)

# This while-loop simulates an application event loop.
# In a real-world example you would have an app.update() or similar
# in which you can check request.done every now and then.
# Progress dots are written through sys.stdout so the script behaves the same
# on Python 2 and Python 3 (the print statement is a syntax error on 3).
import sys
while not request.done:
    time.sleep(0.01)
    sys.stdout.write(". ")
    sys.stdout.flush()  # show each dot as soon as it is written
# End the line of dots and leave one blank line.
sys.stdout.write("\n\n")

# An error occurred in engine.search(), raise it.
if request.error:
    raise request.error
# If this limit is exceeded, SearchEngineLimitError is raised.
# You should obtain your own license key at:
# https://datamarket.azure.com/account/
engine = Bing(license=None, language="en")

# Quote a query to match it exactly:
q = "\"is more important than\""

# When you execute a query,
# the script will halt until all results are downloaded.
# In apps with an infinite main loop (e.g., GUI, game),
# it is often more useful if the app keeps on running
# while the search is executed in the background.
# This can be achieved with the asynchronous() function.
# It takes any function and that function's arguments and keyword arguments:
request = asynchronous(engine.search, q, start=1, count=100, type=SEARCH, timeout=10)

# This while-loop simulates an infinite application loop.
# In real-life you would have an app.update() or similar
# in which you can check request.done every now and then.
# Progress dots are written through sys.stdout so the script behaves the same
# on Python 2 and Python 3 (the print statement is a syntax error on 3).
import sys
while not request.done:
    time.sleep(0.01)
    sys.stdout.write(". ")
    sys.stdout.flush()  # show each dot as soon as it is written
# End the line of dots and leave one blank line.
sys.stdout.write("\n\n")

# An error occurred in engine.search(), raise it.
if request.error:
    raise request.error
'https://upload.wikimedia.org/wikipedia/commons/f/f1/RougeOr_football.jpg') file = open('football' + extension(page_url.page), 'wb') file.write(page_url.download()) file.close() # ### Finding URLs within Text from pattern.web import find_urls print(find_urls('To search anything, go to www.google.com', unique=True)) # ### Making Asynchronous Requests for Webpages from pattern.web import asynchronous, time, Google asyn_req = asynchronous(Google().search, 'artificial intelligence', timeout=4) while not asyn_req.done: time.sleep(0.1) print('searching...') print(asyn_req.value) print(find_urls(asyn_req.value, unique=True)) # ### Getting Search Engine Results with APIs # #### Google from pattern.web import Google google = Google(license=None)