Esempio n. 1
0
 def test_asynchrous(self):
     """Check the result of a function call made through web.asynchronous().

     The wrapped callable sleeps for the requested time and then evaluates
     to 1; the test polls the returned request object until it reports
     done and compares its value against 1.
     """
     def sleep_then_one(seconds):
         # time.sleep() returns None, so the `or` falls through to 1.
         return time.sleep(seconds) or 1

     pending = web.asynchronous(sleep_then_one, 0.2)
     while True:
         if pending.done:
             break
         time.sleep(0.1)
     self.assertEqual(pending.value, 1)
     print("pattern.web.asynchronous()")
Esempio n. 2
0
 def test_asynchrous(self):
     # A call wrapped in web.asynchronous() must eventually report done and
     # expose the wrapped function's result (1) as its value.
     delayed_one = lambda t: time.sleep(t) or 1
     job = web.asynchronous(delayed_one, 0.2)
     # Poll the request object every 0.1 s until it has finished.
     finished = job.done
     while not finished:
         time.sleep(0.1)
         finished = job.done
     self.assertEqual(job.value, 1)
     print("pattern.web.asynchronous()")
Esempio n. 3
0
def generar_consulta_bing(q):
    """Search Bing for every query in `q` and return the result URLs.

    Each query is submitted through asynchronous() and polled until the
    request reports done. If a request recorded an error, that error is
    raised; otherwise the `url` of every result is collected.

    Returns a flat list of URLs, in query order.
    """
    # Python 2 idiom: reload sys to regain access to setdefaultencoding(),
    # then switch the default string encoding to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf8')

    # NOTE(review): the license key is embedded in the source.
    engine_bing = Bing(
        license="TNMHm68dvf440pSPdnU+2LqxeQi7J2xszPZLBiPYsmI",
        throttle=0.5,
        language=None)

    urls = []
    for consulta in q:
        pending = asynchronous(
            engine_bing.search, consulta,
            start=1, count=20, type=SEARCH, timeout=10)

        # Wait for the search to finish, checking every 10 ms.
        while True:
            if pending.done:
                break
            time.sleep(0.01)

        # Surface a failure recorded by engine_bing.search().
        if pending.error:
            raise pending.error

        # Keep only the URL of each search result.
        urls.extend(hit.url for hit in pending.value)

    return urls
Esempio n. 4
0
def websearch(query):
    """Collect title/description pairs for `query` from the configured backend.

    Reads 'web_results_limit', 'search_library_active' and
    'search_engine_active' from the module-level `config` mapping, fetches
    ceil(limit / per_page) pages through the selected library ('pattern',
    'requests' or 'xgoogle') and appends one
    {'title': ..., 'description': ...} dict to `ret` per hit.

    NOTE(review): no `return` statement is visible in this block, so as
    shown the function returns None and `ret` is discarded. Presumably a
    trailing `return ret` is missing or was cut off -- confirm against the
    full file.
    """
    limit = config['web_results_limit']
    search_library = config['search_library_active']
    search_engine = config['search_engine_active']
    
    ret = []
    # Page size is configured per engine under '<engine>_per_page'
    # (original author's note: Bing=50 per page, Google=10).
    per_page = config[search_engine + '_per_page']
    # Round up so a partial last page is still requested.
    pages = int(math.ceil(limit / float(per_page)))

    if search_library == 'pattern':
        # NOTE(review): the Bing license key is embedded in the source, while
        # the Google key is looked up in `config`.
        if search_engine == 'bing':
            engine = Bing(license='cvzWROzO9Vaxqu0k33+y6h++ts+a4PLQfvA7HlyJyXM=', language="en")
        elif search_engine == 'google':
            engine = Google(license=config[config['use_whose_key'] + '_google_key'], language="en")
        # NOTE(review): for any other search_engine value `engine` is never
        # assigned, so the first loop iteration below fails with NameError.
        for page in range(pages):
            try:
                # `start` is the (1-based) page number and `count` the number
                # of results per page. Every page asks for a full `per_page`
                # results, so the total can overshoot `limit` (e.g. limit=130
                # with 50 per page fetches 150); the author left trimming the
                # last page for later.
                request = asynchronous(engine.search, clean_query(query), start=page+1, count=per_page, type=SEARCH, timeout=10, throttle=0.5)
                # Poll until the asynchronous request reports completion.
                while not request.done:
                    time.sleep(0.01)
            except:
                # Re-raises unchanged, so this handler has no effect.
                raise
            # request.error is not inspected here; a page whose value is None
            # is skipped silently.
            if request.value != None:
                for result in request.value:
                    ret.append({'title' : result.title, 'description' : result.text})
            
    elif search_library == 'requests':
        # `bing` (lowercase) is a name defined elsewhere in the file, distinct
        # from the `Bing` engine class used above.
        for page in range(pages):
            # '$skip' is the number of results to skip, '$top' the page size.
            offset = per_page * page
            params = {'$format': 'json', '$top': per_page,'$skip': offset}
            # The object returned by bing.search() is called, and the 'Web'
            # list is taken from the first entry of ['d']['results'].
            results = bing.search('web',clean_query(query),params)()['d']['results'][0]['Web']
            for result in results:
                ret.append({'title' : result['Title'], 'description' : result['Description']})
                
    elif search_library == 'xgoogle':
        for page in range(pages):
            try:
                # Short pause before each page request.
                time.sleep(0.04)
                gs = GoogleSearch(clean_query(query))
                gs.page = page+1
                gs.results_per_page = per_page
                results = gs.get_results()
                for res in results:
                    # Titles and descriptions are stored UTF-8 encoded.
                    ret.append({'title' : res.title.encode("utf8"), 'description' : res.desc.encode("utf8")})
            except SearchError, e:
                # A failed page is reported and the loop moves on to the next.
                print "Search failed: %s" % e
Esempio n. 5
0
def generar_consulta_bing(q):
    """Return the URLs of the Bing results for each query in `q`.

    Every query is run through asynchronous() with start=1 and count=10,
    and the function waits for it to finish before moving to the next one.
    An error recorded on a request is raised to the caller.
    """
    # Python 2 idiom: reload sys so setdefaultencoding() is available again,
    # then make UTF-8 the default string encoding.
    reload(sys)
    sys.setdefaultencoding('utf8')

    # NOTE(review): the license key is embedded in the source.
    engine_bing = Bing(license="TNMHm68dvf440pSPdnU+2LqxeQi7J2xszPZLBiPYsmI", language="en")

    def _urls_for(consulta):
        # Run one search, wait for it, and return the URLs of its results.
        request = asynchronous(engine_bing.search, consulta, start=1, count=10, type=SEARCH, timeout=10)
        while True:
            if request.done:
                break
            time.sleep(0.01)
        # Propagate a failure recorded by engine_bing.search().
        if request.error:
            raise request.error
        return [result.url for result in request.value]

    bing = []
    for consulta in q:
        bing += _urls_for(consulta)
    return bing
Esempio n. 6
0
# Yahoo can retrieve up to a 1000 results (10x100) for a query.

# You should obtain your own license key at:
# https://developer.apps.yahoo.com/wsregapp/
# Otherwise you will be sharing the default license with all users of this module.
engine = Bing(license=None)

# Quote a query to match it exactly:
q = "\"is more important than\""

# When you execute a query, the script will halt until all results are downloaded.
# In applications with an event loop (e.g. a GUI or an interactive animation)
# it is more useful if the app keeps on running while the search is executed in the background.
# This can be achieved with the asynchronous() command.
# It takes any function and the function's arguments and keyword arguments:
request = asynchronous(engine.search, q, start=1, count=100, type=SEARCH, timeout=10)

# This while-loop simulates an application event loop.
# In a real-world example you would have an app.update() or similar
# in which you can check request.done every now and then.
while not request.done:
    time.sleep(0.01)
    print ".",

print
print

# An error occured in engine.search(), raise it.
if request.error:
    raise request.error
Esempio n. 7
0
# If this limit is exceeded, SearchEngineLimitError is raised.
# You should obtain your own license key at: 
# https://datamarket.azure.com/account/
engine = Bing(license=None, language="en")

# Quote a query to match it exactly:
q = "\"is more important than\""

# When you execute a query,
# the script will halt until all results are downloaded.
# In apps with an infinite main loop (e.g., GUI, game),
# it is often more useful if the app keeps on running 
# while the search is executed in the background.
# This can be achieved with the asynchronous() function.
# It takes any function and that function's arguments and keyword arguments:
request = asynchronous(engine.search, q, start=1, count=100, type=SEARCH, timeout=10)

# This while-loop simulates an infinite application loop.
# In real-life you would have an app.update() or similar
# in which you can check request.done every now and then.
while not request.done:
    time.sleep(0.01)
    print ".",

print
print

# An error occured in engine.search(), raise it.
if request.error:
    raise request.error
Esempio n. 8
0
    'https://upload.wikimedia.org/wikipedia/commons/f/f1/RougeOr_football.jpg')
# Save the downloaded bytes as 'football' plus the extension taken from the
# URL's page name. The with-statement closes the file even when download()
# or write() raises, which the explicit open()/close() pair did not.
with open('football' + extension(page_url.page), 'wb') as file:
    file.write(page_url.download())

# ### Finding URLs within Text

from pattern.web import find_urls

# Print the URLs that find_urls() detects in a plain sentence.
print(find_urls('To search anything, go to www.google.com', unique=True))

# ### Making Asynchronous Requests for Webpages

from pattern.web import asynchronous, time, Google

# Start the Google search through asynchronous() and poll it every 0.1 s,
# printing a progress message after each wait.
asyn_req = asynchronous(Google().search, 'artificial intelligence', timeout=4)
while True:
    if asyn_req.done:
        break
    time.sleep(0.1)
    print('searching...')

print(asyn_req.value)

# NOTE(review): asyn_req.value is whatever Google().search returned, not a
# plain string like the find_urls() call above receives -- confirm that
# find_urls() accepts it. asyn_req.error is also never checked.
print(find_urls(asyn_req.value, unique=True))

# ### Getting Search Engine Results with APIs

# #### Google

from pattern.web import Google

# Engine created without a license key of its own.
google = Google(license=None)