def buscar_Flickr(self, texto):
    """Search Flickr for 6 images matching *texto* and save them locally.

    Images are written to imagenes/busqueda/<index><ext>, where <ext> is
    taken from each result's URL. ``self.espera(i)`` is called before each
    download (presumably a wait/progress hook — confirm against caller).
    """
    engine = Flickr(license=None, throttle=0.5, language='es')
    for i, result in enumerate(engine.search(texto, count=6, cached=True,
                                             copyright=False)):
        self.espera(i)
        # Target path: sequential index + original file extension.
        directorio = os.path.join('imagenes', 'busqueda',
                                  str(i) + extension(result.url))
        # Context manager guarantees the handle is closed even if the
        # download raises (the original leaked the handle in that case).
        with open(directorio, 'wb') as f:
            f.write(result.download(timeout=10))
def get_info(search_query):
    """Collect text snippets for *search_query* from Google and Twitter.

    Returns a dict with keys:
      "Error"  -- None on success, or an error message string when the
                  input is not a string;
      "Result" -- list of result texts (``[None]`` on error).
    """
    if not isinstance(search_query, str):
        # Historical error payload — kept verbatim so existing callers
        # that match on this message still work.
        return {"Error": "Pass a string, from mine.py [7]",
                "Result": [None]}

    # Only Google and Twitter were actually queried by the original code;
    # the other engines (Bing, Facebook, Wikipedia, Flickr) were dead code.
    engine_google = Google(license=None, throttle=0.5, language=None)
    engine_twitter = Twitter(license=None, throttle=0.5, language=None)

    result = []
    # Pages 1-4, 10 results per page, in the original order:
    # all Google pages first, then all Twitter pages.
    for page in range(1, 5):
        result += [para.text for para in
                   engine_google.search(search_query, type=SEARCH,
                                        start=page, count=10)]
    for page in range(1, 5):
        result += [para.text for para in
                   engine_twitter.search(search_query, type=SEARCH,
                                         start=page, count=10)]

    return {"Error": None, "Result": result}
from pattern.web import Flickr, extension
from pattern.web import RELEVANCY, LATEST, INTERESTING  # Image sort order.
from pattern.web import SMALL, MEDIUM, LARGE            # Image size.

# This example downloads an image from Flickr (http://flickr.com).
# Acquiring the image data takes three Flickr queries:
# - the first query with Flickr.search() retrieves a list of results,
# - the second query is executed behind the scenes in the FlickrResult.url property,
# - the third query downloads the actual image data using this URL.
# It is a good idea to cache results from Flickr locally,
# which is what the cached=True parameter does.

# You should obtain your own license key at:
# http://www.flickr.com/services/api/
# Otherwise you will be sharing the default key with all users of this module.
engine = Flickr(license=None)

q = "duracell bunny"
results = engine.search(q, size=MEDIUM, sort=RELEVANCY, cached=True)
for img in results:
    # print(img.url)  # Retrieving the actual image URL executes an additional query.
    # NOTE: converted from Python 2 print statements for consistency with
    # the Python 3 code elsewhere in this file.
    print(img.description)
    print(img.author)
    print()

# Download and save the image; the context manager closes the file
# (the original never closed the handle).
img = results[0]
data = img.download()
path = q.replace(" ", "_") + extension(img.url)
with open(path, "wb") as f:
    f.write(data)
from pattern.web import RELEVANCY, LATEST, INTERESTING  # Image sort order.
from pattern.web import SMALL, MEDIUM, LARGE            # Image size.

# This example downloads an image from Flickr (http://flickr.com).
# Acquiring the image data takes three Flickr queries:
# 1) Flickr.search() retrieves a list of results,
# 2) FlickrResult.url retrieves the image URL (behind the scenes),
# 3) FlickrResult.download() visits FlickrResult.url and downloads the content.
# It is a good idea to cache results from Flickr locally,
# which is what the cached=True parameter does.

# You should obtain your own license key at:
# http://www.flickr.com/services/api/
# Otherwise you will be sharing the default key with all users of pattern.web.
engine = Flickr(license=None)

q = "duracell bunny"
results = engine.search(q, size=MEDIUM, sort=RELEVANCY, cached=False)
for img in results:
    #print(img.url)  # Retrieving the actual image URL executes a query.
    print(img.text)
    print(img.author)
    print("")

# Download and save one of the images; the context manager guarantees the
# file is closed (the original left the handle open).
img = results[0]
data = img.download()
path = q.replace(" ", "_") + extension(img.url)
with open(path, "wb") as f:
    f.write(data)
# --- Twitter ---
from pattern.web import Twitter

twitter = Twitter()
# Print the URL and text of each tweet matching the query.
for tweet in twitter.search('Analytics India Magazine'):
    print(tweet.url)
    print(tweet.text)
# For this query only the links are printed.
for tweet in twitter.search('Gamification'):
    print(tweet.url)

# --- Flickr ---
from pattern.web import Flickr

flickr = Flickr(license=None)
for photo in flickr.search('Analytics India Magazine'):
    print(photo.url)
    print(photo.text)

# --- Accessing Web Pages ---
# The URL object is used to retrieve contents from webpages. It has several
# methods that can be used to open a webpage, download its contents and read
# it. The download() helper fetches the HTML source of any page directly;
# below it grabs the Wikipedia article on artificial intelligence.
from pattern.web import download

page_html = download('https://en.wikipedia.org/wiki/Artificial_intelligence', unicode=True)

# Files (for example images) can also be downloaded from pages via URL:
from pattern.web import URL, extension
def _anything_1(self, *args):
    """Search Flickr for the query built from *args* and emit results.

    Outlets (via ``self._outlet``): 1=description, 2=author, 3=url,
    4=download directory (when downloading), 5=debug/status messages.
    Optional instance attributes override the defaults below:
    start, count, sort, size, download, outpath.
    """
    # Defaults, overridden by instance attributes when present.
    varstart = 1
    varcount = 10
    varsort = 'relevancy'
    varsize = 'medium'
    download = False
    outpath = tempfile.gettempdir()

    # Join all inlet arguments into a single search string.
    self.input = ' '.join(str(arg) for arg in args)

    if hasattr(self, 'start'):
        varstart = self.start
    if hasattr(self, 'count'):
        varcount = self.count
    if hasattr(self, 'sort'):
        varsort = self.sort
    if hasattr(self, 'size'):
        varsize = self.size
    if hasattr(self, 'download'):
        download = self.download
    if hasattr(self, 'outpath'):
        outpath = os.path.abspath(self.outpath)

    dbgmsg = "%s%d%s%d%s%s%s%s%s%s%s%s%s%s" % (
        "start:", varstart, " count:", varcount, " sort:", varsort,
        " size:", varsize, " search:", self.input,
        " download:", download, " output path:", outpath)
    self._outlet(5, dbgmsg)

    engine = Flickr(license=None)
    results = engine.search(self.input, start=varstart, count=varcount,
                            sort=varsort, size=varsize, cached=False)

    if download:
        # One time-stamped directory per query for the downloaded images.
        now = datetime.datetime.now()
        timenow = "%s%s%s" % (self.input, "_", now.strftime('%Y%m%d-%H%M'))
        pathdir = os.path.join(outpath, timenow)
        if not os.path.isdir(pathdir):
            os.mkdir(pathdir)
        output = "%s%s" % ("created: ", str(pathdir))
        self._outlet(5, output)

    for img in results:
        author = encode_utf8(plaintext(img.author))
        description = encode_utf8(img.description)
        url = encode_utf8(img.url)
        self._outlet(1, description)
        self._outlet(2, author)
        self._outlet(3, url)
        # "None" string compare kept from the original: url comes through
        # encode_utf8 and may be the literal string "None".
        if download and url != "None":
            data = img.download()
            filename = img.url.rsplit("/", 1)[1]
            pathfile = os.path.join(pathdir, filename)
            # BUG FIX: image data is binary; text mode "w" corrupts it on
            # Windows and raises TypeError on Python 3 — use "wb". The
            # context manager also guarantees the handle is closed.
            with open(pathfile, "wb") as f:
                f.write(data)
            output = "%s%s" % ("downloaded: ", str(pathfile))
            self._outlet(5, output)

    if download:
        self._outlet(4, str(pathdir))