Example #1
0
    def buscar_Flickr(self, texto):
        """Search Flickr for 6 images matching *texto* and save them locally.

        Each image is written to imagenes/busqueda/<index><ext>, where <ext>
        is taken from the result URL.  self.espera(i) is called before each
        download (presumably a wait/progress hook — confirm with the class).
        """
        engine = Flickr(license=None, throttle=0.5, language='es')
        # enumerate replaces the hand-maintained counter of the original.
        for i, result in enumerate(engine.search(texto,
                                                 count=6,
                                                 cached=True,
                                                 copyright=False)):
            self.espera(i)

            directorio = os.path.join('imagenes', 'busqueda',
                                      str(i) + extension(result.url))
            # BUG FIX: the original left the file handle open if download()
            # raised; the context manager closes it on every path.
            with open(directorio, 'wb') as f:
                f.write(result.download(timeout=10))
Example #2
0
def get_info(search_query):
	"""Search Google and Twitter for *search_query* and return the results.

	Queries pages 1-4 (10 results per page) of each engine and collects the
	plain text of every hit.

	Returns:
		dict with keys "Error" (None on success, else an error message) and
		"Result" (list of result texts, or [None] on error).
	"""
	# Non-string input is reported, not raised — callers inspect "Error".
	if not isinstance(search_query, str):
		return { "Error": "Pass a string, from mine.py [7]", "Result": [None] }

	# Only these two engines are actually queried; the original also built
	# Bing/Facebook/Wikipedia/Flickr engines that were never used.
	engineGoogle = Google(license=None, throttle=0.5, language=None)
	engineTwitter = Twitter(license=None, throttle=0.5, language=None)

	result = []
	for i in range(1, 5):
		result += [para.text for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=10)]
	for i in range(1, 5):
		result += [para.text for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)]

	return { "Error": None, "Result": result }
Example #3
0
from pattern.web import Flickr, extension
from pattern.web import RELEVANCY, LATEST, INTERESTING  # Image sort order.
from pattern.web import SMALL, MEDIUM, LARGE  # Image size.

# This example downloads an image from Flickr (http://flickr.com).
# Acquiring the image data takes three Flickr queries:
# - the first query with Flickr.search() retrieves a list of results,
# - the second query is executed behind the scenes in the FlickResult.url property,
# - the third query downloads the actual image data using this URL.
# It is a good idea to cache results from Flickr locally,
# which is what the cached=True parameter does.

# You should obtain your own license key at:
# http://www.flickr.com/services/api/
# Otherwise you will be sharing the default key with all users of this module.
engine = Flickr(license=None)

q = "duracell bunny"
results = engine.search(q, size=MEDIUM, sort=RELEVANCY, cached=True)
for img in results:
    # print img.url # Retrieving the actual image URL executes an additional query.
    print img.description
    print img.author
    print

# Download and save the image:
img = results[0]
data = img.download()
path = q.replace(" ", "_") + extension(img.url)
f = open(path, "wb")
f.write(data)
Example #4
0
from pattern.web import RELEVANCY, LATEST, INTERESTING  # Image sort order.
from pattern.web import SMALL, MEDIUM, LARGE            # Image size.

# This example downloads an image from Flickr (http://flickr.com).
# Acquiring the image data takes three Flickr queries:
# 1) Flickr.search() retrieves a list of results,
# 2) FlickrResult.url retrieves the image URL (behind the scenes),
# 3) FlickrResult.download() visits FlickrResult.url and downloads the content.

# It is a good idea to cache results from Flickr locally,
# which is what the cached=True parameter does.

# You should obtain your own license key at:
# http://www.flickr.com/services/api/
# Otherwise you will be sharing the default key with all users of pattern.web.
engine = Flickr(license=None)

q = "duracell bunny"
results = engine.search(q, size=MEDIUM, sort=RELEVANCY, cached=False)
for img in results:
    #print(img.url)  # Retrieving the actual image URL executes a query.
    print(img.text)
    print(img.author)
    print("")

# Download and save one of the images:
img = results[0]
data = img.download()
path = q.replace(" ", "_") + extension(img.url)
# BUG FIX: the original opened the file and never closed it, so the write
# might never be flushed to disk; 'with' closes the handle on every path.
with open(path, "wb") as f:
    f.write(data)
Example #5
0
#twitter
from pattern.web import Twitter

twitter = Twitter()

# First query: show both the URL and the text of every matching tweet.
for tweet in twitter.search('Analytics India Magazine'):
    print(tweet.url)
    print(tweet.text)

# Second query: only the tweet URLs are printed.
for tweet in twitter.search('Gamification'):
    print(tweet.url)

#flickr
from pattern.web import Flickr
flickr = Flickr(license=None)
for photo in flickr.search('Analytics India Magazine'):
    print(photo.url)
    print(photo.text)

#%%%Accessing Web Pages
#The URL object is used to retrieve contents from the webpages. It has several methods that can be used to open a webpage, download the contents from a webpage and read a webpage.
#You can directly use the download method to download the HTML contents of any webpage. The following script downloads the HTML source code for the Wikipedia article on artificial intelligence.

from pattern.web import download
page_html = download('https://en.wikipedia.org/wiki/Artificial_intelligence',
                     unicode=True)

#You can also download files from webpages, for example, images using the URL method:

from pattern.web import URL, extension
Example #6
0
    def _anything_1(self, *args):
        """Search Flickr for images matching the joined *args* text and emit
        each result through the object's outlets.

        Outlets: 1 = description, 2 = author, 3 = image URL, 4 = download
        directory (only when downloading), 5 = debug/status messages.
        Instance attributes start, count, sort, size, download and outpath,
        when present, override the defaults below.  When download is truthy,
        each image with a usable URL is saved under
        outpath/<query>_<YYYYmmdd-HHMM>/.
        """
        # Defaults; each is overridden by a matching instance attribute.
        varstart = 1
        varcount = 10
        varsort = 'relevancy'
        varsize = 'medium'
        download = False
        outpath = tempfile.gettempdir()

        self.input = ' '.join(str(arg) for arg in args)

        if hasattr(self, 'start'):
            varstart = self.start
        if hasattr(self, 'count'):
            varcount = self.count
        if hasattr(self, 'sort'):
            varsort = self.sort
        if hasattr(self, 'size'):
            varsize = self.size
        if hasattr(self, 'download'):
            download = self.download
        if hasattr(self, 'outpath'):
            outpath = os.path.abspath(self.outpath)

        # Produces the same text as the original piecewise %-format.
        dbgmsg = "start:%d count:%d sort:%s size:%s search:%s download:%s output path:%s" % (
            varstart, varcount, varsort, varsize, self.input, download, outpath)
        self._outlet(5, dbgmsg)

        engine = Flickr(license=None)
        results = engine.search(self.input, start=varstart, count=varcount,
                                sort=varsort, size=varsize, cached=False)

        if download:
            now = datetime.datetime.now()
            # Same value as the original "%s%s%s" % (input, "_", timestamp).
            timenow = "%s_%s" % (self.input, now.strftime('%Y%m%d-%H%M'))
            pathdir = os.path.join(outpath, timenow)
            if not os.path.isdir(pathdir):
                os.mkdir(pathdir)
                self._outlet(5, "created: %s" % str(pathdir))

        for img in results:
            author = encode_utf8(plaintext(img.author))
            # BUG FIX: this line was indented with a tab in the original,
            # breaking the block's space-based indentation.
            description = encode_utf8(img.description)
            url = encode_utf8(img.url)
            self._outlet(1, description)
            self._outlet(2, author)
            self._outlet(3, url)
            if download and url != "None":
                data = img.download()
                filename = img.url.rsplit("/", 1)[1]
                pathfile = os.path.join(pathdir, filename)
                # BUG FIX: open in binary mode ("wb") — image data is raw
                # bytes; text mode corrupts it on Windows and raises under
                # Python 3.  'with' guarantees the handle is closed.
                with open(pathfile, "wb") as f:
                    f.write(data)
                self._outlet(5, "downloaded: %s" % str(pathfile))

        if download:
            self._outlet(4, str(pathdir))