class ImageFinder:
    """Find one clip-art image for each of the top nouns in a lyrics text.

    Nouns come from ``Language.get_top_nouns``; images are scraped from a
    Google image search restricted to transparent clip-art.
    """

    def __init__(self):
        self.language = Language()

    def find(self, lyrics, limit=None):
        """Return ``(image, noun)`` pairs for the top nouns of *lyrics*.

        Args:
            lyrics: Text passed to ``self.language.get_top_nouns``.
            limit: Maximum number of nouns to look up; ``None`` means all.

        Returns:
            A list of ``(image, noun)`` tuples, where ``image`` is the first
            result of ``get_images(noun, 1)``. Nouns whose lookup fails or
            yields no image are skipped (best effort).
        """
        nouns = self.language.get_top_nouns(lyrics)
        # Slicing already handles ``None`` and limits beyond the list length.
        limited_nouns = nouns[:limit]
        if debug:
            print('Nouns: ' + ','.join(nouns))
            if limit is not None:
                print('Limiting to ({}): {}'.format(limit, ','.join(limited_nouns)))

        images = []
        for noun in limited_nouns:
            try:
                found = self.get_images(noun, 1)
            except Exception as err:
                # Best effort: one failing lookup must not abort the others.
                # ``Exception`` (not a bare except) lets Ctrl-C/SystemExit through.
                if debug:
                    print('Lookup failed for {}: {}'.format(noun, err))
                continue
            if found:
                images.append((found[0], noun))
            elif debug:
                print('No image found for ' + noun)
        return images

    @staticmethod
    def get_images(q, cnt):
        """Search for *q* and return thumbnails of the first *cnt* results.

        Args:
            q: Search phrase; whitespace-separated words are joined with ``+``.
            cnt: Number of leading search results to try to download.

        Returns:
            A list of numpy arrays, one per result that downloaded and decoded
            successfully, each scaled to fit within 152x152 pixels. Results
            that fail are reported on stdout and skipped, so the list may hold
            fewer than *cnt* items.
        """
        if debug:
            print('Query: ' + q)
        query = '+'.join(q.split())
        url = ("https://www.google.com/search?q=" + query +
               "&source=lnms&tbm=isch&tbs=itp:clipart,ic:trans")
        if debug:
            print('Query url: ' + url)
        header = {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"
        }

        if debug:
            print('Making request...')
        response = urllib2.urlopen(urllib2.Request(url, headers=header))
        try:
            soup = BeautifulSoup(response, 'html.parser')
        finally:
            # Release the connection even if parsing fails.
            response.close()

        if debug:
            print('Parsing response...')
        links = []
        for a in soup.find_all("div", {"class": "rg_meta"}):
            # Parse each metadata blob once; skip entries that are not valid
            # JSON or carry no original-image URL instead of failing the query.
            try:
                links.append(json.loads(a.text)["ou"])
            except (ValueError, KeyError, TypeError):
                continue

        if debug:
            print('Processing images...')
        images = []
        for img in links[:cnt]:
            try:
                req = requests.get(img)
                raw_img = Image.open(io.BytesIO(req.content))
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                raw_img.thumbnail((152, 152), Image.LANCZOS)
                images.append(np.asarray(raw_img))
            except Exception:
                # Best effort: report the bad image and move on.
                print("could not load : ", img)
        print('Finished query.')
        return images