Ejemplo n.º 1
0
class ImageFinder:
    """Look up one clip-art thumbnail for each top noun of a lyric.

    Nouns are extracted by the project's ``Language`` helper and each noun
    is sent to Google image search (clip-art, transparent background).
    All diagnostics are printed only when the module-level ``debug`` flag
    is truthy, except the messages emitted at the end of ``get_images``.
    """

    # Seconds to wait on any single HTTP request; without a timeout one
    # stalled server would block the whole search indefinitely.
    REQUEST_TIMEOUT = 10

    # Bounding box (width, height) in pixels that thumbnails are shrunk into.
    THUMBNAIL_SIZE = (152, 152)

    def __init__(self):
        self.language = Language()

    def find(self, lyrics, limit=None):
        """Return one image per top noun found in *lyrics*.

        Args:
            lyrics: Text passed to ``Language.get_top_nouns``.
            limit: Maximum number of nouns to search for; ``None`` (the
                default) searches for all of them.

        Returns:
            A list of ``(image, noun)`` tuples, where ``image`` is the first
            result of ``get_images`` for that noun. Nouns whose lookup fails
            or yields no image are skipped.
        """
        nouns = self.language.get_top_nouns(lyrics)
        # Slicing already copes with ``None`` and with limits past the end.
        limited_nouns = nouns[:limit]
        if debug:
            print('Nouns: ' + ','.join(nouns))
            if limit is not None:
                print('Limiting to ({}): {}'.format(limit,
                                                    ','.join(limited_nouns)))
        images = []
        for noun in limited_nouns:
            try:
                results = self.get_images(noun, 1)
            except Exception:
                # Best effort: one failing lookup must not abort the others.
                # Catching Exception (not a bare except) lets Ctrl-C and
                # SystemExit propagate.
                if debug:
                    print('Image lookup failed for: ' + noun)
                continue
            if results:
                images.append((results[0], noun))
        return images

    @staticmethod
    def get_images(q, cnt):
        """Search Google images for *q* and return up to *cnt* thumbnails.

        Args:
            q: Search phrase; whitespace-separated words are joined with
                ``+`` to form the query string.
            cnt: Maximum number of search results to download.

        Returns:
            A list of numpy arrays, one per image that could be downloaded
            and decoded. Images that fail to load are reported on stdout
            and omitted, so the list may hold fewer than *cnt* items.

        Raises:
            Whatever ``urllib2.urlopen`` raises when the search request
            itself fails.
        """
        if debug:
            print('Query: ' + q)
        query = '+'.join(q.split())
        url = "https://www.google.com/search?q=" + query + "&source=lnms&tbm=isch&tbs=itp:clipart,ic:trans"
        if debug:
            print('Query url: ' + url)

        header = {
            'User-Agent':
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"
        }
        if debug:
            print('Making request...')
        response = urllib2.urlopen(urllib2.Request(url, headers=header),
                                   timeout=ImageFinder.REQUEST_TIMEOUT)
        try:
            soup = BeautifulSoup(response, 'html.parser')
        finally:
            # Release the connection even if parsing blows up.
            response.close()

        if debug:
            print('Parsing response...')
        links = []
        for tag in soup.find_all("div", {"class": "rg_meta"}):
            # Each rg_meta div carries a JSON blob; only its "ou" field is
            # used, as the URL to download. Skip blobs that cannot be read
            # instead of abandoning the whole query.
            try:
                links.append(json.loads(tag.text)["ou"])
            except (ValueError, KeyError, TypeError):
                continue

        if debug:
            print('Processing images...')
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
        # filter under its current name. Fall back for very old releases.
        resample = getattr(Image, 'LANCZOS', None)
        if resample is None:
            resample = Image.ANTIALIAS

        images = []
        for link in links[:cnt]:
            try:
                reply = requests.get(link,
                                     timeout=ImageFinder.REQUEST_TIMEOUT)
                picture = Image.open(io.BytesIO(reply.content))
                # thumbnail() shrinks in place and keeps the aspect ratio.
                picture.thumbnail(ImageFinder.THUMBNAIL_SIZE, resample)
                images.append(np.asarray(picture))
            except Exception:
                # Best effort: a broken link or undecodable file is reported
                # and skipped.
                print("could not load : ", link)
        print('Finished query.')
        return images