def _extract_img_url(href):
    """Return the value of the ``imgurl=`` parameter inside *href*, or None.

    The value runs from just after ``imgurl=`` up to the next ``&`` (or to the
    end of the string when no ``&`` follows). None is returned when *href*
    does not contain ``imgurl=`` at all.
    """
    marker = "imgurl="
    marker_pos = href.find(marker)
    if marker_pos == -1:
        return None
    start = marker_pos + len(marker)
    # Search for the terminator only *after* the marker: an "&" that appears
    # earlier in the href must not cut the value short.
    end = href.find("&", start)
    if end == -1:
        # No further parameter follows; the value extends to the end.
        return href[start:]
    return href[start:end]


def picture(ingredients):
    """Look up one picture URL per ingredient via a Google image search.

    Args:
        ingredients: Comma-separated string of ingredient names.

    Returns:
        A dict keyed by each raw (unsanitized) comma-separated piece of
        *ingredients*. The value is the extracted image URL string, or an
        empty list when no image link could be found in the result page.
        Pieces whose request timed out or could not connect are logged and
        left out of the dict.
    """
    ua = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:32.0) Gecko/20100101 Firefox/32.0'
    headers = {'User-Agent': ua, 'Accept': '*/*'}
    search_url = 'https://www.google.ca/search'
    # Absolute XPath to the href of the first result link; tied to the exact
    # page layout, so it yields nothing whenever the markup differs.
    path = '/html/body/div[5]/div[4]/div[2]/div[3]/div/div[2]/div[2]/div/div/div/div/div[1]/div[1]/div[1]/div[1]/a/@href'
    all_imgs = {}
    for ingredient in ingredients.split(','):
        sanitized_ingr = sanitize_ingredients.sanitize(ingredient)
        # Let requests percent-encode the query so characters such as "&" or
        # "#" in an ingredient cannot corrupt the query string. A list of
        # pairs keeps the parameter order (q first, then tbm=isch).
        params = [('q', sanitized_ingr), ('tbm', 'isch')]
        try:
            page = requests.get(search_url, params=params, headers=headers, timeout=2)
        except requests.exceptions.Timeout:
            logging.warning("TIMEOUT GETTING PICTURE")
            continue
        except requests.exceptions.ConnectionError:
            logging.warning("CONNECTIONERROR GETTING PICTURE")
            continue

        tree = html.fromstring(page.text)
        hrefs = tree.xpath(path)
        img_url = _extract_img_url(hrefs[0]) if hrefs else None
        # An empty list is the existing "no picture found" marker; it is also
        # used when the link carries no imgurl parameter.
        all_imgs[ingredient] = img_url if img_url is not None else []
    return all_imgs
def test_sanitize_ingredients(self):
    """Check sanitize() against the mock 'turkeysandwich' search result.

    Takes element 2 of the first result returned by
    ``mock_yummly.search('turkeysandwich')`` as the ingredient list and
    verifies the sanitized form of its first eight entries, in order.
    """
    first_result = mock_yummly.search('turkeysandwich')[0]
    ingredients = first_result[2]

    # Expected sanitized text, indexed the same way as ``ingredients``.
    # NOTE(review): several expectations keep a trailing comma -- confirm
    # that this is the intended output of sanitize().
    expected = (
        'cooked turkey',
        'celery ribs,',
        'small onion,',
        'hard-cooked eggs,',
        'mayonnaise',
        'salt',
        'pepper',
        'hamburger buns,',
    )

    # Index explicitly (rather than zip) so a too-short ingredient list
    # still raises IndexError instead of silently skipping checks.
    for position, wanted in enumerate(expected):
        self.assertEqual(wanted, sanitize_ingredients.sanitize(ingredients[position]))