def total_time(self):
    """Return the combined prep and cook time of the recipe.

    Looks up the ``ul`` element with class ``recipe__details`` and, for every
    ``li`` whose ``span`` label reads "prep time" or "cook time" (compared
    case-insensitively), passes the stripped third child node of that ``li``
    to ``get_minutes``. The results are added together; 0 is returned when
    no entry matches.
    """
    wanted_labels = ["prep time", "cook time"]
    details = self.soup.find("ul", {"class": "recipe__details"})
    return sum(
        get_minutes(list(entry.children)[2].strip())
        for entry in details.find_all("li")
        if entry.span.get_text().lower() in wanted_labels
    )
def test_get_minutes_fraction_with_fraction_unicode_character_three_fours(self):
    """The unicode three-quarters glyph after a whole hour yields 105."""
    minutes = get_minutes("1¾ hours")
    self.assertEqual(105, minutes)
def test_get_minutes_int_in_string_literal(self):
    """A string holding only an integer is returned as that integer."""
    assert get_minutes("90") == 90
def test_get_minutes_handles_to(self):
    """A range written with "to" resolves to its upper bound."""
    minutes = get_minutes("15 to 20 minutes")
    self.assertEqual(20, minutes)
def test_get_minutes_english_description(self):
    """An "hour ... mins" phrase is summed into 75."""
    minutes = get_minutes("1 hour 15 mins")
    self.assertEqual(75, minutes)
def test_get_minutes_fraction_with_fraction_digits_with_slash_two_thirds(self):
    """A whole hour followed by a slash-written 2/3 yields 100."""
    minutes = get_minutes("1 2/3 hours")
    self.assertEqual(100, minutes)
def test_get_minutes_english_description_with_and(self):
    """An "and" between the hour and minute parts is tolerated."""
    minutes = get_minutes("1h and 15mins")
    self.assertEqual(75, minutes)
def test_get_minutes_fraction_with_fraction_digits_with_slash(self):
    """A whole hour followed by a slash-written 1/2 yields 90."""
    minutes = get_minutes("1 1/2 hours")
    self.assertEqual(90, minutes)
def test_get_minutes_long_iso_format(self):
    """An ISO 8601 duration that includes a day component yields 70."""
    assert get_minutes("P0DT1H10M") == 70
def test_get_minutes_english_description(self):
    """An "hour ... mins" phrase is summed into 75."""
    assert get_minutes("1 hour 15 mins") == 75
def test_get_minutes_short_iso_format(self):
    """An ISO 8601 time-only duration yields 150."""
    assert get_minutes("PT2H30M") == 150
def test_get_minutes_english_abbreviation(self):
    """The compact "h"/"m" abbreviation form yields 190."""
    assert get_minutes("3h10m") == 190
def get_from_scraper(scrape, space):
    """Convert a scraper object into the ld+json-style recipe dict.

    Args:
        scrape: scraper object exposing ``title()``, ``yields()``,
            ``total_time()``, ``image()``, ``ingredients()``,
            ``instructions()``, ``url`` and ``schema.data``.
        space: forwarded unchanged to ``parse_keywords``.

    Returns:
        dict with the keys ``name``, ``prepTime``, ``cookTime``,
        ``description``, ``servings``, ``keywords``, ``recipeIngredient``,
        ``recipeInstructions`` and, when the scraper provides one, ``image``.
    """
    # converting the scrape_me object to the existing json format based on ld+json
    recipe_json = {'name': scrape.title()}

    # Description and both times come from the schema data; if any lookup
    # fails with AttributeError all three fall back to empty defaults.
    try:
        description = scrape.schema.data.get("description") or ''
        recipe_json['prepTime'] = _utils.get_minutes(
            scrape.schema.data.get("prepTime")) or 0
        recipe_json['cookTime'] = _utils.get_minutes(
            scrape.schema.data.get("cookTime")) or 0
    except AttributeError:
        description = ''
        recipe_json['prepTime'] = 0
        recipe_json['cookTime'] = 0
    recipe_json['description'] = description

    # Use the first standalone integer in the yield text. TypeError is caught
    # as well because re.findall raises it when yields() returns a non-string
    # (e.g. None), which previously aborted the whole import.
    try:
        servings = int(re.findall(r'\b\d+\b', scrape.yields())[0])
    except (AttributeError, TypeError, ValueError, IndexError):
        servings = 1
    recipe_json['servings'] = servings

    # Only when neither time is known is the scraper's total time used.
    if recipe_json['cookTime'] + recipe_json['prepTime'] == 0:
        try:
            recipe_json['prepTime'] = scrape.total_time()
        except AttributeError:
            pass

    try:
        recipe_json['image'] = scrape.image()
    except AttributeError:
        pass

    keywords = []
    try:
        for schema_key in ("keywords", "recipeCategory", "recipeCuisine"):
            if scrape.schema.data.get(schema_key):
                keywords += listify_keywords(scrape.schema.data.get(schema_key))
        # Case-fold and de-duplicate before handing the list to parse_keywords.
        recipe_json['keywords'] = parse_keywords(
            list(set(map(str.casefold, keywords))), space)
    except AttributeError:
        recipe_json['keywords'] = keywords

    def build_entry(amount, unit, ingredient, note, original):
        # Unit and ingredient each receive a random placeholder id.
        return {
            'amount': amount,
            'unit': {
                'text': unit,
                'id': random.randrange(10000, 99999)
            },
            'ingredient': {
                'text': ingredient,
                'id': random.randrange(10000, 99999)
            },
            'note': note,
            'original': original
        }

    ingredients = []
    try:
        for line in scrape.ingredients():
            try:
                amount, unit, ingredient, note = parse_ingredient(line)
                if ingredient:
                    ingredients.append(
                        build_entry(amount, unit, ingredient, note, line))
            except Exception:
                # Unparseable line: keep the raw text as the ingredient name.
                ingredients.append(build_entry(0, '', line, '', line))
    except AttributeError:
        pass
    recipe_json['recipeIngredient'] = ingredients

    try:
        recipe_json['recipeInstructions'] = scrape.instructions()
    except AttributeError:
        recipe_json['recipeInstructions'] = ""

    # Skip the source note when there is no URL; concatenating None would
    # raise TypeError.
    if scrape.url:
        recipe_json['recipeInstructions'] += "\n\nImported from " + scrape.url
    return recipe_json
def test_get_minutes_fraction_with_fraction_unicode_character_one_fours(self):
    """The unicode one-quarter glyph after a whole hour yields 75."""
    minutes = get_minutes("1¼ hours")
    self.assertEqual(75, minutes)
def test_get_minutes_english_abbreviation(self):
    """The compact "h"/"m" abbreviation form yields 190."""
    minutes = get_minutes("3h10m")
    self.assertEqual(190, minutes)
def test_get_minutes_fraction_with_fraction_unicode_character_two_thirds(self):
    """The unicode two-thirds glyph after a whole hour yields 100."""
    minutes = get_minutes("1⅔ hours")
    self.assertEqual(100, minutes)
def test_get_minutes_short_iso_format(self):
    """An ISO 8601 time-only duration yields 150."""
    minutes = get_minutes("PT2H30M")
    self.assertEqual(150, minutes)
def test_get_minutes_fraction_with_fraction_digits_with_slash_three_fours(self):
    """A whole hour followed by a slash-written 3/4 yields 105."""
    minutes = get_minutes("1 3/4 hours")
    self.assertEqual(105, minutes)
def test_get_minutes_long_iso_format(self):
    """An ISO 8601 duration that includes a day component yields 70."""
    minutes = get_minutes("P0DT1H10M")
    self.assertEqual(70, minutes)
def test_get_minutes_fraction_with_fraction_digits_with_slash_one_fours(self):
    """A whole hour followed by a slash-written 1/4 yields 75."""
    minutes = get_minutes("1 1/4 hours")
    self.assertEqual(75, minutes)
def test_get_minutes_int_in_string_literal(self):
    """A string holding only an integer is returned as that integer."""
    minutes = get_minutes("90")
    self.assertEqual(90, minutes)
def test_get_minutes_handles_dashes(self):
    """A range written with a dash resolves to its upper bound."""
    minutes = get_minutes("15 - 20 minutes")
    self.assertEqual(20, minutes)
def test_get_minutes_fraction_in_hours_with_dot_notation(self):
    """A decimal hour count yields 90."""
    minutes = get_minutes("1.5 hours")
    self.assertEqual(90, minutes)
def get_from_scraper(scrape, request):
    """Build a recipe dict from a scraper object, preferring scraper methods.

    Most fields are read from the scraper's accessor method first and fall
    back to the raw ``scrape.schema.data`` entry when that call raises
    (or, for name/description/servings/image, returns a falsy value).

    NOTE(review): the definition as visible here ends after the source-url
    keyword is appended. ``description`` and ``keywords`` are never stored
    in ``recipe_json`` and nothing is returned, so the tail of this function
    looks truncated; confirm against the full file. ``request`` is not used
    in the visible part.
    """
    # converting the scrape_me object to the existing json format based on ld+json
    recipe_json = {}
    # Name: scraper title first, then the schema "name", then ''.
    try:
        recipe_json['name'] = parse_name(scrape.title() or None)
    except Exception:
        recipe_json['name'] = None
    if not recipe_json['name']:
        try:
            recipe_json['name'] = scrape.schema.data.get('name') or ''
        except Exception:
            recipe_json['name'] = ''

    # Description: scraper method first, then the schema "description", then ''.
    try:
        description = scrape.description() or None
    except Exception:
        description = None
    if not description:
        try:
            description = scrape.schema.data.get("description") or ''
        except Exception:
            description = ''

    recipe_json['internal'] = True

    # Servings: scraper yields first, then the schema "recipeYield", then 1.
    try:
        servings = scrape.yields() or None
    except Exception:
        servings = None
    if not servings:
        try:
            servings = scrape.schema.data.get('recipeYield') or 1
        except Exception:
            servings = 1

    # The same raw value feeds both the parsed count and the display text.
    recipe_json['servings'] = parse_servings(servings)
    recipe_json['servings_text'] = parse_servings_text(servings)

    # working_time <- prep time; the schema value is only consulted when the
    # scraper call raises, not when it yields a falsy result.
    try:
        recipe_json['working_time'] = get_minutes(scrape.prep_time()) or 0
    except Exception:
        try:
            recipe_json['working_time'] = get_minutes(
                scrape.schema.data.get("prepTime")) or 0
        except Exception:
            recipe_json['working_time'] = 0
    # waiting_time <- cook time, with the same fallback shape.
    try:
        recipe_json['waiting_time'] = get_minutes(scrape.cook_time()) or 0
    except Exception:
        try:
            recipe_json['waiting_time'] = get_minutes(
                scrape.schema.data.get("cookTime")) or 0
        except Exception:
            recipe_json['waiting_time'] = 0

    # With no prep or cook time at all, total time is stored as working_time.
    if recipe_json['working_time'] + recipe_json['waiting_time'] == 0:
        try:
            recipe_json['working_time'] = get_minutes(scrape.total_time()) or 0
        except Exception:
            try:
                recipe_json['working_time'] = get_minutes(
                    scrape.schema.data.get("totalTime")) or 0
            except Exception:
                pass

    # Image: scraper method first, then the schema "image", then ''.
    try:
        recipe_json['image'] = parse_image(scrape.image()) or None
    except Exception:
        recipe_json['image'] = None
    if not recipe_json['image']:
        try:
            recipe_json['image'] = parse_image(
                scrape.schema.data.get('image')) or ''
        except Exception:
            recipe_json['image'] = ''

    # Keywords are collected from schema keywords, category and cuisine.
    keywords = []
    try:
        if scrape.schema.data.get("keywords"):
            keywords += listify_keywords(scrape.schema.data.get("keywords"))
    except Exception:
        pass
    try:
        if scrape.category():
            keywords += listify_keywords(scrape.category())
    except Exception:
        try:
            if scrape.schema.data.get('recipeCategory'):
                keywords += listify_keywords(
                    scrape.schema.data.get("recipeCategory"))
        except Exception:
            pass
    try:
        if scrape.cuisine():
            keywords += listify_keywords(scrape.cuisine())
    except Exception:
        try:
            if scrape.schema.data.get('recipeCuisine'):
                keywords += listify_keywords(
                    scrape.schema.data.get("recipeCuisine"))
        except Exception:
            pass

    if source_url := scrape.url:
        recipe_json['source_url'] = source_url
        # The host part of the source URL is added as an extra keyword.
        try:
            keywords.append(
                source_url.replace('http://', '').replace('https://', '').split('/')[0])
        except Exception:
            pass
def test_get_minutes_fraction_with_fraction_unicode_character_halves(self):
    """The unicode one-half glyph after a whole hour yields 90."""
    minutes = get_minutes("1½ hours")
    self.assertEqual(90, minutes)
def get_from_scraper(scrape, request):
    """Convert a scraper object into the ld+json-style recipe dict.

    Every field is extracted on a best-effort basis: a failing lookup falls
    back to the next source or to a neutral default instead of aborting.

    Args:
        scrape: scraper object exposing ``title()``, ``yields()``,
            ``total_time()``, ``image()``, ``ingredients()``,
            ``instructions()``, ``url`` and ``schema.data``.
        request: passed to ``IngredientParser``; its ``space`` attribute is
            forwarded to ``parse_keywords``.

    Returns:
        dict with the keys ``name``, ``description``, ``servings``,
        ``prepTime``, ``cookTime``, ``image``, ``keywords``,
        ``recipeIngredient``, ``recipeInstructions`` and, when the scraper
        has a URL, ``url``.
    """
    # converting the scrape_me object to the existing json format based on ld+json
    recipe_json = {}

    # Name: scraper title first, then the schema "name", then ''.
    try:
        recipe_json['name'] = parse_name(scrape.title() or None)
    except Exception:
        recipe_json['name'] = None
    if not recipe_json['name']:
        try:
            recipe_json['name'] = scrape.schema.data.get('name') or ''
        except Exception:
            recipe_json['name'] = ''

    try:
        description = scrape.schema.data.get("description") or ''
    except Exception:
        description = ''
    recipe_json['description'] = parse_description(description)

    # Servings: scraper yields first, then the schema "recipeYield", then 1.
    try:
        servings = scrape.yields() or None
    except Exception:
        servings = None
    if not servings:
        try:
            servings = scrape.schema.data.get('recipeYield') or 1
        except Exception:
            servings = 1
    if type(servings) is not int:
        # Use the first standalone integer found in the yield text.
        try:
            servings = int(re.findall(r'\b\d+\b', servings)[0])
        except Exception:
            servings = 1
    recipe_json['servings'] = max(servings, 1)

    try:
        recipe_json['prepTime'] = get_minutes(
            scrape.schema.data.get("prepTime")) or 0
    except Exception:
        recipe_json['prepTime'] = 0
    try:
        recipe_json['cookTime'] = get_minutes(
            scrape.schema.data.get("cookTime")) or 0
    except Exception:
        recipe_json['cookTime'] = 0

    # With no prep or cook time at all, total time is stored as prepTime.
    if recipe_json['cookTime'] + recipe_json['prepTime'] == 0:
        try:
            recipe_json['prepTime'] = get_minutes(scrape.total_time()) or 0
        except Exception:
            try:
                # Store the schema fallback; the result used to be computed
                # and then thrown away, leaving prepTime at 0.
                recipe_json['prepTime'] = get_minutes(
                    scrape.schema.data.get("totalTime")) or 0
            except Exception:
                pass

    # Image: scraper method first, then the schema "image", then ''.
    try:
        recipe_json['image'] = parse_image(scrape.image()) or None
    except Exception:
        recipe_json['image'] = None
    if not recipe_json['image']:
        try:
            recipe_json['image'] = parse_image(
                scrape.schema.data.get('image')) or ''
        except Exception:
            recipe_json['image'] = ''

    # Each keyword source is tried independently so one bad entry does not
    # discard the others.
    keywords = []
    for schema_key in ("keywords", "recipeCategory", "recipeCuisine"):
        try:
            if scrape.schema.data.get(schema_key):
                keywords += listify_keywords(scrape.schema.data.get(schema_key))
        except Exception:
            pass
    try:
        # Case-fold and de-duplicate before handing the list to parse_keywords.
        recipe_json['keywords'] = parse_keywords(
            list(set(map(str.casefold, keywords))), request.space)
    except AttributeError:
        recipe_json['keywords'] = keywords

    ingredient_parser = IngredientParser(request, True)

    def build_entry(amount, unit, ingredient, note, original):
        # Unit and ingredient each receive a random placeholder id.
        return {
            'amount': amount,
            'unit': {
                'text': unit,
                'id': random.randrange(10000, 99999)
            },
            'ingredient': {
                'text': ingredient,
                'id': random.randrange(10000, 99999)
            },
            'note': note,
            'original': original
        }

    ingredients = []
    try:
        for line in scrape.ingredients():
            try:
                amount, unit, ingredient, note = ingredient_parser.parse(line)
                ingredients.append(
                    build_entry(amount, unit, ingredient, note, line))
            except Exception:
                # Unparseable line: keep the raw text as the ingredient name.
                ingredients.append(build_entry(0, '', line, '', line))
    except Exception:
        pass
    recipe_json['recipeIngredient'] = ingredients

    try:
        recipe_json['recipeInstructions'] = parse_instructions(
            scrape.instructions())
    except Exception:
        recipe_json['recipeInstructions'] = ""

    if scrape.url:
        recipe_json['url'] = scrape.url
        recipe_json['recipeInstructions'] += "\n\nImported from " + scrape.url
    return recipe_json
def parse_recipe():
    """Scrape the recipe at the ``url`` query parameter and return it.

    Schema.org extraction via ``scrape_schema_recipe`` is tried first and its
    result is normalised (instructions, keywords and author HTML-escaped;
    image and yield reduced to a single value; times passed through
    ``get_minutes``). If that fails or does not give exactly one recipe, the
    site-specific ``scrape_me`` scraper is used instead.

    Returns:
        The recipe dict on success; otherwise a response built with
        ``make_response``: status 404 when no ``url`` parameter was
        supplied, status 500 when both scraping strategies fail.
    """
    # args.get() returns None for a missing key instead of raising, so the
    # absence of the parameter has to be tested explicitly.
    recipe_url = request.args.get('url')
    if not recipe_url:
        return make_response(
            'Need a url in parameters. See <a href="/api">/api</a> for more info',
            404)

    try:
        recipes = scrape_schema_recipe.scrape_url(recipe_url)
        if len(recipes) == 1 and recipes[0] is not None:
            recipe = recipes[0]

            if 'recipeInstructions' in recipe:
                ins = recipe['recipeInstructions']
                if isinstance(ins, str):
                    recipe['recipeInstructions'] = [html.escape(ins)]
                elif isinstance(ins, list) and len(ins) > 0:
                    if isinstance(ins[0], dict):
                        # List of step objects: keep only their 'text' values.
                        recipe['recipeInstructions'] = [
                            html.escape(value)
                            for item in ins
                            for key, value in item.items()
                            if key == 'text'
                        ]
                    else:
                        recipe['recipeInstructions'] = [
                            html.escape(step) for step in ins
                        ]

            if 'keywords' in recipe:
                recipe['keywords'] = [
                    html.escape(word.strip())
                    for word in recipe['keywords'].split(',')
                ]

            if 'image' in recipe:
                image = recipe['image']
                if isinstance(image, dict):
                    if 'url' in image:
                        recipe['image'] = image['url']
                elif isinstance(image, list):
                    # Several images: the last one is used.
                    recipe['image'] = image[-1]

            if 'author' in recipe:
                author = recipe['author']
                if isinstance(author, dict) and 'name' in author:
                    recipe['author'] = html.escape(author['name'])

            if 'recipeYield' in recipe:
                r_yield = recipe['recipeYield']
                if isinstance(r_yield, str):
                    # Keep only the part before the first comma.
                    recipe['recipeYield'] = r_yield.split(',')[0].strip()
                elif isinstance(r_yield, list) and len(r_yield) > 0:
                    recipe['recipeYield'] = r_yield[0]

            for time_key in ('cookTime', 'prepTime', 'totalTime'):
                if time_key in recipe:
                    recipe[time_key] = get_minutes(recipe[time_key])
            return recipe
    except Exception as e:
        # Best effort: report the failure and fall through to scrape_me.
        print(e.args)

    try:
        recipe = scrape_me(recipe_url)
        to_return = {
            "@type": "noSchema",
            "name": recipe.title(),
            "url": recipe.url(),
            "recipeIngredients": recipe.ingredients(),
            "recipeInstructions": [
                i for i in recipe.instructions().split('\n') if i != ""
            ],
            "review": recipe.reviews(),
            "aggregateRating": recipe.ratings(),
            "totalTime": recipe.total_time(),
            "recipeYield": recipe.yields(),
            "image": recipe.image()
        }
        return to_return
    except Exception as e:
        # Adjacent literals replace the backslash continuation that sat inside
        # the string and leaked source formatting into the message.
        return make_response(
            'Error processing request. That domain might not be in the list '
            f'See <a href="/api">/api</a> for more info. Error: {e.args}',
            500)