def test_multiply_recipe():
    """Check that multiply_recipe scales yield and ingredient factors.

    A recipe is multiplied by 2; the test asserts the doubled factor of the
    first yield, of the top-level ingredients and of the grouped ingredient,
    and that an ingredient without an amount still has no amount.
    """
    servings = Amount(factor=Decimal('5'), unit="servings")
    factorless_yield = Amount(unit="unitless yield")

    eggs = Ingredient(amount=Amount(factor=Decimal('5')), name='Eggs')
    butter = Ingredient(amount=Amount(factor=Decimal('200'), unit='g'),
                        name='Butter')
    salt = Ingredient(name='Salt')  # deliberately has no amount

    garlic = Ingredient(amount=Amount(factor=Decimal('2'), unit='cloves'),
                        name='Garlic')
    group = IngredientGroup(title='Group', ingredients=[garlic])

    original = Recipe(
        title="Test",
        yields=[servings, factorless_yield],
        ingredients=[eggs, butter, salt],
        ingredient_groups=[group],
    )

    doubled = multiply_recipe(original, Decimal(2))

    assert doubled.yields[0].factor == Decimal('10')

    top_level = doubled.ingredients
    assert top_level[0].amount.factor == Decimal('10')
    assert top_level[1].amount.factor == Decimal('400')
    assert top_level[2].amount is None

    grouped = doubled.ingredient_groups[0].ingredients
    assert grouped[0].amount.factor == Decimal('4')
def test_ingredient_list_get_leaf_ingredients():
    """Check that Recipe.leaf_ingredients flattens nested ingredient groups.

    The recipe has three top-level ingredients, one group with one ingredient
    and a subgroup with one more. The test expects all five ingredients, the
    top-level ones first, then the group's, then the subgroup's.
    """
    recipe = Recipe(
        title="Test",
        ingredients=[
            Ingredient(amount=Amount(factor=Decimal('5')), name='Eggs'),
            Ingredient(amount=Amount(factor=Decimal('200'), unit='g'),
                       name='Butter'),
            Ingredient(name='Salt'),
        ],
        ingredient_groups=[
            IngredientGroup(
                title='Group',
                ingredients=[
                    Ingredient(amount=Amount(factor=Decimal('2'),
                                             unit='cloves'),
                               name='Garlic'),
                ],
                ingredient_groups=[
                    IngredientGroup(title='Subgroup',
                                    ingredients=[Ingredient(name='Onions')]),
                ],
            ),
        ],
    )

    leaf_ingredients = list(recipe.leaf_ingredients)

    # One comparison covers both the count and the order, and shows the full
    # list of names when it fails.
    assert [ingredient.name for ingredient in leaf_ingredients] == [
        'Eggs', 'Butter', 'Salt', 'Garlic', 'Onions'
    ]
def test_get_recipe_with_yield():
    """Check get_recipe_with_yield for matching, unitless and unknown units.

    The base recipe yields 2 servings and needs 5 eggs.
    """
    five_eggs = Ingredient(amount=Amount(factor=Decimal('5')), name='Eggs')
    base = Recipe(
        title="Test",
        yields=[Amount(factor=Decimal('2'), unit="servings")],
        ingredients=[five_eggs],
    )

    # requested unit matches a yield of the recipe: 2 -> 4 servings
    four_servings = Amount(factor=Decimal('4'), unit='servings')
    scaled = get_recipe_with_yield(base, four_servings)
    assert scaled.yields[0] == Amount(factor=Decimal('4'), unit='servings')
    assert scaled.ingredients[0].amount == Amount(factor=Decimal('10'))

    # interpreted as "4 recipes", that is multiply by 4
    four_unitless = Amount(factor=Decimal('4'))
    quadrupled = get_recipe_with_yield(base, four_unitless)
    assert quadrupled.yields[0] == Amount(factor=Decimal('8'),
                                          unit='servings')
    assert quadrupled.ingredients[0].amount == Amount(factor=Decimal('20'))

    # if recipe has unitless yield, it is preferred to the above interpretation
    base_with_unitless_yield = replace(base,
                                       yields=[Amount(factor=Decimal('4'))])
    unchanged = get_recipe_with_yield(base_with_unitless_yield,
                                      Amount(factor=Decimal('4')))
    assert unchanged.yields[0] == Amount(factor=Decimal('4'))
    assert unchanged.ingredients[0].amount == Amount(factor=Decimal('5'))

    # try with unit not in recipe yields
    unknown_unit = Amount(factor=Decimal('500'), unit='ml')
    with pytest.raises(StopIteration):
        get_recipe_with_yield(base, unknown_unit)
def extract(url, soup):
    """Build a Recipe from a seriouseats.com recipe page.

    Args:
        url: Address the page was loaded from; only used to decide whether
            this extractor applies.
        soup: Parsed page offering ``find``/``find_all`` (presumably a
            BeautifulSoup document -- confirm against the caller).

    Returns:
        A Recipe with title, description, tags, yields, ingredients and
        instructions, or None when ``url`` does not contain
        ``seriouseats.com``.
    """
    if 'seriouseats.com' not in url:
        return None

    # title
    title = soup.find('h1', attrs={'class': 'title recipe-title'}).text.strip()

    # summary: all introduction paragraphs except those marked as captions
    intro = soup.find('div', attrs={'class': 'recipe-introduction-body'})
    summary = ''.join(
        par.text + '\n\n'
        for par in intro.find_all('p')
        if 'caption' not in par.attrs.get('class', [])
    ).strip()

    # servings: the first run of digits in the yield text, if there is one
    yields = []
    servings = soup.find('span', attrs={'class': 'info yield'}).text
    servings_factor = re.search(r'\d+', servings)
    if servings_factor:
        yields.append(Amount(Decimal(servings_factor.group()), 'servings'))

    # tags
    tags = [tag.text for tag in soup.find_all('a', attrs={'class': 'tag'})]

    # ingredients: kept as plain names, amounts are not parsed out
    ingredients = [
        Ingredient(name=ingred.text)
        for ingred in soup.find_all('li', attrs={'class': 'ingredient'})
    ]

    # instructions: one "<number> <text>" line per procedure step
    steps = []
    for step in soup.find_all('li', attrs={'class': 'recipe-procedure'}):
        step_number = step.find('div', attrs={
            'class': 'recipe-procedure-number'
        }).text.strip()
        step_text = step.find('div', attrs={
            'class': 'recipe-procedure-text'
        }).text.strip()
        steps.append(step_number + ' ' + step_text + '\n')
    instructions = ''.join(steps).strip()

    return Recipe(title=title,
                  ingredients=ingredients,
                  instructions=instructions,
                  description=summary,
                  tags=tags,
                  yields=yields)
def test_serialize(self, serializer):
    """Serialize the reference recipe and compare with the reference markdown.

    Reads ``recipe.md`` and ``recipe.json`` from the ``testcases`` folder next
    to this file's parent directory, builds a Recipe from the JSON and expects
    ``serializer.serialize`` to reproduce the markdown exactly.
    """
    testcase_folder = os.path.join(os.path.dirname(__file__), '..',
                                   'testcases')

    def read_testcase(filename):
        # All test case files are UTF-8 text.
        path = os.path.join(testcase_folder, filename)
        with open(path, 'r', encoding='UTF-8') as handle:
            return handle.read()

    expected_markdown = read_testcase('recipe.md')
    recipe = Recipe.from_json(read_testcase('recipe.json'))

    assert serializer.serialize(recipe) == expected_markdown
def test_parse(self, parser, testcase_file):
    """Parse one markdown test case and check the outcome.

    Files ending in ``.invalid.md`` must make ``parser.parse`` raise. Any
    other file must parse to the Recipe stored in the JSON file with the same
    base name.

    Args:
        parser: Object under test; only its ``parse(str)`` method is used.
        testcase_file: Path of the markdown test case.
    """
    # Read the input before entering pytest.raises so that a missing or
    # unreadable test case file fails the test instead of counting as the
    # expected parse error.
    with open(testcase_file, 'r', encoding='UTF-8') as f:
        markdown = f.read()

    if testcase_file.endswith('.invalid.md'):
        # Exception rather than BaseException: KeyboardInterrupt or
        # SystemExit must not be accepted as a parse failure.
        with pytest.raises(Exception):
            parser.parse(markdown)
        return

    expected_result_file = os.path.splitext(testcase_file)[0] + '.json'
    with open(expected_result_file, 'r', encoding='UTF-8') as f:
        expected_result = Recipe.from_json(f.read())

    actual_result = parser.parse(markdown)
    assert actual_result == expected_result
def extract(url, _):
    """Build a Recipe from schema.org recipe data found at ``url``.

    Args:
        url: Address handed to ``scrape_schema_recipe.scrape_url``.
        _: Unused; present so all extractors share one signature.

    Returns:
        A Recipe built from the first recipe found, or None when scraping
        fails or finds no recipe.
    """
    try:
        json_recipes = scrape_schema_recipe.scrape_url(url,
                                                       python_objects=True)
    except Exception:
        # Best effort: any scraping problem means this extractor does not
        # apply. Unlike a bare except, Ctrl-C and SystemExit still propagate.
        return None

    if not json_recipes:
        return None
    json_recipe = json_recipes[0]

    tags = []
    if "cookingMethod" in json_recipe:
        tags.append(json_recipe["cookingMethod"])
    if "recipeCategory" in json_recipe:
        append_or_extend(tags, json_recipe["recipeCategory"])
    if "recipeCuisine" in json_recipe:
        tags.append(json_recipe["recipeCuisine"])
    if "keywords" in json_recipe:
        kw = json_recipe["keywords"]
        if isinstance(kw, str):
            # "a, b" must become ['a', 'b'], without the blank after the comma
            kw = [word.strip() for word in kw.split(',') if word.strip()]
        append_or_extend(tags, kw)

    description_parts = []
    if "description" in json_recipe:
        description_parts.append(json_recipe["description"])
    if "image" in json_recipe:
        image = json_recipe["image"]
        if isinstance(image, list):
            # Several images may be given; use the first, tolerate none.
            image = image[0] if image else None
        if image:
            description_parts.append(f'![]({image})')

    yields = []
    if "recipeYield" in json_recipe:
        yields.append(RecipeParser.parse_amount(json_recipe["recipeYield"]))

    # The source link is appended to the instructions; fall back to the
    # requested address when the data carries no "url" entry.
    source_url = json_recipe["url"] if "url" in json_recipe else url

    recipe = Recipe(
        title=json_recipe["name"],
        description="\n\n".join(description_parts),
        tags=tags,
        yields=yields,
        ingredients=[
            Ingredient(name=ingred)
            for ingred in json_recipe["recipeIngredient"]
        ],
        instructions=
        f'{create_instructions(json_recipe["recipeInstructions"])}\n\n{source_url}',
    )

    return recipe
def extract(url, soup):
    """Build a Recipe from a chefkoch.de recipe page.

    Args:
        url: Address the page was loaded from; only used to decide whether
            this extractor applies.
        soup: Parsed page offering ``find``/``find_all`` (presumably a
            BeautifulSoup document -- confirm against the caller).

    Returns:
        A Recipe, or None when ``url`` does not contain ``chefkoch.de``.

    Raises:
        ValueError: The page title is one of chefkoch's "not found" messages.
    """
    if 'chefkoch.de' not in url:
        return None

    # title
    title = soup.find('h1', attrs={'class': 'page-title'}).text
    if title in ('Fehler: Seite nicht gefunden',
                 'Fehler: Rezept nicht gefunden'):
        raise ValueError('No recipe found, check URL')

    # summary
    summary_tag = soup.find('div', attrs={'class': 'summary'})
    summary = summary_tag.text if summary_tag else None

    # servings: compare the Decimal itself so that values such as "1.5",
    # which int() rejects, still work
    servings = Decimal(soup.find('input', attrs={'id': 'divisor'}).attrs['value'])
    yields = [Amount(servings, 'Portionen' if servings > 1 else 'Portion')]

    # tags: a page without a tag cloud simply has no tags
    tagcloud = soup.find('ul', attrs={'class': 'tagcloud'})
    tags = [tag.text for tag in tagcloud.find_all('a')] if tagcloud else []

    # ingredients ('incredients' is the class name this code looks up, do not
    # "correct" it)
    table = soup.find('table', attrs={'class': 'incredients'})
    ingreds = []
    for row in table.find_all('tr'):
        cols = [cell.text.strip() for cell in row.find_all('td')]
        if len(cols) < 2:
            # not an "amount | name" row (e.g. no <td> cells at all)
            continue
        amount = RecipeParser.parse_amount(cols[0])
        ingreds.append(Ingredient(name=cols[1], amount=amount))

    # instructions: text only, without leading and trailing whitespace
    instruct = soup.find('div', attrs={'id': 'rezept-zubereitung'}).text.strip()

    return Recipe(title=title,
                  ingredients=ingreds,
                  instructions=instruct,
                  description=summary,
                  tags=tags,
                  yields=yields)
def extract(url, _):
    """Build a Recipe for ``url`` from the scraper returned by ``scrape_me``.

    Args:
        url: Address handed to ``scrape_me``.
        _: Unused; present so all extractors share one signature.

    Returns:
        A Recipe with title, description, yields, ingredients and
        instructions, or None when ``scrape_me`` raises
        WebsiteNotImplementedError.
    """
    try:
        scraper = scrape_me(url)
    except WebsiteNotImplementedError:
        return None

    # The description is just the image as markdown, or empty when the
    # scraper does not implement image().
    try:
        image_markdown = f'![]({scraper.image()})'
    except NotImplementedError:
        image_markdown = ''

    title = scraper.title()
    parsed_yield = RecipeParser.parse_amount(scraper.yields())

    # Ingredient lines are stored as plain names; amounts are not parsed out.
    ingredients = []
    for line in scraper.ingredients():
        ingredients.append(Ingredient(name=line))

    instructions = scraper.instructions()

    return Recipe(
        title=title,
        description=image_markdown,
        yields=[parsed_yield],
        ingredients=ingredients,
        instructions=instructions,
    )
def recipe() -> Recipe:
    """Return a sample Recipe with tags, yields and nested ingredient groups.

    The recipe has three tags, two yields, three top-level ingredients and one
    ingredient group 'Group' that contains another 'Group', which in turn
    contains 'Subgroup'.
    """

    def garlic() -> Ingredient:
        # A fresh object per use, so the two groups never share an instance.
        return Ingredient(amount=Amount(factor=Decimal('2'), unit='cloves'),
                          name='Garlic')

    subgroup = IngredientGroup(title='Subgroup',
                               ingredients=[Ingredient(name='Onions')])
    inner_group = IngredientGroup(title='Group',
                                  ingredients=[garlic()],
                                  ingredient_groups=[subgroup])
    outer_group = IngredientGroup(title='Group',
                                  ingredients=[garlic()],
                                  ingredient_groups=[inner_group])

    top_level_ingredients = [
        Ingredient(amount=Amount(factor=Decimal('5')), name='Eggs'),
        Ingredient(amount=Amount(factor=Decimal('200'), unit='g'),
                   name='Butter'),
        Ingredient(name='Salt'),
    ]

    # NOTE(review): Decimal(0.4) is built from a float, so its value is the
    # binary approximation of 0.4, not exactly 0.4 -- confirm this is intended.
    sample_yields = [
        Amount(factor=Decimal("1"), unit="serving"),
        Amount(factor=Decimal(0.4), unit="kg"),
    ]

    return Recipe(
        title="Test",
        tags=["vegetarian", "flavorful", "tag with spaces"],
        yields=sample_yields,
        ingredients=top_level_ingredients,
        ingredient_groups=[outer_group],
    )
def _extract_from_json(data):
    """Build a Recipe from the JSON data of a WP Recipe Maker recipe."""
    recipe_data = data['recipe']

    # title
    title = getText(recipe_data['name'])

    # summary
    summary = getText(recipe_data['summary'])

    # servings
    servings_amount = RecipeParser.parse_amount(recipe_data['servings'])
    servings_unit = recipe_data['servings_unit']
    if servings_unit != "":
        servings_amount = replace(servings_amount, unit=servings_unit)
    yields = [servings_amount]

    # tags: every tag name of every tag group
    tags = []
    for tag_group in recipe_data['tags'].values():
        tags.extend(tag['name'] for tag in tag_group)

    # ingredients: one IngredientGroup per group in the data
    ingredients = []
    for ingred_group in recipe_data['ingredients']:
        # Kept separate from the recipe title so that the title survives.
        group_title = (getText(ingred_group['name'])
                       if 'name' in ingred_group else None)
        children = []
        for ingred in ingred_group['ingredients']:
            amount = RecipeParser.parse_amount(ingred['amount'])
            unit = ingred['unit'].strip()
            if unit != '':
                amount = replace(amount, unit=unit)
            name = getText('{} {}'.format(ingred['name'], ingred['notes']))
            children.append(Ingredient(name, amount))
        ingredients.append(
            IngredientGroup(title=group_title, ingredients=children))

    # instructions: optional "## group" heading, then numbered steps
    parts = []
    for instr_group in recipe_data['instructions']:
        if 'name' in instr_group:
            parts.append('## ' + getText(instr_group['name']) + '\n')
        for number, instr in enumerate(instr_group['instructions'], start=1):
            parts.append('{}. {}\n'.format(number, getText(instr['text'])))
    if 'notes' in recipe_data:
        parts.append('\n## Recipe Notes\n\n' + getText(recipe_data['notes']))

    return Recipe(title=title,
                  ingredients=ingredients,
                  instructions=''.join(parts),
                  description=summary,
                  tags=tags,
                  yields=yields)


def _extract_from_html(soup):
    """Build a Recipe from the WP Recipe Maker markup of the page itself."""
    # title
    title = soup.find(attrs={'class': 'wprm-recipe-name'}).text.strip()

    # summary: optional
    summary_tag = soup.find('div', attrs={'class': 'wprm-recipe-summary'})
    summary = summary_tag.text.strip() if summary_tag is not None else None

    # yields
    yields = []
    servings = soup.find(
        'span', attrs={'class': 'wprm-recipe-details wprm-recipe-servings'})
    if servings is not None:
        servings_amount = RecipeParser.parse_amount(servings.text.strip())
        unit_tag = soup.find(
            'span',
            attrs={
                'class': 'wprm-recipe-details-unit wprm-recipe-servings-unit'
            })
        servings_unit = unit_tag.text.strip() if unit_tag is not None else ""
        if servings_unit != "":
            servings_amount = replace(servings_amount, unit=servings_unit)
        yields.append(servings_amount)

    # tags: courses, then cuisines, then keywords, each a comma separated list
    tags = []
    for css_class in ('wprm-recipe-course', 'wprm-recipe-cuisine',
                      'wprm-recipe-keyword'):
        tag_span = soup.find('span', attrs={'class': css_class})
        if tag_span is not None:
            tags.extend(tag.strip() for tag in tag_span.text.split(','))

    # ingredients
    ingreds = []
    for ingred_group in soup.find_all(
            'div', attrs={'class': 'wprm-recipe-ingredient-group'}):
        group_name = ingred_group.find(
            'h4',
            attrs={
                'class':
                'wprm-recipe-group-name wprm-recipe-ingredient-group-name'
            })
        # Kept separate from the recipe title so that the title survives.
        group_title = group_name.text.strip() if group_name else None

        children = []
        for ingred in ingred_group.find_all(
                'li', attrs={'class': 'wprm-recipe-ingredient'}):
            amount_tag = ingred.find(
                'span', attrs={'class': 'wprm-recipe-ingredient-amount'})
            amount = (RecipeParser.parse_amount(amount_tag.text)
                      if amount_tag else None)

            unit_tag = ingred.find(
                'span', attrs={'class': 'wprm-recipe-ingredient-unit'})
            # replace() needs an existing amount; a unit without an amount is
            # dropped instead of raising.
            if unit_tag and amount is not None:
                amount = replace(amount, unit=unit_tag.text)

            name_tag = ingred.find(
                'span', attrs={'class': 'wprm-recipe-ingredient-name'})
            name = name_tag.text.strip() if name_tag else ''

            notes_tag = ingred.find(
                'span', attrs={'class': 'wprm-recipe-ingredient-notes'})
            notes = notes_tag.text.strip() if notes_tag else ''

            children.append(
                Ingredient('{} {}'.format(name, notes).strip(),
                           amount=amount))
        ingreds.append(IngredientGroup(title=group_title,
                                       ingredients=children))

    # instructions: optional "## group" heading, then numbered steps
    parts = []
    for instruct_group in soup.find_all(
            'div', attrs={'class': 'wprm-recipe-instruction-group'}):
        group_name = instruct_group.find(
            'h4',
            attrs={
                'class':
                'wprm-recipe-group-name wprm-recipe-instruction-group-name'
            })
        if group_name:
            parts.append('## ' + group_name.text.strip() + '\n')
        steps = instruct_group.find_all(
            'li', attrs={'class': 'wprm-recipe-instruction'})
        for number, step in enumerate(steps, start=1):
            parts.append(str(number) + '. ' + step.text.strip() + '\n')

    # notes: appended to the instructions under their own heading
    notes_container = soup.find(
        'div', attrs={'class': 'wprm-recipe-notes-container'})
    if notes_container:
        notes_title = notes_container.find(attrs={
            'class': 'wprm-recipe-header'
        }).text.strip()
        parts.append('\n## ' + notes_title)
        for paragraph in notes_container.find_all('p'):
            parts.append('\n\n' + paragraph.text.strip())

    return Recipe(title=title,
                  ingredients=ingreds,
                  instructions=''.join(parts),
                  description=summary,
                  tags=tags,
                  yields=yields)


def extract(url, soup):
    """Build a Recipe from a page that embeds a WP Recipe Maker recipe.

    The recipe's JSON data (fetched with ``getJson``) is preferred. If
    fetching or reading it fails, the failure is printed and the recipe is
    read from the page markup instead.

    Args:
        url: Address of the page; passed on to ``getJson``.
        soup: Parsed page offering ``find``/``find_all`` (presumably a
            BeautifulSoup document -- confirm against the caller).

    Returns:
        A Recipe, or None when the page has no element with class
        ``wprm-recipe-container`` and a ``data-recipe-id`` attribute.
    """
    recipe_id_element = soup.find(attrs={
        'data-recipe-id': True,
        'class': 'wprm-recipe-container'
    })
    if not recipe_id_element:
        return None
    recipe_id = recipe_id_element.attrs['data-recipe-id']

    try:
        # Fetching is inside the try so that a failed download also falls
        # back to the markup instead of aborting the extraction.
        return _extract_from_json(getJson(url, recipe_id))
    except Exception as e:
        print('failed to extract json:', e)

    # if the json extraction fails, try to extract data from website
    return _extract_from_html(soup)