def main():
    """CLI entry point: parses arguments, reads a recipe and prints the requested output."""
    # enable shell completion support before argument parsing
    argcomplete.autocomplete(parser)

    args = parser.parse_args()

    recipe_parser = RecipeParser()
    recipe_serializer = RecipeSerializer()

    # read and parse the given recipe file
    recipe = recipe_parser.parse(args.file.read())

    # apply --multiply / --yield scaling
    recipe = _process_scaling(recipe, args)

    # base url of the recipe file, used to resolve relative links later on
    base_url = URL(f'file://{os.path.abspath(args.file.name)}')

    # --export-links short-circuits the normal output path
    if args.export_links:
        _export_links(recipe, args, base_url, recipe_parser, recipe_serializer)
        return

    # merge linked recipes into this one when requested
    if args.flatten:
        recipe = _get_flattened_recipe(recipe, base_url=base_url, parser=recipe_parser)

    print(_create_recipe_output(recipe, recipe_serializer, args))
def main():
    """Command line interface: reads a recipemd file, scales it and prints the result."""
    arg_parser = argparse.ArgumentParser(description='Read and process recipemd recipes')

    file_arg = arg_parser.add_argument(
        'file', type=argparse.FileType('r', encoding='UTF-8'),
        help='A recipemd file'
    )
    file_arg.completer = FilesCompleter(allowednames='*.md')

    display_group = arg_parser.add_mutually_exclusive_group()
    display_group.add_argument('-t', '--title', action='store_true', help='Display recipe title')
    display_group.add_argument('-i', '--ingredients', action='store_true', help='Display recipe ingredients')

    arg_parser.add_argument(
        '-f', '--flatten', action='store_true',
        help='Flatten ingredients and instructions of linked recipes into main recipe'
    )

    round_arg = arg_parser.add_argument(
        '-r', '--round',
        type=lambda value: None if value.lower() == 'no' else int(value),
        metavar='n', default=2,
        help='Round amount to n digits after decimal point. Default is "2", use "no" to disable rounding.'
    )
    round_arg.completer = ChoicesCompleter(('no', *range(0, decimal.getcontext().prec + 1)))

    scale_group = arg_parser.add_mutually_exclusive_group()
    scale_group.add_argument('-m', '--multiply', type=str, help='Multiply recipe by N', metavar='N')
    yield_arg = scale_group.add_argument(
        '-y', '--yield', type=str,
        help='Scale the recipe for yield Y, e.g. "5 servings"',
        metavar='Y', dest='required_yield'
    )
    yield_arg.completer = _yield_completer

    # register shell completions, then parse
    argcomplete.autocomplete(arg_parser)
    args = arg_parser.parse_args()

    # read and parse the recipe file
    recipe_parser = RecipeParser()
    recipe = recipe_parser.parse(args.file.read())

    # apply --multiply / --yield scaling
    recipe = _process_scaling(recipe, args)

    # merge linked recipes into this one when requested
    if args.flatten:
        base_url = URL(f'file://{os.path.abspath(args.file.name)}')
        recipe = _get_flattened_ingredients_recipe(recipe, base_url=base_url, parser=recipe_parser)

    # output is selected by the mutually exclusive display flags
    if args.title:
        print(recipe.title)
    elif args.ingredients:
        for ingredient in recipe.leaf_ingredients:
            print(_ingredient_to_string(ingredient, rounding=args.round))
    else:
        print(RecipeSerializer().serialize(recipe, rounding=args.round))
def _yield_completer(prefix, action, parser, parsed_args):
    """Shell completer for the --yield option.

    Parses the recipe file named in the partially-parsed arguments and offers
    its declared yields as completions, substituting the factor the user has
    already typed where one is present.
    """
    try:
        src = parsed_args.file.read()
        r = RecipeParser().parse(src)

        parsed_yield = RecipeParser.parse_amount(prefix)
        if parsed_yield is None or parsed_yield.factor is None:
            # nothing typed yet: offer every yield of the recipe as-is
            return [RecipeSerializer._serialize_amount(a) for a in r.yields]

        # keep the typed factor, complete the unit from the recipe's yields
        return [
            RecipeSerializer._serialize_amount(Amount(parsed_yield.factor, a.unit))
            for a in r.yields
            if parsed_yield.unit is None or (a.unit is not None and a.unit.startswith(parsed_yield.unit))
        ]
    except Exception as e:
        # Anything printed to stdout during completion is interpreted by the
        # shell as a completion result, so report errors on stderr instead.
        print(e, file=sys.stderr)
        return []
def get_filtered_recipes(args):
    """Parses all markdown files below ``args.folder`` and returns the matching ones.

    Returns a list of ``(recipe, relative_path)`` tuples for every recipe whose
    tags satisfy ``args.filter``. Files that fail to parse are skipped; unless
    ``args.no_messages`` is set, a message per skipped file goes to stderr.
    """
    rp = RecipeParser()
    result = []
    for path in glob.glob(os.path.join(args.folder, '**/*.md'), recursive=True):
        relative_path = os.path.relpath(path, args.folder)
        try:
            with open(path, 'r', encoding='UTF-8') as file:
                recipe = rp.parse(file.read())
            tags = recipe.tags
            if evaluate(args.filter, tags):
                result.append((recipe, relative_path))
        except Exception as e:
            # deliberate best-effort: one bad file must not abort the scan
            if not args.no_messages:
                # e.args may be empty (e.g. a bare Exception()); fall back to
                # the exception itself instead of crashing on e.args[0]
                reason = e.args[0] if e.args else e
                print(f"An error occurred, skipping {relative_path}: {reason}", file=sys.stderr)
    return result
def _get_linked_recipe(ingredient: Ingredient, *, base_url: URL, parser: RecipeParser, flatten: bool = True) -> Recipe:
    """Fetches, parses and optionally flattens/scales the recipe an ingredient links to.

    Raises RuntimeError when the linked document cannot be fetched or parsed.
    """
    url = base_url.join(URL(ingredient.link))

    try:
        with urllib.request.urlopen(str(url)) as response:
            charset = response.info().get_content_charset() or 'UTF-8'
            source = response.read().decode(charset)
    except Exception as error:
        raise RuntimeError(f'''Couldn't find linked recipe for ingredient "{ingredient.name}"''') from error

    try:
        link_recipe = parser.parse(source)
    except Exception as error:
        raise RuntimeError(f'''Couldn't parse linked recipe for ingredient "{ingredient.name}"''') from error

    if flatten:
        link_recipe = _get_flattened_recipe(link_recipe, base_url=url, parser=parser)

    if ingredient.amount:
        # scale the linked recipe so its yield matches the ingredient's amount;
        # warn (but continue unscaled) when no matching yield unit exists
        try:
            link_recipe = get_recipe_with_yield(link_recipe, ingredient.amount)
        except StopIteration:
            print(_make_missing_yield_warning(link_recipe, ingredient.amount), file=sys.stderr)

    return link_recipe
def _process_scaling(r, args):
    """Returns the recipe scaled according to --yield or --multiply.

    Exits the process with status 1 (message on stderr) when the requested
    yield/multiplier is invalid or the recipe has no matching yield unit.
    """
    if args.required_yield is not None:
        required_yield = RecipeParser.parse_amount(args.required_yield)
        if required_yield is None or required_yield.factor is None:
            # plain string: the message has no placeholders
            print('Given yield is not valid', file=sys.stderr)
            # sys.exit instead of the site-module exit(), which is intended
            # for interactive sessions and not guaranteed to be available
            sys.exit(1)
        try:
            r = get_recipe_with_yield(r, required_yield)
        except StopIteration:
            print(f'Recipe "{r.title}" does not specify a yield in the unit "{required_yield.unit}". The '
                  f'following units can be used: ' + ", ".join(f'"{y.unit}"' for y in r.yields),
                  file=sys.stderr)
            sys.exit(1)
    elif args.multiply is not None:
        multiply = RecipeParser.parse_amount(args.multiply)
        if multiply is None or multiply.factor is None:
            print('Given multiplier is not valid', file=sys.stderr)
            sys.exit(1)
        if multiply.unit is not None:
            print('A recipe can only be multiplied with a unitless amount', file=sys.stderr)
            sys.exit(1)
        r = multiply_recipe(r, multiply.factor)
    return r
def extract(url, _):
    """Extracts a recipe from schema.org recipe data found at ``url``.

    Returns a Recipe, or None when the page yields no schema.org recipe.
    """
    try:
        json_recipes = scrape_schema_recipe.scrape_url(url, python_objects=True)
    except Exception:
        # narrow enough for this best-effort extractor; bare `except:` also
        # swallowed KeyboardInterrupt/SystemExit
        return None
    if len(json_recipes) == 0:
        return None
    json_recipe = json_recipes[0]

    tags = []
    if "cookingMethod" in json_recipe:
        tags.append(json_recipe["cookingMethod"])
    if "recipeCategory" in json_recipe:
        append_or_extend(tags, json_recipe["recipeCategory"])
    if "recipeCuisine" in json_recipe:
        tags.append(json_recipe["recipeCuisine"])
    if "keywords" in json_recipe:
        kw = json_recipe["keywords"]
        # keywords may be a comma separated string or already a list
        if isinstance(kw, str):
            kw = kw.split(',')
        append_or_extend(tags, kw)

    description_parts = []
    if "description" in json_recipe:
        description_parts.append(json_recipe["description"])
    if "image" in json_recipe:
        # image may be a single URL or a list of URLs; the previous version
        # emitted a stray '"' that produced broken markdown like ![](http://x")
        image = json_recipe["image"]
        if isinstance(image, list):
            image = image[0]
        description_parts.append(f'![]({image})')

    yields = []
    if "recipeYield" in json_recipe:
        yields.append(RecipeParser.parse_amount(json_recipe["recipeYield"]))

    recipe = Recipe(
        title=json_recipe["name"],
        description="\n\n".join(description_parts),
        tags=tags,
        yields=yields,
        ingredients=[
            Ingredient(name=ingred) for ingred in json_recipe["recipeIngredient"]
        ],
        # "url" is optional in schema.org data; fall back to the page URL
        instructions=f'{create_instructions(json_recipe["recipeInstructions"])}\n\n{json_recipe.get("url", url)}',
    )
    return recipe
def _process_scaling(r, args):
    """Returns recipes scaled according to --multiply or --yield.

    Exits the process with status 1 (message on stderr) when the requested
    yield/multiplier is invalid or the recipe has no matching yield unit.
    """
    if args.required_yield is not None:
        required_yield = RecipeParser.parse_amount(args.required_yield)
        if required_yield is None or required_yield.factor is None:
            # plain string: the message has no placeholders
            print('Given yield is not valid', file=sys.stderr)
            # sys.exit instead of the site-module exit(), which is intended
            # for interactive sessions and not guaranteed to be available
            sys.exit(1)
        try:
            r = get_recipe_with_yield(r, required_yield)
        except StopIteration:
            print(_make_missing_yield_warning(r, required_yield), file=sys.stderr)
            sys.exit(1)
    elif args.multiply is not None:
        multiply = RecipeParser.parse_amount(args.multiply)
        if multiply is None or multiply.factor is None:
            print('Given multiplier is not valid', file=sys.stderr)
            sys.exit(1)
        if multiply.unit is not None:
            print('A recipe can only be multiplied with a unitless amount', file=sys.stderr)
            sys.exit(1)
        r = multiply_recipe(r, multiply.factor)
    return r
def extract(url, soup):
    """Extracts a recipe from a chefkoch.de page.

    Returns None (implicitly) for non-chefkoch URLs; raises ValueError when
    the page is chefkoch's "not found" error page.
    """
    if 'chefkoch.de' not in url:
        return
    # title
    title = soup.find('h1', attrs={'class': 'page-title'}).text
    if title == 'Fehler: Seite nicht gefunden' or title == 'Fehler: Rezept nicht gefunden':
        raise ValueError('No recipe found, check URL')
    # summary (optional on the page)
    summaryTag = soup.find('div', attrs={'class': 'summary'})
    summary = summaryTag.text if summaryTag else None
    # servings
    servings = soup.find('input', attrs={'id': 'divisor'}).attrs['value']
    yields = [
        Amount(Decimal(servings), f'Portion{"en" if int(servings) > 1 else ""}')
    ]
    # tags — the tag cloud may be missing; previously that raised AttributeError
    tags = []
    tagcloud = soup.find('ul', attrs={'class': 'tagcloud'})
    if tagcloud:
        for tag in tagcloud.find_all('a'):
            tags.append(tag.text)
    # ingredients: first column is the amount, second the name
    table = soup.find('table', attrs={'class': 'incredients'})
    rows = table.find_all('tr')
    ingreds = []
    for row in rows:
        cols = row.find_all('td')
        cols = [s.text.strip() for s in cols]
        amount = RecipeParser.parse_amount(cols[0])
        ingreds.append(Ingredient(name=cols[1], amount=amount))
    # instructions
    instruct = soup.find('div', attrs={
        'id': 'rezept-zubereitung'
    }).text  # only get text
    instruct = instruct.strip()  # remove leading and trailing whitespace
    return Recipe(title=title, ingredients=ingreds, instructions=instruct, description=summary, tags=tags, yields=yields)
def _get_linked_recipe(ingredient: Ingredient, *, base_url: URL, parser: RecipeParser, flatten: bool=True) -> Recipe:
    """Downloads and parses the recipe that *ingredient* links to.

    Raises RuntimeError when the linked document cannot be fetched or parsed.
    """
    url = base_url.join(URL(ingredient.link))

    try:
        with urllib.request.urlopen(str(url)) as response:
            source = response.read().decode(response.info().get_content_charset() or 'UTF-8')
    except Exception as error:
        raise RuntimeError(f'''Couldn't find linked recipe for ingredient "{ingredient.name}"''') from error

    try:
        link_recipe = parser.parse(source)
    except Exception as error:
        raise RuntimeError(f'''Couldn't parse linked recipe for ingredient "{ingredient.name}"''') from error

    if flatten:
        # recursively inline the linked recipe's own linked ingredients
        link_recipe = _get_flattened_ingredients_recipe(link_recipe, base_url=url, parser=parser)

    return link_recipe
def extract(url, _):
    """Extracts a recipe via the recipe_scrapers library.

    Returns None when the site is not supported by the library.
    """
    try:
        scraper = scrape_me(url)
    except WebsiteNotImplementedError:
        return None

    # the image is optional; fall back to an empty description
    try:
        image_markdown = f'![]({scraper.image()})'
    except NotImplementedError:
        image_markdown = ''

    ingredient_list = [Ingredient(name=entry) for entry in scraper.ingredients()]

    return Recipe(
        title=scraper.title(),
        description=image_markdown,
        yields=[RecipeParser.parse_amount(scraper.yields())],
        ingredients=ingredient_list,
        instructions=scraper.instructions(),
    )
def parser():
    """Provides a fresh RecipeParser instance."""
    recipe_parser = RecipeParser()
    return recipe_parser
import sys import glob import os import re from pprint import pprint from recipemd.data import RecipeParser from unidecode import unidecode root_path = '.' rp = RecipeParser() tt = str.maketrans({ "ä": "ae", "ö": "oe", "ü": "ue", "Ä": "Ae", "Ö": "Oe", "Ü": "Ue", "ß": "ss", }) for path in glob.glob(os.path.join(root_path, '**/*.md'), recursive=True): try: with open(path, 'r', encoding='UTF-8') as file: recipe = rp.parse(file.read()) filename = recipe.title filename = filename.translate(tt) filename = unidecode(filename) filename = re.sub(r'[^a-zA-Z0-9]+', '_', filename) filename = re.sub(r'^_+|_+$', '', filename)
def main():
    """CLI entry point: reads a recipemd file, optionally scales it, and prints the result."""
    parser = argparse.ArgumentParser(
        description='Read and process recipemd recipes')
    # FileType gives a proper argparse error message on unreadable files and a
    # deterministic encoding, instead of plain open()'s platform default
    parser.add_argument('file', type=argparse.FileType('r', encoding='UTF-8'),
                        help='A recipemd file')
    display_parser = parser.add_mutually_exclusive_group()
    display_parser.add_argument('-t', '--title', action='store_true', help='Display recipe title')
    display_parser.add_argument('-i', '--ingredients', action='store_true', help='Display recipe ingredients')
    scale_parser = parser.add_mutually_exclusive_group()
    scale_parser.add_argument('-m', '--multiply', type=str, help='Multiply recipe by N', metavar='N')
    scale_parser.add_argument('-y', '--yield', type=str, help='Scale the recipe for yield Y', metavar='Y',
                              dest='required_yield')
    args = parser.parse_args()

    src = args.file.read()
    rp = RecipeParser()
    r = rp.parse(src)

    if args.required_yield is not None:
        required_yield = RecipeParser.parse_amount(args.required_yield)
        if required_yield is None or required_yield.factor is None:
            # plain strings: these messages have no placeholders
            print('Given yield is not valid', file=sys.stderr)
            # sys.exit instead of the site-module exit(), which is intended
            # for interactive sessions and not guaranteed to be available
            sys.exit(1)
        matching_recipe_yield = next(
            (y for y in r.yields if y.unit == required_yield.unit), None)
        if matching_recipe_yield is None:
            if required_yield.unit is None:
                # a unitless request scales relative to a factor of 1
                matching_recipe_yield = Amount(Decimal(1))
            else:
                print(f'Recipe "{r.title}" does not specify a yield in the unit "{required_yield.unit}". The '
                      f'following units can be used: ' + ", ".join(f'"{y.unit}"' for y in r.yields),
                      file=sys.stderr)
                sys.exit(1)
        r = multiply_recipe(
            r, required_yield.factor / matching_recipe_yield.factor)
    elif args.multiply is not None:
        multiply = RecipeParser.parse_amount(args.multiply)
        if multiply is None or multiply.factor is None:
            print('Given multiplier is not valid', file=sys.stderr)
            sys.exit(1)
        if multiply.unit is not None:
            print('A recipe can only be multiplied with a unitless amount', file=sys.stderr)
            sys.exit(1)
        r = multiply_recipe(r, multiply.factor)

    if args.title:
        print(r.title)
    elif args.ingredients:
        for ingr in r.leaf_ingredients:
            print(_ingredient_to_string(ingr))
    else:
        rs = RecipeSerializer()
        print(rs.serialize(r))
def extract(url, soup):
    """Extracts a recipe from a WP Recipe Maker page.

    Tries the WP Recipe Maker JSON API first and falls back to scraping the
    rendered HTML when JSON extraction fails. Returns None when the page does
    not contain a WP Recipe Maker recipe container.
    """
    recipe_id_element = soup.find(attrs={
        'data-recipe-id': True,
        'class': 'wprm-recipe-container'
    })
    if not recipe_id_element:
        return
    recipe_id = recipe_id_element.attrs['data-recipe-id']
    data = getJson(url, recipe_id)
    try:
        # title
        title = getText(data['recipe']['name'])
        # summary
        summary = getText(data['recipe']['summary'])
        # servings and tags
        servingsAmount = RecipeParser.parse_amount(data['recipe']['servings'])
        servingsUnit = data['recipe']['servings_unit']
        if servingsUnit != "":
            servingsAmount = replace(servingsAmount, unit=servingsUnit)
        yields = [servingsAmount]
        tags = []
        for tagGroup in data['recipe']['tags'].values():
            for tag in tagGroup:
                tags.append(tag['name'])
        # ingredients
        ingredients = []
        for ingredGroup in data['recipe']['ingredients']:
            children = []
            # FIX: use a dedicated variable for the group heading; the previous
            # version reused `title`, clobbering the recipe title before
            # Recipe(title=...) was built
            if 'name' in ingredGroup:
                group_title = getText(ingredGroup['name'])
            else:
                group_title = None
            for ingred in ingredGroup['ingredients']:
                amount = RecipeParser.parse_amount(ingred['amount'])
                unit = ingred['unit'].strip()
                if unit != '':
                    amount = replace(amount, unit=unit)
                name = getText('{} {}'.format(ingred['name'], ingred['notes']))
                children.append(Ingredient(name, amount))
            group = IngredientGroup(title=group_title, ingredients=children)
            ingredients.append(group)
        # instructions: named groups become headings, steps a numbered list
        instructions = ''
        for instrGroup in data['recipe']['instructions']:
            if 'name' in instrGroup:
                instructions = instructions + '## ' + getText(instrGroup['name']) + '\n'
            for index, instr in enumerate(instrGroup['instructions']):
                instructions = instructions + '{}. {}\n'.format(index + 1, getText(instr['text']))
        if 'notes' in data['recipe']:
            instructions = instructions + '\n## Recipe Notes\n\n' + getText(data['recipe']['notes'])
        return Recipe(title=title, ingredients=ingredients, instructions=instructions, description=summary, tags=tags, yields=yields)
    except Exception as e:
        print('failed to extract json:', e)
    # if the json extraction fails, try to extract data from website
    # title
    title = soup.find(attrs={'class': 'wprm-recipe-name'}).text.strip()
    # summary
    summary = soup.find('div', attrs={'class': 'wprm-recipe-summary'}).text.strip()
    # yields
    yields = []
    servings = soup.find('span', attrs={'class': 'wprm-recipe-details wprm-recipe-servings'})
    if servings:
        servingsAmount = RecipeParser.parse_amount(servings.text.strip())
        servingsUnit = soup.find('span', attrs={
            'class': 'wprm-recipe-details-unit wprm-recipe-servings-unit'
        }).text.strip()
        if servingsUnit != "":
            servingsAmount = replace(servingsAmount, unit=servingsUnit)
        yields.append(servingsAmount)
    # tags: course, cuisine and keyword spans, all comma separated
    tags = []
    courseTags = soup.find('span', attrs={'class': 'wprm-recipe-course'})
    if courseTags:
        courseTags = courseTags.text.split(',')
    else:
        courseTags = []
    cuisineTags = soup.find('span', attrs={'class': 'wprm-recipe-cuisine'})
    if cuisineTags:
        cuisineTags = cuisineTags.text.split(',')
    else:
        cuisineTags = []
    keywords = soup.find('span', attrs={'class': 'wprm-recipe-keyword'})
    if keywords:
        keywords = keywords.text.split(',')
    else:
        keywords = []
    for tag in courseTags + cuisineTags + keywords:
        tags.append(tag.strip())
    # ingredients
    ingreds = []
    ingredGroups = soup.find_all('div', attrs={'class': 'wprm-recipe-ingredient-group'})
    for ingredGroup in ingredGroups:
        groupName = ingredGroup.find('h4', attrs={
            'class': 'wprm-recipe-group-name wprm-recipe-ingredient-group-name'
        })
        # FIX: dedicated variable so the recipe title isn't overwritten here either
        if groupName:
            group_title = groupName.text.strip()
        else:
            group_title = None
        groupIngreds = ingredGroup.find_all('li', attrs={'class': 'wprm-recipe-ingredient'})
        children = []
        for ingred in groupIngreds:
            amount = ingred.find('span', attrs={'class': 'wprm-recipe-ingredient-amount'})
            if amount:
                amount = RecipeParser.parse_amount(amount.text)
            else:
                amount = None
            unit = ingred.find('span', attrs={'class': 'wprm-recipe-ingredient-unit'})
            if unit:
                amount = replace(amount, unit=unit.text)
            name = ingred.find('span', attrs={'class': 'wprm-recipe-ingredient-name'})
            if name:
                name = name.text.strip()
            else:
                name = ''
            notes = ingred.find('span', attrs={'class': 'wprm-recipe-ingredient-notes'})
            if notes:
                notes = notes.text.strip()
            else:
                notes = ''
            children.append(Ingredient('{} {}'.format(name, notes).strip(), amount=amount))
        group = IngredientGroup(title=group_title, ingredients=children)
        ingreds.append(group)
    # instructions
    instructions = ''
    instructGroups = soup.find_all('div', attrs={'class': 'wprm-recipe-instruction-group'})
    # renamed loop variable (was `ingredGroup`, shadowing the ingredient loops)
    for instructGroup in instructGroups:
        groupName = instructGroup.find('h4', attrs={
            'class': 'wprm-recipe-group-name wprm-recipe-instruction-group-name'
        })
        if groupName:
            instructions = instructions + '## ' + groupName.text.strip() + '\n'
        groupInstructs = instructGroup.find_all('li', attrs={'class': 'wprm-recipe-instruction'})
        for index, inst in enumerate(groupInstructs):
            instructions = instructions + str(index + 1) + '. ' + inst.text.strip() + '\n'
    # notes
    notesContainer = soup.find('div', attrs={'class': 'wprm-recipe-notes-container'})
    if notesContainer:
        notesTitle = notesContainer.find(attrs={'class': 'wprm-recipe-header'}).text.strip()
        instructions = instructions + '\n## ' + notesTitle
        for p in notesContainer.find_all('p'):
            instructions = instructions + '\n\n' + p.text.strip()
    return Recipe(title=title, ingredients=ingreds, instructions=instructions, description=summary, tags=tags, yields=yields)
import urllib.request from pprint import pprint from commonmark import Parser from commonmark.node import NodeWalker from yarl import URL from recipemd._vendor.commonmark_extensions.plaintext import CommonMarkToCommonMarkRenderer from recipemd.data import RecipeParser, RecipeSerializer from unidecode import unidecode root_path = os.path.realpath('.') commonmark_parser = Parser() commonmark_renderer = CommonMarkToCommonMarkRenderer() recipe_parser = RecipeParser() recipe_serializer = RecipeSerializer() def urlopen_user_agent(url: str): request = urllib.request.Request( url, None, { 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7' }) return urllib.request.urlopen(request) def download_recipe_images(recipe_path): with open(recipe_path, 'r', encoding='UTF-8') as file: ast = commonmark_parser.parse(file.read())
def serve(base_folder_path) -> Flask:
    """Creates a Flask app that browses and renders recipemd files below base_folder_path."""
    app = Flask(__name__)
    app.jinja_env.trim_blocks = True
    app.jinja_env.lstrip_blocks = True
    recipe_parser = RecipeParser()
    recipe_serializer = RecipeSerializer()
    # strips scripts, styles, frames etc. from recipe-derived HTML before display
    _cleaner = Cleaner(meta=True, embedded=True, links=True, style=True, processing_instructions=True, scripts=True, javascript=True, frames=True, remove_unknown_tags=True, page_structure=True, remove_tags=['body'])

    @app.context_processor
    def pjax_processor():
        """Exposes get_root_template() to templates: partial template for PJAX requests, full page otherwise."""
        def get_root_template():
            if "X-PJAX" in request.headers:
                return "pjax.html"
            return "structure.html"

        return dict(get_root_template=get_root_template)

    @app.template_filter()
    def markdown_to_cleaned_html(markdown):
        """Template filter: renders markdown to HTML and sanitizes it with _cleaner."""
        unsafe_html_str = commonmark.commonmark(markdown)
        # remove wrapping div
        # https://stackoverflow.com/questions/21420922/how-to-use-cleaner-lxml-html-without-returning-div-tag
        unsafe_doc = document_fromstring(unsafe_html_str)
        clean_doc = _cleaner.clean_html(unsafe_doc)
        clean_html_str = "\n".join(tostring(ch, encoding="unicode") for ch in clean_doc)
        return Markup(clean_html_str)

    @app.template_filter()
    def get_recipe_title(child_name: str, parent_path) -> str:
        """Template filter: returns the recipe title for a directory entry, or a folder/error marker."""
        absolute_path = os.path.join(base_folder_path, parent_path, child_name)
        if os.path.isdir(absolute_path):
            return Markup('<em>Folder</em>')
        try:
            with open(absolute_path, 'r', encoding='UTF-8') as f:
                recipe = recipe_parser.parse(f.read())
                # TODO markdown to html
                return recipe.title
        except RuntimeError:
            # unparsable files are shown but flagged rather than breaking the listing
            return Markup('<strong>Invalid recipe!</strong>')

    @app.template_filter()
    def serialize_ingredients(ingredients: List[Ingredient]):
        """Template filter: serializes a list of ingredients, one per line."""
        return ("\n".join(recipe_serializer._serialize_ingredient(i, rounding=2) for i in ingredients)).strip()

    @app.route('/')
    @app.route('/<path:relative_path>')
    def download_file(relative_path=''):
        """Serves a folder listing, a rendered recipe, or the raw file, depending on the path."""
        absolute_path = os.path.join(base_folder_path, relative_path)
        if os.path.isdir(absolute_path):
            # canonicalize directory URLs to a trailing slash so relative links resolve
            if not absolute_path.endswith('/'):
                return redirect(f'/{relative_path}/', code=302)
            child_paths = [(ch, os.path.isdir(os.path.join(absolute_path, ch))) for ch in os.listdir(absolute_path)]
            # hide dotfiles; show only directories and markdown files
            child_paths = [(ch, is_dir) for ch, is_dir in child_paths if not ch.startswith('.') and (is_dir or ch.endswith('.md'))]
            # mark directories with a trailing slash for the template
            child_paths = [f'{ch}/' if not ch.endswith('/') and is_dir else ch for ch, is_dir in child_paths]
            child_paths = sorted(child_paths)
            return render_template("folder.html", child_paths=child_paths, path=relative_path)
        # non-markdown files are served verbatim
        if not absolute_path.endswith('.md'):
            return send_from_directory(base_folder_path, relative_path)
        with open(absolute_path, 'r', encoding='UTF-8') as f:
            # ?yield=... selects the desired yield; defaults to a factor of 1
            required_yield_str = request.args.get('yield', '1')
            required_yield = recipe_parser.parse_amount(required_yield_str)
            if required_yield is None:
                required_yield = Amount(factor=Decimal(1))
            src = f.read()
            try:
                recipe = recipe_parser.parse(src)
            except Exception as e:
                # fall back to rendering the raw markdown with the parse error shown
                return render_template("markdown.html", markdown=src, path=relative_path, errors=[e.args[0]])
            errors = []
            try:
                recipe = get_recipe_with_yield(recipe, required_yield)
            except StopIteration:
                # requested yield unit not declared by the recipe; show it unscaled
                errors.append(f'The recipe does not specify a yield in the unit "{required_yield.unit}". ' f'The following units can be used: ' + ", ".join(f'"{y.unit}"' for y in recipe.yields))
            except Exception as e:
                errors.append(str(e))
            return render_template("recipe.html", recipe=recipe, yields=recipe_serializer._serialize_yields(recipe.yields, rounding=2), tags=recipe_serializer._serialize_tags(recipe.tags), units=list(set(y.unit for y in recipe.yields)), default_yield=recipe_serializer._serialize_amount(recipe.yields[0]) if recipe.yields else "1", path=relative_path, errors=errors)

    return app