def _scrape_schema_recipes(html: str, url: str, **scraper_options) -> list:
    """Collect recipe entries from ``html``, falling back to fetching ``url``.

    The markup is parsed first and the parse result is handed to
    ``dump_last_json``. ``scrape_schema_recipe.scrape_url`` is only called
    when parsing yields nothing. ``scraper_options`` are forwarded unchanged
    to both scraper calls.
    """
    scraped_recipes = scrape_schema_recipe.loads(html, **scraper_options)
    dump_last_json(scraped_recipes)

    if not scraped_recipes:
        scraped_recipes = scrape_schema_recipe.scrape_url(url, **scraper_options)

    return scraped_recipes


def extract_recipe_from_html(html: str, url: str) -> dict:
    """Extract a recipe dictionary from a page's HTML.

    Scraping is attempted with ``python_objects=True`` first. If that attempt
    raises, the error is logged and scraping is retried without the option.
    When no recipe entry is found at all, the recipe is built from the page's
    opengraph metadata instead.

    Args:
        html (str): markup of the page to scrape
        url (str): address of the page, used as a fallback scrape target and
            passed on to the cleaner / opengraph helpers

    Returns:
        dict: the cleaned first scraped entry, or the opengraph-based recipe
        when nothing was scraped. If the first scraped entry is empty, the
        string ``"fail"`` is returned instead of a dict.
    """
    try:
        scraped_recipes = _scrape_schema_recipes(html, url, python_objects=True)
    except Exception as e:
        # Best-effort boundary: any failure with python_objects=True triggers
        # one retry without it. Log through the module logger (with traceback)
        # rather than printing to stdout so the failure is not lost.
        logger.exception(f"Scraping with python_objects=True failed, retrying without it: {e}")
        scraped_recipes = _scrape_schema_recipes(html, url)

    if not scraped_recipes:
        new_recipe = open_graph.basic_recipe_from_opengraph(html, url)
        logger.info(f"Recipe Scraped from opengraph metadata: {new_recipe}")
        return new_recipe

    new_recipe: dict = scraped_recipes[0]
    logger.info(f"Recipe Scraped From Web: {new_recipe}")

    if not new_recipe:
        return "fail"  # TODO: Return Better Error Here

    return Cleaner.clean(new_recipe, url)
def import_recipes(recipe_dir: Path) -> Recipe:
    """Build a Recipe from a directory containing a recipe JSON file.

    The first ``*.json`` file yielded by the directory glob is parsed, passed
    through ``Cleaner.clean`` and unpacked into a ``Recipe``. If a ``full.*``
    file is present it is copied into ``app_dirs.IMG_DIR`` under the recipe's
    slug, keeping the file's original suffix.

    Args:
        recipe_dir (Path): directory to import from

    Returns:
        Recipe: the recipe built from the cleaned JSON data

    Raises:
        FileNotFoundError: if ``recipe_dir`` contains no ``*.json`` file
    """
    image = next(recipe_dir.glob("full.*"), None)
    recipe_file = next(recipe_dir.glob("*.json"), None)

    if recipe_file is None:
        # Previously this surfaced as an UnboundLocalError further down.
        raise FileNotFoundError(f"No recipe JSON file found in {recipe_dir}")

    # Parse from bytes so json detects the encoding itself instead of relying
    # on the platform's locale default.
    recipe_dict = json.loads(recipe_file.read_bytes())

    recipe_data = Cleaner.clean(recipe_dict)

    image_name = recipe_data["slug"]
    recipe_data["image"] = image_name
    recipe_data["tags"] = clean_nextcloud_tags(recipe_data.get("keywords"))

    recipe = Recipe(**recipe_data)

    # Copy the image only after the Recipe has been constructed successfully.
    if image is not None:
        shutil.copy(image, app_dirs.IMG_DIR.joinpath(image_name + image.suffix))

    return recipe
def clean_recipe_dictionary(self, recipe_dict) -> Recipe:
    """Turn a raw recipe dictionary into a Recipe object.

    The dictionary is first passed through ``self.rewrite_alias`` and then
    through ``Cleaner.clean`` (with the dictionary's ``orgURL`` entry, if any,
    supplied as the url). The cleaned result is unpacked into ``Recipe``.
    """
    aliased = self.rewrite_alias(recipe_dict)
    source_url = aliased.get("orgURL", None)
    cleaned = Cleaner.clean(aliased, url=source_url)
    return Recipe(**cleaned)
def test_cleaner_instructions(instructions):
    """Cleaner.instructions should normalize the input to three text steps A, B, C."""
    expected_steps = [{"text": label} for label in ("A", "B", "C")]
    cleaned_steps = Cleaner.instructions(instructions)
    assert cleaned_steps == expected_steps
def create_from_url(url: str, timeout: float = 30.0) -> Recipe:
    """Main entry point for generating a recipe from a URL.

    The page is downloaded, a recipe dictionary is extracted from its HTML,
    cleaned, its image is downloaded, and the result is unpacked into a
    Recipe object.

    Args:
        url (str): a valid string representing a URL
        timeout (float): seconds to wait for the server before giving up.
            Without a timeout the request could block indefinitely.

    Returns:
        Recipe: Recipe Object

    Raises:
        requests.exceptions.Timeout: if the server does not respond within
            ``timeout`` seconds
    """
    r = requests.get(url, timeout=timeout)

    # NOTE(review): extract_recipe_from_html can return the string "fail"
    # instead of a dict; that case is not handled here - confirm how
    # Cleaner.clean reacts to it.
    new_recipe = extract_recipe_from_html(r.text, url)
    new_recipe = Cleaner.clean(new_recipe, url)
    new_recipe = download_image_for_recipe(new_recipe)

    return Recipe(**new_recipe)
def test_clean_image():
    """Cleaner.image should reduce each supported input shape to a single string."""
    cases = [
        # (raw input, expected cleaned value)
        (None, "no image"),
        ("https://my.image/path/", "https://my.image/path/"),
        ({"url": "My URL!"}, "My URL!"),
        (["My URL!", "MY SECOND URL"], "My URL!"),
    ]
    for raw_image, expected in cases:
        assert Cleaner.image(raw_image) == expected
def test_clean_html():
    """Cleaner.html should strip the surrounding tags and keep the text."""
    markup = "<div>Hello World</div>"
    cleaned = Cleaner.html(markup)
    assert cleaned == "Hello World"
def test_clean_category():
    """Cleaner.category should wrap a single category string in a list."""
    raw_category = "my-category"
    cleaned = Cleaner.category(raw_category)
    assert cleaned == ["my-category"]
def test_cleaner_clean(json_file, num_steps):
    """Cleaning a raw recipe fixture should yield the expected number of instruction steps."""
    fixture_path = TEST_RAW_RECIPES.joinpath(json_file)

    # Use a context manager so the fixture file is closed deterministically
    # instead of leaking the handle; read it as UTF-8 regardless of locale.
    with open(fixture_path, encoding="utf-8") as f:
        raw_recipe = json.load(f)

    recipe_data = Cleaner.clean(raw_recipe)

    assert len(recipe_data["recipeInstructions"]) == num_steps
def test_time_cleaner():
    """Cleaner.time should render the duration string "PT2H30M" as readable text."""
    readable = Cleaner.time("PT2H30M")
    expected = "2 Hours 30 Minutes"
    assert readable == expected