def extract_recipe_from_html(html: str, url: str) -> dict:
    try:
        scraped_recipes: List[dict] = scrape_schema_recipe.loads(html, python_objects=True)
        dump_last_json(scraped_recipes)

        if not scraped_recipes:
            scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url, python_objects=True)
    except Exception as e:
        print(e)
        scraped_recipes: List[dict] = scrape_schema_recipe.loads(html)
        dump_last_json(scraped_recipes)

        if not scraped_recipes:
            scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url)

    if scraped_recipes:
        new_recipe: dict = scraped_recipes[0]
        logger.info(f"Recipe Scraped From Web: {new_recipe}")

        if not new_recipe:
            return "fail"  # TODO: Return Better Error Here

        new_recipe = Cleaner.clean(new_recipe, url)
    else:
        new_recipe = open_graph.basic_recipe_from_opengraph(html, url)
        logger.info(f"Recipe Scraped from opengraph metadata: {new_recipe}")

    return new_recipe
def test_loads(self):
    with open('test_data/bevvy-irish-coffee-2019.html') as fp:
        s = fp.read()

    recipes = loads(s)
    recipe = recipes[0]
    assert recipe['name'] == 'Irish Coffee'
def test_loads(self): with open(f"{DATA_PATH}/bevvy-irish-coffee-2019.html") as fp: s = fp.read() recipes = loads(s) recipe = recipes[0] assert recipe["name"] == "Irish Coffee"
def extract_recipe_from_html(html: str, url: str) -> dict:
    scraped_recipes: List[dict] = scrape_schema_recipe.loads(html, python_objects=True)

    if not scraped_recipes:
        scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url, python_objects=True)

    if scraped_recipes:
        new_recipe: dict = scraped_recipes[0]
        logger.info(f"Recipe Scraped From Web: {new_recipe}")

        if not new_recipe:
            return "fail"  # TODO: Return Better Error Here

        new_recipe = process_recipe_data(new_recipe, url=url)
        new_recipe = normalize_data(new_recipe)
    else:
        new_recipe = basic_recipe_from_opengraph(html, url)
        logger.info(f"Recipe Scraped from opengraph metadata: {new_recipe}")

    return new_recipe
def test_loads(self):
    with self.assertRaises(SSRTypeError):
        loads(0xDEADBEEF)