Example #1
def extract_recipe_from_html(html: str, url: str) -> dict:
    try:
        scraped_recipes: List[dict] = scrape_schema_recipe.loads(
            html, python_objects=True)
        dump_last_json(scraped_recipes)

        if not scraped_recipes:
            scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(
                url, python_objects=True)
    except Exception as e:
        print(e)
        scraped_recipes: List[dict] = scrape_schema_recipe.loads(html)
        dump_last_json(scraped_recipes)

        if not scraped_recipes:
            scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url)

    if scraped_recipes:
        new_recipe: dict = scraped_recipes[0]
        logger.info(f"Recipe Scraped From Web: {new_recipe}")

        if not new_recipe:
            return "fail"  # TODO: Return Better Error Here

        new_recipe = Cleaner.clean(new_recipe, url)
    else:
        new_recipe = open_graph.basic_recipe_from_opengraph(html, url)
        logger.info(f"Recipe Scraped from opengraph metadata: {new_recipe}")

    return new_recipe
Example #2
    def test_loads(self):
        with open('test_data/bevvy-irish-coffee-2019.html') as fp:
            s = fp.read()

        recipes = loads(s)
        recipe = recipes[0]
        assert recipe['name'] == 'Irish Coffee'
Example #3
    def test_loads(self):
        with open(f"{DATA_PATH}/bevvy-irish-coffee-2019.html") as fp:
            s = fp.read()

        recipes = loads(s)
        recipe = recipes[0]
        assert recipe["name"] == "Irish Coffee"
Example #4
def extract_recipe_from_html(html: str, url: str) -> dict:
    scraped_recipes: List[dict] = scrape_schema_recipe.loads(
        html, python_objects=True)

    if not scraped_recipes:
        scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(
            url, python_objects=True)

    if scraped_recipes:
        new_recipe: dict = scraped_recipes[0]
        logger.info(f"Recipe Scraped From Web: {new_recipe}")

        if not new_recipe:
            return "fail"  # TODO: Return Better Error Here

        new_recipe = process_recipe_data(new_recipe, url=url)
        new_recipe = normalize_data(new_recipe)
    else:
        new_recipe = basic_recipe_from_opengraph(html, url)
        logger.info(f"Recipe Scraped from opengraph metadata: {new_recipe}")

    return new_recipe
Example #5
    def test_loads(self):
        with self.assertRaises(SSRTypeError):
            loads(0xDEADBEEF)
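
The examples above share the same call pattern: parse schema.org Recipe data out of an HTML string with scrape_schema_recipe.loads(), and fall back to scrape_schema_recipe.scrape_url() when nothing is found, with both calls returning a list of recipe dicts. The sketch below is a minimal, self-contained illustration of that pattern using only the signatures shown above; the function name fetch_recipe, the placeholder URL, and the inline JSON-LD snippet are illustrative assumptions, not part of the library or of the projects the examples come from.

from typing import List, Optional

import scrape_schema_recipe


def fetch_recipe(html: str, url: str) -> Optional[dict]:
    # loads() returns a (possibly empty) list of recipe dicts found in the HTML.
    recipes: List[dict] = scrape_schema_recipe.loads(html, python_objects=True)

    if not recipes:
        # Fall back to letting the library fetch and parse the URL itself.
        recipes = scrape_schema_recipe.scrape_url(url, python_objects=True)

    return recipes[0] if recipes else None


if __name__ == "__main__":
    # Hypothetical page containing a schema.org Recipe as JSON-LD.
    page_html = """
    <html><head><script type="application/ld+json">
    {"@context": "https://schema.org", "@type": "Recipe",
     "name": "Irish Coffee", "recipeIngredient": ["hot coffee", "Irish whiskey"]}
    </script></head><body></body></html>
    """
    recipe = fetch_recipe(page_html, "https://example.com/irish-coffee")
    if recipe is not None:
        print(recipe["name"])  # expected: Irish Coffee
    else:
        print("No schema.org Recipe data found")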