Beispiel #1
0
def extract_recipe_from_html(html: str, url: str) -> dict:
    """Extract recipe data from a page, trying progressively weaker sources.

    Order of attempts:
      1. schema.org recipe data parsed from the raw HTML (python_objects=True)
      2. re-scraping the URL directly with python objects
      3. both again without python-object conversion (on any exception)
      4. OpenGraph metadata as a last resort

    Args:
        html: Raw HTML of the recipe page.
        url: The page URL, used for re-scraping and for Cleaner.clean.

    Returns:
        dict: Cleaned recipe data, or the literal string "fail" when the
        first scraped entry is empty (legacy error signal — see TODO).
    """
    try:
        scraped_recipes: List[dict] = scrape_schema_recipe.loads(
            html, python_objects=True)
        dump_last_json(scraped_recipes)

        if not scraped_recipes:
            scraped_recipes = scrape_schema_recipe.scrape_url(
                url, python_objects=True)
    except Exception:
        # python_objects=True can fail on malformed schema values (e.g. bad
        # ISO durations); log the traceback and retry without conversion
        # instead of printing the error to stdout and losing context.
        logger.exception("Schema scrape with python_objects failed; retrying without")
        scraped_recipes = scrape_schema_recipe.loads(html)
        dump_last_json(scraped_recipes)

        if not scraped_recipes:
            scraped_recipes = scrape_schema_recipe.scrape_url(url)

    if scraped_recipes:
        new_recipe: dict = scraped_recipes[0]
        logger.info(f"Recipe Scraped From Web: {new_recipe}")

        if not new_recipe:
            return "fail"  # TODO: Return Better Error Here

        new_recipe = Cleaner.clean(new_recipe, url)
    else:
        # No schema.org data at all — fall back to OpenGraph metadata.
        new_recipe = open_graph.basic_recipe_from_opengraph(html, url)
        logger.info(f"Recipe Scraped from opengraph metadata: {new_recipe}")

    return new_recipe
Beispiel #2
0
def import_recipes(recipe_dir: Path) -> Recipe:
    """Import one Nextcloud-style recipe directory into a Recipe.

    Reads the directory's ``*.json`` recipe file, cleans the data, copies the
    optional ``full.*`` image into the app image directory, and returns the
    constructed Recipe.

    Args:
        recipe_dir: Directory containing a ``*.json`` recipe file and an
            optional ``full.*`` image.

    Returns:
        Recipe: The recipe built from the cleaned data.

    Raises:
        FileNotFoundError: If the directory contains no ``*.json`` file.
    """
    # First match (if any) for each file; the original manual break-loops
    # left recipe_file unbound — a bare NameError — when no JSON existed.
    image = next(recipe_dir.glob("full.*"), None)
    recipe_file = next(recipe_dir.glob("*.json"), None)

    if recipe_file is None:
        raise FileNotFoundError(f"No recipe JSON file found in {recipe_dir}")

    with open(recipe_file, "r") as f:
        recipe_dict = json.loads(f.read())

    recipe_data = Cleaner.clean(recipe_dict)

    image_name = recipe_data["slug"]
    recipe_data["image"] = recipe_data["slug"]
    recipe_data["tags"] = clean_nextcloud_tags(recipe_data.get("keywords"))

    recipe = Recipe(**recipe_data)

    if image:
        shutil.copy(image,
                    app_dirs.IMG_DIR.joinpath(image_name + image.suffix))

    return recipe
Beispiel #3
0
    def clean_recipe_dictionary(self, recipe_dict) -> Recipe:
        """Rewrite aliased keys, clean the dictionary, and build a Recipe.

        Runs rewrite_alias, then Cleaner.clean (passing along the recipe's
        "orgURL" when present), and unpacks the result into a Recipe object.
        """
        aliased = self.rewrite_alias(recipe_dict)
        cleaned = Cleaner.clean(aliased, url=aliased.get("orgURL", None))
        return Recipe(**cleaned)
Beispiel #4
0
def test_cleaner_instructions(instructions):
    """Each instruction step should be normalized to a ``{"text": ...}`` dict."""
    expected = [{"text": step} for step in ("A", "B", "C")]
    assert Cleaner.instructions(instructions) == expected
Beispiel #5
0
def create_from_url(url: str) -> Recipe:
    """Main entry point for generating a recipe from a URL. Pass in a URL and
    a Recipe object will be returned if successful.

    Args:
        url (str): a valid string representing a URL

    Returns:
        Recipe: Recipe Object

    Raises:
        requests.HTTPError: If the page cannot be fetched (4xx/5xx).
    """
    r = requests.get(url)
    # Fail fast on error responses instead of scraping a 404/500 page
    # as if it were recipe HTML.
    r.raise_for_status()

    new_recipe = extract_recipe_from_html(r.text, url)
    # NOTE(review): extract_recipe_from_html already cleans schema-scraped
    # recipes; this second clean also covers its OpenGraph fallback path.
    new_recipe = Cleaner.clean(new_recipe, url)
    new_recipe = download_image_for_recipe(new_recipe)

    return Recipe(**new_recipe)
Beispiel #6
0
def test_clean_image():
    """Cleaner.image should normalize None, str, dict, and list inputs."""
    cases = [
        (None, "no image"),
        ("https://my.image/path/", "https://my.image/path/"),
        ({"url": "My URL!"}, "My URL!"),
        (["My URL!", "MY SECOND URL"], "My URL!"),
    ]
    for given, expected in cases:
        assert Cleaner.image(given) == expected
Beispiel #7
0
def test_clean_html():
    """Cleaner.html should strip markup and keep only the text content."""
    markup = "<div>Hello World</div>"
    assert Cleaner.html(markup) == "Hello World"
Beispiel #8
0
def test_clean_category():
    """A bare category string should be wrapped into a one-element list."""
    result = Cleaner.category("my-category")
    assert result == ["my-category"]
Beispiel #9
0
def test_cleaner_clean(json_file, num_steps):
    """Cleaner.clean should parse each raw recipe fixture into the expected
    number of instruction steps."""
    # Use a context manager so the fixture file handle is closed; the
    # original bare open() inside the call leaked it.
    with open(TEST_RAW_RECIPES.joinpath(json_file)) as f:
        recipe_data = Cleaner.clean(json.load(f))

    assert len(recipe_data["recipeInstructions"]) == num_steps
Beispiel #10
0
def test_time_cleaner():
    """An ISO 8601 duration should be rendered as a human-readable string."""
    iso_duration = "PT2H30M"
    assert Cleaner.time(iso_duration) == "2 Hours 30 Minutes"