Beispiel #1
0
 def total_time(self):
     """Return the summed minutes of the "prep time" and "cook time" entries.

     Entries are the ``<li>`` items of the ``<ul class="recipe__details">``
     element; an item counts when the lower-cased text of its ``<span>`` is
     one of the two labels.
     """
     details = self.soup.find("ul", {"class": "recipe__details"})
     wanted_labels = ["prep time", "cook time"]
     minutes = 0
     for entry in details.find_all("li"):
         label = entry.span.get_text().lower()
         if label not in wanted_labels:
             continue
         # The duration text is taken from the third child node of the <li>.
         duration_text = list(entry.children)[2].strip()
         minutes += get_minutes(duration_text)
     return minutes
Beispiel #2
0
 def test_get_minutes_fraction_with_fraction_unicode_character_three_fours(
         self):
     # Whole number followed by the "¾" glyph: 1¾ hours is 105 minutes.
     minutes = get_minutes("1¾ hours")
     self.assertEqual(105, minutes)
Beispiel #3
0
    def test_get_minutes_int_in_string_literal(self):
        # The bare numeric string "90" is expected to come back as 90.
        minutes = get_minutes("90")
        assert minutes == 90
Beispiel #4
0
 def test_get_minutes_handles_to(self):
     # For the range "15 to 20 minutes" the expected result is 20.
     minutes = get_minutes("15 to 20 minutes")
     self.assertEqual(20, minutes)
Beispiel #5
0
 def test_get_minutes_english_description(self):
     # Hours and minutes spelled out in English: 60 + 15 = 75.
     minutes = get_minutes("1 hour 15 mins")
     self.assertEqual(75, minutes)
Beispiel #6
0
 def test_get_minutes_fraction_with_fraction_digits_with_slash_two_thirds(
         self):
     # Whole number plus a slash fraction: 1 2/3 hours is 100 minutes.
     minutes = get_minutes("1 2/3 hours")
     self.assertEqual(100, minutes)
Beispiel #7
0
 def test_get_minutes_english_description_with_and(self):
     # Abbreviated units joined by "and": 1h and 15mins is 75 minutes.
     minutes = get_minutes("1h and 15mins")
     self.assertEqual(75, minutes)
Beispiel #8
0
 def test_get_minutes_fraction_with_fraction_digits_with_slash(self):
     # Whole number plus a slash fraction: 1 1/2 hours is 90 minutes.
     minutes = get_minutes("1 1/2 hours")
     self.assertEqual(90, minutes)
Beispiel #9
0
    def test_get_minutes_long_iso_format(self):
        # Duration string with a day part: P0DT1H10M is expected to be 70.
        minutes = get_minutes("P0DT1H10M")
        assert minutes == 70
Beispiel #10
0
    def test_get_minutes_english_description(self):
        # Hours and minutes spelled out in English: 60 + 15 = 75.
        minutes = get_minutes("1 hour 15 mins")
        assert minutes == 75
Beispiel #11
0
    def test_get_minutes_short_iso_format(self):
        # Duration string without a day part: PT2H30M is expected to be 150.
        minutes = get_minutes("PT2H30M")
        assert minutes == 150
Beispiel #12
0
    def test_get_minutes_english_abbreviation(self):
        # Compact "h"/"m" suffixes with no spaces: 3h10m is 190 minutes.
        minutes = get_minutes("3h10m")
        assert minutes == 190
Beispiel #13
0
def get_from_scraper(scrape, space):
    """Convert a scraper object into a recipe dict modelled on ld+json.

    Args:
        scrape: scraper object; this function reads ``title()``,
            ``yields()``, ``total_time()``, ``image()``, ``ingredients()``,
            ``instructions()``, ``url`` and ``schema.data`` from it.
        space: passed through unchanged to ``parse_keywords``.

    Returns:
        dict with the keys ``name``, ``prepTime``, ``cookTime``,
        ``description``, ``servings``, ``keywords``, ``recipeIngredient``
        and ``recipeInstructions``; ``image`` is only present when
        ``scrape.image()`` does not raise ``AttributeError``.
    """
    # converting the scrape_me object to the existing json format based on ld+json

    def _ingredient_entry(amount, unit, ingredient, note, original):
        # Build one ingredient record. Ids are random placeholders; the unit
        # id is drawn before the ingredient id, as in the literal form.
        return {
            'amount': amount,
            'unit': {
                'text': unit,
                'id': random.randrange(10000, 99999)
            },
            'ingredient': {
                'text': ingredient,
                'id': random.randrange(10000, 99999)
            },
            'note': note,
            'original': original
        }

    recipe_json = {}
    recipe_json['name'] = scrape.title()

    try:
        description = scrape.schema.data.get("description") or ''
        recipe_json['prepTime'] = _utils.get_minutes(
            scrape.schema.data.get("prepTime")) or 0
        recipe_json['cookTime'] = _utils.get_minutes(
            scrape.schema.data.get("cookTime")) or 0
    except AttributeError:
        description = ''
        recipe_json['prepTime'] = 0
        recipe_json['cookTime'] = 0

    recipe_json['description'] = description

    try:
        servings = int(re.findall(r'\b\d+\b', scrape.yields())[0])
    except (AttributeError, TypeError, ValueError, IndexError):
        # TypeError: re.findall rejects a non-string yield (e.g. an int or
        # None); fall back to one serving like the other failure modes.
        servings = 1
    recipe_json['servings'] = servings

    if recipe_json['cookTime'] + recipe_json['prepTime'] == 0:
        try:
            # "or 0" keeps prepTime numeric when total_time() yields None,
            # matching the defaults used for the schema-based times above.
            recipe_json['prepTime'] = scrape.total_time() or 0
        except AttributeError:
            pass

    try:
        recipe_json['image'] = scrape.image()
    except AttributeError:
        pass

    keywords = []
    try:
        # Keywords are merged from three schema fields, then case-folded
        # and de-duplicated before being handed to parse_keywords.
        for field in ("keywords", "recipeCategory", "recipeCuisine"):
            if scrape.schema.data.get(field):
                keywords += listify_keywords(scrape.schema.data.get(field))
        recipe_json['keywords'] = parse_keywords(
            list(set(map(str.casefold, keywords))), space)
    except AttributeError:
        # Keep whatever raw keywords were collected before the failure.
        recipe_json['keywords'] = keywords

    ingredients = []
    try:
        for x in scrape.ingredients():
            try:
                amount, unit, ingredient, note = parse_ingredient(x)
            except Exception:
                # Unparseable line: keep the raw text as the ingredient.
                ingredients.append(_ingredient_entry(0, '', x, '', x))
            else:
                if ingredient:
                    ingredients.append(
                        _ingredient_entry(amount, unit, ingredient, note, x))
    except AttributeError:
        pass
    recipe_json['recipeIngredient'] = ingredients

    try:
        recipe_json['recipeInstructions'] = scrape.instructions()
    except AttributeError:
        recipe_json['recipeInstructions'] = ""

    # Only append the source note when a URL is actually available;
    # concatenating None would raise TypeError.
    if scrape.url:
        recipe_json['recipeInstructions'] += "\n\nImported from " + scrape.url
    return recipe_json
Beispiel #14
0
 def test_get_minutes_fraction_with_fraction_unicode_character_one_fours(
         self):
     # Whole number followed by the "¼" glyph: 1¼ hours is 75 minutes.
     minutes = get_minutes("1¼ hours")
     self.assertEqual(75, minutes)
Beispiel #15
0
 def test_get_minutes_english_abbreviation(self):
     # Compact "h"/"m" suffixes with no spaces: 3h10m is 190 minutes.
     minutes = get_minutes("3h10m")
     self.assertEqual(190, minutes)
Beispiel #16
0
 def test_get_minutes_fraction_with_fraction_unicode_character_two_thirds(
         self):
     # Whole number followed by the "⅔" glyph: 1⅔ hours is 100 minutes.
     minutes = get_minutes("1⅔ hours")
     self.assertEqual(100, minutes)
Beispiel #17
0
 def test_get_minutes_short_iso_format(self):
     # Duration string without a day part: PT2H30M is expected to be 150.
     minutes = get_minutes("PT2H30M")
     self.assertEqual(150, minutes)
Beispiel #18
0
 def test_get_minutes_fraction_with_fraction_digits_with_slash_three_fours(
         self):
     # Whole number plus a slash fraction: 1 3/4 hours is 105 minutes.
     minutes = get_minutes("1 3/4 hours")
     self.assertEqual(105, minutes)
Beispiel #19
0
 def test_get_minutes_long_iso_format(self):
     # Duration string with a day part: P0DT1H10M is expected to be 70.
     minutes = get_minutes("P0DT1H10M")
     self.assertEqual(70, minutes)
Beispiel #20
0
 def test_get_minutes_fraction_with_fraction_digits_with_slash_one_fours(
         self):
     # Whole number plus a slash fraction: 1 1/4 hours is 75 minutes.
     minutes = get_minutes("1 1/4 hours")
     self.assertEqual(75, minutes)
Beispiel #21
0
 def test_get_minutes_int_in_string_literal(self):
     # The bare numeric string "90" is expected to come back as 90.
     minutes = get_minutes("90")
     self.assertEqual(90, minutes)
Beispiel #22
0
 def test_get_minutes_handles_dashes(self):
     # For the dashed range "15 - 20 minutes" the expected result is 20.
     minutes = get_minutes("15 - 20 minutes")
     self.assertEqual(20, minutes)
Beispiel #23
0
 def test_get_minutes_fraction_in_hours_with_dot_notation(self):
     # Decimal hours: 1.5 hours is 90 minutes.
     minutes = get_minutes("1.5 hours")
     self.assertEqual(90, minutes)
Beispiel #24
0
def get_from_scraper(scrape, request):
    """Collect recipe fields from a scraper object into ``recipe_json``.

    Most fields are read from a scraper accessor method first and fall back
    to the matching key in ``scrape.schema.data``:

    * name, description, servings and image fall back when the accessor
      raises or returns a falsy value;
    * working/waiting time, category and cuisine fall back only when the
      accessor raises.

    NOTE(review): the visible body never returns ``recipe_json`` and never
    stores ``description`` or ``keywords`` in it, and ``request`` is unused;
    the function appears to be cut off here -- confirm against the full
    source before relying on this description.
    """
    # converting the scrape_me object to the existing json format based on ld+json
    recipe_json = {}
    # --- name: scraper title first, schema "name" second, '' as last resort
    try:
        recipe_json['name'] = parse_name(scrape.title() or None)
    except Exception:
        recipe_json['name'] = None
    if not recipe_json['name']:
        try:
            recipe_json['name'] = scrape.schema.data.get('name') or ''
        except Exception:
            recipe_json['name'] = ''

    # --- description: kept in a local only within the visible code
    try:
        description = scrape.description() or None
    except Exception:
        description = None
    if not description:
        try:
            description = scrape.schema.data.get("description") or ''
        except Exception:
            description = ''

    recipe_json['internal'] = True

    # --- servings: raw value defaults to 1 and is then parsed twice, once
    # into 'servings' and once into 'servings_text'
    try:
        servings = scrape.yields() or None
    except Exception:
        servings = None
    if not servings:
        try:
            servings = scrape.schema.data.get('recipeYield') or 1
        except Exception:
            servings = 1

    recipe_json['servings'] = parse_servings(servings)
    recipe_json['servings_text'] = parse_servings_text(servings)

    # --- times: prep time -> 'working_time', cook time -> 'waiting_time';
    # a falsy get_minutes() result becomes 0 without trying the schema
    try:
        recipe_json['working_time'] = get_minutes(scrape.prep_time()) or 0
    except Exception:
        try:
            recipe_json['working_time'] = get_minutes(
                scrape.schema.data.get("prepTime")) or 0
        except Exception:
            recipe_json['working_time'] = 0
    try:
        recipe_json['waiting_time'] = get_minutes(scrape.cook_time()) or 0
    except Exception:
        try:
            recipe_json['waiting_time'] = get_minutes(
                scrape.schema.data.get("cookTime")) or 0
        except Exception:
            recipe_json['waiting_time'] = 0

    # When neither time is known, the total time is stored as working_time.
    if recipe_json['working_time'] + recipe_json['waiting_time'] == 0:
        try:
            recipe_json['working_time'] = get_minutes(scrape.total_time()) or 0
        except Exception:
            try:
                recipe_json['working_time'] = get_minutes(
                    scrape.schema.data.get("totalTime")) or 0
            except Exception:
                # working_time keeps the 0 assigned above
                pass

    # --- image: scraper image first, schema "image" second, '' as last resort
    try:
        recipe_json['image'] = parse_image(scrape.image()) or None
    except Exception:
        recipe_json['image'] = None
    if not recipe_json['image']:
        try:
            recipe_json['image'] = parse_image(
                scrape.schema.data.get('image')) or ''
        except Exception:
            recipe_json['image'] = ''

    # --- keywords: accumulated from schema keywords, category and cuisine;
    # every source is best-effort and failures are ignored
    keywords = []
    try:
        if scrape.schema.data.get("keywords"):
            keywords += listify_keywords(scrape.schema.data.get("keywords"))
    except Exception:
        pass
    try:
        if scrape.category():
            keywords += listify_keywords(scrape.category())
    except Exception:
        try:
            if scrape.schema.data.get('recipeCategory'):
                keywords += listify_keywords(
                    scrape.schema.data.get("recipeCategory"))
        except Exception:
            pass
    try:
        if scrape.cuisine():
            keywords += listify_keywords(scrape.cuisine())
    except Exception:
        try:
            if scrape.schema.data.get('recipeCuisine'):
                keywords += listify_keywords(
                    scrape.schema.data.get("recipeCuisine"))
        except Exception:
            pass

    # --- source: the URL is recorded and its host part (text between the
    # scheme and the first '/') is added as an extra keyword
    if source_url := scrape.url:
        recipe_json['source_url'] = source_url
        try:
            keywords.append(
                source_url.replace('http://', '').replace('https://',
                                                          '').split('/')[0])
        except Exception:
            pass
Beispiel #25
0
 def test_get_minutes_fraction_with_fraction_unicode_character_halves(self):
     # Whole number followed by the "½" glyph: 1½ hours is 90 minutes.
     minutes = get_minutes("1½ hours")
     self.assertEqual(90, minutes)
Beispiel #26
0
def get_from_scraper(scrape, request):
    """Convert a scraper object into a recipe dict modelled on ld+json.

    Every field is best-effort: a failing scraper call falls back to the
    matching ``scrape.schema.data`` key where one is tried, and otherwise
    to a neutral default.

    Args:
        scrape: scraper object; this function reads ``title()``,
            ``yields()``, ``total_time()``, ``image()``, ``ingredients()``,
            ``instructions()``, ``url`` and ``schema.data`` from it.
        request: passed to ``IngredientParser``; ``request.space`` is passed
            to ``parse_keywords``.

    Returns:
        dict with the keys ``name``, ``description``, ``servings``,
        ``prepTime``, ``cookTime``, ``image``, ``keywords``,
        ``recipeIngredient`` and ``recipeInstructions``; ``url`` is added
        when ``scrape.url`` is truthy.
    """
    # converting the scrape_me object to the existing json format based on ld+json
    recipe_json = {}
    try:
        recipe_json['name'] = parse_name(scrape.title() or None)
    except Exception:
        recipe_json['name'] = None
    if not recipe_json['name']:
        try:
            recipe_json['name'] = scrape.schema.data.get('name') or ''
        except Exception:
            recipe_json['name'] = ''

    try:
        description = scrape.schema.data.get("description") or ''
    except Exception:
        description = ''

    recipe_json['description'] = parse_description(description)

    try:
        servings = scrape.yields() or None
    except Exception:
        servings = None
    if not servings:
        try:
            servings = scrape.schema.data.get('recipeYield') or 1
        except Exception:
            servings = 1
    # Exact type check on purpose: anything that is not a plain int goes
    # through the digit extraction below (and ends up as 1 if that fails).
    if type(servings) is not int:
        try:
            servings = int(re.findall(r'\b\d+\b', servings)[0])
        except Exception:
            servings = 1
    recipe_json['servings'] = max(servings, 1)

    try:
        recipe_json['prepTime'] = get_minutes(
            scrape.schema.data.get("prepTime")) or 0
    except Exception:
        recipe_json['prepTime'] = 0
    try:
        recipe_json['cookTime'] = get_minutes(
            scrape.schema.data.get("cookTime")) or 0
    except Exception:
        recipe_json['cookTime'] = 0

    # When neither time is known, the total time is stored as prepTime.
    if recipe_json['cookTime'] + recipe_json['prepTime'] == 0:
        try:
            recipe_json['prepTime'] = get_minutes(scrape.total_time()) or 0
        except Exception:
            try:
                # The schema fallback must be stored; evaluating it without
                # assigning the result would silently drop the value.
                recipe_json['prepTime'] = get_minutes(
                    scrape.schema.data.get("totalTime")) or 0
            except Exception:
                # prepTime keeps the 0 assigned above
                pass

    try:
        recipe_json['image'] = parse_image(scrape.image()) or None
    except Exception:
        recipe_json['image'] = None
    if not recipe_json['image']:
        try:
            recipe_json['image'] = parse_image(
                scrape.schema.data.get('image')) or ''
        except Exception:
            recipe_json['image'] = ''

    # Each keyword source is tried independently so one failing field does
    # not discard the others.
    keywords = []
    for field in ("keywords", "recipeCategory", "recipeCuisine"):
        try:
            if scrape.schema.data.get(field):
                keywords += listify_keywords(scrape.schema.data.get(field))
        except Exception:
            pass
    try:
        recipe_json['keywords'] = parse_keywords(
            list(set(map(str.casefold, keywords))), request.space)
    except AttributeError:
        # Keep the raw keyword list when parsing is not possible.
        recipe_json['keywords'] = keywords

    ingredient_parser = IngredientParser(request, True)
    ingredients = []
    try:
        for x in scrape.ingredients():
            try:
                amount, unit, ingredient, note = ingredient_parser.parse(x)
                ingredients.append({
                    'amount': amount,
                    'unit': {
                        'text': unit,
                        'id': random.randrange(10000, 99999)
                    },
                    'ingredient': {
                        'text': ingredient,
                        'id': random.randrange(10000, 99999)
                    },
                    'note': note,
                    'original': x
                })
            except Exception:
                # Unparseable line: keep the raw text as the ingredient.
                ingredients.append({
                    'amount': 0,
                    'unit': {
                        'text': '',
                        'id': random.randrange(10000, 99999)
                    },
                    'ingredient': {
                        'text': x,
                        'id': random.randrange(10000, 99999)
                    },
                    'note': '',
                    'original': x
                })
    except Exception:
        # Keep whatever was collected before the scraper failed.
        pass
    recipe_json['recipeIngredient'] = ingredients

    try:
        recipe_json['recipeInstructions'] = parse_instructions(
            scrape.instructions())
    except Exception:
        recipe_json['recipeInstructions'] = ""

    if scrape.url:
        recipe_json['url'] = scrape.url
        recipe_json['recipeInstructions'] += "\n\nImported from " + scrape.url
    return recipe_json
Beispiel #27
0
def parse_recipe():
    """Scrape the recipe at the ``url`` query parameter and return it.

    The schema-based scraper is tried first; when it yields exactly one
    recipe, that recipe is normalised (instructions and keywords become
    lists of HTML-escaped strings, image/author/yield are flattened, the
    time fields are converted with ``get_minutes``) and returned. If that
    path fails or yields anything else, ``scrape_me`` is used to build a
    ``"noSchema"`` dict instead.

    Returns:
        The recipe dict, or a ``make_response`` result with status 404 when
        no ``url`` parameter was supplied and 500 when both scrapers fail.
    """
    # .get() returns None for a missing key instead of raising, so the
    # missing-parameter case has to be tested explicitly.
    recipe_url = request.args.get('url')
    if not recipe_url:
        return make_response(
            'Need a url in parameters. See <a href="/api">/api</a> for more info',
            404)
    try:
        recipes = scrape_schema_recipe.scrape_url(recipe_url)
        if len(recipes) == 1 and recipes[0] is not None:
            recipe = recipes[0]
            if 'recipeInstructions' in recipe:
                ins = recipe['recipeInstructions']
                if isinstance(ins, str):
                    recipe['recipeInstructions'] = [html.escape(ins)]
                elif isinstance(ins, list) and len(ins) > 0:
                    if isinstance(ins[0], dict):
                        # List of step objects: keep only their 'text'.
                        recipe['recipeInstructions'] = [
                            html.escape(v)
                            for item in ins
                            for k, v in item.items()
                            if k == 'text'
                        ]
                    else:
                        recipe['recipeInstructions'] = [
                            html.escape(i) for i in ins
                        ]
            if 'keywords' in recipe:
                keywords = recipe['keywords']
                # Keywords may arrive as one comma-separated string or
                # already split into a list; accept both.
                if isinstance(keywords, str):
                    keywords = keywords.split(',')
                recipe['keywords'] = [
                    html.escape(i.strip()) for i in keywords
                ]
            if 'image' in recipe:
                if isinstance(recipe['image'], dict):
                    if 'url' in recipe['image']:
                        recipe['image'] = recipe['image']['url']
                # Checked separately: the extracted 'url' may itself be a
                # list, in which case the last entry is used.
                if isinstance(recipe['image'], list):
                    recipe['image'] = recipe['image'][-1]
            if 'author' in recipe:
                author = recipe['author']
                if isinstance(author, dict) and 'name' in author:
                    recipe['author'] = html.escape(author['name'])
            if 'recipeYield' in recipe:
                r_yield = recipe['recipeYield']
                if isinstance(r_yield, str):
                    recipe['recipeYield'] = r_yield.split(',')[0].strip()
                elif isinstance(r_yield, list) and len(r_yield) > 0:
                    recipe['recipeYield'] = r_yield[0]
            for time_key in ('cookTime', 'prepTime', 'totalTime'):
                if time_key in recipe:
                    recipe[time_key] = get_minutes(recipe[time_key])
            return recipe
    except Exception as e:
        # Best effort: report the problem and fall through to scrape_me.
        print(e.args)

    try:
        recipe = scrape_me(recipe_url)
        # NOTE(review): other callers of this scraper API read `url` as an
        # attribute rather than calling it -- confirm `recipe.url()` is
        # valid for the scraper version in use.
        to_return = {
            "@type":
            "noSchema",
            "name":
            recipe.title(),
            "url":
            recipe.url(),
            "recipeIngredients":
            recipe.ingredients(),
            "recipeInstructions":
            [i for i in recipe.instructions().split('\n') if i != ""],
            "review":
            recipe.reviews(),
            "aggregateRating":
            recipe.ratings(),
            "totalTime":
            recipe.total_time(),
            "recipeYield":
            recipe.yields(),
            "image":
            recipe.image()
        }
        return to_return
    except Exception as e:
        return make_response(
            f'Error processing request. That domain might not be in the list\
             See <a href="/api">/api</a> for more info. Error: {e.args}', 500)