Example #1
def recipe_from_url(request):
    url = request.POST['url']

    try:
        scrape = scrape_me(url)
    except WebsiteNotImplementedError:
        try:
            scrape = scrape_me(url, wild_mode=True)
        except NoSchemaFoundInWildMode:
            return JsonResponse(
                {
                    'error': True,
                    'msg': _('The requested site provided malformed data and cannot be read.')
                },
                status=400)
    except ConnectionError:
        return JsonResponse(
            {
                'error': True,
                'msg': _('The requested page could not be found.')
            },
            status=400)
    return JsonResponse(get_from_scraper(scrape, request.space))
Example #2
def AcceptImage():
    try:
        if request.method == "POST":
            f = request.files['fileupload']
            filename = f.filename
            f.save(os.path.join(Save_Folder, secure_filename(f.filename)))
            CheckVal = request.form.get("Check")

            ActualObject = MakePrediction(filename)
            ListOfLink = FindOnGoogle(ActualObject)
            ValidSite = []
            counter = 0
            max_len = 0

            for Link in ListOfLink:
                try:
                    if counter == 3:
                        break
                    # scrape each link once and reuse the scraper object
                    scraper = scrape_me(Link)
                    ListofItem = scraper.ingredients()
                    Time = scraper.total_time()
                    Title = scraper.title()

                    if len(ListofItem) != 0:
                        Ingredient, Price = WegmanAPI.FormatData(ListofItem)
                        ValidSite.append([
                            Link,
                            Ingredient,
                            round(float(Price), 2),
                            Time,
                            Title,
                        ])
                        counter += 1
                except Exception as err:
                    print("Failed to scrape {}: {}".format(Link, err))

            ValidSite = Sorting.SortProperly(ValidSite, CheckVal)

            # pad every ingredient list to the length of the longest one
            for i in ValidSite:
                if len(i[1]) > max_len:
                    max_len = len(i[1])
            for i in ValidSite:
                while len(i[1]) < max_len:
                    i[1].append(' ')

            return render_template("Recipes.html",
                                   ActualObject=ActualObject,
                                   ListOfLink=ValidSite,
                                   CheckV=CheckVal)
        else:
            return "Only POST requests are supported"
    except Exception:
        return render_template("ErrorPage.html")
Example #3
def write_recipe_to_file(link):
    from recipe_scrapers import scrape_me
    scraper = scrape_me(link)

    ingredients = scraper.ingredients()
    write_json_to_file(
        "recipes/" + scraper.title(),
        {
            "title": scraper.title(),
            "ingredients": ingredients,
            "just_ingredients": unique(join_lists([extract_ingredient(i) for i in ingredients])),
            "image": scraper.image(),
            "instructions": scraper.instructions(),
            "total_time": scraper.total_time(),
            "yields": scraper.yields(),
        })
Example #4
def scrapeRecipeUrl(url):
    scraper = scrape_me(url)

    if not scraper.isRecipe():
        return

    rawIngredients = scraper.ingredients()
    ingredients = parseIngredients(rawIngredients['parsed_ingredients'])
    tags = scraper.tags()
    title = scraper.title()
    image = scraper.images()
    recipe_summary = scraper.recipe_summary()
    raiting = scraper.raiting()

    # bail out early on incomplete recipes before building the full record
    if not image or not recipe_summary:
        return "no image, not saved"
    if raiting == 0:
        return 'no raiting, not saved'

    data = {}
    data['title'] = title
    data['img_src'] = image
    data['url'] = url
    data['tags'] = tags
    data['raiting'] = raiting
    data['instructions'] = scraper.instructions()
    data['full_nutrition_data'] = scraper.full_nutrition_data()
    data['parsed_ingredients'] = ingredients
    data['ingredients'] = rawIngredients['ingredients']
    data['recipe_summary'] = recipe_summary
    data['searchable_keys'] = createSearchableKeys(ingredients, title, tags)

    savedPath = '../recipes/' + title + '.json'

    with open(savedPath, 'w') as outfile:
        json.dump(data, outfile)
    return title
Example #5
def scrape_recipe(url):
    recipe = {}

    try:
        scraper = scrape_me(url)
        recipe = {
            'name': scraper.title(),
            'ingredients': scraper.ingredients(),
            'instructions': scraper.instructions().split("\n"),
            'image': scraper.image(),
            'url': url,
        }
    except WebsiteNotImplementedError:
        pass

    if not recipe:
        parsed_uri = urllib.parse.urlparse(url)
        domain = parsed_uri.netloc.lower()
        parser = parsers.getParser(domain)

        if parser is None:
            return None

        recipe = parser.Parse(url)

    return recipe
Example #6
def main(food):
    from googlesearch import search
    from recipe_scrapers import scrape_me
    # google a recipe from allrecipes.com
    query = food + " allrecipes.com"
    # find the desired url
    url = ""
    for i in search(query, tld="com", num=1, stop=1, pause=2):
        url = i
    print("url: ", url)
    # assign scraper to this url
    scraper = scrape_me(url)
    # join the ingredients into a readable one-per-line string
    ingredients = "\n".join(scraper.ingredients())
    # remove the registered trademark symbol for speech purposes
    ingredients = ingredients.replace("®", "")

    #if nothing is returned
    if ingredients == "":
        return "Not specific enough"

    return ingredients
Example #7
def scrape_recipe(url):
    recipe = {}

    parsed_uri = urllib.parse.urlparse(url)
    domain = parsed_uri.netloc.lower()
    if domain.startswith('www.'):
        domain = domain.replace('www.', '', 1)
    parser = parsers.getParser(domain)

    if parser is not None:
        recipe = parser.Parse(url)

    if not recipe:
        try:
            scraper = scrape_me(url)
            instructions = [
                i.strip() for i in scraper.instructions().split("\n")
                if i.strip()
            ]
            recipe = {
                'name': scraper.title(),
                'ingredients': scraper.ingredients(),
                'instructions': instructions,
                'image': scraper.image(),
                'url': url,
            }
        except WebsiteNotImplementedError:
            pass

    return recipe
Example #8
    def recipe_scraper2json(args, url):
        from recipe_scrapers import scrape_me

        print_debug("Using recipe-scraper module...")

        recipe_json={}
        recipe_json['url'] = url

        try:
            scraper = scrape_me(url)

            recipe_json['title'] = scraper.title()
            recipe_json['description'] = ''
            recipe_json['yield'] = scraper.yields()
            recipe_json['preptime'] = ''
            recipe_json['cooktime'] = ''
            recipe_json['totaltime'] = minutes2time(scraper.total_time())
            # build the groups as dict literals instead of parsing JSON strings
            recipe_json['ingredient_groups'] = [
                {"title": "", "ingredients": scraper.ingredients()}
            ]
            recipe_json['direction_groups'] = [
                {"group": "", "directions": scraper.instructions().split('\n')}
            ]

        except Exception:
            raise UrlError(url, 'URL not supported.')

        return recipe_json
Example #9
def scrape_url(url):
    """ function that scrapes recipe data"""
    try:
        scraper = scrape_me(url, wild_mode=True)
        print(scraper.title())
        print(f'cooking time: {scraper.total_time()}')
        print(f'number of servings {scraper.yields()[:2]}')
        print('\nRECIPE\n')
        for i in scraper.ingredients():
            print(i)
        print('\nINSTRUCTIONS\n')
        print(scraper.instructions())
        print(f'\nimage: {scraper.image()}')
        print(f'\nsource: {scraper.host()}')
        print('\nNUTRIENTS\n')
        for k, v in scraper.nutrients().items():
            print(f'{k}:{v}')  # if available
        print(f'\nauthor: {scraper.author()}')
        print(f'\ncanonical_url: {scraper.canonical_url()}')
        print(f'\nlanguage: {scraper.language()}')
        print(f'\nreviews: {scraper.reviews()}')
        print(f'\nsite_name: {scraper.site_name()}')

    except Exception:
        print(f'no information retrieved from {url}')
    return 0
Example #10
def get_recipe(url):
    """
    function to scrape a recipe with a given url and return the content in 
    a ordered way as a JSON
    """

    try:

        # list to store all parts of the recipe
        complete_recipe = []

        # query the given url
        scraper = scrape_me(url)

        # append the title of the recipe
        complete_recipe.append({"title": scraper.title()})

        # parse ingredients

        ingredients_dict = parse_ingredients_mod(scraper)
        complete_recipe.append(ingredients_dict)

        # parse instructions

        instructions_list = scraper.instructions().splitlines()
        complete_recipe.append({"instructions": instructions_list})

        return json.dumps({"recipe": complete_recipe})

    except Exception as e:
        return json.dumps({"error": f"{e}"})
Example #11
def get_recipe(URL):
    """Return a recipe_scrapers scraper for the URL, exposing .title(),
    .ingredients(), .total_time(), .instructions() and .links()."""
    return scrape_me(URL)
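A minimal usage sketch for the scraper returned above (the URL is a placeholder reused from Example #12):

scraper = get_recipe("https://www.allrecipes.com/recipe/8499/basic-chicken-salad")
print(scraper.title())
print(scraper.total_time())
for ingredient in scraper.ingredients():
    print(ingredient)
print(scraper.instructions())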
Example #12
def add_new_recipe():
    #  this function adds new recipes via a URL and displays the recipe
    print(
        "Try this URL if you need inspiration: https://www.allrecipes.com/recipe/8499/basic-chicken-salad\n"
    )
    URL = input('What is the recipe URL? ')
    try:
        scraper = scrape_me(URL)
        display_title_bar()
        ingredients = []
        print('\n**' + scraper.title() + '**\n')
        print("Yields: {}\n".format(scraper.yields()))
        print('INGREDIENTS')
        for ingredient in scraper.ingredients():
            ingredients.append(ingredient)
            print(ingredient)
        print('\nINSTRUCTIONS')
        print(scraper.instructions())
        recipe = {
            'title': scraper.title(),
            'ingredients': ingredients,
            'instructions': scraper.instructions()
        }
        if debug:
            print(recipe['title'])
        choice = input('\nWould you like to save this recipe? (y/n) ')
        if choice == 'y':
            save_recipe(recipe)
        return True
    except Exception:
        print("That website is not supported, please try again.")
        return True
Example #13
    def scrape_minamlist_baker(self):

        source = 'minamalist_baker'
        recipe_list = [
            x["_id"]
            for x in list(self.urls.find({"source": source}, {"_id": 1}))
        ]

        for i in range(1, 63):
            page = scrape_me(
                "https://minimalistbaker.com/recipe-index/?fwp_paged=" +
                str(i)).links()
            wait()
            for link in page:
                if 'tabindex' in link:
                    recipe = link['href']
                    if recipe not in recipe_list:
                        self.urls.insert_one({
                            "_id": recipe,
                            "name": recipe.split(".com/")[1][:-1],
                            'read': False,
                            'type': [],
                            'source': source,
                        })
                        recipe_list.append(recipe)
            print(f"Recipes Scraped: {len(recipe_list)}")
Example #14
    def scrape_inspiralized(self):

        source = 'inspiralized'
        recipe_list = [
            x["_id"]
            for x in list(self.urls.find({"source": source}, {"_id": 1}))
        ]

        for i in range(1, 60):
            page = scrape_me("https://inspiralized.com/recipe-index/page/" +
                             str(i)).links()
            wait()
            for link in page:
                if "data-id" in link:
                    recipe = link['href']
                    if recipe not in recipe_list:
                        self.urls.insert_one({
                            "_id": recipe,
                            "name": recipe.split(".com/")[1][:-1],
                            'read': False,
                            'type': [],
                            'source': source,
                            "website_id": link["data-id"],
                        })
                        recipe_list.append(recipe)
            print(f"Recipes Scraped: {len(recipe_list)}")
Example #15
    def scrape_kreme_de_la_krum(self):

        source = 'creme_de_la_crum'
        recipe_list = [
            x["_id"]
            for x in list(self.urls.find({"source": source}, {"_id": 1}))
        ]

        for i in range(1, 53):
            page = scrape_me(
                "https://www.lecremedelacrumb.com/recipe-index//page/" +
                str(i)).links()
            wait()
            for link in page:
                if 'rel' in link and "bookmark" in link["rel"]:
                    recipe = link['href']
                    if recipe not in recipe_list:
                        self.urls.insert_one({
                            "_id": recipe,
                            "name": recipe.split(".com/")[1][:-1],
                            'read': False,
                            'type': [],
                            'source': source,
                        })
                        recipe_list.append(recipe)
            print(f"Recipes Scraped: {len(recipe_list)}")
Example #16
def find_recipe(keywords: str):
    """
    Tries to find a recipe given the keywords (a space-separated string of words).
    Only URLs supported by the `recipe-scrapers` library are considered.

    :param keywords: Keywords describing the dish to search
    :return: a :py:class:`Recipe` object describing a found recipe or `None` if nothing was found
    """

    for url in search(keywords + " recipe"):
        if any(urlparse(url).netloc.endswith(k) for k in SCRAPERS.keys()):
            scraper = scrape_me(url)
            nutrients = {}

            for nutr, value in scraper.nutrients().items():
                name = ' '.join(camel_case_regex.findall(nutr))
                nutrients[name[0].upper() + name[1:].lower()] = value

            return Recipe(
                url=url,
                title=scraper.title(),
                total_time=scraper.total_time(),
                yields=scraper.yields(),
                ingredients=scraper.ingredients(),
                instructions=scraper.instructions(),
                nutrients=nutrients,
                image=scraper.image()
            )
    
    return None
Example #17
    def get_recipe(self, recipe_url):
        scraper = scrape_me(recipe_url)

        myIngredients = scraper.ingredients()
        myIngredientsString = ', '.join(myIngredients[1:])
        print(myIngredientsString)

        image = Image.open(r'test.jpg')

        draw = ImageDraw.Draw(image)

        # specified font size

        fontsize = 20  # starting font size
        font = ImageFont.truetype('gillsans.ttf', fontsize)
        text1 = myIngredientsString

        text_color = (0, 0, 0)
        text_start_height = 5
        self.draw_multiple_line_text(image, scraper.title(), font, 'red',
                                     text_start_height)
        self.draw_multiple_line_text(image, "Ingredients", font, 'red', 50)
        self.draw_multiple_line_text(image, text1, font, text_color, 75)
        self.draw_multiple_line_text(image, "Instructions", font, 'red', 575)
        self.draw_multiple_line_text(image, scraper.instructions(), font,
                                     text_color, 600)
        print("Jackpot")
        image.save('pil_text.png')
Example #18
def get_recipe(url):
    try:
        scrap = scrape_me(url)
    except Exception:
        print('Could not scrape URL {}'.format(url))
        return {}

    try:
        title = scrap.title()
    except AttributeError:
        title = None

    try:
        ingredients = scrap.ingredients()
    except AttributeError:
        ingredients = None

    try:
        instructions = scrap.instructions()
    except AttributeError:
        instructions = None

    try:
        picture_link = scrap.image()
    except AttributeError:
        picture_link = None

    return {
        'title': title,
        'ingredients': ingredients,
        'instructions': instructions,
        'picture_link': picture_link,
    }
Example #19
def ingredients_from_url():
    response = request.get_json()
    scraper = scrape_me(response["url"])

    return jsonify({
        "ingredients": "\n".join(scraper.ingredients()),
        "instructions": scraper.instructions().replace("\n", "\n\n")
    })
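A hedged client-side sketch for calling the endpoint above (the route path, port, and URL are assumptions for illustration):

import requests

resp = requests.post("http://localhost:5000/ingredients_from_url",
                     json={"url": "https://www.allrecipes.com/recipe/8499/basic-chicken-salad"})
print(resp.json()["ingredients"])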
Example #20
    def setUp(self):
        with open("tests/test_data/wild_mode.testhtml",
                  encoding="utf-8") as testfile:
            options = {
                "wild_mode": True,
                "exception_handling": False,
                "test": True,
            }
            self.wild_mode_scraper = scrape_me(testfile, **options)
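For illustration, a hypothetical companion test; the method and its assertion are assumptions, not part of the original suite:

    def test_wild_mode_recovers_title(self):
        # hypothetical check: wild mode should still recover a non-empty title
        self.assertTrue(self.wild_mode_scraper.title())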
Example #21
def scrape(request):
    try:
        scraper = scrape_me(request.POST["url"])
        form = form_from_scrape(scraper, request.POST["url"])
        context = {'form': form}
        return render(request, "recipes/scrape.html", context)
    except WebsiteNotImplementedError:
        msg = "the url " + request.POST["url"] + " is not supported by recipe_scraper"
        return write(request, error_message_link=msg)
Example #22
def returnScraped(url):
    scraper = scrape_me(url)

    # pass a dict rather than a set literal: ingredients() returns a list,
    # which is unhashable, and named fields keep the response unambiguous
    return format_response({
        'title': scraper.title(),
        'total_time': scraper.total_time(),
        'ingredients': scraper.ingredients(),
        'instructions': scraper.instructions()
    })
Example #23
def test(url):
    scraper = scrape_me(url)

    rawIngredients = scraper.ingredients()
    ingredients = parseIngredients(rawIngredients['parsed_ingredients'])
    tags = scraper.tags()
    print(tags)

Example #24
    def scrape(self):
        recipes = []
        for url in self._urls:
            try:
                if MongoHelper.getRecipeByUrl(url).count() > 0:
                    print('Recipe is already in DB for URL:{}'.format(url))
                    continue

                scraper = scrape_me(url)
                if not self._isRecipe(scraper):
                    continue

                name = scraper.title()

                ingredients = scraper.ingredients()

                directions = scraper.instructions()

                servingCount = scraper.yields()

                totalTime = scraper.total_time()

                image = scraper.image()

                ratings = scraper.ratings()

                recipe = {
                    'name': name,
                    'url': url,
                    'ingredients': ingredients,
                    'directions': directions,
                    'servingCount': servingCount,
                    'image': image,
                    'totalTime': totalTime,
                    'sourceName': self.website.name,
                    'ratings': ratings,
                    'scrapeTime': datetime.datetime.now(),
                    'language': self.website.language
                }

                recipes.append(recipe)
                print('Scraped Recipe: {}, from URL: {}, RecipeBatch#: {}'.format(
                    name, url, len(recipes)))

                if len(recipes) >= self._recipeBuffer:
                    recipeIds = MongoHelper.insertRecipes(recipes)
                    recipes = []
                    print('{} Recipes have been successfully written: {}'.format(
                        self._recipeBuffer, recipeIds))

                # sleep between requests to avoid rate limits
                time.sleep(self._sleepTime)
            except Exception:
                print('Could not parse url: ', url)
                continue
Example #25
def get_recipe(dishname):
    instructions = []
    link = get_link(dishname)
    if link is not None:
        try:
            scraper = scrape_me(link)
            instructions = scraper.instructions()
        except Exception:
            instructions = []
    return instructions
Example #26
def answer(dish):

    dish = dish.replace(" ", "-")
    dish += "-"
    query = f"https://www.foodnetwork.com/search/{dish}"
    # NOTE: scrape_me expects a recipe URL; the search page built above is the
    # only URL available here. The selector note below suggests the original
    # intent was to follow the first search result:
    # $(".m-MediaBlock__a-Headline a")[0].href
    recipe = scrape_me(query)
    text = "Recipe for %s. Total time: %s. Here is the list of ingredients. %s" % (
        recipe.title(), recipe.total_time(), ", ".join(recipe.ingredients()))
    return statement(text)
Example #27
    def scrape_host_the_toast(self):

        source = 'host_the_toast'
        recipe_list = [
            x["_id"]
            for x in list(self.urls.find({"source": source}, {"_id": 1}))
        ]
        page = scrape_me("https://hostthetoast.com/recipes/").links()
        categories = []

        # Some sites require category exploration to loop through recipes
        for link in page:
            if "category" in link["href"]:
                categories.append(link['href'])

        for c in categories:
            c_name = c.split("/category/")[1][:-1]
            for i in range(1, 40):
                page = scrape_me(c + "page/" + str(i)).links()

                if len(page) < 35:
                    break

                wait()
                for link in page:
                    if 'rel' in link and "bookmark" in link["rel"]:
                        recipe = link['href']
                        if recipe not in recipe_list:
                            self.urls.insert_one({
                                "_id": recipe,
                                "name": recipe.split("hostthetoast.com/")[1][:-1],
                                'read': False,
                                'type': [c_name],
                                'source': source,
                            })
                            recipe_list.append(recipe)
                print(f"Recipes Scraped: {len(recipe_list)}")
Example #28
def getDataAndWrite(url, img):
    try:
        data = scrape_me(url)
        # 'is ""' compares identity, not equality; use falsy checks instead
        if not data.title() or not data.ingredients() or not data.instructions():
            return
        writeDataToFile(data.title(), data.total_time(), data.ingredients(),
                        data.instructions(), img)
    except Exception:
        print("error for url: " + url)
        return
Example #29
def main():
    recipes = get_recipes()
    for recipe in recipes:
        scrape = scrape_me(recipe)
        print("scraping: {0}".format(scrape.title()))
        recipe_dict = recipe_to_dict(scrape)
        write_to_file(recipe_dict)
        image_url = find_image(recipe_dict.get("source"), scrape.host(),
                               recipe_dict.get("title"))
        filename = "{0}.{1}".format(get_file_name(recipe_dict),
                                    get_file_type(image_url))
        download_image(filename, image_url)
Example #30
def web_scraper(web_page_url):
    """Web scraper for recipes from multiple websites."""

    page = scrape_me(web_page_url)

    return {
        "recipe_title": page.title(),
        "recipe_ingred": page.ingredients(),
        "recipe_direct": page.instructions(),
        "recipe_url": web_page_url,
        "image_url": page.image(),
    }
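A minimal invocation sketch (the URL is a placeholder reused from Example #12):

result = web_scraper("https://www.allrecipes.com/recipe/8499/basic-chicken-salad")
print(result["recipe_title"])
for ingredient in result["recipe_ingred"]:
    print(ingredient)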