def parseRecipes(urls):
    """Scrape each recipe URL, clean the scraped fields, and build Recipe objects.

    Returns a tuple (ingredientList, recipeList) intended for DB storage.
    """
    recipes = []
    for link in urls:
        print("Getting Recipe for " + str(link))
        # Web-scrape the page.
        scraped = scrap_me(link)
        name = scraped.title()
        prep_time = int(scraped.prepTime())
        description = scraped.description()
        raw_ingredients = scraped.ingredients()
        raw_instructions = scraped.instructions()
        # Clean-up: strip ads from the instructions and each ingredient,
        # then drop ingredients that failed to parse (parseIngredient -> None).
        instructions = removeAds(raw_instructions)
        ingredients = []
        for raw in raw_ingredients:
            parsed = parseIngredient(removeAds(raw))
            if parsed is not None:
                ingredients.append(parsed)
        # Storage.
        recipes.append(
            Recipe(name, description, instructions, prep_time, ingredients))
    # Unique ingredient list is stored into the DB alongside the recipes.
    unique_ingredients = findUniqueIngredients(recipes)
    return unique_ingredients, recipes
def get_recipe(url):
    """Scrape a recipe URL and return its fields as a dict.

    Each field is None when the scraper does not implement it; a URL that
    cannot be scraped at all yields an empty dict.
    """
    try:
        scrap = scrap_me(url)
    except Exception:  # was a bare except: don't swallow SystemExit/KeyboardInterrupt
        print('Could not scrape URL {}'.format(url))
        return {}

    def _field(method_name):
        # Scrapers raise AttributeError for fields they do not support.
        try:
            return getattr(scrap, method_name)()
        except AttributeError:
            return None

    return {
        'title': _field('title'),
        'ingredients': _field('ingredients'),
        'instructions': _field('instructions'),
        'picture_link': _field('picture'),
    }
def post(self, request, *args, **kwargs):
    """Scrape the recipe at request.data['url'] and return it as a Response.

    Error payloads: 'error' == '2' for a bad/unsupported URL,
    'error' == '3' when no URL was supplied.
    """
    url = request.data.get('url')
    # Guard clause: nothing to do without a URL.
    if not url:
        return Response({'error': '3', 'response': 'No URL given.'})
    try:
        data = scrap_me(url)
        response = {
            'title': self.mine_data(data, 'title'),
            'servings': self.mine_data(data, 'servings'),
            'ingredients': self.mine_data(data, 'ingredients'),
            'info': self.mine_data(data, 'description'),
            'image': self.mine_data(data, 'image'),
            'source': url,
            # Get the directions and add a 1-based step key to each.
            'directions': [
                {'step': i + 1, 'title': instruction}
                for i, instruction in enumerate(data.instructions())
            ],
        }
        # Break the total time into two groups: prep time and cook time.
        total_time = self.mine_data(data, 'total_time')
        if 'prep-time' in total_time:
            response['prep_time'] = total_time.get('prep-time', '')
        if 'cook-time' in total_time:
            response['cook_time'] = total_time.get('cook-time', '')
        return Response(response)
    except Exception:  # was a bare except; scraper failures map to a uniform error
        return Response({
            'error': '2',
            'response': 'Bad URL or URL not supported'
        })
def __init__(self, recipe_link="", image_link=""):
    """Scrape `recipe_link` and register this recipe in the class registries.

    Removed the dead local `recipe_count = 0`, which was never read.
    """
    from recipe_scrapers import scrap_me
    scraped_recipe = scrap_me(recipe_link)
    self.title = scraped_recipe.title()
    # Sequential id derived from how many recipes exist so far, e.g. "Recipe_00".
    self.number = "Recipe_" + str(len(Recipe.recipe_list)).zfill(2)
    self.time = scraped_recipe.total_time()
    self.ingredient_list = scraped_recipe.ingredients()
    self.instructions = scraped_recipe.instructions()
    self.image_link = image_link
    # Class-level registries shared by all Recipe instances.
    Recipe.recipe_dict[self] = [self.number, self.title]
    Recipe.recipe_list.append([self, self.number, self.title])
    Recipe.all_ingredient_list.append(self.ingredient_list)
from recipe_scrapers import scrap_me

# Spot-check tag/serving extraction on three supported recipe sites.
epicurious = scrap_me(
    "https://www.epicurious.com/recipes/food/views/creamy-pasta-with-crispy-mushrooms")
print(epicurious.tags())
print(epicurious.servings())

food_network = scrap_me(
    "https://www.foodnetwork.com/recipes/trisha-yearwood/slow-cooker-georgia-pulled-pork-barbeque-recipe-2078315")
print(food_network.tags())

allrecipes = scrap_me(
    "https://www.allrecipes.com/recipe/17991/stuffed-green-peppers-i/?internalSource=previously%20viewed&referringContentType=Homepage")
print(allrecipes.servings())
#!/usr/bin/python3 from recipe_scrapers import scrap_me s = scrap_me( "http://allrecipes.com/recipe/221093/good-frickin-paprika-chicken/") try: print("~~~~~~~~~~~TITLE~~~~~~~~~~~~~") print(s.title()) except Exception: print("no title") try: print("~~~~~~~~~~~TIME~~~~~~~~~~~~~") print(s.total_time()) except Exception: print("no time") try: print("~~~~~~~~~~~INGREDIENTS~~~~~~~~~~~~~") print(s.ingredients()) except Exception: print("no ingredients") try: print("~~~~~~~~~~~INSTRUCTIONS~~~~~~~~~~~~~") print(s.instructions()) except Exception: print("no instructions") try: print("~~~~~~~~~~~IMAGE~~~~~~~~~~~~~") print(s.image()) except Exception:
lower - upper
150000 - 152000 - niha
152000 - 154000 - farhan
154000 - 156000 - jay
156000 - 158000 - sid
'''
# NOTE(review): the text above is the tail of a module docstring whose opening
# quotes are outside this chunk — it records which id range each person scrapes.
#Change these
# This worker's shard: scrape allrecipes.com recipe ids in [lower, upper).
lower = 158000
upper = 160000
cols = ['title', 'time', 'ingredients', 'instructions']
# NOTE(review): `list` shadows the builtin — rename (e.g. `frame`) when safe.
list = pd.DataFrame(columns=cols)
recipeNames = []
for i in range(lower, upper, 1):
    try:
        s = scrap_me('http://allrecipes.com/recipe/' + str(i))
        print(i)
        # Only record recipes whose title has not been seen yet.
        if (s.title() not in recipeNames):
            time.sleep(1)  # throttle requests to the site
            recipeNames.append(s.title())
            list.loc[len(list)] = [
                s.title(),
                s.total_time(),
                s.ingredients(),
                s.instructions()
            ]
            print(s.title())
            # Stop after 5000 collected rows.
            if len(list) >= 5000:
                break
    except Exception:
        # Missing/unscrapable ids are expected in a dense numeric sweep.
        print("not found recipe: {}".format(i))
def get_recipe(url):
    """Scrape `url` and return recipe fields plus nutrition/tag metadata.

    Each field is None when the scraper does not implement it (AttributeError);
    a URL that cannot be scraped at all yields an empty dict.
    """
    try:
        scrap = scrap_me(url)
    except Exception:  # was a bare except: don't swallow SystemExit/KeyboardInterrupt
        print('Could not scrape URL {}'.format(url))
        return {}

    def _field(method_name):
        # Scrapers raise AttributeError for fields they do not support.
        try:
            return getattr(scrap, method_name)()
        except AttributeError:
            return None

    # (output key, scraper method) pairs — the key names are part of the API
    # and must not change. Replaces ~20 copies of the same try/except block.
    fields = [
        ('title', 'title'),
        ('ingredients', 'ingredients'),
        ('instructions', 'instructions'),
        ('picture_link', 'picture'),
        ('fat_content', 'fatContent'),
        ('satfat_content', 'satfatContent'),
        # ('pufa_content', 'pufaContent'),
        # ('mufa_content', 'mufaContent'),
        ('sodium_content', 'sodiumContent'),
        ('carb_content', 'carbContent'),
        ('protein_content', 'proteinContent'),
        ('fiber_content', 'fiberContent'),
        ('calories', 'calories'),
        ('tag_cuisine', 'tagCuisine'),
        ('tag_special', 'tagSpecial'),
        ('tag_meal', 'tagMeal'),
        ('tag_tag', 'tagTag'),
        ('tag_ingredient', 'tagIngre'),
        ('tag_type', 'tagType'),
        ('tag_occasion', 'tagOccasion'),
        ('tag_tech', 'tagTech'),
        ('tag_equipment', 'tagEquip'),
        ('tag_source', 'tagSource'),
        ('total_time', 'totalTime'),
    ]
    return {key: _field(method) for key, method in fields}