async def crawl(self):
    """Search for ``self.input``, import the best match, and return a summary.

    The first search result is taken as the candidate. Its name is scored
    with ``self.validate``; the recipe page is then fetched, parsed into
    ``self.dom`` and stored as a new ``Recipe`` together with its tags,
    ingredients, directions and pictures.

    Returns:
        dict with the keys ``input``, ``found_recipe``, ``reliability``
        and ``url``.

    Raises:
        ImportException: if the search returns no results, if the top
            result scores below 0.6, or if a recipe with the same source
            URL is already stored.
    """
    search_results = await self.crawler.get_json(
        self.crawler.SEARCH_URL, params={'q': self.input})
    results = search_results['results']
    if not results:
        raise ImportException(
            f'No recipes found for title "{self.input}"')

    self.json = results[0]
    reliability = self.validate(self.json['name'])
    if reliability < 0.6:
        # so far, all correct matches have been >= 0.8
        # todo: maybe import anyways and add a note
        raise ImportException(
            f'Probably not the one we were looking for, found: {self.json["name"]}'
        )

    slug = self.json['slug']
    recipe_url = self.crawler.RECIPE_URL + f'/{slug}'

    # A single query both detects the duplicate and fetches it.
    existing = Recipe.objects.filter(source=recipe_url).first()
    if existing is not None:
        url = existing.get_absolute_url()
        raise ImportException(f'Already imported: {url}')

    raw = await self.crawler.http_get(recipe_url)
    self.dom = BeautifulSoup(raw, 'html.parser')

    r = Recipe()
    r.title = self.find_title()
    r.preparationtime = self.find_preparationtime()
    r.cooktime = self.find_cooktime()
    r.resttime = self.find_resttime()
    r.portion_quantity = self.find_portion_quantity()
    r.portion_unit = self.find_portion_unit()
    r.nutrition_kcal = self.find_nutrition_kcal()
    r.nutrition_carbs = self.find_nutrition_carbs()
    r.nutrition_fat = self.find_nutrition_fat()
    r.nutrition_protein = self.find_nutrition_protein()
    r.note = self.find_note()
    r.author = self.find_author()
    r.source = self.find_source()
    r.save()

    # Everything after save() lives in the try block so that any failure,
    # including one while attaching tags, removes the half-imported recipe.
    try:
        # NOTE(review): find_tags() is passed as one argument; if it
        # returns a collection this may need unpacking -- confirm.
        r.tags.add(self.find_tags())
        self.add_ingredients(r.id)
        self.add_directions(r.id)
        await self.add_pictures(r.id)
    except Exception:
        r.delete()
        raise

    return {
        'input': self.input,
        'found_recipe': r.title,
        'reliability': self.validate(r.title),
        'url': r.get_absolute_url()
    }
def handle_recipe(self, data, url):
    """Create a Recipe, with its ingredients and directions, from scraped data.

    Args:
        data: Mapping of scraped fields. ``name`` is read with ``[]``
            before anything is stored; ``author``, ``image``,
            ``ratingValue``, ``description``, ``yields``, ``prepTime``,
            ``cookTime`` and ``directions`` are read with ``.get``; the
            four ``ingredients-*`` sequences are zipped together.
        url: Source URL, used to detect recipes that already exist.

    Returns:
        ``(1, False)`` if a recipe with this URL already exists;
        ``(None, False)`` if the produced food is not found and
        ``self.create_foods`` is falsy, or if storing the ingredients or
        directions fails (the recipe is deleted again);
        ``(recipe, True)`` on success.
    """
    if Recipe.objects.filter(URL=url).exists():
        self.stdout.write(u"\t already in database")
        return 1, False

    recipe = Recipe(Title=data[u"name"], URL=url)

    self.stdout.write(u"\t - author: %s" % data.get(u"author", None))
    recipe.Author = data.get(u"author", None)
    self.stdout.write(u"\t - image: %s" % data.get(u"image", None))
    recipe.Image = data.get(u"image", None)
    self.stdout.write(u"\t - ratingValue: %s" % data.get(u"ratingValue", None))
    recipe.Rating = data.get(u"ratingValue", None)
    self.stdout.write(u"\t - description: %s" % data.get(u"description", '')[:60])
    recipe.Description = data.get(u"description", None)

    # Produces
    self.stdout.write(u"\t - Produces: %s" % data[u"name"])
    ingredient = Ingredient.objects.search(data[u"name"])
    if not ingredient:
        if not self.create_foods:
            return None, False
        ingredient = Ingredient()
        ingredient.NDB_No = self.ingredient_NDB_No_generator()
        ingredient.Shrt_Desc = data[u"name"]
        ingredient.Long_Desc = data[u"name"]
        ingredient.save()
    recipe.Produces = ingredient

    # Yields: keep the first purely numeric token of the yields text
    self.stdout.write(u"\t - yields: %s" % data.get(u"yields", None))
    yields = data.get(u"yields", None)
    if yields:
        items = [it for it in yields.split() if it.isdigit()]
        if items:
            recipe.Yields = items[0]
            self.stdout.write(u"\t + yields: %s -> %s" % (yields, recipe.Yields))

    # PrepTime and CookTime
    def minutes(value_str):
        """Convert a duration string to whole minutes; None if unavailable."""
        if not value_str:
            return None
        duration = parse_duration(value_str)
        if duration is None:
            # Some parse_duration implementations (e.g. Django's) return
            # None for unparseable input instead of raising.
            return None
        # total_seconds() includes the days component that .seconds drops,
        # so durations of 24 hours or more are no longer truncated.
        return int(duration.total_seconds() / 60.)

    prep_minutes = minutes(data.get(u"prepTime", None))
    cook_minutes = minutes(data.get(u"cookTime", None))
    self.stdout.write(u"\t - prepTime: %s" % prep_minutes)
    self.stdout.write(u"\t - cookTime: %s" % cook_minutes)
    recipe.PrepTime = prep_minutes
    recipe.CookTime = cook_minutes

    recipe.save()

    # Ingredients
    try:
        for name, amount, ingredientid, grams in zip(
                data[u"ingredients-name"],
                data[u"ingredients-amount"],
                data["ingredients-data-ingredientid"],
                data[u"ingredients-data-grams"]):
            self.handle_ingredient(recipe, name, amount, ingredientid, grams)
    except Exception as e:
        self.stderr.write(str(e))
        # Deleting the recipe also deletes everything related to it
        # (IngredientWithAmount), so nothing is left orphaned.
        recipe.delete()
        return None, False

    # Directions
    try:
        self.stdout.write(u"\t - directions:")
        # A missing "directions" entry raises TypeError here and is handled
        # like any other failure below.
        for step_number, direction in enumerate(data.get(u"directions"), 1):
            d = Direction(Recipe=recipe, StepNumber=step_number)
            d.Description = direction
            d.save()
            self.stdout.write(u"\t + %s" % d)
    except Exception as e:
        self.stderr.write(str(e))
        # Deleting the recipe also deletes everything related to it
        # (IngredientWithAmount), so nothing is left orphaned.
        recipe.delete()
        return None, False

    return recipe, True
def handle_recipe(self, data, url):
    """Create a Recipe, with its ingredients and directions, from scraped data.

    Args:
        data: Mapping of scraped fields. ``name`` is read with ``[]``
            before anything is stored; ``author``, ``image``,
            ``ratingValue``, ``description``, ``yields``, ``prepTime``,
            ``cookTime`` and ``directions`` are read with ``.get``; the
            four ``ingredients-*`` sequences are zipped together.
        url: Source URL, used to detect recipes that already exist.

    Returns:
        ``(1, False)`` if a recipe with this URL already exists;
        ``(None, False)`` if the produced food is not found and
        ``self.create_foods`` is falsy, or if storing the ingredients or
        directions fails (the recipe is deleted again);
        ``(recipe, True)`` on success.
    """
    if Recipe.objects.filter(URL=url).exists():
        self.stdout.write(u"\t already in database")
        return 1, False

    recipe = Recipe(Title=data[u"name"], URL=url)

    self.stdout.write(u"\t - author: %s" % data.get(u"author", None))
    recipe.Author = data.get(u"author", None)
    self.stdout.write(u"\t - image: %s" % data.get(u"image", None))
    recipe.Image = data.get(u"image", None)
    self.stdout.write(u"\t - ratingValue: %s" % data.get(u"ratingValue", None))
    recipe.Rating = data.get(u"ratingValue", None)
    self.stdout.write(u"\t - description: %s" % data.get(u"description", '')[:60])
    recipe.Description = data.get(u"description", None)

    # Produces
    self.stdout.write(u"\t - Produces: %s" % data[u"name"])
    ingredient = Ingredient.objects.search(data[u"name"])
    if not ingredient:
        if not self.create_foods:
            return None, False
        ingredient = Ingredient()
        ingredient.NDB_No = self.ingredient_NDB_No_generator()
        ingredient.Shrt_Desc = data[u"name"]
        ingredient.Long_Desc = data[u"name"]
        ingredient.save()
    recipe.Produces = ingredient

    # Yields: keep the first purely numeric token of the yields text
    self.stdout.write(u"\t - yields: %s" % data.get(u"yields", None))
    yields = data.get(u"yields", None)
    if yields:
        items = [it for it in yields.split() if it.isdigit()]
        if items:
            recipe.Yields = items[0]
            self.stdout.write(u"\t + yields: %s -> %s" % (yields, recipe.Yields))

    # PrepTime and CookTime
    def minutes(value_str):
        """Convert a duration string to whole minutes; None if unavailable."""
        if not value_str:
            return None
        duration = parse_duration(value_str)
        if duration is None:
            # Some parse_duration implementations (e.g. Django's) return
            # None for unparseable input instead of raising.
            return None
        # total_seconds() includes the days component that .seconds drops,
        # so durations of 24 hours or more are no longer truncated.
        return int(duration.total_seconds() / 60.)

    prep_minutes = minutes(data.get(u"prepTime", None))
    cook_minutes = minutes(data.get(u"cookTime", None))
    self.stdout.write(u"\t - prepTime: %s" % prep_minutes)
    self.stdout.write(u"\t - cookTime: %s" % cook_minutes)
    recipe.PrepTime = prep_minutes
    recipe.CookTime = cook_minutes

    recipe.save()

    # Ingredients
    try:
        for name, amount, ingredientid, grams in zip(
                data[u"ingredients-name"],
                data[u"ingredients-amount"],
                data["ingredients-data-ingredientid"],
                data[u"ingredients-data-grams"]):
            self.handle_ingredient(recipe, name, amount, ingredientid, grams)
    except Exception as e:
        self.stderr.write(str(e))
        # Deleting the recipe also deletes everything related to it
        # (IngredientWithAmount), so nothing is left orphaned.
        recipe.delete()
        return None, False

    # Directions
    try:
        self.stdout.write(u"\t - directions:")
        # A missing "directions" entry raises TypeError here and is handled
        # like any other failure below.
        for step_number, direction in enumerate(data.get(u"directions"), 1):
            d = Direction(Recipe=recipe, StepNumber=step_number)
            d.Description = direction
            d.save()
            self.stdout.write(u"\t + %s" % d)
    except Exception as e:
        self.stderr.write(str(e))
        # Deleting the recipe also deletes everything related to it
        # (IngredientWithAmount), so nothing is left orphaned.
        recipe.delete()
        return None, False

    return recipe, True