def get_ontology_node(self, name):
    """
    Look up the ontology node for the given name.

    The name is normalized with normalize_ingredient_name() and the result
    is handed to _get_closest_ontology_node(), so the node returned is the
    best heuristic match rather than an exact-name match.
    """
    normalized_name = normalize_ingredient_name(name)
    return self._get_closest_ontology_node(normalized_name)
def get_ingredients(self, name=None):
    """
    Get ingredients matching the given criteria.

    If name is given, it is normalized with normalize_ingredient_name() and
    the query is restricted to ingredients whose name equals the normalized
    value.  If name is None, no name restriction is applied.

    Returns the query object itself (it is not executed here), so the
    caller decides how to fetch or further refine the results.
    """
    query = self._session.query(Ingredient)
    # None is a singleton: test identity rather than equality.
    if name is not None:
        query = query.filter_by(name=normalize_ingredient_name(name))
    return query
def get_ingredients(tokenized_string, enum=False):
    """
    Pick out the tokens of a tokenized string that are ingredients.

    Every token is normalized with normalize_ingredient_name() and the
    normalized form is tested with is_ingredient().  The tokens that pass
    are returned in their original (un-normalized) form: as a list of
    (index, token) tuples when enum is true, otherwise as a plain list of
    tokens.

    >>> raw_input_string = "I like apples, cinnamon, and pepper."
    >>> tokenizer = nltk.WordPunctTokenizer()
    >>> tokenized_string = tokenizer.tokenize(raw_input_string)
    >>> for i,w in get_ingredients(tokenized_string, enum=True): print i,w
    2 apples
    4 cinnamon
    7 pepper
    """
    # Normalize all tokens first, then test the normalized forms.
    normalized = [normalize_ingredient_name(token)
                  for token in tokenized_string]
    hits = [index for index, word in enumerate(normalized)
            if is_ingredient(word)]
    if not enum:
        return [tokenized_string[index] for index in hits]
    return [(index, tokenized_string[index]) for index in hits]
def get_cuisine(title, description, ingredients, title_weight=10,
                description_weight=5, ingredient_weight=1):
    """
    Score the cuisines suggested by a recipe's title (string), description
    (string) and ingredients (list of strings).

    Returns a defaultdict mapping each cuisine that was hit to its
    accumulated score; the cuisine with the highest score is the most
    probable one.  The relative weights of hits from the title, the
    description and the ingredients are modifiable via arguments 4, 5 and 6
    (respectively).

    >>> get_cuisine("Japanese Pork Fried Rice","My Japanese grandmother gave me this recipe",["Pork","rice","vinegar","cabbage"])
    defaultdict(<type 'int'>, {'Sausage': 1, 'Japanese': 15})
    >>> get_cuisine("Thai Chicken","This recipe was created when we felt like having spicy, Oriental tasting food. It's delicious, uses bold ingredients such as peanut butter, fresh ginger sesame oil. Try serving over rice.",["soy sauce", "garlic", "ginger", "chicken", "sesame oil", "peanut butter", "green onions"])
    defaultdict(<type 'int'>, {'Thai': 10, 'Indonesian': 1, 'Sausage': 1})
    >>> get_cuisine("Italian Rice Balls","Crispy meatball-sized appetizers with deep fried outsides and moist herb and cheese insides",["water","brown rice","garlic","bay leaf","prosciutto", "basil","olive oil","egg whites","Parmesan","bread crumbs", "vegetable oil"])
    defaultdict(<type 'int'>, {'Pizza': 2, 'Italian': 11})
    """
    scores = defaultdict(int)  # cuisine -> accumulated weight

    # Free-text fields: each whitespace-separated word found in
    # list_of_adjectivals adds that field's weight under the word itself.
    weighted_texts = (
        (title, title_weight),
        (description, description_weight),
    )
    for text, weight in weighted_texts:
        for word in text.split():
            if word in list_of_adjectivals:
                scores[word] += weight

    # Ingredients: every cuisine mapped to the normalized ingredient name
    # gains the ingredient weight.
    for ingredient in ingredients:
        key = normalize_ingredient_name(ingredient)
        if key not in ingredient_cuisine_mapping:
            continue
        for cuisine in ingredient_cuisine_mapping[key]:
            scores[cuisine] += ingredient_weight

    return scores
def get_cuisine(title, description, ingredients, title_weight=10,
                description_weight=5, ingredient_weight=1):
    """
    Tally cuisine evidence from a title (string), a description (string)
    and a list of ingredients (list of strings).

    The result is a defaultdict of cuisine -> score.  The cuisine with the
    highest value attached is the most probable cuisine for the given data.
    Arguments 4, 5 and 6 set how much a hit in the title, the description
    and the ingredients (respectively) is worth.

    >>> get_cuisine("Japanese Pork Fried Rice","My Japanese grandmother gave me this recipe",["Pork","rice","vinegar","cabbage"])
    defaultdict(<type 'int'>, {'Sausage': 1, 'Japanese': 15})
    >>> get_cuisine("Thai Chicken","This recipe was created when we felt like having spicy, Oriental tasting food. It's delicious, uses bold ingredients such as peanut butter, fresh ginger sesame oil. Try serving over rice.",["soy sauce", "garlic", "ginger", "chicken", "sesame oil", "peanut butter", "green onions"])
    defaultdict(<type 'int'>, {'Thai': 10, 'Indonesian': 1, 'Sausage': 1})
    >>> get_cuisine("Italian Rice Balls","Crispy meatball-sized appetizers with deep fried outsides and moist herb and cheese insides",["water","brown rice","garlic","bay leaf","prosciutto", "basil","olive oil","egg whites","Parmesan","bread crumbs", "vegetable oil"])
    defaultdict(<type 'int'>, {'Pizza': 2, 'Italian': 11})
    """
    tally = defaultdict(int)  # Maps cuisine to its weighted hit count

    # Title words that are known adjectivals count with the title weight.
    for adjectival in [w for w in title.split() if w in list_of_adjectivals]:
        tally[adjectival] += title_weight

    # Same for the description, with its own weight.
    for adjectival in [w for w in description.split()
                       if w in list_of_adjectivals]:
        tally[adjectival] += description_weight

    # Cuisines strongly associated with certain ingredients: look each
    # ingredient up by its normalized name (normalized lazily, one by one).
    normalized_names = (normalize_ingredient_name(item)
                        for item in ingredients)
    for name in normalized_names:
        if name in ingredient_cuisine_mapping:
            for cuisine in ingredient_cuisine_mapping[name]:
                tally[cuisine] += ingredient_weight

    return tally
def normalize_ontology_name(name):
    """
    Normalize a name for use with the ontology.

    The name is run through normalize_ingredient_name() and every
    underscore in the result is then replaced with a space.
    """
    normalized = normalize_ingredient_name(name)
    return normalized.replace('_', ' ')
def get_recipes(self, include_ingredients=(), exclude_ingredients=(),
                include_cuisines=(), exclude_cuisines=(),
                prep_time=None, cook_time=None, total_time=None,
                num_steps=None, num_ingredients=None):
    """
    Get recipes matching the given criteria.

    include_ingredients, exclude_ingredients, include_cuisines and
    exclude_cuisines are iterables of names (passing a bare string raises
    ValueError).  A recipe is returned only if it has every included
    ingredient and cuisine and none of the excluded ones.  Ingredient names
    are normalized with normalize_ingredient_name() before querying;
    cuisine names are matched exactly as given.

    Numeric attributes, like total_time, can be specified as single values
    (to retrieve exact matches) or (min, max) tuples that define ranges
    which include their endpoints.  To specify just a maximum or minimum,
    set the other value to None.

    For example, to find recipes with a total time of 1/2 to 1 hours:
    >>> db = Database("sqlite:///:memory:")
    >>> recipes = db.get_recipes(total_time=(30, 60))

    Or, to find recipes that take up to 15 minutes to prepare:
    >>> recipes = db.get_recipes(prep_time=(None, 15))

    To find recipes that have exactly 5 steps:
    >>> recipes = db.get_recipes(num_steps=5)

    To find Italian recipes:
    >>> recipes = db.get_recipes(include_cuisines=["Italian"])

    Returns the list of results produced by executing the query.
    """
    # Make sure that include_* and exclude_* arguments are not strings: a
    # string is itself iterable and would be consumed character by
    # character by the loops below.
    for argument in (include_ingredients, exclude_ingredients,
                     include_cuisines, exclude_cuisines):
        if isinstance(argument, types.StringTypes):
            raise ValueError('include_* and exclude_* must be iterables of'
                             ' strings, not strings.')

    # Normalize ingredient names, so that they match the names stored in
    # the database.
    include_ingredients = [normalize_ingredient_name(i)
                           for i in include_ingredients]
    exclude_ingredients = [normalize_ingredient_name(i)
                           for i in exclude_ingredients]

    # Construct the query
    query = self._session.query(Recipe)

    # Handle ingredient inclusion and exclusion
    if include_ingredients or exclude_ingredients:
        double_join = join(RecipeIngredientAssociation, Recipe)
        triple_join = join(double_join, Ingredient)
        join_query = query.select_from(triple_join)
        query = join_query
        # Intersect once per included ingredient (recipe must have all of
        # them), then subtract the recipes having each excluded ingredient.
        for ingredient_name in include_ingredients:
            query = query.intersect(
                join_query.filter(Ingredient.name == ingredient_name))
        for ingredient_name in exclude_ingredients:
            query = query.except_(
                join_query.filter(Ingredient.name == ingredient_name))

    # Handle cuisine inclusion and exclusion:
    # TODO: cuisine names should probably be normalized before querying, so
    # lowercase 'italian' matches 'Italian'.
    if include_cuisines or exclude_cuisines:
        for cuisine_name in include_cuisines:
            query = query.filter(
                Recipe.cuisines.any(Cuisine.name == cuisine_name))
        for cuisine_name in exclude_cuisines:
            # Negate the existence test: keep only recipes that have NO
            # cuisine with this name.  (any(Cuisine.name != cuisine_name)
            # would instead keep every recipe that has at least one other
            # cuisine, even if it also has the excluded one.)
            query = query.filter(
                ~Recipe.cuisines.any(Cuisine.name == cuisine_name))

    # Handle range searches over simple numeric attributes, like
    # total_time or num_steps
    if total_time is not None:
        query = query.filter(
            _range_predicate(Recipe.total_time, total_time))
    if cook_time is not None:
        query = query.filter(_range_predicate(Recipe.cook_time, cook_time))
    if prep_time is not None:
        query = query.filter(_range_predicate(Recipe.prep_time, prep_time))
    if num_steps is not None:
        query = query.filter(_range_predicate(Recipe.num_steps, num_steps))
    if num_ingredients is not None:
        query = query.filter(
            _range_predicate(Recipe.num_ingredients, num_ingredients))

    return query.all()
def get_recipes(self, include_ingredients=(), exclude_ingredients=(),
                include_cuisines=(), exclude_cuisines=(),
                prep_time=None, cook_time=None, total_time=None,
                num_steps=None, num_ingredients=None):
    """
    Get recipes matching the given criteria.

    include_ingredients, exclude_ingredients, include_cuisines and
    exclude_cuisines are iterables of names (passing a bare string raises
    ValueError).  A recipe is returned only if it has every included
    ingredient and cuisine and none of the excluded ones.  Ingredient names
    are normalized with normalize_ingredient_name() before querying;
    cuisine names are matched exactly as given.

    Numeric attributes, like total_time, can be specified as single values
    (to retrieve exact matches) or (min, max) tuples that define ranges
    which include their endpoints.  To specify just a maximum or minimum,
    set the other value to None.

    For example, to find recipes with a total time of 1/2 to 1 hours:
    >>> db = Database("sqlite:///:memory:")
    >>> recipes = db.get_recipes(total_time=(30, 60))

    Or, to find recipes that take up to 15 minutes to prepare:
    >>> recipes = db.get_recipes(prep_time=(None, 15))

    To find recipes that have exactly 5 steps:
    >>> recipes = db.get_recipes(num_steps=5)

    To find Italian recipes:
    >>> recipes = db.get_recipes(include_cuisines=["Italian"])

    Returns the list of results produced by executing the query.
    """
    # Make sure that include_* and exclude_* arguments are not strings: a
    # string is itself iterable and would be consumed character by
    # character by the loops below.
    for argument in (include_ingredients, exclude_ingredients,
                     include_cuisines, exclude_cuisines):
        if isinstance(argument, types.StringTypes):
            raise ValueError('include_* and exclude_* must be iterables of'
                             ' strings, not strings.')

    # Normalize ingredient names, so that they match the names stored in
    # the database.
    include_ingredients = [normalize_ingredient_name(i)
                           for i in include_ingredients]
    exclude_ingredients = [normalize_ingredient_name(i)
                           for i in exclude_ingredients]

    # Construct the query
    query = self._session.query(Recipe)

    # Handle ingredient inclusion and exclusion
    if include_ingredients or exclude_ingredients:
        double_join = join(RecipeIngredientAssociation, Recipe)
        triple_join = join(double_join, Ingredient)
        join_query = query.select_from(triple_join)
        query = join_query
        # Intersect once per included ingredient (recipe must have all of
        # them), then subtract the recipes having each excluded ingredient.
        for ingredient_name in include_ingredients:
            query = query.intersect(
                join_query.filter(Ingredient.name == ingredient_name))
        for ingredient_name in exclude_ingredients:
            query = query.except_(
                join_query.filter(Ingredient.name == ingredient_name))

    # Handle cuisine inclusion and exclusion:
    # TODO: cuisine names should probably be normalized before querying, so
    # lowercase 'italian' matches 'Italian'.
    if include_cuisines or exclude_cuisines:
        for cuisine_name in include_cuisines:
            query = query.filter(
                Recipe.cuisines.any(Cuisine.name == cuisine_name))
        for cuisine_name in exclude_cuisines:
            # Negate the existence test: keep only recipes that have NO
            # cuisine with this name.  (any(Cuisine.name != cuisine_name)
            # would instead keep every recipe that has at least one other
            # cuisine, even if it also has the excluded one.)
            query = query.filter(
                ~Recipe.cuisines.any(Cuisine.name == cuisine_name))

    # Handle range searches over simple numeric attributes, like
    # total_time or num_steps
    if total_time is not None:
        query = query.filter(
            _range_predicate(Recipe.total_time, total_time))
    if cook_time is not None:
        query = query.filter(_range_predicate(Recipe.cook_time, cook_time))
    if prep_time is not None:
        query = query.filter(_range_predicate(Recipe.prep_time, prep_time))
    if num_steps is not None:
        query = query.filter(_range_predicate(Recipe.num_steps, num_steps))
    if num_ingredients is not None:
        query = query.filter(
            _range_predicate(Recipe.num_ingredients, num_ingredients))

    return query.all()