def match_one_weight(food: Food, measurement: str) -> FoodWeight:
    """Pick the FoodWeight of ``food`` whose description best fits ``measurement``.

    The weight descriptions are compared against ``measurement`` with
    ``difflib.get_close_matches()`` (cutoff 0.5). When that yields nothing
    and ``measurement`` is the default measurement, each known variation of
    the default measurement is compared against the singularized weight
    descriptions instead. If there is still no candidate, the last weight
    of the food is returned.

    Args:
        food: Food whose weights are searched.
        measurement: Measurement to look for.

    Returns:
        The best matching FoodWeight.

    Raises:
        AttributeError: If ``food`` has no weight entries.
    """
    if not food.weight.exists():
        raise AttributeError(f"{food} has no weights.")

    weights = food.weight.all()
    descriptions = [entry.desc for entry in weights]
    candidates = get_close_matches(measurement, descriptions, cutoff=0.5)

    # Fuzzy matching failed for the default measurement: fall back to an
    # exact comparison against its variations, keeping variation order first
    # and weight order second.
    if not candidates and measurement == utils.DEFAULT_MEASUREMENT:
        candidates = [
            entry.desc
            for variation in utils.DEFAULT_MEASUREMENT_VARIATIONS
            for entry in weights
            if variation == utils.singularize(entry.desc)
        ]

    if not candidates:
        return food.weight.last()
    return weights.filter(desc=candidates[0])[0]
def test_singularize(self):
    """Check that ``utils.singularize`` leaves singular words alone and
    reduces a few known plurals to their singular form."""
    # Units that do not end in "s" must come back unchanged.
    for unit in utils.all_units:
        if unit[-1] == "s":
            continue
        assert utils.singularize(unit) == unit

    # Every measurement must come back unchanged.
    for measure in utils.measurements:
        assert utils.singularize(measure) == measure

    # (input, expected) pairs: two plurals and two words that stay as-is.
    expectations = (
        ("chickens", "chicken"),
        ("leaves", "leaf"),
        ("flour", "flour"),
        ("pasta", "pasta"),
    )
    for word, expected in expectations:
        assert utils.singularize(word) == expected
def _parse_fraction(token: str) -> float:
    """Turn a fraction token such as '1/2' into a float (numerator / denominator)."""
    parts = list(map(int, token.split("/")))
    return parts[0] / parts[1]


def _leading_amount(tokens: list) -> tuple:
    """Return ``(amount, index)`` where ``index`` is the first token after the amount.

    ``tokens`` must be non-empty. Shapes are tried in this order: integer
    followed by a fraction, fraction only, integer only, no amount at all
    (amount defaults to 1 and nothing is consumed).
    """
    first = tokens[0]
    # NOTE(review): str.isnumeric() also accepts characters such as "½" or
    # "²" that int() rejects - confirm the clean-up helpers remove them.
    if (len(tokens) > 1 and first.isnumeric()
            and utils.is_decimal_amount(tokens[1])):
        # e.g. '1 1/2 ...' - whole part plus fraction
        return _parse_fraction(tokens[1]) + int(first), 2
    if utils.is_decimal_amount(first):
        # e.g. '1/2 ...'
        return _parse_fraction(first), 1
    if first.isnumeric():
        # e.g. '1 ...'
        return int(first), 1
    # e.g. 'cup of flour' / 'chicken breast'
    return 1, 0


def _unit_measurement_and_name(tokens: list, index: int) -> tuple:
    """Return ``(unit, measurement, name)`` read from ``tokens`` starting at ``index``.

    If the token at ``index`` is a measure or unit it is consumed and stored
    as either the measurement or the unit; otherwise the default measurement
    is used and nothing is consumed. The remaining tokens, joined with
    spaces and singularized, form the name.
    """
    unit = ""
    measurement = ""
    if len(tokens) > index and utils.is_measure_or_unit(tokens[index]):
        token = tokens[index]
        if utils.is_measurement(token):
            measurement = token
        else:
            unit = utils.get_unit(token)
        name_tokens = tokens[index + 1:]
    else:
        measurement = utils.DEFAULT_MEASUREMENT
        name_tokens = tokens[index:]
    return unit, measurement, utils.singularize(" ".join(name_tokens))


def naive_parse_ingredient(string: str) -> dict:
    """Parses string and returns unit, amount, measurement and name of ingredient

    It is a very basic and naive implementation of parsing a string
    (ingredient). Based on simple checks if string starts with an amount or
    with a unit, etc. Ideally would be implemented with CRF (e.g. using
    PyStruct).

    The string is first normalised by the ``utils`` clean-up helpers and
    split on whitespace. The leading words are then matched against these
    shapes, in order:

        1. integer + fraction, e.g. '1 1/2 cup of flour'
        2. fraction only,      e.g. '1/2 cup of flour'
        3. integer only,       e.g. '1 cup of flour'
        4. no amount,          e.g. 'cup of flour' (amount is 1)

    After the amount, the next word is taken as measurement or unit if it is
    one; otherwise the default measurement is used. Everything left over is
    the (singularized) name.

    Usage example (as originally documented, under the name
    ``parse_ingredient``):
        >>> parse_ingredient("1 onion")
        {'amount': 1.0, 'unit': '', 'measurement': 'serving', 'name': 'onion', 'raw': '1 onion'}

        >>> parse_ingredient("150 grams of chicken breasts (boneless and skinless)")
        {'amount': 150.0, 'unit': 'g', 'measurement': '', 'name': 'chicken breast boneless skinless', 'raw': '150 grams of chicken breasts (boneless and skinless)'}

    Args:
        string: A string to be parsed.

    Returns:
        Dictionary:
            'amount': float
            'unit': str (may be empty)
            'measurement': str (may be empty)
            'name': str
            'raw': str

    Raises:
        ParseIngredientError: When string is empty, or when no words are
            left to parse after clean-up (e.g. "$$" or whitespace only).
    """
    if not string:
        raise ParseIngredientError(string, "String cannot be empty.")

    raw = string

    string = utils.strip_special_chars(string)
    string = utils.separate_letters_from_numbers(string)
    string = utils.remove_or_ingredients(string)
    string = utils.strip_stop_words(string)
    string = utils.convert_range_to_one_amount(string)

    # Re-check in case of invalid strings like "$$" etc. Checking the token
    # list (not just the string) also rejects whitespace-only leftovers,
    # which would otherwise fail with IndexError on the first token access.
    tokens = string.split() if string else []
    if not tokens:
        raise ParseIngredientError(raw, "String is not valid.")

    amount, next_index = _leading_amount(tokens)
    unit, measurement, name = _unit_measurement_and_name(tokens, next_index)

    return {
        "amount": float(amount),
        "unit": unit,
        "measurement": measurement,
        "name": name,
        "raw": raw,
    }
def getIngredientList(ingredient_name_list):
    """Return the ids of the foods matched for the given ingredient names.

    Each name is singularized and passed to ``match_one_food``; names for
    which the match result is falsy are left out of the returned list.
    """
    # Phase 1: resolve every name before touching any result.
    matched_foods = []
    for ingredient_name in ingredient_name_list:
        matched_foods.append(match_one_food(singularize(ingredient_name)))

    # Phase 2: keep the id of each truthy match, in input order.
    food_ids = []
    for food in matched_foods:
        if food:
            food_ids.append(food.id)
    return food_ids