def main(data_dir):
    """Load the nutrient data files in ``data_dir`` into the database.

    Reads ``FOOD_DES.txt``, ``NUTR_DEF.txt``, ``NUT_DATA.txt`` and
    ``WEIGHT.txt`` (caret-delimited, tilde-quoted, latin-1 encoded, no header
    row) and turns their rows into ``models.Ingredient``, ``models.Nutrient``,
    ``models.IngredientMeasure`` and ``models.IngredientNutrient`` objects.
    Foods whose group is listed in ``ignored_food_groups`` are skipped, along
    with every weight and nutrient row that refers to them.

    Everything is persisted in two commits: ingredients and nutrients first,
    then the measures and nutrient values that reference them by id.

    Args:
        data_dir: Directory containing the four data files.
    """
    ureg = UnitRegistry()

    def read_table(file_name, column_names):
        """Parse one data file into a DataFrame with blanks as ''."""
        table = pandas.read_csv(
            os.path.join(data_dir, file_name),
            quotechar='~',
            delimiter='^',
            encoding='latin-1',
            header=None,
            names=column_names,
        )
        # pandas reads empty fields as NaN; use empty strings instead.
        table.fillna('', inplace=True)
        return table

    food_descriptions = read_table("FOOD_DES.txt", food_description_columns)
    nutrient_definitions = read_table("NUTR_DEF.txt", nutrient_definition_columns)
    nutrition_data = read_table("NUT_DATA.txt", nutrition_data_columns)
    weight_data = read_table("WEIGHT.txt", weight_data_columns)

    with app.test_request_context():
        # Keyed by the raw ndb_id value so later rows can be filtered cheaply.
        ingredients = {}
        for food in food_descriptions.itertuples():
            if food.food_group in ignored_food_groups:
                continue
            ingredient = models.Ingredient(ingredient_id=int(food.ndb_id))
            ingredients[food.ndb_id] = ingredient
            ingredient.names.append(
                models.IngredientName(name=food.description, canonical=True))

        nutrients = {}
        for definition in nutrient_definitions.itertuples():
            key = int(definition.nutrient_id)
            nutrients[key] = models.Nutrient(
                nutrient_id=key,
                display_name=nutrient_display_names.get(key),
                scientific_name=nutrient_scientific_names.get(key),
                measurement_unit=definition.units,
                recommended_daily_intake=nutrient_rdi.get(key),
                display=display_nutrient.get(key, False),
            )

        # Most entries in the weights file conform to this pattern:
        # "<unit> (<note>), <preparation>" with the last two parts optional.
        weight_re = re.compile(r"([\w\s]+?)(?:\s+\(.*\))?(?:,\s+(.*))?\Z")

        def density_for(row, volume):
            """Return the density implied by a row's gram weight and volume."""
            # Grams -> kilograms so the density comes out in base units.
            mass = row.gram_weight / 1000 * ureg.kilogram
            return float((mass / volume).magnitude)

        def describe_measure(row):
            """Return ``(description, density)`` for a row, or None to skip."""
            label = row.measure_description

            # Pint thinks "fl oz" is femtolitre ounces, so handle it by hand.
            if label == "fl oz":
                # US regulation defines a fluid ounce as equivalent to 30mL
                # for nutrition labeling purposes.
                volume = row.amount * ureg.parse_expression("30 ml").to_base_units()
                return "fluid ounces", density_for(row, volume)

            # "pat" would parse as a unit, but here it must not be read as one.
            if label.startswith("pat "):
                return label, None

            parsed = weight_re.match(label)
            if not parsed:
                return label, None

            unit_name, preparation = parsed.groups()
            try:
                # Fails for nebulous measures such as a "serving".
                quantity = row.amount * ureg.parse_expression(unit_name)
                # In base units a volume is expressed in cubic meters.
                volume = quantity.to_base_units()
                if volume.units.get("meter") != 3:
                    # Non-volume measurements are discarded entirely.
                    return None
                return preparation, density_for(row, volume)
            except UndefinedUnitError:
                return label, None

        ingredient_preparations = []
        for weight_entry in weight_data.itertuples():
            if weight_entry.ndb_id not in ingredients:
                continue
            resolved = describe_measure(weight_entry)
            if resolved is None:
                continue
            description, density = resolved
            ingredient_preparations.append(
                models.IngredientMeasure(
                    ingredient_id=int(weight_entry.ndb_id),
                    description=description,
                    density=density,
                    amount=float(weight_entry.amount),
                    weight=float(weight_entry.gram_weight),
                ))

        ingredient_nutrients = []
        for value in nutrition_data.itertuples():
            if value.ndb_id not in ingredients:
                continue
            ingredient_nutrients.append(
                models.IngredientNutrient(
                    nutrient_id=int(value.nutrient_id),
                    ingredient_id=int(value.ndb_id),
                    quantity=float(value.nutrient_value),
                ))

        # First commit: the rows the other tables refer to by id.
        db.session.add_all(ingredients.values())
        db.session.add_all(nutrients.values())
        db.session.commit()

        # Second commit: measures and per-ingredient nutrient values.
        db.session.add_all(ingredient_preparations)
        db.session.add_all(ingredient_nutrients)
        db.session.commit()
# NOTE(review): `m` is not used by the code below; presumably the import is
# kept so the model tables are known to `db.metadata` - confirm before removing.
import metarecipe.models as m
from metarecipe.app import app, db


def _rebuild_schema():
    """Drop all tables in ``db.metadata`` and create them again."""
    with app.test_request_context():
        db.metadata.drop_all()
        db.metadata.create_all()


if __name__ == "__main__":
    _rebuild_schema()
    # Example similarity query, kept for reference:
    # m.Ingredient.query.order_by(db.desc(db.func.similarity("chicken", m.Ingredient.ingredient_name))).limit(10).all()
def main(data_dir):
    """Import the nutrient data files found in ``data_dir``.

    The four inputs (``FOOD_DES.txt``, ``NUTR_DEF.txt``, ``NUT_DATA.txt``,
    ``WEIGHT.txt``) are parsed as caret-delimited, tilde-quoted, latin-1 text
    without a header row. Their rows become ``models.Ingredient``,
    ``models.Nutrient``, ``models.IngredientMeasure`` and
    ``models.IngredientNutrient`` objects, which are added to ``db.session``
    and committed in two steps (ingredients and nutrients, then measures and
    nutrient values).

    Foods in ``ignored_food_groups`` are left out, and so are all weight and
    nutrient rows belonging to them. Weight rows whose unit parses but is not
    a volume are dropped as well.

    Args:
        data_dir: Directory that holds the four data files.
    """
    ureg = UnitRegistry()

    sources = (
        ("FOOD_DES.txt", food_description_columns),
        ("NUTR_DEF.txt", nutrient_definition_columns),
        ("NUT_DATA.txt", nutrition_data_columns),
        ("WEIGHT.txt", weight_data_columns),
    )
    frames = [
        pandas.read_csv(
            os.path.join(data_dir, file_name),
            quotechar='~',
            delimiter='^',
            encoding='latin-1',
            header=None,
            names=column_names,
        )
        for file_name, column_names in sources
    ]
    # Empty fields are read as NaN by pandas; store them as empty strings.
    for frame in frames:
        frame.fillna('', inplace=True)
    food_descriptions, nutrient_definitions, nutrition_data, weight_data = frames

    with app.test_request_context():
        ingredients = {}
        ingredient_preparations = []
        nutrients = {}

        for food_row in food_descriptions.itertuples():
            if food_row.food_group in ignored_food_groups:
                continue
            canonical_name = models.IngredientName(
                name=food_row.description, canonical=True)
            new_ingredient = models.Ingredient(
                ingredient_id=int(food_row.ndb_id))
            ingredients[food_row.ndb_id] = new_ingredient
            new_ingredient.names.append(canonical_name)

        for nutrient_row in nutrient_definitions.itertuples():
            nid = int(nutrient_row.nutrient_id)
            nutrient_fields = {
                "nutrient_id": nid,
                "display_name": nutrient_display_names.get(nid),
                "scientific_name": nutrient_scientific_names.get(nid),
                "measurement_unit": nutrient_row.units,
                "recommended_daily_intake": nutrient_rdi.get(nid),
                "display": display_nutrient.get(nid, False),
            }
            nutrients[nid] = models.Nutrient(**nutrient_fields)

        # Most entries in the weights file conform to this pattern
        weight_re = re.compile(r"([\w\s]+?)(?:\s+\(.*\))?(?:,\s+(.*))?\Z")

        for row in weight_data.itertuples():
            if row.ndb_id not in ingredients:
                continue

            label = row.measure_description
            if label == "fl oz":
                # Pint thinks fl oz is femtolitre ounces, so spell it out.
                description = "fluid ounces"
                # US regulation defines a fluid ounce as equivalent to 30mL
                # for nutrition labeling purposes.
                volume = row.amount * ureg.parse_expression(
                    "30 ml").to_base_units()
                # Kilograms over cubic meters keeps density in base units.
                mass = row.gram_weight / 1000 * ureg.kilogram
                density = float((mass / volume).magnitude)
            else:
                # Default for anything that is not a recognised volume unit:
                # keep the raw text and record no density.
                description, density = label, None

                # "pat" matches a unit name but must not be treated as one
                # here, so it never reaches the unit parser.
                if label.startswith("pat "):
                    parsed = None
                else:
                    parsed = weight_re.match(label)

                if parsed:
                    unit_name, preparation = parsed.groups()
                    try:
                        # Raises for nebulous measures like a "serving".
                        quantity = row.amount * ureg.parse_expression(
                            unit_name)
                        # Base units express volume in cubic meters.
                        volume = quantity.to_base_units()
                        if volume.units.get("meter") != 3:
                            # Discard entries with non-volume measurements.
                            continue
                        mass = row.gram_weight / 1000 * ureg.kilogram
                        density = float((mass / volume).magnitude)
                        description = preparation
                    except UndefinedUnitError:
                        description, density = label, None

            ingredient_preparations.append(
                models.IngredientMeasure(
                    ingredient_id=int(row.ndb_id),
                    description=description,
                    density=density,
                    amount=float(row.amount),
                    weight=float(row.gram_weight),
                ))

        ingredient_nutrients = [
            models.IngredientNutrient(
                nutrient_id=int(data_row.nutrient_id),
                ingredient_id=int(data_row.ndb_id),
                quantity=float(data_row.nutrient_value),
            )
            for data_row in nutrition_data.itertuples()
            if data_row.ndb_id in ingredients
        ]

        session = db.session
        # Ingredients and nutrients are committed before the rows that carry
        # their ids.
        session.add_all(ingredients.values())
        session.add_all(nutrients.values())
        session.commit()
        session.add_all(ingredient_preparations)
        session.add_all(ingredient_nutrients)
        session.commit()