예제 #1
0
def extractRecipesFromJSON(allRecipes):
    """Add every entry of ``aliasdata/miniAliases.json`` to ``allRecipes``.

    The file is loaded as a dict via ``util.loadJSONDict``.  Only the dict's
    values are kept, because each value already contains its own key as a
    member.  ``allRecipes`` is extended in place; nothing is returned.
    """
    recipesPath = os.path.join(c.PATH_TO_RESOURCES, "aliasdata", "miniAliases.json")
    recipesByKey = util.loadJSONDict(recipesPath)

    # The keys are redundant (see docstring), so take the values wholesale.
    allRecipes.extend(recipesByKey.values())
예제 #2
0
def createAliasDatapoints(jsonDataFilePath, featureList):
	"""Turn every alias in a JSON file into a feature point and vectorize them.

	Loads the alias dict from ``jsonDataFilePath``, builds one point per alias
	with ``createAliasPoint(aliasData, featureList)``, and feeds all points to
	``DictVectorizer().fit_transform``.

	Returns a 3-tuple ``(dataMatrix, countToAliasName, allPoints)``:
	  * ``dataMatrix`` -- the vectorizer output; sparse matrix,
	    shape = (n_samples, n_features)
	  * ``countToAliasName`` -- maps the row index of a point to its alias name
	  * ``allPoints`` -- the raw points, in the same order as the rows
	"""
	aliasTable = util.loadJSONDict(jsonDataFilePath)

	allPoints = []
	countToAliasName = {}
	# Row index i of the matrix corresponds to the i-th alias visited here.
	for rowIndex, (aliasName, aliasData) in enumerate(aliasTable.items()):
		allPoints.append(createAliasPoint(aliasData, featureList))
		countToAliasName[rowIndex] = aliasName

	vectorizer = DictVectorizer()
	return vectorizer.fit_transform(allPoints), countToAliasName, allPoints
예제 #3
0
def createRecipeDatapoints(jsonDataFilePath, featureList):
	"""Turn every recipe in a JSON file into a feature point and vectorize them.

	Loads the recipe dict from ``jsonDataFilePath``, builds one point per
	recipe with ``createRecipePoint(recipe, featureList)``, and feeds all
	points to ``DictVectorizer().fit_transform``.

	Returns a 3-tuple ``(dataMatrix, countToRecipeName, allPoints)``:
	  * ``dataMatrix`` -- the vectorizer output; sparse matrix,
	    shape = (n_samples, n_features)
	  * ``countToRecipeName`` -- maps the row index of a point to its recipe name
	  * ``allPoints`` -- the raw points, in the same order as the rows
	"""
	recipeTable = util.loadJSONDict(jsonDataFilePath)

	allPoints = []
	countToRecipeName = {}
	# Row index i of the matrix corresponds to the i-th recipe visited here.
	for rowIndex, (recipeName, recipe) in enumerate(recipeTable.items()):
		allPoints.append(createRecipePoint(recipe, featureList))
		countToRecipeName[rowIndex] = recipeName

	vectorizer = DictVectorizer()
	return vectorizer.fit_transform(allPoints), countToRecipeName, allPoints
예제 #4
0
def initializeTraits(verbose):
    """Load the default traits dict and attach the runtime-only entries.

    Reads ``res/csp_defaultTraits.json`` (relative to
    ``constants.PATH_TO_ROOT``) and then adds two keys to the loaded dict:
    ``"verbose"`` (the value passed in) and ``"ND"`` (a freshly constructed
    ``nutrientdatabase.NutrientDatabase``).  Returns the resulting dict.
    """
    traits = util.loadJSONDict(
        os.path.join(constants.PATH_TO_ROOT, "res", "csp_defaultTraits.json")
    )
    traits["verbose"] = verbose
    nutrientDb = nutrientdatabase.NutrientDatabase()
    traits["ND"] = nutrientDb
    return traits
def main(argv):
	"""Parse recipe ingredient lines and write the alias data files.

	Steps, in order:
	  1. Load ``validIngredients.json`` into the global ``validAliasDict`` and
	     build the unit conversion dict via ``util.createWaterConversionDict``.
	  2. Read all recipes (``extractRecipesFromJSON``) and lowercase their
	     strings (``lowerAllStrings``).
	  3. Group ingredient lines by ingredient, keeping only ingredients that
	     appear in ``validAliasDict``.
	  4. For each line try to extract an amount and a unit, recording the
	     computed mass in the global ``ingredientMassDict`` and the unit usage
	     in the global ``unitCountDict``; print a summary of the misses.
	  5. Build the alias data (``fillAliasData``), dump it
	     (``dumpAliasDataToJSONFiles``), then write three cumulative subsets
	     of it: ``aliasData_small.json`` (up to 250 aliases),
	     ``aliasData_medium.json`` (up to 500) and ``aliasData_large.json``
	     (up to 1000).

	``argv`` is accepted for the usual entry-point signature but is not used.
	Nothing is returned.

	NOTE(review): ``ingredientMassDict`` and ``unitCountDict`` are only
	mutated here, never assigned, so they must already exist at module level.
	``ndb`` is likewise not bound in this function and is resolved from the
	enclosing module -- confirm it provides ``getConversionFactor``.
	"""
	global ingredientMassDict
	global validAliasDict
	global unitCountDict
	validIngredientsFilePath = os.path.join(c.PATH_TO_RESOURCES, "validIngredients.json")
	validAliasDict = util.loadJSONDict(validIngredientsFilePath)

	conversionDict = util.createWaterConversionDict()

	allRecipes = []

	# Each alias has 3 main fields:
	#   "count"
	#   "aliasBuddies"
	#   "lines"
	aliasData = {}
	ingredientLineDict = {}

	# Read in and parse recipe data structures (dictionaries) from a json file.
	extractRecipesFromJSON(allRecipes)

	# Convert all string data to lowercase.
	lowerAllStrings(allRecipes)

	# Group the raw ingredient lines by their (valid) ingredient name.
	# Lines and ingredients are paired by position; zip stops at the shorter
	# list, so surplus lines without a matching ingredient are ignored.
	for recipe in allRecipes:
		for rawLine, rawIngredient in zip(recipe['ingredientLines'], recipe['ingredients']):
			# NOTE(review): under Python 2 these are ASCII byte strings that
			# still compare equal to unicode dict keys -- confirm before porting.
			ingredientLine = rawLine.encode('ascii', errors='ignore')
			ingredient = rawIngredient.encode('ascii', errors='ignore')
			if ingredient not in validAliasDict:
				continue
			ingredientLineDict.setdefault(ingredient, []).append(ingredientLine)

	# Kept as floats so the summary below prints exactly as before ("3.0").
	unmatched = float(0)
	tried = float(0)
	for ingredient, ingredientLines in ingredientLineDict.items():
		for ingredientLine in ingredientLines:
			words = ingredientLine.split()
			if not words:
				# A blank line (e.g. all non-ASCII text that was stripped by
				# the encode above) has nothing to parse; skip it rather than
				# crash on words[0].
				continue
			potentialStart = removeHyphen(words[0])

			# If the first token is a number, try the next few.
			if isPossibleAmount(words[0]):
				if '/' in potentialStart:
					# Simple fraction such as "1/2".
					tokens = potentialStart.split('/')
					potentialStart = float(tokens[0]) / float(tokens[1])
				amount = float(potentialStart)
				potentialUnit, foundUnit = extractUnit(words, ingredient, conversionDict)

				if potentialUnit is not None:
					# Record both the mass and the unit count.
					if foundUnit:
						massInGrams = amount * ndb.getConversionFactor(ingredient, potentialUnit)
					else:
						massInGrams = amount * conversionDict[potentialUnit]
					if ingredient not in ingredientMassDict:
						ingredientMassDict[ingredient] = []
					ingredientMassDict[ingredient].append(massInGrams)
					if ingredient not in unitCountDict:
						unitCountDict[ingredient] = Counter()
					unitCountDict[ingredient][potentialUnit] += 1
				else:
					unmatched += 1

			elif not hasAnAmount(words):
				if ingredient not in unitCountDict:
					unitCountDict[ingredient] = Counter()
				unitCountDict[ingredient]['unitless'] += 1
			tried += 1

	# Single-argument parenthesised print produces the same output as the
	# print statement under Python 2.
	print("Missed amounts for " + str(unmatched) + " / " + str(tried) + " ingredients.")
	if tried:
		# Only report a rate when something was parsed; avoids dividing by zero.
		print(str((tried - unmatched) / tried * 100) + "% Success rate!")

	# Get the counts of ingredient short names.
	# Create a dictionary storing relationships between the various aliases.
	# Create a dictionary with aliases as keys and lists of lines they've been
	# associated with as values.
	fillAliasData(allRecipes, aliasData)

	dumpAliasDataToJSONFiles(aliasData)

	# Now create the smaller sample files.  The subsets are cumulative: each
	# stage moves more aliases out of aliasData into the same dict before
	# dumping it.  aliasData is consumed in the process, which is fine because
	# the full dump has already been written above.
	subsetStages = (
		("aliasData_small.json", 250),
		("aliasData_medium.json", 250),
		("aliasData_large.json", 500),
	)
	smallAliasData = {}
	for fileName, batchSize in subsetStages:
		# popitem() raises KeyError on an empty dict, so never ask for more
		# entries than are left.
		for _ in range(min(batchSize, len(aliasData))):
			aliasName, aliasEntry = aliasData.popitem()
			smallAliasData[aliasName] = aliasEntry
		util.dumpJSONDict(os.path.join(c.PATH_TO_RESOURCES, fileName), smallAliasData)
예제 #6
0
def createNutrientDataJSON():
	"""Load ``allNutrientIDs.json`` and pass it to ``buildNutritionalDatabase``.

	The file is read from ``c.PATH_TO_RESOURCES`` via ``util.loadJSONDict``.
	Nothing is returned.
	"""
	nutrientIDs = util.loadJSONDict(
		os.path.join(c.PATH_TO_RESOURCES, "allNutrientIDs.json")
	)
	buildNutritionalDatabase(nutrientIDs)