# Example 1 (scraped-snippet marker; original text: "Esempio n. 1", score 0)
def manualNutrientQuery(ingredientName, ingredientId):
	"""Fetch nutrition data for one ingredient by its ID and dump the
	result to manualNutrientQuery.json, keyed by the ingredient's name.

	If the HTTP request helper returns None, an empty dict is dumped.
	"""
	outputPath = os.path.join(c.PATH_TO_RESOURCES, "manualNutrientQuery.json")
	results = {}
	response = getNutritionalRequest(ingredientId)
	if response is not None:
		parsedJson = json.loads(response.content)
		results[ingredientName] = nutrientDictFromJSON(parsedJson)
	util.dumpJSONDict(outputPath, results)
def main(argv):
	"""Convert a plain-text file into a JSON file of its non-blank lines.

	The input file is named via a "file=<relative path>" argument in argv;
	the output is written next to the input with a ".json" extension,
	under the single key "list".
	"""
	fileArg = [s for s in argv if "file=" in s][0]
	relativeName = fileArg.split("=")[-1]
	sourcePath = os.path.join(c.PATH_TO_ROOT, relativeName.lstrip("/"))

	with open(sourcePath, 'r') as sourceFile:
		allLines = sourceFile.read().split("\n")
		nonBlankLines = [line for line in allLines if line.strip() != ""]

	# Swap the original extension for ".json", keeping the directory.
	baseName = sourcePath.split("/")[-1]
	jsonName = ".".join(baseName.split(".")[:-1]) + ".json"
	targetPath = os.path.join(os.path.dirname(sourcePath), jsonName)

	util.dumpJSONDict(targetPath, {"list": nonBlankLines})
def main(argv):
    """Merge every recipe JSON file under res/jsonrecipes into a single
    allRecipes.json, dropping duplicate keys."""
    recipesDir = os.path.join(c.PATH_TO_RESOURCES, "jsonrecipes")

    # Absolute paths of all ".json" files in the recipes directory.
    recipeFiles = util.listFilesWithSuffix(recipesDir, ".json")

    # One dict per file, then naively merged (duplicates removed).
    recipeDicts = util.loadJSONDicts(recipeFiles)
    mergedRecipes = util.naivelyMergeDicts(recipeDicts)

    # Persist the merged dictionary.
    outputPath = os.path.join(c.PATH_TO_RESOURCES, "allRecipes.json")
    util.dumpJSONDict(outputPath, mergedRecipes)
def main(argv):
    """Convert a plain-text file (named via "file=<path>" in argv) into a
    JSON file of its non-blank lines, written beside the input under the
    key "list". Duplicate of the tab-indented variant above."""
    matching = [arg for arg in argv if "file=" in arg]
    relPath = matching[0].split("=")[-1]
    inputPath = os.path.join(c.PATH_TO_ROOT, relPath.lstrip("/"))

    with open(inputPath, "r") as handle:
        rawLines = handle.read().split("\n")
        keptLines = [ln for ln in rawLines if ln.strip() != ""]

    # Replace the original extension with ".json" in the same directory.
    stem = ".".join(inputPath.split("/")[-1].split(".")[:-1])
    outputPath = os.path.join(os.path.dirname(inputPath), stem + ".json")

    util.dumpJSONDict(outputPath, {"list": keptLines})
def main(argv):
	print "Merging nutrient IDs"
	pathToNutrients = os.path.join(c.PATH_TO_RESOURCES, "nutrients", "foundingredients")

	# Get the full absolute file paths of all files in /res/nutrients/foundingredients
	# that end in ".json"
	jsonNutrients = util.listFilesWithSuffix(pathToNutrients, ".json")

	# Load all of these ".json" files into dictionaries and put all
	# these dictionaries in a list
	listOfJSONDicts = util.loadJSONDicts(jsonNutrients)

	# Merge Nutrients into one dictionary, deleting duplicates
	mergedDict = util.naivelyMergeDicts(listOfJSONDicts)

	# Write the merged recipe dictionary to a file
	nutrientIDsFilePath = os.path.join(c.PATH_TO_RESOURCES, "allNutrientIDs.json")
	util.dumpJSONDict(nutrientIDsFilePath, mergedDict)
# Example 6 (scraped-snippet marker; original text: "Esempio n. 6", score 0)
def printClusters(clusterToData, clusterStats, est, dataType, featureList):
	dict2dump = copy.deepcopy(clusterToData)
	numtypeFeatures = getListIntersections(featureList, c.KMEANS_NUM_FEATURES)

	for cluster, recipeList in clusterToData.iteritems():
		print 10 * '-' + cluster + ' '+ 10 * '-'
		clusterData = clusterStats[cluster]
		
		for f, v in clusterData.iteritems():
			if f in numtypeFeatures or isIngredientContraint(f):
				if f == c.KMEANS_FEATURE_TOTALTIME:
					v /= TOTAL_TIME_ADJUSTMENT
				if f == c.KMEANS_FEATURE_NUM_INGREDIENTS:
					v /= NUM_INGREDIENT_ADJUSTMENT
				dict2dump[cluster + ' ' + f + ' average score'] = v
				print "The cluster's %s average score was: %f" % (f, v)
			else:
				dict2dump[cluster + ' ' + f + ' distribution'] = v
				print "The cluster's %s distribution was: " % f
				for name, prob in v.iteritems():
					print "    %s: %f%%" % (name, prob)

		print
		for recipe in recipeList:
			print recipe
		print

	dict2dump['command-line'] = COMMAND_LINE

	tempFilename = dataType + '-cluster-'
	for f in featureList:
		tempFilename = tempFilename + f + '&'
	if tempFilename[-1] == '&':
		tempFilename = tempFilename[:-1]
	toFilename = util.string_appendDateAndTime(tempFilename)
	
	if dataType == c.KMEANS_RECIPE_DATATYPE:
		jsonToFilePath = os.path.join(c.PATH_TO_ROOT, "res/kmeans-results/recipe", toFilename)
	if dataType == c.KMEANS_ALIAS_DATATYPE:
		jsonToFilePath = os.path.join(c.PATH_TO_ROOT, "res/kmeans-results/alias", toFilename)

	util.dumpJSONDict(jsonToFilePath, dict2dump)
	print 'Clustering dumped into: ' + jsonToFilePath
def main(argv):
	print "Merging nutrient data"
	pathToNutrients = os.path.join(c.PATH_TO_RESOURCES, "nutrients", "nutrientdata")

	# Get the full absolute file paths of all files in /res/nutrients/foundingredients
	# that end in ".json"
	jsonNutrients = util.listFilesWithSuffix(pathToNutrients, ".json")

	# Load all of these ".json" files into dictionaries and put all
	# these dictionaries in a list
	listOfJSONDicts = util.loadJSONDicts(jsonNutrients)

	# Merge Nutrients into one dictionary, deleting duplicates
	mergedDict = util.naivelyMergeDicts(listOfJSONDicts)

	# Write the merged recipe dictionary to a file
	nutrientIDsFilePath = os.path.join(c.PATH_TO_RESOURCES, "allNutrientData.json")
	util.dumpJSONDict(nutrientIDsFilePath, mergedDict)

	#This will prepare the json file of valid ingredients
	validIngredientDict = {}
	validNutrientDict = {}
	for ingredient in mergedDict:
		#print ingredient.encode('ascii', errors='ignore')
		validIngredientDict[ingredient] = 0
		for nutrient in mergedDict[ingredient]['nutrients']:
			validNutrientDict[nutrient] = 0



	print "There are " + str(len(validIngredientDict)) + " valid ingredients."
	print "There are " + str(len(validNutrientDict)) + " valid nutrients."

	validIngredientsFilePath = os.path.join(c.PATH_TO_RESOURCES, "validIngredients.json")
	validNutrientFilePath = os.path.join(c.PATH_TO_RESOURCES, "validNutrients.json")
	util.dumpJSONDict(validIngredientsFilePath, validIngredientDict)
	util.dumpJSONDict(validNutrientFilePath, validNutrientDict)
def dumpAliasDataToJSONFiles(aliasData):
	"""Write the alias-data dictionary to a timestamped JSON file under
	res/aliasdata/."""
	timestampedName = util.string_appendDateAndTime("aliasData")
	util.dumpJSONDict(
		os.path.join(c.PATH_TO_RESOURCES, "aliasdata", timestampedName), aliasData)
def main(argv):
	global ingredientMassDict
	global validAliasDict
	global unitCountDict
	validIngredientsFilePath = os.path.join(c.PATH_TO_RESOURCES, "validIngredients.json")
	validAliasDict = util.loadJSONDict(validIngredientsFilePath)

	conversionDict = util.createWaterConversionDict()

	allRecipes = []

	# Each alias has 3 main fields:
	#   "count"
	#   "aliasBuddies"
	#   "lines"
	aliasData = {}
	ingredientLineDict = {}
	#ingredientMassDict = {}
	#unitCountDict = {}

	# Read in and parse recipe data structures (dictionaries) from a json file.
	extractRecipesFromJSON(allRecipes)

	# Convert all string data to lowercase.
	lowerAllStrings(allRecipes)

	#ndb = ndb.NutrientDatabase()
	
	#Let's f**k around.
	unmatched = float(0)
	tried = float(0)
	for recipe in allRecipes:
		# print "Ingredient Lines: " + str(len(recipe['ingredientLines']))
		# print recipe['ingredientLines']
		# print "\nIngredients: " + str(len(recipe['ingredients']))
		# print recipe['ingredients']

		for ingredientLineIndex in range(0, len(recipe['ingredientLines'])):
			if ingredientLineIndex == len(recipe['ingredients']):
				break
		 	ingredientLine = recipe['ingredientLines'][ingredientLineIndex].encode('ascii', errors='ignore')
		 	ingredient = recipe['ingredients'][ingredientLineIndex].encode('ascii', errors='ignore')
		 	if ingredient not in validAliasDict:
		 		continue
		 	if ingredient not in ingredientLineDict:
		 		ingredientLineDict[ingredient] = []
		 	ingredientLineDict[ingredient].append(ingredientLine)

	#print ingredientLineDict

	for ingredient in ingredientLineDict:
		for ingredientLine in ingredientLineDict[ingredient]:
			#TIME TO PARSE.
			words = ingredientLine.split()
			potentialStart = removeHyphen(words[0])

			#If the first token is a number, try the next few.
			if isPossibleAmount(words[0]):
				if '/' in potentialStart:
					tokens = potentialStart.split('/')
					first = float(tokens[0])
					second = float(tokens[1])
					potentialStart = first/second
				amount = float(potentialStart)
				potentialUnit, foundUnit = extractUnit(words, ingredient, conversionDict)
				
				if potentialUnit != None:
					#Add both the mass and the unit count
					if foundUnit:
						massInGrams = amount*ndb.getConversionFactor(ingredient, potentialUnit)
					else:
						massInGrams = amount*conversionDict[potentialUnit]
					if ingredient not in ingredientMassDict:
						ingredientMassDict[ingredient] = []
					ingredientMassDict[ingredient].append(massInGrams)
					if ingredient not in unitCountDict:
						unitCountDict[ingredient] = Counter()
					unitCountDict[ingredient][potentialUnit] += 1	
					#print "Amount: " + str(amount) + " Unit: " + potentialUnit
				else:
					unmatched += 1
					#print "Couldn't match unit for ingredient: " + ingredient
					#print words
					#print


			elif not hasAnAmount(words):
				if ingredient not in unitCountDict:
					unitCountDict[ingredient] = Counter()
				unitCountDict[ingredient]['unitless'] += 1
			tried += 1
	print "Missed amounts for " + str(unmatched) + " / " + str(tried) + " ingredients."
	print str((tried-unmatched)/tried*100) + "% Success rate!"


	# Get the counts of ingredient short names.
	# Create a dictionary storing relationships between the various aliases.
	# Create a dictionary with aliases as keys and lists of lines they've been
	# associated with as values.

	fillAliasData(allRecipes, aliasData)

	#Temporarily removed to test.
	dumpAliasDataToJSONFiles(aliasData)

	#Now create small files
	smallAliasData = {}
	for _ in range(250):
		item = aliasData.popitem()
		smallAliasData[item[0]] = item[1]

	smallFilePath = os.path.join(c.PATH_TO_RESOURCES, "aliasData_small.json")
	util.dumpJSONDict(smallFilePath, smallAliasData)

	for _ in range(250):
		item = aliasData.popitem()
		smallAliasData[item[0]] = item[1]

	mediumFilePath = os.path.join(c.PATH_TO_RESOURCES, "aliasData_medium.json")
	util.dumpJSONDict(mediumFilePath, smallAliasData)

	for _ in range(500):
		item = aliasData.popitem()
		smallAliasData[item[0]] = item[1]

	largeFilePath = os.path.join(c.PATH_TO_RESOURCES, "aliasData_large.json")
	util.dumpJSONDict(largeFilePath, smallAliasData)
# Example 10 (scraped-snippet marker; original text: "Esempio n. 10", score 0)
def writeSegmentedJSONFile(dictionaryToDump, baseFilename, segmentIndex):
	"""Dump one segment of retrieved recipes to a numbered JSON file.

	The target path is "<baseFilename>_<segmentIndex>.json", e.g.
	/res/jsonrecipes/jsonrecipe_<index> where index is startIndex/100.
	"""
	segmentPath = "{0}_{1}.json".format(baseFilename, segmentIndex)
	util.dumpJSONDict(segmentPath, dictionaryToDump)