def manualNutrientQuery(ingredientName, ingredientId):
    """Fetch nutrient data for one ingredient by its ID and dump the
    result to manualNutrientQuery.json in the resources directory.

    If the HTTP request fails (returns None) the dumped dictionary is
    left empty.
    """
    outputPath = os.path.join(c.PATH_TO_RESOURCES, "manualNutrientQuery.json")
    resultDict = {}
    response = getNutritionalRequest(ingredientId)
    if response is not None:
        parsedJson = json.loads(response.content)
        resultDict[ingredientName] = nutrientDictFromJSON(parsedJson)
    util.dumpJSONDict(outputPath, resultDict)
def main(argv):
    """Convert a newline-delimited text file into a JSON list file.

    argv must contain a "file=<path>" entry; the path is resolved
    against the project root. Blank lines are discarded, and the result
    is written as {"list": [...]} next to the input file, with the
    extension replaced by ".json".
    """
    fileArg = [token for token in argv if "file=" in token][0]
    relativeName = fileArg.split("=")[-1]
    inputPath = os.path.join(c.PATH_TO_ROOT, relativeName.lstrip("/"))
    with open(inputPath, 'r') as inputFile:
        rawLines = inputFile.read().split("\n")
    # Keep only lines with actual content.
    contentLines = [line for line in rawLines if line.strip() != ""]
    # Swap the original extension for ".json".
    baseName = inputPath.split("/")[-1]
    jsonName = ".".join(baseName.split(".")[:-1]) + ".json"
    outputPath = os.path.join(os.path.dirname(inputPath), jsonName)
    util.dumpJSONDict(outputPath, {"list": contentLines})
def main(argv):
    """Merge every recipe JSON file under /res/jsonrecipes into a
    single allRecipes.json in the resources directory, discarding
    duplicate keys."""
    recipeDir = os.path.join(c.PATH_TO_RESOURCES, "jsonrecipes")
    # Absolute paths of every ".json" file in the recipe directory.
    recipeFiles = util.listFilesWithSuffix(recipeDir, ".json")
    # One dictionary per file.
    recipeDicts = util.loadJSONDicts(recipeFiles)
    # Collapse into one dictionary; duplicate keys are dropped.
    merged = util.naivelyMergeDicts(recipeDicts)
    mergedPath = os.path.join(c.PATH_TO_RESOURCES, "allRecipes.json")
    util.dumpJSONDict(mergedPath, merged)
def main(argv):
    """Turn the text file named by a "file=<path>" argument into a JSON
    file of its non-blank lines.

    The path is taken relative to the project root; the output file is
    written alongside the input with a ".json" extension and holds a
    single "list" key.
    """
    arg = [s for s in argv if "file=" in s][0]
    name = arg.split("=")[-1]
    path = os.path.join(c.PATH_TO_ROOT, name.lstrip("/"))
    with open(path, "r") as handle:
        raw = handle.read()
    # Strip whitespace-only lines from the output.
    kept = [line for line in raw.split("\n") if line.strip() != ""]
    base = path.split("/")[-1]
    targetName = ".".join(base.split(".")[:-1]) + ".json"
    targetPath = os.path.join(os.path.dirname(path), targetName)
    util.dumpJSONDict(targetPath, {"list": kept})
def main(argv): print "Merging nutrient IDs" pathToNutrients = os.path.join(c.PATH_TO_RESOURCES, "nutrients", "foundingredients") # Get the full absolute file paths of all files in /res/nutrients/foundingredients # that end in ".json" jsonNutrients = util.listFilesWithSuffix(pathToNutrients, ".json") # Load all of these ".json" files into dictionaries and put all # these dictionaries in a list listOfJSONDicts = util.loadJSONDicts(jsonNutrients) # Merge Nutrients into one dictionary, deleting duplicates mergedDict = util.naivelyMergeDicts(listOfJSONDicts) # Write the merged recipe dictionary to a file nutrientIDsFilePath = os.path.join(c.PATH_TO_RESOURCES, "allNutrientIDs.json") util.dumpJSONDict(nutrientIDsFilePath, mergedDict)
def printClusters(clusterToData, clusterStats, est, dataType, featureList): dict2dump = copy.deepcopy(clusterToData) numtypeFeatures = getListIntersections(featureList, c.KMEANS_NUM_FEATURES) for cluster, recipeList in clusterToData.iteritems(): print 10 * '-' + cluster + ' '+ 10 * '-' clusterData = clusterStats[cluster] for f, v in clusterData.iteritems(): if f in numtypeFeatures or isIngredientContraint(f): if f == c.KMEANS_FEATURE_TOTALTIME: v /= TOTAL_TIME_ADJUSTMENT if f == c.KMEANS_FEATURE_NUM_INGREDIENTS: v /= NUM_INGREDIENT_ADJUSTMENT dict2dump[cluster + ' ' + f + ' average score'] = v print "The cluster's %s average score was: %f" % (f, v) else: dict2dump[cluster + ' ' + f + ' distribution'] = v print "The cluster's %s distribution was: " % f for name, prob in v.iteritems(): print " %s: %f%%" % (name, prob) print for recipe in recipeList: print recipe print dict2dump['command-line'] = COMMAND_LINE tempFilename = dataType + '-cluster-' for f in featureList: tempFilename = tempFilename + f + '&' if tempFilename[-1] == '&': tempFilename = tempFilename[:-1] toFilename = util.string_appendDateAndTime(tempFilename) if dataType == c.KMEANS_RECIPE_DATATYPE: jsonToFilePath = os.path.join(c.PATH_TO_ROOT, "res/kmeans-results/recipe", toFilename) if dataType == c.KMEANS_ALIAS_DATATYPE: jsonToFilePath = os.path.join(c.PATH_TO_ROOT, "res/kmeans-results/alias", toFilename) util.dumpJSONDict(jsonToFilePath, dict2dump) print 'Clustering dumped into: ' + jsonToFilePath
def main(argv): print "Merging nutrient data" pathToNutrients = os.path.join(c.PATH_TO_RESOURCES, "nutrients", "nutrientdata") # Get the full absolute file paths of all files in /res/nutrients/foundingredients # that end in ".json" jsonNutrients = util.listFilesWithSuffix(pathToNutrients, ".json") # Load all of these ".json" files into dictionaries and put all # these dictionaries in a list listOfJSONDicts = util.loadJSONDicts(jsonNutrients) # Merge Nutrients into one dictionary, deleting duplicates mergedDict = util.naivelyMergeDicts(listOfJSONDicts) # Write the merged recipe dictionary to a file nutrientIDsFilePath = os.path.join(c.PATH_TO_RESOURCES, "allNutrientData.json") util.dumpJSONDict(nutrientIDsFilePath, mergedDict) #This will prepare the json file of valid ingredients validIngredientDict = {} validNutrientDict = {} for ingredient in mergedDict: #print ingredient.encode('ascii', errors='ignore') validIngredientDict[ingredient] = 0 for nutrient in mergedDict[ingredient]['nutrients']: validNutrientDict[nutrient] = 0 print "There are " + str(len(validIngredientDict)) + " valid ingredients." print "There are " + str(len(validNutrientDict)) + " valid nutrients." validIngredientsFilePath = os.path.join(c.PATH_TO_RESOURCES, "validIngredients.json") validNutrientFilePath = os.path.join(c.PATH_TO_RESOURCES, "validNutrients.json") util.dumpJSONDict(validIngredientsFilePath, validIngredientDict) util.dumpJSONDict(validNutrientFilePath, validNutrientDict)
def dumpAliasDataToJSONFiles(aliasData):
    """Write aliasData to /res/aliasdata/ under a date/time-stamped
    "aliasData" filename."""
    stampedName = util.string_appendDateAndTime("aliasData")
    stampedPath = os.path.join(c.PATH_TO_RESOURCES, "aliasdata", stampedName)
    util.dumpJSONDict(stampedPath, aliasData)
def main(argv):
    """Parse ingredient lines from all recipes, accumulate per-ingredient
    mass and unit statistics into the module-level dictionaries, then
    build and dump alias data (full plus small/medium/large samples).

    Mutates the module-level globals ingredientMassDict, validAliasDict
    and unitCountDict.
    """
    global ingredientMassDict
    global validAliasDict
    global unitCountDict
    # Ingredients must appear in validIngredients.json to be processed.
    validIngredientsFilePath = os.path.join(c.PATH_TO_RESOURCES,
                                            "validIngredients.json")
    validAliasDict = util.loadJSONDict(validIngredientsFilePath)
    # Fallback unit -> grams factors (water-density based).
    conversionDict = util.createWaterConversionDict()
    allRecipes = []
    # Each alias has 3 main fields:
    #   "count"
    #   "aliasBuddies"
    #   "lines"
    aliasData = {}
    # ingredient name -> list of raw ingredient-line strings.
    ingredientLineDict = {}
    # Read in and parse recipe data structures (dictionaries) from a
    # json file.
    extractRecipesFromJSON(allRecipes)
    # Convert all string data to lowercase.
    lowerAllStrings(allRecipes)
    #ndb = ndb.NutrientDatabase()
    unmatched = float(0)
    tried = float(0)
    # Pass 1: pair each ingredient line with its ingredient name and
    # bucket the lines by ingredient.
    for recipe in allRecipes:
        for ingredientLineIndex in range(0, len(recipe['ingredientLines'])):
            # 'ingredientLines' and 'ingredients' are parallel lists;
            # stop if the lines outnumber the ingredients.
            if ingredientLineIndex == len(recipe['ingredients']):
                break
            ingredientLine = recipe['ingredientLines'][ingredientLineIndex].encode('ascii', errors='ignore')
            ingredient = recipe['ingredients'][ingredientLineIndex].encode('ascii', errors='ignore')
            if ingredient not in validAliasDict:
                continue
            if ingredient not in ingredientLineDict:
                ingredientLineDict[ingredient] = []
            ingredientLineDict[ingredient].append(ingredientLine)
    # Pass 2: parse each line for an amount and a unit, converting to
    # grams where possible.
    for ingredient in ingredientLineDict:
        for ingredientLine in ingredientLineDict[ingredient]:
            words = ingredientLine.split()
            potentialStart = removeHyphen(words[0])
            # If the first token is a number, try the next few.
            if isPossibleAmount(words[0]):
                # Fractions like "1/2" become their float value.
                if '/' in potentialStart:
                    tokens = potentialStart.split('/')
                    first = float(tokens[0])
                    second = float(tokens[1])
                    potentialStart = first/second
                amount = float(potentialStart)
                potentialUnit, foundUnit = extractUnit(words, ingredient,
                                                      conversionDict)
                if potentialUnit != None:
                    # Add both the mass and the unit count.
                    if foundUnit:
                        # NOTE(review): relies on a module-level `ndb`
                        # object exposing getConversionFactor — the local
                        # NutrientDatabase construction above is commented
                        # out; confirm the module import provides it.
                        massInGrams = amount*ndb.getConversionFactor(ingredient, potentialUnit)
                    else:
                        # Fall back to the water-density conversion.
                        massInGrams = amount*conversionDict[potentialUnit]
                    if ingredient not in ingredientMassDict:
                        ingredientMassDict[ingredient] = []
                    ingredientMassDict[ingredient].append(massInGrams)
                    if ingredient not in unitCountDict:
                        unitCountDict[ingredient] = Counter()
                    unitCountDict[ingredient][potentialUnit] += 1
                else:
                    unmatched += 1
            elif not hasAnAmount(words):
                # Line has no numeric amount at all: count it as unitless.
                if ingredient not in unitCountDict:
                    unitCountDict[ingredient] = Counter()
                unitCountDict[ingredient]['unitless'] += 1
            tried += 1
    print "Missed amounts for " + str(unmatched) + " / " + str(tried) + " ingredients."
    print str((tried-unmatched)/tried*100) + "% Success rate!"
    # Get the counts of ingredient short names.
    # Create a dictionary storing relationships between the various
    # aliases.
    # Create a dictionary with aliases as keys and lists of lines
    # they've been associated with as values.
    fillAliasData(allRecipes, aliasData)
    dumpAliasDataToJSONFiles(aliasData)
    # Now create small files. popitem() destructively drains aliasData,
    # and smallAliasData keeps accumulating, so the sample files nest:
    # small=250, medium=500, large=1000 entries.
    # NOTE(review): raises KeyError if aliasData holds fewer than 1000
    # aliases — presumably guaranteed by the dataset; confirm.
    smallAliasData = {}
    for _ in range(250):
        item = aliasData.popitem()
        smallAliasData[item[0]] = item[1]
    smallFilePath = os.path.join(c.PATH_TO_RESOURCES, "aliasData_small.json")
    util.dumpJSONDict(smallFilePath, smallAliasData)
    for _ in range(250):
        item = aliasData.popitem()
        smallAliasData[item[0]] = item[1]
    mediumFilePath = os.path.join(c.PATH_TO_RESOURCES, "aliasData_medium.json")
    util.dumpJSONDict(mediumFilePath, smallAliasData)
    for _ in range(500):
        item = aliasData.popitem()
        smallAliasData[item[0]] = item[1]
    largeFilePath = os.path.join(c.PATH_TO_RESOURCES, "aliasData_large.json")
    util.dumpJSONDict(largeFilePath, smallAliasData)
def writeSegmentedJSONFile(dictionaryToDump, baseFilename, segmentIndex):
    """Dump one segment of retrieved recipes to
    "<baseFilename>_<segmentIndex>.json"."""
    segmentPath = "%s_%s.json" % (baseFilename, str(segmentIndex))
    util.dumpJSONDict(segmentPath, dictionaryToDump)