def crossfolding(trainrecipes, numPartitions=10, classifier=None):
    """Estimate classification accuracy with k-fold cross-validation.

    The recipes are split into ``numPartitions`` contiguous folds whose sizes
    differ by at most one.  Each fold is used once as the test set while all
    remaining recipes form the training set.

    Args:
        trainrecipes: Sequence of labelled recipes.  Each recipe is indexed
            with the keys ``'id'`` and ``'cuisine'``.
        numPartitions: Number of folds; must be at least 2.
        classifier: Callable ``classifier(trainList, testList)`` returning a
            mapping from recipe id to predicted cuisine.  Defaults to
            ``bayes.run``.

    Returns:
        The mean of the per-fold accuracies (fraction of test recipes whose
        predicted cuisine equals the labelled one).

    Raises:
        ValueError: If ``numPartitions`` is smaller than 2.
        SystemExit: With status 1 if there are fewer recipes than folds.
    """
    if numPartitions < 2:
        raise ValueError("numPartitions must be at least 2")
    if classifier is None:
        # Looked up at call time so the default stays the module's classifier.
        classifier = bayes.run

    logging.info("### run summary ###")

    totalRecipes = len(trainrecipes)
    if totalRecipes < numPartitions:
        logging.error("Train dataset must have at least %d items",
                      numPartitions)
        # Non-zero status: this is a failure, not a clean exit.
        sys.exit(1)

    # Integer split: the first `remainder` folds get one extra recipe.
    baseSize, remainder = divmod(totalRecipes, numPartitions)
    partitionsSize = [baseSize + 1 if fold < remainder else baseSize
                      for fold in range(numPartitions)]

    logging.info(">number of training recipes: %d", totalRecipes)
    logging.info("...calculating accuracy for each partition...")

    accuracy = np.zeros(numPartitions)
    foldStart = 0
    for fold, foldSize in enumerate(partitionsSize):
        logging.info("FOLD %d", fold + 1)

        # Test fold is [foldStart, foldEnd); everything else is training data.
        foldEnd = foldStart + foldSize
        testList = trainrecipes[foldStart:foldEnd]
        trainList = (list(trainrecipes[:foldStart])
                     + list(trainrecipes[foldEnd:]))
        foldStart = foldEnd

        classifiedList = classifier(trainList, testList)

        # Every fold holds at least one recipe, so the division is safe.
        truePositives = sum(
            1 for recipe in testList
            if classifiedList[recipe['id']] == recipe['cuisine'])
        accuracy[fold] = float(truePositives) / len(testList)

    return np.average(accuracy)
def main(argv):
    """Classify the unknown recipes and write the predictions as CSV.

    Reads the training and unknown recipe JSON files named by
    ``args.trainRecipesFile`` and ``args.unknownRecipesFile``, runs
    ``bayes.run`` on them and writes one ``id,cuisine`` row per result to
    ``args.outputFile``.

    Args:
        argv: Command-line arguments.  Not used by this function; the paths
            come from the ``args`` object defined outside it.
    """
    # Load json files.
    with open(args.trainRecipesFile) as train_recipes_file:
        trainJson = json.load(train_recipes_file)
    with open(args.unknownRecipesFile) as unknown_recipes_file:
        unknownJson = json.load(unknown_recipes_file)

    # Run naive bayes classifier.
    results = bayes.run(trainJson, unknownJson)

    # Write to output file; the context manager closes the handle even if a
    # write fails part-way through.
    with open(args.outputFile, "w") as text_file:
        text_file.write('id,cuisine\n')
        for recipeId, cuisine in results.items():
            text_file.write(str(recipeId) + ',' + cuisine + '\n')
def main(argv):
    """Classify the unknown recipes and write the predictions as CSV.

    Reads the training and unknown recipe JSON files named by
    ``args.trainRecipesFile`` and ``args.unknownRecipesFile``, runs
    ``bayes.run`` on them and writes one ``id,cuisine`` row per result to
    ``args.outputFile``.

    Args:
        argv: Command-line arguments.  Not used by this function; the paths
            come from the ``args`` object defined outside it.
    """
    # Load json files.
    with open(args.trainRecipesFile) as train_recipes_file:
        trainJson = json.load(train_recipes_file)
    with open(args.unknownRecipesFile) as unknown_recipes_file:
        unknownJson = json.load(unknown_recipes_file)

    # Run naive bayes classifier.
    results = bayes.run(trainJson, unknownJson)

    # Write to output file; the context manager closes the handle even if a
    # write fails part-way through.
    with open(args.outputFile, "w") as text_file:
        text_file.write('id,cuisine\n')
        for recipeId, cuisine in results.items():
            text_file.write(str(recipeId) + ',' + cuisine + '\n')
def crossfolding(trainrecipes, numPartitions=10, classifier=None):
    """Estimate classification accuracy with k-fold cross-validation.

    The recipes are split into ``numPartitions`` contiguous folds whose sizes
    differ by at most one.  Each fold is used once as the test set while all
    remaining recipes form the training set.

    Args:
        trainrecipes: Sequence of labelled recipes.  Each recipe is indexed
            with the keys ``'id'`` and ``'cuisine'``.
        numPartitions: Number of folds; must be at least 2.
        classifier: Callable ``classifier(trainList, testList)`` returning a
            mapping from recipe id to predicted cuisine.  Defaults to
            ``bayes.run``.

    Returns:
        The mean of the per-fold accuracies (fraction of test recipes whose
        predicted cuisine equals the labelled one).

    Raises:
        ValueError: If ``numPartitions`` is smaller than 2.
        SystemExit: With status 1 if there are fewer recipes than folds.
    """
    if numPartitions < 2:
        raise ValueError("numPartitions must be at least 2")
    if classifier is None:
        # Looked up at call time so the default stays the module's classifier.
        classifier = bayes.run

    logging.info("### run summary ###")

    totalRecipes = len(trainrecipes)
    if totalRecipes < numPartitions:
        logging.error("Train dataset must have at least %d items",
                      numPartitions)
        # Non-zero status: this is a failure, not a clean exit.
        sys.exit(1)

    # Integer split: the first `remainder` folds get one extra recipe.
    baseSize, remainder = divmod(totalRecipes, numPartitions)
    partitionsSize = [baseSize + 1 if fold < remainder else baseSize
                      for fold in range(numPartitions)]

    logging.info(">number of training recipes: %d", totalRecipes)
    logging.info("...calculating accuracy for each partition...")

    accuracy = np.zeros(numPartitions)
    foldStart = 0
    for fold, foldSize in enumerate(partitionsSize):
        logging.info("FOLD %d", fold + 1)

        # Test fold is [foldStart, foldEnd); everything else is training data.
        foldEnd = foldStart + foldSize
        testList = trainrecipes[foldStart:foldEnd]
        trainList = (list(trainrecipes[:foldStart])
                     + list(trainrecipes[foldEnd:]))
        foldStart = foldEnd

        classifiedList = classifier(trainList, testList)

        # Every fold holds at least one recipe, so the division is safe.
        truePositives = sum(
            1 for recipe in testList
            if classifiedList[recipe['id']] == recipe['cuisine'])
        accuracy[fold] = float(truePositives) / len(testList)

    return np.average(accuracy)