Esempio n. 1
0
def crossfolding(trainrecipes):
    """Estimate classifier accuracy with 10-fold cross-validation.

    The recipes are split into 10 contiguous partitions whose sizes differ
    by at most one. Each partition is used once as the test set while all
    remaining recipes are handed to ``bayes.run`` as the training set. The
    accuracy of a fold is the fraction of its test recipes for which the
    value looked up by ``recipe['id']`` in the result of ``bayes.run``
    equals ``recipe['cuisine']``.

    Args:
        trainrecipes: Sliceable sequence of recipe mappings; every recipe
            is read through its ``'id'`` and ``'cuisine'`` keys.

    Returns:
        The average of the per-fold accuracies.

    Raises:
        SystemExit: If there are fewer recipes than partitions.
    """
    logging.info("### run summary ###")
    numPartitions = 10
    numRecipes = len(trainrecipes)

    # Every partition needs at least one recipe.
    if numRecipes < numPartitions:
        logging.error("Train dataset must have more than %d items" %
                      numPartitions)
        # Non-zero status: this is a failure, not a clean shutdown.
        sys.exit(1)

    # Integer division is required here: true division yields a float,
    # which can neither be added in place to an integer array nor be used
    # as a slice bound. The first `remainder` partitions get one extra item.
    baseSize, remainder = divmod(numRecipes, numPartitions)
    partitionsSize = [baseSize + 1 if i < remainder else baseSize
                      for i in range(numPartitions)]
    accuracy = np.zeros(numPartitions)
    logging.info(">number of training recipes: %d" % numRecipes)

    # Calculate accuracy for each partition.
    logging.info("...calculating accuracy for each partition...")
    foldStart = 0
    for i, foldSize in enumerate(partitionsSize):
        logging.info("FOLD %d" % (i + 1))
        foldEnd = foldStart + foldSize

        # The test fold is [foldStart, foldEnd); everything else is
        # training data. Slicing around the fold keeps the recipe at index
        # foldEnd in the training set (a `>` comparison would drop it).
        testList = trainrecipes[foldStart:foldEnd]
        trainList = (list(trainrecipes[:foldStart]) +
                     list(trainrecipes[foldEnd:]))
        foldStart = foldEnd

        # Classify the test fold and count the correct predictions.
        classifiedList = bayes.run(trainList, testList)
        truePositives = sum(
            1 for recipe in testList
            if classifiedList[recipe['id']] == recipe['cuisine'])
        accuracy[i] = truePositives / float(len(testList))

    return np.average(accuracy)
Esempio n. 2
0
def main(argv):
	"""Classify the unknown recipes and write the predictions as CSV.

	Loads the two JSON files named by ``args.trainRecipesFile`` and
	``args.unknownRecipesFile``, passes them to ``bayes.run`` and writes an
	``id,cuisine`` header followed by one ``<key>,<value>`` line per entry
	of the result to ``args.outputFile``.

	NOTE(review): ``args`` is not defined in this function; presumably it
	is the module-level parsed command line -- confirm against the caller.

	Args:
		argv: Not used by this function.
	"""
	# Load json files.
	with open(args.trainRecipesFile) as train_recipes_file:
		trainJson = json.load(train_recipes_file)
	with open(args.unknownRecipesFile) as unknown_recipes_file:
		unknownJson = json.load(unknown_recipes_file)

	# Run naive bayes classifier.
	results = bayes.run(trainJson, unknownJson)

	# Write to output file; the context manager closes the handle even if
	# a write fails part-way through.
	with open(args.outputFile, "w") as text_file:
		text_file.write('id,cuisine\n')
		for recipe_id, cuisine in results.items():
			text_file.write(str(recipe_id) + ',' + cuisine + '\n')
Esempio n. 3
0
def main(argv):
    """Run the classifier on the unknown recipes and save a CSV of results.

    Reads the JSON files at ``args.trainRecipesFile`` and
    ``args.unknownRecipesFile``, feeds both to ``bayes.run`` and writes the
    returned mapping to ``args.outputFile``: first an ``id,cuisine`` header
    line, then one ``<key>,<value>`` line per entry.

    NOTE(review): ``args`` is not defined in this function; presumably it
    is the module-level parsed command line -- confirm against the caller.

    Args:
        argv: Not used by this function.
    """
    # Load json files.
    with open(args.trainRecipesFile) as train_recipes_file:
        trainJson = json.load(train_recipes_file)
    with open(args.unknownRecipesFile) as unknown_recipes_file:
        unknownJson = json.load(unknown_recipes_file)

    # Run naive bayes classifier.
    results = bayes.run(trainJson, unknownJson)

    # Write to output file inside a context manager so the handle is
    # released even when a write raises.
    with open(args.outputFile, "w") as text_file:
        text_file.write('id,cuisine\n')
        for recipe_id, cuisine in results.items():
            text_file.write(str(recipe_id) + ',' + cuisine + '\n')
Esempio n. 4
0
def crossfolding(trainrecipes):
	"""Estimate classifier accuracy with 10-fold cross-validation.

	The recipes are split into 10 contiguous partitions whose sizes differ
	by at most one. Each partition is used once as the test set while all
	remaining recipes are handed to ``bayes.run`` as the training set. The
	accuracy of a fold is the fraction of its test recipes for which the
	value looked up by ``recipe['id']`` in the result of ``bayes.run``
	equals ``recipe['cuisine']``.

	Args:
		trainrecipes: Sliceable sequence of recipe mappings; every recipe
			is read through its ``'id'`` and ``'cuisine'`` keys.

	Returns:
		The average of the per-fold accuracies.

	Raises:
		SystemExit: If there are fewer recipes than partitions.
	"""
	logging.info("### run summary ###")
	numPartitions = 10
	numRecipes = len(trainrecipes)

	# Every partition needs at least one recipe.
	if numRecipes < numPartitions:
		logging.error("Train dataset must have more than %d items" % numPartitions)
		# Non-zero status: this is a failure, not a clean shutdown.
		sys.exit(1)

	# Integer division is required here: true division yields a float,
	# which can neither be added in place to an integer array nor be used
	# as a slice bound. The first `remainder` partitions get one extra item.
	baseSize, remainder = divmod(numRecipes, numPartitions)
	partitionsSize = [baseSize + 1 if i < remainder else baseSize
		for i in range(numPartitions)]
	accuracy = np.zeros(numPartitions)
	logging.info(">number of training recipes: %d" % numRecipes)

	# Calculate accuracy for each partition.
	logging.info("...calculating accuracy for each partition...")
	foldStart = 0
	for i, foldSize in enumerate(partitionsSize):
		logging.info("FOLD %d" % (i+1))
		foldEnd = foldStart + foldSize

		# The test fold is [foldStart, foldEnd); everything else is
		# training data. Slicing around the fold keeps the recipe at index
		# foldEnd in the training set (a `>` comparison would drop it).
		testList = trainrecipes[foldStart:foldEnd]
		trainList = list(trainrecipes[:foldStart]) + list(trainrecipes[foldEnd:])
		foldStart = foldEnd

		# Classify the test fold and count the correct predictions.
		classifiedList = bayes.run(trainList, testList)
		truePositives = sum(
			1 for recipe in testList
			if classifiedList[recipe['id']] == recipe['cuisine'])
		accuracy[i] = truePositives / float(len(testList))

	return np.average(accuracy)