def split_classes(file_name):
    """Load a CSV file and partition its rows into two shuffled lists.

    The first row returned by ``data_preprocessing.load_csv_data`` is
    discarded and the number of remaining rows is printed. A row is placed
    in the "class one" list when the value at index 8 equals the string
    ``"class1"``; every other row is placed in the "class zero" list.

    Args:
        file_name: Passed through unchanged to
            ``data_preprocessing.load_csv_data``.

    Returns:
        A ``(class_one, class_zero)`` tuple of lists, each shuffled in place
        with ``random.shuffle``. Note that class one comes first.
    """
    rows = data_preprocessing.load_csv_data(file_name)
    rows.pop(0)  # drop the top row (presumably the CSV header -- confirm)
    print(len(rows))

    positives, negatives = [], []
    for record in rows:
        bucket = positives if record[8] == "class1" else negatives
        bucket.append(record)

    # Class one is shuffled before class zero; the order of the two calls
    # matters for reproducibility when the RNG has been seeded.
    for bucket in (positives, negatives):
        random.shuffle(bucket)
    return positives, negatives
def split_classes(file_name):
    """Split the rows of a CSV file into shuffled class-one / class-zero lists.

    Rows are read with ``data_preprocessing.load_csv_data``; the top row is
    removed and the count of the remaining rows is printed. Membership is
    decided by comparing the value at index 8 of each row with the string
    ``"class1"``: equal rows are class one, all others are class zero.

    Args:
        file_name: Forwarded as-is to ``data_preprocessing.load_csv_data``.

    Returns:
        Tuple ``(class_one, class_zero)``; both lists have been randomised
        with ``random.shuffle`` (class one first, then class zero).
    """
    records = data_preprocessing.load_csv_data(file_name)
    records.pop(0)  # remove top row
    print(len(records))

    class_one = []
    class_zero = []
    for record in records:
        is_class_one = record[8] == "class1"
        if not is_class_one:
            class_zero.append(record)
            continue
        class_one.append(record)

    # Randomise both lists in place before handing them back.
    random.shuffle(class_one)
    random.shuffle(class_zero)
    return class_one, class_zero
# Example #3
# 0
def main():

	#attr_names = ['plasma_glucose_concentration','bmi','diabetes_pedigree','age','class'] # For CFS
	attr_names = None


	training_data =  data_preprocessing.load_csv_data("pima.csv")
	training_data.pop(0) #pop the header off

	test_sample = training_data.pop(random.randint(0, len(training_data) - 1 ))

	#print "Test sample:", test_sample

	(c,correctness) = classify(10, test_sample, training_data, attr_names)
	print "Classifier predicted: ", c, " Correctness: ", correctness
def main():
    """Run ``classify`` over every row of ``pima.csv`` and print a tally.

    ``init_bayes("pima.csv")`` supplies the two per-class values that are
    passed to ``classify``. The same file is then loaded again with
    ``data_preprocessing.load_csv_data``, its first row is dropped, and each
    remaining row is classified. A row counts as correct only when the second
    element returned by ``classify`` compares equal to ``True``.

    Returns:
        Always ``1``.
    """
    # NOTE(review): attr_names is assigned but never used below -- it is not
    # passed to init_bayes. Confirm whether that is intentional.
    attr_names = [
        'plasma_glucose_concentration',
        'bmi',
        'diabetes_pedigree',
        'age',
        'class',
    ]  # For CFS
    class_zero, class_one = init_bayes("pima.csv")
    # Debug aid left by the original author (disabled):
    #   print print_mean_sd(class_zero, "Class Zero")
    #   print print_mean_sd(class_one, "Class One")

    samples = data_preprocessing.load_csv_data("pima.csv")
    samples.pop(0)

    hits = 0
    misses = 0
    for sample in samples:
        _, is_correct = classify(sample, class_zero, class_one)
        # Explicit comparison: only a value equal to True is a hit; any
        # other result (including other truthy values) is a miss.
        if is_correct == True:
            hits += 1
        else:
            misses += 1

    print("Correct: %d    Incorrect: %d" % (hits, misses))
    return 1
def init_bayes(file_name, attr_names=False):
    """Load a CSV file and hand its rows to ``calculate_mean_sd``.

    Args:
        file_name: Passed to ``data_preprocessing.load_csv_data``.
        attr_names: Forwarded unchanged as the second argument of
            ``calculate_mean_sd``. Defaults to ``False``.

    Returns:
        Whatever ``calculate_mean_sd`` returns for the loaded rows.
    """
    # The second positional argument (False) is specific to the loader; its
    # meaning is not visible here -- TODO confirm against load_csv_data.
    loaded_rows = data_preprocessing.load_csv_data(file_name, False)
    return calculate_mean_sd(loaded_rows, attr_names)