def split_classes(file_name):
    """Load a CSV file and partition its data rows by class label.

    The first row returned by the loader is discarded as the header and the
    number of remaining rows is printed. A row belongs to the first group
    when its field at index 8 equals "class1"; every other row belongs to
    the second group. Both groups are shuffled in place before returning.

    Args:
        file_name: Path handed to ``data_preprocessing.load_csv_data``.

    Returns:
        A ``(class_one, class_zero)`` tuple of row lists, in that order.
    """
    rows = data_preprocessing.load_csv_data(file_name)
    rows.pop(0)  # remove top (header) row
    # Single-argument parenthesised form prints the same text under the
    # Python 2 print statement this file uses.
    print(len(rows))

    class_one = [row for row in rows if row[8] == "class1"]
    class_zero = [row for row in rows if not (row[8] == "class1")]

    # Randomise the two lists; class_one is shuffled first so the random
    # stream is consumed in the same order as before.
    random.shuffle(class_one)
    random.shuffle(class_zero)
    return class_one, class_zero
def split_classes(file_name):
    """Split the data rows of a CSV file into two shuffled per-class lists.

    The loader's first row (the header) is dropped and the remaining row
    count is printed. Rows whose field at index 8 equals "class1" are
    collected as class one; all other rows are collected as class zero.

    Args:
        file_name: Path handed to ``data_preprocessing.load_csv_data``.

    Returns:
        ``(class_one, class_zero)``, each list shuffled in place.
    """
    records = data_preprocessing.load_csv_data(file_name)
    records.pop(0)  # remove top (header) row
    # Same output as the Python 2 statement form for a single argument.
    print(len(records))

    class_one = []
    class_zero = []
    for record in records:
        bucket = class_one if record[8] == "class1" else class_zero
        bucket.append(record)

    # Randomise the two lists (class one first, then class zero).
    for bucket in (class_one, class_zero):
        random.shuffle(bucket)

    return class_one, class_zero
def main():
    """Classify one randomly held-out row of pima.csv and print the result.

    The file is loaded, its header row dropped, and one row is removed at a
    random position to act as the test sample. The remaining rows are
    passed to ``classify`` as training data, and the two values it returns
    are printed.
    """
    # No attribute subset is selected here. The CFS alternative was:
    # ['plasma_glucose_concentration', 'bmi', 'diabetes_pedigree', 'age', 'class']
    attr_names = None

    training_data = data_preprocessing.load_csv_data("pima.csv")
    training_data.pop(0)  # pop the header off

    # Remove the chosen row so it cannot also serve as training data.
    held_out_index = random.randint(0, len(training_data) - 1)
    test_sample = training_data.pop(held_out_index)

    # NOTE(review): the leading 10 is presumably the neighbour count for
    # classify -- confirm against its definition.
    c, correctness = classify(10, test_sample, training_data, attr_names)

    # The doubled spaces reproduce what the comma-separated print statement
    # emitted: each literal already carried a space and the statement added
    # another between items.
    print("Classifier predicted:  %s  Correctness:  %s" % (c, correctness))
def main():
    """Build the per-class model from pima.csv and tally classification results.

    ``init_bayes`` is run on pima.csv to obtain the two per-class values,
    then every data row of the same file is passed through ``classify`` and
    counted as correct or incorrect according to the flag it returns.
    The tally is printed once after all rows are processed.

    Note that the rows being classified come from the same file the model
    was built from, so the counts are not a held-out estimate.

    Returns:
        Always 1.
    """
    class_zero, class_one = init_bayes("pima.csv")

    data = data_preprocessing.load_csv_data("pima.csv")
    data.pop(0)  # drop the header row

    count_correct = 0
    count_incorrect = 0
    for item in data:
        # Only the correctness flag is needed; the first returned value
        # is ignored.
        _, out = classify(item, class_zero, class_one)
        if out:
            count_correct += 1
        else:
            count_incorrect += 1

    print("Correct: %d Incorrect: %d" % (count_correct, count_incorrect))
    return 1
def init_bayes(file_name, attr_names=False):
    """Load a CSV file and return ``calculate_mean_sd`` of its contents.

    Args:
        file_name: Path handed to ``data_preprocessing.load_csv_data``.
        attr_names: Forwarded unchanged to ``calculate_mean_sd``;
            defaults to False.

    Returns:
        Whatever ``calculate_mean_sd`` returns for the loaded data.
    """
    # NOTE(review): the meaning of the loader's second positional argument
    # (False) is not visible here -- confirm against load_csv_data.
    return calculate_mean_sd(
        data_preprocessing.load_csv_data(file_name, False),
        attr_names,
    )