# Train an AdaBoost and a naive Bayes classifier on the same data and report
# how many held-out validation samples each one misclassifies.
#
# Command line: <script> XFILE YFILE TESTSET
#
# NOTE(review): convert, split_train_validation, time_before and time_after
# are not defined or imported in this section; they are assumed to be
# provided elsewhere in the file -- confirm.
import sys

from adaboost import AdaBoost
from naivebayes import NaiveBayes

xfilename = sys.argv[1]
yfilename = sys.argv[2]
# Still read so the three-argument command line keeps working; nothing in
# this section uses it.
testset = sys.argv[3]

print("training naive bayes...")
nb = NaiveBayes()
ab = AdaBoost()

dataset = convert(xfilename, yfilename)
validation_set_size = 10000
train_set, validation_set = split_train_validation(dataset, validation_set_size)

# Upper bound on the number of samples used for fitting.
num_to_train_on = 10000000

# Fit on the training split only. Fitting on `dataset` would let the models
# see the validation samples and make the error counts below meaningless.
# Assumes train_set supports slicing the same way dataset does -- TODO confirm.
training_samples = train_set[:num_to_train_on]

time_before("training adaboost")
ab.train_set(training_samples)
time_after("training adaboost")

time_before("training naive bayes")
nb.train_set(training_samples)
time_after("training naive bayes")

# One boolean per validation sample: True when the prediction matches the
# label. Each sample is indexed as (label, feature_1, feature_2, ...).
kg_validations_nb = []
kg_validations_ab = []
for sample in validation_set:
    label = sample[0]
    features = sample[1:]
    kg_validations_nb.append(nb.predict(*features) == label)
    kg_validations_ab.append(ab.predict(*features) == label)

# Count the misclassified validation samples for each model.
print("Errors nb: %s " % sum(1 for correct in kg_validations_nb if not correct))
print("Errors ab: %s " % sum(1 for correct in kg_validations_ab if not correct))