def q2():
    """Run the per-fold Naive Bayes experiment on spambase for four model types.

    The normalized spambase data is split into k = 10 folds. For each model
    type, a separate NaiveBayes model is trained (and scored) on each of the
    first k - 1 folds, the per-fold models are merged into one combined
    model, and that combined model is scored on the last fold.

    Per-fold training accuracy is reported through ``print_output``; the
    test accuracy and the mean training accuracy are reported through
    ``print_test_output``.
    """
    model_names = ['Bernoulli', 'Gaussian', '4-bins', '9-bins']
    spam_data = hw3.pandas_to_data(hw3.load_and_normalize_spambase())
    k = 10
    num_train_folds = k - 1
    folds = hw3.partition_folds(spam_data, k)

    for model_type, model_name in enumerate(model_names):
        print('\nModel: {}'.format(model_name))

        # Only model type 0 gets a non-zero alpha for its per-fold models.
        if model_type == 0:
            fold_alpha = .001
        else:
            fold_alpha = 0

        accuracy_total = 0
        fold_models = []
        for fold_idx in range(num_train_folds):
            fold_model = nb.NaiveBayes(model_type, alpha=fold_alpha)
            labels, features, _mus, _y_mu = hw3.get_data_and_mus(folds[fold_idx])
            fold_model.train(features, labels)

            # Training accuracy: predict on the same fold the model was fit on.
            predicted = fold_model.predict(features)
            print(predicted)
            fold_accuracy = hw3.get_accuracy(predicted, labels)
            accuracy_total += fold_accuracy
            print_output(fold_idx, fold_accuracy)
            fold_models.append(fold_model)

        # The combined model always uses alpha=.001, regardless of model type.
        combined = nb.NaiveBayes(model_type, alpha=.001)
        # Types 0 and 1 are merged with aggregate_model; the binned types
        # (2 and 3) are merged with aggregate_model3.
        merge = combined.aggregate_model if model_type < 2 else combined.aggregate_model3
        merge(fold_models)

        # The last fold is held out and used only for testing.
        labels, features, _mus, _y_mu = hw3.get_data_and_mus(folds[k - 1])
        test_predicted = combined.predict(features)
        test_accuracy = hw3.get_accuracy(test_predicted, labels)
        print_test_output(test_accuracy, float(accuracy_total) / num_train_folds)
def q2_plots(num_points=50,
             plot_path_template='/Users/Admin/Dropbox/ML/MachineLearning_CS6140/CS6140_A_MacLeay/Homeworks/roc_{}.pdf'):
    """Plot one ROC curve per Naive Bayes model type on spambase.

    For each of the four model types, a NaiveBayes model is trained on the
    first of 10 folds and then asked to predict on that same fold at a sweep
    of thresholds ``theta`` evenly spaced over [0, 1]. Every prediction is
    fed to a ``ROC.ROC`` accumulator, which is then plotted and summarized.

    Args:
        num_points: Number of interior thresholds in the sweep. The sweep
            evaluates ``num_points + 2`` values of theta, from 0 to 1
            inclusive. Defaults to 50.
        plot_path_template: Output path for the plots, containing one
            ``{}`` placeholder that is filled with the integer model type
            (0-3). Defaults to the original hard-coded location.

    Raises:
        ValueError: If ``num_points`` is negative (the threshold step
            ``1 / (num_points + 1)`` would be undefined or meaningless).
    """
    # Validate before any data is loaded so bad arguments fail fast.
    if num_points < 0:
        raise ValueError(
            'num_points must be non-negative, got {}'.format(num_points))

    models = ['Bernoulli', 'Gaussian', '4-bins', '9-bins']
    spamData = hw3.pandas_to_data(hw3.load_and_normalize_spambase())
    k = 10
    k_folds = hw3.partition_folds(spamData, k)

    for model_type, model_name in enumerate(models):
        roc = ROC.ROC()
        print('\nModel: {}'.format(model_name))

        # Only model type 0 is trained with a non-zero alpha.
        alpha = .001 if model_type == 0 else 0
        nb_model = nb.NaiveBayes(model_type, alpha=alpha)

        # Only the first fold is used, both for training and for the sweep.
        truth_rows, data_rows, data_mus, y_mu = hw3.get_data_and_mus(k_folds[0])
        nb_model.train(data_rows, truth_rows)

        for ti in range(num_points + 2):
            # ti = 0 gives theta = 0 and ti = num_points + 1 gives theta = 1.
            theta = ti * 1. / (num_points + 1)
            predict = nb_model.predict(data_rows, theta)
            print(predict)
            roc.add_tp_tn(predict, truth_rows, theta)

        roc.plot_ROC(plot_path_template.format(model_type))
        roc.print_info()
def test_NaiveBayes():
    """Train a type-2 NaiveBayes model on the sample data and return it.

    The sample data from ``get_nb_data`` and the fitted ``model`` attribute
    are printed along the way for manual inspection.
    """
    classifier = nb.NaiveBayes(2)
    sample = get_nb_data()
    print(sample)

    # get_data_and_mus returns four values; only labels and rows are needed.
    labels, rows, _mus, _y_mu = hw3.get_data_and_mus(sample)
    classifier.train(rows, labels)
    print(classifier.model)
    return classifier
def test_NaiveBayes_predict():
    """Train a type-2 NaiveBayes model on sample data and print predictions.

    Prints, in order: the training sample, the feature means returned by
    ``hw3.get_data_and_mus``, the fitted ``model`` attribute, and the
    predictions for the test data from ``get_nb_test_data(5)``.
    """
    classifier = nb.NaiveBayes(2)
    sample = get_nb_data()
    held_out = get_nb_test_data(5)
    print(sample)

    labels, rows, feature_mus, _y_mu = hw3.get_data_and_mus(sample)
    classifier.train(rows, labels)

    print(feature_mus)
    print(classifier.model)
    predictions = classifier.predict(held_out)
    print(predictions)