def prob_5():
    """Train decision trees on shuffled subsets of the cars and voting data.

    Each dataset is loaded and shuffled, then a ``DecisionTreeLearner`` is
    trained on the instances from the one-quarter mark to the end.

    Returns:
        tuple: ``(cars_tree, voting_tree)``, the ``tree`` attribute of each
        trained learner.
    """

    def fit_after_first_quarter(dataset):
        # Drop the first quarter of the (already shuffled) instances.
        start = dataset.instance_count // 4
        subset = dataset.create_subset_arff(slice(start, None))
        learner = DecisionTreeLearner()
        learner.train(subset.get_features(), subset.get_labels())
        return learner.tree

    cars = Arff('datasets/cars.arff')
    cars.shuffle()

    # NOTE(review): the 10% hold-out, the remaining 90%, the hold-out
    # features/labels and the k-fold splits are computed but never used
    # below. The calls are kept in case they have side effects -- confirm
    # and remove if they do not.
    holdout = cars.create_subset_arff(slice(cars.instance_count // 10))
    remainder = cars.create_subset_arff(slice(cars.instance_count // 10, None))
    holdout_features = holdout.get_features()
    holdout_labels = holdout.get_labels()
    folds = k_fold_cv(cars)

    cars_tree = fit_after_first_quarter(cars)

    voting = Arff('datasets/voting.arff')
    voting.shuffle()
    voting_tree = fit_after_first_quarter(voting)

    return cars_tree, voting_tree
def prob_3(weighted_d = False):
    """Plot KNN regression error on the housing data for several values of K.

    Loads the housing train/test ARFF files, shuffles and normalizes both,
    then for each K in 1, 3, ..., 15 builds a ``KNNClassifier`` from the
    stacked feature+label arrays and records the value returned by
    ``get_accuracy_regress(weighted_d)``. The recorded values are plotted
    against K with the y-axis labelled "M.S.E".

    Args:
        weighted_d: Forwarded to ``get_accuracy_regress``; when truthy the
            plot title is also tagged with "(weighted-d)".
    """
    test_arff = Arff("housing_testing_data.arff")
    train_arff = Arff("housing_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()
    test_arff.normalize()
    train_arff.normalize()

    K = [1, 3, 5, 7, 9, 11, 13, 15]
    A = []
    for k_hat in K:
        # Features and labels are stacked column-wise; rebuilt per iteration
        # so every classifier receives its own fresh arrays.
        test_data = np.hstack((test_arff.get_features().data,
                               test_arff.get_labels().data))
        train_data = np.hstack((train_arff.get_features().data,
                                train_arff.get_labels().data))
        KNNC = KNNClassifier(k_hat, train_data, test_data)
        A.append(KNNC.get_accuracy_regress(weighted_d))

    plt.plot(K, A, label="")
    t = "KNN Regression M.S.E Housing"
    if weighted_d:
        t += "(weighted-d)"
    plt.title(t)
    plt.xlabel("K")
    plt.ylabel("M.S.E")
    plt.show()
def main():
    """Plot the average misclassification rate of a perceptron over epochs.

    The ARFF file named by ``sys.argv[1]`` is loaded and the same
    ``PerceptronLearner`` instance is trained on it five times. After each
    run the first 20 entries of ``accuracy_tracker`` are stored (padded with
    the last value when shorter). The per-epoch average over the runs is
    printed and ``1 - average`` is plotted.
    """
    runs, epochs = 5, 20

    arff = Arff(sys.argv[1])
    learner = PerceptronLearner()
    features = arff.get_features()
    labels = arff.get_labels()

    accuracy_matrix = np.zeros((runs, epochs))
    for run in range(runs):
        learner.train(features, labels)
        history = learner.accuracy_tracker[:epochs]
        # Runs shorter than `epochs` are padded on the right by repeating
        # their final accuracy so every row has the same width.
        missing = epochs - len(history)
        accuracy_matrix[run] = np.pad(
            history, (0, missing), 'constant', constant_values=history[-1]
        )

    print(accuracy_matrix)
    # Average the accuracies of each epoch across the runs.
    avg_accuracy = np.sum(accuracy_matrix, axis=0) / runs
    print(avg_accuracy)

    plt.plot(1 - avg_accuracy)
    plt.xlabel("Epochs")
    plt.ylabel("Avg Misclassification Rate")
    plt.title("Avg Misclassification Rate Over Epochs")
    plt.show()
def setUp(self):
    """Load the cm1_req dataset and create the learner under test.

    Stores the dataset's features and labels on the test case together with
    a new ``BaselineLearner`` instance.
    """
    dataset_path = os.path.join(utils.get_root(), "test/datasets/cm1_req.arff")
    dataset = Arff(arff=dataset_path)

    self.features = dataset.get_features()
    self.labels = dataset.get_labels()
    self.learner = BaselineLearner()
def prob0haccomplete():
    """Run HAC (``simple=False``) on the labor features with verbose output.

    The labor dataset is loaded with one label column, its first (id) column
    is removed, and the remaining feature columns are clustered. ``printk``
    is set to ``[5]``.
    """
    # NOTE(review): judging by the function name, simple=False presumably
    # selects complete-link clustering -- confirm against HAC.
    labor = Arff('datasets/labor.arff', label_count=1)

    # Column 0 is the id column; keep everything from column 1 onward.
    without_id = labor.create_subset_arff(col_idx=slice(1, None))
    features = without_id.get_features()

    clusterer = HAC(simple=False)
    clusterer.train(features, verbose=True, printk=[5])
def prob0():
    """Run 5-means on the labor features, seeded with the first five rows.

    The labor dataset is loaded with one label column, its first (id) column
    is removed, and ``KMeans`` is trained verbosely on the remaining feature
    columns using the first five data rows as the initial centers.
    """
    cluster_count = 5

    labor = Arff('datasets/labor.arff', label_count=1)
    # Column 0 is the id column; keep everything from column 1 onward.
    without_id = labor.create_subset_arff(col_idx=slice(1, None))
    features = without_id.get_features()

    model = KMeans(cluster_count)
    model.train(features, verbose=True, centers=features.data[:cluster_count])
def prob_0():
    """Train a decision tree on the lenses dataset and print the tree."""
    lenses = Arff('datasets/lenses.arff')
    learner = DecisionTreeLearner()

    features = lenses.get_features()
    labels = lenses.get_labels()
    learner.train(features, labels)

    print(learner.tree)
def main():
    """Train a perceptron on the ARFF file in ``sys.argv[1]`` and visualize it.

    The features, labels and trained learner are handed to
    ``visualize_training``.
    """
    dataset = Arff(sys.argv[1])
    features = dataset.get_features()
    labels = dataset.get_labels()

    learner = PerceptronLearner()
    learner.train(features, labels)

    visualize_training(features, labels, learner)
def setup():
    """Print HAC distances between two fixed pairs of labor instances.

    The labor dataset is loaded with one label column and its first (id)
    column is removed. The indices of the attributes typed ``'nominal'`` are
    stored on the ``HAC`` instance, then the distances between rows 33/44
    and rows 25/34 are printed.
    """
    labor = Arff('datasets/labor.arff', label_count=1)
    # Column 0 is the id column; keep everything from column 1 onward.
    features = labor.create_subset_arff(col_idx=slice(1, None)).get_features()

    hac = HAC()
    is_nominal = np.array(features.attr_types) == 'nominal'
    hac.nominal_indicies = np.where(is_nominal)[0]

    rows = features.data
    for tag, first, second in (('33,44', 33, 44), ('25,34', 25, 34)):
        print(tag, hac.get_distance(rows[first], rows[second]))
def prob_3():
    """Train decision trees on the shuffled cars and voting datasets.

    A heading is printed before each dataset is processed.

    Returns:
        tuple: ``(cars_tree, voting_tree)``, the ``tree`` attribute of each
        trained ``DecisionTreeLearner``.
    """

    def fit_tree(path):
        # Load, shuffle, and train on the full dataset.
        dataset = Arff(path)
        dataset.shuffle()
        learner = DecisionTreeLearner()
        learner.train(dataset.get_features(), dataset.get_labels())
        return learner.tree

    print('cars')
    cars_tree = fit_tree('datasets/cars.arff')

    print()
    print('voting')
    voting_tree = fit_tree('datasets/voting.arff')

    return cars_tree, voting_tree
def prob_2(weighted_d = False):
    """Evaluate KNN on the magic telescope data and plot accuracy against K.

    First, accuracy for k=3 is computed on the raw data and again after
    normalizing both ARFF files; the two values are printed as
    ``acc acc_n``. Then, using the normalized data, accuracy is computed for
    K in 1, 3, ..., 15 and plotted.

    Args:
        weighted_d: Forwarded to ``KNNClassifier.get_accuracy``; when truthy
            the plot title is also tagged with "(weighted-d)".
    """
    k = 3
    test_arff = Arff("magic_telescope_testing_data.arff")
    train_arff = Arff("magic_telescope_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()

    # Features and labels are stacked column-wise for the classifier.
    test_data = np.hstack((test_arff.get_features().data,
                           test_arff.get_labels().data))
    train_data = np.hstack((train_arff.get_features().data,
                            train_arff.get_labels().data))
    KNNC = KNNClassifier(k, train_data, test_data)
    acc = KNNC.get_accuracy(weighted_d)

    test_arff.normalize()
    train_arff.normalize()
    n_test_data = np.hstack((test_arff.get_features().data,
                             test_arff.get_labels().data))
    n_train_data = np.hstack((train_arff.get_features().data,
                              train_arff.get_labels().data))
    # Argument order is (k, train, test), the same as the raw-data
    # classifier above and the sweep below; passing them the other way
    # round would swap the roles of the two sets and make the comparison
    # with `acc` meaningless.
    n_KNNC = KNNClassifier(k, n_train_data, n_test_data)
    acc_n = n_KNNC.get_accuracy(weighted_d)
    print(acc, acc_n)

    K = [1, 3, 5, 7, 9, 11, 13, 15]
    A = []
    for k_hat in K:
        n_KNNC = KNNClassifier(k_hat, n_train_data, n_test_data)
        A.append(n_KNNC.get_accuracy(weighted_d))

    plt.plot(K, A, label="")
    t = "KNN Accuracy Telesc. "
    if weighted_d:
        t += "(weighted-d)"
    plt.title(t)
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
def prob_6():
    """Compare KNN accuracy and run time before and after ``induce_KSM``.

    Both magic telescope ARFF files are shuffled and normalized. For each K
    in 1, 3, 5 a ``KNNClassifier`` is built and ``get_accuracy()`` is timed
    twice: once as constructed and once after calling ``induce_KSM()`` on
    the classifier. The two (K, accuracy, time) series are drawn as lines in
    a 3-D plot labelled "No-KSM" and "KSM".
    """
    test_arff = Arff("magic_telescope_testing_data.arff")
    train_arff = Arff("magic_telescope_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()
    test_arff.normalize()
    train_arff.normalize()

    K = [1, 3, 5]
    T = []
    A = []
    T_KSM = []
    A_KSM = []
    for k_hat in K:
        # Rebuilt per iteration so each classifier gets its own arrays,
        # independent of whatever induce_KSM() did in an earlier pass.
        test_data = np.hstack((test_arff.get_features().data,
                               test_arff.get_labels().data))
        train_data = np.hstack((train_arff.get_features().data,
                                train_arff.get_labels().data))
        KNNC = KNNClassifier(k_hat, train_data, test_data)

        # perf_counter() is monotonic and high-resolution, which makes it
        # the appropriate clock for measuring elapsed intervals.
        start = time.perf_counter()
        A.append(KNNC.get_accuracy())
        T.append(time.perf_counter() - start)

        KNNC.induce_KSM()
        start = time.perf_counter()
        A_KSM.append(KNNC.get_accuracy())
        T_KSM.append(time.perf_counter() - start)

    ax = plt.axes(projection='3d')
    ax.plot(K, A, T, label="No-KSM")
    ax.plot(K, A_KSM, T_KSM, label="KSM")
    ax.set_xlabel('K')
    ax.set_ylabel('Accuracy')
    ax.set_zlabel('Time')
    plt.title("KNN Accuracy w/ IKSM")
    plt.legend()
    plt.show()
def test_get_features(self):
    """Check get_features()/get_labels() as ``label_count`` is changed.

    Covers: zero labels (features equal the full data), five labels, an
    Arff built from the last eight columns of another Arff, and the
    feature/label shapes once ``label_count`` is reset to zero.
    """
    row_count = 690
    credit = Arff(arff=self.credit_data_path, label_count=1)

    # With no label columns, the feature view must equal the whole data.
    credit.label_count = 0
    np.testing.assert_equal(credit.data, credit.get_features().data)

    # Five label columns requested.
    credit.label_count = 5
    self.assertEqual(credit.get_labels().shape, (row_count, 5))

    # Copy of the last 8 columns: label_count is expected to stay 5,
    # leaving 3 feature columns.
    last_eight = Arff(credit, col_idx=slice(-8, None))
    self.assertEqual(last_eight.label_count, 5)
    self.assertEqual((row_count, 3), last_eight.get_features().shape)

    # Back to zero labels: 16 feature columns and an empty label view.
    credit.label_count = 0
    self.assertEqual((row_count, 16), credit.get_features().shape)
    self.assertEqual((row_count, 0), credit.get_labels().shape)
def prob2():
    """Cluster the iris features with k-means and HAC for k = 2..7.

    A separate ``KMeans`` model is trained for every k, then one ``HAC``
    (``simple=False``) run is trained with ``printk`` set to the same list
    of k values. The features are used without normalization.
    """
    iris = Arff('datasets/iris.arff')
    features = iris.get_features()

    cluster_counts = list(range(2, 8))
    for count in cluster_counts:
        KMeans(count).train(features)

    hac = HAC(simple=False)
    hac.train(features, printk=cluster_counts)
def prob_3():
    """Plot regression-KNN mean squared error on the housing data against K.

    Both housing ARFF files are normalized. For each odd K from 1 to 15 a
    ``KNN`` model predicts the test targets from the training data and the
    mean squared error against the test labels is recorded and plotted.
    """
    train = Arff('datasets/housing_train.arff')
    test = Arff('datasets/housing_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    mses = []
    for k in krange:
        knn = KNN(k)
        # Housing prices are continuous targets, so use the regression
        # predictor rather than the classifying `knn` method.
        # NOTE(review): assumes KNN.knn_regression takes the same
        # (train_features, train_labels, test_features) arguments as
        # KNN.knn -- confirm against the KNN class.
        preds = knn.knn_regression(train.get_features(), train.get_labels(),
                                   test.get_features())
        squared_error = (preds - np.ravel(test.get_labels().data)) ** 2
        mses.append(np.sum(squared_error, axis=0) / len(preds))

    plt.plot(krange, mses)
    plt.title("K Size Versus MSE on Housing Prices")
    plt.xlabel("K")
    plt.ylabel("Mean Squared Error")
    plt.show()
def test_cases():
    """Run KMC with k=5 on the normalized labor data.

    The attribute schema is hard-coded as (type, allowed values) pairs.
    Features and labels are stacked column-wise, the first column is
    dropped, and the resulting array is passed to ``KMC`` twice (as both
    data arguments) before training with ``tol=0``.
    """
    # One entry per attribute: "real" attributes carry no value list,
    # "cat" attributes list their category names.
    schema = [
        ("real", []),
        ("real", []),
        ("real", []),
        ("real", []),
        ("cat", ['none', 'tcf', 'tc']),
        ("real", []),
        ("cat", ['none', 'ret_allw', 'empl_contr']),
        ("real", []),
        ("real", []),
        ("cat", ['yes', 'no']),
        ("real", []),
        ("cat", ['below_average', 'average', 'generous']),
        ("cat", ['yes', 'no']),
        ("cat", ['none', 'half', 'full']),
        ("cat", ['yes', 'no']),
        ("cat", ['none', 'half', 'full']),
        ("cat", ['bad', 'good']),
    ]
    attr_types = [kind for kind, _ in schema]
    attr_idx = [values for _, values in schema]

    cluster_count = 5
    arff = Arff("labor.arff")
    arff.normalize()

    stacked = np.hstack((arff.get_features().data, arff.get_labels().data))
    data = stacked[:, 1:]  # drop the first column

    kmc = KMC(cluster_count, data, data, attr_types, attr_idx)
    kmc.train(tol=0)
def prob_4_telescope():
    """Plot weighted-KNN accuracy on the magic telescope data against K.

    Both ARFF files are normalized. For each odd K from 1 to 15 a ``KNN``
    model with ``weighting=True`` classifies the test set; the accuracy is
    printed and collected for the plot.
    """
    # Per the original author's note the weights are w = 1/dist**2; that is
    # implemented inside KNN, not here.
    train = Arff('datasets/magic_telescope_train.arff')
    test = Arff('datasets/magic_telescope_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    accs = []
    for k in krange:
        model = KNN(k, weighting=True)
        predicted = model.knn(train.get_features(), train.get_labels(),
                              test.get_features())
        hits = predicted == np.ravel(test.get_labels().data)
        accuracy = sum(hits) / len(hits)
        print("k:", k, "accuracy:", accuracy)
        accs.append(accuracy)

    plt.plot(krange, accs)
    plt.title("K Size Versus Accuracy")
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
def prob_4_housing():
    """Plot weighted-KNN regression MSE on the housing data against K.

    Both ARFF files are normalized. For each odd K from 1 to 15 a ``KNN``
    model with ``weighting=True`` predicts the test targets via
    ``knn_regression``; the mean squared error against the test labels is
    collected for the plot.
    """
    # Per the original author's note the weights are w = 1/dist**2; that is
    # implemented inside KNN, not here.
    train = Arff('datasets/housing_train.arff')
    test = Arff('datasets/housing_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    mses = []
    for k in krange:
        model = KNN(k, weighting=True)
        preds = model.knn_regression(train.get_features(), train.get_labels(),
                                     test.get_features())
        residuals = preds - np.ravel(test.get_labels().data)
        mses.append(np.sum(residuals ** 2, axis=0) / len(preds))

    plt.plot(krange, mses)
    plt.title("K Size Versus MSE on Housing (Weighted)")
    plt.xlabel("K")
    plt.ylabel("Mean Squared Error")
    plt.show()
def prob_2():
    """Evaluate KNN on the magic telescope data, then plot accuracy against K.

    Part one prints the k=3 accuracy before and after normalizing both ARFF
    files, using the same ``KNN`` instance for both measurements. Part two
    prints and plots the accuracy on the normalized data for each odd K
    from 1 to 15.
    """
    train = Arff('datasets/magic_telescope_train.arff')
    test = Arff('datasets/magic_telescope_test.arff')

    def accuracy_of(model):
        # Fraction of test instances whose predicted label matches.
        predicted = model.knn(train.get_features(), train.get_labels(),
                              test.get_features())
        hits = predicted == np.ravel(test.get_labels().data)
        return sum(hits) / len(hits)

    # Try first without normalizing.
    baseline = KNN(3)
    print("Before normalization:", accuracy_of(baseline))

    train.normalize()
    test.normalize()
    print("After normalization:", accuracy_of(baseline))

    print("PART TWO:")
    krange = np.arange(1, 16, 2)
    accs = []
    for k in krange:
        accuracy = accuracy_of(KNN(k))
        print("k:", k, "accuracy:", accuracy)
        accs.append(accuracy)

    plt.plot(krange, accs)
    plt.title("K Size Versus Accuracy")
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
import sys

import numpy as np

from toolkit.perceptron_learner import PerceptronLearner
from toolkit.arff import Arff


def rnd4(obj):
    """Format a plain number to four decimal places.

    Returns:
        ``obj`` unchanged when it is a numpy array; a string with four
        decimals when it is an ``int``, ``float`` or ``complex``; ``None``
        for anything else.
    """
    if isinstance(obj, np.ndarray):
        return obj
    if isinstance(obj, (int, float, complex)):
        return f"{obj:.4f}"
    return None


# Train the same perceptron ten times on the dataset named on the command
# line and print the average weight next to each attribute name.
arff = Arff(sys.argv[1])
features = arff.get_features()
labels = arff.get_labels()
pl = PerceptronLearner()

weights = []
for _ in range(10):
    pl.train(features, labels)
    # NOTE(review): if train() updates pl.weights in place, every entry
    # here is the same array and the "average" is just the final weights --
    # confirm that train() rebinds pl.weights.
    weights.append(pl.weights)

avg_weights = np.sum(weights, axis=0) / 10
names = arff.get_attr_names()
for i, weight in enumerate(avg_weights):
    print(rnd4(weight), names[i])