import sys
import time

from sklearn.cross_validation import KFold  # pre-0.18 sklearn API, matching KFold(n, n_folds=...)

from pyblique import get_data, ObliqueClassifier, error_rate  # ObliqueClassifier/error_rate assumed to live in pyblique, like get_data


def run(fname, folds):
    st = time.clock()  # time.clock() was removed in Python 3.8; use time.perf_counter() there
    data = get_data("Data/{}.data".format(fname))
    with open("Results/{}_{}folds.txt".format(fname, folds), "a") as f:
        tee = Tee(sys.stdout, f)  # mirrors every line to stdout and the results file
        tee("Validating classifier with {}-fold test...".format(folds))
        kf = KFold(len(data), n_folds=folds)
        avg_error = 0
        it = 1
        for train, test in kf:
            start = time.clock()
            tee("Iteration #{}".format(it))
            # Fit a fresh classifier on the training split, score on the held-out split.
            oc = ObliqueClassifier()
            oc.fit(data[train])
            predictions = [oc.predict(r) for r in data[test]]
            actual_labels = data[test][:, -1]
            error = error_rate(predictions, actual_labels)
            tee("Error: {:.3f}".format(error))
            tee("Elapsed time: {:.3f} seconds".format(time.clock() - start))
            tee()
            avg_error += error
            it += 1
        totaltime = time.clock() - st
        tee("Average error: {:.3f}".format(avg_error / folds))
        tee("Total elapsed time: {:.3f} seconds.".format(totaltime))
        tee("Average elapsed time: {:.3f} seconds.".format(totaltime / folds))
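
# The Tee helper above is used but not defined in this section. A minimal
# sketch, assuming all it needs to do is mirror one line of text to several
# streams; the class name and call pattern match the usage in run(), but the
# body itself is an assumption:
class Tee:
    """Write the same line to every wrapped stream (e.g. stdout and a file)."""

    def __init__(self, *streams):
        self.streams = streams

    def __call__(self, text=""):
        # tee("...") prints a line to every stream; tee() emits a blank line.
        for stream in self.streams:
            stream.write(text + "\n")
            stream.flush()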

#!/usr/bin/env python
# This little transformation makes the dataset a lot smaller,
# but the classifier still takes absolute ages to fit it.
from pyblique import get_data
from sklearn.decomposition import PCA
import numpy as np

# Shrink the isolet dataset by means of PCA:
# 600+ attributes -> 10 attributes.
isolet = get_data("Data/isolet.data")
pca = PCA(n_components=10)
transformed = pca.fit_transform(isolet[:, :-1])  # project everything but the label column
labels = isolet[:, -1]
labels = np.reshape(labels, (labels.shape[0], 1))
result = np.concatenate((transformed, labels), axis=1)
np.savetxt("Data/isolet_compressed.data", result, delimiter=",")
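
# Once this script has run, the compressed set can be cross-validated like any
# other file under Data/ via the run() helper above (illustrative call, not
# part of the original script):
#
#     run("isolet_compressed", 10)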