def main(filename):
    """Build a train/test split for the letters "ABCDEFGHIJ" and run ``predict``.

    The data always comes from 'Handwrittenletters.txt'; ``filename`` is
    accepted but not read by the current implementation (the call that used
    it, ``data_handler.get_data(filename)``, is disabled).

    The split is requested with the arguments 39 and "1:20", and ``predict``
    is called with 4 as its last argument.
    NOTE(review): presumably 39 is the number of instances per class and
    "1:20" the test-instance range -- confirm against data_handler.
    """
    class_ids = data_handler.letter_2_digit_convert("ABCDEFGHIJ")
    picked = data_handler.pickDataClass('Handwrittenletters.txt', class_ids)
    split = data_handler.splitData2TestTrain(picked, 39, "1:20")
    trainX, trainY, testX, testY = split
    predict(trainX, trainY, testX, testY, 4)
def driver(classifier):
    """Print the title for ``classifier`` and its average accuracy.

    When ``classifier`` equals 4 the data is split with
    ``data_handler.splitData2TestTrain('ATNTFaceImages400.txt', 10, '1:10')``
    and scored by ``SVM.cross_validate``.  Any other value loads the same
    file through ``data_handler.get_data`` and is scored by
    ``cross_validator`` with 5 as its first argument.
    """
    print (getTitle(classifier))

    if classifier == 4:
        split = data_handler.splitData2TestTrain('ATNTFaceImages400.txt', 10, '1:10')
        trainX, trainY, testX, testY = split
        average = SVM.cross_validate(trainX, trainY, testX, testY)
    else:
        data, indexes = data_handler.get_data("ATNTFaceImages400.txt")
        average = cross_validator(5, data, indexes, classifier)

    # Both paths report under the same heading.
    print ("\nAverage Accuracy for 5 folds: %s" % average)
def driver():
    """Rank columns by F-score, keep the best 100 and classify with a linear SVC.

    Steps:
      1. Call ``compute_f_test`` for every column ``i >= 1`` of the
         module-level ``data_frame``, each time paired with column 0.
      2. Sort the module-level ``f_data`` by its second field, descending,
         and keep the first 100 rows.
      3. Print the kept rows, then select column 0 plus the kept columns
         from ``data_frame``.
      4. Split the selection with ``data_handler.splitData2TestTrain`` and
         hand it to ``classify_and_predict`` with ``SVC(kernel='linear')``.

    NOTE(review): ``f_data`` rows are read as (column index, F-score) and are
    presumably appended by ``compute_f_test`` -- confirm against its source.
    """
    for i in range(1, data_frame.shape[1]):
        compute_f_test(data_frame.iloc[:, [0, i]], i)

    f_data_des = np.array(sorted(f_data, key=itemgetter(1), reverse=True))
    top_selected_samples = f_data_des[:100, :]

    print ("Selected top %s sample indexes and f_scores \n"%len(top_selected_samples))
    for index, item in enumerate(top_selected_samples):
        print ("%s => %s - %s"%(index+1, item[0], item[1]))

    # Only the index field may be used to address columns.  Casting the whole
    # two-field array and then calling np.insert without an axis flattens it,
    # which would slip the truncated F-scores in as column positions.
    top_100_ixs = top_selected_samples[:, 0].astype('int')
    # Put column 0 first so it travels with the selected columns.
    top_100_ixs = np.insert(top_100_ixs, 0, 0)
    training_data = data_frame.iloc[:, top_100_ixs]

    # DataFrame.as_matrix() is gone from current pandas; .values is the
    # equivalent that also exists in older releases.
    train_x, train_y, testX, testY = data_handler.splitData2TestTrain(
        training_data.values.tolist(), 39, "20:19")

    print ("\nPredicted Classes : \n")
    classify_and_predict(SVC(kernel='linear'), train_x, train_y, testX)
# Script: split a hand-picked set of letters from the handwritten-letters
# file, store the split, then print accuracy and predictions from the
# centroid and KNN classifiers.
import data_handler
import LinearRegression
import centroid_classifier
import kn_classifier
import SVM

dataset_file = "Handwrittenletters.txt"
test_string = "DCIAYNKR"  # 4391 YVKR
test_instances = "1:10"
instances_pre_class = 39

# Pick the rows for the letters in test_string, then split them.
picked_data = data_handler.pickDataClass(
    dataset_file,
    data_handler.letter_2_digit_convert(test_string),
)
trainX, trainY, testX, testY = data_handler.splitData2TestTrain(
    picked_data,
    instances_pre_class,
    test_instances,
)
data_handler.store_data(trainX, trainY, testX, testY)

print ("Actual classes %s" % data_handler.letter_2_digit_convert(test_string))

# Centroid classifier; predict() returns (accuracy, predictions).
print ("Centroid method")
centroid_acc, prediction = centroid_classifier.predict(
    trainX, trainY, testX, testY, 3)
print ("Centroid accuracy %s" % centroid_acc)
print ("Predicted Classes %s" % prediction)

print ("============================================================================")

# KNN classifier; knn_driver() returns (accuracy, predictions).
print ("KNN method")
kn_acc, prediction = kn_classifier.knn_driver(trainX, testX, 3)
print ("KNN accuracy %s" % kn_acc)
print ("Predicted Classes %s" % prediction)
# Script: fit a KMeans model on the train and test rows of the
# handwritten-letters file and expose the cluster centres and labels.
from numpy import vstack, array, int32
from numpy.random import rand
# from scipy.cluster.vq import kmeans,vq
from sklearn.cluster import KMeans
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import Hungarian_python
import data_handler

filename = "HandWrittenLetters.txt"
number_per_class = 39
test_instances = "1:3"
nu_clusters = 26

trainX, trainY, testX, testY = data_handler.splitData2TestTrain(
    filename, number_per_class, test_instances)
# trainX, trainY, testX, testY = data_handler.splitData2TestTrain(data_handler.pickDataClass(filename, ['1', '2', '3', '4']), 10, "1:3")

# The test rows are appended to the training rows before fitting, so the
# model sees every row returned by the split.
trainX.extend(testX)
trainX = array(trainX, dtype=int32)
testX = array(testX, dtype=int32)

# colors = ["g.","r.","c.","y.", "w.", "b.", "a.", "e.", "f.", "h."]

# fit() returns the estimator itself, so construction and fitting chain.
kmeans = KMeans(n_clusters=nu_clusters).fit(trainX)
centroids = kmeans.cluster_centers_
labels = kmeans.labels_

# for i in range(len(trainX)):
#     print("coordinate:",trainX[i], "label:", labels[i])
# Script: run Task1.predict on a series of train/test splits of the letters
# in sample_string and plot one point per split.
import data_handler
import matplotlib as mpl
mpl.use('TkAgg')  # select the Tk backend before pyplot is imported
import matplotlib.pyplot as plt
import Task1

all_accs = []
sample_string = "adgjwryozm"
splits = ["1:34", "1:29", "1:24", "1:19", "1:14", "1:9", "1:4"]

for split in splits:
    print("Current split %s" % split)
    picked = data_handler.pickDataClass(
        'Handwrittenletters.txt',
        data_handler.letter_2_digit_convert(sample_string),
    )
    trainX, trainY, testX, testY = data_handler.splitData2TestTrain(
        picked, 39, split)
    result = Task1.predict(trainX, trainY, testX, testY, 10)
    all_accs.append(result)

print(all_accs)

# One x position per entry of `splits`, in the same order.
x = [1, 2, 3, 4, 5, 6, 7]

# Plot the data: a black triangle per split, plus a labelled plot entry so
# each split index shows up in the legend.
for i, position in enumerate(x):
    plt.scatter(position, all_accs[i], color='black', marker='^')
    plt.plot(position, all_accs[i], label=i)

plt.legend(loc='lower right', frameon=False)

# Show the plot
plt.show()
mpl.use('TkAgg')
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
# NearestCentroid is imported from the public package path; the private
# `sklearn.neighbors.nearest_centroid` module is not importable in current
# scikit-learn releases.
from sklearn.neighbors import NearestCentroid
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn import svm
from sklearn import metrics
import math
import operator
import data_handler

# header=None is the documented way to read a file without a header row
# (header=-1 is rejected by current pandas), and .values replaces the
# removed DataFrame.as_matrix().
data = pd.read_csv('HandWrittenLetters.txt', header=None).values

trainX, trainY, testX, testY = data_handler.splitData2TestTrain(
    data_handler.pickDataClass(
        'Handwrittenletters.txt',
        data_handler.letter_2_digit_convert("abcde")),
    39, "1:9")


def getCentroid(labelVectors):
    """Return the column-wise mean of ``labelVectors`` with a trailing label.

    Every column except the last is converted to ``int`` value by value,
    summed over all rows and divided by the number of rows.  The final
    element of the result is the last entry of the last row, copied as-is.

    Args:
        labelVectors: non-empty sequence of rows; the width is taken from
            the first row, and each row's last entry is left out of the mean.

    Returns:
        list: one float mean per averaged column, followed by
        ``labelVectors[-1][-1]``.

    Raises:
        IndexError: if ``labelVectors`` is empty.
    """
    row_count = len(labelVectors)
    feature_count = len(labelVectors[0]) - 1

    # float() keeps the division a true division under Python 2 as well.
    centroid = [
        float(sum(int(row[col]) for row in labelVectors)) / row_count
        for col in range(feature_count)
    ]
    centroid.append(labelVectors[-1][-1])
    return centroid