def run_train(algorithm):
    """Train a classifier of the given kind on random subsets of the data.

    Loads the data set through ``code.loaddata()``, requests two index sets
    from ``code.make_random_indices`` (200 for validation, 1000 for
    estimation) and forwards the selected rows to ``code.TrainMyClassifier``.

    Args:
        algorithm: Value stored under the ``'algorithm'`` key of the
            parameter dict handed to ``code.TrainMyClassifier``.

    Returns:
        Whatever ``code.TrainMyClassifier`` returns.
    """
    features, labels = code.loaddata()
    n_samples = len(labels)

    # The validation indices are requested before the estimation indices;
    # the order is kept in case the helper draws from shared random state.
    # NOTE(review): the two index sets are requested independently, so they
    # may overlap - confirm against make_random_indices.
    val_rows = code.make_random_indices(200, n_samples)
    est_rows = code.make_random_indices(1000, n_samples)

    settings = {'algorithm': algorithm}
    return code.TrainMyClassifier(
        features[est_rows],
        features[val_rows],
        settings,
        labels[est_rows],
    )
def cross_validate_pca(algorithm):
    """Cross-validate a classifier on a 6-component PCA projection of the data.

    Args:
        algorithm: Value stored under the ``'algorithm'`` key of the
            parameter dict handed to ``code.MyCrossValidate``.

    Returns:
        Whatever ``code.MyCrossValidate`` returns when called with the
        projected features, the literal ``5``, the parameter dict and the
        labels.
    """
    import sklearn.decomposition

    features, labels = code.loaddata()

    # A fixed number of components is used; an automatic choice via
    # PCA(n_components='mle', svd_solver='full') was tried and disabled.
    reducer = sklearn.decomposition.PCA(n_components=6)
    projected = reducer.fit_transform(features)

    settings = {'algorithm': algorithm}
    return code.MyCrossValidate(projected, 5, settings, labels)
def run_cross_validation(algorithm, length=5000):
    """Run ``code.MyCrossValidate`` on the data set, optionally subsampled.

    Args:
        algorithm: Value stored under the ``'algorithm'`` key of the
            parameter dict handed to ``code.MyCrossValidate``.
        length: Size passed to ``code.make_random_indices`` to pick a subset
            of the data. When it is not positive the full data set is used.

    Returns:
        Whatever ``code.MyCrossValidate`` returns. The parameter dict also
        sets ``'ifprint'`` to ``True``.
    """
    features, labels = code.loaddata()

    use_subset = length > 0
    if use_subset:
        chosen = code.make_random_indices(length, len(labels))
        features, labels = features[chosen], labels[chosen]

    settings = {'algorithm': algorithm, 'ifprint': True}
    return code.MyCrossValidate(features, 5, settings, labels)
def test_SVC():
    """Evaluate the project's SVM wrapper with K-fold CV and a held-out split.

    Steps:
      1. Load the data via ``code.loaddata()`` and shuffle the row indices.
      2. Use the first 20000 shuffled rows for training and the remaining
         rows as a held-out test set.
      3. Run 5-fold cross-validation on the training rows, fitting with
         ``code.SVM.TrainMyClassifierSVM`` and printing each fold's score
         followed by the average.
      4. Predict the held-out rows with the classifier fitted in the LAST
         fold and print the confusion matrix, the classification report and
         the score on the held-out rows.

    The function takes no arguments and returns nothing; all results are
    printed. Every ``print`` call passes a single argument so the output is
    the same under Python 2 and Python 3.
    """
    # Imports stay local, as elsewhere in this file. The former
    # ``sklearn.cross_validation`` import was unused and that module no longer
    # exists in current scikit-learn, so it is intentionally not imported.
    import random

    import numpy as np
    from sklearn.metrics import classification_report
    from sklearn.metrics import confusion_matrix
    from sklearn.model_selection import KFold

    n_train = 20000  # rows used for cross-validation; the rest is held out
    n_splits = 5

    features, targetOutput = code.loaddata()

    # Shuffle row indices in place, then split into train / held-out parts.
    index = np.arange(features.shape[0])
    random.shuffle(index)
    index_train = index[:n_train]
    index_test = index[n_train:]

    features_train = features[index_train]
    features_test = features[index_test]
    targetOutput_train = targetOutput[index_train]
    targetOutput_test = targetOutput[index_test]

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=17)
    score = 0.0
    for train_index, test_index in kf.split(features_train):
        X_estimate = features_train[train_index]
        X_validate = features_train[test_index]
        y_estimate = targetOutput_train[train_index]
        y_validate = targetOutput_train[test_index]

        estParam = code.SVM.TrainMyClassifierSVM(X_estimate, y_estimate)
        score1 = estParam.score(X_validate, y_validate)
        score += score1
        print(score1)

    # Two spaces after the colon reproduce the output of the original
    # ``print "average score: ", value`` statement.
    print("average score:  %s" % (score / n_splits))

    # The held-out evaluation uses the classifier from the final fold only.
    y_predict_clf2 = code.SVM.TestMyClassifierSVM(features_test, estParam)
    confMat = confusion_matrix(targetOutput_test, y_predict_clf2)
    print(confMat)
    print(classification_report(targetOutput_test, y_predict_clf2))
    print(estParam.score(features_test, targetOutput_test))
def print_confusion_matrices(algorithm, length=5000):
    """Cross-validate a classifier and write its confusion matrices to a file.

    The report lists one formatted matrix per entry of the per-fold result
    returned by ``code.MyCrossValidate``, followed by the overall matrix, and
    is written to ``conf_mats_<algorithm>.txt`` in the current directory.

    Args:
        algorithm: Value stored under the ``'algorithm'`` key of the
            parameter dict handed to ``code.MyCrossValidate``; also used in
            the output file name, so it must be a string.
        length: Size passed to ``code.make_random_indices`` to pick a subset
            of the data. When it is not positive the full data set is used.

    Returns:
        None. The only result is the written file.
    """
    X, y = code.loaddata()
    if length > 0:
        indices = code.make_random_indices(length, len(y))
        X = X[indices]
        y = y[indices]

    # Only the confusion matrices are needed from the four returned values.
    _, _, conf_mats, conf_mat = code.MyCrossValidate(
        X, 5, {'algorithm': algorithm}, y)

    newline = '\n'
    parts = []
    for k, fold_mat in enumerate(conf_mats):
        parts.append('confusion matrix ' + str(k) + newline)
        parts.append(code.print_confusion_matrix(fold_mat) + newline)
    parts.append('overall confusion matrix' + newline)
    parts.append(code.print_confusion_matrix(conf_mat))
    output = ''.join(parts)

    # The report is fully built before the file is opened, so a formatting
    # failure never leaves a truncated file behind; the context manager
    # closes the handle even if the write itself raises.
    with open('conf_mats_' + algorithm + '.txt', 'w') as f:
        f.write(output)
def call_MyConfMatrix(algorithm, length=5000): X, y = code.loaddata() if length > 0: indices = code.make_random_indices(length, len(y)) X = X[indices] y = y[indices]