def optimal_svm(optimal_c):
    """
    Fit a linear SVM with the given C and print its AUC on the test set.

    The training and test CSV files are read with load_data, both feature
    sets are passed through data_pca (called with 0.95 and the training
    features as its second argument), and an svm.SVC(kernel='linear') is
    fitted on the transformed training data. The decision_function scores
    for the test data are fed to roc_curve, and the resulting auc is printed.

    optimal_c -- value passed as C to svm.SVC (the C chosen during model
                 selection).

    Returns None; the AUC is only written to stdout.
    """
    # load datasets
    train_X, train_y = load_data('train_X.csv', 'train_y.csv')
    test_X, test_y = load_data('test_X.csv', 'test_y.csv')

    # Both calls pass train_X as the second argument; presumably data_pca
    # fits on that argument and transforms the third -- TODO confirm.
    train_X_pca = data_pca(0.95, train_X, train_X)
    test_X_pca = data_pca(0.95, train_X, test_X)

    # flatten the labels to 1-D arrays
    train_y = np.array(train_y).ravel()
    test_y = np.array(test_y).ravel()

    # set up model with the optimal C
    # NOTE(review): newer scikit-learn releases replaced class_weight='auto'
    # with 'balanced' -- confirm against the installed version before changing.
    my_svm = svm.SVC(kernel='linear', C=optimal_c, class_weight='auto')
    predicted_y = my_svm.fit(train_X_pca, train_y).decision_function(test_X_pca)

    # the thresholds returned by roc_curve are not needed here
    fpr, tpr, _ = roc_curve(test_y, predicted_y)

    # Single-argument call form prints identically on Python 2 and also
    # parses on Python 3, unlike the bare print statement.
    print(auc(fpr, tpr))
def main():
    """
    Run cross-validation over a grid of C values and plot the AUC results.

    Reads the training CSVs with load_data, transforms the features with
    data_pca, attaches the labels as column 'Y', then hands the table to
    xValSVM (called with 5 and the list of C values). The AUCs it returns
    are summarised by avg_stderr and drawn by plotxValSVM.
    """
    # load datasets
    features, labels = load_data('train_X.csv', 'train_y.csv')

    # transformed features; the labels are stored alongside them as 'Y'
    table = data_pca(0.95, features, features)
    table['Y'] = labels

    # candidate values of the hyperparameter C: 10**-9 up to 10**1
    c_grid = [10 ** exponent for exponent in range(-9, 2)]

    # cross validation: AUCs for each sample and each C
    auc_scores = xValSVM(table, 'Y', 5, c_grid)

    # average and standard error of the AUC for each C
    mean_auc, stderr_auc = avg_stderr(auc_scores, c_grid)

    # plot the results of cross validation
    plotxValSVM(mean_auc, stderr_auc, c_grid)
def main():
    """
    Split the training data, transform both parts with data_pca and run
    svm_model on the result.

    The CSVs are read with load_data and divided by split_data (called with
    0.2 -- presumably the validation fraction, TODO confirm). Each part is
    passed through data_pca together with the full, unsplit feature set.
    """
    all_X, all_Y = load_data("train_X.csv", "train_y.csv")
    fit_x, held_x, fit_y, held_y = split_data(all_X, all_Y, 0.2)

    # NOTE(review): data_pca receives the full feature set (all_X) as its
    # second argument, not just the training part. If that argument is what
    # the PCA is fitted on, validation rows influence the projection --
    # confirm this is intended.
    fit_pca, held_pca = [
        data_pca(0.95, all_X, part) for part in (fit_x, held_x)
    ]

    svm_model(fit_pca, fit_y, held_pca, held_y)