import pickle

from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score


def train_test_split_evaluation_procedure(X_train, Y_train, X_test, Y_test):
    """Fit a 5-NN classifier on the train split and report metrics on the test split."""
    print('Starting with train_test_split procedure', '\n')
    accuracy_score = []

    # Train the k-nearest-neighbours model and predict the held-out labels.
    model = KNeighborsClassifier(n_neighbors=5)
    model.fit(X_train, Y_train)
    predicted_labels = model.predict(X_test)
    accuracy_score.append(metrics.accuracy_score(Y_test, predicted_labels))

    # Per-class evaluation metrics on the test split.
    print("Confusion Matrix")
    print(confusion_matrix(Y_test, predicted_labels))
    print("Precision")
    print(precision_score(Y_test, predicted_labels, average=None))
    print("Recall")
    print(recall_score(Y_test, predicted_labels, average=None))
    print("F1 score")
    print(f1_score(Y_test, predicted_labels, average=None))

    # Persist the fitted model for later reuse.
    filename = 'pickle_modified/KNN_5.pkl'
    with open(filename, 'wb') as f:
        pickle.dump(model, f)

    return accuracy_score
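# Minimal usage sketch (not part of the original script): assumes a DataFrame `df`
# with the same feature columns and 'Primary Type' target used by the
# cross-validation routine below; the split ratio and random_state are illustrative.
from sklearn.model_selection import train_test_split

features = df[['X Coordinate', 'Y Coordinate', 'Year', 'MONTH', 'DAY', 'Time_in_Mintues']]
target = df['Primary Type']
X_tr, X_te, Y_tr, Y_te = train_test_split(features, target, test_size=0.2, random_state=42)
knn_accuracy = train_test_split_evaluation_procedure(X_tr, Y_tr, X_te, Y_te)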
import numpy as np
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier


def crossvalidation():
    """3-fold cross-validation of an MLP classifier on the crime DataFrame `df`."""
    neural_model = MLPClassifier(solver='sgd', activation='logistic', alpha=1e-8,
                                 learning_rate_init=0.2, learning_rate='adaptive')
    cv = KFold(n_splits=3)

    Total_TrainData = df[['X Coordinate', 'Y Coordinate', 'Year', 'MONTH', 'DAY', 'Time_in_Mintues']]
    Total_TrainData_Target = df['Primary Type']
    newtraindata = Total_TrainData.to_numpy()
    newtraindata_target = Total_TrainData_Target.to_numpy()

    accuracy_score = []
    for train_cv, test_cv in cv.split(newtraindata):
        # Fit on the training fold, then score on the held-out fold.
        model = neural_model.fit(newtraindata[train_cv], newtraindata_target[train_cv])
        neuralnetwork_predictions = model.predict(newtraindata[test_cv])
        accuracy_score.append(model.score(newtraindata[test_cv], newtraindata_target[test_cv]))

    mean_accuracy = np.array(accuracy_score).mean()
    print(accuracy_score)
    return mean_accuracy
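# Aside (not in the original script): the same per-fold accuracies can be obtained
# more compactly with cross_val_score. A sketch assuming the same `df` columns as above.
from sklearn.model_selection import cross_val_score

X_cv = df[['X Coordinate', 'Y Coordinate', 'Year', 'MONTH', 'DAY', 'Time_in_Mintues']].to_numpy()
y_cv = df['Primary Type'].to_numpy()
mlp = MLPClassifier(solver='sgd', activation='logistic', alpha=1e-8,
                    learning_rate_init=0.2, learning_rate='adaptive')
print(cross_val_score(mlp, X_cv, y_cv, cv=3).mean())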
def main():
    model = Model()
    university_names = []
    accuracy_score = []

    # list of training csv files
    training_List = ['asu.csv', 'clemson.csv', 'iitc.csv', 'mtu.csv']
    # list of testing csv files
    testing_list = ['asu_test.csv', 'clemson_test.csv', 'iitc_test.csv', 'mtu_test.csv']

    # get results for each university data
    for trainD, testD in zip(training_List, testing_list):
        result, uni_name = model.loadData(trainD, testD)
        accuracy_score.append(result * 100)
        university_names.append(uni_name)
    # end for

    print('University predictions for student:')
    for uni, accuracy in zip(university_names, accuracy_score):
        print(uni + ' -> ' + str(accuracy) + '%\n')
# Macro-averaged metrics taken from the classification-report dictionary.
macro_recall = report['macro avg']['recall']
macro_f1 = report['macro avg']['f1-score']
accuracy = report['accuracy']
print('macro precision:', macro_precision)

# Accumulate per-run metrics in the result lists.
f1_score.append(macro_f1)
precision_score.append(macro_precision)
recall_score.append(macro_recall)
accuracy_score.append(accuracy)

# Derived rates from the training confusion matrix cm_train
# (rows = true class, columns = predicted class, so cm_train[1, 1] is TP).
sensitivity1 = cm_train[1, 1] / (cm_train[1, 0] + cm_train[1, 1])   # TP / (FN + TP)
sensitivity.append(sensitivity1)
specificity1 = cm_train[0, 0] / (cm_train[0, 0] + cm_train[0, 1])   # TN / (TN + FP)
specificity.append(specificity1)
PPV1 = cm_train[1, 1] / (cm_train[1, 1] + cm_train[0, 1])           # TP / (TP + FP)
# Libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import jaccard_score, f1_score

get_ipython().run_line_magic('matplotlib', 'inline')

# Compare SVM kernels by weighted F1 score on the test split.
kernel_func = ['sigmoid', 'poly', 'rbf', 'linear']
accuracy_score = []
for k in kernel_func:
    svc_model = SVC(C=0.01, gamma='auto', kernel=k)
    svc_model.fit(X_train, y_train)
    svc_yhat = svc_model.predict(X_test)
    accuracy_score.append(f1_score(y_test, svc_yhat, average='weighted'))


# In[31]:

# Visualization of best kernel
y_pos = np.arange(len(kernel_func))
plt.bar(y_pos, accuracy_score, align='center', alpha=0.8)
plt.xticks(y_pos, kernel_func)
plt.xlabel('Kernel Functions')
plt.ylabel('Weighted F1 score')


# In[32]:

# Final Support Vector Machine Model
svc_model = SVC(C=0.01, gamma='auto', kernel='poly')
svc_model.fit(X_train, y_train)
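# Sketch of a follow-up evaluation cell (not in the original notebook): score the
# final poly-kernel model with the two metrics imported above. Assumes the same
# X_test / y_test split used earlier.
final_yhat = svc_model.predict(X_test)
print('Weighted F1 :', f1_score(y_test, final_yhat, average='weighted'))
print('Jaccard     :', jaccard_score(y_test, final_yhat, average='weighted'))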
X_train, X_test, y_train, y_test = train_test_split(X_sm, Y_sm, random_state=10, test_size=0.3)

from sklearn.feature_selection import RFE

# Try every feature-subset size and record the training accuracy of the RFE-wrapped model.
accuracy_score = []
for i in range(1, 39):
    X_train_rfe = X_train
    X_test_rfe = X_test
    logreg = LogisticRegression()
    rfe_model = RFE(estimator=logreg, n_features_to_select=i)
    rfe_model = rfe_model.fit(X_train_rfe, y_train)
    feat_index = pd.Series(data=rfe_model.ranking_, index=X_train_rfe.columns)
    signi_feat_rfe = feat_index[feat_index == 1].index
    accuracy_score.append(rfe_model.score(X_train_rfe, y_train))

# Pair each subset size with its (rounded) training accuracy, in order.
lis_acc = [(i + 1, np.round(accuracy_score[i], 4)) for i in range(0, 38)]
lis_acc


# # Logistic regression (RFE)

# In[47]:

logreg = LogisticRegression()
rfe_model = RFE(estimator=logreg, n_features_to_select=33)
rfe_model = rfe_model.fit(X_train, y_train)
feat_index = pd.Series(data=rfe_model.ranking_, index=X_train.columns)
signi_feat_rfe = feat_index[feat_index == 1].index
print(signi_feat_rfe)
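# Sketch of a sanity check (not in the original notebook): score the fitted
# 33-feature RFE model on the held-out split created above, for comparison with
# the training accuracies collected in accuracy_score.
print('Train accuracy:', rfe_model.score(X_train, y_train))
print('Test accuracy :', rfe_model.score(X_test, y_test))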