# Example #1
# 0
def train_test_split_evaluation_procedure(X_train, Y_train, X_test, Y_test):
    """Train a 5-NN classifier on the train split, print evaluation metrics
    on the test split, pickle the fitted model, and return the accuracy.

    Parameters are the feature/label arrays of a train/test split.
    Returns a one-element list containing the test-set accuracy score.
    """
    # Original used a Python-2 print statement, which is a SyntaxError in
    # Python 3 (the rest of the function already uses print()).
    print('Starting with train_test_split procedure', '\n')
    scores = []
    model = KNeighborsClassifier(n_neighbors=5)
    model.fit(X_train, Y_train)
    predicted_labels = model.predict(X_test)
    scores.append(metrics.accuracy_score(Y_test, predicted_labels))
    print("Confusion Matrix")
    print(confusion_matrix(Y_test, predicted_labels))
    print("Precision")
    print(precision_score(Y_test, predicted_labels, average=None))
    print("Recall")
    print(recall_score(Y_test, predicted_labels, average=None))
    print("F1 score")
    print(f1_score(Y_test, predicted_labels, average=None))
    # Persist the fitted model; a context manager closes the handle even if
    # pickling fails (the original leaked the open file object).
    filename = 'pickle_modified/KNN_5.pkl'
    with open(filename, 'wb') as fh:
        pickle.dump(model, fh)
    return scores
def crossvalidation():
    """Run 3-fold cross-validation of an MLP classifier on the global `df`.

    Relies on the module-level DataFrame `df` containing the feature columns
    listed below plus a 'Primary Type' label column.
    Returns the mean per-fold accuracy as a float.
    """
    neural_model = MLPClassifier(solver='sgd', activation='logistic',
                                 alpha=0.00000001, learning_rate_init=0.2,
                                 learning_rate='adaptive')
    features = df[['X Coordinate', 'Y Coordinate', 'Year', 'MONTH', 'DAY',
                   'Time_in_Mintues']]
    targets = df['Primary Type']
    # .values replaces DataFrame.as_matrix(), which was removed from pandas.
    X = features.values
    y = targets.values
    fold_scores = []
    # Modern KFold API: KFold(n_splits=...).split(X) replaces the removed
    # KFold(n_samples, n_folds=...) constructor the original used.
    cv = KFold(n_splits=3)
    for train_idx, test_idx in cv.split(X):
        model = neural_model.fit(X[train_idx], y[train_idx])
        # The original also computed predictions here but never used them.
        fold_scores.append(model.score(X[test_idx], y[test_idx]))
    mean_accuracy = np.array(fold_scores).mean()
    # print() call replaces the original Python-2 print statement.
    print(fold_scores)
    return mean_accuracy
# Example #3
# 0
def main():
    """Evaluate the model on each university's train/test CSV pair and
    print the per-university prediction accuracy."""
    model = Model()

    # (training csv, testing csv) pairs, one per university.
    datasets = [
        ('asu.csv', 'asu_test.csv'),
        ('clemson.csv', 'clemson_test.csv'),
        ('iitc.csv', 'iitc_test.csv'),
        ('mtu.csv', 'mtu_test.csv'),
    ]

    names = []
    scores = []
    for train_csv, test_csv in datasets:
        result, uni_name = model.loadData(train_csv, test_csv)
        scores.append(result * 100)
        names.append(uni_name)

    print('University predictions for student:')
    for uni, accuracy in zip(names, scores):
        print(uni + ' -> ' + str(accuracy) + '%\n')
 # NOTE(review): fragment of a larger function whose header is above this
 # view; `report`, `macro_precision`, `cm_train`, and the accumulator lists
 # (f1_score, precision_score, recall_score, accuracy_score, sensitivity,
 # specificity) are all defined elsewhere.
 # `report` is presumably classification_report(..., output_dict=True) --
 # TODO confirm against the caller.
 macro_recall = report['macro avg']['recall']
 macro_f1 = report['macro avg']['f1-score']
 
 accuracy = report['accuracy']
 # Leftover debug print ('jjjjj' looks like a temporary marker).
 print('jjjjj',macro_precision)
 #f1_score1 = (f1_score(y, y_pred))
 # NOTE(review): these list names shadow the sklearn.metrics functions of
 # the same name -- here they are accumulator lists, not functions.
 f1_score.append(macro_f1)
 
 #precision_score1 = (precision_score(y, y_pred))
 precision_score.append(macro_precision)
 
 #recall_score1 = (recall_score(y, y_pred)) 
 recall_score.append(macro_recall)
 
 #acc = accuracy_score(y, y_pred)
 accuracy_score.append(accuracy)
 
 # TP/(TP+FN): positive-class recall (sensitivity), assuming cm_train uses
 # sklearn's confusion_matrix layout [[TN, FP], [FN, TP]] -- TODO confirm.
 sensitivity1 =  cm_train[1,1]/(cm_train[1,0]+cm_train[1,1])
 #print(sensitivity)
 #0.6666666666666666
 sensitivity.append(sensitivity1)
 
 # TN/(TN+FP): true-negative rate (specificity), same layout assumption.
 specificity1 = cm_train[0,0]/(cm_train[0,0]+cm_train[0,1])
 #print(specificity)
 #0.89171974522293
 specificity.append( specificity1)
 
 
 # TP/(TP+FP): positive predictive value (positive-class precision).
 PPV1 = cm_train[1,1]/(cm_train[1,1]+cm_train[0,1])
 #print(PPV)
 #0.5
# Libraries
from sklearn.svm import SVC
from sklearn.metrics import jaccard_similarity_score
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
# Jupyter export artifact: render matplotlib figures inline.
get_ipython().run_line_magic('matplotlib', 'inline')

# Compare SVM kernels by WEIGHTED F1 on the held-out test set.
# NOTE(review): the list is named accuracy_score for compatibility with the
# rest of the notebook, but it actually holds weighted-F1 values.
kernel_func = ['sigmoid', 'poly', 'rbf', 'linear']
accuracy_score = []

for k in kernel_func:
    svc_model = SVC(C=0.01, gamma='auto', kernel=k)
    svc_model.fit(X_train, y_train)
    svc_yhat = svc_model.predict(X_test)
    accuracy_score.append(f1_score(y_test, svc_yhat, average='weighted'))

# In[31]:

# Visualization of best kernel
y_pos = np.arange(len(kernel_func))
plt.bar(y_pos, accuracy_score, align='center', alpha=0.8)
plt.xticks(y_pos, kernel_func)
plt.xlabel('Kernel Functions')
# Bug fix: the plotted metric is weighted F1, not raw accuracy, so the old
# 'Accuracy' axis label was misleading.
plt.ylabel('Weighted F1 Score')

# In[32]:

# Final Support Vector Machine model: refit using the best kernel ('poly')
# found in the comparison above, with the same hyperparameters.
svc_model = SVC(kernel='poly', C=0.01, gamma='auto')
svc_model.fit(X_train, y_train)
# 70/30 train/test split with a fixed seed for reproducibility.
# X_sm / Y_sm are presumably the SMOTE-resampled data -- confirm upstream.
X_train, X_test, y_train, y_test = train_test_split(
    X_sm, Y_sm, test_size=0.3, random_state=10)

from sklearn.feature_selection import RFE
# Sweep RFE feature counts 1..38 and record the training-set accuracy of a
# logistic-regression model fit on each selected feature subset.
# NOTE(review): removed the dead per-iteration work from the original --
# the X_train_rfe/X_test_rfe aliases and the feat_index/signi_feat_rfe
# computation were overwritten every pass and recomputed later anyway.
accuracy_score = []
for n_feat in range(1, 39):
    logreg = LogisticRegression()
    rfe_model = RFE(estimator=logreg, n_features_to_select=n_feat)
    rfe_model = rfe_model.fit(X_train, y_train)
    # NOTE(review): this scores on the TRAINING split, so the numbers are
    # optimistic -- confirm that is intended.
    accuracy_score.append(rfe_model.score(X_train, y_train))

# (n_features, rounded training accuracy) pairs.  A list preserves sweep
# order; the original built an unordered set despite the 'lis' name.
lis_acc = [(i + 1, np.round(accuracy_score[i], 4)) for i in range(0, 38)]
lis_acc

# # Logistic regression (RFE)

# In[47]:

# Select the 33 best features via recursive feature elimination and print
# the names of the features that survive (ranking == 1).
logreg = LogisticRegression()
rfe_model = RFE(n_features_to_select=33, estimator=logreg).fit(X_train, y_train)
feat_index = pd.Series(rfe_model.ranking_, index=X_train.columns)
signi_feat_rfe = feat_index[feat_index == 1].index
print(signi_feat_rfe)