def Classification_grid_search_method(pipeline, parameters, X_training_set,
                                      Y_training_set, X_test_set, Y_test_set):
    """Tune ``pipeline`` with a grid search and score the refit model.

    A ``GridSearchCV`` with 10-fold cross-validation and ``scoring='accuracy'``
    is fitted on the training set; the fitted search object is then used to
    predict both the test set and the training set.

    Args:
        pipeline: Estimator (or pipeline) handed to ``GridSearchCV``.
        parameters: Parameter grid handed to ``GridSearchCV``.
        X_training_set: Training features.
        Y_training_set: Training labels.
        X_test_set: Held-out features.
        Y_test_set: Held-out labels.

    Returns:
        A 5-tuple ``(Y_pred, train_Accuracy_Score, test_Accuracy_Score,
        Confusion_Matrix, best_estimator)`` where ``Y_pred`` are the
        predictions for ``X_test_set`` and ``best_estimator`` is
        ``trained_model.best_estimator_``.
    """
    # NOTE(review): `AS` is an alias defined outside this function --
    # presumably sklearn.metrics.accuracy_score; confirm against the imports.
    training_model = GridSearchCV(pipeline, parameters, cv=10,
                                  scoring='accuracy')
    trained_model = training_model.fit(X_training_set, Y_training_set)

    Y_pred = trained_model.predict(X_test_set)
    test_Accuracy_Score = AS(Y_test_set, Y_pred)
    train_Accuracy_Score = AS(Y_training_set,
                              trained_model.predict(X_training_set))

    # Use the Y_test_set argument here: the previous code referenced `Y_test`,
    # which is not a parameter of this function, so the matrix was either
    # computed against an unrelated module-level variable or raised NameError.
    Confusion_Matrix = confusion_matrix(Y_test_set, Y_pred)

    return (Y_pred, train_Accuracy_Score, test_Accuracy_Score,
            Confusion_Matrix, trained_model.best_estimator_)
def RunModel(model, data, columns, Predict):
    """Fit ``model`` on a split of ``data`` and score it on the held-out part.

    Args:
        model: Estimator exposing ``fit`` and ``predict``; it is fitted
            in place.
        data: Table indexable by column label(s).
        columns: Label(s) of the feature columns, used as ``data[columns]``.
        Predict: Label of the target column, used as ``data[Predict]``.

    Returns:
        Tuple ``(mse, r2, mae, acc, con_met)`` obtained by calling ``MSE``,
        ``R2``, ``MAE``, ``AS`` and ``CM`` (in that order) on the held-out
        targets and the model's predictions for them.
    """
    features = data[columns]
    target = data[Predict]

    # NOTE(review): `train` and `test` are not defined inside this function;
    # presumably module-level split sizes -- confirm they exist at call time.
    X_fit, X_eval, y_fit, y_eval = train_test_split(
        features, target, train_size=train, test_size=test, random_state=42)

    model.fit(X_fit, y_fit)
    predicted = model.predict(X_eval)

    return (
        MSE(y_eval, predicted),
        R2(y_eval, predicted),
        MAE(y_eval, predicted),
        AS(y_eval, predicted),
        CM(y_eval, predicted),
    )
# Train a random forest on the `rank1Features` columns of `labels_X`, hold out
# 10% of the rows for testing, and print accuracy plus a classification report.
X = labels_X[rank1Features]
X_train, X_test, y_train, y_test = TTS(X, class_y, test_size=0.10,
                                       shuffle=True, random_state=2000)
Model = RFC(n_estimators=1000, max_depth=30, random_state=100,
            max_leaf_nodes=1000)
Model.fit(X_train, y_train)
prediction = Model.predict(X_test)
print("ACCURACY is : {:.2f}".format(AS(y_test, prediction) * 100))

### Precision, Recall, F1
print('\n')
print("Precision, Recall, F1")
print('\n')
CR = classification_report(y_test, prediction)
print(CR)
print('\n')

### ROC CURVE
# roc_curve expects a continuous score so that it can sweep the decision
# threshold. Passing the hard class predictions (as the code did before)
# collapses the curve to a single operating point and understates the AUC.
# Column 1 of predict_proba is the probability of Model.classes_[1], i.e. the
# larger label, which is the positive class roc_curve assumes by default for
# {0, 1} / {-1, 1} targets.
# NOTE(review): this assumes a binary target -- confirm for `class_y`.
positive_class_score = Model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, positive_class_score)
roc_auc = auc(fpr, tpr)
# Disabled Perceptron experiment, kept from the original source:
#p = Perceptron(random_state=42,
#               max_iter=10)
#p.fit(X, y)

# Fit a two-hidden-layer MLP (15 units each, L-BFGS solver) on the training data.
mlpc = MLPClassifier(hidden_layer_sizes=(15, 15), solver='lbfgs')
mlpc.fit(X_train, Y_train)

# Print, per test row: predicted class, true label, and the probability the
# model assigns to the predicted class.
# NOTE(review): `.loc[[i]]` and `Y_Test[i]` are label lookups, so this assumes
# X_Test / Y_Test are indexed 0..shape-1; `pred[Y_pred]` additionally assumes
# the class labels double as column positions of predict_proba -- confirm.
for i in range(shape):
    #print(i)
    sample = X_Test.loc[[i]]
    Y_pred = int(mlpc.predict(sample)[0])
    pred = mlpc.predict_proba(sample)[0]
    print(Y_pred, ' ', Y_Test[i], ' ', pred[Y_pred])

# mlpc_result = mlpc.predict(X_test)
# Score the whole test set at once and record the accuracy (in percent).
Y_pred = mlpc.predict(X_Test)
accuracy_pct = AS(Y_Test, Y_pred) * 100
print(" Accuracy is : ", accuracy_pct)
Accuracy.append(accuracy_pct)
print(max(Accuracy))

df = pd.DataFrame({'Accuracy': Accuracy})

conf_matrix = confusion_matrix(Y_Test, Y_pred)
#accuracy = accuracy_score(Y_Test, Y_pred)
print(conf_matrix)

# Disabled cross-validation experiments, kept from the original source:
#param_grid = {'a': [1, 9], 'b': [True, False]}
#scores_10 = cross_val_score(estimator = mlpc,X = X_train, y = Y_train, cv = 10)
#scores1_10 = cross_val_score(estimator = mlpc,X = X_test, y = Y_test, cv = 10)
# Fit a random forest on the fraud-check data and report how well its
# predictions for the same rows it was fitted on match the 'TaxInc' column.
target = colsname[9]
X = fraudcheck2[predictors]
Y = fraudcheck2[target]
rfc = RFC(n_estimators=1000, criterion="entropy", oob_score=True, n_jobs=2)

# The bare expressions below only display a value in an interactive session;
# run as a script they compute a result and discard it.
np.shape(fraudcheck2)
rfc.fit(X, Y)  # RFC is sklearn.ensemble.RandomForestClassifier (see import below)
rfc.oob_score_
rfc.predict(X)

fraudcheck2['rfc_pred'] = rfc.predict(X)
# NOTE(review): accuracy is measured against the hard-coded 'TaxInc' column,
# while the model target is colsname[9] -- presumably the same column; confirm.
model_accuracy = AS(fraudcheck2['TaxInc'], fraudcheck2['rfc_pred'])
print('Model accuracy score: {0:0.4f}'.format(model_accuracy))

##################Company Data##################
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.metrics import accuracy_score as AS

# Load the company data set and draw a box plot of the 'Sales' column.
cdata = pd.read_csv("C:\\Users\\Home\\Downloads\\Company_data.csv")
cdata.columns
cdata
cdata.describe()

# NOTE(review): `plt` is not imported in this fragment -- presumably
# matplotlib.pyplot imported earlier in the file; confirm.
a = list(cdata['Sales'])
plt.boxplot(a)
# Evaluate the already-fitted `clf_gini` on X_feature1 / y3 and record the
# accuracy together with the tree settings that produced it.
# NOTE(review): `max_depth` and `max_leaf_nodes` are not defined in this
# fragment; it presumably ran inside an outer hyper-parameter loop whose
# header is not visible here -- confirm the intended nesting.
X_Test = X_feature1
Y_Test = y3
shape = X_Test.shape[0]

# Print, per row: predicted class, true label, and the probability assigned
# to the predicted class.
# NOTE(review): `.loc[[i]]` / `Y_Test[i]` are label lookups, so this assumes
# a 0..shape-1 index; `pred[Y_pred]` assumes class labels double as column
# positions of predict_proba -- confirm.
for i in range(shape):
    # #print(i)
    sample = X_Test.loc[[i]]
    Y_pred = int(clf_gini.predict(sample)[0])
    pred = clf_gini.predict_proba(sample)[0]
    print(Y_pred, ' ', Y_Test[i], ' ', pred[Y_pred])

### print('Y Test : ', Y_Test)
### print('Y Pred : ', Y_pred)
Y_pred = clf_gini.predict(X_Test)
# print(" Accuracy is : ", AS(Y_Test,Y_pred)*100, ' ',max_depth, ' ',max_leaf_nodes)
## print('Y Pred : ', pred)
Accuracy.append(AS(Y_Test, Y_pred) * 100)
for_depth.append(max_depth)
for_leaf.append(max_leaf_nodes)

print(max(Accuracy))

# Tabulate every recorded run and show the ten most accurate ones.
df = pd.DataFrame({
    'Accuracy': Accuracy,
    'max_depth': for_depth,
    'max_leaf_nodes': for_leaf,
})
df = df.sort_values("Accuracy", ascending=False).head(10)
print(df)

#""" Taking top 10 values of max_depth and max_leaf_nodes and testing"""
#df1 = pd.DataFrame()
#Accuracy = []
#Random_State = []
# Fit `clf_gini`, evaluate it on X_feature1 / y3, and record the accuracy
# together with the tree settings that produced it.
# NOTE(review): `max_depth`, `max_leaf_nodes` and `k` are not defined in this
# fragment; it presumably ran inside outer loops whose headers are not visible
# here -- confirm the intended nesting.
clf_gini.fit(X_Train, Y_Train)

X_Test = X_feature1
Y_Test = y3
shape = X_Test.shape[0]

# Print, per row: predicted class, true label, and the probability assigned
# to the predicted class.
# NOTE(review): `.loc[[i]]` / `Y_Test[i]` are label lookups, so this assumes
# a 0..shape-1 index; `pred[Y_pred]` assumes class labels double as column
# positions of predict_proba -- confirm.
for i in range(shape):
    # #print(i)
    sample = X_Test.loc[[i]]
    Y_pred = int(clf_gini.predict(sample)[0])
    pred = clf_gini.predict_proba(sample)[0]
    print(Y_pred, ' ', Y_Test[i], ' ', pred[Y_pred])

### print('Y Test : ', Y_Test)
### print('Y Pred : ', Y_pred)
Y_pred = clf_gini.predict(X_Test)
accuracy_pct = AS(Y_Test, Y_pred) * 100
print(" Accuracy is : ", accuracy_pct, ' ', max_depth, ' ', max_leaf_nodes,
      ' ', k)
## print('Y Pred : ', pred)
Accuracy.append(accuracy_pct)
for_depth.append(max_depth)
for_leaf.append(max_leaf_nodes)

print(max(Accuracy))

# Tabulate every recorded run and show the ten most accurate ones.
df = pd.DataFrame({
    'Accuracy': Accuracy,
    'max_depth': for_depth,
    'max_leaf_nodes': for_leaf,
})
df = df.sort_values("Accuracy", ascending=False).head(10)
print(df)

#""" Taking top 10 values of max_depth and max_leaf_nodes and testing"""
# Transform the data into principal components. The first 10 components are
# kept because (per the original author's note) they explain 99 percent of
# the variance. The last two columns of X_train / X_test are appended as-is,
# outside the projection.
# NOTE(review): U, s and V presumably come from an SVD of the standardized
# training features computed earlier in the file -- confirm.
train_components = U.dot(np.diag(s))[:, :10]
train_passthrough = np.array(X_train.iloc[:, -2:]).reshape(len(X_train), 2)
X_train_std_transformed = np.concatenate(
    (train_components, train_passthrough), axis=1)

test_components = X_test_std[:, :-2].dot(np.transpose(V))[:, :10]
test_passthrough = np.array(X_test.iloc[:, -2:]).reshape(len(X_test), 2)
X_test_std_transformed = np.concatenate(
    (test_components, test_passthrough), axis=1)

"""Using Classification techniques for wine quality analysis"""
# Gaussian naive Bayes: fit on the transformed training data, then collect
# train/test accuracy, the confusion matrix, and weighted precision/recall.
clf = NB.GaussianNB()
clf.fit(X_train_std_transformed, Y_train)
Y_pred_GNB = clf.predict(X_test_std_transformed)

train_accuracy_score_GNB = AS(Y_train, clf.predict(X_train_std_transformed))
test_accuracy_score_GNB = AS(Y_test, Y_pred_GNB)
confusion_matrix_GNB = confusion_matrix(Y_test, Y_pred_GNB)
precision_score_GNB = precision_score(Y_test, Y_pred_GNB, average='weighted')
recall_score_GNB = recall_score(Y_test, Y_pred_GNB, average='weighted')


#parameter selection and prediction for classification models
def Classification_grid_search_method(pipeline, parameters, X_training_set,
                                      Y_training_set, X_test_set, Y_test_set):
    # NOTE(review): this definition runs past the end of the visible source;
    # only the statements that are visible are reproduced here, unchanged.
    training_model = GridSearchCV(pipeline, parameters, cv=10,
                                  scoring='accuracy')
    trained_model = training_model.fit(X_training_set, Y_training_set)
    Y_pred = trained_model.predict(X_test_set)