def train(self):
    try:
        model_score_dict = dict()
        model_start_time = datetime.datetime.now()

        nb = GaussianNB()
        nb.fit(self.x_train, self.y_train)
        y_pred = nb.predict(self.x_test)
        acc_nb = accuracy_score(self.y_test, y_pred)
        print("Naive Bayes Accuracy Score is : ", acc_nb)
        model_end_time = datetime.datetime.now()
        model_running_performance = model_end_time - model_start_time

        # Confusion Matrix
        conf_mat = confusion_matrix(self.y_test, y_pred)

        # ROC Curve, using the predicted probability of the positive class
        pred_proba_nb = nb.predict_proba(self.x_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_nb)
        auc_nb = metrics.roc_auc_score(self.y_test, pred_proba_nb)
        plt.figure()
        lw = 3
        plt.plot(fpr, tpr, label="Naive Bayes, AUC = " + str(auc_nb))
        plt.plot([0, 1], [0, 1], color='red', lw=lw, linestyle='dashed')
        plt.title('Naive Bayes ROC')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc=4)
        plt.savefig('./static/images/roc_nb.png')

        # Assign all score values to dict
        model_score_dict["model_running_performance"] = model_running_performance.seconds / 60
        model_score_dict["accuracy"] = acc_nb
        model_score_dict["conf_mat"] = conf_mat.tolist()
        model_score_dict["fpr"] = fpr.tolist()
        model_score_dict["tpr"] = tpr.tolist()
        model_score_dict["auc"] = auc_nb

        md = ModelDetail(**{'AlgorithmName': 'Naive Bayes',
                            'ModelScoreDict': str(model_score_dict)})
        md.save()

        # Export model together with the training column order
        with open('./HRAnalysis/analysemodels/models/NB.pkl', 'wb') as model_file:
            pickle.dump({"columns": self.x_test.columns.tolist(), "model": nb},
                        model_file)
    except Exception as e:
        raise e
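# The pickles written by the train() methods store {"columns": [...], "model": ...}
# so that inference input can be re-aligned to the training column order. A minimal
# loading sketch (a hypothetical helper, not part of the original module; assumes
# `df` is a pandas DataFrame encoded the same way as the training data):
def _predict_with_saved_nb(df):
    """Hypothetical helper: load NB.pkl and score a pre-encoded DataFrame."""
    import pickle  # already imported at module level; repeated here for self-containedness
    with open('./HRAnalysis/analysemodels/models/NB.pkl', 'rb') as f:
        artifact = pickle.load(f)
    aligned = df[artifact["columns"]]  # enforce the saved column order
    return artifact["model"].predict(aligned)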
def train(self):
    try:
        model_score_dict = dict()
        model_start_time = datetime.datetime.now()

        # "lsqr" supports shrinkage; other solvers: eigen, svd (default)
        lda = LinearDiscriminantAnalysis(shrinkage="auto", solver="lsqr")
        lda.fit(self.x_train, self.y_train)
        y_pred = lda.predict(self.x_test)
        acc_lda = accuracy_score(self.y_test, y_pred)
        print("Linear Discriminant Analysis Accuracy Score is : ", acc_lda)
        model_end_time = datetime.datetime.now()
        model_running_performance = model_end_time - model_start_time

        # Confusion Matrix
        conf_mat = confusion_matrix(self.y_test, y_pred)

        # ROC Curve, using the predicted probability of the positive class
        pred_proba_lda = lda.predict_proba(self.x_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_lda)
        auc_lda = metrics.roc_auc_score(self.y_test, pred_proba_lda)
        plt.figure()
        lw = 3
        plt.plot(fpr, tpr, label="Linear Discriminant Analysis, AUC = " + str(auc_lda))
        plt.plot([0, 1], [0, 1], color='red', lw=lw, linestyle='dashed')
        plt.title('Linear Discriminant Analysis ROC')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc=4)
        plt.savefig('./static/images/roc_lda.png')

        # Assign all score values to dict
        model_score_dict["model_running_performance"] = model_running_performance.seconds / 60
        model_score_dict["accuracy"] = acc_lda
        model_score_dict["conf_mat"] = conf_mat.tolist()
        model_score_dict["fpr"] = fpr.tolist()
        model_score_dict["tpr"] = tpr.tolist()
        model_score_dict["auc"] = auc_lda

        md = ModelDetail(**{'AlgorithmName': 'Linear Discriminant Analysis',
                            'ModelScoreDict': str(model_score_dict)})
        md.save()

        # Export model together with the training column order
        with open('./HRAnalysis/analysemodels/models/LDA.pkl', 'wb') as model_file:
            pickle.dump({"columns": self.x_test.columns.tolist(), "model": lda},
                        model_file)
    except Exception as e:
        raise e
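# Scores are persisted as str(dict) in ModelDetail.ModelScoreDict. A sketch for
# reading them back (a hypothetical helper, not in the original code; relies on
# the standard-library ast module, since the stored string contains only literals):
def _load_scores(model_detail):
    """Hypothetical helper: parse a ModelDetail row back into a Python dict."""
    import ast
    return ast.literal_eval(model_detail.ModelScoreDict)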
def train(self): try: model_score_dict = dict() model_start_time = datetime.datetime.now() """rf = RandomForestClassifier() parameters = {"n_estimators" : np.arange(100, 500, 100), "max_features": ["auto"], "max_depth": np.arange(2, 10, 1), "criterion" : ["gini", "entropy"]} gridcv_rf = GridSearchCV(estimator = rf, param_grid = parameters, scoring = "accuracy", cv = 10) print("Grid Search started for Random Forest: ", datetime.datetime.now()) gridcv_rf.fit(x_train, y_train) print("Grid Search finished for Random Forest: ", datetime.datetime.now()) print("Best Parameters for Random Forest are :",gridcv_rf.best_params_) print("accuracy :",gridcv_rf.best_score_)""" rf2 = RandomForestClassifier(criterion="entropy", max_depth=8, max_features="auto", n_estimators=100) rf2.fit(self.x_train, self.y_train) y_pred = rf2.predict(self.x_test) acc_rf2 = accuracy_score(y_pred, self.y_test) print("Random Forest Accuracy Score with Grid Search CV is : ", acc_rf2) model_end_time = datetime.datetime.now() model_running_performance = model_end_time - model_start_time #Confusion Matrix conf_mat = confusion_matrix(self.y_test, y_pred) # ROC Curve pred_proba_rf = rf2.predict_proba(self.x_test)[::, 1] fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_rf) auc_rf = metrics.roc_auc_score(self.y_test, pred_proba_rf) plt.figure() lw = 3 plt.plot(fpr, tpr, label="Random Forest, auc_rf = " + str(auc_rf)) plt.plot([0, 1], [0, 1], color='red', lw=lw, linestyle='dashed') plt.title('Random Forest ROC') plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.legend(loc=4) plt.savefig('./static/images/roc_rf.png') #Assign all score values to dict model_score_dict["model_running_performance"] = ( model_running_performance.seconds / 60) model_score_dict["accuracy"] = acc_rf2 model_score_dict["conf_mat"] = conf_mat.tolist() model_score_dict["fpr"] = fpr.tolist() model_score_dict["tpr"] = tpr.tolist() model_score_dict["auc"] = auc_rf md = ModelDetail( **{ 'AlgorithmName': 'Random Forest', 'ModelScoreDict': str(model_score_dict) }) md.save() # Export model with open('./HRAnalysis/analysemodels/models/RF.pkl', 'wb') as model_file: #pickle.dump(rf2, model_file) pickle.dump( { "columns": self.x_test.columns.tolist(), "model": rf2 }, model_file) except Exception as e: raise e
def train(self):
    try:
        from tensorflow.keras.models import Sequential
        from tensorflow.keras.layers import Dense

        model_score_dict = dict()
        model_start_time = datetime.datetime.now()

        classifier = Sequential()
        # Adding the input layer and the first hidden layer
        classifier.add(Dense(units=6, kernel_initializer='uniform',
                             activation='relu',
                             input_dim=len(self.x_train.columns)))
        # Adding the second hidden layer
        classifier.add(Dense(units=6, kernel_initializer='uniform',
                             activation='relu'))
        # Adding the output layer
        classifier.add(Dense(units=1, kernel_initializer='uniform',
                             activation='sigmoid'))
        # Compiling the ANN: apply gradient-based optimisation (Adam) to the whole network
        classifier.compile(optimizer='adam', loss='binary_crossentropy',
                           metrics=['accuracy'])

        # Fitting the ANN to the Training set
        classifier.fit(self.x_train, self.y_train, epochs=50)
        score, acc_annTrain = classifier.evaluate(self.x_train, self.y_train,
                                                  batch_size=10)
        print('Train score:', score)
        print('Train accuracy:', acc_annTrain)

        # Making predictions and evaluating the model
        y_pred = classifier.predict(self.x_test)
        y_pred = (y_pred > 0.5)
        model_end_time = datetime.datetime.now()
        model_running_performance = model_end_time - model_start_time

        print('*' * 20)
        score, acc_annTest = classifier.evaluate(self.x_test, self.y_test,
                                                 batch_size=10)
        print('Test score:', score)
        print('Test accuracy:', acc_annTest)

        # Confusion Matrix
        conf_mat = metrics.confusion_matrix(self.y_test, y_pred)

        # ROC Curve; the sigmoid output is already a probability
        # (Sequential.predict_proba was removed in recent TensorFlow releases)
        pred_proba_ann = classifier.predict(self.x_test).ravel()
        fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_ann)
        auc_ann = metrics.roc_auc_score(self.y_test, pred_proba_ann)
        plt.figure()
        lw = 3
        plt.plot(fpr, tpr, label="Neural Network, AUC = " + str(auc_ann))
        plt.plot([0, 1], [0, 1], color='red', lw=lw, linestyle='dashed')
        plt.title('Neural Network ROC')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc=4)
        plt.savefig('./static/images/roc_ann.png')

        # Assign all score values to dict (test accuracy, consistent with the other models)
        model_score_dict["model_running_performance"] = model_running_performance.seconds / 60
        model_score_dict["accuracy"] = acc_annTest
        model_score_dict["conf_mat"] = conf_mat.tolist()
        model_score_dict["fpr"] = fpr.tolist()
        model_score_dict["tpr"] = tpr.tolist()
        model_score_dict["auc"] = auc_ann

        md = ModelDetail(**{'AlgorithmName': 'ANN',
                            'ModelScoreDict': str(model_score_dict)})
        md.save()

        # Export model; Keras models are saved natively rather than pickled,
        # with the training column order kept in a side file
        classifier.save('./HRAnalysis/analysemodels/models/ANN.h5')
        with open('./HRAnalysis/analysemodels/models/ann.txt', 'w') as f:
            json.dump({"columns": self.x_test.columns.tolist()}, f)
    except Exception as e:
        raise e
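# Counterpart loading sketch for the ANN artifacts saved above (a hypothetical
# helper, not part of the original module; assumes the same file layout and a
# pre-encoded pandas DataFrame `df`):
def _predict_with_saved_ann(df):
    """Hypothetical helper: load ANN.h5 plus its column list and score df."""
    import json
    from tensorflow.keras.models import load_model
    model = load_model('./HRAnalysis/analysemodels/models/ANN.h5')
    with open('./HRAnalysis/analysemodels/models/ann.txt') as f:
        columns = json.load(f)["columns"]
    probs = model.predict(df[columns]).ravel()  # sigmoid output in [0, 1]
    return probs > 0.5  # same 0.5 threshold as train()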
def train(self): try: model_score_dict = dict() model_start_time = datetime.datetime.now() """ada = AdaBoostClassifier() parameters = { "learning_rate": [0.01, 0.05, 0.1, 0.3, 1, 2], "n_estimators": [50, 100, 1000], "algorithm": ["SAMME", "SAMME.R"] } gridcv_ada = GridSearchCV(estimator=ada, param_grid=parameters, scoring='accuracy', cv=10) print("Grid Search started for Adaboost: ", datetime.datetime.now()) gridcv_ada.fit(self.x_train, self.y_train) print("Grid Search finished for Adaboost: ", datetime.datetime.now()) print("Best Parameters for Adaboost are :", gridcv_ada.best_params_) print("accuracy :", gridcv_ada.best_score_)""" ada2 = AdaBoostClassifier(algorithm="SAMME", learning_rate=0.3, n_estimators=1000) ada2.fit(self.x_train, self.y_train) y_pred = ada2.predict(self.x_test) acc_ada2 = accuracy_score(y_pred, self.y_test) print("Adaboost Accuracy Score with Grid Search CV is : ", acc_ada2) model_end_time = datetime.datetime.now() model_running_performance = model_end_time - model_start_time #Confusion Matrix conf_mat = confusion_matrix(self.y_test, y_pred) # ROC Curve pred_proba_ada = ada2.predict_proba(self.x_test)[::, 1] fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_ada) auc_ada = metrics.roc_auc_score(self.y_test, pred_proba_ada) plt.figure() lw = 3 plt.plot(fpr, tpr, label="Adaboost, auc_ada = " + str(auc_ada)) plt.plot([0, 1], [0, 1], color='red', lw=lw, linestyle='dashed') plt.title('Adaboost ROC') plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.legend(loc=4) plt.savefig('./static/images/roc_ada.png') #Assign all score values to dict model_score_dict["model_running_performance"] = ( model_running_performance.seconds / 60) model_score_dict["accuracy"] = acc_ada2 model_score_dict["conf_mat"] = conf_mat.tolist() model_score_dict["fpr"] = fpr.tolist() model_score_dict["tpr"] = tpr.tolist() model_score_dict["auc"] = auc_ada md = ModelDetail( **{ 'AlgorithmName': 'Adaboost', 'ModelScoreDict': str(model_score_dict) }) md.save() # Export model with open('./HRAnalysis/analysemodels/models/Adaboost.pkl', 'wb') as model_file: #pickle.dump(ada2, model_file) pickle.dump( { "columns": self.x_test.columns.tolist(), "model": ada2 }, model_file) except Exception as e: raise e
def train(self): try: model_score_dict = dict() model_start_time = datetime.datetime.now() """parameters = {"C":[20.0, 40.0, 60.0, 80.0, 100.0, 120.0], "penalty":["l1","l2"]}# l1 lasso l2 ridge logr = LogisticRegression() gridcv_logreg = GridSearchCV(logr, parameters, cv=10) print("Grid Search started for Logistic Regression: ", datetime.datetime.now()) gridcv_logreg.fit(self.x_train, self.y_train) print("Grid Search finished for Logistic Regression: ", datetime.datetime.now()) print("Best Parameters for Logistic Regression are :", gridcv_logreg.best_params_) print("accuracy :",gridcv_logreg.best_score_)""" logreg2 = LogisticRegression(C=20.0, penalty="l2") logreg2.fit(self.x_train, self.y_train) y_pred = logreg2.predict(self.x_test) acc_logreg2 = accuracy_score(y_pred, self.y_test) print( "Logistic Regression Accuracy Score with Grid Search CV is : ", acc_logreg2) model_end_time = datetime.datetime.now() model_running_performance = model_end_time - model_start_time #Confusion Matrix conf_mat = confusion_matrix(self.y_test, y_pred) # ROC Curve predict_proba_logreg = logreg2.predict_proba(self.x_test)[::, 1] fpr, tpr, _ = metrics.roc_curve(self.y_test, predict_proba_logreg) auc_logreg = metrics.roc_auc_score(self.y_test, predict_proba_logreg) plt.figure() lw = 3 plt.plot(fpr, tpr, label="Logistic Regression, auc_logreg = " + str(auc_logreg)) plt.plot([0, 1], [0, 1], color='red', lw=lw, linestyle='dashed') plt.title('Logistic Regression ROC') plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.legend(loc=4) plt.savefig('./static/images/roc_logr.png') #Assign all score values to dict model_score_dict["model_running_performance"] = ( model_running_performance.seconds / 60) model_score_dict["accuracy"] = acc_logreg2 model_score_dict["conf_mat"] = conf_mat.tolist() model_score_dict["fpr"] = fpr.tolist() model_score_dict["tpr"] = tpr.tolist() model_score_dict["auc"] = auc_logreg md = ModelDetail( **{ 'AlgorithmName': 'Logistic Regression', 'ModelScoreDict': str(model_score_dict) }) md.save() # Export model with open('./HRAnalysis/analysemodels/models/LogReg.pkl', 'wb') as model_file: #pickle.dump(logreg2, model_file) pickle.dump( { "columns": self.x_test.columns.tolist(), "model": logreg2 }, model_file) except Exception as e: raise e
def train(self): try: model_score_dict = dict() model_start_time = datetime.datetime.now() """knn = KNeighborsClassifier() parameters = {"leaf_size": np.arange(2, 20, 1)} gridcv_knn = GridSearchCV(estimator=knn, param_grid=parameters, scoring="accuracy", cv=10) print("Grid Search started for KNN : ", datetime.datetime.now()) gridcv_knn.fit(self.x_train, self.y_train) print("Grid Search finished for KNN : ", datetime.datetime.now()) print("Best Parameters for KNN are :", gridcv_knn.best_params_) print("accuracy :", gridcv_knn.best_score_)""" knn2 = KNeighborsClassifier(leaf_size=2) knn2.fit(self.x_train, self.y_train) y_pred = knn2.predict(self.x_test) acc_knn2 = accuracy_score(y_pred, self.y_test) print("KNN Accuracy Score is :", acc_knn2) model_end_time = datetime.datetime.now() model_running_performance = model_end_time - model_start_time #Confusion Matrix conf_mat = confusion_matrix(self.y_test, y_pred) # ROC Curve pred_proba_knn = knn2.predict_proba(self.x_test)[::, 1] fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_knn) auc_knn = metrics.roc_auc_score(self.y_test, pred_proba_knn) plt.figure() lw = 3 plt.plot(fpr, tpr, label="KNN, auc_knn = " + str(auc_knn)) plt.plot([0, 1], [0, 1], color='red', lw=lw, linestyle='dashed') plt.title('KNN ROC') plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.legend(loc=4) plt.savefig('./static/images/roc_knn.png') #Assign all score values to dict model_score_dict["model_running_performance"] = ( model_running_performance.seconds / 60) model_score_dict["accuracy"] = acc_knn2 model_score_dict["conf_mat"] = conf_mat.tolist() model_score_dict["fpr"] = fpr.tolist() model_score_dict["tpr"] = tpr.tolist() model_score_dict["auc"] = auc_knn md = ModelDetail(**{ 'AlgorithmName': 'KNN', 'ModelScoreDict': str(model_score_dict) }) md.save() # Export model with open('./HRAnalysis/analysemodels/models/KNN.pkl', 'wb') as model_file: #pickle.dump(knn2, model_file) pickle.dump( { "columns": self.x_test.columns.tolist(), "model": knn2 }, model_file) except Exception as e: raise e
def train(self):
    try:
        model_score_dict = dict()
        model_start_time = datetime.datetime.now()

        # Grid search used to pick the parameters below, kept for reference:
        """svm = SVC()
        parameters = {"C": np.arange(100, 1000, 100),
                      "gamma": [0.01, 0.001, 0.0001],
                      "kernel": ["rbf"]}
        gridcv_svm = GridSearchCV(estimator=svm, param_grid=parameters,
                                  scoring="accuracy", cv=10)
        print("Grid Search started for SVM: ", datetime.datetime.now())
        gridcv_svm.fit(self.x_train, self.y_train)
        print("Grid Search finished for SVM: ", datetime.datetime.now())
        print("Best Parameters for SVM are :", gridcv_svm.best_params_)
        print("accuracy :", gridcv_svm.best_score_)"""

        # probability=True is required for predict_proba below
        svm2 = SVC(C=100, gamma=0.001, kernel="rbf", probability=True)
        svm2.fit(self.x_train, self.y_train)
        y_pred = svm2.predict(self.x_test)
        acc_svm2 = accuracy_score(self.y_test, y_pred)
        print("SVM Score with Grid Search CV is :", acc_svm2)
        model_end_time = datetime.datetime.now()
        model_running_performance = model_end_time - model_start_time

        # Confusion Matrix
        conf_mat = confusion_matrix(self.y_test, y_pred)

        # ROC Curve, using the predicted probability of the positive class
        pred_proba_svm = svm2.predict_proba(self.x_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_svm)
        auc_svm = metrics.roc_auc_score(self.y_test, pred_proba_svm)
        plt.figure()
        lw = 3
        plt.plot(fpr, tpr, label="Support Vector Machine, AUC = " + str(auc_svm))
        plt.plot([0, 1], [0, 1], color='red', lw=lw, linestyle='dashed')
        plt.title('Support Vector Machine ROC')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc=4)
        plt.savefig('./static/images/roc_svm.png')

        # Assign all score values to dict
        model_score_dict["model_running_performance"] = model_running_performance.seconds / 60
        model_score_dict["accuracy"] = acc_svm2
        model_score_dict["conf_mat"] = conf_mat.tolist()
        model_score_dict["fpr"] = fpr.tolist()
        model_score_dict["tpr"] = tpr.tolist()
        model_score_dict["auc"] = auc_svm

        md = ModelDetail(**{'AlgorithmName': 'SVM',
                            'ModelScoreDict': str(model_score_dict)})
        md.save()

        # Export model together with the training column order
        with open('./HRAnalysis/analysemodels/models/SVM.pkl', 'wb') as model_file:
            pickle.dump({"columns": self.x_test.columns.tolist(), "model": svm2},
                        model_file)
    except Exception as e:
        raise e
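# Note: SVC only exposes predict_proba because probability=True above, which fits
# an extra Platt-scaling calibration via internal cross-validation and slows
# training. A cheaper alternative sketch for the ROC inputs (an option, not the
# original code's behavior) is the uncalibrated decision_function margin, which
# roc_curve and roc_auc_score accept directly:
#
#     svm2 = SVC(C=100, gamma=0.001, kernel="rbf")  # no probability=True
#     svm2.fit(self.x_train, self.y_train)
#     scores = svm2.decision_function(self.x_test)
#     fpr, tpr, _ = metrics.roc_curve(self.y_test, scores)
#     auc_svm = metrics.roc_auc_score(self.y_test, scores)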