Exemple #1
0
    def train(self):
        """Train a Gaussian Naive Bayes classifier and persist its results.

        Fits on ``self.x_train``/``self.y_train``, evaluates on
        ``self.x_test``/``self.y_test``, writes the ROC plot to
        ``./static/images/roc_nb.png``, saves the metrics through
        ``ModelDetail`` and pickles the fitted model together with the
        feature column names.

        Any exception raised while fitting, plotting or saving propagates to
        the caller unchanged.
        """
        model_start_time = datetime.datetime.now()

        nb = GaussianNB()
        nb.fit(self.x_train, self.y_train)
        y_pred = nb.predict(self.x_test)
        # scikit-learn documents the argument order as (y_true, y_pred).
        acc_nb = accuracy_score(self.y_test, y_pred)
        print("Naive Bayes Accuracy Score is : ", acc_nb)

        # Timing covers fit + predict only, not the reporting below.
        model_running_performance = datetime.datetime.now() - model_start_time

        # Confusion matrix
        conf_mat = confusion_matrix(self.y_test, y_pred)

        # ROC curve, using the probability in column 1
        # (assumes a binary target -- TODO confirm).
        pred_proba_nb = nb.predict_proba(self.x_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_nb)
        auc_nb = metrics.roc_auc_score(self.y_test, pred_proba_nb)

        fig = plt.figure()
        try:
            plt.plot(fpr, tpr, label="Naive Bayes, auc_nb = " + str(auc_nb))
            plt.plot([0, 1], [0, 1], color='red', lw=3, linestyle='dashed')
            plt.title('Naive Bayes ROC')
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.legend(loc=4)
            plt.savefig('./static/images/roc_nb.png')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

        model_score_dict = {
            # total_seconds() keeps sub-second precision and does not wrap
            # after 24 hours the way the ``seconds`` attribute does.
            "model_running_performance":
                model_running_performance.total_seconds() / 60,
            # Plain floats so str(dict) never embeds a NumPy scalar repr.
            "accuracy": float(acc_nb),
            "conf_mat": conf_mat.tolist(),
            "fpr": fpr.tolist(),
            "tpr": tpr.tolist(),
            "auc": float(auc_nb),
        }

        md = ModelDetail(AlgorithmName='Naive Bayes',
                         ModelScoreDict=str(model_score_dict))
        md.save()

        # Export the model together with the column order it was scored on.
        with open('./HRAnalysis/analysemodels/models/NB.pkl',
                  'wb') as model_file:
            pickle.dump(
                {
                    "columns": self.x_test.columns.tolist(),
                    "model": nb,
                }, model_file)
    def train(self):
        """Train a Linear Discriminant Analysis classifier and persist results.

        Fits on ``self.x_train``/``self.y_train``, evaluates on
        ``self.x_test``/``self.y_test``, writes the ROC plot to
        ``./static/images/roc_lda.png``, saves the metrics through
        ``ModelDetail`` and pickles the fitted model together with the
        feature column names.

        Any exception raised while fitting, plotting or saving propagates to
        the caller unchanged.
        """
        model_start_time = datetime.datetime.now()

        # solver alternatives noted by the author: "eigen", "svd" (default)
        lda = LinearDiscriminantAnalysis(shrinkage="auto", solver="lsqr")
        lda.fit(self.x_train, self.y_train)
        y_pred = lda.predict(self.x_test)
        # scikit-learn documents the argument order as (y_true, y_pred).
        acc_lda = accuracy_score(self.y_test, y_pred)
        print("Linear Discriminant Analysis Accuracy Score is : ", acc_lda)

        # Timing covers fit + predict only, not the reporting below.
        model_running_performance = datetime.datetime.now() - model_start_time

        # Confusion matrix
        conf_mat = confusion_matrix(self.y_test, y_pred)

        # ROC curve, using the probability in column 1
        # (assumes a binary target -- TODO confirm).
        pred_proba_lda = lda.predict_proba(self.x_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_lda)
        auc_lda = metrics.roc_auc_score(self.y_test, pred_proba_lda)

        fig = plt.figure()
        try:
            plt.plot(fpr, tpr,
                     label="Linear Discriminant Analysis, auc_lda = " + str(auc_lda))
            plt.plot([0, 1], [0, 1], color='red', lw=3, linestyle='dashed')
            plt.title('Linear Discriminant Analysis ROC')
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.legend(loc=4)
            plt.savefig('./static/images/roc_lda.png')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

        model_score_dict = {
            # total_seconds() keeps sub-second precision and does not wrap
            # after 24 hours the way the ``seconds`` attribute does.
            "model_running_performance":
                model_running_performance.total_seconds() / 60,
            # Plain floats so str(dict) never embeds a NumPy scalar repr.
            "accuracy": float(acc_lda),
            "conf_mat": conf_mat.tolist(),
            "fpr": fpr.tolist(),
            "tpr": tpr.tolist(),
            "auc": float(auc_lda),
        }

        md = ModelDetail(AlgorithmName='Linear Discriminant Analysis',
                         ModelScoreDict=str(model_score_dict))
        md.save()

        # Export the model together with the column order it was scored on.
        with open('./HRAnalysis/analysemodels/models/LDA.pkl', 'wb') as model_file:
            pickle.dump({"columns": self.x_test.columns.tolist(), "model": lda},
                        model_file)
    def train(self):
        """Train a Random Forest classifier and persist its results.

        Fits on ``self.x_train``/``self.y_train``, evaluates on
        ``self.x_test``/``self.y_test``, writes the ROC plot to
        ``./static/images/roc_rf.png``, saves the metrics through
        ``ModelDetail`` and pickles the fitted model together with the
        feature column names.

        Any exception raised while fitting, plotting or saving propagates to
        the caller unchanged.
        """
        model_start_time = datetime.datetime.now()

        # A disabled GridSearchCV (cv=10, scoring="accuracy") used to sit here,
        # searching n_estimators in 100..400 (step 100), max_depth in 2..9 and
        # criterion in {"gini", "entropy"}; the values below are presumably
        # its winners -- verify before retuning.
        rf2 = RandomForestClassifier(
            criterion="entropy",
            max_depth=8,
            # "auto" meant "sqrt" for classifiers and has been removed from
            # newer scikit-learn releases; "sqrt" is the equivalent spelling.
            max_features="sqrt",
            n_estimators=100)
        rf2.fit(self.x_train, self.y_train)
        y_pred = rf2.predict(self.x_test)
        # scikit-learn documents the argument order as (y_true, y_pred).
        acc_rf2 = accuracy_score(self.y_test, y_pred)
        print("Random Forest Accuracy Score with Grid Search CV is : ",
              acc_rf2)

        # Timing covers fit + predict only, not the reporting below.
        model_running_performance = datetime.datetime.now() - model_start_time

        # Confusion matrix
        conf_mat = confusion_matrix(self.y_test, y_pred)

        # ROC curve, using the probability in column 1
        # (assumes a binary target -- TODO confirm).
        pred_proba_rf = rf2.predict_proba(self.x_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_rf)
        auc_rf = metrics.roc_auc_score(self.y_test, pred_proba_rf)

        fig = plt.figure()
        try:
            plt.plot(fpr, tpr, label="Random Forest, auc_rf = " + str(auc_rf))
            plt.plot([0, 1], [0, 1], color='red', lw=3, linestyle='dashed')
            plt.title('Random Forest ROC')
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.legend(loc=4)
            plt.savefig('./static/images/roc_rf.png')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

        model_score_dict = {
            # total_seconds() keeps sub-second precision and does not wrap
            # after 24 hours the way the ``seconds`` attribute does.
            "model_running_performance":
                model_running_performance.total_seconds() / 60,
            # Plain floats so str(dict) never embeds a NumPy scalar repr.
            "accuracy": float(acc_rf2),
            "conf_mat": conf_mat.tolist(),
            "fpr": fpr.tolist(),
            "tpr": tpr.tolist(),
            "auc": float(auc_rf),
        }

        md = ModelDetail(AlgorithmName='Random Forest',
                         ModelScoreDict=str(model_score_dict))
        md.save()

        # Export the model together with the column order it was scored on.
        with open('./HRAnalysis/analysemodels/models/RF.pkl',
                  'wb') as model_file:
            pickle.dump(
                {
                    "columns": self.x_test.columns.tolist(),
                    "model": rf2,
                }, model_file)
    def train(self):
        """Train a small feed-forward neural network and persist its results.

        Builds a Keras ``Sequential`` model (two hidden ReLU layers of six
        units and one sigmoid output), fits it for 50 epochs on
        ``self.x_train``/``self.y_train`` and evaluates it on
        ``self.x_test``/``self.y_test``. The ROC plot is written to
        ``./static/images/roc_ann.png``, the metrics are saved through
        ``ModelDetail``, the network is saved as ``ANN.h5`` and the feature
        column names are written as JSON next to it.

        The stored ``"accuracy"`` is the test-set accuracy, in line with the
        confusion matrix and ROC values stored alongside it.

        Any exception raised while fitting, plotting or saving propagates to
        the caller unchanged.
        """
        # Imported lazily so TensorFlow is only loaded when this runs.
        from tensorflow.keras.models import Sequential
        from tensorflow.keras.layers import Dense

        model_start_time = datetime.datetime.now()

        classifier = Sequential()
        # Input layer and first hidden layer
        classifier.add(Dense(units=6, kernel_initializer='uniform',
                             activation='relu',
                             input_dim=len(self.x_train.columns)))
        # Second hidden layer
        classifier.add(Dense(units=6, kernel_initializer='uniform',
                             activation='relu'))
        # Output layer: a single sigmoid unit
        classifier.add(Dense(units=1, kernel_initializer='uniform',
                             activation='sigmoid'))

        # Compile with the Adam optimizer and binary cross-entropy loss
        classifier.compile(optimizer='adam', loss='binary_crossentropy',
                           metrics=['accuracy'])

        # Fit the network to the training set
        classifier.fit(self.x_train, self.y_train, epochs=50)

        score, acc_annTrain = classifier.evaluate(self.x_train, self.y_train,
                                                  batch_size=10)
        print('Train score:', score)
        print('Train accuracy:', acc_annTrain)

        # One forward pass feeds both the hard labels and the ROC curve.
        # Sequential.predict_proba no longer exists in newer TensorFlow
        # releases; for a sigmoid output, predict() already returns the
        # probabilities.
        pred_proba_ann = classifier.predict(self.x_test).ravel()
        y_pred = pred_proba_ann > 0.5

        # Timing covers fit + train evaluation + predict, as before.
        model_running_performance = datetime.datetime.now() - model_start_time

        print('*'*20)
        score, acc_annTest = classifier.evaluate(self.x_test, self.y_test,
                                                 batch_size=10)
        print('Test score:', score)
        print('Test accuracy:', acc_annTest)

        conf_mat = metrics.confusion_matrix(self.y_test, y_pred)

        # ROC curve
        fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_ann)
        auc_ann = metrics.roc_auc_score(self.y_test, pred_proba_ann)

        fig = plt.figure()
        try:
            plt.plot(fpr, tpr, label="Neural Network, auc_ann = " + str(auc_ann))
            plt.plot([0, 1], [0, 1], color='red', lw=3, linestyle='dashed')
            plt.title('Neural Network ROC')
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.legend(loc=4)
            plt.savefig('./static/images/roc_ann.png')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

        model_score_dict = {
            # total_seconds() keeps sub-second precision and does not wrap
            # after 24 hours the way the ``seconds`` attribute does.
            "model_running_performance":
                model_running_performance.total_seconds() / 60,
            # Held-out accuracy: every other value stored here is computed on
            # the test split, so the training accuracy would be misleading.
            "accuracy": float(acc_annTest),
            "conf_mat": conf_mat.tolist(),
            "fpr": fpr.tolist(),
            "tpr": tpr.tolist(),
            "auc": float(auc_ann),
        }

        md = ModelDetail(AlgorithmName='ANN',
                         ModelScoreDict=str(model_score_dict))
        md.save()

        # Export the network and the column order it was scored on.
        classifier.save('./HRAnalysis/analysemodels/models/ANN.h5')
        with open('./HRAnalysis/analysemodels/models/ann.txt', 'w') as f:
            json.dump({"columns": self.x_test.columns.tolist()}, f)
    def train(self):
        """Train an AdaBoost classifier and persist its results.

        Fits on ``self.x_train``/``self.y_train``, evaluates on
        ``self.x_test``/``self.y_test``, writes the ROC plot to
        ``./static/images/roc_ada.png``, saves the metrics through
        ``ModelDetail`` and pickles the fitted model together with the
        feature column names.

        Any exception raised while fitting, plotting or saving propagates to
        the caller unchanged.
        """
        model_start_time = datetime.datetime.now()

        # A disabled GridSearchCV (cv=10, scoring='accuracy') used to sit here,
        # searching learning_rate in [0.01, 0.05, 0.1, 0.3, 1, 2],
        # n_estimators in [50, 100, 1000] and algorithm in
        # {"SAMME", "SAMME.R"}; the values below are presumably its winners
        # -- verify before retuning.
        # NOTE(review): recent scikit-learn releases appear to deprecate the
        # ``algorithm`` argument -- confirm against the pinned version.
        ada2 = AdaBoostClassifier(algorithm="SAMME",
                                  learning_rate=0.3,
                                  n_estimators=1000)
        ada2.fit(self.x_train, self.y_train)
        y_pred = ada2.predict(self.x_test)
        # scikit-learn documents the argument order as (y_true, y_pred).
        acc_ada2 = accuracy_score(self.y_test, y_pred)
        print("Adaboost Accuracy Score with Grid Search CV is : ",
              acc_ada2)

        # Timing covers fit + predict only, not the reporting below.
        model_running_performance = datetime.datetime.now() - model_start_time

        # Confusion matrix
        conf_mat = confusion_matrix(self.y_test, y_pred)

        # ROC curve, using the probability in column 1
        # (assumes a binary target -- TODO confirm).
        pred_proba_ada = ada2.predict_proba(self.x_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_ada)
        auc_ada = metrics.roc_auc_score(self.y_test, pred_proba_ada)

        fig = plt.figure()
        try:
            plt.plot(fpr, tpr, label="Adaboost, auc_ada = " + str(auc_ada))
            plt.plot([0, 1], [0, 1], color='red', lw=3, linestyle='dashed')
            plt.title('Adaboost ROC')
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.legend(loc=4)
            plt.savefig('./static/images/roc_ada.png')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

        model_score_dict = {
            # total_seconds() keeps sub-second precision and does not wrap
            # after 24 hours the way the ``seconds`` attribute does.
            "model_running_performance":
                model_running_performance.total_seconds() / 60,
            # Plain floats so str(dict) never embeds a NumPy scalar repr.
            "accuracy": float(acc_ada2),
            "conf_mat": conf_mat.tolist(),
            "fpr": fpr.tolist(),
            "tpr": tpr.tolist(),
            "auc": float(auc_ada),
        }

        md = ModelDetail(AlgorithmName='Adaboost',
                         ModelScoreDict=str(model_score_dict))
        md.save()

        # Export the model together with the column order it was scored on.
        with open('./HRAnalysis/analysemodels/models/Adaboost.pkl',
                  'wb') as model_file:
            pickle.dump(
                {
                    "columns": self.x_test.columns.tolist(),
                    "model": ada2,
                }, model_file)
Exemple #6
0
    def train(self):
        """Train a Logistic Regression classifier and persist its results.

        Fits on ``self.x_train``/``self.y_train``, evaluates on
        ``self.x_test``/``self.y_test``, writes the ROC plot to
        ``./static/images/roc_logr.png``, saves the metrics through
        ``ModelDetail`` and pickles the fitted model together with the
        feature column names.

        Any exception raised while fitting, plotting or saving propagates to
        the caller unchanged.
        """
        model_start_time = datetime.datetime.now()

        # A disabled GridSearchCV (cv=10) used to sit here, searching C in
        # [20, 40, 60, 80, 100, 120] and penalty in {"l1" (lasso),
        # "l2" (ridge)}; the values below are presumably its winners --
        # verify before retuning.
        logreg2 = LogisticRegression(C=20.0, penalty="l2")
        logreg2.fit(self.x_train, self.y_train)
        y_pred = logreg2.predict(self.x_test)

        # scikit-learn documents the argument order as (y_true, y_pred).
        acc_logreg2 = accuracy_score(self.y_test, y_pred)
        print(
            "Logistic Regression Accuracy Score with Grid Search CV is : ",
            acc_logreg2)

        # Timing covers fit + predict only, not the reporting below.
        model_running_performance = datetime.datetime.now() - model_start_time

        # Confusion matrix
        conf_mat = confusion_matrix(self.y_test, y_pred)

        # ROC curve, using the probability in column 1
        # (assumes a binary target -- TODO confirm).
        predict_proba_logreg = logreg2.predict_proba(self.x_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(self.y_test, predict_proba_logreg)
        auc_logreg = metrics.roc_auc_score(self.y_test, predict_proba_logreg)

        fig = plt.figure()
        try:
            plt.plot(fpr,
                     tpr,
                     label="Logistic Regression, auc_logreg = " +
                     str(auc_logreg))
            plt.plot([0, 1], [0, 1], color='red', lw=3, linestyle='dashed')
            plt.title('Logistic Regression ROC')
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.legend(loc=4)
            plt.savefig('./static/images/roc_logr.png')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

        model_score_dict = {
            # total_seconds() keeps sub-second precision and does not wrap
            # after 24 hours the way the ``seconds`` attribute does.
            "model_running_performance":
                model_running_performance.total_seconds() / 60,
            # Plain floats so str(dict) never embeds a NumPy scalar repr.
            "accuracy": float(acc_logreg2),
            "conf_mat": conf_mat.tolist(),
            "fpr": fpr.tolist(),
            "tpr": tpr.tolist(),
            "auc": float(auc_logreg),
        }

        md = ModelDetail(AlgorithmName='Logistic Regression',
                         ModelScoreDict=str(model_score_dict))
        md.save()

        # Export the model together with the column order it was scored on.
        with open('./HRAnalysis/analysemodels/models/LogReg.pkl',
                  'wb') as model_file:
            pickle.dump(
                {
                    "columns": self.x_test.columns.tolist(),
                    "model": logreg2,
                }, model_file)
Exemple #7
0
    def train(self):
        """Train a k-nearest-neighbours classifier and persist its results.

        Fits on ``self.x_train``/``self.y_train``, evaluates on
        ``self.x_test``/``self.y_test``, writes the ROC plot to
        ``./static/images/roc_knn.png``, saves the metrics through
        ``ModelDetail`` and pickles the fitted model together with the
        feature column names.

        Any exception raised while fitting, plotting or saving propagates to
        the caller unchanged.
        """
        model_start_time = datetime.datetime.now()

        # A disabled GridSearchCV (cv=10, scoring="accuracy") used to sit here,
        # searching leaf_size in 2..19; the value below is presumably its
        # winner -- verify before retuning.
        knn2 = KNeighborsClassifier(leaf_size=2)
        knn2.fit(self.x_train, self.y_train)
        y_pred = knn2.predict(self.x_test)
        # scikit-learn documents the argument order as (y_true, y_pred).
        acc_knn2 = accuracy_score(self.y_test, y_pred)
        print("KNN Accuracy Score is :", acc_knn2)

        # Timing covers fit + predict only, not the reporting below.
        model_running_performance = datetime.datetime.now() - model_start_time

        # Confusion matrix
        conf_mat = confusion_matrix(self.y_test, y_pred)

        # ROC curve, using the probability in column 1
        # (assumes a binary target -- TODO confirm).
        pred_proba_knn = knn2.predict_proba(self.x_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_knn)
        auc_knn = metrics.roc_auc_score(self.y_test, pred_proba_knn)

        fig = plt.figure()
        try:
            plt.plot(fpr, tpr, label="KNN, auc_knn = " + str(auc_knn))
            plt.plot([0, 1], [0, 1], color='red', lw=3, linestyle='dashed')
            plt.title('KNN ROC')
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.legend(loc=4)
            plt.savefig('./static/images/roc_knn.png')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

        model_score_dict = {
            # total_seconds() keeps sub-second precision and does not wrap
            # after 24 hours the way the ``seconds`` attribute does.
            "model_running_performance":
                model_running_performance.total_seconds() / 60,
            # Plain floats so str(dict) never embeds a NumPy scalar repr.
            "accuracy": float(acc_knn2),
            "conf_mat": conf_mat.tolist(),
            "fpr": fpr.tolist(),
            "tpr": tpr.tolist(),
            "auc": float(auc_knn),
        }

        md = ModelDetail(AlgorithmName='KNN',
                         ModelScoreDict=str(model_score_dict))
        md.save()

        # Export the model together with the column order it was scored on.
        with open('./HRAnalysis/analysemodels/models/KNN.pkl',
                  'wb') as model_file:
            pickle.dump(
                {
                    "columns": self.x_test.columns.tolist(),
                    "model": knn2,
                }, model_file)
Exemple #8
0
    def train(self):
        """Train an RBF-kernel support vector classifier and persist results.

        Fits on ``self.x_train``/``self.y_train``, evaluates on
        ``self.x_test``/``self.y_test``, writes the ROC plot to
        ``./static/images/roc_svm.png``, saves the metrics through
        ``ModelDetail`` and pickles the fitted model together with the
        feature column names.

        Any exception raised while fitting, plotting or saving propagates to
        the caller unchanged.
        """
        model_start_time = datetime.datetime.now()

        # A disabled GridSearchCV (cv=10, scoring="accuracy") used to sit here,
        # searching C in 100..900 (step 100), gamma in [0.01, 0.001, 0.0001]
        # with kernel "rbf"; the values below are presumably its winners --
        # verify before retuning.
        # probability=True is what makes predict_proba available for the ROC.
        svm2 = SVC(C=100, gamma=0.001, kernel="rbf", probability=True)
        svm2.fit(self.x_train, self.y_train)
        y_pred = svm2.predict(self.x_test)
        # scikit-learn documents the argument order as (y_true, y_pred).
        acc_svm2 = accuracy_score(self.y_test, y_pred)
        print("SVM Score with Grid Search CV is :", acc_svm2)

        # Timing covers fit + predict only, not the reporting below.
        model_running_performance = datetime.datetime.now() - model_start_time

        # Confusion matrix
        conf_mat = confusion_matrix(self.y_test, y_pred)

        # ROC curve, using the probability in column 1
        # (assumes a binary target -- TODO confirm).
        pred_proba_svm = svm2.predict_proba(self.x_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(self.y_test, pred_proba_svm)
        auc_svm = metrics.roc_auc_score(self.y_test, pred_proba_svm)

        fig = plt.figure()
        try:
            plt.plot(fpr,
                     tpr,
                     label="Support Vector Machine, auc_svm = " + str(auc_svm))
            plt.plot([0, 1], [0, 1], color='red', lw=3, linestyle='dashed')
            plt.title('Support Vector Machine ROC')
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.legend(loc=4)
            plt.savefig('./static/images/roc_svm.png')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

        model_score_dict = {
            # total_seconds() keeps sub-second precision and does not wrap
            # after 24 hours the way the ``seconds`` attribute does.
            "model_running_performance":
                model_running_performance.total_seconds() / 60,
            # Plain floats so str(dict) never embeds a NumPy scalar repr.
            "accuracy": float(acc_svm2),
            "conf_mat": conf_mat.tolist(),
            "fpr": fpr.tolist(),
            "tpr": tpr.tolist(),
            "auc": float(auc_svm),
        }

        md = ModelDetail(AlgorithmName='SVM',
                         ModelScoreDict=str(model_score_dict))
        md.save()

        # Export the model together with the column order it was scored on.
        with open('./HRAnalysis/analysemodels/models/SVM.pkl',
                  'wb') as model_file:
            pickle.dump(
                {
                    "columns": self.x_test.columns.tolist(),
                    "model": svm2,
                }, model_file)