Example #1
from sklearn.linear_model import LogisticRegression
from yellowbrick.classifier import ROCAUC

def base_model(X_train, y_train, X_test, y_test):
    model = LogisticRegression(multi_class='auto', solver='lbfgs')
    visualizer = ROCAUC(model, classes=['dancehall','reggae','soca','pop'])

    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.show() 
Example #2
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import svm
from sklearn.feature_selection import RFE
from sklearn.metrics import f1_score
from yellowbrick.classifier import (ClassificationReport, ClassPredictionError,
                                    ConfusionMatrix, PrecisionRecallCurve, ROCAUC)
from yellowbrick.model_selection import LearningCurve

def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(
        rfe, scoring='f1_weighted', n_jobs=4
    )
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False,
                                   classes=class_names, size=(1080, 720))
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png')

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(
        rfe, classes=class_names
    )
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080,720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
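Because eva_model returns the weighted F1 of the RFE-reduced LinearSVC, it can drive a small parameter sweep. A minimal usage sketch, assuming X is a pandas DataFrame (the function reads X.columns); the output directory, C values, feature counts, and class names (borrowed from Example #1) are illustrative placeholders:

import os

os.makedirs('plots', exist_ok=True)  # eva_model writes its PNGs and features.csv here

# Hypothetical sweep; each call overwrites the plots from the previous one.
results = {(c, n): eva_model(c, n, X, y, X_test, y_test,
                             class_names=['dancehall', 'reggae', 'soca', 'pop'],
                             outdir='plots')
           for c in (0.01, 0.1, 1.0)
           for n in (10, 20)}
(best_c, best_n), best_f1 = max(results.items(), key=lambda kv: kv[1])
print(f"best weighted F1 = {best_f1:.3f} (C={best_c}, n_features={best_n})")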
Example #3
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ROCAUC

def ROC_Curve(Model, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)
    visualizer = ROCAUC(Model)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show()
Example #4
import streamlit as st
from yellowbrick.classifier import ROCAUC

def plotting_ROC_curve_yellowbrick(model, labels, X_train, y_train, X_test,
                                   y_test):
    st.subheader('ROC Curve')
    visualizer = ROCAUC(model, classes=labels)
    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show()
    st.pyplot()  # Render the current matplotlib figure in Streamlit
Example #5
def roc_curve(self, classes) -> None:
    visualizer = ROCAUC(self.trained_model, classes=classes)
    visualizer.fit(self.X_train,
                   self.y_train)  # Fit the training data to the visualizer
    visualizer.score(self.X_test,
                     self.y_test)  # Evaluate the model on the test data
    save_dir = f"{self.plots_dir}/roc_curve_{self.model_id}.png"
    visualizer.show(outpath=save_dir)
    if not LOCAL:
        upload_to_s3(save_dir,
                     f'plots/roc_curve_{self.model_id}.png',
                     bucket=S3_BUCKET_NAME)
    plt.clf()
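The method above assumes a module-level LOCAL flag, an S3_BUCKET_NAME constant, and an upload_to_s3 helper, none of which appear in the snippet. A minimal sketch of what they might look like with boto3 (all names here are assumptions, not part of the original):

import boto3

LOCAL = False                      # hypothetical: set True to skip uploads during local runs
S3_BUCKET_NAME = "my-model-plots"  # hypothetical bucket name

def upload_to_s3(local_path, key, bucket):
    # Upload a local file to s3://bucket/key; assumes boto3 credentials are configured.
    boto3.client("s3").upload_file(local_path, bucket, key)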
Example #6
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix, roc_auc_score)
from yellowbrick.classifier import ROCAUC

def evaluate_model(model, X_train, y_train, X_test, y_test, coef_):
    prediction = model.predict(X_test)
    acc_test = accuracy_score(y_test, prediction)
    acc_train = accuracy_score(y_train, model.predict(X_train))
    print("\n")
    print("Accuracy Score (Test): " + str(acc_test))
    print("Accuracy Score (Train): " + str(acc_train))
    print("Difference between train and test accuracy = {0}".format(abs(acc_test - acc_train)))
    print("ROC AUC Score: " + str(roc_auc_score(y_test, prediction)))
    print("\n")
    print("Classification Report:")
    print(classification_report(y_test, prediction))
    # confusion matrix
    plt.figure()
    cm = confusion_matrix(y_test, prediction)
    sns.heatmap(cm, annot=True, cmap="YlGnBu", fmt="d")
    plt.title("Confusion Matrix (1: Churned, 0: Not Churned)")
    plt.show()
    
    # roc-curve
    plt.figure()
    visualizer = ROCAUC(model, classes=["Not Churn", "Churn"])
    
    visualizer.fit(X_train, y_train)        # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)        # Evaluate the model on the test data
    visualizer.show()                       # Finalize and show the figure    
    plt.show()
    
    
    if coef_:
        feature_imp = {}

        for idx, col_name in enumerate(X_train.columns):
            feature_imp[col_name] = model.coef_[0][idx]

        feature_imp = pd.DataFrame(feature_imp.items(), columns=["Feature", "Feature Importance"])
        feature_imp.set_index("Feature", inplace=True)

        ax = feature_imp.plot(kind="bar", fontsize=10, color="red")
        
        ax.set_title("Future Importance",fontdict={"fontsize":12,"fontweight":"bold"})
        ax.set_ylabel("Coef_")
        
        plt.show()
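A hypothetical call for the churn model above, assuming a fitted binary LogisticRegression and pandas DataFrames (the coefficient plot iterates over X_train.columns):

from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
evaluate_model(clf, X_train, y_train, X_test, y_test, coef_=True)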
Example #7
y_pred = model.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
import seaborn as sn
import matplotlib.pyplot as plt
cmET = confusion_matrix(y_test, y_pred)
sn.heatmap(cmET,
           cmap='Blues_r',
           annot=True,
           xticklabels=['1', '2', '3', '4'],
           yticklabels=['1', '2', '3', '4'])
plt.xlabel('Predicted Label')
plt.ylabel('Actual Label')
plt.show()

#ROCAUC Plot
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from yellowbrick.classifier import ROCAUC

# Encode the non-numeric columns (ideally done before the train/test split,
# so that X_train and X_test share the same encoding)
X = OrdinalEncoder().fit_transform(X)
y = LabelEncoder().fit_transform(y)

# Instantiate the classification model and visualizer
# (the classes are the four damage grades)
visualizer = ROCAUC(model, classes=[1, 2, 3, 4])

visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()  # Finalize and render the figure
Example #8
# STEP 5: Accuracy check
#########################################################

from sklearn import metrics
prediction_test = model.predict(X_test)
##Check accuracy on test dataset. 
print ("Accuracy = ", metrics.accuracy_score(y_test, prediction_test))

from yellowbrick.classifier import ROCAUC
print("Classes in the image are: ", np.unique(Y))

#ROC curve for RF
roc_auc=ROCAUC(model, classes=[0, 1, 2, 3])  #Create object
roc_auc.fit(X_train, y_train)
roc_auc.score(X_test, y_test)
roc_auc.show()

##########################################################
# STEP 6: SAVE MODEL FOR FUTURE USE
###########################################################
## You can store the model for future use. In fact, this is how you do machine learning:
## train on training images, validate on test images, and deploy the model on unknown images.

## Save the trained model as a pickle file on disk for future use
import pickle

model_name = "sandstone_model"
pickle.dump(model, open(model_name, 'wb'))

## To test the model on future datasets
#loaded_model = pickle.load(open(model_name, 'rb'))
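Loading the pickled model later mirrors the dump call; a short sketch, where new_features is a hypothetical stand-in for a feature array extracted from an unknown image:

import pickle
import numpy as np

loaded_model = pickle.load(open("sandstone_model", 'rb'))

# new_features is hypothetical: a (n_pixels, n_features) array built the same
# way as X_train was for the training images.
new_features = np.zeros((10, loaded_model.n_features_in_))  # placeholder shape
segmented = loaded_model.predict(new_features)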
Example #9

import pandas as pd
from sklearn.model_selection import cross_val_score

# Note: RMSE is an unusual scorer for a classifier; it treats the integer
# class labels as ordered numbers rather than categories.
cv_scores = cross_val_score(
                estimator = clasificador,
                X         = X_train,
                y         = y_train,
                scoring   = 'neg_root_mean_squared_error',
                cv        = 5
             )

print(f"Cross-validation scores: {cv_scores}")
print(f"Mean of the cross-validation scores: {cv_scores.mean()}")
cv_scores = pd.DataFrame(cv_scores)

prediccion = clasificador.predict(X_test)

from yellowbrick.classifier import ROCAUC
visualizer = ROCAUC(clasificador, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

visualizer.fit(X_train, y_train)        # Fit the training data to the visualizer
visualizer.score(X_test, y_test)        # Evaluate the model on the test data
visualizer.show()       


y_pred_proba = clasificador.predict_proba(X_test)

from sklearn.metrics import roc_auc_score
score = roc_auc_score(y_test, y_pred_proba, multi_class="ovr")
print(f"Multi-class ROC AUC (one-vs-rest): {score}")
Example #10
# (tail of a standard matplotlib ROC plot; the plt.plot calls precede this excerpt)
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Log_ROC')
plt.show()

# Yellowbrick visualization
viz = ClassificationReport(model)
viz.fit(X_train, y_train)
viz.score(X_test, y_test)
viz.show()

roc = ROCAUC(model)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.show()


# # Perform Gridsearch using various parameters
# Note: of these solvers, only 'liblinear' supports the 'l1' penalty;
# incompatible combinations fail to fit and are scored as NaN by GridSearchCV.
penalty = ['l1', 'l2']
C = [0.1, 0.25, 0.5]
solver = ['newton-cg', 'lbfgs', 'liblinear']
class_weight = [{0: 15, 1: 85}, {0: 0.14, 1: 0.86}]
miter = [5000]

hyperparams = dict(C=C, penalty=penalty, solver=solver, class_weight=class_weight, max_iter=miter)
RegLog = LogisticRegression()

finalmodel = GridSearchCV(RegLog, hyperparams, verbose=0, cv=5)
finalmodel.fit(X_train, y_train)
y_predict = finalmodel.predict(X_test)
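Once fitted, GridSearchCV exposes the winning configuration and the refitted estimator; a short follow-up using the names above:

print("Best parameters:", finalmodel.best_params_)
print("Best CV score: %.3f" % finalmodel.best_score_)

best_clf = finalmodel.best_estimator_  # refitted LogisticRegression, ready for reuse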
Example #11
def best_model(model):
    # Relies on X_train, y_train, X_test, y_test defined at module level
    visualizer = ROCAUC(model, classes=['reggae', 'soca', 'dancehall', 'pop'])
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.show()