Example #1
def base_model(X_train, y_train, X_test, y_test):
    from sklearn.linear_model import LogisticRegression
    from yellowbrick.classifier import ROCAUC

    model = LogisticRegression(multi_class='auto', solver='lbfgs')
    visualizer = ROCAUC(model, classes=['dancehall', 'reggae', 'soca', 'pop'])

    visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)    # Evaluate the model on the test data
    visualizer.show()
Example #2
def evaluation(estimator, X, Y, x, y):
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.model_selection import StratifiedKFold
    from yellowbrick.classifier import ConfusionMatrix, ROCAUC
    from yellowbrick.model_selection import LearningCurve

    # class labels taken from the first two target values (assumes a binary target)
    classes = [Y[1], Y[0]]
    f, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(18, 6))

    # Confusion Matrix
    cmm = ConfusionMatrix(model=estimator,
                          ax=ax1,
                          classes=classes,
                          label_encoder={
                              0.0: 'Negativo',
                              1.0: 'Positivo'
                          })
    cmm.fit(X, Y)        # fit is required before score()
    cmm.score(x, y)

    # ROCAUC
    viz = ROCAUC(model=estimator, ax=ax2)
    viz.fit(X, Y)
    viz.score(x, y)

    # Learning Curve
    cv_strategy = StratifiedKFold(n_splits=3)
    sizes = np.linspace(0.3, 1.0, 10)
    visualizer = LearningCurve(estimator,
                               ax=ax,
                               cv=cv_strategy,
                               scoring='roc_auc',
                               train_sizes=sizes,
                               n_jobs=4)
    visualizer.fit(X, Y)

    # Finalize each visualizer on its axes, then render the shared figure once
    cmm.finalize()
    viz.finalize()
    visualizer.finalize()
    plt.show()
Example #3
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    import pandas as pd
    import matplotlib.pyplot as plt
    from sklearn import svm
    from sklearn.feature_selection import RFE
    from sklearn.metrics import f1_score
    from yellowbrick.classifier import (ClassificationReport, ConfusionMatrix,
                                        PrecisionRecallCurve, ClassPredictionError,
                                        ROCAUC)
    from yellowbrick.model_selection import LearningCurve

    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(
        rfe, scoring='f1_weighted', n_jobs=4
    )
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False,
                                   classes=class_names, size=(1080, 720))
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png')

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(
        rfe, classes=class_names
    )
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080,720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)  # evaluate on the held-out split like the other plots
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    # Persist the names of the features RFE retained
    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
Example #4
def ROC_Curve(Model, X, y):
    from sklearn.model_selection import train_test_split
    from yellowbrick.classifier import ROCAUC

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.15)
    visualizer = ROCAUC(Model)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show()
Example #5
def ROC_AUC(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import ROCAUC

    # Instantiate the visualizer with the classification model
    visualizer = ROCAUC(model, classes=classes)

    visualizer.fit(X_train, Y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, Y_test)  # Evaluate the model on the test data
    g = visualizer.poof()
Example #6
def plotting_ROC_curve_yellowbrick(model, labels, X_train, y_train, X_test,
                                   y_test):
    import streamlit as st
    from yellowbrick.classifier import ROCAUC

    st.subheader('ROC Curve')
    visualizer = ROCAUC(model, classes=labels)
    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show()
    st.pyplot()
    return
Example #7
def log_roc_auc_chart(classifier,
                      X_train,
                      X_test,
                      y_train,
                      y_test,
                      experiment=None):
    """Log ROC-AUC chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, logs to the currently active, most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_roc_auc_chart(rfc, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_classifier(classifier), 'classifier should be an sklearn classifier.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = ROCAUC(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='ROC-AUC')
        plt.close(fig)
    except Exception as e:
        print('Did not log ROC-AUC chart. Error {}'.format(e))
Example #8
def classification_sanity_check(model,
                                X_train,
                                X_test,
                                y_train,
                                y_test,
                                classes=None):
    visualizer = ROCAUC(model, micro=False, macro=False, classes=classes)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()
Example #9
    def roc_curve(self, classes) -> None:
        visualizer = ROCAUC(self.trained_model, classes=classes)
        visualizer.fit(self.X_train,
                       self.y_train)  # Fit the training data to the visualizer
        visualizer.score(self.X_test,
                         self.y_test)  # Evaluate the model on the test data
        save_dir = f"{self.plots_dir}/roc_curve_{self.model_id}.png"
        visualizer.show(outpath=save_dir)
        if not LOCAL:
            upload_to_s3(save_dir,
                         f'plots/roc_curve_{self.model_id}.png',
                         bucket=S3_BUCKET_NAME)
        plt.clf()
Example #10
def create_roc_auc_chart(classifier, X_train, X_test, y_train, y_test):
    """Create ROC-AUC chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to a run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/roc_auc'] = npt_utils.create_roc_auc_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(classifier), 'classifier should be an sklearn classifier.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = ROCAUC(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not create ROC-AUC chart. Error {}'.format(e))

    return chart
Example #11
def rocauc(X, y, model, outpath, **kwargs):
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split
    from yellowbrick.classifier import ROCAUC

    # Create a new figure and axes
    _, ax = plt.subplots()

    # Instantiate the classification model and visualizer
    visualizer = ROCAUC(model, ax=ax, **kwargs)

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    # Save to disk
    visualizer.poof(outpath=outpath)
Example #13
def evaluate_model(clf_, X_tr, X_te, y_tr, y_te, cls_rpt_tr=False, show=True, cls_labels=None, binary=False):
    """Takes any classifier, train/test data for X/y, labels for graph (optional).
    Will output (if show) a Sklearn Classification Report and Confusion Matrix
    along with a Yellowbrick ROC/AUC curve and a Feature Importance graph (if a tree).
    Otherwise returns training/test predictions."""

    import sklearn.metrics as metrics
    import matplotlib.pyplot as plt
    from yellowbrick.classifier import ROCAUC

    ## Fit and predict (fit_n_pred is a project helper; a sketch follows this example)
    y_hat_trn, y_hat_tes = fit_n_pred(clf_, X_tr, X_te, y_tr)
    
    if show:
        ## Classification Report / Scores
        if cls_rpt_tr:
            print('Classification Report Train')
            print(metrics.classification_report(y_tr,y_hat_trn))
        else:
            print('Classification Report Test')
            print(metrics.classification_report(y_te,y_hat_tes))

        ## Confusion Matrix
        fig, ax = plt.subplots(figsize=(10,5), ncols=2)
        
        # NOTE: plot_confusion_matrix was removed in scikit-learn 1.2; on newer
        # versions use ConfusionMatrixDisplay.from_estimator instead
        metrics.plot_confusion_matrix(clf_, X_te, y_te, cmap="YlOrRd",
                                      normalize='true', ax=ax[0])
        ax[0].set(title='Confusion Matrix Test Data')
        ax[0].grid(False)

        roc = ROCAUC(clf_, classes=cls_labels, ax=ax[1])
        roc.fit(X_tr, y_tr)
        roc.score(X_te, y_te)
        roc.finalize()
            
        plt.tight_layout()
        plt.show()
        
        if binary:
            try:
                imps = plot_importance(clf_, X_tr)   # plot_importance: project helper
            except Exception:
                imps = None
        
    else:
        return y_hat_trn, y_hat_tes
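# evaluate_model relies on a fit_n_pred helper that is not shown in this
# example; a minimal sketch of what it presumably does, inferred only from the
# call site (the body is an assumption):
def fit_n_pred(clf_, X_tr, X_te, y_tr):
    clf_.fit(X_tr, y_tr)            # fit on the training split
    y_hat_trn = clf_.predict(X_tr)  # predictions for the training split
    y_hat_tes = clf_.predict(X_te)  # predictions for the held-out split
    return y_hat_trn, y_hat_tes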
Example #14
    def generate_roc_auc(self, X_train, y_train, X_test, y_test, **kwargs):
        """
        Given the training and testing sets, computes the ROC AUC metrics
        for the given model and returns a ROC AUC plotly figure.
        
        :param X_train: the training feature set.
        :param y_train: the training target set.
        :param X_test: the testing feature set.
        :param y_test: the testing target set.
        """
        
        visualizer = ROCAUC(self.model, classes=self.classes)

        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        
        # ROCAUC stores the computed curves in dicts keyed by class index
        # (plus "micro"/"macro"): tpr holds the y-values, fpr the matching x-values
        roc_data = visualizer.tpr
        fpr_data = visualizer.fpr
        
        layout = go.Layout(yaxis = dict(
                                scaleratio = 1,
                                range=[-.1, 1]
                            ),
                            xaxis=dict(
                                range=[-.1, 1],
                                #scaleratio = 1
                            ),
                        )

        fig = go.Figure(layout=layout)

        for tr in roc_data.keys():
            trace = go.Scatter(
                x = fpr_data[tr],   # false positive rate on the x-axis
                y = roc_data[tr],   # true positive rate on the y-axis
                name = f'{tr}' if not isinstance(tr, int) else f'{self.classes[tr]}',
                line = dict(shape = 'hv')
            )
            fig.add_trace(trace)

        lin_line = go.Scatter(
            x = [0,1],
            y = [0,1],
            name = 'linear_line',
            line = dict(dash='dash')
        )

        fig.add_trace(lin_line)
        return fig
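# Example usage (hypothetical wrapper instance): assuming an object `clf_vis`
# whose .model and .classes attributes are set, the returned plotly figure
# renders with:
#   fig = clf_vis.generate_roc_auc(X_train, y_train, X_test, y_test)
#   fig.show()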
Example #15
def rocauc(dataset):
    # newfig and savefig are plotting helpers defined elsewhere in the
    # source project; the dataset loaders come from yellowbrick
    from sklearn.naive_bayes import GaussianNB
    from sklearn.linear_model import RidgeClassifier
    from sklearn.preprocessing import OrdinalEncoder
    from sklearn.model_selection import train_test_split as tts
    from yellowbrick.classifier import ROCAUC
    from yellowbrick.datasets import load_occupancy, load_game

    if dataset == "binary":
        X, y = load_occupancy()
        model = GaussianNB()
    elif dataset == "multiclass":
        X, y = load_game()
        X = OrdinalEncoder().fit_transform(X)
        model = RidgeClassifier()
    else:
        raise ValueError("unknown dataset")

    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)
    oz = ROCAUC(model, ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "rocauc_{}".format(dataset))
Example #16
def evaluate_model(model, x_test, y_test, coef_):
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    from sklearn.metrics import (accuracy_score, roc_auc_score,
                                 classification_report, confusion_matrix)
    from yellowbrick.classifier import ROCAUC

    # NOTE: X_train and y_train are taken from the enclosing (global) scope
    prediction = model.predict(x_test)
    acc_test = accuracy_score(y_test, prediction)
    acc_train = accuracy_score(y_train, model.predict(X_train))
    print("\n")
    print("Accuracy Score(Test): " + str(acc_test))
    print("Accuracy Score(Train): " + str(acc_train))
    print("Difference between train and test accuracy = {0}".format(abs(acc_test-acc_train)))
    print("Roc Auc Score: "+ str(roc_auc_score(y_test,prediction)))
    print("\n")
    print("Classification Report:")
    print(classification_report(y_test,prediction))
    # confusion matrix
    plt.figure()
    cm = confusion_matrix(y_test,prediction)
    sns.heatmap(cm,annot=True,cmap="YlGnBu",fmt="d")
    plt.title("Confusion Matrix(1:Churned, 0:Not Churned)")
    plt.show()
    
    # roc-curve
    plt.figure()
    visualizer = ROCAUC(model, classes=["Not Churn", "Churn"])
    
    visualizer.fit(X_train, y_train)        # Fit the training data to the visualizer
    visualizer.score(x_test, y_test)        # Evaluate the model on the test data
    visualizer.show()                       # Finalize and show the figure    
    plt.show()
    
    
    if coef_:
        feature_imp = {}
        for idx, col_name in enumerate(X_train.columns):
            feature_imp[col_name] = model.coef_[0][idx]

        feature_imp = pd.DataFrame(feature_imp.items(), columns=["Feature", "Feature Importance"])
        feature_imp.set_index("Feature", inplace=True)

        ax = feature_imp.plot(kind="bar", fontsize=10, color="red")
        ax.set_title("Feature Importance", fontdict={"fontsize": 12, "fontweight": "bold"})
        ax.set_ylabel("Coef_")

        plt.show()
Example #17
def rocauc(ax):
    from yellowbrick.classifier import ROCAUC
    from sklearn.linear_model import LogisticRegression

    # Specify the features of interest and the classes of the target
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    target = "occupancy"
    classes = ['unoccupied', 'occupied']

    # Load the data
    splits = load_data('occupancy', cols=features, target=target, tts=True)
    X_train, X_test, y_train, y_test = splits

    estimator = LogisticRegression()
    visualizer = ROCAUC(estimator, ax=ax)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    return visualizer
Example #18
def showROC():
    # Load the classification data set
    data = load_data('occupancy')

    # Specify the features of interest and the classes of the target
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    classes = ['unoccupied', 'occupied']

    # Extract the numpy arrays from the data frame
    # (DataFrame.as_matrix was removed in pandas 0.25; use .values instead)
    X = data[features].values
    y = data.occupancy.values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    # Instantiate the classification model and visualizer
    logistic = LogisticRegression()
    visualizer = ROCAUC(logistic)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    g = visualizer.poof()  # Draw/show/poof the data
Example #19
def plot(X, Y):
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB
    from yellowbrick.classifier import ROCAUC

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
    oz = ROCAUC(GaussianNB())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    oz.poof()
Example #20
y_pred = model.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
import seaborn as sn
import matplotlib.pyplot as plt
cmET = confusion_matrix(y_test, y_pred)
sn.heatmap(cmET,
           cmap='Blues_r',
           annot=True,
           xticklabels='1234',
           yticklabels='1234')
plt.xlabel('Predicted Label')
plt.ylabel('Actual Label')
plt.show()

#ROCAUC Plot
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from yellowbrick.classifier import ROCAUC

# Encode the non-numeric columns
X = OrdinalEncoder().fit_transform(X)
y = LabelEncoder().fit_transform(y)

# Instantiate the classification model and visualizer
visualizer = ROCAUC(model, classes=[1, 2, 3, 4])

visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()  # Finalize and render the figure
Example #21
#######################################################
# STEP 5: Accuracy check
#######################################################

from sklearn import metrics
prediction_test = model.predict(X_test)
##Check accuracy on the test dataset
print("Accuracy = ", metrics.accuracy_score(y_test, prediction_test))

from yellowbrick.classifier import ROCAUC
print("Classes in the image are: ", np.unique(Y))

#ROC curve for RF
roc_auc=ROCAUC(model, classes=[0, 1, 2, 3])  #Create object
roc_auc.fit(X_train, y_train)
roc_auc.score(X_test, y_test)
roc_auc.show()

##########################################################
# STEP 6: SAVE MODEL FOR FUTURE USE
##########################################################
##You can store the model for future use. In fact, this is how you do machine learning:
##train on training images, validate on test images, and deploy the model on unknown images.
#
#
##Save the trained model as a pickle string to disk for future use
model_name = "sandstone_model"
with open(model_name, 'wb') as f:
    pickle.dump(model, f)
#
##To test the model on future datasets
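# A minimal sketch of that reload step, assuming the pickle file saved above
# (X_test stands in for whatever new data you prepare the same way):
with open("sandstone_model", 'rb') as f:
    loaded_model = pickle.load(f)                  # restore the trained classifier
future_predictions = loaded_model.predict(X_test)  # predict on new, unseen data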
Example #22

# NOTE: DatasetMixin, add_categorical and add_column are helpers from the
# original project and are assumed to be imported/defined elsewhere
import numpy as np
import numpy.matlib
from sklearn.svm import LinearSVC
from yellowbrick.classifier import ROCAUC

datasets = DatasetMixin()
credit = datasets.load_data('credit')
credit_keys = credit.dtype.names
datatype = credit.dtype[0]
ncols = len(credit_keys)
categorical_names = ['edu','married']
y_name = 'default'
credit_data = None
for j in range(0,ncols):
    if credit_keys[j] in categorical_names:
        credit_data = add_categorical(credit_data,credit[credit_keys[j]],datatype)
    elif credit_keys[j] == y_name:
        y = credit[y_name].astype(int)
    else:
        credit_data = add_column(credit_data,credit[credit_keys[j]])

datashape = credit_data.shape
nrows = datashape[0]
cmeans = np.mean(credit_data,0)
repmeans = numpy.matlib.repmat(cmeans,nrows,1)
mydata = credit_data - repmeans
sstds = np.std(mydata,0)
repstds = numpy.matlib.repmat(sstds,nrows,1)
mydata = np.divide(mydata,repstds)
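# NOTE: numpy.matlib is deprecated; the three repmat steps above reduce to a
# single broadcasted expression with identical results:
#   mydata = (credit_data - np.mean(credit_data, 0)) / np.std(credit_data, 0)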

visualizer = ROCAUC(LinearSVC())
visualizer.fit(mydata,y)
visualizer.score(mydata,y)
visualizer.poof()
Example #23
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    import os
    import nltk
    import joblib
    import mlflow
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.pipeline import Pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics import (accuracy_score, f1_score, roc_auc_score,
                                 classification_report)
    from yellowbrick.classifier import (ClassificationReport, ConfusionMatrix,
                                        ROCAUC, ClassPredictionError)

    os.makedirs('image', exist_ok=True)  # output directory for poof(outpath=...)
    np.random.seed(100)

    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:

        tfid_vect = TfidfVectorizer(analyzer='word', tokenizer=nltk.tokenize.word_tokenize,
                                    stop_words='english', min_df=5)

        my_pipeline = Pipeline(steps=[('vectorizer', tfid_vect),
                                      ('lr', LogisticRegression(random_state=42))])

        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)

        joblib.dump(my_pipeline, 'pipeline_lr.pkl')

        accuracy = accuracy_score(ytest, predictions)
        f1score = f1_score(ytest, predictions)
        auc_score = roc_auc_score(ytest, predictions)
        class_report = classification_report(ytest, predictions)
        
        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')
        
        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))
        
        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)
        
        visualizer = ClassificationReport(my_pipeline, ax=ax1, classes=[0,1])
        visualizer.fit(xtrain, ytrain)
        visualizer.score(xtest, ytest)
        a=visualizer.poof(outpath="image/classification_report.png")
        print(' ')
        
        mlflow.log_artifact("image/classification_report.png")
        
        # The ConfusionMatrix visualizer takes a model
        cm = ConfusionMatrix(my_pipeline, ax=ax2, classes=[0,1])
        cm.fit(xtrain, ytrain)
        cm.score(xtest, ytest) 
        b=cm.poof(outpath="image/confusionmatrix.png")
        
        mlflow.log_artifact("image/confusionmatrix.png")
        print(' ')
        
        vis = ROCAUC(my_pipeline, ax=ax3, classes=[0,1])
        vis.fit(xtrain, ytrain)  # Fit the training data to the visualizer
        vis.score(xtest, ytest)  # Evaluate the model on the test data
        c = vis.poof(outpath="image/rocauc.png")             # Draw/show/poof the data
        print(' ')
        mlflow.log_artifact("image/rocauc.png")
        
        visual = ClassPredictionError(my_pipeline, ax=ax4, classes=[0,1])
        visual.fit(xtrain, ytrain)
        visual.score(xtest, ytest)
        g = visual.poof(outpath="image/ClassificationError.png")
        print(' ')
        mlflow.log_artifact("image/ClassificationError.png")
        
        
        return run.info.run_uuid
Example #24
# %%
classification_report(clf, X, y)  # yellowbrick's quick-method variant (model, X, y)

# %%
visualizer = ROCAUC(clf, classes=class_names)
visualizer.fit(X, y)    # fit() prepares the visualizer before score()
visualizer.score(X, y)
visualizer.poof()

# %%
visualizer = ClassPredictionError(clf, classes=class_names)
visualizer.fit(X, y)
visualizer.score(X, y)
visualizer.poof()

# %%
visualizer = DiscriminationThreshold(clf)
visualizer.fit(X, y)
visualizer.poof()

# %%
keep = [263, 268, 287, 288, 300, 302, 307, 308, 313, 315]

# %%
seed = 15
test_size = 0.33
Xt, Xv, yt, yv = \
    sklearn.model_selection.train_test_split(
        X[keep], y, test_size=test_size, stratify=y, random_state=seed)

# %%
explainer = shap.TreeExplainer(clf)
shap_values = explainer.shap_values(Xv)
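# The cell ends before the SHAP values are visualized; a typical next step
# (an assumption, not shown in the original) is a summary plot:
shap.summary_plot(shap_values, Xv)  # global view of feature impact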
Example #25
#Note the low accuracy for the important class (201 label)

#The right metric is ROC AUC.
#Starting with version 0.23.1 you can report this for multilabel problems.
#https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html
from sklearn.metrics import roc_auc_score  # version 0.23.1 of sklearn

print("ROC_AUC score for imbalanced data is:")
print(roc_auc_score(y_test, prediction_test_RF))
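# NOTE: roc_auc_score is best computed from probability scores rather than
# hard label predictions; assuming model_RF exposes predict_proba, that is:
#   roc_auc_score(y_test, model_RF.predict_proba(X_test)[:, 1])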

#https://www.scikit-yb.org/en/latest/api/classifier/rocauc.html
from yellowbrick.classifier import ROCAUC

roc_auc = ROCAUC(model_RF)  #Create object
roc_auc.fit(X_train, y_train)
roc_auc.score(X_test, y_test)
roc_auc.show()

#############################################################################
# Handling imbalanced data
#############################################################################

# Technique 2: up-sample the minority class
from sklearn.utils import resample
print(df['Label'].value_counts())

#Separate majority and minority classes
df_majority = df[df['Label'] == 1]
df_minority = df[df['Label'] == 2]
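# The snippet cuts off before the actual resampling; a minimal sketch of the
# standard up-sampling step (n_samples and random_state are assumptions):
import pandas as pd  # for concat below
df_minority_upsampled = resample(df_minority,
                                 replace=True,                # sample with replacement
                                 n_samples=len(df_majority),  # match the majority count
                                 random_state=42)             # reproducibility
df_upsampled = pd.concat([df_majority, df_minority_upsampled])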
Example #26
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Log_ROC')
plt.show()

# Yellowbrick visualization
viz = ClassificationReport(model)
viz.fit(X_train, y_train)
viz.score(X_test, y_test)
viz.show()

roc = ROCAUC(model)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.show()


# Perform grid search over various parameters
penalty = ['l1', 'l2']
C = [0.1, 0.25, 0.5]
solver = ['newton-cg', 'lbfgs', 'liblinear']
class_weight = [{0: 15, 1: 85}, {0: 0.14, 1: 0.86}]
miter = [5000]

hyperparams = dict(C=C, penalty=penalty, solver=solver, class_weight=class_weight, max_iter=miter)
RegLog = LogisticRegression()

finalmodel = GridSearchCV(RegLog, hyperparams, verbose=0, cv=5)
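# The snippet ends before the search is run; the usual next step with the
# sklearn GridSearchCV API (an assumption here) would be:
finalmodel.fit(X_train, y_train)   # run the 5-fold search
print(finalmodel.best_params_)     # best hyperparameter combination
print(finalmodel.best_score_)      # its mean cross-validated score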
Example #27
# (The snippet is truncated here; the instantiation below is reconstructed
# from the calls that follow, with LogisticRegression assumed to match the
# rest of the example.)
conf_matrix = ConfusionMatrix(LogisticRegression(),
                              classes=classes,
                              label_encoder={
                                  0: 'benign',
                                  1: 'malignant'
                              })
conf_matrix.fit(X_train, y_train)
conf_matrix.score(X_test, y_test)
conf_matrix.poof()

### ROC-AUC

from yellowbrick.classifier import ROCAUC

visualizer = ROCAUC(LogisticRegression(), classes=classes)

visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()

### Class Prediction Error

from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(LogisticRegression(), classes=classes)

visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()

### Discrimination Threshold
Example #28
test_mean = np.mean(test_scores, axis=1)
test_mean

test_std = np.std(test_scores, axis=1)
test_std

plt.plot(train_sizes, train_mean, label='Training Score')
plt.plot(train_sizes, test_mean, label='Cross-Validation Score')
plt.fill_between(train_sizes,
                 train_mean - train_std,
                 train_mean + train_std,
                 color='#DDDDDD')
plt.fill_between(train_sizes,
                 test_mean - test_std,
                 test_mean + test_std,
                 color='#DDDDDD')

plt.title("Learning Curve")
plt.xlabel("Training Size")
plt.ylabel("Accuracy Score")
plt.legend(loc='best')
"""**ROC ve AUC**"""

from yellowbrick.classifier import ROCAUC

fig, ax = plt.subplots(1, 1, figsize=(12, 8))
roc_auc = ROCAUC(clf, ax=ax)
roc_auc.fit(pc_train, y_train)
roc_auc.score(pc_test, y_test)

roc_auc.poof()
Example #29
knn_result = pd.DataFrame(knn_result.cv_results_)
knn_result.to_csv('knn_preprocessed.csv')
print('knn process done')
"""
###################################################################
# Evaluation
###################################################################
#Random_Forest Evaluation
rf_best = RandomForestClassifier(criterion="entropy", bootstrap=False,max_depth=10)
rf_best.fit(X,Y)
print(confusion_matrix(Y_test,rf_best.predict(X_test)))
plt.figure(figsize=(2, 2))
sns.heatmap(metrics.confusion_matrix(Y_test,rf_best.predict(X_test)), annot=True, fmt='.2f', linewidths=.1, cmap='Blues')

visualizer = ROCAUC(rf_best, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], micro=False, macro=True, per_class=False)
visualizer.fit(X, Y)
visualizer.score(X, Y)
visualizer.show()

#Bagging Classifier Evaluation
bagging_best = BaggingClassifier(bootstrap=True, n_estimators=16)
bagging_best.fit(X,Y)
print(confusion_matrix(Y_test,bagging_best.predict(X_test)))
plt.figure(figsize=(2, 2))
sns.heatmap(metrics.confusion_matrix(Y_test,bagging_best.predict(X_test)), annot=True, fmt='.2f', linewidths=.1, cmap='Blues')


visualizer = ROCAUC(bagging_best, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], micro=False, macro=True, per_class=False)
visualizer.fit(X, Y)
visualizer.score(X, Y)
visualizer.show()
Example #30
def best_model(model):
    # NOTE: X_train, y_train, X_test and y_test come from the enclosing scope
    visualizer = ROCAUC(model, classes=['reggae', 'soca', 'dancehall', 'pop'])
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.show()