Code Example #1
from sklearn.linear_model import LogisticRegression
from yellowbrick.classifier import ROCAUC

def base_model(X_train, y_train, X_test, y_test):
    model = LogisticRegression(multi_class='auto', solver='lbfgs')
    visualizer = ROCAUC(model, classes=['dancehall', 'reggae', 'soca', 'pop'])

    visualizer.fit(X_train, y_train)   # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)   # Evaluate the model on the test data
    visualizer.show()                  # Finalize and render the figure
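A minimal, self-contained way to exercise base_model, substituting scikit-learn's synthetic data for the four-genre dataset the snippet assumes (the generated data and split below are hypothetical stand-ins):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Hypothetical stand-in for the four-genre audio-feature dataset
X, y = make_classification(n_samples=1000, n_classes=4, n_informative=8,
                           random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)
base_model(X_train, y_train, X_test, y_test)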
Code Example #2
def evaluation(estimator, X, Y, x, y):
    """Plot a learning curve, confusion matrix, and ROC curve for a fitted
    binary estimator; (X, Y) is the training set, (x, y) the test set."""
    classes = [Y[1], Y[0]]  # display labels taken from the first two target values
    f, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(18, 6))

    # Confusion matrix (assumes `estimator` is already fitted)
    cmm = ConfusionMatrix(model=estimator,
                          ax=ax1,
                          classes=classes,
                          label_encoder={
                              0.0: 'Negative',
                              1.0: 'Positive'
                          })
    cmm.score(x, y)

    # ROC/AUC
    viz = ROCAUC(model=estimator, ax=ax2)
    viz.fit(X, Y)
    viz.score(x, y)

    # Learning curve
    cv_strategy = StratifiedKFold(n_splits=3)
    sizes = np.linspace(0.3, 1.0, 10)
    visualizer = LearningCurve(estimator,
                               ax=ax,
                               cv=cv_strategy,
                               scoring='roc_auc',
                               train_sizes=sizes,
                               n_jobs=4)
    visualizer.fit(X, Y)

    cmm.poof()
    viz.poof()
    visualizer.poof()
    plt.show()
Code Example #3
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(
        rfe, scoring='f1_weighted', n_jobs=4
    )
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names)
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png', size=(1080, 720))

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(
        rfe, classes=class_names
    )
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080, 720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)  # score on the held-out set, consistent with the plots above
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
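A hedged sketch of calling eva_model, assuming the original module's imports (svm, RFE, the yellowbrick visualizers, f1_score, pd) are in scope; note that X must be a pandas DataFrame because the function reads X.columns for the retained feature names (the data, split sizes, and output directory below are hypothetical):

import pandas as pd
from sklearn.datasets import make_classification

# Hypothetical binary dataset with named feature columns
X_arr, y_arr = make_classification(n_samples=400, n_features=20, random_state=0)
X_df = pd.DataFrame(X_arr, columns=['f%d' % i for i in range(20)])

# First 300 rows for training/RFE, the rest held out for scoring
f1 = eva_model(c=1.0, n=10, X=X_df.iloc[:300], y=y_arr[:300],
               X_test=X_df.iloc[300:], y_test=y_arr[300:],
               class_names=['neg', 'pos'], outdir='.')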
Code Example #4
def ROC_Curve(Model, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.15)
    visualizer = ROCAUC(Model)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show()
Code Example #5
def plotting_ROC_curve_yellowbrick(model, labels, X_train, y_train, X_test,
                                   y_test):
    st.subheader('ROC Curve')
    visualizer = ROCAUC(model, classes=labels)
    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show()
    st.pyplot()
    return
Code Example #6
def ROC_AUC(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import ROCAUC

    # Instantiate the visualizer with the classification model
    visualizer = ROCAUC(model, classes=classes)

    visualizer.fit(X_train, Y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, Y_test)  # Evaluate the model on the test data
    g = visualizer.poof()
Code Example #7
def log_roc_auc_chart(classifier,
                      X_train,
                      X_test,
                      y_train,
                      y_test,
                      experiment=None):
    """Log ROC-AUC chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, logs to the currently active, most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_roc_auc_chart(rfc, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = ROCAUC(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='ROC-AUC')
        plt.close(fig)
    except Exception as e:
        print('Did not log ROC-AUC chart. Error {}'.format(e))
Code Example #8
def classification_sanity_check(model,
                                X_train,
                                X_test,
                                y_train,
                                y_test,
                                classes=None):
    visualizer = ROCAUC(model, micro=False, macro=False, classes=classes)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()
Code Example #9
 def roc_curve(self, classes) -> None:
     visualizer = ROCAUC(self.trained_model, classes=classes)
     visualizer.fit(self.X_train,
                    self.y_train)  # Fit the training data to the visualizer
     visualizer.score(self.X_test,
                      self.y_test)  # Evaluate the model on the test data
     save_dir = f"{self.plots_dir}/roc_curve_{self.model_id}.png"
     visualizer.show(outpath=save_dir)
     if not LOCAL:
         upload_to_s3(save_dir,
                      f'plots/roc_curve_{self.model_id}.png',
                      bucket=S3_BUCKET_NAME)
     plt.clf()
Code Example #10
def create_roc_auc_chart(classifier, X_train, X_test, y_train, y_test):
    """Create ROC-AUC chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/roc_auc'] = npt_utils.create_roc_auc_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = ROCAUC(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log ROC-AUC chart. Error {}'.format(e))

    return chart
Code Example #11
File: rocauc.py  Project: zzusunjs/yellowbrick
def rocauc(X, y, model, outpath, **kwargs):
    # Create a new figure and axes
    _, ax = plt.subplots()

    # Instantiate the classification model and visualizer
    visualizer = ROCAUC(model, ax=ax, **kwargs)

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    # Save to disk
    visualizer.poof(outpath=outpath)
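A possible invocation of the helper above, assuming yellowbrick's bundled occupancy dataset and that train_test_split and plt are already imported in the module (the output filename is arbitrary):

from sklearn.naive_bayes import GaussianNB
from yellowbrick.datasets import load_occupancy

X, y = load_occupancy()
rocauc(X, y, GaussianNB(), outpath='rocauc.png')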
Code Example #13
File: custom_viz.py  Project: rambabusure/whitebox
    def generate_roc_auc(self, X_train, y_train, X_test, y_test, **kwargs):
        """
        Given the training and testing sets, computes the ROC AUC metrics
        for the given model and returns a ROC AUC plotly figure.
        
        :param X_train: the training feature set.
        :param y_train: the training target set.
        :param X_test: the testing feature set.
        :param y_test: the testing target set.
        """
        
        visualizer = ROCAUC(self.model, classes=self.classes)

        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        
        # fpr/tpr are dicts keyed by class index (plus 'micro'/'macro' averages)
        roc_fpr = visualizer.fpr
        roc_tpr = visualizer.tpr
        
        layout = go.Layout(yaxis = dict(
                                scaleratio = 1,
                                range=[-.1, 1]
                            ),
                            xaxis=dict(
                                range=[-.1, 1],
                                #scaleratio = 1
                            ),
                        )

        fig = go.Figure(layout=layout)

        for tr in roc_tpr.keys():
            trace = go.Scatter(
                x = roc_fpr[tr],  # a true ROC curve plots TPR against FPR, not an even grid
                y = roc_tpr[tr],
                name = f'{tr}' if not isinstance(tr, int) else f'{self.classes[tr]}',
                line = dict(shape = 'hv')
            )
            fig.add_trace(trace)

        lin_line = go.Scatter(
            x = [0,1],
            y = [0,1],
            name = 'linear_line',
            line = dict(dash='dash')
        )

        fig.add_trace(lin_line)
        return fig
Code Example #14
def evaluate_model(clf_, X_tr, X_te, y_tr, y_te, cls_rpt_tr=False, show=True, cls_labels=None, binary=False):
    """Takes any classifier, train/test data for X/y, and labels for the graph (optional).
    If `show`, outputs a sklearn classification report and confusion matrix
    along with a Yellowbrick ROC/AUC curve and a feature-importance graph (if a tree).
    Otherwise returns training/test predictions."""

    import sklearn.metrics as metrics
    import matplotlib.pyplot as plt
    from yellowbrick.classifier import ROCAUC

    ## Fit and predict
    y_hat_trn, y_hat_tes = fit_n_pred(clf_, X_tr, X_te, y_tr)
    
    if show:
        ## Classification Report / Scores
        if cls_rpt_tr:
            print('Classification Report Train')
            print(metrics.classification_report(y_tr,y_hat_trn))
        else:
            print('Classification Report Test')
            print(metrics.classification_report(y_te,y_hat_tes))

        ## Confusion Matrix
        fig, ax = plt.subplots(figsize=(10,5), ncols=2)
        
        # plot_confusion_matrix was removed in scikit-learn 1.2;
        # ConfusionMatrixDisplay.from_estimator is the current replacement
        metrics.ConfusionMatrixDisplay.from_estimator(clf_, X_te, y_te, cmap="YlOrRd",
                                                      normalize='true', ax=ax[0])
        ax[0].set(title='Confusion Matrix Test Data')
        ax[0].grid(False)        

        roc = ROCAUC(clf_, classes=cls_labels, ax=ax[1])
        roc.fit(X_tr, y_tr)
        roc.score(X_te, y_te)
        roc.finalize()
            
        plt.tight_layout()
        plt.show()
        
        if binary:
            try:
                imps = plot_importance(clf_, X_tr)
            except Exception:
                imps = None
        
    else:
        return y_hat_trn, y_hat_tes
Code Example #15
def roc(model, data_type="music", features_nr=705):
    classes = ["{}".format(data_type), "no_{}".format(data_type)]
    from yellowbrick.classifier import ROCAUC

    data = load_data(how_many=4, last=True, data_type=data_type)
    data = data.astype({'class': str})

    features = data.columns[:features_nr]
    X = data[features]
    y = data["class"]

    # Instantiate the visualizer with the classification model
    visualizer = ROCAUC(model, classes=classes)

    visualizer.score(X, y)  # Evaluate the already-fitted model on this data
    g = visualizer.poof()   # Draw/show/poof the data
Code Example #16
def rocauc(dataset):
    if dataset == "binary":
        X, y = load_occupancy()
        model = GaussianNB()
    elif dataset == "multiclass":
        X, y = load_game()
        X = OrdinalEncoder().fit_transform(X)
        model = RidgeClassifier()
    else:
        raise ValueError("unknown dataset")

    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)
    oz = ROCAUC(model, ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "rocauc_{}".format(dataset))
Code Example #17
def evaluate_model(model, x_test, y_test, coef_):
    # X_train / y_train come from the enclosing scope
    prediction = model.predict(x_test)
    acc_test = accuracy_score(y_test, prediction)
    acc_train = accuracy_score(y_train, model.predict(X_train))
    print("\n")
    print("Accuracy Score (Test): " + str(acc_test))
    print("Accuracy Score (Train): " + str(acc_train))
    print("Difference between train and test accuracy = {0}".format(abs(acc_test - acc_train)))
    print("ROC AUC Score: " + str(roc_auc_score(y_test, prediction)))
    print("\n")
    print("Classification Report:")
    print(classification_report(y_test, prediction))

    # confusion matrix
    plt.figure()
    cm = confusion_matrix(y_test, prediction)
    sns.heatmap(cm, annot=True, cmap="YlGnBu", fmt="d")
    plt.title("Confusion Matrix (1: Churned, 0: Not Churned)")
    plt.show()

    # ROC curve
    plt.figure()
    visualizer = ROCAUC(model, classes=["Not Churn", "Churn"])
    visualizer.fit(X_train, y_train)   # Fit the training data to the visualizer
    visualizer.score(x_test, y_test)   # Evaluate the model on the test data
    visualizer.show()                  # Finalize and show the figure
    
    
    if coef_:
        feature_imp = {}
        for idx, col_name in enumerate(X_train.columns):
            feature_imp[col_name] = model.coef_[0][idx]

        feature_imp = pd.DataFrame(feature_imp.items(), columns=["Feature", "Feature Importance"])
        feature_imp.set_index("Feature", inplace=True)

        ax = feature_imp.plot(kind="bar", fontsize=10, color="red")
        ax.set_title("Feature Importance", fontdict={"fontsize": 12, "fontweight": "bold"})
        ax.set_ylabel("Coef_")

        plt.show()
Code Example #18
File: gallery.py  Project: subodhchhabra/PyCon2017
def rocauc(ax):
    from yellowbrick.classifier import ROCAUC
    from sklearn.linear_model import LogisticRegression

    # Specify the features of interest and the classes of the target
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    target = "occupancy"
    classes = ['unoccupied', 'occupied']

    # Load the data
    splits = load_data('occupancy', cols=features, target=target, tts=True)
    X_train, X_test, y_train, y_test = splits

    estimator = LogisticRegression()
    visualizer = ROCAUC(estimator, ax=ax)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    return visualizer
Code Example #19
def showROC():
    # Load the classification data set
    data = load_data('occupancy')

    # Specify the features of interest and the classes of the target
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    classes = ['unoccupied', 'occupied']

    # Extract the numpy arrays from the data frame
    # (pandas removed `as_matrix`; `values` is the equivalent)
    X = data[features].values
    y = data.occupancy.values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    # Instantiate the classification model and visualizer
    logistic = LogisticRegression()
    visualizer = ROCAUC(logistic)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    g = visualizer.poof()  # Draw/show/poof the data
Code Example #20
    def get_roc(self, on="test"):

        visualizer = ROCAUC(self.pipe)
        if on == "test":
            visualizer.score(self._X_test, self._y_test)
        elif on == "train":
            visualizer.score(self._X_train, self._y_train)
        elif on == "all":
            visualizer.score(self.X, self.y)

        visualizer.poof()
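get_roc above (and the variant in the next example) calls score() without fit(), which only works because self.pipe is already trained. A minimal sketch of making that explicit with the is_fitted flag also used in Examples #7 and #10 (the pipeline and data names are placeholders):

# `is_fitted=True` tells yellowbrick not to refit the wrapped estimator;
# fit() then only prepares the visualizer (labels, axes) before scoring.
visualizer = ROCAUC(fitted_pipe, is_fitted=True)  # fitted_pipe: placeholder
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()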
Code Example #21
    def get_roc(self, on="test"):
        """
        Produces aAUC/ROC curve graph made through the yellowbrick package

        Input
        -----
        on : string (default=test)
            Determines which set of data to score and create a ROC graph on.
            Default is 'test', meaning it will make a ROC graph of the test results. 
            'train' and 'all' are alternative values. 
        """
        visualizer = ROCAUC(self.pipe)
        if on == "test":
            visualizer.score(self._X_test, self._y_test)
        elif on == "train":
            visualizer.score(self._X_train, self._y_train)
        elif on == "all":
            visualizer.score(self._X, self._y)

        visualizer.poof()
Code Example #22
def plot(X, Y):
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
    oz = ROCAUC(GaussianNB())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    oz.poof()
Code Example #23
y_pred = model.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
import seaborn as sn
import matplotlib.pyplot as plt
cmET = confusion_matrix(y_test, y_pred)
sn.heatmap(cmET,
           cmap='Blues_r',
           annot=True,
           xticklabels='1234',
           yticklabels='1234')
plt.xlabel('Predicted Label')
plt.ylabel('Actual Label')
plt.show()

#ROCAUC Plot
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from yellowbrick.classifier import ROCAUC

# Encode the non-numeric columns
X = OrdinalEncoder().fit_transform(X)
y = LabelEncoder().fit_transform(y)

# Instantiate the classification model and visualizer
visualizer = ROCAUC(model, classes=[1, 2, 3, 4])  # display labels for the four damage grades

visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()  # Finalize and render the figure
Code Example #24
#######################################################
# STEP 5: Accuracy check
#########################################################

from sklearn import metrics
prediction_test = model.predict(X_test)
##Check accuracy on test dataset. 
print ("Accuracy = ", metrics.accuracy_score(y_test, prediction_test))

from yellowbrick.classifier import ROCAUC
print("Classes in the image are: ", np.unique(Y))

#ROC curve for RF
roc_auc=ROCAUC(model, classes=[0, 1, 2, 3])  #Create object
roc_auc.fit(X_train, y_train)
roc_auc.score(X_test, y_test)
roc_auc.show()

##########################################################
#STEP 6: SAVE MODEL FOR FUTURE USE
###########################################################
##You can store the model for future use. In fact, this is how you do machine learning:
##train on training images, validate on test images, and deploy the model on unknown images.
#
#
##Save the trained model as pickle string to disk for future use
model_name = "sandstone_model"
pickle.dump(model, open(model_name, 'wb'))
#
##To test the model on future datasets
#loaded_model = pickle.load(open(model_name, 'rb'))
Code Example #25
File: demo_ma.py  Project: mattandahalfew/yellowbrick

datasets = DatasetMixin()
credit = datasets.load_data('credit')
credit_keys = credit.dtype.names
datatype = credit.dtype[0]
ncols = len(credit_keys)
categorical_names = ['edu','married']
y_name = 'default'
credit_data = None
for j in range(0,ncols):
    if credit_keys[j] in categorical_names:
        credit_data = add_categorical(credit_data,credit[credit_keys[j]],datatype)
    elif credit_keys[j] == y_name:
        y = credit[y_name].astype(int)
    else:
        credit_data = add_column(credit_data,credit[credit_keys[j]])

datashape = credit_data.shape
nrows = datashape[0]
cmeans = np.mean(credit_data,0)
repmeans = numpy.matlib.repmat(cmeans,nrows,1)
mydata = credit_data - repmeans
sstds = np.std(mydata,0)
repstds = numpy.matlib.repmat(sstds,nrows,1)
mydata = np.divide(mydata,repstds)

visualizer = ROCAUC(LinearSVC())
visualizer.fit(mydata,y)
visualizer.score(mydata,y)
visualizer.poof()
Code Example #26
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    
    np.random.seed(100)

    
    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
        
        tfid_vect = TfidfVectorizer(analyzer='word', tokenizer=nltk.tokenize.word_tokenize, stop_words='english', min_df=5)
        
        
        my_pipeline = Pipeline(steps=[('vectorizer', tfid_vect),
                                       ('lr', LogisticRegression(random_state=42))])
        
           
        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)
                                      
        joblib.dump(my_pipeline, 'pipeline_lr.pkl')
        
        accuracy = accuracy_score(ytest, predictions)
        
        f1score = f1_score(ytest, predictions)
        
        auc_score = roc_auc_score(ytest, predictions)
        
        class_report = classification_report(ytest, predictions)
        
        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')
        
        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))
        
        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)
        
        visualizer = ClassificationReport(my_pipeline, ax=ax1, classes=[0,1])
        visualizer.fit(xtrain, ytrain)
        visualizer.score(xtest, ytest)
        a=visualizer.poof(outpath="image/classification_report.png")
        print(' ')
        
        mlflow.log_artifact("image/classification_report.png")
        
        # The ConfusionMatrix visualizer takes a model
        cm = ConfusionMatrix(my_pipeline, ax=ax2, classes=[0,1])
        cm.fit(xtrain, ytrain)
        cm.score(xtest, ytest) 
        b=cm.poof(outpath="image/confusionmatrix.png")
        
        mlflow.log_artifact("image/confusionmatrix.png")
        print(' ')
        
        vis = ROCAUC(my_pipeline, ax=ax3, classes=[0,1])
        vis.fit(xtrain, ytrain)  # Fit the training data to the visualizer
        vis.score(xtest, ytest)  # Evaluate the model on the test data
        c = vis.poof(outpath="image/rocauc.png")             # Draw/show/poof the data
        print(' ')
        mlflow.log_artifact("image/rocauc.png")
        
        visual = ClassPredictionError(my_pipeline, ax=ax4, classes=[0,1])
        visual.fit(xtrain, ytrain)
        visual.score(xtest, ytest)
        g = visual.poof(outpath="image/ClassificationError.png")
        print(' ')
        mlflow.log_artifact("image/ClassificationError.png")
        
        
        return run.info.run_uuid
Code Example #27
class_names = ['+', '-']
verbose_class_names = ['Activator', 'Repressor']

# %%
X = load('X.joblib')
y = load('y.joblib')

# %%
to_graphviz(clf, num_trees=0, rankdir='LR')

# %%
classification_report(clf, X, y)

# %%
visualizer = ROCAUC(clf, classes=class_names)
visualizer.score(X, y)
visualizer.poof()

# %%
visualizer = ClassPredictionError(clf, classes=class_names)
visualizer.score(X, y)
visualizer.poof()

# %%
visualizer = DiscriminationThreshold(clf)
visualizer.fit(X, y)
visualizer.poof()

# %%
keep = [263, 268, 287, 288, 300, 302, 307, 308, 313, 315]
Code Example #28
File: modelling.py  Project: Divnsh/AV_WNS_Hackathon
def baseline_model(df):
    X_train=df.drop('is_click',axis=1)
    with open(r'./baseline_logistic.pkl','wb') as f:
        pickle.dump(X_train,f)
    logistic=LogisticRegression()
    cv_res=cross_validation(logistic,X_train,df['is_click'])
    plot_cv_res(cv_res)
    return logistic.fit(X_train,df['is_click']),X_train


def predict_test_values(test_df,model,transformer):
    vect=transformer.transform(test_df['tweet'])
    predictions=model.predict(vect)
    df=pd.DataFrame()
    df['id']=test_df['id'].values
    df['label']=predictions
    return  df

log, X_train = baseline_model(train)
test_df = pd.read_csv(r'./Data/test.csv')
with open(r'./Models/baseline_logistic.pkl', 'rb') as f:
    X_train = pickle.load(f)
pred_df = predict_test_values(test_df, log, X_train)
pred_df.to_csv('./Submissions/baseline_predictions.csv', index=False)
#pca,arr=dimesionality_reduction(X_train.toarray())
#plot_reduced_dimension(arr,train['is_click'])
visualizer = ROCAUC(log)
visualizer.score(X_train,train['label'])
visualizer.poof()
feature_series=pd.Series(index=X_train.columns,data=X_train.values)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Log_ROC')
plt.show()

Code Example #29
# Yellowbrick visualization
viz = ClassificationReport(model)
viz.fit(X_train, y_train)
viz.score(X_test, y_test)
viz.show()

roc = ROCAUC(model)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.show()


# # Perform Gridsearch using various parameters
penalty = ['l1', 'l2']
C = [0.1, 0.25, 0.5]
solver = ['newton-cg', 'lbfgs', 'liblinear']
class_weight = [{0: 15, 1: 85}, {0: 0.14, 1: 0.86}]
miter = [5000]

hyperparams = dict(C=C, penalty=penalty, solver=solver, class_weight=class_weight, max_iter=miter)
RegLog = LogisticRegression()

finalmodel = GridSearchCV(RegLog, hyperparams, verbose=0, cv=5)
finalmodel.fit(X_train, y_train)
Code Example #30
test_mean = np.mean(test_scores, axis=1)
test_mean

test_std = np.std(test_scores, axis=1)
test_std

plt.plot(train_sizes, train_mean, label='Training Score')
plt.plot(train_sizes, test_mean, label='Cross-Validation Score')
plt.fill_between(train_sizes,
                 train_mean - train_std,
                 train_mean + train_std,
                 color='#DDDDDD')
plt.fill_between(train_sizes,
                 test_mean - test_std,
                 test_mean + test_std,
                 color='#DDDDDD')

plt.title("Learning Curve")
plt.xlabel("Training Size")
plt.ylabel("Accuracy Score")
plt.legend(loc='best')
"""**ROC ve AUC**"""

from yellowbrick.classifier import ROCAUC

fig, ax = plt.subplots(1, 1, figsize=(12, 8))
roc_auc = ROCAUC(clf, ax=ax)
roc_auc.fit(pc_train, y_train)
roc_auc.score(pc_test, y_test)

roc_auc.poof()
Code Example #31
def best_model(model):
    visualizer = ROCAUC(model, classes=['reggae','soca','dancehall','pop'])
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.show()
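best_model depends on the surrounding notebook's X_train/X_test globals and renders to screen; the same call chain can instead write the figure to disk via show(outpath=...), as Examples #3 and #9 do (the filename below is hypothetical):

def best_model_to_disk(model, X_train, y_train, X_test, y_test):
    visualizer = ROCAUC(model, classes=['reggae', 'soca', 'dancehall', 'pop'])
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.show(outpath='best_model_rocauc.png')  # hypothetical output path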