Example no. 1
def classification_sanity_check(model,
                                X_train,
                                X_test,
                                y_train,
                                y_test,
                                classes=None):
    visualizer = ROCAUC(model, micro=False, macro=False, classes=classes)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()
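A minimal way to call this helper, assuming the usual imports; the dataset, estimator, and 80/20 split below are illustrative placeholders, not part of the original project:

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ROCAUC

# Illustrative data and model; any scikit-learn classifier works here
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
classification_sanity_check(LogisticRegression(max_iter=1000),
                            X_train, X_test, y_train, y_test)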
Example no. 2
    def get_roc(self, on="test"):

        visualizer = ROCAUC(self.pipe)
        if on == "test":
            visualizer.score(self._X_test, self._y_test)
        elif on == "train":
            visualizer.score(self._X_train, self._y_train)
        elif on == "all":
            visualizer.score(self.X, self.y)

        visualizer.poof()
Example no. 3
def rocauc(X, y, model, outpath, **kwargs):
    # Create a new figure and axes
    _, ax = plt.subplots()

    # Instantiate the classification model and visualizer
    visualizer = ROCAUC(model, ax=ax, **kwargs)

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    # Save to disk
    visualizer.poof(outpath=outpath)
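Because the helper forwards **kwargs to ROCAUC, display options such as micro and macro (both real ROCAUC parameters) can be set at the call site. A sketch, assuming X and y are already loaded; the RandomForestClassifier is an arbitrary stand-in:

from sklearn.ensemble import RandomForestClassifier

# Plot only the per-class curves, skipping the micro/macro averages
rocauc(X, y, RandomForestClassifier(), "rocauc.png", micro=False, macro=False)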
Example no. 5
def evaluation(estimator, X, Y, x, y):

    classes = [Y[1], Y[0]]
    f, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(18, 6))

    # Confusion Matrix
    cmm = ConfusionMatrix(model=estimator,
                          ax=ax1,
                          classes=classes,
                          label_encoder={
                              0.0: 'Negativo',
                              1.0: 'Positivo'
                          })
    cmm.score(x, y)

    # ROCAUC
    viz = ROCAUC(model=estimator, ax=ax2)
    viz.fit(X, Y)
    viz.score(x, y)

    # Learning Curve
    cv_strategy = StratifiedKFold(n_splits=3)
    sizes = np.linspace(0.3, 1.0, 10)
    visualizer = LearningCurve(estimator,
                               ax=ax,
                               cv=cv_strategy,
                               scoring='roc_auc',
                               train_sizes=sizes,
                               n_jobs=4)
    visualizer.fit(X, Y)

    cmm.poof()
    viz.poof()
    visualizer.poof()
    plt.show()
Example no. 6
def ROC_AUC(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import ROCAUC

    # Instantiate the visualizer with the classification model
    visualizer = ROCAUC(model, classes=classes)

    visualizer.fit(X_train, Y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, Y_test)  # Evaluate the model on the test data
    g = visualizer.poof()
Example no. 7
    def get_roc(self, on="test"):
        """
        Produces an AUC/ROC curve graph using the yellowbrick package.

        Input
        -----
        on : string (default=test)
            Determines which set of data to score and create a ROC graph on.
            Default is 'test', meaning it will make a ROC graph of the test results. 
            'train' and 'all' are alternative values. 
        """
        visualizer = ROCAUC(self.pipe)
        if on == "test":
            visualizer.score(self._X_test, self._y_test)
        elif on == "train":
            visualizer.score(self._X_train, self._y_train)
        elif on == "all":
            visualizer.score(self._X, self._y)

        visualizer.poof()
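Note that get_roc calls score() without fit(), so it only works if self.pipe has already been fitted elsewhere. Newer Yellowbrick releases let you state that intent explicitly; a one-line sketch:

visualizer = ROCAUC(self.pipe, is_fitted=True)  # reuse the fitted pipeline instead of refitting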
Example no. 8
def roc(model, data_type="music", features_nr=705):
    classes = ["{}".format(data_type), "no_{}".format(data_type)]
    from yellowbrick.classifier import ROCAUC

    data = load_data(how_many=4, last=True, data_type=data_type)
    data = data.astype({'class': str})

    features = data.columns[:features_nr]
    X = data[features]
    y = data["class"]

    # Instantiate the visualizer with the classification model
    visualizer = ROCAUC(model, classes=classes)

    visualizer.score(X, y)  # Evaluate the model on the test data
    g = visualizer.poof()  # Draw/show/poof the data
Example no. 9
def showROC():
    # Load the classification data set
    data = load_data('occupancy')

    # Specify the features of interest and the classes of the target
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    classes = ['unoccupied', 'occupied']

    # Extract the numpy arrays from the data frame
    X = data[features].to_numpy()   # DataFrame.as_matrix() was removed in pandas 1.0
    y = data.occupancy.to_numpy()

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    # Instantiate the classification model and visualizer
    logistic = LogisticRegression()
    visualizer = ROCAUC(logistic)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    g = visualizer.poof()  # Draw/show/poof the data
Example no. 10
data = load_data(how_many=14)
data = data.astype({'class': str})
print("Number of music entries")
print(data[data['class'] == "music"].shape)
print("Number of no_music entries")
print(data[data['class'] == "no_music"].shape)
# Specify the features of interest and the classes of the target
features = data.columns[:705]

classes = ["music", "no_music"]

# Extract the instances and target

X = data[features]
y = data['class']

# Create the train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
from yellowbrick.classifier import ROCAUC
from sklearn.linear_model import LogisticRegression

# model = load_model("models\\random_forest\\rf-10-music-nestimators25.joblib")
model = RandomForestClassifier(n_estimators=25, n_jobs=4, random_state=0, verbose=1)
# Instantiate the visualizer with the classification model
visualizer = ROCAUC(model, classes=classes)

visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
g = visualizer.poof()             # Draw/show/poof the data
Example no. 11
datasets = DatasetMixin()
credit = datasets.load_data('credit')
credit_keys = credit.dtype.names
datatype = credit.dtype[0]
ncols = len(credit_keys)
categorical_names = ['edu', 'married']
y_name = 'default'
credit_data = None
for j in range(0, ncols):
    if credit_keys[j] in categorical_names:
        credit_data = add_categorical(credit_data, credit[credit_keys[j]], datatype)
    elif credit_keys[j] == y_name:
        y = credit[y_name].astype(int)
    else:
        credit_data = add_column(credit_data, credit[credit_keys[j]])

datashape = credit_data.shape
nrows = datashape[0]
cmeans = np.mean(credit_data, 0)
repmeans = numpy.matlib.repmat(cmeans, nrows, 1)
mydata = credit_data - repmeans
sstds = np.std(mydata, 0)
repstds = numpy.matlib.repmat(sstds, nrows, 1)
mydata = np.divide(mydata, repstds)

visualizer = ROCAUC(LinearSVC())
visualizer.fit(mydata,y)
visualizer.score(mydata,y)
visualizer.poof()
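The manual mean/std standardization above (repmat, subtract, divide) is equivalent to scikit-learn's StandardScaler, which uses the same population standard deviation as np.std. A shorter sketch of the same preprocessing, assuming the same credit_data and y:

from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from yellowbrick.classifier import ROCAUC

mydata = StandardScaler().fit_transform(credit_data)  # zero mean, unit variance per column

visualizer = ROCAUC(LinearSVC())
visualizer.fit(mydata, y)
visualizer.score(mydata, y)
visualizer.poof()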
Example no. 12
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from yellowbrick.classifier import ROCAUC

if __name__ == '__main__':
    # Load the classification data set
    data = pd.read_csv("../../../examples/data/occupancy/occupancy.csv")

    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    classes = ['unoccupied', 'occupied']

    # Extract the numpy arrays from the data frame
    X = data[features].to_numpy()   # DataFrame.as_matrix() was removed in pandas 1.0
    y = data.occupancy.to_numpy()

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the classification model and visualizer
    logistic = LogisticRegression()
    visualizer = ROCAUC(logistic)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    g = visualizer.poof(outpath="images/rocauc.png")  # Draw/show/poof the data
Example no. 13
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):

    np.random.seed(100)

    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:

        tfid_vect = TfidfVectorizer(analyzer='word',
                                    tokenizer=nltk.tokenize.word_tokenize,
                                    stop_words='english',
                                    min_df=5)

        my_pipeline = Pipeline(steps=[('vectorizer', tfid_vect),
                                      ('lr', LogisticRegression(random_state=42))])

        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)
                                      
        joblib.dump(my_pipeline, 'pipeline_lr.pkl')
        
        accuracy = accuracy_score(ytest, predictions)
        
        f1score = f1_score(ytest, predictions)
        
        auc_score = roc_auc_score(ytest, predictions)
        
        class_report = classification_report(ytest, predictions)
        
        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')
        
        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))
        
        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)
        
        visualizer = ClassificationReport(my_pipeline, ax=ax1, classes=[0,1])
        visualizer.fit(xtrain, ytrain)
        visualizer.score(xtest, ytest)
        a = visualizer.poof(outpath="image/classification_report.png")
        print(' ')
        
        mlflow.log_artifact("image/classification_report.png")
        
        # The ConfusionMatrix visualizer takes a model
        cm = ConfusionMatrix(my_pipeline, ax=ax2, classes=[0,1])
        cm.fit(xtrain, ytrain)
        cm.score(xtest, ytest) 
        b = cm.poof(outpath="image/confusionmatrix.png")
        
        mlflow.log_artifact("image/confusionmatrix.png")
        print(' ')
        
        vis = ROCAUC(my_pipeline, ax=ax3, classes=[0,1])
        vis.fit(xtrain, ytrain)  # Fit the training data to the visualizer
        vis.score(xtest, ytest)  # Evaluate the model on the test data
        c = vis.poof(outpath="image/rocauc.png")             # Draw/show/poof the data
        print(' ')
        mlflow.log_artifact("image/rocauc.png")
        
        visual = ClassPredictionError(my_pipeline, ax=ax4, classes=[0,1])
        visual.fit(xtrain, ytrain)
        visual.score(xtest, ytest)
        g = visual.poof(outpath="image/ClassificationError.png")
        print(' ')
        mlflow.log_artifact("image/ClassificationError.png")
        
        
        return run.info.run_uuid
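One caveat with this function: poof(outpath=...) saves through matplotlib's savefig, which raises an error if the target directory does not exist, so the image/ folder must be created before the run. A small guard, not in the original code:

import os

os.makedirs("image", exist_ok=True)  # ensure the artifact directory exists before saving plots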
Example no. 14
                        solver='adam',
                        batch_size=512,
                        activation='tanh')

    # Run model with 4-fold cross validation. Report mean accuracy.
    scores = cross_val_score(mlp, X_train, y_train, cv=4)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # Plot ROC, AUC.
    classes = ["Normal", "Pre-Ictal", "Seizure"]
    visualizer = ROCAUC(mlp, classes=classes)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    ROC_title = "ROCAUC_{}.png".format(animal_id)
    g = visualizer.poof(outpath=ROC_title)  # Save plot w unique title

    # Plot the precision-recall curve.
    viz = PrecisionRecallCurve(mlp)
    viz.fit(X_train, y_train)  # Fit the training data to the visualizer
    viz.score(X_test, y_test)  # Evaluate the model on the test data
    PR_title = "PR_{}.png".format(animal_id)
    viz.poof(outpath=PR_title)  # Save plot w unique title

    # Plot loss curve aka cost function.
    loss_values = mlp.loss_curve_
    plt.plot(loss_values)
    Loss_title = "Loss_{}.format".format(animal_id) if False else "Loss_{}.png".format(animal_id)
    plt.savefig(Loss_title)  # save before show(): show() clears the current figure
    plt.show()
sys.stdout.close()
Example no. 15
ax = fig.add_subplot()

viz = FeatureImportances(
    rf, ax=ax, labels=cancer.feature_names,
    relative=False)  # if True, puts all on scale, max = 100
viz.fit(X, y)
viz.poof()

### ROC-AUC

from yellowbrick.classifier import ROCAUC

roc = ROCAUC(rf, classes=cancer.target_names)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.poof()

### Confusion Matrix

from yellowbrick.classifier import ConfusionMatrix

classes = cancer.target_names

conf_matrix = ConfusionMatrix(rf,
                              classes=classes,
                              label_encoder={
                                  0: 'benign',
                                  1: 'malignant'
                              })
conf_matrix.fit(X_train, y_train)
conf_matrix.score(X_test, y_test)
Example no. 16
test_std = np.std(test_scores, axis=1)

plt.plot(train_sizes, train_mean, label='Training Score')
plt.plot(train_sizes, test_mean, label='Cross-Validation Score')
plt.fill_between(train_sizes,
                 train_mean - train_std,
                 train_mean + train_std,
                 color='#DDDDDD')
plt.fill_between(train_sizes,
                 test_mean - test_std,
                 test_mean + test_std,
                 color='#DDDDDD')

plt.title("Learning Curve")
plt.xlabel("Training Size")
plt.ylabel("Accuracy Score")
plt.legend(loc='best')
"""**ROC ve AUC**"""

from yellowbrick.classifier import ROCAUC

fig, ax = plt.subplots(1, 1, figsize=(12, 8))
roc_auc = ROCAUC(clf, ax=ax)
roc_auc.fit(pc_train, y_train)
roc_auc.score(pc_test, y_test)

roc_auc.poof()
Example no. 17
def plot(X, Y):
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
    oz = ROCAUC(GaussianNB())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    oz.poof()
Example no. 18
model = GradientBoostingClassifier()
visualizer = ROCAUC(model)

visualizer.fit(X_train, y_train)

y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

run.log('Accuracy', acc)
run.log('F1', f1)

visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.poof(outpath='./outputs/AUC.png')

model_file_name = 'breast-cancer-model.pkl'
model_file_path = os.path.join('./outputs/', model_file_name)
# save the model in the outputs folder so it is uploaded automatically
joblib.dump(value=model, filename=model_file_path)

# when running in offline mode, model cannot be registered
register_model = getattr(run, "register_model", None)
if callable(register_model):
    # supply a model name, and the full path to the serialized model file.
    model = run.register_model(model_name='breast-cancer-model',
                               model_path=model_file_path)
    model.add_tags({"run_id": run.id})
Example no. 19
def plot_rocauc(model, X_valid, y_valid):
    visualizer = ROCAUC(model, is_fitted=True)
    visualizer.score(X_valid, y_valid)
    visualizer.poof()
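A minimal call for this helper, assuming X and y are already loaded; the estimator and split are placeholders. The is_fitted=True flag above tells Yellowbrick not to refit the model:

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3)
model = RandomForestClassifier().fit(X_train, y_train)
plot_rocauc(model, X_valid, y_valid)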
Example no. 20
# Split the data into training and test sets
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.3)


clf = GradientBoostingClassifier(random_state=1, max_depth=10, n_estimators=100, learning_rate=0.1)
# clf = AdaBoostClassifier(random_state=1,base_estimator=tree.DecisionTreeClassifier(max_depth=10), n_estimators=100,
#                           learning_rate=0.1)
clf.fit(X_train, y_train)


# Generate a ROC-AUC curve for the classifier
rocauc = ROCAUC(clf, size=(1080, 720), classes=classes)

rocauc.score(X_test, y_test)
r = rocauc.poof()

# Generate classification report for the given classifier
# report = ClassificationReport(clf, size=(1080, 720), classes=classes)
#
# report.score(X_test, y_test)
# c = report.poof()

# Generate prediction error for each class
# error = ClassPredictionError(clf, size=(1080, 720), classes=classes)
#
# error.score(X_test, y_test)
# e = error.poof()
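A general note on the examples above: Yellowbrick 1.0 renamed poof() to show(), with the same outpath behavior, and deprecated the old name. On current releases the calls become, for instance:

visualizer.show()                      # replaces visualizer.poof()
visualizer.show(outpath="rocauc.png")  # replaces visualizer.poof(outpath=...)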