Exemple #1
0
def make_confusion_matrix(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` and display a yellowbrick confusion matrix for it.

    Labels are integer-encoded with a single ``LabelEncoder`` fitted on the
    raw training labels. The same encoder is applied to the test labels so
    both splits share one label-to-integer mapping.

    Args:
        model: Estimator handed to ``ConfusionMatrix``.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels; each one must also occur in ``y_train``,
            otherwise ``LabelEncoder.transform`` raises ``ValueError``.

    Returns:
        None. The figure is rendered through ``ConfusionMatrix.show()``.
    """
    encoder = LabelEncoder()
    y_train_encoded = encoder.fit_transform(y_train)

    # Reuse the encoder fitted on the raw training labels. Re-fitting it on
    # the already-encoded integers would make it reject the raw test labels.
    y_test_encoded = encoder.transform(y_test)

    # One display name per encoded class, in encoder order, so the number of
    # names always matches the number of classes actually present.
    classes = encoder.classes_.tolist()

    cm = ConfusionMatrix(model, classes=classes)
    cm.fit(X_train, y_train_encoded)
    cm.score(X_test, y_test_encoded)
    cm.show()

    return
Exemple #2
0
def showConfusionMatrix():
    """Plot a confusion matrix for logistic regression on the digits data.

    Loads scikit-learn's handwritten digits data set, holds out 20% of it
    (``random_state=11``), fits a ``LogisticRegression`` through the
    yellowbrick ``ConfusionMatrix`` visualizer and renders the result.
    """
    # Function-scope imports, as in the original snippet.
    from sklearn.datasets import load_digits

    from yellowbrick.classifier import ConfusionMatrix

    # ``data`` holds the flattened pixel features, ``target`` the digit labels.
    dataset = load_digits()
    features, labels = dataset.data, dataset.target

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=11)

    # The visualizer takes the (unfitted) model and one class name per digit.
    matrix = ConfusionMatrix(LogisticRegression(), classes=list(range(10)))

    # fit() trains the wrapped model; score() predicts on the held-out data
    # and builds the confusion matrix from those predictions.
    matrix.fit(X_train, y_train)
    matrix.score(X_test, y_test)

    # Render the figure.
    matrix.poof()
Exemple #3
0
 def draw_confusion_matrix(self):
     """Fit, score and render a confusion matrix for ``self.model``.

     Class names and the label encoder both come from ``self.le``; training
     and test data are read from the instance attributes.
     """
     options = {"classes": self.le.classes_, "label_encoder": self.le}
     matrix = ConfusionMatrix(self.model, **options)
     matrix.fit(self.training_data, self.training_labels)
     matrix.score(self.test_data, self.test_labels)
     matrix.poof()
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    """Evaluate an RFE-wrapped linear SVM and save diagnostic plots.

    Builds ``RFE(LinearSVC(C=c), n_features_to_select=n)``, writes a learning
    curve, classification report, confusion matrix, precision-recall curve,
    class prediction error and ROC/AUC plot into ``outdir``, then refits the
    selector and writes the retained feature names to ``features.csv``.

    Args:
        c: Regularization parameter ``C`` passed to ``LinearSVC``.
        n: Number of features RFE should keep.
        X: Training features; must expose ``.columns`` (it is indexed with
            the RFE support mask).
        y: Training labels.
        X_test: Held-out features used for every scored visualizer.
        y_test: Held-out labels.
        class_names: Class display names passed to the visualizers.
        outdir: Existing directory that receives the PNG and CSV files.

    Returns:
        Weighted F1 score of the refitted selector on ``(X_test, y_test)``.
    """
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    # Learning curve (cross-validated on the training data only).
    plt.clf()
    viz_LC = LearningCurve(rfe, scoring='f1_weighted', n_jobs=4)
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    # Classification report.
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    # Confusion matrix.
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    # Precision-recall curve.
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names)
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png', size=(1080, 720))

    # Class prediction error.
    plt.clf()
    viz_CPE = ClassPredictionError(rfe, classes=class_names)
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    # ROC/AUC. Scored on the held-out split like every other report here;
    # scoring on the training data would overstate the model's quality.
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080, 720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)
    viz_RA.show(outpath=outdir + '/RA.png')

    # Final fit used for the returned score and the feature export.
    rfe.fit(X, y)
    y_predict = rfe.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
def nice_confusion(model, X_train, X_test, y_train, y_test):
    """Draw a large-font confusion matrix on a 10x10 inch figure.

    The model is fitted on the training split and scored on the test split
    through yellowbrick's ``ConfusionMatrix`` using the ``PuBu`` colormap.
    """
    label_size = 18
    plt.figure(figsize=(10, 10))
    axis_labels = ((plt.xlabel, 'Predicted Class'), (plt.ylabel, 'True Class'))
    for set_label, text in axis_labels:
        set_label(text, fontsize=label_size)

    matrix = ConfusionMatrix(model, cmap='PuBu', fontsize=label_size)
    matrix.fit(X_train, y_train)
    matrix.score(X_test, y_test)
    matrix.show()
def log_confusion_matrix_chart(classifier,
                               X_train,
                               X_test,
                               y_train,
                               y_test,
                               experiment=None):
    """Log confusion matrix.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Any error raised while building or logging the chart is caught and
    printed, so this function is best-effort. The matplotlib figure is
    closed on both the success and the failure path.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_confusion_matrix_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    fig = None
    try:
        fig, ax = plt.subplots()
        visualizer = ConfusionMatrix(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Confusion Matrix')
    except Exception as e:
        print('Did not log Confusion Matrix chart. Error: {}'.format(e))
    finally:
        # Close the figure even when charting failed; otherwise every failed
        # call leaves an open figure behind in pyplot's global registry.
        if fig is not None:
            plt.close(fig)
def create_confusion_matrix_chart(classifier, X_train, X_test, y_train,
                                  y_test):
    """Create confusion matrix.

    Any error raised while building the chart is caught and printed, in
    which case ``None`` is returned. The matplotlib figure is closed on both
    the success and the failure path.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``,
        or ``None`` when the chart could not be created.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/confusion_matrix'] = \
                npt_utils.create_confusion_matrix_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'

    chart = None
    fig = None

    try:
        fig, ax = plt.subplots()
        visualizer = ConfusionMatrix(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
    except Exception as e:
        print('Did not log Confusion Matrix chart. Error: {}'.format(e))
    finally:
        # Close the figure even when charting failed; otherwise every failed
        # call leaves an open figure behind in pyplot's global registry.
        if fig is not None:
            plt.close(fig)

    return chart
Exemple #8
0
def confusion_matrix(model, classes, X_train, Y_train, X_test, Y_test):
    """Fit ``model`` and render a yellowbrick confusion matrix.

    Args:
        model: Estimator handed to ``ConfusionMatrix``.
        classes: Sequence of class display names, ordered by integer label
            (``classes[i]`` names label ``i``).
        X_train: Training features.
        Y_train: Training labels.
        X_test: Test features.
        Y_test: Test labels.

    Returns:
        None. The figure is rendered through ``poof()``.
    """
    from yellowbrick.classifier import ConfusionMatrix

    # Map every integer label to its name. The previous two-entry mapping
    # only covered binary problems and raised IndexError for a single class.
    label_names = dict(enumerate(classes))

    iris_cm = ConfusionMatrix(model,
                              classes=classes,
                              label_encoder=label_names)

    iris_cm.fit(X_train, Y_train)
    iris_cm.score(X_test, Y_test)

    iris_cm.poof()
 def confusion_matrix(self, class_name_dict=None) -> None:
     """Plot a confusion matrix and save it under ``self.plots_dir``.

     Args:
         class_name_dict: Optional mapping handed to the visualizer as
             ``label_encoder``; its keys are used as ``classes``. When
             omitted, the visualizer is built without either option instead
             of failing on ``None.keys()``.

     The PNG is written to
     ``{self.plots_dir}/confusion_matrix_{self.model_id}.png`` and, unless
     ``LOCAL`` is truthy, uploaded to ``S3_BUCKET_NAME``.
     """
     viz_options = {}
     if class_name_dict is not None:
         viz_options = {
             "classes": list(class_name_dict.keys()),
             "label_encoder": class_name_dict,
         }
     cm = ConfusionMatrix(self.trained_model, **viz_options)
     cm.fit(self.X_train, self.y_train)
     cm.score(self.X_test, self.y_test)
     save_dir = f"{self.plots_dir}/confusion_matrix_{self.model_id}.png"
     cm.show(outpath=save_dir)
     if not LOCAL:
         upload_to_s3(save_dir,
                      f'plots/confusion_matrix_{self.model_id}.png',
                      bucket=S3_BUCKET_NAME)
     # Reset the shared pyplot figure so later plots start clean.
     plt.clf()
def confusion(dataset):
    """Save a logistic-regression confusion matrix for a named data set.

    Args:
        dataset: Either ``"iris"`` or ``"digits"``.

    Raises:
        ValueError: If ``dataset`` is any other value.
    """
    # Guard clause: reject anything that is not one of the two known names.
    if dataset not in ("iris", "digits"):
        raise ValueError("uknown dataset")
    data = load_iris() if dataset == "iris" else load_digits()

    X_train, X_test, y_train, y_test = tts(
        data.data, data.target, test_size=0.2)

    oz = ConfusionMatrix(LogisticRegression(), ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "confusion_matrix_{}".format(dataset))
Exemple #11
0
def plot_confusion_matrix(model:sklearn.base.BaseEstimator,
                          X_train: np.ndarray,
                          X_test: np.ndarray,
                          y_train: np.ndarray,
                          y_test: np.ndarray):
    """
    Render a yellowbrick confusion matrix for ``model``.

    The visualizer is fitted on the training split, scored on the test
    split and then displayed.

    Inputs:
        model: an sklearn classifier
        X_train: training examples
        X_test: test examples
        y_train: labels for the rows of X_train
        y_test: labels for the rows of X_test
    Returns: None
    """
    matrix = ConfusionMatrix(model)
    # Train on the training split, evaluate on the held-out split.
    matrix.fit(X_train, y_train)
    matrix.score(X_test, y_test)
    # Display the figure.
    matrix.poof()
def draw_plots():
    """Show ROC, class-prediction-error and confusion-matrix plots per technique.

    For each of the techniques "base", "SMOTE", "ADASYN" and "text-aug" the
    bag-of-words baseline split is loaded, the training data is optionally
    resampled or augmented, and four yellowbrick visualizers are fitted,
    scored and shown with a shared ``MultinomialNB(alpha=0.01)``.
    """
    classifier = MultinomialNB(alpha=0.01)

    for technique in ["base", "SMOTE", "ADASYN", "text-aug"]:
        X_train, X_test, y_train, y_test = get_baseline_split(representation="bow")

        # Start from the untouched baseline split ("base"), then let the
        # selected technique replace the training part or the whole split.
        X_fit, X_eval, y_fit, y_eval = X_train, X_test, y_train, y_test
        if technique in ("SMOTE", "ADASYN"):
            sampler = smote if technique == "SMOTE" else adasyn
            X_fit, y_fit = sampler.run(X_train, y_train)
        elif technique == "text-aug":
            X_fit, X_eval, y_fit, y_eval = text_augmentation.run(
                books_df=get_fully_processed_books_df(),
                representation="bow")
        elif technique != "base":
            raise Exception()

        # Visualizers are built lazily, one at a time and in this order:
        # ROC micro average, ROC per class, class prediction error,
        # confusion matrix.
        builders = (
            lambda: ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=False),
            lambda: ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=True),
            lambda: ClassPredictionError(classifier, classes=get_selected_genres()),
            lambda: ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8]),
        )
        for build in builders:
            visualizer = build()
            visualizer.fit(X_fit, y_fit)      # fit on the training data
            visualizer.score(X_eval, y_eval)  # evaluate on the test data
            visualizer.show()                 # finalize and show the figure
Exemple #13
0
def classifier_report(classifier, X_test, y_test):
    """Save confusion-matrix and classification-report images as artifacts.

    Both visualizers are fitted and scored on ``(X_test, y_test)``; the
    resulting PNG files are registered through ``ex.add_artifact``.

    Args:
        classifier: Estimator wrapped by the yellowbrick visualizers.
        X_test: Feature matrix used for both fitting and scoring.
        y_test: Labels matching ``X_test``; its unique values become the
            class names of both plots.

    NOTE(review): the visualizers call ``fit`` on the test data, which may
    refit ``classifier`` depending on the yellowbrick version. Confirm this
    is intended.
    """
    classes = np.unique(y_test)

    # Options forwarded to matplotlib's savefig. They were previously wrapped
    # in a single literal ``kwargs=`` keyword (so savefig never received
    # them) and used ``inches`` instead of savefig's ``bbox_inches``.
    save_options = dict(transparent=False, dpi=80, bbox_inches='tight')

    # Use the classes actually present rather than a hard-coded 0..9 list,
    # matching the classification report below.
    cm = ConfusionMatrix(classifier, classes=classes)
    cm.fit(X_test, y_test)
    cm.score(X_test, y_test)
    filename = classifier.__class__.__name__ + '_confusion_matrix.png'
    cm.poof(outpath=filename, clear_figure=True, **save_options)
    ex.add_artifact(filename)

    visualizer = ClassificationReport(classifier,
                                      classes=classes,
                                      support=True)
    visualizer.fit(X_test, y_test)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath="classification_report.png",
                    clear_figure=True,
                    **save_options)
    ex.add_artifact('classification_report.png')
def plot_confusion_matrix (X_train, y_train, X_test, y_test, model, encoder):
    """
    Fit ``model`` inside a yellowbrick ConfusionMatrix and show the plot.

    :param X_train: training set
    :param y_train: training set target
    :param X_test: test set
    :param y_test: test set target
    :param model: model whose performance is visualized
    :param encoder: passed through to ``ConfusionMatrix(encoder=...)``
    :return: None; the figure is shown as a side effect
    """
    # The visualizer takes the model plus the label encoder.
    matrix = ConfusionMatrix(model, encoder=encoder)

    # fit() trains the wrapped model on the training split.
    matrix.fit(X_train, y_train)

    # score() predicts on the test split and builds the confusion matrix.
    matrix.score(X_test, y_test)

    matrix.show()
def yellowbrick_visualizations(model, classes, X_tr, y_tr, X_te, y_te):
    """Show a confusion matrix, classification report and ROC/AUC plot.

    Each visualizer wraps ``model``, is fitted on ``(X_tr, y_tr)``, scored
    on ``(X_te, y_te)`` and displayed before the next one is created.
    """
    def _render(viz):
        # Common fit / score / show cycle shared by all three visualizers.
        viz.fit(X_tr, y_tr)
        viz.score(X_te, y_te)
        viz.show()

    _render(ConfusionMatrix(model, classes=classes))
    _render(ClassificationReport(model, classes=classes, support=True))
    _render(ROCAUC(model, classes=classes))
def nbml(dados):
    """Predict the credit class of one applicant with Gaussian Naive Bayes.

    The applicant's 20 features are written to ``teste_head.csv`` and read
    back with pandas. A ``GaussianNB`` model is trained on half of
    ``creditfiltrado.csv`` (``;``-separated; columns 0-19 are features,
    column 20 is the target) and used to classify the applicant. The
    prediction is stored through ``UpdateMongoPrevisao`` and returned.

    Args:
        dados: Mapping that provides the 20 feature keys listed in
            ``feature_names`` below, plus ``'cpf'``.

    Returns:
        The array returned by ``modelo.predict`` for the applicant row.

    Raises:
        ValueError: If a categorical value of the applicant never occurs in
            the training data (raised by ``LabelEncoder.transform``).
    """
    feature_names = [
        'checking_status',
        'duration',
        'credit_history',
        'purpose',
        'credit_amount',
        'savings_status',
        'employment',
        'installment_commitment',
        'personal_status',
        'other_parties',
        'residence_since',
        'property_magnitude',
        'age',
        'other_payment_plans',
        'housing',
        'existing_credits',
        'job',
        'num_dependents',
        'own_telephone',
        'foreign_worker',
    ]
    # Column positions that are label-encoded before training.
    encoded_columns = [0, 2, 3, 5, 6, 8, 9, 11, 13, 14, 16, 18, 19]

    # Build the rows first so a missing key fails before the file is touched.
    rows = [feature_names, [dados[name] for name in feature_names]]

    # newline='' is the csv module's documented way to open output files; it
    # prevents doubled line endings on platforms that translate newlines.
    with open('teste_head.csv', 'w', newline='') as myfile:
        csv.writer(myfile).writerows(rows)

    base = pd.read_csv('creditfiltrado.csv', sep=';')
    print(base)
    base2 = pd.read_csv('teste_head.csv', sep=",")
    print(base2)

    X = base.iloc[:, 0:20].values
    y = base.iloc[:, 20].values
    z = base2.iloc[:, 0:20].values

    # Fit each encoder on the training column and apply the SAME mapping to
    # the applicant row. Encoding the single applicant row on its own would
    # map every category to 0, unrelated to the codes the model was trained
    # on.
    for column in encoded_columns:
        labelencoder = LabelEncoder()
        X[:, column] = labelencoder.fit_transform(X[:, column])
        z[:, column] = labelencoder.transform(z[:, column])

    X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(
        X, y, test_size=0.5, random_state=0)
    modelo = GaussianNB()
    modelo.fit(X_treinamento, y_treinamento)

    previsoes2 = modelo.predict(z)
    print(previsoes2)

    # The confusion matrix is fitted and scored but not rendered here.
    confusao = ConfusionMatrix(modelo, classes=['good', 'bad'])
    confusao.fit(X_treinamento, y_treinamento)
    confusao.score(X_teste, y_teste)

    UpdateMongoPrevisao(dados['cpf'], str(previsoes2))
    return previsoes2
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from yellowbrick.classifier import ConfusionMatrix


if __name__ == '__main__':
    # Load the handwritten-digits classification data set.
    digits = load_digits()
    X, y = digits.data, digits.target

    # Hold out 20% of the samples for evaluation (fixed seed).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=11)

    model = LogisticRegression()

    # The ConfusionMatrix visualizer takes a model; one class per digit 0-9.
    cm = ConfusionMatrix(model, classes=list(range(10)))

    # Fit on the training data, then evaluate on the test data.
    cm.fit(X_train, y_train)
    cm.score(X_test, y_test)

    # Draw the figure and write it to the given path.
    g = cm.poof(outpath="images/confusion_matrix.png")
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    """Train a TF-IDF + logistic-regression pipeline inside an MLflow run.

    The fitted pipeline is dumped to ``pipeline_lr.pkl``. Accuracy, F1 and
    AUC (all computed from the hard predictions) are printed and logged as
    metrics, and four yellowbrick plots are saved under ``image/`` and
    logged as artifacts.

    Returns:
        The ``run_uuid`` of the MLflow run.
    """
    np.random.seed(100)

    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
        vectorizer = TfidfVectorizer(analyzer='word',
                                     tokenizer=nltk.tokenize.word_tokenize,
                                     stop_words='english',
                                     min_df=5)
        my_pipeline = Pipeline(steps=[
            ('vectorizer', vectorizer),
            ('lr', LogisticRegression(random_state=42)),
        ])

        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)

        joblib.dump(my_pipeline, 'pipeline_lr.pkl')

        # (metric name, value) pairs, kept in reporting order.
        metrics = (
            ('Accuracy', accuracy_score(ytest, predictions)),
            ('f1_score', f1_score(ytest, predictions)),
            ('auc_score', roc_auc_score(ytest, predictions)),
        )
        class_report = classification_report(ytest, predictions)

        for metric_name, value in metrics:
            print(f'{metric_name} : {round(value, 2)}')
        print(f'class_report : \n {class_report}')

        for metric_name, value in metrics:
            mlflow.log_metric(metric_name, round(value, 2))

        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)

        def _save_plot(visualizer, path):
            # Fit on the training data, score on the test data, save to path.
            visualizer.fit(xtrain, ytrain)
            visualizer.score(xtest, ytest)
            visualizer.poof(outpath=path)

        _save_plot(ClassificationReport(my_pipeline, ax=ax1, classes=[0, 1]),
                   "image/classification_report.png")
        print(' ')
        mlflow.log_artifact("image/classification_report.png")

        # The ConfusionMatrix visualizer takes a model.
        _save_plot(ConfusionMatrix(my_pipeline, ax=ax2, classes=[0, 1]),
                   "image/confusionmatrix.png")
        mlflow.log_artifact("image/confusionmatrix.png")
        print(' ')

        _save_plot(ROCAUC(my_pipeline, ax=ax3, classes=[0, 1]),
                   "image/rocauc.png")
        print(' ')
        mlflow.log_artifact("image/rocauc.png")

        _save_plot(ClassPredictionError(my_pipeline, ax=ax4, classes=[0, 1]),
                   "image/ClassificationError.png")
        print(' ')
        mlflow.log_artifact("image/ClassificationError.png")

        return run.info.run_uuid
Exemple #19
0
# Label-encode columns 6 through 18 in place. Every column is encoded from
# its own values; the original last line encoded column 17 a second time and
# stored the result in column 18, discarding column 18's data.
for _col in range(6, 19):
    x[:, _col] = labelencoder.fit_transform(x[:, _col])

x_treinamento, x_teste, y_treinamento, y_teste = train_test_split(
    x, y, test_size=0.3, random_state=0
)  # random_state=0 always yields the same split, as in the video example

modelo = GaussianNB()
modelo.fit(x_treinamento,
           y_treinamento)  # builds the Naive Bayes probability table

previsoes = modelo.predict(x_teste)

# Accuracy on the test split (the value is not stored).
accuracy_score(y_teste, previsoes)

# Confusion matrix
confusao = ConfusionMatrix(modelo, classes=['ruim', 'bom'])
confusao.fit(x_treinamento, y_treinamento)
confusao.score(x_teste, y_teste)
confusao.poof()
Exemple #20
0
                                                    random_state=0)

# Model training
naive_bayes = GaussianNB()
naive_bayes.fit(X_train, y_train)

# Model testing
previsoes = naive_bayes.predict(X_test)
confusao = confusion_matrix(y_test, previsoes)
taxaAcerto = accuracy_score(y_test, previsoes)
taxaErro = 1 - taxaAcerto

# Visualization of the machine-learning model
from yellowbrick.classifier import ConfusionMatrix
visualizador = ConfusionMatrix(GaussianNB())
visualizador.fit(X_train, y_train)
visualizador.score(X_test, y_test)
# Call the method: the bare attribute reference ``visualizador.poof`` never
# rendered anything.
visualizador.poof()
""" Simulando modelo em produção """

# Load the record to predict
novoCredito = pd.read_csv('NovoCredit.csv')

# Identify the categorical attributes (dtype 'O', i.e. object). A
# comprehension leaves no loop variable behind, so no ``del`` is needed, and
# an empty column list no longer raises NameError.
atributosParaEncoderEmProducao = [
    coluna for coluna in novoCredito.columns
    if novoCredito[coluna].dtype == 'O'
]

# Encode the object-typed attributes so the model can use them
Exemple #21
0
def _append_current_figure(all_plots, title):
    """Append the current pyplot figure as HTML under ``title``, then clear it."""
    fig = plt.gcf()
    all_plots.append("<h4 align='center'>" + title + "</h4>" +
                     mpld3.fig_to_html(fig))
    plt.clf()


def get_plots():
    """Build HTML snippets of yellowbrick diagnostics for the current model.

    Reads the module-level ``X_train``, ``y_train``, ``X_test``, ``y_test``,
    ``model_def``, ``Enc`` and ``ML_ALG_nr``. A t-SNE manifold plot is always
    produced first. For classification (``ML_ALG_nr == 1``) a class
    prediction error plot and a confusion matrix follow, provided the class
    names can be decoded. For regression (``ML_ALG_nr == 0``) a prediction
    error plot and a residuals plot follow.

    Returns:
        List of HTML strings, one per plot, when ``ML_ALG_nr`` is 0 or 1.
        ``None`` for any other value.
    """
    all_plots = []

    # Feature visualization: t-SNE manifold of the training data.
    plt.figure(figsize=(3.5, 3.5))
    viz = Manifold(manifold="tsne")
    viz.fit_transform(X_train, y_train)
    _append_current_figure(all_plots, "Manifold Visualization")

    if ML_ALG_nr == 1:
        # Classification: check whether the class names can be decoded.
        classes = None
        try:
            classes = list(Enc.inverse_transform(model_def.classes_))
        except ValueError as e:
            app.logger.info(e)

        if classes is not None:
            # The class prediction error uses a fresh decision tree rather
            # than model_def, as in the original code.
            visualizer = ClassPredictionError(DecisionTreeClassifier(),
                                              classes=classes)
            visualizer.fit(X_train, y_train)
            visualizer.score(X_test, y_test)
            _append_current_figure(all_plots, "Class Prediction Error")

            # Confusion matrix for the model under inspection. It is built
            # once; the original constructed the visualizer twice in a row.
            cm = ConfusionMatrix(model_def, classes=classes)
            cm.fit(X_train, y_train)
            # score() runs predict() on the test data and builds the matrix.
            cm.score(X_test, y_test)
            _append_current_figure(all_plots, "Confusion Matrix")

        return all_plots

    elif ML_ALG_nr == 0:
        # Regression: prediction error against the identity line.
        visualizer = PredictionError(model_def, identity=True)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        _append_current_figure(all_plots, "Prediction Error Plot")

        # Residuals of the same model.
        visualizer = ResidualsPlot(model_def)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        _append_current_figure(all_plots, "Residuals Plot")

        return all_plots
# Depth search for the decision tree: track the best test accuracy and the
# depth that achieved it, and show a confusion matrix for every depth.
meilleur_score = 0
# Bound up front so the final print cannot raise NameError when no depth
# scores above 0.
meilleur_param = None
profondeur = [5, 7, 10, 12, 15]

for k in profondeur:

    arbre = DecisionTreeClassifier(max_depth=k)
    arbre.fit(x_train, y_train)
    score = arbre.score(x_test, y_test)

    if score > meilleur_score:
        meilleur_score = score
        meilleur_param = {'profondeur': k}

    # Confusion matrix for this depth, shown as percentages.
    cm = ConfusionMatrix(arbre, classes=[0, 1, 2, 3, 4, 5, 6], percent=True)
    cm.fit(x_train, y_train)
    cm.score(x_test, y_test)
    cm.poof()

print('meilleur score : {:.2f}'.format(meilleur_score))
print('meilleur paramètre : {}'.format(meilleur_param))
''' ----------- Forêt Aléatoire ----------------------'''

# Random forest section: reset the best score and define the search values.
meilleur_score = 0

nbr_arbre = [20, 40, 60, 80, 100, 120, 140]
features = ['sqrt', 'log2']
critere = ['gini', 'entropy']

for arbre_choix in nbr_arbre:

# Earlier version (disabled): load each matrix file separately.
#matriz,cl1=create_matriz('m1.txt')
#matriz2,cl2=create_matriz('m2.txt')
#matriz3,cl3=create_matriz('m3.txt')
#matriz4,cl4=create_matriz('m4.txt')
#matriz5,cl5=create_matriz('m5.txt')
#zero,cl0=create_matriz('zero.txt')

# Test samples and their classes.
teste, cl_teste = create_matriz('m7.txt')

# Earlier version (disabled): concatenate the separately loaded matrices.
#data=matriz+matriz2+matriz3+matriz4+matriz5+zero
#cl_data=cl1+cl2+cl3+cl4+cl5+cl0
# Training samples and their classes, loaded from a single file.
data, cl_data = create_matriz('Matriz total.txt')

# Train Gaussian Naive Bayes on the training data.
modelo = GaussianNB(var_smoothing=1e-10)
modelo.fit(data, cl_data)

# Predict the test samples; the accuracy value is computed but not stored.
previsoes = modelo.predict(teste)
accuracy_score(cl_teste, previsoes)

# Confusion matrix: fitted on the training data, scored on the test data.
confusao = ConfusionMatrix(modelo, classes=[0, 1, 2, 3, 4])
confusao.fit(data, cl_data)
confusao.score(teste, cl_teste)
confusao.poof()

# Persist the trained model with pickle at a hard-coded absolute path.
with open(
        'C:\\Users\\RENÊ MICHEL\\Desktop\\Codigos\\Python\\Batalha Naval\\nv.pickle',
        'wb') as f:
    pickle.dump((modelo), f)
            vetor[:, i] = labelencoder.fit_transform(vetor[:, i])


# Label-encode the predictor matrix via the labelEncoder helper, whose
# definition is outside this excerpt.
labelEncoder(previsores)

# Hold out 30% of the rows for testing (fixed seed).
X_treino, X_teste, y_treino, y_teste = train_test_split(previsores,
                                                        classe,
                                                        test_size=0.3,
                                                        random_state=0)

# Train Gaussian Naive Bayes.
naive_bayes = GaussianNB()
naive_bayes.fit(X_treino, y_treino)

# Evaluate on the test split: confusion matrix and accuracy.
previsoes = naive_bayes.predict(X_teste)
confusao = confusion_matrix(y_teste, previsoes)
taxa_acerto = accuracy_score(y_teste, previsoes)

# Visual confusion matrix; the visualizer trains its own fresh GaussianNB.
v = ConfusionMatrix(GaussianNB())
v.fit(X_treino, y_treino)
v.score(X_teste, y_teste)
v.poof()

# Load a new record, keep its first 20 columns and encode it with the same
# helper.
# NOTE(review): presumably the helper re-fits its encoders on this data, so
# the codes may not match the training encoding. Confirm against the helper.
novo_credito = pd.read_csv('NovoCredit.csv')
novo_credito = novo_credito.iloc[:, 0:20].values
labelEncoder(novo_credito)

# Predict the class of the new record with the model trained above.
nova_previsao = naive_bayes.predict(novo_credito)

# Report the first prediction, surrounded by blank lines.
print()
print('Seu novo cliente e: {} pagador'.format(nova_previsao[0]))
print()
Exemple #25
0
# Response variable (y): column 1 of the data set
classe = dados.iloc[:, 1].values

# Turn the categorical column 0 of the predictors into a numeric column
labelencoder = LabelEncoder()
previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0])

# Split the data into training and test sets (30% test, fixed seed)
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(previsores, classe, test_size = 0.3, random_state = 0)

# Create the Random Forest classifier and train it
floresta = RandomForestClassifier(criterion = 'gini', random_state = 0)
floresta.fit(X_treinamento, y_treinamento)

# Predict the test set
previsoes = floresta.predict(X_teste)

# Confusion matrix of the predictions
confusao = confusion_matrix(y_teste, previsoes)

# Accuracy and error rate of the Random Forest
taxa_acerto = accuracy_score(y_teste, previsoes)
taxa_erro = 1 - taxa_acerto

# Render the confusion-matrix figure
v = ConfusionMatrix(floresta)
v.fit(X_treinamento, y_treinamento)
v.score(X_teste, y_teste)
v.poof()

# The note below (a bare string literal) records an accuracy of roughly 80%
# with 30% of the data held out for testing.
'''Obs: Como constatado, a taxa de acerto foi 80%, aproximadamente, com 30% de dados para teste.''' 
'''
Com as (previsoes) geradas pela nossa I.A. a partir dos atributos de teste,
podemos compará-las com a (classeTeste), pois ela contém as respostas corretas; assim podemos
já observar a porcentagem de acerto da nossa I.A.
'''

# Fraction of test rows whose predicted class matches the true class.
acuracidade = accuracy_score(classeTeste, previsoes)
# The string below is the author's note (in Portuguese): accuracy_score is
# given classeTeste and the predictions and yields the hit rate; in the
# author's run it was 0.8658 (86%).
'''
Usando a função (accuracy_score) passando como párametro a classeTeste e a nossá váriavel de previsões podemos
gerar o valor de porcentagem de acertos da nossa I.A

Neste exemplo nossa I.A acertou 0.8658 (86%)
'''

############################### CONFUSION MATRIX ##################################
# The string below is the author's note (in Portuguese): the ConfusionMatrix
# visualizer is used to show more clearly how the hits and misses are
# distributed.
'''
Por meio da nossa biblioteca (ConfusionMatrix) podemos gerar a matriz de confusão em Python mostrando assim
de forma mais clara como foi o percentual de acerto da nossa I.A
'''

# First plot: class labels in Portuguese.
confusao = ConfusionMatrix(modelo,
                           classes=["Nenhum", "Severo", "Leve", "Moderado"])
confusao.fit(atributosTreinamentos, classeTreinamento)
confusao.score(atributosTestes, classeTeste)
confusao.poof()

# Second plot: same model and data, class labels in English. `confusao` is
# rebound to the new visualizer.
confusao = ConfusionMatrix(modelo,
                           classes=["None", "Severe", "Mild", "Moderate"])
confusao.fit(atributosTreinamentos, classeTreinamento)
confusao.score(atributosTestes, classeTeste)
confusao.poof()
def confusion_matrix(xx, yy, estimatorss, **kwargs):
    """Fit, score and display a ConfusionMatrix visualizer for an estimator.

    The visualizer is fitted and scored on the same ``xx``/``yy`` pair, so
    the matrix shown describes performance on the data used for fitting.

    Parameters
    ----------
    xx : feature matrix passed to ``fit`` and ``score``.
    yy : target values passed to ``fit`` and ``score``.
    estimatorss : the estimator wrapped by the visualizer.
    **kwargs : extra keyword arguments forwarded to ``ConfusionMatrix``.
        ``classes``, ``cmap`` and ``size`` default to the values below and
        may be overridden here.

    Returns
    -------
    None
    """
    # NOTE(review): this name is the same as sklearn.metrics.confusion_matrix;
    # importing both into one module would shadow one of them - confirm.

    # Use setdefault so a caller-supplied classes/cmap/size overrides the
    # default instead of raising "got multiple values for keyword argument".
    kwargs.setdefault('classes', ['Reach, 1 Reach, or L/R Reach', 'Discard'])
    kwargs.setdefault('cmap', "YlGn")
    kwargs.setdefault('size', (600, 360))

    vz1 = ConfusionMatrix(estimatorss, **kwargs)
    vz1.fit(xx, yy)
    vz1.score(xx, yy)
    vz1.show()
# Split the data into a training set (80%) and a validation set (20%),
# with a fixed seed so the split is repeatable.
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X, Y, test_size=0.20, random_state=10)
# Debug helpers (disabled): print the shapes of the four splits.
# print("X_train: ", X_train.shape)
# print("X_validation: ", X_validation.shape)
# print("Y_train: ", Y_train.shape)
# print("Y_validation: ", Y_validation.shape)

gaussianNB = GaussianNB()

# Wrap the classifier in a confusion-matrix visualizer labelled with the
# 26 uppercase letters A..Z.
cm = ConfusionMatrix(
    gaussianNB,
    classes="A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z".split(','))

# Train through the visualizer using the training set.
cm.fit(X_train, Y_train)

# Score on the validation set (the matrix is not displayed in this excerpt).
cm.score(X_validation, Y_validation)

# Predict the responses for the validation set; the call goes through the
# visualizer (presumably forwarded to the wrapped GaussianNB - confirm).
predictions = cm.predict(X_validation)

# Accuracy classification score.
print("Accuracy: ", accuracy_score(Y_validation, predictions))

# Confusion matrix printed as plain numbers.
print(confusion_matrix(Y_validation, predictions))

# Text report showing the main classification metrics, with 5 decimal digits.
print(classification_report(Y_validation, predictions, digits=5))
Exemple #29
0
# Print `count`, then its first two entries as fractions of the number of
# test rows (presumably the class balance of Ytest - confirm where `count`
# is built).
print(count)
print(count[0] / Ytest.shape[0], count[1] / Ytest.shape[0])

# ---- Decision tree ----
tr = DecisionTreeClassifier()
tr.fit(Xtrain, Ytrain)

# Accuracy of the decision tree on the test set, then its feature importances.
y_predict = tr.predict(Xtest)
print(
    f"Accuracy score for Decision Tree Classifier is: {accuracy_score(Ytest, y_predict)}"
)
print("DT importances=", tr.feature_importances_)

# Confusion matrix for the decision tree, with class labels 0 and 1.
cm = ConfusionMatrix(tr, classes=[0, 1])
cm.fit(Xtrain, Ytrain)
cm.score(Xtest, Ytest)
cm.show()

# ---- Logistic regression ----

tr2 = LogisticRegression()
tr2.fit(Xtrain, Ytrain)

# Accuracy of the logistic regression on the test set; `y_predict` is reused
# and now holds this model's predictions.
y_predict = tr2.predict(Xtest)
print(
    f"\nAccuracy score for Logistic Regression Classifier is: {accuracy_score(Ytest, y_predict)}"
)
# The value printed under the "importances" label is the fitted coefficient
# array (coef_), not feature_importances_.
print("RL importances=", tr2.coef_)
Exemple #30
0
# One-hot encode the 'allergy' column (yields columns such as 'allergy_No').
df = pd.get_dummies(df, columns=['allergy'])

# Features: stated confidence plus the "no allergy" indicator.
# Target: number of choices.
X = df[['choice_confidence', 'allergy_No']]
y = df['num_choices']

# 75/25 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

model = LogisticRegression()

# Confusion matrix: fit the visualizer on the training rows, score on the
# test rows, then draw it.
cm = ConfusionMatrix(model)
cm.fit(X_train, y_train)
cm.score(X_test, y_test)
cm.poof()

# Fit the model directly and report its accuracy on the test rows.
y_pred = model.fit(X_train, y_train).predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# (Drew)
# One-hot encode the categorical dinner choice so later prediction models get
# numeric inputs, then discard the columns that are not used as features.
one_hot = pd.get_dummies(df['dinner_choice'])
df = df.drop(columns=['Timestamp', 'age', 'date', 'time'])
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split as tts

from yellowbrick.classifier import ConfusionMatrix


if __name__ == '__main__':
    digits = load_digits()
    digit_X = digits.data
    digit_y = digits.target
    d_X_train, d_X_test, d_y_train, d_y_test = tts(
        digit_X, digit_y, test_size=0.2
    )
    model = LogisticRegression()
    digit_cm = ConfusionMatrix(model, classes=[0,1,2,3,4,5,6,7,8,9])
    digit_cm.fit(d_X_train, d_y_train)
    digit_cm.score(d_X_test, d_y_test)
    d = digit_cm.poof(outpath="images/confusion_matrix_digits.png")


    iris = load_iris()
    iris_X = iris.data
    iris_y = iris.target
    iris_classes = iris.target_names
    i_X_train, i_X_test, i_y_train, i_y_test = tts(
        iris_X, iris_y, test_size=0.2
    )
    model = LogisticRegression()
    iris_cm = ConfusionMatrix(
        model, classes=iris_classes,
        label_encoder={0: 'setosa', 1: 'versicolor', 2: 'virginica'}