def showConfusionMatrix():
    """Train a LogisticRegression on the scikit-learn digits data set and
    display a yellowbrick confusion matrix for it.

    Each sample of the digits data set is an 8x8 pixel image of a
    handwritten digit; ``digits.data`` flattens the 64 pixels into a
    single feature vector.
    """
    # All imports are function-local, so every name used below must be
    # imported here: the original forgot train_test_split and
    # LogisticRegression, which made this function raise NameError.
    from sklearn.datasets import load_digits
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from yellowbrick.classifier import ConfusionMatrix

    digits = load_digits()
    X = digits.data
    y = digits.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=11)

    model = LogisticRegression()

    # The ConfusionMatrix visualizer takes a model.
    cm = ConfusionMatrix(model, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    # fit() fits the passed model; unnecessary if the model is pre-fitted.
    cm.fit(X_train, y_train)

    # score() runs predict() on the test data and then builds the
    # confusion matrix via scikit-learn.
    cm.score(X_test, y_test)

    # Render the visualization.
    cm.poof()
def draw_confusion_matrix(self):
    """Fit the stored model on the training split and render a yellowbrick
    confusion matrix scored on the held-out test split."""
    cm_viz = ConfusionMatrix(
        self.model,
        classes=self.le.classes_,
        label_encoder=self.le,
    )
    cm_viz.fit(self.training_data, self.training_labels)
    cm_viz.score(self.test_data, self.test_labels)
    cm_viz.poof()
def evaluation(estimator, X, Y, x, y):
    """Render three diagnostics for *estimator* side by side: a learning
    curve (train data X/Y), a confusion matrix, and a ROC/AUC plot
    (test data x/y)."""
    classes = [Y[1], Y[0]]
    fig, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(18, 6))

    # Confusion matrix (scored directly on the test split).
    cmm = ConfusionMatrix(
        model=estimator,
        ax=ax1,
        classes=classes,
        label_encoder={0.0: 'Negativo', 1.0: 'Positivo'},
    )
    cmm.score(x, y)

    # ROC / AUC.
    viz = ROCAUC(model=estimator, ax=ax2)
    viz.fit(X, Y)
    viz.score(x, y)

    # Learning curve over increasing training-set fractions.
    cv_strategy = StratifiedKFold(n_splits=3)
    sizes = np.linspace(0.3, 1.0, 10)
    visualizer = LearningCurve(
        estimator,
        ax=ax,
        cv=cv_strategy,
        scoring='roc_auc',
        train_sizes=sizes,
        n_jobs=4,
    )
    visualizer.fit(X, Y)

    # Finalize each visualizer, then show the combined figure.
    cmm.poof()
    viz.poof()
    visualizer.poof()
    plt.show()
def nice_confusion(model):
    """Creates a nice looking confusion matrix"""
    # Large square figure with readable axis labels.
    plt.figure(figsize=(10, 10))
    plt.xlabel('Predicted Class', fontsize=18)
    plt.ylabel('True Class', fontsize=18)

    matrix_viz = ConfusionMatrix(model, cmap='PuBu', fontsize=18)
    matrix_viz.fit(X_train, y_train)
    matrix_viz.score(X_test, y_test)
    matrix_viz.poof()
def get_confusion_matrix(self, on="test"):
    """Graph a yellowbrick confusion matrix for the pipeline, scored on
    the chosen data split ("test", "train" or "all")."""
    matrix_viz = ConfusionMatrix(self.pipe)
    # Score on the requested split; an unknown value draws an empty matrix.
    if on == "train":
        matrix_viz.score(self._X_train, self._y_train)
    elif on == "all":
        # NOTE(review): uses self.X/self.y here while the other splits use
        # underscore-prefixed attributes — confirm both exist on this class.
        matrix_viz.score(self.X, self.y)
    elif on == "test":
        matrix_viz.score(self._X_test, self._y_test)
    matrix_viz.poof()
def confusion_matrix(model, classes, X_train, Y_train, X_test, Y_test):
    """Fit *model* on the training split and draw a yellowbrick confusion
    matrix scored on the test split, labeling classes 0/1 by name."""
    from yellowbrick.classifier import ConfusionMatrix

    encoder_map = {0: classes[0], 1: classes[1]}
    viz = ConfusionMatrix(model, classes=classes, label_encoder=encoder_map)
    viz.fit(X_train, Y_train)
    viz.score(X_test, Y_test)
    viz.poof()
def classifier_report(classifier, X_test, y_test):
    """Save and register confusion-matrix and classification-report images
    for *classifier*, evaluated on (X_test, y_test).

    Both PNG artifacts are attached to the sacred experiment ``ex``.
    """
    classes = np.unique(y_test)

    # Use the classes actually present in the data: the original passed a
    # hard-coded [0..9] digits list to the matrix while computing
    # `classes` for the report only.
    cm = ConfusionMatrix(classifier, classes=classes)
    # NOTE(review): fit and score both use the test split, mirroring the
    # original; the classifier itself is assumed to be pre-trained.
    cm.fit(X_test, y_test)
    cm.score(X_test, y_test)
    filename = classifier.__class__.__name__ + '_confusion_matrix.png'
    # Pass savefig options directly: the original wrapped them in a single
    # `kwargs=dict(...)` keyword (with a misspelled `inches` key), so they
    # never reached matplotlib's savefig.
    cm.poof(outpath=filename, clear_figure=True,
            transparent=False, dpi=80, bbox_inches='tight')
    ex.add_artifact(filename)

    visualizer = ClassificationReport(classifier, classes=classes,
                                      support=True)
    visualizer.fit(X_test, y_test)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath="classification_report.png", clear_figure=True,
                    transparent=False, dpi=80, bbox_inches='tight')
    ex.add_artifact('classification_report.png')
def plot_confusion_matrix(model: sklearn.base.BaseEstimator,
                          X_train: np.ndarray,
                          X_test: np.ndarray,
                          y_train: np.ndarray,
                          y_test: np.ndarray):
    """Plot a confusion matrix for the given model and train/test data.

    Inputs:
        model: an sklearn classifier
        X_train: training examples
        X_test: test examples
        y_train: training labels corresponding to examples in X_train
        y_test: test labels corresponding to examples in X_test

    Returns: None
    """
    viz = ConfusionMatrix(model)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    viz.poof()
def store_experiment_data(self, X_test, y_test):
    """Score report / confusion-matrix / prediction-error visualizers on
    the test split, save each as a PNG artifact on the experiment, and
    log the classification-report score as a scalar."""
    report = ClassificationReport(self.model)
    score = report.score(X_test, y_test)
    report.poof('metrics/classification_report.png', clear_figure=True)
    self.ex.add_artifact('metrics/classification_report.png')

    conf_matrix = ConfusionMatrix(self.model)
    conf_matrix.score(X_test, y_test)
    conf_matrix.poof('metrics/confusion_matrix.png', clear_figure=True)
    self.ex.add_artifact('metrics/confusion_matrix.png')

    prediction_error = ClassPredictionError(self.model)
    prediction_error.score(X_test, y_test)
    prediction_error.poof('metrics/class_prediction_error.png',
                          clear_figure=True)
    self.ex.add_artifact('metrics/class_prediction_error.png')

    print('score=', score)
    self.ex.log_scalar('score', score)
def get_confusion_matrix(self, on="test"):
    """Produce a confusion matrix via the yellowbrick package.

    Input
    -----
    on : string (default="test")
        Which data split to score and build the matrix on: "test"
        (default), "train", or "all".
    """
    viz = ConfusionMatrix(self.pipe)
    # Map the split name to its (features, labels) pair.
    split = {
        "test": (self._X_test, self._y_test),
        "train": (self._X_train, self._y_train),
        "all": (self._X, self._y),
    }.get(on)
    if split is not None:
        viz.score(*split)
    # Graph the confusion matrix with yellowbrick.
    viz.poof()
Using (previsoes), generated from the test attributes with our model, we
can compare the predictions against (classesTestes), which holds the
correct answers, and so observe the model's accuracy.
'''
acuracidade = accuracy_score(classeTeste, previsoes)
'''
Using accuracy_score with classeTeste as ground truth and our prediction
variable, we obtain the fraction of correct answers.  In this example the
model scored 0.8658 (86%).
'''
###############################  CONFUSION MATRIX  ##################################
'''
Through the ConfusionMatrix class we can render the confusion matrix in
Python, showing more clearly how the model's hits are distributed per class.
'''
confusao = ConfusionMatrix(modelo, classes=["Nenhum", "Severo", "Leve", "Moderado"])
confusao.fit(atributosTreinamentos, classeTreinamento)
confusao.score(atributosTestes, classeTeste)
confusao.poof()

# Same matrix again, with English class labels.
confusao = ConfusionMatrix(modelo, classes=["None", "Severe", "Mild", "Moderate"])
confusao.fit(atributosTreinamentos, classeTreinamento)
confusao.score(atributosTestes, classeTeste)
confusao.poof()
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    """Train a TF-IDF + LogisticRegression text-classification pipeline,
    log metrics and four diagnostic plots to MLflow, pickle the pipeline,
    and return the MLflow run id."""
    np.random.seed(100)
    with mlflow.start_run(experiment_id=experiment_id,
                          run_name=run_name) as run:
        # Word-level TF-IDF features; rare terms (df < 5) are dropped.
        tfid_vect = TfidfVectorizer(analyzer='word',
                                    tokenizer=nltk.tokenize.word_tokenize,
                                    stop_words='english', min_df=5)
        my_pipeline = Pipeline(steps=[('vectorizer', tfid_vect),
                                      ('lr', LogisticRegression(random_state=42))])
        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)
        # Persist the whole fitted pipeline for reuse.
        joblib.dump(my_pipeline, 'pipeline_lr.pkl')

        # Test-set metrics (binary labels assumed by f1/roc_auc defaults —
        # TODO confirm labels are 0/1).
        accuracy = accuracy_score(ytest, predictions)
        f1score = f1_score(ytest, predictions)
        auc_score = roc_auc_score(ytest, predictions)
        class_report = classification_report(ytest, predictions)
        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')
        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))

        # One axes per visualizer, stacked vertically.
        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)

        # Classification report heat map.
        visualizer = ClassificationReport(my_pipeline, ax=ax1, classes=[0, 1])
        visualizer.fit(xtrain, ytrain)
        visualizer.score(xtest, ytest)
        a = visualizer.poof(outpath="image/classification_report.png")
        print(' ')
        mlflow.log_artifact("image/classification_report.png")

        # The ConfusionMatrix visualizer takes a model.
        cm = ConfusionMatrix(my_pipeline, ax=ax2, classes=[0, 1])
        cm.fit(xtrain, ytrain)
        cm.score(xtest, ytest)
        b = cm.poof(outpath="image/confusionmatrix.png")
        mlflow.log_artifact("image/confusionmatrix.png")
        print(' ')

        # ROC / AUC curve.
        vis = ROCAUC(my_pipeline, ax=ax3, classes=[0, 1])
        vis.fit(xtrain, ytrain)    # Fit the training data to the visualizer
        vis.score(xtest, ytest)    # Evaluate the model on the test data
        c = vis.poof(outpath="image/rocauc.png")  # Draw/show/poof the data
        print(' ')
        mlflow.log_artifact("image/rocauc.png")

        # Class prediction error bar chart.
        visual = ClassPredictionError(my_pipeline, ax=ax4, classes=[0, 1])
        visual.fit(xtrain, ytrain)
        visual.score(xtest, ytest)
        g = visual.poof(outpath="image/ClassificationError.png")
        print(' ')
        mlflow.log_artifact("image/ClassificationError.png")
    # `run` stays usable after the context manager closes the MLflow run.
    return run.info.run_uuid
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ConfusionMatrix

if __name__ == '__main__':
    # Load the handwritten-digits data set.
    digits = load_digits()
    X, y = digits.data, digits.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=11)

    model = LogisticRegression()

    # The ConfusionMatrix visualizer wraps the model.
    cm = ConfusionMatrix(model, classes=list(range(10)))
    cm.fit(X_train, y_train)   # fit the training data to the visualizer
    cm.score(X_test, y_test)   # evaluate the model on the test data
    g = cm.poof(outpath="images/confusion_matrix.png")  # draw the figure
# Confusion matrix for the bend-curve classifier.
classes = ['Not_BendCurve', 'BendCurve']
cm = ConfusionMatrix(model, fontsize=13, classes=classes, percent=False)
# fit() fits the passed model; unnecessary for a pre-fitted model.
cm.fit(X_train, y_train)
# score() runs predict() on the validation data and then builds the
# confusion matrix from scikit-learn.
cm.score(X_val, y_val)
# Enlarge the cell labels inside the figure.
for cell_text in cm.ax.texts:
    cell_text.set_size(20)
# How did we do?
cm.poof(bbox_inches='tight')

# Precision, recall and F1 score.
# Set the size of the figure and the font size.
plt.tight_layout(rect=[.5, 0.5, .5, 0.05])
plt.rcParams['figure.figsize'] = (15, 7)
plt.rcParams['font.size'] = 20

# Instantiate the classification-report visualizer.
visualizer = ClassificationReport(model, classes=classes)
visualizer.fit(X_train, y_train)   # fit the training data to the visualizer
visualizer.score(X_val, y_val)     # evaluate the model on the validation data
g = visualizer.poof()

# ROC and AUC
def plot_confusion_matrix(model, X_valid, y_valid):
    """Score an already-fitted model on the validation split and render a
    yellowbrick confusion matrix (is_fitted=True skips re-fitting)."""
    matrix = ConfusionMatrix(model, is_fitted=True)
    matrix.score(X_valid, y_valid)
    matrix.poof()
df = pd.get_dummies(df, columns=['allergy']) # define feature matrix and target variable X = df[['choice_confidence', 'allergy_No']] y = df['num_choices'] # split and train model X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) model = LogisticRegression() # produce confusion matrix cm = ConfusionMatrix(model) cm.fit(X_train, y_train) cm.score(X_test, y_test) cm.poof() # calculate accuracy of model model.fit(X_train, y_train) y_pred = model.predict(X_test) print("Accuracy:", metrics.accuracy_score(y_test, y_pred)) # (Drew) # dummy variables for categorical food data for prediction models to make numerical variables one_hot = pd.get_dummies(df['dinner_choice']) df = df.drop('Timestamp', axis= 1) df = df.drop('age', axis = 1) df = df.drop('date', axis = 1) df = df.drop('time', axis = 1) df = df.drop('hour', axis = 1) df = df.drop('allergy_No', axis = 1)
# Predictions of model 1 on the test vectors.
predicciones1 = modelo1.predict(X_probar)

# Compute the accuracy (y_probar = test labels,
# predicciones1 = predicted labels).
accuracy_score(y_probar, predicciones1)
print('salida: ', accuracy_score(y_probar, predicciones1))

# Build the confusion matrix for model 1.
confusion1 = ConfusionMatrix(modelo1)
confusion1.fit(X_entrenar, y_entrenar)
confusion1.score(X_probar, y_probar)
confusion1.poof()

# Improved model: same entropy tree but requiring at least 100 samples
# per split.
modelo2 = DecisionTreeClassifier(criterion='entropy', min_samples_split=100)
modelo2.fit(X_entrenar, y_entrenar)
# BUG FIX: the original exported and predicted with modelo1 here, so
# model 2 was never actually visualized or evaluated.
export_graphviz(modelo2, out_file='modelo2.dot')
predicciones2 = modelo2.predict(X_probar)

# Compute the accuracy of model 2.
accuracy_score(y_probar, predicciones2)
print('salida: ', accuracy_score(y_probar, predicciones2))

# Build the confusion matrix for model 2.
confusion2 = ConfusionMatrix(modelo2)
# ROC/AUC curves for the random forest on the breast-cancer data.
roc = ROCAUC(rf, classes=cancer.target_names)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.poof()

### Confusion Matrix
from yellowbrick.classifier import ConfusionMatrix

classes = cancer.target_names
# Map the integer labels to readable class names.
conf_matrix = ConfusionMatrix(rf, classes=classes,
                              label_encoder={0: 'benign', 1: 'malignant'})
conf_matrix.fit(X_train, y_train)
conf_matrix.score(X_test, y_test)
conf_matrix.poof()

### Class Prediction Error
from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(rf, classes=classes)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
# Label-encode every column of the feature frame.
le = LabelEncoder()
df_data_encoder = df_data.apply(lambda col: le.fit_transform(col))

X_train, X_test, y_train, y_test = \
    train_test_split(df_data_encoder, df_class, test_size=0.3,
                     random_state=0)

modelo = GaussianNB()
# modelo = MultinomialNB()
modelo.fit(X_train, y_train)
y_pred = modelo.predict(X_test)
matrix = confusion_matrix(y_test, y_pred)
accuracy_score(y_test, y_pred)  # np.sum(matrix.diagonal()) / np.sum(matrix)

# yellowbrick confusion matrix (trains its own estimator instance)
from yellowbrick.classifier import ConfusionMatrix
confusion = ConfusionMatrix(GaussianNB())
# confusion = ConfusionMatrix(MultinomialNB())
confusion.fit(X_train, y_train)
confusion.score(X_test, y_test)
confusion.poof()

### In production
df_novo_credit = pd.read_csv('NovoCredit.csv', sep=',')
# NOTE(review): fit_transform refits the encoder per column on the new
# data, so the integer codes may not match the training encoding — verify.
df_novo_data = df_novo_credit.apply(lambda col: le.fit_transform(col))
modelo.predict(df_novo_data)
#Aprendizagem naive_bayes = GaussianNB() #objeto naive_bayes naive_bayes.fit(x_treinamento, y_treinamento) #Previsão/Teste do modelo previsoes = naive_bayes.predict(x_teste) confusao = confusion_matrix(y_teste, previsoes) #parâmetros: Ground truth (correct) labels; Predicted labels, as returned by a classifier indice_acerto = accuracy_score(y_teste, previsoes) indice_erro = 1 - indice_acerto print(indice_acerto) visualizador = ConfusionMatrix(GaussianNB()) #criando objeto visualizador.fit(x_treinamento, y_treinamento) visualizador.score(x_teste, y_teste) visualizador.poof() #renderiza a visualização #-------------------------------------------------------------------------- #Simulando o modelo em Produção novo_credito = pd.read_csv( r'/home/larag/Desktop/Data Science/Machine Learning/NovoCredit.csv') novo_credito = novo_credito.iloc[:, 0:20].values novo_credito[:, 0] = labelencoder.fit_transform(novo_credito[:, 0]) novo_credito[:, 2] = labelencoder.fit_transform(novo_credito[:, 2]) novo_credito[:, 3] = labelencoder.fit_transform(novo_credito[:, 3]) novo_credito[:, 5] = labelencoder.fit_transform(novo_credito[:, 5]) novo_credito[:, 6] = labelencoder.fit_transform(novo_credito[:, 6]) novo_credito[:, 8] = labelencoder.fit_transform(novo_credito[:, 8]) novo_credito[:, 9] = labelencoder.fit_transform(novo_credito[:, 9]) novo_credito[:, 11] = labelencoder.fit_transform(novo_credito[:, 11]) novo_credito[:, 13] = labelencoder.fit_transform(novo_credito[:, 13])
previsoes = naive_bayes.predict(X_teste) #predict passando os dados de teste! confusao = confusion_matrix( y_teste, previsoes) #comparar os resultados do treino com as previsoes taxa_acerto = accuracy_score(y_teste, previsoes) taxa_erro = 1 - taxa_acerto #Visualizacaoes from yellowbrick.classifier import ConfusionMatrix #visualizacao de modelos de Machine Learning v = ConfusionMatrix(GaussianNB()) #Tabela Confusao #treinamento v.fit(X_treinamento, y_treinamento) #treinamento v.score(X_teste, y_teste) #Calculo do Erro v.poof() #Visualizar #SIMULACAOOOOO COM NOVOS REGISTROS! novo_cliente = pd.read_csv('NovoCredit.csv') novo_cliente = novo_cliente.iloc[:, 0:20].values #deixar de forma de numpy array novo_cliente[:, 0] = labelEncoder.fit_transform(novo_cliente[:, 0]) novo_cliente[:, 2] = labelEncoder.fit_transform(novo_cliente[:, 2]) novo_cliente[:, 3] = labelEncoder.fit_transform(novo_cliente[:, 3]) novo_cliente[:, 5] = labelEncoder.fit_transform(novo_cliente[:, 5]) novo_cliente[:, 6] = labelEncoder.fit_transform(novo_cliente[:, 6]) novo_cliente[:, 8] = labelEncoder.fit_transform(novo_cliente[:, 8]) novo_cliente[:, 9] = labelEncoder.fit_transform(novo_cliente[:, 9]) novo_cliente[:, 11] = labelEncoder.fit_transform(novo_cliente[:, 11]) novo_cliente[:, 13] = labelEncoder.fit_transform(novo_cliente[:, 13])
from sklearn.model_selection import train_test_split as tts
from yellowbrick.classifier import ConfusionMatrix

if __name__ == '__main__':
    # Digits: 10-class confusion matrix saved to disk.
    digits = load_digits()
    digit_X = digits.data
    digit_y = digits.target
    d_X_train, d_X_test, d_y_train, d_y_test = tts(digit_X, digit_y,
                                                   test_size=0.2)
    model = LogisticRegression()
    digit_cm = ConfusionMatrix(model,
                               classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    digit_cm.fit(d_X_train, d_y_train)
    digit_cm.score(d_X_test, d_y_test)
    d = digit_cm.poof(outpath="images/confusion_matrix_digits.png")

    # Iris: class-name labels supplied via label_encoder.
    iris = load_iris()
    iris_X = iris.data
    iris_y = iris.target
    iris_classes = iris.target_names
    i_X_train, i_X_test, i_y_train, i_y_test = tts(iris_X, iris_y,
                                                   test_size=0.2)
    model = LogisticRegression()
    # NOTE(review): this chunk is truncated mid-expression — the
    # label_encoder dict (and the rest of the script) continues beyond
    # this view.
    iris_cm = ConfusionMatrix(model, classes=iris_classes, label_encoder={
        0: 'setosa',
        1: 'versicolor',
        2: 'virginica'
previsores = Arquivo.iloc[:, 0:4].values #Seleciona os atributos previsores classe = Arquivo.iloc[:, 3].values #Seleciona o atributo classificador labelencoder = LabelEncoder( ) #Prepara os atributos para analise (todos os que não forem numericos) altera os atributos categoricos em numericos previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0]) previsores[:, 1] = labelencoder.fit_transform(previsores[:, 1]) previsores[:, 2] = labelencoder.fit_transform(previsores[:, 2]) previsores[:, 3] = labelencoder.fit_transform(previsores[:, 3]) X_treinamento, X_teste, y_treinamento, y_teste = train_test_split( previsores, classe, test_size=0.3, random_state=0 ) #DIvide a base em teste e treinamento. Deixando a base de treinamento com 30% do tamanho, rando sempre divide a base da mesma forma naive_bayes = GaussianNB() #Cria o classificador naive_bayes.fit(X_treinamento, y_treinamento) #Usa os dados para classificação previsoes = naive_bayes.predict(X_teste) # Faz as previsões print(previsoes) confusao = confusion_matrix(y_teste, previsoes) #Cria matriz de confusão print(confusao) taxa_acerto = accuracy_score(y_teste, previsoes) # Teste de acuracia print(taxa_acerto) taxa_erro = 1 - taxa_acerto print(taxa_erro) from yellowbrick.classifier import ConfusionMatrix #Biblioteca específica para visualização da matriz de confusão v = ConfusionMatrix(GaussianNB()) #Cria o visualizador v.fit(X_treinamento, y_treinamento) #Fazendo o treinamento v.score(X_teste, y_teste) #Fazendo o score v.poof() #Cria visualizador
def execute_classification_code(code, session):
    """Execute URL-encoded classification setup code, render a
    classification report, decision-boundary plot and confusion matrix,
    pickle the trained model, and return the artifact paths as JSON."""
    global df, model, problem_class, order
    # SECURITY NOTE: exec() on client-supplied code is arbitrary code
    # execution — only acceptable if this endpoint is fully trusted.
    code_str = urllib.parse.unquote(code)
    code_arr = code_str.split("\n")
    print(code_arr)
    problem_class = code_arr[0]
    print(problem_class)
    order = code_arr[1]
    print(order)
    exec(code_arr[2])
    print(df)
    exec(code_arr[3], globals())

    # Classification report heat map.
    cmap_pink_green = sns.diverging_palette(352, 136, s=96, l=51, n=7)
    viz = ClassificationReport(model, cmap=cmap_pink_green)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    viz.poof(outpath="./plots/classificationmatrix" + session + ".png")
    image_path_class = "classificationmatrix"
    plt.clf()
    plt.cla()
    plt.close()

    le = LabelEncoder()
    # BUG FIX: the original used `list(...).sort()`, which sorts in place
    # and returns None, so classes=None was passed to both visualizers;
    # sorted(...) returns the sorted list.
    class_names = sorted(map(str, y.iloc[:, 0].unique()))
    dec_viz = DecisionViz(model,
                          title="Decision Boundaries",
                          features=np.where(cols == True)[0].tolist(),
                          classes=class_names)
    dec_viz.fit(X_train.to_numpy(), le.fit_transform(y_train))
    dec_viz.draw(X_test.to_numpy(), le.fit_transform(y_test))
    dec_viz.poof(outpath="./plots/decviz" + session + ".png")
    image_path_dec = "decviz"
    plt.clf()
    plt.cla()
    plt.close()

    print(list(map(str, y.iloc[:, 0].unique())))
    # Confusion matrix with a custom palette.
    cmap_salmon_dijon = sns.diverging_palette(28, 65, s=98, l=78, n=7)
    cm = ConfusionMatrix(model, classes=class_names, cmap=cmap_salmon_dijon)
    cm.fit(X_train, y_train)
    cm.score(X_test, y_test)
    plt.tight_layout()
    cm.poof(outpath="./plots/cm" + session + ".png")
    image_path_cm = "cm"
    plt.clf()
    plt.cla()
    plt.close()

    # Re-fit and persist the model for later use.
    model.fit(X_train, y_train)
    file = 'pickled_models/trained_model' + session + '.sav'
    pickle_path = 'trained_model'
    pickle.dump(model, open(file, 'wb'))
    return jsonify(image_path_class, image_path_dec, image_path_cm,
                   pickle_path)
#viz_classification_report= ClassificationReport(gb_classifier , classes=classes) #viz_classification_report.fit(X_train, y_train) # Fit the visualizer and the model #viz_classification_report.score(X_test, y_test) # Evaluate the model on the test data #c = viz_classification_report.poof() # Instantiate the visualizer with the classification model #viz_ROC = ROCAUC(gb_classifier, classes=classes) #viz_ROC.fit(X_train, y_train) # Fit the training data to the visualizer #viz_ROC.score(X_test, y_test) # Evaluate the model on the test data #g = viz_ROC.poof() # Draw/show/poof the data # The ConfusionMatrix visualizer taxes a model cm = ConfusionMatrix(gb_classifier, classes=classes, label_encoder={ 0: 'non-seizure', 1: 'seizure' }) cm.fit(X_train, y_train) cm.score(X_test, y_test) c = cm.poof() plt.tight_layout() #Latency X_test_latency = df_seizures_power_22_states[ df_seizures_power_22_states.columns[:-2]] latency = gb_classifier.predict(X_test_latency) latency_test = pd.concat( [df_seizures_power_22_states['State'], pd.Series(latency)], axis=1)
from yellowbrick.classifier import ConfusionMatrix

iris = datasets.load_iris()
# iris
stats.describe(iris.data)
previsores = iris.data
classe = iris.target

x_treinamento, x_teste, y_treinamento, y_teste = train_test_split(
    previsores, classe, test_size=0.3, random_state=0)

# n_neighbors=3 considers the 3 nearest neighbours.
knn = KNeighborsClassifier(n_neighbors=3)
# Run the training.
knn.fit(x_treinamento, y_treinamento)

# To classify, kNN simply compares distances against the records that are
# already stored.
previsoes = knn.predict(x_teste)
confusao = confusion_matrix(y_teste, previsoes)

# Nicer visualization of the confusion matrix.
visualizador = ConfusionMatrix(KNeighborsClassifier(n_neighbors=3))
visualizador.fit(x_treinamento, y_treinamento)
visualizador.score(x_teste, y_teste)
visualizador.poof()

indice_acertos = accuracy_score(y_teste, previsoes)
print(indice_acertos)
indice_erros = 1 - indice_acertos

# In[ ]:
# NOTE(review): this first statement appears to be the tail of the
# labelEncoder(...) helper whose definition was cut off before this
# chunk — confirm against the full file.
vetor[:, i] = labelencoder.fit_transform(vetor[:, i])

# Encode the categorical feature columns, then split, train and evaluate.
labelEncoder(previsores)
X_treino, X_teste, y_treino, y_teste = train_test_split(previsores,
                                                        classe,
                                                        test_size=0.3,
                                                        random_state=0)

naive_bayes = GaussianNB()
naive_bayes.fit(X_treino, y_treino)

previsoes = naive_bayes.predict(X_teste)
confusao = confusion_matrix(y_teste, previsoes)
taxa_acerto = accuracy_score(y_teste, previsoes)

# yellowbrick confusion-matrix visualizer (trains its own GaussianNB).
v = ConfusionMatrix(GaussianNB())
v.fit(X_treino, y_treino)
v.score(X_teste, y_teste)
v.poof()

# Score a new, unseen record with the trained model.
novo_credito = pd.read_csv('NovoCredit.csv')
novo_credito = novo_credito.iloc[:, 0:20].values
labelEncoder(novo_credito)
nova_previsao = naive_bayes.predict(novo_credito)

print()
print('Seu novo cliente e: {} pagador'.format(nova_previsao[0]))
print()
from yellowbrick.classifier import ConfusionMatrix

if __name__ == '__main__':
    # Digits: 10-class confusion matrix saved to disk.
    digits = load_digits()
    digit_X = digits.data
    digit_y = digits.target
    d_X_train, d_X_test, d_y_train, d_y_test = tts(
        digit_X, digit_y, test_size=0.2
    )
    model = LogisticRegression()
    digit_cm = ConfusionMatrix(model, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    digit_cm.fit(d_X_train, d_y_train)
    digit_cm.score(d_X_test, d_y_test)
    d = digit_cm.poof(outpath="images/confusion_matrix_digits.png")

    # Iris: class-name labels supplied via label_encoder.
    iris = load_iris()
    iris_X = iris.data
    iris_y = iris.target
    iris_classes = iris.target_names
    i_X_train, i_X_test, i_y_train, i_y_test = tts(
        iris_X, iris_y, test_size=0.2
    )
    model = LogisticRegression()
    iris_cm = ConfusionMatrix(
        model, classes=iris_classes,
        label_encoder={0: 'setosa', 1: 'versicolor', 2: 'virginica'}
    )
    # NOTE(review): the script continues past this view (score/poof for
    # the iris matrix are in a later chunk).
    iris_cm.fit(i_X_train, i_y_train)
# NOTE(review): the opening of this DecisionTreeClassifier(...) call was
# cut off in an earlier chunk; only the closing parenthesis is visible.
)  # using entropy (information gain) to rank the most important attributes

modelo1.fit(X_treinamento, y_treinamento)  # build the tree from the 700 training records
export_graphviz(modelo1, out_file='modelo1.dot')  # visualize the tree (file: modelo1.dot)

# Generate the predictions.
previsoes1 = modelo1.predict(X_teste)  # test with the 300 held-out records
accuracy_score(y_teste, previsoes1)  # comparison yielding the hit percentage

# Build the confusion matrix for model 1.
confusao1 = ConfusionMatrix(modelo1)
confusao1.fit(X_treinamento, y_treinamento)
confusao1.score(X_teste, y_teste)
confusao1.poof()

# Model 2
modelo2 = DecisionTreeClassifier(
    criterion='entropy',
    min_samples_split=20)  # entropy again, but with min_samples_split

modelo2.fit(X_treinamento, y_treinamento)  # build the tree from the 700 training records
export_graphviz(modelo2, out_file='modelo2.dot')  # visualize the tree (file: modelo2.dot)

# Generate the predictions.
previsoes2 = modelo2.predict(X_teste)  # test with the 300 held-out records
accuracy_score(y_teste, previsoes2)  # comparison yielding the hit percentage
""" como foi constatado na comparação entre previsões e y_teste, há algumas previsões que vieram com erro (isso é comum) precisamos agora contabilizar essa taxa de erros """ confusao = confusion_matrix(y_teste, previsoes) #obtivemos 71% de acerto com esse algoritmo taxa_acerto = accuracy_score(y_teste, previsoes) taxa_erro = 1 - taxa_acerto """ como na matrix de confusão gerada pela confusion_matrix da sklearn não conseguimos distinguir os valores pra good e bad, vamos importar a ConfusionMatrix da yellowbrick e gerar uma nova matriz de confusão no resultado lê-se: ------------------------------------------------------------ | bad classificado como bad | bad classificado como good | ------------------------------------------------------------ | good classificado como bad | good classificado como good | ------------------------------------------------------------ """ from yellowbrick.classifier import ConfusionMatrix visualizador = ConfusionMatrix(GaussianNB()) visualizador.fit(X_treinamento, y_treinamento) visualizador.score(X_teste, y_teste) visualizador.poof() #para visualizar