def confusion_mt(model, test_x, test_y):
    """Show the confusion matrix of ``model`` on a labelled test set.

    :param model: object whose ``predict`` returns per-class scores, one row
        per sample.
    :param test_x: inputs forwarded to ``model.predict``.
    :param test_y: true labels, passed to the metric as ``y_true``.
    """
    # The predicted class is the column holding the highest score.
    predicted_classes = np.argmax(model.predict(test_x), axis=1)
    matrix = sk_metrics.confusion_matrix(y_true=test_y, y_pred=predicted_classes)

    plt.figure()
    plot_confusion_matrix(matrix, hide_ticks=True, cmap=plt.cm.Blues)
    plt.show()
def plot_confusion(model, X, y):
    """
    Plot confusion matrix of model classifications on data.

    Also prints how many samples were classified correctly and incorrectly.

    :param model: model to make predictions on images
    :param X: features
    :param y: labels (class indices; one-hot encoded against ``LABELS``)
    """
    # Run inference once and reuse the scores for both the printed summary
    # and the confusion matrix; a second predict() pass is pure overhead.
    scores = model.predict(X)

    predictions = scores.argmax(axis=-1).reshape(-1)
    correct = np.nonzero(predictions == y)[0]
    incorrect = np.nonzero(predictions != y)[0]
    print("Correct:", len(correct))
    print("Incorrect:", len(incorrect))

    # To get the prediction, we pick the class with the highest probability.
    y_pred_classes = np.argmax(scores, axis=1)
    y_true = np.argmax(to_categorical(y, len(LABELS)), axis=1)

    conf_mtx = confusion_matrix(y_true, y_pred_classes)
    plot_confusion_matrix(conf_mtx, figsize=(12, 8), hide_ticks=True,
                          cmap=plt.cm.Blues)
    # Derive tick positions from LABELS so tick count and names always agree.
    tick_positions = range(len(LABELS))
    plt.xticks(tick_positions, LABELS, fontsize=16)
    plt.yticks(tick_positions, LABELS, fontsize=16)
    plt.show()
def index():
    """Render ``index.html`` with the confusion matrix embedded as a PNG.

    Downloads the predictions CSV from Cloud Storage to ``download.csv``,
    builds the confusion matrix of its ``actual`` vs ``predicted`` columns,
    draws it and hands the base64-encoded PNG to the template as ``data``.

    :return: the result of ``render_template('index.html', data=...)``.
    """
    project_id = 'weighty-flag-288919'
    bucket_name = 'delabs2020'

    client = storage.Client(project=project_id)
    bucket = client.get_bucket(bucket_name)
    blob = bucket.blob('results/predictions-00000-of-00001.csv')
    blob.download_to_filename('download.csv')

    # Load necessary data
    results = pd.read_csv('download.csv', header=None,
                          names=['predicted', 'actual'])

    cm = confusion_matrix(results['actual'], results['predicted'])
    plot_confusion_matrix(cm, figsize=(7.5, 5), hide_ticks=True,
                          cmap=plt.cm.Blues)
    # pyplot keeps every figure alive until it is closed explicitly. This
    # runs once per request, so grab the figure and always release it.
    figure = plt.gcf()
    try:
        plt.title("K Neighbors Model - Confusion Matrix")
        plt.xticks(range(2), ["Heart Not Failed", "Heart Fail"], fontsize=16)
        plt.yticks(range(2), ["Heart Not Failed", "Heart Fail"], fontsize=16,
                   rotation=45)

        # Save it to a temporary buffer.
        buf = BytesIO()
        figure.savefig(buf, format="png")
    finally:
        plt.close(figure)

    # Embed the result in the html output.
    data = base64.b64encode(buf.getbuffer()).decode("ascii")
    return render_template('index.html', data=data)
def test_model(API_KEY, df_test):
    """Classify every lyric in ``df_test`` and write evaluation artefacts.

    Saves the confusion-matrix plot to ``assets/confusion_matrix.png`` and
    the per-class classification report to ``assets/metrics.csv``.

    :param API_KEY: key forwarded to ``classifyText`` for each lyric.
    :param df_test: DataFrame with a ``lyric`` column (text to classify) and
        a ``genre`` column (true class name).
    """
    # Single source of truth for the class order used by the matrix, the
    # plot and the report.
    labels = ["hip_hop", "funk", "sertanejo"]

    y_true = df_test.genre
    y_pred = [classifyText(API_KEY, lyric)["class_name"]
              for lyric in df_test['lyric']]

    cm = confusion_matrix(y_true, y_pred, labels=labels)
    plot_confusion_matrix(conf_mat=cm, show_normed=True, figsize=(5, 5),
                          class_names=labels)
    plt.tight_layout()
    plt.savefig('assets/confusion_matrix.png')

    # target_names are matched positionally against `labels`. Without an
    # explicit `labels`, the report sorts the classes alphabetically and the
    # "hip_hop" and "funk" rows would carry each other's metrics.
    report = classification_report(y_true, y_pred, labels=labels,
                                   target_names=labels, output_dict=True)
    metrics = pd.DataFrame(report).transpose()
    metrics.to_csv('assets/metrics.csv')
def train(ctx, vocab_size, num_classes, filter_num, batch_size,
          word_embed_size, training_steps, learning_rate, print_loss_every,
          confusion_matrix, keep_proba, filter_sizes, save_model):
    """Train a ``TextCNN`` and optionally save it and plot a confusion matrix.

    :param ctx: object exposing ``train_path`` and ``test_path``, passed to
        ``get_dataset``.
    :param vocab_size, num_classes, filter_num, word_embed_size, filter_sizes:
        forwarded to the ``TextCNN`` constructor.
    :param batch_size: number of consecutive training rows fed per step; the
        window wraps around the dataset.
    :param training_steps: number of optimizer steps to run.
    :param learning_rate: learning rate of the Adam optimizer.
    :param print_loss_every: log loss and accuracy every this many steps.
    :param confusion_matrix: when truthy, print and plot the confusion matrix
        of the test set after training.
    :param keep_proba: value fed to ``cnn.keep_proba`` during training steps
        (the evaluation feeds use ``1.0``).
    :param save_model: when truthy, save the session to ``SESS_PATH``.
    """
    # Load dataset
    (x_train, y_train), (x_test, y_test) = get_dataset(ctx.train_path,
                                                       ctx.test_path)
    sequence_length = x_train.shape[1]
    dataset_size = x_train.shape[0]

    tf.reset_default_graph()
    with tf.Graph().as_default():
        cnn = TextCNN(sequence_length, vocab_size, word_embed_size,
                      filter_sizes, filter_num, num_classes)

        # Set eval feed_dict
        train_feed_dict = {
            cnn.input_x: x_train,
            cnn.input_y: y_train,
            cnn.keep_proba: 1.0
        }
        test_feed_dict = {
            cnn.input_x: x_test,
            cnn.input_y: y_test,
            cnn.keep_proba: 1.0
        }

        # Train
        saver = tf.train.Saver()
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(cnn.loss)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(training_steps):
                start = (i * batch_size) % dataset_size
                end = min(start + batch_size, dataset_size)
                feed_dict = {
                    cnn.input_x: x_train[start:end],
                    cnn.input_y: y_train[start:end],
                    cnn.keep_proba: keep_proba
                }
                sess.run(train_step, feed_dict=feed_dict)

                if i % print_loss_every == 0:
                    avg_cost = cnn.loss.eval(feed_dict=feed_dict)
                    train_acc = cnn.accuracy.eval(feed_dict=train_feed_dict)
                    test_acc = cnn.accuracy.eval(feed_dict=test_feed_dict)
                    print(f"Epoch: {i:04d} | AvgCost: {avg_cost:7.4f}", end="")
                    print(f" | Train/Test ACC: {train_acc:.3f}/{test_acc:.3f}")

            # After training, save the sess
            if save_model:
                saver.save(sess, SESS_PATH)
                print('Model state has been saved!')

            if confusion_matrix:
                # Predict with the final weights. Reusing predictions from
                # the last logging step would describe an older model, and
                # would be undefined when no step was logged at all.
                test_pred = cnn.pred.eval(feed_dict=test_feed_dict)
                binary = cm(y_true=y_test, y_pred=test_pred)
                print('\n', 'Confusion Matrix: ')
                print(binary)
                plot_confusion_matrix(binary)
                plt.show()
def evaluate_g(self, test_x, test_y):
    """Display the confusion matrix of ``self.combined`` on a test set.

    :param test_x: inputs forwarded to ``self.combined.predict``.
    :param test_y: true labels, passed to the metric as ``y_true``.
    """
    class_scores = self.combined.predict(test_x)
    predicted = np.argmax(class_scores, axis=1)
    matrix = metrics.confusion_matrix(y_true=test_y, y_pred=predicted)

    plt.figure()
    plot_confusion_matrix(
        matrix,
        hide_ticks=True,
        cmap=plt.cm.Blues,
        figsize=(8, 8),
    )
    plt.show()
def run_svm(df):
    """Train and evaluate a TF-IDF + logistic-regression polarity classifier.

    Despite its name, this function fits ``LogisticRegression``, not an SVM.
    It prints the classification report, shows the confusion matrix of the
    held-out 30 % split and returns the fitted objects.

    :param df: DataFrame with a ``text`` column (documents) and a
        ``polarity`` column (target labels).
    :return: ``(tfidf, clf)`` - the fitted vectorizer and classifier.
    """
    texts = df['text']
    y = df['polarity']

    # Split the raw documents first. Fitting TF-IDF on the full corpus would
    # leak test-set vocabulary and IDF statistics into the features and
    # inflate the reported scores.
    text_train, text_test, y_train, y_test = train_test_split(
        texts, y, test_size=0.3, random_state=0, stratify=y)

    tfidf = TfidfVectorizer(norm='l1', ngram_range=(1, 2), analyzer='word',
                            max_features=5000)
    X_train = tfidf.fit_transform(text_train)
    X_test = tfidf.transform(text_test)

    print('shape of X:',
          (X_train.shape[0] + X_test.shape[0], X_train.shape[1]))
    print('')

    clf = LogisticRegression(penalty='l2', C=2.7825594022071245,
                             solver='liblinear', max_iter=100)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Printing Report")
    print(classification_report(y_test, y_pred))

    # Confusion-matrix plot of the held-out predictions.
    plot_confusion_matrix(confusion_matrix(y_test, y_pred))
    plt.show()
    return tfidf, clf
def main():
    """Run the full pipeline: load data, train, predict and plot results.

    All settings come from ``getParameters()``. After training, prints the
    test accuracy, draws the confusion matrix and calls
    ``plotPrecisionRecall`` on the test predictions.
    """
    print('Stage 0: Importing Packages')
    param = getParameters()

    data, labels, dataOriginal = getData(param['data_path'], param['s'])
    trainData, testData, trainLabels, testLabels = splitData(
        data, labels,
        param['train_images_indices'], param['test_images_indices'])

    model = createModel(s=param['s'],
                        optimizer=param['optimizer'],
                        outputNeurons=param['classes'],
                        lr=param['learnRate'],
                        dropoutProp=param['dropRate'],
                        layers=param['layers'],
                        flat=param['flat'],
                        max=param['max'],
                        decay=param['decay'])

    # `num` must be the configured value, not the literal list ['numOfAug'].
    model, _ = trainModel(model=model,
                          data=trainData,
                          labels=trainLabels,
                          aug=param['aug'],
                          num=param['numOfAug'],
                          s=param['s'],
                          val=param['valRate'],
                          batch=param['batch'],
                          epochs=param['epochs'])

    acc, predicts, confusion = prediction(model=model,
                                          data=testData,
                                          labels=testLabels,
                                          threshold=param['threshold'])
    print("The accuracy rate is:" + str(acc))

    plot_confusion_matrix(conf_mat=confusion)
    plt.suptitle('Confusion-Matrix')
    plotPrecisionRecall(testLabels, predicts)
    return
def run_test_harness():
    """Train the CIFAR-10 model with augmentation, evaluate it and save it.

    Prints the test accuracy, plots the learning curves and the confusion
    matrix, and writes the model to ``CIFAR10_2DENSELAYER_MODEL.h5``.
    """
    batch_size = 64

    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels(trainX, testX)
    # define model
    model = define_model()

    # DATA AUGMENTATION:
    # making more copies of the dataset with small modifications
    datagen = ImageDataGenerator(width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 horizontal_flip=True)
    # prepare iterator
    it_train = datagen.flow(trainX, trainY, batch_size=batch_size)

    # fit model
    steps = trainX.shape[0] // batch_size
    history = model.fit_generator(it_train, steps_per_epoch=steps,
                                  epochs=200,
                                  validation_data=(testX, testY), verbose=1)

    # evaluate model
    _, acc = model.evaluate(testX, testY, verbose=0)
    print('> %.3f' % (acc * 100.0))
    # learning curves
    plot_diagnostics(history)

    classes_name = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog',
                    'frog', 'horse', 'ship', 'truck']

    # plot confusion matrix
    # `predict_classes` no longer exists in current Keras releases. Taking
    # the argmax of the predicted scores gives the same class indices on
    # every version.
    predY = np.argmax(model.predict(testX), axis=1)
    rounded_labels = np.argmax(testY, axis=1)
    mat = confusion_matrix(rounded_labels, predY)
    plot_confusion_matrix(mat, figsize=(9, 9), show_normed=True,
                          class_names=classes_name)

    model.save('CIFAR10_2DENSELAYER_MODEL.h5')
def create_test_model():
    """Restore the bacterial-vs-viral checkpoint and report its performance.

    Prints the accuracy on ``bactviral_test_data`` and shows the confusion
    matrix of the restored model's predictions.
    """
    # Example of testing bact vs viral
    restored = create_empty_model(True)
    restored.load_weights('best_bactviral_checkpoint.hdf5')

    _, acc = restored.evaluate(bactviral_test_data, bactviral_test_labels,
                               verbose=2)
    print('Restored model, accuracy: {:5.2f}%'.format(100 * acc))

    # Class index with the highest score, for predictions and for the
    # one-hot encoded ground truth alike.
    predicted = np.argmax(restored.predict(bactviral_test_data, batch_size=16),
                          axis=-1)
    actual = np.argmax(bactviral_test_labels, axis=-1)

    matrix = confusion_matrix(actual, predicted)
    plt.figure()
    plot_confusion_matrix(matrix, figsize=(12, 8), hide_ticks=True,
                          cmap=plt.cm.Blues)
    class_names = ['Bacterial', 'Viral']
    plt.xticks(range(2), class_names, fontsize=16)
    plt.yticks(range(2), class_names, fontsize=16)
    plt.show()
def evaluate_model(name='vgg16', filename=None):
    """Evaluate a binary (Normal/Pneumonia) model on the test data.

    Saves the confusion-matrix plot and prints recall, precision and F1.

    :param name: model name passed to ``filepath_for`` and ``build_model``.
    :param filename: where to save the plot; defaults to
        ``'data/' + name + 'cm.png'``.
    """
    if filename is None:
        filename = 'data/' + name + 'cm.png'

    filepath = filepath_for(name)
    model, _, _ = build_model(name, filepath)
    test_data, test_labels = load_test_data()

    preds = np.argmax(model.predict(test_data, batch_size=config.BATCH_SIZE),
                      axis=-1)
    orig_test_labels = np.argmax(test_labels, axis=-1)

    # Pin both classes so the matrix is always 2x2, even when one class is
    # absent from labels and predictions; the unpacking below relies on it.
    cm = confusion_matrix(orig_test_labels, preds, labels=[0, 1])

    plot_confusion_matrix(cm, figsize=(12, 8), hide_ticks=True,
                          cmap=plt.cm.BuPu)
    plt.xticks(range(2), ['Normal', 'Pneumonia'], fontsize=16)
    plt.yticks(range(2), ['Normal', 'Pneumonia'], fontsize=16)
    plt.savefig(filename)

    tn, fp, fn, tp = cm.ravel()
    # Report 0.0 instead of dividing by zero when a denominator is empty
    # (no positive predictions, or no positive samples).
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    if recall + precision:
        f1 = 2 * (recall * precision) / (recall + precision)
    else:
        f1 = 0.0

    print("Recall of the model is {:.2f}".format(recall))
    print("Precision of the model is {:.2f}".format(precision))
    print("f1 score is {:.2f}".format(f1))
def calc_cm(Y_true, Y_pred, output_file=None, labels=None,
            figsize=(8.5, 7.5), colorbar=True, show_absolute=True,
            show_normed=True, ylim=None):
    """Compute a confusion matrix with its accuracy, optionally plotting it.

    :param Y_true: ground-truth labels.
    :param Y_pred: predicted labels.
    :param output_file: when truthy, the plot is saved to this path;
        otherwise nothing is drawn.
    :param labels: class names shown on the plot axes.
    :param figsize, colorbar, show_absolute, show_normed: forwarded to
        ``plot_confusion_matrix``.
    :param ylim: when truthy, applied with ``plt.ylim`` before saving.
    :return: ``(cm, acc, loss)`` where ``acc`` is trace/total and ``loss``
        is ``1 - acc``.
    """
    cm = confusion_matrix(Y_true, Y_pred)
    total = np.sum(cm).astype('float')
    acc = np.trace(cm) / total
    loss = 1 - acc

    # Nothing to draw unless an output path was requested.
    if not output_file:
        return (cm, acc, loss)

    plot_confusion_matrix(
        conf_mat=cm,
        colorbar=colorbar,
        show_absolute=show_absolute,
        show_normed=show_normed,
        class_names=labels,
        figsize=figsize,
    )
    title = "Confusion Matrix\nAccuracy={:0.4f}; Loss={:0.4f}".format(acc, loss)
    plt.title(title)
    plt.tight_layout()
    if ylim:
        plt.ylim(ylim)
    plt.savefig(output_file)
    return (cm, acc, loss)
def executeKnn(X_train, X_test, y_train, y_test):
    """Fit a 5-nearest-neighbours classifier and report test performance.

    Plots the confusion matrix of the test predictions and prints the
    accuracy.
    """
    from sklearn.metrics import accuracy_score
    from sklearn.neighbors import KNeighborsClassifier

    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    plot_confusion_matrix(y_test, y_pred)
    print('Acurácia KNN: ', accuracy_score(y_test, y_pred))
def executeDecisionTree(X_train, X_test, y_train, y_test):
    """Fit a default decision tree and report test performance.

    Plots the confusion matrix of the test predictions and prints the
    accuracy.
    """
    from sklearn.metrics import accuracy_score
    from sklearn.tree import DecisionTreeClassifier

    tree = DecisionTreeClassifier()
    tree.fit(X_train, y_train)
    y_pred = tree.predict(X_test)

    plot_confusion_matrix(y_test, y_pred)
    print('Acurácia DecisionTree: ', accuracy_score(y_test, y_pred))
def mlp_classifier(train_x, train_y, test_x, test_y):
    """Train an MLP with two hidden layers of 100 units and evaluate it.

    Prints the test accuracy and plots the confusion matrix under the
    title argument ``"MLP"``.

    :return: the fitted ``MLPClassifier``.
    """
    mlp = MLPClassifier(hidden_layer_sizes=(100, 100))
    mlp.fit(train_x, train_y)

    predicted = mlp.predict(test_x)
    print("Accuracy for MLP classifier:",
          metrics.accuracy_score(test_y, predicted))
    plot_confusion_matrix(test_y, predicted, "MLP")
    return mlp
def svm_classifier(train_x, train_y, test_x, test_y):
    """Train a linear-kernel SVM and evaluate it on the test split.

    Prints the test accuracy and plots the confusion matrix under the
    title argument ``"SVM"``.

    :return: the fitted ``svm.SVC``.
    """
    linear_svm = svm.SVC(kernel="linear")
    linear_svm.fit(train_x, train_y)

    predicted = linear_svm.predict(test_x)
    print("Accuracy for the SVM classifier:",
          metrics.accuracy_score(test_y, predicted))
    plot_confusion_matrix(test_y, predicted, "SVM")
    return linear_svm
def executeSVM(X_train, X_test, y_train, y_test):
    """Fit an RBF-kernel SVM (``gamma='auto'``) and report test performance.

    Plots the confusion matrix of the test predictions and prints the
    accuracy.
    """
    from sklearn.metrics import accuracy_score
    from sklearn.svm import SVC

    # The kernel is RBF, not linear.
    svc = SVC(gamma='auto', kernel='rbf')
    svc.fit(X_train, y_train)
    y_pred = svc.predict(X_test)

    plot_confusion_matrix(y_test, y_pred)
    print('Acurácia SVM: ', accuracy_score(y_test, y_pred))
def executeRandomForest(X_train, X_test, y_train, y_test):
    """Fit a default random forest and report test performance.

    Plots the confusion matrix of the test predictions and prints the
    accuracy.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score

    forest = RandomForestClassifier()
    forest.fit(X_train, y_train)
    y_pred = forest.predict(X_test)

    plot_confusion_matrix(y_test, y_pred)
    print('Acurácia RandomForest: ', accuracy_score(y_test, y_pred))
def executeMPL(X_train, X_test, y_train, y_test):
    """Fit a small MLP (two hidden layers of 5 units) and report performance.

    Plots the confusion matrix of the test predictions and prints the
    accuracy.
    """
    from sklearn.metrics import accuracy_score
    from sklearn.neural_network import MLPClassifier

    mlp = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(5, 5), random_state=1)
    mlp.fit(X_train, y_train)
    y_pred = mlp.predict(X_test)

    plot_confusion_matrix(y_test, y_pred)
    print('Acurácia MPL: ', accuracy_score(y_test, y_pred))
def generate_confusion_matrix(predictions, y_validation):
    """Show the Normal/Pneumonia confusion matrix on a dark background.

    :param predictions: predicted class labels.
    :param y_validation: true class labels.
    """
    matrix = confusion_matrix(y_validation, predictions)

    plt.figure()
    # Note: this switches the global matplotlib style for later plots too.
    plt.style.use("dark_background")
    plot_confusion_matrix(matrix, figsize=(12, 8), hide_ticks=True,
                          cmap=plt.cm.Greens)

    class_names = ['Normal', 'Pneumonia']
    plt.xticks(range(2), class_names, fontsize=16)
    plt.yticks(range(2), class_names, fontsize=16)
    plt.show()
def evaluate_model_metric(embbeder, supports, x_test, y_test, k_shot=1, metric='l2'):
    """Classify the test set by embedding distance and show the result.

    :param embbeder: embedding model forwarded to ``classify_by_metric``.
    :param supports: support samples forwarded to ``classify_by_metric``.
    :param x_test: test inputs; passed through ``triple_channels`` first.
    :param y_test: true labels.
    :param k_shot: forwarded to ``classify_by_metric``.
    :param metric: forwarded to ``classify_by_metric``.
    :return: ``(accuracy, 0)`` - mean of ``y_pred == y_test`` and a constant
        zero placeholder.
    """
    test_inputs = triple_channels(x_test)
    y_pred = classify_by_metric(embbeder, supports, test_inputs,
                                k_shot=k_shot, metric=metric)

    matrix = sk_metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
    plt.figure()
    plot_confusion_matrix(matrix, hide_ticks=True, cmap=plt.cm.Blues)
    plt.show()

    accuracy = (y_pred == y_test).mean()
    return accuracy, 0
def confusion_matrix(self):
    """ Prints/displays the confusion matrix.

    Stores the matrix on ``self.matrix``, logs it, saves the plot to
    ``model/plots/confusion-matrix.png`` and then shows it.
    """
    # Both the targets and the predictions are reduced to class indices
    # along axis 1.
    actual_classes = self.data.y_test.argmax(axis=1)
    predicted_classes = self.test_preds.argmax(axis=1)
    self.matrix = confusion_matrix(actual_classes, predicted_classes)

    self.helpers.logger.info("Confusion Matrix: " + str(self.matrix))
    print("")

    plot_confusion_matrix(conf_mat=self.matrix)
    plt.savefig('model/plots/confusion-matrix.png')
    plt.show()
    plt.clf()
def train(ctx, vocab_size, num_classes, filter_num, batch_size,
          word_embed_size, training_steps, learning_rate, print_loss_every,
          confusion_matrix):
    """Train a ``TextCNN`` on integer-encoded text and optionally plot results.

    The train and test files are loaded with ``np.loadtxt``; the last column
    of each row is the label and the remaining columns are the input.

    :param ctx: object exposing ``train_path`` and ``test_path``.
    :param vocab_size, word_embed_size, num_classes, filter_num: forwarded
        to the ``TextCNN`` constructor.
    :param batch_size: number of rows sampled (with replacement) per step.
    :param training_steps: number of optimizer steps to run.
    :param learning_rate: learning rate of the Adam optimizer.
    :param print_loss_every: log loss and accuracy every this many steps.
    :param confusion_matrix: when truthy, print and plot the confusion matrix
        of the test set after training.
    """
    # Load dataset
    train = np.loadtxt(ctx.train_path, dtype=int)
    test = np.loadtxt(ctx.test_path, dtype=int)
    x_train = train[:, :-1]
    y_train = train[:, -1:].reshape((-1, ))
    x_test = test[:, :-1]
    y_test = test[:, -1:].reshape((-1, ))
    sequence_length = x_train.shape[1]

    with tf.Graph().as_default():
        cnn = TextCNN(sequence_length, vocab_size, word_embed_size,
                      num_classes, filter_num)

        # Set feed_dict
        input_x, input_y = cnn.input_x, cnn.input_y
        train_feed_dict = {input_x: x_train, input_y: y_train}
        test_feed_dict = {input_x: x_test, input_y: y_test}

        # Train
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(cnn.loss)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(training_steps):
                batch_data = train[
                    np.random.randint(train.shape[0], size=batch_size), :]
                X = batch_data[:, :-1]
                Y = batch_data[:, -1:].reshape((-1, ))
                feed_dict = {input_x: X, input_y: Y}
                sess.run(train_step, feed_dict=feed_dict)

                if i % print_loss_every == 0:
                    total_cross_entropy = cnn.loss.eval(feed_dict=feed_dict)
                    train_accuracy = cnn.accuracy.eval(
                        feed_dict=train_feed_dict)
                    test_accuracy = cnn.accuracy.eval(feed_dict=test_feed_dict)
                    print(
                        "After %d training steps, cross entropy on batch data is"
                        " %f, trian accuracy is %.2f, test accuracy is %.2f" %
                        (i, total_cross_entropy, train_accuracy, test_accuracy))

            if confusion_matrix:
                # Predict with the final weights. Reusing predictions from
                # the last logging step would describe an older model, and
                # would be undefined when no step was logged at all.
                test_pred = cnn.pred.eval(feed_dict=test_feed_dict)
                binary = cm(y_true=y_test, y_pred=test_pred)
                print('\n', 'Confusion Matrix: ')
                print(binary)
                plot_confusion_matrix(binary)
                plt.show()
def executeSVM(iris_ds, df_iris, X_train, X_test, y_train, y_test):
    """
    Supervised classification with an SVM (separating hyperplane between
    the classes).

    ``iris_ds`` and ``df_iris`` are accepted but not used here. The
    confusion matrix of the test predictions is plotted.
    """
    from sklearn.svm import SVC

    # Model training. No kernel is passed, so SVC uses its default kernel.
    svc = SVC(gamma='auto')
    svc.fit(X_train, y_train)

    # Prediction
    predicted = svc.predict(X_test)
    plot_confusion_matrix(y_test, predicted)
def plot(model):
    """Plot and save the confusion matrix of ``model`` on the stored test set.

    Loads ``dataset/processed/test_x.npy`` and ``test_y.npy``, rounds the
    model outputs to the nearest integer and plots the first matrix returned
    by ``multilabel_confusion_matrix``. The figure is saved to
    ``matrix.png`` and then shown.

    :param model: object whose ``predict`` accepts the loaded test array.
    """
    test_X = np.load('dataset/processed/test_x.npy')
    test_Y = np.load('dataset/processed/test_y.npy')

    raw_scores = model.predict(np.array(test_X))
    rounded = np.around(raw_scores)
    per_label_matrices = multilabel_confusion_matrix(test_Y, rounded)

    plt.figure()
    plot_confusion_matrix(per_label_matrices[0], figsize=(12, 8),
                          hide_ticks=True)
    tick_names = ['SPAM', 'HAM']
    plt.xticks(range(2), tick_names, fontsize=16)
    plt.yticks(range(2), tick_names, fontsize=16)
    plt.savefig("matrix.png")
    plt.show()
def executeKnn(iris_ds, df_iris, X_train, X_test, y_train, y_test):
    """
    Supervised algorithm that assigns a sample's class label based on the
    neighbouring samples from a training set.

    ``iris_ds`` and ``df_iris`` are accepted but not used here. The
    confusion matrix of the test predictions is plotted.
    """
    from sklearn.neighbors import KNeighborsClassifier

    # Model training
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train, y_train)

    # Prediction
    predicted = knn.predict(X_test)
    plot_confusion_matrix(y_test, predicted)
def executeDecisionTree(iris_ds, df_iris, X_train, X_test, y_train, y_test):
    """
    Supervised classification with a decision tree.

    Plots the confusion matrix of the test predictions and then the fitted
    tree, using ``iris_ds['feature_names']`` as feature names. ``df_iris``
    is accepted but not used here.
    """
    from sklearn.tree import DecisionTreeClassifier

    classifier = DecisionTreeClassifier()
    classifier.fit(X_train, y_train)

    # Prediction
    y_pred = classifier.predict(X_test)
    plot_confusion_matrix(y_test, y_pred)

    # The trailing bare `plot_arvore_decisao` expression was a no-op and has
    # been dropped.
    plot_arvore_decisao(classifier, iris_ds['feature_names'])
def plot_knn(data, grid, y_true, y_pred):
    """Draw the decision regions of the best estimator and a confusion matrix.

    :param data: mapping with ``X_train`` and ``y_train`` entries exposing
        ``.values``.
    :param grid: fitted search object; its ``best_estimator_`` is plotted.
    :param y_true: true labels for the confusion matrix.
    :param y_pred: predicted labels for the confusion matrix.
    """
    X_plot = data['X_train'].values
    # Cast the labels with the concrete `int` type. `np.integer` is an
    # abstract scalar type and converting it to a dtype is deprecated.
    y_plot = data['y_train'].values.astype(int)

    plot_decision_regions(X_plot, y_plot, clf=grid.best_estimator_, legend=2,
                          scatter_kwargs=dict(s=20), markers='+o')

    cm = confusion_matrix(y_true, y_pred)
    plot_confusion_matrix(conf_mat=cm, show_absolute=True, show_normed=True,
                          colorbar=True)
def display_confusion_matrix(grid, input_train, target_train, filedir=None,
                             taskname=None):
    """
    Display prediction in confusion matrix

    The data is split 67/33 (``random_state=42``), ``predict_model`` is
    called on the split, and the confusion matrix of the held-out part is
    shown.

    Parameters
    ----------
    grid: Object
        GridSearchCV object
    input_train: 2D-list
        Input features
    target_train: 2D-list
        Targets
    filedir: str, Optional, default=None
        Directory for file; the plot is saved to
        ``filedir + "Pictures/" + taskname + "_prediction.png"`` when both
        ``filedir`` and ``taskname`` are given
    taskname: str, Optional, default=None
        Name of file; also appended to the plot title when given
    """
    X_train, X_test, y_train, y_test = train_test_split(
        input_train, target_train, test_size=0.33, random_state=42)
    _, predict = predict_model(grid, X_train, y_train, X_test)

    cm = confusion_matrix(y_test, predict)
    fig, ax = plot_confusion_matrix(conf_mat=cm, show_absolute=True,
                                    show_normed=True, colorbar=True)

    # taskname defaults to None. Concatenating it unconditionally raised
    # TypeError for the documented default call.
    title = "Confusion matrix"
    if taskname is not None:
        title += " - " + taskname
    ax.set_title(title)

    if taskname and filedir:
        plt.savefig(filedir + "Pictures/" + taskname + "_prediction.png",
                    format='png')
    plt.show()
def plot_accuracy(method, cm, accuracy):
    """Print a classifier summary and show its normalised confusion matrix.

    :param method: classifier name, used in the printout and the plot title.
    :param cm: confusion matrix to print and plot.
    :param accuracy: accuracy value to print.
    """
    print("Classifier: " + method)
    print("Confusion matrix: \n")
    print(cm)
    print("Accuracy: " + str(accuracy) + "\n")

    fig, ax = plot_confusion_matrix(conf_mat=cm, show_absolute=False,
                                    colorbar=True, show_normed=True)
    plt.title("Confusion matrix " + method)

    # Both axes carry the same ten class names, in the same order.
    class_names = [
        "LUAD", "BRCA", "KIRC", "COAD", "OV", "READ", "LUSC", "GBM", "UCEC",
        "HNSC"
    ]
    positions = list(range(10))
    ax.set_xticks(positions)
    ax.set_xticklabels(class_names)
    ax.set_yticks(positions)
    ax.set_yticklabels(class_names)
    plt.show()