def number_pred(save, show):
    """Demonstrate a confusion matrix built from integer class labels.

    Prints the matrix, plots it, optionally saves and/or shows the figure,
    then prints the matrix statistics.

    Parameters
    ----------
    save : if truthy, the current figure is written to
        ``<basepath>/../screenshots/numbers.png``
    show : if truthy, ``plt.show()`` is called

    Notes
    -----
    ``basepath`` is not bound inside this function; it has to be provided by
    an enclosing or module-level scope when ``save`` is truthy.
    """
    actual = [600, 200, 200, 200, 200, 200, 200, 200, 500, 500,
              500, 200, 200, 200, 200, 200, 200, 200, 200, 200]
    predicted = [100, 200, 200, 100, 100, 200, 200, 200, 100, 200,
                 500, 100, 100, 100, 100, 100, 100, 100, 500, 200]

    matrix = ConfusionMatrix(actual, predicted)
    print(matrix)
    matrix.plot()

    if save:
        target = os.path.join(basepath, '..', 'screenshots', 'numbers.png')
        plt.savefig(target)
    if show:
        plt.show()

    matrix.print_stats()
def test_pandas_confusion_cm_stats_integers():
    """Check that ``stats()`` yields an OrderedDict for integer class labels.

    Also echoes both label lists and prints the statistics report.
    """
    actual = [600, 200, 200, 200, 200, 200, 200, 200, 500, 500,
              500, 200, 200, 200, 200, 200, 200, 200, 200, 200]
    predicted = [100, 200, 200, 100, 100, 200, 200, 200, 100, 200,
                 500, 100, 100, 100, 100, 100, 100, 100, 500, 200]
    print("y_true: %s" % actual)
    print("y_pred: %s" % predicted)

    matrix = ConfusionMatrix(actual, predicted)
    computed = matrix.stats()
    assert isinstance(computed, OrderedDict)
    matrix.print_stats()
def test_value_counts():
    """Check that the matrix's ``true``/``pred`` totals match ``value_counts``.

    A DataFrame with actual ('Size') and predicted ('SizePred') labels is
    built; the per-class totals exposed by the confusion matrix must differ
    from pandas' own counts by a sum of zero. The statistics report is
    printed at the end.
    """
    columns = {
        'Height': [
            150, 150, 151, 151, 152, 155, 155, 157, 157, 157,
            157, 158, 158, 159, 159, 159, 160, 160, 162, 162,
            163, 164, 165, 168, 169, 169, 169, 170, 171, 171,
            173, 173, 174, 176, 177, 177, 179, 179, 179, 179,
            179, 181, 181, 182, 183, 184, 186, 190, 190,
        ],
        'Weight': [
            54, 55, 55, 47, 58, 53, 59, 60, 56, 55,
            62, 56, 55, 55, 64, 61, 59, 59, 63, 66,
            64, 62, 66, 66, 72, 65, 75, 71, 70, 70,
            75, 65, 79, 78, 83, 75, 84, 78, 74, 75,
            74, 90, 80, 81, 90, 81, 91, 87, 100,
        ],
        'Size': [
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M',
            'L', 'L', 'L', 'L', 'L', 'L', 'L',
            'L', 'L', 'L', 'L', 'L', 'L', 'L',
            'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL',
        ],
        'SizePred': [
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'M', 'M', 'M', 'M', 'M', 'M',
            'M', 'M', 'M', 'M', 'M', 'M',
            'L', 'M',
            'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
            'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL',
        ],
    }
    frame = pd.DataFrame(columns)
    matrix = ConfusionMatrix(frame["Size"], frame["SizePred"])

    # Actual-label totals first, then predicted-label totals.
    true_gap = matrix.true - frame["Size"].value_counts()
    assert true_gap.sum() == 0
    pred_gap = matrix.pred - frame["SizePred"].value_counts()
    assert pred_gap.sum() == 0

    matrix.print_stats()
def test_pandas_confusion_cm_stats_animals():
    """Check stats, population and per-class true positives for string labels.

    For each animal, the TP count of the binarized matrix must equal the
    value returned by ``cm.get(<animal>)``.
    """
    actual = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
              'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    predicted = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
                 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    print("y_true: %s" % actual)
    print("y_pred: %s" % predicted)

    matrix = ConfusionMatrix(actual, predicted)
    assert isinstance(matrix.stats(), OrderedDict)
    assert matrix.population == len(actual)  # 12 samples
    matrix.print_stats()
    cm_stats = matrix.stats()  # noqa

    # 'cat' is checked both against the matrix lookup and the literal count.
    assert matrix.binarize("cat").TP == matrix.get("cat")
    assert matrix.binarize("cat").TP == 3
    # Remaining classes: dog (1 true positive) and rabbit (3 true positives).
    for animal in ("dog", "rabbit"):
        assert matrix.binarize(animal).TP == matrix.get(animal)
def size_pred(save, show):
    """Demonstrate a confusion matrix for clothing-size predictions.

    Prints the matrix and its statistics, plots it, and optionally saves
    and/or shows the figure.

    Parameters
    ----------
    save : if truthy, the current figure is written to
        ``<basepath>/../screenshots/size.png``
    show : if truthy, ``plt.show()`` is called

    Notes
    -----
    ``basepath`` is not bound inside this function; it has to be provided by
    an enclosing or module-level scope when ``save`` is truthy.
    """
    columns = {
        'Height': [
            150, 150, 151, 151, 152, 155, 155, 157, 157, 157,
            157, 158, 158, 159, 159, 159, 160, 160, 162, 162,
            163, 164, 165, 168, 169, 169, 169, 170, 171, 171,
            173, 173, 174, 176, 177, 177, 179, 179, 179, 179,
            179, 181, 181, 182, 183, 184, 186, 190, 190,
        ],
        'Weight': [
            54, 55, 55, 47, 58, 53, 59, 60, 56, 55,
            62, 56, 55, 55, 64, 61, 59, 59, 63, 66,
            64, 62, 66, 66, 72, 65, 75, 71, 70, 70,
            75, 65, 79, 78, 83, 75, 84, 78, 74, 75,
            74, 90, 80, 81, 90, 81, 91, 87, 100,
        ],
        'Size': [
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M',
            'L', 'L', 'L', 'L', 'L', 'L', 'L',
            'L', 'L', 'L', 'L', 'L', 'L', 'L',
            'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL',
        ],
        'SizePred': [
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'M', 'M', 'M', 'M', 'M', 'M',
            'M', 'M', 'M', 'M', 'M', 'M',
            'L', 'M',
            'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
            'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL',
        ],
    }
    frame = pd.DataFrame(columns)

    matrix = ConfusionMatrix(frame["Size"], frame["SizePred"])
    print(matrix)
    matrix.print_stats()
    matrix.plot()

    if save:
        target = os.path.join(basepath, '..', 'screenshots', 'size.png')
        plt.savefig(target)
    if show:
        plt.show()
# Fit the model, pick the probability cut-off that maximises a weighted
# confusion-matrix score on the training data, then apply that cut-off to the
# test data and collect statistics.
# NOTE(review): this is a fragment; neigh, X, y, X_test and y_test come from
# code that is not visible here.
neigh = neigh.fit(X, y)
y_predicted_train = neigh.predict_proba(X)  # predicted probabilities for the training set

# Search thresholds 0.01 .. 0.99 for the best score. Only a score strictly
# greater than maxrev (initially 0) replaces the 0.5 default.
maxrev = 0
final_threshold = 0.5
for x in xrange(1, 100):
    thresh = 0.01 * x
    # Column 1 is thresholded into 0/1 labels (presumably the positive-class
    # probability -- TODO confirm).
    # NOTE(review): the comprehension reuses the loop name `x`; thresh is
    # computed before it, so the threshold itself is unaffected.
    predicted_y_train = np.array(
        [1 if x > thresh else 0 for x in list(y_predicted_train[:, 1])])
    cmatrix = confusion_matrix(y, predicted_y_train)
    # Weighted score: +100 for cell [1, 1], +15 for [0, 0], -15 for [0, 1],
    # -30 for [1, 0].
    newROI = cmatrix[1, 1] * 100 + cmatrix[0, 0] * 15 + cmatrix[0, 1] * (
        -15) + cmatrix[1, 0] * (-30)
    if newROI > maxrev:
        maxrev = newROI
        final_threshold = thresh

y_predicted_test = neigh.predict_proba(
    X_test)  # predicted probabilities for the test set
predicted_y_test = np.array([
    1 if x > final_threshold else 0 for x in list(y_predicted_test[:, 1])
])  # apply the chosen threshold to classify the test set

# Obtain the relevant statistics on the test set.
cm = ConfusionMatrix(y_test, predicted_y_test)
cm.print_stats()
acc = accuracy_score(y_test, predicted_y_test)
cmatrix = confusion_matrix(y_test, predicted_y_test)
# Same weighting as the training-time score above.
ROI = cmatrix[1, 1] * 100 + cmatrix[0, 0] * 15 + cmatrix[0, 1] * (
    -15) + cmatrix[1, 0] * (-30)
def main(save, show):
    """Run the confusion-matrix demo on animal labels.

    Prints the matrix and its DataFrame form, plots it raw and normalized
    (optionally saving/showing each figure), prints statistics and
    classification reports, and finishes with a binarization example.

    Parameters
    ----------
    save : if truthy, figures are written under ``<this dir>/../screenshots``
        as ``cm.png`` and ``cm_norm.png``
    show : if truthy, ``plt.show()`` is called after each plot
    """
    basepath = os.path.dirname(__file__)

    def _emit(filename):
        # Shared save/show step for whichever figure is current.
        if save:
            plt.savefig(os.path.join(basepath, '..', 'screenshots', filename))
        if show:
            plt.show()

    actual = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
              'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    predicted = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
                 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    matrix = ConfusionMatrix(actual, predicted)

    print("Confusion matrix:\n%s" % matrix)
    frame = matrix.to_dataframe()
    print(frame)
    print(frame.dtypes)

    matrix.plot()
    _emit('cm.png')

    matrix.plot(normalized=True)
    _emit('cm_norm.png')

    matrix.print_stats()
    print(matrix.classification_report)

    # The same labels run through the plain confusion_matrix /
    # classification_report helpers for comparison.
    print("sklearn confusion_matrix:\n%s" % confusion_matrix(actual, predicted))
    print(classification_report(actual, predicted))

    print("Binarize a confusion matrix")
    actual = ["cat", "ant", "cat", "cat", "ant", "bird"]
    predicted = ["ant", "ant", "cat", "cat", "ant", "cat"]
    matrix = ConfusionMatrix(actual, predicted)
    print(matrix)
    # Original author's note: a bird is not a "land_animal".
    land_animals = matrix.binarize(['ant', 'cat'])
    print(land_animals)
votes = dict.fromkeys(alpha, 0) # reset dictionary for next test case y.append(count) # add classfication to array for confusion matrix if count == tcf.index[i]: #if the vote matches the known value of the target incremement correct t_correct += 1 # *************************************************************************************************** # **************************** Creates and Displays Confusion Matrix ******************************** # *************************************************************************************************** # uses pandas_confusion library to generate confusion matrix print '\n\nConfusion Matrix:\n\n' print '\tAccuracy is: ', m.ceil(float(t_correct) / 10000 * 100), '\n\n' y_actul = pd.Series(y_true, name='Actual') y_pred = pd.Series(y, name='Predicted') confusion1 = ConfusionMatrix(y_actul, y_pred) # confusion1.print_stats() confusion2 = pd.crosstab(y_actul, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True) print confusion2 print confusion_matrix(y_actul, y_pred)
y.append(count) # add classfication to array for confusion matrix if count == tcf.index[ i]: #if the vote matches the known value of the target incremement correct t_correct += 1 # *************************************************************************************************** # **************************** Creates and Displays Confusion Matrix ******************************** # *************************************************************************************************** # uses pandas_confusion library to generate confusion matrix print '\n\nConfusion Matrix:\n\n' print '\tAccuracy is: ', m.ceil(float(t_correct) / 10000 * 100), '\n\n' y_actul = pd.Series(y_true, name='Actual') y_pred = pd.Series(y, name='Predicted') confusion1 = ConfusionMatrix(y_actul, y_pred) # confusion1.print_stats() confusion2 = pd.crosstab(y_actul, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True) print confusion2 print confusion_matrix(y_actul, y_pred)
def main(save, show):
    """Run the confusion-matrix demo on animal labels.

    Steps: print the matrix and its DataFrame form, plot it raw and then
    normalized (each figure optionally saved and/or shown), print statistics
    and classification reports, then show a binarization example.

    Parameters
    ----------
    save : if truthy, figures are written under ``<this dir>/../screenshots``
        as ``cm.png`` and ``cm_norm.png``
    show : if truthy, ``plt.show()`` is called after each plot
    """
    basepath = os.path.dirname(__file__)

    truth = [
        'rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
        'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit',
    ]
    guess = [
        'cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
        'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit',
    ]
    cm = ConfusionMatrix(truth, guess)

    print("Confusion matrix:\n%s" % cm)
    table = cm.to_dataframe()
    print(table)
    print(table.dtypes)

    # Two plotting passes: raw counts first, then the normalized view.
    plot_passes = (
        ({}, 'cm.png'),
        ({'normalized': True}, 'cm_norm.png'),
    )
    for plot_options, filename in plot_passes:
        cm.plot(**plot_options)
        if save:
            plt.savefig(os.path.join(basepath, '..', 'screenshots', filename))
        if show:
            plt.show()

    cm.print_stats()
    print(cm.classification_report)

    # Reference output from the plain confusion_matrix /
    # classification_report helpers on the same labels.
    reference = confusion_matrix(truth, guess)
    print("sklearn confusion_matrix:\n%s" % reference)
    print(classification_report(truth, guess))

    print("Binarize a confusion matrix")
    truth = ["cat", "ant", "cat", "cat", "ant", "bird"]
    guess = ["ant", "ant", "cat", "cat", "ant", "cat"]
    cm = ConfusionMatrix(truth, guess)
    print(cm)
    # Original author's note: a bird is not a "land_animal".
    binary_cm = cm.binarize(['ant', 'cat'])
    print(binary_cm)
# and record votes for which letter perceptron returns for letter in letters_list_testing: # text = "\rTesting instance "+str((letter_increment)+1)+"/"+str(len(letters_list_testing)) # sys.stdout.write(text) # collect perceptron votes to build confusion matrix # collect_votes runs perceptron for instances of letters in the testing data set # returns the winning letter by vote to store into predicted predicted = collect_votes(letter) #print letter.value[0], predicted # append to confusion matrix using pandas y_pred = y_pred.append(pd.Series(predicted, index=[letter_increment])) y_actu = y_actu.append(pd.Series(letter.value[0], index=[letter_increment])) # append pandas_confusion y_pred_stats.append(predicted) y_actu_stats.append(letter.value[0]) # increment counter for next letter letter_increment += 1 # make confusion matrix using pandas df_confusion = pd.crosstab(y_actu, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True) print df_confusion # make confusion matrix and print stats using pandas_confusion cm = ConfusionMatrix(y_actu_stats, y_pred_stats) # print("Confusion matrix:\n%s" % cm) cm.print_stats()
def benchmark(self, clf):
    """
    Train one classifier, evaluate it on the test split and print the results.

    Parameters
    ----------
    clf : the classifier to benchmark (e.g. MultinomialNB); it must provide
        ``fit`` and ``predict``. If it also exposes ``coef_``, the
        dimensionality, density and top-10 features per category are printed.

    Returns
    -------
    clf_descr, score, train_time, test_time :
        the classifier description (text of ``str(clf)`` before the first
        ``(``), the accuracy score, and the training and prediction
        durations as differences of ``time()`` calls, for plotting.

    Notes
    -----
    When ``self.split_data`` is truthy, one ``multiprocessing.Process`` is
    started per cross-validation routine (``kfoldCV``, ``shuffleCV``,
    ``recurCV``). ``self.process`` is overwritten on each iteration, so only
    the last started process stays referenced, and none is joined here.
    """
    print('_' * 80)
    print("Training: ")
    print(clf)

    # Fit the classifier on the training features and time it.
    t0 = time()
    clf.fit(self.X_train, self.y_train)
    train_time = time() - t0
    print("train time: %0.3fs" % train_time)

    # Time the prediction alone: the clock is stopped before anything is
    # printed so console I/O is not counted as prediction time.
    t0 = time()
    pred = clf.predict(self.X_test)
    test_time = time() - t0
    print("Predictions: ", pred)
    print("Test time: %0.3fs" % test_time)

    # Accuracy on the test split.
    score = metrics.accuracy_score(self.y_test, pred)
    print("Accuracy: %0.3f" % score)

    if hasattr(clf, 'coef_'):
        print("dimensionality: %d" % clf.coef_.shape[1])
        print("density: %f" % density(clf.coef_))

        # Top 10 features per category: the last ten indices of the
        # ascending argsort of that category's coefficient row.
        # NOTE(review): indexing feature_names with an index array assumes it
        # is a numpy array -- confirm.
        print("top 10 keywords per category:")
        for i, category in enumerate(self.categories):
            top10 = np.argsort(clf.coef_[i])[-10:]
            print("%s: %s" % (category, " ".join(self.feature_names[top10])))
        print()

    # Classification report for the regular train/test split.
    print("Classification report:")
    print(
        metrics.classification_report(self.y_test,
                                      pred,
                                      target_names=self.categories))

    # Confusion-matrix statistics for the regular train/test split.
    cm = ConfusionMatrix(self.y_test, pred)
    cm.print_stats()

    print()
    clf_descr = str(clf).split('(')[0]

    # If split_data is enabled, run every cross-validation routine
    # (K-fold, shuffle-split learning curve, recursive feature elimination)
    # in its own process.
    if self.split_data:
        processes = [self.kfoldCV, self.shuffleCV, self.recurCV]
        for p in processes:
            self.process = multiprocessing.Process(target=p, args=(clf, ))
            self.process.start()

    # Results for the regular split between test and training file.
    return clf_descr, score, train_time, test_time