def calculate_accuracy(csv_filename):
    """Score predictions stored in a CSV file and export the confusion matrix.

    Reads ``csv_filename`` with pandas, compares the ``Actual_Statement``
    column with the ``Prediction`` column, prints the accuracy score and the
    confusion matrix, and writes the matrix (with the accuracy score above
    it) to ``azure_text_confusion_matrix_output.xlsx``.

    Args:
        csv_filename: Path of a CSV file that has ``Actual_Statement`` and
            ``Prediction`` columns.
    """
    data = pd.read_csv(csv_filename)
    y_test = data['Actual_Statement']
    y_pred = data['Prediction']

    score = accuracy_score(y_test, y_pred)
    # %-formatting keeps the output identical under Python 2 and Python 3;
    # the former print statement was a syntax error on Python 3.
    print('Accuracy Score :  %s' % score)

    confusion_matrix = ConfusionMatrix(y_test, y_pred)
    output = confusion_matrix.to_dataframe()

    # The context manager saves and closes the workbook even when a write
    # fails part-way through.
    with pd.ExcelWriter("azure_text_confusion_matrix_output.xlsx") as writer:
        # Leave the first rows free for the accuracy line written below.
        output.to_excel(writer, startrow=4, startcol=0)
        accuracy_label = 'Accuracy Score : ' + str(score)
        # NOTE(review): worksheet.write(row, col, value) looks like the
        # xlsxwriter worksheet API - confirm which Excel engine is in use.
        worksheet = writer.sheets['Sheet1']
        worksheet.write(1, 0, accuracy_label)

    print("Confusion matrix:\n%s" % confusion_matrix)
def validate_epoch(self, val_model, epoch_cm):
    """Accumulate per-batch validation confusion matrices into ``epoch_cm``.

    Draws ``self.validation_steps`` batches from ``self.validation_data``,
    predicts each one with ``val_model``, builds a confusion matrix for the
    batch and adds its counts to ``epoch_cm`` in place.

    Args:
        val_model: Callable invoked as ``val_model([X])``; element ``0`` of
            its result is used as the prediction.
        epoch_cm: Array-like indexed as ``epoch_cm[true_class, pred_class]``;
            updated in place.
    """
    for _ in range(self.validation_steps):
        # The third element of each batch is not used here.
        X, y_true, _unused = next(self.validation_data)

        y_pred = val_model([X])[0]

        # Reduce the last axis to the index of the highest-scoring class.
        y_true = np.argmax(y_true, axis=-1)
        y_pred = np.argmax(y_pred, axis=-1)

        # Flatten the batch into one label per element.
        y_true = np.ndarray.flatten(y_true)
        y_pred = np.ndarray.flatten(y_pred)

        batch_cm = ConfusionMatrix(y_true, y_pred)

        # Only the classes present in this batch appear in the batch matrix,
        # so positions must be mapped back to class ids. enumerate() supplies
        # the position directly instead of a list.index() scan per cell.
        batch_classes = list(batch_cm.classes)
        batch_counts = batch_cm.to_array()
        for row, true_class in enumerate(batch_classes):
            for col, pred_class in enumerate(batch_classes):
                epoch_cm[true_class, pred_class] += batch_counts[row, col]
def compute_metrics(task_name, preds, labels):
    """Return the evaluation metrics for ``task_name`` as a dict.

    Args:
        task_name: Task identifier selecting the metric (for example
            ``"cola"``, ``"mrpc"``, ``"sa"``).
        preds: Predicted values; must have the same length as ``labels``.
        labels: Reference values.

    Returns:
        A dict of metric name to value. ``"arg_mining"`` returns an empty
        dict.

    Raises:
        AssertionError: If ``preds`` and ``labels`` differ in length.
        KeyError: If ``task_name`` is not a known task.
    """
    assert len(preds) == len(labels)

    if task_name == "cola":
        return {"mcc": matthews_corrcoef(labels, preds)}
    if task_name in ("sst-2", "mnli", "mnli-mm", "qnli", "rte", "wnli"):
        return {"acc": simple_accuracy(preds, labels)}
    if task_name in ("mrpc", "qqp"):
        return acc_and_f1(preds, labels)
    if task_name == "sts-b":
        return pearson_and_spearman(preds, labels)
    if task_name in ("sa", "sa_csv"):
        # Imported lazily so the other tasks do not require pandas_ml.
        from pandas_ml import ConfusionMatrix
        pcm = ConfusionMatrix(labels, preds)
        pcm.print_stats()
        # The weighted precision/recall/F1 values computed here previously
        # were never used, so that call has been dropped.
        return {"acc": pcm.stats_overall['Accuracy']}
    if task_name == "arg_mining":
        return {}
    raise KeyError(task_name)
def test_pandas_confusion_binary_cm_inverse(self):
    """Boolean labels give a binary matrix whose inverse keeps the same sum."""
    actual = [
        True, True, False, False, False, True, False, True,
        True, False, True, False, False, False, False, False,
        True, False, True, True, True, True, False, False,
        False, True, False, True, False, False, False, False,
        True, True, False, False, False, True, True, True,
        True, False, False, False, False, True, False, False,
        False, False, False, False, False, False, False, True,
        True, False, True, False, True, True, True, False,
        False, True, False, True, False, False, True, False,
        False, False, False, False, False, False, False, True,
        False, True, True, True, True, False, False, True,
        False, True, True, False, True, False, True, False,
        False, True, True, False, False, True, True, False,
        False, False, False, False, False, True, True, False,
    ]
    predicted = [
        False, False, False, False, False, True, False, False,
        True, False, True, False, False, False, False, False,
        False, False, True, True, True, True, False, False,
        False, False, False, False, False, False, False, False,
        True, False, False, False, False, True, False, False,
        False, False, False, False, False, True, False, False,
        False, False, False, False, False, False, False, True,
        False, False, False, False, False, False, False, False,
        False, True, False, False, False, False, True, False,
        False, False, False, False, False, False, False, True,
        False, False, True, False, False, False, False, True,
        False, True, True, False, False, False, True, False,
        False, True, True, False, False, True, True, False,
        False, False, False, False, False, True, False, False,
    ]

    matrix = ConfusionMatrix(actual, predicted)
    assert isinstance(matrix, pdml.confusion_matrix.BinaryConfusionMatrix)

    total_before = matrix.sum()
    # inverse() returns a new matrix; the total is compared against the original.
    inverted = matrix.inverse()
    assert total_before == inverted.sum()
def test(self, test, test_targets, pdconf=False, filename="", legend=None):
    """Evaluate the network on a test set and print the results.

    Args:
        test: Test inputs.
        test_targets: Targets matching ``test``.
        pdconf: Classification only. When true, also build a
            ``ConfusionMatrix``, plot it with the seaborn backend and save
            the figure to ``filename``.
        filename: Destination of the saved plot when ``pdconf`` is true.
        legend: Optional mapping/sequence from class index to the value shown
            in the plotted matrix. When ``None`` the class indices themselves
            are used.

    Returns:
        ``(err, acc, conf)`` for classification networks and ``(err, r2)``
        for regression networks.

    Raises:
        ValueError: If ``self.net_type`` is neither ``"classification"`` nor
            ``"regression"``.
    """
    is_classification = self.net_type == "classification"

    if is_classification:
        pred = self.forward_classification(test)
        acc = self.cal_acc(pred, test_targets)
        conf = self.confusion_table(pred, test_targets)
        if pdconf:
            temp_pred = self.predict(test)
            predict = np.empty(len(temp_pred))
            targets = np.empty(len(test_targets))
            for i in range(len(targets)):
                pred_idx = np.argmax(temp_pred[i])
                true_idx = np.argmax(test_targets[i])
                # Without a legend fall back to the raw class indices;
                # previously this path hit unbound names and crashed.
                predict[i] = pred_idx if legend is None else legend[pred_idx]
                targets[i] = true_idx if legend is None else legend[true_idx]
            confus = ConfusionMatrix(targets, predict, display_sum=True)
    elif self.net_type == "regression":
        pred = self.forward_regression(test)
        r2 = self.cal_r2(pred, test_targets)
    else:
        # Fail with a clear message instead of an unbound-local error below.
        raise ValueError("Unknown net_type: %r" % (self.net_type,))

    err = self.cal_err(pred, test_targets, self.cost_function)
    print("The test error is: ", err)

    if is_classification:
        print("The test accuracy is: ", acc)
        print("Confusion matrix:")
        print(conf)
        if pdconf:
            confus.plot(backend="seaborn")
            plt.savefig(filename)
            # Clear the current figure so later plots start from a clean state.
            plt.clf()
        return err, acc, conf

    print("The test R2-score is: ", r2)
    return err, r2
def save_confusion_matrix(self, truth_res, pred_res):
    """Save a confusion-matrix heatmap to disk and print summary statistics.

    The heatmap is built from ``sklearn.metrics.confusion_matrix`` and
    labelled with ``self.label_map[1:]``; it is written to
    ``self.confusion_matrix_file_path``. Statistics are printed through
    ``ConfusionMatrix.print_stats``.

    Args:
        truth_res: Ground-truth labels.
        pred_res: Predicted labels.
    """
    counts = sklearn.metrics.confusion_matrix(truth_res, pred_res)
    list_label = self.label_map[1:]
    df_cm = pd.DataFrame(data=counts, columns=list_label, index=list_label)

    fig = plt.figure(dpi=100)
    try:
        heatmap = sns.heatmap(df_cm, annot=True, fmt='d')
        heatmap.yaxis.set_ticklabels(
            heatmap.yaxis.get_ticklabels(),
            rotation=70, ha='right', fontsize=5)
        heatmap.xaxis.set_ticklabels(
            heatmap.xaxis.get_ticklabels(),
            rotation=20, ha='right', fontsize=5)
        plt.savefig(self.confusion_matrix_file_path)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

    confusion_matrix = ConfusionMatrix(truth_res, pred_res)
    confusion_matrix.print_stats()
def random_forest():
    """Train a random forest on the selected CSV and report its metrics.

    Reads the CSV whose path is returned by ``var.get()``, shuffles it,
    splits it 65/35 into train and test sets, fits a
    ``RandomForestClassifier`` on the ``Class`` column and prints the
    confusion matrix. Every statistic is written to a new ``Text`` widget
    under ``root``; accuracy and true-positive rate (as percentages) are
    appended to ``d`` and the ROC AUC to ``aucarr``.
    """
    print("------------------------RANDOM FOREST-----------------------")
    df = pd.read_csv(var.get(), low_memory=False)
    # Shuffle the rows before splitting.
    df = df.sample(frac=1).reset_index(drop=True)
    frauds = df.loc[df['Class'] == 1]
    non_frauds = df.loc[df['Class'] == 0]
    print("\nWe have", len(frauds), "fraud data points and", len(non_frauds), "nonfraudulent data points.")

    # Every column except the last one is used as a feature.
    X = df.iloc[:, :-1]
    y = df['Class']
    print("X and y sizes, respectively:", len(X), len(y))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35)
    print("Train and test sizes, respectively:", len(X_train), len(y_train), "|", len(X_test), len(y_test))
    print("Total number of frauds:", len(y.loc[df['Class'] == 1]))
    print("Number of frauds on y_test:", len(y_test.loc[df['Class'] == 1]))
    print("Number of frauds on y_train:", len(y_train.loc[df['Class'] == 1]))

    clf = RandomForestClassifier()
    clf.fit(X_train, y_train)
    y_predicted1 = np.array(clf.predict(X_test))
    y_right1 = np.array(y_test)

    confusion_matrix1 = ConfusionMatrix(y_right1, y_predicted1)
    print("\n\nConfusion matrix:\n%s" % confusion_matrix1)

    T = Text(root, height=60, width=60)
    T.pack(pady=20, side=BOTTOM, fill=Y)

    # Compute the statistics once instead of once per displayed entry.
    stats = confusion_matrix1.stats()
    for name, value in stats.items():
        T.insert(END, [name, value])
        T.insert(END, "\n")

    d['ACC'].append(stats['ACC'] * 100)
    d['TPR'].append(stats['TPR'] * 100)

    fpr, tpr, _ = roc_curve(y_right1, y_predicted1)
    aucarr['auc'].append(auc(fpr, tpr))
def logistic_regression():
    """Train a logistic regression on the selected CSV and report its metrics.

    Reads the CSV whose path is returned by ``var.get()``, shuffles it,
    splits it 65/35 into train and test sets, fits a ``LogisticRegression``
    (``C=1e5``) on the ``Class`` column and prints its score and confusion
    matrix. Every statistic is written to a new ``Text`` widget under
    ``root``; accuracy and true-positive rate (as percentages) are appended
    to ``d`` and the ROC AUC to ``aucarr``.
    """
    print("------------------------LOGISTIC REGRESSION-----------------------")
    df = pd.read_csv(var.get(), low_memory=False)
    # Shuffle the rows before splitting.
    df = df.sample(frac=1).reset_index(drop=True)
    frauds = df.loc[df['Class'] == 1]
    non_frauds = df.loc[df['Class'] == 0]
    print("\n")
    print("We have", len(frauds), "fraud data points and", len(non_frauds), "nonfraudulent data points.\n")

    # Every column except the last one is used as a feature.
    X = df.iloc[:, :-1]
    y = df['Class']
    print("X and y sizes, respectively:", len(X), len(y))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35)

    logistic = linear_model.LogisticRegression(C=1e5)
    logistic.fit(X_train, y_train)
    print("\nScore: ", logistic.score(X_test, y_test))

    y_predicted = np.array(logistic.predict(X_test))
    y_right = np.array(y_test)

    confusion_matrix = ConfusionMatrix(y_right, y_predicted)
    print("\n\nConfusion matrix:\n%s" % confusion_matrix)

    T = Text(root, height=60, width=60)
    T.pack(pady=20, side=BOTTOM, fill=Y)

    # Compute the statistics once instead of once per displayed entry.
    stats = confusion_matrix.stats()
    for name, value in stats.items():
        T.insert(END, [name, value])
        T.insert(END, "\n")

    d['ACC'].append(stats['ACC'] * 100)
    d['TPR'].append(stats['TPR'] * 100)

    fpr, tpr, _ = roc_curve(y_right, y_predicted)
    aucarr['auc'].append(auc(fpr, tpr))
def process_results(mode, file, thrshld):
    """Compute thresholded accuracy for the results stored in a JSON file.

    The file must contain a ``results`` list whose entries have
    ``labellingScore`` and ``score`` fields. An entry counts as correct when
    its label is 1 and its score is strictly above the threshold, or its
    label is 0 and its score is strictly below the threshold.

    Args:
        mode: When equal to ``1``, confusion-matrix statistics are printed.
        file: Path of the JSON file to read.
        thrshld: Decision threshold applied to each score.

    Returns:
        The fraction of correct entries, or ``0.0`` when there are no
        entries.
    """
    threshold = thrshld
    with open(file) as json_file:
        data = json.load(json_file)

    results = data['results']
    if not results:
        # Nothing to score; avoids dividing by zero below.
        return 0.0

    correct = 0.0
    actual = []
    predicted = []
    for entry in results:
        label = int(entry['labellingScore'])
        score = float(entry['score'])

        # NOTE(review): a score exactly equal to the threshold is predicted
        # as 0 below but is never counted as correct here, even for label 0.
        # Kept as-is; confirm whether that is intended.
        if (label == 1 and score > threshold) or (label == 0 and score < threshold):
            correct += 1

        actual.append(1 if label == 1 else 0)
        predicted.append(1 if score > threshold else 0)

    # Compare by value: identity comparison with an int literal is
    # implementation-dependent.
    if mode == 1:
        cm = ConfusionMatrix(actual, predicted)
        cm.print_stats()

    return correct / len(results)
def test_value_counts():
    """Row/column totals of the matrix match ``value_counts`` of the inputs."""
    heights = [
        150, 150, 151, 151, 152, 155, 155, 157, 157, 157, 157, 158, 158,
        159, 159, 159, 160, 160, 162, 162, 163, 164, 165, 168, 169, 169,
        169, 170, 171, 171, 173, 173, 174, 176, 177, 177, 179, 179, 179,
        179, 179, 181, 181, 182, 183, 184, 186, 190, 190
    ]
    weights = [
        54, 55, 55, 47, 58, 53, 59, 60, 56, 55, 62, 56, 55, 55, 64, 61, 59,
        59, 63, 66, 64, 62, 66, 66, 72, 65, 75, 71, 70, 70, 75, 65, 79, 78,
        83, 75, 84, 78, 74, 75, 74, 90, 80, 81, 90, 81, 91, 87, 100
    ]
    sizes = [
        'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
        'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M',
        'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
        'L', 'L', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL'
    ]
    predicted_sizes = [
        'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
        'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M',
        'M', 'M', 'M', 'M', 'L', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
        'L', 'L', 'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL'
    ]
    frame = pd.DataFrame({
        'Height': heights,
        'Weight': weights,
        'Size': sizes,
        'SizePred': predicted_sizes,
    })

    matrix = ConfusionMatrix(frame["Size"], frame["SizePred"])
    assert isinstance(matrix, pdml.confusion_matrix.LabeledConfusionMatrix)

    true_diff = matrix.true - frame.Size.value_counts()
    assert true_diff.sum() == 0
    pred_diff = matrix.pred - frame.SizePred.value_counts()
    assert pred_diff.sum() == 0

    matrix.print_stats()
def confusion_matrix(self, ground_truth, predictions, display=True):
    """Build a confusion matrix and optionally print and plot it.

    Args:
        ground_truth: True labels.
        predictions: Predicted labels.
        display: When truthy, print the matrix.

    The plot is saved to ``self.evaluation_path`` when ``self.save_plots``
    is truthy.
    """
    matrix = ConfusionMatrix(ground_truth, predictions)

    # Truthiness checks instead of ``== True`` comparisons.
    if display:
        print("Confusion matrix:\n%s" % matrix)

    if self.save_plots:
        matrix.plot()
        plt.savefig(self.evaluation_path)
def test_pandas_confusion_cm_int(self):
    """Integer targets with explicit string labels give a labeled matrix."""
    actual = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    predicted = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
    class_names = ["ant", "bird", "cat"]

    matrix = ConfusionMatrix(actual, predicted, labels=class_names)
    assert isinstance(matrix, pdml.confusion_matrix.LabeledConfusionMatrix)
    print("Confusion matrix:\n%s" % matrix)

    asserts(actual, predicted, matrix)
    # One class per supplied label.
    assert matrix.len() == len(class_names)
def test_pandas_confusion_cm_int():
    """Integer targets with explicit string labels give a labeled matrix."""
    actual = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    predicted = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
    class_names = ["ant", "bird", "cat"]

    matrix = ConfusionMatrix(actual, predicted, labels=class_names)
    assert isinstance(matrix, pdml.confusion_matrix.LabeledConfusionMatrix)
    print("Confusion matrix:\n%s" % matrix)

    asserts(actual, predicted, matrix)
    # One class per supplied label.
    assert matrix.len() == len(class_names)
def test_pandas_confusion_normalized(self):
    """Row sums of the normalized matrix add up to the number of rows."""
    actual = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    predicted = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]

    matrix = ConfusionMatrix(actual, predicted)
    assert isinstance(matrix, pdml.confusion_matrix.LabeledConfusionMatrix)

    raw = matrix.to_dataframe()
    normalized = matrix.to_dataframe(normalized=True)

    row_totals = normalized.sum(axis=1)
    assert row_totals.sum() == len(raw)
def test_pandas_confusion_normalized():
    """Row sums of the normalized matrix add up to the number of rows."""
    actual = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    predicted = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]

    matrix = ConfusionMatrix(actual, predicted)
    assert isinstance(matrix, pdml.confusion_matrix.LabeledConfusionMatrix)

    raw = matrix.to_dataframe()
    normalized = matrix.to_dataframe(normalized=True)

    row_totals = normalized.sum(axis=1)
    assert row_totals.sum() == len(raw)
def run():
    """Train and evaluate the candidate and party classifiers.

    Loads the data, splits both datasets 80/20, fits one ``Classifier`` per
    dataset, classifies the test split with the plain and the PCA solver,
    reports each result through ``output_results`` and builds one confusion
    matrix per result. The total elapsed time is printed at the end.

    Returns:
        A dict with three entries: ``'data'`` (train/test splits),
        ``'results'`` (``(target, prediction)`` pairs) and ``'matrices'``
        (the confusion matrices), each keyed by experiment name.
    """
    start_time = time()
    # The third value returned by load_data() is not used here.
    data_cand, data_part, _full_data = load_data()

    train_c, test_c = train_test_split(data_cand, test_size=0.2)
    train_p, test_p = train_test_split(data_part, test_size=0.2)

    candidatos_clf = Classifier(train_c.drop('candidatoId', axis=1), train_c.candidatoId)
    partidos_clf = Classifier(train_p.drop('idPartido', axis=1), train_p.idPartido)

    cand_solver = candidatos_clf._predict()
    n_cand, pca_cand_solver = candidatos_clf._pca()
    part_solver = partidos_clf._predict()
    n_part, pca_part_solver = partidos_clf._pca()

    cand_pred = candidatos_clf.classify(
        test_c.drop('candidatoId', axis=1), test_c.candidatoId, cand_solver)
    pca_cand_pred = candidatos_clf.classify(
        test_c.drop('candidatoId', axis=1), test_c.candidatoId,
        pca_cand_solver, n_cand)
    part_pred = partidos_clf.classify(
        test_p.drop('idPartido', axis=1), test_p.idPartido, part_solver)
    pca_part_pred = partidos_clf.classify(
        test_p.drop('idPartido', axis=1), test_p.idPartido,
        pca_part_solver, n_part)

    output_results(f'CANDIDATOS | {cand_solver}', test_c.candidatoId, cand_pred)
    output_results(f'CANDIDATOS_PCA | {pca_cand_solver}, {n_cand}', test_c.candidatoId, pca_cand_pred)
    output_results(f'PARTIDOS | {part_solver}', test_p.idPartido, part_pred)
    output_results(f'PARTIDOS_PCA | {pca_part_solver}, {n_part}', test_p.idPartido, pca_part_pred)

    cand_part_target, cand_part_pred = candidato_mapper(test_c.candidatoId, cand_pred)
    output_results(f'PARTIDOS CON CANDIDATO | {cand_solver}', cand_part_target, cand_part_pred)

    cm_cand = ConfusionMatrix(test_c.candidatoId, cand_pred)
    cm_pca_cand = ConfusionMatrix(test_c.candidatoId, pca_cand_pred)
    cm_part = ConfusionMatrix(test_p.idPartido, part_pred)
    cm_pca_part = ConfusionMatrix(test_p.idPartido, pca_part_pred)
    cm_cand_part = ConfusionMatrix(cand_part_target, cand_part_pred)

    elapsed_time = time() - start_time
    print('----------------------------------------')
    # The literal previously contained a stray line-break character between
    # the label and the value, which split the string across two lines.
    print(f'TOTAL TIME: {datetime.timedelta(seconds=elapsed_time)}')

    result = {
        'data': {
            'candidatos': (train_c, test_c),
            'partidos': (train_p, test_p),
        },
        'results': {
            'candidatos': (test_c.candidatoId, cand_pred),
            'candidatos_pca': (test_c.candidatoId, pca_cand_pred),
            'partidos': (test_p.idPartido, part_pred),
            'partidos_pca': (test_p.idPartido, pca_part_pred),
            'partidos_candidatos': (cand_part_target, cand_part_pred),
        },
        'matrices': {
            'candidatos': cm_cand,
            'candidatos_pca': cm_pca_cand,
            'partidos': cm_part,
            'partidos_pca': cm_pca_part,
            'partidos_candidatos': cm_cand_part,
        },
    }
    return result
def confusion_matrix(ground_truth, predictions, display=True, output_path=None):
    """Build a confusion matrix, optionally printing it and saving its plot.

    Args:
        ground_truth: True labels.
        predictions: Predicted labels.
        display: The matrix is printed only when this is exactly ``True``.
        output_path: When not ``None``, the matrix is plotted and the figure
            is saved to this path.
    """
    matrix = ConfusionMatrix(ground_truth, predictions)

    if display is True:
        print("Confusion matrix:\n%s" % matrix)

    # Nothing to save without a destination.
    if output_path is None:
        return

    matrix.plot()
    plt.savefig(output_path)
def plot_confusion_matrix(clf, X_best_test, y_best_test, matrix_name):
    """Plot and save the normalized confusion matrix of ``clf`` on a test set.

    Args:
        clf: Fitted estimator exposing ``predict``.
        X_best_test: Test features passed to ``clf.predict``.
        y_best_test: True labels for ``X_best_test``.
        matrix_name: Suffix used in the saved image's file name.

    Returns:
        The ``ConfusionMatrix`` that was plotted.
    """
    predictions = clf.predict(X_best_test)
    matrix = ConfusionMatrix(y_best_test, predictions)

    matrix.plot(normalized=True)
    plt.savefig('confusion_matrixes/K-fold_matrix_{}.png'.format(matrix_name))
    print('Saved Confusion matrix of the previous test to confusion_matrixes/K-fold_matrix_{}.png\n'.format(matrix_name))

    return matrix
def test_pandas_confusion_cm_stats_integers(self):
    """Integer class labels give a labeled matrix whose stats are an OrderedDict."""
    actual = [600, 200, 200, 200, 200, 200, 200, 200, 500, 500,
              500, 200, 200, 200, 200, 200, 200, 200, 200, 200]
    predicted = [100, 200, 200, 100, 100, 200, 200, 200, 100, 200,
                 500, 100, 100, 100, 100, 100, 100, 100, 500, 200]
    print("y_true: %s" % actual)
    print("y_pred: %s" % predicted)

    matrix = ConfusionMatrix(actual, predicted)
    assert isinstance(matrix, pdml.confusion_matrix.LabeledConfusionMatrix)

    stats = matrix.stats()
    assert isinstance(stats, OrderedDict)

    matrix.print_stats()
def metrics(y_true, y_pred, y_pred_proba=False):
    """Print classification metrics for the KEEP / UP / DOWN classes.

    Args:
        y_true: True labels; must provide ``tolist()``.
        y_pred: Predicted labels; must provide ``tolist()``.
        y_pred_proba: Predicted probabilities. The cross entropy is printed
            unless this is the ``False`` sentinel.
    """
    class_names = ['KEEP', 'UP', 'DOWN']

    if y_pred_proba is not False:
        cross_entropy = log_loss(y_true, y_pred_proba)
        print('Cross Entropy: {}'.format(cross_entropy))

    accuracy = accuracy_score(y_true, y_pred)
    print('Accuracy: {}'.format(accuracy))

    kappa = cohen_kappa_score(y_true, y_pred)
    print('Coefficient Kappa: {}'.format(kappa))

    report = classification_report(y_true, y_pred, target_names=class_names)
    print('Report: {}'.format(report))

    matrix = ConfusionMatrix(y_true.tolist(), y_pred.tolist())
    matrix.print_stats()
def test_pandas_confusion_max_min(self):
    """The largest and smallest cells of the animal matrix are 3 and 0."""
    actual = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
              'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    predicted = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
                 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    print("y_true: %s" % actual)
    print("y_pred: %s" % predicted)

    matrix = ConfusionMatrix(actual, predicted)
    assert isinstance(matrix, pdml.confusion_matrix.LabeledConfusionMatrix)

    assert matrix.max() == 3
    assert matrix.min() == 0
def plotconfusion(truth, predictions):
    """
    Plot the confusion matrix between the truth and predictions arrays.

    :type truth: np array
    :type predictions: np array
    """
    matrix = ConfusionMatrix(truth, predictions)
    plt.figure(figsize=(10, 10))
    matrix.plot()
    plt.show()
def test_pandas_confusion_cm_empty_row():
    """A class that is predicted but never true still gives a labeled matrix."""
    actual = [2, 0, 2, 2, 0, 0]
    predicted = [0, 0, 2, 2, 1, 2]

    # With explicit labels.
    labeled = ConfusionMatrix(actual, predicted, labels=["ant", "bird", "cat"])
    assert isinstance(labeled, pdml.confusion_matrix.LabeledConfusionMatrix)

    # Without labels.
    unlabeled = ConfusionMatrix(actual, predicted)
    assert isinstance(unlabeled, pdml.confusion_matrix.LabeledConfusionMatrix)
    print("Confusion matrix:\n%s" % unlabeled)

    asserts(actual, predicted, unlabeled)
def test_value_counts(self):
    """Row/column totals of the matrix match ``value_counts`` of the inputs."""
    heights = [
        150, 150, 151, 151, 152, 155, 155, 157, 157, 157, 157, 158, 158,
        159, 159, 159, 160, 160, 162, 162, 163, 164, 165, 168, 169, 169,
        169, 170, 171, 171, 173, 173, 174, 176, 177, 177, 179, 179, 179,
        179, 179, 181, 181, 182, 183, 184, 186, 190, 190
    ]
    weights = [
        54, 55, 55, 47, 58, 53, 59, 60, 56, 55, 62, 56, 55, 55, 64, 61, 59,
        59, 63, 66, 64, 62, 66, 66, 72, 65, 75, 71, 70, 70, 75, 65, 79, 78,
        83, 75, 84, 78, 74, 75, 74, 90, 80, 81, 90, 81, 91, 87, 100
    ]
    sizes = [
        'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
        'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M',
        'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
        'L', 'L', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL'
    ]
    predicted_sizes = [
        'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
        'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M',
        'M', 'M', 'M', 'M', 'L', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
        'L', 'L', 'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL'
    ]
    frame = pd.DataFrame({
        'Height': heights,
        'Weight': weights,
        'Size': sizes,
        'SizePred': predicted_sizes,
    })

    matrix = ConfusionMatrix(frame["Size"], frame["SizePred"])
    assert isinstance(matrix, pdml.confusion_matrix.LabeledConfusionMatrix)

    true_diff = matrix.true - frame.Size.value_counts()
    assert true_diff.sum() == 0
    pred_diff = matrix.pred - frame.SizePred.value_counts()
    assert pred_diff.sum() == 0

    matrix.print_stats()
def get_accuracy(results, array):
    """Print and plot the confusion matrix, then print both accuracy figures.

    Args:
        results: Predicted values.
        array: Actual values.
    """
    actual = array
    predicted = results

    matrix = ConfusionMatrix(actual, predicted)
    print("Confusion matrix:\n%s" % matrix)
    matrix.plot()
    plt.show()
    print("\n")

    exact_rate = perf_measure(actual, predicted)
    close_rate = close_measure(actual, predicted)
    # Both measures are scaled by 100 for display as percentages.
    print("The Accuracy is: " + str(exact_rate * 100) + "%")
    print("The close range Accuracy is: " + str(close_rate * 100) + "%")
def confusion_matrix(self):
    """Run the rules over the first 50 songs and plot true vs. resulting genre.

    For each of the first 50 entries of ``song_list`` the genre is recorded,
    ``run_all`` is executed with ``self.rules``, and the genre found on the
    song afterwards is recorded as the prediction.
    """
    sample_count = 50
    genres = DataFrame(index=range(0, sample_count), columns=['true', 'predict'])

    for row in range(0, sample_count):
        song = song_list[row]['song']
        # Capture the genre before the rules run.
        # NOTE(review): presumably the rule actions overwrite song.genre - confirm.
        genre_before = str(song.genre)
        run_all(rule_list=self.rules,
                defined_variables=SongVariables(song),
                defined_actions=SongActions(song),
                stop_on_first_trigger=False)
        genres.loc[row] = [genre_before, song.genre]

    matrix = ConfusionMatrix(genres['true'], genres['predict'])
    matrix.plot()
    plt.show()
def test(x_test, y_test):
    """
    Restore the saved model, predict the test set and report the accuracy.

    A normalized confusion matrix is plotted with the seaborn backend and the
    test accuracy is printed.

    :param x_test: input to the test function
    :type x_test: list
    :param y_test: labels for the test function
    :type y_test: list
    :return: None
    """
    print("Testing Now")
    with tf.Session(graph=train_graph) as sess:
        checkpoint = "./saves/best_model.ckpt"
        collected = []

        # Load the model
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint)
        state = sess.run(graph.initial_state)

        print("Total Batches: %d" % (len(x_test) // args.batch_size))
        batches = utils.get_test_batches(x_test, args.batch_size, tokenizer.word2int)
        for batch_no, batch in enumerate(batches, 1):
            if batch_no % 100 == 0:
                print("%d batches" % batch_no)
            # NOTE(review): the same initial state is fed to every batch and
            # keep_prob comes from args even at test time - confirm intended.
            feed = {
                graph.input_data: batch,
                graph.keep_prob: args.keep_prob,
                graph.initial_state: state
            }
            batch_preds = sess.run(graph.predictions, feed_dict=feed)
            collected.extend(batch_preds[row, :] for row in range(len(batch_preds)))

        collected = np.asarray(collected)
        y_predictions = np.argmax(collected, axis=1)
        y_true = y_test.argmax(axis=1)
        # Truncate the labels to the number of predictions that were produced.
        y_true = y_true[:y_predictions.shape[0]]

        matrix = ConfusionMatrix(y_true, y_predictions)
        matrix.plot(backend='seaborn', normalized=True)
        plt.title('Confusion Matrix Stars prediction')
        # NOTE(review): this opens a new figure after plotting - confirm intended.
        plt.figure(figsize=(12, 10))

        hits = np.equal(y_predictions, y_true)
        test_accuracy = np.mean(hits.astype(float))
        print("Test accuracy is: " + str(test_accuracy))
def test_plot():
    """Both plot backends work and an unknown backend raises ``ValueError``."""
    try:
        import matplotlib.pyplot  # noqa
    except ImportError:
        import nose
        raise nose.SkipTest()

    actual = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
              'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    predicted = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
                 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']

    matrix = ConfusionMatrix(actual, predicted)

    # Default backend first, then seaborn.
    matrix.plot()
    matrix.plot(backend='seaborn')

    with tm.assertRaises(ValueError):
        matrix.plot(backend='xxx')
def Test(epoch):
    """Evaluate ``net`` on the test data and checkpoint it when accuracy improves.

    Prints progress per batch, plots the confusion matrix, prints the
    confusion-matrix statistics, and saves the network state to
    ``./checkpoint/vggckpt.pth`` when the accuracy exceeds ``best_acc``.

    Args:
        epoch: Epoch number stored alongside the checkpoint.
    """
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    cm_targets = []
    cm_predicted = []

    with torch.no_grad():
        # NOTE(review): the loop reads test_aaa but the progress bar length
        # comes from testloader - confirm they describe the same data.
        for batch_idx, (inputs, targets) in enumerate(test_aaa):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)

            cm_targets.extend(targets.cpu().numpy())
            cm_predicted.extend(predicted.cpu().numpy())

            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            status = 'Loss: %.3f | Acc: %.3f%% (%d/%d)' % (
                test_loss / (batch_idx + 1), 100. * correct / total,
                correct, total)
            progress_bar(batch_idx, len(testloader), status)

    matrix = confusion_matrix(cm_targets, cm_predicted)
    plot_confusion_matrix(matrix, classes_str)

    from pandas_ml import ConfusionMatrix
    stats_matrix = ConfusionMatrix(cm_targets, cm_predicted)
    stats_matrix.print_stats()

    # Save checkpoint.
    acc = 100. * correct / total
    if not acc > best_acc:
        return

    print('Saving..')
    state = {
        'net': net.state_dict(),
        'acc': acc,
        'epoch': epoch,
    }
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    torch.save(state, './checkpoint/vggckpt.pth')
    best_acc = acc
def accuracy(result):
    """Summarize ``(predicted, expected)`` pairs and print confusion statistics.

    Args:
        result: Indexable collection whose items hold the predicted value at
            position 0 and the expected value at position 1.

    Returns:
        A tuple ``(cm, total, true, misclassified, percentage)`` with the
        confusion matrix, the number of items, the number of matches, the
        number of mismatches and the match percentage.
    """
    total = len(result)
    true = 0
    cm_expected = []
    cm_predicted = []

    for idx in range(total):
        predicted_label = result[idx][0]
        expected_label = result[idx][1]
        if predicted_label == expected_label:
            true += 1
        cm_expected.append(expected_label)
        cm_predicted.append(predicted_label)

    misclassified = total - true

    cm = ConfusionMatrix(cm_expected, cm_predicted)
    cm.print_stats()
    print("----------------------------------------")

    return cm, total, true, misclassified, true / total * 100
def get_pd_ml_cf_matrix(y_actual, y_predicted):
    """Return a ``ConfusionMatrix`` built from actual and predicted labels.

    The two inputs are first placed in a two-column DataFrame and the matrix
    is built from its columns.
    """
    column_names = ['y_Actual', 'y_Predicted']
    frame = pd.DataFrame(
        {'y_Actual': y_actual, 'y_Predicted': y_predicted},
        columns=column_names,
    )
    return ConfusionMatrix(frame['y_Actual'], frame['y_Predicted'])
def test_pandas_confusion_cm_binarize(self):
    """Binarizing on a subset of classes keeps the total count unchanged."""
    actual = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
              'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    predicted = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
                 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']

    matrix = ConfusionMatrix(actual, predicted)
    assert isinstance(matrix, pdml.confusion_matrix.LabeledConfusionMatrix)
    print("Confusion matrix:\n%s" % matrix)

    selected = ['cat', 'dog']
    print("Binarize with %s" % selected)
    binary_matrix = matrix.binarize(selected)
    print("Binary confusion matrix:\n%s" % binary_matrix)

    assert matrix.sum() == binary_matrix.sum()
def one_vs_all():
    """Train ten one-vs-rest SVMs and report the combined accuracy.

    ``data_clustering`` must return ten clusters (one per class). For each
    class an ``SVM`` is trained on the data produced by ``join_cluster`` and
    evaluated on ``X_test``; the class whose classifier gives the highest
    output is taken as the prediction. The confusion matrix and the accuracy
    are printed.

    Raises:
        ValueError: If ``data_clustering`` does not return ten clusters.
    """
    clusters = tuple(data_clustering(X_train, y_train))
    if len(clusters) != 10:
        raise ValueError(
            "expected 10 clusters, got %d" % len(clusters))

    numpy_predict = []
    for number in range(10):
        # join_cluster takes the ten clusters followed by the class number.
        train_number, train_rest, test_number, test_rest = join_cluster(
            *(clusters + (number,)))
        training_data = np.vstack((train_number, train_rest))
        # NOTE(review): despite its name this is passed as the second
        # argument of clf.train - presumably the training labels; confirm.
        test_data = np.hstack((test_number, test_rest))

        clf = SVM(C=0.1)
        clf.train(training_data, test_data)
        y_predict = clf.compute(X_test)
        numpy_predict.append(y_predict)

    # Pick, per sample, the classifier with the highest output.
    prediction = np.argmax(np.array(numpy_predict), axis=0)
    correct = np.sum(prediction == y_test)

    confusion_matrix = ConfusionMatrix(y_test, prediction)
    print("Confusion matrix:\n%s" % confusion_matrix)

    size = len(y_predict)
    accuracy = (correct / float(size)) * 100
    # Single-argument print calls behave the same on Python 2 and Python 3;
    # the former print statements were syntax errors on Python 3.
    print("%d out of %d predictions correct" % (correct, size))
    print("The accuracy in percentage is ")
    print(accuracy)
def trainingHMM(training_set):
    """Train the HMM tagger, decode the test set and print a confusion matrix.

    Args:
        training_set: Training data passed to the counting helpers.
    """
    # Count of words from training data
    freqOfWords = countFreqOfWords(training_set)

    # Extract unique tags from training data
    uniqTags = countUniqTags(training_set)

    # Add a value of 0 for key '<UNK>'
    freqOfWords['<UNK>'] = 0
    training_set_mod, freqOfWords_mod = handlingUNK(training_set, freqOfWords)

    # Count tag frequency
    tagFrequencyList = countTagFrequency(training_set_mod)

    # Calculate bigram list
    tagtagBigram, tagWordsBigram = calcBigram(training_set_mod)

    # Calculate transition and emission probability
    transitionList, emissionList = hmm_train_tagger(
        freqOfWords_mod, tagtagBigram, tagWordsBigram, uniqTags,
        tagFrequencyList, len(training_set) - 1)

    # Decoding and apply Viterbi
    applyViterbi(uniqTags, testing_set, transitionList, emissionList,
                 freqOfWords_mod)

    # Evaluation
    predictSet = _read_second_tokens('predict_out.txt')
    # NOTE(review): the reference set is read from the same file as the
    # predictions, so the matrix compares the output with itself - confirm
    # whether a separate gold file was intended.
    sampleSet = _read_second_tokens('predict_out.txt')

    # Confusion matrix
    cm = ConfusionMatrix(sampleSet, predictSet)
    print(cm)


def _read_second_tokens(path):
    """Return the second whitespace-separated token of each non-empty line."""
    with open(path) as handle:
        return [line.split()[1] for line in handle if line.rstrip('\n')]
def test_pandas_confusion_normalized_issue1(self):
    """Normalization works when one true class has no samples (issue 1)."""
    # should ensure issue 1 is fixed
    # see http://stackoverflow.com/questions/19233771/sklearn-plot-confusion-matrix-with-labels/31720054#31720054
    actual = [
        'business', 'business', 'business', 'business', 'business',
        'business', 'business', 'business', 'business', 'business',
        'business', 'business', 'business', 'business', 'business',
        'business', 'business', 'business', 'business', 'business',
    ]
    predicted = [
        'health', 'business', 'business', 'business', 'business',
        'business', 'health', 'health', 'business', 'business',
        'business', 'business', 'business', 'business', 'business',
        'business', 'health', 'health', 'business', 'health',
    ]

    matrix = ConfusionMatrix(actual, predicted)
    assert isinstance(matrix, pdml.confusion_matrix.BinaryConfusionMatrix)

    raw = matrix.to_dataframe()
    normalized = matrix.to_dataframe(normalized=True)

    # Rows that sum to NaN are counted as 1.
    row_totals = normalized.sum(axis=1, skipna=False).fillna(1)
    assert row_totals.sum() == len(raw)
def on_epoch_end(self, epoch, logs=None):
    """Compute validation metrics, append them to files and store them in ``logs``.

    Draws ``self.validation_steps`` batches from ``self.validation_data``,
    predicts them with ``self.model`` and computes the log loss and the
    confusion matrices for all labels and for ``self.wanted_words`` (every
    other label becomes ``'_unknown_'``). Both matrices are appended to text
    files and the metrics are written into ``logs``.

    Args:
        epoch: Epoch number, used in the written summary line.
        logs: Dict updated in place with the validation metrics. When
            ``None`` a throwaway dict is used so the call does not fail.
    """
    if logs is None:
        # The default used to be subscripted directly, raising TypeError.
        logs = {}

    y_true, y_pred = [], []
    for _ in range(self.validation_steps):
        X_batch, y_true_batch = next(self.validation_data)
        y_pred_batch = self.model.predict(X_batch)

        y_true.extend(y_true_batch)
        y_pred.extend(y_pred_batch)

    y_true = np.float32(y_true)
    y_pred = np.float32(y_pred)
    val_loss = log_loss(y_true, y_pred)

    # map integer labels to strings
    y_true = [self.int2label[y] for y in y_true.argmax(axis=-1)]
    y_pred = [self.int2label[y] for y in y_pred.argmax(axis=-1)]

    confusion = ConfusionMatrix(y_true, y_pred)
    accs = self.accuracies(confusion._df_confusion.values)
    acc = self.accuracy(confusion._df_confusion.values)

    # same for wanted words
    y_true = [y if y in self.wanted_words else '_unknown_' for y in y_true]
    y_pred = [y if y in self.wanted_words else '_unknown_' for y in y_pred]
    wanted_words_confusion = ConfusionMatrix(y_true, y_pred)
    wanted_accs = self.accuracies(wanted_words_confusion._df_confusion.values)

    acc_line = ('\n[%03d]: val_categorical_accuracy: %.2f, '
                'val_mean_categorical_accuracy_wanted: %.2f') % (
                    epoch, acc, wanted_accs.mean())  # noqa

    with open('confusion_matrix.txt', 'a') as f:
        f.write('%s\n' % acc_line)
        f.write(confusion.to_dataframe().to_string())

    with open('wanted_confusion_matrix.txt', 'a') as f:
        f.write('%s\n' % acc_line)
        f.write(wanted_words_confusion.to_dataframe().to_string())

    logs['val_loss'] = val_loss
    logs['val_categorical_accuracy'] = acc
    logs['val_mean_categorical_accuracy_all'] = accs.mean()
    logs['val_mean_categorical_accuracy_wanted'] = wanted_accs.mean()
def test_pandas_confusion_get(self):
    """``get`` returns single cells; one argument means the diagonal cell."""
    actual = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
              'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    predicted = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
                 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    print("y_true: %s" % actual)
    print("y_pred: %s" % predicted)

    matrix = ConfusionMatrix(actual, predicted)
    assert isinstance(matrix, pdml.confusion_matrix.LabeledConfusionMatrix)

    assert matrix.get("cat") == matrix.get("cat", "cat")
    assert matrix.get("cat") == 3
    assert matrix.get("dog") == 1
    assert matrix.get("rabbit") == 3
    assert matrix.get("dog", "rabbit") == 2
def test_plot(self):
    """Both plot backends work and an unknown backend raises ``ValueError``."""
    try:
        import matplotlib.pyplot  # noqa
    except ImportError:
        import nose
        raise nose.SkipTest()

    actual = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
              'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    predicted = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
                 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']

    matrix = ConfusionMatrix(actual, predicted)

    # Default backend first, then seaborn.
    matrix.plot()
    matrix.plot(backend='seaborn')

    with self.assertRaises(ValueError):
        matrix.plot(backend='xxx')
def test_pandas_confusion_cm_stats_animals(self):
    """Stats, population and per-class true positives of the animal matrix."""
    actual = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
              'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    predicted = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
                 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    print("y_true: %s" % actual)
    print("y_pred: %s" % predicted)

    matrix = ConfusionMatrix(actual, predicted)
    assert isinstance(matrix, pdml.confusion_matrix.LabeledConfusionMatrix)
    assert isinstance(matrix.stats(), OrderedDict)

    assert matrix.population == len(actual)  # 12

    matrix.print_stats()
    all_stats = matrix.stats()  # noqa

    # The true positives of a binarized class equal its diagonal cell.
    assert matrix.binarize("cat").TP == matrix.get("cat")  # matrix.get("cat", "cat")
    assert matrix.binarize("cat").TP == 3
    assert matrix.binarize("dog").TP == matrix.get("dog")  # 1
    assert matrix.binarize("rabbit").TP == matrix.get("rabbit")  # 3