def estimateAccuracy(model, limit):
    # Split the raw data, fit the model on the training portion,
    # and report accuracy plus the confusion matrix on the held-out portion.
    asTrain, asTest = split("../data/train.csv", limit)
    model.fit(asTrain)
    testY = [x.Y for x in asTest]
    testPredictions = model.predict(asTest)
    print("%f" % accuracy_score(testY, testPredictions))
    print(confusion_matrix(testY, testPredictions))
def main():
    X, y = get_data()
    X_train_std, X_test_std, y_train, y_test = stand_train_test(X, y)

    ''' --- hard margin svm --- '''
    svc = LinearSVC(C=1e9, multi_class='ovr')  # with a very large C this is effectively a hard-margin SVM
    svc.fit(X_train_std, y_train)
    y_predict = svc.predict(X_train_std)
    cm = confusion_matrix(y_train, y_predict)
    print("confusion_matrix=", cm,
          "precision_score=", precision_score(y_train, y_predict),
          "recall_score=", recall_score(y_train, y_predict),
          "f1_score=", f1_score(y_train, y_predict))
    plot_decision_boundary(svc, axis=[-3, 3, -3, 3])
    plt.scatter(X_train_std[y_train == 0, 0], X_train_std[y_train == 0, 1])
    plt.scatter(X_train_std[y_train == 1, 0], X_train_std[y_train == 1, 1])
    plt.show()

    ''' --- soft margin svm --- '''
    svc1 = LinearSVC(C=1e-1)  # with a small C this is a soft-margin SVM; one blue outlier gets misclassified
    svc1.fit(X_train_std, y_train)
    y_predict1 = svc1.predict(X_train_std)
    cm = confusion_matrix(y_train, y_predict1)
    print("confusion_matrix=", cm,
          "precision_score=", precision_score(y_train, y_predict1),
          "recall_score=", recall_score(y_train, y_predict1),
          "f1_score=", f1_score(y_train, y_predict1))
    plot_decision_boundary(svc1, axis=[-3, 3, -3, 3])
    plt.scatter(X_train_std[y_train == 0, 0], X_train_std[y_train == 0, 1])
    plt.scatter(X_train_std[y_train == 1, 0], X_train_std[y_train == 1, 1])
    plt.show()
def eval_data(data_dev_all, gold_labels):
    dev_batches = batch_iter(data_dev_all, batch_size_train, 1, shuffle=False)

    predictions_all = []
    for batch in dev_batches:
        batch_stories, batch_endings1, batch_endings2, batch_labels, _ = zip(*batch)

        batch_stories_padded, batch_stories_seqlen = pad_data_and_return_seqlens(batch_stories)
        batch_endings1_padded, batch_endings1_seqlen = pad_data_and_return_seqlens(batch_endings1)
        batch_endings2_padded, batch_endings2_seqlen = pad_data_and_return_seqlens(batch_endings2)

        res_cost, res_acc, res_pred_y = dev_step(
            zip(batch_stories_padded, batch_stories_seqlen,
                batch_endings1_padded, batch_endings1_seqlen,
                batch_endings2_padded, batch_endings2_seqlen),
            batch_labels)
        predictions_all.extend(res_pred_y)

    logging.info("Confusion matrix:")
    conf_matrix = confusion_matrix(gold_labels, predictions_all)
    logging.info("\n" + str(conf_matrix))

    res = precision_recall_fscore_support(gold_labels, predictions_all)
    logging.info("precision_recall_fscore_support:%s" % str(res))
    logging.info("accuracy_score:%s" % accuracy_score(gold_labels, predictions_all))

    return res
def qwkappa(y, ypred):
    """Compute the Quadratic Weighted Kappa (QWK) for the network's classification.

    :param y: Vector of n true class labels, one per pattern.
    :param ypred: Vector of n predicted class labels, one per pattern
        (confusion_matrix expects labels, not class-membership probabilities).
    :return: QWK value for the classification.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cm = confusion_matrix(y, ypred)
        n_class = cm.shape[0]
        # Quadratic cost matrix: (i - j)**2 for every pair of classes.
        costes = np.reshape(np.tile(range(n_class), n_class), (n_class, n_class))
        costes = (costes - costes.T) ** 2
        f = 1 - costes
        n = cm.sum()
        x = cm / n
        r = x.sum(axis=1)  # Row sums
        s = x.sum(axis=0)  # Column sums
        Ex = r.reshape(-1, 1) * s  # Expected proportions under independence
        po = (x * f).sum()
        pe = (Ex * f).sum()
        return (po - pe) / (1 - pe)
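# A minimal usage sketch for qwkappa (hypothetical labels, not from the original
# source): perfect agreement gives QWK = 1, and larger ordinal distances between
# y and ypred are penalized quadratically.
import numpy as np

y_demo = np.array([0, 1, 2, 2, 1])
ypred_demo = np.array([0, 1, 2, 1, 1])
print(qwkappa(y_demo, y_demo))      # 1.0, perfect agreement
print(qwkappa(y_demo, ypred_demo))  # < 1.0, one off-by-one error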
def binary_class_measures(cls, y_true: list, y_predicted: list) -> OrderedDict:
    """Assessment measures of a classification task with binary classes
    i.e. Fantasy/Non-Fantasy

    Parameters
    ----------
    y_true : list
        Expected class labels in binary form
    y_predicted : list
        Predicted class labels in binary form

    Returns
    -------
    OrderedDict
        An ordered dictionary of assessment measures
    """
    cm = confusion_matrix(y_true, y_predicted)
    # sklearn lays the binary confusion matrix out as [[tn, fp], [fn, tp]],
    # so the flattened order is (tn, fp, fn, tp), not (tp, fp, fn, tn).
    tn, fp, fn, tp = cm.flatten()
    measures = OrderedDict()
    measures['accuracy'] = (tp + tn) / (tp + fp + fn + tn)
    measures['specificity'] = tn / (tn + fp)
    measures['sensitivity'] = tp / (tp + fn)
    measures['precision'] = tp / (tp + fp)
    measures['f1score'] = 2 * tp / (2 * tp + fp + fn)
    return measures
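# Sanity check for the unpacking above (a sketch, not from the original source):
# for binary labels sklearn's confusion_matrix is laid out as [[tn, fp], [fn, tp]],
# so ravel()/flatten() yields (tn, fp, fn, tp) in that order.
from sklearn.metrics import confusion_matrix

tn, fp, fn, tp = confusion_matrix([0, 1, 0, 1], [0, 1, 1, 1]).ravel()
assert (tn, fp, fn, tp) == (1, 1, 0, 2)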
def matthews_corrcoef(y_true, y_pred, sample_weight=None):
    # sklearn.metrics.classification was removed in scikit-learn 0.24; import
    # from the public locations instead (and from the private _classification
    # module for the _check_targets helper).
    from sklearn.metrics import confusion_matrix
    from sklearn.metrics._classification import _check_targets
    from sklearn.preprocessing import LabelEncoder

    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    if y_type not in {'binary', 'multiclass'}:
        raise ValueError('%s is not supported' % y_type)

    lb = LabelEncoder()
    lb.fit(np.hstack([y_true, y_pred]))
    y_true = lb.transform(y_true)
    y_pred = lb.transform(y_pred)

    C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
    t_sum = C.sum(axis=1)
    p_sum = C.sum(axis=0)
    n_correct = np.trace(C)
    n_samples = p_sum.sum()
    cov_ytyp = n_correct * n_samples - np.dot(t_sum, p_sum)
    cov_ypyp = n_samples**2 - np.dot(p_sum, p_sum)
    cov_ytyt = n_samples**2 - np.dot(t_sum, t_sum)
    mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)

    if np.isnan(mcc):
        return 0.0
    else:
        return mcc
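# A quick cross-check (assumed data, not from the original source): the
# reimplementation above should agree with sklearn's public matthews_corrcoef
# on binary and multiclass inputs.
import numpy as np
from sklearn import metrics

y_t = np.array([0, 1, 2, 0, 1, 2, 0, 1])
y_p = np.array([0, 2, 2, 0, 1, 1, 0, 1])
assert np.isclose(matthews_corrcoef(y_t, y_p), metrics.matthews_corrcoef(y_t, y_p))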
def plot_cm(y_trues, y_preds, normalize=True, cmap=plt.cm.Blues):
    classes = ['SNR', 'AF', 'IAVB', 'LBBB', 'RBBB', 'PAC', 'PVC', 'STD', 'STE']
    for i, label in enumerate(classes):
        y_true = y_trues[:, i]
        y_pred = y_preds[:, i]
        cm = confusion_matrix(y_true, y_pred)
        if normalize:
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        fig, ax = plt.subplots(figsize=(4, 4))
        im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
        ax.figure.colorbar(im, ax=ax)
        ax.set(xticks=np.arange(cm.shape[1]),
               yticks=np.arange(cm.shape[0]),
               xticklabels=[0, 1], yticklabels=[0, 1],
               title=label,
               ylabel='True label',
               xlabel='Predicted label')
        plt.setp(ax.get_xticklabels(), ha="center")
        fmt = '.3f' if normalize else 'd'
        thresh = cm.max() / 2.
        # Use separate row/col indices so the outer loop variable is not shadowed.
        for r in range(cm.shape[0]):
            for c in range(cm.shape[1]):
                ax.text(c, r, format(cm[r, c], fmt),
                        ha="center", va="center",
                        color="white" if cm[r, c] > thresh else "black")
        np.set_printoptions(precision=3)
        fig.tight_layout()
        plt.savefig(f'results/{label}.png')
        plt.close(fig)
def test(model, cross_idx, cross_path, inner_epoch=0, outer_epoch=0, ckp_pth=None, device=None):
    model.eval()
    print('============================Meta-test Testing Start============================')
    all_conf_mat = np.zeros((len(lab_m_list), len(lab_m_list)))
    with torch.no_grad():
        with open(cross_path + 'test_set.pkl', 'rb') as f:
            data = pickle.load(f)
        ses_names = list(data.keys())
        for wav_file in ses_names:
            mat, true_y = data.get(wav_file)
            true_y = np.array([true_y])
            mat = torch.tensor(mat).type(torch.FloatTensor).to(device)
            pred_y = model(mat, mode='query').sum(dim=0)
            pred_y = ((pred_y.topk(1))[1]).data.cpu().flatten().tolist()
            confusion_mat = confusion_matrix(true_y, pred_y, labels=[0, 1, 2, 3])
            all_conf_mat += confusion_mat
    all_conf_mat = all_conf_mat.T
    UA_metric = get_UA(all_conf_mat)
    WA_metric = get_WA(all_conf_mat)
    npy_name = (ckp_pth + '/' + 'Leave_' + cross_idx + '_Outer_' + str(outer_epoch) +
                '_Inner_' + str(inner_epoch) + '_test.npy')
    np.save(npy_name, all_conf_mat)
    print('============================Meta-test Testing Finish============================')
    print()
    return UA_metric, WA_metric
def printConfusionMatrix(y_true, y_pred, class_names=None):
    """ Print a confusion matrix similar to R's confusionMatrix """
    confMatrix = classification.confusion_matrix(y_true, y_pred)
    accuracy = classification.accuracy_score(y_true, y_pred)
    print('Confusion Matrix (Accuracy {:.4f})\n'.format(accuracy))
    _printConfusionMatrix(confMatrix, class_names)
def test_model(classifier, X, y):
    y_pred = classifier.predict(X)
    conf_matrix = confusion_matrix(y, y_pred)
    accuracy = accuracy_score(y, y_pred)
    report = classification_report(y, y_pred)
    print(conf_matrix)
    print(report)
    print(accuracy)
def do_classification(x_train, y_train, x_test, y_test, gamma_val, c_val):
    classifier = svm.SVC(kernel='rbf', gamma=gamma_val, C=c_val)
    classifier.fit(x_train, y_train)
    predicted = classifier.predict(x_test)
    accuracy = np.mean(y_test == predicted)
    cfm = confusion_matrix(y_test, predicted)
    return accuracy, gamma_val, c_val, cfm
def balanced_accuracy_score(y_true, y_pred, balance=0.5):
    """Balanced accuracy classification score.

    The formula for the balanced accuracy score ::

        balanced accuracy = balance * TP/(TP + FN) + (1 - balance) * TN/(TN + FP)

    Because it needs the true/false negative/positive notion it only supports
    binary classification.

    The `balance` parameter determines the weight of sensitivity in the
    combined score. ``balance -> 1`` lends more weight to sensitivity, while
    ``balance -> 0`` favors specificity (``balance = 1`` considers only
    sensitivity, ``balance = 0`` only specificity).

    Read more in the :ref:`User Guide <balanced_accuracy_score>`.

    Parameters
    ----------
    y_true : 1d array-like, or label indicator array / sparse matrix
        Ground truth (correct) labels.

    y_pred : 1d array-like, or label indicator array / sparse matrix
        Predicted labels, as returned by a classifier.

    balance : float between 0 and 1
        Weight associated with the sensitivity (or recall) against the
        specificity in the final score.

    Returns
    -------
    score : float

    See also
    --------
    accuracy_score

    References
    ----------
    .. [1] `Wikipedia entry for the accuracy and precision
           <http://en.wikipedia.org/wiki/Accuracy_and_precision>`

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import balanced_accuracy_score
    >>> y_pred = [0, 0, 1]
    >>> y_true = [0, 1, 1]
    >>> balanced_accuracy_score(y_true, y_pred)
    0.75

    >>> y_pred = ["cat", "cat", "ant"]
    >>> y_true = ["cat", "ant", "ant"]
    >>> balanced_accuracy_score(y_true, y_pred)
    0.75
    """
    if balance < 0. or 1. < balance:
        raise ValueError("balance has to be between 0 and 1")

    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    # Use equality, not identity: `is not` on a string literal is unreliable.
    if y_type != "binary":
        raise ValueError("%s is not supported" % y_type)

    cm = confusion_matrix(y_true, y_pred)
    neg, pos = cm.sum(axis=1, dtype='float')
    tn, tp = np.diag(cm)
    sensitivity = tp / pos
    specificity = tn / neg
    return balance * sensitivity + (1 - balance) * specificity
def gm(y, ypred):
    # Geometric mean of the per-class sensitivities (recalls).
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cm = confusion_matrix(y, ypred)
        sum_byclass = np.sum(cm, axis=1)
        sensitivities = np.diag(cm) / sum_byclass.astype('double')
        sensitivities[sum_byclass == 0] = 1  # classes absent from y do not penalize
        gm_result = pow(np.prod(sensitivities), 1.0 / cm.shape[0])
        return gm_result
def ms(y, ypred):
    # Minimum sensitivity: the recall of the worst-classified class.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cm = confusion_matrix(y, ypred)
        sum_byclass = np.sum(cm, axis=1)
        sensitivities = np.diag(cm) / sum_byclass.astype('double')
        sensitivities[sum_byclass == 0] = 1  # classes absent from y do not penalize
        ms_result = np.min(sensitivities)  # renamed so it doesn't shadow the function
        return ms_result
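# A minimal sketch (hypothetical labels, not from the original source) for the
# two sensitivity-based metrics above: gm() is the geometric mean of the
# per-class sensitivities and ms() their minimum, so both are dominated by the
# worst-recognized class.
y_demo = [0, 0, 1, 1, 2, 2]
ypred_demo = [0, 0, 1, 0, 2, 2]  # one class-1 pattern misclassified
print(gm(y_demo, ypred_demo))    # (1.0 * 0.5 * 1.0) ** (1/3) ~= 0.7937
print(ms(y_demo, ypred_demo))    # 0.5, the sensitivity of the weakest class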
def evalRes(Y_test, pred, test_labels):
    y_pred = np.argmax(pred, axis=1)
    y_test = np.argmax(Y_test, axis=1)
    print('Classification Report')
    target_names = ['Reading', 'Speaking', 'Watching']
    # sklearn's confusion_matrix expects (y_true, y_pred) in that order.
    cnf_matrix = confusion_matrix(test_labels, y_pred)
    df_class_report = pandas_classification_report(y_true=y_test, y_pred=y_pred)
    df_class_report.to_csv('classification_report.csv', sep=',')
    plot_confusion_matrix(cnf_matrix, classes=target_names, normalize=True,
                          title='Normalized confusion matrix')
def mmae(y, ypred):
    # Maximum of the per-class average mean absolute errors (AMAE).
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cm = confusion_matrix(y, ypred)
        n_class = cm.shape[0]
        # Linear cost matrix: |i - j| for every pair of classes.
        costes = np.reshape(np.tile(range(n_class), n_class), (n_class, n_class))
        costes = np.abs(costes - np.transpose(costes))
        errores = costes * cm
        amaes = np.sum(errores, axis=1) / np.sum(cm, axis=1).astype('double')
        amaes = amaes[~np.isnan(amaes)]
        return amaes.max()
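# A worked example (assumed labels, not from the original source) for mmae:
# the per-class average MAEs here are [0, 0.5, 0] because one class-1 pattern
# lands one category away, so the maximum AMAE over the classes is 0.5.
y_demo = [0, 0, 1, 1, 2, 2]
ypred_demo = [0, 0, 1, 2, 2, 2]
print(mmae(y_demo, ypred_demo))  # 0.5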
def test_classification(self, test, testlabel, bestmodel):
    outputtest = bestmodel.predict(test)
    accuracytest = accuracy_score(testlabel, outputtest)
    print("The accuracy for the test set is %r" % accuracytest,
          "and the confusion matrix is")
    # sklearn's confusion_matrix expects (y_true, y_pred) in that order.
    print(confusion_matrix(testlabel, outputtest))
    print(classification_report(testlabel, outputtest))
    # probaout = bestmodel.predict_proba(test)
    # probaout = DataFrame(probaout)
    # print(probaout)
    return outputtest
def calculate(self) -> None:
    """
    Calculates all of the metrics (precision, recall, F score and support)
    and stores them in the results dictionary.

    Note: This function may eat up a lot of memory if it's used on a large file.

    :return:
    """
    print('\nCalculating metrics...')
    ftr_all = []
    fpr_all = []
    gen = generate_tuples_from_file(self.fpath, encodings=self.encodings,
                                    first_layer=self.first_layer,
                                    batch_size=self.batch_size)
    # Wrap the step range in tqdm when it is available; the loop body is
    # identical either way.
    if tqdm:
        step_iter = tqdm(range(self.steps))
    else:
        print('[!] For progress logging during metrics calculation '
              'install tqdm.')
        step_iter = range(self.steps)
    for _ in step_iter:
        x, y = next(gen)
        y_pred = self.model.predict_classes(x, verbose=0)
        y_true = y.argmax(2)
        ftr, fpr = self._score(y_true, y_pred)
        ftr_all.extend(ftr)
        fpr_all.extend(fpr)
    confusion = confusion_matrix(ftr_all, fpr_all)
    p, r, f, s = precision_recall_fscore_support(ftr_all, fpr_all)
    self.results = {
        'confusion_matrix': confusion,
        'precision': p,
        'recall': r,
        'fscore': f,
        'f1mean': np.mean(f),
        'support': s
    }
def eval_data(sess, data_dev_all, gold_labels, save_features=False,
              save_features_file="file.features.pickle"):
    dev_batches = batch_iter(data_dev_all, batch_size_train, 1, shuffle=False)

    overal_loss = 0
    steps_cnt = 0
    ids_all = []
    predictions_all = []
    res_feats_all = []
    for batch in dev_batches:
        batch_stories, batch_endings1, batch_endings2, batch_labels, batch_ids = zip(*batch)

        batch_stories_padded, batch_stories_seqlen = pad_data_and_return_seqlens(batch_stories)
        batch_endings1_padded, batch_endings1_seqlen = pad_data_and_return_seqlens(batch_endings1)
        batch_endings2_padded, batch_endings2_seqlen = pad_data_and_return_seqlens(batch_endings2)

        res_cost, res_acc, res_pred_y, res_feats = dnn_model.dev_step(
            sess,
            zip(batch_stories_padded, batch_stories_seqlen,
                batch_endings1_padded, batch_endings1_seqlen,
                batch_endings2_padded, batch_endings2_seqlen),
            batch_labels)

        steps_cnt += 1
        overal_loss += res_cost
        predictions_all.extend(res_pred_y)
        res_feats_all.extend(res_feats)
        ids_all.extend(batch_ids)

    prec_rec_f_supp = precision_recall_fscore_support(gold_labels, predictions_all)
    conf_matrix = confusion_matrix(gold_labels, predictions_all)
    overall_accuracy = accuracy_score(gold_labels, predictions_all)
    overal_loss = overal_loss / steps_cnt

    if save_features:
        with open(save_features_file, "wb") as write_feats:
            pickle.dump(res_feats_all, write_feats)
        # DataUtilities_ROCStories.save_data_to_json_file(res_feats_all, output_json_file=save_features_file)
        logging.info("Features saved to file: %s" % save_features_file)

    return prec_rec_f_supp, overal_loss, overall_accuracy, predictions_all, ids_all, conf_matrix
def main():
    load_dataset_mnist("../libs")
    mndata = MNIST('../libs/data_mnist', gz=True)
    weight_path = "nn_weights.pkl"
    training_phase = weight_path not in os.listdir(".")
    if training_phase:
        images, labels = mndata.load_training()
        images, labels = preprocess_data(images, labels)
        epochs = 10
        batch_size = 64
        learning_rate = 0.01

        optimizer = Adam(learning_rate)
        loss_func = CrossEntropy()
        archs = [
            InputLayer(),
            FCLayer(num_neurons=100, weight_init="he_normal"),
            ActivationLayer(activation="relu"),
            DropoutLayer(keep_prob=0.8),
            FCLayer(num_neurons=125, weight_init="he_normal"),
            ActivationLayer(activation="relu"),
            DropoutLayer(keep_prob=0.8),
            FCLayer(num_neurons=50, weight_init="he_normal"),
            BatchNormLayer(),
            ActivationLayer(activation="relu"),
            FCLayer(num_neurons=labels.shape[1], weight_init="he_normal"),
            ActivationLayer(activation="softmax"),
        ]
        nn = NeuralNetwork(optimizer=optimizer, layers=archs, loss_func=loss_func)

        trainer = Trainer(nn, batch_size, epochs)
        trainer.train(images, labels)
        trainer.save_model("nn_weights.pkl")
    else:
        import pickle
        images_test, labels_test = mndata.load_testing()
        images_test, labels_test = preprocess_data(images_test, labels_test, test=True)
        with open(weight_path, "rb") as f:
            nn = pickle.load(f)
        pred = nn.predict(images_test)

        print("Accuracy:", len(pred[labels_test == pred]) / len(pred))
        # sklearn.metrics.classification was removed in scikit-learn 0.24;
        # confusion_matrix lives directly under sklearn.metrics.
        from sklearn.metrics import confusion_matrix
        print("Confusion matrix: ")
        print(confusion_matrix(labels_test, pred))
def run_grid_search(grid_search, show_evaluation=True):
    """ Run the GridSearch algorithm and compute evaluation metrics """
    X_train, X_test, y_train, y_test = split_dataset()
    grid_search.fit(X_train, y_train)
    # for key, value in grid_search.cv_results_.items():
    #     print(key, value)
    predictions = grid_search.predict(X_test)
    if show_evaluation:
        logger.debug("macro_recall: %s", recall_score(y_test, predictions, average="macro"))
        logger.debug(precision_recall_fscore_support(y_test, predictions))
        logger.debug(confusion_matrix(y_test, predictions))
def classificationSummary(y_true, y_pred, class_names=None): """ Provide a comprehensive summary of classification performance similar to R's confusionMatrix """ confMatrix = classification.confusion_matrix(y_true, y_pred) TP = confMatrix[0, 0] FP = confMatrix[1, 0] TN = confMatrix[1, 1] FN = confMatrix[0, 1] N = TN + TP + FN + FP sensitivity = TP / (TP + FN) specificity = TN / (TN + FP) prevalence = (TP + FN) / N PPV = TP / (TP + FP) NPV = TN / (TN + FN) BAC = (sensitivity + specificity) / 2 metrics = [ ('Accuracy', classification.accuracy_score(y_true, y_pred)), ('95% CI', None), ('No Information Rate', None), ('P-Value [Acc > NIR]', None), (None, None), ('Kappa', classification.cohen_kappa_score(y_true, y_pred)), ("Mcnemar's Test P-Value", None), (None, None), ('Sensitivity', sensitivity), ('Specificity', specificity), ('Pos Pred Value', PPV), ('Neg Pred Value', NPV), ('Prevalence', prevalence), ('Detection Rate', None), ('Detection Prevalence', None), ('Balanced Accuracy', BAC), ] print('Confusion Matrix and Statistics\n') _printConfusionMatrix(confMatrix, class_names) if len(set(y_true)) < 5: print(classification_report(y_true, y_pred, digits=4)) fmt1 = '{{:>{}}} : {{:.3f}}'.format(max(len(m[0]) for m in metrics if m[0] is not None)) fmt2 = '{{:>{}}} : {{}}'.format(max(len(m[0]) for m in metrics if m[0] is not None)) for metric, value in metrics: if metric is None: print() elif value is None: pass # print(fmt2.format(metric, 'missing')) else: print(fmt1.format(metric, value))
def KNN(X_train, X_test, y_train, y_test):
    print("training data shape: ", X_train.shape)
    print("################# KNN #################")
    model = KNeighborsClassifier(n_neighbors=9)
    scores = sklearn.model_selection.cross_val_score(
        model, X_train, y_train,
        cv=KFold(n_splits=10, shuffle=True), scoring='accuracy')
    print("KNN cross-validation Accuracy: %0.2f" % scores.mean())
    model.fit(X_train, y_train)
    test_predict = model.predict(X_test)
    print("report for KNN: ")
    report = sklearn.metrics.classification_report(y_test, test_predict, digits=4)
    print(report)
    print("KNN overall accuracy: " + str(sklearn.metrics.accuracy_score(y_test, test_predict)))
    print(confusion_matrix(y_test, test_predict))
def evalRes(pred, test_labels, y_testMultiClass, name):
    y_pred = np.argmax(pred, axis=1)
    y_test = test_labels
    target_names = ['Reading', 'Speaking', 'Watching']
    # sklearn's confusion_matrix expects (y_true, y_pred) in that order.
    cnf_matrix = confusion_matrix(test_labels, y_pred)
    df_class_report = pandas_classification_report(y_true=y_test, y_pred=y_pred)
    df_class_report.to_csv(folder + name + 'classification_report.csv', sep=',')
    plot_confusion_matrix(cnf_matrix, classes=target_names, normalize=True,
                          title='', name=name)
    plot_ROC(pred, y_testMultiClass, name)
def plot_confusion_matrix2(y_true, y_pred, labels, ymap=None, figsize=(10, 10)):
    """
    Generate matrix plot of confusion matrix with pretty annotations.

    args:
      y_true:    true label of the input, with shape (nsamples,)
      y_pred:    prediction of the input, with shape (nsamples,)
      labels:    string array, name the order of class labels in the confusion
                 matrix. use `clf.classes_` if using scikit-learn models.
                 with shape (nclass,).
      ymap:      dict: any -> string, length == nclass.
                 if not None, map the labels & ys to more understandable strings.
                 Caution: original y_true, y_pred and labels must align.
      figsize:   the size of the figure plotted.
    """
    if ymap is not None:
        y_pred = [ymap[yi] for yi in y_pred]
        y_true = [ymap[yi] for yi in y_true]
        labels = [ymap[yi] for yi in labels]
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cm_sum = np.sum(cm, axis=1, keepdims=True)
    cm_perc = cm / cm_sum.astype(float) * 100
    annot = np.empty_like(cm).astype(str)
    nrows, ncols = cm.shape
    for i in range(nrows):
        for j in range(ncols):
            c = cm[i, j]
            p = cm_perc[i, j]
            if i == j:
                s = cm_sum[i, 0]  # index the scalar, not the 1-element row
                annot[i, j] = '%.1f%%\n%d/%d' % (p, c, s)
            elif c == 0:
                annot[i, j] = ''
            else:
                annot[i, j] = '%.1f%%\n%d' % (p, c)
    cm = pd.DataFrame(cm, index=labels, columns=labels)
    cm.index.name = 'Actual'
    cm.columns.name = 'Predicted'
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(cm, annot=annot, fmt='', ax=ax)
    plt.show()
def evalRes(X_test, Y_test, pred, test_labels):
    y_pred = np.argmax(pred, axis=1)
    y_test = np.argmax(Y_test, axis=1)
    print('Classification Report')
    target_names = ['Reading', 'Speaking', 'Watching']
    # sklearn expects (y_true, y_pred) for both calls below.
    print(classification_report(y_test, y_pred, target_names=target_names))
    print('Confusion Matrix')
    cnf_matrix = confusion_matrix(test_labels, y_pred)
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=target_names,
                          title='Confusion matrix, without normalization')
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=target_names, normalize=True,
                          title='Normalized confusion matrix')
    plt.show()
def wkappa(y, ypred):
    # Linearly weighted kappa: same scheme as qwkappa but with |i - j| costs.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cm = confusion_matrix(y, ypred)
        n_class = cm.shape[0]
        costes = np.reshape(np.tile(range(n_class), n_class), (n_class, n_class))
        costes = np.abs(costes - np.transpose(costes))
        f = 1 - costes
        n = cm.sum()
        x = cm / n
        r = x.sum(axis=1)  # Row sums
        s = x.sum(axis=0)  # Column sums
        Ex = r.reshape(-1, 1) * s
        po = (x * f).sum()
        pe = (Ex * f).sum()
        return (po - pe) / (1 - pe)
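# Cross-check sketch (assumed data, not from the original source): wkappa and
# qwkappa above should match sklearn's cohen_kappa_score with linear and
# quadratic weights respectively, at least when every class occurs in y.
import numpy as np
from sklearn.metrics import cohen_kappa_score

y_demo = [0, 1, 2, 2, 1, 0]
ypred_demo = [0, 2, 2, 1, 1, 0]
assert np.isclose(wkappa(y_demo, ypred_demo),
                  cohen_kappa_score(y_demo, ypred_demo, weights='linear'))
assert np.isclose(qwkappa(y_demo, ypred_demo),
                  cohen_kappa_score(y_demo, ypred_demo, weights='quadratic'))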
def classificationSummary(y_true, y_pred, class_names=None): """ Print a summary of classification performance Input: y_true: actual values y_pred: predicted values class_names (optional): list of class names """ confusionMatrix = classification.confusion_matrix(y_true, y_pred) accuracy = classification.accuracy_score(y_true, y_pred) print('Confusion Matrix (Accuracy {:.4f})\n'.format(accuracy)) # Pretty-print confusion matrix cm = confusionMatrix labels = class_names if labels is None: labels = [str(i) for i in range(len(cm))] # Convert the confusion matrix and labels to strings cm = [[str(i) for i in row] for row in cm] labels = [str(i) for i in labels] # Determine the width for the first label column and the individual cells prediction = 'Prediction' actual = 'Actual' labelWidth = max(len(s) for s in labels) cmWidth = max(max(len(s) for row in cm for s in row), labelWidth) + 1 labelWidth = max(labelWidth, len(actual)) # Construct the format statements fmt1 = '{{:>{}}}'.format(labelWidth) fmt2 = '{{:>{}}}'.format(cmWidth) * len(labels) # And print the confusion matrix print(fmt1.format(' ') + ' ' + prediction) print(fmt1.format(actual), end='') print(fmt2.format(*labels)) for cls, row in zip(labels, cm): print(fmt1.format(cls), end='') print(fmt2.format(*row))
def scores(y_test, predictions, pp, clf):
    print()
    if pp == 'Y':
        print('Scores After Preprocessing :')
    else:
        print('Scores Before Preprocessing :')
    print('Classifier = {clf}'.format(clf=clf))
    print('Accuracy score = {accuracy}'.format(
        accuracy=accuracy_score(y_test, predictions)))
    print('Precision score = {precision}'.format(
        precision=precision_score(y_test, predictions)))
    print('Recall score = {recall}'.format(
        recall=recall_score(y_test, predictions)))
    print('F1 Score = {f1score}'.format(f1score=f1_score(y_test, predictions)))
    print('ROC AUC = {roc_auc}'.format(
        roc_auc=roc_auc_score(y_test, predictions)))
    print(confusion_matrix(y_test, predictions))
    print(classification_report(y_test, predictions))
    print()
def evaluate(y_pred, y_test):
    perf = matthews_corrcoef(y_test, y_pred)
    print("Prediction score:%s" % perf)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    print("True negatives:%s" % tn)
    print("True positives: %s" % tp)
    print("False negatives: %s" % fn)
    print("False positives: %s" % fp)
    n_neg = tn + fp
    n_pos = tp + fn
    ratio_tp = float(tp) / float(n_pos)          # close to 1 if test failures were predicted correctly
    ratio_tn = float(tn) / float(n_neg)          # close to 1 if test passes were predicted correctly
    ratio_fp = float(fp) / float(n_pos + n_neg)  # close to 0 if few mistakes
    ratio_fn = float(fn) / float(n_pos + n_neg)  # close to 0 if few mistakes (original divided by nn+nn, a typo)
    print("TP ratio over positives:%s, TN ratio over negatives:%s, "
          "FP ratio over total:%s, FN ratio over total:%s"
          % (ratio_tp, ratio_tn, ratio_fp, ratio_fn))
    return perf
def print_metrics(y_true, y_pred):
    print('auc:', roc_auc_score(y_true, y_pred))
    print('accuracy:', classification.accuracy_score(y_true, y_pred))
    # Avoid shadowing the confusion_matrix function with the result array.
    cm = classification.confusion_matrix(y_true, y_pred)
    # print('report:', classification.classification_report(y_true, y_pred))
    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn)
    print('sensitivity: {}'.format(sensitivity))
    specificity = tn / (tn + fp)
    print('specificity: {}'.format(specificity))
    print('precision: {}'.format(tp / (tp + fp)))
    total_acc = (tp + tn) / (tp + tn + fp + fn)
    random_acc = (((tn + fp) * (tn + fn) + (fn + tp) * (fp + tp))
                  / (tp + tn + fp + fn)**2)
    kappa = (total_acc - random_acc) / (1 - random_acc)
    print('Cohen\'s kappa: {}'.format(kappa))
    youdens = sensitivity - (1 - specificity)
    print('Youden\'s index: {}'.format(youdens))
    print('log loss:', classification.log_loss(y_true, y_pred))
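# Sanity-check sketch (assumed binary labels, not from the original source):
# the hand-rolled kappa inside print_metrics follows Cohen's kappa,
# (observed accuracy - chance accuracy) / (1 - chance accuracy), so it should
# match sklearn's cohen_kappa_score on the same data.
import numpy as np
from sklearn.metrics import cohen_kappa_score, confusion_matrix

y_t = np.array([0, 0, 1, 1, 1, 0])
y_p = np.array([0, 1, 1, 1, 0, 0])
tn, fp, fn, tp = confusion_matrix(y_t, y_p).ravel()
total_acc = (tp + tn) / (tp + tn + fp + fn)
random_acc = ((tn + fp) * (tn + fn) + (fn + tp) * (fp + tp)) / (tp + tn + fp + fn) ** 2
assert np.isclose((total_acc - random_acc) / (1 - random_acc),
                  cohen_kappa_score(y_t, y_p))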
# Alternative models that were tried (kept for reference):
# model = LogisticRegression(C=subsample, verbose=0, penalty='l1', max_iter=100)
# model = KNeighborsClassifier(n_neighbors=learning_rate)
# model = xgb.XGBRegressor(max_depth=depth, n_estimators=n_estimators, learning_rate=learning_rate,
#                          nthread=1, subsample=subsample, silent=True, colsample_bytree=0.8)
# model = LinearSVC(C=0.9, penalty='l2', dual=False, verbose=1, max_iter=100000)
model.fit(trtrfe, trtrtrue)

# Mean accuracy on the given test data and labels.
predicted = [math.floor(x) for x in model.predict(trtefe)]
score = model.score(trtefe, trtetrue)
print("score =", score)
print(classification_report(trtetrue, predicted))
print(confusion_matrix(trtetrue, predicted))

if score > best_score or True:  # `or True` keeps every model; the comparison is effectively disabled
    best_model = model
    best_score = score
    best_model.fit(train_features, train_true)
    predicted = [math.floor(x) for x in best_model.predict(test_features)]
    fname = "data/net_result/sol_" + str(score) + "_" + str(time.time()) + ".csv"
    write_sol(predicted, fname)

print("this model", depth, "\t", subsample, "\t", score)
print("best model", best_score)

best_model.fit(trtefe, trtetrue)
predicted = [math.floor(x) for x in best_model.predict(test_features)]
write_sol(predicted, "data/net_result/sol.csv")