def evalModel(predictor, test_data, test_labels, train_data, train_labels, name, evalresults):
    # Accuracy on the raw features.
    predictor.fit(train_data, train_labels)
    evalresults.setdefault(name + " Accuracy raw \t\t", []).append(
        accuracy_score(test_labels, predictor.predict(test_data)))
    #predictor.fit(preprocessing.scale(train_data), train_labels)
    #evalresults.setdefault(name + " Accuracy std \t\t", []).append(metrics.accuracy_score(test_labels, predictor.predict(preprocessing.scale(test_data))))
    #predictor.fit(preprocessing.normalize(train_data, norm='l2'), train_labels)
    #evalresults.setdefault(name + " Accuracy nml \t\t", []).append(metrics.accuracy_score(test_labels, predictor.predict(preprocessing.normalize(test_data, norm='l2'))))

    # ROC AUC is only defined for binary problems.
    if len(set(train_labels)) != 2:
        return

    # AUC on raw, standardized, and L2-normalized features.
    predictor.fit(train_data, train_labels)
    fpr, tpr, _ = roc_curve(test_labels, predictor.decision_function(test_data))
    evalresults.setdefault(name + " AUC raw \t\t", []).append(auc(fpr, tpr))

    predictor.fit(preprocessing.scale(train_data), train_labels)
    fpr, tpr, _ = roc_curve(test_labels,
                            predictor.decision_function(preprocessing.scale(test_data)))
    evalresults.setdefault(name + " AUC std \t\t", []).append(auc(fpr, tpr))

    predictor.fit(preprocessing.normalize(train_data, norm='l2'), train_labels)
    fpr, tpr, _ = roc_curve(test_labels,
                            predictor.decision_function(preprocessing.normalize(test_data, norm='l2')))
    evalresults.setdefault(name + " AUC nml \t\t", []).append(auc(fpr, tpr))
def plot_roc_curve(y_true, y_pred, y_pred2=None):
    fpr, tpr, _thresholds = roc_curve(y_true, y_pred)
    plt.plot(fpr, tpr, c="green", label="model 1")
    if y_pred2 is not None:
        fpr2, tpr2, _thresholds = roc_curve(y_true, y_pred2)
        plt.plot(fpr2, tpr2, c="purple", label="model 2")
    plt.legend(loc=4)
    plt.show()
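# A minimal usage sketch for plot_roc_curve above, assuming
# matplotlib.pyplot is imported as plt and roc_curve comes from
# sklearn.metrics, as elsewhere in this file. The dataset and the two
# model choices are illustrative, not taken from the original snippets.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=500, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
p1 = LogisticRegression(max_iter=1000).fit(X_tr, y_tr).predict_proba(X_te)[:, 1]
p2 = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X_tr, y_tr).predict_proba(X_te)[:, 1]
plot_roc_curve(y_te, p1, p2)  # overlays both models' ROC curves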
def plot_ROC(y_pred, y_test, name):
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    n_classes = 3
    # Per-class ROC curves.
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_pred[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    # Micro-average: pool all decisions across classes.
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    # Macro-average: interpolate every per-class curve onto a common FPR grid.
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])
    mean_tpr /= n_classes
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    target_names = ['Reading', 'Speaking', 'Watching']
    plt.figure()
    plt.plot(fpr["micro"], tpr["micro"],
             label='micro-average ROC (AUC = {0:0.2f})'.format(roc_auc["micro"]),
             color='deeppink', linestyle=':', linewidth=4)
    plt.plot(fpr["macro"], tpr["macro"],
             label='macro-average ROC (AUC = {0:0.2f})'.format(roc_auc["macro"]),
             color='navy', linestyle=':', linewidth=4)
    lw = 2
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=lw,
                 label='ROC - {0} (AUC = {1:0.2f})'.format(target_names[i], roc_auc[i]))
    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc="lower right")
    plt.savefig("res/{}-ROC-AUC".format(name))
def _binary_roc_auc_score(y_true, y_score, max_fpr=None, sample_weight=None):
    if len(np.unique(y_true)) != 2:
        raise ValueError("Only one class present in y_true. ROC AUC score "
                         "is not defined in that case.")
    fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight)
    if max_fpr is None or max_fpr == 1:
        return auc(fpr, tpr)
    if max_fpr <= 0 or max_fpr > 1:
        raise ValueError("Expected max_fpr in range (0, 1], got: %r" % max_fpr)

    # Add a single point at max_fpr by linear interpolation
    stop = np.searchsorted(fpr, max_fpr, 'right')
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    tpr = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
    fpr = np.append(fpr[:stop], max_fpr)
    partial_auc = auc(fpr, tpr)

    # McClish correction: standardize result to be 0.5 if non-discriminant
    # and 1 if maximal
    min_area = 0.5 * max_fpr**2
    max_area = max_fpr
    return 0.5 * (1 + (partial_auc - min_area) / (max_area - min_area))
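# Sanity check for the partial-AUC branch above, assuming the max_fpr
# parameter added to the signature; the toy scores are illustrative.
# With a perfectly separating score the McClish-standardized partial
# AUC is 1.0 for any valid max_fpr.
import numpy as np

y_toy = np.array([0, 0, 0, 0, 1, 1, 1, 1])
s_toy = np.array([.1, .2, .3, .4, .6, .7, .8, .9])
print(_binary_roc_auc_score(y_toy, s_toy))               # full AUC -> 1.0
print(_binary_roc_auc_score(y_toy, s_toy, max_fpr=0.1))  # partial AUC -> 1.0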
def Predict(self, inp, labels, classifier, folds, name, paramdesc):
    X = inp
    y = labels
    # Keep two classes only (drop label 2) so the ROC analysis is binary.
    X, y = X[y != 2], y[y != 2]
    n_samples, n_features = X.shape

    ###############################################################################
    # Classification and ROC analysis

    # Run classifier with cross-validation and plot ROC curves
    # (legacy scikit-learn API: StratifiedKFold(y, n_folds=...)).
    cv = StratifiedKFold(y, n_folds=folds)
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    all_tpr = []
    _precision = 0.0
    _recall = 0.0
    _accuracy = 0.0
    _f1 = 0.0
    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        pred_ = classifier.predict(X[test])
        _precision += precision_score(y[test], pred_)
        _recall += recall_score(y[test], pred_)
        _accuracy += accuracy_score(y[test], pred_)
        _f1 += f1_score(y[test], pred_)
        # Compute ROC curve and area under the curve
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))
    _precision /= folds
    _recall /= folds
    _accuracy /= folds
    _f1 /= folds

    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--',
             label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic - {0}'.format(name))
    plt.legend(loc="lower right")
    plt.savefig(self.configObject['outputdir'] + '/' + name + '.png')
    plt.close()
    result = self.OutputResult(name, paramdesc, len(inp), floor(labels.size / folds),
                               _precision, _recall, _accuracy, _f1)
    Announce(result)
def train_model(self, model, X_train, X_test, y_train, y_test):
    """Training a model to predict the presence or absence of a species.

    Various instance variables are used to define how the model trains,
    like: batch size, random seed and number of epochs.

    :param model: Keras Model Object. Initialized model ready for training.
    :param X_train: Array. Contains training data.
    :param X_test: Array. Contains testing data.
    :param y_train: Array. Contains training (ground truth) labels.
    :param y_test: Array. Contains testing (ground truth) labels.

    :return: Tuple. Containing: float 'AUC' performance metric between 0 and 1
        (0 = 100% wrong, 1 = 100% right); keras model 'model' with an identical
        architecture to the input variable 'model' but with trained weights.
    """
    training_generator, steps_per_epoch = balanced_batch_generator(
        X_train, y_train, sampler=NearMiss(), batch_size=self.batch,
        random_state=self.random_seed)
    model.fit_generator(generator=training_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=self.epoch, verbose=0)
    score = model.evaluate(X_test, y_test, verbose=0)
    predictions = model.predict(X_test)
    fpr, tpr, thresholds = roc_curve(y_test[:, 1], predictions[:, 1])
    len_tpr = int(len(tpr) / 2)
    self.test_loss.append(score[0])
    self.test_acc.append(score[1])
    self.test_AUC.append(roc_auc_score(y_test[:, 1], predictions[:, 1]))
    self.test_tpr.append(tpr[len_tpr])
    AUC = roc_auc_score(y_test[:, 1], predictions[:, 1])

    # Bootstrap a 90% confidence interval for the AUC.
    n_bootstraps = 1000
    y_pred = predictions[:, 1]
    y_true = y_test[:, 1]
    bootstrapped_scores = []
    rng = np.random.RandomState(self.random_seed)
    for i in range(n_bootstraps):
        # Resample with replacement; randint's upper bound is exclusive,
        # so len(y_pred) keeps the last sample eligible.
        indices = rng.randint(0, len(y_pred), len(y_pred))
        if len(np.unique(y_true[indices])) < 2:
            # Skip single-class resamples; ROC AUC is undefined there.
            continue
        score = roc_auc_score(y_true[indices], y_pred[indices])
        bootstrapped_scores.append(score)
    sorted_scores = np.array(bootstrapped_scores)
    sorted_scores.sort()
    ci_lower = sorted_scores[int(0.05 * len(sorted_scores))]
    ci_upper = sorted_scores[int(0.95 * len(sorted_scores))]
    self.test_lci.append(ci_lower)
    self.test_uci.append(ci_upper)
    return AUC, model
def roc_curve_raw(self, probs):
    prob_categories = ['DATA', 'DEV', 'DOCS', 'EDU', 'HW', 'OTHER', 'WEB']
    fprs = []
    tprs = []
    for i, category in enumerate(prob_categories):
        scores = [prob[i] for prob in probs]
        fpr, tpr, _ = roc_curve(y_true=self.test_labels, pos_label=category,
                                y_score=scores)
        fprs.append(fpr.tolist())
        tprs.append(tpr.tolist())
    return fprs, tprs
def roc(outcomes, prediction):
    fps, tps, thresholds = _binary_clf_curve(outcomes, prediction)
    clf = pd.DataFrame([fps, tps, thresholds]).T
    clf.columns = ['fps', 'tps', 'thresholds']
    clf['fps'] = clf['fps'].astype(int)
    clf['tps'] = clf['tps'].astype(int)
    fpr, tpr, thresholds = roc_curve(outcomes, prediction, drop_intermediate=False)
    r = pd.DataFrame([fpr, tpr, thresholds]).T
    r.columns = ['fpr', 'tpr', 'thresholds']
    df = pd.merge(clf, r, on='thresholds')
    return df
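# A quick sketch of the merged frame roc() returns, on toy data. It
# assumes pandas is imported as pd and that _binary_clf_curve is
# sklearn's private helper (sklearn.metrics._ranking in recent releases,
# sklearn.metrics.ranking in older ones). The inner merge keeps one row
# per threshold shared by both curves, pairing raw fp/tp counts with the
# fpr/tpr rates.
import numpy as np

print(roc(np.array([0, 0, 1, 1]), np.array([0.1, 0.4, 0.35, 0.8])))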
def refine_with_unexpectedness(data_set, classes_dict, preY, Ytrue, unexpected_rules):
    print('Refine with unexpected rules...')
    y_pred = np.copy(preY)
    for i in range(data_set.size()):
        x = data_set.get_transaction(i)
        for r in unexpected_rules:
            if r.satisfy_rule(x, is_lhs=True):
                label = r.right_items[0]
                y_pred[i] = classes_dict[label]
    print(f1_score(Ytrue, y_pred, average=None))
    if data_set.number_of_classes() <= 2:
        fpr, tpr, _ = roc_curve(Ytrue, y_pred.flatten())
        print(auc(fpr, tpr))
def computeAUROC(dataGT, dataPRED, classCount):
    fpr = []
    tpr = []
    outAUROC = []
    thresholds = []
    datanpGT = dataGT.cpu().numpy()
    datanpPRED = dataPRED.cpu().numpy()
    for i in range(classCount):
        outAUROC.append(roc_auc_score(datanpGT[:, i], datanpPRED[:, i]))
        _fpr, _tpr, threshold = roc_curve(datanpGT[:, i], datanpPRED[:, i])
        fpr.append(_fpr)
        tpr.append(_tpr)
        thresholds.append(threshold)
    return outAUROC, fpr, tpr, thresholds
def from_labels(cls, labels_true, y_score, is_class_pos=num2bool):
    """Instantiate assuming binary labeling of {0, 1}

    labels_true : array, shape = [n_samples]
        Class labels. If binary, 'is_class_pos' is optional

    y_score : array, shape = [n_samples]
        Predicted scores

    is_class_pos : label_true -> Bool
        Boolean predicate used to binarize true (class) labels
    """
    # Binarize the labels; materialize the map so roc_curve receives an
    # array-like rather than a one-shot iterator (Python 3).
    y_true = list(map(is_class_pos, labels_true))

    # calculate axes
    fprs, tprs, thresholds = roc_curve(y_true, y_score, pos_label=True)
    return cls(fprs, tprs, thresholds=thresholds)
def plot_roc(true_labels, pred_probs, fig_title='', savepath=''):
    false_positive_rate, true_positive_rate, _ = roc_curve(
        true_labels, pred_probs[:, 1], pos_label=1)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    plt.figure()
    plt.title(fig_title)
    plt.plot(false_positive_rate, true_positive_rate, 'b',
             label='AUC = %0.4f' % roc_auc)
    plt.plot([0, 1], [0, 1], 'r--')
    plt.legend(loc='lower right')
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    if savepath != '':
        plt.savefig(savepath)
    plt.show()
    return ''
def handle(self, *args, **options):
    filepath = options['file']
    print(filepath)
    x = []
    y = []
    with open(filepath, 'r') as csvfile:
        data = csv.reader(csvfile, delimiter=',')
        for row in data:
            x.append(float(row[0]))
            y.append(int(row[1]))
    print(x)
    print(y)
    fpr, tpr, thresholds = roc_curve(y, x)
    roc_auc = auc(fpr, tpr)
    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, color=COLOR_4, lw=lw,
             label='ROC curve (area = %0.2f)' % roc_auc)
    for i in range(len(tpr)):
        print("tpr: %s fpr: %s thres: %s" % (tpr[i], fpr[i], thresholds[i]))
    plt.plot([0, 1], [0, 1], color=COLOR_6, lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc="lower right")
    base = os.path.basename(filepath)
    base = os.path.splitext(base)[0]  # strip the extension before naming the plot
    plt.savefig('%s-roc.png' % base)
# binary class
y_pred = [1, 2, 3, 4]
y_true = [1, 2, 3, 4]
hamming_loss(y_true, y_pred)   # 0.0 -- every label agrees

y_true = [2, 2, 3, 4]
hamming_loss(y_true, y_pred)   # 0.25 -- one of four labels differs

y_true = [5, 6, 7, 8]
hamming_loss(y_true, y_pred)   # 1.0 -- no labels agree

hamming_loss(list("ABFD"), list("ABCD"))  # 0.25

# multi-label
hamming_loss(np.array([[0, 1], [1, 1]]), np.zeros((2, 2)))  # 0.75

y_true = [0, 0, 1, 1]
y_pred = [[.9, .1], [.8, .2], [.3, .7], [.01, .99]]  # [Pr(0), Pr(1)]
log_loss(y_true, y_pred)

"""
Receiver operating characteristic (ROC) Curve

roc_curve?
roc_curve(y_true, y_score, pos_label=None, sample_weight=None,
          drop_intermediate=True)

Note: this implementation is restricted to the binary classification task.

y_true : array, shape = [n_samples]
    True binary labels in range {0, 1} or {-1, 1}. If labels are not binary,
    pos_label should be explicitly given.

y_score : array, shape = [n_samples]
    Target scores, can either be probability estimates of the positive class,
    confidence values, or non-thresholded measure of decisions (as returned
    by "decision_function" on some classifiers).
"""
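# A worked call matching the signature quoted above; this is the
# standard scikit-learn toy example. pos_label=2 marks class 2 as
# positive because the labels are not in {0, 1}.
import numpy as np
from sklearn.metrics import roc_curve

y = np.array([1, 1, 2, 2])
scores = np.array([0.1, 0.4, 0.35, 0.8])
fpr, tpr, thresholds = roc_curve(y, scores, pos_label=2)
# fpr -> [0. , 0. , 0.5, 0.5, 1. ]
# tpr -> [0. , 0.5, 0.5, 1. , 1. ]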
def show_results(y_test, prob_test, name, show=True, output_folder='',
                 maxFNR=0.03, thresh=None):
    auc = ranking.roc_auc_score(y_test, prob_test, average=None, sample_weight=None)
    fpr, tpr, thresholds = ranking.roc_curve(y_test, prob_test, pos_label=1,
                                             sample_weight=None)
    fnr = 1 - tpr
    # Equal error rate: the operating point where FPR and FNR cross.
    eer = min(zip(fpr, fnr, thresholds), key=lambda x: abs(x[0] - x[1]))
    idx_fnr = np.where(fnr < maxFNR)[0][0]
    if thresh is None:
        target_fnr = thresholds[idx_fnr]
    else:
        target_fnr = thresh
    y_pred = [float(score >= target_fnr) for score in prob_test]
    #fig = plt.figure()

    # show ROC
    if show:
        plt.figure(221)
        plt.plot(fpr, tpr, linewidth=2)
        plt.ylim(0, 1)
        plt.xlim(0, 1)
        plt.xlabel('FPR')
        plt.ylabel('TPR')
        plt.title(name + ' - ROC curve, AUC = %f' % (auc))

        # show FPR-FNR vs threshold curves
        plt.figure(222)
        fnr_line, = plt.plot(thresholds, fnr * 100, linewidth=2, color='blue')
        fpr_line, = plt.plot(thresholds, fpr * 100, linewidth=2, color='red',
                             linestyle='--')
        plt.legend([fnr_line, fpr_line],
                   ['False Negative Rate (FNR)', 'False Positive Rate (FPR)'])
        plt.ylim(0, 100.001)
        plt.xlim(np.min(prob_test), np.max(prob_test))
        plt.title(name + ' - EER = %0.1f%% at t=%0.2f'
                  % (100 * (eer[0] + eer[1]) / 2, eer[2]))
        plt.show()

    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    print('AUC = %.2f' % (auc))
    print('Confusion matrix (absolute frequency) at threshold = %.2f' % (target_fnr))
    print('+---------------+------------+------------+')
    print('|               |           TRUTH         |')
    print('+---------------+------------+------------+')
    print('|   PREDICTED   |  LEGIT(1)  |  FAKE (0)  |')
    print('+---------------+------------+------------+')
    print('|   LEGIT (1)   |%12d|%12d|' % (tp, fp))
    print('+---------------+------------+------------+')
    print('|   FAKE  (0)   |%12d|%12d|' % (fn, tn))
    print('+---------------+------------+------------+')
    print('Confusion matrix (relative to |LEGIT| and |FAKE|) at threshold = %.2f'
          % (target_fnr))
    print('+---------------+------------+------------+')
    print('|               |           TRUTH         |')
    print('+---------------+------------+------------+')
    print('|   PREDICTED   |  LEGIT(1)  |  FAKE (0)  |')
    print('+---------------+------------+------------+')
    print('|   LEGIT (1)   |%11.1f%%|%11.1f%%|' % (tp*100.0/(tp+fn), fp*100.0/(fp+tn)))
    print('+---------------+------------+------------+')
    print('|   FAKE  (0)   |%11.1f%%|%11.1f%%|' % (fn*100.0/(tp+fn), tn*100.0/(fp+tn)))
    print('+---------------+------------+------------+')
    return y_pred, target_fnr
# -*- coding: utf-8 -*-
"""
Created on Sat Jun  2 23:04:45 2018

@author: justinxin
"""

'''Somers' D'''
from sklearn.metrics.ranking import roc_auc_score, roc_curve


def somers_d(y_true, y_score, average="macro", sample_weight=None):
    # Somers' D is the Gini coefficient of the ranking: 2 * AUC - 1.
    return 2 * roc_auc_score(y_true, y_score, average=average,
                             sample_weight=sample_weight) - 1


'''plot ROC curve'''
fpr, tpr, td = roc_curve(y_test, rf_new.predict(x_test))
plt.figure()
lw = 2
plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve')
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()
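# Hedged sanity check for somers_d (toy values, not from the script
# above): a perfect ranking has AUC = 1.0, so Somers' D = 2 * 1 - 1 = 1.
print(somers_d([0, 0, 1, 1], [0.1, 0.2, 0.8, 0.9]))  # -> 1.0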
def nv_binary_clf_curve_test():
    N = np.random.randint(low=1, high=10)
    y_bool = np.random.rand(N) <= 0.5
    y_pred = np.random.rand(N)

    sample_weight = None
    if np.random.rand() <= 0.2:
        sample_weight = np.abs(np.random.randn(N))
    if np.random.rand() <= 0.2:
        sample_weight = 1 + np.random.multinomial(N, np.ones(N) / N)
    if np.random.rand() <= 0.2:
        sample_weight = np.maximum(np.random.multinomial(N, np.ones(N) / N), 1e-6)

    fps, tps, thresholds = _nv_binary_clf_curve(y_bool, y_pred, sample_weight)
    assert (fps.shape == tps.shape and fps.shape == thresholds.shape)
    assert (np.all(np.isfinite(fps)))
    assert (np.all(np.isfinite(tps)))
    assert (np.all(np.isfinite(thresholds[1:])))
    assert (fps[0] == 0 and tps[0] == 0 and thresholds[0] == np.inf)
    if sample_weight is None:
        assert (np.abs(fps[-1] - np.sum(~y_bool)) <= 1e-8)
        assert (np.abs(tps[-1] - np.sum(y_bool)) <= 1e-8)
    else:
        assert (np.abs(fps[-1] - np.sum(sample_weight * ~y_bool)) <= 1e-8)
        assert (np.abs(tps[-1] - np.sum(sample_weight * y_bool)) <= 1e-8)
    assert (np.all((np.diff(fps) >= 0.0) & (np.diff(tps) >= 0.0)))
    assert (np.all((np.diff(fps) > 0) | (np.diff(tps) > 0)))
    assert (np.all(np.diff(thresholds) < 0.0))

    fpr, tpr, thresholds_roc = _nv_roc_curve(y_bool, y_pred, sample_weight)
    assert (fpr.shape == tpr.shape and fpr.shape == thresholds_roc.shape)
    assert (np.all(np.isfinite(fpr)))
    assert (np.all(np.isfinite(tpr)))
    assert (np.all(np.isfinite(thresholds_roc[1:])))
    assert (fpr[0] == 0.0 and tpr[0] == 0.0)
    assert (fpr[-1] == 1.0 and tpr[-1] == 1.0)
    assert (np.all((np.diff(fpr) >= 0.0) & (np.diff(tpr) >= 0.0)))
    assert (np.all((np.diff(fpr) > 0.0) | (np.diff(tpr) > 0.0)))
    assert (np.all(np.diff(thresholds_roc) < 0.0))

    rec, prec, thresholds_pr = _nv_recall_precision_curve(
        y_bool, y_pred, sample_weight)
    assert (rec.shape == prec.shape and rec.shape == thresholds_pr.shape)
    assert (np.all(np.isfinite(rec)))
    assert (np.all(np.isfinite(prec)))
    assert (np.all(np.isfinite(thresholds_pr[1:])))
    assert (rec[0] == 0.0 and rec[-1] == 1.0)
    assert (len(prec) >= 2 and prec[0] == prec[1])
    b_rate = np.mean(y_bool) if sample_weight is None else \
        np.true_divide(np.sum(sample_weight * y_bool), np.sum(sample_weight))
    assert (np.max(np.abs(prec[-1] - b_rate)) <= 1e-8)
    # Note: may have repeats in PR curve
    assert (np.all(np.diff(rec) >= 0.0))
    assert (np.all(np.diff(thresholds_pr) < 0.0))

    rec_gain, prec_gain, thresholds_prg = _nv_prg_curve(
        y_bool, y_pred, sample_weight)
    assert (rec_gain.shape == prec_gain.shape)
    assert (rec_gain.shape == thresholds_prg.shape)
    assert (np.all(np.isfinite(thresholds_prg[1:])))
    assert (rec_gain[0] == 0.0 and rec_gain[-1] == 1.0)
    assert (np.all(rec_gain <= 1.0) and np.all(prec_gain <= 1.0))
    assert (np.all(np.diff(rec_gain) >= 0.0))
    assert (np.allclose(prec_gain[-1], 0.0))

    if np.all(y_bool) or (not np.any(y_bool)):
        # Degenerate single-class case: only internal consistency checks
        # apply; skip the comparison against sklearn below.
        assert (np.allclose(0.5, np.trapz(fpr, tpr)))
        assert (np.allclose(np.mean(y_bool), np.sum(prec[:-1] * np.diff(rec))))
        assert (np.allclose(0.0, np.sum(prec_gain[:-1] * np.diff(rec_gain))))
        return

    fps2, tps2, thresholds2 = _binary_clf_curve(y_bool, y_pred, pos_label=True,
                                                sample_weight=sample_weight)
    assert (np.allclose(fps[1:], fps2))
    assert (np.allclose(tps[1:], tps2))
    assert (np.allclose(thresholds[1:], thresholds2))

    fpr2, tpr2, thresholds2 = roc_curve(y_bool, y_pred, pos_label=True,
                                        sample_weight=sample_weight,
                                        drop_intermediate=False)
    # sklearn inconsistent on including origin ==> need if statement
    if len(fpr) == len(fpr2):
        assert (np.allclose(fpr, fpr2))
        assert (np.allclose(tpr, tpr2))
        assert (np.allclose(thresholds_roc[1:], thresholds2[1:]))
    else:
        assert (np.allclose(fpr[1:], fpr2))
        assert (np.allclose(tpr[1:], tpr2))
        assert (np.allclose(thresholds_roc[1:], thresholds2))

    prec2, rec2, thresholds2 = \
        precision_recall_curve(y_bool, y_pred, pos_label=True,
                               sample_weight=sample_weight)
    prec2, rec2, thresholds2 = prec2[::-1], rec2[::-1], thresholds2[::-1]
    prec2[0] = prec2[1]
    err = rec[len(rec2):] - 1.0
    assert (len(err) == 0 or np.max(np.abs(err)) <= 1e-8)
    assert (np.allclose(rec[:len(rec2)], rec2))
    assert (np.allclose(prec[:len(rec2)], prec2))
    assert (np.allclose(thresholds_pr[1:len(rec2)], thresholds2))

    with np.errstate(divide='ignore', invalid='ignore'):
        rec_gain2 = (rec - b_rate) / ((1.0 - b_rate) * rec)
        prec_gain2 = (prec - b_rate) / ((1.0 - b_rate) * prec)
    idx = rec_gain2 > 0.0
    assert (np.allclose(rec_gain[1:], rec_gain2[idx]))
    assert (np.allclose(prec_gain[1:], prec_gain2[idx]))
    assert (np.allclose(thresholds_prg[1:], thresholds_pr[idx]))
    assert (np.allclose(rec_gain[0], 0.0))
    idx0 = np.where(~idx)[0][-1]
    assert (np.allclose(prec_gain[0], prec_gain2[idx0]))
    assert (np.allclose(thresholds_prg[0], thresholds_pr[idx0]))
def model_predict(X_train, X_test, y_train, y_test):
    """Evaluate several candidate models on the same train/test split."""
    classifiers = {
        'LogisticRegression': LogisticRegression(C=0.001),
        'Support Vector Machine Classifier': SVC(),
        'Decision Tree Classifier': DecisionTreeClassifier(),
        'Random Forest Classifier': RandomForestClassifier(),
        'Xgboost Classifier': XGBClassifier()
    }
    model_metrics = []
    for model_name, model in classifiers.items():
        model.fit(X_train, y_train)
        # cross_val_score_local = cross_val_score(model, X_train, y_train, cv=5)
        print('*' * 10, 'Model:', model_name, '*' * 10)
        # print('Cross-validation score:', round(cross_val_score_local.mean() * 100, 2), '%')
        model_sc = {}

        # Metrics on the training set.
        y_pred_t = model.predict(X_train)
        accuracy_t = accuracy_score(y_train, y_pred_t)
        precision_t = precision_score(y_train, y_pred_t)
        f1_t = f1_score(y_train, y_pred_t)
        recall_t = recall_score(y_train, y_pred_t)
        auc_t = roc_auc_score(y_train, y_pred_t)
        model_sc['model_name'] = model_name
        model_sc['model_data_sort'] = 'Train Data'
        model_sc['accuracy'] = accuracy_t
        model_sc['precision'] = precision_t
        model_sc['f1'] = f1_t
        model_sc['recall'] = recall_t
        model_sc['auc'] = auc_t
        model_metrics.append(model_sc)
        print('\nTrain: Accuracy Score: {:.2f}'.format(accuracy_t))
        print('\nTrain: Precision Score: {:.2f}'.format(precision_t))
        print('\nTrain: F1 Score: {:.2f}'.format(f1_t))
        print('\nTrain: Recall Score: {:.2f}'.format(recall_t))
        print('\nTrain: ROC AUC Score: {:.2f}'.format(auc_t))

        # Metrics on the test set.
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_pred)
        model_sc = {}
        model_sc['model_name'] = model_name
        model_sc['model_data_sort'] = 'Test Data'
        model_sc['accuracy'] = accuracy
        model_sc['precision'] = precision
        model_sc['f1'] = f1
        model_sc['recall'] = recall
        model_sc['auc'] = auc
        model_metrics.append(model_sc)
        print('\nTest: Accuracy Score: {:.2f}'.format(accuracy))
        print('\nTest: Precision Score: {:.2f}'.format(precision))
        print('\nTest: F1 Score: {:.2f}'.format(f1))
        print('\nTest: Recall Score: {:.2f}'.format(recall))
        print('\nTest: ROC AUC Score: {:.2f}'.format(auc))

        # Plot ROC curves for both splits. Note: these are built from
        # hard 0/1 predictions rather than scores, so each "curve" has a
        # single interior point.
        fpr, tpr, threshold = roc_curve(y_test, y_pred)
        fpr_t, tpr_t, threshold_t = roc_curve(y_train, y_pred_t)
        plt.figure(figsize=(10, 8))
        plt.title('{} ROC curve'.format(model_name), fontsize=18)
        plt.plot(fpr_t, tpr_t, label='{} train AUC: {:.4f}'.format(model_name, auc_t))
        plt.plot(fpr, tpr, label='{} test AUC: {:.4f}'.format(model_name, auc))
        plt.plot([0, 1], [0, 1], 'k--')
        plt.axis([-0.01, 1, 0, 1])
        plt.xlabel('False Positive Rate', fontsize=16)
        plt.ylabel('True Positive Rate', fontsize=16)
        plt.legend(loc='best')
        plt.show()
    return model_metrics
                                             X_train.item_dict, Y_train.item_dict)
Ytest = Ytest.flatten()
class_count = train_data_set.number_of_classes()
unexpected_rules = IOHelper.load_json_object(config.get_value('rules'))
refined_unexpected_rules = filter_association_rules(unexpected_rules)

print('svm testing...')
svc_model = SVC(kernel='poly', degree=3, coef0=0.1, random_state=1)
svc_model.fit(X_train.relation_matrix, Y_train.values.flatten())
svc_y_pred = svc_model.predict(Xtest)
print(f1_score(Ytest, svc_y_pred, average=None))
if class_count <= 2:
    fpr, tpr, _ = roc_curve(Ytest, svc_y_pred.flatten())
    print(auc(fpr, tpr))
refine_with_unexpectedness(test_data_set, Y_train.item_dict, svc_y_pred, Ytest,
                           refined_unexpected_rules)

print('Random forest testing...')
rf_model = RandomForestClassifier(n_estimators=20, random_state=1)
rf_model.fit(X_train.relation_matrix, Y_train.values.flatten())
rf_y_pred = rf_model.predict(Xtest)
print(f1_score(Ytest, rf_y_pred, average=None))
if class_count <= 2:
    fpr, tpr, _ = roc_curve(Ytest, rf_y_pred.flatten())
    print(auc(fpr, tpr))
def area_under_the_roc_curve(yTrue, yPred):
    fpr, tpr, _ = roc_curve(yTrue, yPred)
    AUC = auc(fpr, tpr)
    return AUC, fpr, tpr
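# Minimal check of area_under_the_roc_curve on the usual toy example
# (illustrative values only): the AUC here is 0.75.
AUC, fpr, tpr = area_under_the_roc_curve([0, 0, 1, 1], [0.1, 0.4, 0.35, 0.8])
print(AUC)  # -> 0.75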
def showROC(prediction, target):
    nGestures = target.shape[1]
    n_classes = nGestures
    y_test = target
    y_score = prediction

    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    ##############################################################################
    # Plot ROC curves for the multiclass problem

    # Compute macro-average ROC curve and ROC area
    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    # Plot all ROC curves
    plt.figure()
    plt.plot(fpr["micro"], tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'.format(roc_auc["micro"]),
             linewidth=2)
    plt.plot(fpr["macro"], tpr["macro"],
             label='macro-average ROC curve (area = {0:0.2f})'.format(roc_auc["macro"]),
             linewidth=2)
    for i in range(n_classes):
        plt.plot(fpr[i], tpr[i],
                 label='ROC curve of class {0} (area = {1:0.2f})'.format(i, roc_auc[i]))
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    #plt.title('Some extension of Receiver operating characteristic to multi-class')
    plt.legend(loc="lower right")
    plt.show()
                                            training_dict, True, False, False)
# build testing inputs and labels
X_test, y_test, test_labels = build_inputs(training_files, activity_labels,
                                           training_dict, True, False, False)

random_state = np.random.RandomState(0)
classifier = OneVsRestClassifier(
    svm.SVC(kernel='linear', probability=True, random_state=random_state))
y_score = classifier.fit(X_train, y_train).decision_function(X_test)

fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(3):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

plt.figure()
lw = 2
plt.plot(fpr[2], tpr[2], color='darkorange', lw=lw,
         label='ROC curve (area = %0.2f)' % roc_auc[2])
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])