Example 1
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, auc, roc_curve


def evalModel(predictor, test_data, test_labels, train_data, train_labels, name, evalresults):
    # Accuracy on the raw (unscaled) features.
    predictor.fit(train_data, train_labels)
    evalresults.setdefault(name + " Accuracy raw \t\t", []).append(accuracy_score(test_labels, predictor.predict(test_data)))

    # Optional variants on standardized / L2-normalized features.
    #predictor.fit(preprocessing.scale(train_data), train_labels)
    #evalresults.setdefault(name + " Accuracy std \t\t", []).append(accuracy_score(test_labels, predictor.predict(preprocessing.scale(test_data))))

    #predictor.fit(preprocessing.normalize(train_data, norm='l2'), train_labels)
    #evalresults.setdefault(name + " Accuracy nml \t\t", []).append(accuracy_score(test_labels, predictor.predict(preprocessing.normalize(test_data, norm='l2'))))

    # ROC AUC is only defined for binary problems.
    if len(set(train_labels)) != 2:
        return

    predictor.fit(train_data, train_labels)
    fpr, tpr, _ = roc_curve(test_labels, predictor.decision_function(test_data))
    evalresults.setdefault(name + " AUC raw \t\t", []).append(auc(fpr, tpr))

    predictor.fit(preprocessing.scale(train_data), train_labels)
    fpr, tpr, _ = roc_curve(test_labels, predictor.decision_function(preprocessing.scale(test_data)))
    evalresults.setdefault(name + " AUC std \t\t", []).append(auc(fpr, tpr))

    predictor.fit(preprocessing.normalize(train_data, norm='l2'), train_labels)
    fpr, tpr, _ = roc_curve(test_labels, predictor.decision_function(preprocessing.normalize(test_data, norm='l2')))
    evalresults.setdefault(name + " AUC nml \t\t", []).append(auc(fpr, tpr))
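The function above assumes the estimator exposes decision_function. For classifiers that only implement predict_proba (random forests, naive Bayes), the positive-class probability serves as the score; a minimal sketch, assuming binary {0, 1} labels:

# Sketch: same AUC bookkeeping for estimators without decision_function.
from sklearn.metrics import auc, roc_curve

def auc_from_probabilities(predictor, test_data, test_labels):
    # for binary problems, column 1 of predict_proba is P(class 1)
    scores = predictor.predict_proba(test_data)[:, 1]
    fpr, tpr, _ = roc_curve(test_labels, scores)
    return auc(fpr, tpr)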
Example 2
def plot_roc_curve(y_true, y_pred, y_pred2=None):
    fpr, tpr, _thresholds = roc_curve(y_true, y_pred)
    plt.plot(fpr, tpr, c="green", label="model 1")
    if y_pred2 is not None:
        fpr2, tpr2, _thresholds = roc_curve(y_true, y_pred2)
        plt.plot(fpr2, tpr2, c="purple", label="model 2")
    plt.legend(loc="lower right")  # loc=4 in the original; same corner
    plt.show()
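A usage sketch (the classifiers, dataset, and split below are illustrative, not part of the original snippet):

# Hypothetical usage: compare two models' scores on one held-out set.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
p1 = LogisticRegression(max_iter=1000).fit(X_tr, y_tr).predict_proba(X_te)[:, 1]
p2 = RandomForestClassifier(random_state=0).fit(X_tr, y_tr).predict_proba(X_te)[:, 1]
plot_roc_curve(y_te, p1, p2)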
Example 3
def plot_ROC(y_pred, y_test, name):
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    n_classes = 3
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_pred[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])  # scipy's interp is removed; use np.interp
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    target_names = ['Reading', 'Speaking', 'Watching']
    plt.figure()
    plt.plot(fpr["micro"],
             tpr["micro"],
             label='micro-average ROC (AUC = {0:0.2f})'
             ''.format(roc_auc["micro"]),
             color='deeppink',
             linestyle=':',
             linewidth=4)

    plt.plot(fpr["macro"],
             tpr["macro"],
             label='macro-average ROC (AUC = {0:0.2f})'
             ''.format(roc_auc["macro"]),
             color='navy',
             linestyle=':',
             linewidth=4)
    lw = 2
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i],
                 tpr[i],
                 color=color,
                 lw=lw,
                 label='ROC - {0} (AUC = {1:0.2f})'
                 ''.format(target_names[i], roc_auc[i]))
    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc="lower right")
    plt.savefig("res/{}-ROC-AUC".format(name))
Example 4
    def _binary_roc_auc_score(y_true, y_score, sample_weight=None):
        if len(np.unique(y_true)) != 2:
            raise ValueError("Only one class present in y_true. ROC AUC score "
                             "is not defined in that case.")

        fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight)
        if max_fpr is None or max_fpr == 1:
            return auc(fpr, tpr)
        if max_fpr <= 0 or max_fpr > 1:
            raise ValueError("Expected max_fpr in range ]0, 1], got: %r" %
                             max_fpr)

        # Add a single point at max_fpr by linear interpolation
        stop = np.searchsorted(fpr, max_fpr, 'right')
        x_interp = [fpr[stop - 1], fpr[stop]]
        y_interp = [tpr[stop - 1], tpr[stop]]
        tpr = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
        fpr = np.append(fpr[:stop], max_fpr)
        partial_auc = auc(fpr, tpr)

        # McClish correction: standardize result to be 0.5 if non-discriminant
        # and 1 if maximal
        min_area = 0.5 * max_fpr**2
        max_area = max_fpr
        return 0.5 * (1 + (partial_auc - min_area) / (max_area - min_area))
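Note that _binary_roc_auc_score reads max_fpr from its enclosing scope: it is a helper nested inside scikit-learn's roc_auc_score. The same behaviour is reachable through the public API; a minimal sketch of the McClish-corrected partial AUC:

# Partial AUC over FPR in [0, 0.1], standardized so 0.5 = non-discriminant.
from sklearn.metrics import roc_auc_score

y_true = [0, 0, 0, 1, 1, 1]
y_score = [0.2, 0.4, 0.6, 0.5, 0.7, 0.9]
print(roc_auc_score(y_true, y_score, max_fpr=0.1))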
Example 5
 def Predict(self, inp, labels, classifier, folds, name, paramdesc):
     X = inp
     y = labels
     X, y = X[y != 2], y[y != 2]   # keep only the two classes of interest
     n_samples, n_features = X.shape
     
     ###############################################################################
     # Classification and ROC analysis
     
     # Run classifier with cross-validation and plot ROC curves
     cv = StratifiedKFold(n_splits=folds)  # modern sklearn API: folds come from cv.split(X, y)
     
     mean_tpr = 0.0
     mean_fpr = np.linspace(0, 1, 100)
     all_tpr = []
     
     _precision = 0.0
     _recall = 0.0
     _accuracy = 0.0
     _f1 = 0.0
     
     for i, (train, test) in enumerate(cv.split(X, y)):
         probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
         pred_ = classifier.predict(X[test])
         _precision += precision_score(y[test], pred_)
         _recall += recall_score(y[test], pred_)
         _accuracy += accuracy_score(y[test], pred_)
         _f1 += f1_score(y[test], pred_)
         # Compute ROC curve and area the curve
         fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
         mean_tpr += np.interp(mean_fpr, fpr, tpr)  # scipy's interp is removed; use np.interp
         mean_tpr[0] = 0.0
         roc_auc = auc(fpr, tpr)
         plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))
     
     _precision /= folds
     _recall /= folds
     _accuracy /= folds
     _f1 /= folds
     
     
     plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
     
     mean_tpr /= folds  # len(cv) is undefined for the new-style splitter
     mean_tpr[-1] = 1.0
     mean_auc = auc(mean_fpr, mean_tpr)
     plt.plot(mean_fpr, mean_tpr, 'k--',
              label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)
     
     plt.xlim([-0.05, 1.05])
     plt.ylim([-0.05, 1.05])
     plt.xlabel('False Positive Rate')
     plt.ylabel('True Positive Rate')
     plt.title('Receiver operating characteristic - {0}'.format(name))
     plt.legend(loc="lower right")
     plt.savefig(self.configObject['outputdir'] + '/' + name + '.png')
     plt.close()
     
     result = self.OutputResult(name, paramdesc, len(inp), floor(labels.size / folds), _precision, _recall, _accuracy, _f1) 
     Announce(result)
Example 6
    def train_model(self, model, X_train, X_test, y_train, y_test):
        """Training a model to predict the presence or absence of a species. Various instance variables are used to
        define how the model trains, like: batch size, random seed and number of epochs.

        :param model: Keras Model Object. Initialized model ready for training.
        :param X_train: Array. Contains training data.
        :param X_test: Array. Contains testing data.
        :param y_train: Array. Contains training (ground truth) labels.
        :param y_test: Array. Contains testing (ground truth) labels.

        :return: Tuple. Containing:
        float 'AUC' performance metric between 0 and 1 (0 = 100% wrong, 1 = 100% right);
        keras model 'model' a keras model with an identical architecture to the input variable 'model' but with trained
        weights.
        """

        training_generator, steps_per_epoch = balanced_batch_generator(
            X_train,
            y_train,
            sampler=NearMiss(),
            batch_size=self.batch,
            random_state=self.random_seed)
        # Note: fit_generator is deprecated in TF2 Keras, where Model.fit
        # accepts generators directly with the same arguments.
        model.fit_generator(generator=training_generator,
                            steps_per_epoch=steps_per_epoch,
                            epochs=self.epoch,
                            verbose=0)
        score = model.evaluate(X_test, y_test, verbose=0)
        predictions = model.predict(X_test)
        fpr, tpr, thresholds = roc_curve(y_test[:, 1], predictions[:, 1])
        len_tpr = int(len(tpr) / 2)
        self.test_loss.append(score[0])
        self.test_acc.append(score[1])
        self.test_AUC.append(roc_auc_score(y_test[:, 1], predictions[:, 1]))
        self.test_tpr.append(tpr[len_tpr])
        AUC = roc_auc_score(y_test[:, 1], predictions[:, 1])
        n_bootstraps = 1000
        y_pred = predictions[:, 1]
        y_true = y_test[:, 1]
        bootstrapped_scores = []
        rng = np.random.RandomState(self.random_seed)
        for i in range(n_bootstraps):
            # the high bound of randint is exclusive, so use len(y_pred)
            # (the original's len(y_pred) - 1 could never resample the last row)
            indices = rng.randint(0, len(y_pred), len(y_pred))
            if len(np.unique(y_true[indices])) < 2:
                # reject resamples with a single class; AUC is undefined there
                continue
            score = roc_auc_score(y_true[indices], y_pred[indices])
            bootstrapped_scores.append(score)
        sorted_scores = np.array(bootstrapped_scores)
        sorted_scores.sort()
        # 5th and 95th percentiles of the bootstrap distribution: a 90% CI
        ci_lower = sorted_scores[int(0.05 * len(sorted_scores))]
        ci_upper = sorted_scores[int(0.95 * len(sorted_scores))]
        self.test_lci.append(ci_lower)
        self.test_uci.append(ci_upper)
        return AUC, model
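The integer-index quantiles above can be off by one for small bootstrap counts; np.percentile is a safer equivalent. A sketch under the same 90%-interval assumption:

# Sketch: percentile bootstrap CI, equivalent in spirit to the code above.
import numpy as np

def bootstrap_ci(scores, lower=5.0, upper=95.0):
    scores = np.asarray(scores)
    return np.percentile(scores, lower), np.percentile(scores, upper)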
Example 7
 def roc_curve_raw(self, probs):
     prob_categories = ['DATA', 'DEV', 'DOCS', 'EDU', 'HW', 'OTHER', 'WEB']
     fprs = []
     tprs = []
     for i, category in enumerate(prob_categories):
         scores = [prob[i] for prob in probs]
         fpr, tpr, _ = roc_curve(y_true=self.test_labels,
                                 pos_label=category,
                                 y_score=scores)
         fprs.append(fpr.tolist())
         tprs.append(tpr.tolist())
     return fprs, tprs
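roc_curve's pos_label is what makes the loop above work: it binarizes multiclass ground truth against one category at a time. A standalone sketch with illustrative labels and scores:

# One-vs-rest curve for a single category via pos_label.
from sklearn.metrics import roc_curve

labels = ['DEV', 'WEB', 'DEV', 'DOCS']   # multiclass ground truth
dev_scores = [0.9, 0.2, 0.7, 0.4]        # P(label == 'DEV') per sample
fpr, tpr, _ = roc_curve(y_true=labels, y_score=dev_scores, pos_label='DEV')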
Example 8
def roc(outcomes, prediction):
    # _binary_clf_curve is sklearn's private helper (sklearn.metrics._ranking
    # in recent versions); it returns cumulative FP/TP counts per threshold.
    fps, tps, thresholds = _binary_clf_curve(outcomes, prediction)
    clf = pd.DataFrame([fps, tps, thresholds]).T
    clf.columns = ['fps', 'tps', 'thresholds']
    clf['fps'] = clf['fps'].astype(int)
    clf['tps'] = clf['tps'].astype(int)
    fpr, tpr, thresholds = roc_curve(outcomes,
                                     prediction,
                                     drop_intermediate=False)
    r = pd.DataFrame([fpr, tpr, thresholds]).T
    r.columns = ['fpr', 'tpr', 'thresholds']
    df = pd.merge(clf, r, on='thresholds')
    return df
Example 9
def refine_with_unexpectedness(data_set, classes_dict, preY, Ytrue,
                               unexpected_rules):

    print('Refine with unexpected rules...')
    y_pred = np.copy(preY)
    for i in range(data_set.size()):
        x = data_set.get_transaction(i)
        for r in unexpected_rules:
            if r.satisfy_rule(x, is_lhs=True):
                label = r.right_items[0]
                y_pred[i] = classes_dict[label]
    print(f1_score(Ytrue, y_pred, average=None))
    if (data_set.number_of_classes() <= 2):
        fpr, tpr, _ = roc_curve(Ytrue, y_pred.flatten())
        print(auc(fpr, tpr))
Example 10
    def computeAUROC(dataGT, dataPRED, classCount):
        fpr = []
        tpr = []
        outAUROC = []
        thresholds = []

        datanpGT = dataGT.cpu().numpy()
        datanpPRED = dataPRED.cpu().numpy()

        for i in range(classCount):
            outAUROC.append(roc_auc_score(datanpGT[:, i], datanpPRED[:, i]))
            _fpr, _tpr, threshold = roc_curve(datanpGT[:, i], datanpPRED[:, i])
            fpr.append(_fpr)
            tpr.append(_tpr)
            thresholds.append(threshold)
        return outAUROC, fpr, tpr, thresholds
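The .cpu().numpy() calls imply PyTorch tensors, and the missing self suggests a staticmethod; a hypothetical call, assuming computeAUROC is callable as a plain function:

# Hypothetical usage: per-class AUROC for a 3-label multilabel problem.
import torch

gt = torch.tensor([[1., 0., 1.], [0., 1., 0.], [1., 1., 0.], [0., 0., 1.]])
pred = torch.rand(4, 3)  # illustrative scores in [0, 1)
aurocs, fprs, tprs, ths = computeAUROC(gt, pred, classCount=3)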
Example 11
    def from_labels(cls, labels_true, y_score, is_class_pos=num2bool):
        """Instantiate assuming binary labeling of {0, 1}

        labels_true : array, shape = [n_samples]
            Class labels. If binary, 'is_class_pos' is optional

        y_score : array, shape = [n_samples]
            Predicted scores

        is_class_pos: label_true -> Bool
            Boolean predicate used to binarize true (class) labels
        """

        # num2bool the labels; materialize the map, since Python 3's map is a
        # lazy iterator that NumPy cannot coerce into a 1-d array
        y_true = list(map(is_class_pos, labels_true))

        # calculate axes
        fprs, tprs, thresholds = roc_curve(
            y_true, y_score, pos_label=True)

        return cls(fprs, tprs, thresholds=thresholds)
Example 12
def plot_roc(true_labels, pred_probs, fig_title='', savepath=''):

    false_positive_rate, true_positive_rate, _ = roc_curve(true_labels,
                                                           pred_probs[:, 1],
                                                           pos_label=1)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    plt.figure()
    plt.title(fig_title)
    plt.plot(false_positive_rate,
             true_positive_rate,
             'b',
             label='AUC = %0.4f' % roc_auc)
    plt.plot([0, 1], [0, 1], 'r--')
    plt.legend(loc='lower right')
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    if savepath != '':
        plt.savefig(savepath)
    plt.show()

    return ''
Example 13
    def handle(self, *args, **options):
        filepath = options['file']

        print(filepath)

        x = []
        y = []

        # newline='' is the documented way to open CSV files in Python 3
        with open(filepath, newline='') as csvfile:
            data = csv.reader(csvfile, delimiter=',')
            for row in data:
                x.append(float(row[0]))   # score
                y.append(int(row[1]))     # binary label
        print(x)
        print(y)

        fpr, tpr, thresholds = roc_curve(y, x)
        roc_auc = auc(fpr, tpr)

        plt.figure()
        lw = 2
        plt.plot(fpr, tpr, color=COLOR_4,
                 lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)

        for i in range(len(tpr)):
            print("tpr: %s fpr: %s thres: %s" % (tpr[i], fpr[i], thresholds[i]))

        plt.plot([0, 1], [0, 1], color=COLOR_6, lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc="lower right")

        # the original computed splitext(...)[0] but discarded the result
        base = os.path.splitext(os.path.basename(filepath))[0]
        plt.savefig('%s-roc.png' % base)
Example 14
# multiclass labels (each y_true assignment below overwrites the previous one)
y_pred = [1, 2, 3, 4]
y_true = [1, 2, 3, 4]   # hamming_loss -> 0.0
y_true = [2, 2, 3, 4]   # hamming_loss -> 0.25
y_true = [5, 6, 7, 8]   # hamming_loss -> 1.0
hamming_loss(y_true, y_pred)
hamming_loss(list("ABFD"), list("ABCD"))  # one of four positions differs -> 0.25

# multilabel indicator matrices
hamming_loss(np.array([[0, 1], [1, 1]]), np.zeros((2, 2)))  # 3 of 4 entries differ -> 0.75

y_true = [0, 0, 1, 1]
y_pred = [[.9, .1], [.8, .2], [.3, .7], [.01, .99]]  # [Pr(0), Pr(1)]
log_loss(y_true, y_pred)
"""
Receiver operating characteristic (ROC) Curve

roc_curve?
roc_curve(y_true, y_score, pos_label=None, 
          sample_weight=None, drop_intermediate=True)
Note: this implementation is restricted to the binary classification task.

y_true : array, shape = [n_samples]
    True binary labels in range {0, 1} or {-1, 1}.  If labels are not
    binary, pos_label should be explicitly given.

y_score : array, shape = [n_samples]
    Target scores, can either be probability estimates of the positive
    class, confidence values, or non-thresholded measure of decisions
    (as returned by "decision_function" on some classifiers).
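A minimal runnable check of the documented signature (values chosen so the AUC is easy to verify by hand):

# 3 of the 4 positive/negative score pairs are correctly ordered -> AUC 0.75.
import numpy as np
from sklearn.metrics import auc, roc_curve

y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])
fpr, tpr, thresholds = roc_curve(y_true, y_score)
print(auc(fpr, tpr))  # 0.75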
Example 15
def show_results(y_test, prob_test, name, show=True, output_folder='', maxFNR=0.03, thresh=None):
    # sklearn.metrics.ranking was a private module and no longer exists;
    # the public sklearn.metrics functions are equivalent.
    auc = roc_auc_score(y_test, prob_test, average=None, sample_weight=None)

    fpr, tpr, thresholds = roc_curve(y_test, prob_test, pos_label=1, sample_weight=None)
    fnr = 1 - tpr

    # equal error rate: the operating point where FPR and FNR cross
    eer = min(zip(fpr, fnr, thresholds), key=lambda x: abs(x[0] - x[1]))

    idx_fnr = np.where(fnr < maxFNR)[0][0]
    if thresh is None:
        target_fnr = thresholds[idx_fnr]
    else:
        target_fnr = thresh
    y_pred = [float(score >= target_fnr) for score in prob_test]

    #fig = plt.figure()

    # show ROC
    if show:
        plt.figure(221)
        plt.plot(fpr, tpr, linewidth=2)
        plt.ylim(0, 1)
        plt.xlim(0, 1)
        plt.xlabel('FPR')
        plt.ylabel('TPR')
        plt.title(name + ' - ROC curve, AUC = %f' % (auc))

        # show FPR-FNR vs threshold curves
        plt.figure(222)
        fnr_line, = plt.plot(thresholds, fnr * 100, linewidth=2, color='blue')
        fpr_line, = plt.plot(thresholds, fpr * 100, linewidth=2, color='red', linestyle='--')
        plt.legend([fnr_line, fpr_line], ['False Negative Rate (FNR)', 'False Positive Rate (FPR)'])
        plt.ylim(0, 100.001)
        plt.xlim(np.min(prob_test), np.max(prob_test))
        plt.title(name + ' - EER = %0.1f%% at t=%0.2f' % (100 * (eer[0] + eer[1]) / 2, eer[2]))
        plt.show()

    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

    print ('AUC = %.2f' % (auc))
    print ('Confusion matrix (absolute frequency) at threshold = %.2f' % (target_fnr))
    print ('+---------------+------------+------------+')
    print ('|               |          TRUTH          |')
    print ('+---------------+------------+------------+')
    print ('|   PREDICTED   |  LEGIT(1)  |  FAKE (0)  |')
    print ('+---------------+------------+------------+')
    print ('|    LEGIT (1)  |%12d|%12d|' % (tp, fp))
    print ('+---------------+------------+------------+')
    print ('|     FAKE (0)  |%12d|%12d|' % (fn, tn))
    print ('+---------------+------------+------------+')

    print ('Confusion matrix (relative to |LEGIT| and |FAKE|) at threshold = %.2f' % (target_fnr))
    print ('+---------------+------------+------------+')
    print ('|               |          TRUTH          |')
    print ('+---------------+------------+------------+')
    print ('|   PREDICTED   |  LEGIT(1)  |  FAKE (0)  |')
    print ('+---------------+------------+------------+')
    print ('|    LEGIT (1)  |%11.1f%%|%11.1f%%|' % (tp*100.0/(tp+fn), fp*100.0/(fp+tn)))
    print ('+---------------+------------+------------+')
    print ('|     FAKE (0)  |%11.1f%%|%11.1f%%|' % (fn*100.0/(tp+fn), tn*100.0/(fp+tn)))
    print ('+---------------+------------+------------+')

    return y_pred, target_fnr
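The EER line above is easy to miss; a standalone sketch of the same trick with illustrative data:

# Pick the threshold where FPR and FNR cross; report their midpoint as EER.
import numpy as np
from sklearn.metrics import roc_curve

y_true = np.array([0, 0, 0, 1, 1, 1])
scores = np.array([0.1, 0.3, 0.6, 0.4, 0.7, 0.9])
fpr, tpr, thr = roc_curve(y_true, scores)
fnr = 1 - tpr
eer_idx = np.nanargmin(np.abs(fpr - fnr))
print(thr[eer_idx], (fpr[eer_idx] + fnr[eer_idx]) / 2)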
Example 16
# -*- coding: utf-8 -*-
"""
Created on Sat Jun  2 23:04:45 2018

@author: justinxin
"""
'''Somers' D '''

# sklearn.metrics.ranking was a private module and has been removed;
# import from the public package instead.
from sklearn.metrics import roc_auc_score, roc_curve


def somers_d(y_true, y_score, average="macro", sample_weight=None):
    # roc_auc_score's optional arguments are keyword-only in recent sklearn
    return 2 * roc_auc_score(y_true, y_score, average=average,
                             sample_weight=sample_weight) - 1


''' plot ROC curve '''

# Note: predict() returns hard labels, which yields a degenerate 3-point ROC;
# predict_proba(x_test)[:, 1] traces a full, more informative curve.
fpr, tpr, td = roc_curve(y_test, rf_new.predict(x_test))

plt.figure()
lw = 2
plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve')
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()
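A quick sanity check of somers_d under the same public import (a perfectly ranked toy set gives D = 1):

# Somers' D = 2 * AUC - 1, i.e. the Gini coefficient of the ranking.
y_true = [0, 0, 1, 1]
y_score = [0.1, 0.2, 0.8, 0.9]   # perfect ranking -> AUC = 1.0
print(somers_d(y_true, y_score))  # 1.0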
Example 17
def nv_binary_clf_curve_test():
    N = np.random.randint(low=1, high=10)

    y_bool = np.random.rand(N) <= 0.5
    y_pred = np.random.rand(N)

    sample_weight = None
    if np.random.rand() <= 0.2:
        sample_weight = np.abs(np.random.randn(N))
    if np.random.rand() <= 0.2:
        sample_weight = 1 + np.random.multinomial(N, np.ones(N) / N)
    if np.random.rand() <= 0.2:
        sample_weight = np.maximum(np.random.multinomial(N,
                                                         np.ones(N) / N), 1e-6)

    fps, tps, thresholds = _nv_binary_clf_curve(y_bool, y_pred, sample_weight)
    assert (fps.shape == tps.shape and fps.shape == thresholds.shape)
    assert (np.all(np.isfinite(fps)))
    assert (np.all(np.isfinite(tps)))
    assert (np.all(np.isfinite(thresholds[1:])))
    assert (fps[0] == 0 and tps[0] == 0 and thresholds[0] == np.inf)
    if sample_weight is None:
        assert (np.abs(fps[-1] - np.sum(~y_bool)) <= 1e-8)
        assert (np.abs(tps[-1] - np.sum(y_bool)) <= 1e-8)
    else:
        assert (np.abs(fps[-1] - np.sum(sample_weight * ~y_bool)) <= 1e-8)
        assert (np.abs(tps[-1] - np.sum(sample_weight * y_bool)) <= 1e-8)
    assert (np.all((np.diff(fps) >= 0.0) & (np.diff(tps) >= 0.0)))
    assert (np.all((np.diff(fps) > 0) | (np.diff(tps) > 0)))
    assert (np.all(np.diff(thresholds) < 0.0))

    fpr, tpr, thresholds_roc = _nv_roc_curve(y_bool, y_pred, sample_weight)
    assert (fpr.shape == tpr.shape and fpr.shape == thresholds_roc.shape)
    assert (np.all(np.isfinite(fpr)))
    assert (np.all(np.isfinite(tpr)))
    assert (np.all(np.isfinite(thresholds_roc[1:])))
    assert (fpr[0] == 0.0 and tpr[0] == 0.0)
    assert (fpr[-1] == 1.0 and tpr[-1] == 1.0)
    assert (np.all((np.diff(fpr) >= 0.0) & (np.diff(tpr) >= 0.0)))
    assert (np.all((np.diff(fpr) > 0.0) | (np.diff(tpr) > 0.0)))
    assert (np.all(np.diff(thresholds_roc) < 0.0))

    rec, prec, thresholds_pr = _nv_recall_precision_curve(
        y_bool, y_pred, sample_weight)
    assert (rec.shape == prec.shape and rec.shape == thresholds_pr.shape)
    assert (np.all(np.isfinite(rec)))
    assert (np.all(np.isfinite(prec)))
    assert (np.all(np.isfinite(thresholds_pr[1:])))
    assert (rec[0] == 0.0 and rec[-1] == 1.0)
    assert (len(prec) >= 2 and prec[0] == prec[1])
    b_rate = np.mean(y_bool) if sample_weight is None else \
        np.true_divide(np.sum(sample_weight * y_bool), np.sum(sample_weight))
    assert (np.max(np.abs(prec[-1] - b_rate)) <= 1e-8)
    # Note: may have repeats in PR curve
    assert (np.all(np.diff(rec) >= 0.0))
    assert (np.all(np.diff(thresholds_pr) < 0.0))

    rec_gain, prec_gain, thresholds_prg = _nv_prg_curve(
        y_bool, y_pred, sample_weight)
    assert (rec_gain.shape == prec_gain.shape)
    assert (rec_gain.shape == thresholds_prg.shape)
    assert (np.all(np.isfinite(thresholds_prg[1:])))
    assert (rec_gain[0] == 0.0 and rec_gain[-1] == 1.0)
    assert (np.all(rec_gain <= 1.0) and np.all(prec_gain <= 1.0))
    assert (np.all(np.diff(rec_gain) >= 0.0))
    assert (np.allclose(prec_gain[-1], 0.0))

    if np.all(y_bool) or (not np.any(y_bool)):
        assert (np.allclose(0.5, np.trapz(fpr, tpr)))
        assert (np.allclose(np.mean(y_bool), np.sum(prec[:-1] * np.diff(rec))))
        assert (np.allclose(0.0, np.sum(prec_gain[:-1] * np.diff(rec_gain))))
        return

    fps2, tps2, thresholds2 = _binary_clf_curve(y_bool,
                                                y_pred,
                                                pos_label=True,
                                                sample_weight=sample_weight)
    assert (np.allclose(fps[1:], fps2))
    assert (np.allclose(tps[1:], tps2))
    assert (np.allclose(thresholds[1:], thresholds2))

    fpr2, tpr2, thresholds2 = roc_curve(y_bool,
                                        y_pred,
                                        pos_label=True,
                                        sample_weight=sample_weight,
                                        drop_intermediate=False)
    # sklearn inconsistent on including origin ==> need if statement
    if len(fpr) == len(fpr2):
        assert (np.allclose(fpr, fpr2))
        assert (np.allclose(tpr, tpr2))
        assert (np.allclose(thresholds_roc[1:], thresholds2[1:]))
    else:
        assert (np.allclose(fpr[1:], fpr2))
        assert (np.allclose(tpr[1:], tpr2))
        assert (np.allclose(thresholds_roc[1:], thresholds2))

    prec2, rec2, thresholds2 = \
        precision_recall_curve(y_bool, y_pred, pos_label=True,
                               sample_weight=sample_weight)
    prec2, rec2, thresholds2 = prec2[::-1], rec2[::-1], thresholds2[::-1]
    prec2[0] = prec2[1]
    err = rec[len(rec2):] - 1.0
    assert (len(err) == 0 or np.max(np.abs(err)) <= 1e-8)
    assert (np.allclose(rec[:len(rec2)], rec2))
    assert (np.allclose(prec[:len(rec2)], prec2))
    assert (np.allclose(thresholds_pr[1:len(rec2)], thresholds2))

    with np.errstate(divide='ignore', invalid='ignore'):
        rec_gain2 = (rec - b_rate) / ((1.0 - b_rate) * rec)
        prec_gain2 = (prec - b_rate) / ((1.0 - b_rate) * prec)
    idx = rec_gain2 > 0.0
    assert (np.allclose(rec_gain[1:], rec_gain2[idx]))
    assert (np.allclose(prec_gain[1:], prec_gain2[idx]))
    assert (np.allclose(thresholds_prg[1:], thresholds_pr[idx]))
    assert (np.allclose(rec_gain[0], 0.0))
    idx0 = np.where(~idx)[0][-1]
    assert (np.allclose(prec_gain[0], prec_gain2[idx0]))
    assert (np.allclose(thresholds_prg[0], thresholds_pr[idx0]))
Example 18
def model_predict(X_train,X_test,y_train,y_test):
    """
    采用各类模型进行预测
    """
    classifiers = {
        'LogisticRegression' : LogisticRegression(C=0.001),
        'Support Vector Machine Classifier' : SVC(),
        'Decision Tree Classifier' : DecisionTreeClassifier(),
        'Random Forest Classifier' : RandomForestClassifier(),
        'Xgboost Classifier' : XGBClassifier()
        }
    
    model_metrics = []
    
    for model_name,model in classifiers.items():
        model.fit(X_train,y_train)
#         cross_val_score_local = cross_val_score(model,X_train,y_train,cv=5)
        print('*' * 10, 'Model:', model_name, '*' * 10)
#         print('Cross-validation score:', round(cross_val_score_local.mean() * 100, 2), '%')

        model_sc = {}
        
        """这是训练集上的评价"""
        y_pred_t = model.predict(X_train)
        accuracy_t = accuracy_score(y_train,y_pred_t)
        precision_t = precision_score(y_train,y_pred_t)
        f1_t = f1_score(y_train,y_pred_t)
        recall_t = recall_score(y_train,y_pred_t)
        auc_t = roc_auc_score(y_train,y_pred_t)        
        
        model_sc['model_name'] = model_name
        model_sc['model_data_sort'] = 'Train Data'
        model_sc['accuracy'] = accuracy_t
        model_sc['precision'] = precision_t
        model_sc['f1'] = f1_t
        model_sc['recall'] = recall_t
        model_sc['auc'] = auc_t
        model_metrics.append(model_sc)
        
        print('\nTrain Accuracy Score: {:.2f}'.format(accuracy_t))
        print('\nTrain Precision Score: {:.2f}'.format(precision_t))
        print('\nTrain F1 Score: {:.2f}'.format(f1_t))
        print('\nTrain Recall Score: {:.2f}'.format(recall_t))
        print('\nTrain ROC AUC Score: {:.2f}'.format(auc_t))

        """这是才测试集上的评价"""
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test,y_pred)
        precision = precision_score(y_test,y_pred)
        f1 = f1_score(y_test,y_pred)
        recall = recall_score(y_test,y_pred)
        auc = roc_auc_score(y_test,y_pred)   
         
        model_sc = {}
        model_sc['model_name'] = model_name
        model_sc['model_data_sort'] = 'Test Data'
        model_sc['accuracy'] = accuracy
        model_sc['precision'] = precision
        model_sc['f1'] = f1
        model_sc['recall'] = recall
        model_sc['auc'] = auc
        model_metrics.append(model_sc)            
        
        print('\nTest Accuracy Score: {:.2f}'.format(accuracy))
        print('\nTest Precision Score: {:.2f}'.format(precision))
        print('\nTest F1 Score: {:.2f}'.format(f1))
        print('\nTest Recall Score: {:.2f}'.format(recall))
        print('\nTest ROC AUC Score: {:.2f}'.format(auc))
        # Plot the ROC curves. Note: these use hard predict() labels, so each
        # curve has only three points; predict_proba would trace a full curve.
        fpr,tpr,threshold = roc_curve(y_test,y_pred)
        fpr_t,tpr_t,threshold_t = roc_curve(y_train,y_pred_t)
        plt.figure(figsize=(10,8))
        plt.title('{} ROC curve'.format(model_name),fontsize=18)
        plt.plot(fpr_t,tpr_t, label='{} train AUC: {:.4f}'.format(model_name,auc_t))
        plt.plot(fpr,tpr,label='{} test AUC: {:.4f}'.format(model_name,auc))
        plt.plot([0,1],[0,1],'k--')
        plt.axis([-0.01,1,0,1])
        plt.xlabel('False Positive Rate', fontsize=16)
        plt.ylabel('True Positive Rate', fontsize=16)
        plt.legend(loc='best')
        plt.show()
        
    return model_metrics
Example 19
        X_train.item_dict, Y_train.item_dict)
    Ytest = Ytest.flatten()

    class_count = train_data_set.number_of_classes()

    unexpected_rules = IOHelper.load_json_object(config.get_value('rules'))
    refined_unexpected_rules = filter_association_rules(unexpected_rules)

    print('svm testing...')
    svc_model = SVC(kernel='poly', degree=3, coef0=0.1, random_state=1)
    svc_model.fit(X_train.relation_matrix, Y_train.values.flatten())

    svc_y_pred = svc_model.predict(Xtest)
    print(f1_score(Ytest, svc_y_pred, average=None))
    if (class_count <= 2):
        fpr, tpr, _ = roc_curve(Ytest, svc_y_pred.flatten())
        print(auc(fpr, tpr))

    refine_with_unexpectedness(test_data_set, Y_train.item_dict, svc_y_pred,
                               Ytest, refined_unexpected_rules)

    print('Random forest testing...')
    rf_model = RandomForestClassifier(n_estimators=20, random_state=1)
    rf_model.fit(X_train.relation_matrix, Y_train.values.flatten())

    rf_y_pred = rf_model.predict(Xtest)
    print(f1_score(Ytest, rf_y_pred, average=None))
    if (class_count <= 2):
        fpr, tpr, _ = roc_curve(Ytest, rf_y_pred.flatten())
        print(auc(fpr, tpr))
Example 20
def area_under_the_roc_curve(yTrue, yPred):
    fpr, tpr, _ = roc_curve(yTrue, yPred)
    AUC = auc(fpr, tpr)
    return AUC, fpr, tpr
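A hedged one-line usage check (toy data, perfectly ranked, so the AUC comes out 1.0):

AUC, fpr, tpr = area_under_the_roc_curve([0, 1, 0, 1], [0.2, 0.8, 0.4, 0.9])
print(AUC)  # 1.0: both positives outrank both negatives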
Example 21
def showROC(prediction, target):
    nGestures = target.shape[1]

    n_classes = nGestures
    y_test = target
    y_score = prediction
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    
    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    
    
    
    ##############################################################################
    # Plot ROC curves for the multiclass problem
    
    # Compute macro-average ROC curve and ROC area
    
    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    
    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])  # scipy's interp is removed; use np.interp
    
    # Finally average it and compute AUC
    mean_tpr /= n_classes
    
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    
    # Plot all ROC curves
    plt.figure()
    plt.plot(fpr["micro"], tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'
                   ''.format(roc_auc["micro"]),
             linewidth=2)
    
    plt.plot(fpr["macro"], tpr["macro"],
             label='macro-average ROC curve (area = {0:0.2f})'
                   ''.format(roc_auc["macro"]),
             linewidth=2)
    
    for i in range(n_classes):
        plt.plot(fpr[i], tpr[i],label='ROC curve of class {0} (area = {1:0.2f})'
                                       ''.format(i, roc_auc[i]))
        

    
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    #plt.title('Some extension of Receiver operating characteristic to multi-class')
    plt.legend(loc="lower right")
    plt.show()
Example 22
                                                  training_dict, True, False,
                                                  False)
    # build testing inputs and labels
    X_test, y_test, test_labels = build_inputs(training_files, activity_labels,
                                               training_dict, True, False,
                                               False)
    random_state = np.random.RandomState(0)
    classifier = OneVsRestClassifier(
        svm.SVC(kernel='linear', probability=True, random_state=random_state))
    y_score = classifier.fit(X_train, y_train).decision_function(X_test)

    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(3):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    plt.figure()
    lw = 2
    plt.plot(fpr[2],
             tpr[2],
             color='darkorange',
             lw=lw,
             label='ROC curve (area = %0.2f)' % roc_auc[2])
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])