Example #1
    def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None):
        if len(np.unique(y_true)) != 2:
            raise ValueError("Only one class present in y_true. ROC AUC score "
                             "is not defined in that case.")

        fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight)
        if max_fpr is None or max_fpr == 1:
            return auc(fpr, tpr)
        if max_fpr <= 0 or max_fpr > 1:
            raise ValueError("Expected max_frp in range ]0, 1], got: %r" %
                             max_fpr)

        # Add a single point at max_fpr by linear interpolation
        stop = np.searchsorted(fpr, max_fpr, 'right')
        x_interp = [fpr[stop - 1], fpr[stop]]
        y_interp = [tpr[stop - 1], tpr[stop]]
        tpr = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
        fpr = np.append(fpr[:stop], max_fpr)
        partial_auc = auc(fpr, tpr)

        # McClish correction: standardize result to be 0.5 if non-discriminant
        # and 1 if maximal
        min_area = 0.5 * max_fpr**2
        max_area = max_fpr
        return 0.5 * (1 + (partial_auc - min_area) / (max_area - min_area))
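
A quick sanity check of the partial-AUC helper above (a sketch: assumes numpy and sklearn's roc_curve/auc are imported as in the snippet, with the max_fpr parameter restored in the signature):

import numpy as np
from sklearn.metrics import roc_curve, auc

y_true = np.array([0, 0, 1, 1, 0, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.7])

# Full AUC versus McClish-corrected partial AUC restricted to FPR <= 0.3
print(_binary_roc_auc_score(y_true, y_score))
print(_binary_roc_auc_score(y_true, y_score, max_fpr=0.3))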
Example #2
 def Predict(self, inp, labels, classifier, folds, name, paramdesc):
     X = inp
     y = labels
     X, y = X[y != 2], y[y != 2]
     n_samples, n_features = X.shape
     
     ###############################################################################
     # Classification and ROC analysis
     
     # Run classifier with cross-validation and plot ROC curves
     cv = StratifiedKFold(n_splits=folds)
     
     mean_tpr = 0.0
     mean_fpr = np.linspace(0, 1, 100)
     all_tpr = []
     
     _precision = 0.0
     _recall = 0.0
     _accuracy = 0.0
     _f1 = 0.0
     
     for i, (train, test) in enumerate(cv.split(X, y)):
         probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
         pred_ = classifier.predict(X[test])
         _precision += precision_score(y[test], pred_)
         _recall += recall_score(y[test], pred_)
         _accuracy += accuracy_score(y[test], pred_)
         _f1 += f1_score(y[test], pred_)
         # Compute ROC curve and area under the curve
         fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
         mean_tpr += np.interp(mean_fpr, fpr, tpr)
         mean_tpr[0] = 0.0
         roc_auc = auc(fpr, tpr)
         plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))
     
     _precision /= folds
     _recall /= folds
     _accuracy /= folds
     _f1 /= folds
     
     
     plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
     
     mean_tpr /= folds
     mean_tpr[-1] = 1.0
     mean_auc = auc(mean_fpr, mean_tpr)
     plt.plot(mean_fpr, mean_tpr, 'k--',
              label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)
     
     plt.xlim([-0.05, 1.05])
     plt.ylim([-0.05, 1.05])
     plt.xlabel('False Positive Rate')
     plt.ylabel('True Positive Rate')
     plt.title('Receiver operating characteristic - {0}'.format(name))
     plt.legend(loc="lower right")
     plt.savefig(self.configObject['outputdir'] + '/' + name + '.png')
     plt.close()
     
     result = self.OutputResult(name, paramdesc, len(inp), floor(labels.size / folds), _precision, _recall, _accuracy, _f1) 
     Announce(result)
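
The core pattern in Predict (per-fold ROC curves interpolated onto a common grid, then averaged) also works standalone; a minimal sketch with the current scikit-learn API and synthetic data:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import StratifiedKFold

X, y = make_classification(n_samples=200, random_state=0)
cv = StratifiedKFold(n_splits=5)
mean_fpr = np.linspace(0, 1, 100)
mean_tpr = np.zeros_like(mean_fpr)

for train, test in cv.split(X, y):
    probas = LogisticRegression().fit(X[train], y[train]).predict_proba(X[test])
    fpr, tpr, _ = roc_curve(y[test], probas[:, 1])
    mean_tpr += np.interp(mean_fpr, fpr, tpr)  # resample onto the common grid

mean_tpr /= cv.get_n_splits()
mean_tpr[0], mean_tpr[-1] = 0.0, 1.0
print('mean AUC: %.3f' % auc(mean_fpr, mean_tpr))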
Example #3
def evalModel(predictor, test_data, test_labels, train_data, train_labels, name, evalresults):

    
    predictor.fit(train_data, train_labels)
    evalresults.setdefault(name + " Accuracy raw \t\t", []).append(accuracy_score(test_labels, predictor.predict(test_data)))

    #predictor.fit(preprocessing.scale(train_data), train_labels)
    #evalresults.setdefault(name + " Accuracy std \t\t", []).append(metrics.accuracy_score(test_labels, predictor.predict(preprocessing.scale(test_data))))
 
    #predictor.fit(preprocessing.normalize(train_data, norm='l2'), train_labels)
    #evalresults.setdefault(name + " Accuracy nml \t\t", []).append(metrics.accuracy_score(test_labels, predictor.predict(preprocessing.normalize(test_data, norm='l2'))))

    # Early return: everything below (the AUC measurements) is currently
    # disabled, like the commented-out blocks above.
    return

    if len(set(train_labels)) != 2:
        return
    
    predictor.fit(train_data, train_labels)
    fpr, tpr, _ = roc_curve(test_labels, predictor.decision_function(test_data))
    evalresults.setdefault(name + " AUC raw \t\t", []).append(auc(fpr, tpr))

    #return

    predictor.fit(preprocessing.scale(train_data), train_labels)
    fpr, tpr, _ = roc_curve(test_labels, predictor.decision_function(preprocessing.scale(test_data)))
    evalresults.setdefault(name + " AUC std \t\t", []).append(auc(fpr, tpr))
    
    
    predictor.fit(preprocessing.normalize(train_data, norm='l2'), train_labels)
    fpr, tpr, _ = roc_curve(test_labels, predictor.decision_function(preprocessing.normalize(test_data, norm='l2')))
    evalresults.setdefault(name + " AUC nml \t\t", []).append(auc(fpr, tpr))
Example #4
def plot_ROC(y_pred, y_test, name):
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    n_classes = 3
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_pred[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    target_names = ['Reading', 'Speaking', 'Watching']
    plt.figure()
    plt.plot(fpr["micro"],
             tpr["micro"],
             label='micro-average ROC (AUC = {0:0.2f})'
             ''.format(roc_auc["micro"]),
             color='deeppink',
             linestyle=':',
             linewidth=4)

    plt.plot(fpr["macro"],
             tpr["macro"],
             label='macro-average ROC (AUC = {0:0.2f})'
             ''.format(roc_auc["macro"]),
             color='navy',
             linestyle=':',
             linewidth=4)
    lw = 2
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i],
                 tpr[i],
                 color=color,
                 lw=lw,
                 label='ROC - {0} (AUC = {1:0.2f})'
                 ''.format(target_names[i], roc_auc[i]))
    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc="lower right")
    plt.savefig("res/{}-ROC-AUC".format(name))
Example #5
    def recall_curve(rank, index_set=None, min_events=None):
        """
        Calculate x and y of a recall curve.

        :param rank: pandas.Series
            Values to rank; the series index is matched against index_set

        :param index_set: pandas.Series
            Indices in rank to count as events (defaults to essential genes)

        :param min_events: int or None, optional
            Minimum number of events required to calculate the curve

        :return: (x, y, AUC) tuple, or None if fewer than min_events events
        """
        x = rank.sort_values().dropna()

        # Default index set: essential genes
        if index_set is None:
            index_set = Utils.get_essential_genes(return_series=False)

        y = x.index.isin(index_set)

        if (min_events is not None) and (sum(y) < min_events):
            return None

        # Observed cumulative recall
        y = np.cumsum(y) / sum(y)

        # Rank fold-changes
        x = st.rankdata(x) / x.shape[0]

        # Calculate AUC
        xy_auc = auc(x, y)

        return x, y, xy_auc
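
A small sketch of driving recall_curve with toy data (hypothetical gene names; assumes pandas, numpy, scipy.stats as st and sklearn's auc are in scope, and passes index_set explicitly to avoid the Utils dependency; in the source this is written as a method, so call it through its class):

import pandas as pd

rank = pd.Series({'g1': -3.2, 'g2': -1.5, 'g3': 0.2, 'g4': 1.1, 'g5': 2.4})
essential = pd.Series(['g1', 'g2'])  # hypothetical event set

x, y, xy_auc = recall_curve(rank, index_set=essential)
print(xy_auc)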
Example #6
def auc_xscaled(xs, ys):
    """AUC score scaled to fill x interval
    """
    xmin, xmax = minmaxr(xs)  # helper assumed to return (min, max) of xs
    denom = float(xmax - xmin)
    xs_corr = [(x - xmin) / denom for x in xs]  # rescale x onto [0, 1]
    return auc(xs_corr, ys)
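
minmaxr is not shown in the snippet; a sketch assuming it simply returns (min, max) of the sequence:

from sklearn.metrics import auc

def minmaxr(xs):
    # assumed helper: (min, max) of a sequence
    return min(xs), max(xs)

print(auc_xscaled([2.0, 3.0, 4.0], [0.0, 0.5, 1.0]))  # x rescaled to [0, 1] -> 0.5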
Example #7
    def auc_score(self):
        """Replacement for Scikit-Learn's method

        If number of Y classes is other than two, a warning will be triggered
        but no exception thrown (the return value will be a NaN).  Also, we
        don't reorder arrays during ROC calculation since they are assumed to be
        in order.
        """
        # sklearn >= 0.22 dropped the `reorder` argument; auc() now always
        # assumes sorted inputs, which is the behaviour described above.
        return auc(self.fprs, self.tprs)
Example #8
def refine_with_unexpectedness(data_set, classes_dict, preY, Ytrue,
                               unexpected_rules):

    print('Refine with unexpected rules...')
    y_pred = np.copy(preY)
    for i in range(data_set.size()):
        x = data_set.get_transaction(i)
        for r in unexpected_rules:
            if r.satisfy_rule(x, is_lhs=True):
                label = r.right_items[0]
                y_pred[i] = classes_dict[label]
    print(f1_score(Ytrue, y_pred, average=None))
    if (data_set.number_of_classes() <= 2):
        fpr, tpr, _ = roc_curve(Ytrue, y_pred.flatten())
        print(auc(fpr, tpr))
Example #9
def plot_roc(true_labels, pred_probs, fig_title='', savepath=''):

    false_positive_rate, true_positive_rate, _ = roc_curve(true_labels,
                                                           pred_probs[:, 1],
                                                           pos_label=1)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    plt.figure()
    plt.title(fig_title)
    plt.plot(false_positive_rate,
             true_positive_rate,
             'b',
             label='AUC = %0.4f' % roc_auc)
    plt.plot([0, 1], [0, 1], 'r--')
    plt.legend(loc='lower right')
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    if savepath != '':
        plt.savefig(savepath)
    plt.show()

    return roc_auc
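
A hedged usage sketch (assumes numpy, matplotlib and sklearn imports as in the snippet; pred_probs must be a two-column probability array, e.g. from predict_proba):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

X, y = make_classification(n_samples=200, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

probs = GaussianNB().fit(X_tr, y_tr).predict_proba(X_te)
plot_roc(y_te, probs, fig_title='GaussianNB ROC')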
Example #10
    def handle(self, *args, **options):
        filepath = options['file']
        
        print(filepath)

        x = []
        y = []

        with open(filepath, newline='') as csvfile:
            data = csv.reader(csvfile, delimiter=',')
            for row in data:
                x.append(float(row[0]))
                y.append(int(row[1]))
        print(x)
        print(y)
        
        fpr, tpr, thresholds = roc_curve(y, x)
        roc_auc = auc(fpr, tpr)
            
        plt.figure()
        lw = 2
        plt.plot(fpr, tpr, color=COLOR_4,
                     lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
        
        for i in range(len(tpr)):
            print("tpr: %s fpr: %s thres: %s" % (tpr[i], fpr[i], thresholds[i]))
        
        plt.plot([0, 1], [0, 1], color=COLOR_6, lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc="lower right")
        
        base = os.path.basename(filepath)
        base = os.path.splitext(base)[0]  # drop the extension before naming the plot

        plt.savefig('%s-roc.png' % base)
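
The command reads one score,label pair per row (a float score in column 0 and an integer class label in column 1), so an input file would look something like this (illustrative values):

0.91,1
0.13,0
0.57,1
0.08,0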
Example #11
    Ytest = Ytest.flatten()

    class_count = train_data_set.number_of_classes()

    unexpected_rules = IOHelper.load_json_object(config.get_value('rules'))
    refined_unexpected_rules = filter_association_rules(unexpected_rules)

    print('svm testing...')
    svc_model = SVC(kernel='poly', degree=3, coef0=0.1, random_state=1)
    svc_model.fit(X_train.relation_matrix, Y_train.values.flatten())

    svc_y_pred = svc_model.predict(Xtest)
    print(f1_score(Ytest, svc_y_pred, average=None))
    if (class_count <= 2):
        fpr, tpr, _ = roc_curve(Ytest, svc_y_pred.flatten())
        print(auc(fpr, tpr))

    refine_with_unexpectedness(test_data_set, Y_train.item_dict, svc_y_pred,
                               Ytest, refined_unexpected_rules)

    print('Random forest testing...')
    rf_model = RandomForestClassifier(n_estimators=20, random_state=1)
    rf_model.fit(X_train.relation_matrix, Y_train.values.flatten())

    rf_y_pred = rf_model.predict(Xtest)
    print(f1_score(Ytest, rf_y_pred, average=None))
    if (class_count <= 2):
        fpr, tpr, _ = roc_curve(Ytest, rf_y_pred.flatten())
        print(auc(fpr, tpr))

    refine_with_unexpectedness(test_data_set, Y_train.item_dict, rf_y_pred,
                               Ytest, refined_unexpected_rules)
Example #12
def area_under_the_roc_curve(yTrue, yPred):
    fpr, tpr, _ = roc_curve(yTrue, yPred)
    AUC = auc(fpr, tpr)
    return AUC, fpr, tpr
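
Usage is straightforward; with sklearn's canonical toy example the returned AUC is 0.75:

import numpy as np
from sklearn.metrics import roc_curve, auc

AUC, fpr, tpr = area_under_the_roc_curve(np.array([0, 0, 1, 1]),
                                         np.array([0.1, 0.4, 0.35, 0.8]))
print(AUC)  # 0.75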
Example #13
                                                  False)
    # build testing inputs and labels
    X_test, y_test, test_labels = build_inputs(training_files, activity_labels,
                                               training_dict, True, False,
                                               False)
    random_state = np.random.RandomState(0)
    classifier = OneVsRestClassifier(
        svm.SVC(kernel='linear', probability=True, random_state=random_state))
    y_score = classifier.fit(X_train, y_train).decision_function(X_test)

    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(3):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    plt.figure()
    lw = 2
    plt.plot(fpr[2],
             tpr[2],
             color='darkorange',
             lw=lw,
             label='ROC curve (area = %0.2f)' % roc_auc[2])
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
Example #14
def showROC(prediction, target):
    nGestures = target.shape[1]

    n_classes = nGestures
    y_test = target
    y_score = prediction
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    
    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    
    
    
    ##############################################################################
    # Plot ROC curves for the multiclass problem
    
    # Compute macro-average ROC curve and ROC area
    
    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    
    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
    
    # Finally average it and compute AUC
    mean_tpr /= n_classes
    
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    
    # Plot all ROC curves
    plt.figure()
    plt.plot(fpr["micro"], tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'
                   ''.format(roc_auc["micro"]),
             linewidth=2)
    
    plt.plot(fpr["macro"], tpr["macro"],
             label='macro-average ROC curve (area = {0:0.2f})'
                   ''.format(roc_auc["macro"]),
             linewidth=2)
    
    for i in range(n_classes):
        plt.plot(fpr[i], tpr[i],
                 label='ROC curve of class {0} (area = {1:0.2f})'
                       ''.format(i, roc_auc[i]))

    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    #plt.title('Some extension of Receiver operating characteristic to multi-class')
    plt.legend(loc="lower right")
    plt.show()
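
showROC has the same shape contract as Example #4: binarized targets and per-class scores, both (n_samples, n_classes). A hedged sketch with synthetic data (assumes numpy and matplotlib are imported as in the snippet):

import numpy as np

np.random.seed(0)
target = np.eye(3)[np.random.randint(0, 3, size=60)]  # one-hot 3-class labels
prediction = target + np.random.normal(scale=0.4, size=target.shape)  # noisy scores
showROC(prediction, target)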
Example #15
    # Export
    ppis = df_corr[(df_corr["prot_corr"].abs() > .5) |
                   (df_corr["gexp_corr"].abs() > .5) |
                   (df_corr["crispr_corr"].abs() > .5)]
    ppis.round(4).to_csv(f"{RPATH}/PPInteractions_filtered.csv", index=False)

    rc_dict = dict()
    for y in ["corum", "biogrid", "string", "huri"]:
        rc_dict[y] = dict()
        for x in ["prot", "gexp", "crispr", "merged"]:
            rc_df = df_corr.sort_values(f"{x}_pvalue")[y].reset_index(
                drop=True).copy()

            rc_df_y = np.cumsum(rc_df) / np.sum(rc_df)
            rc_df_x = np.array(rc_df.index) / rc_df.shape[0]
            rc_df_auc = auc(rc_df_x, rc_df_y)

            rc_dict[y][x] = dict(x=list(rc_df_x),
                                 y=list(rc_df_y),
                                 auc=rc_df_auc)

    rc_pal = dict(
        biogrid=sns.color_palette("tab20c").as_hex()[0:4],
        corum=sns.color_palette("tab20c").as_hex()[4:8],
        string=sns.color_palette("tab20c").as_hex()[8:12],
        huri=sns.color_palette("tab20c").as_hex()[12:16],
    )

    # Recall curves
    _, ax = plt.subplots(1, 1, figsize=(3, 3), dpi=600)
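
The recall-AUC pattern above boils down to: sort by p-value, take the cumulative fraction of known interactions as y and the rank fraction as x, then feed both to auc. A standalone toy sketch:

import numpy as np
import pandas as pd
from sklearn.metrics import auc

hits = pd.Series([1, 1, 0, 1, 0, 0, 0, 1])  # 1 = known interaction, rows pre-sorted by p-value
y = np.cumsum(hits) / np.sum(hits)
x = np.array(hits.index) / hits.shape[0]
print(auc(x, y))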