Example #1
#Specificity
def get_specificity(y, pred, th=0.5):
    specificity = 0.0
    TN = get_true_neg(y,pred,th)
    FP = get_false_pos(y,pred,th)
    specificity = TN/(TN+FP)
    return specificity
#Positive predictive value (PPV) and Negative predictive value (NPV)
def get_ppv(y, pred, th=0.5):
    PPV = 0.0
    TP = get_true_pos(y,pred,th)
    FP = get_false_pos(y,pred,th)
    PPV = TP/(TP + FP)
    return PPV

def get_npv(y, pred, th=0.5):
    NPV = 0.0
    TN = get_true_neg(y,pred,th)
    FN = get_false_neg(y,pred,th)
    NPV = TN/(TN + FN)
    return NPV
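
# A minimal sketch of the counting helpers the metrics above rely on
# (get_true_pos, get_true_neg, get_false_pos, get_false_neg are defined earlier
# in the assignment; the versions below are assumptions about their behavior,
# not the graded implementations). Each thresholds the predictions at `th`
# and counts agreement with the ground-truth labels.
import numpy as np

def get_true_pos(y, pred, th=0.5):
    return np.sum((y == 1) & (pred >= th))

def get_true_neg(y, pred, th=0.5):
    return np.sum((y == 0) & (pred < th))

def get_false_pos(y, pred, th=0.5):
    return np.sum((y == 0) & (pred >= th))

def get_false_neg(y, pred, th=0.5):
    return np.sum((y == 1) & (pred < th))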

#Confidence Intervals

# Calibration

from sklearn.metrics import roc_auc_score, f1_score

result = get_performance_metrics(y, pred, class_labels, acc=get_accuracy, prevalence=get_prevalence, 
                        sens=get_sensitivity, spec=get_specificity, ppv=get_ppv, npv=get_npv, auc=roc_auc_score, f1=f1_score)


#ROC Curve
get_curve(y, pred, class_labels)
#Precision-Recall Curve
get_curve(y, pred, class_labels, curve='prc')
# * The sensitivity for `Edema` is 0.75.
# * However, given that the model predicted positive, the probability that a person has Edema (its PPV) is only 0.066!
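
# To see how both statements can hold at once, recall that PPV also depends on prevalence:
# PPV = (sensitivity * prevalence) / (sensitivity * prevalence + (1 - specificity) * (1 - prevalence)).
# A small sketch with illustrative numbers (the specificity and prevalence below are
# assumed values for demonstration, not the actual Edema results):
sens, spec, prev = 0.75, 0.80, 0.02
ppv_estimate = (sens * prev) / (sens * prev + (1 - spec) * (1 - prev))
print(f"PPV implied by these rates: {ppv_estimate:.3f}")  # small despite the 0.75 sensitivity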

# <a name='3-6'></a>
# ### 3.6 ROC Curve
# 
# So far we have been operating under the assumption that our model's prediction of `0.5` and above should be treated as positive, and otherwise it should be treated as negative. However, this was a rather arbitrary choice. One way to see this is to look at a very informative visualization called the receiver operating characteristic (ROC) curve.
# 
# The ROC curve is created by plotting the true positive rate (TPR) against the false positive rate (FPR) at various threshold settings. The ideal point is at the top left, with a true positive rate of 1 and a false positive rate of 0. The various points on the curve are generated by gradually changing the threshold.
# 
# Let's look at this curve for our model:

# In[39]:


util.get_curve(y, pred, class_labels)
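
# A rough sketch of what the plotting helper computes, assuming `y` and `pred` are
# NumPy arrays of shape (num_examples, num_classes): for each class, sklearn's
# `roc_curve` sweeps the threshold and returns the FPR/TPR pairs that get plotted.
from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y[:, 0], pred[:, 0])  # class index 0 chosen for illustration
print(f"threshold settings evaluated for class 0: {len(thresholds)}")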


# The area under the ROC curve is also called AUCROC or C-statistic, and it is a measure of goodness of fit. In the medical literature, this number also gives the probability that a randomly selected patient who experienced a condition had a higher risk score than a patient who had not experienced the event. This summarizes the model output across all thresholds, and provides a good sense of the discriminative power of a given model.
# 
# Let's use the `sklearn` metric function `roc_auc_score` to add this score to our metrics table.

# In[40]:


from sklearn.metrics import roc_auc_score
util.get_performance_metrics(y, pred, class_labels, acc=get_accuracy, prevalence=get_prevalence, 
                        sens=get_sensitivity, spec=get_specificity, ppv=get_ppv, npv=get_npv, auc=roc_auc_score)
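
# To make the probabilistic interpretation above concrete: the AUC for a class equals the
# fraction of (positive, negative) pairs in which the positive example received the higher
# score. A small sketch (class index 0 chosen for illustration; `y` and `pred` are assumed
# to be NumPy arrays of shape (num_examples, num_classes)):
import numpy as np

pos_scores = pred[y[:, 0] == 1, 0]
neg_scores = pred[y[:, 0] == 0, 0]
pairwise_estimate = np.mean(pos_scores[:, None] > neg_scores[None, :])
print(f"pairwise estimate: {pairwise_estimate:.3f}, sklearn AUC: {roc_auc_score(y[:, 0], pred[:, 0]):.3f}")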


# <a name='4'></a>
Example #3
threshold = 0.5
print(f"threshold: {threshold}\n")

print(f"computed ppv: {get_ppv(y_test, preds_test, threshold):.2f}")
print(f"computed npv: {get_npv(y_test, preds_test, threshold):.2f}")


# Summarizes the model output across all thresholds, and provides a good sense of the discriminative power of a given model.
util.get_performance_metrics(y, pred, class_labels, acc=get_accuracy, prevalence=get_prevalence, 
                        sens=get_sensitivity, spec=get_specificity, ppv=get_ppv, npv=get_npv)


# The ROC curve is created by plotting the true positive rate (TPR) against the false positive rate (FPR) at various threshold settings. 
# Look at this curve for our model:
util.get_curve(y, pred, class_labels)
 

# Use the `sklearn` metric function `roc_auc_score` to add this score to our metrics table.
from sklearn.metrics import roc_auc_score # a measure of goodness of fit
util.get_performance_metrics(y, pred, class_labels, acc=get_accuracy, prevalence=get_prevalence, 
                        sens=get_sensitivity, spec=get_specificity, ppv=get_ppv, npv=get_npv, auc=roc_auc_score)


# Create bootstrap samples and compute sample AUCs from those samples.
import numpy as np
import pandas as pd

def bootstrap_auc(y, pred, classes, bootstraps=100, fold_size=1000):
    statistics = np.zeros((len(classes), bootstraps))

    for c in range(len(classes)):
        df = pd.DataFrame(columns=['y', 'pred'])
        df.loc[:, 'y'] = y[:, c]