Example #1
def plotPrc(clfName, folds, outdir):
    y_tests = []
    y_scores = []
    plt.clf()
    for i, (clf, X_test, y_test, _, _, _, _,_,_,_) in enumerate(folds):
        try:
            y_score = clf.decision_function(X_test)
        except AttributeError:
            y_score = clf.predict_proba(X_test)[:, 0]
        precision, recall, _ = precision_recall_curve(y_test, y_score, pos_label=POSTIVE_LABEL)
        y_tests.extend(y_test)
        y_scores.extend(y_score)

        try:
            area = average_precision_score(y_test, y_score)
        except ValueError:
            area = 0.0
        clf.prc_auc = area
        plt.plot(recall, precision, label='Fold %d, AUC = %0.2f' % (i, area), lw=1)

    precision, recall, _ = precision_recall_curve(y_tests, y_scores, pos_label=POSTIVE_LABEL)
    try:
        area = average_precision_score(y_tests, y_scores)
    except ValueError:
        area = 0.0
    plt.plot(recall, precision, 'k--', label='Mean, AUC = %0.2f' % (area), lw=2)

    plt.title('Precision-Recall: %s\n%s'%(clfName,outdir.name.replace("_"," ")))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.legend(loc="lower center", prop=legendprop)

    plt.savefig(str(outdir/(clfName.replace(" ","_")+'_precision-recall.png')))
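A minimal, self-contained sketch of the same per-fold pattern on synthetic data (LogisticRegression stands in for the real classifiers; none of the original pipeline objects are assumed): each held-out fold is scored through decision_function with a predict_proba fallback, and the held-out labels and scores are pooled for a single combined curve, as plotPrc does above.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.model_selection import StratifiedKFold

X, y = make_classification(n_samples=400, weights=[0.8, 0.2], random_state=0)

pooled_y, pooled_scores = [], []
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for fold, (train, test) in enumerate(cv.split(X, y)):
    clf = LogisticRegression(max_iter=1000).fit(X[train], y[train])
    try:
        scores = clf.decision_function(X[test])    # margin-based classifiers
    except AttributeError:
        scores = clf.predict_proba(X[test])[:, 1]  # probability-based classifiers
    precision, recall, _ = precision_recall_curve(y[test], scores)
    print('fold %d AP = %.3f' % (fold, average_precision_score(y[test], scores)))
    pooled_y.extend(y[test])
    pooled_scores.extend(scores)

# Pooled ("mean") curve over all held-out predictions
precision, recall, _ = precision_recall_curve(pooled_y, pooled_scores)
print('pooled AP = %.3f' % average_precision_score(pooled_y, pooled_scores))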
Example #2
def create_all_eval_results(y_true,y_pred,key,system_features,sampling,replacement,num_of_samples):
    # precision = metrics.precision_score(y_true, y_pred, average='weighted')
    # recall = metrics.recall_score(y_true, y_pred, average='weighted')
    # F2 = calculateF2(precision, recall)
    name = data_names[key]

    y_true_bugs, y_pred_bugs = zip(*[[y_true[i], y_pred[i]] for i in range(len(y_true)) if y_true[i] == 1])
    # precision_bug, recall_bug, F_measure_bug ,_ = metrics.precision_recall_fscore_support(y_true_bugs,
    #                                                                                                  y_pred_bugs,
    #                                                                                                  average='micro')
    precision_bug = metrics.precision_score(y_true_bugs, y_pred_bugs, average='micro')
    recall_bug = metrics.recall_score(y_true_bugs, y_pred_bugs, average='micro')
    F2_bug = calculateF2(precision_bug,recall_bug)
    precision_bug_all, recall_bug_all,_ = metrics.precision_recall_curve(y_true_bugs, y_pred_bugs)
    prc_area_bug = metrics.auc(recall_bug_all, precision_bug_all)

    # precision, recall, F_measure,_ = metrics.precision_recall_fscore_support(y_true,
    #                                                                                                 y_pred,
    #                                                                                                 average='micro')
    precision = metrics.average_precision_score(y_true, y_pred, average='micro')
    recall = metrics.recall_score(y_true, y_pred, average='micro')
    F2 = calculateF2(precision, recall)
    precision_all, recall_all, _ = metrics.precision_recall_curve(y_true, y_pred)
    prc_area = metrics.auc(recall_all, precision_all)

    global results
    results.loc[len(results)] = [name,precision_bug,recall_bug,F2_bug,prc_area_bug, precision, recall,F2,prc_area,str(system_features),str(sampling),str(replacement),str(num_of_samples)]
def RunExp(StrModel:str, Param:str, FeaUsed:list, DataPath:str, Label:str, std:bool = False, N:int = 0):
	Data = np.genfromtxt(DataPath + Label, delimiter = ',', dtype = int)
	Data = Data[:, np.newaxis]

	for f in FeaUsed:
		T = (np.genfromtxt(DataPath + Features[f], delimiter = ',' , dtype = float))
		if len(T.shape) < 2:
			T = T[:, np.newaxis]
		Data = np.concatenate((Data, T), axis = 1)
	
	if N > 0:
		Data = Data[:N, :]

	Lbl = Data[:, 0]
	Fea = Data[:,1:]
	if std:
		scaler = preprocessing.StandardScaler()
		Fea = scaler.fit_transform(Fea)

	Model = base.clone(Models[StrModel])
	SetParam(Model, Param)

	Model.fit(Fea, Lbl)
	Pred = Model.predict_proba(Fea)[:, 1]
	st = metrics.precision_recall_curve(Lbl, Pred)
		
	Folds = cross_validation.KFold(Fea.shape[0], n_folds = 5)
	for train, valid in Folds:
		Model = base.clone(Models[StrModel])
		SetParam(Model, Param)
		Model.fit(Fea[train], Lbl[train])
		Pred[valid] = Model.predict_proba(Fea[valid])[:, 1]
	
	sv = metrics.precision_recall_curve(Lbl, Pred)
	return st, sv
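The loop above builds out-of-fold probabilities with the long-removed cross_validation.KFold API. A sketch of the same training-curve versus cross-validated-curve comparison using the current model_selection API, on synthetic data with LogisticRegression standing in for the Models entry:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_curve
from sklearn.model_selection import KFold, cross_val_predict

X, y = make_classification(n_samples=300, random_state=0)
model = LogisticRegression(max_iter=1000)

# Training-set curve (fit and score on the same data, like `st` above)
pred_train = model.fit(X, y).predict_proba(X)[:, 1]
st = precision_recall_curve(y, pred_train)

# Out-of-fold curve (each sample scored by a model that never saw it, like `sv` above)
pred_oof = cross_val_predict(model, X, y, cv=KFold(n_splits=5),
                             method='predict_proba')[:, 1]
sv = precision_recall_curve(y, pred_oof)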
Example #4
def multiclass_pr(y_true, y_pred, class_names=[]):
    """Computes the precision and recall for multiclass predictions.
    
    Args:
        y_true: True class labels, shape (examples, classes).
        y_pred: Predicted class scores, shape (examples, classes).
        class_names: List of class name strings, with NoneType to
            indicate the no-connection class - eg ['inh', None, 'xct'].
            
    Returns:
        precision: Dict of precision for each class, plus the 
            micro-average.
        recall: Dict of recall for each class, plus the micro-average.
    """
    assert(len(class_names) == y_true.shape[1])
    classes = y_true.shape[1]
    
    precision = dict()
    recall = dict()
    
    for i, class_name in enumerate(class_names):
        
        if class_name is not None:
            precision[class_name], recall[class_name], _ = precision_recall_curve(
                y_true[:,i], y_pred[:,i])
            
    # Don't include no-connection class in micro-average                                                                              
    mask = np.ones(classes, dtype=bool)
    mask[mask.size//2 - 1 + mask.size%2] = 0
                                                                              
    precision['micro-avg'], recall['micro-avg'], _ = precision_recall_curve(
        y_true[:,mask].ravel(), y_pred[:,mask].ravel())
    
    return precision, recall
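A self-contained sketch of the same masking idea on synthetic one-hot data: per-class curves are computed for the named classes, and one column (standing in for the no-connection class) is left out of the micro-average. The class names and the mask position are illustrative only.

import numpy as np
from sklearn.metrics import precision_recall_curve

rng = np.random.RandomState(0)
labels = rng.randint(0, 3, size=200)
y_true = np.eye(3)[labels]                 # one-hot, shape (examples, classes)
y_pred = rng.rand(200, 3)                  # per-class scores

precision, recall = {}, {}
for i, name in enumerate(['inh', None, 'xct']):   # None marks the no-connection class
    if name is not None:
        precision[name], recall[name], _ = precision_recall_curve(y_true[:, i], y_pred[:, i])

mask = np.array([True, False, True])              # drop the no-connection column
precision['micro-avg'], recall['micro-avg'], _ = precision_recall_curve(
    y_true[:, mask].ravel(), y_pred[:, mask].ravel())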
def evaluate_multiple(ground_truths, prediction_scores, compute_micro_macro_avg=False):
    """

    :param ground_truths: 1-d array of class labels starting from 0, e.g. [0, 0, 1, 3, 2, 1, 0]
    :param prediction_scores: 2-d array holding the corresponding probability score for each class
    :param compute_micro_macro_avg: set True if the micro/macro averaged curves are needed
    :return: Dictionaries keyed by class label: precisions, recalls, thresholds, avg_precisions

    """

    # Check dimension
    if len(prediction_scores.shape) != 2:
        print('The dimension of \'prediction_scores\' should be 2.')
        return

    N = prediction_scores.shape[0]
    M = prediction_scores.shape[1]

    precisions = {}
    recalls = {}
    thresholds = {}
    avg_precisions = {}

    if compute_micro_macro_avg:
        gt_label_array = []
        prediction_score_array = []

    for class_label in range(0, M):

        # Generate Class Label
        ground_truth_label = np.zeros(N, dtype=int)
        idx = (ground_truths == class_label)
        ground_truth_label[idx] = 1

        # Extract positive scores
        prediction_score = prediction_scores[:, class_label]

        # Compute precision-recall curve
        precision, recall, threshold = precision_recall_curve(ground_truth_label, prediction_score)
        avg_precision = average_precision_score(ground_truth_label, prediction_score)

        precisions[class_label] = precision
        recalls[class_label] = recall
        thresholds[class_label] = threshold
        avg_precisions[class_label] = avg_precision

        if compute_micro_macro_avg:
            gt_label_array.append(ground_truth_label)
            prediction_score_array.append(prediction_score)

    if compute_micro_macro_avg:
        gt_label_array = np.asarray(gt_label_array)
        prediction_score_array = np.asarray(prediction_score_array)

        # Compute Micro Avg.
        precisions["micro"], recalls["micro"], _ = precision_recall_curve(gt_label_array.ravel(),
                                                                                   prediction_score_array.ravel())
        avg_precisions["micro"] = average_precision_score(gt_label_array, prediction_score_array, average="micro")

    return precisions, recalls, thresholds, avg_precisions
Example #6
def calc_auc(model, y_test, y_score, auctype = "ROC"):
    y_score = 1 / ( 1 + np.exp(-y_score) ) # sigmoid it!
    n_classes = y_test.shape[1] # 164
    if auctype == "ROC":
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
        # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
        return roc_auc["micro"]
    elif auctype == "PR":
        prec = dict()
        rec = dict()
        pr_auc = dict()
        for i in range(n_classes):
            prec[i], rec[i], _ = precision_recall_curve(y_test[:,i], y_score[:,i])
            pr_auc[i] = auc(rec[i], prec[i])
        # Compute micro-average prec-rec curve and prec-rec AUC
        prec["micro"], rec["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
        pr_auc["micro"] = auc(rec["micro"], prec["micro"])
        return pr_auc["micro"]
Example #7
def prc_curve(targets_ts, scores_ts, targets_tr, scores_tr, model_no):
    plt.clf()
    colors = ['r', 'g', 'b', 'y', 'k', 'm']
    classes = ['lunge', 'wing_threat', 'charge', 'hold', 'tussle', 'other']
    for i in range(NUM_CLASSES):
        i = 5
        precision_ts, recall_ts, thresholds_ts = precision_recall_curve(targets_ts[:,i], scores_ts[:,i], pos_label=1)
        precision_tr, recall_tr, thresholds = precision_recall_curve(targets_tr[:,i], scores_tr[:,i], pos_label=1)
        area_ts = auc(recall_ts, precision_ts)
        area_tr = auc(recall_tr, precision_tr)
        test_i, f1_ts = compute_f1(precision_ts, recall_ts)
        train_i, f1_tr = compute_f1(precision_tr, recall_tr)
        print thresholds_ts[train_i]
        plt.plot(recall_ts, precision_ts, '--',label="%s test AUC: %0.3f f1: %0.3f" %(classes[i], area_ts, f1_ts), 
            color=colors[i])
        plt.plot(recall_tr, precision_tr, label="%s train AUC: %0.3f f1: %0.3f" %(classes[i],area_tr, f1_tr),
            color=colors[i])
        break
    plt.title('Precision Recall of MC Model ' + model_no)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.legend(loc="lower left", prop={'size':8})
    plt.grid(b=True, which='major')
    figure = plt.gcf()
    figure.set_size_inches(8, 6)
    plt.savefig('PRC_mc_model' + model_no +'.png')
Example #8
def plotPrecisionRecall(learner, learner_name, testFeatures, testAnswers):
    print 'Plotting Precision and Recall for ' + learner_name
    precDown, recDown, thrDown = precision_recall_curve(testAnswers == 0, learner.predict_proba(testFeatures)[:, 0])
    precUp, recUp, thrUp = precision_recall_curve(testAnswers == 1, learner.predict_proba(testFeatures)[:, 1])
    plotLines([[recDown], [precDown]], learner_name + ': Precision vs Recall(Down)', 'Recall', 'Precision')
    plotLines([[recUp], [precUp]], learner_name + ': Precision vs Recall(Up)', 'Recall', 'Precision')
    return (precDown, recDown, thrDown, precUp, recUp, thrUp)
Example #9
def fscore(y_test, y_score):
    """

    :param y_test: true labels for the test set (binarized inside the function)
    :param y_score: score matrix with the per-class probabilities from the estimator
    :return: plot object
    """

    # binarize output vector
    y_test = binarize(y_test)
    print('y_test binarized shape = ', np.shape(y_test))
    n_classes = np.shape(y_test)[1]

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test[:, i],
                                                            y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(),
                                                                    y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score,
                                                         average="micro")

    # Plot Precision-Recall curve
    plt.clf()
    plt.plot(recall[0], precision[0], label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
    plt.legend(loc="lower left")
    plt.show()

    # Plot Precision-Recall curve for each class
    plt.clf()
    plt.plot(recall["micro"], precision["micro"],
             label='micro-average Precision-recall curve (area = {0:0.2f})'
                   ''.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i],
                 label='Precision-recall curve of class {0} (area = {1:0.2f})'
                       ''.format(i, average_precision[i]))

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()

    return plt
Example #10
def plotCurve(arr):
    X = arr[:, :-1]
    y = arr[:, -1]
    # Binarize the output
    y = label_binarize(y, classes=[0,1])
    n_classes = y.shape[1]

    # Add noisy features
    random_state = np.random.RandomState(0)
    n_samples, n_features = X.shape

    X = np.c_[X, random_state.randn(n_samples, 150 * n_features)]

    # Split into training and test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=random_state)

    # Run classifier
    classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=random_state))
    y_score = classifier.fit(X_train, y_train).decision_function(X_test)

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test[:, i],y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score, average="micro")

    # Plot Precision-Recall curve
    plt.clf()
    plt.plot(recall[0], precision[0], label='Precision-Recall curve')
    print(recall)
    print(precision)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.05])
    plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
    plt.legend(loc="lower left")
    plt.show()

    # Plot Precision-Recall curve for each class
    plt.clf()
    plt.plot(recall["micro"], precision["micro"], label='micro-average Precision-recall curve (area = {0:0.2f})'''.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i], label='Precision-recall curve of class {0} (area = {1:0.2f})'''.format(i, average_precision[i]))
    plt.xlim([0.0, 1.05])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()
Example #11
def fscore_plot(classifier, X_test, y_test):


    # Binarize the output
    n_classes = max(y_test) - min(y_test) + 1

    y_test = label_binarize(y_test, classes=list(range(0,n_classes)))


    y_score = classifier.predict_proba(X_test)

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test[:, i],
                                                            y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(),
        y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score,
                                                         average="micro")

    # Plot Precision-Recall curve
    plt.clf()
    plt.plot(recall[0], precision[0], label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
    plt.legend(loc="lower left")
    plt.show()

    # Plot Precision-Recall curve for each class
    plt.clf()
    plt.plot(recall["micro"], precision["micro"],
             label='micro-average Precision-recall curve (area = {0:0.2f})'
                   ''.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i],
                 label='Precision-recall curve of class {0} (area = {1:0.2f})'
                       ''.format(i, average_precision[i]))

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()

    return plt
Example #12
def _test_precision_recall_curve(y_true, probas_pred):
    """Test Precision-Recall and aread under PR curve"""
    p, r, thresholds = precision_recall_curve(y_true, probas_pred)
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.85, 2)
    assert_array_almost_equal(precision_recall_auc, average_precision_score(y_true, probas_pred))
    # Smoke test in the case of proba having only one value
    p, r, thresholds = precision_recall_curve(y_true, np.zeros_like(probas_pred))
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.75, 3)
def test_precision_recall_curve_pos_label():
    y_true, _, probas_pred = make_prediction(binary=False)
    pos_label = 2
    p, r, thresholds = precision_recall_curve(y_true, probas_pred[:, pos_label], pos_label=pos_label)
    p2, r2, thresholds2 = precision_recall_curve(y_true == pos_label, probas_pred[:, pos_label])
    assert_array_almost_equal(p, p2)
    assert_array_almost_equal(r, r2)
    assert_array_almost_equal(thresholds, thresholds2)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
def precision_recall(predictions):
    prc_logreg = precision_recall_curve([int(y) for y in predictions[:, 0]], [float(w) for w in predictions[:, 1]])
    prc_svm = precision_recall_curve([int(y) for y in predictions[:, 0]], [float(w) for w in predictions[:, 2]])
    prc_knn = precision_recall_curve([int(y) for y in predictions[:, 0]], [float(w) for w in predictions[:, 3]])
    prc_tree = precision_recall_curve([int(y) for y in predictions[:, 0]], [float(w) for w in predictions[:, 4]])

    print('logreg => %s' % round(max(prc_logreg[0][prc_logreg[1] >= 0.7]), 2))
    print('svm => %s' % round(max(prc_svm[0][prc_svm[1] >= 0.7]), 2))
    print('knn => %s' % round(max(prc_knn[0][prc_knn[1] >= 0.7]), 2))
    print('tree => %s' % round(max(prc_tree[0][prc_tree[1] >= 0.7]), 2))
def compute_auc_prc(
    A_true,
    W_xcorr=None,
    bfgs_model=None,
    sgd_model=None,
    gibbs_samples=None,
    vb_models=None,
    svi_models=None,
    average="macro",
):
    """
    Compute the AUC of the precision recall curve
    :return:
    """
    A_flat = A_true.ravel()
    aucs = {}
    precs = {}
    recalls = {}

    if W_xcorr is not None:
        aucs["xcorr"] = average_precision_score(A_flat, W_xcorr.ravel(), average=average)
        precs["xcorr"], recalls["xcorr"], _ = precision_recall_curve(A_flat, W_xcorr.ravel())

    if bfgs_model is not None:
        assert isinstance(bfgs_model, DiscreteTimeStandardHawkesModel)
        W_bfgs = bfgs_model.W.copy()
        W_bfgs -= np.diag(np.diag(W_bfgs))
        aucs["bfgs"] = average_precision_score(A_flat, W_bfgs.ravel(), average=average)
        precs["bfgs"], recalls["bfgs"], _ = precision_recall_curve(A_flat, W_bfgs.ravel())

    if sgd_model is not None:
        assert isinstance(sgd_model, DiscreteTimeStandardHawkesModel)
        aucs["sgd"] = average_precision_score(A_flat, sgd_model.W.ravel(), average=average)
        # precs['sgd'], recalls['sgd'], _ = precision_recall_curve(A_flat, W_sgd.ravel())

    if gibbs_samples is not None:
        # Compute PR AUC based on mean value of W_effective in second half of samples
        Weff_samples = np.array([s.weight_model.W_effective for s in gibbs_samples])
        N_samples = Weff_samples.shape[0]
        offset = N_samples // 2
        Weff_mean = Weff_samples[offset:, :, :].mean(axis=0)

        aucs["gibbs"] = average_precision_score(A_flat, Weff_mean.ravel(), average=average)

    if vb_models is not None:
        # Compute PR AUC based on E[A] under the variational posterior
        aucs["vb"] = average_precision_score(A_flat, vb_models[-1].weight_model.expected_A().ravel(), average=average)

    if svi_models is not None:
        # Compute PR AUC based on E[A] under the variational posterior
        W_svi = svi_models[-1].weight_model.expected_W()
        aucs["svi"] = average_precision_score(A_flat, W_svi.ravel(), average=average)
        precs["svi"], recalls["svi"], _ = precision_recall_curve(A_flat, W_svi.ravel())

    return aucs, precs, recalls
Example #16
def wiggle_room_precision_recall(pred, boundary, margin=2, connectivity=1):
    struct = nd.generate_binary_structure(boundary.ndim, connectivity)
    gtd = nd.binary_dilation(boundary, struct, margin)
    struct_m = nd.iterate_structure(struct, margin)
    pred_dil = nd.grey_dilation(pred, footprint=struct_m)
    missing = np.setdiff1d(np.unique(pred), np.unique(pred_dil))
    for m in missing:
        pred_dil.ravel()[np.flatnonzero(pred==m)[0]] = m
    prec, _, ts = precision_recall_curve(gtd.ravel(), pred.ravel())
    _, rec, _ = precision_recall_curve(boundary.ravel(), pred_dil.ravel())
    return zip(ts, prec, rec)
def create_all_eval_results(y_true, y_pred, key,result_type,features_type,num_of_bugs,num_of_all_instances,bugs_Precent,from_model,system_features):
    precision_bugged = metrics.precision_score(y_true,y_pred,pos_label=1,average='binary')
    recall_bugged = metrics.recall_score(y_true,y_pred,pos_label=1,average='binary')
    f_measure_bugged = metrics.f1_score(y_true,y_pred,pos_label=1,average='binary')
    f2_measure_bugged = calculateF2(precision_bugged,recall_bugged)

    un_true,_ = np.unique(y_true, return_counts=True)
    un_pred,_ = np.unique(y_pred, return_counts=True)
    if len(un_true) ==1 or len(un_pred)==1:
        roc_bugged = '?'
        prc_bugged =  '?'
        print("zero")
    else:
        try:
            roc_bugged = metrics.roc_auc_score(y_true,y_pred,average=None)
        except:
            print("exception_roc")
            roc_bugged = '?'
        try:
            precision, recall, thresholds = metrics.precision_recall_curve(y_true, y_pred,pos_label=1)
            prc_bugged = metrics.auc(recall, precision)
        except:
            print("exception_prc")
            prc_bugged = '?'

    precision_all = metrics.precision_score(y_true, y_pred, average='weighted')
    recall_all = metrics.recall_score(y_true, y_pred, average='weighted')
    f_measure_all = metrics.f1_score(y_true, y_pred,average='weighted')
    f2_measure_all = calculateF2(precision_all, recall_all)
    if len(un_true) ==1 or len(un_pred)==1:
        roc_all = 0
        prc_all =  1
        print("zero")
    else:
        try:
            roc_all = metrics.roc_auc_score(y_true, y_pred, average='weighted')
        except:
            print("exception_roc")
            roc_all = 0
        try:
            precision, recall, thresholds = metrics.precision_recall_curve(y_true, y_pred)
            prc_all = metrics.auc(recall, precision)
        except:
            print("exception_prc")
            prc_all = 1

    global results_all_projects
    results_all_projects.loc[len(results_all_projects)] = [key, from_model ,result_type, features_type, "record-sensitive",str(system_features), precision_bugged,
                                                           recall_bugged, f_measure_bugged,
                                                           f2_measure_bugged, roc_bugged, prc_bugged,
                                                           precision_all, recall_all, f_measure_all, f2_measure_all,
                                                           roc_all, prc_all, num_of_bugs, num_of_all_instances,
                                                           bugs_Precent]
Example #18
def evaluate_classifier(X_train, X_test, y_train, y_test):
    '''
    Run multiple times with different classifiers to get an idea of the
    relative performance of each configuration.

    Returns a sequence of tuples containing:
        (title, precision, recall)
    for each learner.
    '''

    # Import some classifiers to test
    from sklearn.svm import LinearSVC, NuSVC
    from sklearn.ensemble import AdaBoostClassifier

    # We will calculate the P-R curve for each classifier
    from sklearn.metrics import precision_recall_curve, f1_score
    
    # Here we create classifiers with default parameters. These need
    # to be adjusted to obtain optimal performance on your data set.
    
    # Test the linear support vector classifier
    classifier = LinearSVC(C=1)
    # Fit the classifier
    classifier.fit(X_train, y_train)
    score = f1_score(y_test, classifier.predict(X_test))
    # Generate the P-R curve
    y_prob = classifier.decision_function(X_test)
    precision, recall, _ = precision_recall_curve(y_test, y_prob)
    # Include the score in the title
    yield 'Linear SVC (F1 score={:.3f})'.format(score), precision, recall

    # Test the Nu support vector classifier
    classifier = NuSVC(kernel='rbf', nu=0.5, gamma=1e-3)
    # Fit the classifier
    classifier.fit(X_train, y_train)
    score = f1_score(y_test, classifier.predict(X_test))
    # Generate the P-R curve
    y_prob = classifier.decision_function(X_test)
    precision, recall, _ = precision_recall_curve(y_test, y_prob)
    # Include the score in the title
    yield 'NuSVC (F1 score={:.3f})'.format(score), precision, recall

    # Test the Ada boost classifier
    classifier = AdaBoostClassifier(n_estimators=50, learning_rate=1.0, algorithm='SAMME.R')
    # Fit the classifier
    classifier.fit(X_train, y_train)
    score = f1_score(y_test, classifier.predict(X_test))
    # Generate the P-R curve
    y_prob = classifier.decision_function(X_test)
    precision, recall, _ = precision_recall_curve(y_test, y_prob)
    # Include the score in the title
    yield 'Ada Boost (F1 score={:.3f})'.format(score), precision, recall
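A hedged usage sketch for the generator above (it assumes evaluate_classifier is defined in scope as shown; the dataset and split are illustrative only). Each yielded (title, precision, recall) tuple is drawn as one curve.

import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=400, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

for title, precision, recall in evaluate_classifier(X_train, X_test, y_train, y_test):
    plt.plot(recall, precision, label=title)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend(loc='lower left')
plt.show()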
Example #19
  def eval_measures(self, x, fwd, ninputs, prefix, logging, rng=42):
    xp, yp, xn, yn = [], [], [], []
    xp_raw, yp_raw, xn_raw, yn_raw = [], [], [], []
    print "Evaluation on %i mini-batches" % len(x)
    for i in xrange(len(x)):
      xp.append(fwd(*x[i][:ninputs]))
      yp.append(fwd(*x[i][ninputs:2*ninputs]))
      xn.append(fwd(*x[i][2*ninputs:3*ninputs]))
      yn.append(fwd(*x[i][3*ninputs:]))

      xp_raw.append(x[i][0])
      yp_raw.append(x[i][ninputs])
      xn_raw.append(x[i][2*ninputs])
      yn_raw.append(x[i][3*ninputs])

    xp = np.concatenate(xp, axis=0) 
    yp = np.concatenate(yp, axis=0) 
    xn = np.concatenate(xn, axis=0) 
    yn = np.concatenate(yn, axis=0) 
    
    xp_raw = np.concatenate(xp_raw, axis=0) 
    yp_raw = np.concatenate(yp_raw, axis=0) 
    xn_raw = np.concatenate(xn_raw, axis=0) 
    yn_raw = np.concatenate(yn_raw, axis=0) 
    
    np.random.seed(rng)

    dp_raw = np.sqrt(np.sum((xp_raw - yp_raw)**2., axis=1))
    dn_raw = np.sqrt(np.sum((xn_raw - yn_raw)**2., axis=1))
    D_raw = np.concatenate([dp_raw, dn_raw], axis=0)
    
    dp = np.sqrt(np.sum((xp - yp)**2., axis=1))
    dn = np.sqrt(np.sum((xn - yn)**2., axis=1))
    D = np.concatenate([dp, dn], axis=0)
    
    dp_bin = np.sqrt(np.sum((np.float32(xp > 0.) - np.float32(yp > 0.))**2., axis=1))
    dn_bin = np.sqrt(np.sum((np.float32(xn > 0.) - np.float32(yn > 0.))**2., axis=1))
    D_bin = np.concatenate([dp_bin, dn_bin], axis=0)

    y_true = np.zeros((D.shape[0], )) - 1.
    y_true[:dp.shape[0]] = 1.
    
    AUC = roc_auc_score(y_true, -D)
    AUC_raw = roc_auc_score(y_true, -D_raw)
    AUC_bin = roc_auc_score(y_true, -D_bin)
    precision, recall, thresholds = precision_recall_curve(y_true, -D)
    precision_raw, recall_raw, thresholds_raw = precision_recall_curve(y_true, -D_raw)
    precision_bin, recall_bin, thresholds_bin = precision_recall_curve(y_true, -D_bin)
   
    logging.info("...NN AUC (bin) %f (%f), RAW AUC %f", AUC, AUC_bin, AUC_raw)

    return AUC, precision, recall
Example #20
def prPlot(ytest, yprob, yprob2=None, method1="", method2=""):
    pl.clf()
    auc = prAUC(ytest, yprob)
    precision, recall, th = metrics.precision_recall_curve(ytest, yprob)
    pl.plot(recall, precision, label='Precision-Recall: %s (%0.3f)' % (method1, auc) )
    if yprob2 is not None:
        pr2, rc2, t2 = metrics.precision_recall_curve(ytest, yprob2)
        pl.plot(rc2, pr2, 'r', label='Precision-Recall: %s (%0.3f)' % (method2, prAUC(ytest, yprob2)) )
    pl.xlabel('Recall')
    pl.ylabel('Precision')
    pl.ylim([0.0, 1.05])
    pl.xlim([0.0, 1.0])
    pl.title('Precision-Recall curve (AUC = %0.3f)' % auc)
    pl.legend(loc="lower left")
    pl.show()
def plot_precision_recall_charts(df_result, classifiers, feature_sets, grouping, graph_files_dir, save=False):
    if grouping == 'fset':
        for fset in feature_sets:
            fig, axes = plt.subplots(1, 1, figsize=(10, 7))
            for clf in classifiers:
                try:
                    df = df_result[(df_result.model_name == clf) & (df_result.fset_name == fset)]
                    precision, recall, thresholds = precision_recall_curve(df.target_ind, df.pred_1_prob)
                    axes.plot(recall, precision, label=clf)
                except:
                    pass
            plt.title('Feature Set: ' + fset + '\nPrecision Recall Chart')
            plt.grid(True)
            plt.yticks(np.arange(0, 1.1, 0.1))
            plt.xticks(np.arange(0, 1.1, 0.1))
            plt.xlabel('Recall')
            plt.ylabel('Precision')
            plt.legend(loc='lower left')
            leg = plt.gca().get_legend()
            leg.set_title('Classification Model')
            ltext = leg.get_texts()
            plt.setp(ltext, fontsize='small')
            if save:
                plt.savefig(graph_files_dir + 'pr chart - ' + fset + '.png')

    if grouping == 'clf':
        for clf in classifiers:
            fig, axes = plt.subplots(1, 1, figsize=(10, 7))
            for fset in feature_sets:
                try:
                    df = df_result[(df_result.model_name == clf) & (df_result.fset_name == fset)]
                    precision, recall, thresholds = precision_recall_curve(df.target_ind, df.pred_1_prob)
                    axes.plot(recall, precision, label=fset)
                except:
                    pass
            plt.title('Classification Model: ' + clf + '\nPrecision Recall Chart')
            plt.grid(True)
            plt.yticks(np.arange(0, 1.1, 0.1))
            plt.xticks(np.arange(0, 1.1, 0.1))
            plt.xlabel('Recall')
            plt.ylabel('Precision')
            plt.legend(loc='lower left')
            leg = plt.gca().get_legend()
            leg.set_title('Feature Set')
            ltext = leg.get_texts()
            plt.setp(ltext, fontsize='small')
            if save:
                plt.savefig(graph_files_dir + 'pr chart - ' + clf + '.png')
def PR_multi_class(data_train, data_train_vectors, data_test, data_test_vectors):
    # Binarize the output
    y_train_label = label_binarize(data_train.target, classes=[0, 1, 2])
    n_classes = y_train_label.shape[1]
    
    random_state = np.random.RandomState(0)
    
    # shuffle and split training and test sets
    X_train, X_test, y_train, y_test = train_test_split(data_train_vectors, y_train_label, test_size=.5,
                                                        random_state=random_state)
    
    # Learn to predict each class against the other
    classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=random_state))
    classifier.fit(X_train, y_train)
    y_pred_score = classifier.decision_function(data_test_vectors)
    
    y_test_label = label_binarize(data_test.target, classes=[0, 1, 2])
    
    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test_label[:, i], y_pred_score[:, i])
        average_precision[i] = average_precision_score(y_test_label[:, i], y_pred_score[:, i])
    
    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test_label.ravel(), y_pred_score.ravel())
    average_precision["micro"] = average_precision_score(y_test_label, y_pred_score, average="micro")
    
    # Plot Precision-Recall curve for each class
    plt.clf()
#    plt.plot(recall["micro"], precision["micro"],
#             label='micro-average PR curve (area = {0:0.2f})'
#                   ''.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i],
                 label='PR curve of class {0} (area = {1:0.2f})'
                       ''.format(i, average_precision[i]))
    
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall curve of multi-class')
    plt.legend(loc="lower right")
    plt.show()
    return 0
Example #23
 def calc_precision_recall_fmeasure(self):
     """ Computes Precision, Recall, F-measure and Support """
     
     #  precision, recall, F-measure and support for each class for a given thresholds
     for threshold in [10, 30, 50]:
         result = precision_recall_fscore_support(self.y_true, prediction_to_binary(self.y_pred, threshold))
         self.scores['Precision ' + str(threshold) + '%'] = result[0]
         self.scores['Recall ' + str(threshold) + '%'] = result[1]
         self.scores['F-score ' + str(threshold) + '%'] = result[2]
         self.scores['Support'] = result[3]
        
     # Computes precision-recall pairs for different probability thresholds
     self.precision, self.recall, self.thresholds = precision_recall_curve(self.y_true, self.y_pred)    
     #print "precision = " + str(precision)
     #print "recall = " + str(recall)
     #print "thresholds = " +  str(thresholds)
     
     # Compute the area under the precision-recall curve (average precision from prediction scores)
     self.scores['Precision-Recall AUC'] = average_precision_score(self.y_true, self.y_pred)    
     
     
     self.scores['Weighted Precision'] = average_precision_score(self.y_true, self.y_pred, average='weighted') # weighted average precision by support (the number of true instances for each label).
     self.scores['Average Recall'] = np.average(self.recall)
     self.scores['Average Threshold'] = np.average(self.thresholds)
     
     return
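The method above depends on an external prediction_to_binary helper that is not shown. A standalone sketch of the same thresholded-metrics idea, with a hypothetical percentage-threshold binarization written inline (the threshold/100 scaling is an assumption, not the original helper):

import numpy as np
from sklearn.metrics import (average_precision_score, precision_recall_curve,
                             precision_recall_fscore_support)

rng = np.random.RandomState(0)
y_true = rng.randint(0, 2, size=300)
y_pred = np.clip(0.4 * y_true + 0.6 * rng.rand(300), 0, 1)   # predicted probabilities

for threshold in [10, 30, 50]:
    y_bin = (y_pred >= threshold / 100.0).astype(int)        # hypothetical prediction_to_binary
    p, r, f, s = precision_recall_fscore_support(y_true, y_bin)
    print(threshold, p, r, f, s)

precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
print('Precision-Recall AUC (average precision):', average_precision_score(y_true, y_pred))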
Example #24
def _test_precision_recall_curve(y_true, probas_pred):
    # Test precision-recall and area under the PR curve
    p, r, thresholds = precision_recall_curve(y_true, probas_pred)
    precision_recall_auc = _average_precision_slow(y_true, probas_pred)
    assert_array_almost_equal(precision_recall_auc, 0.859, 3)
    assert_array_almost_equal(precision_recall_auc,
                              average_precision_score(y_true, probas_pred))
    assert_almost_equal(_average_precision(y_true, probas_pred),
                        precision_recall_auc, decimal=3)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
    # Smoke test in the case of proba having only one value
    p, r, thresholds = precision_recall_curve(y_true,
                                              np.zeros_like(probas_pred))
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
def plot_precision_recall_n(y_true, y_prob, model_name):
	from sklearn.metrics import precision_recall_curve
	y_score = y_prob
	precision_curve, recall_curve, pr_thresholds = precision_recall_curve(y_true, y_score)
	precision_curve = precision_curve[:-1]
	recall_curve = recall_curve[:-1]
	pct_above_per_thresh = []
	number_scored = len(y_score)
	for value in pr_thresholds:
		num_above_thresh = len(y_score[y_score>=value])
		pct_above_thresh = num_above_thresh / float(number_scored)
		pct_above_per_thresh.append(pct_above_thresh)
	pct_above_per_thresh = np.array(pct_above_per_thresh)
	plt.clf()
	fig, ax1 = plt.subplots()
	ax1.plot(pct_above_per_thresh, precision_curve, 'b')
	ax1.set_xlabel('percent of population')
	ax1.set_ylabel('precision', color='b')
	ax2 = ax1.twinx()
	ax2.plot(pct_above_per_thresh, recall_curve, 'r')
	ax2.set_ylabel('recall', color='r')
	# fig.show()
	name = model_name + " Precision Recall vs Population for "+ ".png"
	plt.title(name)
	plt.savefig(name)
	return fig
Example #26
def model(x_train, x_test, y_train, y_test,clf):
    print(clf)  
    clf.fit(x_train, y_train)  
    f = file(outpath,'a+')
    f.write(str(clf))
    f.write("\n")
    ''' The coefficients reflect each feature's influence: the larger the value, the more that feature contributes to the classification. '''
#     print(clf.feature_importances_)  
  
    ''' Print the test results '''
    answer = clf.predict(x_test)  
#     print(x_train)  
#     print(answer)  
#     print(y_train)  
    avrage = np.mean(answer == y_test)
    f.write(str(avrage))
    f.write("\n")
    ''' Precision and recall '''
    precision, recall, thresholds = precision_recall_curve(y_test, clf.predict(x_test))  
#     print (precision)
#     answer = clf.predict_proba(x_trian)[:,1]  
#     print answer
#     answer = answer > 0.3
    report = classification_report(y_test, answer, target_names = ['女', '男'])
    print report
    f.write(str(report))
    f.write("\n\n")
    f.close()
def plot_precision_recall_n(y_true, y_prob, model_name):
    '''
    Takes the model, plots precision and recall curves
    '''
    y_score = y_prob
    precision_curve, recall_curve, pr_thresholds = precision_recall_curve(y_true, y_score)
    precision_curve = precision_curve[:-1]
    recall_curve = recall_curve[:-1]
    pct_above_per_thresh = []
    number_scored = len(y_score)

    for value in pr_thresholds:
        num_above_thresh = len(y_score[y_score >= value])
        pct_above_thresh = num_above_thresh / float(number_scored)
        pct_above_per_thresh.append(pct_above_thresh)

    pct_above_per_thresh = np.array(pct_above_per_thresh)
    plt.clf()
    fig, ax1 = plt.subplots()
    ax1.plot(pct_above_per_thresh, precision_curve, 'b')
    ax1.set_xlabel('percent of population')
    ax1.set_ylabel('precision', color='b')
    ax2 = ax1.twinx()
    ax2.plot(pct_above_per_thresh, recall_curve, 'r')
    ax2.set_ylabel('recall', color='r')

    name = str(model_name)
    try:
        plt.title(name)
        plt.savefig("Output/Images/{}.png".format(name))
    except:
        name = name[:75]
        plt.title(name)
        plt.savefig("Output/Images/{}.png".format(name))
    plt.close()
Example #28
 def evaluation(self, test_data, test_label):
     dinx = np.array(list(self.train_drugs))
     DS = self.dsMat[:, dinx]
     tinx = np.array(list(self.train_targets))
     TS = self.tsMat[:, tinx]
     scores = []
     if self.K2 > 0:
         for d, t in test_data:
             if d in self.train_drugs:
                 if t in self.train_targets:
                     val = np.sum(self.U[d, :]*self.V[t, :])
                 else:
                     jj = np.argsort(TS[t, :])[::-1][:self.K2]
                     val = np.sum(self.U[d, :]*np.dot(TS[t, jj], self.V[tinx[jj], :]))/np.sum(TS[t, jj])
             else:
                 if t in self.train_targets:
                     ii = np.argsort(DS[d, :])[::-1][:self.K2]
                     val = np.sum(np.dot(DS[d, ii], self.U[dinx[ii], :])*self.V[t, :])/np.sum(DS[d, ii])
                 else:
                     ii = np.argsort(DS[d, :])[::-1][:self.K2]
                     jj = np.argsort(TS[t, :])[::-1][:self.K2]
                     v1 = DS[d, ii].dot(self.U[dinx[ii], :])/np.sum(DS[d, ii])
                     v2 = TS[t, jj].dot(self.V[tinx[jj], :])/np.sum(TS[t, jj])
                     val = np.sum(v1*v2)
             scores.append(np.exp(val)/(1+np.exp(val)))
     elif self.K2 == 0:
         for d, t in test_data:
             val = np.sum(self.U[d, :]*self.V[t, :])
             scores.append(np.exp(val)/(1+np.exp(val)))
     prec, rec, thr = precision_recall_curve(test_label, np.array(scores))
     aupr_val = auc(rec, prec)
     fpr, tpr, thr = roc_curve(test_label, np.array(scores))
     auc_val = auc(fpr, tpr)
     return aupr_val, auc_val
Example #29
def two_class_combo_plotter(y_test, y_score):
    plt.figure()
    y_test = one_hot(y_test)
    fpr, tpr, thresholds = roc_curve(y_test[:, 1], y_score[:, 1])

    for i, t in enumerate(thresholds):
        if t < 0.425:
            print(t, fpr[i], tpr[i])
            break
    roc_auc = auc(fpr, tpr)

    plt.plot(thresholds, fpr, label="FPR")
    plt.plot(thresholds, tpr, label="TPR / Recall")

    precision, recall, thresholds = precision_recall_curve(y_test[:, 1], y_score[:, 1])

    for i, t in enumerate(thresholds):
        if t > 0.425:
            print(t, precision[i], recall[i])
            break

    plt.plot(thresholds, precision[1:], label="Precision")

    plt.plot(thresholds, [f1(precision[i], recall[i]) for i in range(len(thresholds))], label="F1")

    plt.plot([0.408] * 100, np.arange(0, 1, 0.01), "k--")
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("Threshold")
    plt.legend(loc="lower right")
def train_and_evaluate_tree_model(X_train, X_test, y_train, y_test,
                                  show_plots=False):

  # Train model
  print 'training model...'
  a = time.clock()
  model = ensemble.GradientBoostingClassifier(n_estimators=100,
                                              max_depth=15,
                                              max_features='sqrt')
  model.fit(X_train, y_train)
  b = time.clock()
  print 'training took', (b - a) / 60, 'minutes'

  # Evaluate model
  print 'Model score (accuracy):', model.score(X_test, y_test)
  predicted = model.predict(X_test)
  predicted_probs = model.predict_proba(X_test)
  fpr, tpr, thresholds = roc_curve(y_test, predicted_probs[:,1])
  if show_plots:
    plot_auc(fpr, tpr)
 
  print 'AUC', roc_auc_score(y_test, predicted_probs[:,1])
  precision, recall, thresholds = (
    precision_recall_curve(y_test, predicted_probs[:,1]))
  both = zip(precision, recall)
  print 'Max min of P/Se', max([min(r) for r in both])
  if show_plots:
    plot_precision_recall(precision, recall)
# Area under the ROC curve
fpr, tpr, thresholds = roc_curve((y_true), y_scores)
AUC_ROC = roc_auc_score(y_true, y_scores)
# test_integral = np.trapz(tpr,fpr) #trapz is numpy integration
print("\nArea under the ROC curve: " + str(AUC_ROC))
roc_fig = plt.figure()  # avoid shadowing sklearn's roc_curve
plt.plot(fpr, tpr, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_ROC)
plt.title('ROC curve')
plt.xlabel("FPR (False Positive Rate)")
plt.ylabel("TPR (True Positive Rate)")
plt.legend(loc="lower right")
plt.savefig(path_experiment + "ROC.png")

#Precision-recall curve
precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
precision = np.fliplr(
    [precision])[0]  #so the array is increasing (you won't get negative AUC)
recall = np.fliplr(
    [recall])[0]  #so the array is increasing (you won't get negative AUC)
AUC_prec_rec = np.trapz(precision, recall)
print("\nArea under Precision-Recall curve: " + str(AUC_prec_rec))
prec_rec_curve = plt.figure()
plt.plot(recall,
         precision,
         '-',
         label='Area Under the Curve (AUC = %0.4f)' % AUC_prec_rec)
plt.title('Precision - Recall curve')
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.legend(loc="lower right")
Example #32
def plot_precision_recall_curve(y_true,
                                y_probas,
                                title='Precision-Recall Curve',
                                curves=('micro', 'each_class'),
                                ax=None,
                                figsize=None,
                                title_fontsize="large",
                                text_fontsize="medium"):
    """Generates the Precision Recall Curve for a set of ground truth labels and classifier probability predictions.

    Args:
        y_true (array-like, shape (n_samples)):
            Ground truth (correct) target values.

        y_probas (array-like, shape (n_samples, n_classes)):
            Prediction probabilities for each class returned by a classifier.

        curves (array-like): A listing of which curves should be plotted on the
            resulting plot. Defaults to `("micro", "each_class")`,
            i.e. "micro" for the micro-averaged curve and "each_class"
            for one curve per class.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to plot
            the learning curve. If None, the plot is drawn on a new set of axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot e.g. (6, 6).
            Defaults to ``None``.

        title_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to "medium".

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was drawn.

    Example:
        >>> import scikitplot.plotters as skplt
        >>> nb = GaussianNB()
        >>> nb = nb.fit(X_train, y_train)
        >>> y_probas = nb.predict_proba(X_test)
        >>> skplt.plot_precision_recall_curve(y_test, y_probas)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_precision_recall_curve.png
           :align: center
           :alt: Precision Recall Curve
    """
    classes = np.unique(y_true)
    probas = y_probas

    if 'micro' not in curves and 'each_class' not in curves:
        raise ValueError(
            'Invalid argument for curves as it only takes "micro" or "each_class"'
        )

    # Compute Precision-Recall curve and area for each class
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(len(classes)):
        precision[i], recall[i], _ = precision_recall_curve(
            y_true, probas[:, i], pos_label=classes[i])

    y_true = label_binarize(y_true, classes=classes)
    if len(classes) == 2:
        y_true = np.hstack((1 - y_true, y_true))

    for i in range(len(classes)):
        average_precision[i] = average_precision_score(y_true[:, i], probas[:,
                                                                            i])

    # Compute micro-average precision-recall curve and average precision
    micro_key = 'micro'
    i = 0
    while micro_key in precision:
        i += 1
        micro_key += str(i)

    precision[micro_key], recall[micro_key], _ = precision_recall_curve(
        y_true.ravel(), probas.ravel())
    average_precision[micro_key] = average_precision_score(y_true,
                                                           probas,
                                                           average='micro')

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)

    ax.set_title(title, fontsize=title_fontsize)

    if 'each_class' in curves:
        for i in range(len(classes)):
            ax.plot(recall[i],
                    precision[i],
                    lw=2,
                    label='Precision-recall curve of class {0} '
                    '(area = {1:0.3f})'.format(classes[i],
                                               average_precision[i]))

    if 'micro' in curves:
        ax.plot(recall[micro_key],
                precision[micro_key],
                label='micro-average Precision-recall curve '
                '(area = {0:0.3f})'.format(average_precision[micro_key]),
                color='navy',
                linestyle=':',
                linewidth=4)

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.tick_params(labelsize=text_fontsize)
    ax.legend(loc='best', fontsize=text_fontsize)
    return ax
Example #33
plt.title('Some extension of Receiver operating characteristic to multi-class')
plt.legend(loc="lower right")
plt.savefig('roc.png')



# ------------------------------------------------------------- #
# ----------------------- PR CURVE --------------------------- #
# ------------------------------------------------------------- #
plt.figure(3)
# precision recall curve
precision = dict()
recall = dict()
pr_auc = dict()
for i in range(n_classes):
    precision[i], recall[i], _ = precision_recall_curve(dummy_y_Test[:, i],
                                                            predictions[:, i])
    pr_auc[i] = auc(recall[i], precision[i])

colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
for i, color in zip(range(n_classes), colors):
    plt.plot(recall[i], precision[i], color=color, lw=lw,
             label='PR curve of class {0} (area = {1:0.2f})'
             ''.format(class_names[i], pr_auc[i]))
    
    # plt.plot([1, 0], [0, 1], 'k--', lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])    
plt.xlabel("recall")
plt.ylabel("precision")
plt.legend(loc="best")
plt.title("precision vs. recall curve")
print(confusion_matrix(label__test, predictions))
print(classification_report(label__test, predictions))
accuracy = (accuracy_score(label__test, predictions, normalize=True, sample_weight=None) * 100)
print("Accuracy:" + str(accuracy) + "%")

# Tree Visualization
iris = load_iris()
clf = DecisionTreeClassifier()
clf.fit(iris.data, iris.target)
dot_data = tree.export_graphviz(clf, out_file=None,
                     feature_names=iris.feature_names,
                     class_names=iris.target_names,
                     filled=True, rounded=True,
                     special_characters=True)
graph = graphviz.Source(dot_data)

# Precision Recall Curve
average_precision = average_precision_score(label__test, predictions)
precision, recall, _ = precision_recall_curve(label__test, predictions)
step_kwargs = ({'step':'post'}
                if 'step' in signature(plt.fill_between).parameters
                else {})
plt.step(recall, precision, color='b', alpha=0.2, where='post')
plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision))
plt.show()
def train_model(clf_factory, X, Y, name, plot=False):
    labels = np.unique(Y)

    # cv = ShuffleSplit(n=len(X), n_iter=1, test_size=0.3, indices=True, random_state=0)
    cv = ShuffleSplit(n=len(X), n_iter=1, test_size=0.3, random_state=0)

    train_errors = []
    test_errors = []

    scores = []
    pr_scores = defaultdict(list)
    precisions, recalls, thresholds = defaultdict(list), defaultdict(
        list), defaultdict(list)

    roc_scores = defaultdict(list)
    tprs = defaultdict(list)
    fprs = defaultdict(list)

    clfs = []  # just to later get the median

    cms = []

    for train, test in cv:
        X_train, y_train = X[train], Y[train]
        X_test, y_test = X[test], Y[test]

        clf = clf_factory()
        clf.fit(X_train, y_train)  # TODO: dimensions do not match
        clfs.append(clf)

        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)
        scores.append(test_score)

        train_errors.append(1 - train_score)
        test_errors.append(1 - test_score)

        y_pred = clf.predict(X_test)
        cm = confusion_matrix(y_test, y_pred)
        cms.append(cm)

        for label in labels:
            y_label_test = np.asarray(y_test == label, dtype=int)
            proba = clf.predict_proba(X_test)
            proba_label = proba[:, label]

            precision, recall, pr_thresholds = precision_recall_curve(
                y_label_test, proba_label)
            pr_scores[label].append(auc(recall, precision))
            precisions[label].append(precision)
            recalls[label].append(recall)
            thresholds[label].append(pr_thresholds)

            fpr, tpr, roc_thresholds = roc_curve(y_label_test, proba_label)
            roc_scores[label].append(auc(fpr, tpr))
            tprs[label].append(tpr)
            fprs[label].append(fpr)

    if plot:
        for label in labels:
            print("Plotting", genre_list[label])
            scores_to_sort = roc_scores[label]
            median = np.argsort(scores_to_sort)[len(scores_to_sort) // 2]

            desc = "%s %s" % (name, genre_list[label])
            plot_pr(pr_scores[label][median],
                    desc,
                    precisions[label][median],
                    recalls[label][median],
                    label='%s vs rest' % genre_list[label])
            plot_roc(roc_scores[label][median],
                     desc,
                     tprs[label][median],
                     fprs[label][median],
                     label='%s vs rest' % genre_list[label])

    all_pr_scores = np.asarray(list(pr_scores.values())).flatten()
    summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores),
               np.std(all_pr_scores))
    print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)
def pr_auc_score(y_true, y_score):
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    return auc(recall, precision)
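pr_auc_score can be plugged into cross-validation as a scorer. A minimal sketch on synthetic data; make_scorer's needs_threshold=True flag (newer scikit-learn releases spell this response_method instead) makes the scorer pass continuous decision scores, not hard labels, to the metric.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import auc, make_scorer, precision_recall_curve
from sklearn.model_selection import cross_val_score

def pr_auc_score(y_true, y_score):
    # Same as above, repeated so this sketch runs standalone
    precision, recall, _ = precision_recall_curve(y_true, y_score)
    return auc(recall, precision)

X, y = make_classification(n_samples=300, weights=[0.9, 0.1], random_state=0)
pr_auc_scorer = make_scorer(pr_auc_score, needs_threshold=True)
print(cross_val_score(LogisticRegression(max_iter=1000), X, y, cv=5, scoring=pr_auc_scorer))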
    # to avoid exploring the regime in which very noisy variables enter
    # the model
    alphas = np.linspace(lars_cv.alphas_[0], .1 * lars_cv.alphas_[0], 6)
    clf = RandomizedLasso(alpha=alphas, random_state=42).fit(X, y)
    trees = ExtraTreesRegressor(100).fit(X, y)
    # Compare with F-score
    F, _ = f_regression(X, y)

    plt.figure()
    for name, score in [
        ('F-test', F),
        ('Stability selection', clf.scores_),
        ('Lasso coefs', np.abs(lars_cv.coef_)),
        ('Trees', trees.feature_importances_),
    ]:
        precision, recall, thresholds = precision_recall_curve(
            coef != 0, score)
        plt.semilogy(np.maximum(score / np.max(score), 1e-4),
                     label="%s. AUC: %.3f" % (name, auc(recall, precision)))

    plt.plot(np.where(coef != 0)[0], [2e-4] * n_relevant_features,
             'mo',
             label="Ground truth")
    plt.xlabel("Features")
    plt.ylabel("Score")
    # Plot only the 100 first coefficients
    plt.xlim(0, 100)
    plt.legend(loc='best')
    plt.title('Feature selection scores - Mutual incoherence: %.1f' % mi)

plt.show()
Example #38
    random_state=0)

y_pred = y_test.value_counts().idxmax()
total = len(y_test.index)
correct = y_test.value_counts().max()
y_pred = pd.Series([1] + [y_pred] * (len(y_test) - 1))
#y_pred = pd.Series([y_pred] * len(y_test - 1))
df_confusion = pd.crosstab(y_test, y_pred)
df_norm = df_confusion.values / df_confusion.sum(axis=1)[:, None]
ax = sn.heatmap(df_confusion,
                annot=True,
                annot_kws={"size": 20},
                cmap="YlGnBu")
plt.xlabel('Predicted label', fontsize=20)
plt.ylabel('True label', fontsize=20)
plt.title('Confusion Matrix w/o Normalization (baseline)', fontsize=20)
plt.show()
precision, recall, thresholds = metrics.precision_recall_curve(y_test, y_pred)
score = correct / total
average_precision = metrics.average_precision_score(y_test, y_pred)
auc = metrics.auc(recall, precision)
print("Majority predicts {}, score: {}".format(y_pred[0], score))
print("Average PR score: {0:0.2f}".format(average_precision))
print("AUC: {0:0.2f}".format(auc))
plt.plot([0, 1], [0.5, 0.5], linestyle='--')
plt.plot(recall, precision, marker='.')
plt.xlabel('Recall', fontsize=20)
plt.ylabel('Precision', fontsize=20)
plt.title('Precision-Recall Curve Baseline', fontsize=20)
#plt.show()
decoder = Dense(hidden_sizes[2], activation='relu')(encoder)
decoder = Dense(input_size)(decoder)
deep_ae = Model(inputs=input_layer, outputs=decoder)
print(deep_ae.summary())

optimizer = optimizers.Adam(lr=0.00005)
deep_ae.compile(optimizer=optimizer, loss='mean_squared_error')

tensorboard = TensorBoard(log_dir='./logs/run2/', write_graph=True, write_images=False)

model_file = "model_deep_ae.h5"
checkpoint = ModelCheckpoint(model_file, monitor='loss', verbose=1, save_best_only=True, mode='min')

num_epoch = 50
batch_size = 64
deep_ae.fit(X_train, X_train, epochs=num_epoch, batch_size=batch_size, shuffle=True, validation_data=(X_test, X_test),
            verbose=1, callbacks=[checkpoint, tensorboard])

recon = deep_ae.predict(X_test)

recon_error = np.mean(np.power(X_test - recon, 2), axis=1)


from sklearn.metrics import (precision_recall_curve, auc)

precision, recall, th = precision_recall_curve(Y_test, recon_error)
area = auc(recall, precision)
print('Area under precision-recall curve:', area)
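# --- Added illustrative sketch (not part of the original example) ---
# Beyond reporting the PR AUC, the same precision/recall/th arrays can be used
# to pick an operating threshold for the reconstruction-error detector. The
# best-F1 rule below is one common choice, not necessarily what the original
# author used; it assumes precision, recall, th and recon_error from above.
import numpy as np

f1 = 2 * precision[:-1] * recall[:-1] / np.clip(precision[:-1] + recall[:-1], 1e-12, None)
best = np.argmax(f1)
threshold = th[best]
print('best-F1 threshold: %.4f (precision=%.3f, recall=%.3f, F1=%.3f)'
      % (threshold, precision[best], recall[best], f1[best]))
outlier_flags = (recon_error > threshold).astype(int)  # 1 = flagged as anomalous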


                        y_pred_task[i].extend(pred_lable)
                        # y_pred_task_score[i].extend(y_pred)
                    except:
                        y_true_task[i] = []
                        y_pred_task[i] = []
                        # y_pred_task_score[i] = []
                        y_true_task[i].extend(y_label.cpu().numpy())
                        y_pred_task[i].extend(pred_lable)
                        # y_pred_task_score[i].extend(y_pred.cpu().detach().numpy())

                val_sum_loss.append(loss.cpu().detach().numpy())

            val_avg_loss = np.array(val_sum_loss).mean()

            trn_roc = [metrics.roc_auc_score(y_true_task[i], y_pred_task[i]) for i in range(tasks_num)]
            trn_prc = []
            for i in range(tasks_num):
                prc_precision, prc_recall, _ = precision_recall_curve(y_true_task[i], y_pred_task[i])
                trn_prc.append(metrics.auc(prc_recall, prc_precision))
            # acc = [metrics.accuracy_score(y_true_task[i], y_pred_task[i]) for i in range(tasks_num)]
            # recall = [metrics.recall_score(y_true_task[i], y_pred_task[i]) for i in range(tasks_num)]
            # specificity = [cm[i][0, 0] / (cm[i][0, 0] + cm[i][0, 1]) for i in range(tasks_num)]

            print("epoch:", epoch, "   val  "  "avg_loss:", val_avg_loss,
                  # "acc: ", np.array(acc).mean(),
                  # "recall: ", np.array(recall).mean(),
                  # "specificity: ", np.array(specificity).mean(),
                  # " val_auc: ", trn_roc,
                  " val_auc: ", np.array(trn_roc).mean(),
                  # " val_pr: ", trn_prc,
                  " val_pr: ", np.array(trn_prc).mean())
auc = roc_auc_score(y_test, probs)
print('AUC: %.3f' % auc)
# calculate roc curve
fpr, tpr, thresholds = roc_curve(y_test, probs)
# plot no skill
pyplot.plot([0, 1], [0, 1], linestyle='--')
# plot the roc curve for the model
pyplot.plot(fpr, tpr, marker='.')
pyplot.xlabel('FP RATE')
pyplot.ylabel('TP RATE')
# show the plot
pyplot.show()"""

average_precision = average_precision_score(y_test, rounded)
precision, recall, _ = precision_recall_curve(y_test, rounded)
"""
# In matplotlib < 1.5, plt.fill_between does not have a 'step' argument
step_kwargs = ({'step': 'post'}
               if 'step' in signature(plt.fill_between).parameters
               else {})
plt.step(recall, precision, color='b', alpha=0.2,
         where='post')
plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision))
plt.show()"""
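# --- Added illustrative note (not part of the original example) ---
# 'rounded' above is a vector of hard 0/1 predictions; feeding hard labels to
# average_precision_score / precision_recall_curve collapses the curve to a
# couple of points. A hedged side-by-side sketch of the difference, on
# synthetic data with a LogisticRegression chosen purely for illustration.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import average_precision_score, precision_recall_curve

X_demo, y_demo = make_classification(n_samples=1000, weights=[0.85, 0.15], random_state=1)
X_tr, X_te, y_tr, y_te = train_test_split(X_demo, y_demo, random_state=1)
proba = LogisticRegression(max_iter=1000).fit(X_tr, y_tr).predict_proba(X_te)[:, 1]
hard = (proba >= 0.5).astype(int)
print('AP from scores      : %.3f' % average_precision_score(y_te, proba))
print('AP from hard labels : %.3f' % average_precision_score(y_te, hard))
print('PR points from scores      :', len(precision_recall_curve(y_te, proba)[0]))
print('PR points from hard labels :', len(precision_recall_curve(y_te, hard)[0]))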
Beispiel #42
0
clf = CV_rfc.best_estimator_

n_trials = 50
test_size_percent = 0.1

signals = X
labels = y

plot_data = []

train_signals, test_signals, train_labels, test_labels = train_test_split(
    signals, labels, test_size=test_size_percent)
clf.fit(train_signals, train_labels)
predictions = clf.predict_proba(test_signals)[:, 1]

precision, recall, thresholds = precision_recall_curve(test_labels,
                                                       predictions)
thresholds = np.append(thresholds, 1)

queue_rate = []
for threshold in thresholds:
    queue_rate.append((predictions >= threshold).mean())

plt.plot(thresholds, precision, color=sns.color_palette()[0])
plt.plot(thresholds, recall, color=sns.color_palette()[1])
plt.plot(thresholds, queue_rate, color=sns.color_palette()[2])

leg = plt.legend(('precision', 'recall', 'queue_rate'), frameon=True)
leg.get_frame().set_edgecolor('k')
plt.xlabel('threshold')
plt.ylabel('%')
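# --- Added illustrative sketch (not part of the original example) ---
# queue_rate above is the fraction of cases that would be flagged at each
# threshold; the same arrays can be turned into a concrete operating point
# under a review-capacity budget. The 10% budget is an arbitrary example; the
# names thresholds, precision, recall and queue_rate come from the block above.
import numpy as np

budget = 0.10
queue_rate = np.asarray(queue_rate)
feasible = np.where(queue_rate <= budget)[0]
if len(feasible):
    idx = feasible[0]  # thresholds are increasing, so this is the most permissive feasible cut-off
    print('threshold=%.3f  precision=%.3f  recall=%.3f  queue_rate=%.3f'
          % (thresholds[idx], precision[idx], recall[idx], queue_rate[idx]))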
Beispiel #43
0
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.5,
                                                    random_state=random_state)

# Run classifier
classifier = OneVsRestClassifier(
    svm.SVC(kernel='linear', probability=True, random_state=random_state))
y_score = classifier.fit(X_train, y_train).decision_function(X_test)

# Compute Precision-Recall and plot curve
precision = dict()
recall = dict()
average_precision = dict()
for i in range(n_classes):
    precision[i], recall[i], _ = precision_recall_curve(
        y_test[:, i], y_score[:, i])
    average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

# Compute micro-average Precision-Recall curve and average precision
precision["micro"], recall["micro"], _ = precision_recall_curve(
    y_test.ravel(), y_score.ravel())
average_precision["micro"] = average_precision_score(y_test,
                                                     y_score,
                                                     average="micro")

# Plot Precision-Recall curve
plt.clf()
plt.plot(recall[0],
         precision[0],
         lw=lw,
         color='navy',
def AUPR(correct_label, predict_score):
    precision, recall, _ = precision_recall_curve(correct_label, predict_score)
    aupr = auc(recall, precision)
    return aupr
Beispiel #45
0
ax1.set_ylabel('Precision')
ax1.set_title('PR Curve')

ax2 = fig.add_subplot(1,2,2)
ax2.set_xlim([-0.05,1.05])
ax2.set_ylim([-0.05,1.05])
ax2.set_xlabel('False Positive Rate')
ax2.set_ylabel('True Positive Rate')
ax2.set_title('ROC Curve')

for c,k in zip([0.0001, 0.001, 0.1, 1, 10, 25, 50, 100],'bgrcmywk'):
    lsvm_ = svm.LinearSVC(C=c, dual=False, class_weight={1:1,0:1})
    lsvm_.fit(dataset['X_train_'], dataset['y_train_'])
    y_pred = lsvm_.predict(dataset['X_test_'])

    p,r,_ = precision_recall_curve(dataset['y_test_'], y_pred)
    fpr,tpr,_ = roc_curve(dataset['y_test_'], y_pred)
    
    ax1.plot(r,p,c=k,label=c)
    ax2.plot(fpr,tpr,c=k,label=c)

ax1.legend(loc='lower left')    
ax2.legend(loc='lower left')
plt.show()


# In[ ]:


fig = plt.figure(figsize=(12,6))
ax1 = fig.add_subplot(1,2,1)
Beispiel #46
0
    args.prediction_window = win_size
    print(args)

    X_train, X_test, y_train, y_test, pos_rate = prepare_data(df_static,
                                                              df_dynamic,
                                                              dynamic_feature,
                                                              args=args)
    model = train_gbtree(X_train, y_train, pos_rate, args=args)

    # Testing
    y_prob = model.predict_proba(X_test)[:, 1]

    # Evaluation
    fpr, tpr, _ = metrics.roc_curve(y_test, y_prob)
    prec, rec, _ = metrics.precision_recall_curve(y_test, y_prob)
    (sensitivity, specificity, PPV, NPV, f1,
     acc), _ = line_search_best_metric(y_test, y_prob, spec_thresh=0.95)

    result_table = result_table.append(
        {
            'window': win_size,
            'fpr': fpr,
            'tpr': tpr,
            'roc': metrics.auc(fpr, tpr),
            'prec': prec,
            'rec': rec,
            'prc': metrics.auc(rec, prec),
            'y_test': y_test,
            'y_prob': y_prob,
            'pos_rate': pos_rate
Beispiel #47
0
testing_samples = pos + neg
shuffle(testing_samples)
features = [x[0] for x in testing_samples]
gold = [x[1] for x in testing_samples]
sia = SentimentIntensityAnalyzer()

labels = [get_prob(sia.polarity_scores(x)) for x in features]
# k fold
accuracy = []
import matplotlib.pyplot as plt

for i in range(10):
    testing_fold = testing_samples[i * 100:(i + 1) * 100]
    testing_labels = [x[1] for x in testing_fold]
    predicted_labels = labels[i * 100:(i + 1) * 100]
    precision, recall, _ = precision_recall_curve(testing_labels, predicted_labels)
    print(len(predicted_labels))
    lab = 'Fold %d AUC=%.4f' % (i + 1, auc(recall, precision))
    plt.step(recall, precision, label=lab)
    accuracy.append(eval(predicted_labels, testing_fold))
plt.legend(loc='lower left', fontsize='small')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('10 Fold Cross-Validation P-R Curve')
# plt.show()
plt.savefig('pr.png')
# fig, ax = plt.subplots()
# ax.plot([1,2,3],'ko-',label='line1')
# ax.plot([2,4,3],'ro-',label='lin2')
# ax.plot([1,5,9],'bo-',label='lin3')
# ax.set_xticklabels(['','A','B','C',''])
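# --- Added hypothetical helper (not shown in the original excerpt) ---
# get_prob is called above but its definition is not part of this snippet. One
# plausible version rescales VADER's compound score from [-1, 1] to [0, 1] so
# it can be used as a score for precision_recall_curve.
def get_prob(polarity_scores):
    return (polarity_scores['compound'] + 1.0) / 2.0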
print(len(x_train_undersampled))
print(len(y_train_undersampled))

modxtr = np.array(x_train_undersampled)
modytr = np.array(y_train_undersampled)

clf = RFC(n_estimators=100)
svm_best_clf = clf.fit(modxtr, modytr)
test_predictions_svm = svm_best_clf.predict(X1)
test_predictions_svm_proba = svm_best_clf.predict_proba(X1)
accuracy = accuracy_score(y1,test_predictions_svm)
true_n, false_p, false_n, true_p = confusion_matrix(
     y1, test_predictions_svm).ravel()
print(true_n, false_p, false_n, true_p)

prec = precision_score(y1, test_predictions_svm)
f1 = f1_score(y1, test_predictions_svm)
sensitivity, specificity = compute_measures(true_p, false_p, false_n, true_n)
print(sensitivity, specificity)

average = (sensitivity + specificity) / 2
fpr, tpr, thresholds = roc_curve(y1, test_predictions_svm_proba[:, 1])
roc_auc1 = auc(fpr, tpr)
precision, recall, thresholds = precision_recall_curve(
     y1, test_predictions_svm_proba[:, 1])
area = auc(recall, precision)

print(sensitivity, specificity, average,
      roc_auc1, accuracy, precision, f1,
      area)
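# --- Added hypothetical helper (not shown in the original excerpt) ---
# compute_measures is called above but not defined in this snippet; a hedged
# version based on the usual definitions (sensitivity = TP/(TP+FN),
# specificity = TN/(TN+FP)), matching the call signature used above.
def compute_measures(true_p, false_p, false_n, true_n):
    sensitivity = true_p / (true_p + false_n) if (true_p + false_n) else 0.0
    specificity = true_n / (true_n + false_p) if (true_n + false_p) else 0.0
    return sensitivity, specificity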
Beispiel #49
0
def evaluate_model(modelname, testX, testY, i, type):
    cnn = models.load_model(modelname)
    # cnn = models.load_model('%d-merge.h5' % i, {'isru': isru, 'pearson_r': pearson_r})
    #  ############### test ##########################
    pre_score = cnn.evaluate(testX, testY, batch_size=2048, verbose=0)

    # fileX = open('./fig1/%s' % type + '/without/pre_score%d.pickle' % i, 'wb')
    # pickle.dump(pre_score, fileX, protocol=4)
    # fileX.close()

    # Finally, write out the comparison plots
    #  ######### Print Precision and Recall ##########
    pred_proba = cnn.predict(testX, batch_size=2048)

    fileX = open('./fig1/%s' % type + '/without/pred_proba%d.pickle' % i, 'wb')
    pickle.dump(pred_proba, fileX, protocol=4)
    fileX.close()

    pred_score = pred_proba[:, 1]
    true_class = testY[:, 1]

    precision, recall, _ = precision_recall_curve(true_class, pred_score)
    average_precision = average_precision_score(true_class, pred_score)

    fpr, tpr, thresholds = roc_curve(true_class, pred_score)
    roc_auc = auc(fpr, tpr)

    for index in range(len(pred_score)):
        if pred_score[index] > config.getfloat('others', 'threshold'):
            pred_score[index] = 1
        else:
            pred_score[index] = 0

    mcc = matthews_corrcoef(true_class, pred_score)

    plt.figure()
    plt.step(recall, precision, color='navy', where='post')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.grid(True)
    plt.title('Precision-Recall curve: AP={0:0.2f}'.format(average_precision))
    plt.savefig('./fig1/%s' % type + '/without/curve/' + str(type) +
                'Precision-Recall%d.png' % i)

    #  ################# Print ROC####################

    plt.figure()
    lw = 2
    plt.plot(fpr,
             tpr,
             color='darkorange',
             lw=lw,
             label='Inception ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.savefig('./fig1/%s' % type + '/without/curve/' + str(type) +
                'ROC %d.png' % i)
    SN, SP = performance(true_class, pred_score)
    pre = precision_score(y_true=true_class, y_pred=pred_score)
    rec = recall_score(y_true=true_class, y_pred=pred_score)
    f1 = f1_score(y_true=true_class, y_pred=pred_score)

    # Sn (sensitivity) and recall are the same value
    return pre_score, pre, rec, SN, SP, f1, mcc, roc_auc
Beispiel #50
0
confusion_train[1, 1] / (confusion_train[1, 1] + confusion_train[0, 1])

# In[100]:

# calculate recall
# TP/(TP+FN)

confusion_train[1, 1] / (confusion_train[1, 1] + confusion_train[1, 0])

# Precision is less when compared to Recall

# #### Precision and Recall tradeoff

# In[101]:

p, r, thres = precision_recall_curve(y_train_pred_final.Converted,
                                     y_train_pred_final.Converted_Prob)

# In[102]:

plt.plot(thres, p[:-1], "g-")
plt.plot(thres, r[:-1], "r-")
plt.show()

# Based on the trade-off value 0.4 is chosen as the threshold for final prediction

# **Making Final Predictions using `0.4` as the cut-off value**

# In[103]:

# Creating new column 'Final_Pred_PR' with 1 if Converted_Prob > 0.4 else 0 and evaluating the model
y_train_pred_final['Final_Pred_PR'] = y_train_pred_final.Converted_Prob.map(
    lambda x: 1 if x > 0.4 else 0)
auc_rocs = [
    metrics.roc_auc_score(scores['true'], scores[c_name])
    for c_name in column_names
]
auc_rocs_with_names = list(zip(auc_rocs, column_names))
auc_rocs_with_names.sort(key=lambda x: x[0])
best_classifier = auc_rocs_with_names[-1][1]

# Save the answer
submission_file = open('submissions/metrics/auc_roc.txt', 'w+')
submission_file.write(best_classifier)
submission_file.close()
print(best_classifier)

# Find the best classifier, based on precision, when recall is more than 70%
pr_curves = [
    metrics.precision_recall_curve(scores['true'], scores[c_name])
    for c_name in column_names
]
precisions = list(range(len(pr_curves)))

for index, pr_curve in enumerate(pr_curves):
    # loop through recall thresholds
    i = 0
    while i < len(pr_curve[1]):
        if pr_curve[1][i] <= 0.7:
            break
        else:
            i += 1

    # find the best precision for such recall
    precisions[index] = max(pr_curve[0][:i])
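# --- Added continuation sketch (the original snippet is cut off here) ---
# Following the same pattern as the AUC-ROC selection above, the final step
# would plausibly pick the classifier with the best precision and write its
# name out; the output path below is an assumption in the style of
# 'submissions/metrics/auc_roc.txt'.
precisions_with_names = list(zip(precisions, column_names))
precisions_with_names.sort(key=lambda x: x[0])
best_classifier_pr = precisions_with_names[-1][1]
with open('submissions/metrics/pr_precision.txt', 'w+') as submission_file:
    submission_file.write(best_classifier_pr)
print(best_classifier_pr)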
    print(recall)
    print("FPR : ")
    FPR = conf[0,1]/(conf[0,1] + conf[0,0])
    print(conf[0,1]/(conf[0,1] + conf[0,0]))


    # Cross-validation evaluation
    from sklearn.model_selection import cross_val_score
    score = cross_val_score(clf,TestData,TestLabel, cv=3, scoring="accuracy")
    print("cross_val_score : {}".format(score))

    # Precision-Recall curve
    label_score = clf.decision_function(TestData)
    from sklearn.metrics import precision_recall_curve

    precisions, recalls, thresholds = precision_recall_curve(TestLabel, label_score)
    prname = "pr" + str(gamma) + "vs" + str(nu)
    plt.figure("{}".format(prname))
    plot_precision_recall_threshold(precisions, recalls, thresholds)
    prpath = pr_path + os.sep + str(gamma) + "n" + str(nu) + r".png"
    plt.savefig(prpath)
    plt.close()

    # ROC curve
    from sklearn.metrics import roc_curve

    fpr, tpr, thresholds = roc_curve(TestLabel, label_score)
    rocname = "roc" + str(gamma) + "vs" + str(nu)
    plt.figure("{}".format(rocname))
    plot_roc_curve(fpr, tpr)
    rocpath = roc_path + os.sep + str(gamma) + "n" + str(nu) + r".png"
Beispiel #53
0
                temp['MSE'],
                alpha=0.7,
                marker=markers[flag],
                c=colors[flag],
                label=labels[flag])
plt.legend(loc=[1, 0], fontsize=12)
plt.title('Reconstruction MSE')
plt.ylabel('Reconstruction MSE')
plt.xlabel('Index')
plt.show()

# Plot the Precision-Recall curves
plt.figure(figsize=(14, 6))
for i, metric in enumerate(['MAE', 'MSE']):
    plt.subplot(1, 2, i+1)
    precision, recall, _ = precision_recall_curve(mse_df['Class'], mse_df[metric])
    pr_auc = auc(recall, precision)
    plt.title('Precision-Recall curve based on %s\nAUC = %0.2f'%(metric, pr_auc))
    plt.plot(recall[:-2], precision[:-2], c='coral', lw=4)
    plt.xlabel('Recall'); plt.ylabel('Precision')
plt.show()

# Plot the ROC curves
plt.figure(figsize=(14, 6))
for i, metric in enumerate(['MAE', 'MSE']):
    plt.subplot(1, 2, i+1)
    fpr, tpr, _ = roc_curve(mse_df['Class'], mse_df[metric])
    roc_auc = auc(fpr, tpr)
    plt.title('Receiver Operating Characteristic based on %s\nAUC = %0.2f'%(metric, roc_auc))
    plt.plot(fpr, tpr, c='coral', lw=4)
    plt.plot([0,1],[0,1], c='dodgerblue', ls='--')
y_real = get_y_real()

Y_pred = model.predict_generator(test_gen, test_samples // batch_size + 1)
# y_pred = np.argmax(Y_pred, axis=1)
y_pred = []

for el in Y_pred:
    y_pred += [1 if el[1] >= 0.3 else 0]

print('Classification Report')
target_names = ['Authentic', 'Tampered']
print(classification_report(y_real, y_pred, target_names=target_names))
cm = confusion_matrix(
    list(map(lambda x: int(x[0]), y_real)),
    y_pred,
    normalize='true')

print(cm)
prec, rec, ll = precision_recall_curve(y_real, list(map(lambda x: x[1], Y_pred)))
plt.plot(rec, prec)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.show()
print(auc(rec, prec))
# plt.matshow(cm, cmap='binary')
# plt.show()



Beispiel #55
0
def rec(y_true, y_pred):
    return precision_recall_curve(y_true, y_pred)[1]
        Res = [a_res]
        Res = get_CI(AUC, Res)
        Res = get_CI(ACC, Res)
        Res = get_CI(TPR, Res)
        Res = get_CI(TNR, Res)
        Res = get_CI(PPV, Res)
        Res = get_CI(NPV, Res)
        Res = get_CI(F1, Res)
        Res = get_CI(YOUDEN, Res)
        f.writerow(Res)

        plt.figure(1)
        fpr, tpr, threshold = metric.roc_curve(gt, pre)
        plt.plot(fpr, tpr, label=a_res.split('/')[-1].split('.npy')[0])
        plt.figure(2)
        precision, recall, t = metric.precision_recall_curve(gt, pre)
        plt.plot(recall,
                 precision,
                 label=a_res.split('/')[-1].split('.npy')[0])

plt.figure(1)
#plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic Curve')
plt.legend(loc="lower right")
#plt.show()
plt.savefig('jpgs/roc_ab_detect.jpg')
y = np.array(data.Class.tolist())
data = data.drop(['Time', 'Amount', 'Class'], axis=1)
X = np.array(data.values)
# Prepare the training and test sets
train_x, test_x, train_y, test_y = train_test_split(X,
                                                    y,
                                                    test_size=0.1,
                                                    random_state=33)

# Linear SVM classification
model = svm.LinearSVC()
model.fit(train_x, train_y)
predict_y = model.predict(test_x)
# Confidence scores for the test samples
score_y = model.decision_function(test_x)
# Compute and display the confusion matrix
cm = confusion_matrix(test_y, predict_y)
class_names = [0, 1]
# Display the confusion matrix
plot_confusion_matrix(cm, classes=class_names, title='Linear SVM Confusion Matrix')
# Display the model evaluation scores
show_metrics()
# Compute precision, recall, and thresholds for visualization
precision, recall, thresholds = precision_recall_curve(test_y, score_y)
plot_precision_recall()
'''
Precision: 0.846
Recall: 0.733
F1 score: 0.786
'''
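# --- Added hypothetical helper (not shown in the original excerpt) ---
# plot_confusion_matrix, show_metrics and plot_precision_recall are defined
# elsewhere in the original script. A hedged sketch of what
# plot_precision_recall might do with the module-level precision/recall arrays
# computed above (argument-free, to match the call above).
import matplotlib.pyplot as plt

def plot_precision_recall():
    plt.figure()
    plt.step(recall, precision, color='b', where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall Curve')
    plt.show()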
                                                test_size=0.5,
                                                random_state=2)

# generate a no skill prediction (majority class)
ns_probs = [0 for _ in range(len(testy))]
# fit models
model = LogisticRegression(solver='lbfgs')
model.fit(trainX, trainy)

# predict probabilities
lr_probs = model.predict_proba(testX)
# keep probabilities for the positive outcome only
lr_probs = lr_probs[:, 1]
# predict class values
yhat = model.predict(testX)
lr_precision, lr_recall, _ = precision_recall_curve(testy, lr_probs)
lr_f1, lr_pr_auc = f1_score(testy, yhat), auc(lr_recall, lr_precision)
# calculate scores
ns_auc = roc_auc_score(testy, ns_probs)
lr_auc = roc_auc_score(testy, lr_probs)
# summarize scores
print('No Skill: ROC AUC=%.3f' % (ns_auc))
print('Logistic: ROC AUC=%.3f' % (lr_auc))
print('Logistic: f1=%.3f precision-recall AUC=%.3f' % (lr_f1, lr_pr_auc))
# calculate roc curves
ns_fpr, ns_tpr, _ = roc_curve(testy, ns_probs)
lr_fpr, lr_tpr, _ = roc_curve(testy, lr_probs)
# plot the roc curve for the model
plt.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill')
plt.plot(lr_fpr, lr_tpr, marker='.', label='Logistic')
# axis labels
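# --- Added illustrative sketch (the original snippet is cut off here) ---
# The matching precision-recall plot usually drawn alongside the ROC curve
# compares the model against a no-skill baseline at the positive-class
# prevalence (not 0.5). A hedged sketch reusing testy, lr_recall, lr_precision
# and lr_pr_auc from the block above.
no_skill = sum(testy) / len(testy)  # prevalence of the positive class
plt.figure()
plt.plot([0, 1], [no_skill, no_skill], linestyle='--', label='No Skill')
plt.plot(lr_recall, lr_precision, marker='.', label='Logistic (PR AUC=%.3f)' % lr_pr_auc)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend()
plt.show()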
# Set up classifier using A projects
gb_clf = GradientBoostingClassifier(n_estimators=250, learning_rate=0.75, max_features=50, max_depth=6, random_state=1, verbose=0)
gb_clf.fit(A_X, A_y)

# print("Classifier ready. Performing predictions...")

# Perform predictions on B
B_pred = gb_clf.predict(B_X)

# print("Complete! Results: \n")

# Evaluate
prec = precision_score(y_true=B_y, y_pred=B_pred, average='binary')
rec = recall_score(y_true=B_y, y_pred=B_pred, average='binary')

p, r, _ = precision_recall_curve(B_y, B_pred, pos_label=1)
aucval = auc(r, p)

fm = 2 * (prec * rec) / (prec + rec)

print("PREC: ", prec)
print("REC: ", rec)
print("AUC: ", aucval)
print("F1: ", fm)

# Save the model
modelName = retrieve_name(B) + "_model.pkl"
with open(modelName, 'wb') as f:
    pickle.dump(gb_clf, f)

# print("\n \nModel saved as " + modelName)
Beispiel #60
0
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score
conf_mat = confusion_matrix(y_train, y_train_pred)
precision = precision_score(y_train, y_train_pred)
recall = recall_score(y_train, y_train_pred)
f1 = f1_score(y_train, y_train_pred)
#%%
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_recall_curve

y_scores = cross_val_predict(logreg_model,
                             X_train,
                             y_train,
                             cv=kfolds,
                             method="decision_function")
precisions, recalls, thresholds = precision_recall_curve(y_train, y_scores)


def plot_precision_recall_vs_threshold(precisions, recalls, thresholds):
    plt.figure()
    plt.plot(thresholds, precisions[:-1], "b--", label="Precision")
    plt.plot(thresholds, recalls[:-1], "g-", label="Recall")
    plt.xlabel("Threshold")
    plt.legend(loc="upper right")
    plt.ylim([0, 1])


plot_precision_recall_vs_threshold(precisions, recalls, thresholds)
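# --- Added illustrative sketch (not part of the original example) ---
# The plot shows the precision/recall trade-off against the decision threshold;
# the same arrays also give a concrete threshold for a target precision. The
# 90% target is arbitrary; precisions, recalls and thresholds come from
# precision_recall_curve above. Note argmax returns 0 if the target is never reached.
import numpy as np

target_precision = 0.90
idx = np.argmax(precisions[:-1] >= target_precision)  # first threshold index meeting the target
print("threshold=%.3f gives precision=%.3f at recall=%.3f"
      % (thresholds[idx], precisions[idx], recalls[idx]))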

#%%
from sklearn.metrics import roc_curve