def test_model(model, samples_train, categorical_labels_train, samples_test, categorical_labels_test):
    '''
    Test the Keras model given as input with predict_classes().
    Epochs (nb_epoch) is the number of times that the model is exposed to the training dataset.
    Batch Size (batch_size) is the number of training instances shown to the model before a weight update is performed.
    '''
    predictions = model.predict_classes(samples_test, verbose=2)

    # Calculate soft predictions of the multi-layer perceptron
    soft_values = model.predict(samples_test, verbose=2)

    mc.calculate_stats(predictions, categorical_labels_test, 'mlp_confusion_matrix', show_fig=False)

    training_predictions = model.predict_classes(samples_train, verbose=2)
    training_soft_values = model.predict(samples_train, verbose=2)

    # Accuracy, F-measure, and g-mean
    accuracy = accuracy_score(categorical_labels_test, predictions)
    fmeasure = f1_score(categorical_labels_test, predictions, average='macro')
    macro_gmean = mean(im.geometric_mean_score(categorical_labels_test, predictions, average=None))

    # Accuracy, F-measure, and g-mean on training set
    training_accuracy = accuracy_score(categorical_labels_train, training_predictions)
    training_fmeasure = f1_score(categorical_labels_train, training_predictions, average='macro')
    training_macro_gmean = mean(im.geometric_mean_score(categorical_labels_train, training_predictions, average=None))

    return soft_values, predictions, training_soft_values, training_predictions, \
        accuracy, fmeasure, macro_gmean, \
        training_accuracy, training_fmeasure, training_macro_gmean
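# Note: newer releases of tf.keras no longer provide Sequential.predict_classes(),
# which the snippet above relies on. A minimal drop-in sketch (hypothetical helper,
# assuming a softmax output layer) is:
import numpy as np

def predict_classes(model, samples, verbose=0):
    # Derive hard class labels from the soft probabilities returned by model.predict().
    return np.argmax(model.predict(samples, verbose=verbose), axis=1)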
def test_geometric_mean_multiclass():
    """Test geometric mean for multiclass classification task"""
    y_true, y_pred, _ = make_prediction(binary=False)

    # Compute the geometric mean for each of the classes
    geo_mean = geometric_mean_score(y_true, y_pred, average=None)
    assert_array_almost_equal(geo_mean, [0.85, 0.29, 0.7], 2)

    # average tests
    geo_mean = geometric_mean_score(y_true, y_pred, average='macro')
    assert_almost_equal(geo_mean, 0.68, 2)

    geo_mean = geometric_mean_score(y_true, y_pred, average='weighted')
    assert_array_almost_equal(geo_mean, 0.65, 2)
def decisiontree(X_tr, Y_tr, X_te, Y_te):
    # X_tr, X_te = normalize_data(X_tr, X_te, "minmax")
    if Y_tr.shape[1] > 1:
        Y_tr = np.argmax(Y_tr, axis=1)
        Y_te = np.argmax(Y_te, axis=1)

    param_grid = {'max_depth': np.arange(3, 6)}
    tree = GridSearchCV(DecisionTreeClassifier(), param_grid)
    tree.fit(X_tr, Y_tr)

    start = time.time()
    y_pred = tree.predict(X_te)
    end = time.time()
    elapsed = (end - start) / float(len(X_te))

    acc = accuracy_score(Y_te, y_pred)
    fpr_vot, tpr_vot, _ = roc_curve(Y_te, y_pred, pos_label=1, drop_intermediate=False)
    roc_auc_vot = auc(fpr_vot, tpr_vot)
    cmat = classification_report_imbalanced(Y_te, y_pred)
    print("Decision tree")
    # print(cmat)
    geo = geometric_mean_score(Y_te, y_pred)
    f1 = f1_score(Y_te, y_pred, average='micro')
    print('The auc is {} '.format(roc_auc_vot))
    return roc_auc_vot, elapsed
def svm(X_tr, Y_tr, X_te, Y_te):
    # bw = (len(X_tr)/2.0)**0.5  # default value in One-class SVM
    # gamma = 1/(2*bw*bw)
    X_tr, X_te = normalize_data(X_tr, X_te, "minmax")
    if Y_tr.shape[1] > 1:
        Y_tr = np.argmax(Y_tr, axis=1)
        Y_te = np.argmax(Y_te, axis=1)

    # parameters = [{'kernel': ['rbf'], 'gamma': [1e-3], 'C': [1]}]
    # {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
    # svc = svm.SVC()
    # clf = GridSearchCV(svc, parameters, cv=5)
    # clf = SVC(gamma=gamma)
    clf = LinearSVC(random_state=0)
    clf.fit(X_tr, Y_tr)

    start = time.time()
    y_pred = clf.predict(X_te)
    end = time.time()
    elapsed = (end - start) / float(len(X_te))

    acc = accuracy_score(Y_te, y_pred)
    fpr_vot, tpr_vot, _ = roc_curve(Y_te, y_pred, pos_label=1, drop_intermediate=False)
    roc_auc_vot = auc(fpr_vot, tpr_vot)
    cmat = classification_report_imbalanced(Y_te, y_pred)
    print("SVM")
    geo = geometric_mean_score(Y_te, y_pred)
    f1 = f1_score(Y_te, y_pred, average='macro')
    print('The auc is {} '.format(roc_auc_vot))
    return roc_auc_vot, elapsed
def get_output(labels, predictions, data_option=None, t=0.5, to_plot=False, pos_label=1):
    predicted_classes = threshold(predictions, t)
    true_classes = labels
    conf_mat = confusion_matrix(y_true=true_classes, y_pred=predicted_classes)
    # report = classification_report(true_classes, predicted_classes)

    AUROC = []
    AUPR = []
    # Make sure both classes are present before computing ranking metrics
    if np.count_nonzero(labels) > 0 and np.count_nonzero(labels) != labels.shape[0]:
        fpr, tpr, thresholds = roc_curve(y_true=true_classes, y_score=predictions, pos_label=pos_label)
        AUROC = auc(fpr, tpr)
        precision, recall, thresholds = precision_recall_curve(true_classes, predictions)
        AUPR = auc(recall, precision)
        if to_plot:
            plot_ROC_AUC(fpr, tpr, AUROC, data_option)
    else:
        print('only one class present')

    g_mean = geometric_mean_score(labels, predicted_classes)
    return AUROC, conf_mat, g_mean, AUPR
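# The snippet above relies on a threshold() helper that is not shown here.
# A minimal sketch of such a helper (hypothetical, assuming `predictions`
# holds probabilities of the positive class):
import numpy as np

def threshold(predictions, t=0.5):
    # Map probability scores to hard 0/1 class labels at cut-off t.
    return (np.asarray(predictions) >= t).astype(int)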
def compute_metrics(y_test, y_pred, y_proba=None, average='weighted', return_index=False):
    """Compute the metrics of interest for a set of predictions.

    :input y_test: pd.DataFrame or np.array of original labels
    :input y_pred: pd.DataFrame or np.array of predicted labels
    :output res: list of metric values, in order
        - Accuracy
        - Precision
        - Recall
        - F1 Score
        - Sensitivity
        - Specificity
        - Geometric mean
        - Average precision (only if y_proba is given)
    """
    if return_index:
        return [
            'accuracy', 'precision', 'recall', 'f1_score', 'sensitivity_score',
            'specificity_score', 'geometric_mean_score', 'average_precision_score'
        ]
    else:
        res = []
        res.append(accuracy_score(y_test, y_pred))
        res.append(precision_score(y_test, y_pred, average=average))
        res.append(recall_score(y_test, y_pred, average=average))
        res.append(f1_score(y_test, y_pred, average=average))
        res.append(sensitivity_score(y_test, y_pred, average=average))
        res.append(specificity_score(y_test, y_pred, average=average))
        res.append(geometric_mean_score(y_test, y_pred, average=average))
        if y_proba is not None:
            res.append(average_precision_score(y_test, y_proba, average=average))
        return res
def cross_validate(model, x, y, cv=5):
    kf = KFold(n_splits=cv, random_state=42, shuffle=True)
    results = {
        "recall": [],
        "accuracy": [],
        "f1": [],
        "geometric-gmean": [],
        "average_precision_score": []
    }
    for train_index, test_index in kf.split(x):
        fit = model.fit([x[index] for index in train_index],
                        [y[index] for index in train_index])
        predictions = fit.predict([x[index] for index in test_index])
        y_true = [y[index] for index in test_index]
        results["recall"].append(recall_score(y_true, predictions))
        results["accuracy"].append(accuracy_score(y_true, predictions))
        results["f1"].append(f1_score(y_true, predictions))
        results["geometric-gmean"].append(
            geometric_mean_score(y_true, predictions, average='weighted'))
        results["average_precision_score"].append(
            average_precision_score(y_true, predictions))
    return results
def randomforest(X_tr, Y_tr, X_te, Y_te):
    if Y_tr.shape[1] > 1:
        Y_tr = np.argmax(Y_tr, axis=1)
        Y_te = np.argmax(Y_te, axis=1)

    rfc = RandomForestClassifier(n_jobs=-1, max_features='sqrt', n_estimators=40, oob_score=True)
    param_grid = {'n_estimators': [5, 10, 20, 40, 80, 150]}
    clf = GridSearchCV(estimator=rfc, param_grid=param_grid)
    clf.fit(X_tr, Y_tr)

    y_pred = clf.predict(X_te)
    acc = accuracy_score(Y_te, y_pred)
    fpr_vot, tpr_vot, _ = roc_curve(Y_te, y_pred, pos_label=1, drop_intermediate=False)
    roc_auc_vot = auc(fpr_vot, tpr_vot)
    cmat = classification_report(Y_te, y_pred)
    print(cmat)
    geo = geometric_mean_score(Y_te, y_pred)
    f1 = f1_score(Y_te, y_pred, average='micro')
    print('The geometric mean is {}'.format(geo))
    cnf_matrix = confusion_matrix(Y_te, y_pred)
    print(cnf_matrix)
    print('The auc is {}'.format(roc_auc_vot))
    print('The f1 is {}'.format(f1))
    return acc
def calc_metrics_radar(y_true, y_prob):
    # Calculate the metrics for prediction probabilities from RaDaR
    vfunc = np.vectorize(lambda x: 1 if x > 0.05 else 0)
    y_pred = vfunc(y_prob).ravel()

    precision, recall, _ = precision_recall_curve(y_true, y_prob)
    fpr, tpr, _ = roc_curve(y_true, y_prob)
    roc_auc = auc(fpr, tpr)
    pr_auc = average_precision_score(y_true, y_prob)
    exp_pos = np.sum(y_prob)
    f_score = f1_score(y_true, y_pred)
    g_mean = geometric_mean_score(y_true, y_pred)
    loss = log_loss(y_true, y_prob)

    metrics = {
        'Loss': loss,
        'PR_AUC': pr_auc,
        'ROC_AUC': roc_auc,
        'F_score': f_score,
        'G_mean': g_mean,
        'Expected_#_D60': exp_pos,
        'Actual_#_D60': np.sum(y_true),
        'Diff_D60': abs(np.sum(y_true) - exp_pos),
        'Ratio_D60': np.sum(y_true) / exp_pos,
    }
    return metrics
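# A minimal usage sketch of the function above with toy probabilities
# (hypothetical data, assuming the imports used by calc_metrics_radar are in scope):
import numpy as np

y_true_demo = np.array([0, 0, 0, 0, 1, 1, 0, 0, 1, 0])
y_prob_demo = np.array([0.01, 0.02, 0.10, 0.03, 0.60, 0.20, 0.04, 0.07, 0.90, 0.02])
print(calc_metrics_radar(y_true_demo, y_prob_demo))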
def randomforest_cross_validation(train_x, train_y):
    np.random.seed(100)
    clf = ensemble.RandomForestClassifier()
    clf.fit(train_x, train_y)

    # Calculate the accuracy
    accuracy = cross_val_score(clf, train_x, train_y, cv=10, scoring='accuracy')
    print("accuracy: %f" % accuracy.mean() + '\n')

    # Calculate the precision
    precision = cross_val_score(clf, train_x, train_y, cv=10, scoring='precision_macro')
    print("precision: %f" % precision.mean() + '\n')

    # Calculate the recall score
    recall = cross_val_score(clf, train_x, train_y, cv=10, scoring='recall_macro')
    print("recall: %f" % recall.mean() + '\n')

    # Calculate the f_measure
    f_measure = cross_val_score(clf, train_x, train_y, cv=10, scoring='f1_macro')
    print("f_measure: %f " % f_measure.mean() + '\n')

    # Generate the classification report, MCC, and G-mean value
    y_pred = cross_val_predict(clf, train_x, train_y, cv=10)
    G_mean = geometric_mean_score(train_y, y_pred)
    MCC = matthews_corrcoef(train_y, y_pred)
    print("G_mean: %f" % G_mean.mean() + '\n')
    print("MCC: %f" % np.mean(MCC) + '\n')
    print("Classification_report:")
    print(metrics.classification_report(train_y, y_pred))
    return clf
def calculate_performance(labels, predictions):
    output = dict()
    output["balanced_accuracy"] = balanced_accuracy_score(labels, predictions[0])
    output["gmean"] = metrics.geometric_mean_score(labels, predictions[0])
    output["accuracy"] = accuracy_score(labels, predictions[0])
    output["f1score"] = f1_score(labels, predictions[0])
    output["recall"] = recall_score(labels, predictions[0])
    output["precision"] = precision_score(labels, predictions[0])
    output["auc"] = roc_auc_score(labels, predictions[1][:, 1])
    output["prc"] = average_precision_score(labels, predictions[1][:, 1])

    tn, fp, fn, tp = confusion_matrix(labels, predictions[0]).ravel()
    output["tpr"] = float(tp) / (float(tp) + float(fn))
    output["tnr"] = float(tn) / (float(tn) + float(fp))

    output["opm"] = (output['gmean'] + output['balanced_accuracy'] + output['f1score']
                     + output['tpr'] + output["tnr"]) / 5.
    output["opm_prc"] = (output['gmean'] + output['prc'] + output['balanced_accuracy']
                         + output['f1score'] + output['tpr'] + output["tnr"]) / 6.
    output["opm_auc"] = (output['gmean'] + output['auc'] + output['balanced_accuracy']
                         + output['f1score'] + output['tpr'] + output["tnr"]) / 6.
    return output
def f_per_particle(m, alpha):
    """Compute the objective function for a single particle.

    Inputs
    ------
    m : numpy.ndarray
        Binary mask obtained from BinaryPSO, used to mask features.
    alpha : float (default is 0.5)
        Constant weight trading off classifier performance and the
        number of selected features.

    Returns
    -------
    float
        Computed objective function value.
    """
    total_features = 19

    # Get the subset of features selected by the binary mask
    if np.count_nonzero(m) == 0:
        X_subset = X
    else:
        X_subset = X[:, m == 1]

    # Perform classification and store the performance in P
    classifier.fit(X_subset, y)
    from imblearn.metrics import geometric_mean_score
    # Score against the same labels used for fitting
    P = geometric_mean_score(y, classifier.predict(X_subset))

    # Objective: trade off (1 - performance) against the fraction of features kept
    j = (alpha * (1.0 - P)
         + (1.0 - alpha) * (1 - (X_subset.shape[1] / total_features)))
    return j
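# BinaryPSO evaluates a whole swarm at once, so a per-particle objective like the
# one above is usually wrapped in a swarm-level function. A minimal sketch
# (hypothetical wrapper name `f`, assuming f_per_particle and its globals exist):
import numpy as np

def f(x, alpha=0.88):
    # x has shape (n_particles, n_features); evaluate each particle's feature mask.
    return np.array([f_per_particle(particle, alpha) for particle in x])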
def calc_metrics(y_test, pred, auc, i):
    sen = metrics.sensitivity_score(y_test, pred, pos_label=1)
    spe = metrics.specificity_score(y_test, pred, pos_label=1)
    geo = metrics.geometric_mean_score(y_test, pred, pos_label=1)
    index = ['sm', 'b1', 'b2', 'enn', 'tom', 'ada', 'mnd']
    metrics_list = [index[i], sen, spe, geo, auc]
    return metrics_list
def decisiontree(X_tr, Y_tr, X_te, Y_te):
    if Y_tr.shape[1] > 1:
        Y_tr = np.argmax(Y_tr, axis=1)
        Y_te = np.argmax(Y_te, axis=1)

    param_grid = {'max_depth': [5, 6, 7, 8, 9, 10, 50, 100]}
    tree = GridSearchCV(DecisionTreeClassifier(), param_grid)
    tree.fit(X_tr, Y_tr)

    y_pred = tree.predict(X_te)
    acc = accuracy_score(Y_te, y_pred)
    fpr_vot, tpr_vot, _ = roc_curve(Y_te, y_pred, pos_label=1, drop_intermediate=False)
    roc_auc_vot = auc(fpr_vot, tpr_vot)
    cmat = classification_report_imbalanced(Y_te, y_pred)
    print("Decision tree")
    print(cmat)
    cnf_matrix = confusion_matrix(Y_te, y_pred)
    print(cnf_matrix)
    geo = geometric_mean_score(Y_te, y_pred)
    f1 = f1_score(Y_te, y_pred, average='micro')
    print('The geometric mean is {}'.format(geo))
    print('The auc is {}'.format(roc_auc_vot))
    print('The f1 is {}'.format(f1))
    return acc
def get_reward(self, train_x, train_y, train_weights, valid_x, valid_y, test_x, test_y):
    '''Train the classifier on the selected samples and return the validation reward.

    :param train_x:
    :param train_y:
    :param train_weights:
    :param valid_x:
    :param valid_y:
    :return: The reward (a task-dependent validation metric)
    '''
    from imblearn.metrics import geometric_mean_score
    from sklearn.metrics import matthews_corrcoef

    idx = train_weights == 1
    x = train_x[idx]
    y = train_y[idx]
    self.env.fit(x, np.argmax(y, axis=1).astype('int32'))

    if self.task == 'vehicle':
        preds = self.env.predict(valid_x)
        valid_reward = geometric_mean_score(np.argmax(valid_y, axis=1).astype('int32'), preds)
    elif self.task == 'page':
        preds = self.env.predict(valid_x)
        valid_reward = matthews_corrcoef(np.argmax(valid_y, axis=1).astype('int32'), preds)
    elif self.task == 'spam':
        preds = self.env.predict(valid_x)
        valid_reward = evaluate_f2(np.argmax(valid_y, axis=1).astype('int32'), preds)  # F2 for spam
    elif self.task == 'credit':
        preds = self.env.predict_proba(valid_x)[:, 1]
        valid_reward = evaluate_auc_prc(np.argmax(valid_y, axis=1).astype('int32'), preds)

    return valid_reward, valid_reward, valid_reward
def svm(X_tr, Y_tr, X_te, Y_te):
    if Y_tr.shape[1] > 1:
        Y_tr = np.argmax(Y_tr, axis=1)
        Y_te = np.argmax(Y_te, axis=1)

    parameters = [{
        'kernel': ['rbf'],
        'gamma': [1e-3, 1e-2, 1e-1, 1],
        'C': [1]
    }]
    # [{'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
    # Use sklearn.svm.SVC imported directly, since the name `svm` is taken by this function
    svc = SVC()
    clf = GridSearchCV(svc, parameters, cv=5)
    clf.fit(X_tr, Y_tr)

    y_pred = clf.predict(X_te)
    acc = accuracy_score(Y_te, y_pred)
    fpr_vot, tpr_vot, _ = roc_curve(Y_te, y_pred, pos_label=1, drop_intermediate=False)
    roc_auc_vot = auc(fpr_vot, tpr_vot)
    cmat = classification_report_imbalanced(Y_te, y_pred)
    print("SVM")
    print(cmat)
    cnf_matrix = confusion_matrix(Y_te, y_pred)
    print(cnf_matrix)
    geo = geometric_mean_score(Y_te, y_pred)
    f1 = f1_score(Y_te, y_pred, average='micro')
    print('The geometric mean is {}'.format(geo))
    print('The auc is {}'.format(roc_auc_vot))
    print('The f1 is {}'.format(f1))
    return acc
def validate_easy_ensemble(estimator, X, y):
    acc = []
    b_acc = []
    a_p_c = []
    roc = []
    gm = []
    for key, x_val in zip(X.keys(), X.values()):
        preds = estimator.predict(x_val)
        # scikit-learn convention: true labels first, predictions second
        acc.append(accuracy_score(y[key], preds))
        b_acc.append(balanced_accuracy_score(y[key], preds))
        a_p_c.append(average_precision_score(y[key], preds))
        roc.append(roc_auc_score(y[key], preds))
        gm.append(geometric_mean_score(y[key], preds))

    scores = {
        'Accuracy Score = ': np.round(np.mean(acc), 3),
        'Accuracy Std = ': np.round(np.std(acc), 3),
        'Balanced Accuracy Score = ': np.round(np.mean(b_acc), 3),
        'Balanced Accuracy Std = ': np.round(np.std(b_acc), 3),
        'Average Precision Recall Score = ': np.round(np.mean(a_p_c), 3),
        'Average Precision Recall Std = ': np.round(np.std(a_p_c), 3),
        'Roc Auc Score = ': np.round(np.mean(roc), 3),
        'Roc Auc Std = ': np.round(np.std(roc), 3),
        'G Mean Score = ': np.round(np.mean(gm), 3),
        'G Mean Std = ': np.round(np.std(gm), 3)
    }
    return scores
def performance_summary(
    clf: OptimalSamplingClassifier,
    X: np.ndarray,
    y: np.ndarray,
    info: Optional[Dict[str, any]] = None,
) -> Dict[str, float]:
    predicted_proba = clf.predict_proba(X)
    predicted = clf.predict(X)
    nominal_proba = (y == clf.positive_class).mean()
    return dict(
        model=str(clf.estimator).replace("\n", "").replace(" ", ""),
        class_ratio=1 / nominal_proba,
        weight_ratio=clf.positive_weight / clf.negative_weight,
        sampling_probability=clf._sampling_proba,
        previous_probabilities=clf._prev_sampling_probas,
        cross_val_probabilities=clf._cross_val_sampling_probas,
        sampling_ratio=clf._sampling_proba / nominal_proba,
        iter_to_converge=clf._iter_count,
        accuracy=accuracy_score(y, predicted),
        sensitivity=sensitivity_score(y, predicted),
        specificity=specificity_score(y, predicted),
        precision=precision_score(y, predicted)
        if (predicted == clf.positive_class).sum() > 0 else None,
        recall=recall_score(y, predicted)
        if (predicted == clf.positive_class).sum() > 0 else None,
        f1_score=f1_score(y, predicted),
        geometric_mean_score=geometric_mean_score(y, predicted)
        if (predicted == clf.positive_class).sum() > 0 else None,
        roc_auc_score=roc_auc_score(y, predicted_proba),
        average_precision_score=average_precision_score(y, predicted_proba),
        weighted_loss=clf.weighted_loss(X, y).mean(),
        cost=clf.cost(X, y).mean(),
        **(info if info else {})
    )
def evalSampling(sampler, classifier, Xtrain, Xtest, ytrain, ytest):
    """Evaluate a sampling method with a given classifier and dataset.

    Keyword arguments:
    sampler -- the sampling method to employ; None for no sampling
    classifier -- the classifier to use after sampling
    Xtrain, ytrain -- training data
    Xtest, ytest -- test data

    Returns:
    A tuple containing precision, recall, F1 score, AUC of ROC,
    Cohen's kappa score, and geometric mean score.
    """
    X = Xtrain
    y = ytrain
    Xt = Xtest
    yt = ytest

    if sampler is not None:
        X_resampled, y_resampled = sampler.fit_sample(X, y)
        classifier.fit(X_resampled, y_resampled)
    else:
        classifier.fit(X, y)

    yp = classifier.predict(Xt)
    yProb = classifier.predict_proba(Xt)[:, 1]  # probability of class value 1 (not 0)

    precision = precision_score(yt, yp)
    recall = recall_score(yt, yp)
    f1 = f1_score(yt, yp)
    rocauc = roc_auc_score(yt, yProb)
    kappa = cohen_kappa_score(yt, yp)
    gmean = geometric_mean_score(yt, yp)
    return (precision, recall, f1, rocauc, kappa, gmean)
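# A hypothetical usage sketch on synthetic imbalanced data. Note that
# fit_sample() used above is the older imblearn API; on imbalanced-learn >= 0.4
# it is called fit_resample(), so the call inside evalSampling may need adapting.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE

X_all, y_all = make_classification(n_samples=600, weights=[0.9, 0.1], random_state=0)
Xtr, Xte, ytr, yte = train_test_split(X_all, y_all, stratify=y_all, random_state=0)
print(evalSampling(SMOTE(random_state=0), RandomForestClassifier(random_state=0),
                   Xtr, Xte, ytr, yte))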
def test_clf(clf, X_test, Y_test):
    print(clf)
    try:
        y_prob = clf.predict_proba(X_test)[:, 1]
        roc_score = roc_auc_score(Y_test, y_prob)
        fpr, tpr, threshold = roc_curve(Y_test, y_prob)
        plt.plot(fpr, tpr, label='ROAUC')
        plt.plot(1 - fpr, tpr, 'r')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROAUC SCORE = %.2f' % roc_score)
        plt.legend(loc='best')
        plt.show()
        y_predictions = np.round(y_prob)
    except Exception:
        # Fall back to hard predictions when predict_proba is unavailable
        y_pred = clf.predict(X_test)
        y_predictions = np.round(y_pred)

    print('ACCURACY = ', accuracy_score(Y_test, np.round(y_predictions)))
    print('GEOMETRIC MEAN SCORE = ', geometric_mean_score(Y_test, np.round(y_predictions)))
    print(classification_report(Y_test, np.round(y_predictions)))

    tn, fp, fn, tp = confusion_matrix(Y_test, np.round(y_predictions)).ravel()
    metrics = {}
    metrics['TN'] = tn
    metrics['FP'] = fp
    metrics['FN'] = fn
    metrics['TP'] = tp
    print('confusion matrix : ', metrics)
def test_geometric_mean_support_binary():
    y_true, y_pred, _ = make_prediction(binary=True)

    # compute the geometric mean for the binary problem
    geo_mean = geometric_mean_score(y_true, y_pred)
    assert_allclose(geo_mean, 0.77, rtol=R_TOL)
def evaluate(X, y, estm):
    # Performance metrics
    y_pred = estm.predict(X)
    print(confusion_matrix(y, y_pred).ravel())
    tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()

    # ROC curve
    try:
        if "decision_function" not in dir(estm):
            y_prob = estm.predict_proba(X)[:, 1]
        else:
            y_prob = estm.decision_function(X)
        pre, rec, _ = precision_recall_curve(y, y_prob)
        fpr, tpr, _ = roc_curve(y, y_prob)
        aucroc = auc(fpr, tpr)
        aucpr = auc(rec, pre)
    except AttributeError:
        print("Classifier has neither predict_proba nor decision_function; skipping ROC/PR curves.")
        pre, rec = None, None
        fpr, tpr = None, None
        aucroc = None
        aucpr = None

    eval_dictionary = {
        "CM": confusion_matrix(y, y_pred),  # Confusion matrix
        "ACC": (tp + tn) / (tp + fp + fn + tn),  # Accuracy
        "F1": fbeta_score(y, y_pred, beta=1),
        "F2": fbeta_score(y, y_pred, beta=2),
        "GMean": geometric_mean_score(y, y_pred, average='binary'),
        "SEN": tp / (tp + fn),
        "PREC": tp / (tp + fp),
        "SPEC": tn / (tn + fp),
        "MCC": matthews_corrcoef(y, y_pred),
        "PRCURVE": {"precision": pre, "recall": rec, "aucpr": aucpr},
        "ROCCURVE": {"fpr": fpr, "tpr": tpr, "aucroc": aucroc},
    }
    return eval_dictionary
def cros_val(X_train_folds, y_train_folds, X_test_folds, y_test_folds, models):
    metrics = {'acc': {}, 'f1': {}, 'gmean': {}, 'precision': {}, 'recall': {}}

    # Cross-validation
    for name, model in models.items():
        print(f'\nModel {name}: ')
        acc_folds = []
        f1_folds = []
        gmean_folds = []
        precision_folds = []
        recall_folds = []
        for i in range(len(X_train_folds)):
            print('.', end='')
            model.fit(X_train_folds[i], y_train_folds[i])
            y_pred = model.predict(X_test_folds[i])
            y_true = y_test_folds[i]

            # Compute the metrics
            acc_folds.append(accuracy_score(y_true, y_pred))
            f1_folds.append(f1_score(y_true, y_pred, average='macro'))
            gmean_folds.append(geometric_mean_score(y_true, y_pred, average='macro'))
            precision_folds.append(precision_score(y_true, y_pred, average='macro', zero_division=0))
            recall_folds.append(recall_score(y_true, y_pred, average='macro'))

        metrics['acc'][name] = acc_folds
        metrics['f1'][name] = f1_folds
        metrics['gmean'][name] = gmean_folds
        metrics['precision'][name] = precision_folds
        metrics['recall'][name] = recall_folds
    return metrics
def on_epoch_end(self, epoch, logs=None):
    # fetch results
    targets = self.targets
    predictions = self.predictions

    # convert prediction class probabilities to class
    y_pred = np.asarray([np.argmax(line) for line in predictions])

    # calculate metrics with sklearn
    gmean = geometric_mean_score(targets, y_pred, average='macro')
    accuracy = accuracy_score(targets, y_pred)

    # save scores
    self.val_gmean.append(gmean)
    self.val_accuracy.append(accuracy)

    # reset results
    self.targets = []
    self.predictions = []

    # check results
    if gmean > self.best_score:
        self.best_score = gmean
        self.best_epoch = epoch
        self.model.save(self.save_path)
        if self.verbose is True:
            print(f"{epoch} - gmean: {gmean} - accuracy: {accuracy} (best)")
    else:
        if self.verbose is True:
            print(f"{epoch} - gmean: {gmean} - accuracy: {accuracy}")

    # end if patience is overdue
    if epoch - self.patience > self.best_epoch:
        if self.verbose is True:
            print(f"Epoch {epoch}: early stopping Threshold")
        self.model.stop_training = True
def randomforest(X_tr, Y_tr, X_te, Y_te):
    if Y_tr.shape[1] > 1:
        Y_tr = np.argmax(Y_tr, axis=1)
        Y_te = np.argmax(Y_te, axis=1)

    rfc = RandomForestClassifier(n_jobs=-1, max_features='sqrt', n_estimators=40, oob_score=True)
    param_grid = {'n_estimators': [40, 100]}
    CV_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid)
    CV_rfc.fit(X_tr, Y_tr)
    # print(CV_rfc.best_params_)
    # clf = RandomForestClassifier(n_estimators=150, random_state=42)
    # clf.fit(X_tr, Y_tr)

    y_pred = CV_rfc.predict(X_te)
    fpr_vot, tpr_vot, _ = roc_curve(Y_te, y_pred, pos_label=1, drop_intermediate=False)
    roc_auc_vot = auc(fpr_vot, tpr_vot)
    cmat = classification_report_imbalanced(Y_te, y_pred)
    # print(cmat.diagonal()/cmat.sum(axis=1))
    print(cmat)
    print('The geometric mean is {}'.format(geometric_mean_score(Y_te, y_pred)))
    print('The auc is {}'.format(roc_auc_vot))
    print('The f1 is {}'.format(f1_score(Y_te, y_pred, average='weighted')))
    return CV_rfc, fpr_vot, tpr_vot, roc_auc_vot
def validacion_cruzada(modelo, X, y, cv):
    y_test_all = []
    y_prob_all = []

    for train, test in cv.split(X, y):
        modelo = modelo.fit(X[train], y[train])
        y_pred = modelo.predict(X[test])
        # The second column is the positive class '1' in bank-marketing
        y_prob = modelo.predict_proba(X[test])[:, 1]
        y_test_bin = y[test]
        # y_test_bin = le.fit_transform(y[test])
        # Converted to binary for AUC: 'yes' -> 1 (positive class) and 'no' -> 0 in bank-marketing
        print("Accuracy: {:6.2f}%, F1-score: {:.4f}, G-mean: {:.4f}, AUC: {:.4f}".format(
            accuracy_score(y[test], y_pred) * 100,
            f1_score(y[test], y_pred, average='macro'),
            geometric_mean_score(y[test], y_pred, average='macro'),
            roc_auc_score(y_test_bin, y_prob)))
        y_test_all = numpy.concatenate([y_test_all, y_test_bin])
        y_prob_all = numpy.concatenate([y_prob_all, y_prob])

    print("")
    return modelo, y_test_all, y_prob_all
def printar_resultados(y_test, pred, ensemble, nome_modelo):
    '''
    Print the results for each model.
    :param y_test: true labels of the test set
    :param pred: predictions made by the model
    :param ensemble: the ensemble model whose estimators are counted
    :param nome_modelo: name of the model, used for display
    :return: the metrics: number of models, accuracy, AUC, F1-score, and G-mean
    '''
    # Compute the metrics for the given data
    qtd_modelos = len(ensemble.estimators_)
    acuracia = metrics.accuracy_score(y_test, pred)
    auc = metrics.roc_auc_score(y_test, pred)
    f1measure = metrics.f1_score(y_test, pred, average='binary')
    gmean = geometric_mean_score(y_test, pred, average='binary')

    # Report the performance
    print('\n' + nome_modelo)
    print("number of models:", qtd_modelos)
    print("accuracy:", acuracia)
    print("AUC:", auc)
    print("f-measure:", f1measure)
    print("g-mean:", gmean)

    # Return the results
    return qtd_modelos, acuracia, auc, f1measure, gmean
def classification_report_imbalanced_values(
    y_true, y_pred, labels, target_names=None, sample_weight=None, digits=2, alpha=0.1
):
    """Copy of imblearn.metrics.classification_report_imbalanced to have access
    to the raw values. The code is mostly the same except that the formatting
    code and the generation of the textual report have been removed.

    Copied from version 0.4.3. The original code lives here:
    https://github.com/scikit-learn-contrib/imbalanced-learn/blob/b861b3a8e3414c52f40a953f2e0feca5b32e7460/imblearn/metrics/_classification.py#L790
    """
    labels = np.asarray(labels)

    if target_names is None:
        target_names = [str(label) for label in labels]

    # Compute the different metrics
    # Precision/recall/f1
    precision, recall, f1, support = precision_recall_fscore_support(
        y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight
    )
    # Specificity
    specificity = specificity_score(
        y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight
    )
    # Geometric mean
    geo_mean = geometric_mean_score(
        y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight
    )
    # Index balanced accuracy
    iba_gmean = make_index_balanced_accuracy(alpha=alpha, squared=True)(
        geometric_mean_score
    )
    iba = iba_gmean(
        y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight
    )

    result = {"targets": {}}
    for i, label in enumerate(labels):
        result["targets"][target_names[i]] = {
            "precision": precision[i],
            "recall": recall[i],
            "specificity": specificity[i],
            "f1": f1[i],
            "geo_mean": geo_mean[i],
            "iba": iba[i],
            "support": support[i],
        }

    result["average"] = {
        "precision": np.average(precision, weights=support),
        "recall": np.average(recall, weights=support),
        "specificity": np.average(specificity, weights=support),
        "f1": np.average(f1, weights=support),
        "geo_mean": np.average(geo_mean, weights=support),
        "iba": np.average(iba, weights=support),
        "support": np.sum(support),
    }
    return result
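# A minimal usage sketch with toy labels (hypothetical data, assuming the
# function above and the imports it relies on are in scope):
import numpy as np

y_true = np.array([0, 0, 0, 1, 1, 2])
y_pred = np.array([0, 0, 1, 1, 1, 2])
values = classification_report_imbalanced_values(y_true, y_pred, labels=np.unique(y_true))
print(values["average"]["geo_mean"])  # support-weighted average of the per-class geometric means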
def run_research(self, parameters_dist: dict, n_params: int = 10):
    self.parameters_dist = parameters_dist
    self.__generate_permutations()
    parameter_ranges = self.__display_research_info()

    for variant in self.parameters_perm:
        self.resampler.set_params(**variant)
        X_resampled, y_resampled = self.resampler.resample_to_ndarray()
        X_train, X_test, y_train, y_test = train_test_split(
            X_resampled, y_resampled, test_size=0.3, random_state=0)

        clf = RandomForestClassifier(random_state=0, n_jobs=-1)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # Shorten estimator names so the logs stay readable
        if 'estimator' in variant.keys():
            if variant['estimator'].__class__.__name__ == 'RandomForestClassifier':
                variant['estimator'] = 'RFC'
            elif variant['estimator'].__class__.__name__ == 'AdaBoostClassifier':
                variant['estimator'] = 'ABC'
            elif variant['estimator'].__class__.__name__ == 'GradientBoostingClassifier':
                variant['estimator'] = 'GBC'
            elif variant['estimator'].__class__.__name__ == 'KNeighborsClassifier':
                variant['estimator'] = 'KNN'
            elif variant['estimator'].__class__.__name__ == 'DecisionTreeClassifier':
                variant['estimator'] = 'DT'
            elif variant['estimator'].__class__.__name__ == 'LogisticRegression':
                variant['estimator'] = 'LR'

        index = self.parameters_perm.index(variant)
        self.logs.update({
            index: {
                "params": variant,
                "gmean": geometric_mean_score(y_test, y_pred),
                "recall": recall_score(y_test, y_pred),
                "classes_size": DataController.count_classes_size(y_resampled),
                "roc_auc": roc_auc_score(y_test, y_pred)
            }
        })
        print('{0:0=3d}'.format(index + 1), self.logs[index])

    filename = self.resampler.get_name() + str(parameter_ranges)

    # Drawing plots
    self.__draw_plots(filename)

    # Print and save top found parameters
    self.__logs_to_file(filename, n_params)
def test_geometric_mean_support_binary():
    """Test the geometric mean for binary classification task"""
    y_true, y_pred, _ = make_prediction(binary=True)

    # compute the geometric mean for the binary problem
    geo_mean = geometric_mean_score(y_true, y_pred)
    assert_almost_equal(geo_mean, 0.77, 2)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=RANDOM_STATE)

# Train the classifier with balancing
pipeline.fit(X_train, y_train)

# Test the classifier and get the prediction
y_pred_bal = pipeline.predict(X_test)

###############################################################################
# The geometric mean corresponds to the square root of the product of the
# sensitivity and specificity. Combining the two metrics should account for
# the balancing of the dataset.

print('The geometric mean is {}'.format(geometric_mean_score(y_test, y_pred_bal)))

###############################################################################
# The index balanced accuracy can transform any metric to be used in
# imbalanced learning problems.

alpha = 0.1
geo_mean = make_index_balanced_accuracy(alpha=alpha, squared=True)(geometric_mean_score)

print('The IBA using alpha = {} and the geometric mean: {}'.format(
    alpha, geo_mean(y_test, y_pred_bal)))
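###############################################################################
# For a binary problem, the identity mentioned above can be checked directly:
# the imblearn score equals the square root of sensitivity times specificity.
# A minimal sketch, assuming the variables from the example are still in scope.

import numpy as np
from imblearn.metrics import sensitivity_score, specificity_score

manual_gmean = np.sqrt(sensitivity_score(y_test, y_pred_bal) *
                       specificity_score(y_test, y_pred_bal))
print('Manual check of the geometric mean: {}'.format(manual_gmean))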
###############################################################################
# We train a decision tree classifier which will be used as a baseline for the
# rest of this example.

###############################################################################
# The results are reported in terms of balanced accuracy and geometric mean,
# which are metrics widely used in the literature to validate models trained
# on imbalanced datasets.

tree = DecisionTreeClassifier()
tree.fit(X_train, y_train)
y_pred_tree = tree.predict(X_test)
print('Decision tree classifier performance:')
print('Balanced accuracy: {:.2f} - Geometric mean {:.2f}'
      .format(balanced_accuracy_score(y_test, y_pred_tree),
              geometric_mean_score(y_test, y_pred_tree)))
cm_tree = confusion_matrix(y_test, y_pred_tree)
fig, ax = plt.subplots()
plot_confusion_matrix(cm_tree, classes=np.unique(satimage.target), ax=ax,
                      title='Decision tree')

###############################################################################
# Classification using bagging classifier with and without sampling
###############################################################################
# Instead of using a single tree, we will check if an ensemble of decision
# trees can actually alleviate the issue induced by the class imbalance. First,
# we will use a bagging classifier and its counterpart, which internally uses
# random under-sampling to balance each bootstrap sample.

bagging = BaggingClassifier(n_estimators=50, random_state=0, n_jobs=-1)
balanced_bagging = BalancedBaggingClassifier(n_estimators=50, random_state=0,
def test_geometric_mean_multiclass(y_true, y_pred, correction, expected_gmean):
    gmean = geometric_mean_score(y_true, y_pred, correction=correction)
    assert gmean == pytest.approx(expected_gmean, rel=R_TOL)
def test_geometric_mean_average(y_true, y_pred, average, expected_gmean):
    gmean = geometric_mean_score(y_true, y_pred, average=average)
    assert gmean == pytest.approx(expected_gmean, rel=R_TOL)
def test_geometric_mean_sample_weight(y_true, y_pred, sample_weight, average, expected_gmean):
    gmean = geometric_mean_score(y_true, y_pred, labels=[0, 1],
                                 sample_weight=sample_weight, average=average)
    assert gmean == pytest.approx(expected_gmean, rel=R_TOL)
def test_geometric_mean_multiclass():
    y_true = [0, 0, 1, 1]
    y_pred = [0, 0, 1, 1]
    assert_allclose(geometric_mean_score(y_true, y_pred), 1.0, rtol=R_TOL)

    y_true = [0, 0, 0, 0]
    y_pred = [1, 1, 1, 1]
    assert_allclose(geometric_mean_score(y_true, y_pred), 0.0, rtol=R_TOL)

    cor = 0.001
    y_true = [0, 0, 0, 0]
    y_pred = [0, 0, 0, 0]
    assert_allclose(geometric_mean_score(y_true, y_pred, correction=cor),
                    1.0, rtol=R_TOL)

    y_true = [0, 0, 0, 0]
    y_pred = [1, 1, 1, 1]
    assert_allclose(geometric_mean_score(y_true, y_pred, correction=cor),
                    cor, rtol=R_TOL)

    y_true = [0, 0, 1, 1]
    y_pred = [0, 1, 1, 0]
    assert_allclose(geometric_mean_score(y_true, y_pred, correction=cor),
                    0.5, rtol=R_TOL)

    y_true = [0, 1, 2, 0, 1, 2]
    y_pred = [0, 2, 1, 0, 0, 1]
    assert_allclose(geometric_mean_score(y_true, y_pred, correction=cor),
                    (1 * cor * cor) ** (1.0 / 3.0), rtol=R_TOL)

    y_true = [0, 1, 2, 3, 4, 5]
    y_pred = [0, 1, 2, 3, 4, 5]
    assert_allclose(geometric_mean_score(y_true, y_pred, correction=cor),
                    1, rtol=R_TOL)

    y_true = [0, 1, 1, 1, 1, 0]
    y_pred = [0, 0, 1, 1, 1, 1]
    assert_allclose(geometric_mean_score(y_true, y_pred, correction=cor),
                    (0.5 * 0.75) ** 0.5, rtol=R_TOL)

    y_true = [0, 1, 2, 0, 1, 2]
    y_pred = [0, 2, 1, 0, 0, 1]
    assert_allclose(geometric_mean_score(y_true, y_pred, average='macro'),
                    0.47140452079103168, rtol=R_TOL)
    assert_allclose(geometric_mean_score(y_true, y_pred, average='micro'),
                    0.47140452079103168, rtol=R_TOL)
    assert_allclose(geometric_mean_score(y_true, y_pred, average='weighted'),
                    0.47140452079103168, rtol=R_TOL)
    assert_allclose(geometric_mean_score(y_true, y_pred, average=None),
                    [0.8660254, 0.0, 0.0], rtol=R_TOL)

    y_true = [0, 1, 2, 0, 1, 2]
    y_pred = [0, 1, 1, 0, 0, 1]
    assert_allclose(geometric_mean_score(y_true, y_pred, labels=[0, 1]),
                    0.70710678118654752, rtol=R_TOL)
    assert_allclose(geometric_mean_score(y_true, y_pred, labels=[0, 1],
                                         sample_weight=[1, 2, 1, 1, 2, 1]),
                    0.70710678118654752, rtol=R_TOL)
    assert_allclose(geometric_mean_score(y_true, y_pred, labels=[0, 1],
                                         sample_weight=[1, 2, 1, 1, 2, 1],
                                         average='weighted'),
                    0.3333333333, rtol=R_TOL)

    y_true, y_pred, _ = make_prediction(binary=False)

    geo_mean = geometric_mean_score(y_true, y_pred)
    assert_allclose(geo_mean, 0.41, rtol=R_TOL)

    # Compute the geometric mean for each of the classes
    geo_mean = geometric_mean_score(y_true, y_pred, average=None)
    assert_allclose(geo_mean, [0.85, 0.29, 0.7], rtol=R_TOL)

    # average tests
    geo_mean = geometric_mean_score(y_true, y_pred, average='macro')
    assert_allclose(geo_mean, 0.68, rtol=R_TOL)

    geo_mean = geometric_mean_score(y_true, y_pred, average='weighted')
    assert_allclose(geo_mean, 0.65, rtol=R_TOL)
def test_geometric_mean_score_prediction(average, expected_gmean):
    y_true, y_pred, _ = make_prediction(binary=False)

    gmean = geometric_mean_score(y_true, y_pred, average=average)
    assert gmean == pytest.approx(expected_gmean, rel=R_TOL)