Beispiel #1
0
def bootstrap_diff(df, ccp_estimator, rounds, sample_size):
    """Bootstrap the gap between estimated and observed positive rate.

    Each round resamples ``df`` with replacement, builds a confusion
    matrix from the resample, and records the observed positive rate,
    the hit rate, the CCP estimate derived from the hit rate, and the
    difference (estimate minus observed).

    Parameters
    ----------
    df : pandas.DataFrame
        Labeled rows to resample.  NOTE(review): grouping uses the free
        names ``classifier``, ``concept`` and ``count`` — presumably
        module-level column-name constants; confirm they are defined.
    ccp_estimator : object
        Must expose ``estimate_positives(hit_rate)``.
    rounds : int
        Number of bootstrap iterations.
    sample_size : int
        Rows drawn (with replacement) per iteration.

    Returns
    -------
    pandas.DataFrame
        Columns ``['positive_rate', 'hit_rate', 'ccp', 'ccp_diff']``,
        one row per bootstrap round.
    """
    bootstrap_results = []
    for i in range(rounds):
        # Resample and build a confusion matrix on the resample.
        s1 = df.sample(sample_size, replace=True)
        bug_g = s1.groupby([classifier, concept],
                           as_index=False).agg({count: 'count'})
        bug_cm = ConfusionMatrix(g_df=bug_g,
                                 classifier=classifier,
                                 concept=concept,
                                 count=count)

        positive_rate = bug_cm.positive_rate()
        hit_rate = bug_cm.hit_rate()
        ccp = ccp_estimator.estimate_positives(hit_rate)
        ccp_diff = ccp - positive_rate

        bootstrap_results.append([positive_rate, hit_rate, ccp, ccp_diff])

        # Progress heartbeat every 100 rounds.
        if i % 100 == 0:
            print(f"finished {i}", datetime.datetime.now())

    return pd.DataFrame(
        bootstrap_results,
        columns=['positive_rate', 'hit_rate', 'ccp', 'ccp_diff'])
Beispiel #2
0
def quality_and_speed_over_years(commits_per_user_file):
    """Print how year-over-year quality (CCP) and speed changes co-occur.

    Joins per-user commit statistics (years after 2014) with the valid
    repositories, pairs each repository-year with the previous year, and
    prints confusion-matrix summaries relating CCP improvement to speed
    improvement — both for any improvement and for "significant"
    improvements (CCP drop > 0.1, speed gain > 10 commits per user).

    Parameters
    ----------
    commits_per_user_file : str
        Path to a CSV with at least ``repo_name``, ``year``,
        ``corrective_commits_ratio`` and ``commits_per_above11_users``.

    Returns
    -------
    None — output is printed.
    """
    print("over the years ccp and speed change")
    trep = get_valid_repos()
    trep = trep[['repo_name']]
    users_per_project = pd.read_csv(commits_per_user_file)
    users_per_project = users_per_project[users_per_project.year > 2014]
    df = pd.merge(users_per_project, trep, on='repo_name')

    df = df[[
        'repo_name', 'year', 'corrective_commits_ratio',
        'commits_per_above11_users'
    ]]
    df = df.dropna()

    # Self-join each repo-year with its previous year: the "cur" copy
    # carries a prev_year key so merging on it pairs adjacent years.
    cur_df = df.copy()
    cur_df['prev_year'] = cur_df.year - 1
    cur_df = cur_df.rename(
        columns={
            'year': 'cur_year',
            'corrective_commits_ratio': 'cur_corrective_commits_ratio',
            'commits_per_above11_users': 'cur_commits_per_above11_users'
        })

    prev_df = df.copy()
    prev_df = prev_df.rename(
        columns={
            'year': 'prev_year',
            'corrective_commits_ratio': 'prev_corrective_commits_ratio',
            'commits_per_above11_users': 'prev_commits_per_above11_users'
        })

    two_years = pd.merge(cur_df,
                         prev_df,
                         left_on=['repo_name', 'prev_year'],
                         right_on=['repo_name', 'prev_year'])
    # Lower CCP (corrective-commit ratio) means better quality;
    # more commits per user means more speed.
    two_years[
        'improved_ccp'] = two_years.cur_corrective_commits_ratio < two_years.prev_corrective_commits_ratio
    two_years[
        'hurt_ccp'] = two_years.cur_corrective_commits_ratio > two_years.prev_corrective_commits_ratio
    two_years[
        'improved_speed'] = two_years.cur_commits_per_above11_users > two_years.prev_commits_per_above11_users

    g = two_years.groupby(['improved_ccp', 'improved_speed'],
                          as_index=False).agg({'repo_name': 'count'})
    print(g)

    cm = ConfusionMatrix(g_df=g,
                         classifier='improved_ccp',
                         concept='improved_speed',
                         count='repo_name')

    print(cm.summarize())
    print("speed & ccp improvement match", cm.accuracy())
    print("speed improvement given ccp improvement", cm.precision())
    print("ccp improvement given speed improvement",
          cm.tp() / (cm.fn() + cm.tp()))

    # "Significant" improvement thresholds: CCP drop > 0.1, speed
    # gain > 10 commits per user.
    two_years[
        'sig_improved_ccp'] = two_years.cur_corrective_commits_ratio < two_years.prev_corrective_commits_ratio - 0.1
    two_years[
        'sig_improved_speed'] = two_years.cur_commits_per_above11_users > two_years.prev_commits_per_above11_users + 10

    g = two_years.groupby(['sig_improved_ccp', 'sig_improved_speed'],
                          as_index=False).agg({'repo_name': 'count'})
    print(g)

    cm = ConfusionMatrix(g_df=g,
                         classifier='sig_improved_ccp',
                         concept='sig_improved_speed',
                         count='repo_name')
    print(cm.summarize())

    g = two_years.groupby(['sig_improved_ccp', 'improved_speed'],
                          as_index=False).agg({'repo_name': 'count'})
    print(g)

    # BUG FIX: the stats below previously reused the matrix built for
    # ('sig_improved_ccp', 'sig_improved_speed'); build the matrix for
    # the grouping actually printed just above.
    cm = ConfusionMatrix(g_df=g,
                         classifier='sig_improved_ccp',
                         concept='improved_speed',
                         count='repo_name')
    print(cm.summarize())
    print()
    print("speed & ccp improvement match", cm.accuracy())
    print("speed improvement given ccp improvement", cm.precision(), "lift",
          cm.precision_lift())
    print("ccp improvement given speed improvement", cm.recall(), "lift",
          cm.recall() / cm.hit_rate() - 1)
    print()

    g = two_years.groupby(['sig_improved_speed', 'hurt_ccp'],
                          as_index=False).agg({'repo_name': 'count'})
    cm = ConfusionMatrix(g_df=g,
                         classifier='sig_improved_speed',
                         concept='hurt_ccp',
                         count='repo_name')

    print(cm.summarize())
    print()
    print("ccp hurt given significant speed improvement", cm.precision(),
          "lift", cm.precision_lift())
    print()
Beispiel #3
0
def two_years_analysis(two_years_df, first_metric, second_metric, key):
    """Print how increments of two boolean metrics co-occur.

    Groups ``two_years_df`` by the two metric columns, builds a
    confusion matrix treating ``first_metric`` as the classifier and
    ``second_metric`` as the concept (counting ``key``), and prints the
    matrix summary plus accuracy, precision/lift and recall/lift.

    Returns None — output is printed.
    """
    print()
    print("Co-change", first_metric, second_metric)
    counts = two_years_df.groupby(
        [first_metric, second_metric], as_index=False).agg({key: 'count'})

    print(counts)

    matrix = ConfusionMatrix(g_df=counts,
                             classifier=first_metric,
                             concept=second_metric,
                             count=key)

    print(matrix.summarize())
    print()
    print("Samples", matrix.samples())
    print("Both metrics increment match", matrix.accuracy())
    print(second_metric, " improvement given ", first_metric,
          " improvement", matrix.precision(), "lift",
          matrix.precision_lift())
    # Recall lift guarded against nulls / division by zero via helpers.
    recall_lift = ifnull(
        safe_divide(ifnull(matrix.recall()), matrix.hit_rate())) - 1
    print(first_metric, " improvement given ", second_metric,
          "improvement", matrix.recall(), "lift", recall_lift)
    print()