Exemple #1
0
def check_agreement(model, variables):
    """Compute and log the KS metric on the 'check_agreement' dataset.

    Every row of the dataset is scored with ``model.predict_proba`` (column 1
    of its output is kept).  Scores and the 'weight' column are then split by
    the 'signal' flag and handed to ``evaluation.compute_ks`` with the
    ``signal == 0`` group first and the ``signal == 1`` group second.

    Args:
        model: Object exposing ``predict_proba``.
        variables: Column selection passed to the model.

    Returns:
        Whatever ``evaluation.compute_ks`` returns for the two groups.
    """
    agreement = load_data('check_agreement')
    # NOTE(review): column 1 is presumably the signal-class probability --
    # confirm against the model's class ordering.
    scores = model.predict_proba(agreement[variables])[:, 1]

    labels = agreement['signal'].values
    weights = agreement['weight'].values
    zero_rows = labels == 0
    one_rows = labels == 1

    ks = evaluation.compute_ks(
        scores[zero_rows],
        scores[one_rows],
        weights[zero_rows],
        weights[one_rows])

    # Second logged field is a boolean: whether the metric is below 0.09.
    bot.info('KS metric %s %s' % (ks, ks < 0.09))
    return ks
def check_auc(model, train_eval, variables):
    """Compute and log the truncated ROC AUC of ``model`` on ``train_eval``.

    Args:
        model: Object exposing ``predict_proba``.
        train_eval: Table providing the ``variables`` columns and a 'signal'
            column.
        variables: Column selection passed to the model.

    Returns:
        Whatever ``roc_auc_truncated`` returns for the 'signal' column and
        the model scores.
    """
    features = train_eval[variables]
    # Keep column 1 of the predict_proba output as the per-row score.
    scores = model.predict_proba(features)[:, 1]
    auc = roc_auc_truncated(train_eval['signal'], scores)
    bot.info('AUC %s' % auc)
    return auc
def check_correlation(model, variables):
    """Compute and log the CvM metric on the 'check_correlation' dataset.

    The dataset is scored with ``model.predict_proba`` (column 1 is kept) and
    the scores are passed to ``compute_cvm`` together with the 'mass' column.

    Args:
        model: Object exposing ``predict_proba``.
        variables: Column selection passed to the model.

    Returns:
        Whatever ``compute_cvm`` returns.
    """
    correlation = load_data('check_correlation')
    features = correlation[variables]
    scores = model.predict_proba(features)[:, 1]
    cvm = compute_cvm(scores, correlation['mass'])
    # Second logged field is a boolean: whether the metric is below 0.002.
    bot.info('CvM metric %s %s' % (cvm, cvm < 0.002))
    return cvm
# Columns fed to the baseline model, both for fitting and for the checks.
variables = ['LifeTime', 'FlightDistance', 'pt']

# Hyperparameters of the baseline classifier, kept together so they can be
# read at a glance; random_state pins the seed.
_baseline_params = {
    'n_estimators': 40,
    'learning_rate': 0.01,
    'subsample': 0.7,
    'min_samples_leaf': 10,
    'max_depth': 7,
    'random_state': 11,
}
baseline = GradientBoostingClassifier(**_baseline_params)

# Fit on the selected columns with the 'signal' column as the target.
baseline.fit(train[variables], train['signal'])

# MODEL TESTING ########################################

from metrics import (check_agreement, check_correlation, check_auc)

# Log a banner, then run the agreement check on the fitted baseline; the
# check's return value is discarded here.
bot.info("\nChecking Agreement:\n")
check_agreement(baseline, variables)

# Same pattern for the correlation check; its return value is also discarded.
bot.info("\nChecking Correlation:\n")
check_correlation(baseline, variables)

# Restrict the AUC evaluation to training rows whose 'min_ANNmuon' is above 0.4.
# NOTE(review): the reason for the 0.4 cut is not visible here -- confirm.
train_eval = train[train['min_ANNmuon'] > 0.4]

# Banner, then the AUC check on the filtered rows only.
bot.info("\nChecking AUC:\n")
check_auc(baseline, train_eval, variables)

# DERIVE RESULT FOR TEST ###############################
from helpers.results import save_result

# Load the dataset registered under the name "test".
test = load_data(name="test")
# Start the result frame with a single 'id' column taken from the test index.
result = pandas.DataFrame({'id': test.index})