def check_agreement(model, variables):
    """Compute and log the KS metric on the 'check_agreement' dataset.

    The model's second ``predict_proba`` column is split by the value of the
    ``signal`` column (0 vs. 1) and both groups, together with their
    ``weight`` values, are handed to ``evaluation.compute_ks``.

    Args:
        model: Fitted estimator exposing ``predict_proba``.
        variables: Column names selected from the dataset as model input.

    Returns:
        The value returned by ``evaluation.compute_ks``.
    """
    agreement = load_data('check_agreement')
    scores = model.predict_proba(agreement[variables])[:, 1]

    labels = agreement['signal']
    zero_rows = agreement[labels == 0]
    one_rows = agreement[labels == 1]

    ks = evaluation.compute_ks(
        scores[labels.values == 0],
        scores[labels.values == 1],
        zero_rows['weight'].values,
        one_rows['weight'].values,
    )

    # The second logged field reports whether the metric is under 0.09.
    bot.info('KS metric %s %s' % (ks, ks < 0.09))
    return ks
def check_auc(model, train_eval, variables):
    """Compute and log the truncated ROC AUC of ``model`` on ``train_eval``.

    Args:
        model: Fitted estimator exposing ``predict_proba``.
        train_eval: Table providing the ``variables`` columns and a
            ``signal`` column used as the reference labels.
        variables: Column names selected from ``train_eval`` as model input.

    Returns:
        The value returned by ``roc_auc_truncated``.
    """
    features = train_eval[variables]
    # Only the second probability column is scored.
    scores = model.predict_proba(features)[:, 1]

    auc = roc_auc_truncated(train_eval['signal'], scores)
    bot.info('AUC %s' % auc)
    return auc
def check_correlation(model, variables):
    """Compute and log the CvM metric on the 'check_correlation' dataset.

    The model's second ``predict_proba`` column is passed to ``compute_cvm``
    together with the dataset's ``mass`` column.

    Args:
        model: Fitted estimator exposing ``predict_proba``.
        variables: Column names selected from the dataset as model input.

    Returns:
        The value returned by ``compute_cvm``.
    """
    correlation = load_data('check_correlation')
    features = correlation[variables]
    scores = model.predict_proba(features)[:, 1]

    cvm = compute_cvm(scores, correlation['mass'])

    # The second logged field reports whether the metric is under 0.002.
    bot.info('CvM metric %s %s' % (cvm, cvm < 0.002))
    return cvm
# Columns used as model input for both fitting and the checks below.
variables = ['LifeTime', 'FlightDistance', 'pt']

baseline = GradientBoostingClassifier(
    n_estimators=40,
    learning_rate=0.01,
    subsample=0.7,
    min_samples_leaf=10,
    max_depth=7,
    random_state=11,
)
baseline.fit(train[variables], train['signal'])

# MODEL TESTING ########################################
from metrics import check_agreement, check_auc, check_correlation

bot.info("\nChecking Agreement:\n")
check_agreement(baseline, variables)

bot.info("\nChecking Correlation:\n")
check_correlation(baseline, variables)

# AUC is evaluated only on training rows whose min_ANNmuon exceeds 0.4.
train_eval = train[train['min_ANNmuon'] > 0.4]
bot.info("\nChecking AUC:\n")
check_auc(baseline, train_eval, variables)

# DERIVE RESULT FOR TEST ###############################
from helpers.results import save_result

test = load_data(name="test")
# Start the result table with one 'id' per row of the test index.
result = pandas.DataFrame({'id': test.index})