def _check_agreements(self, model):
    """Score the agreement sample with ``model`` and return the ``compute_ks`` result.

    Side effect: ``self.check_agreement`` is overwritten with the output of
    ``get_columns_in_df(self._check_agreement, self.flist.predictors)``.

    Args:
        model: Object exposing ``predict_proba``; column 1 of its output is
            taken as the per-row score.

    Returns:
        Whatever ``compute_ks`` returns when given, in order: the scores of
        rows with ``signal == 0``, the scores of rows with ``signal == 1``,
        and the ``weight`` values of those same two groups.
    """
    self.check_agreement = get_columns_in_df(
        self._check_agreement, self.flist.predictors
    )
    scores = model.predict_proba(self.check_agreement.values)[:, 1]

    # The labels and weights come from the unfiltered frame; the filtered copy
    # above is only what gets fed to the model.
    # NOTE(review): assumes both frames share the same row order - confirm.
    frame = self._check_agreement
    labels = frame['signal'].values
    weights = frame['weight'].values
    zero_rows = labels == 0
    one_rows = labels == 1

    return compute_ks(
        scores[zero_rows],
        scores[one_rows],
        weights[zero_rows],
        weights[one_rows],
    )
def _check_correlations(self, model):
    """Score the correlation sample with ``model`` and return the ``compute_cvm`` result.

    Side effect: ``self.check_correlation`` is overwritten with the output of
    ``get_columns_in_df(self._check_correlation, self.flist.predictors)``.

    Args:
        model: Object exposing ``predict_proba``; column 1 of its output is
            taken as the per-row score.

    Returns:
        Whatever ``compute_cvm`` returns for the scores and the ``mass``
        column of ``self._check_correlation``.
    """
    predictors_only = get_columns_in_df(
        self._check_correlation, self.flist.predictors
    )
    self.check_correlation = predictors_only

    scores = model.predict_proba(self.check_correlation.values)[:, 1]
    # ``mass`` is read from the unfiltered frame and passed as a column
    # object, not as a bare array.
    mass = self._check_correlation['mass']
    return compute_cvm(scores, mass)
# Hyper-parameter search for a RandomForestClassifier on the training CSV.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

from blue.featurelist import FeatureList
from blue.pandas_utils import get_columns_in_df
from blue.estimators import HyperoptEstimator
from evaluation import roc_auc_truncated

# Input locations, relative to the working directory.
train_file = './data/training.csv'
test_file = './data/test.csv'

# Feature specification built from the training file and ``features.yml``.
flist = FeatureList(train_file, spec='features.yml', derived_list=None)

# The training frame is indexed by ``id`` and filtered to ``flist.universe``.
df_train = get_columns_in_df(
    pd.read_csv(train_file, index_col='id'),
    flist.universe,
)

# The test frame is read with the default index and filtered to the predictors.
df_test = get_columns_in_df(pd.read_csv(test_file), flist.predictors)

# The metric is the negated truncated ROC AUC.
# NOTE(review): presumably negated because the search minimises its metric - confirm.
hpest = HyperoptEstimator(
    RandomForestClassifier,
    max_evals=5,
    n_jobs=3,
    metric=lambda x, y: -roc_auc_truncated(x, y),
)

hpest.fit(
    df_train[flist.predictors].values,
    df_train[flist.target].values,
)