def check_classifiers(n_samples=10000, output_name_pattern=None):
    """Train several classifiers on generated data and draw comparison plots.

    This function is not tested by default, it should be called manually.

    Four classifiers are fitted on one generated sample and evaluated on a
    second, independently generated one: plain AdaBoost, a GaussianNB wrapped
    in ``HidingClassifier`` that is trained on every column except the first,
    and two uBoost variants (``SAMME`` and ``SAMME.R``) that use ``'column0'``
    as the uniform variable.

    :param n_samples: number of events passed to ``generate_sample`` for both
        the training and the testing sample.
    :param output_name_pattern: ``%``-style pattern with a single ``%s`` slot
        (e.g. ``"plots/%s.png"``). When given, each figure is saved under the
        pattern filled with ``"mse_curves"``, ``"learning_curves"`` and
        ``"efficiency_curves"``. When ``None`` nothing is written to disk.
    """
    def save_current_figure(label):
        # Saving is optional: without a pattern the plots are only drawn.
        if output_name_pattern is not None:
            # The savefig keyword is ``bbox_inches``; the previously used
            # ``bbox`` is not a savefig option, so tight cropping never
            # took effect.
            pl.savefig(output_name_pattern % label, bbox_inches="tight")

    testX, testY = generate_sample(n_samples, 10, 0.6)
    trainX, trainY = generate_sample(n_samples, 10, 0.6)
    uniform_variables = ['column0']

    ada = AdaBoostClassifier(n_estimators=50)
    # The reference classifier is not shown the first column
    # (presumably 'column0', the uniform variable -- confirm against
    # generate_sample's column naming).
    ideal_bayes = HidingClassifier(train_variables=trainX.columns[1:],
                                   base_estimator=GaussianNB())

    uBoost_SAMME = uBoostClassifier(
        uniform_variables=uniform_variables,
        n_neighbors=50,
        efficiency_steps=5,
        n_estimators=50,
        algorithm="SAMME")

    uBoost_SAMME_R = uBoostClassifier(
        uniform_variables=uniform_variables,
        n_neighbors=50,
        efficiency_steps=5,
        n_estimators=50,
        algorithm="SAMME.R")

    clf_dict = ClassifiersDict({
        "Ada": ada,
        "Ideal": ideal_bayes,
        "uBOOST": uBoost_SAMME,
        "uBOOST.R": uBoost_SAMME_R,
    })
    clf_dict.fit(trainX, trainY)

    predictions = Predictions(clf_dict, testX, testY)
    print(predictions.compute_metrics())

    predictions.sde_curves(uniform_variables)
    # The file label stays "mse_curves" so existing output names do not change.
    save_current_figure("mse_curves")

    # Start a fresh figure before drawing the learning curves.
    pl.figure()
    predictions.learning_curves()
    save_current_figure("learning_curves")

    predictions.efficiency(uniform_variables)
    save_current_figure("efficiency_curves")
from sklearn.metrics import roc_auc_score
from hep_ml import reports
from hep_ml.reports import ClassifiersDict
from hep_ml.commonutils import generate_sample


class MyNull(Null):
    """``Null`` stand-in for pylab whose ``ylim`` yields a fixed range."""

    def ylim(self, *args):
        """Return ``[0, 1]`` regardless of the arguments passed in."""
        # NOTE(review): presumably the report code reads the y-limits back
        # from pylab and needs a real two-element value -- confirm.
        return [0, 1]


# Shared fixture: two independently generated samples of 1000 events each,
# one used for fitting and one for evaluation.
trainX, trainY = generate_sample(1000, 10)
testX, testY = generate_sample(1000, 10)

# Two small ensembles, registered one by one under their report names.
classifiers = ClassifiersDict()
classifiers['ada'] = AdaBoostClassifier(n_estimators=20)
classifiers['forest'] = RandomForestClassifier(n_estimators=20)

# Fit on the training sample, then collect predictions on the test sample.
predictions = (
    classifiers
    .fit(trainX, trainY)
    .test_on(testX, testY)
)


def test_reports(null_pylab=True):
    """Run the plotting entry points of ``predictions`` once each.

    :param null_pylab: when true, ``reports.pylab`` is replaced by a
        ``MyNull`` instance before any plotting call is made.
    """
    if null_pylab:
        reports.pylab = MyNull()

    predictions.sde_curves(['column0'])

    correlation_plot = predictions.correlation_curves('column1')
    correlation_plot.show()

    predictions.learning_curves()
    predictions.show()

    histogram_plot = predictions.hist(['column0'])
    histogram_plot.show()