Example #1
import pylab as pl
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from hep_ml.commonutils import generate_sample
from hep_ml.reports import ClassifiersDict
# uBoostClassifier, HidingClassifier and Predictions are also hep_ml classes;
# their exact import paths depend on the hep_ml version this snippet targets.


def check_classifiers(n_samples=10000, output_name_pattern=None):
    """
    This function is not run by the test suite by default; it should be called manually.
    """
    testX, testY = generate_sample(n_samples, 10, 0.6)
    trainX, trainY = generate_sample(n_samples, 10, 0.6)
    uniform_variables = ['column0']

    ada = AdaBoostClassifier(n_estimators=50)
    # "Ideal" Bayes baseline: column0 (the uniform variable) is hidden from it.
    ideal_bayes = HidingClassifier(train_variables=trainX.columns[1:],
                                   base_estimator=GaussianNB())

    uBoost_SAMME = uBoostClassifier(
        uniform_variables=uniform_variables,
        n_neighbors=50,
        efficiency_steps=5,
        n_estimators=50,
        algorithm="SAMME")
    uBoost_SAMME_R = uBoostClassifier(
        uniform_variables=uniform_variables,
        n_neighbors=50,
        efficiency_steps=5,
        n_estimators=50,
        algorithm="SAMME.R")

    clf_dict = ClassifiersDict({
        "Ada": ada,
        "Ideal": ideal_bayes,
        "uBOOST": uBoost_SAMME,
        "uBOOST.R": uBoost_SAMME_R
        })

    clf_dict.fit(trainX, trainY)

    predictions = Predictions(clf_dict, testX, testY)
    # predictions.print_mse(uniform_variables, in_html=False)
    print(predictions.compute_metrics())

    predictions.sde_curves(uniform_variables)
    if output_name_pattern is not None:
        pl.savefig(output_name_pattern % "mse_curves", bbox="tight")
    _ = pl.figure()
    predictions.learning_curves()
    if output_name_pattern is not None:
        pl.savefig(output_name_pattern % "learning_curves", bbox="tight")
    predictions.efficiency(uniform_variables)
    if output_name_pattern is not None:
        pl.savefig(output_name_pattern % "efficiency_curves", bbox="tight")
Example #2
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.metrics import roc_auc_score
from hep_ml import reports
from hep_ml.reports import ClassifiersDict
from hep_ml.commonutils import generate_sample
# Null (a do-nothing stand-in extended by MyNull below) is assumed to be
# defined or imported elsewhere in the original test module.


class MyNull(Null):
    # Null object used in place of pylab; ylim must still return a valid range.
    def ylim(self, *args):
        return [0, 1]


trainX, trainY = generate_sample(1000, 10)
testX, testY = generate_sample(1000, 10)

classifiers = ClassifiersDict()
classifiers['ada'] = AdaBoostClassifier(n_estimators=20)
classifiers['forest'] = RandomForestClassifier(n_estimators=20)
predictions = classifiers.fit(trainX, trainY).test_on(testX, testY)


def test_reports(null_pylab=True):
    if null_pylab:
        reports.pylab = MyNull()

    predictions.sde_curves(['column0'])

    predictions.correlation_curves('column1').show()
    predictions.learning_curves()
    predictions.show()
    predictions.hist(['column0']).show()
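
test_reports above keeps plotting harmless by swapping reports.pylab for the MyNull stub, so the report methods can be exercised without a display. A minimal sketch of running it directly, assuming the snippet is saved as a standalone script:

if __name__ == '__main__':
    # With null_pylab=True (the default), MyNull swallows all pylab calls,
    # so no plot windows are opened during the checks.
    test_reports()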