Beispiel #1
0
def adult(dataType):
    """Produce the AdaBoost diagnostic plots for one data set.

    The data set named by ``dataType`` is loaded through
    ``data.createData``. A default ``AdaBoostClassifier`` is handed to
    ``plotter.plotValidationCurve`` with ``n_estimators`` swept over
    1, 11, ..., 151. ``n_estimators`` is then pinned to 40 for the
    learning-curve plot, the classifier is fitted on the training split,
    and the fitted model is passed to ``plotter.plotConfusion`` together
    with the test split.
    """
    curveTitle = '{0} Ada Boost'.format(dataType)
    dataset = data.createData(dataType)

    # Pull all four splits out once, before any plotting starts.
    xTrain, xTest, yTrain, yTest = (
        dataset.xTrain,
        dataset.xTest,
        dataset.yTrain,
        dataset.yTest,
    )

    booster = AdaBoostClassifier()

    plotter.plotValidationCurve(
        booster,
        xTrain,
        yTrain,
        'n_estimators',
        list(range(1, 160, 10)),
        graphTitle=curveTitle,
    )

    # Fixed ensemble size used for the learning curve and the final fit.
    booster.n_estimators = 40
    plotter.plotLearningCurve(
        booster, title=curveTitle, xTrain=xTrain, yTrain=yTrain)

    # The confusion matrix uses its own title and the held-out test split.
    booster.fit(xTrain, yTrain)
    plotter.plotConfusion(booster, 'Adult', ['>50K', '<=50K'], xTest, yTest)
Beispiel #2
0
    """Run evaluation of a classifier, for one category.

    If data isn't set explicitly, the test set is
    used by default.
    """
    log_file = os.path.join(datamanager.PATHS["LOGS"], "evaluation",
                            class_name(clf), category)
    log_file = os.path.join(log_file, str(datetime.now()) + ".log")

    vcd = VisualConceptDetection(None, datamanager, log_file=log_file)
    clf = vcd.load_object("Classifier", category, clf)
    vcd.classifier = clf
    if (data[0] is None) or (data[1] is None):
        return vcd.evaluate_test_set(category)
    else:
        return vcd.evaluate(X_test=data[0], y_test=data[1])


if __name__ == '__main__':
    # Script entry point: evaluate an AdaBoost ensemble of depth-4 decision
    # trees (2000 estimators) on the "airplanes" category.
    #
    # Alternative classifiers kept for reference:
    #   RandomForestClassifier()
    #   LinearSVC(C=100)

    # Local import so this block brings the weak learner into scope itself.
    # NOTE(review): assumes AdaBoostClassifier is scikit-learn's - confirm.
    from sklearn.tree import DecisionTreeClassifier

    # The weak learner is handed to the constructor instead of being patched
    # afterwards: a default-constructed AdaBoostClassifier may have no base
    # estimator object yet (None), in which case assigning
    # ``classifier.base_estimator.max_depth`` raises AttributeError.
    # It is passed positionally because the keyword name differs between
    # scikit-learn releases (``base_estimator`` vs. ``estimator``).
    classifier = AdaBoostClassifier(DecisionTreeClassifier(max_depth=4),
                                    n_estimators=2000)

    category = "airplanes"
    datamanager = CaltechManager()
    evaluate(category, classifier, datamanager)
def evaluate(category, clf, datamanager, data=(None, None)):
    """Evaluate a classifier on a single category.

    ``data`` is indexed as ``(X_test, y_test)``. Unless both entries are
    provided, the evaluation falls back to the test set of ``category``.
    The result of the chosen ``VisualConceptDetection`` evaluation method
    is returned unchanged.
    """
    # One timestamped log file per run, grouped by classifier and category.
    log_path = os.path.join(
        datamanager.PATHS["LOGS"],
        "evaluation",
        class_name(clf),
        category,
        str(datetime.now()) + ".log",
    )

    detector = VisualConceptDetection(None, datamanager, log_file=log_path)
    # Whatever load_object returns for the passed-in classifier is the one
    # that gets evaluated.
    detector.classifier = detector.load_object("Classifier", category, clf)

    if data[0] is not None and data[1] is not None:
        return detector.evaluate(X_test=data[0], y_test=data[1])
    return detector.evaluate_test_set(category)


if __name__ == "__main__":
    # Script entry point: evaluate an AdaBoost ensemble of depth-4 decision
    # trees (2000 estimators) on the "airplanes" category.
    #
    # Alternative classifiers kept for reference:
    #   RandomForestClassifier()
    #   LinearSVC(C=100)

    # Local import so this block brings the weak learner into scope itself.
    # NOTE(review): assumes AdaBoostClassifier is scikit-learn's - confirm.
    from sklearn.tree import DecisionTreeClassifier

    # The weak learner is handed to the constructor instead of being patched
    # afterwards: a default-constructed AdaBoostClassifier may have no base
    # estimator object yet (None), in which case assigning
    # ``classifier.base_estimator.max_depth`` raises AttributeError.
    # It is passed positionally because the keyword name differs between
    # scikit-learn releases (``base_estimator`` vs. ``estimator``).
    classifier = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=4),
        n_estimators=2000,
    )

    category = "airplanes"
    datamanager = CaltechManager()
    evaluate(category, classifier, datamanager)