コード例 #1
0
def evaluate(category, clf, datamanager, data=(None, None)):
    """Evaluate a stored classifier on a single category.

    A ``VisualConceptDetection`` instance is created with a log file at
    ``<LOGS>/evaluation/<class_name(clf)>/<category>/<timestamp>.log``;
    the classifier is obtained through its ``load_object`` method and
    attached to the instance before evaluating.

    ``data`` is an ``(X_test, y_test)`` pair. When either element is
    ``None`` the test set of ``category`` is evaluated instead.

    Returns whatever the underlying evaluation method returns.
    """
    log_dir = os.path.join(
        datamanager.PATHS["LOGS"], "evaluation", class_name(clf), category
    )
    timestamp = str(datetime.now())
    log_path = os.path.join(log_dir, timestamp + ".log")

    detector = VisualConceptDetection(None, datamanager, log_file=log_path)
    detector.classifier = detector.load_object("Classifier", category, clf)

    # Explicit samples and labels take precedence over the default test set.
    if data[0] is not None and data[1] is not None:
        return detector.evaluate(X_test=data[0], y_test=data[1])
    return detector.evaluate_test_set(category)
コード例 #2
0
class TestVisualConceptDetection(unittest.TestCase):
    """Unit tests for ``VisualConceptDetection`` using the test data manager."""

    def setUp(self):
        """Create a fresh data manager and an AdaBoost-based detector."""
        self.datamanager = MyTestDataManager()
        booster = AdaBoostClassifier(n_estimators=14)
        booster.base_estimator.max_depth = 10
        self.vcd = VisualConceptDetection(booster, self.datamanager)

    def test_base_dir(self):
        """The "BOW" path must be ``<BASE_PATH>/testdata/bow``."""
        expected_bow_dir = os.path.join(BASE_PATH, "testdata", "bow")
        self.assertEqual(self.datamanager.PATHS["BOW"], expected_bow_dir)

    # def test_dump_object(self):

    def test_dump_and_load_object(self):
        """A dumped classifier must load back as an instance of its class."""
        self.vcd.dump_object(self.vcd.classifier, "test_classifier")
        # TODO: weak check -- only the type of the reloaded object is verified.
        reloaded = self.vcd.load_object("test_classifier")
        self.assertIsInstance(reloaded, self.vcd.classifier.__class__)

    def test_count_methods(self):
        """Five ones and seven zeros must be counted as such."""
        labels = np.array([0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0])
        self.assertEqual(self.vcd.count_positives(labels), 5)
        self.assertEqual(self.vcd.count_negatives(labels), 7)

    def test_sample_weights(self):
        """With 3 positives and 7 negatives, expect weights 1.0 and 3/7."""
        labels = np.array([1, 0, 0, 0, 1, 1, 0, 0, 0, 0])
        positive_weight = 1.0
        negative_weight = 3.0 / 7.0
        expected = np.where(labels == 1, positive_weight, negative_weight)
        actual = self.vcd.calculate_weights(labels)
        all_equal = (actual == expected).all()
        self.assertTrue(
            all_equal,
            "Should be:\n%s\nbut is:\n%s" % (expected, actual),
        )
コード例 #3
0
class TestVisualConceptDetection(unittest.TestCase):
    """Test case exercising ``VisualConceptDetection`` helper methods."""

    def setUp(self):
        """Prepare the data manager and the detector under test."""
        manager = MyTestDataManager()
        self.datamanager = manager
        ada = AdaBoostClassifier(n_estimators=14)
        ada.base_estimator.max_depth = 10
        self.vcd = VisualConceptDetection(ada, self.datamanager)

    def test_base_dir(self):
        """Check the configured "BOW" directory of the test data manager."""
        bow_dir = os.path.join(BASE_PATH, "testdata", "bow")
        self.assertEqual(self.datamanager.PATHS["BOW"], bow_dir)

    # def test_dump_object(self):

    def test_dump_and_load_object(self):
        """Round-trip the classifier through dump_object / load_object."""
        self.vcd.dump_object(self.vcd.classifier, "test_classifier")
        # TODO: this only compares classes, not the state of the loaded object.
        restored = self.vcd.load_object("test_classifier")
        classifier_class = self.vcd.classifier.__class__
        self.assertIsInstance(restored, classifier_class)

    def test_count_methods(self):
        """count_positives / count_negatives on a 5-vs-7 label vector."""
        class_vector = np.array([0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0])
        self.assertEqual(self.vcd.count_positives(class_vector), 5)
        self.assertEqual(self.vcd.count_negatives(class_vector), 7)

    def test_sample_weights(self):
        """calculate_weights must give 1.0 to ones and 3/7 to zeros here."""
        class_vector = np.array([1, 0, 0, 0, 1, 1, 0, 0, 0, 0])
        pos = 1.0
        neg = 3.0 / 7.0
        expected = np.array([pos, neg, neg, neg, pos, pos, neg, neg, neg, neg])
        result = self.vcd.calculate_weights(class_vector)
        failure_message = "Should be:\n%s\nbut is:\n%s" % (expected, result)
        self.assertTrue((result == expected).all(), failure_message)
コード例 #4
0
def evaluate(category, clf, datamanager, data=(None, None)):
    """Run the evaluation of one classifier for one category.

    The log file is written below the data manager's "LOGS" path, in
    ``evaluation/<class_name(clf)>/<category>/``, and is named after the
    current timestamp. The classifier used for the evaluation is the one
    returned by ``VisualConceptDetection.load_object``.

    ``data`` holds ``(X_test, y_test)``; if either entry is ``None`` the
    category's test set is evaluated, otherwise the given pair is used.
    """
    category_log_dir = os.path.join(
        datamanager.PATHS["LOGS"],
        "evaluation",
        class_name(clf),
        category,
    )
    log_name = str(datetime.now()) + ".log"
    log_file = os.path.join(category_log_dir, log_name)

    vcd = VisualConceptDetection(None, datamanager, log_file=log_file)
    loaded_clf = vcd.load_object("Classifier", category, clf)
    vcd.classifier = loaded_clf

    # Only use the caller's data when both samples and labels were supplied.
    has_explicit_data = not ((data[0] is None) or (data[1] is None))
    if has_explicit_data:
        return vcd.evaluate(X_test=data[0], y_test=data[1])
    return vcd.evaluate_test_set(category)
コード例 #5
0
 def setUp(self):
     """Create the test data manager and an AdaBoost-based detector."""
     self.datamanager = MyTestDataManager()
     booster = AdaBoostClassifier(n_estimators=14)
     # Set the maximum depth on the ensemble's base estimator.
     booster.base_estimator.max_depth = 10
     self.vcd = VisualConceptDetection(booster, self.datamanager)
コード例 #6
0
        return result

if __name__ == "__main__":
    # Script entry point: load the stored "trilobite" classifier, read its
    # feature importances, predict on the "all" dataset and build one image
    # title per sample.

    # Disabled AdaBoost alternative to the random forest below.
    # ada = AdaBoostClassifier()
    # ada.n_estimators = 50
    # ada.base_estimator.max_depth = 1

    random_forest = RandomForestClassifier(n_estimators=100)

    category = "trilobite"
    dataset = "all"
    datamanager = CaltechManager()
    # Point the "RESULTS" path at a run-specific directory below "BASE".
    datamanager.PATHS["RESULTS"] = os.path.join(datamanager.PATHS["BASE"], "results_trilobite_rf_testing")

    # vcd = VisualConceptDetection(ada, datamanager)
    vcd = VisualConceptDetection(random_forest, datamanager)

    # NOTE(review): load_object presumably restores a previously dumped,
    # fitted classifier from the RESULTS path -- confirm in VisualConceptDetection.
    clf = vcd.load_object("Classifier", category)
    feature_importances = clf.feature_importances_

    sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category)
    class_vector = vcd.datamanager.build_class_vector(dataset, category)
    pred = clf.predict_proba(sample_matrix)

    vis = EnsembleVisualization(datamanager)
    # The classifier is not referenced again in this block once `pred` and
    # `feature_importances` exist (presumably deleted to free memory).
    del clf
    # Pair each prediction with the corresponding entry of the class vector.
    image_titles = [vis.get_image_title(prediction, real) for prediction, real in
                    izip(pred, class_vector)]
    # The inputs are dropped once the titles have been built.
    del class_vector
    del sample_matrix
コード例 #7
0
import sys
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import LinearSVC, SVC
from vcd import VisualConceptDetection
import os
import time
from util import svm
from datamanagers.CaltechManager import CaltechManager
import numpy as np
import pylab as pl

from runGridSearch import GridSearch

if __name__ == "__main__":
    # Script entry point: run the visual concept detection pipeline for one
    # category with a random forest and report the wall-clock time.
    category = "airplanes"
    total = time.time()  # start timestamp, used for the elapsed-time report

    clf = RandomForestClassifier(n_estimators=2000)

    # Alternative classifiers, disabled:
    # clf = AdaBoostClassifier(n_estimators = 2000)
    # clf.base_estimator.max_depth = 4

    # clf = LinearSVC(C=100)
    # clf = SVC(C=10)

    dm = CaltechManager()
    vcd = VisualConceptDetection(classifier=clf, datamanager=dm)

    vcd.run(category)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3 (the bare print statement is not).
    print("Total execution time: %f minutes" % ((time.time() - total) / 60.0))
コード例 #8
0
import sys
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import LinearSVC, SVC
from vcd import VisualConceptDetection
import os
import time
from util import svm
from datamanagers.CaltechManager import CaltechManager
import numpy as np
import pylab as pl

from runGridSearch import GridSearch

if __name__ == "__main__":
    # Script entry point: train/evaluate via VisualConceptDetection.run for
    # the "airplanes" category and print how long the whole run took.
    category = "airplanes"
    total = time.time()  # start of the run, for the timing report below

    clf = RandomForestClassifier(n_estimators=2000)

    # Other classifier configurations, disabled:
    # clf = AdaBoostClassifier(n_estimators = 2000)
    # clf.base_estimator.max_depth = 4

    # clf = LinearSVC(C=100)
    # clf = SVC(C=10)

    dm = CaltechManager()
    vcd = VisualConceptDetection(classifier=clf, datamanager=dm)

    vcd.run(category)
    # Use the call form of print: identical output on Python 2 for a single
    # argument, and it no longer breaks parsing on Python 3.
    print("Total execution time: %f minutes" % ((time.time() - total) / 60.0))
コード例 #9
0
if __name__ == "__main__":
    # Script entry point: load the stored "trilobite" classifier, read its
    # feature importances, predict on the "all" dataset and build one image
    # title per sample.

    # Disabled AdaBoost alternative to the random forest below.
    # ada = AdaBoostClassifier()
    # ada.n_estimators = 50
    # ada.base_estimator.max_depth = 1

    random_forest = RandomForestClassifier(n_estimators=100)

    category = "trilobite"
    dataset = "all"
    datamanager = CaltechManager()
    # Point the "RESULTS" path at a run-specific directory below "BASE".
    datamanager.PATHS["RESULTS"] = os.path.join(
        datamanager.PATHS["BASE"], "results_trilobite_rf_testing")

    # vcd = VisualConceptDetection(ada, datamanager)
    vcd = VisualConceptDetection(random_forest, datamanager)

    # NOTE(review): load_object presumably restores a previously dumped,
    # fitted classifier from the RESULTS path -- confirm in VisualConceptDetection.
    clf = vcd.load_object("Classifier", category)
    feature_importances = clf.feature_importances_

    sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category)
    class_vector = vcd.datamanager.build_class_vector(dataset, category)
    pred = clf.predict_proba(sample_matrix)

    vis = EnsembleVisualization(datamanager)
    # The classifier is not referenced again in this block once `pred` and
    # `feature_importances` exist (presumably deleted to free memory).
    del clf
    # Pair each prediction with the corresponding entry of the class vector.
    image_titles = [
        vis.get_image_title(prediction, real)
        for prediction, real in izip(pred, class_vector)
    ]
    # The class vector is dropped once the titles have been built.
    del class_vector
コード例 #10
0
 def setUp(self):
     """Prepare the data manager and the detector used by each test."""
     manager = MyTestDataManager()
     self.datamanager = manager
     ada = AdaBoostClassifier(n_estimators=14)
     # Set the maximum depth on the ensemble's base estimator.
     ada.base_estimator.max_depth = 10
     self.vcd = VisualConceptDetection(ada, self.datamanager)
コード例 #11
0
def get_svm_importances(coef):
    """Normalize the SVM weights to unit norm and flatten them.

    Parameters:
        coef: array-like of SVM coefficients (e.g. ``clf.coef_``); any
            shape is accepted.

    Returns:
        A 1-D array holding ``coef`` scaled by the reciprocal of its norm
        (``np.linalg.norm`` with default arguments). If the norm is zero
        (all-zero or empty input) an array of zeros with one entry per
        coefficient is returned instead of NaN/inf values.
    """
    weights = np.asarray(coef)
    norm = np.linalg.norm(weights)
    if norm == 0:
        # A zero vector has no direction to normalize; dividing by the norm
        # would only produce NaN/inf and a RuntimeWarning.
        return np.zeros(weights.size)
    factor = 1.0 / norm
    return (weights * factor).ravel()


if __name__ == "__main__":
    # Script entry point: load the stored "Faces" classifier, derive
    # normalized importances from its coefficients, compute decision values
    # on the "all" dataset and build one image title per sample.
    svm = LinearSVC(C=0.1)

    category = "Faces"
    dataset = "all"
    datamanager = CaltechManager()
    # Point the "RESULTS" path at a run-specific directory below "BASE".
    datamanager.PATHS["RESULTS"] = os.path.join(
        datamanager.PATHS["BASE"], "results_Faces_LinearSVC_normalized")
    vcd = VisualConceptDetection(svm, datamanager)

    # NOTE(review): load_object presumably restores a previously dumped,
    # fitted classifier from the RESULTS path -- confirm in VisualConceptDetection.
    clf = vcd.load_object("Classifier", category)
    importances = get_svm_importances(clf.coef_)

    sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category)
    class_vector = vcd.datamanager.build_class_vector(dataset, category)
    pred = clf.decision_function(sample_matrix)

    # The classifier is not referenced again in this block once `pred` and
    # `importances` exist (presumably deleted to free memory).
    del clf
    # Pair each decision value with the corresponding entry of the class vector.
    image_titles = [
        get_image_title(prediction, real)
        for prediction, real in izip(pred, class_vector)
    ]
    # The inputs are dropped once the titles have been built.
    del class_vector
    del sample_matrix