Beispiel #1
0
    def test_continuous(self):
        """Check the "sepal width" contingency computed from the iris table.

        The contingency is looked up once by class name and once by class
        index; each result is compared with a hard-coded pair of rows.
        """
        iris = data.Table("iris")
        cont = contingency.Continuous(iris, "sepal width")

        # Expected rows for Iris-setosa (presumably the distinct values and
        # the number of instances having each value).
        setosa_values = [
            2.3, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0,
            4.1, 4.2, 4.4,
        ]
        setosa_counts = [1, 1, 6, 5, 5, 2, 9, 6, 2, 3, 4, 2, 1, 1, 1, 1]
        np.testing.assert_almost_equal(cont.unknowns, [0, 0, 0])
        np.testing.assert_almost_equal(
            cont["Iris-setosa"], [setosa_values, setosa_counts])
        self.assertEqual(cont.unknown_rows, 0)

        # Same check for Iris-virginica, this time indexing by position.
        virginica_values = [
            2.2, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.6, 3.8,
        ]
        virginica_counts = [1, 4, 2, 4, 8, 2, 12, 4, 5, 3, 2, 1, 2]
        virginica_index = iris.domain.class_var.values.index("Iris-virginica")
        np.testing.assert_almost_equal(
            cont[virginica_index], [virginica_values, virginica_counts])
        np.testing.assert_almost_equal(cont.unknowns, [0, 0, 0])
        self.assertEqual(cont.unknown_rows, 0)
Beispiel #2
0
    def test_discrete_with_fallback(self):
        """The fallback contingency path must agree with the default one.

        A contingency is computed normally, then ``_compute_contingency`` is
        replaced by a mock raising ``NotImplementedError`` and the result of
        the second computation is compared with the first.
        """
        zoo = data.Table("zoo")
        # Knock out a few class and attribute values so that the unknown
        # counters are exercised as well.
        zoo.Y[25] = None
        zoo.Y[24] = None
        zoo.X[0, 0] = None
        zoo.X[24, 0] = None
        expected = contingency.Discrete(zoo, 0)

        zoo._compute_contingency = Mock(side_effect=NotImplementedError)
        actual = contingency.Discrete(zoo, 0)

        np.testing.assert_array_equal(np.asarray(actual),
                                      np.asarray(expected))
        for attr in ("unknowns", "row_unknowns", "col_unknowns"):
            np.testing.assert_array_equal(getattr(actual, attr),
                                          getattr(expected, attr))
Beispiel #3
0
    def test_discretize_class(self):
        """Class variables are discretized only when ``discretize_classes`` is set.

        The iris table is rebuilt with two class variables: its fourth
        attribute and the original class variable. Without
        ``discretize_classes`` both must come out as the very same objects;
        with it, the first must become a ``DiscreteVariable`` while the
        second must still be the same object.
        """
        iris = data.Table('iris')
        domain = iris.domain
        regr_domain = data.Domain(domain.attributes[:3],
                                  [domain.attributes[3], domain.class_var])
        table = data.Table.from_table(regr_domain, iris)
        # Reference point: compare the results against the *input* class
        # variables (comparing the output with itself can never fail).
        original_class_vars = table.domain.class_vars

        discretize = Discretize(remove_const=False)
        new_table = discretize(table)
        self.assertIs(new_table.domain.class_vars[0],
                      original_class_vars[0])
        self.assertIs(new_table.domain.class_vars[1],
                      original_class_vars[1])

        discretize = Discretize(remove_const=False, discretize_classes=True)
        new_table = discretize(table)
        self.assertIsInstance(new_table.domain.class_vars[0], DiscreteVariable)
        self.assertIs(new_table.domain.class_vars[1],
                      original_class_vars[1])
Beispiel #4
0
    def test_indexing(self):
        """Read and write a class distribution by value name and by index."""
        zoo = data.Table("zoo")
        class_var = zoo.domain.class_var
        amphibian = class_var.to_val("amphibian")

        dist = distribution.class_distribution(zoo)

        self.assertEqual(len(dist), len(class_var.values))

        # Reading: by name and by index.
        self.assertEqual(dist["mammal"], 41)
        self.assertEqual(dist[amphibian], 4)

        # Writing by name must be visible through the index ...
        dist["mammal"] = 100
        self.assertEqual(dist[class_var.to_val("mammal")], 100)

        # ... and writing by index must be visible through the name.
        dist[amphibian] = 33
        self.assertEqual(dist["amphibian"], 33)

        # A freshly computed distribution is compared with self.freqs.
        fresh = distribution.class_distribution(zoo)
        self.assertEqual(list(fresh), self.freqs)
Beispiel #5
0
    def test_from_table(self):
        """Build a continuous distribution from a table in three ways.

        The variable is given by name, by variable object and by index; all
        three results must refer to the same variable and compare equal.
        """
        iris = data.Table("iris")
        petal_length = iris.columns.petal_length

        by_name = distribution.Continuous(iris, "petal length")
        self.assertIsInstance(by_name, np.ndarray)
        self.assertIs(by_name.variable, petal_length)
        self.assertEqual(by_name.unknowns, 0)
        np.testing.assert_almost_equal(by_name, self.freqs)

        # Same distribution requested via the variable object and via index.
        for spec in (iris.domain[2], 2):
            other = distribution.Continuous(iris, spec)
            self.assertIsInstance(other, np.ndarray)
            self.assertIs(other.variable, petal_length)
            self.assertEqual(by_name, other)
Beispiel #6
0
 def test_replacement(self):
     """``Average`` yields variables computed by ``ReplaceUnknowns``.

     The expected replacement value is 0.5 for the first column and 2 for
     the second one.
     """
     first_col = np.array([0] * 50 + [1] * 50).reshape(100, 1)
     second_col = np.array([0] * 5 + [1] * 5 + [2] * 90).reshape(100, 1)
     attributes = [
         data.ContinuousVariable("a"),
         data.DiscreteVariable("b", values="ABC"),
     ]
     domain = data.Domain(attributes, data.ContinuousVariable("c"))
     # The first column doubles as the class column.
     table = data.Table(domain, np.hstack([first_col, second_col]), first_col)

     averaged_a = preprocess.Average()(table, 0)
     self.assertIsInstance(averaged_a.compute_value,
                           preprocess.ReplaceUnknowns)
     self.assertEqual(averaged_a.compute_value.value, 0.5)

     averaged_b = preprocess.Average()(table, 1)
     self.assertIsInstance(averaged_b.compute_value,
                           preprocess.ReplaceUnknowns)
     self.assertEqual(averaged_b.compute_value.value, 2)
Beispiel #7
0
    def test_construction(self):
        """Construct discrete distributions from a table, a list and a variable.

        Each constructed object is checked on its own: the original version
        re-checked ``disc`` in two places where the freshly built ``disc7``
        and ``disc1`` were meant.
        """
        d = data.Table("zoo")

        # From a table and a variable name.
        disc = distribution.Discrete(d, "type")
        self.assertIsInstance(disc, np.ndarray)
        self.assertIs(disc.variable, d.domain["type"])
        self.assertEqual(disc.unknowns, 0)
        self.assertIs(disc.variable, d.domain.class_var)

        # From raw frequencies: no variable is attached.
        disc7 = distribution.Discrete(self.freqs)
        self.assertIsInstance(disc7, np.ndarray)
        self.assertIsNone(disc7.variable)
        self.assertEqual(disc7.unknowns, 0)
        self.assertEqual(disc, disc7)

        # From a variable only: expected to be all zeros.
        disc1 = distribution.Discrete(None, d.domain.class_var)
        self.assertIsInstance(disc1, np.ndarray)
        self.assertIs(disc1.variable, d.domain.class_var)
        self.assertEqual(disc1.unknowns, 0)
        assert_dist_equal(disc1, [0] * len(d.domain.class_var.values))
Beispiel #8
0
    def test_get_distributions(self):
        """``get_distributions`` returns one distribution per iris variable.

        The first four are expected to be continuous and the last one
        discrete; the third and the last are also compared with known values.
        """
        iris = data.Table("iris")
        all_dists = distribution.get_distributions(iris)

        self.assertEqual(len(all_dists), 5)
        for dist in all_dists[0], all_dists[1], all_dists[2], all_dists[3]:
            self.assertIsInstance(dist, distribution.Continuous)
        self.assertIsInstance(all_dists[-1], distribution.Discrete)

        # (value, count) pairs, transposed into a two-row array.
        pairs = [
            (1.0, 1), (1.1, 1), (1.2, 2), (1.3, 7), (1.4, 12),
            (1.5, 14), (1.6, 7), (1.7, 4), (1.9, 2), (3.0, 1),
            (3.3, 2), (3.5, 2), (3.6, 1), (3.7, 1), (3.8, 1),
            (3.9, 3), (4.0, 5), (4.1, 3), (4.2, 4), (4.3, 2),
            (4.4, 4), (4.5, 8), (4.6, 3), (4.7, 5), (4.8, 4),
            (4.9, 5), (5.0, 4), (5.1, 8), (5.2, 2), (5.3, 2),
            (5.4, 2), (5.5, 3), (5.6, 6), (5.7, 3), (5.8, 3),
            (5.9, 2), (6.0, 2), (6.1, 3), (6.3, 1), (6.4, 1),
            (6.6, 1), (6.7, 2), (6.9, 1),
        ]
        expected = np.array(pairs).T
        assert_dist_equal(all_dists[2], expected)
        assert_dist_equal(all_dists[-1], [50, 50, 50])
Beispiel #9
0
    def test_get_distribution(self):
        """``get_distribution`` for the iris class and for petal length."""
        iris = data.Table("iris")

        class_var = iris.domain.class_var
        class_dist = distribution.get_distribution(iris, class_var)
        self.assertIsInstance(class_dist, np.ndarray)
        self.assertIs(class_dist.variable, class_var)
        self.assertEqual(class_dist.unknowns, 0)
        assert_dist_equal(class_dist, [50, 50, 50])

        petal_length = iris.columns.petal_length
        # (value, count) pairs, transposed into a two-row array.
        pairs = [
            (1.0, 1), (1.1, 1), (1.2, 2), (1.3, 7), (1.4, 12),
            (1.5, 14), (1.6, 7), (1.7, 4), (1.9, 2), (3.0, 1),
            (3.3, 2), (3.5, 2), (3.6, 1), (3.7, 1), (3.8, 1),
            (3.9, 3), (4.0, 5), (4.1, 3), (4.2, 4), (4.3, 2),
            (4.4, 4), (4.5, 8), (4.6, 3), (4.7, 5), (4.8, 4),
            (4.9, 5), (5.0, 4), (5.1, 8), (5.2, 2), (5.3, 2),
            (5.4, 2), (5.5, 3), (5.6, 6), (5.7, 3), (5.8, 3),
            (5.9, 2), (6.0, 2), (6.1, 3), (6.3, 1), (6.4, 1),
            (6.6, 1), (6.7, 2), (6.9, 1),
        ]
        expected = np.array(pairs).T
        petal_dist = distribution.get_distribution(iris, petal_length)
        assert_dist_equal(petal_dist, expected)
Beispiel #10
0
    def test_from_table(self):
        """Build the zoo class distribution in four equivalent ways.

        The variable is given by name, by variable object and by index, and
        finally ``class_distribution`` is used; every result must be bound to
        the class variable and compare equal to the first one.
        """
        zoo = data.Table("zoo")

        by_name = distribution.Discrete(zoo, "type")
        self.assertIsInstance(by_name, np.ndarray)
        self.assertIs(by_name.variable, zoo.domain["type"])
        self.assertEqual(by_name.unknowns, 0)
        assert_dist_equal(by_name, self.freqs)

        # By variable object, then by the index just past the attributes.
        for spec in (zoo.domain.class_var, len(zoo.domain.attributes)):
            other = distribution.Discrete(zoo, spec)
            self.assertIsInstance(other, np.ndarray)
            self.assertIs(other.variable, zoo.domain.class_var)
            self.assertEqual(by_name, other)

        via_helper = distribution.class_distribution(zoo)
        self.assertIsInstance(via_helper, np.ndarray)
        self.assertIs(via_helper.variable, zoo.domain.class_var)
        self.assertEqual(by_name, via_helper)
    def test_split_by_classifier(self):
        """Splitting results by classifier must preserve metadata and scores.

        Cross-validation is run with three learners; each per-classifier
        result produced by ``split_by_classifiers`` must carry the same
        metadata as the combined result, and its CA and AUC must match the
        corresponding entry of the combined scores.
        """
        learners = [random_learner, random_learner, random_learner]
        ds = data.Table("lenses")
        cv = testing.cross_validation(learners,
                                      ds,
                                      folds=5,
                                      store_examples=True)
        cv_split = scoring.split_by_classifiers(cv)
        ca_scores = scoring.CA(cv)
        auc_scores = scoring.AUC(cv)
        for i, cv1 in enumerate(cv_split):
            self.assertEqual(cv1.class_values, cv.class_values)
            self.assertEqual(cv1.classifier_names, [cv.classifier_names[i]])
            self.assertEqual(cv1.number_of_iterations, cv.number_of_iterations)
            self.assertEqual(cv1.number_of_learners, 1)
            self.assertEqual(cv1.base_class, cv.base_class)
            self.assertEqual(cv1.weights, cv.weights)
            self.assertEqual(len(cv1.results), len(cv.results))
            self.assertEqual(cv1.examples, cv.examples)

            ca_one = scoring.CA(cv1)[0]
            auc_one = scoring.AUC(cv1)[0]
            self.assertAlmostEqual(ca_scores[i], ca_one, delta=1e-10)
            # assertAlmostEquals is a deprecated alias; use the real name.
            self.assertAlmostEqual(auc_scores[i], auc_one, delta=1e-10)
 def test_imputer(self):
     """After ``Impute`` the attribute matrix must contain no NaN."""
     table = data.Table(test_filename('datasets/imports-85.tab'))
     imputed = preprocess.Impute()(table)
     has_nan = np.isnan(imputed.X).any()
     self.assertFalse(has_nan)
 def test_imputer(self):
     """After ``Impute`` the attribute matrix must contain no NaN."""
     table = data.Table('auto-mpg')
     imputed = preprocess.Impute()(table)
     has_nan = np.isnan(imputed.X).any()
     self.assertFalse(has_nan)
Beispiel #14
0
    print "%8s"*n % tuple(m[1] for m in measures)
    print "=" * 8 * n
    for tr in test_results:
        print "%8.4f"*n % tuple(m[0](tr)[0] for m in measures)
    print

def run_tests(datasets, measures, tests, iterations=10):
    """Run every test procedure on every dataset and report the measures.

    :param datasets: iterable of ``(dataset, name)`` pairs.
    :param measures: passed through unchanged to ``test``.
    :param tests: iterable of ``(callable, name)`` pairs; each callable is
        invoked as ``callable(random_learner, dataset)``.
    :param iterations: how many times each test is repeated per dataset.

    For each (dataset, test) combination a header line is printed, the test
    is repeated ``iterations`` times and the collected results are handed to
    ``test`` together with ``measures``.
    """
    for ds, ds_name in datasets:
        for t, t_name in tests:
            # Single-argument print(...) behaves identically on Python 2
            # (parenthesised expression) and Python 3 (function call).
            print("Testing %s on %s" % (t_name, ds_name))
            # range instead of the Python-2-only xrange.
            test_results = [t(random_learner, ds) for _ in range(iterations)]
            test(measures, test_results)


# (table, display name) pairs the tests are run on.
datasets = (
    (data.Table("iris"), "Iris"),
    (data.Table("monks-1"), "Monks"),
)

# (scoring callable, column label) pairs: AUC with its default settings and
# with each of the multiclass options 0 through 3.
measures = (
    (lambda x: auc(x), "AUC"),
    (lambda x: auc(x, multiclass=0), "AUC+M0"),
    (lambda x: auc(x, multiclass=1), "AUC+M1"),
    (lambda x: auc(x, multiclass=2), "AUC+M2"),
    (lambda x: auc(x, multiclass=3), "AUC+M3"),
)

# (test procedure, display name) pairs; each procedure takes a learner and a
# dataset and wraps the learner in a one-element list.
tests = (
    (lambda l, ds: testing.cross_validation([l], ds), "CV"),
    (lambda l, ds: testing.proportion_test([l], ds, 0.7, 1),
     "Proportion test"),
)
Beispiel #15
0
 def test_empty(self):
     """A majority model fitted on no data predicts equal probabilities."""
     table = data.Table('iris')
     fitter = majority_.MajorityFitter()
     model = fitter(table[:0])
     probs = model(table[0], model.Probs)
     # All entries equal their mean exactly when they are all the same.
     uniform = probs.sum() / probs.size
     self.assertTrue(np.allclose(probs, uniform))
Beispiel #16
0
from Orange import data
from Orange.classification import svm

# Compare two SVM learners on the vehicle data with 5-fold cross-validation
# and print their classification accuracy and AUC.
vehicle = data.Table("vehicle.tab")

svm_easy = svm.SVMLearnerEasy(name="svm easy", folds=3)
svm_normal = svm.SVMLearner(name="svm")
learners = [svm_easy, svm_normal]

from Orange.evaluation import testing, scoring

results = testing.cross_validation(learners, vehicle, folds=5)
# Single-argument print(...) produces the same output on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print("Name     CA        AUC")
for learner, CA, AUC in zip(learners, scoring.CA(results),
                            scoring.AUC(results)):
    print("%-8s %.2f      %.2f" % (learner.name, CA, AUC))
Beispiel #17
0
 def test_compute_distributions_metas(self):
     """Compute the distribution of the second-to-last domain variable."""
     table = data.Table(test_filename("test9.tab"))
     meta_var = table.domain[-2]
     results = table._compute_distributions([meta_var])
     # Each result is a pair; only its first element is checked here.
     dist, _ = results[0]
     np.testing.assert_almost_equal(dist, [3, 3, 2])
Beispiel #18
0
 @classmethod
 def setUpClass(cls):
     """Load the iris table once and share it between all tests of the class.

     unittest invokes ``setUpClass`` on the class itself, so it has to be a
     classmethod.
     """
     cls.iris = data.Table("iris")
from Orange import data
from Orange import evaluation

from Orange.classification.svm import SVMLearner, kernels
from Orange.distance import Euclidean
from Orange.distance import Hamming

# Train three SVM classifiers on iris, each with a different custom kernel.
iris = data.Table("iris.tab")

# RBF kernel wrapped around the Euclidean distance.
l1 = SVMLearner()
l1.kernel_func = kernels.RBFKernelWrapper(Euclidean(iris), gamma=0.5)
l1.kernel_type = SVMLearner.Custom
l1.probability = True
c1 = l1(iris)
l1.name = "SVM - RBF(Euclidean)"

# RBF kernel wrapped around the Hamming distance.
l2 = SVMLearner()
l2.kernel_func = kernels.RBFKernelWrapper(Hamming(iris), gamma=0.5)
l2.kernel_type = SVMLearner.Custom
l2.probability = True
c2 = l2(iris)
l2.name = "SVM - RBF(Hamming)"

# Composite of the two kernels above, combined with l=0.5.
l3 = SVMLearner()
l3.kernel_func = kernels.CompositeKernelWrapper(
    kernels.RBFKernelWrapper(Euclidean(iris), gamma=0.5),
    kernels.RBFKernelWrapper(Hamming(iris), gamma=0.5),
    l=0.5)
l3.kernel_type = SVMLearner.Custom
l3.probability = True
# Train with l3 itself; the original called l1 here, so the composite
# kernel was configured but never used.
c3 = l3(iris)
l3.name = "SVM - Composite"
Beispiel #20
0
 def test_transform(self):
     """An instance and a table converted to the same domain must agree."""
     iris = data.Table('iris')
     discretized = Discretize(iris)
     target_domain = discretized.domain
     # Convert the first row on its own, and the first ten rows as a table.
     single = data.Instance(target_domain, iris[0])
     first_ten = data.Table(target_domain, iris[:10])
     self.assertEqual(single, first_ten[0])
Beispiel #21
0
    def test_modus(self):
        """``modus`` returns the value with the highest frequency.

        The distribution is built directly from a values row and a counts
        row; the largest count (5) belongs to the value 3. No data table is
        needed, so the unused table load of the original was dropped.
        """
        disc = distribution.Continuous([list(range(5)), [1, 1, 2, 5, 1]])
        self.assertEqual(disc.modus(), 3)
Beispiel #22
0
 @classmethod
 def setUpClass(cls):
     """Load the tables shared by all tests of the class.

     unittest invokes ``setUpClass`` on the class itself, so it has to be a
     classmethod.
     """
     cls.zoo = data.Table("zoo")
     cls.test9 = data.Table(test_filename("datasets/test9.tab"))
from Orange import data
from Orange.classification import svm

# Fit a linear SVM on the brown-selected data, print its feature weights
# and plot their histogram.
brown = data.Table("brown-selected")
classifier = svm.SVMLearner(brown,
                            kernel_type=svm.kernels.Linear,
                            normalization=False)

weights = svm.get_linear_svm_weights(classifier)
# Single-argument print(...) gives the same output on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(sorted("%.10f" % w for w in weights.values()))

import pylab as plt
# Materialise the values: on Python 3 ``values()`` is a view, not a list.
plt.hist(list(weights.values()))
Beispiel #24
0
 def setUp(self):
     """Load the ionosphere table and shuffle it before every test."""
     table = data.Table('ionosphere')
     self.data = table
     table.shuffle()
Beispiel #25
0
 def test_modus(self):
     """The modus of the zoo "type" distribution prints as "mammal"."""
     zoo = data.Table("zoo")
     type_dist = distribution.Discrete(zoo, "type")
     most_common = type_dist.modus()
     self.assertEqual(str(most_common), "mammal")
Beispiel #26
0
 def test_compute_contingency_metas(self):
     """Compute a contingency between two variables picked from the domain end."""
     table = data.Table(test_filename("test9.tab"))
     col_var = table.domain[-2]
     row_var = table.domain[-4]
     result = table._compute_contingency([col_var], row_var)
     # Only the first element of the first entry is checked here.
     cont, _ = result[0][0]
     expected = [[3, 0, 0], [0, 2, 0], [0, 0, 2], [0, 1, 0]]
     np.testing.assert_almost_equal(cont, expected)
Beispiel #27
0
 def test_empty(self):
     """A mean model fitted on no data predicts 0."""
     table = data.Table('auto-mpg')
     fitter = mean_.MeanFitter()
     model = fitter(table[:0])
     prediction = model(table[0])
     self.assertTrue(prediction == 0)
Beispiel #28
0
 def test_discrete(self):
     """Fitting a mean model on the iris table must raise ``ValueError``."""
     table = data.Table('iris')
     fitter = mean_.MeanFitter()
     with self.assertRaises(ValueError):
         fitter(table)
Beispiel #29
0
 @classmethod
 def setUpClass(cls):
     """Load the zoo table once and share it between all tests of the class.

     unittest invokes ``setUpClass`` on the class itself, so it has to be a
     classmethod.
     """
     cls.zoo = data.Table("zoo")
Beispiel #30
0
 def test_continuous(self):
     """Fitting a majority model on auto-mpg must raise ``ValueError``."""
     table = data.Table('auto-mpg')
     fitter = majority_.MajorityFitter()
     with self.assertRaises(ValueError):
         fitter(table)