def test_NaiveBayes(self):
    """Fit naive Bayes on the discretized "iris" SQL table and bound its accuracy.

    The classifier is trained and evaluated on the same table, so the
    assertions bound resubstitution accuracy: it must be above 0.95 but
    strictly below a perfect 1.0.
    """
    table = SqlTable(
        connection_params(),
        "iris",
        type_hints=Domain(
            [],
            DiscreteVariable(
                "iris",
                values=["Iris-setosa", "Iris-virginica", "Iris-versicolor"],
            ),
        ),
    )
    # Construct the preprocessor first and then apply it to the data.
    # Passing the table straight to the constructor only works when the
    # class offers a "call on construction" shortcut; otherwise the table
    # would be taken as a constructor argument.
    table = preprocess.Discretize()(table)
    bayes = nb.NaiveBayesLearner()
    clf = bayes(table)

    # Single instance prediction
    self.assertEqual(clf(table[0]), table[0].get_class())

    # Table prediction
    pred = clf(table)
    actual = array([ins.get_class() for ins in table])
    ca = pred == actual
    ca = ca.sum() / len(ca)  # fraction of rows predicted correctly
    self.assertGreater(ca, 0.95)
    self.assertLess(ca, 1.0)
def test_chi2(self):
    """Chi2 should give column 1 the highest score on synthetic data.

    The target is computed from columns 1 and 3 only, with column 1
    carrying the larger (-3) coefficient.
    """
    n_rows, n_cols = 500, 5
    X = np.random.randint(4, size=(n_rows, n_cols))
    y = 10 + (-3 * X[:, 1] + X[:, 3]) // 2

    discretize = preprocess.Discretize()
    data = discretize(Table(X, y))

    chi2 = score.Chi2()
    scores = [chi2(data, column) for column in range(n_cols)]
    best_column = np.argmax(scores)
    self.assertTrue(best_column == 1)
def createinstance(params):
    """Build a ``preprocess.Discretize`` preprocessor from editor parameters.

    ``params`` is a mapping. Its optional ``"method"`` entry selects a
    ``(method, defaults)`` pair from ``DiscretizeEditor.Discretizers``
    (``DiscretizeEditor.EqualFreq`` when absent). The remaining entries
    are keyword arguments for the selected method and override the
    corresponding defaults.

    Returns ``None`` when the selected entry has no method; otherwise a
    ``preprocess.Discretize`` wrapping the configured method, created with
    ``remove_const=False``.
    """
    # Work on a copy so that popping "method" does not mutate the caller's mapping.
    params = dict(params)
    method = params.pop("method", DiscretizeEditor.EqualFreq)
    method, defaults = DiscretizeEditor.Discretizers[method]

    if method is None:
        return None

    # Start from the method's declared defaults and let explicitly supplied
    # values win. Previously this merged mapping was built but the method
    # was called with the raw params, so the defaults were never applied.
    resolved = dict(defaults)
    # NOTE(review): keys in params that are not in defaults are still
    # forwarded to the method; restrict to the keys of defaults if that is
    # not intended.
    resolved.update(params)
    return preprocess.Discretize(method(**resolved), remove_const=False)
def test_chi2(self):
    """Chi2 should give column 1 the highest score for a discrete class.

    The target is computed from columns 1 and 3 only, with column 1
    carrying the larger (-3) coefficient. The class variable is declared
    as discrete, with the distinct values of the target as its values.
    """
    n_rows, n_cols = 500, 5
    X = np.random.randint(4, size=(n_rows, n_cols))
    y = 10 + (-3 * X[:, 1] + X[:, 3]) // 2

    # Keep the attributes inferred from the arrays, but replace the class
    # variable with an explicitly discrete one.
    inferred = Domain.from_numpy(X, y)
    class_var = DiscreteVariable('c', values=np.unique(y))
    domain = Domain(inferred.attributes, class_var)

    discretize = preprocess.Discretize()
    data = discretize(Table(domain, X, y))

    chi2 = Chi2()
    scores = [chi2(data, column) for column in range(n_cols)]
    best_column = np.argmax(scores)
    self.assertTrue(best_column == 1)
def test_NaiveBayes(self):
    """Fit naive Bayes on the discretized 'iris' SQL table and bound its accuracy.

    The classifier is trained and evaluated on the same table, so the
    assertions bound resubstitution accuracy: it must be above 0.95 but
    strictly below a perfect 1.0.
    """
    table = SqlTable(
        dict(host='localhost', database='test'),
        'iris',
        type_hints=Domain(
            [],
            DiscreteVariable(
                "iris",
                values=['Iris-setosa', 'Iris-virginica', 'Iris-versicolor'],
            ),
        ),
    )
    # Construct the preprocessor first and then apply it to the data.
    # Passing the table straight to the constructor only works when the
    # class offers a "call on construction" shortcut; otherwise the table
    # would be taken as a constructor argument.
    table = preprocess.Discretize()(table)
    bayes = nb.NaiveBayesLearner()
    clf = bayes(table)

    # Single instance prediction
    self.assertEqual(clf(table[0]), table[0].get_class())

    # Table prediction
    pred = clf(table)
    actual = array([ins.get_class() for ins in table])
    ca = pred == actual
    ca = ca.sum() / len(ca)  # fraction of rows predicted correctly
    self.assertGreater(ca, 0.95)
    self.assertLess(ca, 1.)
def test_NaiveBayes(self):
    """Fit naive Bayes on the discretized iris SQL table and bound its accuracy.

    The classifier is trained and evaluated on the same table; the
    resulting accuracy must be above 0.95 but strictly below 1.
    """
    class_var = DiscreteVariable(
        "iris",
        values=['Iris-setosa', 'Iris-virginica', 'Iris-versicolor'],
    )
    hints = Domain([], class_var)
    raw_table = SqlTable(self.conn, self.iris, type_hints=hints)
    table = preprocess.Discretize()(raw_table)

    learner = nb.NaiveBayesLearner()
    classifier = learner(table)

    # Predicting a single row must reproduce that row's class.
    first_row = table[0]
    self.assertEqual(classifier(first_row), first_row.get_class())

    # Predicting the whole table: compare against the stored classes.
    predicted = classifier(table)
    expected = array([row.get_class() for row in table])
    hits = predicted == expected
    accuracy = hits.sum() / len(hits)
    self.assertGreater(accuracy, 0.95)
    self.assertLess(accuracy, 1.)