예제 #1
0
 def test_repr(self):
     """Check the text and graphviz renderings of a single leaf.

     An empty leaf must print as the bottom symbol. A leaf built from a
     dataset requested with intercept 3 and coefficient 1 must print the
     matching regression line, and its graphviz export must be a single
     node keyed by the leaf's ``id`` and labelled with that same text.
     """
     empty_leaf = Leaf([], [], self.config)
     self.assertEqual(str(empty_leaf), '⊥')

     points = generate_dataset(intercept=3, coeff=1, size=100,
                               min_x=0, max_x=100)
     # Transpose the sequence of (x, y) pairs into two parallel tuples.
     x, y = zip(*points)
     leaf = Leaf(x, y, self.config)
     self.assertEqual(str(leaf), 'y ~ 1.000e+00x + 3.000e+00')

     graph = graphviz.Digraph()
     leaf._to_graphviz(graph)
     node_id = id(leaf)
     label = str(leaf)
     expected = 'digraph {\n\t%d [label="%s"]\n}' % (node_id, label)
     self.assertEqual(str(graph), expected)
예제 #2
0
 def test_plus(self):
     """Check that adding two leaves merges their points into one leaf.

     The base leaf holds the points (i, i) for i in 0..9. It is added to
     a second leaf whose points also lie on y = x, once given in
     descending order and once in ascending order. In both cases the sum
     must have intercept 0, coefficient 1 and a null MSE, and its x and y
     values must be the sorted union of the operands' values.
     """
     l1 = Leaf(range(10), range(10), config=self.config)
     others = [
         Leaf(range(21, 11, -1), range(21, 11, -1), config=self.config),
         Leaf(range(10, 20), range(10, 20), config=self.config),
     ]
     for l2 in others:
         leaf = l1 + l2
         self.assertAlmostEqual(leaf.intercept, 0)
         self.assertAlmostEqual(leaf.coeff, 1)
         self.assertAlmostEqual(leaf.MSE, 0)
         self.assertEqual(leaf.x.values, sorted(l1.x.values + l2.x.values))
         # The y side of the merged leaf must be checked against the merged
         # y values; comparing leaf.x here would leave leaf.y unverified.
         self.assertEqual(leaf.y.values, sorted(l1.y.values + l2.y.values))
예제 #3
0
 def perform_test_other_modes(self, mode):
     """Run the regression checks on a leaf configured with ``mode``.

     For each noise level, a leaf is built from ``self.data`` with
     Gaussian noise added to the y values. Its coefficient and intercept
     must stay close to the reference ones, and adding one point far from
     the reference line must strictly increase the leaf's error.

     :param mode: value forwarded as ``Config(mode=...)``.
     """
     for noise in (0, 1, 2, 4, 8):
         xs = [point[0] for point in self.data]
         ys = [point[1] + random.gauss(0, noise) for point in self.data]
         leaf = Leaf(xs, ys, config=Config(mode=mode, epsilon=1e-6))

         self.assertAlmostEqual(leaf.coeff, self.coeff, delta=1)
         # The tolerated drift of the intercept grows with the noise level;
         # the small constant keeps the tolerance non-zero without noise.
         tolerance = 3*(noise+0.001)
         self.assertAlmostEqual(leaf.intercept, self.intercept, delta=tolerance)

         # Insert an "outlier" (a point on a line with doubled coefficient
         # and intercept) and verify that the error gets strictly larger.
         error_before = leaf.error
         outlier_x = random.uniform(0, 100)
         outlier_y = outlier_x*(self.coeff*2) + self.intercept*2
         leaf.add(outlier_x, outlier_y)
         self.assertGreater(leaf.error, error_before)
예제 #4
0
    def test_repr(self):
        """Check the text and graphviz renderings of a hand-built tree.

        Four datasets are requested on consecutive x ranges of width 100,
        the i-th one with intercept and coefficient both equal to i. They
        are wrapped in leaves (the 2nd and 4th ones receive their points in
        reversed order) and assembled into a two-level tree of nodes with
        ``no_check=True``. The test then compares ``str(node)`` and the
        output of ``node.to_graphviz()`` with the expected renderings.
        """
        config = Config(mode='BIC', epsilon=1e-6)
        data = {}
        for i in range(1, 5):
            upper = (i+1)*100
            points = generate_dataset(
                intercept=i, coeff=i, size=100, min_x=i*100, max_x=upper)
            # Add the right boundary of the range, on the line y = i*x + i.
            points = points + [(upper, upper*i+i)]
            data[i] = [p[0] for p in points], [p[1] for p in points]

        def forward_leaf(key):
            # Leaf receiving the points in their generated order.
            xs, ys = data[key]
            return Leaf(xs, ys, config)

        def backward_leaf(key):
            # Leaf receiving the same points in reversed order.
            xs, ys = data[key]
            return Leaf(list(reversed(xs)), list(reversed(ys)), config)

        left = Node(forward_leaf(1), backward_leaf(2), no_check=True)
        right = Node(forward_leaf(3), backward_leaf(4), no_check=True)
        node = Node(left, right, no_check=True)

        tree_lines = [
            'x ≤ 3.000e+02?',
            '    └──x ≤ 2.000e+02?',
            '    │    └──y ~ 1.000e+00x + 1.000e+00',
            '    │    └──y ~ 2.000e+00x + 2.000e+00',
            '    └──x ≤ 4.000e+02?',
            '         └──y ~ 3.000e+00x + 3.000e+00',
            '         └──y ~ 4.000e+00x + 4.000e+00',
        ]
        self.assertEqual('\n'.join(tree_lines), str(node))
        dot = node.to_graphviz()

        def box(inner):
            # Declaration line of an inner (split) node.
            return f'\t{id(inner)} [label="x ≤ {inner.split:.3e}?" shape=box]'

        def plain(leaf):
            # Declaration line of a leaf.
            return f'\t{id(leaf)} [label="{str(leaf)}"]'

        def edge(parent, child, answer):
            # Edge line from a node to one of its children.
            return f'\t{id(parent)} -> {id(child)} [label={answer}]'

        def subtree(inner):
            # Lines of a node whose two children are leaves: the node itself,
            # both leaves, then the "yes" and "no" edges.
            return [
                box(inner),
                plain(inner.left),
                plain(inner.right),
                edge(inner, inner.left, 'yes'),
                edge(inner, inner.right, 'no'),
            ]

        dot_lines = ['digraph {', box(node)]
        dot_lines += subtree(node.left)
        dot_lines += subtree(node.right)
        dot_lines += [
            edge(node, node.left, 'yes'),
            edge(node, node.right, 'no'),
            '}',
        ]
        expected = '\n'.join(dot_lines)
        # Show the complete diff if the long graphviz strings differ.
        self.maxDiff = None
        self.assertEqual(str(dot), expected)
예제 #5
0
 def test_add_remove(self):
     """Check a leaf while points are added to it and then popped from it.

     For each noise level, a leaf is built from the first third of the
     (noisy) data, the remaining points are added one at a time, and then
     ``2*limit`` points are popped. A mirror pair of plain lists tracks
     the points the leaf should hold, and ``perform_tests`` is run after
     every change. Each popped point must equal the last tracked one.
     """
     for noise in (0, 1, 2, 4, 8):
         noisy = noise > 0
         all_x = [point[0] for point in self.data]
         all_y = [point[1] + random.gauss(0, noise) for point in self.data]
         limit = self.size // 3

         # Reference lists; the leaf is handed its own copies of them.
         seen_x = all_x[:limit]
         seen_y = all_y[:limit]
         node = Leaf(list(seen_x), list(seen_y), config=self.config)
         self.perform_tests(seen_x, seen_y, node, noisy)

         # Growing phase: feed the remaining points one by one.
         for xx, yy in list(zip(all_x, all_y))[limit:]:
             node.add(xx, yy)
             seen_x.append(xx)
             seen_y.append(yy)
             self.perform_tests(seen_x, seen_y, node, noisy)

         # Shrinking phase: points must come back in last-in-first-out order.
         for _ in range(2*limit):
             popped_x, popped_y = node.pop()
             expected_x = seen_x.pop()
             self.assertEqual(popped_x, expected_x)
             expected_y = seen_y.pop()
             self.assertEqual(popped_y, expected_y)
             self.perform_tests(seen_x, seen_y, node, noisy)
예제 #6
0
 def assert_notequal_reg(self, dataset1, dataset2):
     """Assert that leaves built from the two datasets compare as different.

     :param dataset1: iterable of indexable points; item 0 is used as x
         and item 1 as y.
     :param dataset2: same layout as ``dataset1``.
     """
     leaves = []
     for dataset in (dataset1, dataset2):
         xs = [point[0] for point in dataset]
         ys = [point[1] for point in dataset]
         leaves.append(Leaf(xs, ys, config=self.config))
     first, second = leaves
     self.assertNotEqual(first, second)
     self.assertNotEqual(leaf1, leaf2)
예제 #7
0
 def test_init(self):
     """Check a freshly built leaf for several noise levels.

     For each noise level, Gaussian noise is added to the y values of
     ``self.data``, a leaf is built from the result, and ``perform_tests``
     is run on it (its last argument tells whether noise was applied).
     """
     for noise in (0, 1, 2, 4, 8):
         xs = [point[0] for point in self.data]
         ys = [point[1] + random.gauss(0, noise) for point in self.data]
         self.perform_tests(xs, ys, Leaf(xs, ys, config=self.config), noise > 0)