def test_repr(self):
    """Check the text and graphviz renderings of a Leaf.

    An empty leaf must render as '⊥'. A leaf built from a dataset generated
    with intercept 3 and coefficient 1 must render as its fitted equation,
    and its graphviz export must be a single node keyed by ``id(leaf)`` and
    labelled with that same text.
    """
    empty_leaf = Leaf([], [], self.config)
    self.assertEqual(str(empty_leaf), '⊥')

    points = generate_dataset(intercept=3, coeff=1, size=100, min_x=0, max_x=100)
    x, y = zip(*points)
    leaf = Leaf(x, y, self.config)
    self.assertEqual(str(leaf), 'y ~ 1.000e+00x + 3.000e+00')

    graph = graphviz.Digraph()
    leaf._to_graphviz(graph)
    expected_source = 'digraph {\n\t%d [label="%s"]\n}' % (id(leaf), str(leaf))
    self.assertEqual(str(graph), expected_source)
def test_plus(self):
    """Check that adding two leaves yields a leaf over the union of their points.

    Every point used here satisfies y == x, so the merged leaf is expected to
    have intercept 0, coefficient 1 and a zero MSE. The second operand is
    tried both with descending and with ascending x values.
    """
    l1 = Leaf(range(10), range(10), config=self.config)
    right_operands = [
        Leaf(range(21, 11, -1), range(21, 11, -1), config=self.config),
        Leaf(range(10, 20), range(10, 20), config=self.config),
    ]
    for l2 in right_operands:
        leaf = l1 + l2
        self.assertAlmostEqual(leaf.intercept, 0)
        self.assertAlmostEqual(leaf.coeff, 1)
        self.assertAlmostEqual(leaf.MSE, 0)
        self.assertEqual(leaf.x.values, sorted(l1.x.values + l2.x.values))
        # The y values of the merged leaf are what must be compared here (the
        # previous version re-checked leaf.x against the y values, which only
        # passed because x and y are identical in this dataset).
        # NOTE(review): assumes leaf.y.values is kept in the same order as
        # leaf.x.values; since y == x, that is the sorted order - confirm.
        self.assertEqual(leaf.y.values, sorted(l1.y.values + l2.y.values))
def perform_test_other_modes(self, mode):
    """Fit a Leaf in the given Config mode and sanity-check the result.

    For several noise levels, Gaussian noise is added to the y values of
    ``self.data``; the fitted coefficient and intercept must stay close to
    ``self.coeff`` and ``self.intercept``. Then a point taken from a line with
    doubled coefficient and intercept is added, and the leaf error must grow.

    :param mode: value forwarded to ``Config(mode=...)``.
    """
    for sigma in (0, 1, 2, 4, 8):
        # The lists are rebuilt for every noise level so that each Leaf
        # receives its own fresh inputs.
        xs = [point[0] for point in self.data]
        ys = [point[1] + random.gauss(0, sigma) for point in self.data]
        leaf = Leaf(xs, ys, config=Config(mode=mode, epsilon=1e-6))

        self.assertAlmostEqual(leaf.coeff, self.coeff, delta=1)
        # Tolerance grows with the noise; the small constant keeps it
        # strictly positive in the noiseless case.
        intercept_tolerance = 3*(sigma+0.001)
        self.assertAlmostEqual(leaf.intercept, self.intercept, delta=intercept_tolerance)

        # we add an "outlier" and check that it increases the error significantly
        error_before = leaf.error
        outlier_x = random.uniform(0, 100)
        outlier_y = outlier_x*(self.coeff*2) + self.intercept*2
        leaf.add(outlier_x, outlier_y)
        error_after = leaf.error
        self.assertGreater(error_after, error_before)
def test_repr(self):
    """Check the text and graphviz renderings of a two-level Node tree.

    Four datasets are generated, one per i in 1..4, with intercept i and
    coefficient i over x in [100*i, 100*(i+1)]; one extra point at
    x = 100*(i+1), lying on the same line, is appended to each. They are
    wrapped in four leaves (the 2nd and 4th are fed in reversed order),
    grouped in two inner nodes and one root, all built with no_check=True.
    """
    config = Config(mode='BIC', epsilon=1e-6)

    series = {}
    for k in range(1, 5):
        upper = (k + 1) * 100
        points = generate_dataset(intercept=k, coeff=k, size=100, min_x=k * 100, max_x=upper)
        points = points + [(upper, upper * k + k)]
        series[k] = ([p[0] for p in points], [p[1] for p in points])

    xs1, ys1 = series[1]
    xs2, ys2 = series[2]
    xs3, ys3 = series[3]
    xs4, ys4 = series[4]
    left = Node(Leaf(xs1, ys1, config), Leaf(xs2[::-1], ys2[::-1], config), no_check=True)
    right = Node(Leaf(xs3, ys3, config), Leaf(xs4[::-1], ys4[::-1], config), no_check=True)
    node = Node(left, right, no_check=True)

    # NOTE(review): the leading whitespace inside these literals must match
    # what Node.__str__ emits exactly - confirm against the implementation.
    tree_lines = [
        'x ≤ 3.000e+02?',
        ' └──x ≤ 2.000e+02?',
        ' │ └──y ~ 1.000e+00x + 1.000e+00',
        ' │ └──y ~ 2.000e+00x + 2.000e+00',
        ' └──x ≤ 4.000e+02?',
        ' └──y ~ 3.000e+00x + 3.000e+00',
        ' └──y ~ 4.000e+00x + 4.000e+00',
    ]
    expected = '\n'.join(tree_lines)
    self.assertEqual(expected, str(node))

    dot = node.to_graphviz()
    lhs, rhs = node.left, node.right
    # Expected layout: each inner node is declared first, then its two
    # children, then the yes/no edges towards them; the edges leaving the
    # root come last.
    graph_lines = [
        'digraph {',
        f'\t{id(node)} [label="x ≤ {node.split:.3e}?" shape=box]',
        f'\t{id(lhs)} [label="x ≤ {lhs.split:.3e}?" shape=box]',
        f'\t{id(lhs.left)} [label="{lhs.left!s}"]',
        f'\t{id(lhs.right)} [label="{lhs.right!s}"]',
        f'\t{id(lhs)} -> {id(lhs.left)} [label=yes]',
        f'\t{id(lhs)} -> {id(lhs.right)} [label=no]',
        f'\t{id(rhs)} [label="x ≤ {rhs.split:.3e}?" shape=box]',
        f'\t{id(rhs.left)} [label="{rhs.left!s}"]',
        f'\t{id(rhs.right)} [label="{rhs.right!s}"]',
        f'\t{id(rhs)} -> {id(rhs.left)} [label=yes]',
        f'\t{id(rhs)} -> {id(rhs.right)} [label=no]',
        f'\t{id(node)} -> {id(lhs)} [label=yes]',
        f'\t{id(node)} -> {id(rhs)} [label=no]',
        '}',
    ]
    expected = '\n'.join(graph_lines)
    # Show the complete diff if the graph source does not match.
    self.maxDiff = None
    self.assertEqual(str(dot), expected)
def test_add_remove(self):
    """Check a Leaf after construction, after incremental adds and after pops.

    For each noise level, a leaf is built from the first third of the noisy
    data and checked, then fed the remaining points one at a time and checked
    again, then popped ``2 * limit`` times (each popped point must be the most
    recently added one still present) and checked a last time. Plain Python
    lists mirror the expected content of the leaf throughout.
    """
    for sigma in (0, 1, 2, 4, 8):
        is_noisy = sigma > 0
        all_x = [point[0] for point in self.data]
        all_y = [point[1] + random.gauss(0, sigma) for point in self.data]
        limit = self.size // 3

        mirror_x = all_x[:limit]
        mirror_y = all_y[:limit]
        # The leaf gets its own copies, so the mirrors stay independent.
        leaf = Leaf(list(mirror_x), list(mirror_y), config=self.config)
        self.perform_tests(mirror_x, mirror_y, leaf, is_noisy)

        remaining = zip(all_x[limit:], all_y[limit:])
        for added_x, added_y in remaining:
            leaf.add(added_x, added_y)
            mirror_x.append(added_x)
            mirror_y.append(added_y)
        self.perform_tests(mirror_x, mirror_y, leaf, is_noisy)

        pops_left = 2*limit
        while pops_left > 0:
            popped_x, popped_y = leaf.pop()
            self.assertEqual(popped_x, mirror_x.pop())
            self.assertEqual(popped_y, mirror_y.pop())
            pops_left -= 1
        self.perform_tests(mirror_x, mirror_y, leaf, is_noisy)
def assert_notequal_reg(self, dataset1, dataset2):
    """Assert that the leaves built from two datasets compare as different.

    :param dataset1: sequence of points; item 0 of each point is used as x
        and item 1 as y.
    :param dataset2: same layout as ``dataset1``.
    """
    xs1 = [point[0] for point in dataset1]
    ys1 = [point[1] for point in dataset1]
    first = Leaf(xs1, ys1, config=self.config)

    xs2 = [point[0] for point in dataset2]
    ys2 = [point[1] for point in dataset2]
    second = Leaf(xs2, ys2, config=self.config)

    self.assertNotEqual(first, second)
def test_init(self):
    """Check a Leaf right after construction, for several noise levels.

    Gaussian noise of the given standard deviation is added to the y values
    of ``self.data`` before the leaf is built; ``perform_tests`` is told
    whether any noise was applied.
    """
    for sigma in (0, 1, 2, 4, 8):
        xs = [point[0] for point in self.data]
        ys = [point[1] + random.gauss(0, sigma) for point in self.data]
        leaf = Leaf(xs, ys, config=self.config)
        has_noise = sigma > 0
        self.perform_tests(xs, ys, leaf, has_noise)