Example #1
0
class TestRegularTree(TestCase):
    def setUp(self):
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)
        count = 0
        for col in self.data:
            uni = GaussianKDE()
            uni.fit(self.data[col])
            self.u_matrix[:,
                          count] = uni.cumulative_distribution(self.data[col])
            count += 1
        self.tree = Tree(TreeTypes.REGULAR)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """ Assert the construction of first tree is correct
        The first tree should be:
                   1
                0--2--3
        """
        sorted_edges = Edge.sort_edge(self.tree.edges)

        assert sorted_edges[0].L == 0
        assert sorted_edges[0].R == 2
        assert sorted_edges[1].L == 1
        assert sorted_edges[1].R == 2
        assert sorted_edges[2].L == 2
        assert sorted_edges[2].R == 3

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """ Assert first tree likehood is correct"""
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, new_u = self.tree.get_likelihood(uni_matrix)

        expected = 0.9545348664739628
        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """ Assert get constraint gets correct neighbor nodes"""
        self.tree._get_constraints()

        assert self.tree.edges[0].neighbors == [1, 2]
        assert self.tree.edges[1].neighbors == [0, 2]

    def test_get_tau_matrix(self):
        """ Assert second tree likelihood is correct """
        self.tau = self.tree.get_tau_matrix()

        test = np.isnan(self.tau)

        self.assertFalse(test.all())

    def test_second_tree_likelihood(self):
        """Assert second tree likelihood is correct."""
        tau = self.tree.get_tau_matrix()
        second_tree = Tree(TreeTypes.REGULAR)
        second_tree.fit(1, 3, tau, self.tree)
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        first_value, new_u = self.tree.get_likelihood(uni_matrix)
        second_value, out_u = second_tree.get_likelihood(new_u)
Example #2
0
class TestDirectTree(TestCase):
    def setUp(self):
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)
        count = 0
        for col in self.data:
            uni = GaussianKDE()
            uni.fit(self.data[col])
            self.u_matrix[:,
                          count] = uni.cumulative_distribution(self.data[col])
            count += 1
        self.tree = Tree(TreeTypes.DIRECT)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """ Assert 0 is the center node"""
        assert self.tree.edges[0].L == 0

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """ Assert first tree likehood is correct"""
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, new_u = self.tree.get_likelihood(uni_matrix)

        expected = -0.1207611551427385
        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """ Assert get constraint gets correct neighbor nodes"""
        self.tree._get_constraints()

        assert self.tree.edges[0].neighbors == [1]
        assert self.tree.edges[1].neighbors == [0, 2]

    def test_get_tau_matrix_no_edges_empty(self):
        """get_tau_matrix returns an empty array if there are no edges."""
        # Setup
        tree = Tree(TreeTypes.DIRECT)
        tree.edges = []

        # Run
        result = tree.get_tau_matrix()

        # Check
        assert result.shape == (0, 0)

    def test_get_tau_matrix(self):
        """Assert none of get tau matrix is NaN."""
        self.tau = self.tree.get_tau_matrix()

        test = np.isnan(self.tau)

        self.assertFalse(test.all())

    @pytest.mark.xfail
    def test_second_tree_likelihood(self):
        """Assert second tree likelihood is correct."""
        tau = self.tree.get_tau_matrix()

        second_tree = Tree(TreeTypes.DIRECT)
        second_tree.fit(1, 3, tau, self.tree)

        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        first_value, new_u = self.tree.get_likelihood(uni_matrix)
        second_value, out_u = second_tree.get_likelihood(new_u)

        expected = 0.24428294700258632
        assert abs(second_value - expected) < 10E-3
Example #3
0
class TestCenterTree(TestCase):
    def setUp(self):
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)
        count = 0
        for col in self.data:
            uni = GaussianKDE()
            uni.fit(self.data[col])
            self.u_matrix[:,
                          count] = uni.cumulative_distribution(self.data[col])
            count += 1
        self.tree = Tree(TreeTypes.CENTER)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """Assert 0 is the center node on the first tree."""
        assert self.tree.edges[0].L == 0

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """Assert first tree likehood is correct."""
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, new_u = self.tree.get_likelihood(uni_matrix)

        expected = -0.19988720707143634
        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """Assert get constraint gets correct neighbor nodes."""
        self.tree._get_constraints()

        assert self.tree.edges[0].neighbors == [1, 2]
        assert self.tree.edges[1].neighbors == [0, 2]

    def test_get_tau_matrix(self):
        """Assert none of get tau matrix is NaN."""
        self.tau = self.tree.get_tau_matrix()

        test = np.isnan(self.tau)

        self.assertFalse(test.all())

    @pytest.mark.xfail
    def test_second_tree_likelihood(self):
        """Assert second tree likelihood is correct."""
        # Setup
        # Build first tree
        data = pd.read_csv('data/iris.data.csv')
        tau_mat = data.corr(method='kendall').values
        u_matrix = np.empty(data.shape)

        for index, col in enumerate(data):
            uni = GaussianKDE()
            uni.fit(data[col])
            u_matrix[:, index] = uni.cumulative_distribution(data[col])

        first_tree = Tree(TreeTypes.CENTER)
        first_tree.fit(0, 4, tau_mat, u_matrix)
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])
        likelihood_first_tree, conditional_uni_first = first_tree.get_likelihood(
            uni_matrix)
        tau = first_tree.get_tau_matrix()

        # Build second tree
        second_tree = Tree(TreeTypes.CENTER)
        second_tree.fit(1, 3, tau, first_tree)
        expected_likelihood_second_tree = 0.4888802429313932

        # Run
        likelihood_second_tree, out_u = second_tree.get_likelihood(
            conditional_uni_first)

        # Check
        assert compare_values_epsilon(likelihood_second_tree,
                                      expected_likelihood_second_tree)