def test_second_tree_likelihood(self):
    """Check the likelihood reported by a second-level CENTER tree.

    A first tree is fitted on the iris CSV, a second tree is fitted on top of
    it, and the second tree's likelihood is compared with a stored value.
    """
    # Setup
    # Build first tree
    iris = pd.read_csv('data/iris.data.csv')
    kendall_tau = iris.corr(method='kendall').values

    cdf_values = np.empty(iris.shape)
    for position, column in enumerate(iris):
        kde = GaussianKDE()
        kde.fit(iris[column])
        cdf_values[:, position] = kde.cumulative_distribution(iris[column])

    level_one = Tree(TreeTypes.CENTER)
    level_one.fit(0, 4, kendall_tau, cdf_values)

    sample = np.array([[0.1, 0.2, 0.3, 0.4]])
    # Only the conditional values are needed to feed the second tree.
    _, conditional_uni_first = level_one.get_likelihood(sample)
    level_one_tau = level_one.get_tau_matrix()

    # Build second tree
    level_two = Tree(TreeTypes.CENTER)
    level_two.fit(1, 3, level_one_tau, level_one)

    expected_likelihood_second_tree = 0.4888802429313932

    # Run
    likelihood_second_tree, _ = level_two.get_likelihood(conditional_uni_first)

    # Check
    assert compare_values_epsilon(likelihood_second_tree, expected_likelihood_second_tree)
class TestDirectTree(TestCase):
    """Tests for a ``TreeTypes.DIRECT`` tree fitted on the iris CSV."""

    def setUp(self):
        """Fit the first-level DIRECT tree used by the tests."""
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)

        # Fill one column of CDF values per data column, using a KDE fitted on it.
        for index, col in enumerate(self.data):
            uni = KDEUnivariate()
            uni.fit(self.data[col])
            # The CDF is evaluated one value at a time here.
            self.u_matrix[:, index] = [
                uni.cumulative_distribution(x) for x in self.data[col]
            ]

        self.tree = Tree(TreeTypes.DIRECT)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """Assert the first edge of the fitted tree has node 0 on its left."""
        assert self.tree.edges[0].L == 0

    def test_first_tree_likelihood(self):
        """Assert the first tree likelihood matches the stored value."""
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, _ = self.tree.get_likelihood(uni_matrix)

        expected = -0.1207611551427385
        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """Assert _get_constraints sets the expected neighbors on the first two edges."""
        self.tree._get_constraints()

        assert self.tree.edges[0].neighbors == [1]
        assert self.tree.edges[1].neighbors == [0, 2]

    def test_get_tau_matrix(self):
        """Assert the tau matrix is not made up entirely of NaN values.

        Only the all-NaN case fails: individual NaN entries are tolerated.
        """
        tau = self.tree.get_tau_matrix()

        self.assertFalse(np.isnan(tau).all())

    def test_second_tree_likelihood(self):
        """Assert the second tree likelihood matches the stored value."""
        tau = self.tree.get_tau_matrix()

        second_tree = Tree(TreeTypes.DIRECT)
        second_tree.fit(1, 3, tau, self.tree)

        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        # The conditional values produced by the first tree feed the second one.
        _, new_u = self.tree.get_likelihood(uni_matrix)
        second_value, _ = second_tree.get_likelihood(new_u)

        expected = 0.7184205492690413
        assert abs(second_value - expected) < 10E-3
def test_second_tree_likelihood(self):
    """Fit a second-level REGULAR tree and evaluate its likelihood.

    No value is asserted: the test passes as long as fitting and both
    likelihood evaluations complete without raising.
    """
    first_level_tau = self.tree.get_tau_matrix()

    level_two = Tree(TreeTypes.REGULAR)
    level_two.fit(1, 3, first_level_tau, self.tree)

    sample = np.array([[0.1, 0.2, 0.3, 0.4]])

    # The conditional values produced by the first tree feed the second one.
    _, conditional_u = self.tree.get_likelihood(sample)
    _, _ = level_two.get_likelihood(conditional_u)
def test_second_tree_likelihood(self):
    """Check the second-level CENTER tree likelihood against a stored value."""
    first_level_tau = self.tree.get_tau_matrix()

    level_two = Tree(TreeTypes.CENTER)
    level_two.fit(1, 3, first_level_tau, self.tree)

    sample = np.array([[0.1, 0.2, 0.3, 0.4]])

    # The conditional values produced by the first tree feed the second one.
    _, conditional_u = self.tree.get_likelihood(sample)
    observed, _ = level_two.get_likelihood(conditional_u)

    reference = 0.540089320412914
    deviation = abs(observed - reference)
    assert deviation < 10E-3
def test_second_tree_likelihood(self):
    """Check the second-level DIRECT tree likelihood against a stored value."""
    first_level_tau = self.tree.get_tau_matrix()

    level_two = Tree(TreeTypes.DIRECT)
    level_two.fit(1, 3, first_level_tau, self.tree)

    sample = np.array([[0.1, 0.2, 0.3, 0.4]])

    # The conditional values produced by the first tree feed the second one.
    _, conditional_u = self.tree.get_likelihood(sample)
    observed, _ = level_two.get_likelihood(conditional_u)

    reference = 0.24428294700258632
    deviation = abs(observed - reference)
    assert deviation < 10E-3
class TestDirectTree(TestCase):
    """Tests for a ``TreeTypes.DIRECT`` tree fitted on the iris CSV."""

    def setUp(self):
        """Fit the first-level DIRECT tree used by the tests."""
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)

        # Fill one column of CDF values per data column, using a KDE fitted on it.
        for index, col in enumerate(self.data):
            uni = GaussianKDE()
            uni.fit(self.data[col])
            self.u_matrix[:, index] = uni.cumulative_distribution(self.data[col])

        self.tree = Tree(TreeTypes.DIRECT)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """Assert the first edge of the fitted tree has node 0 on its left."""
        assert self.tree.edges[0].L == 0

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """Assert the first tree likelihood matches the stored value."""
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, _ = self.tree.get_likelihood(uni_matrix)

        expected = -0.1207611551427385
        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """Assert _get_constraints sets the expected neighbors on the first two edges."""
        self.tree._get_constraints()

        assert self.tree.edges[0].neighbors == [1]
        assert self.tree.edges[1].neighbors == [0, 2]

    def test_get_tau_matrix_no_edges_empty(self):
        """get_tau_matrix returns an empty array if there are no edges."""
        # Setup
        tree = Tree(TreeTypes.DIRECT)
        tree.edges = []

        # Run
        result = tree.get_tau_matrix()

        # Check
        assert result.shape == (0, 0)

    def test_get_tau_matrix(self):
        """Assert the tau matrix is not made up entirely of NaN values.

        Only the all-NaN case fails: individual NaN entries are tolerated.
        """
        tau = self.tree.get_tau_matrix()

        self.assertFalse(np.isnan(tau).all())

    @pytest.mark.xfail
    def test_second_tree_likelihood(self):
        """Assert the second tree likelihood matches the stored value."""
        tau = self.tree.get_tau_matrix()

        second_tree = Tree(TreeTypes.DIRECT)
        second_tree.fit(1, 3, tau, self.tree)

        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        # The conditional values produced by the first tree feed the second one.
        _, new_u = self.tree.get_likelihood(uni_matrix)
        second_value, _ = second_tree.get_likelihood(new_u)

        expected = 0.24428294700258632
        assert abs(second_value - expected) < 10E-3
class TestRegularTree(TestCase):
    """Tests for a ``TreeTypes.REGULAR`` tree fitted on the iris CSV."""

    def setUp(self):
        """Fit the first-level REGULAR tree used by the tests."""
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)

        # Fill one column of CDF values per data column, using a KDE fitted on it.
        for index, col in enumerate(self.data):
            uni = GaussianKDE()
            uni.fit(self.data[col])
            self.u_matrix[:, index] = uni.cumulative_distribution(self.data[col])

        self.tree = Tree(TreeTypes.REGULAR)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """Assert the construction of the first tree is correct.

        The first tree should be:

               1
               |
            0--2--3
        """
        sorted_edges = Edge.sort_edge(self.tree.edges)

        assert sorted_edges[0].L == 0
        assert sorted_edges[0].R == 2
        assert sorted_edges[1].L == 1
        assert sorted_edges[1].R == 2
        assert sorted_edges[2].L == 2
        assert sorted_edges[2].R == 3

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """Assert the first tree likelihood matches the stored value."""
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, _ = self.tree.get_likelihood(uni_matrix)

        expected = 0.9545348664739628
        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """Assert _get_constraints sets the expected neighbors on the first two edges."""
        self.tree._get_constraints()

        assert self.tree.edges[0].neighbors == [1, 2]
        assert self.tree.edges[1].neighbors == [0, 2]

    def test_get_tau_matrix(self):
        """Assert the tau matrix is not made up entirely of NaN values.

        Only the all-NaN case fails: individual NaN entries are tolerated.
        """
        tau = self.tree.get_tau_matrix()

        self.assertFalse(np.isnan(tau).all())

    def test_second_tree_likelihood(self):
        """Fit a second-level REGULAR tree and evaluate its likelihood.

        No value is asserted: the test passes as long as fitting and both
        likelihood evaluations complete without raising.
        """
        tau = self.tree.get_tau_matrix()

        second_tree = Tree(TreeTypes.REGULAR)
        second_tree.fit(1, 3, tau, self.tree)

        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        # The conditional values produced by the first tree feed the second one.
        _, new_u = self.tree.get_likelihood(uni_matrix)
        _, _ = second_tree.get_likelihood(new_u)
class TestCenterTree(TestCase):
    """Tests for a ``TreeTypes.CENTER`` tree fitted on the iris CSV."""

    def setUp(self):
        """Fit the first-level CENTER tree used by the tests."""
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)

        # Fill one column of CDF values per data column, using a KDE fitted on it.
        for index, col in enumerate(self.data):
            uni = GaussianKDE()
            uni.fit(self.data[col])
            self.u_matrix[:, index] = uni.cumulative_distribution(self.data[col])

        self.tree = Tree(TreeTypes.CENTER)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """Assert the first edge of the first tree has node 0 on its left."""
        assert self.tree.edges[0].L == 0

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """Assert the first tree likelihood matches the stored value."""
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, _ = self.tree.get_likelihood(uni_matrix)

        expected = -0.19988720707143634
        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """Assert _get_constraints sets the expected neighbors on the first two edges."""
        self.tree._get_constraints()

        assert self.tree.edges[0].neighbors == [1, 2]
        assert self.tree.edges[1].neighbors == [0, 2]

    def test_get_tau_matrix(self):
        """Assert the tau matrix is not made up entirely of NaN values.

        Only the all-NaN case fails: individual NaN entries are tolerated.
        """
        tau = self.tree.get_tau_matrix()

        self.assertFalse(np.isnan(tau).all())

    @pytest.mark.xfail
    def test_second_tree_likelihood(self):
        """Assert the second tree likelihood matches the stored value.

        The first tree is rebuilt locally instead of reusing ``self.tree``.
        """
        # Setup
        # Build first tree
        data = pd.read_csv('data/iris.data.csv')
        tau_mat = data.corr(method='kendall').values
        u_matrix = np.empty(data.shape)

        for index, col in enumerate(data):
            uni = GaussianKDE()
            uni.fit(data[col])
            u_matrix[:, index] = uni.cumulative_distribution(data[col])

        first_tree = Tree(TreeTypes.CENTER)
        first_tree.fit(0, 4, tau_mat, u_matrix)

        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])
        # Only the conditional values are needed to feed the second tree.
        _, conditional_uni_first = first_tree.get_likelihood(uni_matrix)
        tau = first_tree.get_tau_matrix()

        # Build second tree
        second_tree = Tree(TreeTypes.CENTER)
        second_tree.fit(1, 3, tau, first_tree)

        expected_likelihood_second_tree = 0.4888802429313932

        # Run
        likelihood_second_tree, _ = second_tree.get_likelihood(conditional_uni_first)

        # Check
        assert compare_values_epsilon(likelihood_second_tree, expected_likelihood_second_tree)