def test_serialization_fit_model(self):
    """A fitted Tree is unchanged by a ``to_dict`` / ``from_dict`` round trip."""
    # Setup
    data = pd.DataFrame(data=[[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    tau_matrix = data.corr(method='kendall').values
    n_nodes = data.shape[1]
    index = 0

    # One column of cumulative-distribution values per input column.
    univariates_matrix = np.empty(data.shape)
    for position, name in enumerate(data):
        kde = GaussianKDE()
        kde.fit(data[name])
        univariates_matrix[:, position] = kde.cumulative_distribution(data[name])

    instance = Tree(TreeTypes.REGULAR)
    instance.fit(index, n_nodes, tau_matrix, univariates_matrix)

    # Run
    result = Tree.from_dict(instance.to_dict())

    # Check
    assert result.to_dict() == instance.to_dict()
def test_prepare_next_tree_first_level(self, bivariate_mock):
    """prepare_next_tree computes the conditional U matrices on its edges.

    The bivariate copula is mocked: its ``partial_derivative`` returns values
    including 0.0 and 1.0, and the edge's ``U`` is expected to hold them with
    the two extremes replaced by ``EPSILON`` and ``1 - EPSILON``.
    """
    # Setup
    instance = Tree(TreeTypes.REGULAR)
    instance.level = 1
    instance.u_matrix = np.array([[0.1, 0.2], [0.3, 0.4]])

    edge = MagicMock(spec=Edge)
    edge.L = 0
    edge.R = 1
    edge.name = 'copula_type'
    edge.theta = 'copula_theta'
    instance.edges = [edge]

    copula_mock = bivariate_mock.return_value
    copula_mock.partial_derivative.return_value = np.array([0.0, 0.25, 0.5, 0.75, 1.0])

    # Both rows of the expected U are the same clipped derivative values.
    clipped_row = [EPSILON, 0.25, 0.50, 0.75, 1 - EPSILON]
    expected_univariate = np.array([clipped_row, clipped_row])

    # Second call is expected to receive the same matrix with columns swapped.
    swapped_u_matrix = instance.u_matrix[:, np.argsort([1, 0])]
    expected_partial_derivative_call_args = [
        ((instance.u_matrix, ), {}),
        ((swapped_u_matrix, ), {})
    ]

    # Run
    instance.prepare_next_tree()

    # Check
    compare_nested_iterables(instance.edges[0].U, expected_univariate)

    bivariate_mock.assert_called_once_with('copula_type')
    assert copula_mock.theta == 'copula_theta'

    compare_nested_iterables(
        copula_mock.partial_derivative.call_args_list,
        expected_partial_derivative_call_args
    )
class TestDirectTree(TestCase):
    """Tests for a ``Tree`` built with ``TreeTypes.DIRECT`` on the iris data."""

    def setUp(self):
        """Load the iris data and fit a first-level direct tree on it."""
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)

        # One column of cumulative-distribution values per data column.
        for position, name in enumerate(self.data):
            kde = GaussianKDE()
            kde.fit(self.data[name])
            self.u_matrix[:, position] = kde.cumulative_distribution(self.data[name])

        self.tree = Tree(TreeTypes.DIRECT)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """The first edge of the fitted tree has node 0 on its left side."""
        first_edge = self.tree.edges[0]
        assert first_edge.L == 0

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """First tree likelihood matches the reference value (marked xfail)."""
        expected = -0.1207611551427385
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, _ = self.tree.get_likelihood(uni_matrix)

        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """_get_constraints fills in the expected neighbors of the first two edges."""
        self.tree._get_constraints()

        edges = self.tree.edges
        assert edges[0].neighbors == [1]
        assert edges[1].neighbors == [0, 2]

    def test_get_tau_matrix_no_edges_empty(self):
        """get_tau_matrix returns an empty array if there are no edges."""
        # Setup
        tree = Tree(TreeTypes.DIRECT)
        tree.edges = []

        # Run
        result = tree.get_tau_matrix()

        # Check
        assert result.shape == (0, 0)

    def test_get_tau_matrix(self):
        """get_tau_matrix does not return a matrix made up entirely of NaN."""
        # NOTE(review): this only rules out an all-NaN result; individual NaN
        # entries would still pass -- confirm whether that is the intent.
        self.tau = self.tree.get_tau_matrix()
        nan_mask = np.isnan(self.tau)
        self.assertFalse(nan_mask.all())

    @pytest.mark.xfail
    def test_second_tree_likelihood(self):
        """Second tree likelihood matches the reference value (marked xfail)."""
        tau = self.tree.get_tau_matrix()

        second_tree = Tree(TreeTypes.DIRECT)
        second_tree.fit(1, 3, tau, self.tree)

        # The second tree is evaluated on the output of the first one.
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])
        _, new_u = self.tree.get_likelihood(uni_matrix)
        second_value, _ = second_tree.get_likelihood(new_u)

        expected = 0.24428294700258632
        assert abs(second_value - expected) < 10E-3
class TestRegularTree(TestCase):
    """Tests for a ``Tree`` built with ``TreeTypes.REGULAR`` on the iris data."""

    def setUp(self):
        """Load the iris data and fit a first-level regular tree on it."""
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)

        # One column of cumulative-distribution values per data column.
        for position, name in enumerate(self.data):
            kde = GaussianKDE()
            kde.fit(self.data[name])
            self.u_matrix[:, position] = kde.cumulative_distribution(self.data[name])

        self.tree = Tree(TreeTypes.REGULAR)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """Assert the construction of the first tree is correct.

        The sorted edges should be (0, 2), (1, 2) and (2, 3), i.e.:

               1
               |
            0--2--3
        """
        sorted_edges = Edge.sort_edge(self.tree.edges)
        expected_pairs = [(0, 2), (1, 2), (2, 3)]

        for position, (left, right) in enumerate(expected_pairs):
            edge = sorted_edges[position]
            assert edge.L == left
            assert edge.R == right

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """First tree likelihood matches the reference value (marked xfail)."""
        expected = 0.9545348664739628
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, _ = self.tree.get_likelihood(uni_matrix)

        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """_get_constraints fills in the expected neighbors of the first two edges."""
        self.tree._get_constraints()

        edges = self.tree.edges
        assert edges[0].neighbors == [1, 2]
        assert edges[1].neighbors == [0, 2]

    def test_get_tau_matrix(self):
        """get_tau_matrix does not return a matrix made up entirely of NaN."""
        # NOTE(review): this only rules out an all-NaN result; individual NaN
        # entries would still pass -- confirm whether that is the intent.
        self.tau = self.tree.get_tau_matrix()
        nan_mask = np.isnan(self.tau)
        self.assertFalse(nan_mask.all())

    def test_second_tree_likelihood(self):
        """A second tree can be fitted and evaluated on the first tree's output.

        No value is asserted here; the test only checks that the calls run.
        """
        tau = self.tree.get_tau_matrix()

        second_tree = Tree(TreeTypes.REGULAR)
        second_tree.fit(1, 3, tau, self.tree)

        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])
        _, new_u = self.tree.get_likelihood(uni_matrix)
        second_tree.get_likelihood(new_u)
def test_to_dict_fit_model(self):
    """to_dict on a fitted regular Tree returns the expected nested dict."""
    # Setup
    data = pd.DataFrame(data=[
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]
    ])
    tau_matrix = data.corr(method='kendall').values
    n_nodes = data.shape[1]
    index = 0

    # One column of cumulative-distribution values per input column.
    univariates_matrix = np.empty(data.shape)
    for position, name in enumerate(data):
        kde = GaussianKDE()
        kde.fit(data[name])
        univariates_matrix[:, position] = kde.cumulative_distribution(data[name])

    instance = Tree(TreeTypes.REGULAR)
    instance.fit(index, n_nodes, tau_matrix, univariates_matrix)

    # Values that repeat throughout the expected output.
    diagonal = 0.8230112726144534
    off_diagonal = 0.3384880496294825
    tau = -0.49999999999999994
    theta = -5.736282443655552

    expected_result = {
        'type': 'copulas.multivariate.tree.RegularTree',
        'fitted': True,
        'level': 1,
        'n_nodes': 3,
        'previous_tree': [
            [diagonal, off_diagonal, off_diagonal],
            [off_diagonal, diagonal, off_diagonal],
            [off_diagonal, off_diagonal, diagonal]
        ],
        'tau_matrix': [
            [1.0, tau, tau],
            [tau, 1.0, tau],
            [tau, tau, 1.0]
        ],
        'tree_type': TreeTypes.REGULAR,
        'edges': [
            {
                'index': 0,
                'D': set(),
                'L': 0,
                'R': 1,
                'U': [
                    [0.7969535322648066, 0.6887525261721343, 0.12077958383821545],
                    [0.6887525261721343, 0.7969535322648066, 0.12077958383821545]
                ],
                'likelihood': None,
                'name': CopulaTypes.FRANK,
                'neighbors': [],
                'parents': None,
                'tau': tau,
                'theta': theta
            },
            {
                'index': 1,
                'D': set(),
                'L': 1,
                'R': 2,
                'U': [
                    [0.12077958383821545, 0.7969535322648066, 0.6887525261721343],
                    [0.12077958383821545, 0.6887525261721343, 0.7969535322648066]
                ],
                'likelihood': None,
                'name': CopulaTypes.FRANK,
                'neighbors': [],
                'parents': None,
                'tau': tau,
                'theta': theta
            }
        ],
    }

    # Run
    result = instance.to_dict()

    # Check
    compare_nested_dicts(result, expected_result)
def test_second_tree_likelihood(self):
    """Assert second tree likelihood is correct."""
    # Setup
    expected_likelihood_second_tree = 0.4888802429313932

    # Build first tree
    data = pd.read_csv('data/iris.data.csv')
    tau_mat = data.corr(method='kendall').values

    u_matrix = np.empty(data.shape)
    for position, name in enumerate(data):
        kde = GaussianKDE()
        kde.fit(data[name])
        u_matrix[:, position] = kde.cumulative_distribution(data[name])

    first_tree = Tree(TreeTypes.CENTER)
    first_tree.fit(0, 4, tau_mat, u_matrix)

    # The first tree's output feeds the second tree's likelihood below.
    uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])
    _, conditional_uni_first = first_tree.get_likelihood(uni_matrix)
    tau = first_tree.get_tau_matrix()

    # Build second tree
    second_tree = Tree(TreeTypes.CENTER)
    second_tree.fit(1, 3, tau, first_tree)

    # Run
    likelihood_second_tree, _ = second_tree.get_likelihood(conditional_uni_first)

    # Check
    assert compare_values_epsilon(likelihood_second_tree, expected_likelihood_second_tree)
class TestCenterTree(TestCase):
    """Tests for a ``Tree`` built with ``TreeTypes.CENTER`` on the iris data."""

    def setUp(self):
        """Load the iris data and fit a first-level center tree on it."""
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)

        # One column of cumulative-distribution values per data column.
        for position, name in enumerate(self.data):
            kde = GaussianKDE()
            kde.fit(self.data[name])
            self.u_matrix[:, position] = kde.cumulative_distribution(self.data[name])

        self.tree = Tree(TreeTypes.CENTER)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """Assert 0 is the center node on the first tree."""
        first_edge = self.tree.edges[0]
        assert first_edge.L == 0

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """First tree likelihood matches the reference value (marked xfail)."""
        expected = -0.19988720707143634
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, _ = self.tree.get_likelihood(uni_matrix)

        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """_get_constraints fills in the expected neighbors of the first two edges."""
        self.tree._get_constraints()

        edges = self.tree.edges
        assert edges[0].neighbors == [1, 2]
        assert edges[1].neighbors == [0, 2]

    def test_get_tau_matrix(self):
        """get_tau_matrix does not return a matrix made up entirely of NaN."""
        # NOTE(review): this only rules out an all-NaN result; individual NaN
        # entries would still pass -- confirm whether that is the intent.
        self.tau = self.tree.get_tau_matrix()
        nan_mask = np.isnan(self.tau)
        self.assertFalse(nan_mask.all())

    @pytest.mark.xfail
    def test_second_tree_likelihood(self):
        """Second tree likelihood matches the reference value (marked xfail)."""
        # Setup
        expected_likelihood_second_tree = 0.4888802429313932

        # Build first tree (independently of the one created in setUp)
        data = pd.read_csv('data/iris.data.csv')
        tau_mat = data.corr(method='kendall').values

        u_matrix = np.empty(data.shape)
        for position, name in enumerate(data):
            kde = GaussianKDE()
            kde.fit(data[name])
            u_matrix[:, position] = kde.cumulative_distribution(data[name])

        first_tree = Tree(TreeTypes.CENTER)
        first_tree.fit(0, 4, tau_mat, u_matrix)

        # The first tree's output feeds the second tree's likelihood below.
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])
        _, conditional_uni_first = first_tree.get_likelihood(uni_matrix)
        tau = first_tree.get_tau_matrix()

        # Build second tree
        second_tree = Tree(TreeTypes.CENTER)
        second_tree.fit(1, 3, tau, first_tree)

        # Run
        likelihood_second_tree, _ = second_tree.get_likelihood(conditional_uni_first)

        # Check
        assert compare_values_epsilon(likelihood_second_tree, expected_likelihood_second_tree)
def test_to_dict(self):
    """to_dict serializes a VineCopula whose attributes were set by hand.

    The instance is flagged as fitted manually; the nested tree and
    univariate are left unfitted and are expected to serialize as such.
    """
    # Setup
    instance = VineCopula('regular')
    instance.fitted = True
    instance.n_sample = 100
    instance.n_var = 10
    instance.depth = 3
    instance.truncated = 3

    instance.trees = [Tree('regular')]
    instance.unis = [KDEUnivariate()]

    instance.tau_mat = np.array([
        [0, 1],
        [1, 0]
    ])
    instance.u_matrix = np.array([
        [0, 1],
        [1, 0]
    ])

    expected_trees = [
        {
            'type': 'copulas.multivariate.tree.RegularTree',
            'tree_type': 'regular',
            'fitted': False
        }
    ]
    expected_unis = [
        {
            'type': 'copulas.univariate.kde.KDEUnivariate',
            'fitted': False
        }
    ]
    expected_result = {
        'type': 'copulas.multivariate.vine.VineCopula',
        'fitted': True,
        'vine_type': 'regular',
        'n_sample': 100,
        'n_var': 10,
        'depth': 3,
        'truncated': 3,
        'trees': expected_trees,
        'tau_mat': [
            [0, 1],
            [1, 0]
        ],
        'u_matrix': [
            [0, 1],
            [1, 0]
        ],
        'unis': expected_unis
    }

    # Run
    result = instance.to_dict()

    # Check
    assert result == expected_result
def test_to_dict_fit_model(self):
    """to_dict on a fitted regular Tree returns the expected nested dict."""
    # Setup
    data = pd.DataFrame(data=[
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]
    ])
    tau_matrix = data.corr(method='kendall').values
    n_nodes = data.shape[1]
    index = 0

    # The cumulative distribution is evaluated one value at a time here.
    univariates_matrix = np.empty(data.shape)
    for position, name in enumerate(data):
        kde = KDEUnivariate()
        kde.fit(data[name])
        univariates_matrix[:, position] = [
            kde.cumulative_distribution(value) for value in data[name]
        ]

    instance = Tree(TreeTypes.REGULAR)
    instance.fit(index, n_nodes, tau_matrix, univariates_matrix)

    # Values that repeat throughout the expected output.
    diagonal = 0.8230112726144534
    off_diagonal = 0.3384880496294825
    tau = -0.49999999999999994
    theta = -5.736282443655552

    expected_result = {
        'type': 'copulas.multivariate.tree.RegularTree',
        'fitted': True,
        'level': 1,
        'n_nodes': 3,
        'previous_tree': [
            [diagonal, off_diagonal, off_diagonal],
            [off_diagonal, diagonal, off_diagonal],
            [off_diagonal, off_diagonal, diagonal]
        ],
        'tau_matrix': [
            [1.0, tau, tau],
            [tau, 1.0, tau],
            [tau, tau, 1.0]
        ],
        'tree_type': TreeTypes.REGULAR,
        'edges': [
            {
                'D': set(),
                'L': 0,
                'R': 1,
                'U': [
                    [6.533235975920359, 6.425034969827687, 5.857062027493768],
                    [6.425034969827687, 6.533235975920359, 5.857062027493768]
                ],
                'likelihood': None,
                'name': CopulaTypes.FRANK,
                'neighbors': [],
                'parents': None,
                'tau': tau,
                'theta': theta
            },
            {
                'D': set(),
                'L': 1,
                'R': 2,
                'U': [
                    [5.857062027493768, 6.533235975920359, 6.425034969827687],
                    [5.857062027493768, 6.425034969827687, 6.533235975920359]
                ],
                'likelihood': None,
                'name': CopulaTypes.FRANK,
                'neighbors': [],
                'parents': None,
                'tau': tau,
                'theta': theta
            }
        ],
    }

    # Run
    result = instance.to_dict()

    # Check
    compare_nested_dicts(result, expected_result)