def prepare_next_tree(self):
    """Fill ``edge.U`` for every edge so the next tree level can be built.

    For each edge the two univariate inputs come from ``self.u_matrix`` when
    ``self.level`` is 1, otherwise from ``Edge.get_conditional_uni`` applied
    to the edge's parents. The copula's partial derivative is evaluated for
    both argument orders and results that are exactly 0 or 1 are replaced by
    ``EPSILON`` and ``1 - EPSILON`` respectively.
    """
    for edge in self.edges:
        theta = edge.theta

        if self.level != 1:
            parent_a, parent_b = edge.parents
            left_u, right_u = Edge.get_conditional_uni(parent_a, parent_b)
        else:
            left_u = self.u_matrix[:, edge.L]
            right_u = self.u_matrix[:, edge.R]

        # Drop missing entries before pairing the two sides up.
        left_u = [value for value in left_u if value is not None]
        right_u = [value for value in right_u if value is not None]

        # compute conditional cdfs C(i|j) = dC(i,j)/duj and dC(i,j)/du
        X_left_right = np.array([[a, b] for a, b in zip(left_u, right_u)])
        X_right_left = np.array([[b, a] for b, a in zip(right_u, left_u)])

        copula = Bivariate(copula_type=edge.name)
        copula.theta = theta
        left_given_right = copula.partial_derivative(X_left_right)
        right_given_left = copula.partial_derivative(X_right_left)

        # correction of 0 or 1
        for conditional in (left_given_right, right_given_left):
            conditional[conditional == 0] = EPSILON
        for conditional in (left_given_right, right_given_left):
            conditional[conditional == 1] = 1 - EPSILON

        edge.U = np.array([left_given_right, right_given_left])
def setUp(self):
    """Store the two-column sample data and a FRANK copula on the test case."""
    rows = [
        [2641.16233666, 180.2425623],
        [921.14476418, 192.35609972],
        [-651.32239137, 150.24830291],
        [1223.63536668, 156.62123653],
        [3233.37342355, 173.80311908],
        [1373.22400821, 191.0922843],
        [1959.28188858, 163.22252158],
        [1076.99295365, 190.73280428],
        [2029.25100261, 158.52982435],
        [1835.52188141, 163.0101334],
        [1170.03850556, 205.24904026],
        [739.42628394, 175.42916046],
        [1866.65810627, 208.31821984],
        [3703.49786503, 178.98351969],
        [1719.45232017, 160.50981075],
        [258.90206528, 163.19294974],
        [219.42363944, 173.30395132],
        [609.90212377, 215.18996298],
        [1618.44207239, 164.71141696],
        [2323.2775272, 178.84973821],
        [3251.78732274, 182.99902513],
        [1430.63989981, 217.5796917],
        [-180.57028875, 201.56983421],
        [-592.84497457, 174.92272693],
    ]
    self.X = np.array(rows)
    self.copula = Bivariate(CopulaTypes.FRANK)
def get_likelihood(self, uni_matrix):
    """Evaluate this edge's copula on values taken from ``uni_matrix``.

    Args:
        uni_matrix (numpy.array):
            Matrix the two inputs are read from. Without parents the columns
            ``self.L`` and ``self.R`` are used; with parents a single entry is
            read for each side.

    Returns:
        tuple:
            The summed probability density, the partial derivative for the
            (left, right) ordering and the partial derivative for the
            (right, left) ordering.
    """
    if self.parents is not None:
        # Each side is indexed by the first element of self.D that is not in
        # the corresponding parent's D.
        left_col = list(self.D - self.parents[0].D)[0]
        right_col = list(self.D - self.parents[1].D)[0]
        left_u = uni_matrix[self.L, left_col]
        right_u = uni_matrix[self.R, right_col]
    else:
        left_u = uni_matrix[:, self.L]
        right_u = uni_matrix[:, self.R]

    copula = Bivariate(copula_type=self.name)
    copula.theta = self.theta

    X_left_right = np.array([[left_u, right_u]])
    X_right_left = np.array([[right_u, left_u]])

    density_total = np.sum(copula.probability_density(X_left_right))
    left_given_right = copula.partial_derivative(X_left_right)
    right_given_left = copula.partial_derivative(X_right_left)

    return density_total, left_given_right, right_given_left
def test_cdf_zero_if_single_arg_is_zero(self):
    """Run ``copula_zero_if_arg_zero`` on a FRANK copula over a sweep of tau values."""
    # Setup
    copula = Bivariate(CopulaTypes.FRANK)
    # 20 evenly spaced points on [-1, 1] with both endpoints dropped.
    tau_grid = np.linspace(-1.0, 1.0, 20)[1:-1]

    # Run/Check
    for current_tau in tau_grid:
        copula.tau = current_tau
        copula.theta = copula.compute_theta()
        copula_zero_if_arg_zero(copula)
def test_cdf_value_if_all_other_arg_are_one(self):
    """Run ``copula_single_arg_not_one`` on a FRANK copula over a sweep of tau values."""
    # Setup
    copula = Bivariate(CopulaTypes.FRANK)
    # 20 evenly spaced points on [-1, 1] with both endpoints dropped.
    tau_grid = np.linspace(-1.0, 1.0, 20)[1:-1]

    # Run/Check
    for current_tau in tau_grid:
        copula.tau = current_tau
        copula.theta = copula.compute_theta()
        copula_single_arg_not_one(copula, tolerance=1E-03)
def test_fit(self):
    """After ``fit``, an independence copula still has no tau and no theta."""
    # Setup
    instance = Bivariate(CopulaTypes.INDEPENDENCE)
    data = np.array([
        [1, 2],
        [4, 3]
    ])

    # Run
    instance.fit(data)

    # Check
    # These were bare expressions before, so the test could never fail.
    assert instance.tau is None
    assert instance.theta is None
def test_partial_derivative_scalar(self, derivative_mock):
    """partial_derivative_scalar forwards its two scalars to partial_derivative as one row."""
    # Setup
    copula = Bivariate(copula_type=CopulaTypes.CLAYTON)
    copula.fit(self.X)
    expected_args = ((np.array([[0.5, 0.1]]), 0), {})

    # Run
    result = copula.partial_derivative_scalar(0.5, 0.1)

    # Check
    assert result == derivative_mock.return_value

    recorded_call = derivative_mock.call_args
    assert len(expected_args) == len(recorded_call)
    assert (recorded_call[0][0] == expected_args[0][0]).all()
def test_to_dict(self):
    """to_dict exposes copula_type, tau and theta of a fitted copula."""
    # Setup
    copula = Bivariate(copula_type='frank')
    copula.fit(self.X)

    expected_result = dict([
        ('copula_type', 'FRANK'),
        ('tau', 0.9128709291752769),
        ('theta', 44.2003852484162),
    ])

    # Run
    produced = copula.to_dict()

    # Check
    assert produced == expected_result
def test_to_dict(self):
    """to_dict exposes copula_type, tau and theta of a fitted copula."""
    # Setup
    copula = Bivariate('frank')
    copula.fit(self.X)

    expected_result = dict([
        ('copula_type', 'FRANK'),
        ('tau', 0.014492753623188406),
        ('theta', 0.13070829945417198),
    ])

    # Run
    produced = copula.to_dict()

    # Check
    assert produced == expected_result
def test___init__bivariate(self):
    """Bivariate built with the INDEPENDENCE type is an Independence with no parameters."""
    # Setup / Run
    copula = Bivariate(CopulaTypes.INDEPENDENCE)

    # Check
    assert isinstance(copula, Independence)
    assert copula.theta is None
    assert copula.tau is None
def _build_first_tree(self):
    """Build the first level tree, linking node 0 to one other node per edge.

    The other node of each edge is read from the rows of
    ``self._sort_tau_by_y(0)``; a copula is selected for the corresponding
    pair of ``u_matrix`` columns and the edge is appended to ``self.edges``.
    """
    ranked = self._sort_tau_by_y(0)

    for position in range(self.n_nodes - 1):
        partner = int(ranked[position, 0])
        pair_columns = self.u_matrix[:, (0, partner)]
        name, theta = Bivariate.select_copula(pair_columns)

        edge = Edge(position, 0, partner, name, theta)
        edge.tau = self.tau_matrix[0, partner]
        self.edges.append(edge)
def get_child_edge(cls, left_parent, right_parent):
    """Create the edge whose parents are ``left_parent`` and ``right_parent``.

    The endpoints and dependence set come from ``cls._identify_eds_ing`` and
    the copula is selected on the paired conditional values of both parents.
    """
    first_end, second_end, depend_set = cls._identify_eds_ing(left_parent, right_parent)
    left_u, right_u = cls.get_conditional_uni(left_parent, right_parent)

    paired = np.array([[left_value, right_value] for left_value, right_value in zip(left_u, right_u)])
    name, theta = Bivariate.select_copula(paired)

    child = Edge(first_end, second_end, name, theta)
    child.D = depend_set
    child.parents = [left_parent, right_parent]
    return child
def test___init__random_seed(self):
    """A random_seed given as second positional argument is kept as an attribute."""
    # Setup
    seed = 'random_seed'

    # Run
    copula = Bivariate(CopulaTypes.CLAYTON, seed)

    # Check
    assert copula.random_seed == 'random_seed'
def test_copula_selection_negative_tau(self):
    """select_copula returns the FRANK type for data with a negative Kendall tau."""
    # Setup
    X = np.array([
        [0.1, 0.6],
        [0.2, 0.5],
        [0.3, 0.4],
        [0.4, 0.3]
    ])
    # Sanity check on the fixture itself: the sample really is negatively associated.
    sample_tau = stats.kendalltau(X[:, 0], X[:, 1])[0]
    assert sample_tau < 0
    expected = CopulaTypes.FRANK

    # Run
    name, _param = Bivariate.select_copula(X)

    # Check
    assert name == expected
def test_from_dict(self):
    """from_dict builds an instance carrying the copula type, tau and theta given."""
    # Setup
    parameters = dict(copula_type='FRANK', tau=0.15, theta=0.8)

    # Run
    copula = Bivariate.from_dict(parameters)

    # Check
    assert copula.copula_type == CopulaTypes.FRANK
    assert copula.tau == 0.15
    assert copula.theta == 0.8
def test_cumulative_distribution(self):
    """cumulative_distribution is the product of both probabilities."""
    # Setup
    instance = Bivariate(CopulaTypes.INDEPENDENCE)
    data = np.array([
        [0.0, 0.0],
        [0.1, 0.1],
        [0.2, 0.2],
        [0.5, 0.5],
        [0.9, 0.9],
        [1.0, 1.0]
    ])

    expected_result = np.array([
        0.00,
        0.01,
        0.04,
        0.25,
        0.81,
        1.00,
    ])

    # Run
    result = instance.cumulative_distribution(data)

    # Check
    # The comparison used to be a discarded expression, so nothing was
    # verified. A tolerance is required because products such as 0.1 * 0.1
    # are not exactly representable as the decimal literals above.
    assert np.allclose(result, expected_result)
def test_sample(self, uniform_mock):
    """sample returns a (5, 2) array built from two mocked uniform draws."""
    # Setup
    copula = Bivariate(CopulaTypes.FRANK)
    copula.tau = 0.5
    copula.theta = copula.compute_theta()

    uniform_mock.return_value = np.array([0.1, 0.2, 0.4, 0.6, 0.8])

    expected_result = np.array([
        [6.080069565509917e-06, 0.1],
        [6.080069565509917e-06, 0.2],
        [6.080069565509917e-06, 0.4],
        [6.080069565509917e-06, 0.6],
        [4.500185268624483e-06, 0.8]
    ])

    # Both draws are expected to be requested as uniform(0, 1, 5).
    single_call = ((0, 1, 5), {})
    expected_uniform_call_args_list = [single_call, single_call]

    # Run
    sampled = copula.sample(5)

    # Check
    assert isinstance(sampled, np.ndarray)
    assert sampled.shape == (5, 2)
    compare_nested_iterables(sampled, expected_result)
    assert uniform_mock.call_args_list == expected_uniform_call_args_list
def get_likelihood(self, uni_matrix):
    """Evaluate this edge's copula on values taken from ``uni_matrix``.

    Returns the summed probability density together with the partial
    derivatives for the (left, right) and (right, left) orderings.
    """
    if self.parents is not None:
        # Each side is indexed by the first element of self.D that is not in
        # the corresponding parent's D.
        left_col = list(self.D - self.parents[0].D)[0]
        right_col = list(self.D - self.parents[1].D)[0]
        left_u = uni_matrix[self.L, left_col]
        right_u = uni_matrix[self.R, right_col]
    else:
        left_u = uni_matrix[:, self.L]
        right_u = uni_matrix[:, self.R]

    copula = Bivariate(self.name)
    copula.theta = self.theta

    X_left_right = np.array([[left_u, right_u]])
    X_right_left = np.array([[right_u, left_u]])

    density_total = np.sum(copula.probability_density(X_left_right))
    left_given_right = copula.partial_derivative(X_left_right)
    right_given_left = copula.partial_derivative(X_right_left)

    return density_total, left_given_right, right_given_left
def test_load_from_file(self, json_mock, file_mock):
    """Load can recreate an instance from a saved file."""
    # Setup
    json_mock.return_value = {
        'copula_type': 'FRANK',
        'tau': -0.33333333333333337,
        'theta': -3.305771759329249
    }

    # Run
    instance = Bivariate.load('somefile.json')

    # Check
    # These comparisons were bare expressions before, so the test passed no
    # matter what ``load`` returned.
    assert instance.copula_type == CopulaTypes.FRANK
    assert instance.tau == -0.33333333333333337
    assert instance.theta == -3.305771759329249
def _build_first_tree(self):
    """Build the first tree by growing a connected set of nodes from node 0.

    On every pass the candidate pairs join a node already in the set with a
    node outside it; the pair with the largest absolute tau is turned into
    an edge and its outside node joins the set.
    """
    # Prim's algorithm
    # Negating the absolute taus lets an ascending sort put the strongest pair first.
    neg_tau = -1.0 * abs(self.tau_matrix)
    connected = {0}

    while len(connected) != self.n_nodes:
        candidates = set()
        for inside in connected:
            for outside in range(self.n_nodes):
                if outside in connected or outside == inside:
                    continue
                candidates.add((inside, outside))

        # find edge with maximum
        ranked = sorted(candidates, key=lambda pair: neg_tau[pair[0]][pair[1]])
        source, target = ranked[0][0], ranked[0][1]

        name, theta = Bivariate.select_copula(self.u_matrix[:, (source, target)])

        left, right = sorted([source, target])
        new_edge = Edge(left, right, name, theta)
        new_edge.tau = self.tau_matrix[source, target]
        self.edges.append(new_edge)

        connected.add(target)
def _build_first_tree(self):
    """Build the first tree as a single path of nodes and add one edge per link.

    The path starts as ``[a, 0, b]`` where ``a`` and ``b`` come from the
    first two rows of ``self._sort_tau_by_y(0)``. It is then extended one
    node at a time at whichever end offers the larger remaining tau.
    """
    # find the pair of maximum tau
    # NOTE: ``taus`` is the same object as ``self.tau_matrix``; the writes
    # below modify that attribute in place.
    taus = self.tau_matrix
    ranked = self._sort_tau_by_y(0)
    first_left = ranked[0, 0]
    first_right = ranked[1, 0]
    path = np.array([first_left, 0, first_right]).astype(int)
    path_tau = ranked[:2, 1]

    # replace tau matrix of the selected variables as a negative number
    taus[:, [path]] = -10

    for _ in range(2, self.n_nodes - 1):
        head_row = taus[path[0], :]
        tail_row = taus[path[-1], :]
        left = np.argmax(head_row)
        right = np.argmax(tail_row)
        left_value = np.max(head_row)
        right_value = np.max(tail_row)

        if left_value > right_value:
            # add nodes to the left
            path = np.append(int(left), path)
            path_tau = np.append(left_value, path_tau)
            taus[:, left] = -10
        else:
            # add node to the right
            path = np.append(path, int(right))
            path_tau = np.append(path_tau, right_value)
            taus[:, right] = -10

    for position in range(self.n_nodes - 1):
        node_a = path[position]
        node_b = path[position + 1]
        name, theta = Bivariate.select_copula(self.u_matrix[:, (node_a, node_b)])

        left, right = sorted([node_a, node_b])
        edge = Edge(position, left, right, name, theta)
        edge.tau = path_tau[position]
        self.edges.append(edge)
def test_sample_random_state(self):
    """A CLAYTON copula created with random_seed=0 yields the expected five samples."""
    # Setup
    copula = Bivariate(CopulaTypes.CLAYTON, random_seed=0)
    copula.tau = 0.5
    copula.theta = copula.compute_theta()

    expected_rows = [
        [0.68627770, 0.54881350],
        [0.64059280, 0.71518937],
        [0.90594782, 0.60276338],
        [0.96040856, 0.54488318],
        [0.40876969, 0.42365480]
    ]
    expected_result = np.array(expected_rows)

    # Run
    sampled = copula.sample(5)

    # Check
    compare_nested_iterables(sampled, expected_result)
def test_save(self, json_mock):
    """save passes the copula parameters as first argument to the mocked json call."""
    # Setup
    copula = Bivariate('frank')
    copula.fit(self.X)

    expected_content = dict([
        ("copula_type", "FRANK"),
        ("tau", 0.014492753623188406),
        ("theta", 0.13070829945417198),
    ])

    # Run
    copula.save('test.json')

    # Check
    assert json_mock.called
    written = json_mock.call_args[0][0]
    compare_nested_dicts(written, expected_content)
def test_sample_random_state(self):
    """A FRANK copula created with random_seed=0 yields the expected five samples."""
    # Setup
    copula = Bivariate(CopulaTypes.FRANK, random_seed=0)
    copula.tau = 0.5
    copula.theta = copula.compute_theta()

    expected_rows = [
        [3.66330927e-06, 5.48813504e-01],
        [6.08006957e-06, 7.15189366e-01],
        [5.27582646e-06, 6.02763376e-01],
        [5.58315848e-06, 5.44883183e-01],
        [6.08006957e-06, 4.23654799e-01]
    ]
    expected_result = np.array(expected_rows)

    # Run
    sampled = copula.sample(5)

    # Check
    compare_nested_iterables(sampled, expected_result)
def get_child_edge(cls, index, left_parent, right_parent):
    """Create the edge whose parents are ``left_parent`` and ``right_parent``.

    Args:
        index (int):
            Index given to the new Edge.
        left_parent (Edge):
            First parent edge.
        right_parent (Edge):
            Second parent edge.

    Returns:
        Edge:
            Child edge with its dependence set and parents attached.
    """
    first_end, second_end, depend_set = cls._identify_eds_ing(left_parent, right_parent)
    left_u, right_u = cls.get_conditional_uni(left_parent, right_parent)

    paired = np.array([[left_value, right_value] for left_value, right_value in zip(left_u, right_u)])
    selected = Bivariate.select_copula(paired)
    name = selected.copula_type
    theta = selected.theta

    child = Edge(index, first_end, second_end, name, theta)
    child.D = depend_set
    child.parents = [left_parent, right_parent]
    return child
def test_save(self, json_mock, open_mock):
    """Save stores the internal dictionary as a json in a file."""
    # Setup
    instance = Bivariate(copula_type='frank')
    instance.fit(self.X)

    expected_content = {
        "copula_type": "FRANK",
        "tau": 0.9128709291752769,
        "theta": 44.2003852484162
    }

    # Run
    instance.save('test.json')

    # Check
    # ``called_once_with`` is not a Mock assertion: it is an auto-created
    # attribute that is always truthy, so the old ``assert`` could not fail.
    # ``assert_called_once_with`` performs the intended verification.
    open_mock.assert_called_once_with('test.json', 'w')
    assert json_mock.called
    compare_nested_dicts(json_mock.call_args[0][0], expected_content)
def test_sample(self, uniform_mock):
    """sample returns a (5, 2) array built from two mocked uniform draws."""
    # Setup
    copula = Bivariate(CopulaTypes.CLAYTON)
    copula.tau = 0.5
    copula.theta = copula.compute_theta()

    uniform_mock.return_value = np.array([0.1, 0.2, 0.4, 0.6, 0.8])

    expected_result = np.array([
        [0.05233100, 0.1],
        [0.14271095, 0.2],
        [0.39959746, 0.4],
        [0.68567125, 0.6],
        [0.89420523, 0.8]
    ])

    # Both draws are expected to be requested as uniform(0, 1, 5).
    single_call = ((0, 1, 5), {})
    expected_uniform_call_args_list = [single_call, single_call]

    # Run
    sampled = copula.sample(5)

    # Check
    assert isinstance(sampled, np.ndarray)
    assert sampled.shape == (5, 2)
    compare_nested_iterables(sampled, expected_result)
    assert uniform_mock.call_args_list == expected_uniform_call_args_list
def _sample_row(self):
    """Generate a single sampled row from vine model.

    Nodes are visited starting from a randomly chosen one and following the
    adjacency matrix of the first tree. The first node is sampled directly
    from its own uniform draw; every later node passes its draw through
    ``percent_point`` of the matching edge copulas, one tree level at a
    time, before being mapped through ``self.ppfs``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; confirm the placement of the second
    ``break`` and of the ``tmp``/``new_x`` assignments against the file.

    Returns:
        numpy.ndarray
    """
    # One uniform draw per variable.
    unis = np.random.uniform(0, 1, self.n_var)
    # randomly select a node to start with
    first_ind = np.random.randint(0, self.n_var)
    adj = self.trees[0].get_adjacent_matrix()
    # ``visited`` keeps the most recently visited node at position 0.
    visited = []
    # ``explore`` is used as a stack: items are inserted and popped at the front.
    explore = [first_ind]

    sampled = np.zeros(self.n_var)
    # Number of nodes sampled so far.
    itr = 0
    while explore:
        current = explore.pop(0)
        neighbors = np.where(adj[current, :] == 1)[0].tolist()
        if itr == 0:
            # First node: no conditioning, use its own draw.
            new_x = self.ppfs[current](unis[current])

        else:
            # Walk the tree levels from itr - 1 down to 0.
            for i in range(itr - 1, -1, -1):
                current_ind = -1

                # Levels at or beyond the truncation level are skipped.
                if i >= self.truncated:
                    continue

                current_tree = self.trees[i].edges
                # get index of edge to retrieve
                for edge in current_tree:
                    if i == 0:
                        # First tree: the edge must join ``current`` with the
                        # most recently visited node.
                        if (edge.L == current and edge.R == visited[0]) or\
                           (edge.R == current and edge.L == visited[0]):
                            current_ind = edge.index
                            break
                    else:
                        if edge.L == current or edge.R == current:
                            # Every node the edge involves must already be
                            # visited (or be ``current`` itself).
                            condition = set(edge.D)
                            condition.add(edge.L)
                            condition.add(edge.R)

                            visit_set = set(visited)
                            visit_set.add(current)

                            if condition.issubset(visit_set):
                                current_ind = edge.index
                            break

                if current_ind != -1:
                    # the node is not independent conditional on visited node
                    # NOTE(review): ``edge.index`` is used as a position in
                    # ``current_tree`` here; presumably the two coincide.
                    copula_type = current_tree[current_ind].name
                    copula = Bivariate(copula_type=CopulaTypes(copula_type))
                    copula.theta = current_tree[current_ind].theta
                    U = np.array([unis[visited[0]]])
                    if i == itr - 1:
                        # Highest level: start from this node's own draw.
                        tmp = copula.percent_point(np.array([unis[current]]), U)[0]
                    else:
                        # Lower levels: feed in the value from the level above.
                        tmp = copula.percent_point(np.array([tmp]), U)[0]

                    # Clamp the value to the range [EPSILON, 0.99].
                    tmp = min(max(tmp, EPSILON), 0.99)

            # NOTE(review): ``tmp`` is only assigned when a matching edge was
            # found above; otherwise it is unset or left over from an earlier
            # node. Confirm this cannot happen.
            new_x = self.ppfs[current](np.array([tmp]))

        sampled[current] = new_x

        # Queue unvisited neighbours at the front so they are explored next.
        for s in neighbors:
            if s not in visited:
                explore.insert(0, s)

        itr += 1
        visited.insert(0, current)

    return sampled
class TestFrank(TestCase):
    """Tests for the FRANK copula created through ``Bivariate``."""

    def setUp(self):
        """Store the two-column sample data and a FRANK copula on the test case."""
        rows = [
            [2641.16233666, 180.2425623],
            [921.14476418, 192.35609972],
            [-651.32239137, 150.24830291],
            [1223.63536668, 156.62123653],
            [3233.37342355, 173.80311908],
            [1373.22400821, 191.0922843],
            [1959.28188858, 163.22252158],
            [1076.99295365, 190.73280428],
            [2029.25100261, 158.52982435],
            [1835.52188141, 163.0101334],
            [1170.03850556, 205.24904026],
            [739.42628394, 175.42916046],
            [1866.65810627, 208.31821984],
            [3703.49786503, 178.98351969],
            [1719.45232017, 160.50981075],
            [258.90206528, 163.19294974],
            [219.42363944, 173.30395132],
            [609.90212377, 215.18996298],
            [1618.44207239, 164.71141696],
            [2323.2775272, 178.84973821],
            [3251.78732274, 182.99902513],
            [1430.63989981, 217.5796917],
            [-180.57028875, 201.56983421],
            [-592.84497457, 174.92272693],
        ]
        self.X = np.array(rows)
        self.copula = Bivariate(CopulaTypes.FRANK)

    def test_fit(self):
        """Fitting stores the expected theta and tau on the copula."""
        # Setup
        expected_theta = 0.1307082
        expected_tau = 0.01449275

        # Run
        self.copula.fit(self.X)
        fitted_theta = self.copula.theta
        fitted_tau = self.copula.tau

        # Check
        self.assertAlmostEqual(fitted_theta, expected_theta, places=3)
        self.assertAlmostEqual(fitted_tau, expected_tau)

    def test_probability_density(self):
        """probability_density gives the expected value, as an array, for one point."""
        # Setup
        self.copula.fit(self.X)
        point = np.array([[0.1, 0.5]])
        expected_result = 0.999672586804842

        # Run
        density = self.copula.probability_density(point)

        # Check
        assert np.isclose(density, expected_result).all()
        assert isinstance(density, np.ndarray)

    def test_cumulative_distribution(self):
        """cumulative_distribution gives the expected value, as an array, for one point."""
        # Setup
        self.copula.fit(self.X)
        point = np.array([[0.1, 0.5]])
        expected_result = 0.05147003

        # Run
        cumulative = self.copula.cumulative_distribution(point)

        # Check
        assert np.isclose(cumulative, expected_result).all()
        assert isinstance(cumulative, np.ndarray)

    @patch('copulas.bivariate.base.np.random.uniform')
    def test_sample(self, uniform_mock):
        """sample returns a (5, 2) array built from two mocked uniform draws."""
        # Setup
        copula = Bivariate(CopulaTypes.FRANK)
        copula.tau = 0.5
        copula.theta = copula.compute_theta()

        uniform_mock.return_value = np.array([0.1, 0.2, 0.4, 0.6, 0.8])

        expected_result = np.array([
            [6.080069565509917e-06, 0.1],
            [6.080069565509917e-06, 0.2],
            [6.080069565509917e-06, 0.4],
            [6.080069565509917e-06, 0.6],
            [4.500185268624483e-06, 0.8]
        ])

        # Both draws are expected to be requested as uniform(0, 1, 5).
        single_call = ((0, 1, 5), {})
        expected_uniform_call_args_list = [single_call, single_call]

        # Run
        sampled = copula.sample(5)

        # Check
        assert isinstance(sampled, np.ndarray)
        assert sampled.shape == (5, 2)
        compare_nested_iterables(sampled, expected_result)
        assert uniform_mock.call_args_list == expected_uniform_call_args_list

    def test_cdf_zero_if_single_arg_is_zero(self):
        """Run ``copula_zero_if_arg_zero`` on a FRANK copula over a sweep of tau values."""
        # Setup
        copula = Bivariate(CopulaTypes.FRANK)
        # 20 evenly spaced points on [-1, 1] with both endpoints dropped.
        tau_grid = np.linspace(-1.0, 1.0, 20)[1:-1]

        # Run/Check
        for current_tau in tau_grid:
            copula.tau = current_tau
            copula.theta = copula.compute_theta()
            copula_zero_if_arg_zero(copula)

    def test_cdf_value_if_all_other_arg_are_one(self):
        """Run ``copula_single_arg_not_one`` on a FRANK copula over a sweep of tau values."""
        # Setup
        copula = Bivariate(CopulaTypes.FRANK)
        # 20 evenly spaced points on [-1, 1] with both endpoints dropped.
        tau_grid = np.linspace(-1.0, 1.0, 20)[1:-1]

        # Run/Check
        for current_tau in tau_grid:
            copula.tau = current_tau
            copula.theta = copula.compute_theta()
            copula_single_arg_not_one(copula, tolerance=1E-03)

    def test_sample_random_state(self):
        """A FRANK copula created with random_seed=0 yields the expected five samples."""
        # Setup
        copula = Bivariate(CopulaTypes.FRANK, random_seed=0)
        copula.tau = 0.5
        copula.theta = copula.compute_theta()

        expected_rows = [
            [3.66330927e-06, 5.48813504e-01],
            [6.08006957e-06, 7.15189366e-01],
            [5.27582646e-06, 6.02763376e-01],
            [5.58315848e-06, 5.44883183e-01],
            [6.08006957e-06, 4.23654799e-01]
        ]
        expected_result = np.array(expected_rows)

        # Run
        sampled = copula.sample(5)

        # Check
        compare_nested_iterables(sampled, expected_result)
class TestFrank(TestCase):
    """Tests for the FRANK copula created through ``Bivariate``."""

    def setUp(self):
        """Store the two-column sample data and a FRANK copula on the test case."""
        rows = [
            [2641.16233666, 180.2425623],
            [921.14476418, 192.35609972],
            [-651.32239137, 150.24830291],
            [1223.63536668, 156.62123653],
            [3233.37342355, 173.80311908],
            [1373.22400821, 191.0922843],
            [1959.28188858, 163.22252158],
            [1076.99295365, 190.73280428],
            [2029.25100261, 158.52982435],
            [1835.52188141, 163.0101334],
            [1170.03850556, 205.24904026],
            [739.42628394, 175.42916046],
            [1866.65810627, 208.31821984],
            [3703.49786503, 178.98351969],
            [1719.45232017, 160.50981075],
            [258.90206528, 163.19294974],
            [219.42363944, 173.30395132],
            [609.90212377, 215.18996298],
            [1618.44207239, 164.71141696],
            [2323.2775272, 178.84973821],
            [3251.78732274, 182.99902513],
            [1430.63989981, 217.5796917],
            [-180.57028875, 201.56983421],
            [-592.84497457, 174.92272693],
        ]
        self.X = np.array(rows)
        self.copula = Bivariate(CopulaTypes.FRANK)

    def test_fit(self):
        """Fitting stores the expected theta and tau on the copula."""
        # Setup
        expected_theta = 0.1307082
        expected_tau = 0.01449275

        # Run
        self.copula.fit(self.X)
        fitted_theta = self.copula.theta
        fitted_tau = self.copula.tau

        # Check
        self.assertAlmostEqual(fitted_theta, expected_theta, places=3)
        self.assertAlmostEqual(fitted_tau, expected_tau)

    def test_probability_density(self):
        """probability_density gives the expected value, as an array, for one point."""
        # Setup
        self.copula.fit(self.X)
        point = np.array([[0.1, 0.5]])
        expected_result = 0.999672586804842

        # Run
        density = self.copula.probability_density(point)

        # Check
        assert np.isclose(density, expected_result).all()
        assert isinstance(density, np.ndarray)

    def test_cumulative_distribution(self):
        """cumulative_distribution gives the expected value, as an array, for one point."""
        # Setup
        self.copula.fit(self.X)
        point = np.array([[0.1, 0.5]])
        expected_result = 0.05147003

        # Run
        cumulative = self.copula.cumulative_distribution(point)

        # Check
        assert np.isclose(cumulative, expected_result).all()
        assert isinstance(cumulative, np.ndarray)

    def test_sample(self):
        """A fitted copula returns a (10, 2) array when asked for 10 samples."""
        # Setup
        self.copula.fit(self.X)
        requested = 10

        # Run
        sampled = self.copula.sample(requested)

        # Check
        assert isinstance(sampled, np.ndarray)
        assert sampled.shape == (10, 2)