def test__extract_constant(self):
    """With params loc=1 and scale=0, ``_extract_constant`` yields 1."""
    # Setup
    instance = GaussianUnivariate()
    instance._params = {'loc': 1, 'scale': 0}

    # Run
    extracted = instance._extract_constant()

    # Check
    assert extracted == 1
def test__fit_constant(self):
    """``_fit_constant`` on an all-ones array stores loc=1 and scale=0."""
    # Setup
    instance = GaussianUnivariate()
    constant_data = np.array([1, 1, 1, 1])

    # Run
    instance._fit_constant(constant_data)

    # Check
    expected_params = {'loc': 1, 'scale': 0}
    assert instance._params == expected_params
def test_fit_empty_data(self):
    """Fitting an empty column must raise ``ValueError``."""
    # Setup
    instance = GaussianUnivariate()
    empty_column = pd.Series([])

    # Run / Check: callable form of assertRaises, equivalent to the context manager
    self.assertRaises(ValueError, instance.fit, empty_column)
def test_valid_serialization_unfit_model(self):
    """An unfitted model survives a ``to_dict`` / ``from_dict`` round trip."""
    # Setup
    original = GaussianUnivariate()

    # Run
    serialized = original.to_dict()
    restored = GaussianUnivariate.from_dict(serialized)

    # Check: both instances must serialize to the same dictionary
    assert original.to_dict() == restored.to_dict()
def test__fit(self):
    """``_fit`` stores the sample mean and standard deviation as loc and scale."""
    # Setup
    instance = GaussianUnivariate()
    samples = norm.rvs(loc=1, scale=1, size=1000)

    # Run
    instance._fit(samples)

    # Check
    expected_params = {
        'loc': np.mean(samples),
        'scale': np.std(samples),
    }
    assert instance._params == expected_params
def test_test_fit_equal_values(self):
    """Fitting six identical values gives mean 1 and a non-zero std of 0.001."""
    # Setup
    model = GaussianUnivariate()
    constant_column = [1] * 6

    # Run
    model.fit(constant_column)

    # Check
    assert model.mean == 1
    assert model.std == 0.001
def test_to_dict(self):
    """After fitting 0..5, ``to_dict`` yields the fitted mean and std."""
    # Setup
    model = GaussianUnivariate()
    model.fit(list(range(6)))

    # Run
    serialized = model.to_dict()

    # Check
    assert serialized == {'mean': 2.5, 'std': 1.707825127659933}
def test_sample(self):
    """Samples drawn from a fitted model match its mean and std within 10E-3."""
    # Setup
    n_samples = 1000000
    tolerance = 10E-3
    model = GaussianUnivariate()
    model.fit([-1, 0, 1])

    # Run
    drawn = model.sample(n_samples)

    # Check
    assert len(drawn) == n_samples

    mean_error = abs(np.mean(drawn) - model.mean)
    assert mean_error < tolerance

    std_error = abs(np.std(drawn) - model.std)
    assert std_error < tolerance
def test_test_fit_equal_values(self):
    """Fitting constant data sets ``constant_value``; mean is 0 and std is 1."""
    # Setup
    model = GaussianUnivariate()
    constant_column = np.array([5] * 6)

    # Run
    model.fit(constant_column)

    # Check
    assert model.mean == 0
    assert model.std == 1
    assert model.constant_value == 5
def test_get_probability_density(self):
    """``probability_density(0)`` matches the expected density for data [-1, 0, 1]."""
    # Setup
    model = GaussianUnivariate()
    model.fit(np.array([-1, 0, 1]))

    # Run
    density_at_zero = model.probability_density(0)

    # Check
    assert density_at_zero == 0.48860251190292
def test_sample_random_state(self):
    """With ``random_seed=0`` the first three samples are reproducible."""
    # Setup
    seeded = GaussianUnivariate(random_seed=0)
    seeded.fit(np.array([1, 2, 3, 4, 5]))
    expected_samples = np.array([
        5.494746752403546,
        3.565907751154284,
        4.384144531132039,
    ])

    # Run
    drawn = seeded.sample(3)

    # Check
    assert (drawn == expected_samples).all()
def test_percent_point(self):
    """``percent_point(0.5)`` is 0 for a model fitted on [-1, 0, 1]."""
    # Setup
    model = GaussianUnivariate()
    model.fit(np.array([-1, 0, 1]))

    # Run
    median = model.percent_point(0.5)

    # Check
    assert (median == 0).all()
def test_cumulative_distribution(self):
    """``cumulative_distribution`` at 0 is 0.5 for a model fitted on [-1, 0, 1]."""
    # Setup
    model = GaussianUnivariate()
    model.fit(np.array([-1, 0, 1]))
    points = pd.Series([0])

    # Run
    probabilities = model.cumulative_distribution(points)

    # Check
    assert (probabilities == [0.5]).all()
def test___str__(self):
    """``__str__`` of a fresh instance lists type, name, mean and std."""
    # Setup
    model = GaussianUnivariate()
    expected_lines = [
        'Distribution Type: Gaussian',
        'Variable name: None',
        'Mean: 0',
        'Standard deviation: 1',
    ]

    # Run
    description = model.__str__()

    # Check
    assert description == '\n'.join(expected_lines)
def test_fit(self):
    """Fitting a named Series sets mean, std, name and the fitted flag."""
    # Setup
    model = GaussianUnivariate()
    named_column = pd.Series([0, 1, 2, 3, 4, 5], name='column')

    # Run
    model.fit(named_column)

    # Check
    assert model.mean == 2.5
    assert model.std == 1.707825127659933
    assert model.name == 'column'
    assert model.fitted is True
def fit(self, X, distrib_map=None):
    """Fit one distribution per column of X, then compute the covariance.

    Args:
        X: `numpy.ndarray` or `pandas.DataFrame`. Data to model.
        distrib_map: `dict` keyed by column. When truthy, each value is
            called with ``X[key]`` and the result is stored for that key;
            otherwise every column gets a fitted ``GaussianUnivariate``.

    Returns:
        None
    """
    LOGGER.debug('Fitting Gaussian Copula')
    column_names = self.get_column_names(X)

    if not distrib_map:
        # Default path: a Gaussian univariate per column, fitted on that column.
        for name in column_names:
            univariate = GaussianUnivariate()
            self.distribs[name] = univariate
            univariate.fit(self.get_column(X, name))
    else:
        # User-supplied distributions.
        # NOTE: flagged by the original author as not fully working yet.
        for key in distrib_map:
            factory = distrib_map[key]
            self.distribs[key] = factory(X[key])

    self.covariance = self._get_covariance(X)
def test_to_dict(self):
    """``to_dict`` of a fitted model includes type, mean, std and fitted flag."""
    # Setup
    model = GaussianUnivariate()
    model.fit(np.array([0, 1, 2, 3, 4, 5]))

    # Run
    serialized = model.to_dict()

    # Check
    expected = {
        'type': 'copulas.univariate.gaussian.GaussianUnivariate',
        'mean': 2.5,
        'std': 1.707825127659933,
        'fitted': True,
    }
    assert serialized == expected
def from_dict(cls, copula_dict):
    """Build an instance from its dictionary representation.

    Args:
        copula_dict: mapping with a ``'distribs'`` entry (name -> parameters
            passed to ``GaussianUnivariate.from_dict``) and a ``'covariance'``
            entry that is converted to a numpy array.

    Returns:
        A new instance of ``cls`` with ``distribs`` and ``covariance`` set.
    """
    instance = cls()
    instance.distribs = {
        name: GaussianUnivariate.from_dict(parameters)
        for name, parameters in copula_dict['distribs'].items()
    }
    instance.covariance = np.array(copula_dict['covariance'])
    return instance
def test_sample(self, random_mock):
    """``sample`` returns what the mocked generator yields, called with mean, std, size.

    NOTE(review): ``random_mock`` is presumably injected by a patch decorator
    outside this view; confirm which callable it replaces.
    """
    # Setup
    instance = GaussianUnivariate()
    instance.fit(np.array([-1, 0, 1]))

    mocked_draws = np.array([1, 2, 3, 4, 5])
    random_mock.return_value = mocked_draws

    # Run
    result = instance.sample(5)

    # Check
    compare_nested_iterables(result, mocked_draws)

    assert instance.mean == 0.0
    assert instance.std == 0.816496580927726

    random_mock.assert_called_once_with(0.0, 0.816496580927726, 5)
def test___init__(self):
    """A fresh instance has a falsy name, mean 0 and std 1."""
    # Setup / Run
    model = GaussianUnivariate()

    # Check
    assert not model.name
    assert model.mean == 0
    assert model.std == 1
def test_from_dict(self):
    """``from_dict`` restores mean and std, and the result can be sampled."""
    # Setup
    serialized = {'mean': 2.5, 'std': 1.707825127659933}

    # Run
    model = GaussianUnivariate.from_dict(serialized)

    # Check
    assert model.mean == 2.5
    assert model.std == 1.707825127659933

    # Sampling is called without assertions: it only has to not raise.
    model.sample(10)
def test_percent_point_reverse_cumulative_distribution(self):
    """Combined cumulative_distribution and percent_point is the identity function.

    Both compositions are checked against the input within an absolute
    tolerance of 10E-5.
    """
    # Setup
    copula = GaussianUnivariate()
    column = np.array([-1, 0, 1])
    copula.fit(column)
    initial_value = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5])
    tolerance = 10E-5

    # Run
    result_a = copula.percent_point(copula.cumulative_distribution(initial_value))
    result_b = copula.cumulative_distribution(copula.percent_point(initial_value))

    # Check
    # Compare the absolute error: a signed difference would let any result
    # larger than the input pass, however far off it is.
    assert (np.abs(initial_value - result_a) < tolerance).all()
    assert (np.abs(initial_value - result_b) < tolerance).all()
def test_percent_point_reverse_cumulative_distribution(self):
    """``percent_point`` and ``cumulative_distribution`` invert each other at 0."""
    # Setup
    model = GaussianUnivariate()
    model.fit([-1, 0, 1])
    start = pd.Series([0])

    # Run
    cdf_values = model.cumulative_distribution(start)
    round_trip_a = model.percent_point(cdf_values)

    ppf_values = model.percent_point(start)
    round_trip_b = model.cumulative_distribution(ppf_values)

    # Check
    assert (start == round_trip_a).all()
    assert (start == round_trip_b).all()
def test_sample_constant(self):
    """A constant distribution samples its constant value repeatedly."""
    # Setup: configure the instance as a constant distribution by hand
    instance = GaussianUnivariate()
    instance.constant_value = 3
    instance._replace_constant_methods()
    instance.fitted = True

    expected = np.array([3] * 5)

    # Run
    sampled = instance.sample(5)

    # Check
    compare_nested_iterables(sampled, expected)
def test_cumulative_distribution_constant(self):
    """For constant value 3, the CDF is 0 below 3 and 1 from 3 upward."""
    # Setup: configure the instance as a constant distribution by hand
    instance = GaussianUnivariate()
    instance.constant_value = 3
    instance._replace_constant_methods()
    instance.fitted = True

    points = np.array([1, 2, 3, 4, 5])
    expected = np.array([0, 0, 1, 1, 1])

    # Run
    cumulative = instance.cumulative_distribution(points)

    # Check
    compare_nested_iterables(cumulative, expected)
def test__is_constant_false(self):
    """``_is_constant`` is falsy after fitting varying data."""
    # Setup
    instance = GaussianUnivariate()
    varying_data = np.array([1, 2, 3, 4])

    # Run
    instance.fit(varying_data)

    # Check
    assert not instance._is_constant()
def test__is_constant_true(self):
    """``_is_constant`` is truthy after fitting data with a single repeated value."""
    # Setup
    instance = GaussianUnivariate()
    constant_data = np.array([1, 1, 1, 1])

    # Run
    instance.fit(constant_data)

    # Check
    assert instance._is_constant()