Example #1
    def test__analyze(self):
        """Test _analyze"""
        # Setup
        hp = HyperTransformer(dtype_transformers={'O': 'one_hot_encoding'})

        # Run
        data = pd.DataFrame({
            'int': [1, 2, None],
            'float': [1.0, 2.0, None],
            'object': ['foo', 'bar', None],
            'category': [1, 2, None],
            'bool': [True, False, None],
            'datetime': pd.to_datetime(['1965-05-23', None, '1997-10-17']),
        })
        data['category'] = data['category'].astype('category')
        result = hp._analyze(data)

        # Asserts
        assert isinstance(result, dict)
        assert set(result.keys()) == {'int', 'float', 'object', 'category', 'bool', 'datetime'}

        assert isinstance(result['int'], NumericalTransformer)
        assert isinstance(result['float'], NumericalTransformer)
        assert isinstance(result['object'], OneHotEncodingTransformer)
        assert isinstance(result['category'], OneHotEncodingTransformer)
        assert isinstance(result['bool'], BooleanTransformer)
        assert isinstance(result['datetime'], DatetimeTransformer)
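
Note that the test data mixes None into every column, which makes pandas upcast
int to float64 and bool to object; _analyze therefore cannot rely on the raw
dtype alone. A minimal, self-contained sketch of such value-aware kind
inference (an illustration, not RDT's actual implementation):

import pandas as pd

def infer_kind(series):
    """Guess which transformer family a column needs."""
    if isinstance(series.dtype, pd.CategoricalDtype):
        return 'categorical'
    values = series.dropna()
    if len(values) and values.map(lambda v: isinstance(v, bool)).all():
        return 'boolean'  # bool columns with None arrive as object dtype
    return {'i': 'numerical', 'u': 'numerical', 'f': 'numerical',
            'M': 'datetime', 'O': 'categorical'}.get(series.dtype.kind, 'unsupported')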
Example #2
    def _fit(self, table_data):
        """Fit the model to the table.

        Args:
            table_data (pandas.DataFrame):
                Data to be learned.
        """
        distributions = self._field_distributions
        fields = self._metadata.get_fields()

        transformers = {}
        for field in table_data:
            field_name = field.replace('.value', '')

            if field_name in fields and fields[field_name].get('type') != 'categorical':
                transformers[field] = GaussianCopulaTransformer(
                    distribution=distributions.get(
                        field_name, self._default_distribution))

        self._ht = HyperTransformer(field_transformers=transformers)
        table_data = self._ht.fit_transform(table_data)

        super()._fit(table_data)
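
Columns arriving here may already carry a '.value' suffix added by earlier
transformation steps, so the suffix is stripped to recover the metadata field
name before checking its type. A small illustration of that filtering, with
made-up field metadata:

# Hypothetical metadata: 'age' is numerical, 'city' is categorical.
fields = {'age': {'type': 'numerical'}, 'city': {'type': 'categorical'}}

columns = ['age.value', 'city.value']
selected = [
    column for column in columns
    if fields.get(column.replace('.value', ''), {}).get('type') != 'categorical'
]
assert selected == ['age.value']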
Example #3
    def test_fit_transform(self):
        """Test call fit_transform"""
        # Run
        transformer = Mock()

        HyperTransformer.fit_transform(transformer, pd.DataFrame())

        # Asserts
        expect_call_count_fit = 1
        expect_call_count_transform = 1
        expect_call_args_fit = pd.DataFrame()
        expect_call_args_transform = pd.DataFrame()

        self.assertEqual(
            transformer.fit.call_count,
            expect_call_count_fit
        )
        pd.testing.assert_frame_equal(
            transformer.fit.call_args[0][0],
            expect_call_args_fit
        )

        self.assertEqual(
            transformer.transform.call_count,
            expect_call_count_transform
        )
        pd.testing.assert_frame_equal(
            transformer.transform.call_args[0][0],
            expect_call_args_transform
        )
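
The test above calls the unbound method with a Mock standing in for self, so
only fit_transform's own logic runs while the fit and transform calls are
recorded on the mock. The same pattern on a toy class, for illustration:

from unittest.mock import Mock

class Toy:
    def fit_transform(self, data):
        self.fit(data)
        return self.transform(data)

instance = Mock()
Toy.fit_transform(instance, 'data')

instance.fit.assert_called_once_with('data')
instance.transform.assert_called_once_with('data')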
Example #4
    def test_fit_with_analyze(self):
        """Test fit and analyze the transformers"""
        # Setup
        data = pd.DataFrame({
            'integers': [1, 2, 3, 4],
            'floats': [1.1, 2.2, 3.3, 4.4],
            'booleans': [True, False, False, True]
        })

        int_mock = Mock()
        float_mock = Mock()
        bool_mock = Mock()

        analyzed_data = {
            'integers': int_mock,
            'floats': float_mock,
            'booleans': bool_mock
        }

        # Run
        transformer = Mock()
        transformer.transformers = None
        transformer._analyze.return_value = analyzed_data

        HyperTransformer.fit(transformer, data)

        # Asserts
        expect_int_call_count = 1
        expect_float_call_count = 1
        expect_bool_call_count = 1

        self.assertEqual(int_mock.fit.call_count, expect_int_call_count)
        self.assertEqual(float_mock.fit.call_count, expect_float_call_count)
        self.assertEqual(bool_mock.fit.call_count, expect_bool_call_count)
Example #5
    def compute(cls, real_data, synthetic_data, metadata=None):
        """Compute this metric.

        Args:
            real_data (pandas.DataFrame):
                The values from the real dataset.
            synthetic_data (pandas.DataFrame):
                The values from the synthetic dataset.
            metadata (dict):
                Table metadata dict.

        Returns:
            Union[float, tuple[float]]:
                Metric output.
        """
        metadata = cls._validate_inputs(real_data, synthetic_data, metadata)
        transformer = HyperTransformer()
        fields = cls._select_fields(metadata, cls.field_types)
        real_data = transformer.fit_transform(real_data[fields])
        synthetic_data = transformer.transform(synthetic_data[fields])

        values = []
        for column_name, real_column in real_data.items():
            real_column = real_column.values
            synthetic_column = synthetic_data[column_name].values

            score = cls.single_column_metric.compute(real_column,
                                                     synthetic_column)
            values.append(score)

        return np.nanmean(values)
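
Because the aggregation uses np.nanmean, columns whose single-column metric
returns NaN (for example, constant columns) are simply left out of the
average. A one-line illustration of that behavior:

import numpy as np

assert np.isclose(np.nanmean([0.8, np.nan, 0.6]), 0.7)  # NaN scores are ignored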
Example #6
    def test__analyze_invalid_dtype(self):
        """Test _analyze when a list of dtypes containing an invalid dtype is passed."""
        # Setup
        hp = HyperTransformer(dtypes=['int', 'complex'])

        # Run
        data = pd.DataFrame({
            'int': [1, 2, None],
            'complex': [1.0 + 0j, 2.0 + 1j, None],
        })
        with pytest.raises(ValueError):
            hp._analyze(data)
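
Complex columns have dtype.kind 'c', which falls outside the supported numeric,
object, boolean and datetime kinds, so _analyze is expected to raise. A tiny
illustration of guarding against unsupported kinds (illustrative, not RDT's code):

import pandas as pd
import pytest

def check_supported(series):
    if series.dtype.kind not in 'iufObM':
        raise ValueError(f'Unsupported dtype kind: {series.dtype.kind!r}')

with pytest.raises(ValueError):
    check_supported(pd.Series([1.0 + 0j, 2.0 + 1j]))  # complex128, kind 'c'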
Example #7
    def test__analyze_raise_error(self):
        """Test _analyze raise error"""
        # Setup
        data = Mock()
        data.columns = ['foo']

        dtypes = [Mock()]

        # Run
        transformer = Mock()
        transformer.dtypes = dtypes

        with self.assertRaises(ValueError):
            HyperTransformer._analyze(transformer, data)
Example #8
    def compute(cls, real_data, synthetic_data, metadata=None):
        """Compute this metric.

        This builds a Machine Learning Classifier that learns to tell the synthetic
        data apart from the real data; the classifier is then evaluated using
        cross-validation.

        The output of the metric is one minus the average ROC AUC score obtained.

        Args:
            real_data (Union[numpy.ndarray, pandas.DataFrame]):
                The values from the real dataset.
            synthetic_data (Union[numpy.ndarray, pandas.DataFrame]):
                The values from the synthetic dataset.
            metadata (dict):
                Table metadata dict. If not passed, it is built based on the
                real_data fields and dtypes.

        Returns:
            float:
                One minus the ROC AUC Cross Validation Score obtained by the classifier.
        """
        metadata = cls._validate_inputs(real_data, synthetic_data, metadata)
        transformer = HyperTransformer(
            default_data_type_transformers={
                'categorical': OneHotEncodingTransformer(
                    error_on_unknown=False),
            })
        real_data = transformer.fit_transform(real_data).to_numpy()
        synthetic_data = transformer.transform(synthetic_data).to_numpy()

        X = np.concatenate([real_data, synthetic_data])
        y = np.hstack([np.ones(len(real_data)), np.zeros(len(synthetic_data))])
        if np.isin(X, [np.inf, -np.inf]).any():
            X[np.isin(X, [np.inf, -np.inf])] = np.nan

        try:
            scores = []
            kf = StratifiedKFold(n_splits=3, shuffle=True)
            for train_index, test_index in kf.split(X, y):
                y_pred = cls._fit_predict(X[train_index], y[train_index],
                                          X[test_index])
                roc_auc = roc_auc_score(y[test_index], y_pred)

                scores.append(max(0.5, roc_auc) * 2 - 1)

            return 1 - np.mean(scores)
        except ValueError as err:
            raise IncomputableMetricError(
                f'DetectionMetric: Unable to be fit with error {err}')
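
The scoring loop rescales each fold's ROC AUC from [0.5, 1] to [0, 1] before
averaging, so the final metric is 1 when the classifier cannot tell real from
synthetic rows at all. A compact, runnable sketch of the same recipe on toy
numeric data, with LogisticRegression standing in for cls._fit_predict (an
assumption; the actual classifier depends on the metric subclass):

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold

rng = np.random.default_rng(0)
real = rng.normal(0.0, 1.0, size=(150, 3))
fake = rng.normal(0.5, 1.0, size=(150, 3))

X = np.concatenate([real, fake])
y = np.hstack([np.ones(len(real)), np.zeros(len(fake))])

scores = []
for train, test in StratifiedKFold(n_splits=3, shuffle=True).split(X, y):
    y_pred = LogisticRegression().fit(X[train], y[train]).predict_proba(X[test])[:, 1]
    roc_auc = roc_auc_score(y[test], y_pred)
    scores.append(max(0.5, roc_auc) * 2 - 1)  # rescale [0.5, 1] -> [0, 1]

print(1 - np.mean(scores))  # 1.0 would mean indistinguishable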
Example #9
    def test__get_columns_one(self):
        data = pd.DataFrame({
            'a': [1, 2, 3],
        })

        returned = HyperTransformer._get_columns(data, 'a')

        np.testing.assert_equal(returned, np.array([1, 2, 3]))
Example #10
    def test__get_columns_none(self):
        data = pd.DataFrame({
            'a': [1, 2, 3],
        })

        returned = HyperTransformer._get_columns(data, 'b')

        assert returned.empty
Example #11
    def test___init__(self):
        """Test create new instance of HyperTransformer"""
        # Run
        ht = HyperTransformer()

        # Asserts
        self.assertTrue(ht.copy)
        self.assertEqual(ht.dtypes, None)
Example #12
    def fit(self, table_data):
        """Fit the ``ColumnsModel``.

        Fit a ``GaussianUnivariate`` model to the ``self.constraint_columns`` columns in the
        ``table_data`` in order to sample those columns when missing.

        Args:
            table_data (pandas.DataFrame):
                Table data.
        """
        data_to_model = table_data[self.constraint_columns]
        self._hyper_transformer = HyperTransformer(
            default_data_type_transformers={
                'categorical': 'OneHotEncodingTransformer'
            })
        transformed_data = self._hyper_transformer.fit_transform(data_to_model)
        self._model = GaussianMultivariate(distribution=GaussianUnivariate)
        self._model.fit(transformed_data)
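
The modeling step in isolation: fit a GaussianMultivariate with
GaussianUnivariate marginals, then sample from it to fill in missing columns.
A hedged sketch assuming the copulas library, mirroring the calls above:

import pandas as pd
from copulas.multivariate import GaussianMultivariate
from copulas.univariate import GaussianUnivariate

data = pd.DataFrame({'a': [1.0, 2.0, 3.0, 4.0], 'b': [2.1, 3.9, 6.2, 7.8]})

model = GaussianMultivariate(distribution=GaussianUnivariate)
model.fit(data)
sampled = model.sample(2)  # DataFrame with the same columns as `data`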
Example #13
    def test__get_columns_two(self):
        data = pd.DataFrame({
            'b': [4, 5, 6],
            'b#1': [7, 8, 9],
        })

        returned = HyperTransformer._get_columns(data, 'b')

        expected = np.array([[4, 7], [5, 8], [6, 9]])
        np.testing.assert_equal(returned, expected)
Example #14
    def _fit(self, table_data):
        """Fit the model to the table.

        Args:
            table_data (pandas.DataFrame):
                Data to be learned.
        """
        distributions = self._field_distributions
        default = self._default_distribution
        fields = self._metadata.get_fields()
        transformers = {
            field: GaussianCopulaTransformer(
                distribution=distributions.get(field, default))
            for field in table_data.columns
            if fields.get(field, dict()).get('type') != 'categorical'
        }
        self._ht = HyperTransformer(transformers=transformers)
        table_data = self._ht.fit_transform(table_data)

        super()._fit(table_data)
Example #15
def test_hypertransformer_with_transformers(faker_mock):
    faker_mock.return_value.first_name.side_effect = [
        'Jaime', 'Cersei', 'Tywin', 'Tyrion'
    ]
    data = get_input_data()
    transformers = get_transformers()

    ht = HyperTransformer(transformers)
    ht.fit(data)
    transformed = ht.transform(data)

    expected = get_transformed_data()

    np.testing.assert_allclose(
        transformed.sort_index(axis=1).values,
        expected.sort_index(axis=1).values)

    reversed_data = ht.reverse_transform(transformed)

    original_names = data.pop('names')
    reversed_names = reversed_data.pop('names')

    pd.testing.assert_frame_equal(data.sort_index(axis=1),
                                  reversed_data.sort_index(axis=1))

    for name in original_names:
        assert name not in reversed_names
Example #16
    def _compute_auroc(self, real_table, synthetic_table):
        transformer = HyperTransformer()
        real_table = transformer.fit_transform(real_table).values
        synthetic_table = transformer.transform(synthetic_table).values

        X = np.concatenate([real_table, synthetic_table])
        y = np.hstack([np.ones(len(real_table)), np.zeros(len(synthetic_table))])
        X[np.isnan(X)] = 0.0

        if len(X) < 20:
            warnings.warn("Not enough data, skipping the detection tests.")

        scores = []
        kf = StratifiedKFold(n_splits=3, shuffle=True)
        for train_index, test_index in kf.split(X, y):
            self.fit(X[train_index], y[train_index])
            y_pred = self.predict_proba(X[test_index])
            auroc = roc_auc_score(y[test_index], y_pred)
            if auroc < 0.5:
                auroc = 1.0 - auroc
            scores.append(auroc)
        return np.mean(scores)
Example #17
    def test__get_columns_regex(self):
        data = pd.DataFrame({
            'a(b)': [4, 5, 6],
            'a(b)#1': [7, 8, 9],
            'b(b)': [4, 5, 6],
            'b(b)#1': [7, 8, 9],
        })

        returned = HyperTransformer._get_columns(data, 'a(b)')

        expected = np.array([[4, 7], [5, 8], [6, 9]])
        np.testing.assert_equal(returned, expected)
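
The column names here contain regex metacharacters, so matching a base name
together with its '#<n>' counterparts requires escaping. An illustrative
matcher (not RDT's actual code) showing why re.escape matters:

import re
import pandas as pd

data = pd.DataFrame({'a(b)': [4, 5, 6], 'a(b)#1': [7, 8, 9], 'b(b)': [4, 5, 6]})

pattern = re.compile(r'^{}(#\d+)?$'.format(re.escape('a(b)')))
matched = [column for column in data.columns if pattern.match(column)]
assert matched == ['a(b)', 'a(b)#1']  # 'b(b)' is not picked up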
Example #18
    def fit(self, table_data):
        """Fit ``Constraint`` class to data.

        If ``fit_columns_model`` is True, then this method will fit
        a ``GaussianCopula`` model to the relevant columns in ``table_data``.
        Subclasses can overwrite this method, or overwrite the ``_fit`` method
        if they will not be needing the model to handle conditional sampling.

        Args:
            table_data (pandas.DataFrame):
                Table data.
        """
        self._fit(table_data)

        if self.fit_columns_model and len(self.constraint_columns) > 1:
            data_to_model = table_data[list(self.constraint_columns)]
            self._hyper_transformer = HyperTransformer(dtype_transformers={
                'O': 'one_hot_encoding',
            })
            transformed_data = self._hyper_transformer.fit_transform(
                data_to_model)
            self._columns_model = GaussianMultivariate(
                distribution=GaussianUnivariate)
            self._columns_model.fit(transformed_data)
Example #19
def test_single_category():
    ht = HyperTransformer(transformers={'a': OneHotEncodingTransformer()})
    data = pd.DataFrame({'a': ['a', 'a', 'a']})

    ht.fit(data)
    transformed = ht.transform(data)

    reverse = ht.reverse_transform(transformed)

    pd.testing.assert_frame_equal(data, reverse)
Example #20
def test_dtype_category():
    df = pd.DataFrame({'a': ['a', 'b', 'c']}, dtype='category')

    ht = HyperTransformer()
    ht.fit(df)

    trans = ht.transform(df)

    rever = ht.reverse_transform(trans)

    pd.testing.assert_frame_equal(df, rever)
Example #21
    def test__analyze_bool(self):
        """Test _analyze bool dtype"""
        # Setup
        data = pd.DataFrame({'booleans': [True, False, None, False, True]})

        dtypes = [bool]

        # Run
        transformer = Mock()
        transformer.dtypes = dtypes

        result = HyperTransformer._analyze(transformer, data)

        # Asserts
        expect_class = BooleanTransformer

        self.assertIsInstance(result['booleans'], expect_class)
Example #22
    def test__analyze_int(self):
        """Test _analyze int dtype"""
        # Setup
        data = pd.DataFrame({'integers': [1, 2, 3, 4, 5, None, 6, 7, 8, 9, 0]})

        dtypes = [int]

        # Run
        transformer = Mock()
        transformer.dtypes = dtypes

        result = HyperTransformer._analyze(transformer, data)

        # Asserts
        expect_class = NumericalTransformer

        self.assertIsInstance(result['integers'], expect_class)
Example #23
    def test__analyze_object(self):
        """Test _analyze object dtype"""
        # Setup
        data = pd.DataFrame({'objects': ['foo', 'bar', None, 'tar']})

        dtypes = [object]  # np.object was removed from NumPy; use the builtin

        # Run
        transformer = Mock()
        transformer.dtypes = dtypes

        result = HyperTransformer._analyze(transformer, data)

        # Asserts
        expect_class = CategoricalTransformer

        self.assertIsInstance(result['objects'], expect_class)
Example #24
def test_empty_transformers_nan_data():
    """If transformers is an empty dict, do nothing."""
    data = get_input_data_with_nan()

    ht = HyperTransformer(transformers={})
    ht.fit(data)

    transformed = ht.transform(data)
    reverse = ht.reverse_transform(transformed)

    pd.testing.assert_frame_equal(data, transformed)
    pd.testing.assert_frame_equal(data, reverse)
Example #25
    def _load_hyper_transformer(self, table_name):
        """Create and return a new ``rdt.HyperTransformer`` instance for a table.

        First get the ``dtypes`` and ``pii fields`` from a given table, then use
        those to build a transformer dictionary to be used by the ``HyperTransformer``.

        Args:
            table_name (str):
                Table name for which to load the HyperTransformer.

        Returns:
            rdt.HyperTransformer:
                Instance of ``rdt.HyperTransformer`` for the given table.
        """
        dtypes = self.get_dtypes(table_name)
        pii_fields = self._get_pii_fields(table_name)
        transformers_dict = self._get_transformers(dtypes, pii_fields)
        return HyperTransformer(transformers=transformers_dict)
Example #26
    def test__analyze_float(self):
        """Test _analyze float dtype"""
        # Setup
        data = pd.DataFrame({
            'floats': [1.1, 2.2, 3.3, 4.4, 5.5, None, 6.6, 7.7, 8.8, 9.9, 0.0]
        })

        dtypes = [float]

        # Run
        transformer = Mock()
        transformer.dtypes = dtypes

        result = HyperTransformer._analyze(transformer, data)

        # Asserts
        expect_class = NumericalTransformer

        self.assertIsInstance(result['floats'], expect_class)
Example #27
def test_subset_of_columns_nan_data():
    """HyperTransform should be able to transform a subset of the training columns.

    See https://github.com/sdv-dev/RDT/issues/152
    """
    data = get_input_data_with_nan()

    ht = HyperTransformer()
    ht.fit(data)

    subset = data[[data.columns[0]]]
    transformed = ht.transform(subset)
    reverse = ht.reverse_transform(transformed)

    pd.testing.assert_frame_equal(subset, reverse)
Example #28
    def test__analyze_datetime64(self):
        """Test _analyze datetime64 dtype"""
        # Setup
        data = pd.DataFrame({'datetimes': ['1965-05-23', None, '1997-10-17']})

        data['datetimes'] = pd.to_datetime(data['datetimes'],
                                           format='%Y-%m-%d',
                                           errors='coerce')

        dtypes = [np.datetime64]

        # Run
        transformer = Mock()
        transformer.dtypes = dtypes

        result = HyperTransformer._analyze(transformer, data)

        # Asserts
        expect_class = DatetimeTransformer

        self.assertIsInstance(result['datetimes'], expect_class)
Example #29
def test_hypertransformer_without_transformers_nan_data():
    data = get_input_data_with_nan()

    ht = HyperTransformer()
    ht.fit(data)
    transformed = ht.transform(data)

    expected = get_transformed_nan_data()

    np.testing.assert_allclose(
        transformed.sort_index(axis=1).values,
        expected.sort_index(axis=1).values)

    reversed_data = ht.reverse_transform(transformed)

    original_names = data.pop('names')
    reversed_names = reversed_data.pop('names')

    pd.testing.assert_frame_equal(data.sort_index(axis=1),
                                  reversed_data.sort_index(axis=1))

    for name in original_names:
        assert name not in reversed_names
Example #30
class CopulaGAN(CTGAN):
    """Combination of GaussianCopula transformation and GANs.

    This model extends the ``CTGAN`` model to add the flexibility of the GaussianCopula
    transformations provided by the ``GaussianCopulaTransformer`` from ``RDT``.

    Overall, the fitting process consists of the following steps:

    1. Transform each non-categorical variable from the input
       data using a ``GaussianCopulaTransformer``:

       i. If not specified, find out which distribution each of the
          variables in the input dataset follows.
       ii. Transform each variable to a standard normal space by applying
           the CDF of the corresponding distribution and then applying
           the inverse CDF of a standard normal distribution.

    2. Fit CTGAN with the transformed table.

    And the process of sampling is:

    1. Sample using CTGAN
    2. Reverse the previous transformation by applying the CDF of a standard normal
       distribution and then inverting the CDF of the distribution that corresponds
       to each variable.

    The arguments of this model are the same as for CTGAN except for two additional
    arguments, ``field_distributions`` and ``default_distribution``, which make it
    possible to define specific distributions for individual fields as well as which
    distribution to use by default when none has been selected.

    Distributions can be passed as a ``copulas`` univariate instance or as one
    of the following string values:

    * ``univariate``: Let ``copulas`` select the optimal univariate distribution.
      This may result in non-parametric models being used.
    * ``parametric``: Let ``copulas`` select the optimal univariate distribution,
      but restrict the selection to parametric distributions only.
    * ``bounded``: Let ``copulas`` select the optimal univariate distribution,
      but restrict the selection to bounded distributions only.
      This may result in non-parametric models being used.
    * ``semi_bounded``: Let ``copulas`` select the optimal univariate distribution,
      but restrict the selection to semi-bounded distributions only.
      This may result in non-parametric models being used.
    * ``parametric_bounded``: Let ``copulas`` select the optimal univariate
      distribution, but restrict the selection to parametric and bounded distributions
      only.
    * ``parametric_semi_bounded``: Let ``copulas`` select the optimal univariate
      distribution, but restrict the selection to parametric and semi-bounded
      distributions only.
    * ``gaussian``: Use a Gaussian distribution.
    * ``gamma``: Use a Gamma distribution.
    * ``beta``: Use a Beta distribution.
    * ``student_t``: Use a Student T distribution.
    * ``gaussian_kde``: Use a GaussianKDE distribution. This model is non-parametric,
      so using this will make ``get_parameters`` unusable.
    * ``truncated_gaussian``: Use a Truncated Gaussian distribution.

    Args:
        field_names (list[str]):
            List of names of the fields that need to be modeled
            and included in the generated output data. Any additional
            fields found in the data will be ignored and will not be
            included in the generated output.
            If ``None``, all the fields found in the data are used.
        field_types (dict[str, dict]):
            Dictionary specifying the data types and subtypes
            of the fields that will be modeled. Field types and subtypes
            combinations must be compatible with the SDV Metadata Schema.
        field_transformers (dict[str, str]):
            Dictionary specifying which transformers to use for each field.
            Available transformers are:

                * ``integer``: Uses a ``NumericalTransformer`` of dtype ``int``.
                * ``float``: Uses a ``NumericalTransformer`` of dtype ``float``.
                * ``categorical``: Uses a ``CategoricalTransformer`` without gaussian noise.
                * ``categorical_fuzzy``: Uses a ``CategoricalTransformer`` adding gaussian noise.
                * ``one_hot_encoding``: Uses a ``OneHotEncodingTransformer``.
                * ``label_encoding``: Uses a ``LabelEncodingTransformer``.
                * ``boolean``: Uses a ``BooleanTransformer``.
                * ``datetime``: Uses a ``DatetimeTransformer``.

        anonymize_fields (dict[str, str]):
            Dict specifying which fields to anonymize and what faker
            category they belong to.
        primary_key (str):
            Name of the field which is the primary key of the table.
        constraints (list[Constraint, dict]):
            List of Constraint objects or dicts.
        table_metadata (dict or metadata.Table):
            Table metadata instance or dict representation.
            If given alongside any other metadata-related arguments, an
            exception will be raised.
            If not given at all, it will be built using the other
            arguments or learned from the data.
        log_frequency (boolean):
            Whether to use log frequency of categorical levels in conditional
            sampling. Defaults to ``True``.
        embedding_dim (int):
            Size of the random sample passed to the Generator. Defaults to 128.
        generator_dim (tuple or list of ints):
            Size of the output samples for each one of the Residuals. A Residual Layer
            will be created for each one of the values provided. Defaults to (256, 256).
        discriminator_dim (tuple or list of ints):
            Size of the output samples for each one of the Discriminator Layers. A Linear
            Layer will be created for each one of the values provided. Defaults to (256, 256).
        batch_size (int):
            Number of data samples to process in each step.
        verbose (bool):
            Whether to print fit progress on stdout. Defaults to ``False``.
        epochs (int):
            Number of training epochs. Defaults to 300.
        cuda (bool or str):
            If ``True``, use CUDA. If an ``str``, use the indicated device.
            If ``False``, do not use cuda at all.
        field_distributions (dict):
            Optionally specify a dictionary that maps the name of each field to the distribution
            that must be used in it. Fields that are not specified in the input ``dict`` will
            be modeled using the default distribution. Defaults to ``None``.
        default_distribution (copulas.univariate.Univariate or str):
            Distribution to use on the fields for which no specific distribution has been given.
            Defaults to ``parametric``.
    """

    DEFAULT_DISTRIBUTION = 'parametric'

    def __init__(self,
                 field_names=None,
                 field_types=None,
                 field_transformers=None,
                 anonymize_fields=None,
                 primary_key=None,
                 constraints=None,
                 table_metadata=None,
                 embedding_dim=128,
                 generator_dim=(256, 256),
                 discriminator_dim=(256, 256),
                 generator_lr=2e-4,
                 generator_decay=1e-6,
                 discriminator_lr=2e-4,
                 discriminator_decay=1e-6,
                 batch_size=500,
                 discriminator_steps=1,
                 log_frequency=True,
                 verbose=False,
                 epochs=300,
                 cuda=True,
                 field_distributions=None,
                 default_distribution=None):
        super().__init__(field_names=field_names,
                         primary_key=primary_key,
                         field_types=field_types,
                         field_transformers=field_transformers,
                         anonymize_fields=anonymize_fields,
                         constraints=constraints,
                         table_metadata=table_metadata,
                         embedding_dim=embedding_dim,
                         generator_dim=generator_dim,
                         discriminator_dim=discriminator_dim,
                         generator_lr=generator_lr,
                         generator_decay=generator_decay,
                         discriminator_lr=discriminator_lr,
                         discriminator_decay=discriminator_decay,
                         batch_size=batch_size,
                         discriminator_steps=discriminator_steps,
                         log_frequency=log_frequency,
                         verbose=verbose,
                         epochs=epochs,
                         cuda=cuda)
        self._field_distributions = field_distributions or dict()
        self._default_distribution = default_distribution or self.DEFAULT_DISTRIBUTION

    def get_distributions(self):
        """Get the marginal distributions used by this CopulaGAN.

        Returns:
            dict:
                Dictionary containing the distributions used or detected
                for each column.
        """
        return {
            field: transformer._univariate.to_dict()['type']
            for field, transformer in self._ht.transformers.items()
        }

    def _fit(self, table_data):
        """Fit the model to the table.

        Args:
            table_data (pandas.DataFrame):
                Data to be learned.
        """
        distributions = self._field_distributions
        default = self._default_distribution
        fields = self._metadata.get_fields()
        transformers = {
            field: GaussianCopulaTransformer(
                distribution=distributions.get(field, default))
            for field in table_data.columns
            if fields.get(field, dict()).get('type') != 'categorical'
        }
        self._ht = HyperTransformer(transformers=transformers)
        table_data = self._ht.fit_transform(table_data)

        super()._fit(table_data)

    def _sample(self, num_rows, conditions=None):
        """Sample the indicated number of rows from the model.

        Args:
            num_rows (int):
                Number of rows to sample.
            conditions (dict):
                If specified, this dictionary maps column names to the column
                value. Then, this method generates `num_rows` samples, all of
                which are conditioned on the given variables.

        Returns:
            pandas.DataFrame:
                Sampled data.
        """
        sampled = super()._sample(num_rows, conditions)
        return self._ht.reverse_transform(sampled)
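
A hedged end-to-end usage sketch, assuming SDV's tabular model API (fit and
sample) and a pandas DataFrame named data; the distribution names follow the
list in the class docstring:

# `data` is assumed to be a pandas.DataFrame with an 'age' column and enough
# rows for CTGAN training (hundreds or more).
model = CopulaGAN(
    field_distributions={'age': 'gamma'},  # per-field override
    default_distribution='parametric',     # fallback for all other fields
)
model.fit(data)

synthetic = model.sample(100)      # 100 new rows shaped like `data`
print(model.get_distributions())   # distribution used/detected per column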