Beispiel #1
0
    def test_sample_remaining_columns(self):
        """Check that ``sample_remaining_columns`` delegates to the private sampler.

        The public method is invoked unbound on a mocked ``GaussianCopula`` so
        that only the forwarding of arguments is exercised.

        Input:
            - a DataFrame of known column values
            - explicit ``batch_size``, ``randomize_samples`` and
              ``output_file_path`` keyword arguments
        Side Effects:
            - ``_sample_remaining_columns`` is called exactly once with the
              conditions, ``100`` (presumably the default ``max_tries`` --
              confirm against the model signature) and the three keyword
              values, all positionally.
        Output:
            - the return value of ``_sample_remaining_columns``
        """
        # Setup
        instance = Mock(spec_set=GaussianCopula)
        known_columns = pd.DataFrame([{'cola': 'a'}] * 5)
        sampling_kwargs = {
            'batch_size': 1,
            'randomize_samples': False,
            'output_file_path': 'test.csv',
        }

        # Run
        result = GaussianCopula.sample_remaining_columns(
            instance, known_columns, **sampling_kwargs)

        # Assert
        private_sampler = instance._sample_remaining_columns
        private_sampler.assert_called_once_with(
            known_columns,
            100,
            sampling_kwargs['batch_size'],
            sampling_kwargs['randomize_samples'],
            sampling_kwargs['output_file_path'],
        )
        assert result == private_sampler.return_value
Beispiel #2
0
def test_conditional_sampling_dataframe():
    """Sample the remaining columns for a DataFrame of fixed ``column2`` values.

    A ``GaussianCopula`` is fitted on a small two-column table, then asked to
    complete five rows whose ``column2`` is given. The result must contain one
    row per condition, with ``column2`` matching the conditions in order.
    """
    expected_column2 = ['b', 'b', 'b', 'c', 'c']

    training_data = pd.DataFrame({
        'column1': [1.0, 0.5, 2.5] * 10,
        'column2': ['a', 'b', 'c'] * 10
    })
    conditions = pd.DataFrame({'column2': expected_column2})

    model = GaussianCopula()
    model.fit(training_data)
    sampled = model.sample_remaining_columns(conditions)

    # One output row per requested condition.
    assert len(sampled) == len(conditions)

    # The conditioned column comes back unchanged and in the same order.
    matches = sampled['column2'] == np.array(expected_column2)
    assert matches.all()