def test_sample_empty_transformed_conditions():
    """Test that ``sample`` passes ``None`` when the transformed conditions are empty.

    The metadata ``transform`` mock returns an empty ``DataFrame``. ``sample`` is
    then expected to:
    - Call ``_metadata.transform`` exactly once, with a ``column1`` series holding
      the condition value once per requested row and ``on_missing_column='drop'``.
    - Call ``_sample_batch`` with ``None`` as the fifth positional argument.
    - Return the data produced by ``_sample_batch``.

    Input:
    - Number of rows to sample
    - Conditions

    Output:
    - Sampled data
    """
    # Setup
    column_names = ['column1', 'column2', 'column3']
    training_data = pd.DataFrame({name: list(range(100)) for name in column_names})
    conditions = {'column1': 25}
    expected_conditions = pd.Series([25] * 5, name='column1')
    batch_rows = {
        'column1': [28, 28],
        'column2': [37, 37],
        'column3': [93, 93],
    }

    model = GaussianCopula()
    model._sample_batch = Mock(return_value=pd.DataFrame(batch_rows))
    model.fit(training_data)

    # The metadata is swapped for a mock only after fitting.
    metadata = Mock()
    metadata.get_fields.return_value = list(column_names)
    metadata.transform.return_value = pd.DataFrame()
    metadata.make_ids_unique.side_effect = lambda x: x
    model._metadata = metadata

    # Run
    output = model.sample(5, conditions=conditions, graceful_reject_sampling=True)

    # Assert
    # Built separately from the frame handed to the mock so the comparison
    # does not depend on the very same object that ``sample`` received.
    expected_output = pd.DataFrame(batch_rows)
    transform_mock = model._metadata.transform
    _name, call_args, call_kwargs = transform_mock.mock_calls[0]
    pd.testing.assert_series_equal(call_args[0]['column1'], expected_conditions)
    assert call_kwargs['on_missing_column'] == 'drop'
    transform_mock.assert_called_once()
    model._sample_batch.assert_called_with(5, 100, 10, conditions, None, 0.01)
    pd.testing.assert_frame_equal(output, expected_output)
def test__sample_rows_previous_rows_appended_correctly():
    """Test ``_sample_rows`` when ``previous_rows`` are provided.

    The newly sampled rows are expected to be appended after ``previous_rows``
    and the combined result is expected to carry a fresh ``0..n-1`` index.

    Input:
    - num_rows is 5
    - previous_rows is a DataFrame of 3 existing rows.

    Output:
    - 5 sampled rows with index set to [0, 1, 2, 3, 4]
    - The number of valid rows, expected to be 5
    """
    # Setup
    column_names = ['column1', 'column2', 'column3']
    existing_rows = pd.DataFrame(dict(zip(column_names, ([1, 2, 3], [4, 5, 6], [7, 8, 9]))))
    fresh_rows = pd.DataFrame(dict(zip(column_names, ([4, 5], [7, 8], [10, 11]))))

    # Both the raw sampler and the reverse transform hand back the same
    # two new rows; ``filter_valid`` lets every row through untouched.
    metadata = Mock()
    metadata.reverse_transform.return_value = fresh_rows
    metadata.filter_valid = lambda x: x

    model = GaussianCopula()
    model._metadata = metadata
    model._sample = Mock(return_value=fresh_rows)

    # Run
    sampled, num_valid = model._sample_rows(5, previous_rows=existing_rows)

    # Assert
    expected = pd.DataFrame(
        dict(zip(column_names, ([1, 2, 3, 4, 5], [4, 5, 6, 7, 8], [7, 8, 9, 10, 11])))
    )
    assert num_valid == 5
    pd.testing.assert_frame_equal(sampled, expected)
def test_sample_batches_transform_conditions_correctly():
    """Test that transformed conditions are batched correctly.

    The metadata ``transform`` mock maps the five conditions onto three distinct
    transformed values (50, 60 and 70). ``sample`` is then expected to:
    - Call ``_metadata.transform`` exactly once, with the ``column1`` conditions
      and ``on_missing_column='drop'``.
    - Call ``_sample_batch`` once for every unique transformed condition group,
      asking for as many rows as the group contains.

    Input:
    - Number of rows to sample
    - Conditions

    Output:
    - Sampled data
    """
    # Setup
    def _batch_frame(condition_value, num_rows):
        # One frame per expected ``_sample_batch`` call.
        return pd.DataFrame({
            'column1': [condition_value] * num_rows,
            'column2': [37] * num_rows,
            'column3': [93] * num_rows,
        })

    column_names = ['column1', 'column2', 'column3']
    training_data = pd.DataFrame({name: list(range(100)) for name in column_names})
    conditions = {'column1': [25, 25, 25, 30, 30]}
    expected_conditions = pd.Series([25] * 3 + [30] * 2, name='column1')
    batch_frames = [_batch_frame(25, 3), _batch_frame(30, 1), _batch_frame(30, 1)]

    model = GaussianCopula()
    model._sample_batch = Mock(side_effect=batch_frames)
    model.fit(training_data)

    # The metadata is swapped for a mock only after fitting.
    metadata = Mock()
    metadata.get_fields.return_value = list(column_names)
    metadata.transform.return_value = pd.DataFrame(
        [[value] for value in (50, 50, 50, 60, 70)],
        columns=['transformed_column'],
    )
    model._metadata = metadata

    # Run
    model.sample(5, conditions=conditions, graceful_reject_sampling=True)

    # Assert
    transform_mock = model._metadata.transform
    _name, call_args, call_kwargs = transform_mock.mock_calls[0]
    pd.testing.assert_series_equal(call_args[0]['column1'], expected_conditions)
    assert call_kwargs['on_missing_column'] == 'drop'
    transform_mock.assert_called_once()

    # (rows requested, original condition, transformed condition) per group.
    expected_batches = [
        (3, {'column1': 25}, {'transformed_column': 50}),
        (1, {'column1': 30}, {'transformed_column': 60}),
        (1, {'column1': 30}, {'transformed_column': 70}),
    ]
    for num_rows, condition, transformed_condition in expected_batches:
        model._sample_batch.assert_any_call(
            num_rows, 100, 10, condition, transformed_condition, 0.01)