def test__transform_continuous(self, MockBGM):
    """Check the array layout produced by ``_transform_continuous``.

    Setup:
        - ``MockBGM`` (the mocked ``BayesGMMTransformer``) supplies the transformer
          instance; its ``transform`` returns a fixed dataframe with the columns
          ``x.normalized`` and ``x.component``.
        - A default ``DataTransformer`` is created.

    Input:
        - A ``ColumnTransformInfo`` describing the continuous column ``x``.
        - A dataframe containing that column.

    Output:
        - A np.array whose first column is the normalized part of the mocked
          transform and whose remaining three columns are the one hot encoding
          of the component part of the mocked transform.
    """
    # Setup
    mocked_output = pd.DataFrame({
        'x.normalized': [0.1, 0.2, 0.3],
        'x.component': [0.0, 1.0, 1.0]
    })
    gmm = MockBGM.return_value
    gmm.transform.return_value = mocked_output

    spans = [SpanInfo(1, 'tanh'), SpanInfo(3, 'softmax')]
    info = ColumnTransformInfo(
        column_name='x',
        column_type='continuous',
        transform=gmm,
        output_info=spans,
        output_dimensions=1 + 3,
    )
    raw = pd.DataFrame({'x': np.array([0.1, 0.3, 0.5])})
    data_transformer = DataTransformer()

    # Run
    transformed = data_transformer._transform_continuous(info, raw)

    # Assert
    # Components 0, 1, 1 select the first, second and second one hot slots.
    wanted = np.array([
        [0.1, 1, 0, 0],
        [0.2, 0, 1, 0],
        [0.3, 0, 1, 0],
    ])
    np.testing.assert_array_equal(transformed, wanted)
def test_transform(self):
    """Test 'transform' on a dataframe with one continuous and one discrete column.

    Each column should be routed to the '_transform' helper matching its type
    and the helper outputs should be concatenated in column order.

    Setup:
        - Assign _column_transform_info_list by hand
        - Mock _transform_discrete
        - Mock _transform_continuous

    Input:
        - raw_data = a table with one continuous and one discrete column.

    Output:
        - numpy array containing the two transformed columns

    Side Effects:
        - _transform_discrete and _transform_continuous should each be called once.
    """
    # NOTE(review): another ``test_transform`` is defined right after this one; if
    # both live in the same class the later definition replaces this one and this
    # body never runs -- confirm which version is meant to be kept.
    continuous_info = ColumnTransformInfo(
        column_name="x",
        column_type="continuous",
        transform=None,
        transform_aux=None,
        output_info=[SpanInfo(1, 'tanh'), SpanInfo(3, 'softmax')],
        output_dimensions=1 + 3,
    )
    discrete_info = ColumnTransformInfo(
        column_name="y",
        column_type="discrete",
        transform=None,
        transform_aux=None,
        output_info=[SpanInfo(2, 'softmax')],
        output_dimensions=2,
    )

    normalized = np.array([[0.1], [0.3], [0.5]])
    component_onehot = np.array([
        [1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
    ])
    discrete_onehot = np.array([
        [0, 1],
        [0, 1],
        [1, 0],
    ])

    transformer = DataTransformer()
    transformer._column_transform_info_list = [continuous_info, discrete_info]
    # The continuous helper is mocked to hand back a (normalized, one hot) tuple
    # and the discrete helper a one-element list, as in the original test.
    transformer._transform_continuous = Mock(
        return_value=(normalized, component_onehot))
    transformer._transform_discrete = Mock(return_value=[discrete_onehot])

    raw_data = pd.DataFrame({
        "x": np.array([0.1, 0.3, 0.5]),
        "y": np.array(["yes", "yes", "no"])
    })

    result = transformer.transform(raw_data)

    transformer._transform_continuous.assert_called_once()
    transformer._transform_discrete.assert_called_once()

    expected = np.array([
        [0.1, 1, 0, 0, 0, 1],
        [0.3, 1, 0, 0, 0, 1],
        [0.5, 1, 0, 0, 1, 0],
    ])
    assert result.shape == (3, 6)

    # Compare each output span separately so a failure names the offending part.
    span_checks = (
        (0, "continuous-cdf"),
        (slice(1, 4), "continuous-softmax"),
        (slice(4, 6), "discrete"),
    )
    for columns, label in span_checks:
        assert (result[:, columns] == expected[:, columns]).all(), label
def test_transform(self):
    """Test ``transform`` on a dataframe with one continuous and one discrete column.

    Each column should be dispatched to the ``_transform`` helper matching its
    type and the helper outputs should be concatenated in column order.

    Setup:
        - Initialize a ``DataTransformer`` whose ``_column_transform_info_list``
          describes one continuous and one discrete column.
        - Mock the ``_transform_discrete`` and ``_transform_continuous`` methods.

    Input:
        - A table with one continuous and one discrete column.

    Output:
        - np.array containing the transformed columns.

    Side Effects:
        - ``_transform_discrete`` and ``_transform_continuous`` should each be
          called once.
    """
    # Setup
    continuous_info = ColumnTransformInfo(
        column_name='x',
        column_type='continuous',
        transform=None,
        output_info=[SpanInfo(1, 'tanh'), SpanInfo(3, 'softmax')],
        output_dimensions=1 + 3,
    )
    discrete_info = ColumnTransformInfo(
        column_name='y',
        column_type='discrete',
        transform=None,
        output_info=[SpanInfo(2, 'softmax')],
        output_dimensions=2,
    )

    normalized = np.array([[0.1], [0.3], [0.5]])
    component_onehot = np.array([
        [1, 0, 0],
        [0, 1, 0],
        [0, 1, 0],
    ])
    discrete_onehot = np.array([
        [0, 1],
        [0, 1],
        [1, 0],
    ])

    transformer = DataTransformer()
    transformer._column_transform_info_list = [continuous_info, discrete_info]
    # The continuous helper is mocked to return the normalized value and the
    # component one hot side by side in a single 2-D array.
    transformer._transform_continuous = Mock(
        return_value=np.hstack((normalized, component_onehot)))
    transformer._transform_discrete = Mock(return_value=discrete_onehot)

    table = pd.DataFrame({
        'x': np.array([0.1, 0.3, 0.5]),
        'y': np.array(['yes', 'yes', 'no'])
    })

    # Run
    result = transformer.transform(table)

    # Assert
    transformer._transform_continuous.assert_called_once()
    transformer._transform_discrete.assert_called_once()

    expected = np.array([
        [0.1, 1, 0, 0, 0, 1],
        [0.3, 0, 1, 0, 0, 1],
        [0.5, 0, 1, 0, 1, 0],
    ])
    assert result.shape == (3, 6)

    # Compare each output span separately so a failure names the offending part.
    span_checks = (
        (0, 'continuous-cdf'),
        (slice(1, 4), 'continuous-softmax'),
        (slice(4, 6), 'discrete'),
    )
    for columns, label in span_checks:
        assert (result[:, columns] == expected[:, columns]).all(), label