def test___fit_discrete_(self, MockOHE):
    """Test '_fit_discrete' on a simple discrete column.

    A 'OneHotEncodingTransformer' will be created and fit with the
    'raw_column_data'.

    Setup:
        - Mock the OneHotEncodingTransformer (received here as 'MockOHE')
        - Give the mocked encoder instance two 'dummies'
        - Create DataTransformer

    Input:
        - column_name = string
        - raw_column_data = numpy array of discrete values

    Output:
        - ColumnTransformInfo
          - Check column name
          - Check that the transform is the mocked encoder instance
          - Check that output_dims matches expected (number of categories)
          - Check that the first output span has dim 2 and uses 'softmax'

    Side Effects:
        - fit should be called once on the encoder instance
    """
    # Setup
    ohe_instance = MockOHE.return_value
    ohe_instance.dummies = ['a', 'b']
    transformer = DataTransformer()
    raw_column_data = np.array(['a', 'b'] * 100)

    # Run
    info = transformer._fit_discrete("column", raw_column_data)

    # Assert
    assert info.column_name == "column"
    assert info.transform == ohe_instance
    assert info.output_dimensions == 2
    assert info.output_info[0].dim == 2
    assert info.output_info[0].activation_fn == "softmax"

    # The documented side effect was previously never verified.
    ohe_instance.fit.assert_called_once()
def test_fit(self):
    """Test 'fit' on a table with one continuous and one discrete column.

    Both per-column fitting helpers are replaced by mocks returning canned
    'ColumnTransformInfo' objects, so only the dispatching and bookkeeping
    done by 'fit' itself is exercised.

    Setup:
        - Create DataTransformer
        - Mock _fit_continuous (returns a 4-dimensional ColumnTransformInfo)
        - Mock _fit_discrete (returns a 2-dimensional ColumnTransformInfo)

    Input:
        - raw_data = pandas DataFrame with a continuous column 'x' and a
          discrete column 'y'
        - discrete_columns = list with the name of the discrete column

    Side Effects:
        - _fit_discrete and _fit_continuous should each be called once
        - 'self.output_dimensions' should be 6 (4 from the mocked continuous
          column plus 2 from the mocked discrete column)
    """
    # NOTE(review): another 'test_fit' is defined later in this file; if both
    # live in the same class, the later definition replaces this one and this
    # test never runs -- confirm which version is meant to be kept.

    # Setup
    table = pd.DataFrame({
        "x": np.random.random(size=100),
        "y": np.random.choice(["yes", "no"], size=100),
    })

    continuous_info = ColumnTransformInfo(
        column_name="x",
        column_type="continuous",
        transform=None,
        transform_aux=None,
        output_info=[SpanInfo(1, 'tanh'), SpanInfo(3, 'softmax')],
        output_dimensions=4,
    )
    discrete_info = ColumnTransformInfo(
        column_name="y",
        column_type="discrete",
        transform=None,
        transform_aux=None,
        output_info=[SpanInfo(2, 'softmax')],
        output_dimensions=2,
    )

    transformer = DataTransformer()
    transformer._fit_continuous = Mock(return_value=continuous_info)
    transformer._fit_discrete = Mock(return_value=discrete_info)

    # Run
    transformer.fit(table, discrete_columns=["y"])

    # Assert
    transformer._fit_discrete.assert_called_once()
    transformer._fit_continuous.assert_called_once()
    assert transformer.output_dimensions == 6
def test_fit(self):
    """Test ``fit`` on a table with one continuous and one discrete column.

    The per-column helpers are mocked to return canned
    ``ColumnTransformInfo`` objects, so the test only covers how ``fit``
    dispatches to them and aggregates their results.

    Setup:
        - Create ``DataTransformer``.
        - Mock ``_fit_continuous`` to return a 4-dimensional
          ``ColumnTransformInfo``.
        - Mock ``_fit_discrete`` to return a 2-dimensional
          ``ColumnTransformInfo``.

    Input:
        - A pandas DataFrame with a continuous column ``x`` and a discrete
          column ``y``.
        - A list with the name of the discrete column.

    Side Effects:
        - ``_fit_discrete`` and ``_fit_continuous`` should each be called once.
        - ``self.output_dimensions`` should be 6 (4 from the mocked continuous
          column plus 2 from the mocked discrete column).
    """
    # NOTE(review): an earlier ``test_fit`` exists in this file; if both live
    # in the same class, this definition replaces the earlier one -- confirm
    # the earlier version is meant to be removed.

    # Setup
    continuous_info = ColumnTransformInfo(
        column_name='x',
        column_type='continuous',
        transform=None,
        output_info=[SpanInfo(1, 'tanh'), SpanInfo(3, 'softmax')],
        output_dimensions=4,
    )
    discrete_info = ColumnTransformInfo(
        column_name='y',
        column_type='discrete',
        transform=None,
        output_info=[SpanInfo(2, 'softmax')],
        output_dimensions=2,
    )

    transformer = DataTransformer()
    transformer._fit_continuous = Mock(return_value=continuous_info)
    transformer._fit_discrete = Mock(return_value=discrete_info)

    table = pd.DataFrame({
        'x': np.random.random(size=100),
        'y': np.random.choice(['yes', 'no'], size=100),
    })

    # Run
    transformer.fit(table, discrete_columns=['y'])

    # Assert
    transformer._fit_discrete.assert_called_once()
    transformer._fit_continuous.assert_called_once()
    assert transformer.output_dimensions == 6
def test___fit_discrete(self, MockOHE):
    """Test ``_fit_discrete`` on a simple discrete column.

    A ``OneHotEncodingTransformer`` will be created and fit with the ``data``.

    Setup:
        - Mock the ``OneHotEncodingTransformer`` (received here as
          ``MockOHE``) and give its instance two ``dummies``.
        - Create ``DataTransformer``.

    Input:
        - A dataframe with only one column containing ``['a', 'b']`` values.

    Output:
        - A ``ColumnTransformInfo`` object where:
            - ``column_name`` matches the column of the data.
            - ``transform`` is the ``OneHotEncodingTransformer`` instance.
            - ``output_dimensions`` is 2.
            - The first ``output_info`` span has dim 2 and uses ``softmax``.

    Side Effects:
        - ``fit`` should be called once on the encoder instance.
    """
    # Setup
    ohe_instance = MockOHE.return_value
    ohe_instance.dummies = ['a', 'b']
    transformer = DataTransformer()
    data = pd.DataFrame(np.array(['a', 'b'] * 100), columns=['column'])

    # Run
    info = transformer._fit_discrete(data)

    # Assert
    assert info.column_name == 'column'
    assert info.transform == ohe_instance
    assert info.output_dimensions == 2
    assert info.output_info[0].dim == 2
    assert info.output_info[0].activation_fn == 'softmax'

    # The documented side effect was previously never verified.
    ohe_instance.fit.assert_called_once()