コード例 #1
0
    def test_column_without_brackets(self):
        data_mapper = DataMapper([
            (0, FuncTransformer(lambda x: x.reshape(-1, 1)))
        ])
        result = data_mapper.transform(np.ones((2, 3)))

        assert np.all(result == [1, 1])
コード例 #2
0
 def test_many_to_many_support_transformations(self):
     # Instantiate data mapper with many to many transformer support and test whether the feature map is generated
     column_transformer = ColumnTransformer([
         ("column_0_1_2_3", IdentityTransformer(), [0, 1, 2, 3]),
         ("column_4_5", OneHotEncoder(), [4, 5])
     ])
     x = np.ones((10, 6))
     # so that one hot encoder doesn't complain of only one category
     x[0, 4] = 0
     x[0, 5] = 0
     column_transformer.fit(x)
     data_mapper = DataMapper(column_transformer,
                              allow_all_transformations=True)
     data_mapper.transform(x)
     # check feature mapper contents
     feature_map_indices = [[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3],
                            [0, 1, 2, 3], [4, 5], [6, 7]]
     x_out = column_transformer.transform(x)
     feature_map = np.zeros((x.shape[1], x_out.shape[1]))
     num_rows = 0
     for i, row in enumerate(feature_map_indices[:4]):
         feature_map[i, row] = 0.25
         num_rows += 1
     for i, row in enumerate(feature_map_indices[4:], start=num_rows):
         feature_map[i, row] = 1.0
     assert (data_mapper.feature_map == feature_map).all()
コード例 #3
0
 def test_column_passthrough_column_transformer(self):
     x = np.ones((2, 3))
     column_transformer = ColumnTransformer([("column0", "passthrough", [0])
                                             ])
     column_transformer.fit(x)
     data_mapper = DataMapper(column_transformer)
     result = data_mapper.transform(x)
     assert np.all(result == np.array([[1, 1]]))
コード例 #4
0
 def test_column_with_brackets(self):
     x = np.ones((2, 3))
     x[0, 0] = 0
     encoder = OneHotEncoder()
     encoder.fit(x[0].reshape(-1, 1))
     data_mapper = DataMapper([([0], encoder)])
     result = data_mapper.transform(x)
     assert result.shape == (2, 2)
コード例 #5
0
 def test_pipeline_transform_list(self):
     pipeline = Pipeline([("imputer", SimpleImputer()),
                          ("onehotencoder", OneHotEncoder())])
     x = np.ones((3, 2))
     pipeline.fit(x)
     data_mapper = DataMapper([([0, 1], pipeline)])
     result = data_mapper.transform(x)
     assert result.shape == (3, 2)
コード例 #6
0
 def test_column_exception_without_brackets(self):
     with pytest.raises(ValueError):
         x = np.ones((2, 3))
         x[0, 0] = 0
         encoder = OneHotEncoder()
         encoder.fit(x[0])
         data_mapper = DataMapper([[0], encoder])
         data_mapper.transform(x)
コード例 #7
0
 def test_pipeline_transform_column_transformer(self):
     pipeline = Pipeline([("imputer", SimpleImputer()),
                          ("onehotencoder", OneHotEncoder())])
     x = np.ones((3, 2))
     column_transformer = ColumnTransformer([("column", pipeline, [0, 1])])
     column_transformer.fit(x)
     data_mapper = DataMapper(column_transformer)
     result = data_mapper.transform(x)
     assert result.shape == (3, 2)
コード例 #8
0
 def setup_class(self):
     self._identity_mapper_list = DataMapper([([0], IdentityTransformer()),
                                              ([1], IdentityTransformer())])
     column_transformer = ColumnTransformer([
         ("column1", IdentityTransformer(), [0]),
         ("column2", IdentityTransformer(), [1])
     ])
     x = np.ones((10, 5))
     column_transformer.fit(x)
     self._identity_mapper_column_transformer = DataMapper(
         column_transformer)
コード例 #9
0
 def test_large_sparse_transformation(self):
     x = np.linspace((1, ) * 8000, (10, ) * 8000, 10)
     encoder = OneHotEncoder()
     encoder.fit(x)
     data_mapper = DataMapper([(np.arange(8000), encoder)])
     result = data_mapper.transform(x)
     assert result.shape == (10, 80000)
     for i in range(10):
         for j in range(i, 80000, 10):
             assert result[i, j] == 1
             result[i, j] = 0
     result.eliminate_zeros()
     assert result.count_nonzero() == 0
コード例 #10
0
 def test_many_to_many_exception_column_transformer(self):
     # A transformer that takes input many columns. Since we do not recognize this transformer and it uses
     # many input columns - it is treated as many to many/one map.
     with pytest.raises(ValueError):
         column_transformer = ColumnTransformer([
             ("column_0_1", IdentityTransformer(), [0, 1])
         ])
         x = np.ones((2, 2))
         column_transformer.fit(x)
         DataMapper(column_transformer)
コード例 #11
0
 def test_column_with_none_transformer(self):
     x = np.ones((2, 3))
     data_mapper = DataMapper([(0, None)])
     result = data_mapper.transform(x)
     assert np.all(result == np.array([[1, 1]]))
コード例 #12
0
 def test_mixed_dtypes(self):
     x = np.ones((10, 2))
     data_mapper = DataMapper([([0], IdentityTransformer()),
                               ([1], SparseTransformer())])
     result = data_mapper.transform(x)
     assert issparse(result)
コード例 #13
0
class TestDataMapper:
    def setup_class(self):
        self._identity_mapper_list = DataMapper([([0], IdentityTransformer()),
                                                 ([1], IdentityTransformer())])
        column_transformer = ColumnTransformer([
            ("column1", IdentityTransformer(), [0]),
            ("column2", IdentityTransformer(), [1])
        ])
        x = np.ones((10, 5))
        column_transformer.fit(x)
        self._identity_mapper_column_transformer = DataMapper(
            column_transformer)

    def _transform_numpy(self, dmapper):
        x = np.ones((10, 5))
        result = dmapper.transform(x)
        assert result.shape == (10, 2)

    def test_mixed_dtypes(self):
        x = np.ones((10, 2))
        data_mapper = DataMapper([([0], IdentityTransformer()),
                                  ([1], SparseTransformer())])
        result = data_mapper.transform(x)
        assert issparse(result)

    def test_transform_sparse(self):
        x = csr_matrix(np.zeros((10, 2)))
        result = self._identity_mapper_list.transform(x)
        assert result.shape == x.shape
        assert issparse(result)

    def test_column_with_brackets(self):
        x = np.ones((2, 3))
        x[0, 0] = 0
        encoder = OneHotEncoder()
        encoder.fit(x[0].reshape(-1, 1))
        data_mapper = DataMapper([([0], encoder)])
        result = data_mapper.transform(x)
        assert result.shape == (2, 2)

    def test_large_sparse_transformation(self):
        x = np.linspace((1, ) * 8000, (10, ) * 8000, 10)
        encoder = OneHotEncoder()
        encoder.fit(x)
        data_mapper = DataMapper([(np.arange(8000), encoder)])
        result = data_mapper.transform(x)
        assert result.shape == (10, 80000)
        for i in range(10):
            for j in range(i, 80000, 10):
                assert result[i, j] == 1
                result[i, j] = 0
        result.eliminate_zeros()
        assert result.count_nonzero() == 0

    def test_transform_numpy_list(self):
        self._transform_numpy(self._identity_mapper_list)

    def test_transform_numpy_column_transformer(self):
        self._transform_numpy(self._identity_mapper_column_transformer)

    def test_column_without_brackets(self):
        data_mapper = DataMapper([
            (0, FuncTransformer(lambda x: x.reshape(-1, 1)))
        ])
        result = data_mapper.transform(np.ones((2, 3)))

        assert np.all(result == [1, 1])

    def test_column_with_none_transformer(self):
        x = np.ones((2, 3))
        data_mapper = DataMapper([(0, None)])
        result = data_mapper.transform(x)
        assert np.all(result == np.array([[1, 1]]))

    def test_column_passthrough_column_transformer(self):
        x = np.ones((2, 3))
        column_transformer = ColumnTransformer([("column0", "passthrough", [0])
                                                ])
        column_transformer.fit(x)
        data_mapper = DataMapper(column_transformer)
        result = data_mapper.transform(x)
        assert np.all(result == np.array([[1, 1]]))

    def test_column_exception_without_brackets(self):
        with pytest.raises(ValueError):
            x = np.ones((2, 3))
            x[0, 0] = 0
            encoder = OneHotEncoder()
            encoder.fit(x[0])
            data_mapper = DataMapper([[0], encoder])
            data_mapper.transform(x)

    def test_pipeline_transform_list(self):
        pipeline = Pipeline([("imputer", SimpleImputer()),
                             ("onehotencoder", OneHotEncoder())])
        x = np.ones((3, 2))
        pipeline.fit(x)
        data_mapper = DataMapper([([0, 1], pipeline)])
        result = data_mapper.transform(x)
        assert result.shape == (3, 2)

    def test_pipeline_transform_column_transformer(self):
        pipeline = Pipeline([("imputer", SimpleImputer()),
                             ("onehotencoder", OneHotEncoder())])
        x = np.ones((3, 2))
        column_transformer = ColumnTransformer([("column", pipeline, [0, 1])])
        column_transformer.fit(x)
        data_mapper = DataMapper(column_transformer)
        result = data_mapper.transform(x)
        assert result.shape == (3, 2)

    def test_many_to_many_exception_list(self):
        # A transformer that takes input many columns. Since we do not recognize this transformer and it uses
        # many input columns - it is treated as many to many/one map.
        with pytest.raises(ValueError):
            DataMapper([([0, 1], IdentityTransformer())])

    def test_many_to_many_exception_column_transformer(self):
        # A transformer that takes input many columns. Since we do not recognize this transformer and it uses
        # many input columns - it is treated as many to many/one map.
        with pytest.raises(ValueError):
            column_transformer = ColumnTransformer([
                ("column_0_1", IdentityTransformer(), [0, 1])
            ])
            x = np.ones((2, 2))
            column_transformer.fit(x)
            DataMapper(column_transformer)

    def test_many_to_many_support_transformations(self):
        # Instantiate data mapper with many to many transformer support and test whether the feature map is generated
        column_transformer = ColumnTransformer([
            ("column_0_1_2_3", IdentityTransformer(), [0, 1, 2, 3]),
            ("column_4_5", OneHotEncoder(), [4, 5])
        ])
        x = np.ones((10, 6))
        # so that one hot encoder doesn't complain of only one category
        x[0, 4] = 0
        x[0, 5] = 0
        column_transformer.fit(x)
        data_mapper = DataMapper(column_transformer,
                                 allow_all_transformations=True)
        data_mapper.transform(x)
        # check feature mapper contents
        feature_map_indices = [[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3],
                               [0, 1, 2, 3], [4, 5], [6, 7]]
        x_out = column_transformer.transform(x)
        feature_map = np.zeros((x.shape[1], x_out.shape[1]))
        num_rows = 0
        for i, row in enumerate(feature_map_indices[:4]):
            feature_map[i, row] = 0.25
            num_rows += 1
        for i, row in enumerate(feature_map_indices[4:], start=num_rows):
            feature_map[i, row] = 1.0
        assert (data_mapper.feature_map == feature_map).all()
コード例 #14
0
 def test_many_to_many_exception_list(self):
     # A transformer that takes input many columns. Since we do not recognize this transformer and it uses
     # many input columns - it is treated as many to many/one map.
     with pytest.raises(ValueError):
         DataMapper([([0, 1], IdentityTransformer())])