Beispiel #1
0
    def test_prepare_encoders(self):
        df, config = self.df, self.config
        ds = DataSource(df, config)
        assert ds.enable_cache

        encoders = ds.encoders

        for col in ['x1', 'x2']:
            assert isinstance(encoders[col], NumericEncoder)
            assert encoders[col]._prepared is True
            assert encoders[col].is_target is False
            assert encoders[col]._type == 'int'

        assert isinstance(encoders['y'], CategoricalAutoEncoder)
        assert encoders['y']._prepared is True
        assert encoders['y'].is_target is True
        assert encoders['y'].onehot_encoder._prepared is True
        assert encoders['y'].onehot_encoder.is_target is True
        assert encoders['y'].use_autoencoder is False

        encoded_column_x1 = ds.get_encoded_column_data('x1')
        assert isinstance(encoded_column_x1, Tensor)
        assert encoded_column_x1.shape[0] == len(df)
        encoded_column_x2 = ds.get_encoded_column_data('x2')
        assert isinstance(encoded_column_x2, Tensor)
        assert encoded_column_x2.shape[0] == len(df)
        encoded_column_y = ds.get_encoded_column_data('y')
        assert isinstance(encoded_column_y, Tensor)
        assert encoded_column_y.shape[0] == len(df)
Beispiel #2
0
    def test_encoded_cache(self):
        df, config = self.df, self.config

        ds = DataSource(df, config)
        assert ds.enable_cache

        for column in ['x1', 'x2', 'y']:
            assert not column in ds.encoded_cache
            encoded_column = ds.get_encoded_column_data(column)
            assert (ds.encoded_cache[column] == encoded_column).all()