def test_prepare_encoders(self):
    """Verify the encoders DataSource builds and the data they encode.

    The feature columns ``x1`` and ``x2`` must each get a prepared,
    non-target ``NumericEncoder`` whose ``_type`` is ``'int'``. The ``y``
    column must get a prepared, target ``CategoricalAutoEncoder`` whose
    one-hot encoder is also prepared and flagged as target, with
    ``use_autoencoder`` off. Every column's encoded data must be a
    ``Tensor`` whose first dimension matches ``len(df)``.
    """
    frame, settings = self.df, self.config
    data_source = DataSource(frame, settings)
    assert data_source.enable_cache

    prepared = data_source.encoders

    # Feature columns: plain numeric encoders, never the target.
    for feature in ('x1', 'x2'):
        numeric = prepared[feature]
        assert isinstance(numeric, NumericEncoder)
        assert numeric._prepared is True
        assert numeric.is_target is False
        assert numeric._type == 'int'

    # Target column: categorical encoder backed by its one-hot encoder.
    target = prepared['y']
    assert isinstance(target, CategoricalAutoEncoder)
    assert target._prepared is True
    assert target.is_target is True
    onehot = target.onehot_encoder
    assert onehot._prepared is True
    assert onehot.is_target is True
    assert target.use_autoencoder is False

    # Encoded data is checked column by column, in the same order as before.
    row_count = len(frame)
    for name in ('x1', 'x2', 'y'):
        encoded = data_source.get_encoded_column_data(name)
        assert isinstance(encoded, Tensor)
        assert encoded.shape[0] == row_count
def test_encoded_cache(self):
    """Verify that encoding a column stores the result in the cache.

    For each of ``x1``, ``x2`` and ``y``: the column must be absent from
    ``encoded_cache`` before it is requested, and afterwards the cached
    entry must equal, element-wise, the value returned by
    ``get_encoded_column_data``.
    """
    frame, settings = self.df, self.config
    data_source = DataSource(frame, settings)
    assert data_source.enable_cache

    for name in ('x1', 'x2', 'y'):
        # Nothing may be cached for this column until it is first requested.
        assert name not in data_source.encoded_cache

        returned = data_source.get_encoded_column_data(name)
        cached = data_source.encoded_cache[name]
        elementwise_equal = cached == returned
        assert elementwise_equal.all()