def test_wrong_format(self):
    """Check that malformed structures make ``transform`` raise ValueError.

    Leaves of a structure must be tuples of the format
    ('column name', transformer_instance). Each malformed structure is
    constructed outside of ``pytest.raises`` so that only the ``transform``
    call is expected to raise.
    """
    # A leaf with three elements does not match the two-element leaf format,
    # so transform must throw ValueError.
    bs = BatchShaper(x_structure=('var1', self.lb), y_structure=('label', self.le, 1))
    with pytest.raises(ValueError):
        bs.transform(self.df)

    # A leaf whose second element is not a transformer instance must throw
    # ValueError as well.
    bs = BatchShaper(x_structure=('var1', self.lb), y_structure=('label', 1))
    with pytest.raises(ValueError):
        bs.transform(self.df)

    # this must also throw ValueError - structure must be a tuple (X, y) to
    # conform Keras requirements
    # NOTE(review): the only difference from the first case is that
    # x_structure is a list; y_structure is the same three-element leaf.
    bs = BatchShaper(x_structure=[('var1', self.lb)], y_structure=('label', self.le, 1))
    with pytest.raises(ValueError):
        bs.transform(self.df)
def test_none_transformer(self):
    """A leaf with ``None`` as its transformer yields the raw column values."""
    shaper = BatchShaper(
        x_structure=[('var1', self.lb), ('var2', None)],
        y_structure=('label', self.le),
    )
    batch = shaper.transform(self.df)

    # The batch is an (X, y) tuple.
    assert type(batch) is tuple
    assert len(batch) == 2

    # X mirrors the list x_structure: one entry per leaf.
    x_part = batch[0]
    assert type(x_part) is list
    assert len(x_part) == 2

    # The second leaf has no transformer, so it must equal the 'var2' column.
    expected = self.df['var2'].values
    assert np.array_equal(x_part[1], expected)
def test_basic(self):
    """Single-leaf x and y structures produce a tuple of two numpy arrays."""
    shaper = BatchShaper(
        x_structure=('var1', self.lb),
        y_structure=('label', self.le),
    )
    batch = shaper.transform(self.df)

    # The batch is an (X, y) tuple.
    assert type(batch) is tuple
    assert len(batch) == 2

    # Both components are plain numpy arrays.
    x_part = batch[0]
    y_part = batch[1]
    assert type(x_part) is np.ndarray
    assert type(y_part) is np.ndarray

    # Expected shapes for the fixture dataframe.
    assert x_part.shape == (4, 3)
    assert y_part.shape == (4,)
def test_const_component_str(self):
    """A ``(None, u'a')`` leaf yields an array filled with the string 'a'."""
    shaper = BatchShaper(
        x_structure=[('var1', self.lb), (None, u'a')],
        y_structure=('label', self.le),
    )
    batch = shaper.transform(self.df)

    # The batch is an (X, y) tuple.
    assert type(batch) is tuple
    assert len(batch) == 2

    # X mirrors the list x_structure: one entry per leaf.
    x_part = batch[0]
    assert type(x_part) is list
    assert len(x_part) == 2

    # The constant component holds 'a' everywhere.
    const_component = x_part[1]
    assert np.all(const_component == 'a')
    # single unicode character
    assert const_component.dtype == '<U1'
def test_const_component_float(self):
    """A ``(None, 0.)`` leaf yields a float array filled with zeros."""
    shaper = BatchShaper(
        x_structure=[('var1', self.lb), (None, 0.0)],
        y_structure=('label', self.le),
    )
    batch = shaper.transform(self.df)

    # The batch is an (X, y) tuple.
    assert type(batch) is tuple
    assert len(batch) == 2

    # X mirrors the list x_structure: one entry per leaf.
    x_part = batch[0]
    assert type(x_part) is list
    assert len(x_part) == 2

    # The constant component is all zeros with a float dtype.
    const_component = x_part[1]
    assert np.all(const_component == 0)
    assert const_component.dtype == float
def test_2d_transformer(self):
    """
    Verify that BatchShaper raises ValueError when given a 2D transformer,
    e.g. OneHotEncoder. Such a transformer requires 2D input, while
    BatchShaper works on a per-column basis and so provides only 1D data.
    """
    shaper = BatchShaper(
        x_structure=('var1', self.oh),
        y_structure=('label', self.le),
    )
    with pytest.raises(ValueError):
        shaper.transform(self.df)
def test_many_y(self):
    """A list y_structure yields a list of arrays as the y component."""
    var2_binarizer = LabelBinarizer().fit(self.df['var2'])
    shaper = BatchShaper(
        x_structure=('var1', self.lb),
        y_structure=[('label', self.le), ('var2', var2_binarizer)],
    )
    batch = shaper.transform(self.df)

    # The batch is an (X, y) tuple.
    assert type(batch) is tuple
    assert len(batch) == 2

    # X is a single array, y is a list with one array per leaf.
    assert type(batch[0]) is np.ndarray
    y_part = batch[1]
    assert type(y_part) is list
    assert len(y_part) == 2
    assert type(y_part[0]) is np.ndarray
    assert type(y_part[1]) is np.ndarray

    # Expected shapes for the fixture dataframe.
    assert y_part[0].shape == (4,)
    assert y_part[1].shape == (4, 4)
    assert batch[0].shape == (4, 3)
def test_no_return_y(self):
    """With ``return_y=False`` transform returns only the X array."""
    shaper = BatchShaper(
        x_structure=('var1', self.lb),
        y_structure=('label', self.le),
    )
    x_only = shaper.transform(self.df, return_y=False)

    # No (X, y) tuple here: the result is the X array itself.
    assert type(x_only) is np.ndarray
    assert x_only.shape == (4, 3)
def test_missing_field(self):
    """Check that ``transform`` raises KeyError for a missing column.

    The x_structure refers to 'missing_name', a column name the test
    expects to be absent from the dataframe. The y_structure is kept
    well-formed so that the missing column is the only problem in the
    structure and the KeyError cannot be masked by a format error.
    """
    bs = BatchShaper(x_structure=('missing_name', self.lb), y_structure=('label', self.le))
    with pytest.raises(KeyError):
        bs.transform(self.df)