예제 #1
0
 def test_wrong_format(self):
     """Check that malformed structure leaves make ``transform`` raise ValueError.

     Leaves of a structure must be tuples of format
     ('column name', transformer_instance). Each case below builds a
     BatchShaper with a y structure that breaks this format and expects
     ``transform`` to raise ValueError.
     """
     # Case 1: y leaf has three elements instead of two. This case used to be
     # constructed and immediately overwritten without ever being exercised.
     bs = BatchShaper(x_structure=('var1', self.lb), y_structure=('label', self.le, 1))
     with pytest.raises(ValueError):
         bs.transform(self.df)
     # Case 2: the second element of the y leaf is a plain int rather than a
     # transformer instance.
     bs = BatchShaper(x_structure=('var1', self.lb), y_structure=('label', 1))
     with pytest.raises(ValueError):
         bs.transform(self.df)
     # Case 3: x structure given as a list with a single leaf, combined with the
     # three-element y leaf from case 1.
     # NOTE(review): the original comment here read "structure must be a tuple
     # (X, y) to conform Keras requirements" - confirm which part of the input
     # is meant to trigger the error.
     bs = BatchShaper(x_structure=[('var1', self.lb)], y_structure=('label', self.le, 1))
     with pytest.raises(ValueError):
         bs.transform(self.df)
예제 #2
0
 def test_none_transformer(self):
     """Check the output for an x leaf whose transformer is ``None``.

     The x structure holds two leaves, the second being ('var2', None). The
     test expects a 2-tuple whose first item is a list of two components,
     the second of which equals the raw values of the 'var2' column.
     """
     x_spec = [('var1', self.lb), ('var2', None)]
     shaper = BatchShaper(x_structure=x_spec, y_structure=('label', self.le))
     result = shaper.transform(self.df)
     assert type(result) is tuple
     assert len(result) == 2
     x_part = result[0]
     assert type(x_part) is list
     assert len(x_part) == 2
     # The untransformed component must match the source column exactly.
     assert np.array_equal(x_part[1], self.df['var2'].values)
예제 #3
0
 def test_basic(self):
     """Check the output for single-leaf x and y structures.

     The test expects a 2-tuple of numpy arrays, with shape (4, 3) for the
     x component and (4,) for the y component.
     """
     shaper = BatchShaper(
         x_structure=('var1', self.lb),
         y_structure=('label', self.le),
     )
     result = shaper.transform(self.df)
     assert type(result) is tuple
     assert len(result) == 2
     x_part, y_part = result
     assert type(x_part) is np.ndarray
     assert type(y_part) is np.ndarray
     assert x_part.shape == (4, 3)
     assert y_part.shape == (4,)
예제 #4
0
 def test_const_component_str(self):
     """Check the output for a constant string leaf ``(None, u'a')`` in x.

     The test expects a 2-tuple whose first item is a list of two
     components. Every element of the second component must equal 'a' and
     its dtype must be '<U1'.
     """
     x_spec = [('var1', self.lb), (None, u'a')]
     shaper = BatchShaper(x_structure=x_spec, y_structure=('label', self.le))
     result = shaper.transform(self.df)
     assert type(result) is tuple
     assert len(result) == 2
     x_part = result[0]
     assert type(x_part) is list
     assert len(x_part) == 2
     const_component = x_part[1]
     assert np.all(const_component == 'a')
     # '<U1' is a single unicode character
     assert const_component.dtype == '<U1'
예제 #5
0
 def test_const_component_float(self):
     """Check the output for a constant float leaf ``(None, 0.)`` in x.

     The test expects a 2-tuple whose first item is a list of two
     components. Every element of the second component must equal 0 and
     its dtype must be float.
     """
     x_spec = [('var1', self.lb), (None, 0.)]
     shaper = BatchShaper(x_structure=x_spec, y_structure=('label', self.le))
     result = shaper.transform(self.df)
     assert type(result) is tuple
     assert len(result) == 2
     x_part = result[0]
     assert type(x_part) is list
     assert len(x_part) == 2
     const_component = x_part[1]
     assert np.all(const_component == 0)
     assert const_component.dtype == float
예제 #6
0
 def test_2d_transformer(self):
     """Check that a 2D transformer is rejected with ValueError.

     A 2D transformer (e.g. OneHotEncoder) requires 2D input, while
     BatchShaper works on a per-column basis and so provides only 1D data.
     Calling ``transform`` with such a transformer must raise ValueError.
     """
     shaper = BatchShaper(
         x_structure=('var1', self.oh),
         y_structure=('label', self.le),
     )
     with pytest.raises(ValueError):
         shaper.transform(self.df)
예제 #7
0
 def test_many_y(self):
     """Check the output when the y structure is a list of two leaves.

     The test expects a 2-tuple: an ndarray of shape (4, 3) for x, and a
     list of two ndarrays for y with shapes (4,) and (4, 4).
     """
     var2_binarizer = LabelBinarizer().fit(self.df['var2'])
     y_spec = [('label', self.le), ('var2', var2_binarizer)]
     shaper = BatchShaper(x_structure=('var1', self.lb), y_structure=y_spec)
     result = shaper.transform(self.df)
     assert type(result) is tuple
     assert len(result) == 2
     x_part, y_part = result
     assert type(x_part) is np.ndarray
     assert type(y_part) is list
     assert len(y_part) == 2
     label_component, var2_component = y_part
     assert type(label_component) is np.ndarray
     assert type(var2_component) is np.ndarray
     assert label_component.shape == (4,)
     assert var2_component.shape == (4, 4)
     assert x_part.shape == (4, 3)
예제 #8
0
 def test_no_return_y(self):
     """Check the output when ``transform`` is called with ``return_y=False``.

     The test expects a single ndarray of shape (4, 3) instead of a tuple.
     """
     shaper = BatchShaper(
         x_structure=('var1', self.lb),
         y_structure=('label', self.le),
     )
     result = shaper.transform(self.df, return_y=False)
     assert type(result) is np.ndarray
     assert result.shape == (4, 3)
예제 #9
0
 def test_missing_field(self):
     """Check that an x leaf naming the column 'missing_name' raises KeyError."""
     shaper = BatchShaper(
         x_structure=('missing_name', self.lb),
         y_structure=('label', self.le, 1),
     )
     with pytest.raises(KeyError):
         shaper.transform(self.df)