def test_metadata(self):
     """Verify the layout and contents of ``BatchShaper.get_metadata`` output.

     The shaper is configured with two x components (``var1`` encoded with
     ``self.lb`` and a ``(None, 0.)`` constant) and one y component (``label``
     encoded with ``self.le``). The metadata must be a ``(list, dict)`` tuple
     whose entries each carry name, encoder, shape and dtype.
     """
     x_spec = [('var1', self.lb), (None, 0.)]
     shaper = BatchShaper(x_structure=x_spec, y_structure=('label', self.le))
     meta = shaper.get_metadata(self.df)
     required_keys = ('name', 'encoder', 'shape', 'dtype')

     # overall container: (x metadata, y metadata)
     assert type(meta) is tuple
     assert len(meta) == 2

     # x side: one dict per component
     x_meta = meta[0]
     assert type(x_meta) is list
     assert len(x_meta) == 2
     assert type(x_meta[0]) is dict
     assert type(x_meta[1]) is dict
     assert all(key in entry for entry in x_meta for key in required_keys)

     encoded_meta, constant_meta = x_meta
     assert encoded_meta['name'] == 'var1'
     assert encoded_meta['encoder'] == self.lb
     assert encoded_meta['shape'] == (None, 3)
     assert encoded_meta['dtype'] == np.int64
     # the unnamed constant is expected to get a generated name and no encoder
     assert constant_meta['name'] == 'dummy_constant_0'
     assert constant_meta['encoder'] is None
     assert constant_meta['shape'] == (None, 1)
     assert constant_meta['dtype'] == float

     # y side: a single dict
     y_meta = meta[1]
     assert type(y_meta) is dict
     assert all(key in y_meta for key in required_keys)
     assert y_meta['name'] == 'label'
     assert y_meta['encoder'] == self.le
     assert y_meta['shape'] == (None, 1)
     assert y_meta['dtype'] == np.int64
 def test_none_transformer(self):
     """A column paired with ``None`` as its transformer must come back unchanged."""
     x_spec = [('var1', self.lb), ('var2', None)]
     shaper = BatchShaper(x_structure=x_spec, y_structure=('label', self.le))
     result = shaper.transform(self.df)
     assert type(result) is tuple
     assert len(result) == 2
     features = result[0]
     assert type(features) is list
     assert len(features) == 2
     # second x component had no transformer: compare against the raw column values
     raw_var2 = self.df['var2'].values
     assert np.array_equal(features[1], raw_var2)
 def test_basic(self):
     """A single x leaf and a single y leaf must produce an ``(ndarray, ndarray)`` tuple."""
     shaper = BatchShaper(x_structure=('var1', self.lb), y_structure=('label', self.le))
     result = shaper.transform(self.df)
     assert type(result) is tuple
     assert len(result) == 2
     # unpacking is safe here: the tuple length was just verified
     features, labels = result
     assert type(features) is np.ndarray
     assert type(labels) is np.ndarray
     assert features.shape == (4, 3)
     assert labels.shape == (4,)
 def test_const_component_str(self):
     """A ``(None, <str>)`` leaf must yield an array filled with that string."""
     x_spec = [('var1', self.lb), (None, u'a')]
     shaper = BatchShaper(x_structure=x_spec, y_structure=('label', self.le))
     result = shaper.transform(self.df)
     assert type(result) is tuple
     assert len(result) == 2
     features = result[0]
     assert type(features) is list
     assert len(features) == 2
     constant_part = features[1]
     assert np.all(constant_part == 'a')
     # a single unicode character is expected as the element type
     assert constant_part.dtype == '<U1'
 def test_const_component_float(self):
     """A ``(None, <float>)`` leaf must yield a float array filled with that value."""
     x_spec = [('var1', self.lb), (None, 0.)]
     shaper = BatchShaper(x_structure=x_spec, y_structure=('label', self.le))
     result = shaper.transform(self.df)
     assert type(result) is tuple
     assert len(result) == 2
     features = result[0]
     assert type(features) is list
     assert len(features) == 2
     constant_part = features[1]
     assert np.all(constant_part == 0)
     assert constant_part.dtype == float
 def test_2d_transformer(self):
     """
     Using a transformer that needs 2D input (e.g. OneHotEncoder) must raise ValueError.

     BatchShaper works on a per-column basis and therefore hands each transformer
     1D data only, which such a transformer cannot accept.
     """
     shaper = BatchShaper(x_structure=('var1', self.oh), y_structure=('label', self.le))
     with pytest.raises(ValueError):
         # only the exception matters; the result is deliberately not kept
         shaper.transform(self.df)
 def test_many_y(self):
     """A list of y leaves must produce a list of arrays on the y side of the batch."""
     var2_binarizer = LabelBinarizer().fit(self.df['var2'])
     y_spec = [('label', self.le), ('var2', var2_binarizer)]
     shaper = BatchShaper(x_structure=('var1', self.lb), y_structure=y_spec)
     result = shaper.transform(self.df)
     assert type(result) is tuple
     assert len(result) == 2
     # unpacking is safe here: the tuple length was just verified
     features, targets = result
     assert type(features) is np.ndarray
     assert type(targets) is list
     assert len(targets) == 2
     for target in targets:
         assert type(target) is np.ndarray
     assert targets[0].shape == (4,)
     assert targets[1].shape == (4, 4)
     assert features.shape == (4, 3)
 def test_dummy_var_naming(self):
     """Unnamed constants are numbered from zero on every ``get_metadata`` call."""
     x_spec = [('var1', self.lb), (None, 0.), (None, 1.)]
     shaper = BatchShaper(x_structure=x_spec, y_structure=('label', self.le))

     first = shaper.get_metadata(self.df)
     assert type(first) is tuple
     assert len(first) == 2
     x_meta = first[0]
     assert type(x_meta) is list
     assert len(x_meta) == 3
     assert all(type(entry) is dict for entry in x_meta)
     assert x_meta[1]['name'] == 'dummy_constant_0'
     assert x_meta[2]['name'] == 'dummy_constant_1'

     # a second request must restart the numbering rather than continue it
     second = shaper.get_metadata(self.df)
     assert second[0][1]['name'] == 'dummy_constant_0'
     assert second[0][2]['name'] == 'dummy_constant_1'
 def test_wrong_format(self):
     """Malformed structure leaves must make ``transform`` raise ``ValueError``.

     Leaves of a structure must be tuples of the format
     ``('column name', transformer_instance)``. Each malformed variant below
     is checked separately so that none of them can pass unnoticed.
     """
     # this must throw ValueError - a leaf with three elements does not match
     # the format ('column name', transformer_instance)
     bs = BatchShaper(x_structure=('var1', self.lb), y_structure=('label', self.le, 1))
     with pytest.raises(ValueError):
         bs.transform(self.df)
     # this must throw ValueError - the second element of the leaf is not a
     # transformer instance
     bs = BatchShaper(x_structure=('var1', self.lb), y_structure=('label', 1))
     with pytest.raises(ValueError):
         bs.transform(self.df)
     # this must also throw ValueError - structure must be a tuple (X, y) to conform Keras requirements
     bs = BatchShaper(x_structure=[('var1', self.lb)], y_structure=('label', self.le, 1))
     with pytest.raises(ValueError):
         bs.transform(self.df)
    def test_n_classes(self):
        """Smoke test: ``n_classes`` can be read when a transformer exposes its own ``n_classes``.

        NOTE(review): the value is only read, never asserted - presumably an
        expected value still has to be pinned down; confirm and add the assertion.
        """

        class FixedClassesTransformer:
            # minimal transformer stub advertising a fixed number of classes

            @property
            def n_classes(self):
                return 13

            def transform(self, data):
                return data.values

        stub = FixedClassesTransformer()
        x_spec = [('var1', self.lb), ('var1', stub)]
        shaper = BatchShaper(x_structure=x_spec, y_structure=('label', self.le))
        # property access only; the test passes as long as this does not raise
        _ = shaper.n_classes
    def test_shape(self):
        """``shape`` raises before ``fit_shapes`` and reports per-component shapes afterwards."""

        class FixedShapeTransformer:
            # minimal transformer stub advertising its own output shape

            @property
            def shape(self):
                return None, 11

            def transform(self, data):
                return data.values

        stub = FixedShapeTransformer()
        x_spec = [('var1', self.lb), ('var1', stub)]
        shaper = BatchShaper(x_structure=x_spec, y_structure=('label', self.le))

        # shapes have not been measured (fitted) yet, so access must fail
        with pytest.raises(RuntimeError):
            _ = shaper.shape

        shaper.fit_shapes(self.df)
        measured = shaper.shape
        assert type(measured) is tuple
        x_shapes = measured[0]
        assert type(x_shapes) is list
        assert len(x_shapes) == 2
        assert x_shapes[0] == (None, 3)     # measured
        assert x_shapes[1] == (None, 11)    # taken from the transformer's shape property
        assert measured[1] == (None, 1)     # one dimensional output
 def test_no_return_y(self):
     """With ``return_y=False`` the transform result is the bare x array, not a tuple."""
     shaper = BatchShaper(x_structure=('var1', self.lb), y_structure=('label', self.le))
     features = shaper.transform(self.df, return_y=False)
     assert type(features) is np.ndarray
     assert features.shape == (4, 3)
 def test_missing_field(self):
     """Referencing a column name that is not in the dataframe must raise ``KeyError``.

     NOTE(review): ``y_structure`` carries a 3-element leaf here; the test expects
     the ``KeyError`` for the missing x column - confirm the extra element is intended.
     """
     y_spec = ('label', self.le, 1)
     shaper = BatchShaper(x_structure=('missing_name', self.lb), y_structure=y_spec)
     with pytest.raises(KeyError):
         # only the exception matters; the result is deliberately not kept
         shaper.transform(self.df)