def test_keepothers(self):
    """Compare the output columns with and without ``KeepOthers``.

    A lone ``Select('ID')`` is expected to yield exactly one column, while
    pairing it with ``KeepOthers()`` is expected to yield the same set of
    column names as the input frame.
    """
    # The selector is handed over as a bare transformer, not as a tuple.
    only_id = notation(Select('ID'))
    selected = only_id.fit_transform(self.df)
    self.assertEqual(len(selected.columns.tolist()), 1)

    with_others = notation((Select('ID'), KeepOthers()))
    combined = with_others.fit_transform(self.df)
    expected_names = set(self.df.columns.tolist())
    # Set comparison: column order is deliberately not part of the check.
    self.assertSetEqual(expected_names, set(combined.columns.tolist()))
def test_notation(self):
    """Check which container type ``notation`` builds for each input shape.

    Lists (at every nesting depth) are expected to become ``Pipeline``
    objects, and a tuple is expected to become a ``PipelineUnion`` whose
    list member is again a ``Pipeline``.
    """
    flat = notation([Impute(0)])
    self.assertIsInstance(flat, Pipeline)

    # Triple-nested list: each level should itself be a Pipeline.
    nested = notation([[[Impute(0)]]])
    self.assertIsInstance(nested, Pipeline)
    self.assertIsInstance(nested[0], Pipeline)
    self.assertIsInstance(nested[0][0], Pipeline)

    # Tuple at the top level, with a list as its first member.
    union = notation(([Impute(0)], Select('age')))
    self.assertIsInstance(union, PipelineUnion)
    self.assertIsInstance(union[0], Pipeline)
def test_drop_columns(self):
    """Check that columns routed through ``Drop`` are absent from the output.

    The names to drop and the pipeline are kept in separate variables so the
    final loop really iterates over the column names; reusing one name for
    both would make the loop walk the pipeline object instead and leave the
    assertions vacuous.
    """
    dropped_names = ['sex', 'ID']
    dropper = notation(([Select(dropped_names), Drop()], KeepOthers()))
    result_columns = dropper.fit_transform(self.df).columns.tolist()
    for name in dropped_names:
        self.assertNotIn(name, result_columns)
def test_impute_zero(self):
    """Check that rows with a missing ``age`` hold 0 after ``Impute(0)``."""
    # Record which rows are missing before the pipeline runs, so the mask
    # reflects the untouched input frame.
    missing = pd.isnull(self.df['age'])
    imputer = notation([Select('age'), Impute(0)])
    result = imputer.fit_transform(self.df)
    # Count originally-missing rows whose imputed value is anything but 0.
    n_notzero = (result.loc[missing, 'age'] != 0).sum()
    self.assertEqual(n_notzero, 0)
def test_Scale(self):
    """Check that ``Scale(0, 1)`` maps the column's extremes onto 0 and 1."""
    column = 'height(cm)'
    lower, upper = 0, 1
    # Precondition: the raw data must exceed the target maximum, otherwise
    # the post-scaling checks would not demonstrate any rescaling.
    self.assertGreater(self.df[column].max(), upper)

    scaler = notation([Select(column), Scale(lower, upper)])
    scaled_column = scaler.fit_transform(self.df)[column]
    self.assertEqual(scaled_column.max(), upper)
    self.assertEqual(scaled_column.min(), lower)