Example #1
0
    def test_keepothers(self):
        """Compare the output columns of ``Select('ID')`` with and without ``KeepOthers``.

        Asserts that selecting 'ID' alone yields exactly one column, and that
        adding ``KeepOthers()`` yields the same column set as ``self.df``.
        """
        # NOTE(review): ``(Select('ID'))`` is a parenthesised expression, not a
        # one-element tuple -- confirm this is the input shape intended here.
        only_id = notation((Select('ID'))).fit_transform(self.df)
        self.assertEqual(len(only_id.columns.tolist()), 1)

        # With KeepOthers() alongside, the full column set is expected back.
        with_rest = notation((Select('ID'), KeepOthers())).fit_transform(self.df)
        expected = set(self.df.columns.tolist())
        self.assertSetEqual(expected, set(with_rest.columns.tolist()))
Example #2
0
    def test_notation(self):
        """Check which container type ``notation`` builds for lists and tuples.

        Asserts that a list input yields a ``Pipeline`` (including at each
        nesting level of nested lists) and that a tuple input yields a
        ``PipelineUnion`` whose list member is itself a ``Pipeline``.
        """
        # assertIsInstance reports the offending object and expected type on
        # failure, unlike assertTrue(isinstance(...)).
        pl1 = notation([Impute(0)])
        self.assertIsInstance(pl1, Pipeline)

        # Nested lists: every level checked here is expected to be a Pipeline.
        pl2 = notation([[[Impute(0)]]])
        self.assertIsInstance(pl2, Pipeline)
        self.assertIsInstance(pl2[0], Pipeline)
        self.assertIsInstance(pl2[0][0], Pipeline)

        # Tuple at the top level: a union whose first member came from a list.
        pl3 = notation(([Impute(0)], Select('age')))
        self.assertIsInstance(pl3, PipelineUnion)
        self.assertIsInstance(pl3[0], Pipeline)
Example #3
0
 def test_drop_columns(self):
     """Check that columns sent through ``Select`` + ``Drop`` are absent from the result."""
     drop_cols = ['sex', 'ID']
     # Keep the pipeline under its own name: rebinding ``drop_cols`` to it
     # would make the loop below iterate the pipeline instead of the column
     # names, so the dropped columns would never actually be checked.
     dropper = notation(([Select(drop_cols), Drop()], KeepOthers()))
     r = dropper.fit_transform(self.df)
     r_cols = r.columns.tolist()
     for col in drop_cols:
         self.assertNotIn(col, r_cols)
Example #4
0
    def test_impute_zero(self):
        """Check that after ``Impute(0)`` no originally-null 'age' entry is non-zero."""
        # Record which rows are missing 'age' before the pipeline runs.
        was_missing = pd.isnull(self.df['age'])
        pipeline = notation([Select('age'), Impute(0)])
        result = pipeline.fit_transform(self.df)

        # Count the originally-missing rows whose 'age' still differs from 0.
        leftover = result.loc[was_missing, 'age'] != 0
        self.assertTrue(leftover.sum() == 0)
Example #5
0
 def test_Scale(self):
     """Check that ``Scale(0, 1)`` leaves 'height(cm)' with min 0 and max 1."""
     col = 'height(cm)'
     lower, upper = 0, 1
     # Precondition: the raw column's maximum must be above the upper bound.
     self.assertGreater(self.df[col].max(), upper)
     scaled = notation([Select(col), Scale(lower, upper)]).fit_transform(self.df)
     self.assertEqual(scaled[col].max(), upper)
     self.assertEqual(scaled[col].min(), lower)