Example #1
0
 def split_by_missingvalues():
     """Convert values to NaN on the shared cleaner, then split its data.

     Operates on the module-level ``cleaner`` object: counts NaN columns of
     ``cleaner.azdias`` before and after ``cleaner.to_nan()``, splits the
     frame with ``Cleaner.split_by_treshhold`` and stores the first part
     back on ``cleaner.azdias``.

     Returns:
         A 4-tuple ``(count_before, count_after, kept_shape, other_shape)``
         where the counts are whatever ``Cleaner.count_nan_columns``
         returns and the shapes belong to the first and second parts of
         the split.
     """
     nan_count_before = Cleaner.count_nan_columns(cleaner.azdias)
     cleaner.to_nan()
     nan_count_after = Cleaner.count_nan_columns(cleaner.azdias)

     # The split yields four values; only the first two are used here.
     # NOTE(review): presumably "below" / "above" the missing-value
     # threshold -- confirm against Cleaner.split_by_treshhold.
     split_result = Cleaner.split_by_treshhold(cleaner.azdias)
     kept, other, _, _ = split_result

     # Subsequent steps work on the first part only.
     cleaner.azdias = kept
     return nan_count_before, nan_count_after, kept.shape, other.shape
Example #2
0
def clean_data(df):
    """Run the cleaning pipeline on ``df`` and return the result.

    Steps, in order:
      1. ``Cleaner.split_by_treshhold(df, treshold=34)`` -- only the first
         of the four returned values is carried forward.
      2. ``Cleaner.to_nan`` using ``cleaner.summary``.
      3. ``recode``, ``extract_features``, ``drop``, ``dummies``.

    Args:
        df: The frame to clean (passed straight to the helpers above).

    Returns:
        The output of the final ``dummies`` step.
    """
    # NOTE(review): the three discarded values are presumably the other
    # half of the split plus per-part statistics -- confirm in Cleaner.
    kept, _other, _less_trh, _gr_trh = Cleaner.split_by_treshhold(
        df, treshold=34
    )

    result = Cleaner.to_nan(kept, cleaner.summary)

    # Each stage takes the previous stage's output; order matters.
    for stage in (recode, extract_features, drop, dummies):
        result = stage(result)
    return result
Example #3
0
 def test_split(self):
     """Check the shapes of the two parts produced by the split.

     Splits ``self.cleaner.azdias`` with ``Cleaner.split_by_treshhold``,
     asserts the expected ``(rows, columns)`` shape of the first two
     returned parts, then stores the first part back on
     ``self.cleaner.azdias``.
     """
     # Other call sites unpack four values from split_by_treshhold, so a
     # strict two-name unpack would raise ValueError before any assertion
     # runs. Starred unpacking takes the first two parts and ignores any
     # extra return values, working for both a 2- and a 4-value result.
     kept, other, *_ = Cleaner.split_by_treshhold(self.cleaner.azdias)

     self.assertEqual(kept.shape, (798293, 85))
     self.assertEqual(other.shape, (92928, 85))

     # Keep only the first part on the cleaner.
     self.cleaner.azdias = kept