def test_freq_nan_by_features(self):
     _n_mis: int = DATA_SET.isnull().astype(int).sum().sum()
     _n_features: int = len(DATA_SET.columns)
     _freq_nan_by_features: dict = MissingDataAnalysis(
         df=DATA_SET, percentages=False).freq_nan_by_features()
     _count_mis: int = 0
     for feature in _freq_nan_by_features.keys():
         _count_mis += _freq_nan_by_features.get(feature)
     self.assertTrue(expr=(_n_mis == _count_mis) and (
         _n_features == len(_freq_nan_by_features)))
 def test_get_nan_idx_by_cases(self):
     _nan_idx_by_cases: dict = MissingDataAnalysis(
         df=DATA_SET).get_nan_idx_by_cases()
     _is_nan: bool = True
     for case in _nan_idx_by_cases.keys():
         for idx in _nan_idx_by_cases.get(case):
             if not pd.isnull(DATA_SET.loc[idx, :].values).all():
                 _is_nan = False
                 break
         if not _is_nan:
             break
 def test_get_nan_idx_by_features(self):
     _nan_idx_by_features: dict = MissingDataAnalysis(
         df=DATA_SET).get_nan_idx_by_features()
     _is_nan: bool = True
     for feature in _nan_idx_by_features.keys():
         for idx in _nan_idx_by_features.get(feature):
             if not pd.isnull(DATA_SET.loc[idx, feature]):
                 _is_nan = False
                 break
         if not _is_nan:
             break
     self.assertTrue(expr=_is_nan)
 def test_mice(self):
     _has_nan_before: bool = MissingDataAnalysis(df=DATA_SET).has_nan()
     _has_nan_after: bool = MissingDataAnalysis(df=MultipleImputation(
         df=DATA_SET).mice()).has_nan()
     self.assertGreater(a=int(_has_nan_before), b=int(_has_nan_after))
 def test_has_nan(self):
     self.assertTrue(expr=MissingDataAnalysis(df=DATA_SET).has_nan())
 def test_clean_nan(self):
     self.assertTrue(expr=len(MissingDataAnalysis(
         df=DATA_SET).clean_nan()) <= DATA_SET.shape[0])