Exemple #1
0
def test_output_file_exists(test_data, results_path):
    """Run `impy.util.compare` and check the first line of its log file.

    The `test_data` fixture is called with SHAPE to build the dataset, which
    is imputed with both mode and mean. The comparison is logged to
    `results_path`, and the first line of that file must parse to the
    expected per-strategy score mapping.
    """
    data = test_data(SHAPE)
    labels = np.array([1, 0, 1, 1, 0])
    # Each entry is [strategy name, (imputed copy of the data, labels)].
    imputed_mode = [
        ["mode", (impy.mode(np.copy(data)), labels)],
        ["mean", (impy.mean(np.copy(data)), labels)],
    ]

    impy.util.compare(imputed_mode, log_path=results_path)

    expected = {'mode': [('SVC', 0.0)], 'mean': [('SVC', 0.0)]}
    with open(results_path, 'r') as fin:
        first_line = next(fin)
        logged = ast.literal_eval(first_line)
        assert logged == expected
Exemple #2
0
 def setUp(self):
     """
     Build the fixtures shared by the tests of this case.

     self.data_m: Incomplete dataset/Has missing values (the single cell
         at row 0, column 0 is masked)
     self.imputed_mode: list of [name, (imputed_data, labels)] entries for
         the "mode" and "mean" imputation strategies

     Note: no complete dataset (data_c) is created here.
     """
     mask = np.zeros((5, 5), dtype=bool)
     mask[0][0] = True
     data_m = impy.dataset.test_data(mask=mask)
     # Store the incomplete dataset on the instance: the docstring promises
     # it and test methods read it as self.data_m.
     self.data_m = data_m
     labels = np.array([1, 0, 1, 1, 0])
     self.imputed_mode = []
     # Impute copies so that self.data_m keeps its missing value.
     self.imputed_mode.append(
         ["mode", (impy.mode(np.copy(data_m)), labels)])
     self.imputed_mode.append(
         ["mean", (impy.mean(np.copy(data_m)), labels)])
def test_mode_impute_missing_values():
    """Mode imputation of the module-level `data_m` must leave no NaN."""
    filled = impy.mode(data_m)
    has_nan = np.isnan(filled).any()
    assert not has_nan
def test_median_return_type():
    """The imputed result must be an np.ndarray.

    NOTE(review): despite the name, this exercises `impy.mode`, not
    `impy.median` - confirm which one was intended.
    """
    result = impy.mode(data_m)
    is_ndarray = isinstance(result, np.ndarray)
    assert is_ndarray
Exemple #5
0
 def test_mode_impute_missing_values(self):
     """Mode imputation of `self.data_m` must leave no NaN behind."""
     filled = impy.mode(self.data_m)
     has_nan = np.isnan(filled).any()
     self.assertFalse(has_nan)
Exemple #6
0
 def test_median_return_type(self):
     """The imputed result must be an np.ndarray.

     NOTE(review): despite the name, this exercises `impy.mode`, not
     `impy.median` - confirm which one was intended.
     """
     result = impy.mode(self.data_m)
     is_ndarray = isinstance(result, np.ndarray)
     self.assertTrue(is_ndarray)
Exemple #7
0
"""test_compare.py"""
import numpy as np
import impyute as impy

# Module-level fixtures: a 5x5 test dataset with only cell (0, 0) masked,
# plus mode- and mean-imputed copies paired with the labels.
mask = np.zeros((5, 5), dtype=bool)
mask[0, 0] = True
data_m = impy.dataset.test_data(mask=mask)
labels = np.array([1, 0, 1, 1, 0])
# Each entry is [strategy name, (imputed copy of data_m, labels)].
imputed_mode = [
    ["mode", (impy.mode(np.copy(data_m)), labels)],
    ["mean", (impy.mean(np.copy(data_m)), labels)],
]

def test_output_file_exists():
    """Smoke test: `impy.util.compare` must run without failing.

    The comparison log is written to ./results.txt; nothing is asserted
    about its contents.
    """
    impy.util.compare(imputed_mode, log_path="./results.txt")
Exemple #8
0
 def __init__(self, T, mask, algo, miss_info, kf, notobj, obj, target):
     """Prepare masked copies of a dataset and a table of imputation methods.

     Parameters
     ----------
     T : table indexed by column (`T[target]`, `T[notobj]`, `T.columns`);
         presumably a pandas DataFrame holding the complete data - confirm.
     mask : array-like wrapped into a DataFrame with T's columns; cells
         equal to 1 are replaced by NaN in the working copies.
     algo : stored unchanged as `self.algo`.
     miss_info : mapping read for "ord_col" and "num_col" and, when `obj`
         is non-empty, "ce_encoder".
     kf : stored unchanged as `self.kf`.
     notobj : column selector used to slice T, the masked copy and the mask.
     obj : column selector for the columns imputed with the most frequent
         value; None or [] selects the numeric-only path.
     target : column key into T, or None when there is no target column.

     NOTE(review): any exception raised in the body is printed and
     swallowed, so a failure here leaves the instance only partially
     initialised - confirm this is intended.
     """
     try:
         self.miss_info = miss_info
         self.columns = notobj
         self.ord_num_col = self.miss_info["ord_col"] + self.miss_info[
             "num_col"]
         # Template for per-metric result dicts; deep-copied into cv and MSE.
         metric = {"rmse": {}, "nrmse": {}}
         self.rawT = T
         self.target = target
         if target is not None: self.target_y = T[target]
         else: self.target_y = None
         self.cv = {}
         self.cv.update(deepcopy(metric))
         self.kf = kf
         self.MSE = {}
         self.MSE.update(deepcopy(metric))
         self.result = {}
         self.time_ck = {}
         # Work on a copy so T itself stays unmasked.
         X = deepcopy(T)
         mask = pd.DataFrame(mask, columns=T.columns.tolist())
         self.rawmask = mask
         # Blank out every cell flagged with 1 in the mask.
         X[(mask == 1).values] = np.nan
         # Normalise "no object columns" (None or empty list) to None.
         if obj in [None, []]: obj = None
         else: pass
         ##########################################
         # Slices restricted to the notobj columns; taken before the
         # most-frequent imputation of the obj columns below.
         self.X = X[notobj]
         self.T = T[notobj]
         self.mask = mask[notobj]
         self.notobj = notobj
         ##########################################
         if obj is not None:
             ############ Numeric + Category  #################
             # Fill the masked obj columns with their most frequent value.
             cat_impute = SimpleImputer(strategy="most_frequent")
             X[obj] = cat_impute.fit_transform(X[obj])
             self.true_obj = T[obj]
             self.pd_obj = X[obj]
             ###################################################
             # Encoded copy of T: each mapped column has its values replaced
             # using the encoder's mapping (index -> values), then masked.
             TT = deepcopy(T)
             cat_encoder = miss_info["ce_encoder"]
             for k in cat_encoder.category_mapping:
                 # NOTE(review): map_ is assigned but never used.
                 col, map_ = k["col"], k["mapping"]
                 TT[col] = TT[col].replace(
                     dict(zip(k["mapping"].index, k["mapping"].values)))
             self.full_miss_data = TT
             self.full_miss_data[(mask == 1).values] = np.nan
             # Second copy of T with the obj column values prefixed by
             # "Cols_", then masked the same way.
             mice_data = deepcopy(T)
             for obj_col in obj:
                 mice_data[obj_col] = "Cols_" + mice_data[obj_col]
             self.full_mice_data = mice_data
             self.full_mice_data[(mask == 1).values] = np.nan
         else:
             ########## Numeric  ###############################
             # No obj columns: both datasets are independent copies of the
             # masked notobj slice.
             num_data = deepcopy(self.X)
             num_data[(self.mask == 1).values] = np.nan
             self.full_miss_data = deepcopy(num_data)
             self.full_mice_data = deepcopy(num_data)
             ###################################################
         self.algo = algo
         # Lookup table: method name -> callable taking the data to impute.
         self.method = {
             "MissForest" : lambda x : MissForest(verbose = 0, n_jobs  = -1 ).fit(x) ,
             "mean" : lambda x : impy.mean(x) ,
             "median" : lambda x : impy.median(x) ,
             "mode" : lambda x : impy.mode(x) ,
             "knn" : lambda x : impy.fast_knn(x) ,
             "MICE" : lambda x : impy.mice(x) ,
             "EM" : lambda x : impy.em(x),
             "MultipleImputer" : lambda x : MultipleImputer(n=1, return_list = True).\
             fit_transform(pd.DataFrame(x)).values,
         }
     except Exception as e:
         # Errors are only printed; nothing is re-raised to the caller.
         print(e)
         pass
Exemple #9
0
def test_mode(test_data):
    """Mode-impute a SHAPE-sized dataset and pass it to `return_na_check`."""
    imputed = impy.mode(test_data(SHAPE))
    return_na_check(imputed)