import pytest
import miceforest as mf
from miceforest.ImputationSchema import _ImputationSchema
from sklearn.datasets import load_boston
import pandas as pd
import numpy as np

# Shared fixtures for the tests in this module.
#
# A single seeded RandomState is handed to every ampute_data call below, so
# each call advances the same generator. Keep the calls in this order;
# reordering them would change the data each fixture receives.
random_state = np.random.RandomState(1991)

# Only the feature matrix (element 0 of the returned pair) is kept.
# Columns 3 and 8 are cast to categorical while the labels are still
# integers; the labels are converted to strings afterwards.
boston = pd.DataFrame(load_boston(return_X_y=True)[0]).astype(
    {3: "category", 8: "category"}
)
boston.columns = list(map(str, boston.columns))

# Fixture 1: amputation with no restriction on which variables are touched.
boston_amp = mf.ampute_data(
    boston,
    perc=0.25,
    random_state=random_state,
)

# Fixture 2: amputation limited to a subset of the columns.
somevars = ["1", "2", "5", "10"]
boston_amp_somevars = mf.ampute_data(
    boston,
    variables=somevars,
    perc=0.25,
    random_state=random_state,
)

# Fixture 3: amputation limited to exactly one column.
onevar = ["1"]
boston_amp_onevar = mf.ampute_data(
    boston,
    variables=onevar,
    perc=0.25,
    random_state=random_state,
)
from sklearn.datasets import load_boston import pandas as pd import numpy as np import miceforest as mf # Make random state and load data # Define data random_state = np.random.RandomState(5) boston = pd.DataFrame(load_boston(return_X_y=True)[0]) rows = boston.shape[0] boston.columns = [str(i) for i in boston.columns] boston["3"] = boston["3"].map({0: 'a', 1: 'b'}).astype('category') boston["8"] = boston["8"].astype("category") boston_amp = mf.ampute_data(boston, perc=0.25, random_state=random_state) random_seed_array = np.random.choice(range(1000), size=rows, replace=False).astype("int32") def test_pandas_reproducibility(): datasets = 2 kernel = mf.ImputationKernel(data=boston_amp, datasets=datasets, initialization="random", save_models=2, random_state=2) kernel2 = mf.ImputationKernel(data=boston_amp, datasets=datasets, initialization="random", save_models=2,