# Example no. 1
0
import pytest
import miceforest as mf
from miceforest.ImputationSchema import _ImputationSchema
from sklearn.datasets import load_boston
import pandas as pd
import numpy as np

# Module-level fixtures. A single seeded generator is shared by every
# amputation call below, so the calls must stay in this order to reproduce
# the same data sets.
random_state = np.random.RandomState(1991)
boston = pd.DataFrame(load_boston(return_X_y=True)[0])
# Cast columns 3 and 8 to categoricals while the labels are still integers,
# then switch every column label to its string form.
boston = boston.astype({3: "category", 8: "category"})
boston.columns = list(map(str, boston.columns))

# Variant 1: amputation with no `variables` restriction.
boston_amp = mf.ampute_data(boston, perc=0.25, random_state=random_state)

# Variant 2: amputation restricted to a handful of columns.
somevars = ["1", "2", "5", "10"]
boston_amp_somevars = mf.ampute_data(
    boston, variables=somevars, perc=0.25, random_state=random_state
)

# Variant 3: amputation restricted to a single column.
onevar = ["1"]
boston_amp_onevar = mf.ampute_data(
    boston, variables=onevar, perc=0.25, random_state=random_state
)
from sklearn.datasets import load_boston
import pandas as pd
import numpy as np
import miceforest as mf

# Module-level fixtures: the Boston feature table with two categorical
# columns, an amputed copy of it, and one integer seed per row.
random_state = np.random.RandomState(5)
boston = pd.DataFrame(load_boston(return_X_y=True)[0])
rows = boston.shape[0]
boston.columns = [str(i) for i in boston.columns]
# Relabel column "3" (0 -> 'a', 1 -> 'b') so it becomes a string-valued
# categorical; any value not listed in the mapping would turn into NaN.
boston["3"] = boston["3"].map({0: 'a', 1: 'b'}).astype('category')
boston["8"] = boston["8"].astype("category")
boston_amp = mf.ampute_data(boston, perc=0.25, random_state=random_state)
# Draw the per-row seeds from a dedicated seeded generator instead of NumPy's
# global (unseeded) RNG, so this fixture is identical on every run. A separate
# generator is used so the shared `random_state` stream is not advanced.
# replace=False keeps the seeds unique (requires rows <= 1000).
random_seed_array = np.random.RandomState(5).choice(
    range(1000), size=rows, replace=False
).astype("int32")


def test_pandas_reproducibility():

    datasets = 2
    kernel = mf.ImputationKernel(data=boston_amp,
                                 datasets=datasets,
                                 initialization="random",
                                 save_models=2,
                                 random_state=2)

    kernel2 = mf.ImputationKernel(data=boston_amp,
                                  datasets=datasets,
                                  initialization="random",
                                  save_models=2,