class SimpleImputer(Imputer):
    r"""Implementation of simple imputer.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html

    See Also:
        * :class:`niaaml.preprocessing.imputation.Imputer`
    """
    Name = 'Simple Imputer'

    def __init__(self, **kwargs):
        r"""Initialize imputer.

        Arguments:
            **kwargs: Accepted but not used by this initializer.
        """
        self.__simple_imputer = SI(missing_values=np.nan)

    def fit(self, feature):
        r"""Fit imputer.

        A column whose dtype is neither ``int64`` nor ``float64`` is imputed
        with its most frequent value (constant strategy). ``int64`` and
        ``float64`` columns are fitted with the wrapped imputer's current
        parameters.

        Arguments:
            feature (pandas.core.frame.DataFrame): A column from DataFrame of features.
        """
        column_dtype = feature.dtypes.iloc[0]
        # Both comparisons must hold for the column to be non-numeric. The
        # previous `or` was always true, so numeric columns were also filled
        # with their mode.
        if column_dtype != np.dtype('int64') and column_dtype != np.dtype('float64'):
            replacement_val = feature.mode().iloc[0, 0]
            self.__simple_imputer.set_params(**{
                'fill_value': replacement_val,
                'strategy': 'constant'
            })
        self.__simple_imputer.fit(feature)

    def transform(self, feature):
        r"""Transform feature's values.

        Arguments:
            feature (pandas.core.frame.DataFrame): A column from DataFrame of features.

        Returns:
            pandas.core.frame.DataFrame: A transformed column.
        """
        return self.__simple_imputer.transform(feature)

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return Imputer.to_string(self).format(name=self.Name)
def predictor_from_file(config_path, model_path):
    """Rebuild a ``BanditPredictor`` from a JSON config and a saved model.

    Args:
        config_path: Path to the JSON configuration file.
        model_path: Path to the serialized model. Read with ``torch.load``
            when the config's ``model_type`` is ``"neural_bandit"``,
            otherwise unpickled.

    Returns:
        A ``BanditPredictor`` built from the loaded model, the restored
        transforms and imputers, and the remaining config entries.

    Raises:
        Exception: If a transform spec names an unsupported transform.
    """
    with open(config_path, "rb") as config_file:
        config_dict = json.load(config_file)

    model_type = config_dict["model_type"]
    if model_type != "neural_bandit":
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # model files from a trusted source.
        with open(model_path, "rb") as model_file:
            model = pickle.load(model_file)
    else:
        # Build the pytorch model, restore its weights, switch to eval mode.
        model = embed_dnn.EmbedDnn(**config_dict["model_spec"])
        model.load_state_dict(torch.load(model_path))
        model.eval()

    # Restore the per-feature transforms from their stored attributes.
    transforms = {}
    for feature_name, transform_spec in config_dict["transforms"].items():
        if transform_spec is None:
            # id lists don't have transforms
            transforms[feature_name] = None
            continue

        if transform_spec["name"] == "StandardScaler":
            restored = preprocessing.StandardScaler()
            restored.mean_ = np.array(transform_spec["mean"])
            restored.scale_ = np.array(transform_spec["scale"])
            restored.var_ = np.array(transform_spec["var"])
        elif transform_spec["name"] == "OneHotEncoder":
            restored = preprocessing.OneHotEncoder()
            restored.sparse = transform_spec["sparse"]
            restored.categories_ = np.array(transform_spec["categories"])
        else:
            raise Exception(
                f"Don't know how to load transform_spec of type {transform_spec['name']}"
            )
        transforms[feature_name] = restored

    # Restore the per-feature imputers.
    imputers = {}
    for feature_name, imputer_spec in config_dict["imputers"].items():
        if imputer_spec is not None:
            restored_imputer = SimpleImputer()
            restored_imputer.set_params(**imputer_spec["parameters"])
            restored_imputer.statistics_ = np.array(imputer_spec["statistics"])
        else:
            # categoricals & id lists don't have imputers
            restored_imputer = None
        imputers[feature_name] = restored_imputer

    return BanditPredictor(
        feature_config=config_dict["feature_config"],
        float_feature_order=config_dict["float_feature_order"],
        id_feature_order=config_dict["id_feature_order"],
        id_feature_str_to_int_map=config_dict["id_feature_str_to_int_map"],
        transforms=transforms,
        imputers=imputers,
        model=model,
        model_type=model_type,
        reward_type=config_dict["reward_type"],
        model_spec=config_dict["model_spec"],
        dense_features_to_use=config_dict["dense_features_to_use"],
    )
def get_imputer(self):
    """Instantiate and configure the imputer selected for this instance.

    The imputer name is read from ``self.kwargs["imputer"]`` and falls back
    to ``self.IMPUTER``. Optional parameters from
    ``self.kwargs["imputer_params"]`` are applied with ``set_params``. The
    chosen name is logged through ``self.mlflow_log_param`` and the class
    name of the imputer is printed in blue.

    Returns:
        The configured ``SimpleImputer`` or ``KNNImputer`` instance.

    Raises:
        ValueError: If the imputer name is neither ``"SimpleImputer"`` nor
            ``"KNNImputer"``.
    """
    imputer = self.kwargs.get("imputer", self.IMPUTER)
    if imputer == "SimpleImputer":
        imputer_use = SimpleImputer()
    elif imputer == "KNNImputer":
        imputer_use = KNNImputer()
    else:
        # Fail with a clear message rather than an UnboundLocalError on
        # `imputer_use` further down.
        raise ValueError(
            f"Unknown imputer {imputer!r}; expected 'SimpleImputer' or 'KNNImputer'"
        )
    imputer_params = self.kwargs.get("imputer_params", {})
    self.mlflow_log_param("imputer", imputer)
    imputer_use.set_params(**imputer_params)
    print(colored(imputer_use.__class__.__name__, "blue"))
    return imputer_use
def predictor_from_file(config_path, net_path):
    """Rebuild a ``BanditPredictor`` from a JSON config and saved net weights.

    Args:
        config_path: Path to the JSON configuration file.
        net_path: Path to the state dict read with ``torch.load``.

    Returns:
        A ``BanditPredictor`` built from the loaded net, the restored
        transforms and imputers, and the remaining config entries.

    Raises:
        Exception: If a transform spec names an unsupported transform.
    """
    with open(config_path, "rb") as config_file:
        config_dict = json.load(config_file)

    # Build the pytorch model, restore its weights, switch to eval mode.
    net_spec = config_dict["net_spec"]
    net = embed_dnn.EmbedDnn(**net_spec)
    net.load_state_dict(torch.load(net_path))
    net.eval()

    # Restore the per-feature transforms from their stored attributes.
    transforms = {}
    for feature_name, transform_spec in config_dict["transforms"].items():
        if transform_spec is None:
            # id lists don't have transforms
            transforms[feature_name] = None
            continue

        if transform_spec["name"] == "StandardScaler":
            restored = preprocessing.StandardScaler()
            restored.mean_ = np.array(transform_spec["mean"])
            restored.scale_ = np.array(transform_spec["scale"])
            restored.var_ = np.array(transform_spec["var"])
        elif transform_spec["name"] == "OneHotEncoder":
            restored = preprocessing.OneHotEncoder()
            restored.sparse = transform_spec["sparse"]
            restored.categories_ = np.array(transform_spec["categories"])
        else:
            raise Exception(
                f"Don't know how to load transform_spec of type {transform_spec['name']}"
            )
        transforms[feature_name] = restored

    # Restore the per-feature imputers.
    imputers = {}
    for feature_name, imputer_spec in config_dict["imputers"].items():
        if imputer_spec is not None:
            restored_imputer = SimpleImputer()
            restored_imputer.set_params(**imputer_spec["parameters"])
            restored_imputer.statistics_ = np.array(imputer_spec["statistics"])
        else:
            # id lists don't have imputers
            restored_imputer = None
        imputers[feature_name] = restored_imputer

    return BanditPredictor(
        experiment_specific_params=config_dict["experiment_specific_params"],
        float_feature_order=config_dict["float_feature_order"],
        id_feature_order=config_dict["id_feature_order"],
        transforms=transforms,
        imputers=imputers,
        net=net,
        net_spec=config_dict["net_spec"],
    )
class PandasSimpleImputer(TransformerMixin):
    '''
    Wrapper around sklearn.impute.SimpleImputer that returns a
    pandas.DataFrame.
    '''

    def __init__(self, *args, **kwargs):
        """Create the wrapped imputer, forwarding all arguments to it."""
        self.imputer = SimpleImputer(*args, **kwargs)

    def fit(self, X, y=None):
        """Fit the wrapped imputer on ``X``; ``y`` is ignored.

        Returns:
            This instance, so calls can be chained.
        """
        self.imputer.fit(X)
        return self

    def transform(self, X, y=None):
        """Return an imputed copy of ``X``; ``y`` is ignored.

        The imputed values are written into a copy, so the caller's object
        is left unmodified.

        Returns:
            A copy of ``X`` holding the imputed values.
        """
        # Work on a copy: assigning into X directly would overwrite the
        # caller's data as a hidden side effect.
        imputed = X.copy()
        imputed[:] = self.imputer.transform(X)
        return imputed

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return its imputed copy; ``y`` is ignored."""
        return self.fit(X).transform(X)

    def set_params(self, **params):
        """Forward parameters to the wrapped imputer.

        Returns:
            This instance, so calls can be chained.
        """
        self.imputer.set_params(**params)
        return self