def test_define_variables():
    """Check `_define_variables` against a list, ``None`` and a single string."""
    name_list = ['var1', 'var2', 'var1']
    single_name = 'var1'

    # (input, expected output): a list (duplicates included) and None come
    # back unchanged, a bare string comes back wrapped in a list.
    expectations = [
        (name_list, name_list),
        (None, None),
        (single_name, [single_name]),
    ]

    for given, expected in expectations:
        assert _define_variables(given) == expected
def __init__(
    self,
    tol=0.05,
    n_categories=10,
    max_n_categories=None,
    variables=None,
    replace_with="Rare",
):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    tol : number, default=0.05
        Must lie between 0 and 1 (both inclusive).
    n_categories : int, default=10
        Must be an ``int`` that is not negative.
    max_n_categories : int or None, default=None
        When given, must be an ``int`` that is not negative.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    replace_with : str, default="Rare"
        Must be a string.

    Raises
    ------
    ValueError
        If any of the arguments fails the checks above.
    """
    if tol < 0 or tol > 1:
        raise ValueError("tol takes values between 0 and 1")

    # The type is tested before the sign so that a non-numeric argument
    # (e.g. a string) is reported as a ValueError rather than blowing up
    # with a TypeError inside the ``<`` comparison.
    if not isinstance(n_categories, int) or n_categories < 0:
        raise ValueError("n_categories takes only positive integer numbers")

    if max_n_categories is not None:
        if not isinstance(max_n_categories, int) or max_n_categories < 0:
            raise ValueError(
                "max_n_categories takes only positive integer numbers")

    if not isinstance(replace_with, str):
        raise ValueError("replace_with takes only strings as values.")

    self.tol = tol
    self.n_categories = n_categories
    self.max_n_categories = max_n_categories
    self.variables = _define_variables(variables)
    self.replace_with = replace_with
def __init__(self, missing_only=True, variables=None):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    missing_only : bool, default=True
        Must be a real ``bool``.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``missing_only`` is not a ``bool``.
    """
    flag_is_bool = isinstance(missing_only, bool)
    if not flag_is_bool:
        raise ValueError("missing_only takes values True or False")

    self.variables = _define_variables(variables)
    self.missing_only = missing_only
def __init__(self, base='e', variables=None):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    base : str, default='e'
        Either ``'e'`` or ``'10'``.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``base`` is not one of the two accepted values.
    """
    accepted_bases = ('e', '10')
    if base not in accepted_bases:
        raise ValueError("base can take only '10' or 'e' as values")

    self.variables = _define_variables(variables)
    self.base = base
def __init__(self, how='missing_only', variables=None):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    how : str, default='missing_only'
        Either ``'missing_only'`` or ``'all'``.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``how`` is not one of the two accepted values.
    """
    accepted_modes = ('missing_only', 'all')
    if how not in accepted_modes:
        raise ValueError("how takes only values 'missing_only' or 'all'")

    self.variables = _define_variables(variables)
    self.how = how
def __init__(
    self,
    cv: int = 3,
    scoring: str = "neg_mean_squared_error",
    variables: Optional[List[str]] = None,
    param_grid: Optional[Dict[str, Union[str, int, float, List[int]]]] = None,
    regression: bool = True,
    random_state: Optional[int] = None,
) -> None:
    """Validate and store the constructor arguments.

    Parameters
    ----------
    cv : int, default=3
        Must be an ``int`` that is not negative.
    scoring : str, default="neg_mean_squared_error"
        Stored as given.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    param_grid : dict or None, default=None
        ``None`` is replaced by ``{"max_depth": [1, 2, 3, 4]}``.
    regression : bool, default=True
        Must be a real ``bool``.
    random_state : int or None, default=None
        Stored as given.

    Raises
    ------
    ValueError
        If ``cv`` or ``regression`` fails the checks above.
    """
    # A fresh dict is built on every call, so instances never share it.
    grid = {"max_depth": [1, 2, 3, 4]} if param_grid is None else param_grid

    if not (isinstance(cv, int) and cv >= 0):
        raise ValueError("cv can only take only positive integers")

    if not isinstance(regression, bool):
        raise ValueError("regression can only take True or False")

    self.cv = cv
    self.scoring = scoring
    self.regression = regression
    self.variables = _define_variables(variables)
    self.param_grid = grid
    self.random_state = random_state
def __init__(self, encoding_method='count', variables=None):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    encoding_method : str, default='count'
        Either ``'count'`` or ``'frequency'``.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``encoding_method`` is not one of the two accepted values.
    """
    accepted_methods = ('count', 'frequency')
    if encoding_method not in accepted_methods:
        raise ValueError(
            "encoding_method takes only values 'count' and 'frequency'")

    self.encoding_method = encoding_method
    self.variables = _define_variables(variables)
def __init__(
    self,
    variables: Optional[List[str]] = None,
    random_state: Optional[Union[int, str, List[str]]] = None,
    seed: str = "general",
    seeding_method: str = "add",
) -> None:
    """Validate and store the constructor arguments.

    Parameters
    ----------
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    random_state : int, str, list of str or None, default=None
        With ``seed="general"`` a truthy value must be an ``int``; with
        ``seed="observation"`` a truthy value is mandatory.
    seed : str, default="general"
        Either ``"general"`` or ``"observation"``.
    seeding_method : str, default="add"
        Either ``"add"`` or ``"multiply"``.

    Raises
    ------
    ValueError
        If any of the arguments fails the checks above.
    """
    if seed not in ("general", "observation"):
        raise ValueError("seed takes only values 'general' or 'observation'")

    if seeding_method not in ("add", "multiply"):
        raise ValueError("seeding_method takes only values 'add' or 'multiply'")

    # A falsy random_state (None, 0, ...) is accepted as-is for 'general'.
    if seed == "general" and random_state and not isinstance(random_state, int):
        raise ValueError(
            "if seed == 'general' the random state must take an integer")

    if seed == "observation" and not random_state:
        raise ValueError(
            "if seed == 'observation' the random state must take the name of "
            "one or more variables which will be used to seed the imputer")

    self.variables = _define_variables(variables)
    self.random_state = random_state
    self.seed = seed
    self.seeding_method = seeding_method
def __init__(self, encoding_method='woe', variables=None):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    encoding_method : str, default='woe'
        One of ``'woe'``, ``'ratio'`` or ``'log_ratio'``.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``encoding_method`` is not one of the accepted values.
    """
    accepted_methods = ('woe', 'ratio', 'log_ratio')
    if encoding_method not in accepted_methods:
        raise ValueError(
            "encoding_method takes only values 'woe', 'ratio' and 'log_ratio'")

    self.encoding_method = encoding_method
    self.variables = _define_variables(variables)
def __init__(self, tol=1, variables=None):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    tol : number, default=1
        Must lie between 0 and 1 (both inclusive).
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``tol`` is below 0 or above 1.
    """
    out_of_range = tol < 0 or tol > 1
    if out_of_range:
        raise ValueError("tol takes values between 0 and 1")

    self.tol = tol
    self.variables = _define_variables(variables)
def __init__(self, imputation_method='median', variables=None):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    imputation_method : str, default='median'
        Either ``'median'`` or ``'mean'``.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``imputation_method`` is not one of the two accepted values.
    """
    accepted_methods = ('median', 'mean')
    if imputation_method not in accepted_methods:
        raise ValueError(
            "imputation_method takes only values 'median' or 'mean'")

    self.imputation_method = imputation_method
    self.variables = _define_variables(variables)
def __init__(self, exp=0.5, variables=None):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    exp : float or int, default=0.5
        Must be a ``float`` or an ``int``.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``exp`` is neither a ``float`` nor an ``int``.
    """
    if not isinstance(exp, (float, int)):
        raise ValueError('exp must be a float or an int')

    self.exp = exp
    self.variables = _define_variables(variables)
def __init__(self, variables=None, random_state=None, seed='general',
             seeding_method='add'):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    random_state : optional, default=None
        With ``seed='general'`` a truthy value must be an ``int``; with
        ``seed='observation'`` a truthy value is mandatory.
    seed : str, default='general'
        Either ``'general'`` or ``'observation'``.
    seeding_method : str, default='add'
        Either ``'add'`` or ``'multiply'``.

    Raises
    ------
    ValueError
        If any of the arguments fails the checks above.
    """
    if seed not in ('general', 'observation'):
        raise ValueError("seed takes only values 'general' or 'observation'")

    if seeding_method not in ('add', 'multiply'):
        raise ValueError("seeding_method takes only values 'add' or 'multiply'")

    # A falsy random_state (None, 0, ...) is accepted as-is for 'general'.
    if seed == 'general' and random_state and not isinstance(random_state, int):
        raise ValueError(
            "if seed == 'general' the random state must take an integer")

    if seed == 'observation' and not random_state:
        raise ValueError(
            "if seed == 'observation' the random state must take the name of "
            "one or more variables which will be used to seed the imputer")

    self.variables = _define_variables(variables)
    self.random_state = random_state
    self.seed = seed
    self.seeding_method = seeding_method
def __init__(self, encoding_method='ordered', variables=None):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    encoding_method : str, default='ordered'
        Either ``'ordered'`` or ``'arbitrary'``.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``encoding_method`` is not one of the two accepted values.
    """
    accepted_methods = ('ordered', 'arbitrary')
    if encoding_method not in accepted_methods:
        raise ValueError(
            "encoding_method takes only values 'ordered' and 'arbitrary'")

    self.encoding_method = encoding_method
    self.variables = _define_variables(variables)
def __init__(
    self,
    cv=3,
    scoring="neg_mean_squared_error",
    variables=None,
    param_grid=None,
    regression=True,
    random_state=None,
):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    cv : int, default=3
        Must be an ``int`` that is not negative.
    scoring : default="neg_mean_squared_error"
        Stored as given.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    param_grid : dict or None, default=None
        ``None`` is replaced by ``{"max_depth": [1, 2, 3, 4]}``.
    regression : bool, default=True
        Must be a real ``bool``.
    random_state : optional, default=None
        Stored as given.

    Raises
    ------
    ValueError
        If ``cv`` or ``regression`` fails the checks above.
    """
    # A fresh dict is built on every call, so instances never share it.
    grid = {"max_depth": [1, 2, 3, 4]} if param_grid is None else param_grid

    cv_is_valid = isinstance(cv, int) and cv >= 0
    if not cv_is_valid:
        raise ValueError("cv can only take only positive integers")

    if not isinstance(regression, bool):
        raise ValueError("regression can only take True or False")

    self.cv = cv
    self.scoring = scoring
    self.regression = regression
    self.variables = _define_variables(variables)
    self.param_grid = grid
    self.random_state = random_state
def __init__(
    self,
    capping_method: str = "gaussian",
    tail: str = "right",
    fold: Union[int, float] = 3,
    variables: Optional[List] = None,
    missing_values: str = "raise",
) -> None:
    """Validate and store the constructor arguments.

    Parameters
    ----------
    capping_method : str, default="gaussian"
        One of ``"gaussian"``, ``"iqr"`` or ``"quantiles"``.
    tail : str, default="right"
        One of ``"right"``, ``"left"`` or ``"both"``.
    fold : int or float, default=3
        Must be greater than 0, and at most 0.2 when ``capping_method`` is
        ``"quantiles"``.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    missing_values : str, default="raise"
        Either ``"raise"`` or ``"ignore"``.

    Raises
    ------
    ValueError
        If any of the arguments fails the checks above.
    """
    accepted_methods = ("gaussian", "iqr", "quantiles")
    accepted_tails = ("right", "left", "both")
    accepted_missing = ("raise", "ignore")

    if capping_method not in accepted_methods:
        raise ValueError(
            "capping_method takes only values 'gaussian', 'iqr' or 'quantiles'")

    if tail not in accepted_tails:
        raise ValueError("tail takes only values 'right', 'left' or 'both'")

    if fold <= 0:
        raise ValueError("fold takes only positive numbers")

    if capping_method == "quantiles" and fold > 0.2:
        raise ValueError(
            "with capping_method ='quantiles', fold takes values between 0 "
            "and 0.20 only.")

    if missing_values not in accepted_missing:
        raise ValueError("missing_values takes only values 'raise' or 'ignore'")

    self.capping_method = capping_method
    self.tail = tail
    self.fold = fold
    self.variables = _define_variables(variables)
    self.missing_values = missing_values
def __init__(self, arbitrary_number=999, variables=None):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    arbitrary_number : int or float, default=999
        Must be an ``int`` or a ``float``.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``arbitrary_number`` is neither an ``int`` nor a ``float``.
    """
    # Guard clause: reject anything that is not numeric up front.
    if not isinstance(arbitrary_number, (int, float)):
        raise ValueError('arbitrary_number must be numeric of type int or float')

    self.arbitrary_number = arbitrary_number
    self.variables = _define_variables(variables)
def __init__(self, features_to_drop=None):
    """Store the features to drop and reject a missing or empty selection.

    Parameters
    ----------
    features_to_drop : optional, default=None
        Handed to ``_define_variables``; the result is stored as
        ``self.features_to_drop`` and must contain at least one entry.

    Raises
    ------
    ValueError
        If the resulting selection is ``None`` or has length 0.
    """
    self.features_to_drop = _define_variables(features_to_drop)

    # ``_define_variables`` hands ``None`` back unchanged, so the default
    # argument would otherwise crash in ``len(None)`` with a TypeError
    # instead of producing the explanatory ValueError below.
    if self.features_to_drop is None or len(self.features_to_drop) == 0:
        raise ValueError(
            "List of features to drop cannot be empty. Please pass at least 1 "
            "variable to drop.")
def __init__(self, variables: List[str] = None, transformer=None) -> None:
    """Store the wrapped transformer and reject a sparse ``OneHotEncoder``.

    Parameters
    ----------
    variables : list of str, optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    transformer : optional, default=None
        Object to wrap; stored as ``self.transformer``.

    Raises
    ------
    AttributeError
        If ``transformer`` is a ``OneHotEncoder`` configured to return
        sparse output.
    """
    self.variables = _define_variables(variables)
    self.transformer = transformer

    if isinstance(self.transformer, OneHotEncoder):
        # scikit-learn 1.2 renamed ``sparse`` to ``sparse_output`` (the old
        # attribute then defaults to the string "deprecated") and 1.4
        # removed ``sparse`` altogether. An explicitly set legacy ``sparse``
        # value still wins; otherwise fall back to ``sparse_output``.
        sparse_flag = getattr(self.transformer, "sparse", "deprecated")
        if isinstance(sparse_flag, str) and sparse_flag == "deprecated":
            sparse_flag = getattr(self.transformer, "sparse_output", False)

        if sparse_flag:
            raise AttributeError(
                "The SklearnTransformerWrapper can only wrap the OneHotEncoder if you "
                "set its sparse attribute to False")
def __init__(self, missing_only: bool = True,
             variables: Optional[List[str]] = None) -> None:
    """Validate and store the constructor arguments.

    Parameters
    ----------
    missing_only : bool, default=True
        Must be a real ``bool``.
    variables : list of str or None, default=None
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``missing_only`` is not a ``bool``.
    """
    flag_is_bool = isinstance(missing_only, bool)
    if not flag_is_bool:
        raise ValueError("missing_only takes values True or False")

    self.variables = _define_variables(variables)
    self.missing_only = missing_only
def __init__(
    self, exp: Union[float, int] = 0.5, variables: Union[List[str], str] = None
):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    exp : float or int, default=0.5
        Must be a ``float`` or an ``int``.
    variables : list of str or str, optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``exp`` is neither a ``float`` nor an ``int``.
    """
    exp_is_numeric = isinstance(exp, float) or isinstance(exp, int)
    if not exp_is_numeric:
        raise ValueError("exp must be a float or an int")

    self.exp = exp
    self.variables = _define_variables(variables)
def __init__(self, encoding_method: str = "ordered",
             variables: Optional[List[str]] = None) -> None:
    """Validate and store the constructor arguments.

    Parameters
    ----------
    encoding_method : str, default="ordered"
        Either ``"ordered"`` or ``"arbitrary"``.
    variables : list of str or None, default=None
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``encoding_method`` is not one of the two accepted values.
    """
    accepted_methods = ("ordered", "arbitrary")
    if encoding_method not in accepted_methods:
        raise ValueError(
            "encoding_method takes only values 'ordered' and 'arbitrary'")

    self.encoding_method = encoding_method
    self.variables = _define_variables(variables)
def __init__(self, encoding_method: str = "count",
             variables: Optional[List[str]] = None) -> None:
    """Validate and store the constructor arguments.

    Parameters
    ----------
    encoding_method : str, default="count"
        Either ``"count"`` or ``"frequency"``.
    variables : list of str or None, default=None
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``encoding_method`` is not one of the two accepted values.
    """
    accepted_methods = ("count", "frequency")
    if encoding_method not in accepted_methods:
        raise ValueError(
            "encoding_method takes only values 'count' and 'frequency'")

    self.encoding_method = encoding_method
    self.variables = _define_variables(variables)
def __init__(self, imputation_method: str = "median",
             variables: Optional[List[str]] = None) -> None:
    """Validate and store the constructor arguments.

    Parameters
    ----------
    imputation_method : str, default="median"
        Either ``"median"`` or ``"mean"``.
    variables : list of str or None, default=None
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``imputation_method`` is not one of the two accepted values.
    """
    accepted_methods = ("median", "mean")
    if imputation_method not in accepted_methods:
        raise ValueError(
            "imputation_method takes only values 'median' or 'mean'")

    self.imputation_method = imputation_method
    self.variables = _define_variables(variables)
def __init__(self, encoding_method: str = "ratio",
             variables: Optional[List[str]] = None) -> None:
    """Validate and store the constructor arguments.

    Parameters
    ----------
    encoding_method : str, default="ratio"
        Either ``"ratio"`` or ``"log_ratio"``.
    variables : list of str or None, default=None
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.

    Raises
    ------
    ValueError
        If ``encoding_method`` is not one of the two accepted values.
    """
    accepted_methods = ("ratio", "log_ratio")
    if encoding_method not in accepted_methods:
        raise ValueError(
            "encoding_method takes only values 'ratio' and 'log_ratio'")

    self.encoding_method = encoding_method
    self.variables = _define_variables(variables)
def __init__(self, encoding_method='arbitrary', cv=3,
             scoring='neg_mean_squared_error', param_grid=None,
             regression=True, random_state=None, variables=None):
    """Store the constructor arguments.

    Parameters
    ----------
    encoding_method : default='arbitrary'
        Stored as given.
    cv : default=3
        Stored as given.
    scoring : default='neg_mean_squared_error'
        Stored as given.
    param_grid : dict or None, default=None
        ``None`` is replaced by ``{'max_depth': [1, 2, 3, 4]}``.
    regression : default=True
        Stored as given.
    random_state : default=None
        Stored as given.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    """
    # The default grid used to be a dict literal in the signature, which
    # Python evaluates once, so every instance shared (and could mutate)
    # the same object. Building it here gives each instance its own copy.
    if param_grid is None:
        param_grid = {'max_depth': [1, 2, 3, 4]}

    self.encoding_method = encoding_method
    self.cv = cv
    self.scoring = scoring
    self.regression = regression
    self.param_grid = param_grid
    self.random_state = random_state
    self.variables = _define_variables(variables)
def __init__(self, top_categories=None, variables=None, drop_last=False):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    top_categories : int or None, default=None
        A truthy value must be an ``int``; falsy values are not checked.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    drop_last : bool, default=False
        Must compare equal to ``True`` or ``False``.

    Raises
    ------
    ValueError
        If ``top_categories`` or ``drop_last`` fails the checks above.
    """
    if top_categories and not isinstance(top_categories, int):
        raise ValueError(
            "top_categories takes only integer numbers, 1, 2, 3, etc.")

    if drop_last not in (True, False):
        raise ValueError("drop_last takes only True or False")

    self.top_categories = top_categories
    self.drop_last = drop_last
    self.variables = _define_variables(variables)
def __init__(self, imputation_method='missing', fill_value='Missing',
             variables=None, return_object=False):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    imputation_method : str, default='missing'
        Either ``'missing'`` or ``'frequent'``.
    fill_value : str, default='Missing'
        Must be a string.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    return_object : default=False
        Stored as given.

    Raises
    ------
    ValueError
        If ``imputation_method`` or ``fill_value`` fails the checks above.
    """
    accepted_methods = ('missing', 'frequent')
    if imputation_method not in accepted_methods:
        raise ValueError(
            "imputation_method takes only values 'missing' or 'frequent'")

    fill_is_text = isinstance(fill_value, str)
    if not fill_is_text:
        raise ValueError("parameter 'fill_value' should be string")

    self.imputation_method = imputation_method
    self.fill_value = fill_value
    self.variables = _define_variables(variables)
    self.return_object = return_object
def __init__(self, imputation_method='missing', variables=None,
             return_object=False):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    imputation_method : str, default='missing'
        Either ``'missing'`` or ``'frequent'``.
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    return_object : default=False
        Stored as given.

    Raises
    ------
    ValueError
        If ``imputation_method`` is not one of the two accepted values.
    """
    accepted_methods = ('missing', 'frequent')
    if imputation_method not in accepted_methods:
        raise ValueError(
            "imputation_method takes only values 'missing' or 'frequent'")

    self.imputation_method = imputation_method
    self.variables = _define_variables(variables)
    self.return_object = return_object
def __init__(self, variables=None, method="pearson", threshold=0.8):
    """Validate and store the constructor arguments.

    Parameters
    ----------
    variables : optional
        Handed to ``_define_variables``; the result is stored as
        ``self.variables``.
    method : str, default="pearson"
        One of ``"pearson"``, ``"spearman"`` or ``"kendall"``.
    threshold : float, default=0.8
        Must be a ``float`` between 0 and 1 (both inclusive).

    Raises
    ------
    ValueError
        If ``method`` or ``threshold`` fails the checks above.
    """
    if method not in ["pearson", "spearman", "kendall"]:
        raise ValueError(
            "correlation method takes only values 'pearson', 'spearman', 'kendall'"
        )

    # The type is tested first so that a non-numeric threshold is reported
    # as a ValueError rather than failing with a TypeError inside the range
    # comparison.
    if not isinstance(threshold, float) or threshold < 0 or threshold > 1:
        raise ValueError("threshold must be a float between 0 and 1")

    self.variables = _define_variables(variables)
    self.method = method
    self.threshold = threshold