Example #1
0
def test_define_variables():
    """Check how ``_define_variables`` treats a list, ``None`` and a string."""
    vars_ls = ['var1', 'var2', 'var1']
    vars_str = 'var1'

    # A list is expected to come back equal to the input, duplicates included.
    assert _define_variables(vars_ls) == vars_ls
    # None is expected to pass through; compare by identity rather than with
    # ``==`` so an object with a permissive ``__eq__`` cannot satisfy the test.
    assert _define_variables(None) is None
    # A single string is expected to be wrapped in a one-element list.
    assert _define_variables(vars_str) == [vars_str]
Example #2
0
    def __init__(
        self,
        tol=0.05,
        n_categories=10,
        max_n_categories=None,
        variables=None,
        replace_with="Rare",
    ):
        """Validate the constructor arguments and store them on the instance.

        Parameters
        ----------
        tol : number, default=0.05
            Must not be below 0 or above 1.
        n_categories : int, default=10
            Must be an integer that is not negative.
        max_n_categories : int or None, default=None
            When not None, must be an integer that is not negative.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.
        replace_with : str, default="Rare"
            Must be a string.

        Raises
        ------
        ValueError
            If one of the checks above fails.
        """
        if tol < 0 or tol > 1:
            raise ValueError("tol takes values between 0 and 1")

        # Test the type before comparing: a non-numeric value would otherwise
        # raise TypeError on ``< 0`` instead of the intended ValueError.
        if not isinstance(n_categories, int) or n_categories < 0:
            raise ValueError(
                "n_categories takes only positive integer numbers")

        if max_n_categories is not None:
            # Same ordering as above, for the same reason.
            if not isinstance(max_n_categories, int) or max_n_categories < 0:
                raise ValueError(
                    "max_n_categories takes only positive integer numbers")

        if not isinstance(replace_with, str):
            raise ValueError("replace_with takes only strings as values.")

        self.tol = tol
        self.n_categories = n_categories
        self.max_n_categories = max_n_categories
        self.variables = _define_variables(variables)
        self.replace_with = replace_with
    def __init__(self, missing_only=True, variables=None):
        """Check ``missing_only`` and store both arguments on the instance.

        Parameters
        ----------
        missing_only : bool, default=True
            Must be a ``bool``.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``missing_only`` is not a ``bool``.
        """
        is_flag = isinstance(missing_only, bool)
        if not is_flag:
            raise ValueError("missing_only takes values True or False")

        self.variables = _define_variables(variables)
        self.missing_only = missing_only
    def __init__(self, base='e', variables=None):
        """Check ``base`` and store both arguments on the instance.

        Parameters
        ----------
        base : str, default='e'
            Either 'e' or '10'.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``base`` is not one of the two accepted values.
        """
        accepted_bases = ('e', '10')
        if base not in accepted_bases:
            raise ValueError("base can take only '10' or 'e' as values")

        self.variables = _define_variables(variables)
        self.base = base
    def __init__(self, how='missing_only', variables=None):
        """Check ``how`` and store both arguments on the instance.

        Parameters
        ----------
        how : str, default='missing_only'
            Either 'missing_only' or 'all'.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``how`` is not one of the two accepted values.
        """
        if how in ('missing_only', 'all'):
            self.variables = _define_variables(variables)
            self.how = how
        else:
            raise ValueError("how takes only values 'missing_only' or 'all'")
    def __init__(
        self,
        cv: int = 3,
        scoring: str = "neg_mean_squared_error",
        variables: Optional[List[str]] = None,
        param_grid: Optional[Dict[str, Union[str, int, float, List[int]]]] = None,
        regression: bool = True,
        random_state: Optional[int] = None,
    ) -> None:
        """Validate ``cv`` and ``regression`` and store every argument.

        Parameters
        ----------
        cv : int, default=3
            Must be an integer that is not negative.
        scoring : str, default="neg_mean_squared_error"
            Stored unchanged.
        variables : list of str or None, default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.
        param_grid : dict or None, default=None
            When None, ``{"max_depth": [1, 2, 3, 4]}`` is stored instead.
        regression : bool, default=True
            Must be a ``bool``.
        random_state : int or None, default=None
            Stored unchanged.

        Raises
        ------
        ValueError
            If ``cv`` or ``regression`` fails its check.
        """
        # A fresh dict is built on each call, so instances never share it.
        grid = {"max_depth": [1, 2, 3, 4]} if param_grid is None else param_grid

        cv_is_valid = isinstance(cv, int) and cv >= 0
        if not cv_is_valid:
            raise ValueError("cv can only take only positive integers")

        regression_is_flag = isinstance(regression, bool)
        if not regression_is_flag:
            raise ValueError("regression can only take True or False")

        self.cv = cv
        self.scoring = scoring
        self.regression = regression
        self.variables = _define_variables(variables)
        self.param_grid = grid
        self.random_state = random_state
Example #7
0
    def __init__(self, encoding_method='count', variables=None):
        """Check ``encoding_method`` and store both arguments on the instance.

        Parameters
        ----------
        encoding_method : str, default='count'
            Either 'count' or 'frequency'.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``encoding_method`` is not one of the two accepted values.
        """
        valid_methods = ('count', 'frequency')
        if encoding_method not in valid_methods:
            raise ValueError("encoding_method takes only values 'count' and 'frequency'")

        self.encoding_method = encoding_method
        self.variables = _define_variables(variables)
Example #8
0
    def __init__(
        self,
        variables: Optional[List[str]] = None,
        random_state: Optional[Union[int, str, List[str]]] = None,
        seed: str = "general",
        seeding_method: str = "add",
    ) -> None:
        """Validate the seeding configuration and store every argument.

        Parameters
        ----------
        variables : list of str or None, default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.
        random_state : int, str, list of str or None, default=None
            With ``seed="general"`` a truthy value must be an ``int``; with
            ``seed="observation"`` the value must be truthy.
        seed : str, default="general"
            Either "general" or "observation".
        seeding_method : str, default="add"
            Either "add" or "multiply".

        Raises
        ------
        ValueError
            If one of the checks above fails.
        """
        if seed not in ("general", "observation"):
            raise ValueError(
                "seed takes only values 'general' or 'observation'")

        if seeding_method not in ("add", "multiply"):
            raise ValueError(
                "seeding_method takes only values 'add' or 'multiply'")

        # A falsy random_state (None, 0, ...) skips the type check entirely.
        if seed == "general" and random_state and not isinstance(random_state, int):
            raise ValueError(
                "if seed == 'general' the random state must take an integer"
            )

        if seed == "observation" and not random_state:
            raise ValueError(
                "if seed == 'observation' the random state must take the name of one "
                "or more variables which will be used to seed the imputer")

        self.variables = _define_variables(variables)
        self.random_state = random_state
        self.seed = seed
        self.seeding_method = seeding_method
Example #9
0
    def __init__(self, encoding_method='woe', variables=None):
        """Check ``encoding_method`` and store both arguments on the instance.

        Parameters
        ----------
        encoding_method : str, default='woe'
            One of 'woe', 'ratio' or 'log_ratio'.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``encoding_method`` is not one of the accepted values.
        """
        if encoding_method in ('woe', 'ratio', 'log_ratio'):
            self.encoding_method = encoding_method
            self.variables = _define_variables(variables)
        else:
            raise ValueError("encoding_method takes only values 'woe', 'ratio' and 'log_ratio'")
Example #10
0
    def __init__(self, tol=1, variables=None):
        """Check ``tol`` and store both arguments on the instance.

        Parameters
        ----------
        tol : number, default=1
            Must not be below 0 or above 1.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``tol`` is below 0 or above 1.
        """
        below_range = tol < 0
        if below_range or tol > 1:
            raise ValueError("tol takes values between 0 and 1")

        self.tol = tol
        self.variables = _define_variables(variables)
Example #11
0
    def __init__(self, imputation_method='median', variables=None):
        """Check ``imputation_method`` and store both arguments.

        Parameters
        ----------
        imputation_method : str, default='median'
            Either 'median' or 'mean'.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``imputation_method`` is not one of the two accepted values.
        """
        supported = ('median', 'mean')
        if imputation_method not in supported:
            raise ValueError("imputation_method takes only values 'median' or 'mean'")

        self.imputation_method = imputation_method
        self.variables = _define_variables(variables)
    def __init__(self, exp=0.5, variables=None):
        """Check ``exp`` and store both arguments on the instance.

        Parameters
        ----------
        exp : float or int, default=0.5
            Must be an instance of ``float`` or ``int``.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``exp`` is neither a ``float`` nor an ``int``.
        """
        if not isinstance(exp, (float, int)):
            raise ValueError('exp must be a float or an int')

        self.exp = exp
        self.variables = _define_variables(variables)
    def __init__(
        self,
        variables=None,
        random_state=None,
        seed='general',
        seeding_method='add',
    ):
        """Validate the seeding configuration and store every argument.

        Parameters
        ----------
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.
        random_state : default=None
            With ``seed='general'`` a truthy value must be an ``int``; with
            ``seed='observation'`` the value must be truthy.
        seed : str, default='general'
            Either 'general' or 'observation'.
        seeding_method : str, default='add'
            Either 'add' or 'multiply'.

        Raises
        ------
        ValueError
            If one of the checks above fails.
        """
        seed_choices = ('general', 'observation')
        if seed not in seed_choices:
            raise ValueError(
                "seed takes only values 'general' or 'observation'")

        method_choices = ('add', 'multiply')
        if seeding_method not in method_choices:
            raise ValueError(
                "seeding_method takes only values 'add' or 'multiply'")

        # Falsy states (None, 0, ...) are never type-checked.
        bad_general_state = (
            seed == 'general'
            and random_state
            and not isinstance(random_state, int)
        )
        if bad_general_state:
            raise ValueError(
                "if seed == 'general' the random state must take an integer"
            )

        missing_observation_state = seed == 'observation' and not random_state
        if missing_observation_state:
            raise ValueError(
                "if seed == 'observation' the random state must take the name of one or more variables "
                "which will be used to seed the imputer")

        self.variables = _define_variables(variables)
        self.random_state = random_state
        self.seed = seed
        self.seeding_method = seeding_method
Example #14
0
    def __init__(self, encoding_method='ordered', variables=None):
        """Check ``encoding_method`` and store both arguments on the instance.

        Parameters
        ----------
        encoding_method : str, default='ordered'
            Either 'ordered' or 'arbitrary'.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``encoding_method`` is not one of the two accepted values.
        """
        method_is_known = encoding_method in ('ordered', 'arbitrary')
        if not method_is_known:
            raise ValueError("encoding_method takes only values 'ordered' and 'arbitrary'")

        self.encoding_method = encoding_method
        self.variables = _define_variables(variables)
Example #15
0
    def __init__(
        self,
        cv=3,
        scoring="neg_mean_squared_error",
        variables=None,
        param_grid=None,
        regression=True,
        random_state=None,
    ):
        """Validate ``cv`` and ``regression`` and store every argument.

        Parameters
        ----------
        cv : int, default=3
            Must be an integer that is not negative.
        scoring : default="neg_mean_squared_error"
            Stored unchanged.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.
        param_grid : dict or None, default=None
            When None, ``{"max_depth": [1, 2, 3, 4]}`` is stored instead.
        regression : bool, default=True
            Must be a ``bool``.
        random_state : default=None
            Stored unchanged.

        Raises
        ------
        ValueError
            If ``cv`` or ``regression`` fails its check.
        """
        grid = param_grid
        if grid is None:
            # Built per call so that instances do not share one dict.
            grid = {"max_depth": [1, 2, 3, 4]}

        if not (isinstance(cv, int) and cv >= 0):
            raise ValueError("cv can only take only positive integers")

        if not isinstance(regression, bool):
            raise ValueError("regression can only take True or False")

        self.cv = cv
        self.scoring = scoring
        self.regression = regression
        self.variables = _define_variables(variables)
        self.param_grid = grid
        self.random_state = random_state
    def __init__(
        self,
        capping_method: str = "gaussian",
        tail: str = "right",
        fold: Union[int, float] = 3,
        variables: Optional[List] = None,
        missing_values: str = "raise",
    ) -> None:
        """Validate the capping configuration and store every argument.

        Parameters
        ----------
        capping_method : str, default="gaussian"
            One of "gaussian", "iqr" or "quantiles".
        tail : str, default="right"
            One of "right", "left" or "both".
        fold : int or float, default=3
            Must be greater than 0; with ``capping_method="quantiles"`` it must
            also not exceed 0.2.
        variables : list or None, default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.
        missing_values : str, default="raise"
            Either "raise" or "ignore".

        Raises
        ------
        ValueError
            If one of the checks above fails.
        """
        known_methods = ("gaussian", "iqr", "quantiles")
        if capping_method not in known_methods:
            raise ValueError(
                "capping_method takes only values 'gaussian', 'iqr' or 'quantiles'"
            )

        known_tails = ("right", "left", "both")
        if tail not in known_tails:
            raise ValueError(
                "tail takes only values 'right', 'left' or 'both'")

        if fold <= 0:
            raise ValueError("fold takes only positive numbers")

        uses_quantiles = capping_method == "quantiles"
        if uses_quantiles and fold > 0.2:
            raise ValueError(
                "with capping_method ='quantiles', fold takes values between 0 and "
                "0.20 only.")

        known_policies = ("raise", "ignore")
        if missing_values not in known_policies:
            raise ValueError(
                "missing_values takes only values 'raise' or 'ignore'")

        self.capping_method = capping_method
        self.tail = tail
        self.fold = fold
        self.variables = _define_variables(variables)
        self.missing_values = missing_values
Example #17
0
    def __init__(self, arbitrary_number=999, variables=None):
        """Check ``arbitrary_number`` and store both arguments.

        Parameters
        ----------
        arbitrary_number : int or float, default=999
            Must be an instance of ``int`` or ``float``.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``arbitrary_number`` is neither an ``int`` nor a ``float``.
        """
        if not isinstance(arbitrary_number, (int, float)):
            raise ValueError('arbitrary_number must be numeric of type int or float')

        self.arbitrary_number = arbitrary_number
        self.variables = _define_variables(variables)
Example #18
0
    def __init__(self, features_to_drop=None):
        """Store the features to drop, rejecting a missing or empty selection.

        Parameters
        ----------
        features_to_drop : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.features_to_drop``. The result must be neither None nor
            empty.

        Raises
        ------
        ValueError
            If the processed ``features_to_drop`` is None or has length 0.
        """
        self.features_to_drop = _define_variables(features_to_drop)

        # The default argument is None; calling ``len`` on a None result would
        # fail with TypeError, so treat None as an empty selection and raise
        # the same ValueError.
        if self.features_to_drop is None or len(self.features_to_drop) == 0:
            raise ValueError(
                "List of features to drop cannot be empty. Please pass at least 1 "
                "variable to drop.")
Example #19
0
    def __init__(self, variables: List[str] = None, transformer=None) -> None:
        """Store the wrapped transformer and reject a sparse ``OneHotEncoder``.

        Parameters
        ----------
        variables : list of str or None, default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.
        transformer : default=None
            Stored unchanged. When it is a ``OneHotEncoder`` it must be
            configured for dense output.

        Raises
        ------
        AttributeError
            If ``transformer`` is a ``OneHotEncoder`` set to produce sparse
            output.
        """
        self.variables = _define_variables(variables)
        self.transformer = transformer

        if isinstance(self.transformer, OneHotEncoder):
            # scikit-learn 1.2 renamed ``sparse`` to ``sparse_output`` and the
            # old attribute was later removed, so prefer the new name and fall
            # back to the old one only when the new one is absent.
            produces_sparse = getattr(
                self.transformer,
                "sparse_output",
                getattr(self.transformer, "sparse", False),
            )
            if produces_sparse:
                raise AttributeError(
                    "The SklearnTransformerWrapper can only wrap the OneHotEncoder if you "
                    "set its sparse attribute to False")
Example #20
0
    def __init__(
        self,
        missing_only: bool = True,
        variables: Optional[List[str]] = None,
    ) -> None:
        """Check ``missing_only`` and store both arguments on the instance.

        Parameters
        ----------
        missing_only : bool, default=True
            Must be a ``bool``.
        variables : list of str or None, default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``missing_only`` is not a ``bool``.
        """
        if isinstance(missing_only, bool):
            self.variables = _define_variables(variables)
            self.missing_only = missing_only
        else:
            raise ValueError("missing_only takes values True or False")
Example #21
0
    def __init__(
        self,
        exp: Union[float, int] = 0.5,
        variables: Union[List[str], str] = None,
    ):
        """Check ``exp`` and store both arguments on the instance.

        Parameters
        ----------
        exp : float or int, default=0.5
            Must be an instance of ``float`` or ``int``.
        variables : list of str, str or None, default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``exp`` is neither a ``float`` nor an ``int``.
        """
        exp_is_number = isinstance(exp, float) or isinstance(exp, int)
        if not exp_is_number:
            raise ValueError("exp must be a float or an int")

        self.exp = exp
        self.variables = _define_variables(variables)
Example #22
0
    def __init__(
        self,
        encoding_method: str = "ordered",
        variables: Optional[List[str]] = None,
    ) -> None:
        """Check ``encoding_method`` and store both arguments on the instance.

        Parameters
        ----------
        encoding_method : str, default="ordered"
            Either "ordered" or "arbitrary".
        variables : list of str or None, default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``encoding_method`` is not one of the two accepted values.
        """
        allowed_methods = ("ordered", "arbitrary")
        if encoding_method not in allowed_methods:
            raise ValueError(
                "encoding_method takes only values 'ordered' and 'arbitrary'")

        self.encoding_method = encoding_method
        self.variables = _define_variables(variables)
Example #23
0
    def __init__(
        self,
        encoding_method: str = "count",
        variables: Optional[List[str]] = None,
    ) -> None:
        """Check ``encoding_method`` and store both arguments on the instance.

        Parameters
        ----------
        encoding_method : str, default="count"
            Either "count" or "frequency".
        variables : list of str or None, default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``encoding_method`` is not one of the two accepted values.
        """
        if encoding_method in ("count", "frequency"):
            self.encoding_method = encoding_method
            self.variables = _define_variables(variables)
        else:
            raise ValueError(
                "encoding_method takes only values 'count' and 'frequency'")
    def __init__(
        self,
        imputation_method: str = "median",
        variables: Optional[List[str]] = None,
    ) -> None:
        """Check ``imputation_method`` and store both arguments.

        Parameters
        ----------
        imputation_method : str, default="median"
            Either "median" or "mean".
        variables : list of str or None, default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``imputation_method`` is not one of the two accepted values.
        """
        method_is_supported = imputation_method in ("median", "mean")
        if not method_is_supported:
            raise ValueError(
                "imputation_method takes only values 'median' or 'mean'")

        self.imputation_method = imputation_method
        self.variables = _define_variables(variables)
Example #25
0
    def __init__(
        self,
        encoding_method: str = "ratio",
        variables: Optional[List[str]] = None,
    ) -> None:
        """Check ``encoding_method`` and store both arguments on the instance.

        Parameters
        ----------
        encoding_method : str, default="ratio"
            Either "ratio" or "log_ratio".
        variables : list of str or None, default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.

        Raises
        ------
        ValueError
            If ``encoding_method`` is not one of the two accepted values.
        """
        ratio_methods = ("ratio", "log_ratio")
        if encoding_method not in ratio_methods:
            raise ValueError(
                "encoding_method takes only values 'ratio' and 'log_ratio'")

        self.encoding_method = encoding_method
        self.variables = _define_variables(variables)
Example #26
0
 def __init__(self, encoding_method='arbitrary', cv=3, scoring='neg_mean_squared_error',
              param_grid=None, regression=True,
              random_state=None, variables=None):
     """Store the constructor arguments on the instance.

     Parameters
     ----------
     encoding_method : default='arbitrary'
         Stored unchanged.
     cv : default=3
         Stored unchanged.
     scoring : default='neg_mean_squared_error'
         Stored unchanged.
     param_grid : dict or None, default=None
         When None, ``{'max_depth': [1, 2, 3, 4]}`` is stored instead.
     regression : default=True
         Stored unchanged.
     random_state : default=None
         Stored unchanged.
     variables : default=None
         Handed to ``_define_variables``; the result is stored as
         ``self.variables``.
     """
     # A dict literal in the signature is created once and shared by every
     # instance that relies on the default, so mutating it through one
     # instance would leak into the others. Build a fresh dict per call.
     if param_grid is None:
         param_grid = {'max_depth': [1, 2, 3, 4]}

     self.encoding_method = encoding_method
     self.cv = cv
     self.scoring = scoring
     self.regression = regression
     self.param_grid = param_grid
     self.random_state = random_state
     self.variables = _define_variables(variables)
Example #27
0
    def __init__(self, top_categories=None, variables=None, drop_last=False):
        """Validate ``top_categories`` and ``drop_last`` and store all arguments.

        Parameters
        ----------
        top_categories : int or None, default=None
            A truthy value must be an ``int``; falsy values are not checked.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.
        drop_last : default=False
            Must compare equal to True or False.

        Raises
        ------
        ValueError
            If one of the checks above fails.
        """
        # Falsy values (None, 0, ...) bypass the integer check.
        if top_categories and not isinstance(top_categories, int):
            raise ValueError("top_categories takes only integer numbers, 1, 2, 3, etc.")

        if drop_last not in (True, False):
            raise ValueError("drop_last takes only True or False")

        self.top_categories = top_categories
        self.drop_last = drop_last
        self.variables = _define_variables(variables)
Example #28
0
 def __init__(self, imputation_method='missing', fill_value='Missing', variables=None, return_object=False):
     """Validate the imputation settings and store every argument.

     Parameters
     ----------
     imputation_method : str, default='missing'
         Either 'missing' or 'frequent'.
     fill_value : str, default='Missing'
         Must be a string.
     variables : default=None
         Handed to ``_define_variables``; the result is stored as
         ``self.variables``.
     return_object : default=False
         Stored unchanged.

     Raises
     ------
     ValueError
         If ``imputation_method`` or ``fill_value`` fails its check.
     """
     supported_methods = ('missing', 'frequent')
     if imputation_method not in supported_methods:
         raise ValueError("imputation_method takes only values 'missing' or 'frequent'")

     fill_is_text = isinstance(fill_value, str)
     if not fill_is_text:
         raise ValueError("parameter 'fill_value' should be string")

     self.imputation_method = imputation_method
     self.fill_value = fill_value
     self.variables = _define_variables(variables)
     self.return_object = return_object
    def __init__(
        self,
        imputation_method='missing',
        variables=None,
        return_object=False,
    ):
        """Check ``imputation_method`` and store every argument.

        Parameters
        ----------
        imputation_method : str, default='missing'
            Either 'missing' or 'frequent'.
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.
        return_object : default=False
            Stored unchanged.

        Raises
        ------
        ValueError
            If ``imputation_method`` is not one of the two accepted values.
        """
        if imputation_method in ('missing', 'frequent'):
            self.imputation_method = imputation_method
            self.variables = _define_variables(variables)
            self.return_object = return_object
        else:
            raise ValueError(
                "imputation_method takes only values 'missing' or 'frequent'")
    def __init__(self, variables=None, method="pearson", threshold=0.8):
        """Validate the correlation settings and store every argument.

        Parameters
        ----------
        variables : default=None
            Handed to ``_define_variables``; the result is stored as
            ``self.variables``.
        method : str, default="pearson"
            One of "pearson", "spearman" or "kendall".
        threshold : float, default=0.8
            Must be a ``float`` that is not below 0 or above 1.

        Raises
        ------
        ValueError
            If ``method`` or ``threshold`` fails its check.
        """
        if method not in ["pearson", "spearman", "kendall"]:
            raise ValueError(
                "correlation method takes only values 'pearson', 'spearman', 'kendall'"
            )

        # Test the type first: comparing a non-numeric value with 0 would
        # raise TypeError before the intended ValueError could be produced.
        if not isinstance(threshold, float) or threshold < 0 or threshold > 1:
            raise ValueError("threshold must be a float between 0 and 1")

        self.variables = _define_variables(variables)
        self.method = method
        self.threshold = threshold