コード例 #1
0
def test_define_variables():
    """Check how ``_define_variables`` treats list, None and string input."""
    names_list = ['var1', 'var2', 'var1']
    nothing = None
    single_name = 'var1'

    # a list is expected back unchanged (duplicates are kept)
    assert _define_variables(names_list) == names_list

    # None is expected to pass straight through
    assert _define_variables(nothing) == nothing

    # a bare string is expected to be wrapped in a one-element list
    assert _define_variables(single_name) == [single_name]
コード例 #2
0
ファイル: outlier_removers.py プロジェクト: kartikra/mlshark
    def __init__(self, distribution='gaussian', tail='right', fold=3,
                 variables=None, missing_values='raise'):
        """
        Validate the configuration and store it on the instance.

        Parameters
        ----------
        distribution : str, default='gaussian'
            One of 'gaussian', 'skewed' or 'quantiles'.

        tail : str, default='right'
            One of 'right', 'left' or 'both'.

        fold : int or float, default=3
            Must be greater than 0, and no larger than 0.2 when
            distribution is 'quantiles'.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        missing_values : str, default='raise'
            One of 'raise' or 'ignore'.

        Raises
        ------
        ValueError
            If any argument falls outside the accepted values above.
        """
        known_distributions = ('gaussian', 'skewed', 'quantiles')
        known_tails = ('right', 'left', 'both')
        known_missing_modes = ('raise', 'ignore')

        if distribution not in known_distributions:
            raise ValueError(
                "distribution takes only values 'gaussian', 'skewed' or 'quantiles'"
            )

        if tail not in known_tails:
            raise ValueError(
                "tail takes only values 'right', 'left' or 'both'")

        if fold <= 0:
            raise ValueError("fold takes only positive numbers")

        # in quantile mode fold is capped at 0.2
        quantile_fold_too_big = distribution == 'quantiles' and fold > 0.2
        if quantile_fold_too_big:
            raise ValueError(
                "with distribution='quantiles', fold takes values between 0 and 0.20 only."
            )

        if missing_values not in known_missing_modes:
            raise ValueError(
                "missing_values takes only values 'raise' or 'ignore'")

        self.distribution = distribution
        self.tail = tail
        self.fold = fold
        self.variables = _define_variables(variables)
        self.missing_values = missing_values
コード例 #3
0
    def __init__(self, how='missing_only', variables=None):
        """
        Validate ``how`` and store the configuration.

        Parameters
        ----------
        how : str, default='missing_only'
            One of 'missing_only' or 'all'.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        Raises
        ------
        ValueError
            If ``how`` is not one of the accepted values.
        """
        accepted_modes = ('missing_only', 'all')
        if how not in accepted_modes:
            raise ValueError("how takes only values 'missing_only' or 'all'")

        selected = _define_variables(variables)
        self.variables = selected
        self.how = how
コード例 #4
0
    def __init__(self,
                 tol=0.05,
                 n_categories=10,
                 max_n_categories=None,
                 variables=None,
                 replace_with='Rare'):
        """
        Validate the configuration and store it on the instance.

        Parameters
        ----------
        tol : float, default=0.05
            Must lie between 0 and 1 (both inclusive).

        n_categories : int, default=10
            Must be an int that is not negative.

        max_n_categories : int or None, default=None
            When given, must be an int that is not negative.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        replace_with : str, default='Rare'
            Must be a string.

        Raises
        ------
        ValueError
            If any argument fails the checks described above.
        """
        if tol < 0 or tol > 1:
            raise ValueError("tol takes values between 0 and 1")

        # Test the type BEFORE comparing: otherwise a non-numeric value
        # (e.g. a string) fails on `<` with TypeError and never reaches the
        # intended ValueError.
        if not isinstance(n_categories, int) or n_categories < 0:
            raise ValueError(
                "n_categories takes only positive integer numbers")

        if max_n_categories is not None:
            # same ordering as above, for the same reason
            if not isinstance(max_n_categories, int) or max_n_categories < 0:
                raise ValueError(
                    "max_n_categories takes only positive integer numbers")

        if not isinstance(replace_with, str):
            raise ValueError("replace_with takes only strings as values.")

        self.tol = tol
        self.n_categories = n_categories
        self.max_n_categories = max_n_categories
        self.variables = _define_variables(variables)
        self.replace_with = replace_with
コード例 #5
0
    def __init__(self, variables=None, random_state=None, seed='general',
                 seeding_method='add'):
        """
        Validate the seeding configuration and store it on the instance.

        Parameters
        ----------
        variables : default=None
            Passed through ``_define_variables`` before being stored.

        random_state : default=None
            With seed='general', a truthy value must be an int.
            With seed='observation', a truthy value is required.

        seed : str, default='general'
            One of 'general' or 'observation'.

        seeding_method : str, default='add'
            One of 'add' or 'multiply'.

        Raises
        ------
        ValueError
            If any argument fails the checks described above.
        """
        seed_options = ('general', 'observation')
        method_options = ('add', 'multiply')

        if seed not in seed_options:
            raise ValueError(
                "seed takes only values 'general' or 'observation'")

        if seeding_method not in method_options:
            raise ValueError(
                "seeding_method takes only values 'add' or 'multiply'")

        # a falsy random_state (None, 0, ...) skips the type check
        if seed == 'general' and random_state and not isinstance(random_state, int):
            raise ValueError(
                "if seed == 'general' the random state must take an integer"
            )

        if seed == 'observation' and not random_state:
            raise ValueError(
                "if seed == 'observation' the random state must take the name of one or more variables "
                "which will be used to seed the imputer")

        self.variables = _define_variables(variables)
        self.random_state = random_state
        self.seed = seed
        self.seeding_method = seeding_method
コード例 #6
0
    def __init__(self, exp=0.5, variables=None):
        """
        Validate ``exp`` and store the configuration.

        Parameters
        ----------
        exp : float or int, default=0.5
            Must be a float or an int.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        Raises
        ------
        ValueError
            If ``exp`` is neither a float nor an int.
        """
        is_numeric = isinstance(exp, (float, int))
        if not is_numeric:
            raise ValueError('exp must be a float or an int')

        self.exp = exp
        self.variables = _define_variables(variables)
コード例 #7
0
    def __init__(self, base='e', variables=None):
        """
        Validate ``base`` and store the configuration.

        Parameters
        ----------
        base : str, default='e'
            One of the strings 'e' or '10'.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        Raises
        ------
        ValueError
            If ``base`` is not one of the accepted strings.
        """
        accepted_bases = ('e', '10')
        if base not in accepted_bases:
            raise ValueError("base can take only '10' or 'e' as values")

        selected = _define_variables(variables)
        self.variables = selected
        self.base = base
コード例 #8
0
    def __init__(self, features_to_drop=None):
        """
        Store the features to drop, rejecting a missing or empty selection.

        Parameters
        ----------
        features_to_drop : default=None
            Passed through ``_define_variables`` before being stored.
            At least one feature must be supplied.

        Raises
        ------
        ValueError
            If no feature is supplied (None or an empty list).
        """
        self.features_to_drop = _define_variables(features_to_drop)

        # The helper can hand None back (e.g. for the default argument), and
        # len(None) would raise TypeError instead of the intended, descriptive
        # ValueError -- so None is checked explicitly before len().
        nothing_to_drop = (self.features_to_drop is None
                           or len(self.features_to_drop) == 0)
        if nothing_to_drop:
            raise ValueError(
                'List of features to drop cannot be empty. Please pass at least 1 variable to drop'
            )
コード例 #9
0
    def __init__(self, imputation_method='median', variables=None):
        """
        Validate ``imputation_method`` and store the configuration.

        Parameters
        ----------
        imputation_method : str, default='median'
            One of 'median' or 'mean'.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        Raises
        ------
        ValueError
            If ``imputation_method`` is not one of the accepted values.
        """
        accepted_methods = ('median', 'mean')
        if imputation_method not in accepted_methods:
            raise ValueError(
                "imputation_method takes only values 'median' or 'mean'")

        self.imputation_method = imputation_method
        self.variables = _define_variables(variables)
コード例 #10
0
    def __init__(self, encoding_method='ordered', variables=None):
        """
        Validate ``encoding_method`` and store the configuration.

        Parameters
        ----------
        encoding_method : str, default='ordered'
            One of 'ordered' or 'arbitrary'.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        Raises
        ------
        ValueError
            If ``encoding_method`` is not one of the accepted values.
        """
        accepted_methods = ('ordered', 'arbitrary')
        if encoding_method not in accepted_methods:
            raise ValueError(
                "encoding_method takes only values 'ordered' and 'arbitrary'")

        self.encoding_method = encoding_method
        self.variables = _define_variables(variables)
コード例 #11
0
    def __init__(self, encoding_method='count', variables=None):
        """
        Validate ``encoding_method`` and store the configuration.

        Parameters
        ----------
        encoding_method : str, default='count'
            One of 'count' or 'frequency'.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        Raises
        ------
        ValueError
            If ``encoding_method`` is not one of the accepted values.
        """
        accepted_methods = ('count', 'frequency')
        if encoding_method not in accepted_methods:
            raise ValueError(
                "encoding_method takes only values 'count' and 'frequency'")

        self.encoding_method = encoding_method
        self.variables = _define_variables(variables)
コード例 #12
0
    def __init__(self, encoding_method='woe', variables=None):
        """
        Validate ``encoding_method`` and store the configuration.

        Parameters
        ----------
        encoding_method : str, default='woe'
            One of 'woe', 'ratio' or 'log_ratio'.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        Raises
        ------
        ValueError
            If ``encoding_method`` is not one of the accepted values.
        """
        accepted_methods = ('woe', 'ratio', 'log_ratio')
        if encoding_method not in accepted_methods:
            raise ValueError(
                "encoding_method takes only values 'woe', 'ratio' and 'log_ratio'"
            )

        self.encoding_method = encoding_method
        self.variables = _define_variables(variables)
コード例 #13
0
    def __init__(self, top_categories=None, variables=None, drop_last=False):
        """
        Validate the configuration and store it on the instance.

        Parameters
        ----------
        top_categories : int or None, default=None
            A truthy value must be an int; falsy values skip the check.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        drop_last : bool, default=False
            Must compare equal to True or False.

        Raises
        ------
        ValueError
            If ``top_categories`` or ``drop_last`` fails its check.
        """
        # falsy values (None, 0) are accepted without a type check
        if top_categories and not isinstance(top_categories, int):
            raise ValueError(
                "top_categories takes only integer numbers, 1, 2, 3, etc.")

        boolean_values = (True, False)
        if drop_last not in boolean_values:
            raise ValueError("drop_last takes only True or False")

        self.top_categories = top_categories
        self.drop_last = drop_last
        self.variables = _define_variables(variables)
コード例 #14
0
ファイル: discretisers.py プロジェクト: kartikra/mlshark
    def __init__(self, q=10, variables=None, return_object=False, return_boundaries=False):
        """
        Validate the configuration and store it on the instance.

        Parameters
        ----------
        q : int, default=10
            Must be an int.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        return_object : bool, default=False
            Must be a bool.

        return_boundaries : bool, default=False
            Must be a bool.

        Raises
        ------
        ValueError
            If any argument has the wrong type.
        """
        q_is_int = isinstance(q, int)
        if not q_is_int:
            raise ValueError('q must be an integer')

        object_flag_ok = isinstance(return_object, bool)
        if not object_flag_ok:
            raise ValueError('return_object must be True or False')

        boundaries_flag_ok = isinstance(return_boundaries, bool)
        if not boundaries_flag_ok:
            raise ValueError('return_boundaries must be True or False')

        self.q = q
        self.variables = _define_variables(variables)
        self.return_object = return_object
        self.return_boundaries = return_boundaries
コード例 #15
0
    def __init__(self, arbitrary_number=999, variables=None, imputer_dict=None):
        """
        Validate ``arbitrary_number`` and store the configuration.

        Parameters
        ----------
        arbitrary_number : int or float, default=999
            Must be an int or a float.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        imputer_dict : default=None
            Passed through ``_define_numerical_dict`` before being stored.

        Raises
        ------
        ValueError
            If ``arbitrary_number`` is neither an int nor a float.
        """
        # guard clause: reject non-numeric input up front
        if not isinstance(arbitrary_number, (int, float)):
            raise ValueError(
                'arbitrary_number must be numeric of type int or float')

        self.arbitrary_number = arbitrary_number
        self.variables = _define_variables(variables)
        self.imputer_dict = _define_numerical_dict(imputer_dict)
コード例 #16
0
ファイル: discretisers.py プロジェクト: kartikra/mlshark
    def __init__(self, cv=3, scoring='neg_mean_squared_error',
                 variables=None, param_grid=None,
                 regression=True, random_state=None):
        """
        Validate the configuration and store it on the instance.

        Parameters
        ----------
        cv : int, default=3
            Must be an int that is not negative.

        scoring : default='neg_mean_squared_error'
            Stored as given.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        param_grid : dict or None, default=None
            When None, ``{'max_depth': [1, 2, 3, 4]}`` is used.

        regression : bool, default=True
            Must be a bool.

        random_state : default=None
            Stored as given.

        Raises
        ------
        ValueError
            If ``cv`` or ``regression`` fails its check.
        """
        if not isinstance(cv, int) or cv < 0:
            raise ValueError('cv can only take only positive integers')

        if not isinstance(regression, bool):
            raise ValueError('regression can only take True or False')

        # A dict literal in the signature would be created once and shared by
        # every instance relying on the default; build a fresh one per
        # instance so mutating one object's grid cannot leak into another.
        if param_grid is None:
            param_grid = {'max_depth': [1, 2, 3, 4]}

        self.cv = cv
        self.scoring = scoring
        self.regression = regression
        self.variables = _define_variables(variables)
        self.param_grid = param_grid
        self.random_state = random_state
コード例 #17
0
    def __init__(self,
                 encoding_method='arbitrary',
                 cv=3,
                 scoring='neg_mean_squared_error',
                 param_grid=None,
                 regression=True,
                 random_state=None,
                 variables=None):
        """
        Store the configuration on the instance (no validation is done here).

        Parameters
        ----------
        encoding_method : default='arbitrary'
            Stored as given.

        cv : default=3
            Stored as given.

        scoring : default='neg_mean_squared_error'
            Stored as given.

        param_grid : dict or None, default=None
            When None, ``{'max_depth': [1, 2, 3, 4]}`` is used.

        regression : default=True
            Stored as given.

        random_state : default=None
            Stored as given.

        variables : default=None
            Passed through ``_define_variables`` before being stored.
        """
        # A dict literal in the signature would be created once and shared by
        # every instance relying on the default; build a fresh one per
        # instance so mutating one object's grid cannot leak into another.
        if param_grid is None:
            param_grid = {'max_depth': [1, 2, 3, 4]}

        self.encoding_method = encoding_method
        self.cv = cv
        self.scoring = scoring
        self.regression = regression
        self.param_grid = param_grid
        self.random_state = random_state
        self.variables = _define_variables(variables)
コード例 #18
0
    def __init__(self, imputation_method='missing', fill_value='Missing',
                 variables=None, return_object=False):
        """
        Validate the configuration and store it on the instance.

        Parameters
        ----------
        imputation_method : str, default='missing'
            One of 'missing' or 'frequent'.

        fill_value : str, default='Missing'
            Must be a string.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        return_object : default=False
            Stored as given (not validated here).

        Raises
        ------
        ValueError
            If ``imputation_method`` or ``fill_value`` fails its check.
        """
        accepted_methods = ('missing', 'frequent')
        if imputation_method not in accepted_methods:
            raise ValueError(
                "imputation_method takes only values 'missing' or 'frequent'")

        fill_is_text = isinstance(fill_value, str)
        if not fill_is_text:
            raise ValueError("parameter 'fill_value' should be string")

        self.imputation_method = imputation_method
        self.fill_value = fill_value
        self.variables = _define_variables(variables)
        self.return_object = return_object
コード例 #19
0
    def fit(self, X, y=None):
        """
        Keep a copy of the columns to impute from the training dataframe.

        Parameters
        ----------

        X : pandas dataframe of shape = [n_samples, n_features]
            The training input samples.
            Can be the entire dataframe, not just the variables to impute.

        y : None
            Not used by this method. You can pass None or y.

        Attributes
        ----------

        X_ : dataframe.
            Copy of the selected columns of the training dataframe.

        input_shape_ : shape of the training dataframe.

        Returns
        -------

        self

        Raises
        ------

        ValueError
            If seed is 'observation' and a variable named in random_state
            is not a column of X.
        """

        # validate / normalise the input through the shared helper
        X = _is_dataframe(X)

        # with no explicit selection, fall back to every column of X
        if not self.variables:
            self.variables = list(X.columns)

        # snapshot of the selected columns
        self.X_ = X[self.variables].copy()

        # when seeding per observation, random_state holds variable names
        # that must all be present in X
        if self.seed == 'observation':
            self.random_state = _define_variables(self.random_state)
            has_unknown = any(
                name not in X.columns for name in self.random_state)
            if has_unknown:
                raise ValueError(
                    "There are variables assigned as random state which are not part of the training "
                    "dataframe.")

        self.input_shape_ = X.shape

        return self
コード例 #20
0
    def __init__(self, distribution='gaussian', tail='right', fold=3,
                 variables=None):
        """
        Validate the configuration and store it on the instance.

        Parameters
        ----------
        distribution : str, default='gaussian'
            One of 'gaussian', 'skewed' or 'max'.

        tail : str, default='right'
            One of 'right' or 'left'.

        fold : int or float, default=3
            Must be greater than 0.

        variables : default=None
            Passed through ``_define_variables`` before being stored.

        Raises
        ------
        ValueError
            If any argument falls outside the accepted values above.
        """
        known_distributions = ('gaussian', 'skewed', 'max')
        known_tails = ('right', 'left')

        if distribution not in known_distributions:
            raise ValueError(
                "distribution takes only values 'gaussian', 'skewed' or 'max'")

        if tail not in known_tails:
            raise ValueError("tail takes only values 'right' or 'left'")

        fold_is_positive = fold > 0
        if not fold_is_positive:
            raise ValueError("fold takes only positive numbers")

        self.distribution = distribution
        self.tail = tail
        self.fold = fold
        self.variables = _define_variables(variables)
コード例 #21
0
 def __init__(self, variables=None, transformer=None):
     """
     Store the wrapped transformer, rejecting a sparse OneHotEncoder.

     Parameters
     ----------
     variables : default=None
         Passed through ``_define_variables`` before being stored.

     transformer : default=None
         Stored as given.

     Raises
     ------
     AttributeError
         If ``transformer`` is a OneHotEncoder whose ``sparse`` attribute
         is truthy.
     """
     self.variables = _define_variables(variables)
     self.transformer = transformer

     # `sparse` is only read when the transformer is a OneHotEncoder
     sparse_one_hot = (isinstance(self.transformer, OneHotEncoder)
                       and self.transformer.sparse)
     if sparse_one_hot:
         raise AttributeError('The SklearnTransformerWrapper can only wrap the OneHotEncoder if you '
                              'set its sparse attribute to False')
コード例 #22
0
    def __init__(self, variables=None):
        """
        Store the variable selection.

        Parameters
        ----------
        variables : default=None
            Passed through ``_define_variables`` before being stored.
        """
        selected = _define_variables(variables)
        self.variables = selected