Example #1
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            ints=[],
            floats=['param'],
            strs=['type', 'mode'],
            aliases={
                'k': 'param',
                'type': 'score_func'
            },
        )

        if 'score_func' not in out_params:
            out_params['score_func'] = f_classif
        else:
            score_func = out_params['score_func'].lower()
            if score_func == 'categorical':
                out_params['score_func'] = f_classif
            elif score_func in ('numerical', 'numeric'):
                out_params['score_func'] = f_regression
            else:
                raise RuntimeError(
                    'type must be either categorical or numerical.')

        if 'mode' in out_params:
            if out_params['mode'] not in ('k_best', 'fpr', 'fdr', 'fwe',
                                          'percentile'):
                raise RuntimeError(
                    'mode can only be one of the following: fdr, fpr, fwe, k_best, and percentile'
                )

        self.estimator = GenericUnivariateSelect(**out_params)
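
Every example on this page funnels the raw params dict through convert_params. Its implementation is not shown here; the sketch below is a rough reconstruction of the behavior the call sites rely on (string-to-type coercion plus key aliasing), with a hypothetical signature, not the library's actual code.

# Hypothetical sketch of convert_params, inferred from the call sites on this
# page; the real implementation may differ.
def convert_params(params, ints=(), floats=(), strs=(), bools=(),
                   aliases=None, ignore_extra=False):
    aliases = aliases or {}
    converters = {}
    for names, cast in ((ints, int), (floats, float), (strs, str),
                        (bools, lambda v: str(v).lower() in ('1', 't', 'true'))):
        for name in names:
            converters[name] = cast
    out = {}
    for key, value in params.items():
        target = aliases.get(key, key)
        cast = converters.get(key, converters.get(target))
        if cast is None:
            if ignore_extra:
                continue
            raise RuntimeError('Unexpected parameter: {}'.format(key))
        out[target] = cast(value)
    return out

For instance, with Example #1's spec, {'k': '3', 'type': 'numeric'} would come back as {'param': 3.0, 'score_func': 'numeric'}.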
Example #2
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            ints=[
                'random_state', 'max_depth', 'min_samples_split',
                'max_leaf_nodes'
            ],
            strs=['splitter', 'max_features'],
        )

        if 'max_depth' not in out_params:
            out_params.setdefault('max_leaf_nodes', 2000)

        # whitelist valid values for splitter, since the error sklearn raises
        # for invalid values is uninformative
        if 'splitter' in out_params and out_params['splitter'] not in ('best', 'random'):
            raise RuntimeError('Invalid value for option splitter: "%s"' %
                               out_params['splitter'])

        # EAFP: convert max_features to a number when possible; sklearn reads
        # an int as an absolute count and a float as a fraction of features.
        try:
            out_params['max_features'] = float(out_params['max_features'])
            max_features_int = int(out_params['max_features'])
            if out_params['max_features'] == max_features_int:
                out_params['max_features'] = max_features_int
        except (KeyError, ValueError):
            pass

        self.estimator = _DecisionTreeRegressor(**out_params)
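
The EAFP block matters because scikit-learn reads an integer max_features as an absolute feature count and a float as a fraction of the features. The same coercion as a standalone helper (the name is hypothetical, purely for illustration):

def coerce_max_features(value):
    # Numeric strings become int (whole numbers) or float (fractions);
    # anything else, e.g. 'sqrt' or 'log2', passes through unchanged.
    try:
        as_float = float(value)
    except (TypeError, ValueError):
        return value
    as_int = int(as_float)
    return as_int if as_float == as_int else as_float

assert coerce_max_features('3') == 3        # absolute count
assert coerce_max_features('0.5') == 0.5    # fraction of features
assert coerce_max_features('sqrt') == 'sqrt'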
Example #3
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(options.get('params', {}), floats=['gamma'])
        out_params['kernel'] = 'rbf'

        self.estimator = _KernelRidge(**out_params)
Example #4
    def convert_options(options):
        out_params = convert_params(
            options.get('params', {}),
            bools=['fit_intercept', 'normalize'],
            ignore_extra=True,
        )
        return out_params
Example #5
    def convert_options(options):
        out_params = convert_params(
            options.get('params', {}),
            ints=[
                'random_state',
                'n_estimators',
                'max_depth',
                'min_samples_split',
                'max_leaf_nodes',
            ],
            strs=['max_features'],
            ignore_extra=True,
        )

        if 'max_depth' not in out_params:
            out_params.setdefault('max_leaf_nodes', 2000)

        if 'max_features' in out_params:
            # Handle None case
            if out_params['max_features'].lower() == "none":
                out_params['max_features'] = None
            else:
                # EAFP: convert max_features to int if it is a whole number
                try:
                    out_params['max_features'] = float(out_params['max_features'])
                    max_features_int = int(out_params['max_features'])
                    if out_params['max_features'] == max_features_int:
                        out_params['max_features'] = max_features_int
                except ValueError:
                    pass
        return out_params
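
Assuming convert_params behaves as in the sketch under Example #1, a call with string-valued options comes back typed (illustrative values only):

options = {'params': {'n_estimators': '50', 'max_features': '0.5', 'foo': 'bar'}}
out_params = convert_options(options)
# -> {'n_estimators': 50, 'max_features': 0.5, 'max_leaf_nodes': 2000}
# 'foo' is silently dropped because ignore_extra=True, and max_leaf_nodes
# is defaulted to 2000 because max_depth was not supplied.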
Example #6
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            bools=['with_centering', 'with_scaling'],
            strs=['quantile_range'],
        )

        if StrictVersion(sklearn_version) < StrictVersion(
                quantile_range_required_version
        ) and 'quantile_range' in out_params.keys():
            out_params.pop('quantile_range')
            msg = 'The quantile_range option is ignored in this version of scikit-learn ({}): version {} or higher required'
            msg = msg.format(sklearn_version, quantile_range_required_version)
            messages.warn(msg)

        if 'quantile_range' in out_params.keys():
            try:
                out_params['quantile_range'] = tuple(
                    int(i) for i in out_params['quantile_range'].split('-'))
                assert len(out_params['quantile_range']) == 2
            except (ValueError, AssertionError):
                raise RuntimeError(
                    'Syntax Error: quantile_range requires a range, e.g., quantile_range=25-75'
                )

        self.estimator = _RobustScaler(**out_params)
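
The dash syntax exists because SPL options are plain strings; the parse above turns quantile_range=25-75 into the tuple RobustScaler expects. An illustrative round trip:

raw = '25-75'
quantile_range = tuple(int(i) for i in raw.split('-'))
assert quantile_range == (25, 75)
# '25' or '25-50-75' would fail the length-2 check and trigger the syntax error.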
Example #7
    def __init__(self, options):
        self.handle_options(options)
        out_params = convert_params(
            options.get('params', {}), floats=['max_eps'], ints=['min_samples']
        )

        self.estimator = _OPTICS(**out_params)
Example #8
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            ints=[
                'random_state', 'n_estimators', 'max_depth',
                'min_samples_split', 'max_leaf_nodes'
            ],
            strs=['max_features'],
        )

        if 'max_depth' not in out_params:
            out_params.setdefault('max_leaf_nodes', 2000)

        if 'max_features' in out_params:
            # Handle None case
            if out_params['max_features'].lower() == "none":
                out_params['max_features'] = None
            else:
                # EAFP: convert max_features to int if it is a whole number
                try:
                    out_params['max_features'] = float(
                        out_params['max_features'])
                    max_features_int = int(out_params['max_features'])
                    if out_params['max_features'] == max_features_int:
                        out_params['max_features'] = max_features_int
                except ValueError:
                    pass

        self.estimator = _RandomForestRegressor(**out_params)
Example #9
    def __init__(self, options):

        self.handle_options(options)

        params = options.get('params', {})
        converted_params = convert_params(
            params,
            ints=['k', 'conf_interval'],
            bools=['fft'],
            aliases={'k': 'nlags'},
        )

        # Set the default name to be used so that PACF can override
        self.default_name = 'acf({})'

        # Set the lags, alpha and fft parameters
        self.nlags = converted_params.pop('nlags', 40)
        self.fft = converted_params.pop('fft', False)

        conf_int = converted_params.pop('conf_interval', 95)
        if conf_int <= 0 or conf_int >= 100:
            raise RuntimeError('conf_interval cannot be less than 1 or more than 99.')
        if self.nlags <= 0:
            raise RuntimeError('k must be greater than 0.')
        self.alpha = confidence_interval_to_alpha(conf_int)
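
confidence_interval_to_alpha is not shown on this page. Since statsmodels' acf takes a significance level alpha, it presumably maps the percentage to alpha; a minimal sketch, assuming that convention:

def confidence_interval_to_alpha(conf_interval):
    # Assumption: a 95% confidence interval corresponds to alpha = 0.05.
    return 1.0 - conf_interval / 100.0

assert abs(confidence_interval_to_alpha(95) - 0.05) < 1e-9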
Example #10
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            ints=['max_features'],
            floats=['max_df', 'min_df'],
            strs=[
                'ngram_range', 'stop_words', 'analyzer', 'norm',
                'token_pattern'
            ],
        )

        if 'ngram_range' in out_params.keys():
            try:
                out_params['ngram_range'] = tuple(
                    int(i) for i in out_params['ngram_range'].split('-'))
                assert len(out_params['ngram_range']) == 2
            except (ValueError, AssertionError):
                raise RuntimeError(
                    'Syntax Error: ngram_range requires a range, e.g. ngram_range=1-5'
                )

        # TODO: Maybe let the user know that we make this change.
        out_params.setdefault('max_features', 100)

        self.estimator = _TfidfVectorizer(**out_params)
Example #11
    def __init__(self, options):
        self.handle_options(options)
        params = options.get('params', {})
        out_params = convert_params(
            params,
            strs=['loss', 'max_features'],
            floats=['learning_rate', 'min_weight_fraction_leaf'],
            ints=[
                'n_estimators', 'max_depth', 'min_samples_split',
                'min_samples_leaf', 'max_leaf_nodes', 'random_state'
            ],
        )

        valid_loss = ['deviance', 'exponential']
        if 'loss' in out_params:
            if out_params['loss'] not in valid_loss:
                msg = "loss must be one of: {}".format(', '.join(valid_loss))
                raise RuntimeError(msg)

        if 'max_features' in out_params:
            out_params['max_features'] = handle_max_features(
                out_params['max_features'])

        if 'max_leaf_nodes' in out_params and 'max_depth' in out_params:
            messages.warn('max_depth ignored when max_leaf_nodes is set')

        self.estimator = _GradientBoostingClassifier(**out_params)
Example #12
    def __init__(self, options):
        self.handle_options(options)
        out_params = convert_params(
            options.get('params', {}),
            ints=['k', 'n_iter'],
            floats=['perplexity', 'early_exaggeration', 'learning_rate'],
            aliases={'k': 'n_components'})

        if out_params['n_components'] < 1:
            msg = 'Invalid value for k: k must be greater than or equal to 1, but found k="{}".'
            raise RuntimeError(msg.format(out_params['n_components']))

        out_params.setdefault('n_iter', 200)
        out_params.setdefault('perplexity', 30.0)
        out_params.setdefault('early_exaggeration', 4.0)
        out_params.setdefault('learning_rate', 100)

        self.estimator = _TSNE(**out_params)
Example #13
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(options.get('params', {}),
                                    bools=['with_mean', 'with_std'])
        self.estimator = _StandardScaler(**out_params)
        self.columns = None
Example #14
    def __init__(self, options):
        self.handle_options(options)
        out_params = convert_params(options.get('params', {}),
                                    ints=['k'],
                                    aliases={'k': 'n_components'})

        self.estimator = _PCA(**out_params)
Example #15
    def __init__(self, options):
        self.handle_options(options)
        out_params = convert_params(
            options.get('params', {}),
            ints=['n_estimators', 'n_jobs', 'random_state', 'verbose'],
            floats=['max_samples', 'contamination', 'max_features'],
            bools=['bootstrap'],
        )
        self.return_scores = out_params.pop('anomaly_score', True)

        # whitelist n_estimators > 0
        if 'n_estimators' in out_params and out_params['n_estimators'] <= 0:
            msg = 'Invalid value error: n_estimators must be an integer greater than 0, but found n_estimators="{}".'
            raise RuntimeError(msg.format(out_params['n_estimators']))

        # whitelist max_samples between 0 and 1
        if 'max_samples' in out_params and (out_params['max_samples'] < 0 or out_params['max_samples'] > 1):
            msg = 'Invalid value error: max_samples must be a float between 0 and 1, but found max_samples="{}".'
            raise RuntimeError(msg.format(out_params['max_samples']))

        # whitelist contamination in (0.0, 0.5], since the error sklearn raises
        # for out-of-range values is uninformative
        if 'contamination' in out_params and not (0.0 < out_params['contamination'] <= 0.5):
            msg = (
                'Invalid value error: Valid values for contamination are in (0.0, 0.5], '
                'but found contamination="{}".'
            )
            raise RuntimeError(msg.format(out_params['contamination']))

        # whitelist max_features between 0 and 1
        if 'max_features' in out_params and (out_params['max_features'] < 0 or out_params['max_features'] > 1):
            msg = 'Invalid value error: max_features must be between 0 and 1, but found max_features="{}".'
            raise RuntimeError(msg.format(out_params['max_features']))

        self.estimator = _IsolationForest(**out_params)
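
The range guards above, extracted as testable predicates (illustrative only):

def in_unit_interval(value):
    # Mirrors the max_samples / max_features guards: accept 0 <= value <= 1.
    return 0 <= value <= 1

def valid_contamination(value):
    # Mirrors the contamination guard: valid values lie in (0.0, 0.5].
    return 0.0 < value <= 0.5

assert in_unit_interval(0.8) and not in_unit_interval(1.5)
assert valid_contamination(0.5) and not valid_contamination(0.0)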
Example #16
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(options.get('params', {}),
                                    bools=['copy'],
                                    strs=['feature_range'])
        self.estimator = _MinMaxScaler(**out_params)
        self.columns = None
Example #17
    def __init__(self, options):
        debug.info('NetworkX Version {}'.format(nx.__version__))
        self.handle_options(options)
        out_params = convert_params(options.get('params', {}), strs=['weight'])
        if 'weight' not in out_params:
            options['weight'] = 'one'
        else:
            options['weight'] = out_params['weight']
Example #18
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            floats=['gamma', 'C'],
        )

        self.estimator = SVC(class_weight='balanced', **out_params)
Example #19
    def __init__(self, options):
        self.handle_options(options)
        out_params = convert_params(options.get('params', {}),
                                    floats=['tol'],
                                    strs=['algorithm'],
                                    ints=['k', 'n_iter', 'random_state'],
                                    aliases={'k': 'n_components'})

        self.estimator = _TruncatedSVD(**out_params)
Example #20
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            ints=['kmax', 'random_state'],
        )

        self.estimator = _XMeans(**out_params)
Example #21
    def __init__(self, options):
        self.handle_options(options)
        out_params = convert_params(
            options.get('params', {}),
            ints=['k'],
            aliases={'k': 'n_topics'}
        )

        self.estimator = _LatentDirichletAllocation(**out_params)
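
Note that scikit-learn later renamed LatentDirichletAllocation's n_topics parameter to n_components (the old name was removed in 0.21), so on newer versions the alias target would need updating. A hedged shim, assuming the surrounding class is otherwise unchanged:

# Assumption: running against scikit-learn >= 0.21, where only n_components exists.
if 'n_topics' in out_params:
    out_params['n_components'] = out_params.pop('n_topics')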
Example #22
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            bools=['fit_intercept', 'normalize'],
        )

        self.estimator = _LinearRegression(**out_params)
Example #23
    def __init__(self, options):
        self.handle_options(options)

        params = convert_params(
            options.get('params', {}),
            strs=['order'],
            ints=['forecast_k', 'conf_interval', 'holdback'],
            aliases={'forecast_k': 'steps'})
        self.out_params = dict(model_params=dict(),
                               forecast_function_params=dict())

        if 'order' in params:
            # statsmodels wants a tuple for order of the model for the number of AR parameters,
            # differences, and MA parameters.
            # SPL won't accept a tuple as an option's value, so the next few lines will make it possible for the
            # user to configure order.
            try:
                self.out_params['model_params']['order'] = tuple(
                    int(i) for i in params['order'].split('-'))
                assert len(self.out_params['model_params']['order']) == 3
            except (ValueError, AssertionError):
                raise RuntimeError(
                    'Syntax Error: order requires three non-negative integer values, e.g. order=4-1-2'
                )
        else:
            raise RuntimeError(
                'Order of the model is missing; it is required for fitting, e.g. '
                'order=<No. of AR parameters>-<No. of differences>-<No. of MA parameters>')

        # Default steps set to zero
        steps = params.get('steps', 0)
        self._test_forecast_k(steps)
        self.out_params['forecast_function_params']['steps'] = steps

        if 'conf_interval' in params:
            self.out_params['forecast_function_params']['alpha'] = \
                confidence_interval_to_alpha(params['conf_interval'])
        else:
            self.out_params['forecast_function_params'][
                'alpha'] = 0.05  # the default value that ARIMAResults.forecast uses.

        if 'holdback' in params:
            self._test_holdback(params['holdback'])
            self.holdback = params.pop('holdback')
            # The required ratio of invariant time frequencies (deltas)
            # Between rows
            self.freq_threshold = 1.0
        else:
            self.holdback = 0
            self.freq_threshold = 0.9

        # Dealing with Missing data
        # if 'missing' in params and params['missing'] in ['raise', 'drop']:
        #     self.out_params['model_params']['missing'] = params['missing']
        # else:
        self.out_params['model_params']['missing'] = 'raise'
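
Since SPL cannot pass a tuple, the dash syntax stands in for the (p, d, q) order tuple that statsmodels expects; an illustrative parse:

raw = '4-1-2'
order = tuple(int(i) for i in raw.split('-'))
assert order == (4, 1, 2)   # 4 AR terms, 1 difference, 2 MA terms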
Example #24
    def __init__(self, options):
        self.handle_options(options)

        params = options.get('params', {})

        converted_params = convert_params(params,
                                          ints=['n_estimators'],
                                          floats=['learning_rate'])

        self.estimator = _AdaBoostClassifier(**converted_params)
Example #25
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            ints=['k', 'random_state'],
            aliases={'k': 'n_clusters'},
        )

        self.estimator = _KMeans(**out_params)
Example #26
    def __init__(self, options):
        self.handle_options(options)
        params = options.get('params', {})
        out_params = convert_params(
            params,
            strs=['loss', 'max_features'],
            floats=['learning_rate'],
            ints=['n_estimators'],
        )

        self.estimator = _AdaBoostRegressor(**out_params)
Example #27
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            bools=['fit_intercept', 'normalize'],
            floats=['alpha'],
        )
        out_params.setdefault('normalize', True)

        self.estimator = _Ridge(**out_params)
Example #28
    def check_probabilities(options):
        out_params = convert_params(options.get('params', {}),
                                    bools=['probabilities'],
                                    ignore_extra=True)

        if 'probabilities' in out_params:
            probabilities = is_truthy(out_params['probabilities'])
            del options['params']['probabilities']
        else:
            probabilities = False
        return probabilities
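
is_truthy is not defined on this page; presumably it normalizes the usual string spellings of a boolean. A minimal sketch, assuming that behavior:

def is_truthy(value):
    # Hypothetical: the real helper may accept more spellings than these.
    return str(value).strip().lower() in ('1', 't', 'true', 'y', 'yes')

assert is_truthy('True') and is_truthy('1')
assert not is_truthy('false')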
Example #29
    def __init__(self, options):
        self.handle_options(options)

        params = options.get('params', {})
        out_params = convert_params(
            params,
            floats=['C', 'gamma'],
            strs=['kernel'],
            ints=['degree'],
        )

        self.estimator = _SVR(**out_params)
Example #30
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            floats=['gamma', 'C', 'tol', 'intercept_scaling'],
            ints=['random_state', 'max_iter'],
            strs=['penalty', 'loss', 'multi_class'],
            bools=['dual', 'fit_intercept'],
        )

        self.estimator = _LinearSVC(**out_params)