def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=[],
        floats=['param'],
        strs=['type', 'mode'],
        aliases={'k': 'param', 'type': 'score_func'},
    )

    if 'score_func' not in out_params:
        out_params['score_func'] = f_classif
    elif out_params['score_func'].lower() == 'categorical':
        out_params['score_func'] = f_classif
    elif out_params['score_func'].lower() in ['numerical', 'numeric']:
        out_params['score_func'] = f_regression
    else:
        raise RuntimeError('type must be either categorical or numeric.')

    if 'mode' in out_params:
        if out_params['mode'] not in ('k_best', 'fpr', 'fdr', 'fwe', 'percentile'):
            raise RuntimeError(
                'mode can only be one of the following: fdr, fpr, fwe, k_best, and percentile')

    self.estimator = GenericUnivariateSelect(**out_params)
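# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of the module): convert_params is assumed
# to coerce SPL option strings into the listed Python types, apply the given
# aliases (user-facing name -> estimator keyword), and reject unknown keys
# unless ignore_extra=True, based on how it is called throughout this file.
# The function name below is hypothetical.
def _convert_params_sketch(params, ints=(), floats=(), bools=(), strs=(),
                           aliases=None, ignore_extra=False):
    aliases = aliases or {}
    out = {}
    for key, value in params.items():
        name = aliases.get(key, key)
        # Some call sites list the user-facing name, others the aliased name,
        # so check both when deciding how to coerce the value.
        if key in ints or name in ints:
            out[name] = int(value)
        elif key in floats or name in floats:
            out[name] = float(value)
        elif key in bools or name in bools:
            out[name] = str(value).lower() in ('1', 't', 'true', 'y', 'yes')
        elif key in strs or name in strs:
            out[name] = str(value)
        elif not ignore_extra:
            raise RuntimeError('Unexpected parameter: {}'.format(key))
    return out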
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=['random_state', 'max_depth', 'min_samples_split', 'max_leaf_nodes'],
        strs=['splitter', 'max_features'],
    )

    if 'max_depth' not in out_params:
        out_params.setdefault('max_leaf_nodes', 2000)

    # Whitelist valid values for splitter, as the error raised by sklearn for
    # invalid values is uninformative.
    if 'splitter' in out_params and out_params['splitter'] not in ['best', 'random']:
        raise RuntimeError('Invalid value for option splitter: "%s"' % out_params['splitter'])

    # EAFP... convert max_features to int if it is a number.
    if 'max_features' in out_params:
        try:
            out_params['max_features'] = float(out_params['max_features'])
            max_features_int = int(out_params['max_features'])
            if out_params['max_features'] == max_features_int:
                out_params['max_features'] = max_features_int
        except ValueError:
            pass

    self.estimator = _DecisionTreeRegressor(**out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(options.get('params', {}), floats=['gamma'])
    out_params['kernel'] = 'rbf'
    self.estimator = _KernelRidge(**out_params)
def convert_options(options):
    out_params = convert_params(
        options.get('params', {}),
        bools=['fit_intercept', 'normalize'],
        ignore_extra=True,
    )
    return out_params
def convert_options(options):
    out_params = convert_params(
        options.get('params', {}),
        ints=['random_state', 'n_estimators', 'max_depth', 'min_samples_split', 'max_leaf_nodes'],
        strs=['max_features'],
        ignore_extra=True,
    )

    if 'max_depth' not in out_params:
        out_params.setdefault('max_leaf_nodes', 2000)

    if 'max_features' in out_params:
        # Handle the None case.
        if out_params['max_features'].lower() == "none":
            out_params['max_features'] = None
        else:
            # EAFP... convert max_features to int if it is a number.
            try:
                out_params['max_features'] = float(out_params['max_features'])
                max_features_int = int(out_params['max_features'])
                if out_params['max_features'] == max_features_int:
                    out_params['max_features'] = max_features_int
            except ValueError:
                pass

    return out_params
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        bools=['with_centering', 'with_scaling'],
        strs=['quantile_range'],
    )

    if (StrictVersion(sklearn_version) < StrictVersion(quantile_range_required_version)
            and 'quantile_range' in out_params):
        out_params.pop('quantile_range')
        msg = ('The quantile_range option is ignored in this version of scikit-learn ({}): '
               'version {} or higher required')
        msg = msg.format(sklearn_version, quantile_range_required_version)
        messages.warn(msg)

    if 'quantile_range' in out_params:
        try:
            out_params['quantile_range'] = tuple(
                int(i) for i in out_params['quantile_range'].split('-'))
            assert len(out_params['quantile_range']) == 2
        except (ValueError, AssertionError):
            raise RuntimeError(
                'Syntax Error: quantile_range requires a range, e.g., quantile_range=25-75')

    self.estimator = _RobustScaler(**out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        floats=['max_eps'],
        ints=['min_samples'],
    )
    self.estimator = _OPTICS(**out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=['random_state', 'n_estimators', 'max_depth', 'min_samples_split', 'max_leaf_nodes'],
        strs=['max_features'],
    )

    if 'max_depth' not in out_params:
        out_params.setdefault('max_leaf_nodes', 2000)

    if 'max_features' in out_params:
        # Handle the None case.
        if out_params['max_features'].lower() == "none":
            out_params['max_features'] = None
        else:
            # EAFP... convert max_features to int if it is a number.
            try:
                out_params['max_features'] = float(out_params['max_features'])
                max_features_int = int(out_params['max_features'])
                if out_params['max_features'] == max_features_int:
                    out_params['max_features'] = max_features_int
            except ValueError:
                pass

    self.estimator = _RandomForestRegressor(**out_params)
def __init__(self, options):
    self.handle_options(options)
    params = options.get('params', {})
    converted_params = convert_params(
        params,
        ints=['k', 'conf_interval'],
        bools=['fft'],
        aliases={'k': 'nlags'},
    )

    # Set the default name to be used so that PACF can override
    self.default_name = 'acf({})'

    # Set the lags, alpha and fft parameters
    self.nlags = converted_params.pop('nlags', 40)
    self.fft = converted_params.pop('fft', False)
    conf_int = converted_params.pop('conf_interval', 95)

    if conf_int <= 0 or conf_int >= 100:
        raise RuntimeError('conf_interval cannot be less than 1 or more than 99.')
    if self.nlags <= 0:
        raise RuntimeError('k must be greater than 0.')

    self.alpha = confidence_interval_to_alpha(conf_int)
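# Illustrative sketch only: confidence_interval_to_alpha is assumed to turn a
# confidence level given in percent into the significance level expected by
# statsmodels, e.g. 95 -> 0.05 (consistent with the 0.05 default used for the
# ARIMA forecast parameters further down). The name below is hypothetical.
def _confidence_interval_to_alpha_sketch(conf_interval):
    return 1.0 - conf_interval / 100.0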
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=['max_features'],
        floats=['max_df', 'min_df'],
        strs=['ngram_range', 'stop_words', 'analyzer', 'norm', 'token_pattern'],
    )

    if 'ngram_range' in out_params:
        try:
            out_params['ngram_range'] = tuple(
                int(i) for i in out_params['ngram_range'].split('-'))
            assert len(out_params['ngram_range']) == 2
        except (ValueError, AssertionError):
            raise RuntimeError(
                'Syntax Error: ngram_range requires a range, e.g. ngram_range=1-5')

    # TODO: Maybe let the user know that we make this change.
    out_params.setdefault('max_features', 100)

    self.estimator = _TfidfVectorizer(**out_params)
def __init__(self, options):
    self.handle_options(options)
    params = options.get('params', {})
    out_params = convert_params(
        params,
        strs=['loss', 'max_features'],
        floats=['learning_rate', 'min_weight_fraction_leaf'],
        ints=['n_estimators', 'max_depth', 'min_samples_split',
              'min_samples_leaf', 'max_leaf_nodes', 'random_state'],
    )

    valid_loss = ['deviance', 'exponential']
    if 'loss' in out_params and out_params['loss'] not in valid_loss:
        msg = "loss must be one of: {}".format(', '.join(valid_loss))
        raise RuntimeError(msg)

    if 'max_features' in out_params:
        out_params['max_features'] = handle_max_features(out_params['max_features'])

    if 'max_leaf_nodes' in out_params and 'max_depth' in out_params:
        messages.warn('max_depth ignored when max_leaf_nodes is set')

    self.estimator = _GradientBoostingClassifier(**out_params)
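# Illustrative sketch only: handle_max_features is assumed to mirror the inline
# max_features handling used by the tree and forest regressors above -- mapping
# the string "none" to None, converting numeric strings to float (and to int
# when the value is integral), and passing other strings such as 'sqrt' or
# 'log2' through unchanged. The name below is hypothetical.
def _handle_max_features_sketch(max_features):
    if max_features.lower() == 'none':
        return None
    try:
        value = float(max_features)
    except ValueError:
        return max_features  # e.g. 'sqrt', 'log2'
    value_int = int(value)
    return value_int if value == value_int else value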
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=['k', 'n_iter'],
        floats=['perplexity', 'early_exaggeration', 'learning_rate'],
        aliases={'k': 'n_components'},
    )

    if out_params['n_components'] < 1:
        msg = 'Invalid value for k: k must be greater than or equal to 1, but found k="{}".'
        raise RuntimeError(msg.format(out_params['n_components']))

    # Fill in defaults for any options the user did not set.
    out_params.setdefault('n_iter', 200)
    out_params.setdefault('perplexity', 30.0)
    out_params.setdefault('early_exaggeration', 4.0)
    out_params.setdefault('learning_rate', 100)

    self.estimator = _TSNE(**out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(options.get('params', {}),
                                bools=['with_mean', 'with_std'])
    self.estimator = _StandardScaler(**out_params)
    self.columns = None
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(options.get('params', {}),
                                ints=['k'],
                                aliases={'k': 'n_components'})
    self.estimator = _PCA(**out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=['n_estimators', 'n_jobs', 'random_state', 'verbose'],
        floats=['max_samples', 'contamination', 'max_features'],
        bools=['bootstrap'],
    )

    self.return_scores = out_params.pop('anomaly_score', True)

    # whitelist n_estimators > 0
    if 'n_estimators' in out_params and out_params['n_estimators'] <= 0:
        msg = ('Invalid value error: n_estimators must be greater than 0 and an integer, '
               'but found n_estimators="{}".')
        raise RuntimeError(msg.format(out_params['n_estimators']))

    # whitelist max_samples in (0.0, 1.0]
    if 'max_samples' in out_params and not (0.0 < out_params['max_samples'] <= 1.0):
        msg = ('Invalid value error: max_samples must be a float greater than 0 and at most 1, '
               'but found max_samples="{}".')
        raise RuntimeError(msg.format(out_params['max_samples']))

    # whitelist contamination in (0.0, 0.5], as the error raised by sklearn for
    # values out of range is uninformative
    if 'contamination' in out_params and not (0.0 < out_params['contamination'] <= 0.5):
        msg = ('Invalid value error: Valid values for contamination are in (0.0, 0.5], '
               'but found contamination="{}".')
        raise RuntimeError(msg.format(out_params['contamination']))

    # whitelist max_features in (0.0, 1.0]
    if 'max_features' in out_params and not (0.0 < out_params['max_features'] <= 1.0):
        msg = ('Invalid value error: max_features must be a float greater than 0 and at most 1, '
               'but found max_features="{}".')
        raise RuntimeError(msg.format(out_params['max_features']))

    self.estimator = _IsolationForest(**out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(options.get('params', {}),
                                bools=['copy'],
                                strs=['feature_range'])
    self.estimator = _MinMaxScaler(**out_params)
    self.columns = None
def __init__(self, options):
    debug.info('NetworkX Version {}'.format(nx.__version__))
    self.handle_options(options)
    out_params = convert_params(options.get('params', {}), strs=['weight'])

    if 'weight' not in out_params:
        options['weight'] = 'one'
    else:
        options['weight'] = out_params['weight']
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        floats=['gamma', 'C'],
    )
    self.estimator = SVC(class_weight='balanced', **out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(options.get('params', {}),
                                floats=['tol'],
                                strs=['algorithm'],
                                ints=['k', 'n_iter', 'random_state'],
                                aliases={'k': 'n_components'})
    self.estimator = _TruncatedSVD(**out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=['kmax', 'random_state'],
    )
    self.estimator = _XMeans(**out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=['k'],
        aliases={'k': 'n_topics'},
    )
    self.estimator = _LatentDirichletAllocation(**out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        bools=['fit_intercept', 'normalize'],
    )
    self.estimator = _LinearRegression(**out_params)
def __init__(self, options):
    self.handle_options(options)
    params = convert_params(
        options.get('params', {}),
        strs=['order'],
        ints=['forecast_k', 'conf_interval', 'holdback'],
        aliases={'forecast_k': 'steps'})

    self.out_params = dict(model_params=dict(), forecast_function_params=dict())

    if 'order' in params:
        # statsmodels wants a tuple for the order of the model: the number of AR
        # parameters, differences, and MA parameters.
        # SPL won't accept a tuple as an option's value, so the next few lines make it
        # possible for the user to configure order.
        try:
            self.out_params['model_params']['order'] = tuple(
                int(i) for i in params['order'].split('-'))
            assert len(self.out_params['model_params']['order']) == 3
        except (ValueError, AssertionError):
            raise RuntimeError(
                'Syntax Error: order requires three non-negative integer values, '
                'e.g. order=4-1-2')
    else:
        raise RuntimeError(
            'Order of the model is missing. It is required for fitting, '
            'e.g. order=<No. of AR parameters>-<No. of differences>-<No. of MA parameters>')

    # Default steps set to zero
    steps = params.get('steps', 0)
    self._test_forecast_k(steps)
    self.out_params['forecast_function_params']['steps'] = steps

    if 'conf_interval' in params:
        self.out_params['forecast_function_params']['alpha'] = \
            confidence_interval_to_alpha(params['conf_interval'])
    else:
        # the default value that ARIMAResults.forecast uses.
        self.out_params['forecast_function_params']['alpha'] = 0.05

    if 'holdback' in params:
        self._test_holdback(params['holdback'])
        self.holdback = params.pop('holdback')
        # The required ratio of invariant time frequencies (deltas) between rows.
        self.freq_threshold = 1.0
    else:
        self.holdback = 0
        self.freq_threshold = 0.9

    # Dealing with missing data:
    # if 'missing' in params and params['missing'] in ['raise', 'drop']:
    #     self.out_params['model_params']['missing'] = params['missing']
    # else:
    self.out_params['model_params']['missing'] = 'raise'
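# Illustrative example only (hypothetical option values): with
# `order=4-1-2 forecast_k=10 holdback=10`, the constructor above is expected
# to produce
#     self.out_params['model_params'] == {'order': (4, 1, 2), 'missing': 'raise'}
#     self.out_params['forecast_function_params'] == {'steps': 10, 'alpha': 0.05}
# with self.holdback == 10 and self.freq_threshold == 1.0.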
def __init__(self, options):
    self.handle_options(options)
    params = options.get('params', {})
    converted_params = convert_params(params,
                                      ints=['n_estimators'],
                                      floats=['learning_rate'])
    self.estimator = _AdaBoostClassifier(**converted_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=['k', 'random_state'],
        aliases={'k': 'n_clusters'},
    )
    self.estimator = _KMeans(**out_params)
def __init__(self, options):
    self.handle_options(options)
    params = options.get('params', {})
    out_params = convert_params(
        params,
        strs=['loss', 'max_features'],
        floats=['learning_rate'],
        ints=['n_estimators'],
    )
    self.estimator = _AdaBoostRegressor(**out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        bools=['fit_intercept', 'normalize'],
        floats=['alpha'],
    )
    out_params.setdefault('normalize', True)
    self.estimator = _Ridge(**out_params)
def check_probabilities(options):
    out_params = convert_params(options.get('params', {}),
                                bools=['probabilities'],
                                ignore_extra=True)

    if 'probabilities' in out_params:
        probabilities = is_truthy(out_params['probabilities'])
        del options['params']['probabilities']
    else:
        probabilities = False

    return probabilities
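# Illustrative sketch only: is_truthy is assumed to interpret SPL-style boolean
# strings, e.g. '1', 't', 'true', 'y', 'yes' (case-insensitive) as True and
# anything else as False. The name below is hypothetical.
def _is_truthy_sketch(value):
    return str(value).strip().lower() in ('1', 't', 'true', 'y', 'yes')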
def __init__(self, options):
    self.handle_options(options)
    params = options.get('params', {})
    out_params = convert_params(
        params,
        floats=['C', 'gamma'],
        strs=['kernel'],
        ints=['degree'],
    )
    self.estimator = _SVR(**out_params)
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        floats=['gamma', 'C', 'tol', 'intercept_scaling'],
        ints=['random_state', 'max_iter'],
        strs=['penalty', 'loss', 'multi_class'],
        bools=['dual', 'fit_intercept'],
    )
    self.estimator = _LinearSVC(**out_params)