Ejemplo n.º 1
0
    def fit(
        self,
        X,
        y,
        X_test=None,
        y_test=None,
        metric=None,
        feat_type=None,
        dataset_name=None,
    ):
        """Configure this estimator from a selector-chosen policy, then fit.

        Steps, in order:

        1. Restrict ``self.include['classifier']`` depending on whether ``X``
           is sparse (``gradient_boosting`` is only offered for dense data).
        2. Fill in ``self.metric`` when it is ``None``.
        3. Load a pickled selector and let it choose an AutoML policy from the
           number of columns and rows of ``X``.
        4. Load the portfolio JSON stored for that policy and build the
           matching SMAC callback.
        5. Store the resampling configuration and callback on ``self`` and
           delegate to the parent class' ``fit``.

        Parameters
        ----------
        X, y
            Training data and targets; ``X.shape`` and ``y.shape`` are read.
        X_test, y_test, feat_type
            Forwarded unchanged to the parent ``fit``.
        metric
            NOTE(review): accepted but never read in this method; only
            ``self.metric`` is consulted.
        dataset_name
            Used as the label of the metafeature frame and forwarded to the
            parent ``fit``.

        Returns
        -------
        Whatever the parent class' ``fit`` returns.
        """
        # TODO: regularly check
        # https://github.com/scikit-learn/scikit-learn/issues/15336 to see
        # whether histogram gradient boosting in scikit-learn finally supports
        # sparse data.
        include_estimators = ['extra_trees', 'passive_aggressive', 'random_forest', 'sgd']
        if not scipy.sparse.issparse(X):
            include_estimators.append('gradient_boosting')
        include_estimators.append('mlp')
        self.include['classifier'] = include_estimators

        # Default metric: accuracy for a single target column, log loss otherwise.
        if self.metric is None:
            single_target = len(y.shape) == 1 or y.shape[1] == 1
            self.metric = accuracy if single_target else log_loss

        # Metrics outside `metrics` fall back to the 'balanced_accuracy' selector.
        metric_name = self.metric.name if self.metric in metrics else 'balanced_accuracy'
        selector_file = selector_files[metric_name]
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # presumably selector_file is a trusted, packaged artifact -- confirm.
        with open(selector_file, 'rb') as selector_fh:
            selector = pickle.load(selector_fh)

        # A single-row frame: [number of columns, number of rows] of X.
        shape_summary = [X.shape[1], X.shape[0]]
        metafeatures = pd.DataFrame({dataset_name: shape_summary}).transpose()
        predictions = selector.predict(metafeatures)
        automl_policy = strategies[np.argmax(predictions)]

        # policy name -> (resampling strategy, number of CV folds, fidelity)
        holdout = 'holdout-iterative-fit'
        cross_validation = 'cv-iterative-fit'
        policy_table = {
            'RF_None_holdout_iterative_es_if': (holdout, None, None),
            'RF_None_3CV_iterative_es_if': (cross_validation, 3, None),
            'RF_None_5CV_iterative_es_if': (cross_validation, 5, None),
            'RF_None_10CV_iterative_es_if': (cross_validation, 10, None),
            'RF_SH-eta4-i_holdout_iterative_es_if': (holdout, None, 'SH'),
            'RF_SH-eta4-i_3CV_iterative_es_if': (cross_validation, 3, 'SH'),
            'RF_SH-eta4-i_5CV_iterative_es_if': (cross_validation, 5, 'SH'),
            'RF_SH-eta4-i_10CV_iterative_es_if': (cross_validation, 10, 'SH'),
        }
        resampling_strategy, n_folds, fidelity = policy_table[automl_policy]

        # Only the cross-validation strategy takes extra arguments.
        resampling_strategy_kwargs = (
            {'folds': n_folds} if resampling_strategy == cross_validation else None
        )

        portfolio_name = '%s.json' % automl_policy
        portfolio_file = this_directory / metric_name / 'askl2_portfolios' / portfolio_name
        with open(portfolio_file) as portfolio_fh:
            portfolio = json.load(portfolio_fh)['portfolio']

        if fidelity != 'SH':
            smac_callback = SmacObjectCallback(portfolio)
        else:
            smac_callback = SHObjectCallback('iterations', 4, 5.0, portfolio)

        self.resampling_strategy = resampling_strategy
        self.resampling_strategy_arguments = resampling_strategy_kwargs
        self.get_smac_object_callback = smac_callback

        parent_fit_kwargs = {
            'X': X,
            'y': y,
            'X_test': X_test,
            'y_test': y_test,
            'feat_type': feat_type,
            'dataset_name': dataset_name,
        }
        return super().fit(**parent_fit_kwargs)
Ejemplo n.º 2
0
    def fit(self, X, y, X_test=None, y_test=None, metric=None,
            feat_type=None, dataset_name=None):
        """Choose an AutoML policy with the pickled selector, then fit.

        The selector stored at the module-level ``selector_file`` is given
        three metafeatures -- the number of unique values in ``y`` and the
        number of columns and rows of ``X`` -- and the index of its largest
        prediction picks the policy from ``strategies``. The policy fixes the
        resampling strategy, its fold count and whether the successive-halving
        ('SH') callback is used. The matching portfolio JSON is loaded, the
        configuration is stored on ``self`` and the parent ``fit`` is called.

        Parameters
        ----------
        X, y
            Training data and targets.
        X_test, y_test, feat_type, dataset_name
            Forwarded unchanged to the parent ``fit``.
        metric
            NOTE(review): accepted but never read in this method.

        Returns
        -------
        Whatever the parent class' ``fit`` returns.
        """
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # presumably selector_file is a trusted, packaged artifact -- confirm.
        with open(selector_file, 'rb') as selector_fh:
            selector = pickle.load(selector_fh)

        n_classes = len(np.unique(y))
        metafeatures = np.array([n_classes, X.shape[1], X.shape[0]])
        scores = selector.predict(metafeatures)
        automl_policy = strategies[np.argmax(scores)]

        # policy name -> (number of CV folds or None for holdout, fidelity)
        policies = {
            'RF_None_holdout_iterative_es_if': (None, None),
            'RF_None_3CV_iterative_es_if': (3, None),
            'RF_None_5CV_iterative_es_if': (5, None),
            'RF_None_10CV_iterative_es_if': (10, None),
            'RF_SH-eta4-i_holdout_iterative_es_if': (None, 'SH'),
            'RF_SH-eta4-i_3CV_iterative_es_if': (3, 'SH'),
            'RF_SH-eta4-i_5CV_iterative_es_if': (5, 'SH'),
            'RF_SH-eta4-i_10CV_iterative_es_if': (10, 'SH'),
        }
        n_folds, fidelity = policies[automl_policy]

        if n_folds is None:
            resampling_strategy = 'holdout-iterative-fit'
            resampling_strategy_kwargs = None
        else:
            resampling_strategy = 'cv-iterative-fit'
            resampling_strategy_kwargs = {'folds': n_folds}

        portfolio_file = os.path.join(
            this_directory,
            'askl2_portfolios',
            '%s.json' % automl_policy,
        )
        with open(portfolio_file) as portfolio_fh:
            portfolio = json.load(portfolio_fh)['portfolio']

        if fidelity != 'SH':
            smac_callback = get_smac_object_callback(portfolio)
        else:
            smac_callback = get_sh_object_callback('iterations', 4, 5.0, portfolio)

        self.resampling_strategy = resampling_strategy
        self.resampling_strategy_arguments = resampling_strategy_kwargs
        self.get_smac_object_callback = smac_callback

        parent_fit_kwargs = {
            'X': X,
            'y': y,
            'X_test': X_test,
            'y_test': y_test,
            'feat_type': feat_type,
            'dataset_name': dataset_name,
        }
        return super().fit(**parent_fit_kwargs)
Ejemplo n.º 3
0
    def fit(
        self,
        X,
        y,
        X_test=None,
        y_test=None,
        metric=None,
        feat_type=None,
        dataset_name=None,
    ):
        """Select an AutoML policy for the data, apply it to ``self`` and fit.

        ``self.metric`` is filled in when it is ``None``. A pickled selector
        (one per metric name, looked up in ``selector_files``) then chooses a
        policy from the number of columns and rows of ``X``. The policy
        determines the resampling strategy, the fold count and which SMAC
        callback wraps the portfolio loaded from disk. Finally the parent
        class' ``fit`` is invoked.

        Parameters
        ----------
        X, y
            Training data and targets; ``X.shape`` and ``y.shape`` are read.
        X_test, y_test, feat_type
            Forwarded unchanged to the parent ``fit``.
        metric
            NOTE(review): accepted but never read in this method; only
            ``self.metric`` is consulted.
        dataset_name
            Used as the label of the metafeature frame and forwarded to the
            parent ``fit``.

        Returns
        -------
        Whatever the parent class' ``fit`` returns.
        """
        def _holdout(fidelity):
            # Settings entry for a holdout policy (no fold count).
            return {
                'resampling_strategy': 'holdout-iterative-fit',
                'fidelity': fidelity,
            }

        def _cross_validation(folds, fidelity):
            # Settings entry for a cross-validation policy.
            return {
                'resampling_strategy': 'cv-iterative-fit',
                'folds': folds,
                'fidelity': fidelity,
            }

        # Default metric: accuracy for a single target column, log loss otherwise.
        if self.metric is None:
            one_target_column = len(y.shape) == 1 or y.shape[1] == 1
            self.metric = accuracy if one_target_column else log_loss

        # Metrics outside `metrics` fall back to the 'balanced_accuracy' selector.
        metric_name = 'balanced_accuracy'
        if self.metric in metrics:
            metric_name = self.metric.name
        selector_file = selector_files[metric_name]
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # presumably selector_file is a trusted, packaged artifact -- confirm.
        with open(selector_file, 'rb') as selector_fh:
            selector = pickle.load(selector_fh)

        # A single-row frame: [number of columns, number of rows] of X.
        metafeature_values = [X.shape[1], X.shape[0]]
        metafeatures = pd.DataFrame({dataset_name: metafeature_values}).transpose()
        selection = np.argmax(selector.predict(metafeatures))
        automl_policy = strategies[selection]

        all_settings = {
            'RF_None_holdout_iterative_es_if': _holdout(None),
            'RF_None_3CV_iterative_es_if': _cross_validation(3, None),
            'RF_None_5CV_iterative_es_if': _cross_validation(5, None),
            'RF_None_10CV_iterative_es_if': _cross_validation(10, None),
            'RF_SH-eta4-i_holdout_iterative_es_if': _holdout('SH'),
            'RF_SH-eta4-i_3CV_iterative_es_if': _cross_validation(3, 'SH'),
            'RF_SH-eta4-i_5CV_iterative_es_if': _cross_validation(5, 'SH'),
            'RF_SH-eta4-i_10CV_iterative_es_if': _cross_validation(10, 'SH'),
        }
        setting = all_settings[automl_policy]

        resampling_strategy = setting['resampling_strategy']
        # Only the cross-validation strategy takes extra arguments.
        resampling_strategy_kwargs = (
            None
            if resampling_strategy != 'cv-iterative-fit'
            else {'folds': setting['folds']}
        )

        portfolio_dir = this_directory / metric_name / 'askl2_portfolios'
        portfolio_file = portfolio_dir / ('%s.json' % automl_policy)
        with open(portfolio_file) as portfolio_fh:
            portfolio_json = json.load(portfolio_fh)
        portfolio = portfolio_json['portfolio']

        uses_successive_halving = setting['fidelity'] == 'SH'
        if uses_successive_halving:
            smac_callback = SHObjectCallback('iterations', 4, 5.0, portfolio)
        else:
            smac_callback = SmacObjectCallback(portfolio)

        self.resampling_strategy = resampling_strategy
        self.resampling_strategy_arguments = resampling_strategy_kwargs
        self.get_smac_object_callback = smac_callback

        return super().fit(
            X=X, y=y,
            X_test=X_test, y_test=y_test,
            feat_type=feat_type,
            dataset_name=dataset_name,
        )