Code Example #1
File: test_imputers.py  Project: automl/Auto-PyTorch
    def test_get_config_space(self):
        dataset_properties = dict(categorical_columns=[0, 1],
                                  numerical_columns=[1, 2])
        config = SimpleImputer.get_hyperparameter_search_space(
            dataset_properties).sample_configuration()
        estimator = SimpleImputer(**config)
        estimator_clone = clone(estimator)
        estimator_clone_params = estimator_clone.get_params()

        # Make sure all keys are copied properly
        for k, v in estimator.get_params().items():
            self.assertIn(k, estimator_clone_params)

        # Make sure the params getter of the estimator is honored
        klass = estimator.__class__
        new_object_params = estimator.get_params(deep=False)
        for name, param in new_object_params.items():
            new_object_params[name] = clone(param, safe=False)
        new_object = klass(**new_object_params)
        params_set = new_object.get_params(deep=False)

        for name in new_object_params:
            param1 = new_object_params[name]
            param2 = params_set[name]
            self.assertEqual(param1, param2)
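This test exercises scikit-learn's estimator contract: clone() rebuilds an object from get_params(deep=False), so each constructor argument must be stored under its own name and reported back unchanged by get_params(). A minimal, self-contained sketch of that contract (ToyImputer and its parameters are illustrative, not part of Auto-PyTorch):

from sklearn.base import BaseEstimator, clone

class ToyImputer(BaseEstimator):
    """Illustrative component: stores constructor arguments verbatim so clone() can rebuild it."""
    def __init__(self, numerical_strategy='mean', categorical_strategy='most_frequent'):
        self.numerical_strategy = numerical_strategy
        self.categorical_strategy = categorical_strategy

estimator = ToyImputer(numerical_strategy='median')
estimator_clone = clone(estimator)  # re-instantiated from get_params(deep=False)
assert estimator_clone.get_params() == estimator.get_params()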
Code Example #2
    def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]],
                            ) -> List[Tuple[str, autoPyTorchChoice]]:
        """
        Defines what steps a pipeline should follow.
        The step itself has choices given via autoPyTorchChoice.

        Returns:
            List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised
                by the pipeline.
        """
        steps = []  # type: List[Tuple[str, autoPyTorchChoice]]

        default_dataset_properties = {'target_type': 'tabular_classification'}
        if dataset_properties is not None:
            default_dataset_properties.update(dataset_properties)

        steps.extend([
            ("imputer", SimpleImputer()),
            ("encoder", EncoderChoice(default_dataset_properties)),
            ("scaler", ScalerChoice(default_dataset_properties)),
            ("tabular_transformer", TabularColumnTransformer()),
            ("preprocessing", EarlyPreprocessing()),
            ("network", NetworkChoice(default_dataset_properties)),
            ("network_init", NetworkInitializerChoice(default_dataset_properties)),
            ("optimizer", OptimizerChoice(default_dataset_properties)),
            ("lr_scheduler", SchedulerChoice(default_dataset_properties)),
            ("data_loader", FeatureDataLoader()),
            ("trainer", TrainerChoice(default_dataset_properties)),
        ])
        return steps
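The returned (name, component) pairs closely mirror the steps a scikit-learn Pipeline consumes; each *Choice object is later resolved to a concrete component during configuration. As a rough analogy only, using plain scikit-learn pieces rather than the Auto-PyTorch classes above:

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Rough scikit-learn analogue of assembling named steps with per-dataset defaults.
dataset_properties = None
default_dataset_properties = {'target_type': 'tabular_classification'}
if dataset_properties is not None:
    default_dataset_properties.update(dataset_properties)

steps = [
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression()),
]
pipeline = Pipeline(steps=steps)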
Code Example #3
    def _get_pipeline_steps(
        self, dataset_properties: Optional[Dict[str,
                                                BaseDatasetPropertiesType]]
    ) -> List[Tuple[str, PipelineStepType]]:
        """
        Defines what steps a pipeline should follow.
        The step itself has choices given via autoPyTorchChoice.

        Returns:
            List[Tuple[str, PipelineStepType]]:
                list of steps sequentially exercised by the pipeline.
        """
        steps: List[Tuple[str, PipelineStepType]] = []

        default_dataset_properties: Dict[str, BaseDatasetPropertiesType] = {
            'target_type': 'tabular_regression'
        }
        if dataset_properties is not None:
            default_dataset_properties.update(dataset_properties)

        steps.extend([
            ("imputer", SimpleImputer(random_state=self.random_state)),
            ("encoder",
             EncoderChoice(default_dataset_properties,
                           random_state=self.random_state)),
            ("scaler",
             ScalerChoice(default_dataset_properties,
                          random_state=self.random_state)),
            ("feature_preprocessor",
             FeatureProprocessorChoice(default_dataset_properties,
                                       random_state=self.random_state)),
            ("tabular_transformer",
             TabularColumnTransformer(random_state=self.random_state)),
            ("preprocessing",
             EarlyPreprocessing(random_state=self.random_state)),
            ("network_embedding",
             NetworkEmbeddingChoice(default_dataset_properties,
                                    random_state=self.random_state)),
            ("network_backbone",
             NetworkBackboneChoice(default_dataset_properties,
                                   random_state=self.random_state)),
            ("network_head",
             NetworkHeadChoice(default_dataset_properties,
                               random_state=self.random_state)),
            ("network", NetworkComponent(random_state=self.random_state)),
            ("network_init",
             NetworkInitializerChoice(default_dataset_properties,
                                      random_state=self.random_state)),
            ("optimizer",
             OptimizerChoice(default_dataset_properties,
                             random_state=self.random_state)),
            ("lr_scheduler",
             SchedulerChoice(default_dataset_properties,
                             random_state=self.random_state)),
            ("data_loader", FeatureDataLoader(random_state=self.random_state)),
            ("trainer",
             TrainerChoice(default_dataset_properties,
                           random_state=self.random_state)),
        ])
        return steps
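Compared with example #2, this regression variant adds the feature preprocessor and the embedding/backbone/head split, and it threads the pipeline's random_state into every component so sampled configurations stay reproducible. A minimal sketch of that threading pattern (ToyComponent is illustrative, not an Auto-PyTorch class):

from typing import Optional, Union
import numpy as np
from sklearn.utils import check_random_state

class ToyComponent:
    """Illustrative step: accepts the shared random_state like the components above."""
    def __init__(self, random_state: Optional[Union[int, np.random.RandomState]] = None):
        self.random_state = check_random_state(random_state)

random_state = np.random.RandomState(42)
steps = [('first', ToyComponent(random_state=random_state)),
         ('second', ToyComponent(random_state=random_state))]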
Code Example #4
File: test_imputers.py  Project: automl/Auto-PyTorch
    def test_constant_imputation(self):
        data = np.array(
            [['1.0', np.nan, 3], [np.nan, 8, 9], ['4.0', 5, np.nan],
             [np.nan, 2, 3], ['7.0', np.nan, 9], ['4.0', np.nan, np.nan]],
            dtype=object)
        numerical_columns = [1, 2]
        categorical_columns = [0]
        train_indices = np.array([0, 2, 3])
        test_indices = np.array([1, 4, 5])
        dataset_properties = {
            'categorical_columns': categorical_columns,
            'numerical_columns': numerical_columns,
        }
        X = {
            'X_train': data[train_indices],
            'dataset_properties': dataset_properties
        }
        imputer_component = SimpleImputer(
            numerical_strategy='constant_zero',
            categorical_strategy='constant_!missing!')

        imputer_component = imputer_component.fit(X)
        X = imputer_component.transform(X)
        categorical_imputer = X['imputer']['categorical']
        numerical_imputer = X['imputer']['numerical']

        # check if the fit dictionary X is modified as expected
        self.assertIsInstance(X['imputer'], dict)
        self.assertIsInstance(categorical_imputer, BaseEstimator)
        self.assertIsInstance(numerical_imputer, BaseEstimator)

        # build a column transformer with the returned imputers and fit it on the data
        column_transformer = make_column_transformer(
            (categorical_imputer,
             X['dataset_properties']['categorical_columns']),
            (numerical_imputer, X['dataset_properties']['numerical_columns']),
            remainder='passthrough')
        column_transformer = column_transformer.fit(X['X_train'])
        transformed = column_transformer.transform(data[test_indices])
        assert_array_equal(
            transformed.astype(str),
            np.array([['-1', 8, 9], [7.0, '0', 9], [4.0, '0', '0']],
                     dtype=str))
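The expected array follows from the two constant strategies: constant_zero fills numerical NaNs with 0, and constant_!missing! fills the categorical column with a sentinel value (-1 in this expected output), with the column transformer emitting the categorical column first. A rough sklearn-only sketch of the same fill behaviour (not the Auto-PyTorch component itself):

import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.compose import make_column_transformer

# Constant fills: -1 for the categorical column, 0 for the numerical columns.
data = np.array([[np.nan, 8, 9], [7.0, np.nan, 9], [4.0, np.nan, np.nan]])
column_transformer = make_column_transformer(
    (SimpleImputer(strategy='constant', fill_value=-1), [0]),
    (SimpleImputer(strategy='constant', fill_value=0), [1, 2]),
    remainder='passthrough')
filled = column_transformer.fit_transform(data)
# filled == [[-1., 8., 9.], [7., 0., 9.], [4., 0., 0.]]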
Code Example #5
File: base.py  Project: automl/Auto-PyTorch
    def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]],
                            ) -> List[Tuple[str, autoPyTorchChoice]]:
        """
        Defines what steps a pipeline should follow.
        The step itself has choices given via autoPyTorchChoice.

        Returns:
            List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised
                by the pipeline.
        """
        steps: List[Tuple[str, autoPyTorchChoice]] = []

        default_dataset_properties = {'target_type': 'tabular_classification'}
        if dataset_properties is not None:
            default_dataset_properties.update(dataset_properties)

        steps.extend([
            ("imputer", SimpleImputer()),
            ("encoder", EncoderChoice(default_dataset_properties)),
            ("scaler", ScalerChoice(default_dataset_properties)),
            ("tabular_transformer", TabularColumnTransformer()),
        ])
        return steps