Beispiel #1
0
    def linearized_component_graph(cls, components):
        """Flatten a pre-initialized component graph into (name, class) pairs.

        Args:
            components (list(ComponentBase) or Dict[str, ComponentBase]): Components in the pipeline,
                given either as a list or as a dictionary.

        Returns:
            list((component name, ComponentBase)) - one tuple per component, with the component name first
                and the component class (as resolved by ``handle_component_class``) second. For list input
                the tuples follow the input order, and a name that was already used is disambiguated by
                appending ``_<index>``, where index is the component's position in the list. For dictionary
                input the dictionary keys are used as the names.
        """
        if not isinstance(components, list):
            # Dictionary form: the key is the name and the first element of
            # each value identifies the component.
            return [
                (key, handle_component_class(info[0]))
                for key, info in components.items()
            ]

        linearized = []
        used_names = set()
        for position, entry in enumerate(components):
            resolved_class = handle_component_class(entry)
            unique_name = resolved_class.name
            if unique_name in used_names:
                # Repeated component: suffix with its position in the list.
                unique_name = f'{unique_name}_{position}'
            used_names.add(unique_name)
            linearized.append((unique_name, resolved_class))
        return linearized
Beispiel #2
0
    def from_list(cls, component_list, random_seed=0):
        """Build a linear ComponentGraph in which every component consumes the output of the one before it.

        Each component receives the ``.x`` output of its predecessor. When the
        predecessor's name contains "sampler", its ``.y`` output is passed along too.

        Arguments:
            component_list (list): String names or ComponentBase subclasses in
                                   an order that represents a valid linear graph
            random_seed: Forwarded unchanged to the class constructor as
                         ``random_seed``. Defaults to 0.

        Returns:
            The object produced by calling ``cls`` with the generated component
            dictionary.
        """
        graph_spec = {}
        upstream = None
        for position, entry in enumerate(component_list):
            resolved_class = handle_component_class(entry)
            node_name = resolved_class.name
            if node_name in graph_spec:
                # Same component used again: suffix with its list position.
                node_name = f'{node_name}_{position}'

            node_info = [resolved_class]
            if upstream is not None:
                node_info.append(f"{upstream}.x")
                if "sampler" in upstream:
                    node_info.append(f"{upstream}.y")
            graph_spec[node_name] = node_info
            upstream = node_name
        return cls(graph_spec, random_seed=random_seed)
    def _transform_parameters(self, pipeline_class, proposed_parameters):
        """Complete a proposed pipeline parameters dict with the settings each component requires.

        Arguments:
            pipeline_class: Object whose ``linearized_component_graph`` attribute
                is iterated to find the pipeline's components.
            proposed_parameters (dict): Maps component name to a dict of
                parameters. Dicts found here are updated in place and reused in
                the result.

        Returns:
            dict: Maps each component name to its parameter dict, plus a
                'pipeline' entry when pipeline-level parameters were configured.
        """
        def _pick_value(candidate):
            # Turn a configured value into a concrete parameter value.
            # NOTE(review): Integer/Real/Categorical are presumably search-space
            # dimensions exposing ``rvs`` - confirm against the file's imports.
            if isinstance(candidate, (Integer, Real)):
                # Draw a sample from the space and keep its first element.
                return candidate.rvs(random_state=self._random_state)[0]
            if isinstance(candidate, Categorical):
                return candidate.rvs(random_state=self._random_state)
            if isinstance(candidate, (list, tuple)):
                return candidate[0]
            return candidate

        has_pipeline_level = 'pipeline' in self._pipeline_params
        result = {}
        if has_pipeline_level:
            result['pipeline'] = self._pipeline_params['pipeline']

        # Resolve every component before touching any parameter dict.
        resolved_classes = list(
            map(handle_component_class,
                pipeline_class.linearized_component_graph))

        for klass in resolved_classes:
            name = klass.name
            settings = proposed_parameters.get(name, {})
            accepted = inspect.signature(klass.__init__).parameters

            # The text featurizer always receives the text columns.
            if name == "Text Featurization Component":
                settings['text_columns'] = self._text_columns

            # Inject these only when the component's __init__ accepts them.
            if 'n_jobs' in accepted:
                settings['n_jobs'] = self.n_jobs
            if 'number_features' in accepted:
                settings['number_features'] = self.number_features

            # Component-specific pipeline params are applied on the first batch only.
            if name in self._pipeline_params and self._batch_number == 0:
                for key, raw in self._pipeline_params[name].items():
                    settings[key] = _pick_value(raw)

            # Pipeline-level params are applied last, so they take precedence,
            # but only for arguments the component's __init__ accepts.
            if has_pipeline_level:
                for key, raw in self._pipeline_params['pipeline'].items():
                    if key in accepted:
                        settings[key] = raw

            result[name] = settings
        return result
Beispiel #4
0
    def __init__(self, component_dict=None, random_state=0):
        """Set up a component graph for a pipeline, represented as a directed acyclic graph (DAG).

        Arguments:
            component_dict (dict): Maps each component name to a list whose
                first element identifies the component. Defaults to an empty
                graph when None (or otherwise falsy).
            random_state: Value handed to ``get_random_state``. Defaults to 0.

        Raises:
            ValueError: If any value in ``component_dict`` is not a list.

        Example:
            >>> component_dict = {'imputer': ['Imputer'], 'ohe': ['One Hot Encoder', 'imputer.x'], 'estimator_1': ['Random Forest Classifier', 'ohe.x'], 'estimator_2': ['Decision Tree Classifier', 'ohe.x'], 'final': ['Logistic Regression Classifier', 'estimator_1', 'estimator_2']}
            >>> component_graph = ComponentGraph(component_dict)
        """
        self.random_state = get_random_state(random_state)
        self.component_dict = component_dict or {}
        self.component_instances = {}
        self._is_instantiated = False
        for node_name, node_info in self.component_dict.items():
            if not isinstance(node_info, list):
                raise ValueError(
                    'All component information should be passed in as a list')
            # Only the first element names the component; it is stored as the
            # resolved class.
            self.component_instances[node_name] = handle_component_class(
                node_info[0])
        self.compute_order = self.generate_order(self.component_dict)
        self.input_feature_names = {}
Beispiel #5
0
def test_handle_component_class_names():
    """Check that handle_component_class resolves classes and names, and rejects invalid input."""
    for component in all_components():
        # Both the class itself and its name must resolve to a ComponentBase subclass.
        for candidate in (component, component.name):
            resolved = handle_component_class(candidate)
            assert inspect.isclass(resolved)
            assert issubclass(resolved, ComponentBase)

    # An unknown name is reported as a missing component.
    with pytest.raises(MissingComponentError, match='Component "This Component Does Not Exist" was not found'):
        handle_component_class('This Component Does Not Exist')

    class NonComponent:
        pass

    # An instance of an unrelated class is rejected.
    with pytest.raises(ValueError):
        handle_component_class(NonComponent())