class Pipeline(Graph):
    """
    Base class used for composite model structure definition

    :param nodes: Node object(s)
    :param log: Log object to record messages

    .. note:: fitted_on_data stores the data which were used in the last
        pipeline fitting (empty if the pipeline has not been fitted yet)
    """

    def __init__(self, nodes: Optional[Union[Node, List[Node]]] = None, log: Log = None):
        self.computation_time = None
        self.template = None
        self.fitted_on_data = {}
        self.log = log or default_log(__name__)
        super().__init__(nodes)

    def fit_from_scratch(self, input_data: Union[InputData, MultiModalData] = None):
        """
        Method used for training the pipeline without using saved information

        :param input_data: data used for operation training
        """
        # Clean all saved states and fit all operations
        self.log.info('Fit pipeline from scratch')
        self.unfit()
        self.fit(input_data, use_fitted=False)

    def update_fitted_on_data(self, data: InputData):
        characteristics = input_data_characteristics(data=data, log=self.log)
        self.fitted_on_data['data_type'] = characteristics[0]
        self.fitted_on_data['features_hash'] = characteristics[1]
        self.fitted_on_data['target_hash'] = characteristics[2]

    def _fitted_status_if_new_data(self, new_input_data: InputData, fitted_status: bool):
        new_data_params = input_data_characteristics(new_input_data, log=self.log)
        if fitted_status and self.fitted_on_data:
            params_names = ('data_type', 'features_hash', 'target_hash')
            are_data_params_different = any(
                new_data_param != self.fitted_on_data[param_name]
                for new_data_param, param_name in zip(new_data_params, params_names))
            if are_data_params_different:
                info = 'Fitted operations are no longer valid because a new dataset is used ' \
                       'for training. Parameter use_fitted value changed to False'
                self.log.info(info)
                fitted_status = False
        return fitted_status

    def _fit_with_time_limit(self, input_data: Optional[InputData] = None, use_fitted_operations=False,
                             time: timedelta = timedelta(minutes=3)) -> Manager:
        """
        Run the training process with a time limit in a separate process.

        :param input_data: data used for operation training
        :param use_fitted_operations: flag defining whether to use saved information
            about previous executions or not, default False
        :param time: time constraint for the operation fitting process
        """
        time = int(time.total_seconds())
        manager = Manager()
        process_state_dict = manager.dict()
        fitted_operations = manager.list()
        p = Process(target=self._fit,
                    args=(input_data, use_fitted_operations, process_state_dict, fitted_operations),
                    kwargs={})
        p.start()
        p.join(time)
        if p.is_alive():
            p.terminate()
            raise TimeoutError('Pipeline fitness evaluation time limit has expired')

        self.fitted_on_data = process_state_dict['fitted_on_data']
        self.computation_time = process_state_dict['computation_time']
        for node_num, node in enumerate(self.nodes):
            self.nodes[node_num].fitted_operation = fitted_operations[node_num]
        return process_state_dict['train_predicted']
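    # A minimal usage sketch for time-limited fitting (illustrative names:
    # 'linear' as the operation type, `train_data` as an InputData instance
    # prepared by the caller). When the limit is hit, the child process is
    # terminated and TimeoutError is raised:
    #
    #     pipeline = Pipeline(PrimaryNode('linear'))
    #     try:
    #         pipeline.fit(train_data, time_constraint=timedelta(minutes=1))
    #     except TimeoutError:
    #         pipeline.unfit()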
    def _fit(self, input_data: InputData, use_fitted_operations=False,
             process_state_dict: Manager = None, fitted_operations: Manager = None):
        """
        Run the training process in all nodes of the pipeline starting with the root.

        :param input_data: data used for operation training
        :param use_fitted_operations: flag defining whether to use saved information
            about previous executions or not, default False
        :param process_state_dict: this dictionary is used for saving required pipeline
            parameters (which were changed inside the process) in a case of operation
            fit time control (when the process is created)
        :param fitted_operations: this list is used for saving fitted operations of pipeline nodes
        """
        # InputData was set directly to the primary nodes
        if input_data is None:
            use_fitted_operations = False
        else:
            use_fitted_operations = self._fitted_status_if_new_data(
                new_input_data=input_data, fitted_status=use_fitted_operations)

        if not use_fitted_operations or not self.fitted_on_data:
            # Don't use previous information
            self.unfit()
            self.update_fitted_on_data(input_data)

        with Timer(log=self.log) as t:
            computation_time_update = not use_fitted_operations or not self.root_node.fitted_operation or \
                                      self.computation_time is None
            train_predicted = self.root_node.fit(input_data=input_data)
            if computation_time_update:
                self.computation_time = round(t.minutes_from_start, 3)

        if process_state_dict is None:
            return train_predicted
        else:
            process_state_dict['train_predicted'] = train_predicted
            process_state_dict['computation_time'] = self.computation_time
            process_state_dict['fitted_on_data'] = self.fitted_on_data
            for node in self.nodes:
                fitted_operations.append(node.fitted_operation)

    def fit(self, input_data: Union[InputData, MultiModalData], use_fitted=True,
            time_constraint: Optional[timedelta] = None):
        """
        Run the training process in all nodes of the pipeline starting with the root.

        :param input_data: data used for operation training
        :param use_fitted: flag defining whether to use saved information about
            previous executions or not, default True
        :param time_constraint: time constraint for operation fitting
        """
        if not use_fitted:
            self.unfit()

        # Make a copy of the input data to avoid performing inplace operations
        copied_input_data = copy(input_data)
        copied_input_data = self._assign_data_to_nodes(copied_input_data)

        if time_constraint is None:
            train_predicted = self._fit(input_data=copied_input_data,
                                        use_fitted_operations=use_fitted)
        else:
            train_predicted = self._fit_with_time_limit(input_data=copied_input_data,
                                                        use_fitted_operations=use_fitted,
                                                        time=time_constraint)
        return train_predicted

    @property
    def is_fitted(self):
        return all(node.fitted_operation is not None for node in self.nodes)

    def unfit(self):
        """ Remove fitted operations for all nodes """
        for node in self.nodes:
            node.unfit()

    def fit_from_cache(self, cache: OperationsCache):
        for node in self.nodes:
            cached_state = cache.get(node)
            if cached_state:
                node.fitted_operation = cached_state.operation
            else:
                node.fitted_operation = None
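    # A sketch of warm-starting from a cache, assuming `cache` is an OperationsCache
    # populated by a previous run over the same data and `test_data` is an InputData
    # instance; nodes missing from the cache stay unfitted, so is_fitted guards predict:
    #
    #     pipeline.fit_from_cache(cache)
    #     if pipeline.is_fitted:
    #         prediction = pipeline.predict(test_data)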
    def predict(self, input_data: Union[InputData, MultiModalData], output_mode: str = 'default'):
        """
        Run the predict process in all nodes of the pipeline starting with the root.

        :param input_data: data for prediction
        :param output_mode: desired form of output for operations. Available options are:
            'default' (as is),
            'labels' (numbers of classes - for classification),
            'probs' (probabilities - for classification == 'default'),
            'full_probs' (return all probabilities - for binary classification).
        :return: OutputData with prediction
        """
        if not self.is_fitted:
            ex = 'Pipeline is not fitted yet'
            self.log.error(ex)
            raise ValueError(ex)

        # Make a copy of the input data to avoid performing inplace operations
        copied_input_data = copy(input_data)
        copied_input_data = self._assign_data_to_nodes(copied_input_data)

        result = self.root_node.predict(input_data=copied_input_data, output_mode=output_mode)
        return result

    def fine_tune_all_nodes(self, loss_function: Callable,
                            loss_params: Callable = None,
                            input_data: Union[InputData, MultiModalData] = None,
                            iterations=50, timeout: int = 5,
                            cv_folds: int = None,
                            validation_blocks: int = 3) -> 'Pipeline':
        """
        Tune all hyperparameters of nodes simultaneously via black-box optimization
        using PipelineTuner. For details, see
        :meth:`~fedot.core.pipelines.tuning.unified.PipelineTuner.tune_pipeline`
        """
        # Make a copy of the input data to avoid performing inplace operations
        copied_input_data = copy(input_data)

        timeout = timedelta(minutes=timeout)
        pipeline_tuner = PipelineTuner(pipeline=self,
                                       task=copied_input_data.task,
                                       iterations=iterations,
                                       timeout=timeout)
        self.log.info('Start tuning of primary nodes')
        tuned_pipeline = pipeline_tuner.tune_pipeline(input_data=copied_input_data,
                                                      loss_function=loss_function,
                                                      loss_params=loss_params,
                                                      cv_folds=cv_folds,
                                                      validation_blocks=validation_blocks)
        self.log.info('Tuning was finished')
        return tuned_pipeline

    def save(self, path: str):
        """
        Save the pipeline to a json representation with pickled fitted operations.

        :param path: path to the json file with the operation
        :return: json containing a composite operation description
        """
        if not self.template:
            self.template = PipelineTemplate(self, self.log)
        json_object = self.template.export_pipeline(path)
        return json_object

    def load(self, path: str):
        """
        Load the pipeline from a json representation with pickled fitted operations.

        :param path: path to the json file with the operation
        """
        self.nodes = []
        self.template = PipelineTemplate(self, self.log)
        self.template.import_pipeline(path)

    def __eq__(self, other) -> bool:
        return self.root_node.descriptive_id == other.root_node.descriptive_id

    def __str__(self):
        description = {
            'depth': self.depth,
            'length': self.length,
            'nodes': self.nodes,
        }
        return f'{description}'

    @property
    def root_node(self) -> Optional[Node]:
        if len(self.nodes) == 0:
            return None
        root = [node for node in self.nodes
                if not any(self.operator.node_children(node))]
        if len(root) > 1:
            raise ValueError(f'{ERROR_PREFIX} More than 1 root_nodes in pipeline')
        return root[0]

    def _assign_data_to_nodes(self, input_data) -> Optional[InputData]:
        if isinstance(input_data, MultiModalData):
            for node in (n for n in self.nodes if isinstance(n, PrimaryNode)):
                if node.operation.operation_type in input_data.keys():
                    node.node_data = input_data[node.operation.operation_type]
                    node.direct_set = True
                else:
                    raise ValueError(f'No data for primary node {node}')
            return None
        return input_data

    def print_structure(self):
        """ Print information about the pipeline structure """
        print('Pipeline structure:')
        print(self.__str__())
        for node in self.nodes:
            print(f'{node.operation.operation_type} - {node.custom_params}')
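# A hedged round-trip sketch for pipeline serialization (the 'pipeline.json' path
# is illustrative): save() exports the structure with pickled fitted operations,
# and load() restores it into a fresh Pipeline; equality compares the root node's
# descriptive_id:
#
#     pipeline.save(path='pipeline.json')
#     restored = Pipeline()
#     restored.load(path='pipeline.json')
#     assert restored == pipeline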
class AtomizedModelTemplate(OperationTemplateAbstract):
    def __init__(self, node: Node = None, operation_id: int = None,
                 nodes_from: list = None, path: str = None):
        # The imports are placed inside the method to avoid circular imports
        from fedot.core.pipelines.pipeline import Pipeline
        from fedot.core.pipelines.template import PipelineTemplate
        from fedot.core.operations.atomized_model import AtomizedModel

        super().__init__()
        self.atomized_model_json_path = None
        self.next_pipeline_template = None
        self.pipeline_template = None

        if path:
            pipeline = Pipeline()
            pipeline.load(path)
            self.next_pipeline_template = AtomizedModel(pipeline)
            self.pipeline_template = PipelineTemplate(pipeline)

        if node:
            self._operation_to_template(node, operation_id, nodes_from)

    def _operation_to_template(self, node: Node, operation_id: int, nodes_from: list):
        from fedot.core.pipelines.template import PipelineTemplate

        self.operation_id = operation_id
        self.operation_type = node.operation.operation_type
        self.nodes_from = nodes_from
        self.pipeline_template = PipelineTemplate(node.operation.pipeline)
        self.atomized_model_json_path = 'nested_' + str(self.operation_id)

    def convert_to_dict(self) -> dict:
        operation_object = {
            'operation_id': self.operation_id,
            'operation_type': self.operation_type,
            'nodes_from': self.nodes_from,
            'atomized_model_json_path': self.atomized_model_json_path
        }
        return operation_object

    def _create_nested_path(self, path: str) -> Tuple[str, str]:
        """
        Create a folder for the nested JSON operation and prepare paths for saving the JSONs.

        :param path: path where to save the parent JSON operation
        :return: absolute and relative paths to save the nested JSON operation
        """
        relative_path = os.path.join('fitted_operations', 'nested_' + str(self.operation_id))
        absolute_path = os.path.join(path, relative_path)

        if not os.path.exists(absolute_path):
            os.makedirs(absolute_path)

        return absolute_path, relative_path

    def export_operation(self, path: str):
        absolute_path = os.path.join(path, self.atomized_model_json_path)
        _check_existing_path(absolute_path)
        self.pipeline_template.export_pipeline(absolute_path)

    def import_json(self, operation_object: dict):
        required_fields = ['operation_id', 'operation_type', 'nodes_from',
                           'atomized_model_json_path']
        self._validate_json_operation_template(operation_object, required_fields)

        self.operation_id = operation_object['operation_id']
        self.operation_type = operation_object['operation_type']
        self.nodes_from = operation_object['nodes_from']
        self.atomized_model_json_path = operation_object['atomized_model_json_path']
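# A minimal sketch of the template JSON round-trip (assuming `template` was built
# from a node wrapping an AtomizedModel): convert_to_dict() produces the
# serializable description that import_json() validates and reads back:
#
#     as_dict = template.convert_to_dict()
#     restored = AtomizedModelTemplate()
#     restored.import_json(as_dict)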