Ejemplo n.º 1
0
    def search_fit(self, input_data, time_limit: float = 300, *, expose_values: bool = False):
        """
        Train the wrapped model, export it as a pipeline and fit that pipeline.

        Parameters
        ----------
        input_data
            Data handed to ``self.get_dataframe`` and, unchanged, to
            ``self.backend.fit_pipeline``.
        time_limit : float
            Accepted for interface compatibility; this implementation does not
            read it.
        expose_values : bool
            Forwarded to the backend as ``expose_outputs``.

        Returns
        -------
        The result object returned by ``self.backend.fit_pipeline`` when it
        carries no error. Otherwise a fresh ``PipelineResult`` for the exported
        pipeline whose ``error`` is ``RuntimeError("No solution found")``; when
        the backend error is itself an exception it is attached as
        ``__cause__`` so the root cause is not lost.
        """
        dataframe = self.get_dataframe(input_data)
        y = self.get_y(dataframe)
        x = self.get_x(dataframe)

        self.clf.fit(x=x,
                     y=y,
                     epochs=self.epochs,
                     batch_size=self.batch_size,
                     validation_split=self.validation_split)
        keras_model = self.clf.export_model()
        best_pipeline = keras2pipeline(keras_model, batch_size=self.batch_size)

        fitted_pipeline_result = self.backend.fit_pipeline(
            problem_description=self.problem_description,
            pipeline=best_pipeline,
            input_data=input_data,
            expose_outputs=expose_values)

        fit_error = fitted_pipeline_result.error
        if fit_error is not None:
            # Surface the real failure in the log instead of a bare message.
            logging.error('No solution found: %s', fit_error)
            failure = RuntimeError("No solution found")
            # Only exceptions may be chained; anything else is just logged.
            if isinstance(fit_error, BaseException):
                failure.__cause__ = fit_error
            pipeline_result = PipelineResult(pipeline=best_pipeline)
            pipeline_result.error = failure
            return pipeline_result

        self.best_fitted_pipeline_id = fitted_pipeline_result.fitted_pipeline_id
        return fitted_pipeline_result
Ejemplo n.º 2
0
    def fit(
        self,
        pipeline: Pipeline,
        input_data: typing.Sequence[container.Dataset],
        expose_outputs: bool = False
    ) -> typing.Tuple[runtime_module.Runtime, PipelineResult]:
        """
        Fit ``pipeline`` on ``input_data`` and wrap the outcome in a ``PipelineResult``.

        Parameters
        ----------
        pipeline : Pipeline
            The pipeline to fit.
        input_data : typing.Sequence[container.Dataset]
            Inputs passed to ``runtime_module.fit`` as ``inputs``.
        expose_outputs : bool
            Passed to ``runtime_module.fit`` as ``expose_produced_outputs``.

        Returns
        -------
        typing.Tuple[runtime_module.Runtime, PipelineResult]
            The runtime returned by ``runtime_module.fit`` and a result whose
            status is ``"COMPLETED"`` (with outputs) or ``"ERRORED"`` (with the
            error).
        """
        outcome = PipelineResult(pipeline=pipeline)

        fit_kwargs = dict(
            pipeline=pipeline,
            inputs=input_data,
            problem_description=self.problem_description,
            context=Context.TESTING,
            hyperparams=None,
            random_seed=self.random_seed,
            volumes_dir=self.volumes_dir,
            runtime_environment=self.runtime_environment,
            scratch_dir=self.scratch_dir,
            expose_produced_outputs=expose_outputs,
        )
        runtime, output, result = runtime_module.fit(**fit_kwargs)

        if not result.has_error():
            outcome.status = "COMPLETED"
            outcome.exposed_outputs = result.values
            outcome.output = output
            return runtime, outcome

        outcome.status = "ERRORED"
        outcome.error = result.error
        return runtime, outcome
Ejemplo n.º 3
0
    def evaluate_pipeline_request(self,
                                  problem_description: Problem,
                                  pipeline: Pipeline,
                                  input_data: typing.Sequence[ContainerType],
                                  *,
                                  metrics: typing.Sequence[typing.Dict],
                                  data_preparation_pipeline: Pipeline = None,
                                  scoring_pipeline: Pipeline = None,
                                  data_preparation_params: typing.Dict[
                                      str, str] = None,
                                  scoring_params: typing.Dict[str, str] = None,
                                  timeout: float = None) -> str:
        """
        Evaluate ``pipeline`` and store the result under a new request id.

        The evaluation runs synchronously through ``runtime_module.evaluate``;
        the resulting ``PipelineResult`` is stored in ``self.request_results``.

        Parameters
        ----------
        problem_description : Problem
            Problem description forwarded to the evaluation.
        pipeline : Pipeline
            The pipeline to evaluate.
        input_data : typing.Sequence[ContainerType]
            Inputs forwarded as ``inputs``.
        metrics : typing.Sequence[typing.Dict]
            Metrics forwarded to the evaluation.
        data_preparation_pipeline : Pipeline
            Forwarded as ``data_pipeline``.
        scoring_pipeline : Pipeline
            Forwarded as ``scoring_pipeline``.
        data_preparation_params : typing.Dict[str, str]
            Forwarded as ``data_params``.
        scoring_params : typing.Dict[str, str]
            Forwarded as ``scoring_params``.
        timeout : float
            Accepted but not read by this method.

        Returns
        -------
        str
            The request id under which the result was stored.
        """
        request_id = str(uuid.uuid4())

        outcome = PipelineResult(pipeline=pipeline)
        outcome.status = "RUNNING"
        outcome.method_called = "evaluate"

        # The same seed is used for the pipeline, data preparation and scoring.
        scores, results = runtime_module.evaluate(
            pipeline=pipeline, inputs=input_data,
            data_pipeline=data_preparation_pipeline, scoring_pipeline=scoring_pipeline,
            problem_description=problem_description,
            data_params=data_preparation_params, metrics=metrics,
            context=Context.TESTING,
            scoring_params=scoring_params, hyperparams=None,
            random_seed=self.random_seed,
            data_random_seed=self.random_seed,
            scoring_random_seed=self.random_seed,
            volumes_dir=self.volumes_dir, scratch_dir=self.scratch_dir,
            runtime_environment=self.runtime_environment,
        )

        if not results.has_error():
            outcome.status = "COMPLETED"
            outcome.scores = runtime_module.combine_folds(scores)
            outcome.outputs = [fold.values for fold in results]
        else:
            outcome.status = "ERRORED"
            outcome.error = [fold.error for fold in results]

        self.request_results[request_id] = outcome
        return request_id
Ejemplo n.º 4
0
    def search_fit(
        self,
        input_data: typing.Sequence[ContainerType],
        time_limit: float = 300,
        *,
        expose_values: bool = False
    ) -> typing.Tuple[runtime_module.Runtime, PipelineResult]:
        """
        Run a search, then fit the best-ranked error-free pipeline from the history.

        "Best" means the lowest ``rank`` among the entries of ``self.history``
        whose ``error`` is ``None``; on a tie the earliest entry wins.

        Parameters
        ----------
        input_data : typing.Sequence[ContainerType]
            A list of D3M containers to be used as the pipeline input.

        time_limit : float
            The time limit passed to ``self.search``.

        expose_values : bool
            Passed to ``self.fit`` as its ``expose_outputs`` argument.

        Returns
        -------
        typing.Tuple[runtime_module.Runtime, PipelineResult]
            Whatever ``self.fit`` returns for the selected pipeline. When the
            history has no error-free entry, ``(None, result)`` where
            ``result.error`` is a ``RuntimeError``.
        """
        self.input_data = input_data
        self.search(time_limit)

        winner = None
        for candidate in self.history:
            if candidate.error is not None:
                continue
            # Strict comparison: an equally ranked later entry never replaces the winner.
            if winner is None or candidate.rank < winner.rank:
                winner = candidate

        if winner is not None:
            return self.fit(winner.pipeline, input_data, expose_values)

        logging.error('No solution founded')
        failure = PipelineResult(fitted_pipeline_id='')
        failure.error = RuntimeError("No solution found")
        return None, failure
Ejemplo n.º 5
0
    def get_request(self, request_id: str) -> PipelineResult:
        """
        Look up the stored result of a previously submitted request.

        Parameters
        ----------
        request_id : str
            Id of the request whose result is wanted.

        Returns
        -------
        PipelineResult
            The result stored in ``self.request_results`` for ``request_id``,
            or a new ``PipelineResult`` built with an empty
            ``fitted_pipeline_id`` when the id is unknown.
        """
        if request_id not in self.request_results:
            # Unknown id: hand back a placeholder result rather than raising.
            return PipelineResult(fitted_pipeline_id='')
        return self.request_results[request_id]
Ejemplo n.º 6
0
    def evaluate_pipeline(
            self, data_handler, problem_description: Problem, pipeline: Pipeline,
            input_data_id: str, *, metrics: typing.Sequence[typing.Dict],
            data_preparation_pipeline: Pipeline = None, scoring_pipeline: Pipeline = None,
            data_preparation_params: typing.Dict[str, str] = None, scoring_params: typing.Dict[str, str] = None,
            timeout: float = None
    ) -> PipelineResult:
        """
        Evaluate ``pipeline`` on data fetched from ``data_handler``.

        Parameters
        ----------
        data_handler
            Object whose ``get_data.remote(input_data_id)`` result is resolved
            with ``ray.get`` to obtain the inputs.
        problem_description : Problem
            Problem description forwarded to the evaluation.
        pipeline : Pipeline
            The pipeline to evaluate.
        input_data_id : str
            Id of the input data to request from ``data_handler``.
        metrics : typing.Sequence[typing.Dict]
            Metrics forwarded to the evaluation.
        data_preparation_pipeline : Pipeline
            Forwarded as ``data_pipeline``.
        scoring_pipeline : Pipeline
            Forwarded as ``scoring_pipeline``.
        data_preparation_params : typing.Dict[str, str]
            Forwarded as ``data_params``.
        scoring_params : typing.Dict[str, str]
            Forwarded as ``scoring_params``.
        timeout : float
            Accepted but not read by this method.

        Returns
        -------
        PipelineResult
            Result with status ``"COMPLETED"`` and combined scores, or
            ``"ERRORED"`` and the list of per-result errors. When
            ``self.store_results`` is set, ``pipeline_run`` is filled in for
            both outcomes.
        """
        with d3m_utils.silence():
            outcome = PipelineResult(pipeline=pipeline)
        outcome.status = "RUNNING"
        outcome.method_called = "evaluate"

        # Fetch the inputs through the data handler before running anything.
        data_ref = data_handler.get_data.remote(input_data_id)
        input_data = ray.get(data_ref)

        with d3m_utils.silence():
            scores, results = runtime_module.evaluate(
                pipeline=pipeline,
                inputs=input_data,
                data_pipeline=data_preparation_pipeline,
                scoring_pipeline=scoring_pipeline,
                problem_description=problem_description,
                data_params=data_preparation_params,
                metrics=metrics,
                context=Context.TESTING,
                scoring_params=scoring_params,
                hyperparams=None,
                random_seed=self.random_seed,
                data_random_seed=self.random_seed,
                scoring_random_seed=self.random_seed,
                volumes_dir=self.volumes_dir,
                scratch_dir=self.scratch_dir,
                runtime_environment=self.runtime_environment,
            )

        if not results.has_error():
            outcome.status = "COMPLETED"
            outcome.scores = runtime_module.combine_folds(scores)
        else:
            outcome.status = "ERRORED"
            outcome.error = [fold.error for fold in results]

        # The pipeline run is stored whether or not the evaluation succeeded.
        if self.store_results:
            outcome.pipeline_run = save_pipeline_run(results.pipeline_runs, self.scratch_dir)
        return outcome
Ejemplo n.º 7
0
    def produce(self,
                fitted_pipeline: runtime_module.Runtime,
                input_data: typing.Sequence[container.Dataset],
                expose_outputs: bool = False) -> PipelineResult:
        """
        Run an already fitted pipeline on ``input_data``.

        Parameters
        ----------
        fitted_pipeline : runtime_module.Runtime
            The fitted runtime to produce with.
        input_data : typing.Sequence[container.Dataset]
            Inputs passed to ``runtime_module.produce`` as ``test_inputs``.
        expose_outputs : bool
            Passed to ``runtime_module.produce`` as ``expose_produced_outputs``.

        Returns
        -------
        PipelineResult
            Result with status ``"COMPLETED"`` (with outputs) or ``"ERRORED"``
            (with the error). It is created with an empty
            ``fitted_pipeline_id``.
        """
        outcome = PipelineResult(fitted_pipeline_id='')

        with d3m_utils.silence():
            output, result = runtime_module.produce(
                fitted_pipeline=fitted_pipeline, test_inputs=input_data, expose_produced_outputs=expose_outputs
            )

        if not result.has_error():
            outcome.status = "COMPLETED"
            outcome.exposed_outputs = result.values
            outcome.output = output
            return outcome

        outcome.status = "ERRORED"
        outcome.error = result.error
        return outcome
Ejemplo n.º 8
0
    def fit_pipeline(
            self, data_handler, problem_description: Problem, pipeline:  Pipeline,
            input_data_id: str, *, timeout: float = None, expose_outputs: bool = False
    ) -> PipelineResult:
        """
        Fit ``pipeline`` on data fetched from ``data_handler`` and register the fitted runtime.

        Parameters
        ----------
        data_handler
            Object whose ``get_data.remote(input_data_id)`` result is resolved
            with ``ray.get`` to obtain the inputs.
        problem_description : Problem
            Problem description forwarded to the fit.
        pipeline : Pipeline
            The pipeline to fit.
        input_data_id : str
            Id of the input data to request from ``data_handler``.
        timeout : float
            Accepted but not read by this method.
        expose_outputs : bool
            Forwarded as ``expose_produced_outputs``.

        Returns
        -------
        PipelineResult
            On success the status is ``"COMPLETED"``, a new fitted pipeline id
            is assigned and the runtime is kept in ``self.fitted_pipelines``
            under that id. On failure the status is ``"ERRORED"`` and the error
            is attached. When ``self.store_results`` is set, the outputs are
            passed through ``save_exposed_values`` and ``pipeline_run`` is
            filled in for both outcomes.
        """
        outcome = PipelineResult(pipeline=pipeline)
        outcome.status = "RUNNING"
        outcome.method_called = "fit"

        data_ref = data_handler.get_data.remote(input_data_id)
        input_data = ray.get(data_ref)

        # "Standard" here means exactly one input and exactly one pipeline output.
        is_standard_pipeline = len(input_data) == 1 and len(pipeline.outputs) == 1

        with d3m_utils.silence():
            runtime, output, result = runtime_module.fit(
                pipeline=pipeline,
                inputs=input_data,
                problem_description=problem_description,
                context=Context.TESTING,
                hyperparams=None,
                random_seed=self.random_seed,
                volumes_dir=self.volumes_dir,
                scratch_dir=self.scratch_dir,
                runtime_environment=self.runtime_environment,
                is_standard_pipeline=is_standard_pipeline,
                expose_produced_outputs=expose_outputs,
            )

        if not result.has_error():
            outcome.status = "COMPLETED"
            new_fitted_id = str(uuid.uuid4())

            if not self.store_results:
                outcome.exposed_outputs = result.values
                outcome.output = output
            else:
                outcome.exposed_outputs = save_exposed_values(result.values, pipeline.id, self.scratch_dir)
                outcome.output = save_exposed_values(output, pipeline.id, self.scratch_dir)

            outcome.fitted_pipeline_id = new_fitted_id
            self.fitted_pipelines[new_fitted_id] = runtime
        else:
            outcome.status = "ERRORED"
            outcome.error = result.error

        # The pipeline run is stored whether or not the fit succeeded.
        if self.store_results:
            outcome.pipeline_run = save_pipeline_run(result.pipeline_run, self.scratch_dir)

        return outcome
Ejemplo n.º 9
0
    def produce_pipeline(
            self, data_handler, fitted_pipeline_id: str, input_data_id: str, *,
            timeout: float = None, expose_outputs: bool = False
    ) -> PipelineResult:
        """
        Produce with a previously fitted pipeline on data fetched from ``data_handler``.

        Parameters
        ----------
        data_handler
            Object whose ``get_data.remote(input_data_id)`` result is resolved
            with ``ray.get`` to obtain the inputs.
        fitted_pipeline_id : str
            Key into ``self.fitted_pipelines`` selecting the fitted runtime.
        input_data_id : str
            Id of the input data to request from ``data_handler``.
        timeout : float
            Accepted but not read by this method.
        expose_outputs : bool
            Forwarded as ``expose_produced_outputs``.

        Returns
        -------
        PipelineResult
            Result with status ``"COMPLETED"`` (with outputs) or ``"ERRORED"``
            (with the error). When ``self.store_results`` is set, the outputs
            are passed through ``save_exposed_values`` and ``pipeline_run`` is
            filled in for both outcomes.
        """
        outcome = PipelineResult(fitted_pipeline_id=fitted_pipeline_id)
        outcome.status = "RUNNING"
        outcome.method_called = "produce"
        outcome.fitted_pipeline_id = fitted_pipeline_id

        data_ref = data_handler.get_data.remote(input_data_id)
        input_data = ray.get(data_ref)

        with d3m_utils.silence():
            output, result = runtime_module.produce(
                fitted_pipeline=self.fitted_pipelines[fitted_pipeline_id],
                test_inputs=input_data,
                expose_produced_outputs=expose_outputs,
            )

        if not result.has_error():
            outcome.status = "COMPLETED"
            if not self.store_results:
                outcome.exposed_outputs = result.values
                outcome.output = output
            else:
                outcome.exposed_outputs = save_exposed_values(result.values, fitted_pipeline_id, self.scratch_dir)
                outcome.output = save_exposed_values(output, fitted_pipeline_id, self.scratch_dir)
        else:
            outcome.status = "ERRORED"
            outcome.error = result.error

        # The pipeline run is stored whether or not producing succeeded.
        if self.store_results:
            outcome.pipeline_run = save_pipeline_run(result.pipeline_run, self.scratch_dir)

        return outcome
Ejemplo n.º 10
0
    def fit_pipeline_request(self,
                             problem_description: Problem,
                             pipeline: Pipeline,
                             input_data: typing.Sequence[ContainerType],
                             *,
                             timeout: float = None,
                             expose_outputs: bool = False) -> str:
        """
        Fit ``pipeline`` and store the result under a new request id.

        The fit runs synchronously through ``runtime_module.fit``; the
        resulting ``PipelineResult`` is stored in ``self.request_results``. On
        success the fitted runtime is also kept in ``self.fitted_pipelines``
        under a newly generated fitted pipeline id.

        Parameters
        ----------
        problem_description : Problem
            A problem description.
        pipeline : Pipeline
            The pipeline that is going to be fitted.
        input_data : typing.Sequence[ContainerType]
            A list of D3M containers.
        timeout : float
            Accepted but not read by this method.
        expose_outputs : bool
            Forwarded as ``expose_produced_outputs``.

        Returns
        -------
        str
            A request id.
        """
        request_id = str(uuid.uuid4())
        outcome = PipelineResult(pipeline=pipeline)
        outcome.status = "RUNNING"
        outcome.method_called = "fit"

        # "Standard" here means exactly one input and exactly one pipeline output.
        is_standard_pipeline = len(input_data) == 1 and len(pipeline.outputs) == 1

        runtime, output, result = runtime_module.fit(
            pipeline=pipeline, inputs=input_data,
            problem_description=problem_description,
            context=Context.TESTING, hyperparams=None,
            random_seed=self.random_seed,
            volumes_dir=self.volumes_dir, scratch_dir=self.scratch_dir,
            runtime_environment=self.runtime_environment,
            is_standard_pipeline=is_standard_pipeline,
            expose_produced_outputs=expose_outputs,
        )

        if not result.has_error():
            outcome.status = "COMPLETED"
            outcome.exposed_outputs = result.values
            outcome.output = output
            new_fitted_id = str(uuid.uuid4())
            outcome.fitted_pipeline_id = new_fitted_id
            self.fitted_pipelines[new_fitted_id] = runtime
        else:
            outcome.status = "ERRORED"
            outcome.error = result.error

        # The pipeline run is attached for both outcomes.
        outcome.pipeline_run = result.pipeline_run
        self.request_results[request_id] = outcome

        return request_id
Ejemplo n.º 11
0
    def produce_pipeline_request(self,
                                 fitted_pipeline_id: str,
                                 input_data: typing.Sequence[ContainerType],
                                 *,
                                 timeout: float = None,
                                 expose_outputs: bool = False) -> str:
        """
        Produce with a fitted pipeline and store the result under a new request id.

        The call runs synchronously through ``runtime_module.produce``; the
        resulting ``PipelineResult`` is stored in ``self.request_results``.

        Parameters
        ----------
        fitted_pipeline_id : str
            Key into ``self.fitted_pipelines`` selecting the fitted pipeline
            used to produce results.
        input_data : typing.Sequence[ContainerType]
            A list of D3M containers.
        timeout : float
            Accepted but not read by this method.
        expose_outputs : bool
            Forwarded as ``expose_produced_outputs``.

        Returns
        -------
        str
            A request id.
        """
        request_id = str(uuid.uuid4())

        outcome = PipelineResult(fitted_pipeline_id=fitted_pipeline_id)
        outcome.status = "RUNNING"
        outcome.method_called = "produce"
        outcome.fitted_pipeline_id = fitted_pipeline_id

        output, result = runtime_module.produce(
            fitted_pipeline=self.fitted_pipelines[fitted_pipeline_id],
            test_inputs=input_data, expose_produced_outputs=expose_outputs,
        )

        if not result.has_error():
            outcome.status = "COMPLETED"
            outcome.output = output
            outcome.exposed_outputs = result.values
        else:
            outcome.status = "ERRORED"
            outcome.error = result.error

        # The pipeline run is attached for both outcomes.
        outcome.pipeline_run = result.pipeline_run
        self.request_results[request_id] = outcome

        return request_id