def search_fit(self, input_data, time_limit=300, *, expose_values=False):
    """Run the AutoKeras search on *input_data* and fit the resulting best pipeline.

    Parameters
    ----------
    input_data:
        A list of D3M containers used as the pipeline input.
    time_limit:
        Search time budget in seconds.  NOTE(review): appears unused by this
        implementation — the underlying ``self.clf.fit`` is bounded by
        ``self.epochs`` instead; confirm intent.
    expose_values:
        If True, expose intermediate outputs when fitting the exported pipeline.

    Returns
    -------
    The backend's fitted-pipeline result on success, or a ``PipelineResult``
    carrying a ``RuntimeError`` when fitting failed.
    """
    dataframe = self.get_dataframe(input_data)
    y = self.get_y(dataframe)
    x = self.get_x(dataframe)

    # Neural-architecture search; the best model found is then exported.
    self.clf.fit(x=x, y=y, epochs=self.epochs, batch_size=self.batch_size,
                 validation_split=self.validation_split)
    keras_model = self.clf.export_model()
    best_pipeline = keras2pipeline(keras_model, batch_size=self.batch_size)

    fitted_pipeline_result = self.backend.fit_pipeline(
        problem_description=self.problem_description,
        pipeline=best_pipeline,
        input_data=input_data,
        expose_outputs=expose_values)

    if fitted_pipeline_result.error is not None:
        # Bug fix: log message read "No solution founded".
        logging.error('No solution found')
        pipeline_result = PipelineResult(pipeline=best_pipeline)
        pipeline_result.error = RuntimeError("No solution found")
        return pipeline_result

    self.best_fitted_pipeline_id = fitted_pipeline_result.fitted_pipeline_id
    return fitted_pipeline_result
def fit(
        self, pipeline: Pipeline, input_data: typing.Sequence[container.Dataset],
        expose_outputs: bool = False
) -> typing.Tuple[runtime_module.Runtime, PipelineResult]:
    """Fit *pipeline* on *input_data* and report the outcome.

    Returns the runtime produced by fitting together with a ``PipelineResult``
    whose status is ``"COMPLETED"`` (with outputs attached) or ``"ERRORED"``
    (with the error attached).
    """
    result_record = PipelineResult(pipeline=pipeline)

    fitted_runtime, fit_output, fit_result = runtime_module.fit(
        pipeline=pipeline,
        inputs=input_data,
        problem_description=self.problem_description,
        context=Context.TESTING,
        hyperparams=None,
        random_seed=self.random_seed,
        volumes_dir=self.volumes_dir,
        runtime_environment=self.runtime_environment,
        scratch_dir=self.scratch_dir,
        expose_produced_outputs=expose_outputs,
    )

    if fit_result.has_error():
        result_record.status = "ERRORED"
        result_record.error = fit_result.error
    else:
        result_record.status = "COMPLETED"
        result_record.exposed_outputs = fit_result.values
        result_record.output = fit_output

    return fitted_runtime, result_record
def produce_pipeline(
        self, data_handler, fitted_pipeline_id: str, input_data_id: str, *,
        timeout: float = None, expose_outputs: bool = False
) -> PipelineResult:
    """Run ``produce`` on a previously fitted pipeline.

    The input data is resolved through the (ray) data handler by id.  When
    ``self.store_results`` is set, outputs are persisted to the scratch
    directory and references are stored instead of raw values.
    """
    result_record = PipelineResult(fitted_pipeline_id=fitted_pipeline_id)
    result_record.status = "RUNNING"
    result_record.method_called = "produce"
    result_record.fitted_pipeline_id = fitted_pipeline_id

    # Fetch the actual containers from the remote data handler.
    inputs = ray.get(data_handler.get_data.remote(input_data_id))

    with d3m_utils.silence():
        produce_output, produce_result = runtime_module.produce(
            fitted_pipeline=self.fitted_pipelines[fitted_pipeline_id],
            test_inputs=inputs,
            expose_produced_outputs=expose_outputs,
        )

    if produce_result.has_error():
        result_record.status = "ERRORED"
        result_record.error = produce_result.error
    else:
        result_record.status = "COMPLETED"
        if self.store_results:
            result_record.exposed_outputs = save_exposed_values(
                produce_result.values, fitted_pipeline_id, self.scratch_dir)
            result_record.output = save_exposed_values(
                produce_output, fitted_pipeline_id, self.scratch_dir)
        else:
            result_record.exposed_outputs = produce_result.values
            result_record.output = produce_output

    if self.store_results:
        result_record.pipeline_run = save_pipeline_run(produce_result.pipeline_run, self.scratch_dir)

    return result_record
def produce_pipeline_request(self, fitted_pipeline_id: str,
                             input_data: typing.Sequence[ContainerType], *,
                             timeout: float = None, expose_outputs: bool = False) -> str:
    """
    A method that submit a produce pipeline request.

    Parameters
    ----------
    fitted_pipeline_id : str
        The fitted pipeline if of the fitted pipeline to be use to produce results.
    input_data : typing.Sequence[ContainerType]
        A list of D3M containers.
    timeout : float
        A maximum amount of time that pipelines are going to be executed in seconds.
    expose_outputs : bool
        A variable that enable exposing every intermediate results based on the input_data

    Returns
    -------
    str
        A request id.
    """
    request_id = str(uuid.uuid4())

    result_record = PipelineResult(fitted_pipeline_id=fitted_pipeline_id)
    result_record.status = "RUNNING"
    result_record.method_called = "produce"
    result_record.fitted_pipeline_id = fitted_pipeline_id

    produce_output, produce_result = runtime_module.produce(
        fitted_pipeline=self.fitted_pipelines[fitted_pipeline_id],
        test_inputs=input_data,
        expose_produced_outputs=expose_outputs,
    )

    if produce_result.has_error():
        result_record.status = "ERRORED"
        result_record.error = produce_result.error
    else:
        result_record.status = "COMPLETED"
        result_record.output = produce_output
        result_record.exposed_outputs = produce_result.values
        result_record.pipeline_run = produce_result.pipeline_run

    # The caller polls results by this id.
    self.request_results[request_id] = result_record
    return request_id
def evaluate_pipeline_request(self, problem_description: Problem, pipeline: Pipeline,
                              input_data: typing.Sequence[ContainerType], *,
                              metrics: typing.Sequence[typing.Dict],
                              data_preparation_pipeline: Pipeline = None,
                              scoring_pipeline: Pipeline = None,
                              data_preparation_params: typing.Dict[str, str] = None,
                              scoring_params: typing.Dict[str, str] = None,
                              timeout: float = None) -> str:
    """Submit an evaluation (fit + score over folds) request for *pipeline*.

    The resulting ``PipelineResult`` — with combined fold scores on success, or
    the per-fold errors on failure — is stored in ``self.request_results``
    under the returned request id.
    """
    request_id = str(uuid.uuid4())

    result_record = PipelineResult(pipeline=pipeline)
    result_record.status = "RUNNING"
    result_record.method_called = "evaluate"

    fold_scores, fold_results = runtime_module.evaluate(
        pipeline=pipeline,
        inputs=input_data,
        data_pipeline=data_preparation_pipeline,
        scoring_pipeline=scoring_pipeline,
        problem_description=problem_description,
        data_params=data_preparation_params,
        metrics=metrics,
        context=Context.TESTING,
        scoring_params=scoring_params,
        hyperparams=None,
        random_seed=self.random_seed,
        data_random_seed=self.random_seed,
        scoring_random_seed=self.random_seed,
        volumes_dir=self.volumes_dir,
        scratch_dir=self.scratch_dir,
        runtime_environment=self.runtime_environment,
    )

    if fold_results.has_error():
        result_record.status = "ERRORED"
        result_record.error = [fold.error for fold in fold_results]
    else:
        result_record.status = "COMPLETED"
        result_record.scores = runtime_module.combine_folds(fold_scores)
        result_record.outputs = [fold.values for fold in fold_results]

    self.request_results[request_id] = result_record
    return request_id
def fit_pipeline(
        self, data_handler, problem_description: Problem, pipeline: Pipeline,
        input_data_id: str, *, timeout: float = None, expose_outputs: bool = False
) -> PipelineResult:
    """Fit *pipeline* on data fetched (by id) from the data handler.

    On success the fitted runtime is registered in ``self.fitted_pipelines``
    under a freshly generated id, recorded on the returned ``PipelineResult``.
    """
    result_record = PipelineResult(pipeline=pipeline)
    result_record.status = "RUNNING"
    result_record.method_called = "fit"

    inputs = ray.get(data_handler.get_data.remote(input_data_id))

    # A pipeline with exactly one input and one output counts as "standard".
    standard = len(inputs) == 1 and len(pipeline.outputs) == 1

    with d3m_utils.silence():
        fitted_runtime, fit_output, fit_result = runtime_module.fit(
            pipeline=pipeline,
            inputs=inputs,
            problem_description=problem_description,
            context=Context.TESTING,
            hyperparams=None,
            random_seed=self.random_seed,
            volumes_dir=self.volumes_dir,
            scratch_dir=self.scratch_dir,
            runtime_environment=self.runtime_environment,
            is_standard_pipeline=standard,
            expose_produced_outputs=expose_outputs,
        )

    if fit_result.has_error():
        result_record.status = "ERRORED"
        result_record.error = fit_result.error
    else:
        result_record.status = "COMPLETED"
        new_fitted_id = str(uuid.uuid4())
        if self.store_results:
            # Persist outputs to scratch storage; keep references, not values.
            result_record.exposed_outputs = save_exposed_values(fit_result.values, pipeline.id, self.scratch_dir)
            result_record.output = save_exposed_values(fit_output, pipeline.id, self.scratch_dir)
        else:
            result_record.exposed_outputs = fit_result.values
            result_record.output = fit_output
        result_record.fitted_pipeline_id = new_fitted_id
        self.fitted_pipelines[new_fitted_id] = fitted_runtime

    if self.store_results:
        result_record.pipeline_run = save_pipeline_run(fit_result.pipeline_run, self.scratch_dir)

    return result_record
def produce(self, fitted_pipeline: runtime_module.Runtime,
            input_data: typing.Sequence[container.Dataset],
            expose_outputs: bool = False) -> PipelineResult:
    """Run ``produce`` on a fitted runtime and wrap the outcome in a ``PipelineResult``."""
    result_record = PipelineResult(fitted_pipeline_id='')

    with d3m_utils.silence():
        produce_output, produce_result = runtime_module.produce(
            fitted_pipeline=fitted_pipeline,
            test_inputs=input_data,
            expose_produced_outputs=expose_outputs,
        )

    if produce_result.has_error():
        result_record.status = "ERRORED"
        result_record.error = produce_result.error
    else:
        result_record.status = "COMPLETED"
        result_record.exposed_outputs = produce_result.values
        result_record.output = produce_output

    return result_record
def search_fit( self, input_data: typing.Sequence[ContainerType], time_limit: float = 300, *, expose_values: bool = False ) -> typing.Tuple[runtime_module.Runtime, PipelineResult]: """ This method calls search and fit the best ranking pipelines located from the search located on the history. Parameters ---------- input_data : typing.Sequence[ContainerType] A list of D3M containers to be use as the pipeline input. time_limit : float The time limit to be use for the search. expose_values : bool A flag that allows the user expose all intermediate result of the pipeline during fitting. """ self.input_data = input_data self.search(time_limit) best_pipeline = None for pipeline_result in self.history: if pipeline_result.error is None: if best_pipeline is None: best_pipeline = pipeline_result else: if pipeline_result.rank < best_pipeline.rank: best_pipeline = pipeline_result if best_pipeline is None: logging.error('No solution founded') pipeline_result = PipelineResult(fitted_pipeline_id='') pipeline_result.error = RuntimeError("No solution found") return None, pipeline_result return self.fit(best_pipeline.pipeline, input_data, expose_values)
def evaluate_pipeline(
        self, data_handler, problem_description: Problem, pipeline: Pipeline,
        input_data_id: str, *, metrics: typing.Sequence[typing.Dict],
        data_preparation_pipeline: Pipeline = None, scoring_pipeline: Pipeline = None,
        data_preparation_params: typing.Dict[str, str] = None,
        scoring_params: typing.Dict[str, str] = None, timeout: float = None
) -> PipelineResult:
    """Evaluate *pipeline* (fit + score over folds) on data fetched from the data handler."""
    with d3m_utils.silence():
        result_record = PipelineResult(pipeline=pipeline)
        result_record.status = "RUNNING"
        result_record.method_called = "evaluate"

    # Resolve the containers from the remote data handler.
    inputs = ray.get(data_handler.get_data.remote(input_data_id))

    with d3m_utils.silence():
        fold_scores, fold_results = runtime_module.evaluate(
            pipeline=pipeline,
            inputs=inputs,
            data_pipeline=data_preparation_pipeline,
            scoring_pipeline=scoring_pipeline,
            problem_description=problem_description,
            data_params=data_preparation_params,
            metrics=metrics,
            context=Context.TESTING,
            scoring_params=scoring_params,
            hyperparams=None,
            random_seed=self.random_seed,
            data_random_seed=self.random_seed,
            scoring_random_seed=self.random_seed,
            volumes_dir=self.volumes_dir,
            scratch_dir=self.scratch_dir,
            runtime_environment=self.runtime_environment,
        )

    if fold_results.has_error():
        result_record.status = "ERRORED"
        result_record.error = [fold.error for fold in fold_results]
    else:
        result_record.status = "COMPLETED"
        result_record.scores = runtime_module.combine_folds(fold_scores)

    if self.store_results:
        result_record.pipeline_run = save_pipeline_run(fold_results.pipeline_runs, self.scratch_dir)

    return result_record
def fit_pipeline_request(self, problem_description: Problem, pipeline: Pipeline,
                         input_data: typing.Sequence[ContainerType], *,
                         timeout: float = None, expose_outputs: bool = False) -> str:
    """
    A method that submit a fit_pipeline job.

    Parameters
    ----------
    problem_description : Problem
        A problem description.
    pipeline : Pipeline
        The pipeline that is going to be fitted.
    input_data : typing.Sequence[ContainerType]
        A list of D3M containers.
    timeout : float
        A maximum amount of time that pipelines are going to be executed in seconds.
    expose_outputs : bool
        A variable that enable exposing every intermediate results based on the input_data

    Returns
    -------
    str
        A request id.
    """
    request_id = str(uuid.uuid4())

    result_record = PipelineResult(pipeline=pipeline)
    result_record.status = "RUNNING"
    result_record.method_called = "fit"

    # A pipeline with exactly one input and one output counts as "standard".
    standard = len(input_data) == 1 and len(pipeline.outputs) == 1

    fitted_runtime, fit_output, fit_result = runtime_module.fit(
        pipeline=pipeline,
        inputs=input_data,
        problem_description=problem_description,
        context=Context.TESTING,
        hyperparams=None,
        random_seed=self.random_seed,
        volumes_dir=self.volumes_dir,
        scratch_dir=self.scratch_dir,
        runtime_environment=self.runtime_environment,
        is_standard_pipeline=standard,
        expose_produced_outputs=expose_outputs,
    )

    if fit_result.has_error():
        result_record.status = "ERRORED"
        result_record.error = fit_result.error
    else:
        result_record.status = "COMPLETED"
        result_record.exposed_outputs = fit_result.values
        result_record.output = fit_output

    # Register the fitted runtime so later produce requests can reference it.
    new_fitted_id = str(uuid.uuid4())
    result_record.fitted_pipeline_id = new_fitted_id
    self.fitted_pipelines[new_fitted_id] = fitted_runtime
    result_record.pipeline_run = fit_result.pipeline_run

    self.request_results[request_id] = result_record
    return request_id