Example No. 1
    def test_pipeline(self):
        with open(os.path.join(TEST_PIPELINES_DIR, 'random-sample.yml'), 'r') as pipeline_file:
            pipeline = pipeline_module.Pipeline.from_yaml(
                pipeline_file,
                resolver=pipeline_module.Resolver(),
                strict_digest=True,
            )

        with tempfile.TemporaryDirectory() as scratch_dir:
            def validate_uri(uri):
                utils.validate_uri(uri, [scratch_dir])

            pipeline_message = utils.encode_pipeline_description(
                pipeline,
                [
                    utils.ValueType.RAW,
                    utils.ValueType.CSV_URI,
                    utils.ValueType.DATASET_URI,
                    utils.ValueType.PICKLE_BLOB,
                    utils.ValueType.PICKLE_URI,
                ],
                scratch_dir,
                validate_uri=validate_uri,
            )

            decoded_pipeline = utils.decode_pipeline_description(
                pipeline_message,
                pipeline_module.Resolver(),
                validate_uri=validate_uri,
                strict_digest=True,
            )

            self.assertEqual(
                pipeline.to_json_structure(nest_subpipelines=True),
                decoded_pipeline.to_json_structure(nest_subpipelines=True),
            )
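Stripped of the test harness, the round trip this test exercises reduces to a minimal sketch. The import path ta3ta2_api.utils and the pipeline file name are assumptions, not taken from the original:

import tempfile

from d3m.metadata import pipeline as pipeline_module
from ta3ta2_api import utils  # assumed module path for the helpers above

# Load a pipeline, encode it to its gRPC message, decode it back, and
# check that the JSON structure survives the round trip.
with open('pipeline.yml') as pipeline_file:  # hypothetical file
    pipeline = pipeline_module.Pipeline.from_yaml(
        pipeline_file, resolver=pipeline_module.Resolver())

with tempfile.TemporaryDirectory() as scratch_dir:
    message = utils.encode_pipeline_description(
        pipeline, [utils.ValueType.RAW], scratch_dir)
    decoded = utils.decode_pipeline_description(
        message, pipeline_module.Resolver())
    assert pipeline.to_json_structure() == decoded.to_json_structure()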
Example No. 2

def execute_pipeline(pipeline: pipeline_pb2.PipelineDescription,
                     dataset_filenames: List[str],
                     static_resource_path: Optional[str] = None) -> Any:
    """
    Executes a binary protobuf pipeline against the supplied D3M datasets
    using the d3m common runtime.

    Parameters
    ----------
    pipeline: protobuf pipeline definition
    dataset_filenames: paths to folders containing input D3M datasets
    static_resource_path: optional path to static primitive files (volumes)

    Returns
    -------
    The result of the pipeline execution.
    """

    # transform the pipeline to the internal d3m representation
    pipeline_d3m = utils.decode_pipeline_description(
        pipeline, metadata_pipeline.Resolver())

    # load the data
    inputs = _load_inputs(dataset_filenames)

    # fit and produce
    fitted_pipeline, _ = _fit(pipeline_d3m,
                              inputs,
                              volumes_dir=static_resource_path)
    result = _produce(fitted_pipeline, inputs)

    return result
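A hedged usage sketch for the function above; the file name and dataset path are hypothetical:

# Read a serialized PipelineDescription from disk and execute it against
# a local D3M dataset folder.
with open('pipeline.pb', 'rb') as f:  # hypothetical file name
    message = pipeline_pb2.PipelineDescription()
    message.ParseFromString(f.read())

result = execute_pipeline(message, ['/data/185_baseball'])  # hypothetical path
print(result)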
Example No. 3
    def SearchSolutions(self, message):
        """Get potential pipelines and load into DB."""
        # Validate request is in required format, extract if it is
        dataset_uri, _ = self.validator.validate_search_solutions_request(
            message)
        # Generate search ID
        search_id = self._generate_id()

        # serialize the problem protobuf to JSON for storage in the db
        prob = json_format.MessageToDict(message.problem)
        prob = json.dumps(prob)

        # serialize the pipeline to a string for storage in the db if one is provided
        search_template: Optional[str] = None
        if message.HasField('template'):
            search_template_obj = api_utils.decode_pipeline_description(
                message.template, pipeline.Resolver())
            search_template = search_template_obj.to_json()

        # Create search row in DB
        search = models.Searches(id=search_id)
        self.session.add(search)
        self.session.commit()

        task = models.Tasks(problem=prob,
                            pipeline=search_template,
                            type="EXLINE",
                            dataset_uri=dataset_uri,
                            id=self._generate_id(),
                            search_id=search_id)
        self.session.add(task)

        # Add all to DB
        self.session.commit()
        return search_id
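The problem serialization above relies on google.protobuf.json_format; in isolation the pattern is just the following (a generic sketch, not specific to this service):

import json

from google.protobuf import json_format

def message_to_json(message) -> str:
    # Convert any protobuf message to a dict, then dump it as a JSON string
    # suitable for a text column in the database.
    return json.dumps(json_format.MessageToDict(message))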
Example No. 4

def convert_pipeline(input_file: str,
                     output_file: str) -> pipeline_module.Pipeline:
    # load the serialized protobuf pipeline
    with open(input_file, "rb") as infile:
        pipeline = pipeline_pb2.PipelineDescription()
        pipeline.ParseFromString(infile.read())

    # decode into the internal d3m pipeline representation and return it
    return ta3ta2.decode_pipeline_description(pipeline,
                                              pipeline_module.Resolver())
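As excerpted, output_file is accepted but never written. A plausible completion, which is an assumption and not part of the original, would dump the decoded pipeline's JSON structure to that path:

import json

def convert_and_save(input_file: str, output_file: str) -> None:
    # Decode the protobuf pipeline, then persist it as D3M pipeline JSON.
    pipeline_d3m = convert_pipeline(input_file, output_file)
    with open(output_file, 'w') as out:
        json.dump(pipeline_d3m.to_json_structure(), out, indent=2)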
Example No. 5
    def SearchSolutions(self, request, context):
        """Create a `Session` and start generating & scoring pipelines.
        """
        if len(request.inputs) > 1:
            raise error(context, grpc.StatusCode.UNIMPLEMENTED,
                        "Search with more than 1 input is not supported")
        expected_version = pb_core.DESCRIPTOR.GetOptions().Extensions[
            pb_core.protocol_version]

        if request.version != expected_version:
            logger.error(
                "TA3 is using a different protocol version: %r "
                "(us: %r)", request.version, expected_version)

        template = request.template

        # isinstance(template, pb_pipeline.PipelineDescription)
        if template is not None and len(template.steps) > 0:
            pipeline = decode_pipeline_description(template,
                                                   pipeline_module.Resolver())
            if pipeline.has_placeholder():
                template = pipeline.to_json_structure()
            else:  # Pipeline template fully defined
                problem = None
                if request.problem:
                    problem = decode_problem_description(request.problem)
                search_id = self._ta2.new_session(problem)
                dataset = request.inputs[0].dataset_uri
                if not dataset.startswith('file://'):
                    dataset = 'file://' + dataset

                self._ta2.build_fixed_pipeline(search_id,
                                               pipeline.to_json_structure(),
                                               dataset)
                return pb_core.SearchSolutionsResponse(
                    search_id=str(search_id),
                )
        else:
            template = None

        dataset = request.inputs[0].dataset_uri
        if not dataset.endswith('datasetDoc.json'):
            raise error(context, grpc.StatusCode.INVALID_ARGUMENT,
                        "Dataset is not in D3M format: %s", dataset)
        if not dataset.startswith('file://'):
            dataset = 'file://' + dataset

        problem = decode_problem_description(request.problem)
        timeout_search = request.time_bound_search
        timeout_run = request.time_bound_run
        report_rank = request.rank_solutions_limit > 0

        if timeout_search <= 0.0:
            timeout_search = None

        if timeout_run <= 0.0:
            timeout_run = None

        search_id = self._ta2.new_session(problem)
        session = self._ta2.sessions[search_id]
        task_keywords = session.problem['problem']['task_keywords']
        metrics = session.metrics

        self._ta2.build_pipelines(search_id,
                                  dataset,
                                  task_keywords,
                                  metrics,
                                  timeout_search,
                                  timeout_run,
                                  template,
                                  report_rank=report_rank)

        return pb_core.SearchSolutionsResponse(search_id=str(search_id))
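For reference, the protocol-version check at the top of this method reads a custom file-level option compiled into the core protobuf module; in isolation it looks like the sketch below, where the core_pb2 import path is an assumption:

from ta3ta2_api import core_pb2 as pb_core  # assumed import path

# The TA3/TA2 API embeds its version as a file-option extension on the
# core .proto descriptor; both sides compare it when a search starts.
version = pb_core.DESCRIPTOR.GetOptions().Extensions[pb_core.protocol_version]
print(version)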