def load_pipeline(pipeline):
    with open(pipeline) as _pipeline:
        if pipeline.endswith('.json'):
            pipeline = Pipeline.from_json(_pipeline)
        else:
            pipeline = Pipeline.from_yaml(_pipeline)

    return pipeline
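A quick usage sketch (the YAML path below is a hypothetical placeholder; Pipeline is d3m.metadata.pipeline.Pipeline, elided by the snippet):

pipeline = load_pipeline('pipelines/random_forest.yml')  # hypothetical path
print(pipeline.id, pipeline.name)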
Example #2

from collections import OrderedDict

from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

def generate_template(pipeline_file: str) -> dict:
    with open(pipeline_file) as f:
        pipeline = Pipeline.from_json(f)

    steps = []
    for i, step in enumerate(pipeline.steps):
        if not isinstance(step, PrimitiveStep):
            raise ValueError('Can only handle PrimitiveSteps')
        step_name = f'steps.{i}'
        hyperparameters = {}
        for name, value in step.hyperparams.items():
            if value['type'] == ArgumentType.VALUE:
                hyperparameters[name] = value['data']
            else:
                raise ValueError(
                    f'Do not know how to parse hyperparam: {str(value)}')
        arguments = []
        argument_keys = set(step.arguments.keys())
        for argument_name in ['inputs', 'outputs', 'reference']:
            if argument_name in argument_keys:
                argument_keys.remove(argument_name)
                argument = step.arguments[argument_name]
                if argument['type'] == ArgumentType.CONTAINER:
                    if argument['data'] == 'inputs.0':
                        arguments.append('template_input')
                    elif (argument['data'].startswith('steps.')
                          and argument['data'].endswith('.produce')):
                        # Keep only the 'steps.N' part by stripping the
                        # trailing '.produce' (8 characters).
                        arguments.append(argument['data'][:-8])
                    else:
                        raise ValueError(
                            f"Do not know how to parse argument: {argument['data']}")
                else:
                    raise ValueError(
                        f"Do not know how to parse argument type: {argument['type']}")
        if len(argument_keys) > 0:
            for argument_name in argument_keys:
                print(argument_name, step.arguments[argument_name])
            raise ValueError(f"Unused arguments: {argument_keys}")
        primitive = OrderedDict()
        primitive['primitive'] = str(step.primitive)
        primitive['hyperparameters'] = hyperparameters
        template_step = OrderedDict()
        template_step['name'] = step_name
        template_step['primitives'] = [primitive]
        template_step['inputs'] = arguments
        steps.append(template_step)
    template = OrderedDict()
    template['name'] = pipeline.id if pipeline.name is None else pipeline.name
    # taskType/taskSubtype are left as placeholder sets to be filled in.
    template['taskType'] = {'TaskType'}
    template['taskSubtype'] = {'TaskSubtype'}
    template['inputType'] = {'table'}
    template['output'] = step_name  # name of the final step
    template['steps'] = steps
    return template
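A minimal driver for the function above (the pipeline.json path is hypothetical):

from pprint import pprint

template = generate_template('pipeline.json')  # hypothetical path
pprint(template)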
Example #3
    def DescribeSolution(self, request, context):

        solution_id = request.solution_id
        info_dict = self.get_from_stage_outputs("GetSearchSolutionsResults",
                                                solution_id)

        # Deserialize the stored pipeline JSON and encode it as a TA3-TA2
        # API pipeline description.
        pipeline_json = info_dict["pipeline_json"]
        allowed_value_types = info_dict["allowed_value_types"]
        pipeline = Pipeline.from_json(pipeline_json)
        pipeline_description = ta3ta2utils.encode_pipeline_description(
            pipeline, allowed_value_types, "/tmp")

        return core_pb2.DescribeSolutionResponse(pipeline=pipeline_description)
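DescribeSolution is one handler on a TA3-TA2 Core servicer. A hedged sketch of how such a servicer is typically wired into a gRPC server (the CoreServicer class name and the port are assumptions, not taken from this example):

from concurrent import futures

import grpc
from ta3ta2_api import core_pb2_grpc

server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
core_pb2_grpc.add_CoreServicer_to_server(CoreServicer(), server)  # hypothetical class
server.add_insecure_port('[::]:45042')  # conventional TA2 port
server.start()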
Example #4
def generate_pipeline(pipeline_path: str,
                      dataset_path: str,
                      problem_doc_path: str,
                      resolver: Resolver = None) -> Runtime:
    """
    Simplified interface that fit a pipeline with a dataset

    Paramters
    ---------
    pipeline_path
        Path to the pipeline description
    dataset_path:
        Path to the datasetDoc.json
    problem_doc_path:
        Path to the problemDoc.json
    resolver : Resolver
        Resolver to use.
    """

    # Pipeline description: dispatch on the file extension.
    if pipeline_path.endswith('.json'):
        with open(pipeline_path) as pipeline_file:
            pipeline_description = Pipeline.from_json(
                string_or_file=pipeline_file, resolver=resolver)
    else:
        with open(pipeline_path) as pipeline_file:
            pipeline_description = Pipeline.from_yaml(
                string_or_file=pipeline_file, resolver=resolver)

    # Problem Doc
    problem_doc = load_problem_doc(problem_doc_path)

    # Dataset: normalize a plain path into a file:// URI.
    if not dataset_path.startswith('file:'):
        dataset_path = 'file://{dataset_path}'.format(
            dataset_path=os.path.abspath(dataset_path))

    dataset = D3MDatasetLoader().load(dataset_uri=dataset_path)
    # Adding Metadata to Dataset
    dataset = add_target_columns_metadata(dataset, problem_doc)

    # Pipeline
    pipeline_runtime = Runtime(pipeline_description)
    # Fitting Pipeline
    pipeline_runtime.fit(inputs=[dataset])
    return pipeline_runtime
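A usage sketch for generate_pipeline (all three paths are hypothetical placeholders following the D3M dataset layout):

runtime = generate_pipeline(
    pipeline_path='pipeline.yml',
    dataset_path='TRAIN/dataset_TRAIN/datasetDoc.json',
    problem_doc_path='TRAIN/problem_TRAIN/problemDoc.json')

The fitted Runtime can then produce predictions on held-out data, as the next two examples show.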
Example #5

    def fitproduce(self, input_item):
        # input_item[0] is skipped (presumably a task identifier).
        (problem_doc, pipeline_json,
         dataset_train, dataset_test) = input_item[1:]

        # Run pipeline
        pipeline = Pipeline.from_json(pipeline_json)
        pipeline_runtime = Runtime(pipeline, context=Context.TESTING)
        pipeline_runtime.fit(inputs=[dataset_train],
                             return_values=['outputs.0'])
        score_predictions = pipeline_runtime.produce(
            inputs=[dataset_test], return_values=['outputs.0'])
        score_predictions = score_predictions.values['outputs.0']

        # Write predictions to output path
        path = self.get_predictions_save_path()
        utils.utils.write_predictions_to_file(score_predictions, path,
                                              problem_doc)
        path_uri = "file://%s" % path
        return path_uri
Example #6

    def score(self, input_item):
        # input_item[0] is skipped (presumably a task identifier).
        (problem_doc, metric, pipeline_json,
         dataset_train, dataset_test) = input_item[1:]

        # Run pipeline
        pipeline = Pipeline.from_json(pipeline_json)
        pipeline_runtime = Runtime(pipeline, context=Context.TESTING)
        pipeline_runtime.fit(inputs=[dataset_train],
                             return_values=['outputs.0'])
        score_predictions = pipeline_runtime.produce(
            inputs=[dataset_test], return_values=['outputs.0'])
        score_predictions = score_predictions.values['outputs.0']

        # Evaluate scores on score dir
        achieved_score = utils.train_utils.score(score_predictions,
                                                 dataset_test,
                                                 problem_doc,
                                                 override_metric_key=metric)
        return achieved_score
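For orientation, the payload shape both worker methods expect, inferred from the input_item[1:] unpacking (the task_id element, the metric key 'f1Macro', and the worker variable are assumptions):

fitproduce_item = (task_id, problem_doc, pipeline_json, dataset_train, dataset_test)
score_item = (task_id, problem_doc, 'f1Macro', pipeline_json, dataset_train, dataset_test)

predictions_uri = worker.fitproduce(fitproduce_item)
achieved_score = worker.score(score_item)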
Example #7

import sys

# d3m imports assumed for the names used below (Pipeline, Runtime, Context);
# adjust if the project ships its own runtime module.
from d3m.metadata.base import Context
from d3m.metadata.pipeline import Pipeline
from d3m.runtime import Runtime

import utils.utils
import utils.train_utils

if __name__ == "__main__":

    # Get args
    try:
        path_to_pipeline_json = sys.argv[1]
        inputdir = sys.argv[2]

        # Load datasets
        problem_doc, dataset = utils.utils.load_data_from_dir(inputdir)

        # Create pipeline
        with open(path_to_pipeline_json, "r") as f:
            pipeline = Pipeline.from_json(f.read())

        pipeline_runtime = Runtime(pipeline, context=Context.TESTING)
        pipeline_runtime.fit(inputs=[dataset], return_values=['outputs.0'])

        problem_doc_score, dataset_score = utils.utils.load_data_from_dir(inputdir, mode="score")

        score_predictions = pipeline_runtime.produce(inputs=[dataset_score], return_values=['outputs.0'])
        score_predictions = score_predictions.values['outputs.0']

        # Evaluate scores on score dir
        achieved_score = utils.train_utils.score(score_predictions, dataset_score, problem_doc_score)
        print(achieved_score)
    except Exception:
        # Report any failure (bad arguments, pipeline errors, scoring errors)
        # as N/A instead of a traceback.
        print("N/A")
Example #8
def load_pipeline(pipeline_json):
    return Pipeline.from_json(pipeline_json)
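As the string_or_file keyword in Example #4 shows, Pipeline.from_json accepts either a JSON string or an open file object, so this one-liner covers both cases (json_string and the path are placeholders):

pipeline = load_pipeline(json_string)  # from a string
with open('pipeline.json') as f:       # hypothetical path
    pipeline = load_pipeline(f)        # from an open file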
Example #9
def load_pipeline(pipeline_path):
    with open(pipeline_path, 'r') as pipeline_file:
        if pipeline_path.endswith('.json'):
            return Pipeline.from_json(pipeline_file)
        else:
            return Pipeline.from_yaml(pipeline_file)