Example #1
def load_pipeline(path, tunables=True, defaults=True):
    """Load a d3m json or yaml pipeline."""

    if not os.path.exists(path):
        base_path = os.path.abspath(os.path.dirname(__file__))

        path = os.path.join('templates', path)
        path = os.path.join(base_path, path)

    if not os.path.isfile(path):
        raise ValueError('Could not find pipeline: {}'.format(path))

    LOGGER.warning('Loading pipeline from %s', path)
    with open(path) as pipeline:
        if path.endswith(('yml', 'yaml')):
            data = yaml.safe_load(pipeline)

        else:
            data = json.load(pipeline)

    pipeline = Pipeline.from_json_structure(data)

    if tunables:
        # extract tunable hyperparameters
        tunable_hyperparameters = extract_tunable_hyperparams(pipeline)

        return pipeline, tunable_hyperparameters

    return pipeline
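
A minimal usage sketch for the loader above; 'gradient_boosting.yml' is a hypothetical template name, not a file shipped with the example.

# Hypothetical usage: the template name is a placeholder.
pipeline, tunable_hyperparameters = load_pipeline('gradient_boosting.yml')
print(pipeline.id, len(tunable_hyperparameters))

# With tunables=False only the Pipeline object is returned.
pipeline = load_pipeline('gradient_boosting.yml', tunables=False)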
Example #2
    def __setstate__(self, state: typing.Dict) -> None:
        """
        This method is used for unpickling the object. It takes a dictionary
        of saved state of object and restores the object to that state.
        Args:
            state: typing.Dict
                dictionary of the objects picklable state
        Returns:
        """

        # print("[INFO] Set state called!")

        fitted = state['fitted_pipe']
        del state['fitted_pipe']

        structure = state['pipeline']
        state['pipeline'] = Pipeline.from_json_structure(structure)
        random_seed = state['random_seed']

        run = Runtime(state['pipeline'],
                      fitted_pipeline_id=state['id'],
                      random_seed=random_seed,
                      volumes_dir=FittedPipeline.runtime_setting.volumes_dir,
                      log_dir=FittedPipeline.runtime_setting.log_dir)
        run.steps_state = fitted

        state['runtime'] = run

        self.__dict__ = state
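
For context, a minimal sketch of the complementary __getstate__ that would produce the dictionary consumed above; the attribute names are inferred from the keys read in __setstate__ and are assumptions, not the class's actual implementation.

    def __getstate__(self) -> typing.Dict:
        # Hypothetical counterpart inferred from __setstate__: the Runtime itself
        # is not picklable, so only its fitted step state and the pipeline's JSON
        # structure are stored.
        state = dict(self.__dict__)
        state['fitted_pipe'] = state['runtime'].steps_state
        state['pipeline'] = state['pipeline'].to_json_structure()
        del state['runtime']
        return state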
Example #3
def load_pipeline(pipeline_file: typing.Union[str, typing.Dict]):
    """
    Load pipeline from a pipeline URI

    Parameters
    ----------
    pipeline_file: Union[str, dict]
        The URI pointing to a json file of pipeline or dict of string that is a pipeline

    Returns
    -------
    pipeline: Pipeline
        An object of Pipeline

    """
    if isinstance(pipeline_file, dict):
        try:
            with d3m_utils.silence():
                pipeline = Pipeline.from_json_structure(pipeline_file)
        except Exception:
            pipeline = None
    else:
        with d3m_utils.silence():
            pipeline = get_pipeline(pipeline_path=pipeline_file,
                                    load_all_primitives=False)
    return pipeline
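
A short usage sketch showing both accepted input types; the file path is a placeholder.

# Hypothetical usage: pipeline_file may be a URI/path or an already-parsed dict.
pipeline = load_pipeline('/tmp/pipeline.json')

with open('/tmp/pipeline.json') as f:
    pipeline = load_pipeline(json.load(f))  # returns None if the dict is not a valid pipeline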
Example #4
def load_template():
    with open(
            join(os.path.dirname(__file__),
                 '../resource/pipelines/example_metalearningdb.json')) as fin:
        json_pipeline = json.load(fin)

    d3m_pipeline = Pipeline.from_json_structure(json_pipeline)
    grpc_pipeline = encode_pipeline_description(d3m_pipeline, ['RAW'], '/tmp')

    return grpc_pipeline
Example #5
def evaluate(pipeline, data_pipeline, dataset, metrics, problem,
             scoring_config, dataset_uri, timeout_run):
    if is_collection(dataset_uri[7:]):
        dataset = get_dataset_sample(dataset, problem)

    json_pipeline = convert.to_d3m_json(pipeline)

    if (TaskKeyword.GRAPH in problem['problem']['task_keywords']
            and json_pipeline['description'].startswith('MtLDB')):
        return {0: {'ACCURACY': 1.0}, 1: {'ACCURACY': 1.0}}

    logger.info(
        "Pipeline to be scored:\n\t%s", '\n\t'.join(
            [x['primitive']['python_path'] for x in json_pipeline['steps']]))

    d3m_pipeline = Pipeline.from_json_structure(json_pipeline)
    if 'method' in scoring_config:
        scoring_config.pop('method')

    manager = Manager()
    return_dict = manager.dict()
    p = Process(target=worker,
                args=(d3m_pipeline, data_pipeline, scoring_pipeline, problem,
                      dataset, scoring_config, metrics, return_dict))
    p.start()
    p.join(timeout_run)
    p.terminate()

    if 'run_results' not in return_dict or 'run_scores' not in return_dict:
        raise TimeoutError('Reached timeout (%d seconds) to score a pipeline' %
                           timeout_run)

    run_results = return_dict['run_results']
    run_scores = return_dict['run_scores']

    for result in run_results:
        if result.has_error():
            raise RuntimeError(result.pipeline_run.status['message'])

    #save_pipeline_runs(run_results.pipeline_runs)
    combined_folds = d3m.runtime.combine_folds([fold for fold in run_scores])
    scores = {}

    for _, row in combined_folds.iterrows():
        if row['fold'] not in scores:
            scores[row['fold']] = {}
        scores[row['fold']][row['metric']] = row['value']

    return scores
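
The timeout handling above is the standard multiprocessing pattern: join the worker with a deadline and check whether it managed to write its results into the shared dict. A self-contained sketch of that pattern, independent of the d3m objects used above (the worker body is a stand-in):

from multiprocessing import Manager, Process


def worker(return_dict):
    # Stand-in for the real scoring worker: write results under the expected keys.
    return_dict['run_scores'] = [{'metric': 'ACCURACY', 'value': 0.9}]
    return_dict['run_results'] = []


if __name__ == '__main__':
    manager = Manager()
    return_dict = manager.dict()
    p = Process(target=worker, args=(return_dict,))
    p.start()
    p.join(10)      # wait at most 10 seconds
    p.terminate()   # kill the worker if it is still running

    if 'run_results' not in return_dict or 'run_scores' not in return_dict:
        raise TimeoutError('Reached timeout (10 seconds) to score a pipeline')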
Example #6
    def load_schema_only(
            cls, pipeline_id: str, folder_loc: str, pipeline_schema_subdir: str
    ) -> typing.Tuple[Pipeline, typing.Dict]:
        pipeline_dir = os.path.join(folder_loc, pipeline_schema_subdir)
        subpipeline_dir = os.path.join(folder_loc, cls.subpipelines_subdir)

        pipeline_schema = os.path.join(pipeline_dir, pipeline_id + '.json')

        with open(pipeline_schema, 'r') as f:
            structure = json.load(f)

        resolver = Resolver(
            pipeline_search_paths=[pipeline_dir, subpipeline_dir])
        pipeline = Pipeline.from_json_structure(pipeline_description=structure,
                                                resolver=resolver)
        return (pipeline, structure)
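
A hypothetical call of the classmethod above, assuming the enclosing class is FittedPipeline as in the other examples; the pipeline id and folder paths are placeholders.

pipeline, structure = FittedPipeline.load_schema_only(
    pipeline_id='0123-abcd',
    folder_loc='/output/fitted_pipelines',
    pipeline_schema_subdir='pipelines')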
Example #7
    def add_extra_primitive(self, primitive_name: typing.List[str],
                            location_number: int) -> None:
        """
            Add extra primitives, usually it should be
            "d3m.primitives.data_transformation.denormalize.Common"             or
            "d3m.primitives.data_preprocessing.do_nothing_for_dataset.DSBOX"    or
            "d3m.primitives.data_augmentation.datamart_query.DSBOX"             or
            "d3m.primitives.data_augmentation.datamart_augmentation.DSBOX"
        """

        structure = self.pipeline.to_json_structure()
        for each_primitive_name in primitive_name:
            # adding a datamart query step and a datamart augmentation step must
            # happen together, so we support adding multiple primitives at once
            if location_number == 0:
                input_names = ["inputs.0"]
            else:
                input_names = [
                    "steps." + str(location_number - 1) + ".produce"
                ]
            # if each_primitive_name == "datamart_augmentation":
            #     if location_number >= 2:
            #         input_names = ["steps."+str(location_number - 1)+".produce", "steps."+str(location_number - 2)+".produce"]
            #     if location_number == 1: # which should not occur any more
            #         _logger.warn("detect DatamartAugmentation primitive was added in second step, which should not happen!")
            #         input_names = ["steps."+str(location_number - 1)+".produce", "inputs.0"]

            primitive_augument = self.get_primitive_augment(
                each_primitive_name, input_names)

            hyperparams_file_loc = os.path.join(
                self.runtime_setting.scratch_dir,
                self.dataset_id + each_primitive_name + ".json")
            with open(hyperparams_file_loc, "r") as f:
                hyperparams_file = json.load(f)
            new_hyper_file = {}
            for key, value in hyperparams_file.items():
                new_hyper_file[key] = {"type": "VALUE", "data": value}
            primitive_augument['hyperparams'] = new_hyper_file

            # update output reference
            output_step_reference = structure[
                "outputs"]  # it should look like "steps.11.produce"
            for i, each_output_step_reference in enumerate(
                    output_step_reference):
                each_output_step_reference_split = each_output_step_reference[
                    "data"].split(".")
                each_output_step_reference_split[1] = str(
                    int(each_output_step_reference_split[1]) + 1)
                structure["outputs"][i]["data"] = ".".join(
                    each_output_step_reference_split)

            # add the step in corresponding position
            detail_steps = structure["steps"]
            detail_steps.insert(location_number, primitive_augument)
            for i in range(location_number + 1, len(detail_steps)):
                each_step = detail_steps[i]
                if "arguments" in each_step:
                    for each_argument_key in each_step["arguments"].keys():
                        argument_target = each_step["arguments"][
                            each_argument_key]["data"]
                        if argument_target == "inputs.0":  # and "denormalize" in each_step["primitive"]["python_path"]:
                            argument_target_new = "steps.0.produce"
                            each_step["arguments"][each_argument_key][
                                "data"] = argument_target_new
                        else:
                            argument_target_list = argument_target.split(".")
                            if int(
                                    argument_target_list[1]
                            ) >= location_number or i == location_number + 1:
                                argument_target_list[1] = str(
                                    int(argument_target_list[1]) + 1)
                                argument_target_new = ".".join(
                                    argument_target_list)
                                each_step["arguments"][each_argument_key][
                                    "data"] = argument_target_new
                # update each_step
                detail_steps[i] = each_step
            # update original structure
            structure["steps"] = detail_steps
            # add into runtime
            primitive_pickle_file_loc = os.path.join(
                self.runtime_setting.scratch_dir,
                self.dataset_id + each_primitive_name + ".pkl")
            with open(primitive_pickle_file_loc, "rb") as f:
                primitive_pickle_file = pickle.load(f)
            self.runtime.steps_state.insert(location_number,
                                            primitive_pickle_file)
            location_number += 1

        # rebuild the Pipeline object from the new structure
        self.pipeline = Pipeline.from_json_structure(structure)
        # ForkedPdb().set_trace()
        steps_state_old = self.runtime.steps_state
        # generate new runtime
        cross_validation_result = self.runtime.cross_validation_result
        self.runtime = Runtime(
            self.pipeline,
            fitted_pipeline_id=self.id,
            random_seed=self.random_seed,
            volumes_dir=FittedPipeline.runtime_setting.volumes_dir,
            log_dir=FittedPipeline.runtime_setting.log_dir)
        self.runtime.cross_validation_result = cross_validation_result
        self.runtime.steps_state = steps_state_old
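
The core bookkeeping in the method above is renumbering "steps.N.produce" references once a new step is inserted. A standalone sketch of that renumbering on a toy structure; shift_references is a hypothetical helper, not part of the class:

def shift_references(structure, location_number):
    # After inserting a step at location_number, every "steps.N.*" reference
    # with N >= location_number must become "steps.N+1.*".
    def bump(ref):
        parts = ref.split('.')
        if parts[0] == 'steps' and int(parts[1]) >= location_number:
            parts[1] = str(int(parts[1]) + 1)
        return '.'.join(parts)

    for output in structure['outputs']:
        output['data'] = bump(output['data'])
    for step in structure['steps']:
        for argument in step.get('arguments', {}).values():
            argument['data'] = bump(argument['data'])
    return structure


toy = {
    'steps': [{'arguments': {'inputs': {'data': 'steps.0.produce'}}}],
    'outputs': [{'data': 'steps.1.produce'}],
}
shift_references(toy, 1)  # the output now references steps.2.produce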