Esempio n. 1
0
def crawl_openml_handler(
    arguments: argparse.Namespace,
    *,
    pipeline_resolver: typing.Optional[typing.Callable] = None,
    dataset_resolver: typing.Optional[typing.Callable] = None,
    problem_resolver: typing.Optional[typing.Callable] = None,
) -> None:
    """Handle the "crawl OpenML" CLI command.

    Crawls OpenML tasks of the requested task types via ``crawl_openml`` and
    exits the process with status 1 if any task errored during crawling.

    Parameters
    ----------
    arguments:
        Parsed command-line arguments. Required attributes: ``context``,
        ``task_types``, ``save_dir``, ``max_tasks``, ``ignore_tasks``,
        ``ignore_datasets``. Optional attributes (read with ``getattr``):
        ``compute_digest``, ``worker_id``, ``volumes_dir``, ``scratch_dir``,
        ``strict_digest``.
    pipeline_resolver:
        Resolver used to load pipelines. Defaults to
        ``pipeline_module.get_pipeline``.
    dataset_resolver:
        Resolver used to load datasets. Defaults to
        ``dataset_module.get_dataset``.
    problem_resolver:
        Resolver used to load problem descriptions. Defaults to
        ``problem_module.get_problem``.

    Raises
    ------
    exceptions.InvalidArgumentValueError:
        If the same task type is listed multiple times.
    """

    if pipeline_resolver is None:
        pipeline_resolver = pipeline_module.get_pipeline
    if dataset_resolver is None:
        dataset_resolver = dataset_module.get_dataset
    if problem_resolver is None:
        problem_resolver = problem_module.get_problem

    context = metadata_base.Context[arguments.context]
    # Digest computation defaults to "only if missing" when the CLI flag
    # was not provided.
    compute_digest = dataset_module.ComputeDigest[getattr(
        arguments, 'compute_digest',
        dataset_module.ComputeDigest.ONLY_IF_MISSING.name)]
    runtime_environment = pipeline_run_module.RuntimeEnvironment(
        worker_id=getattr(arguments, 'worker_id', None), )

    task_types = [
        problem_module.OpenMLTaskType[task_type]
        for task_type in arguments.task_types
    ]
    if utils.has_duplicates(task_types):
        raise exceptions.InvalidArgumentValueError(
            "Same task type listed multiple times.")

    # The CLI is expected to require at least one task type.
    assert task_types

    inputs_config = runtime._get_inputs_config_from_arguments(
        arguments=arguments,
        pipeline_resolver=pipeline_resolver,
        dataset_resolver=dataset_resolver,
    )

    # Crawling requires a data preparation pipeline to be configured.
    assert inputs_config.data_pipeline

    has_errored = crawl_openml(
        save_dir=arguments.save_dir,
        task_types=task_types,
        data_pipeline=inputs_config.data_pipeline,
        data_params=inputs_config.data_params,
        context=context,
        random_seed=inputs_config.data_random_seed,
        volumes_dir=getattr(arguments, 'volumes_dir', None),
        scratch_dir=getattr(arguments, 'scratch_dir', None),
        runtime_environment=runtime_environment,
        max_tasks=arguments.max_tasks,
        ignore_tasks=arguments.ignore_tasks or [],
        ignore_datasets=arguments.ignore_datasets or [],
        dataset_resolver=dataset_resolver,
        problem_resolver=problem_resolver,
        compute_digest=compute_digest,
        strict_digest=getattr(arguments, 'strict_digest', False),
    )

    if has_errored:
        sys.exit(1)
Esempio n. 2
0
 def __init__(
         self,
         *,
         random_seed: int = 0,
         volumes_dir: str = None,
         scratch_dir: str = None,
         store_results=False,
         blocklist=(),
 ) -> None:
     """Set up runner state and load all available primitives.

     ``random_seed``, ``volumes_dir`` and ``scratch_dir`` are stored as-is
     for later runtime use; ``store_results`` controls whether results are
     kept; ``blocklist`` names primitives to skip when loading the index.
     """
     self.random_seed = random_seed
     self.volumes_dir = volumes_dir
     self.scratch_dir = scratch_dir
     self.store_results = store_results
     self.fitted_pipelines = {}

     # Primitive loading is noisy; suppress d3m logging while loading the
     # index and capturing the runtime environment.
     with d3m_utils.silence():
         d3m_index.load_all(blocklist=blocklist)
         self.runtime_environment = pipeline_run_module.RuntimeEnvironment()
Esempio n. 3
0
    def __init__(
        self, *, random_seed: int = 0, volumes_dir: str = None, scratch_dir: str = None,
    ) -> None:
        """Initialize the runner and capture the runtime environment.

        Seed and directory settings are forwarded to the parent class;
        per-instance caches for fitted pipelines and request results start
        empty.
        """
        super().__init__(
            random_seed=random_seed,
            volumes_dir=volumes_dir,
            scratch_dir=scratch_dir,
        )
        self.fitted_pipelines = {}
        self.request_results = {}

        # Capture the runtime environment quietly (d3m logs suppressed).
        with d3m_utils.silence():
            self.runtime_environment = pipeline_run_module.RuntimeEnvironment()
Esempio n. 4
0
def fit(
    pipeline: pipeline.Pipeline, problem: problem.Problem,
    input_dataset: container.Dataset
) -> Tuple[Optional[runtime.Runtime], Optional[runtime.Result]]:
    """Fit *pipeline* against *problem* on a single input dataset.

    Runs with seed 0, no hyper-parameter overrides, and static files taken
    from ``config.D3MSTATICDIR``. Any error recorded in the fit result is
    re-raised; otherwise the fitted runtime and the result are returned.
    """

    # NOTE(review): the parameters shadow the ``pipeline`` and ``problem``
    # modules inside this body; the annotations above are evaluated before
    # the shadowing takes effect.
    fitted_runtime, _, fit_result = runtime.fit(
        pipeline,
        problem,
        [input_dataset],
        hyperparams=None,
        random_seed=0,
        volumes_dir=config.D3MSTATICDIR,
        context=metadata_base.Context.TESTING,
        runtime_environment=pipeline_run.RuntimeEnvironment(),
    )

    if fit_result.has_error():
        raise fit_result.error

    return fitted_runtime, fit_result
def _fit(
    pipeline: metadata_pipeline.Pipeline,
    input_dataset: List[container.Dataset],
    volumes_dir: Optional[str] = None
) -> Tuple[Optional[runtime.Runtime], Optional[runtime.Result]]:
    """Fit *pipeline* on *input_dataset* with no problem description.

    Runs with seed 0 and no hyper-parameter overrides. Any error recorded
    in the fit result is re-raised; otherwise the fitted runtime and the
    result are returned.
    """

    fitted_runtime, _, fit_result = runtime.fit(
        pipeline,
        None,  # no problem description for this fit
        input_dataset,
        hyperparams=None,
        random_seed=0,
        volumes_dir=volumes_dir,
        context=metadata_base.Context.TESTING,
        runtime_environment=pipeline_run.RuntimeEnvironment(),
    )

    if fit_result.has_error():
        raise fit_result.error

    return fitted_runtime, fit_result
Esempio n. 6
0
    def __init__(self,
                 problem_description: Problem,
                 backend: RunnerBase,
                 *,
                 primitives_blocklist: typing.Optional[typing.Sequence[str]] = None,
                 ranking_function: typing.Optional[typing.Callable] = None) -> None:
        """Initialize a pipeline search.

        Parameters
        ----------
        problem_description:
            Problem the search is solving.
        backend:
            Execution backend; its ``random_seed``, ``volumes_dir`` and
            ``scratch_dir`` are mirrored on this instance.
        primitives_blocklist:
            Primitives to exclude from the search, or ``None``.
        ranking_function:
            Callable used to rank pipeline results, or ``None``.
        """
        self.search_id = str(uuid.uuid4())
        self.backend = backend
        self.random_seed = backend.random_seed
        self.volumes_dir = backend.volumes_dir
        self.scratch_dir = backend.scratch_dir
        self.ranking_function = ranking_function

        self.problem_description: Problem = problem_description
        self.primitives_blocklist: typing.Optional[typing.Sequence[str]] = primitives_blocklist

        self.history: typing.List[PipelineResult] = []

        # Unset until a search has produced results; annotated as Optional
        # since they hold None initially.
        self.best_fitted_pipeline_id: typing.Optional[str] = None
        self.input_data: typing.Optional[typing.Sequence[ContainerType]] = None

        # Capture the runtime environment quietly (d3m logs suppressed).
        with d3m_utils.silence():
            self.runtime_environment = pipeline_run_module.RuntimeEnvironment()