Example 1
def get_papermill_parameters(compute_context, inputs, output_log_path):
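    # Builds the parameter dict handed to papermill: each solid input is written
    # to the marshal directory, and pipeline/run/instance metadata is packed
    # under reserved "__dm_*" keys.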
    check.inst_param(compute_context, 'compute_context',
                     SystemComputeExecutionContext)
    check.param_invariant(
        isinstance(compute_context.environment_dict, dict),
        'compute_context',
        'SystemComputeExecutionContext must have valid environment_dict',
    )
    check.dict_param(inputs, 'inputs', key_type=six.string_types)

    run_id = compute_context.run_id

    marshal_dir = '/tmp/dagstermill/{run_id}/marshal'.format(run_id=run_id)
    mkdir_p(marshal_dir)

    (handle, solid_subset) = ExecutionTargetHandle.get_handle(
        compute_context.pipeline_def)

    if not handle:
        raise DagstermillError(
            'Can\'t execute a dagstermill solid from a pipeline that wasn\'t instantiated using '
            'an ExecutionTargetHandle')

    dm_handle_kwargs = handle.data._asdict()

    dm_handle_kwargs['pipeline_name'] = compute_context.pipeline_def.name

    dm_context_dict = {
        'output_log_path': output_log_path,
        'marshal_dir': marshal_dir,
        'environment_dict': compute_context.environment_dict,
    }

    dm_solid_handle_kwargs = compute_context.solid_handle._asdict()

    parameters = {}

    input_def_dict = compute_context.solid_def.input_dict
    for input_name, input_value in inputs.items():
        assert (
            input_name not in RESERVED_INPUT_NAMES
        ), 'Dagstermill solids cannot have inputs named {input_name}'.format(
            input_name=input_name)
        dagster_type = input_def_dict[input_name].dagster_type
        parameter_value = write_value(
            dagster_type, input_value,
            os.path.join(marshal_dir, 'input-{}'.format(input_name)))
        parameters[input_name] = parameter_value

    parameters['__dm_context'] = dm_context_dict
    parameters['__dm_handle_kwargs'] = dm_handle_kwargs
    parameters['__dm_pipeline_run_dict'] = pack_value(
        compute_context.pipeline_run)
    parameters['__dm_solid_handle_kwargs'] = dm_solid_handle_kwargs
    parameters['__dm_solid_subset'] = solid_subset
    parameters['__dm_instance_ref_dict'] = pack_value(
        compute_context.instance.get_ref())

    return parameters
Example 2
def get_papermill_parameters(step_context, inputs, output_log_path):
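    # Variant that requires the pipeline to be a ReconstructablePipeline and
    # builds the marshal directory under the system temp directory instead of a
    # hard-coded /tmp path.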
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.param_invariant(
        isinstance(step_context.run_config, dict),
        "step_context",
        "StepExecutionContext must have valid run_config",
    )
    check.dict_param(inputs, "inputs", key_type=str)

    run_id = step_context.run_id
    temp_dir = get_system_temp_directory()
    marshal_dir = os.path.normpath(
        os.path.join(temp_dir, "dagstermill", str(run_id), "marshal"))
    mkdir_p(marshal_dir)

    if not isinstance(step_context.pipeline, ReconstructablePipeline):
        raise DagstermillError(
            "Can't execute a dagstermill solid from a pipeline that is not reconstructable. "
            "Use the reconstructable() function if executing from python")

    dm_executable_dict = step_context.pipeline.to_dict()

    dm_context_dict = {
        "output_log_path": output_log_path,
        "marshal_dir": marshal_dir,
        "run_config": step_context.run_config,
    }

    dm_solid_handle_kwargs = step_context.solid_handle._asdict()

    parameters = {}

    input_def_dict = step_context.solid_def.input_dict
    for input_name, input_value in inputs.items():
        assert (
            input_name not in RESERVED_INPUT_NAMES
        ), "Dagstermill solids cannot have inputs named {input_name}".format(
            input_name=input_name)
        dagster_type = input_def_dict[input_name].dagster_type
        parameter_value = write_value(
            dagster_type,
            input_value,
            os.path.join(
                marshal_dir,
                f"{str(step_context.solid_handle)}-input-{input_name}"),
        )
        parameters[input_name] = parameter_value

    parameters["__dm_context"] = dm_context_dict
    parameters["__dm_executable_dict"] = dm_executable_dict
    parameters["__dm_pipeline_run_dict"] = pack_value(
        step_context.pipeline_run)
    parameters["__dm_solid_handle_kwargs"] = dm_solid_handle_kwargs
    parameters["__dm_instance_ref_dict"] = pack_value(
        step_context.instance.get_ref())

    return parameters
Example 3
def get_papermill_parameters(compute_context, inputs, output_log_path):
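    # Like the first example, but validates run_config instead of
    # environment_dict and serializes the pipeline via
    # ReconstructablePipeline.to_dict() as "__dm_executable_dict".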
    check.inst_param(compute_context, "compute_context",
                     SystemComputeExecutionContext)
    check.param_invariant(
        isinstance(compute_context.run_config, dict),
        "compute_context",
        "SystemComputeExecutionContext must have valid run_config",
    )
    check.dict_param(inputs, "inputs", key_type=six.string_types)

    run_id = compute_context.run_id

    marshal_dir = "/tmp/dagstermill/{run_id}/marshal".format(run_id=run_id)
    mkdir_p(marshal_dir)

    if not isinstance(compute_context.pipeline, ReconstructablePipeline):
        raise DagstermillError(
            "Can't execute a dagstermill solid from a pipeline that is not reconstructable. "
            "Use the reconstructable() function if executing from python")

    dm_executable_dict = compute_context.pipeline.to_dict()

    dm_context_dict = {
        "output_log_path": output_log_path,
        "marshal_dir": marshal_dir,
        "run_config": compute_context.run_config,
    }

    dm_solid_handle_kwargs = compute_context.solid_handle._asdict()

    parameters = {}

    input_def_dict = compute_context.solid_def.input_dict
    for input_name, input_value in inputs.items():
        assert (
            input_name not in RESERVED_INPUT_NAMES
        ), "Dagstermill solids cannot have inputs named {input_name}".format(
            input_name=input_name)
        dagster_type = input_def_dict[input_name].dagster_type
        parameter_value = write_value(
            dagster_type, input_value,
            os.path.join(marshal_dir, "input-{}".format(input_name)))
        parameters[input_name] = parameter_value

    parameters["__dm_context"] = dm_context_dict
    parameters["__dm_executable_dict"] = dm_executable_dict
    parameters["__dm_pipeline_run_dict"] = pack_value(
        compute_context.pipeline_run)
    parameters["__dm_solid_handle_kwargs"] = dm_solid_handle_kwargs
    parameters["__dm_instance_ref_dict"] = pack_value(
        compute_context.instance.get_ref())

    return parameters
Example 4
def get_papermill_parameters(step_context, inputs, output_log_path,
                             compute_descriptor):
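    # Variant that no longer writes input values to disk here: it passes the
    # step key and input names instead, and tailors the reconstructability
    # error message to solids vs. ops via compute_descriptor.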
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.param_invariant(
        isinstance(step_context.run_config, dict),
        "step_context",
        "StepExecutionContext must have valid run_config",
    )
    check.dict_param(inputs, "inputs", key_type=str)

    run_id = step_context.run_id
    temp_dir = get_system_temp_directory()
    marshal_dir = os.path.normpath(
        os.path.join(temp_dir, "dagstermill", str(run_id), "marshal"))
    mkdir_p(marshal_dir)

    if not isinstance(step_context.pipeline, ReconstructablePipeline):
        if compute_descriptor == "solid":
            raise DagstermillError(
                "Can't execute a dagstermill solid from a pipeline that is not reconstructable. "
                "Use the reconstructable() function if executing from python")
        else:
            raise DagstermillError(
                "Can't execute a dagstermill op from a job that is not reconstructable. "
                "Use the reconstructable() function if executing from python")

    dm_executable_dict = step_context.pipeline.to_dict()

    dm_context_dict = {
        "output_log_path": output_log_path,
        "marshal_dir": marshal_dir,
        "run_config": step_context.run_config,
    }

    dm_solid_handle_kwargs = step_context.solid_handle._asdict()
    dm_step_key = step_context.step.key

    parameters = {}

    parameters["__dm_context"] = dm_context_dict
    parameters["__dm_executable_dict"] = dm_executable_dict
    parameters["__dm_pipeline_run_dict"] = pack_value(
        step_context.pipeline_run)
    parameters["__dm_solid_handle_kwargs"] = dm_solid_handle_kwargs
    parameters["__dm_instance_ref_dict"] = pack_value(
        step_context.instance.get_ref())
    parameters["__dm_step_key"] = dm_step_key
    parameters["__dm_input_names"] = list(inputs.keys())

    return parameters
Example 5
def _submit_task_k8s_job(app, pipeline_context, step, queue, priority,
                         known_state):
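    # Packs an ExecuteStepArgs for a single step and submits it as a Celery
    # task that runs the step inside a Kubernetes job.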
    user_defined_k8s_config = get_user_defined_k8s_config(step.tags)

    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_context.reconstructable_pipeline.get_python_origin(),
        pipeline_run_id=pipeline_context.pipeline_run.run_id,
        step_keys_to_execute=[step.key],
        instance_ref=pipeline_context.instance.get_ref(),
        retry_mode=pipeline_context.executor.retries.for_inner_plan(),
        known_state=known_state,
        should_verify_step=True,
    )

    task = create_k8s_job_task(app)
    task_signature = task.si(
        execute_step_args_packed=pack_value(execute_step_args),
        job_config_dict=pipeline_context.executor.job_config.to_dict(),
        job_namespace=pipeline_context.executor.job_namespace,
        user_defined_k8s_config_dict=user_defined_k8s_config.to_dict(),
        load_incluster_config=pipeline_context.executor.load_incluster_config,
        kubeconfig_file=pipeline_context.executor.kubeconfig_file,
    )

    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_step_k8s_job".format(queue=queue),
    )
Example 6
def _submit_task_k8s_job(app, pipeline_context, step, queue, priority):
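    # Variant that passes run, repository, and retry details as individual task
    # arguments rather than a single packed ExecuteStepArgs.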
    user_defined_k8s_config = get_user_defined_k8s_config(step.tags)

    task = create_k8s_job_task(app)

    recon_repo = pipeline_context.pipeline.get_reconstructable_repository()

    task_signature = task.si(
        instance_ref_dict=pipeline_context.instance.get_ref().to_dict(),
        step_keys=[step.key],
        run_config=pipeline_context.pipeline_run.run_config,
        mode=pipeline_context.pipeline_run.mode,
        repo_name=recon_repo.get_definition().name,
        repo_location_name=pipeline_context.executor.repo_location_name,
        run_id=pipeline_context.pipeline_run.run_id,
        job_config_dict=pipeline_context.executor.job_config.to_dict(),
        job_namespace=pipeline_context.executor.job_namespace,
        user_defined_k8s_config_dict=user_defined_k8s_config.to_dict(),
        retries_dict=pipeline_context.executor.retries.for_inner_plan().to_config(),
        pipeline_origin_packed=pack_value(pipeline_context.pipeline.get_origin()),
        load_incluster_config=pipeline_context.executor.load_incluster_config,
        kubeconfig_file=pipeline_context.executor.kubeconfig_file,
    )

    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_step_k8s_job".format(queue=queue),
    )
Example 7
def in_pipeline_manager(pipeline_name='hello_world_pipeline',
                        solid_handle=SolidHandle('hello_world', 'hello_world',
                                                 None),
                        handle_kwargs=None,
                        mode=None,
                        **kwargs):
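    # Builds the kwargs that reconstitute_pipeline_context expects, reconstitutes
    # the Manager's pipeline context from them, and yields the manager
    # (a generator intended to be used as a context manager).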
    manager = Manager()

    run_id = make_new_run_id()
    instance = DagsterInstance.local_temp()
    marshal_dir = tempfile.mkdtemp()

    if not handle_kwargs:
        handle_kwargs = {
            'pipeline_name': pipeline_name,
            'module_name': 'dagstermill.examples.repository',
            'fn_name': 'define_hello_world_pipeline',
        }

    pipeline_run_dict = pack_value(
        PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            mode=mode or 'default',
            environment_dict=None,
            selector=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))

    try:
        with safe_tempfile_path() as output_log_file_path:
            context_dict = {
                'pipeline_run_dict': pipeline_run_dict,
                'solid_handle_kwargs': solid_handle._asdict(),
                'handle_kwargs': handle_kwargs,
                'marshal_dir': marshal_dir,
                'environment_dict': {},
                'output_log_path': output_log_file_path,
                'instance_ref_dict': pack_value(instance.get_ref()),
            }

            manager.reconstitute_pipeline_context(
                **dict(context_dict, **kwargs))
            yield manager
    finally:
        shutil.rmtree(marshal_dir)
Example 8
def in_pipeline_manager(
    pipeline_name="hello_world_pipeline",
    solid_handle=NodeHandle("hello_world", None),
    step_key="hello_world",
    executable_dict=None,
    mode=None,
    **kwargs,
):
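    # Updated variant: uses a NodeHandle and an executable_dict built from a
    # ReconstructablePipeline, passes run_config and a step_key, and creates the
    # instance with instance_for_test().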
    manager = Manager()

    run_id = make_new_run_id()
    with instance_for_test() as instance:
        marshal_dir = tempfile.mkdtemp()

        if not executable_dict:
            executable_dict = ReconstructablePipeline.for_module(
                "dagstermill.examples.repository",
                "hello_world_pipeline").to_dict()

        pipeline_run_dict = pack_value(
            PipelineRun(
                pipeline_name=pipeline_name,
                run_id=run_id,
                mode=mode or "default",
                run_config=None,
                step_keys_to_execute=None,
                status=PipelineRunStatus.NOT_STARTED,
            ))

        try:
            with safe_tempfile_path() as output_log_file_path:
                context_dict = {
                    "pipeline_run_dict": pipeline_run_dict,
                    "solid_handle_kwargs": solid_handle._asdict(),
                    "executable_dict": executable_dict,
                    "marshal_dir": marshal_dir,
                    "run_config": {},
                    "output_log_path": output_log_file_path,
                    "instance_ref_dict": pack_value(instance.get_ref()),
                    "step_key": step_key,
                }

                manager.reconstitute_pipeline_context(
                    **dict(context_dict, **kwargs))
                yield manager
        finally:
            shutil.rmtree(marshal_dir)
Example 9
def _submit_task_docker(app, pipeline_context, step, queue, priority):
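    # Packs ExecuteStepArgs for one step and submits it as a Celery task that
    # runs the step in a Docker container, routed to "<queue>.execute_step_docker".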
    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_context.pipeline.get_python_origin(),
        pipeline_run_id=pipeline_context.pipeline_run.run_id,
        step_keys_to_execute=[step.key],
        instance_ref=pipeline_context.instance.get_ref(),
        retries_dict=pipeline_context.executor.retries.for_inner_plan().to_config(),
    )

    task = create_docker_task(app)
    task_signature = task.si(
        execute_step_args_packed=pack_value(execute_step_args),
        docker_config=pipeline_context.executor.docker_config,
    )
    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_step_docker".format(queue=queue),
    )
Example 10
def _submit_task(app, pipeline_context, step, queue, priority, known_state):
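    # Generic submission path: packs ExecuteStepArgs together with the
    # reconstructable pipeline dict and routes the task to "<queue>.execute_plan".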
    from .tasks import create_task

    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_context.pipeline.get_python_origin(),
        pipeline_run_id=pipeline_context.pipeline_run.run_id,
        step_keys_to_execute=[step.key],
        instance_ref=pipeline_context.instance.get_ref(),
        retry_mode=pipeline_context.executor.retries.for_inner_plan(),
        known_state=known_state,
    )

    task = create_task(app)
    task_signature = task.si(
        execute_step_args_packed=pack_value(execute_step_args),
        executable_dict=pipeline_context.pipeline.to_dict(),
    )
    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_plan".format(queue=queue),
    )
Example 11
def _submit_task_k8s_job(app, plan_context, step, queue, priority, known_state):
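    # Like the other Kubernetes variants, but falls back to the container image
    # recorded on the pipeline origin when the executor config supplies none,
    # and forwards a job_wait_timeout to the task.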
    user_defined_k8s_config = get_user_defined_k8s_config(step.tags)

    pipeline_origin = plan_context.reconstructable_pipeline.get_python_origin()

    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=plan_context.pipeline_run.run_id,
        step_keys_to_execute=[step.key],
        instance_ref=plan_context.instance.get_ref(),
        retry_mode=plan_context.executor.retries.for_inner_plan(),
        known_state=known_state,
        should_verify_step=True,
    )

    job_config = plan_context.executor.job_config
    if not job_config.job_image:
        job_config = job_config.with_image(pipeline_origin.repository_origin.container_image)

    if not job_config.job_image:
        raise Exception("No image included in either executor config or the dagster job")

    task = create_k8s_job_task(app)
    task_signature = task.si(
        execute_step_args_packed=pack_value(execute_step_args),
        job_config_dict=job_config.to_dict(),
        job_namespace=plan_context.executor.job_namespace,
        user_defined_k8s_config_dict=user_defined_k8s_config.to_dict(),
        load_incluster_config=plan_context.executor.load_incluster_config,
        job_wait_timeout=plan_context.executor.job_wait_timeout,
        kubeconfig_file=plan_context.executor.kubeconfig_file,
    )

    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_step_k8s_job".format(queue=queue),
    )
Example 12
def _submit_task_docker(app, pipeline_context, step, queue, priority):
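    # Docker variant that passes instance, run, and repository details as
    # individual task arguments rather than a packed ExecuteStepArgs.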
    task = create_docker_task(app)

    recon_repo = pipeline_context.pipeline.get_reconstructable_repository()

    task_signature = task.si(
        instance_ref_dict=pipeline_context.instance.get_ref().to_dict(),
        step_keys=[step.key],
        run_config=pipeline_context.pipeline_run.run_config,
        mode=pipeline_context.pipeline_run.mode,
        repo_name=recon_repo.get_definition().name,
        run_id=pipeline_context.pipeline_run.run_id,
        docker_config=pipeline_context.executor.docker_config,
        pipeline_origin_packed=pack_value(
            pipeline_context.pipeline.get_origin()),
        retries_dict=pipeline_context.executor.retries.for_inner_plan().to_config(),
    )
    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_step_docker".format(queue=queue),
    )
Example 13
def to_dict(self):
    return pack_value(self)