Example #1
 def submit_run(self, pipeline_run, external_pipeline):
     check.inst_param(pipeline_run, "run", PipelineRun)
     check.opt_inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
     check.inst(pipeline_run.external_pipeline_origin, ExternalPipelineOrigin)
     self._queue.append(pipeline_run)
     return pipeline_run
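Every snippet on this page leans on dagster's check module for runtime type assertions (check.inst, check.inst_param, check.opt_inst_param). A minimal sketch of that pattern, assuming the older dagster releases these examples come from, where the module is importable as "from dagster import check"; MyRun is a hypothetical class used purely for illustration:

from dagster import check


class MyRun:
    pass


def register_run(run):
    # check.inst_param raises check.CheckError if `run` is not a MyRun
    # instance and returns the value unchanged when the check passes.
    run = check.inst_param(run, "run", MyRun)
    return run


register_run(MyRun())          # passes
# register_run("not a run")   # would raise check.CheckError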
Example #2
    def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
        check.opt_list_param(step_keys_to_execute,
                             'step_keys_to_execute',
                             of_type=str)

        step_key_set = None if step_keys_to_execute is None else set(
            step_keys_to_execute)

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps in process (pid: {pid})'.format(pid=os.getpid()),
            event_specific_data=EngineEventData.in_process(
                os.getpid(), step_key_set),
        )

        with time_execution_scope() as timer_result:
            check.param_invariant(
                isinstance(pipeline_context.executor_config, ExecutorConfig),
                'pipeline_context',
                'Expected executor_config to be ExecutorConfig got {}'.format(
                    pipeline_context.executor_config),
            )

            failed_or_skipped_steps = set()

            step_levels = execution_plan.topological_step_levels()

            # It would be good to implement a reference tracking algorithm here to
            # garbage collect results that are no longer needed by any steps
            # https://github.com/dagster-io/dagster/issues/811
            for step_level in step_levels:
                for step in step_level:
                    if step_key_set and step.key not in step_key_set:
                        continue

                    step_context = pipeline_context.for_step(step)

                    failed_inputs = []
                    for step_input in step.step_inputs:
                        failed_inputs.extend(
                            failed_or_skipped_steps.intersection(
                                step_input.dependency_keys))

                    if failed_inputs:
                        step_context.log.info((
                            'Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                        ).format(step=step.key, failed_inputs=failed_inputs))
                        failed_or_skipped_steps.add(step.key)
                        yield DagsterEvent.step_skipped_event(step_context)
                        continue

                    uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                        step_context, step)
                    if uncovered_inputs:
                        # In partial pipeline execution, we may end up here without having validated the
                        # missing dependent outputs were optional
                        _assert_missing_inputs_optional(
                            uncovered_inputs, execution_plan, step.key)

                        step_context.log.info((
                            'Not all inputs covered for {step}. Not executing. Output missing for '
                            'inputs: {uncovered_inputs}').format(
                                uncovered_inputs=uncovered_inputs,
                                step=step.key))
                        failed_or_skipped_steps.add(step.key)
                        yield DagsterEvent.step_skipped_event(step_context)
                        continue

                    for step_event in check.generator(
                            dagster_event_sequence_for_step(step_context)):
                        check.inst(step_event, DagsterEvent)
                        if step_event.is_step_failure:
                            failed_or_skipped_steps.add(step.key)

                        yield step_event

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Finished steps in process (pid: {pid}) in {duration_ms}'.format(
                pid=os.getpid(),
                duration_ms=format_duration(timer_result.millis)),
            event_specific_data=EngineEventData.in_process(
                os.getpid(), step_key_set),
        )
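The heart of the loop above is dependency bookkeeping: steps are visited in topological levels, and a step is skipped when any of its upstream dependencies has already failed or been skipped. A self-contained toy sketch of just that logic, using plain dicts and sets rather than dagster's ExecutionPlan API:

# toy plan: step "b" will fail, so "d" (which depends on it) must be skipped
step_levels = [["a"], ["b", "c"], ["d"]]
dependency_keys = {"a": set(), "b": {"a"}, "c": {"a"}, "d": {"b", "c"}}
failed_or_skipped_steps = set()


def run_step(step_key):
    return step_key != "b"  # pretend only step "b" fails


for step_level in step_levels:
    for step_key in step_level:
        failed_inputs = failed_or_skipped_steps & dependency_keys[step_key]
        if failed_inputs:
            print("Dependencies for step {} failed: {}. Not executing.".format(
                step_key, failed_inputs))
            failed_or_skipped_steps.add(step_key)
            continue
        if not run_step(step_key):
            failed_or_skipped_steps.add(step_key)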
Example #3
    def event_generator(
        self,
        execution_plan,
        run_config,
        pipeline_run,
        instance,
        scoped_resources_builder_cm,
        intermediate_storage=None,
        raise_on_error=False,
        resource_instances_to_override=None,
        output_capture=None,
    ):
        execution_plan = check.inst_param(execution_plan, "execution_plan",
                                          ExecutionPlan)
        pipeline_def = execution_plan.pipeline.get_definition()

        run_config = check.dict_param(run_config, "run_config", key_type=str)
        pipeline_run = check.inst_param(pipeline_run, "pipeline_run",
                                        PipelineRun)
        instance = check.inst_param(instance, "instance", DagsterInstance)

        scoped_resources_builder_cm = check.callable_param(
            scoped_resources_builder_cm, "scoped_resources_builder_cm")
        intermediate_storage = check.opt_inst_param(
            intermediate_storage, "intermediate_storage_data",
            IntermediateStorage)
        raise_on_error = check.bool_param(raise_on_error, "raise_on_error")
        resource_instances_to_override = check.opt_dict_param(
            resource_instances_to_override, "resource_instances_to_override")

        execution_context = None
        resources_manager = None

        try:
            context_creation_data = create_context_creation_data(
                execution_plan,
                run_config,
                pipeline_run,
                instance,
            )

            log_manager = create_log_manager(context_creation_data)
            resource_defs = execution_plan.pipeline_def.get_mode_definition(
                context_creation_data.environment_config.mode).resource_defs
            resources_manager = scoped_resources_builder_cm(
                resource_defs=resource_defs,
                resource_configs=context_creation_data.environment_config.
                resources,
                log_manager=log_manager,
                execution_plan=execution_plan,
                pipeline_run=context_creation_data.pipeline_run,
                resource_keys_to_init=context_creation_data.
                resource_keys_to_init,
                instance=instance,
                resource_instances_to_override=resource_instances_to_override,
                emit_persistent_events=True,
            )
            yield from resources_manager.generate_setup_events()
            scoped_resources_builder = check.inst(
                resources_manager.get_object(), ScopedResourcesBuilder)

            intermediate_storage = create_intermediate_storage(
                context_creation_data,
                intermediate_storage,
                scoped_resources_builder,
            )

            execution_context = self.construct_context(
                context_creation_data=context_creation_data,
                scoped_resources_builder=scoped_resources_builder,
                log_manager=log_manager,
                intermediate_storage=intermediate_storage,
                raise_on_error=raise_on_error,
                output_capture=output_capture,
            )

            _validate_plan_with_context(execution_context, execution_plan)

            yield execution_context
            yield from resources_manager.generate_teardown_events()
        except DagsterError as dagster_error:
            if execution_context is None:
                user_facing_exc_info = (
                    # pylint does not know original_exc_info exists if is_user_code_error is true
                    # pylint: disable=no-member
                    dagster_error.original_exc_info
                    if dagster_error.is_user_code_error else sys.exc_info())
                error_info = serializable_error_info_from_exc_info(
                    user_facing_exc_info)

                yield DagsterEvent.pipeline_init_failure(
                    pipeline_name=pipeline_def.name,
                    failure_data=PipelineInitFailureData(error=error_info),
                    log_manager=_create_context_free_log_manager(
                        instance, pipeline_run, pipeline_def),
                )
                if resources_manager:
                    yield from resources_manager.generate_teardown_events()
            else:
                # pipeline teardown failure
                raise dagster_error

            if raise_on_error:
                raise dagster_error
Example #4
    def _execute_step_k8s_job(
        self,
        instance_ref_dict,
        step_keys,
        run_config,
        mode,
        repo_name,
        repo_location_name,
        run_id,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        retries_dict,
        pipeline_origin_packed,
        user_defined_k8s_config_dict=None,
        kubeconfig_file=None,
    ):
        """Run step execution in a K8s job pod.
        """

        check.dict_param(instance_ref_dict, "instance_ref_dict")
        check.list_param(step_keys, "step_keys", of_type=str)
        check.invariant(
            len(step_keys) == 1,
            "Celery K8s task executor can only execute 1 step at a time")
        check.dict_param(run_config, "run_config")
        check.str_param(mode, "mode")
        check.str_param(repo_name, "repo_name")
        check.str_param(repo_location_name, "repo_location_name")
        check.str_param(run_id, "run_id")

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, "job_config", DagsterK8sJobConfig)
        check.str_param(job_namespace, "job_namespace")

        check.bool_param(load_incluster_config, "load_incluster_config")
        check.dict_param(retries_dict, "retries_dict")

        pipeline_origin = unpack_value(
            check.dict_param(
                pipeline_origin_packed,
                "pipeline_origin_packed")  # TODO: make part of args
        )
        check.inst(pipeline_origin, PipelineOrigin)

        user_defined_k8s_config = UserDefinedDagsterK8sConfig.from_dict(
            user_defined_k8s_config_dict)
        check.opt_inst_param(
            user_defined_k8s_config,
            "user_defined_k8s_config",
            UserDefinedDagsterK8sConfig,
        )
        check.opt_str_param(kubeconfig_file, "kubeconfig_file")

        # For when launched via DinD or running the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)

        check.invariant(pipeline_run, "Could not load run {}".format(run_id))
        step_key = step_keys[0]

        celery_worker_name = self.request.hostname
        celery_pod_name = os.environ.get("HOSTNAME")
        instance.report_engine_event(
            "Task for step {step_key} picked up by Celery".format(
                step_key=step_key),
            pipeline_run,
            EngineEventData([
                EventMetadataEntry.text(celery_worker_name,
                                        "Celery worker name"),
                EventMetadataEntry.text(celery_pod_name,
                                        "Celery worker Kubernetes Pod name"),
            ]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )

        if pipeline_run.status != PipelineRunStatus.STARTED:
            instance.report_engine_event(
                "Not scheduling step because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, "Step key"),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        # Ensure we stay below k8s name length limits
        k8s_name_key = get_k8s_job_name(run_id, step_key)

        retries = Retries.from_config(retries_dict)

        if retries.get_attempt_count(step_key):
            attempt_number = retries.get_attempt_count(step_key)
            job_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
            pod_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
        else:
            job_name = "dagster-job-%s" % (k8s_name_key)
            pod_name = "dagster-job-%s" % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(
            ExecuteStepArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run_id,
                instance_ref=None,
                mode=mode,
                step_keys_to_execute=step_keys,
                run_config=run_config,
                retries_dict=retries_dict,
            ))
        command = ["dagster"]
        args = ["api", "execute_step_with_structured_logs", input_json]

        job = construct_dagster_k8s_job(job_config, command, args, job_name,
                                        user_defined_k8s_config, pod_name)

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        job_name = job.metadata.name
        engine_event = instance.report_engine_event(
            "Executing step {} in Kubernetes job {}".format(
                step_key, job_name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
                    EventMetadataEntry.text(job_config.job_image, "Job image"),
                    EventMetadataEntry.text(job_config.image_pull_policy,
                                            "Image pull policy"),
                    EventMetadataEntry.text(str(job_config.image_pull_secrets),
                                            "Image pull secrets"),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name),
                        "Service account name"),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys is length 1, and it is not possible to use ETH or
            # execution plan in this function (Celery K8s workers should not have access to user code)
            step_key=step_key,
        )
        events.append(engine_event)

        try:
            kubernetes.client.BatchV1Api().create_namespaced_job(
                body=job, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            if e.reason == "Conflict":
                # There is an existing job with the same name, so do not proceed.
                instance.report_engine_event(
                    "Did not create Kubernetes job {} for step {} since job name already "
                    "exists, exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step key"),
                            EventMetadataEntry.text(job_name,
                                                    "Kubernetes Job name"),
                            EventMetadataEntry.text(pod_name,
                                                    "Kubernetes Pod name"),
                        ],
                        marker_end=DELEGATE_MARKER,
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            else:
                instance.report_engine_event(
                    "Encountered unexpected error while creating Kubernetes job {} for step {}, "
                    "exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData([
                        EventMetadataEntry.text(step_key, "Step key"),
                        EventMetadataEntry.text(str(e), "Error"),
                    ]),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            return []

        try:
            wait_for_job_success(
                job_name=job_name,
                namespace=job_namespace,
                instance=instance,
                run_id=run_id,
            )
        except DagsterK8sPipelineStatusException:
            instance.report_engine_event(
                "Terminating Kubernetes Job because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_namespace,
                                            "Kubernetes Job namespace"),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            delete_job(job_name=job_name, namespace=job_namespace)
            return []
        except kubernetes.client.rest.ApiException as e:
            instance.report_engine_event(
                "Encountered unexpected error while waiting on Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(str(e), "Error"),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        try:
            pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            instance.report_engine_event(
                "Encountered unexpected error retreiving Pods for Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(str(e), "Error"),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            "Retrieving logs from Kubernetes Job pods",
            pipeline_run,
            EngineEventData(
                [EventMetadataEntry.text("\n".join(pod_names), "Pod names")]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            try:
                raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
                logs += raw_logs.split("\n")
            except kubernetes.client.rest.ApiException as e:
                instance.report_engine_event(
                    "Encountered unexpected error while fetching pod logs for Kubernetes job {}, "
                    "Pod name {} for step {}. Will attempt to continue with other pods."
                    .format(job_name, pod_name, step_key),
                    pipeline_run,
                    EngineEventData([
                        EventMetadataEntry.text(step_key, "Step key"),
                        EventMetadataEntry.text(str(e), "Error"),
                    ]),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )

        events += filter_dagster_events_from_pod_logs(logs)
        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
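The error handling around create_namespaced_job in Example #4 amounts to: treat an HTTP 409 Conflict (a job with that name already exists) as "already created" and report anything else. A hedged sketch of that idiom using only the official kubernetes Python client; cluster configuration and job-body construction are omitted and assumed to be done as in the example:

import kubernetes


def create_job_if_absent(job_body, namespace):
    # assumes kubernetes.config.load_incluster_config() or load_kube_config()
    # has already been called, as in the snippet above
    try:
        kubernetes.client.BatchV1Api().create_namespaced_job(
            body=job_body, namespace=namespace)
        return True
    except kubernetes.client.rest.ApiException as e:
        if e.reason == "Conflict":
            # a job with the same name already exists; skip creation
            return False
        raise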
Example #5
def execution_context_event_generator(
    pipeline,
    execution_plan,
    run_config,
    pipeline_run,
    instance,
    retry_mode,
    scoped_resources_builder_cm=None,
    intermediate_storage=None,
    raise_on_error=False,
    output_capture=None,
):
    scoped_resources_builder_cm = check.opt_callable_param(
        scoped_resources_builder_cm,
        "scoped_resources_builder_cm",
        default=resource_initialization_manager,
    )

    execution_plan = check.inst_param(execution_plan, "execution_plan",
                                      ExecutionPlan)
    pipeline_def = pipeline.get_definition()

    run_config = check.dict_param(run_config, "run_config", key_type=str)
    pipeline_run = check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    instance = check.inst_param(instance, "instance", DagsterInstance)

    intermediate_storage = check.opt_inst_param(intermediate_storage,
                                                "intermediate_storage_data",
                                                IntermediateStorage)
    raise_on_error = check.bool_param(raise_on_error, "raise_on_error")

    context_creation_data = create_context_creation_data(
        pipeline,
        execution_plan,
        run_config,
        pipeline_run,
        instance,
    )

    log_manager = create_log_manager(context_creation_data)
    resource_defs = pipeline_def.get_mode_definition(
        context_creation_data.environment_config.mode).resource_defs
    resources_manager = scoped_resources_builder_cm(
        resource_defs=resource_defs,
        resource_configs=context_creation_data.environment_config.resources,
        log_manager=log_manager,
        execution_plan=execution_plan,
        pipeline_run=context_creation_data.pipeline_run,
        resource_keys_to_init=context_creation_data.resource_keys_to_init,
        instance=instance,
        emit_persistent_events=True,
        pipeline_def_for_backwards_compat=pipeline_def,
    )
    yield from resources_manager.generate_setup_events()
    scoped_resources_builder = check.inst(resources_manager.get_object(),
                                          ScopedResourcesBuilder)

    intermediate_storage = create_intermediate_storage(
        context_creation_data,
        intermediate_storage,
        scoped_resources_builder,
    )

    execution_context = PlanExecutionContext(
        plan_data=create_plan_data(context_creation_data, raise_on_error,
                                   retry_mode),
        execution_data=create_execution_data(context_creation_data,
                                             scoped_resources_builder,
                                             intermediate_storage),
        log_manager=log_manager,
        output_capture=output_capture,
    )

    _validate_plan_with_context(execution_context, execution_plan)

    yield execution_context
    yield from resources_manager.generate_teardown_events()
Example #6
 def command(input_file, output_file):
     args = check.inst(read_unary_input(input_file), input_cls)
     output = check.inst(fn(args), output_cls)
     ipc_write_unary_response(output_file, output)
Example #7
def from_dagster_event_record(graphene_info, event_record, dauphin_pipeline,
                              execution_plan):
    # Lots of event types. Pylint thinks there are too many branches
    # pylint: disable=too-many-branches
    check.inst_param(event_record, 'event_record', EventRecord)
    check.param_invariant(event_record.is_dagster_event, 'event_record')
    check.opt_inst_param(dauphin_pipeline, 'dauphin_pipeline',
                         graphene_info.schema.type_named('Pipeline'))
    check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    dagster_event = event_record.dagster_event
    basic_params = construct_basic_params(graphene_info, event_record,
                                          execution_plan)
    if dagster_event.event_type == DagsterEventType.STEP_START:
        return graphene_info.schema.type_named('ExecutionStepStartEvent')(
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_SKIPPED:
        return graphene_info.schema.type_named('ExecutionStepSkippedEvent')(
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_SUCCESS:
        return graphene_info.schema.type_named('ExecutionStepSuccessEvent')(
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_INPUT:
        input_data = dagster_event.event_specific_data
        return graphene_info.schema.type_named('ExecutionStepInputEvent')(
            input_name=input_data.input_name,
            type_check=input_data.type_check_data,
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_OUTPUT:
        output_data = dagster_event.step_output_data
        return graphene_info.schema.type_named('ExecutionStepOutputEvent')(
            output_name=output_data.output_name,
            type_check=output_data.type_check_data,
            # parens make black not put trailing commas, which in turn break py27
            # fmt: off
            **(basic_params)
            # fmt: on
        )
    elif dagster_event.event_type == DagsterEventType.STEP_MATERIALIZATION:
        materialization = dagster_event.step_materialization_data.materialization
        return graphene_info.schema.type_named('StepMaterializationEvent')(
            materialization=materialization, **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        expectation_result = dagster_event.event_specific_data.expectation_result
        return graphene_info.schema.type_named('StepExpectationResultEvent')(
            expectation_result=expectation_result, **(basic_params))
    elif dagster_event.event_type == DagsterEventType.STEP_FAILURE:
        check.inst(dagster_event.step_failure_data, StepFailureData)
        return graphene_info.schema.type_named('ExecutionStepFailureEvent')(
            error=graphene_info.schema.type_named('PythonError')(
                dagster_event.step_failure_data.error),
            # parens make black not put trailing commas, which in turn break py27
            # fmt: off
            **(basic_params)
            # fmt: on
        )
    elif dagster_event.event_type == DagsterEventType.PIPELINE_START:
        return graphene_info.schema.type_named('PipelineStartEvent')(
            pipeline=dauphin_pipeline, **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_SUCCESS:
        return graphene_info.schema.type_named('PipelineSuccessEvent')(
            pipeline=dauphin_pipeline, **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_FAILURE:
        return graphene_info.schema.type_named('PipelineFailureEvent')(
            pipeline=dauphin_pipeline, **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_PROCESS_START:
        process_data = dagster_event.pipeline_process_start_data
        return graphene_info.schema.type_named('PipelineProcessStartEvent')(
            pipeline=dauphin_pipeline,
            pipeline_name=process_data.pipeline_name,
            run_id=process_data.run_id,
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_PROCESS_STARTED:
        process_data = dagster_event.pipeline_process_started_data
        return graphene_info.schema.type_named('PipelineProcessStartedEvent')(
            pipeline=dauphin_pipeline,
            process_id=process_data.process_id,
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_PROCESS_EXITED:
        process_data = dagster_event.pipeline_process_exited_data
        return graphene_info.schema.type_named('PipelineProcessExitedEvent')(
            pipeline=dauphin_pipeline,
            process_id=process_data.process_id,
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_INIT_FAILURE:
        return graphene_info.schema.type_named('PipelineInitFailureEvent')(
            pipeline=dauphin_pipeline,
            error=graphene_info.schema.type_named('PythonError')(
                dagster_event.pipeline_init_failure_data.error),
            # parens make black not put trailing commas, which in turn break py27
            # fmt: off
            **(basic_params)
            # fmt: on
        )
    elif dagster_event.event_type == DagsterEventType.OBJECT_STORE_OPERATION:
        operation_result = dagster_event.event_specific_data
        return graphene_info.schema.type_named('ObjectStoreOperationEvent')(
            operation_result=operation_result, **basic_params)
    elif dagster_event.event_type == DagsterEventType.ENGINE_EVENT:
        return graphene_info.schema.type_named('EngineEvent')(
            metadataEntries=_to_dauphin_metadata_entries(
                dagster_event.event_specific_data.metadata_entries),
            **basic_params)
    else:
        raise Exception(
            'Unknown DAGSTER_EVENT type {inner_type} found in logs'.format(
                inner_type=dagster_event.event_type))
Example #8
def from_dagster_event_record(event_record, pipeline_name):
    # Lots of event types. Pylint thinks there are too many branches
    # pylint: disable=too-many-branches
    check.inst_param(event_record, "event_record", EventRecord)
    check.param_invariant(event_record.is_dagster_event, "event_record")
    check.str_param(pipeline_name, "pipeline_name")

    # circular ref at module scope
    from .errors import DauphinPythonError

    dagster_event = event_record.dagster_event
    basic_params = construct_basic_params(event_record)
    if dagster_event.event_type == DagsterEventType.STEP_START:
        return DauphinExecutionStepStartEvent(**basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_SKIPPED:
        return DauphinExecutionStepSkippedEvent(**basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        return DauphinExecutionStepUpForRetryEvent(
            error=dagster_event.step_retry_data.error,
            secondsToWait=dagster_event.step_retry_data.seconds_to_wait,
            **basic_params,
        )
    elif dagster_event.event_type == DagsterEventType.STEP_RESTARTED:
        return DauphinExecutionStepRestartEvent(**basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_SUCCESS:
        return DauphinExecutionStepSuccessEvent(**basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_INPUT:
        input_data = dagster_event.event_specific_data
        return DauphinExecutionStepInputEvent(
            input_name=input_data.input_name,
            type_check=input_data.type_check_data,
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_OUTPUT:
        output_data = dagster_event.step_output_data
        return DauphinExecutionStepOutputEvent(
            output_name=output_data.output_name,
            type_check=output_data.type_check_data,
            **basic_params,
        )
    elif dagster_event.event_type == DagsterEventType.STEP_MATERIALIZATION:
        materialization = dagster_event.step_materialization_data.materialization
        return DauphinStepMaterializationEvent(materialization=materialization,
                                               **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        expectation_result = dagster_event.event_specific_data.expectation_result
        return DauphinStepExpectationResultEvent(
            expectation_result=expectation_result, **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_FAILURE:
        check.inst(dagster_event.step_failure_data, StepFailureData)
        return DauphinExecutionStepFailureEvent(
            error=DauphinPythonError(dagster_event.step_failure_data.error),
            failureMetadata=dagster_event.step_failure_data.user_failure_data,
            **basic_params,
        )
    elif dagster_event.event_type == DagsterEventType.PIPELINE_ENQUEUED:
        return DauphinPipelineEnqueuedEvent(pipelineName=pipeline_name,
                                            **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_DEQUEUED:
        return DauphinPipelineDequeuedEvent(pipelineName=pipeline_name,
                                            **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_STARTING:
        return DauphinPipelineStartingEvent(pipelineName=pipeline_name,
                                            **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_CANCELING:
        return DauphinPipelineCancelingEvent(pipelineName=pipeline_name,
                                             **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_CANCELED:
        return DauphinPipelineCanceledEvent(pipelineName=pipeline_name,
                                            **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_START:
        return DauphinPipelineStartEvent(pipelineName=pipeline_name,
                                         **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_SUCCESS:
        return DauphinPipelineSuccessEvent(pipelineName=pipeline_name,
                                           **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_FAILURE:
        return DauphinPipelineFailureEvent(
            pipelineName=pipeline_name,
            error=DauphinPythonError(dagster_event.pipeline_failure_data.error)
            if (dagster_event.pipeline_failure_data
                and dagster_event.pipeline_failure_data.error) else None,
            **basic_params,
        )

    elif dagster_event.event_type == DagsterEventType.PIPELINE_INIT_FAILURE:
        return DauphinPipelineInitFailureEvent(
            pipelineName=pipeline_name,
            error=DauphinPythonError(
                dagster_event.pipeline_init_failure_data.error),
            **basic_params,
        )
    elif dagster_event.event_type == DagsterEventType.HANDLED_OUTPUT:
        return DauphinHandledOutputEvent(
            output_name=dagster_event.event_specific_data.output_name,
            manager_key=dagster_event.event_specific_data.manager_key,
            **basic_params,
        )
    elif dagster_event.event_type == DagsterEventType.LOADED_INPUT:
        return DauphinLoadedInputEvent(
            input_name=dagster_event.event_specific_data.input_name,
            manager_key=dagster_event.event_specific_data.manager_key,
            upstream_output_name=dagster_event.event_specific_data.
            upstream_output_name,
            upstream_step_key=dagster_event.event_specific_data.
            upstream_step_key,
            **basic_params,
        )
    elif dagster_event.event_type == DagsterEventType.OBJECT_STORE_OPERATION:
        operation_result = dagster_event.event_specific_data
        return DauphinObjectStoreOperationEvent(
            operation_result=operation_result, **basic_params)
    elif dagster_event.event_type == DagsterEventType.ENGINE_EVENT:
        return DauphinEngineEvent(
            metadataEntries=_to_dauphin_metadata_entries(
                dagster_event.engine_event_data.metadata_entries),
            error=DauphinPythonError(dagster_event.engine_event_data.error)
            if dagster_event.engine_event_data.error else None,
            marker_start=dagster_event.engine_event_data.marker_start,
            marker_end=dagster_event.engine_event_data.marker_end,
            **basic_params,
        )
    elif dagster_event.event_type == DagsterEventType.HOOK_COMPLETED:
        return DauphinHookCompletedEvent(**basic_params)
    elif dagster_event.event_type == DagsterEventType.HOOK_SKIPPED:
        return DauphinHookSkippedEvent(**basic_params)
    elif dagster_event.event_type == DagsterEventType.HOOK_ERRORED:
        return DauphinHookErroredEvent(error=DauphinPythonError(
            dagster_event.hook_errored_data.error),
                                       **basic_params)
    else:
        raise Exception(
            "Unknown DAGSTER_EVENT type {inner_type} found in logs".format(
                inner_type=dagster_event.event_type))
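Both from_dagster_event_record variants (Examples #7 and #8) are long if/elif chains keyed on DagsterEventType. For the branches that forward only **basic_params, the same dispatch can be condensed into a lookup table. The following is a hypothetical restructuring sketch with stand-in classes, not dagster's actual code; events that unpack event_specific_data would still need explicit branches:

from enum import Enum


class ToyEventType(Enum):
    STEP_START = "STEP_START"
    STEP_SKIPPED = "STEP_SKIPPED"
    STEP_SUCCESS = "STEP_SUCCESS"


class StepStartEvent:  # stand-ins for the Dauphin* event classes above
    def __init__(self, **params):
        self.params = params


class StepSkippedEvent(StepStartEvent):
    pass


class StepSuccessEvent(StepStartEvent):
    pass


EVENT_CLASS_BY_TYPE = {
    ToyEventType.STEP_START: StepStartEvent,
    ToyEventType.STEP_SKIPPED: StepSkippedEvent,
    ToyEventType.STEP_SUCCESS: StepSuccessEvent,
}


def from_event_type(event_type, **basic_params):
    cls = EVENT_CLASS_BY_TYPE.get(event_type)
    if cls is None:
        raise Exception("Unknown event type {} found in logs".format(event_type))
    return cls(**basic_params)


print(type(from_event_type(ToyEventType.STEP_SKIPPED, runId="123")).__name__)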
Example #9
    def __init__(
        self,
        config,
        default_value=FIELD_NO_DEFAULT_PROVIDED,
        is_required=None,
        description=None,
    ):
        from .validate import validate_config
        from .post_process import resolve_defaults

        self.config_type = check.inst(self._resolve_config_arg(config),
                                      ConfigType)

        self.description = check.opt_str_param(description, "description")

        check.opt_bool_param(is_required, "is_required")

        if default_value != FIELD_NO_DEFAULT_PROVIDED:
            check.param_invariant(not (callable(default_value)),
                                  "default_value",
                                  "default_value cannot be a callable")

        if is_required is True:
            check.param_invariant(
                default_value == FIELD_NO_DEFAULT_PROVIDED,
                "default_value",
                "required arguments should not specify default values",
            )

        self._default_value = default_value

        # check explicit default value
        if self.default_provided:
            if self.config_type.kind == ConfigTypeKind.ENUM and is_enum_value(
                    default_value):
                raise DagsterInvalidDefinitionError((
                    "You have passed into a python enum value as the default value "
                    "into of a config enum type {name}. You must pass in the underlying "
                    "string represention as the default value. One of {value_set}."
                ).format(
                    value_set=[
                        ev.config_value for ev in self.config_type.enum_values
                    ],
                    name=self.config_type.given_name,
                ))

            evr = validate_config(self.config_type, default_value)
            if not evr.success:
                raise DagsterInvalidConfigError(
                    "Invalid default_value for Field.",
                    evr.errors,
                    default_value,
                )

        if is_required is None:
            is_optional = all_optional_type(
                self.config_type) or self.default_provided
            is_required = not is_optional

            # on implicitly optional - set the default value
            # by resolving the defaults of the type
            if not is_required and not self.default_provided:
                evr = resolve_defaults(self.config_type, None)
                if not evr.success:
                    raise DagsterInvalidConfigError(
                        "Unable to resolve implicit default_value for Field.",
                        evr.errors,
                        None,
                    )
                self._default_value = evr.value
        self._is_required = is_required
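Example #9 is the constructor behind dagster's public Field API, validating default_value, is_required, and description. A brief usage sketch of how those arguments are typically passed (public dagster API; exact validation messages vary by version):

from dagster import Field

# a required field: supplying a default_value alongside is_required=True
# would trip the param_invariant in the constructor above
name_field = Field(str, is_required=True, description="Name of the dataset")

# an optional field: the default is validated against the config type
retries_field = Field(int, default_value=3, is_required=False)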
Example #10
def load_repository_from_target_info(info):
    return check.inst(
        load_repository_object_from_target_info(info).fn(),
        RepositoryDefinition)
Example #11
def inner_plan_execution_iterator(pipeline_context, execution_plan):
    check.inst_param(pipeline_context, 'pipeline_context',
                     SystemExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    retries = pipeline_context.retries

    for event in copy_required_intermediates_for_execution(
            pipeline_context, execution_plan):
        yield event

    # It would be good to implement a reference tracking algorithm here to
    # garbage collect results that are no longer needed by any steps
    # https://github.com/dagster-io/dagster/issues/811
    active_execution = execution_plan.start(retries=retries)
    while not active_execution.is_complete:
        step = active_execution.get_next_step()
        step_context = pipeline_context.for_step(step)

        missing_resources = [
            resource_key
            for resource_key in step_context.required_resource_keys
            if not hasattr(step_context.resources, resource_key)
        ]
        check.invariant(
            len(missing_resources) == 0,
            ('Expected step context for solid {solid_name} to have all required resources, but '
             'missing {missing_resources}.').format(
                 solid_name=step_context.solid.name,
                 missing_resources=missing_resources),
        )

        with pipeline_context.instance.compute_log_manager.watch(
                step_context.pipeline_run, step_context.step.key):
            # capture all of the logs for this step
            uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                step_context, step)
            if uncovered_inputs:
                # In partial pipeline execution, we may end up here without having validated the
                # missing dependent outputs were optional
                _assert_missing_inputs_optional(uncovered_inputs,
                                                execution_plan, step.key)

                step_context.log.info((
                    'Not all inputs covered for {step}. Not executing. Output missing for '
                    'inputs: {uncovered_inputs}').format(
                        uncovered_inputs=uncovered_inputs, step=step.key))
                yield DagsterEvent.step_skipped_event(step_context)
                active_execution.mark_skipped(step.key)
            else:
                for step_event in check.generator(
                        _dagster_event_sequence_for_step(
                            step_context, retries)):
                    check.inst(step_event, DagsterEvent)
                    yield step_event
                    active_execution.handle_event(step_event)

            active_execution.verify_complete(pipeline_context, step.key)

        # process skips from failures or uncovered inputs
        for event in active_execution.skipped_step_events_iterator(
                pipeline_context):
            yield event
Example #12
    def StartRun(self, request, _context):
        if self._shutdown_once_executions_finish_event.is_set():
            return api_pb2.StartRunReply(
                serialized_start_run_result=serialize_dagster_namedtuple(
                    StartRunResult(
                        success=False,
                        message=
                        "Tried to start a run on a server after telling it to shut down",
                        serializable_error_info=None,
                    )))

        try:
            execute_run_args = check.inst(
                deserialize_json_to_dagster_namedtuple(
                    request.serialized_execute_run_args),
                ExecuteExternalPipelineArgs,
            )
            run_id = execute_run_args.pipeline_run_id
            recon_pipeline = self._recon_pipeline_from_origin(
                execute_run_args.pipeline_origin)

        except:  # pylint: disable=bare-except
            return api_pb2.StartRunReply(
                serialized_start_run_result=serialize_dagster_namedtuple(
                    StartRunResult(
                        success=False,
                        message=None,
                        serializable_error_info=
                        serializable_error_info_from_exc_info(sys.exc_info()),
                    )))

        event_queue = multiprocessing.Queue()
        termination_event = multiprocessing.Event()
        execution_process = multiprocessing.Process(
            target=start_run_in_subprocess,
            args=[
                request.serialized_execute_run_args,
                recon_pipeline,
                event_queue,
                termination_event,
            ],
        )

        with self._execution_lock:
            execution_process.start()
            self._executions[run_id] = (
                execution_process,
                execute_run_args.instance_ref,
            )
            self._termination_events[run_id] = termination_event

        success = None
        message = None
        serializable_error_info = None

        while success is None:
            time.sleep(EVENT_QUEUE_POLL_INTERVAL)
            # We use `get_nowait()` instead of `get()` so that we can handle the case where the
            # execution process has died unexpectedly -- `get()` would hang forever in that case
            try:
                dagster_event_or_ipc_error_message_or_done = event_queue.get_nowait()
            except queue.Empty:
                if not execution_process.is_alive():
                    # subprocess died unexpectedly
                    success = False
                    message = (
                        "GRPC server: Subprocess for {run_id} terminated unexpectedly with "
                        "exit code {exit_code}".format(
                            run_id=run_id,
                            exit_code=execution_process.exitcode,
                        ))
                    serializable_error_info = serializable_error_info_from_exc_info(
                        sys.exc_info())
            else:
                if isinstance(dagster_event_or_ipc_error_message_or_done,
                              StartRunInSubprocessSuccessful):
                    success = True
                elif isinstance(dagster_event_or_ipc_error_message_or_done,
                                RunInSubprocessComplete):
                    continue
                if isinstance(dagster_event_or_ipc_error_message_or_done,
                              IPCErrorMessage):
                    success = False
                    message = dagster_event_or_ipc_error_message_or_done.message
                    serializable_error_info = (
                        dagster_event_or_ipc_error_message_or_done.
                        serializable_error_info)

        # Ensure that if the run failed, we remove it from the executions map before
        # returning so that CanCancel will never return True
        if not success:
            with self._execution_lock:
                self._clear_run(run_id)

        return api_pb2.StartRunReply(
            serialized_start_run_result=serialize_dagster_namedtuple(
                StartRunResult(
                    success=success,
                    message=message,
                    serializable_error_info=serializable_error_info,
                )))
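The polling loop in StartRun guards against the execution subprocess dying without ever posting to the queue: it uses get_nowait() plus an is_alive() check instead of a blocking get(). A self-contained sketch of that pattern with only the standard library:

import multiprocessing
import queue
import time

POLL_INTERVAL = 0.1  # stands in for EVENT_QUEUE_POLL_INTERVAL above


def worker(event_queue):
    event_queue.put("done")


if __name__ == "__main__":
    event_queue = multiprocessing.Queue()
    proc = multiprocessing.Process(target=worker, args=(event_queue,))
    proc.start()

    result = None
    while result is None:
        time.sleep(POLL_INTERVAL)
        try:
            result = event_queue.get_nowait()
        except queue.Empty:
            if not proc.is_alive():
                # the worker exited without ever reporting back
                result = "worker exited unexpectedly (exit code {})".format(proc.exitcode)
    print(result)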
Example #13
def pipeline_initialization_event_generator(
    execution_plan,
    run_config,
    pipeline_run,
    instance,
    scoped_resources_builder_cm,
    system_storage_data=None,
    intermediate_storage=None,
    raise_on_error=False,
):
    execution_plan = check.inst_param(execution_plan, 'execution_plan',
                                      ExecutionPlan)
    pipeline_def = execution_plan.pipeline.get_definition()

    run_config = check.dict_param(run_config, 'run_config', key_type=str)
    pipeline_run = check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    instance = check.inst_param(instance, 'instance', DagsterInstance)

    scoped_resources_builder_cm = check.callable_param(
        scoped_resources_builder_cm, 'scoped_resources_builder_cm')
    system_storage_data = check.opt_inst_param(system_storage_data,
                                               'system_storage_data',
                                               SystemStorageData)
    intermediate_storage = check.opt_inst_param(intermediate_storage,
                                                'intermediate_storage_data',
                                                IntermediateStorage)
    raise_on_error = check.bool_param(raise_on_error, 'raise_on_error')

    pipeline_context = None
    resources_manager = None

    try:
        context_creation_data = create_context_creation_data(
            execution_plan,
            run_config,
            pipeline_run,
            instance,
        )

        executor = check.inst(create_executor(context_creation_data), Executor,
                              'Must return an Executor')

        log_manager = create_log_manager(context_creation_data)
        resources_manager = scoped_resources_builder_cm(
            execution_plan,
            context_creation_data.environment_config,
            context_creation_data.pipeline_run,
            log_manager,
            context_creation_data.resource_keys_to_init,
        )
        for event in resources_manager.generate_setup_events():
            yield event
        scoped_resources_builder = check.inst(resources_manager.get_object(),
                                              ScopedResourcesBuilder)
        system_storage_data = create_system_storage_data(
            context_creation_data, system_storage_data,
            scoped_resources_builder)
        if intermediate_storage or context_creation_data.intermediate_storage_def:
            intermediate_storage = create_intermediate_storage(
                context_creation_data,
                intermediate_storage,
                scoped_resources_builder,
            )
        else:
            # remove this as part of https://github.com/dagster-io/dagster/issues/2705
            intermediate_storage = system_storage_data.intermediates_manager
        pipeline_context = construct_pipeline_execution_context(
            context_creation_data=context_creation_data,
            scoped_resources_builder=scoped_resources_builder,
            system_storage_data=system_storage_data,
            intermediate_storage=intermediate_storage,
            log_manager=log_manager,
            executor=executor,
            raise_on_error=raise_on_error,
        )

        _validate_plan_with_context(pipeline_context, execution_plan)

        yield pipeline_context
        for event in resources_manager.generate_teardown_events():
            yield event
    except DagsterError as dagster_error:
        if pipeline_context is None:
            user_facing_exc_info = (
                # pylint does not know original_exc_info exists if is_user_code_error is true
                # pylint: disable=no-member
                dagster_error.original_exc_info
                if dagster_error.is_user_code_error else sys.exc_info())
            error_info = serializable_error_info_from_exc_info(
                user_facing_exc_info)

            yield DagsterEvent.pipeline_init_failure(
                pipeline_name=pipeline_def.name,
                failure_data=PipelineInitFailureData(error=error_info),
                log_manager=_create_context_free_log_manager(
                    instance, pipeline_run, pipeline_def),
            )
            if resources_manager:
                for event in resources_manager.generate_teardown_events():
                    yield event
        else:
            # pipeline teardown failure
            raise dagster_error

        if raise_on_error:
            raise dagster_error
Example #14
def _core_resource_initialization_event_generator(
    resource_defs: Dict[str, ResourceDefinition],
    resource_configs: Dict[str, ResourceConfig],
    resource_log_manager: DagsterLogManager,
    resource_managers: Deque[EventGenerationManager],
    execution_plan: Optional[ExecutionPlan],
    pipeline_run: Optional[PipelineRun],
    resource_keys_to_init: Optional[Set[str]],
    instance: Optional[DagsterInstance],
    resource_instances_to_override: Optional[Dict[str, "InitializedResource"]],
    emit_persistent_events: Optional[bool],
    pipeline_def_for_backwards_compat: Optional[PipelineDefinition],
):

    pipeline_name = None
    if emit_persistent_events:
        check.invariant(
            pipeline_run and execution_plan,
            "If emit_persistent_events is enabled, then pipeline_run and execution_plan must be provided",
        )
        pipeline_name = cast(PipelineRun, pipeline_run).pipeline_name
    resource_instances_to_override = check.opt_dict_param(
        resource_instances_to_override, "resource_instances_to_override"
    )
    resource_keys_to_init = check.opt_set_param(resource_keys_to_init, "resource_keys_to_init")
    resource_instances: Dict[str, "InitializedResource"] = {}
    resource_init_times = {}
    try:
        if emit_persistent_events and resource_keys_to_init:
            yield DagsterEvent.resource_init_start(
                pipeline_name,
                execution_plan,
                resource_log_manager,
                resource_keys_to_init,
            )

        resource_dependencies = _resolve_resource_dependencies(resource_defs)

        for level in toposort(resource_dependencies):
            for resource_name in level:

                if resource_name in resource_instances_to_override:
                    # use the given resource instance instead of re-initializing it from the resource def
                    resource_def = ResourceDefinition.hardcoded_resource(
                        resource_instances_to_override[resource_name]
                    )
                else:
                    resource_def = resource_defs[resource_name]
                if resource_name not in resource_keys_to_init:
                    continue

                resource_context = InitResourceContext(
                    resource_def=resource_def,
                    resource_config=resource_configs[resource_name].config,
                    pipeline_run=pipeline_run,
                    # Add tags with information about the resource
                    log_manager=resource_log_manager.with_tags(
                        resource_name=resource_name,
                        resource_fn_name=str(resource_def.resource_fn.__name__),
                    ),
                    resource_instance_dict=resource_instances,
                    required_resource_keys=resource_def.required_resource_keys,
                    instance=instance,
                    pipeline_def_for_backwards_compat=pipeline_def_for_backwards_compat,
                )
                manager = single_resource_generation_manager(
                    resource_context, resource_name, resource_def
                )
                for event in manager.generate_setup_events():
                    if event:
                        yield event
                initialized_resource = check.inst(manager.get_object(), InitializedResource)
                resource_instances[resource_name] = initialized_resource.resource
                resource_init_times[resource_name] = initialized_resource.duration
                resource_managers.append(manager)

        if emit_persistent_events and resource_keys_to_init:
            yield DagsterEvent.resource_init_success(
                pipeline_name,
                execution_plan,
                resource_log_manager,
                resource_instances,
                resource_init_times,
            )
        yield ScopedResourcesBuilder(resource_instances)
    except DagsterUserCodeExecutionError as dagster_user_error:
        # Can only end up in this state if we attempt to initialize a resource, so
        # resource_keys_to_init cannot be empty
        if emit_persistent_events:
            yield DagsterEvent.resource_init_failure(
                pipeline_name,
                execution_plan,
                resource_log_manager,
                resource_keys_to_init,
                serializable_error_info_from_exc_info(dagster_user_error.original_exc_info),
            )
        raise dagster_user_error
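Resource initialization above walks resources in dependency order and only then yields the ScopedResourcesBuilder. A minimal, self-contained sketch of the ordering step using the third-party toposort package directly (dagster wraps it, so the exact return shape of its own helper may differ):

from toposort import toposort

# resource name -> set of resource names it depends on
resource_dependencies = {"db": set(), "cache": {"db"}, "api": {"db", "cache"}}

for level in toposort(resource_dependencies):
    # resources within a level are independent of one another
    for resource_name in sorted(level):
        print("initializing", resource_name)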
Example #15
    def _execute_step_k8s_job(
        self,
        execute_step_args_packed,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        user_defined_k8s_config_dict=None,
        kubeconfig_file=None,
    ):
        """Run step execution in a K8s job pod."""
        execute_step_args = unpack_value(
            check.dict_param(
                execute_step_args_packed,
                "execute_step_args_packed",
            ))
        check.inst_param(execute_step_args, "execute_step_args",
                         ExecuteStepArgs)
        check.invariant(
            len(execute_step_args.step_keys_to_execute) == 1,
            "Celery K8s task executor can only execute 1 step at a time",
        )

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, "job_config", DagsterK8sJobConfig)
        check.str_param(job_namespace, "job_namespace")

        check.bool_param(load_incluster_config, "load_incluster_config")

        user_defined_k8s_config = UserDefinedDagsterK8sConfig.from_dict(
            user_defined_k8s_config_dict)
        check.opt_inst_param(
            user_defined_k8s_config,
            "user_defined_k8s_config",
            UserDefinedDagsterK8sConfig,
        )
        check.opt_str_param(kubeconfig_file, "kubeconfig_file")

        # For when launched via DinD or running inside the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
        pipeline_run = instance.get_run_by_id(
            execute_step_args.pipeline_run_id)

        check.inst(
            pipeline_run,
            PipelineRun,
            "Could not load run {}".format(execute_step_args.pipeline_run_id),
        )
        step_key = execute_step_args.step_keys_to_execute[0]

        celery_worker_name = self.request.hostname
        celery_pod_name = os.environ.get("HOSTNAME")
        instance.report_engine_event(
            "Task for step {step_key} picked up by Celery".format(
                step_key=step_key),
            pipeline_run,
            EngineEventData([
                EventMetadataEntry.text(celery_worker_name,
                                        "Celery worker name"),
                EventMetadataEntry.text(celery_pod_name,
                                        "Celery worker Kubernetes Pod name"),
            ]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )

        if pipeline_run.status != PipelineRunStatus.STARTED:
            instance.report_engine_event(
                "Not scheduling step because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, "Step key"),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        # Ensure we stay below k8s name length limits
        k8s_name_key = get_k8s_job_name(execute_step_args.pipeline_run_id,
                                        step_key)

        retry_state = execute_step_args.known_state.get_retry_state()

        if retry_state.get_attempt_count(step_key):
            attempt_number = retry_state.get_attempt_count(step_key)
            job_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
            pod_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
        else:
            job_name = "dagster-job-%s" % (k8s_name_key)
            pod_name = "dagster-job-%s" % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(execute_step_args)
        args = ["dagster", "api", "execute_step", input_json]

        job = construct_dagster_k8s_job(job_config, args, job_name,
                                        user_defined_k8s_config, pod_name)

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        job_name = job.metadata.name
        engine_event = instance.report_engine_event(
            "Executing step {} in Kubernetes job {}".format(
                step_key, job_name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_config.job_image, "Job image"),
                    EventMetadataEntry.text(job_config.image_pull_policy,
                                            "Image pull policy"),
                    EventMetadataEntry.text(str(job_config.image_pull_secrets),
                                            "Image pull secrets"),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name),
                        "Service account name"),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys is length 1, and it is not possible to use ETH or
            # execution plan in this function (Celery K8s workers should not have access to user code)
            step_key=step_key,
        )
        events.append(engine_event)
        try:
            kubernetes.client.BatchV1Api().create_namespaced_job(
                body=job, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            if e.reason == "Conflict":
                # There is an existing job with the same name, so proceed and see if the existing job succeeded
                instance.report_engine_event(
                    "Did not create Kubernetes job {} for step {} since job name already "
                    "exists, proceeding with existing job.".format(
                        job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step key"),
                            EventMetadataEntry.text(job_name,
                                                    "Kubernetes Job name"),
                        ],
                        marker_end=DELEGATE_MARKER,
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            else:
                instance.report_engine_event(
                    "Encountered unexpected error while creating Kubernetes job {} for step {}, "
                    "exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step key"),
                        ],
                        error=serializable_error_info_from_exc_info(
                            sys.exc_info()),
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
                return []

        try:
            wait_for_job_success(
                job_name=job_name,
                namespace=job_namespace,
                instance=instance,
                run_id=execute_step_args.pipeline_run_id,
            )
        except (DagsterK8sError, DagsterK8sTimeoutError) as err:
            step_failure_event = construct_step_failure_event_and_handle(
                pipeline_run, step_key, err, instance=instance)
            events.append(step_failure_event)
        except DagsterK8sPipelineStatusException:
            instance.report_engine_event(
                "Terminating Kubernetes Job because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_namespace,
                                            "Kubernetes Job namespace"),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            delete_job(job_name=job_name, namespace=job_namespace)
            return []
        except (
                DagsterK8sUnrecoverableAPIError,
                DagsterK8sAPIRetryLimitExceeded,
                # We shouldn't see unwrapped APIExceptions anymore, as they should all be wrapped in
                # a retry boundary. We still catch it here just in case we missed one so that we can
                # report it to the event log
                kubernetes.client.rest.ApiException,
        ) as err:
            instance.report_engine_event(
                "Encountered unexpected error while waiting on Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step key"),
                    ],
                    error=serializable_error_info_from_exc_info(
                        sys.exc_info()),
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        try:
            pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            instance.report_engine_event(
                "Encountered unexpected error retreiving Pods for Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step key"),
                    ],
                    error=serializable_error_info_from_exc_info(
                        sys.exc_info()),
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            "Retrieving logs from Kubernetes Job pods",
            pipeline_run,
            EngineEventData(
                [EventMetadataEntry.text("\n".join(pod_names), "Pod names")]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            try:
                raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
                logs += raw_logs.split("\n")
            except kubernetes.client.rest.ApiException as e:
                instance.report_engine_event(
                    "Encountered unexpected error while fetching pod logs for Kubernetes job {}, "
                    "Pod name {} for step {}. Will attempt to continue with other pods."
                    .format(job_name, pod_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step key"),
                        ],
                        error=serializable_error_info_from_exc_info(
                            sys.exc_info()),
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )

        events += filter_dagster_events_from_pod_logs(logs)
        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
Example #16
def _core_resource_initialization_event_generator(
    execution_plan,
    environment_config,
    pipeline_run,
    resource_keys_to_init,
    resource_log_manager,
    resource_managers,
    instance,
    resource_instances_to_override,
):
    pipeline_def = execution_plan.pipeline_def
    resource_instances = {}
    mode_definition = pipeline_def.get_mode_definition(pipeline_run.mode)
    resource_init_times = {}
    try:
        if resource_keys_to_init:
            yield DagsterEvent.resource_init_start(
                execution_plan,
                resource_log_manager,
                resource_keys_to_init,
            )

        resource_dependencies = _resolve_resource_dependencies(
            mode_definition.resource_defs)

        for level in toposort(resource_dependencies):
            for resource_name in level:

                if resource_name in resource_instances_to_override:
                    # use the given resource instance instead of re-initializing it from its resource def
                    resource_def = ResourceDefinition.hardcoded_resource(
                        resource_instances_to_override[resource_name])
                else:
                    resource_def = mode_definition.resource_defs[resource_name]
                if resource_name not in resource_keys_to_init:
                    continue

                resource_context = InitResourceContext(
                    resource_def=resource_def,
                    resource_config=environment_config.resources.get(
                        resource_name, {}).get("config"),
                    pipeline_run=pipeline_run,
                    # Add tags with information about the resource
                    log_manager=resource_log_manager.with_tags(
                        resource_name=resource_name,
                        resource_fn_name=str(
                            resource_def.resource_fn.__name__),
                    ),
                    resource_instance_dict=resource_instances,
                    required_resource_keys=resource_def.required_resource_keys,
                    instance_for_backwards_compat=instance,
                    pipeline_def_for_backwards_compat=pipeline_def,
                )
                manager = single_resource_generation_manager(
                    resource_context, resource_name, resource_def)
                for event in manager.generate_setup_events():
                    if event:
                        yield event
                initialized_resource = check.inst(manager.get_object(),
                                                  InitializedResource)
                resource_instances[
                    resource_name] = initialized_resource.resource
                resource_init_times[
                    resource_name] = initialized_resource.duration
                resource_managers.append(manager)

        if resource_keys_to_init:
            yield DagsterEvent.resource_init_success(execution_plan,
                                                     resource_log_manager,
                                                     resource_instances,
                                                     resource_init_times)
        yield ScopedResourcesBuilder(resource_instances)
    except DagsterUserCodeExecutionError as dagster_user_error:
        yield DagsterEvent.resource_init_failure(
            execution_plan,
            resource_log_manager,
            resource_keys_to_init,
            serializable_error_info_from_exc_info(
                dagster_user_error.original_exc_info),
        )
        raise dagster_user_error
def pipeline_initialization_event_generator(
    pipeline_def,
    environment_dict,
    pipeline_run,
    instance,
    execution_plan,
    scoped_resources_builder_cm,
    system_storage_data=None,
    raise_on_error=False,
):
    pipeline_def = check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    environment_dict = check.dict_param(environment_dict, 'environment_dict', key_type=str)
    pipeline_run = check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    instance = check.inst_param(instance, 'instance', DagsterInstance)
    execution_plan = check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    scoped_resources_builder_cm = check.callable_param(
        scoped_resources_builder_cm, 'scoped_resources_builder_cm'
    )
    system_storage_data = check.opt_inst_param(
        system_storage_data, 'system_storage_data', SystemStorageData
    )
    raise_on_error = check.bool_param(raise_on_error, 'raise_on_error')

    pipeline_context = None
    resources_manager = None

    try:
        context_creation_data = create_context_creation_data(
            pipeline_def, environment_dict, pipeline_run, instance, execution_plan,
        )
        executor_config = create_executor_config(context_creation_data)
        log_manager = create_log_manager(context_creation_data)
        resources_manager = scoped_resources_builder_cm(
            context_creation_data.pipeline_def,
            context_creation_data.environment_config,
            context_creation_data.pipeline_run,
            log_manager,
            context_creation_data.resource_keys_to_init,
        )
        for event in resources_manager.generate_setup_events():
            yield event
        scoped_resources_builder = check.inst(
            resources_manager.get_object(), ScopedResourcesBuilder
        )
        system_storage_data = create_system_storage_data(
            context_creation_data, system_storage_data, scoped_resources_builder
        )
        pipeline_context = construct_pipeline_execution_context(
            context_creation_data=context_creation_data,
            scoped_resources_builder=scoped_resources_builder,
            system_storage_data=system_storage_data,
            log_manager=log_manager,
            executor_config=executor_config,
            raise_on_error=raise_on_error,
        )
        yield pipeline_context
        for event in resources_manager.generate_teardown_events():
            yield event
    except DagsterError as dagster_error:
        if pipeline_context is None:
            user_facing_exc_info = (
                # pylint does not know original_exc_info exists if is_user_code_error is true
                # pylint: disable=no-member
                dagster_error.original_exc_info
                if dagster_error.is_user_code_error
                else sys.exc_info()
            )
            error_info = serializable_error_info_from_exc_info(user_facing_exc_info)

            yield DagsterEvent.pipeline_init_failure(
                pipeline_name=pipeline_def.name,
                failure_data=PipelineInitFailureData(error=error_info),
                log_manager=_create_context_free_log_manager(instance, pipeline_run, pipeline_def),
            )
            try:
                if resources_manager:
                    for event in resources_manager.generate_teardown_events():
                        yield event
            except DagsterError as dagster_teardown_error:
                # we will fail/reraise based on the original error, so for now just do a best-effort
                # teardown and swallow any errors
                pass
        else:
            # pipeline teardown failure
            raise dagster_error

        if raise_on_error:
            raise dagster_error
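
pipeline_initialization_event_generator drives resources_manager through a setup/teardown protocol: generate_setup_events() streams events until the managed object has been produced, get_object() hands that object back, and generate_teardown_events() runs the remainder of the underlying generator. A simplified stand-in for that protocol (not dagster's actual EventGenerationManager; the event strings and the dict payload are placeholders):

class SimpleEventGenerationManager:
    """Wrap a generator that yields events, then exactly one `object_cls`
    instance, then more events (teardown)."""

    def __init__(self, generator, object_cls):
        self._generator = generator
        self._object_cls = object_cls
        self._object = None

    def generate_setup_events(self):
        # Drain events until the generator yields the managed object.
        for item in self._generator:
            if isinstance(item, self._object_cls):
                self._object = item
                return
            yield item

    def get_object(self):
        if self._object is None:
            raise Exception("setup did not produce the managed object")
        return self._object

    def generate_teardown_events(self):
        # Whatever the generator yields after the object counts as teardown.
        yield from self._generator


def scoped_resources_builder_cm():
    yield "resource_init_start"        # setup event
    yield {"db": "connection"}         # the managed object (a plain dict here)
    yield "resource_teardown_success"  # teardown event


manager = SimpleEventGenerationManager(scoped_resources_builder_cm(), dict)
setup_events = list(manager.generate_setup_events())        # ['resource_init_start']
resources = manager.get_object()                            # {'db': 'connection'}
teardown_events = list(manager.generate_teardown_events())  # ['resource_teardown_success']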
Example #18
    def execute(self, pipeline_context, execution_plan):
        check.inst_param(pipeline_context, "pipeline_context",
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
        check.param_invariant(
            isinstance(pipeline_context.executor, DaskExecutor),
            "pipeline_context",
            "Expected executor to be DaskExecutor got {}".format(
                pipeline_context.executor),
        )

        check.invariant(
            pipeline_context.instance.is_persistent,
            "Dask execution requires a persistent DagsterInstance",
        )

        step_levels = execution_plan.execution_step_levels()

        pipeline_name = pipeline_context.pipeline_def.name

        instance = pipeline_context.instance

        cluster_type = self.cluster_type
        if cluster_type == "local":
            from dask.distributed import LocalCluster

            cluster = LocalCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "yarn":
            from dask_yarn import YarnCluster

            cluster = YarnCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "ssh":
            from dask.distributed import SSHCluster

            cluster = SSHCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "pbs":
            from dask_jobqueue import PBSCluster

            cluster = PBSCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "moab":
            from dask_jobqueue import MoabCluster

            cluster = MoabCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "sge":
            from dask_jobqueue import SGECluster

            cluster = SGECluster(**self.build_dict(pipeline_name))
        elif cluster_type == "lsf":
            from dask_jobqueue import LSFCluster

            cluster = LSFCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "slurm":
            from dask_jobqueue import SLURMCluster

            cluster = SLURMCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "oar":
            from dask_jobqueue import OARCluster

            cluster = OARCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "kube":
            from dask_kubernetes import KubeCluster

            cluster = KubeCluster(**self.build_dict(pipeline_name))
        else:
            raise ValueError(
                f"Must be providing one of the following ('local', 'yarn', 'ssh', 'pbs', 'moab', 'sge', 'lsf', 'slurm', 'oar', 'kube') not {cluster_type}"
            )

        with dask.distributed.Client(cluster) as client:
            execution_futures = []
            execution_futures_dict = {}

            for step_level in step_levels:
                for step in step_level:
                    # We ensure correctness in sequencing by letting Dask schedule futures and
                    # awaiting dependencies within each step.
                    dependencies = []
                    for step_input in step.step_inputs:
                        for key in step_input.dependency_keys:
                            dependencies.append(execution_futures_dict[key])

                    run_config = dict(pipeline_context.run_config,
                                      execution={"in_process": {}})
                    recon_repo = pipeline_context.pipeline.get_reconstructable_repository(
                    )
                    variables = {
                        "executionParams": {
                            "selector": {
                                "pipelineName": pipeline_name,
                                "repositoryName":
                                recon_repo.get_definition().name,
                                "repositoryLocationName": "<<in_process>>",
                            },
                            "runConfigData": run_config,
                            "mode": pipeline_context.mode_def.name,
                            "executionMetadata": {
                                "runId": pipeline_context.pipeline_run.run_id
                            },
                            "stepKeys": [step.key],
                        }
                    }

                    dask_task_name = "%s.%s" % (pipeline_name, step.key)

                    workspace = create_in_process_ephemeral_workspace(
                        pointer=pipeline_context.pipeline.
                        get_reconstructable_repository().pointer)

                    future = client.submit(
                        query_on_dask_worker,
                        workspace,
                        variables,
                        dependencies,
                        instance.get_ref(),
                        key=dask_task_name,
                        resources=get_dask_resource_requirements(step.tags),
                    )

                    execution_futures.append(future)
                    execution_futures_dict[step.key] = future

            # This tells Dask to await the step executions and retrieve their results to the
            # master
            for future in dask.distributed.as_completed(execution_futures):
                for step_event in future.result():
                    check.inst(step_event, DagsterEvent)

                    yield step_event
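
Sequencing in the Dask executor above comes from passing upstream futures into client.submit: Dask will not start a task until the futures it received as arguments have resolved. A minimal sketch of that pattern against an in-process client, with placeholder step functions and a hypothetical three-step plan (assumes dask.distributed is installed):

import dask.distributed

def run_step(step_key, dependencies):
    # `dependencies` arrives as a list of already-resolved upstream results,
    # because Dask waits for any futures passed in as arguments.
    return f"{step_key} ran after {sorted(dependencies)}"

if __name__ == "__main__":
    with dask.distributed.Client(processes=False) as client:
        futures = {}
        plan = {"extract": [], "transform": ["extract"], "load": ["transform"]}

        for step_key, upstream in plan.items():
            deps = [futures[key] for key in upstream]
            futures[step_key] = client.submit(
                run_step, step_key, deps, key=f"demo.{step_key}"
            )

        for future in dask.distributed.as_completed(list(futures.values())):
            print(future.result())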
Example #19
def sync_list_repositories_grpc(api_client):
    from dagster.grpc.client import DagsterGrpcClient
    from dagster.grpc.types import ListRepositoriesResponse

    check.inst_param(api_client, 'api_client', DagsterGrpcClient)
    return check.inst(api_client.list_repositories(), ListRepositoriesResponse)
Example #20
    def _execute_step_k8s_job(
        _self,
        instance_ref_dict,
        step_keys,
        run_config,
        mode,
        repo_name,
        repo_location_name,
        run_id,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        retries_dict,
        pipeline_origin_packed,
        resources=None,
        kubeconfig_file=None,
    ):
        '''Run step execution in a K8s job pod.
        '''

        check.dict_param(instance_ref_dict, 'instance_ref_dict')
        check.list_param(step_keys, 'step_keys', of_type=str)
        check.invariant(
            len(step_keys) == 1, 'Celery K8s task executor can only execute 1 step at a time'
        )
        check.dict_param(run_config, 'run_config')
        check.str_param(mode, 'mode')
        check.str_param(repo_name, 'repo_name')
        check.str_param(repo_location_name, 'repo_location_name')
        check.str_param(run_id, 'run_id')

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, 'job_config', DagsterK8sJobConfig)
        check.str_param(job_namespace, 'job_namespace')

        check.bool_param(load_incluster_config, 'load_incluster_config')
        check.dict_param(retries_dict, 'retries_dict')

        pipeline_origin = unpack_value(
            check.dict_param(
                pipeline_origin_packed, 'pipeline_origin_packed'
            )  # TODO: make part of args
        )
        check.inst(pipeline_origin, PipelineOrigin)

        check.opt_dict_param(resources, 'resources', key_type=str, value_type=dict)
        check.opt_str_param(kubeconfig_file, 'kubeconfig_file')

        # For when launched via DinD or running inside the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)

        check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

        step_key = step_keys[0]
        if pipeline_run.status != PipelineRunStatus.STARTED:
            instance.report_engine_event(
                'Not scheduling step because pipeline run status is not STARTED',
                pipeline_run,
                EngineEventData([EventMetadataEntry.text(step_key, 'Step keys'),]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return

        # Ensure we stay below k8s name length limits
        k8s_name_key = get_k8s_job_name(run_id, step_key)

        retries = Retries.from_config(retries_dict)

        if retries.get_attempt_count(step_key):
            attempt_number = retries.get_attempt_count(step_key)
            job_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
            pod_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
        else:
            job_name = 'dagster-job-%s' % (k8s_name_key)
            pod_name = 'dagster-job-%s' % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(
            ExecuteStepArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run_id,
                instance_ref=None,
                mode=mode,
                step_keys_to_execute=step_keys,
                run_config=run_config,
                retries_dict=retries_dict,
            )
        )
        command = ['dagster']
        args = ['api', 'execute_step_with_structured_logs', input_json]

        job = construct_dagster_k8s_job(job_config, command, args, job_name, resources, pod_name)

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        job_name = job.metadata.name
        engine_event = instance.report_engine_event(
            'Executing step {} in Kubernetes job {}'.format(step_key, job_name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, 'Step keys'),
                    EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                    EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                    EventMetadataEntry.text(job_config.job_image, 'Job image'),
                    EventMetadataEntry.text(job_config.image_pull_policy, 'Image pull policy'),
                    EventMetadataEntry.text(
                        str(job_config.image_pull_secrets), 'Image pull secrets'
                    ),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name), 'Service account name'
                    ),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys is length 1, and it is not possible to use ETH or
            # execution plan in this function (Celery K8s workers should not have access to user code)
            step_key=step_key,
        )
        events.append(engine_event)

        try:
            kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            if e.reason == 'Conflict':
                # There is an existing job with the same name, so do not proceed.
                instance.report_engine_event(
                    'Did not create Kubernetes job {} for step {} since job name already '
                    'exists, exiting.'.format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, 'Step keys'),
                            EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                            EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                        ],
                        marker_end=DELEGATE_MARKER,
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            else:
                instance.report_engine_event(
                    'Encountered unexpected error while creating Kubernetes job {} for step {}, '
                    'exiting.'.format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, 'Step keys'),
                            EventMetadataEntry.text(str(e), 'Error'),
                        ]
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            return

        try:
            wait_for_job_success(
                job_name=job_name, namespace=job_namespace, instance=instance, run_id=run_id,
            )
        except DagsterK8sPipelineStatusException:
            instance.report_engine_event(
                'Terminating Kubernetes Job because pipeline run status is not STARTED',
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, 'Step keys'),
                        EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                        EventMetadataEntry.text(job_namespace, 'Kubernetes Job namespace'),
                    ]
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            delete_job(job_name=job_name, namespace=job_namespace)
            return

        pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            'Retrieving logs from Kubernetes Job pods',
            pipeline_run,
            EngineEventData([EventMetadataEntry.text('\n'.join(pod_names), 'Pod names')]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
            logs += raw_logs.split('\n')

        events += filter_dagster_events_from_pod_logs(logs)
        serialized_events = [serialize_dagster_namedtuple(event) for event in events]
        return serialized_events
Example #21
    def _execute_step_docker(
        self,
        execute_step_args_packed,
        docker_config,
    ):
        """Run step execution in a Docker container."""
        execute_step_args = unpack_value(
            check.dict_param(
                execute_step_args_packed,
                "execute_step_args_packed",
            ))
        check.inst_param(execute_step_args, "execute_step_args",
                         ExecuteStepArgs)

        check.dict_param(docker_config, "docker_config")

        instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
        pipeline_run = instance.get_run_by_id(
            execute_step_args.pipeline_run_id)
        check.inst(
            pipeline_run,
            PipelineRun,
            "Could not load run {}".format(execute_step_args.pipeline_run_id),
        )
        step_keys_str = ", ".join(execute_step_args.step_keys_to_execute)

        input_json = serialize_dagster_namedtuple(execute_step_args)

        command = "dagster api execute_step {}".format(json.dumps(input_json))

        docker_image = (docker_config["image"]
                        if docker_config.get("image") else execute_step_args.
                        pipeline_origin.repository_origin.container_image)

        if not docker_image:
            raise Exception(
                "No docker image specified by either the pipeline or the repository"
            )

        client = docker.client.from_env()

        if docker_config.get("registry"):
            client.login(
                registry=docker_config["registry"]["url"],
                username=docker_config["registry"]["username"],
                password=docker_config["registry"]["password"],
            )

        # Post event for starting execution
        engine_event = instance.report_engine_event(
            "Executing steps {} in Docker container {}".format(
                step_keys_str, docker_image),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, "Step keys"),
                    EventMetadataEntry.text(docker_image, "Image"),
                    EventMetadataEntry.text(self.request.hostname,
                                            "Celery worker"),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryDockerExecutor,
            step_key=execute_step_args.step_keys_to_execute[0],
        )

        serialized_events = [serialize_dagster_namedtuple(engine_event)]

        docker_env = {}
        if docker_config.get("env_vars"):
            docker_env = {
                env_name: os.getenv(env_name)
                for env_name in docker_config["env_vars"]
            }

        try:
            docker_response = client.containers.run(
                docker_image,
                command=command,
                detach=False,
                auto_remove=True,
                # pass through this worker's environment for things like AWS creds etc.
                environment=docker_env,
                network=docker_config.get("network", None),
            )

            res = docker_response.decode("utf-8")
        except docker.errors.ContainerError as err:
            instance.report_engine_event(
                "Failed to run steps {} in Docker container {}".format(
                    step_keys_str, docker_image),
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(docker_image, "Job image"),
                    EventMetadataEntry.text(err.stderr, "Docker stderr"),
                ], ),
                CeleryDockerExecutor,
                step_key=execute_step_args.step_keys_to_execute[0],
            )
            raise
        else:
            if res is None:
                raise Exception(
                    "No response from execute_step in CeleryDockerExecutor")

            serialized_events += [event for event in res.split("\n") if event]

        return serialized_events
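
The container invocation above boils down to a synchronous docker run that returns the container's stdout. A stripped-down helper showing the same docker-py calls (assumes the docker SDK is installed; the image, command, and env values in the usage comment are placeholders):

import docker

def run_step_in_container(image, command, env):
    # Synchronous run, auto-removed on exit; returns the container's stdout
    # split into non-empty lines, mirroring the executor's handling above.
    client = docker.client.from_env()
    try:
        raw_output = client.containers.run(
            image,
            command=command,
            detach=False,
            auto_remove=True,
            environment=env,
        )
    except docker.errors.ContainerError as err:
        print("container exited with an error:", err.stderr)
        raise
    return [line for line in raw_output.decode("utf-8").split("\n") if line]

# Example usage (placeholder values):
# run_step_in_container("python:3.8-slim", "python -c 'print(42)'", {"MY_VAR": "1"})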
Example #22
    def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
        check.opt_list_param(step_keys_to_execute,
                             'step_keys_to_execute',
                             of_type=str)

        step_key_set = None if step_keys_to_execute is None else set(
            step_keys_to_execute)

        dask_config = pipeline_context.run_config.executor_config

        check.param_invariant(
            isinstance(pipeline_context.executor_config, DaskConfig),
            'pipeline_context',
            'Expected executor_config to be DaskConfig got {}'.format(
                pipeline_context.executor_config),
        )

        # Checks to ensure storage is compatible with Dask configuration
        storage = pipeline_context.environment_dict.get('storage')
        check.invariant(storage.keys(),
                        'Must specify storage to use Dask execution')

        if dask_config.is_remote_execution:
            check.invariant(
                storage.get('s3'),
                'Must use S3 storage with non-local Dask address {dask_address}'
                .format(dask_address=dask_config.address),
            )
        else:
            check.invariant(
                not storage.get('in_memory'),
                'Cannot use in-memory storage with Dask, use filesystem or S3',
            )

        step_levels = execution_plan.topological_step_levels()

        pipeline_name = pipeline_context.pipeline_def.name

        with dask.distributed.Client(
                **dask_config.build_dict(pipeline_name)) as client:
            execution_futures = []
            execution_futures_dict = {}

            for step_level in step_levels:
                for step in step_level:
                    if step_key_set and step.key not in step_key_set:
                        continue

                    step_context = pipeline_context.for_step(step)

                    check.invariant(
                        not step_context.run_config.loggers,
                        'Cannot inject loggers via RunConfig with the Dask executor',
                    )

                    check.invariant(
                        not step_context.event_callback,
                        'Cannot use event_callback with Dask executor',
                    )

                    # We ensure correctness in sequencing by letting Dask schedule futures and
                    # awaiting dependencies within each step.
                    dependencies = [
                        execution_futures_dict[
                            step_input.prev_output_handle.step_key]
                        for step_input in step.step_inputs
                        if step_input.prev_output_handle is not None
                    ]

                    variables = {
                        'executionParams': {
                            'selector': {
                                'name': pipeline_name
                            },
                            'environmentConfigData':
                            pipeline_context.environment_dict,
                            'mode': pipeline_context.mode_def.name,
                            'executionMetadata': {
                                'runId': pipeline_context.run_config.run_id
                            },
                            'stepKeys': [step.key],
                        }
                    }

                    dask_task_name = '%s.%s' % (pipeline_name, step.key)

                    future = client.submit(
                        query_on_dask_worker,
                        pipeline_context.execution_target_handle,
                        variables,
                        dependencies,
                        key=dask_task_name,
                    )

                    execution_futures.append(future)
                    execution_futures_dict[step.key] = future

            # This tells Dask to await the step executions and retrieve their results to the master
            execution_step_events = client.gather(execution_futures)

            # execution_step_events is now a list of lists; the inner lists contain the dagster
            # events emitted by each step execution
            for step_event in itertools.chain.from_iterable(
                    execution_step_events):
                check.inst(step_event, DagsterEvent)

                yield step_event
Example #23
def celery_k8s_job_executor(init_context):
    """Celery-based executor which launches tasks as Kubernetes Jobs.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a Celery Kubernetes Job executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. literalinclude:: ../dagster_celery_k8s_tests/example_celery_mode_def.py
       :language: python

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-k8s:
            config:
              job_image: 'my_repo.com/image_name:latest'
              job_namespace: 'some-namespace'
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.

    In deployments where the celery_k8s_job_executor is used, all appropriate celery and dagster_celery
    commands must be invoked with the ``-A dagster_celery_k8s.app`` argument.
    """

    check_cross_process_constraints(init_context)

    run_launcher = init_context.instance.run_launcher
    exc_cfg = init_context.executor_config

    check.inst(
        run_launcher,
        CeleryK8sRunLauncher,
        "This engine is only compatible with a CeleryK8sRunLauncher; configure the "
        "CeleryK8sRunLauncher on your instance to use it.",
    )

    job_config = DagsterK8sJobConfig(
        dagster_home=run_launcher.dagster_home,
        instance_config_map=run_launcher.instance_config_map,
        postgres_password_secret=run_launcher.postgres_password_secret,
        job_image=exc_cfg.get("job_image")
        or os.getenv("DAGSTER_CURRENT_IMAGE"),
        image_pull_policy=exc_cfg.get("image_pull_policy"),
        image_pull_secrets=exc_cfg.get("image_pull_secrets"),
        service_account_name=exc_cfg.get("service_account_name"),
        env_config_maps=exc_cfg.get("env_config_maps"),
        env_secrets=exc_cfg.get("env_secrets"),
    )

    # Set on the instance but overrideable here
    broker = run_launcher.broker or exc_cfg.get("broker")
    backend = run_launcher.backend or exc_cfg.get("backend")
    config_source = run_launcher.config_source or exc_cfg.get("config_source")
    include = run_launcher.include or exc_cfg.get("include")
    retries = run_launcher.retries or Retries.from_config(
        exc_cfg.get("retries"))

    return CeleryK8sJobExecutor(
        broker=broker,
        backend=backend,
        config_source=config_source,
        include=include,
        retries=retries,
        job_config=job_config,
        job_namespace=exc_cfg.get("job_namespace"),
        load_incluster_config=exc_cfg.get("load_incluster_config"),
        kubeconfig_file=exc_cfg.get("kubeconfig_file"),
        repo_location_name=exc_cfg.get("repo_location_name"),
    )
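
The docstring points at an example mode definition; a rough sketch of that wiring in the dagster API of this era (the solid and pipeline names are made up):

from dagster import ModeDefinition, default_executors, pipeline, solid
from dagster_celery_k8s import celery_k8s_job_executor

# Expose the Celery K8s executor alongside the default in-process and
# multiprocess executors.
celery_k8s_mode = ModeDefinition(
    name="celery_k8s",
    executor_defs=default_executors + [celery_k8s_job_executor],
)

@solid
def noop_solid(_):
    return 1

@pipeline(mode_defs=[celery_k8s_mode])
def example_pipeline():
    noop_solid()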
Example #24
    def execute(self, pipeline_context, execution_plan):
        check.inst_param(pipeline_context, "pipeline_context",
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
        check.param_invariant(
            isinstance(pipeline_context.executor, DaskExecutor),
            "pipeline_context",
            "Expected executor to be DaskExecutor got {}".format(
                pipeline_context.executor),
        )

        check.invariant(
            pipeline_context.instance.is_persistent,
            "Dask execution requires a persistent DagsterInstance",
        )

        step_levels = execution_plan.get_steps_to_execute_by_level()

        pipeline_name = pipeline_context.pipeline_def.name

        instance = pipeline_context.instance

        cluster_type = self.cluster_type
        if cluster_type == "existing":
            # address passed directly to Client() below to connect to existing Scheduler
            cluster = self.cluster_configuration["address"]
        elif cluster_type == "local":
            from dask.distributed import LocalCluster

            cluster = LocalCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "yarn":
            from dask_yarn import YarnCluster

            cluster = YarnCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "ssh":
            from dask.distributed import SSHCluster

            cluster = SSHCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "pbs":
            from dask_jobqueue import PBSCluster

            cluster = PBSCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "moab":
            from dask_jobqueue import MoabCluster

            cluster = MoabCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "sge":
            from dask_jobqueue import SGECluster

            cluster = SGECluster(**self.build_dict(pipeline_name))
        elif cluster_type == "lsf":
            from dask_jobqueue import LSFCluster

            cluster = LSFCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "slurm":
            from dask_jobqueue import SLURMCluster

            cluster = SLURMCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "oar":
            from dask_jobqueue import OARCluster

            cluster = OARCluster(**self.build_dict(pipeline_name))
        elif cluster_type == "kube":
            from dask_kubernetes import KubeCluster

            cluster = KubeCluster(**self.build_dict(pipeline_name))
        else:
            raise ValueError(
                f"Must be providing one of the following ('existing', 'local', 'yarn', 'ssh', 'pbs', 'moab', 'sge', 'lsf', 'slurm', 'oar', 'kube') not {cluster_type}"
            )

        with dask.distributed.Client(cluster) as client:
            execution_futures = []
            execution_futures_dict = {}

            for step_level in step_levels:
                for step in step_level:
                    # We ensure correctness in sequencing by letting Dask schedule futures and
                    # awaiting dependencies within each step.
                    dependencies = []
                    for step_input in step.step_inputs:
                        for key in step_input.dependency_keys:
                            dependencies.append(execution_futures_dict[key])

                    run_config = dict(pipeline_context.run_config,
                                      execution={"in_process": {}})
                    recon_repo = pipeline_context.pipeline.get_reconstructable_repository(
                    )

                    dask_task_name = "%s.%s" % (pipeline_name, step.key)

                    recon_pipeline = recon_repo.get_reconstructable_pipeline(
                        pipeline_name)

                    future = client.submit(
                        query_on_dask_worker,
                        dependencies,
                        recon_pipeline,
                        pipeline_context.pipeline_run,
                        run_config,
                        [step.key],
                        pipeline_context.mode_def.name,
                        instance.get_ref(),
                        key=dask_task_name,
                        resources=get_dask_resource_requirements(step.tags),
                    )

                    execution_futures.append(future)
                    execution_futures_dict[step.key] = future

            # This tells Dask to await the step executions and retrieve their results to the
            # master
            futures = dask.distributed.as_completed(execution_futures,
                                                    with_results=True)

            # Allow interrupts while waiting for the results from Dask
            for future, result in iterate_with_context(
                    raise_execution_interrupts, futures):
                for step_event in result:
                    check.inst(step_event, DagsterEvent)
                    yield step_event
Example #25
def start_inprocess_executor(
    pipeline_context, execution_plan, intermediates_manager, step_keys_to_execute=None
):
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(intermediates_manager, 'intermediates_manager', IntermediatesManager)
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    step_key_set = None if step_keys_to_execute is None else set(step_keys_to_execute)

    check.param_invariant(
        isinstance(pipeline_context.executor_config, ExecutorConfig),
        'pipeline_context',
        'Expected executor_config to be ExecutorConfig got {}'.format(
            pipeline_context.executor_config
        ),
    )

    failed_or_skipped_steps = set()

    step_levels = execution_plan.topological_step_levels()

    # It would be good to implement a reference tracking algorithm here so we could
    # garbage collect results that are no longer needed by any steps
    # https://github.com/dagster-io/dagster/issues/811

    for step_level in step_levels:
        for step in step_level:
            if step_key_set and step.key not in step_key_set:
                continue

            step_context = pipeline_context.for_step(step)

            failed_inputs = [
                step_input.prev_output_handle.step_key
                for step_input in step.step_inputs
                if step_input.prev_output_handle.step_key in failed_or_skipped_steps
            ]
            if failed_inputs:
                step_context.log.info(
                    'Dependencies for step {step} failed: {failed_inputs}. Not executing.'.format(
                        step=step.key, failed_inputs=failed_inputs
                    )
                )
                failed_or_skipped_steps.add(step.key)
                yield DagsterEvent.step_skipped_event(step_context)
                continue

            uncovered_inputs = intermediates_manager.uncovered_inputs(step_context, step)
            if uncovered_inputs:
                # In partial pipeline execution, we may end up here without having validated the
                # missing dependent outputs were optional
                _assert_missing_inputs_optional(uncovered_inputs, execution_plan, step.key)

                step_context.log.info(
                    (
                        'Not all inputs covered for {step}. Not executing. Output missing for '
                        'inputs: {uncovered_inputs}'
                    ).format(uncovered_inputs=uncovered_inputs, step=step.key)
                )
                failed_or_skipped_steps.add(step.key)
                yield DagsterEvent.step_skipped_event(step_context)
                continue

            input_values = _create_input_values(step_context, intermediates_manager)

            for step_event in check.generator(
                execute_step_in_memory(step_context, input_values, intermediates_manager)
            ):
                check.inst(step_event, DagsterEvent)
                if step_event.is_step_failure:
                    failed_or_skipped_steps.add(step.key)

                yield step_event
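
The failed_or_skipped_steps set above is what makes failures cascade: a step is skipped as soon as any of its upstream step keys is already in the set, and the skipped step is added to the set in turn. A tiny standalone illustration over a made-up plan:

# Hypothetical plan: execution levels plus a map of step key -> upstream step keys.
plan_levels = [
    ["load_data"],
    ["clean_data"],
    ["train_model", "report_stats"],
]
upstream = {
    "load_data": [],
    "clean_data": ["load_data"],
    "train_model": ["clean_data"],
    "report_stats": ["clean_data"],
}

failed_or_skipped_steps = {"clean_data"}  # pretend this step just failed

for level in plan_levels:
    for step_key in level:
        failed_inputs = [
            key for key in upstream[step_key] if key in failed_or_skipped_steps
        ]
        if failed_inputs:
            # Mirrors the executor: mark it so *its* downstream steps skip too.
            failed_or_skipped_steps.add(step_key)
            print(f"skipping {step_key}; failed inputs: {failed_inputs}")

# skipping train_model; failed inputs: ['clean_data']
# skipping report_stats; failed inputs: ['clean_data']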
Example #26
    def get_step_by_key(self, step_key: str) -> ExecutionStep:
        step = self._plan.get_step_by_key(step_key)
        return cast(ExecutionStep, check.inst(step, ExecutionStep))
Example #27
def load_repository_from_target_info(repo_target_info):
    check.inst_param(repo_target_info, 'repo_target_info',
                     RepositoryTargetInfo)
    return check.inst(
        load_repository_object_from_target_info(repo_target_info).load(),
        RepositoryDefinition)
Example #28
def inner_plan_execution_iterator(
    pipeline_context: PlanExecutionContext, execution_plan: ExecutionPlan
) -> Iterator[DagsterEvent]:
    check.inst_param(pipeline_context, "pipeline_context", PlanExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    with execution_plan.start(retry_mode=pipeline_context.retry_mode) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            step_context = cast(
                StepExecutionContext,
                pipeline_context.for_step(
                    step, active_execution.retry_state.get_attempt_count(step.key)
                ),
            )
            step_event_list = []

            missing_resources = [
                resource_key
                for resource_key in step_context.required_resource_keys
                if not hasattr(step_context.resources, resource_key)
            ]
            check.invariant(
                len(missing_resources) == 0,
                (
                    "Expected step context for solid {solid_name} to have all required resources, but "
                    "missing {missing_resources}."
                ).format(solid_name=step_context.solid.name, missing_resources=missing_resources),
            )

            # capture all of the logs for this step
            with ExitStack() as stack:
                log_capture_error = None
                try:
                    stack.enter_context(
                        pipeline_context.instance.compute_log_manager.watch(
                            step_context.pipeline_run, step_context.step.key
                        )
                    )
                except Exception as e:
                    yield DagsterEvent.engine_event(
                        pipeline_context=pipeline_context,
                        message="Exception while setting up compute log capture",
                        event_specific_data=EngineEventData(
                            error=serializable_error_info_from_exc_info(sys.exc_info())
                        ),
                        step_handle=step_context.step.handle,
                    )
                    log_capture_error = e

                if not log_capture_error:
                    yield DagsterEvent.capture_logs(
                        step_context, log_key=step_context.step.key, steps=[step_context.step]
                    )

                for step_event in check.generator(dagster_event_sequence_for_step(step_context)):
                    check.inst(step_event, DagsterEvent)
                    step_event_list.append(step_event)
                    yield step_event
                    active_execution.handle_event(step_event)

                active_execution.verify_complete(pipeline_context, step.key)

                try:
                    stack.close()
                except Exception:
                    yield DagsterEvent.engine_event(
                        pipeline_context=pipeline_context,
                        message="Exception while cleaning up compute log capture",
                        event_specific_data=EngineEventData(
                            error=serializable_error_info_from_exc_info(sys.exc_info())
                        ),
                        step_handle=step_context.step.handle,
                    )

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(pipeline_context):
                step_event_list.append(event)
                yield event

            # pass a list of step events to hooks
            for hook_event in _trigger_hook(step_context, step_event_list):
                yield hook_event
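# --- Illustrative sketch (not from the source above) ---------------------
# The example uses contextlib.ExitStack so that a failure while *entering*
# compute-log capture does not abort the step, and cleanup errors can be
# reported separately via an explicit stack.close(). A minimal, runnable
# sketch of that pattern with stdlib only (names below are illustrative,
# not dagster APIs):
from contextlib import ExitStack, contextmanager


@contextmanager
def capture_logs(step_key):
    print(f"start log capture for {step_key}")
    try:
        yield
    finally:
        print(f"stop log capture for {step_key}")


def run_step_with_capture(step_key):
    with ExitStack() as stack:
        try:
            stack.enter_context(capture_logs(step_key))
        except Exception:
            print("log capture setup failed; executing the step anyway")
        print(f"executing {step_key}")
        try:
            stack.close()  # surface cleanup errors separately from step errors
        except Exception:
            print("log capture cleanup failed; step result is unaffected")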
Example #29
def solid_definition(fn):
    return check.inst(fn(), SolidDefinition)
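# --- Illustrative usage sketch (not from the source above) ---------------
# The helper asserts that a zero-argument callable returns a SolidDefinition.
# Assuming the legacy dagster `@solid` decorator, a decorated function is
# itself a SolidDefinition, so a test might use the helper like this
# (`hello_solid` is a hypothetical example solid):
from dagster import solid


@solid
def hello_solid(context):
    context.log.info("hello")


definition = solid_definition(lambda: hello_solid)  # passes the check.inst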
Example #30
def from_dagster_event_record(event_record, pipeline_name,
                              execution_plan_index):
    # Lots of event types. Pylint thinks there are too many branches
    # pylint: disable=too-many-branches
    check.inst_param(event_record, 'event_record', EventRecord)
    check.param_invariant(event_record.is_dagster_event, 'event_record')
    check.str_param(pipeline_name, 'pipeline_name')
    check.opt_inst_param(execution_plan_index, 'execution_plan_index',
                         ExecutionPlanIndex)

    # circular ref at module scope
    from .errors import DauphinPythonError

    dagster_event = event_record.dagster_event
    basic_params = construct_basic_params(event_record, execution_plan_index)
    if dagster_event.event_type == DagsterEventType.STEP_START:
        return DauphinExecutionStepStartEvent(**basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_SKIPPED:
        return DauphinExecutionStepSkippedEvent(**basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        return DauphinExecutionStepUpForRetryEvent(
            error=dagster_event.step_retry_data.error,
            secondsToWait=dagster_event.step_retry_data.seconds_to_wait,
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_RESTARTED:
        return DauphinExecutionStepRestartEvent(**basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_SUCCESS:
        return DauphinExecutionStepSuccessEvent(**basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_INPUT:
        input_data = dagster_event.event_specific_data
        return DauphinExecutionStepInputEvent(
            input_name=input_data.input_name,
            type_check=input_data.type_check_data,
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_OUTPUT:
        output_data = dagster_event.step_output_data
        return DauphinExecutionStepOutputEvent(
            output_name=output_data.output_name,
            type_check=output_data.type_check_data,
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_MATERIALIZATION:
        materialization = dagster_event.step_materialization_data.materialization
        return DauphinStepMaterializationEvent(materialization=materialization,
                                               **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        expectation_result = dagster_event.event_specific_data.expectation_result
        return DauphinStepExpectationResultEvent(
            expectation_result=expectation_result, **basic_params)
    elif dagster_event.event_type == DagsterEventType.STEP_FAILURE:
        check.inst(dagster_event.step_failure_data, StepFailureData)
        return DauphinExecutionStepFailureEvent(
            error=DauphinPythonError(dagster_event.step_failure_data.error),
            failureMetadata=dagster_event.step_failure_data.user_failure_data,
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_START:
        return DauphinPipelineStartEvent(pipelineName=pipeline_name,
                                         **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_SUCCESS:
        return DauphinPipelineSuccessEvent(pipelineName=pipeline_name,
                                           **basic_params)
    elif dagster_event.event_type == DagsterEventType.PIPELINE_FAILURE:
        return DauphinPipelineFailureEvent(pipelineName=pipeline_name,
                                           **basic_params)

    elif dagster_event.event_type == DagsterEventType.PIPELINE_INIT_FAILURE:
        return DauphinPipelineInitFailureEvent(
            pipelineName=pipeline_name,
            error=DauphinPythonError(
                dagster_event.pipeline_init_failure_data.error),
            **basic_params)
    elif dagster_event.event_type == DagsterEventType.OBJECT_STORE_OPERATION:
        operation_result = dagster_event.event_specific_data
        return DauphinObjectStoreOperationEvent(
            operation_result=operation_result, **basic_params)
    elif dagster_event.event_type == DagsterEventType.ENGINE_EVENT:
        return DauphinEngineEvent(
            metadataEntries=_to_dauphin_metadata_entries(
                dagster_event.engine_event_data.metadata_entries),
            error=DauphinPythonError(dagster_event.engine_event_data.error)
            if dagster_event.engine_event_data.error else None,
            marker_start=dagster_event.engine_event_data.marker_start,
            marker_end=dagster_event.engine_event_data.marker_end,
            **basic_params)
    else:
        raise Exception(
            'Unknown DAGSTER_EVENT type {inner_type} found in logs'.format(
                inner_type=dagster_event.event_type))
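# --- Illustrative sketch (not from the source above) ---------------------
# The branches above that only forward **basic_params could alternatively be
# collapsed into a dict dispatch. The class names are the ones used in the
# example; the mapping itself is only a sketch of the technique, not code
# from the source:
_BASIC_EVENT_CLASSES = {
    DagsterEventType.STEP_START: DauphinExecutionStepStartEvent,
    DagsterEventType.STEP_SKIPPED: DauphinExecutionStepSkippedEvent,
    DagsterEventType.STEP_RESTARTED: DauphinExecutionStepRestartEvent,
    DagsterEventType.STEP_SUCCESS: DauphinExecutionStepSuccessEvent,
}


def _basic_event_or_none(dagster_event, basic_params):
    cls = _BASIC_EVENT_CLASSES.get(dagster_event.event_type)
    return cls(**basic_params) if cls else None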