Example #1
def sync_launch_scheduled_execution(schedule_origin, system_tz=None):
    check.inst_param(schedule_origin, "schedule_origin", ExternalJobOrigin)

    with get_temp_file_name() as output_file:

        parts = (
            [sys.executable, "-m", "dagster", "api", "launch_scheduled_execution", output_file,]
            + xplat_shlex_split(schedule_origin.get_repo_cli_args())
            + ["--schedule_name={}".format(schedule_origin.job_name)]
            + (["--override-system-timezone={}".format(system_tz)] if system_tz else [])
        )
        subprocess.check_call(parts)
        result = read_unary_response(output_file)
        if isinstance(result, ScheduledExecutionResult):
            return result
        elif isinstance(result, IPCErrorMessage):
            error = result.serializable_error_info
            raise DagsterSubprocessError(
                "Error in API subprocess: {message}\n\n{err}".format(
                    message=result.message, err=error.to_string()
                ),
                subprocess_error_infos=[error],
            )
        else:
            check.failed("Unexpected result {}".format(result))
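A minimal caller-side sketch (not part of the example above) of how the DagsterSubprocessError raised here might be handled. It assumes only what the example shows: the call either returns a ScheduledExecutionResult or raises DagsterSubprocessError constructed with a subprocess_error_infos list whose entries expose to_string(); the attribute name on the exception is assumed to match the constructor keyword, and schedule_origin stands in for an ExternalJobOrigin obtained elsewhere.

import logging

logger = logging.getLogger(__name__)


def launch_and_log(schedule_origin):
    # Hypothetical wrapper; relies only on behavior shown in the example above.
    try:
        return sync_launch_scheduled_execution(schedule_origin)
    except DagsterSubprocessError as exc:
        # Assumption: the exception keeps the subprocess_error_infos it was built with.
        for error_info in exc.subprocess_error_infos:
            logger.error("Scheduled launch failed in API subprocess:\n%s", error_info.to_string())
        raise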
Example #2
def sync_launch_scheduled_execution(schedule_origin):
    check.inst_param(schedule_origin, 'schedule_origin', SchedulePythonOrigin)

    with get_temp_file_name() as output_file:
        parts = ([
            schedule_origin.executable_path,
            '-m',
            'dagster',
            'api',
            'launch_scheduled_execution',
            output_file,
        ] + xplat_shlex_split(schedule_origin.get_repo_cli_args()) + [
            '--schedule_name={}'.format(schedule_origin.schedule_name),
        ])
        execute_command_in_subprocess(parts)
        result = read_unary_response(output_file)
        if isinstance(result, ScheduledExecutionResult):
            return result
        elif isinstance(result, IPCErrorMessage):
            error = result.serializable_error_info
            raise DagsterSubprocessError(
                'Error in API subprocess: {message}\n\n{err}'.format(
                    message=result.message, err=error.to_string()),
                subprocess_error_infos=[error],
            )
        else:
            check.failed('Unexpected result {}'.format(result))
Example #3
def sync_code():
    # Sync remote dagster packages with local dagster code
    sync_code_command = [
        'rsync',
        '-av',
        '--progress',
        "--exclude='scala_modules/'",
        "--exclude='js_modules/'",
        "--exclude='.git/'",
        "--exclude='docs/'",
        '-e',
        '"ssh -i {aws_emr_pem_file}"'.format(aws_emr_pem_file=os.environ['AWS_EMR_PEM_FILE']),
        os.environ['DAGSTER_DIR'],
        os.environ['AWS_EMR_NODE_ADDRESS'] + ':~/',
    ]
    if (
        subprocess.call(
            ' '.join(sync_code_command), stdout=sys.stdout, stderr=sys.stderr, shell=True
        )
        != 0
    ):
        raise DagsterSubprocessError('Failed to sync code to EMR')

    # Install dagster packages on remote node
    remote_install_dagster_packages_command = ['sudo', 'python3', '-m', 'pip', 'install'] + [
        token
        for package_subpath in ['dagster', 'libraries/dagster-pyspark']
        for token in ['-e', '/home/hadoop/dagster/python_modules/' + package_subpath]
    ]

    install_dagster_packages_command = [
        'ssh',
        '-i',
        os.environ['AWS_EMR_PEM_FILE'],
        os.environ['AWS_EMR_NODE_ADDRESS'],
        "'" + ' '.join(remote_install_dagster_packages_command) + "'",
    ]
    if (
        subprocess.call(
            ' '.join(install_dagster_packages_command),
            stdout=sys.stdout,
            stderr=sys.stderr,
            shell=True,
        )
        != 0
    ):
        raise DagsterSubprocessError('Failed to install dagster packages on EMR')
Example #4
def sync_code():
    # Sync remote dagster packages with local dagster code
    sync_code_command = [
        "rsync",
        "-av",
        "-progress",
        "--exclude='scala_modules/'",
        "--exclude='js_modules/'",
        "--exclude='.git/'",
        "--exclude='docs/'",
        "-e",
        '"ssh -i {aws_emr_pem_file}"'.format(
            aws_emr_pem_file=os.environ["AWS_EMR_PEM_FILE"]),
        os.environ["DAGSTER_DIR"],
        os.environ["AWS_EMR_NODE_ADDRESS"] + ":~/",
    ]
    if (subprocess.call(" ".join(sync_code_command),
                        stdout=sys.stdout,
                        stderr=sys.stderr,
                        shell=True) != 0):
        raise DagsterSubprocessError("Failed to sync code to EMR")

    # Install dagster packages on remote node
    remote_install_dagster_packages_command = [
        "sudo", "python3", "-m", "pip", "install"
    ] + [
        token for package_subpath in ["dagster", "libraries/dagster-pyspark"]
        for token in
        ["-e", "/home/hadoop/dagster/python_modules/" + package_subpath]
    ]

    install_dagster_packages_command = [
        "ssh",
        "-i",
        os.environ["AWS_EMR_PEM_FILE"],
        os.environ["AWS_EMR_NODE_ADDRESS"],
        "'" + " ".join(remote_install_dagster_packages_command) + "'",
    ]
    if (subprocess.call(
            " ".join(install_dagster_packages_command),
            stdout=sys.stdout,
            stderr=sys.stderr,
            shell=True,
    ) != 0):
        raise DagsterSubprocessError(
            "Failed to install dagster packages on EMR")
Example #5
def bounded_parallel_executor(pipeline_context, step_contexts, limit):
    pending_execution = list(step_contexts)
    active_iters = {}
    errors = {}
    term_events = {}
    stopping = False

    while (not stopping and pending_execution) or active_iters:
        try:
            while len(active_iters) < limit and pending_execution and not stopping:
                step_context = pending_execution.pop(0)
                step = step_context.step
                term_events[step.key] = get_multiprocessing_context().Event()
                active_iters[step.key] = execute_step_out_of_process(
                    step_context, step, errors, term_events)

            empty_iters = []
            for key, step_iter in active_iters.items():
                try:
                    event_or_none = next(step_iter)
                    if event_or_none is None:
                        continue
                    else:
                        yield event_or_none

                except StopIteration:
                    empty_iters.append(key)

            for key in empty_iters:
                del active_iters[key]
                if term_events[key].is_set():
                    stopping = True
                del term_events[key]

        # In the very small chance that we get interrupted in this coordination section rather
        # than while polling the subprocesses for events, try to clean up gracefully
        except KeyboardInterrupt:
            yield DagsterEvent.engine_event(
                pipeline_context,
                'Multiprocess engine: received KeyboardInterrupt - forwarding to active child processes',
                EngineEventData.interrupted(list(term_events.keys())),
            )
            for event in term_events.values():
                event.set()

    errs = {pid: err for pid, err in errors.items() if err}
    if errs:
        raise DagsterSubprocessError(
            'During multiprocess execution errors occurred in child processes:\n{error_list}'
            .format(error_list='\n'.join([
                'In process {pid}: {err}'.format(pid=pid, err=err.to_string())
                for pid, err in errs.items()
            ])),
            subprocess_error_infos=list(errs.values()),
        )
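The core of bounded_parallel_executor is a bounded round-robin over generators: keep at most limit step iterators active, poll each one with next(), forward any event it yields, and retire it on StopIteration. A stripped-down, dagster-free sketch of just that control flow (run_bounded and make_iter are illustrative names):

def run_bounded(work_items, make_iter, limit):
    # make_iter(item) is expected to return a generator that yields events (or None
    # while still working) and raises StopIteration when the item is done.
    pending = list(work_items)
    active = {}  # item -> generator

    while pending or active:
        # Start new iterators up to the concurrency limit.
        while len(active) < limit and pending:
            item = pending.pop(0)
            active[item] = make_iter(item)

        # Poll each active iterator once; None means "still running, nothing to report".
        finished = []
        for key, it in active.items():
            try:
                value = next(it)
                if value is not None:
                    yield value
            except StopIteration:
                finished.append(key)

        # Retire exhausted iterators so new work can start on the next pass.
        for key in finished:
            del active[key]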
Example #6
def sync_get_external_execution_plan_grpc(
    api_client,
    pipeline_origin,
    run_config,
    mode,
    pipeline_snapshot_id,
    solid_selection=None,
    step_keys_to_execute=None,
    known_state=None,
):
    from dagster.grpc.client import DagsterGrpcClient

    check.inst_param(api_client, "api_client", DagsterGrpcClient)
    check.inst_param(pipeline_origin, "pipeline_origin",
                     ExternalPipelineOrigin)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.dict_param(run_config, "run_config")
    check.str_param(mode, "mode")
    check.opt_list_param(step_keys_to_execute,
                         "step_keys_to_execute",
                         of_type=str)
    check.str_param(pipeline_snapshot_id, "pipeline_snapshot_id")
    check.opt_inst_param(known_state, "known_state", KnownExecutionState)

    result = check.inst(
        api_client.execution_plan_snapshot(
            execution_plan_snapshot_args=ExecutionPlanSnapshotArgs(
                pipeline_origin=pipeline_origin,
                solid_selection=solid_selection,
                run_config=run_config,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
                pipeline_snapshot_id=pipeline_snapshot_id,
                known_state=known_state,
            )),
        (ExecutionPlanSnapshot, ExecutionPlanSnapshotErrorData),
    )

    if isinstance(result, ExecutionPlanSnapshotErrorData):
        raise DagsterSubprocessError(result.error.to_string(),
                                     subprocess_error_infos=[result.error])
    return result
Example #7
def bounded_parallel_executor(step_contexts, limit):
    pending_execution = list(step_contexts)
    active_iters = {}
    pid_tracker = {}

    while pending_execution or active_iters:
        while len(active_iters) < limit and pending_execution:
            step_context = pending_execution.pop()
            step = step_context.step
            active_iters[step.key] = execute_step_out_of_process(step_context, step, pid_tracker)

        empty_iters = []
        for key, step_iter in active_iters.items():
            try:
                event_or_none = next(step_iter)
                if event_or_none is None:
                    continue
                else:
                    yield event_or_none

            except StopIteration:
                empty_iters.append(key)

        for key in empty_iters:
            del active_iters[key]

    errs = {pid: err for pid, err in pid_tracker.items() if err}
    if errs:
        raise DagsterSubprocessError(
            'During multiprocess execution errors occurred in child processes:\n{error_list}'.format(
                error_list='\n'.join(
                    [
                        'In process {pid}: {err}'.format(pid=pid, err=err.to_string())
                        for pid, err in errs.items()
                    ]
                )
            ),
            subprocess_error_infos=list(errs.values()),
        )
Example #8
def sync_get_external_execution_plan(
    pipeline_origin,
    run_config,
    mode,
    pipeline_snapshot_id,
    solid_selection=None,
    step_keys_to_execute=None,
):

    check.inst_param(pipeline_origin, "pipeline_origin", PipelinePythonOrigin)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.dict_param(run_config, "run_config")
    check.str_param(mode, "mode")
    check.opt_list_param(step_keys_to_execute,
                         "step_keys_to_execute",
                         of_type=str)
    check.str_param(pipeline_snapshot_id, "pipeline_snapshot_id")

    result = check.inst(
        execute_unary_api_cli_command(
            pipeline_origin.executable_path,
            "execution_plan",
            ExecutionPlanSnapshotArgs(
                pipeline_origin=pipeline_origin,
                solid_selection=solid_selection,
                run_config=run_config,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
                pipeline_snapshot_id=pipeline_snapshot_id,
            ),
        ),
        (ExecutionPlanSnapshot, ExecutionPlanSnapshotErrorData),
    )

    if isinstance(result, ExecutionPlanSnapshotErrorData):
        raise DagsterSubprocessError(result.error.to_string(),
                                     subprocess_error_infos=[result.error])
    return result
Example #9
def sync_get_external_execution_plan_grpc(
    api_client,
    pipeline_origin,
    run_config,
    mode,
    pipeline_snapshot_id,
    solid_selection=None,
    step_keys_to_execute=None,
):
    from dagster.grpc.client import DagsterGrpcClient

    check.inst_param(api_client, 'api_client', DagsterGrpcClient)
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelineOrigin)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)
    check.dict_param(run_config, 'run_config')
    check.str_param(mode, 'mode')
    check.opt_list_param(step_keys_to_execute,
                         'step_keys_to_execute',
                         of_type=str)
    check.str_param(pipeline_snapshot_id, 'pipeline_snapshot_id')

    result = check.inst(
        api_client.execution_plan_snapshot(
            execution_plan_snapshot_args=ExecutionPlanSnapshotArgs(
                pipeline_origin=pipeline_origin,
                solid_selection=solid_selection,
                run_config=run_config,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
                pipeline_snapshot_id=pipeline_snapshot_id,
            )),
        (ExecutionPlanSnapshot, ExecutionPlanSnapshotErrorData),
    )

    if isinstance(result, ExecutionPlanSnapshotErrorData):
        raise DagsterSubprocessError(result.error.to_string(),
                                     subprocess_error_infos=[result.error])
    return result
Example #10
    def execute(self, pipeline_context, execution_plan):
        check.inst_param(pipeline_context, "pipeline_context",
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        limit = self.max_concurrent

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Executing steps using multiprocess engine: parent process (pid: {pid})"
            .format(pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(),
                step_keys_to_execute=execution_plan.step_keys_to_execute),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage-collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:

            with execution_plan.start(
                    retries=self.retries) as active_execution:
                active_iters = {}
                errors = {}
                term_events = {}
                stopping = False

                while (not stopping
                       and not active_execution.is_complete) or active_iters:
                    try:
                        # start iterators
                        while len(active_iters) < limit and not stopping:
                            steps = active_execution.get_steps_to_execute(
                                limit=(limit - len(active_iters)))

                            if not steps:
                                break

                            for step in steps:
                                step_context = pipeline_context.for_step(step)
                                term_events[step.key] = multiprocessing.Event()
                                active_iters[step.key] = self.execute_step_out_of_process(
                                    step_context, step, errors, term_events)

                        # process active iterators
                        empty_iters = []
                        for key, step_iter in active_iters.items():
                            try:
                                event_or_none = next(step_iter)
                                if event_or_none is None:
                                    continue
                                else:
                                    yield event_or_none
                                    active_execution.handle_event(
                                        event_or_none)

                            except ChildProcessCrashException as crash:
                                serializable_error = serializable_error_info_from_exc_info(
                                    sys.exc_info())
                                yield DagsterEvent.engine_event(
                                    pipeline_context,
                                    ("Multiprocess executor: child process for step {step_key} "
                                     "unexpectedly exited with code {exit_code}"
                                     ).format(step_key=key,
                                              exit_code=crash.exit_code),
                                    EngineEventData.engine_error(
                                        serializable_error),
                                    step_key=key,
                                )
                                step_failure_event = DagsterEvent.step_failure_event(
                                    step_context=pipeline_context.for_step(
                                        active_execution.get_step_by_key(key)),
                                    step_failure_data=StepFailureData(
                                        error=serializable_error,
                                        user_failure_data=None),
                                )
                                active_execution.handle_event(
                                    step_failure_event)
                                yield step_failure_event
                                empty_iters.append(key)
                            except StopIteration:
                                empty_iters.append(key)

                        # clear and mark complete finished iterators
                        for key in empty_iters:
                            del active_iters[key]
                            if term_events[key].is_set():
                                stopping = True
                            del term_events[key]
                            active_execution.verify_complete(
                                pipeline_context, key)

                        # process skips from failures or uncovered inputs
                        for event in active_execution.skipped_step_events_iterator(
                                pipeline_context):
                            yield event

                    # In the very small chance that we get interrupted in this coordination section and not
                    # polling the subprocesses for events - try to clean up gracefully
                    except KeyboardInterrupt:
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            "Multiprocess engine: received KeyboardInterrupt - forwarding to active child processes",
                            EngineEventData.interrupted(
                                list(term_events.keys())),
                        )
                        stopping = True
                        for event in term_events.values():
                            event.set()

                errs = {pid: err for pid, err in errors.items() if err}
                if errs:
                    raise DagsterSubprocessError(
                        "During multiprocess execution errors occurred in child processes:\n{error_list}"
                        .format(error_list="\n".join([
                            "In process {pid}: {err}".format(
                                pid=pid, err=err.to_string())
                            for pid, err in errs.items()
                        ])),
                        subprocess_error_infos=list(errs.values()),
                    )

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Multiprocess engine: parent process exiting after {duration} (pid: {pid})"
            .format(duration=format_duration(timer_result.millis),
                    pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )
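In the executor above, interruption is forwarded to children by setting a per-step multiprocessing.Event, which the child process is expected to notice and honor. A hedged sketch of what the child side of that contract could look like (child_worker and do_one_unit_of_work are hypothetical; only standard-library APIs are used):

import multiprocessing
from queue import Empty


def do_one_unit_of_work(work):
    return work  # placeholder for real step execution


def child_worker(term_event, work_queue, result_queue):
    # Poll for work, but between attempts check whether the parent asked us to stop.
    while not term_event.is_set():
        try:
            work = work_queue.get(timeout=0.1)
        except Empty:
            continue
        result_queue.put(do_one_unit_of_work(work))
    # term_event was set by the parent (e.g. on KeyboardInterrupt): exit without
    # picking up new work so the parent can finish draining and report results.


if __name__ == "__main__":
    term_event = multiprocessing.Event()
    work_queue, result_queue = multiprocessing.Queue(), multiprocessing.Queue()
    proc = multiprocessing.Process(target=child_worker, args=(term_event, work_queue, result_queue))
    proc.start()
    work_queue.put("some work")
    print(result_queue.get())
    term_event.set()  # ask the child to stop, mirroring the executor's interrupt path
    proc.join()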
Example #11
    def execute(pipeline_context, execution_plan):
        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        check.param_invariant(
            isinstance(pipeline_context.executor_config, CeleryConfig),
            'pipeline_context',
            'Expected executor_config to be CeleryConfig got {}'.format(
                pipeline_context.executor_config),
        )

        celery_config = pipeline_context.executor_config

        storage = pipeline_context.environment_dict.get('storage')

        if (celery_config.broker and not is_local_uri(celery_config.broker)) or (
                celery_config.backend and not is_local_uri(celery_config.backend)):
            check.invariant(
                storage.get('s3') or storage.get('gcs'),
                'Must use S3 or GCS storage with non-local Celery broker: {broker} '
                'and backend: {backend}'.format(broker=celery_config.broker,
                                                backend=celery_config.backend),
            )
        else:
            check.invariant(
                not storage.get('in_memory'),
                'Cannot use in-memory storage with Celery, use filesystem, S3, or GCS',
            )

        pipeline_name = pipeline_context.pipeline_def.name

        handle_dict = pipeline_context.execution_target_handle.to_dict()

        instance_ref_dict = pipeline_context.instance.get_ref().to_dict()

        environment_dict = dict(pipeline_context.environment_dict,
                                execution={'in_process': {}})

        mode = pipeline_context.mode_def.name

        run_id = pipeline_context.pipeline_run.run_id

        app = make_app(celery_config)

        task_signatures = {}  # Dict[step_key, celery.Signature]
        apply_kwargs = defaultdict(dict)  # Dict[step_key, Dict[str, Any]]

        priority_for_step = lambda step: -1 * int(
            step.tags.get('dagster-celery/priority', task_default_priority))
        priority_for_key = lambda step_key: -1 * apply_kwargs[step_key]['priority']
        _warn_on_priority_misuse(pipeline_context, execution_plan)

        for step_key in execution_plan.step_keys_to_execute:
            step = execution_plan.get_step_by_key(step_key)
            priority = int(
                step.tags.get('dagster-celery/priority',
                              task_default_priority))
            queue = step.tags.get('dagster-celery/queue', task_default_queue)
            task = create_task(app)

            variables = {
                'executionParams': {
                    'selector': {
                        'name': pipeline_name
                    },
                    'environmentConfigData': environment_dict,
                    'mode': mode,
                    'executionMetadata': {
                        'runId': run_id
                    },
                    'stepKeys': [step_key],
                }
            }
            task_signatures[step_key] = task.si(handle_dict, variables,
                                                instance_ref_dict)
            apply_kwargs[step_key] = {
                'priority': priority,
                'queue': queue,
                'routing_key': '{queue}.execute_query'.format(queue=queue),
            }

        step_results = {}  # Dict[ExecutionStep, celery.AsyncResult]
        step_success = {}
        step_errors = {}
        completed_steps = set()  # Set[step_key]
        active_execution = execution_plan.start(sort_key_fn=priority_for_step)
        stopping = False

        while (not active_execution.is_complete
               and not stopping) or step_results:

            results_to_pop = []
            for step_key, result in sorted(
                    step_results.items(),
                    key=lambda x: priority_for_key(x[0])):
                if result.ready():
                    try:
                        step_events = result.get()
                    except Exception:  # pylint: disable=broad-except
                        # We will want to do more to handle the exception here.. maybe subclass Task
                        # Certainly yield an engine or pipeline event
                        step_events = []
                        step_errors[step_key] = serializable_error_info_from_exc_info(
                            sys.exc_info())
                        stopping = True
                    for step_event in step_events:
                        event = deserialize_json_to_dagster_namedtuple(
                            step_event)
                        yield event
                        if event.is_step_success:
                            step_success[step_key] = True
                        elif event.is_step_failure:
                            step_success[step_key] = False

                    results_to_pop.append(step_key)
                    completed_steps.add(step_key)

            for step_key in results_to_pop:
                if step_key in step_results:
                    del step_results[step_key]
                    was_success = step_success.get(step_key)
                    if was_success is True:
                        active_execution.mark_success(step_key)
                    elif was_success is False:
                        active_execution.mark_failed(step_key)
                    else:
                        # check errors list?
                        pipeline_context.log.error(
                            'Step {key} finished without success or failure event, assuming failure.'
                            .format(key=step_key))
                        active_execution.mark_failed(step_key)

            # process skips from failures or uncovered inputs
            for event in active_execution.skipped_step_events_iterator(
                    pipeline_context):
                yield event

            # don't add any new steps if we are stopping
            if stopping:
                continue

            # This is a slight refinement. If we have n workers idle and schedule m > n steps for
            # execution, the first n steps will be picked up by the idle workers in the order in
            # which they are scheduled (and the following m-n steps will be executed in priority
            # order, provided that it takes longer to execute a step than to schedule it). The test
            # case has m >> n to exhibit this behavior in the absence of this sort step.
            for step in active_execution.get_steps_to_execute():
                try:
                    step_results[step.key] = task_signatures[
                        step.key].apply_async(**apply_kwargs[step.key])
                except Exception:
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Encountered error during celery task submission.',
                        event_specific_data=EngineEventData.engine_error(
                            serializable_error_info_from_exc_info(
                                sys.exc_info()), ),
                    )
                    raise

            time.sleep(TICK_SECONDS)

        if step_errors:
            raise DagsterSubprocessError(
                'During celery execution errors occurred in workers:\n{error_list}'
                .format(error_list='\n'.join([
                    '[{step}]: {err}'.format(step=key, err=err.to_string())
                    for key, err in step_errors.items()
                ])),
                subprocess_error_infos=list(step_errors.values()),
            )
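A note on the priority lambdas above: the tag value is negated because the consumers sort ascending. Python's sorted() (used when draining step_results with key=priority_for_key) puts the smallest key first, so multiplying a "larger means more urgent" priority by -1 makes the most urgent steps come out first; presumably the sort_key_fn passed to execution_plan.start works the same way. A small self-contained illustration:

tagged_priorities = {"step_a": 1, "step_b": 5, "step_c": 3}

# Sorting ascending on the negated priority yields highest-priority-first ordering.
ordered = sorted(tagged_priorities, key=lambda step_key: -1 * tagged_priorities[step_key])
assert ordered == ["step_b", "step_c", "step_a"]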
Example #12
    def execute(pipeline_context, execution_plan):
        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        intermediates_manager = pipeline_context.intermediates_manager

        limit = pipeline_context.executor_config.max_concurrent

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps using multiprocess engine: parent process (pid: {pid})'
            .format(pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(),
                step_keys_to_execute=execution_plan.step_keys_to_execute),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage-collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:

            active_execution = execution_plan.start(
                retries=pipeline_context.executor_config.retries)
            active_iters = {}
            errors = {}
            term_events = {}
            stopping = False

            while (not stopping
                   and not active_execution.is_complete) or active_iters:
                try:
                    # start iterators
                    while len(active_iters) < limit and not stopping:
                        steps = active_execution.get_steps_to_execute(
                            limit=(limit - len(active_iters)))

                        if not steps:
                            break

                        for step in steps:
                            step_context = pipeline_context.for_step(step)
                            term_events[step.key] = get_multiprocessing_context().Event()
                            active_iters[step.key] = execute_step_out_of_process(
                                step_context, step, errors, term_events)

                    # process active iterators
                    empty_iters = []
                    for key, step_iter in active_iters.items():
                        try:
                            event_or_none = next(step_iter)
                            if event_or_none is None:
                                continue
                            else:
                                yield event_or_none
                                active_execution.handle_event(event_or_none)

                        except StopIteration:
                            empty_iters.append(key)

                    # clear and mark complete finished iterators
                    for key in empty_iters:
                        del active_iters[key]
                        if term_events[key].is_set():
                            stopping = True
                        del term_events[key]
                        active_execution.verify_complete(pipeline_context, key)

                    # process skips from failures or uncovered inputs
                    for event in active_execution.skipped_step_events_iterator(
                            pipeline_context):
                        yield event

                # In the very small chance that we get interrupted in this coordination section and not
                # polling the subprocesses for events - try to clean up gracefully
                except KeyboardInterrupt:
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Multiprocess engine: received KeyboardInterrupt - forwarding to active child processes',
                        EngineEventData.interrupted(list(term_events.keys())),
                    )
                    stopping = True
                    for event in term_events.values():
                        event.set()

            errs = {pid: err for pid, err in errors.items() if err}
            if errs:
                raise DagsterSubprocessError(
                    'During multiprocess execution errors occurred in child processes:\n{error_list}'
                    .format(error_list='\n'.join([
                        'In process {pid}: {err}'.format(pid=pid,
                                                         err=err.to_string())
                        for pid, err in errs.items()
                    ])),
                    subprocess_error_infos=list(errs.values()),
                )

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Multiprocess engine: parent process exiting after {duration} (pid: {pid})'
            .format(duration=format_duration(timer_result.millis),
                    pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )
Example #13
    def execute(self, pipeline_context, execution_plan):
        check.inst_param(pipeline_context, "pipeline_context",
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        limit = self.max_concurrent

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Executing steps using multiprocess executor: parent process (pid: {pid})"
            .format(pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(),
                step_keys_to_execute=execution_plan.step_keys_to_execute),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:
            with execution_plan.start(
                    retry_mode=self.retries) as active_execution:
                active_iters = {}
                errors = {}
                term_events = {}
                stopping = False

                while (not stopping
                       and not active_execution.is_complete) or active_iters:
                    if active_execution.check_for_interrupts():
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            "Multiprocess executor: received termination signal - "
                            "forwarding to active child processes",
                            EngineEventData.interrupted(
                                list(term_events.keys())),
                        )
                        stopping = True
                        active_execution.mark_interrupted()
                        for key, event in term_events.items():
                            event.set()

                    # start iterators
                    while len(active_iters) < limit and not stopping:
                        steps = active_execution.get_steps_to_execute(
                            limit=(limit - len(active_iters)))

                        if not steps:
                            break

                        for step in steps:
                            step_context = pipeline_context.for_step(step)
                            term_events[step.key] = multiprocessing.Event()
                            active_iters[step.key] = self.execute_step_out_of_process(
                                step_context,
                                step,
                                errors,
                                term_events,
                                active_execution.get_known_state(),
                            )

                    # process active iterators
                    empty_iters = []
                    for key, step_iter in active_iters.items():
                        try:
                            event_or_none = next(step_iter)
                            if event_or_none is None:
                                continue
                            else:
                                yield event_or_none
                                active_execution.handle_event(event_or_none)

                        except ChildProcessCrashException as crash:
                            serializable_error = serializable_error_info_from_exc_info(
                                sys.exc_info())
                            yield DagsterEvent.engine_event(
                                pipeline_context,
                                ("Multiprocess executor: child process for step {step_key} "
                                 "unexpectedly exited with code {exit_code}"
                                 ).format(step_key=key,
                                          exit_code=crash.exit_code),
                                EngineEventData.engine_error(
                                    serializable_error),
                                step_handle=active_execution.get_step_by_key(
                                    key).handle,
                            )
                            step_failure_event = DagsterEvent.step_failure_event(
                                step_context=pipeline_context.for_step(
                                    active_execution.get_step_by_key(key)),
                                step_failure_data=StepFailureData(
                                    error=serializable_error,
                                    user_failure_data=None),
                            )
                            active_execution.handle_event(step_failure_event)
                            yield step_failure_event
                            empty_iters.append(key)
                        except StopIteration:
                            empty_iters.append(key)

                    # clear and mark complete finished iterators
                    for key in empty_iters:
                        del active_iters[key]
                        del term_events[key]
                        active_execution.verify_complete(pipeline_context, key)

                    # process skipped and abandoned steps
                    yield from active_execution.plan_events_iterator(
                        pipeline_context)

                errs = {pid: err for pid, err in errors.items() if err}

                # After termination starts, raise an interrupted exception once all subprocesses
                # have finished cleaning up (and the only errors were from being interrupted)
                if (stopping and (not active_iters) and all([
                        err_info.cls_name == "DagsterExecutionInterruptedError"
                        for err_info in errs.values()
                ])):
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        "Multiprocess executor: interrupted all active child processes",
                        event_specific_data=EngineEventData(),
                    )
                    raise DagsterExecutionInterruptedError()
                elif errs:
                    raise DagsterSubprocessError(
                        "During multiprocess execution errors occurred in child processes:\n{error_list}"
                        .format(error_list="\n".join([
                            "In process {pid}: {err}".format(
                                pid=pid, err=err.to_string())
                            for pid, err in errs.items()
                        ])),
                        subprocess_error_infos=list(errs.values()),
                    )

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Multiprocess executor: parent process exiting after {duration} (pid: {pid})"
            .format(duration=format_duration(timer_result.millis),
                    pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )
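The end of the loop above separates two outcomes: if termination was requested and every recorded child error is just the interruption surfacing (cls_name == "DagsterExecutionInterruptedError"), it re-raises the interruption; any other recorded errors are aggregated into a single DagsterSubprocessError. A hedged sketch isolating that decision (classify_child_errors is a hypothetical helper, and the check that no iterators remain active is left out; errs maps pid to a serializable error info with cls_name and to_string(), as in the example above):

def classify_child_errors(stopping, errs):
    # Mirrors the classification at the end of the executor above; illustrative only.
    if stopping and all(
        err_info.cls_name == "DagsterExecutionInterruptedError" for err_info in errs.values()
    ):
        raise DagsterExecutionInterruptedError()
    if errs:
        raise DagsterSubprocessError(
            "During multiprocess execution errors occurred in child processes:\n{error_list}".format(
                error_list="\n".join(
                    "In process {pid}: {err}".format(pid=pid, err=err.to_string())
                    for pid, err in errs.items()
                )
            ),
            subprocess_error_infos=list(errs.values()),
        )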
Example #14
def core_celery_execution_loop(pipeline_context, execution_plan, step_execution_fn):

    check.inst_param(pipeline_context, "pipeline_context",
                     SystemPipelineExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.callable_param(step_execution_fn, "step_execution_fn")

    executor = pipeline_context.executor

    # https://github.com/dagster-io/dagster/issues/2440
    check.invariant(
        execution_plan.artifacts_persisted,
        "Cannot use in-memory storage with Celery, use filesystem (on top of NFS or "
        "similar system that allows files to be available to all nodes), S3, or GCS",
    )

    app = make_app(executor.app_args())

    priority_for_step = lambda step: (-1 * int(
        step.tags.get(DAGSTER_CELERY_STEP_PRIORITY_TAG, task_default_priority)
    ) + -1 * _get_run_priority(pipeline_context))
    priority_for_key = lambda step_key: (priority_for_step(
        execution_plan.get_step_by_key(step_key)))
    _warn_on_priority_misuse(pipeline_context, execution_plan)

    step_results = {}  # Dict[ExecutionStep, celery.AsyncResult]
    step_errors = {}

    with execution_plan.start(
            retries=pipeline_context.executor.retries,
            sort_key_fn=priority_for_step,
    ) as active_execution:

        stopping = False

        while (not active_execution.is_complete
               and not stopping) or step_results:
            if active_execution.check_for_interrupts():
                yield DagsterEvent.engine_event(
                    pipeline_context,
                    "Celery executor: received termination signal - revoking active tasks from workers",
                    EngineEventData.interrupted(list(step_results.keys())),
                )
                stopping = True
                active_execution.mark_interrupted()
                for result in step_results.values():
                    result.revoke()
            results_to_pop = []
            for step_key, result in sorted(
                    step_results.items(),
                    key=lambda x: priority_for_key(x[0])):
                if result.ready():
                    try:
                        step_events = result.get()
                    except TaskRevokedError:
                        step_events = []
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            'celery task for running step "{step_key}" was revoked.'
                            .format(step_key=step_key, ),
                            EngineEventData(marker_end=DELEGATE_MARKER),
                            step_handle=active_execution.get_step_by_key(
                                step_key).handle,
                        )
                    except Exception:  # pylint: disable=broad-except
                        # We will want to do more to handle the exception here.. maybe subclass Task
                        # Certainly yield an engine or pipeline event
                        step_events = []
                        step_errors[step_key] = serializable_error_info_from_exc_info(
                            sys.exc_info())
                    for step_event in step_events:
                        event = deserialize_json_to_dagster_namedtuple(
                            step_event)
                        yield event
                        active_execution.handle_event(event)

                    results_to_pop.append(step_key)

            for step_key in results_to_pop:
                if step_key in step_results:
                    del step_results[step_key]
                    active_execution.verify_complete(pipeline_context,
                                                     step_key)

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(
                    pipeline_context):
                yield event

            # don't add any new steps if we are stopping
            if stopping or step_errors:
                continue

            # This is a slight refinement. If we have n workers idle and schedule m > n steps for
            # execution, the first n steps will be picked up by the idle workers in the order in
            # which they are scheduled (and the following m-n steps will be executed in priority
            # order, provided that it takes longer to execute a step than to schedule it). The test
            # case has m >> n to exhibit this behavior in the absence of this sort step.
            for step in active_execution.get_steps_to_execute():
                try:
                    queue = step.tags.get(DAGSTER_CELERY_QUEUE_TAG,
                                          task_default_queue)
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Submitting celery task for step "{step_key}" to queue "{queue}".'
                        .format(step_key=step.key, queue=queue),
                        EngineEventData(marker_start=DELEGATE_MARKER),
                        step_handle=step.handle,
                    )

                    # Get the Celery priority for this step
                    priority = _get_step_priority(pipeline_context, step)

                    # Submit the Celery tasks
                    step_results[step.key] = step_execution_fn(
                        app, pipeline_context, step, queue, priority)

                except Exception:
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        "Encountered error during celery task submission.".
                        format(),
                        event_specific_data=EngineEventData.engine_error(
                            serializable_error_info_from_exc_info(
                                sys.exc_info()), ),
                    )
                    raise

            time.sleep(TICK_SECONDS)

        if step_errors:
            raise DagsterSubprocessError(
                "During celery execution errors occurred in workers:\n{error_list}"
                .format(error_list="\n".join([
                    "[{step}]: {err}".format(step=key, err=err.to_string())
                    for key, err in step_errors.items()
                ])),
                subprocess_error_infos=list(step_errors.values()),
            )
Example #15
def _core_celery_execution_loop(pipeline_context, execution_plan, step_execution_fn):
    from .tasks import make_app

    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.callable_param(step_execution_fn, 'step_execution_fn')

    check.param_invariant(
        isinstance(pipeline_context.executor_config, (CeleryConfig, CeleryK8sJobConfig)),
        'pipeline_context',
        'Expected executor_config to be Celery config got {}'.format(
            pipeline_context.executor_config
        ),
    )

    celery_config = pipeline_context.executor_config

    # https://github.com/dagster-io/dagster/issues/2440
    check.invariant(
        pipeline_context.system_storage_def.is_persistent,
        'Cannot use in-memory storage with Celery, use filesystem (on top of NFS or '
        'similar system that allows files to be available to all nodes), S3, or GCS',
    )

    app = make_app(celery_config)

    priority_for_step = lambda step: (
        -1 * int(step.tags.get(DAGSTER_CELERY_STEP_PRIORITY_TAG, task_default_priority))
        + -1 * _get_run_priority(pipeline_context)
    )
    priority_for_key = lambda step_key: (
        priority_for_step(execution_plan.get_step_by_key(step_key))
    )
    _warn_on_priority_misuse(pipeline_context, execution_plan)

    step_results = {}  # Dict[ExecutionStep, celery.AsyncResult]
    step_errors = {}
    completed_steps = set()  # Set[step_key]
    active_execution = execution_plan.start(
        retries=pipeline_context.executor_config.retries, sort_key_fn=priority_for_step
    )
    stopping = False

    while (not active_execution.is_complete and not stopping) or step_results:

        results_to_pop = []
        for step_key, result in sorted(step_results.items(), key=lambda x: priority_for_key(x[0])):
            if result.ready():
                try:
                    step_events = result.get()
                except Exception:  # pylint: disable=broad-except
                    # We will want to do more to handle the exception here.. maybe subclass Task
                    # Certainly yield an engine or pipeline event
                    step_events = []
                    step_errors[step_key] = serializable_error_info_from_exc_info(sys.exc_info())
                    stopping = True
                for step_event in step_events:
                    event = deserialize_json_to_dagster_namedtuple(step_event)
                    yield event
                    active_execution.handle_event(event)

                results_to_pop.append(step_key)
                completed_steps.add(step_key)

        for step_key in results_to_pop:
            if step_key in step_results:
                del step_results[step_key]
                active_execution.verify_complete(pipeline_context, step_key)

        # process skips from failures or uncovered inputs
        for event in active_execution.skipped_step_events_iterator(pipeline_context):
            yield event

        # don't add any new steps if we are stopping
        if stopping:
            continue

        # This is a slight refinement. If we have n workers idle and schedule m > n steps for
        # execution, the first n steps will be picked up by the idle workers in the order in
        # which they are scheduled (and the following m-n steps will be executed in priority
        # order, provided that it takes longer to execute a step than to schedule it). The test
        # case has m >> n to exhibit this behavior in the absence of this sort step.
        for step in active_execution.get_steps_to_execute():
            try:
                queue = step.tags.get(DAGSTER_CELERY_QUEUE_TAG, task_default_queue)
                yield DagsterEvent.engine_event(
                    pipeline_context,
                    'Submitting celery task for step "{step_key}" to queue "{queue}".'.format(
                        step_key=step.key, queue=queue
                    ),
                    EngineEventData(marker_start=DELEGATE_MARKER),
                    step_key=step.key,
                )

                # Get the Celery priority for this step
                priority = _get_step_priority(pipeline_context, step)

                # Submit the Celery tasks
                step_results[step.key] = step_execution_fn(
                    app, pipeline_context, step, queue, priority
                )

            except Exception:
                yield DagsterEvent.engine_event(
                    pipeline_context,
                    'Encountered error during celery task submission.',
                    event_specific_data=EngineEventData.engine_error(
                        serializable_error_info_from_exc_info(sys.exc_info()),
                    ),
                )
                raise

        time.sleep(TICK_SECONDS)

    if step_errors:
        raise DagsterSubprocessError(
            'During celery execution errors occurred in workers:\n{error_list}'.format(
                error_list='\n'.join(
                    [
                        '[{step}]: {err}'.format(step=key, err=err.to_string())
                        for key, err in step_errors.items()
                    ]
                )
            ),
            subprocess_error_infos=list(step_errors.values()),
        )
Example #16
    def execute(pipeline_context, execution_plan):
        from .tasks import make_app

        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        check.param_invariant(
            isinstance(pipeline_context.executor_config, CeleryConfig),
            'pipeline_context',
            'Expected executor_config to be CeleryConfig got {}'.format(
                pipeline_context.executor_config),
        )

        celery_config = pipeline_context.executor_config

        storage = pipeline_context.environment_dict.get('storage')

        if (celery_config.broker and not is_local_uri(celery_config.broker)) or (
                celery_config.backend and not is_local_uri(celery_config.backend)):
            check.invariant(
                storage.get('s3') or storage.get('gcs'),
                'Must use S3 or GCS storage with non-local Celery broker: {broker} '
                'and backend: {backend}'.format(broker=celery_config.broker,
                                                backend=celery_config.backend),
            )
        else:
            check.invariant(
                not storage.get('in_memory'),
                'Cannot use in-memory storage with Celery, use filesystem, S3, or GCS',
            )

        app = make_app(celery_config)

        priority_for_step = lambda step: (-1 * int(
            step.tags.get('dagster-celery/priority', task_default_priority)
        ) + -1 * _get_run_priority(pipeline_context))
        priority_for_key = lambda step_key: (priority_for_step(
            execution_plan.get_step_by_key(step_key)))
        _warn_on_priority_misuse(pipeline_context, execution_plan)

        step_results = {}  # Dict[ExecutionStep, celery.AsyncResult]
        step_errors = {}
        completed_steps = set()  # Set[step_key]
        active_execution = execution_plan.start(
            retries=pipeline_context.executor_config.retries,
            sort_key_fn=priority_for_step)
        stopping = False

        while (not active_execution.is_complete
               and not stopping) or step_results:

            results_to_pop = []
            for step_key, result in sorted(
                    step_results.items(),
                    key=lambda x: priority_for_key(x[0])):
                if result.ready():
                    try:
                        step_events = result.get()
                    except Exception:  # pylint: disable=broad-except
                        # We will want to do more to handle the exception here.. maybe subclass Task
                        # Certainly yield an engine or pipeline event
                        step_events = []
                        step_errors[step_key] = serializable_error_info_from_exc_info(
                            sys.exc_info())
                        stopping = True
                    for step_event in step_events:
                        event = deserialize_json_to_dagster_namedtuple(
                            step_event)
                        yield event
                        active_execution.handle_event(event)

                    results_to_pop.append(step_key)
                    completed_steps.add(step_key)

            for step_key in results_to_pop:
                if step_key in step_results:
                    del step_results[step_key]
                    active_execution.verify_complete(pipeline_context,
                                                     step_key)

            # process skips from failures or uncovered inputs
            for event in active_execution.skipped_step_events_iterator(
                    pipeline_context):
                yield event

            # don't add any new steps if we are stopping
            if stopping:
                continue

            # This is a slight refinement. If we have n workers idle and schedule m > n steps for
            # execution, the first n steps will be picked up by the idle workers in the order in
            # which they are scheduled (and the following m-n steps will be executed in priority
            # order, provided that it takes longer to execute a step than to schedule it). The test
            # case has m >> n to exhibit this behavior in the absence of this sort step.
            for step in active_execution.get_steps_to_execute():
                try:
                    queue = step.tags.get('dagster-celery/queue',
                                          task_default_queue)
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Submitting celery task for step "{step_key}" to queue "{queue}".'
                        .format(step_key=step.key, queue=queue),
                        EngineEventData(marker_start=DELEGATE_MARKER),
                        step_key=step.key,
                    )
                    step_results[step.key] = _submit_task(
                        app, pipeline_context, step, queue)
                except Exception:
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Encountered error during celery task submission.',
                        event_specific_data=EngineEventData.engine_error(
                            serializable_error_info_from_exc_info(
                                sys.exc_info()), ),
                    )
                    raise

            time.sleep(TICK_SECONDS)

        if step_errors:
            raise DagsterSubprocessError(
                'During celery execution errors occurred in workers:\n{error_list}'
                .format(error_list='\n'.join([
                    '[{step}]: {err}'.format(step=key, err=err.to_string())
                    for key, err in step_errors.items()
                ])),
                subprocess_error_infos=list(step_errors.values()),
            )