Code Example #1
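This appears to be an early version of Dagster's multiprocess executor. It drops loggers and the event callback from the parent RunConfig (logging a debug message that RunConfig-injected loggers are ignored), runs the step in a child process, and forwards each event it receives, mirroring DagsterEvents through log_step_event when an event callback is registered.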
def execute_step_out_of_process(step_context, step):
    if step_context.run_config.loggers:
        step_context.log.debug(
            'Loggers cannot be injected via RunConfig using the multiprocess executor. Define '
            'loggers on the mode instead. Ignoring loggers: [{logger_names}]'.format(
                logger_names=', '.join([
                    '\'{name}\''.format(name=logger.name)
                    for logger in step_context.run_config.loggers
                ])))

    run_config = RunConfig(
        run_id=step_context.run_config.run_id,
        tags=step_context.run_config.tags,
        loggers=None,
        event_callback=None,
        reexecution_config=None,
        step_keys_to_execute=step_context.run_config.step_keys_to_execute,
        mode=step_context.run_config.mode,
    )

    command = InProcessExecutorChildProcessCommand(
        step_context.environment_dict, run_config,
        step_context.executor_config, step.key)

    for step_event in execute_child_process_command(command):
        if step_context.run_config.event_callback and isinstance(
                step_event, DagsterEvent):
            log_step_event(step_context, step_event)
        yield step_event
Code Example #2
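This launch_step method looks like it belongs to Dagster's Databricks step launcher: it serializes the step context into a step run ref, uploads the artifacts, submits a run to Databricks, waits for it to finish (permitting interrupts so slow or hanging steps can be terminated), optionally pulls cluster logs, and then replays the remote step's events through log_step_event.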
    def launch_step(self, step_context, prior_attempts_count):
        step_run_ref = step_context_to_step_run_ref(
            step_context, prior_attempts_count,
            self.local_pipeline_package_path)
        run_id = step_context.pipeline_run.run_id
        log = step_context.log

        step_key = step_run_ref.step_key
        self._upload_artifacts(log, step_run_ref, run_id, step_key)

        task = self._get_databricks_task(run_id, step_key)
        databricks_run_id = self.databricks_runner.submit_run(
            self.run_config, task)

        try:
            # If this is being called within a `capture_interrupts` context, allow interrupts while
            # waiting for the execution to complete, so that we can terminate slow or hanging steps
            with raise_execution_interrupts():
                self.databricks_runner.wait_for_run_to_complete(
                    log, databricks_run_id)
        finally:
            if self.wait_for_logs:
                self._log_logs_from_cluster(log, databricks_run_id)

        for event in self.get_step_events(run_id, step_key):
            log_step_event(step_context, event)
            yield event
Code Example #3
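A small helper, likely from an executor that delegates steps to external processes, that logs freshly received events by resolving each event's step_key back to its per-step context.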
    def _log_new_events(self, events, plan_context, running_steps):
        # Note: this could lead to duplicated events if the returned events were already logged
        # (they shouldn't be)
        for event in events:
            log_step_event(
                plan_context.for_step(running_steps[event.step_key]),
                event,
            )
Code Example #4
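Likely from Dagster's EMR support: this method polls an EMR step to completion, logging and yielding events as they arrive, and retrieves the step's logs from S3 both when an EmrError is raised and after a normal finish, provided wait_for_logs is set.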
    def wait_for_completion_and_log(self, log, run_id, step_key, emr_step_id, step_context):
        s3 = boto3.resource("s3", region_name=self.region_name)
        try:
            for event in self.wait_for_completion(log, s3, run_id, step_key, emr_step_id):
                log_step_event(step_context, event)
                yield event
        except EmrError as emr_error:
            if self.wait_for_logs:
                self._log_logs_from_s3(log, emr_step_id)
            raise emr_error

        if self.wait_for_logs:
            self._log_logs_from_s3(log, emr_step_id)
Code Example #5
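An older variant of Code Example #1 that adds a SQLite-backed log sink: a watcher thread tails the sink file with JsonSqlite3LogWatcher while the child process runs, and the is_done event stops the watcher once all step events have been drained.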
def execute_step_out_of_process(step_context, step):
    child_run_config = RunConfig(
        run_id=step_context.run_config.run_id,
        tags=step_context.run_config.tags,
        log_sink=None,
        event_callback=None,
        reexecution_config=None,
        step_keys_to_execute=step_context.run_config.step_keys_to_execute,
        mode=step_context.run_config.mode,
    )

    with safe_tempfile_path() as log_sink_file:
        init_db(log_sink_file)
        # Although the type of is_done is threading._Event in py2, not threading.Event,
        # it is still constructed using the threading.Event() factory
        is_done = threading.Event()

        def log_watcher_thread_target():
            log_watcher = JsonSqlite3LogWatcher(
                log_sink_file, step_context.log, is_done)
            log_watcher.watch()

        log_watcher_thread = threading.Thread(target=log_watcher_thread_target)

        log_watcher_thread.start()

        command = InProcessExecutorChildProcessCommand(
            step_context.environment_dict,
            child_run_config,
            step_context.executor_config,
            step.key,
            log_sink_file,
        )
        try:
            for step_event in execute_child_process_command(command):
                if step_context.run_config.event_callback and isinstance(
                        step_event, DagsterEvent):
                    log_step_event(step_context, step_event)
                yield step_event

        finally:
            is_done.set()
            log_watcher_thread.join()
Code Example #6
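The EMR counterpart of the Databricks launcher above: it posts artifacts, submits an EMR job flow step for the solid, blocks until EMR reports the step complete, optionally fetches logs from S3, and replays the recorded step events.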
    def launch_step(self, step_context, prior_attempts_count):
        step_run_ref = step_context_to_step_run_ref(
            step_context, prior_attempts_count,
            self.local_pipeline_package_path)

        run_id = step_context.pipeline_run.run_id
        log = step_context.log

        step_key = step_run_ref.step_key
        self._post_artifacts(log, step_run_ref, run_id, step_key)

        emr_step_def = self._get_emr_step_def(run_id, step_key,
                                              step_context.solid.name)
        emr_step_id = self.emr_job_runner.add_job_flow_steps(
            log, self.cluster_id, [emr_step_def])[0]
        self.emr_job_runner.wait_for_emr_steps_to_complete(
            log, self.cluster_id, [emr_step_id])
        if self.wait_for_logs:
            self._log_logs_from_s3(log, emr_step_id)

        for event in self.get_step_events(step_context, run_id, step_key):
            log_step_event(step_context, event)
            yield event
Code Example #7
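An earlier revision of the Databricks launch_step from Code Example #2; it is identical except that it waits for the run to complete without the interrupt-allowing raise_execution_interrupts context.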
    def launch_step(self, step_context, prior_attempts_count):
        step_run_ref = step_context_to_step_run_ref(
            step_context, prior_attempts_count,
            self.local_pipeline_package_path)
        run_id = step_context.pipeline_run.run_id
        log = step_context.log

        step_key = step_run_ref.step_key
        self._upload_artifacts(log, step_run_ref, run_id, step_key)

        task = self._get_databricks_task(run_id, step_key)
        databricks_run_id = self.databricks_runner.submit_run(
            self.run_config, task)

        try:
            self.databricks_runner.wait_for_run_to_complete(
                log, databricks_run_id)
        finally:
            if self.wait_for_logs:
                self._log_logs_from_cluster(log, databricks_run_id)

        for event in self.get_step_events(run_id, step_key):
            log_step_event(step_context, event)
            yield event