Example 1
    def launch_step(self, step_context, prior_attempts_count):
        step_run_ref = step_context_to_step_run_ref(
            step_context, prior_attempts_count,
            self.local_pipeline_package_path)
        run_id = step_context.pipeline_run.run_id
        log = step_context.log

        step_key = step_run_ref.step_key
        self._upload_artifacts(log, step_run_ref, run_id, step_key)

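        # Package the step as a one-off Databricks task and submit it as a run.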
        task = self._get_databricks_task(run_id, step_key)
        databricks_run_id = self.databricks_runner.submit_run(
            self.run_config, task)

        try:
            # If this is being called within a `delay_interrupts` context, allow interrupts while
            # waiting for the execution to complete, so that we can terminate slow or hanging steps.
            with raise_interrupts_immediately():
                self.databricks_runner.wait_for_run_to_complete(
                    log, databricks_run_id)
        finally:
            if self.wait_for_logs:
                self._log_logs_from_cluster(log, databricks_run_id)

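        # Surface the events the remote step recorded, re-logging and re-yielding
        # them in the host process.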
        for event in self.get_step_events(run_id, step_key):
            log_step_event(step_context, event)
            yield event
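
These launchers show only the host side of the protocol: serialize a StepRunRef, ship it to the cluster, and read the resulting events back. For orientation, here is a minimal sketch of the remote counterpart, assuming the launcher pickled the ref to a file; the main() entry point, the file paths, and the events.pkl output name are illustrative, and the exact signature of run_step_from_ref varies across Dagster versions:

import pickle

from dagster.core.execution.plan.external_step import run_step_from_ref


def main(step_run_ref_path):
    # Rebuild the StepRunRef that launch_step serialized and uploaded.
    with open(step_run_ref_path, "rb") as handle:
        step_run_ref = pickle.load(handle)

    # Execute the step in this process and persist its events where the
    # launcher's get_step_events() expects to find them.
    events = list(run_step_from_ref(step_run_ref))
    with open("events.pkl", "wb") as handle:
        pickle.dump(events, handle)
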
Example 2
    def launch_step(self, step_context, prior_attempts_count):
        step_run_ref = step_context_to_step_run_ref(
            step_context, prior_attempts_count,
            self.local_dagster_job_package_path)
        run_id = step_context.pipeline_run.run_id
        log = step_context.log

        step_key = step_run_ref.step_key
        self._upload_artifacts(log, step_run_ref, run_id, step_key)

        task = self._get_databricks_task(run_id, step_key)
        databricks_run_id = self.databricks_runner.submit_run(
            self.run_config, task)

        try:
            # If this is being called within a `capture_interrupts` context, allow interrupts while
            # waiting for the execution to complete, so that we can terminate slow or hanging steps.
            with raise_execution_interrupts():
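                # Stream events back as the remote run progresses rather than
                # blocking until it completes.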
                yield from self.step_events_iterator(step_context, step_key,
                                                     databricks_run_id)
        finally:
            self.log_compute_logs(log, run_id, step_key)
            # The wait_for_logs path is somewhat obsolete now that compute logs
            # are captured above.
            if self.wait_for_logs:
                self._log_logs_from_cluster(log, databricks_run_id)
Example 3
    def launch_step(self, step_context, prior_attempts_count):
        step_run_ref = step_context_to_step_run_ref(
            step_context, prior_attempts_count, self.local_pipeline_package_path
        )

        run_id = step_context.pipeline_run.run_id
        log = step_context.log

        step_key = step_run_ref.step_key
        self._post_artifacts(log, step_run_ref, run_id, step_key)

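        # Translate the step into an EMR step definition and append it to the
        # cluster's job flow; the runner returns the ids of the steps it added.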
        emr_step_def = self._get_emr_step_def(run_id, step_key, step_context.solid.name)
        emr_step_id = self.emr_job_runner.add_job_flow_steps(
            log, self.cluster_id, [emr_step_def]
        )[0]

        return self.wait_for_completion_and_log(log, run_id, step_key, emr_step_id, step_context)
Example 4
def test_step_context_to_step_run_ref():
    with DagsterInstance.ephemeral() as instance:
        step_context = initialize_step_context("", instance)
        step = step_context.step
        step_run_ref = step_context_to_step_run_ref(step_context, 0)
        assert step_run_ref.run_config == step_context.pipeline_run.run_config
        assert step_run_ref.run_id == step_context.pipeline_run.run_id

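        # Round-trip the ref back into a step context and check that the step
        # it carries matches the original.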
        rehydrated_step_context = step_run_ref_to_step_context(
            step_run_ref, instance)
        rehydrated_step = rehydrated_step_context.step
        assert rehydrated_step.pipeline_name == step.pipeline_name
        assert rehydrated_step.step_inputs == step.step_inputs
        assert rehydrated_step.step_outputs == step.step_outputs
        assert rehydrated_step.kind == step.kind
        assert rehydrated_step.solid_handle.name == step.solid_handle.name
        assert rehydrated_step.logging_tags == step.logging_tags
        assert rehydrated_step.tags == step.tags
Example 5
def test_step_context_to_step_run_ref():
    step_context = initialize_step_context('')
    step = step_context.step
    step_run_ref = step_context_to_step_run_ref(step_context, 0)
    assert step_run_ref.run_config == step_context.pipeline_run.run_config
    assert step_run_ref.run_id == step_context.pipeline_run.run_id

    rehydrated_step_context = step_run_ref_to_step_context(step_run_ref)
    assert rehydrated_step_context.required_resource_keys == step_context.required_resource_keys
    rehydrated_step = rehydrated_step_context.step
    assert rehydrated_step.pipeline_name == step.pipeline_name
    assert rehydrated_step.key_suffix == step.key_suffix
    assert rehydrated_step.step_inputs == step.step_inputs
    assert rehydrated_step.step_outputs == step.step_outputs
    assert rehydrated_step.kind == step.kind
    assert rehydrated_step.solid_handle.name == step.solid_handle.name
    assert rehydrated_step.logging_tags == step.logging_tags
    assert rehydrated_step.tags == step.tags
Example 6
    def launch_step(self, step_context, prior_attempts_count):
        step_run_ref = step_context_to_step_run_ref(
            step_context, prior_attempts_count,
            self.local_pipeline_package_path)

        run_id = step_context.pipeline_run.run_id
        log = step_context.log

        step_key = step_run_ref.step_key
        self._post_artifacts(log, step_run_ref, run_id, step_key)

        emr_step_def = self._get_emr_step_def(run_id, step_key,
                                              step_context.solid.name)
        emr_step_id = self.emr_job_runner.add_job_flow_steps(
            log, self.cluster_id, [emr_step_def])[0]
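        # Block until EMR reports the step has finished.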
        self.emr_job_runner.wait_for_emr_steps_to_complete(
            log, self.cluster_id, [emr_step_id])
        if self.wait_for_logs:
            self._log_logs_from_s3(log, emr_step_id)

        for event in self.get_step_events(step_context, run_id, step_key):
            log_step_event(step_context, event)
            yield event
Example 7
    def launch_step(self, step_context, prior_attempts_count):
        step_run_ref = step_context_to_step_run_ref(
            step_context, prior_attempts_count,
            self.local_pipeline_package_path)
        run_id = step_context.pipeline_run.run_id
        log = step_context.log

        step_key = step_run_ref.step_key
        self._upload_artifacts(log, step_run_ref, run_id, step_key)

        task = self._get_databricks_task(run_id, step_key)
        databricks_run_id = self.databricks_runner.submit_run(
            self.run_config, task)

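        # Even if the wait fails, still surface the cluster logs when enabled.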
        try:
            self.databricks_runner.wait_for_run_to_complete(
                log, databricks_run_id)
        finally:
            if self.wait_for_logs:
                self._log_logs_from_cluster(log, databricks_run_id)

        for event in self.get_step_events(run_id, step_key):
            log_step_event(step_context, event)
            yield event