Ejemplo n.º 1
0
    def terminate(self, run_id):
        """Send a cancellation request for ``run_id`` to its gRPC server.

        Args:
            run_id (str): Id of the run to terminate.

        Returns:
            ``False`` when this object has no instance, when the instance has
            no run with that id, or when no gRPC client could be obtained for
            the run. Otherwise the ``success`` attribute of the
            ``cancel_execution`` response.
        """
        check.str_param(run_id, "run_id")

        # Without an instance there is nothing to look the run up in.
        run = self._instance.get_run_by_id(run_id) if self._instance else None
        if not run:
            return False

        self._instance.report_engine_event(
            message="Received pipeline termination request.",
            pipeline_run=run,
            cls=self.__class__,
        )

        grpc_client = self._get_grpc_client_for_termination(run_id)
        if grpc_client:
            response = grpc_client.cancel_execution(CancelExecutionRequest(run_id=run_id))
            return response.success

        # Leave a trace on the run explaining why no request was sent.
        self._instance.report_engine_event(
            message="Unable to get grpc client to send termination request to.",
            pipeline_run=run,
            cls=self.__class__,
        )
        return False
Ejemplo n.º 2
0
    def test_terminate_failed(self, graphql_context):
        """Check failure reporting for termination and the force-cancel fallback.

        The run launcher's ``terminate`` is replaced with a stub that always
        returns ``False``. A plain cancellation request is then expected to
        yield ``TerminatePipelineExecutionFailure``, while a request using the
        ``MARK_AS_CANCELED_IMMEDIATELY`` policy is expected to yield
        ``TerminatePipelineExecutionSuccess``.

        The stub is removed and the launched run is cancelled on the gRPC
        server in a ``finally`` block, so a failing assertion neither leaks the
        stub nor leaves the infinite-loop run behind.
        """
        selector = infer_pipeline_selector(graphql_context, "infinite_loop_pipeline")
        with safe_tempfile_path() as path:
            run_launcher = graphql_context.instance.run_launcher
            old_terminate = run_launcher.terminate
            run_launcher.terminate = lambda _run_id: False
            # Stays None until the launch succeeded, so cleanup knows whether
            # there is a run to cancel.
            run_id = None
            try:
                result = execute_dagster_graphql(
                    graphql_context,
                    LAUNCH_PIPELINE_EXECUTION_MUTATION,
                    variables={
                        "executionParams": {
                            "selector": selector,
                            "mode": "default",
                            "runConfigData": {"solids": {"loop": {"config": {"file": path}}}},
                        }
                    },
                )

                assert not result.errors
                assert result.data

                # just test existence
                assert (
                    result.data["launchPipelineExecution"]["__typename"]
                    == "LaunchPipelineRunSuccess"
                )
                run_id = result.data["launchPipelineExecution"]["run"]["runId"]
                # ensure the execution has happened
                while not os.path.exists(path):
                    time.sleep(0.1)

                # With terminate stubbed to return False, a normal cancellation
                # must be reported as a failure.
                result = execute_dagster_graphql(
                    graphql_context, RUN_CANCELLATION_QUERY, variables={"runId": run_id}
                )
                assert (
                    result.data["terminatePipelineExecution"]["__typename"]
                    == "TerminatePipelineExecutionFailure"
                )
                assert result.data["terminatePipelineExecution"]["message"].startswith(
                    "Unable to terminate run"
                )

                # The immediate-cancel policy is expected to succeed even
                # though terminate itself still returns False.
                result = execute_dagster_graphql(
                    graphql_context,
                    RUN_CANCELLATION_QUERY,
                    variables={
                        "runId": run_id,
                        "terminatePolicy": "MARK_AS_CANCELED_IMMEDIATELY",
                    },
                )

                assert (
                    result.data["terminatePipelineExecution"]["__typename"]
                    == "TerminatePipelineExecutionSuccess"
                )

                assert result.data["terminatePipelineExecution"]["run"]["runId"] == run_id
            finally:
                # Restore the real terminate even when an assertion failed.
                run_launcher.terminate = old_terminate

                if run_id is not None:
                    # Clean up the run process on the gRPC server
                    repository_location_handle = graphql_context.repository_locations[
                        0
                    ].location_handle
                    repository_location_handle.client.cancel_execution(
                        CancelExecutionRequest(run_id=run_id)
                    )
def test_cancel_run():
    """Cancel a streaming run over gRPC and check that it stopped early.

    ``streaming_pipeline`` is configured with ``length`` 20 and executed on a
    background thread through an ephemeral gRPC client. Once a step has
    started, the run is cancelled. The event log must then hold fewer than 20
    ``STEP_MATERIALIZATION`` events and at least one ``STEP_FAILURE`` event.
    """
    with instance_for_test() as instance:

        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable, python_file=__file__, working_directory=None,
        )
        server_process = GrpcServerProcess(target_origin, max_workers=10)

        with server_process.create_ephemeral_client() as api_client:
            streaming_results = []

            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline, run_config={"solids": {"streamer": {"config": {"length": 20}}}},
            )
            run_id = pipeline_run.run_id

            repository_origin = RepositoryGrpcServerOrigin(
                host="localhost",
                socket=api_client.socket,
                port=api_client.port,
                repository_name="test_repository",
            )
            pipeline_origin = PipelineGrpcServerOrigin(
                pipeline_name="streaming_pipeline", repository_origin=repository_origin,
            )
            execute_run_args = ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run_id,
                instance_ref=instance.get_ref(),
            )

            # Run the streaming call on a daemon thread so this thread stays
            # free to issue the cancellation.
            streamer = threading.Thread(
                target=_stream_events_target,
                args=(streaming_results, api_client, execute_run_args),
            )
            streamer.daemon = True
            streamer.start()
            poll_for_step_start(instance, run_id)

            cancel_request = CancelExecutionRequest(run_id=run_id)
            res = api_client.cancel_execution(cancel_execution_request=cancel_request)
            assert res.success is True

            poll_for_run(instance, run_id)

            event_types = [
                ev.dagster_event.event_type_value for ev in instance.all_logs(run_id)
            ]

            # Fewer materializations than the configured length of 20.
            assert event_types.count("STEP_MATERIALIZATION") < 20

            # soft termination
            assert "STEP_FAILURE" in event_types

        server_process.wait()
Ejemplo n.º 4
0
    def terminate(self, run_id):
        """Send a cancellation request for ``run_id`` to its gRPC server.

        Args:
            run_id (str): Id of the run to terminate.

        Returns:
            ``False`` when no gRPC client could be obtained for the run,
            otherwise the ``success`` attribute of the ``cancel_execution``
            response.
        """
        check.str_param(run_id, "run_id")

        grpc_client = self._get_grpc_client_for_termination(run_id)
        if grpc_client:
            response = grpc_client.cancel_execution(CancelExecutionRequest(run_id=run_id))
            return response.success

        # No client available, so there is nowhere to send the request.
        return False
Ejemplo n.º 5
0
    def terminate(self, run_id):
        """Send a cancellation request for ``run_id`` via its cached location handle.

        Args:
            run_id (str): Id of the run to terminate.

        Returns:
            ``False`` when ``run_id`` has no entry in the run-id-to-handle
            cache, otherwise the ``success`` attribute of the
            ``cancel_execution`` response.
        """
        check.str_param(run_id, "run_id")

        handles_by_run_id = self._run_id_to_repository_location_handle_cache
        if run_id in handles_by_run_id:
            location_handle = handles_by_run_id[run_id]
            response = location_handle.client.cancel_execution(
                CancelExecutionRequest(run_id=run_id)
            )
            return response.success

        # Runs without a cached handle cannot be reached from here.
        return False
Ejemplo n.º 6
0
def test_cleanup_after_force_terminate(run_config):
    """Check that a force-terminated run still gets its resources cleaned up.

    Launches ``sleepy_pipeline`` with ``run_config``, marks the run as
    canceling and then canceled once a step has started, and only afterwards
    sends the cancellation request to the gRPC server named in the run's
    ``GRPC_INFO_TAG`` tag. The test then waits up to 30 seconds for the
    cleanup message to appear in the run's logs and asserts that the run is
    still ``CANCELED``.
    """
    with instance_for_test() as instance:
        with get_managed_grpc_server_workspace(instance) as workspace:
            location = workspace.get_repository_location("test")
            repository = location.get_repository("nope")
            external_pipeline = repository.get_full_external_pipeline("sleepy_pipeline")

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline,
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )
            run_id = pipeline_run.run_id

            instance.launch_run(run_id, workspace)
            poll_for_step_start(instance, run_id)

            # simulate the sequence of events that happen during force-termination:
            # run moves immediately into canceled status while termination happens
            instance.report_run_canceling(pipeline_run)
            instance.report_run_canceled(pipeline_run)

            # The gRPC connection details are read back from the stored run's tags.
            stored_run = instance.get_run_by_id(run_id)
            grpc_info = json.loads(stored_run.tags.get(GRPC_INFO_TAG))
            client = DagsterGrpcClient(
                port=grpc_info.get("port"),
                socket=grpc_info.get("socket"),
                host=grpc_info.get("host"),
            )
            client.cancel_execution(CancelExecutionRequest(run_id=run_id))

            # Wait for the run worker to clean up
            cleanup_message = (
                "Computational resources were cleaned up after the run was forcibly marked as canceled."
            )
            wait_started = time.time()
            while True:
                if time.time() - wait_started > 30:
                    raise Exception("Timed out waiting for cleanup message")

                if any(cleanup_message in str(event) for event in instance.all_logs(run_id)):
                    break

                time.sleep(1)

            final_run = instance.get_run_by_id(run_id)
            assert final_run.status == PipelineRunStatus.CANCELED
Ejemplo n.º 7
0
    def terminate(self, run_id):
        """Mark ``run_id`` as canceling and send a cancellation request over gRPC.

        Args:
            run_id (str): Id of the run to terminate.

        Returns:
            ``False`` when this object has no instance, when the instance has
            no run with that id, or when no gRPC client could be obtained for
            the run. Otherwise the ``success`` attribute of the deserialized
            ``cancel_execution`` response.
        """
        check.str_param(run_id, "run_id")

        # Without an instance there is nothing to look the run up in.
        run = self._instance.get_run_by_id(run_id) if self._instance else None
        if not run:
            return False

        grpc_client = self._get_grpc_client_for_termination(run_id)
        if grpc_client:
            # The run is reported as canceling before the request goes out.
            self._instance.report_run_canceling(run)
            raw_response = grpc_client.cancel_execution(CancelExecutionRequest(run_id=run_id))
            return deserialize_json_to_dagster_namedtuple(raw_response).success

        # Leave a trace on the run explaining why no request was sent.
        self._instance.report_engine_event(
            message="Unable to get grpc client to send termination request to.",
            pipeline_run=run,
            cls=self.__class__,
        )
        return False
Ejemplo n.º 8
0
def _ephemeral_launched_run_client(
    instance_ref, pipeline_origin, pipeline_run_id, cancellation_event
):
    """Execute a run through an ephemeral gRPC server, cancelling it on request.

    The server is started with two workers so that a cancellation can be
    processed as an interrupt instead of waiting for the launched run to
    complete.

    Args:
        instance_ref (InstanceRef): Reference used to load the instance and
            passed on to the run execution call.
        pipeline_origin (PipelinePythonOrigin): Origin of the pipeline to run;
            its ``repository_origin`` determines what the server loads.
        pipeline_run_id (str): Id of the run to execute.
        cancellation_event (multiprocessing.synchronize.Event): When set while
            the run is executing, a cancellation request is sent to the server.
    """
    check.inst_param(instance_ref, "instance_ref", InstanceRef)
    check.inst_param(pipeline_origin, "pipeline_origin", PipelinePythonOrigin)
    check.str_param(pipeline_run_id, "pipeline_run_id")
    check.inst_param(cancellation_event, "cancellation_event", multiprocessing.synchronize.Event)

    dagster_instance = DagsterInstance.from_ref(instance_ref)
    pipeline_run = dagster_instance.get_run_by_id(pipeline_run_id)

    target_origin = LoadableTargetOrigin.from_python_origin(pipeline_origin.repository_origin)

    with GrpcServerProcess(target_origin, max_workers=2) as server_process:
        grpc_client = server_process.create_ephemeral_client()

        run_thread = threading.Thread(
            target=sync_execute_run_grpc,
            kwargs=dict(
                api_client=grpc_client,
                instance_ref=instance_ref,
                pipeline_origin=pipeline_origin,
                pipeline_run=pipeline_run,
            ),
        )
        run_thread.start()

        # Poll once per tick; on cancellation, send the request and block
        # until the execution thread has finished.
        while run_thread.is_alive():
            if cancellation_event.is_set():
                grpc_client.cancel_execution(CancelExecutionRequest(run_id=pipeline_run_id))
                run_thread.join()
            time.sleep(SUBPROCESS_TICK)