def test_grpc_socket_workspace():
    """Load two socket-based gRPC locations from workspace YAML and inspect their handles.

    Two GrpcServerProcess instances are started; the first YAML entry names
    host and socket, the second names a socket and an explicit location name.
    A handle is created per origin and its socket/host/port are checked.
    """
    server_proc_one = GrpcServerProcess()
    with server_proc_one.create_ephemeral_client() as client_one:
        server_proc_two = GrpcServerProcess()
        with server_proc_two.create_ephemeral_client() as client_two:
            socket_one = client_one.socket
            socket_two = client_two.socket
            workspace_yaml = """
load_from:
- grpc_server:
    host: localhost
    socket: {socket_one}
- grpc_server:
    socket: {socket_two}
    location_name: 'local_port_default_host'
                """.format(socket_one=socket_one, socket_two=socket_two)

            workspace_config = yaml.safe_load(workspace_yaml)
            # fake out as if it were loaded by a yaml file in this directory
            fake_yaml_path = file_relative_path(__file__, "not_a_real.yaml")
            origins = location_origins_from_config(workspace_config,
                                                   fake_yaml_path)

            with ExitStack() as stack:
                # Enter every handle on the stack so all of them are closed
                # together when the stack exits.
                handles = {}
                for name, origin in origins.items():
                    handles[name] = stack.enter_context(origin.create_handle())
                assert len(handles) == 2

                # The first entry has no location_name, so it is looked up
                # under the "grpc:localhost:<socket>" name.
                default_location_name = "grpc:localhost:{socket}".format(
                    socket=socket_one)
                default_handle = handles.get(default_location_name)
                assert default_handle

                assert default_handle.socket == socket_one
                assert default_handle.host == "localhost"
                assert default_handle.port is None

                named_handle = handles.get("local_port_default_host")
                assert named_handle

                assert named_handle.socket == socket_two
                assert named_handle.host == "localhost"
                assert named_handle.port is None

                assert all(handle.location_name
                           for handle in handles.values())
        server_proc_two.wait()
    server_proc_one.wait()
Beispiel #2
0
def test_grpc_socket_workspace():
    """Load a Workspace from YAML naming two socket-based gRPC servers and inspect it.

    The first YAML entry names host and socket; the second names a socket and
    an explicit location name. Both handles are then checked for the expected
    socket, host and (absent) port.
    """
    server_proc_one = GrpcServerProcess()
    with server_proc_one.create_ephemeral_client() as client_one:
        server_proc_two = GrpcServerProcess()
        with server_proc_two.create_ephemeral_client() as client_two:
            socket_one = client_one.socket
            socket_two = client_two.socket
            workspace_yaml = """
load_from:
- grpc_server:
    host: localhost
    socket: {socket_one}
- grpc_server:
    socket: {socket_two}
    location_name: 'local_port_default_host'
                """.format(socket_one=socket_one, socket_two=socket_two)

            workspace_config = yaml.safe_load(workspace_yaml)
            # fake out as if it were loaded by a yaml file in this directory
            fake_yaml_path = file_relative_path(__file__, "not_a_real.yaml")
            workspace = load_workspace_from_config(workspace_config,
                                                   fake_yaml_path)
            assert isinstance(workspace, Workspace)
            assert len(workspace.repository_location_handles) == 2

            # The first entry has no location_name, so it is looked up under
            # the "grpc:localhost:<socket>" name.
            default_location_name = "grpc:localhost:{socket}".format(
                socket=socket_one)
            assert workspace.has_repository_location_handle(
                default_location_name)
            default_handle = workspace.get_repository_location_handle(
                default_location_name)

            assert default_handle.socket == socket_one
            assert default_handle.host == "localhost"
            assert default_handle.port is None

            explicit_name = "local_port_default_host"
            assert workspace.has_repository_location_handle(explicit_name)
            named_handle = workspace.get_repository_location_handle(
                explicit_name)

            assert named_handle.socket == socket_two
            assert named_handle.host == "localhost"
            assert named_handle.port is None

            assert all(handle.location_name
                       for handle in workspace.repository_location_handles)
        server_proc_two.wait()
    server_proc_one.wait()
def test_server_down():
    """Check run termination while the run's gRPC info tag points at a different port.

    A run of ``sleepy_pipeline`` is launched against an ephemeral gRPC server.
    The run's ``GRPC_INFO_TAG`` is then overwritten with a port obtained from
    ``find_free_port()`` (presumably one with no server listening), at which
    point ``can_terminate`` must report False. After the original tag value is
    restored, ``terminate`` must succeed.
    """
    with grpc_instance() as instance:
        repo_yaml = file_relative_path(__file__, "repo.yaml")
        recon_repo = ReconstructableRepository.from_legacy_repository_yaml(
            repo_yaml)
        loadable_target_origin = recon_repo.get_origin().loadable_target_origin
        server_process = GrpcServerProcess(
            loadable_target_origin=loadable_target_origin,
            max_workers=4,
            force_port=True)

        with server_process.create_ephemeral_client() as api_client:
            # Build a repository location that talks to the server just started.
            repository_location = GrpcServerRepositoryLocation(
                RepositoryLocationHandle.create_grpc_server_location(
                    location_name="test",
                    port=api_client.port,
                    socket=api_client.socket,
                    host=api_client.host,
                ))

            external_pipeline = repository_location.get_repository(
                "nope").get_full_external_pipeline("sleepy_pipeline")

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None)

            launcher = instance.run_launcher

            launcher.launch_run(instance, pipeline_run, external_pipeline)

            # Wait until a step has started so there is something to terminate.
            poll_for_step_start(instance, pipeline_run.run_id)

            assert launcher.can_terminate(pipeline_run.run_id)

            # Remember the real tag value so it can be restored below.
            original_run_tags = instance.get_run_by_id(
                pipeline_run.run_id).tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            instance.add_run_tags(
                pipeline_run.run_id,
                {
                    GRPC_INFO_TAG:
                    seven.json.dumps(
                        merge_dicts({"host": "localhost"},
                                    {"port": find_free_port()}))
                },
            )

            assert not launcher.can_terminate(pipeline_run.run_id)

            # Restore the original tag; termination should work again.
            instance.add_run_tags(
                pipeline_run.run_id,
                {
                    GRPC_INFO_TAG: original_run_tags,
                },
            )

            assert launcher.terminate(pipeline_run.run_id)

        server_process.wait()
def test_server_down():
    """Check run termination while the run's gRPC info tag points at a different port.

    Launches ``sleepy_pipeline`` against an ephemeral gRPC server, swaps the
    run's ``GRPC_INFO_TAG`` for one carrying a port from ``find_free_port()``
    and expects ``can_terminate`` to be False; once the original tag is put
    back, ``terminate`` must succeed.
    """
    with instance_for_test() as instance:
        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )

        server_process = GrpcServerProcess(
            loadable_target_origin=target_origin, max_workers=4, force_port=True
        )

        with server_process.create_ephemeral_client() as api_client:
            # Point a repository location at the server that was just started.
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)

            repository = repository_location.get_repository("nope")
            external_pipeline = repository.get_full_external_pipeline("sleepy_pipeline")

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )
            run_id = pipeline_run.run_id

            instance.launch_run(run_id, external_pipeline)

            # Wait until a step has started so there is something to terminate.
            poll_for_step_start(instance, run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)

            # Remember the real tag value so it can be restored below.
            original_grpc_info = instance.get_run_by_id(run_id).tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            bogus_grpc_info = seven.json.dumps(
                merge_dicts({"host": "localhost"}, {"port": find_free_port()})
            )
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: bogus_grpc_info})

            assert not launcher.can_terminate(run_id)

            # Restore the original tag; termination should work again.
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: original_grpc_info})

            assert launcher.terminate(run_id)

        server_process.wait()
def get_deployed_grpc_server_workspace(instance):
    """Yield a request context for a workspace backed by a freshly started gRPC server.

    Generator: starts a GrpcServerProcess for the ``nope`` attribute of
    ``test_default_run_launcher.py``, opens a WorkspaceProcessContext targeting
    it under the location name ``test``, and yields the result of
    ``create_request_context()``. The server process is always waited on when
    the generator finishes.

    Args:
        instance: passed through as the first argument of WorkspaceProcessContext.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="nope",
        python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
    )
    server_process = GrpcServerProcess(loadable_target_origin=target_origin)

    try:
        # shuts down when leaves this context
        with server_process.create_ephemeral_client():
            grpc_target = GrpcServerTarget(
                host="localhost",
                socket=server_process.socket,
                port=server_process.port,
                location_name="test",
            )
            with WorkspaceProcessContext(instance, grpc_target) as process_context:
                yield process_context.create_request_context()
    finally:
        server_process.wait()
Beispiel #6
0
    def create_process_bound_grpc_server_location(loadable_target_origin,
                                                  location_name):
        """Start a heartbeat-enabled gRPC server and wrap it in a managed location handle.

        Args:
            loadable_target_origin: forwarded to GrpcServerProcess.
            location_name: name for the location; when falsy, a name is derived
                from the server's repository code pointers via
                ``_assign_python_env_location_name``.

        Returns:
            A ManagedGrpcPythonEnvRepositoryLocationHandle holding the client
            and the server process.
        """
        from dagster.grpc.client import client_heartbeat_thread
        from dagster.grpc.server import GrpcServerProcess

        grpc_server_process = GrpcServerProcess(
            loadable_target_origin=loadable_target_origin,
            max_workers=2,
            heartbeat=True,
        )
        grpc_client = grpc_server_process.create_ephemeral_client()

        # Run the heartbeat in a daemon thread so it never blocks interpreter exit.
        heartbeat_thread = threading.Thread(
            target=client_heartbeat_thread,
            args=(grpc_client, ),
        )
        heartbeat_thread.daemon = True
        heartbeat_thread.start()

        response = sync_list_repositories_grpc(grpc_client)
        code_pointers = response.repository_code_pointer_dict
        executable_path = response.executable_path

        # Fall back to a generated name when the caller did not supply one.
        resolved_name = location_name or _assign_python_env_location_name(
            code_pointers)

        return ManagedGrpcPythonEnvRepositoryLocationHandle(
            executable_path=executable_path,
            location_name=resolved_name,
            repository_code_pointer_dict=code_pointers,
            client=grpc_client,
            grpc_server_process=grpc_server_process,
        )
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    """Yield the full external pipeline ``pipeline_name`` served by a fresh gRPC server.

    Generator: starts a GrpcServerProcess for the ``nope`` attribute of
    ``test_default_run_launcher.py``, builds a repository location for it and
    yields the external pipeline looked up in repository ``nope``. The server
    process is always waited on when the generator finishes.

    Args:
        pipeline_name: name passed to ``get_full_external_pipeline``.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="nope",
        python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
    )
    server_process = GrpcServerProcess(loadable_target_origin=target_origin)

    try:
        with server_process.create_ephemeral_client() as api_client:
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin)
            repository_location = GrpcServerRepositoryLocation(location_handle)

            repository = repository_location.get_repository("nope")
            yield repository.get_full_external_pipeline(pipeline_name)
    finally:
        server_process.wait()
Beispiel #8
0
def test_heartbeat():
    """Check that heartbeats keep the server process alive and that it exits without them.

    The server is started with ``heartbeat=True`` and ``heartbeat_timeout=1``
    (presumably seconds -- confirm against GrpcServerProcess). Heartbeats are
    sent every 0.5s while the process is asserted to be running; afterwards the
    test polls for up to 10 seconds for the process to exit.

    Raises:
        Exception: if the server process has not exited within 10 seconds.
    """
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="bar_repo",
        python_file=file_relative_path(__file__, "grpc_repo.py"),
    )
    server = GrpcServerProcess(
        loadable_target_origin=loadable_target_origin,
        max_workers=2,
        heartbeat=True,
        heartbeat_timeout=1,
    )
    with server.create_ephemeral_client() as client:
        # poll() returning None means the process has not exited yet.
        assert server.server_process.poll() is None

        # heartbeat keeps the server alive
        time.sleep(0.5)
        client.heartbeat()
        time.sleep(0.5)
        client.heartbeat()
        time.sleep(0.5)
        assert server.server_process.poll() is None

        # No more heartbeats are sent from here on; wait (bounded) for the
        # server process to exit on its own.
        start_time = time.time()
        while (time.time() - start_time) < 10:
            if server.server_process.poll() is not None:
                return
            time.sleep(0.1)

        raise Exception(
            "Timed out waiting for server to terminate after heartbeat stopped"
        )
Beispiel #9
0
def test_heartbeat():
    """Check that heartbeats keep the server process alive and that it exits without them.

    The server is started with ``heartbeat=True`` and ``heartbeat_timeout=1``
    (presumably seconds -- confirm against GrpcServerProcess). Heartbeats are
    sent every 0.5s while the process is asserted to be running; afterwards the
    test polls, with a 10 second bound, for the process to exit.
    """
    loadable_target_origin = LoadableTargetOrigin(
        attribute="bar_repo",
        python_file=file_relative_path(__file__, "grpc_repo.py"),
    )
    server = GrpcServerProcess(
        loadable_target_origin=loadable_target_origin,
        max_workers=2,
        heartbeat=True,
        heartbeat_timeout=1,
    )
    with server.create_ephemeral_client() as client:
        # poll() returning None means the process has not exited yet.
        assert server.server_process.poll() is None

        # heartbeat keeps the server alive
        time.sleep(0.5)
        client.heartbeat()
        time.sleep(0.5)
        client.heartbeat()
        time.sleep(0.5)
        assert server.server_process.poll() is None

        # without the heartbeat, the server dies
        # Poll with a generous bound instead of asserting after one fixed
        # sleep: how long the server takes to notice the missing heartbeat and
        # exit depends on machine load, so a single fixed delay is flaky.
        start_time = time.time()
        while server.server_process.poll() is None:
            assert (time.time() - start_time) < 10, (
                "Timed out waiting for server to terminate after heartbeat stopped"
            )
            time.sleep(0.1)
Beispiel #10
0
def test_ssl_grpc_server_workspace():
    """Check that ``ssl: true`` in workspace YAML yields an origin with ``use_ssl`` set.

    A non-SSL server is started on a port; the origin parsed from YAML must
    have ``use_ssl`` truthy, and creating a location from it must raise
    DagsterUserCodeUnreachableError.
    """
    server_process = GrpcServerProcess(force_port=True)
    try:
        with server_process.create_ephemeral_client() as api_client:

            # Sanity check that the server answers before testing the SSL origin.
            assert api_client.heartbeat(echo="Hello")

            port = server_process.port
            ssl_yaml = f"""
    load_from:
    - grpc_server:
        host: localhost
        port: {port}
        ssl: true
    """
            ssl_config = yaml.safe_load(ssl_yaml)
            # fake out as if it were loaded by a yaml file in this directory
            fake_yaml_path = file_relative_path(__file__, "not_a_real.yaml")
            location_origins = location_origins_from_config(ssl_config, fake_yaml_path)

            ssl_origin = list(location_origins.values())[0]
            assert ssl_origin.use_ssl

            # Actually connecting to the server will fail since it's expecting SSL
            # and we didn't set up the server with SSL
            try:
                with ssl_origin.create_location():
                    # Reaching this line means the connection unexpectedly worked.
                    assert False
            except DagsterUserCodeUnreachableError:
                pass

    finally:
        server_process.wait()
Beispiel #11
0
def test_list_command_grpc_socket():
    """Run the pipeline list command against a gRPC server addressed by socket.

    Exercises both ``execute_list_command`` and the click command, first with
    the socket alone and then with socket plus host.
    """
    with instance_for_test() as instance:
        runner = CliRunner()

        bar_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=file_relative_path(__file__, "test_cli_commands.py"),
            attribute="bar",
        )
        server_process = GrpcServerProcess(loadable_target_origin=bar_origin)

        with server_process.create_ephemeral_client() as api_client:
            # Direct calls into the command implementation.
            socket_only_kwargs = {"grpc_socket": api_client.socket}
            execute_list_command(socket_only_kwargs, no_print, instance)

            socket_and_host_kwargs = {
                "grpc_socket": api_client.socket,
                "grpc_host": api_client.host,
            }
            execute_list_command(socket_and_host_kwargs, no_print, instance)

            # The same two combinations through the click CLI entry point.
            socket_only_args = ["--grpc-socket", api_client.socket]
            result = runner.invoke(pipeline_list_command, socket_only_args)
            assert_correct_bar_repository_output(result)

            socket_and_host_args = [
                "--grpc-socket",
                api_client.socket,
                "--grpc-host",
                api_client.host,
            ]
            result = runner.invoke(pipeline_list_command, socket_and_host_args)
            assert_correct_bar_repository_output(result)

        server_process.wait()
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    """Check that a launched run still finishes after the ephemeral client context exits.

    A run of ``slow_pipeline`` is launched through the instance's run launcher
    (configured as GrpcRunLauncher). After the client context is left, the run
    must still be unfinished and the server process still alive; the run must
    then reach SUCCESS and the server process must exit within 5 seconds.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(
            temp_dir,
            overrides={
                "run_launcher": {
                    "module": "dagster.core.launcher.grpc_run_launcher",
                    "class": "GrpcRunLauncher",
                }
            },
        )

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=slow_pipeline, run_config=None)
        run_id = pipeline_run.run_id

        loadable_target_origin = LoadableTargetOrigin(
            attribute="nope",
            python_file=file_relative_path(__file__,
                                           "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=loadable_target_origin, max_workers=4)
        with server_process.create_ephemeral_client() as api_client:
            # Build a repository location that talks to the server just started.
            repository_location = GrpcServerRepositoryLocation(
                RepositoryLocationHandle.create_grpc_server_location(
                    location_name="test",
                    port=api_client.port,
                    socket=api_client.socket,
                    host=api_client.host,
                ))

            external_pipeline = repository_location.get_repository(
                "nope").get_full_external_pipeline("slow_pipeline")

            assert instance.get_run_by_id(
                run_id).status == PipelineRunStatus.NOT_STARTED

            launcher = instance.run_launcher
            launcher.launch_run(instance=instance,
                                run=pipeline_run,
                                external_pipeline=external_pipeline)

        # Server process now receives shutdown event, run has not finished yet
        pipeline_run = instance.get_run_by_id(run_id)
        assert not pipeline_run.is_finished
        # poll() returning None means the server process is still running.
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        pipeline_run = poll_for_finished_run(instance, run_id)
        assert pipeline_run.status == PipelineRunStatus.SUCCESS

        # Busy-wait for the server process to exit, failing after 5 seconds.
        start_time = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - start_time < 5

        server_process.wait()
Beispiel #13
0
def test_list_command_deployed_grpc():
    """Run the pipeline list command against a gRPC server addressed by port.

    Covers the click command with port only, port plus host, and port only
    again; then checks that supplying both a port and a socket is rejected by
    the CLI (non-zero exit code) and by ``execute_list_command`` (UsageError).
    """
    runner = CliRunner()

    with instance_for_test() as instance:
        bar_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=file_relative_path(__file__, "test_cli_commands.py"),
            attribute="bar",
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=bar_origin,
            force_port=True,
        )

        with server_process.create_ephemeral_client() as api_client:
            # Argument lists that are each expected to list the "bar" repository.
            successful_invocations = [
                ["--grpc-port", api_client.port],
                ["--grpc-port", api_client.port, "--grpc-host", api_client.host],
                ["--grpc-port", api_client.port],
            ]
            for cli_args in successful_invocations:
                result = runner.invoke(pipeline_list_command, cli_args)
                assert_correct_bar_repository_output(result)

            conflicting_args = [
                "--grpc-port",
                api_client.port,
                "--grpc-socket",
                "foonamedsocket",
            ]
            result = runner.invoke(pipeline_list_command, conflicting_args)
            assert result.exit_code != 0

            execute_list_command({"grpc_port": api_client.port}, no_print, instance)

            # Can't supply both port and socket
            conflicting_kwargs = {
                "grpc_port": api_client.port,
                "grpc_socket": "foonamedsocket",
            }
            with pytest.raises(UsageError):
                execute_list_command(conflicting_kwargs, no_print, instance)

        server_process.wait()
def test_cancel_run():
    """Cancel a streaming run mid-execution and check that it stopped early.

    ``streaming_pipeline`` is configured with length 20 and started on a
    background thread. Once a step has started the run is cancelled through the
    API client; afterwards fewer than 20 STEP_MATERIALIZATION events and at
    least one STEP_FAILURE event must have been logged.
    """
    with instance_for_test() as instance:

        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable, python_file=__file__, working_directory=None,
        )

        server_process = GrpcServerProcess(target_origin, max_workers=10)

        with server_process.create_ephemeral_client() as api_client:
            streaming_results = []

            streamer_config = {"solids": {"streamer": {"config": {"length": 20}}}}
            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline, run_config=streamer_config,
            )
            run_id = pipeline_run.run_id

            repository_origin = RepositoryGrpcServerOrigin(
                host="localhost",
                socket=api_client.socket,
                port=api_client.port,
                repository_name="test_repository",
            )
            pipeline_origin = PipelineGrpcServerOrigin(
                pipeline_name="streaming_pipeline", repository_origin=repository_origin,
            )
            execute_run_args = ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run_id,
                instance_ref=instance.get_ref(),
            )

            # Stream events on a daemon thread so the test can cancel concurrently.
            streaming_thread = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, api_client, execute_run_args],
            )
            streaming_thread.daemon = True
            streaming_thread.start()
            poll_for_step_start(instance, run_id)

            cancel_request = CancelExecutionRequest(run_id=run_id)
            res = api_client.cancel_execution(cancel_execution_request=cancel_request)
            assert res.success is True

            poll_for_run(instance, run_id)

            logs = instance.all_logs(run_id)
            materializations = [
                ev for ev in logs if ev.dagster_event.event_type_value == "STEP_MATERIALIZATION"
            ]
            assert len(materializations) < 20

            # soft termination
            step_failures = [
                ev for ev in logs if ev.dagster_event.event_type_value == "STEP_FAILURE"
            ]
            assert step_failures

        server_process.wait()
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    """Check that a launched run still finishes after the ephemeral client context exits.

    A run of ``slow_pipeline`` is created and launched through a workspace
    request context that targets an ephemeral gRPC server. After both contexts
    are left, the run must still be unfinished and the server process still
    alive; the run must then reach SUCCESS and the server process must exit
    within 5 seconds.
    """
    with instance_for_test() as instance:
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=loadable_target_origin, max_workers=4
        )
        with server_process.create_ephemeral_client():  # Shuts down when leaves context
            with WorkspaceProcessContext(
                instance,
                GrpcServerTarget(
                    host="localhost",
                    socket=server_process.socket,
                    port=server_process.port,
                    location_name="test",
                ),
            ) as workspace_process_context:
                workspace = workspace_process_context.create_request_context()

                external_pipeline = (
                    workspace.get_repository_location("test")
                    .get_repository("nope")
                    .get_full_external_pipeline("slow_pipeline")
                )

                # The run records both origins taken from the external pipeline.
                pipeline_run = instance.create_run_for_pipeline(
                    pipeline_def=slow_pipeline,
                    run_config=None,
                    external_pipeline_origin=external_pipeline.get_external_origin(),
                    pipeline_code_origin=external_pipeline.get_python_origin(),
                )
                run_id = pipeline_run.run_id

                assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

                instance.launch_run(run_id=run_id, workspace=workspace)

        # Server process now receives shutdown event, run has not finished yet
        pipeline_run = instance.get_run_by_id(run_id)
        assert not pipeline_run.is_finished
        # poll() returning None means the server process is still running.
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        pipeline_run = poll_for_finished_run(instance, run_id)
        assert pipeline_run.status == PipelineRunStatus.SUCCESS

        # Busy-wait for the server process to exit, failing after 5 seconds.
        start_time = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - start_time < 5

        server_process.wait()
Beispiel #16
0
def test_execute_run_api_grpc_python_handle():
    """Execute the ``foo`` pipeline over gRPC and check the exact event sequence.

    The run is created on a test instance, executed through
    ``sync_execute_run_grpc`` against an ephemeral server, and the 17 resulting
    events are compared by type, in order.
    """
    expected_event_types = [
        "ENGINE_EVENT",
        "ENGINE_EVENT",
        "PIPELINE_START",
        "ENGINE_EVENT",
        "STEP_START",
        "STEP_OUTPUT",
        "OBJECT_STORE_OPERATION",
        "STEP_SUCCESS",
        "STEP_START",
        "OBJECT_STORE_OPERATION",
        "STEP_INPUT",
        "STEP_OUTPUT",
        "OBJECT_STORE_OPERATION",
        "STEP_SUCCESS",
        "ENGINE_EVENT",
        "PIPELINE_SUCCESS",
        "ENGINE_EVENT",
    ]

    with instance_for_test() as instance:
        with get_foo_pipeline_handle() as pipeline_handle:
            pipeline_run = instance.create_run(
                pipeline_name="foo",
                run_id=None,
                run_config={},
                mode="default",
                solids_to_execute=None,
                step_keys_to_execute=None,
                status=None,
                tags=None,
                root_run_id=None,
                parent_run_id=None,
                pipeline_snapshot=None,
                execution_plan_snapshot=None,
                parent_pipeline_snapshot=None,
            )

            # Walk from the pipeline's external origin down to the target the
            # server should load.
            external_origin = pipeline_handle.get_external_origin()
            repository_origin = external_origin.external_repository_origin
            location_origin = repository_origin.repository_location_origin
            target_origin = location_origin.loadable_target_origin

            server_process = GrpcServerProcess(target_origin, max_workers=2)
            with server_process.create_ephemeral_client() as api_client:
                events = list(
                    sync_execute_run_grpc(
                        api_client=api_client,
                        instance_ref=instance.get_ref(),
                        pipeline_origin=pipeline_handle.get_external_origin(),
                        pipeline_run=pipeline_run,
                    ))

                assert len(events) == 17
                observed_event_types = [
                    event.event_type_value for event in events
                ]
                assert observed_event_types == expected_event_types
            server_process.wait()
Beispiel #17
0
def test_health_check_success():
    """Check that a running heartbeat-enabled server reports "SERVING" to a health check."""
    target_origin = LoadableTargetOrigin(
        attribute="bar_repo", python_file=file_relative_path(__file__, "grpc_repo.py"),
    )
    server_options = {
        "loadable_target_origin": target_origin,
        "max_workers": 2,
        "heartbeat": True,
        "heartbeat_timeout": 1,
    }
    server = GrpcServerProcess(**server_options)
    with server.create_ephemeral_client() as api_client:
        health_status = api_client.health_check_query()
        assert health_status == "SERVING"
Beispiel #18
0
def grpc_repo_location():
    """Yield a RepositoryLocation backed by a freshly started gRPC server.

    Generator: the server loads the ``the_repo`` attribute of this file. The
    server process is always waited on when the generator finishes.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable, python_file=__file__, attribute="the_repo"
    )
    server_process = GrpcServerProcess(loadable_target_origin=target_origin)
    try:
        with server_process.create_ephemeral_client() as api_client:
            location_handle = RepositoryLocationHandle.create_grpc_server_location(
                port=api_client.port, socket=api_client.socket, host=api_client.host,
            )
            yield RepositoryLocation.from_handle(location_handle)
    finally:
        server_process.wait()
Beispiel #19
0
def grpc_schedule_origin(schedule_name):
    """Yield the schedule origin for ``schedule_name`` on a freshly started gRPC server.

    Generator: the server loads the ``the_repo`` attribute of this file, and
    the yielded origin comes from a RepositoryGrpcServerOrigin for repository
    ``the_repo`` pointing at that server.

    Args:
        schedule_name: name passed to ``get_schedule_origin``.
    """
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable, python_file=__file__, attribute="the_repo"
    )
    server_process = GrpcServerProcess(loadable_target_origin=loadable_target_origin)
    try:
        with server_process.create_ephemeral_client() as api_client:
            repo_origin = RepositoryGrpcServerOrigin(
                host=api_client.host,
                port=api_client.port,
                socket=api_client.socket,
                repository_name="the_repo",
            )
            yield repo_origin.get_schedule_origin(schedule_name)
    finally:
        # Wait in a finally block: an exception raised by the consumer at the
        # yield point would otherwise skip the wait on the server process.
        server_process.wait()
Beispiel #20
0
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    """Check that a launched run still finishes after the ephemeral client context exits.

    A run of ``slow_pipeline`` is launched via ``instance.launch_run`` against
    an ephemeral gRPC server. After the client context is left, the run must
    still be unfinished and the server process still alive; the run must then
    reach SUCCESS and the server process must exit within 5 seconds.
    """
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=slow_pipeline, run_config=None)
        run_id = pipeline_run.run_id

        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(loadable_target_origin=target_origin,
                                           max_workers=4)
        with server_process.create_ephemeral_client() as api_client:
            # Point a repository location at the server that was just started.
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin)
            repository_location = GrpcServerRepositoryLocation(location_handle)

            repository = repository_location.get_repository("nope")
            external_pipeline = repository.get_full_external_pipeline("slow_pipeline")

            stored_run = instance.get_run_by_id(run_id)
            assert stored_run.status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(run_id=run_id, external_pipeline=external_pipeline)

        # Server process now receives shutdown event, run has not finished yet
        pipeline_run = instance.get_run_by_id(run_id)
        assert not pipeline_run.is_finished
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        pipeline_run = poll_for_finished_run(instance, run_id)
        assert pipeline_run.status == PipelineRunStatus.SUCCESS

        cleanup_started = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - cleanup_started < 5

        server_process.wait()
def grpc_server_bar_kwargs(pipeline_name=None):
    """Yield keyword arguments that point at an ephemeral gRPC server for ``bar``.

    This is a generator that yields exactly once; it is presumably wrapped by a
    context-manager decorator that is not visible in this excerpt (confirm).

    Args:
        pipeline_name: Optional pipeline name. When truthy it is forwarded
            under the ``"pipeline"`` key.

    Yields:
        dict: Always contains ``"grpc_host"``; contains ``"grpc_port"`` and/or
        ``"grpc_socket"`` when the client reports them, and ``"pipeline"``
        when ``pipeline_name`` was given.
    """
    server_process = GrpcServerProcess(
        loadable_target_origin=LoadableTargetOrigin(
            python_file=file_relative_path(__file__, "test_cli_commands.py"),
            attribute="bar"), )
    try:
        with server_process.create_ephemeral_client() as client:
            args = {"grpc_host": client.host}
            if pipeline_name:
                # Forward the caller's value instead of a hard-coded name, in
                # line with grpc_server_bar_cli_args.
                args["pipeline"] = pipeline_name
            if client.port:
                args["grpc_port"] = client.port
            if client.socket:
                args["grpc_socket"] = client.socket
            yield args
    finally:
        # Wait for the server process even when the consumer raised while the
        # generator was suspended at the yield above.
        server_process.wait()
Beispiel #22
0
def test_heartbeat():
    """A heartbeat-enabled server stays up after a heartbeat, then exits.

    The server is started with ``heartbeat=True`` and ``heartbeat_timeout=1``;
    after a single heartbeat and a two second pause with no further
    heartbeats, its process is expected to have terminated.
    """
    target_origin = LoadableTargetOrigin(
        attribute='bar_repo',
        python_file=file_relative_path(__file__, 'grpc_repo.py'),
    )
    grpc_process = GrpcServerProcess(
        loadable_target_origin=target_origin,
        max_workers=2,
        heartbeat=True,
        heartbeat_timeout=1,
    )
    with grpc_process.create_ephemeral_client() as grpc_client:
        grpc_client.heartbeat()

        # Right after a heartbeat the process must still be running.
        exit_code = grpc_process.server_process.poll()
        assert exit_code is None

        # Stay silent for longer than the heartbeat timeout.
        time.sleep(2)

        exit_code = grpc_process.server_process.poll()
        assert exit_code is not None
Beispiel #23
0
def test_process_killed_after_client_finished():
    """The server process exits and its socket file disappears after the client closes."""
    grpc_process = GrpcServerProcess()

    with grpc_process.create_ephemeral_client() as grpc_client:
        socket_path = grpc_client.socket
        assert socket_path and os.path.exists(socket_path)

    wait_began = time.time()
    while True:
        if grpc_process.server_process.poll() is not None:
            break
        time.sleep(0.05)
        # The process has to go away within 1.5 seconds.
        elapsed = time.time() - wait_began
        assert elapsed < 1.5

    # The socket file must be gone once the process has exited.
    assert not os.path.exists(socket_path)
def test_run_always_finishes(temp_instance):  # pylint: disable=redefined-outer-name
    """A run launched just before the client closes still completes.

    The run is launched inside the ephemeral-client context. After the
    context exits, the run must be unfinished and the server still alive; the
    run is then polled to SUCCESS and the server must exit within 5 seconds.
    """
    instance = temp_instance

    created_run = instance.create_run_for_pipeline(pipeline_def=slow_pipeline, run_config=None)
    run_id = created_run.run_id

    python_origin = ReconstructableRepository.for_file(__file__, 'nope').get_origin()
    target_origin = LoadableTargetOrigin.from_python_origin(python_origin)

    grpc_process = GrpcServerProcess(loadable_target_origin=target_origin, max_workers=4)
    with grpc_process.create_ephemeral_client() as api_client:
        location_handle = RepositoryLocationHandle.create_grpc_server_location(
            location_name='test',
            port=api_client.port,
            socket=api_client.socket,
            host=api_client.host,
        )
        repository = GrpcServerRepositoryLocation(location_handle).get_repository('nope')
        external_pipeline = repository.get_full_external_pipeline('slow_pipeline')

        status_before_launch = instance.get_run_by_id(run_id).status
        assert status_before_launch == PipelineRunStatus.NOT_STARTED

        instance.run_launcher.launch_run(
            instance=instance, run=created_run, external_pipeline=external_pipeline
        )

    # The client context has exited, so the server has been told to shut down,
    # but the run is still in flight and the process must still be alive.
    in_flight_run = instance.get_run_by_id(run_id)
    assert not in_flight_run.is_finished
    assert grpc_process.server_process.poll() is None

    # The server is expected to let the run finish before it goes away.
    finished_run = poll_for_run(instance, run_id)
    assert finished_run.status == PipelineRunStatus.SUCCESS

    wait_began = time.time()
    while grpc_process.server_process.poll() is None:
        time.sleep(0.05)
        # The process has to clean up within 5 seconds.
        elapsed = time.time() - wait_began
        assert elapsed < 5
Beispiel #25
0
 def _mgr_fn(instance, read_only):
     """Yield a ``WorkspaceProcessContext`` backed by an ephemeral gRPC server.

     Args:
         instance: Passed through as the first argument of
             ``WorkspaceProcessContext``.
         read_only: Passed through as its ``read_only`` keyword.

     Yields:
         The object produced by entering the ``WorkspaceProcessContext``.
     """
     grpc_process = GrpcServerProcess(
         loadable_target_origin=get_main_loadable_target_origin())
     try:
         with grpc_process.create_ephemeral_client() as api_client:
             grpc_target = GrpcServerTarget(
                 port=api_client.port,
                 socket=api_client.socket,
                 host=api_client.host,
                 location_name="test",
             )
             process_context = WorkspaceProcessContext(
                 instance,
                 grpc_target,
                 version="",
                 read_only=read_only,
             )
             with process_context as workspace:
                 yield workspace
     finally:
         # Always wait for the server process, even if the consumer raised.
         grpc_process.wait()
def grpc_server_bar_cli_args(pipeline_name=None):
    """Yield CLI arguments that point at an ephemeral gRPC server for ``bar``.

    This is a generator that yields exactly once; it is presumably wrapped by a
    context-manager decorator that is not visible in this excerpt (confirm).

    Args:
        pipeline_name: Optional pipeline name. When truthy,
            ``--pipeline <pipeline_name>`` is appended to the arguments.

    Yields:
        list: ``["--grpc-host", host]`` followed by ``--grpc-port`` and/or
        ``--grpc-socket`` pairs when the client reports them, then the
        optional ``--pipeline`` pair.
    """
    server_process = GrpcServerProcess(
        loadable_target_origin=LoadableTargetOrigin(
            python_file=file_relative_path(__file__, "test_cli_commands.py"),
            attribute="bar"), )
    try:
        with server_process.create_ephemeral_client() as client:
            args = ["--grpc-host", client.host]
            if client.port:
                args.extend(["--grpc-port", client.port])
            if client.socket:
                args.extend(["--grpc-socket", client.socket])
            if pipeline_name:
                args.extend(["--pipeline", pipeline_name])

            yield args
    finally:
        # Wait for the server process even when the consumer raised while the
        # generator was suspended at the yield above.
        server_process.wait()
Beispiel #27
0
def workspace_process_context_fixture(instance):
    """Yield a ``WorkspaceProcessContext`` for ``test_custom_repository_data.py``.

    Args:
        instance: Passed through as the first argument of
            ``WorkspaceProcessContext``.

    Yields:
        The object produced by entering the ``WorkspaceProcessContext``.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=file_relative_path(__file__, "test_custom_repository_data.py"),
    )
    grpc_process = GrpcServerProcess(loadable_target_origin=target_origin)
    try:
        # The server shuts down when this client context is left.
        with grpc_process.create_ephemeral_client():
            grpc_target = GrpcServerTarget(
                host="localhost",
                socket=grpc_process.socket,
                port=grpc_process.port,
                location_name="test",
            )
            with WorkspaceProcessContext(instance, grpc_target) as process_context:
                yield process_context
    finally:
        # Always wait for the server process, even if the consumer raised.
        grpc_process.wait()
        def _mgr_fn(recon_repo):
            """Yield a ``Workspace`` served by an ephemeral gRPC server.

            Args:
                recon_repo: A ``ReconstructableRepository``; its Python
                    origin's ``loadable_target_origin`` is what the server
                    is started with.

            Yields:
                The object produced by entering the ``Workspace``.
            """
            check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

            python_origin = recon_repo.get_python_origin()
            grpc_process = GrpcServerProcess(
                loadable_target_origin=python_origin.loadable_target_origin)
            try:
                with grpc_process.create_ephemeral_client() as api_client:
                    grpc_target = GrpcServerTarget(
                        port=api_client.port,
                        socket=api_client.socket,
                        host=api_client.host,
                        location_name="test",
                    )
                    with Workspace(grpc_target) as workspace:
                        yield workspace
            finally:
                # Always wait for the server process, even if the consumer raised.
                grpc_process.wait()
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` from the ``nope`` repository.

    The repository is reconstructed from ``repo.yaml`` next to this file and
    served by an ephemeral gRPC server for the duration of the yield.

    Args:
        pipeline_name: Name passed to ``get_full_external_pipeline``.

    Yields:
        The result of ``get_full_external_pipeline(pipeline_name)``.
    """
    yaml_path = file_relative_path(__file__, "repo.yaml")
    recon_repo = ReconstructableRepository.from_legacy_repository_yaml(yaml_path)
    grpc_process = GrpcServerProcess(
        loadable_target_origin=recon_repo.get_origin().loadable_target_origin)

    try:
        with grpc_process.create_ephemeral_client() as api_client:
            location_handle = RepositoryLocationHandle.create_grpc_server_location(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            repository = GrpcServerRepositoryLocation(location_handle).get_repository("nope")

            yield repository.get_full_external_pipeline(pipeline_name)
    finally:
        # Always wait for the server process, even if the consumer raised.
        grpc_process.wait()
Beispiel #30
0
class ManagedGrpcPythonEnvRepositoryLocationHandle(RepositoryLocationHandle):
    """Handle for a Python environment whose gRPC server Dagster manages.

    Constructing the handle starts a ``GrpcServerProcess`` for the origin's
    loadable target, opens a client to it, runs ``client_heartbeat_thread``
    in a daemon thread and eagerly loads repository data through the client.
    ``cleanup`` stops the heartbeat thread and calls the client's
    ``cleanup_server``.
    """

    def __init__(self, origin):
        """Start the managed server and load repository data from it.

        Args:
            origin: A ``ManagedGrpcPythonEnvRepositoryLocationOrigin``.

        Raises:
            Whatever is raised while starting or querying the server;
            ``cleanup`` is called before the exception propagates.
        """
        from dagster.grpc.client import client_heartbeat_thread
        from dagster.grpc.server import GrpcServerProcess

        # Everything cleanup() looks at is defined before anything can fail.
        self.grpc_server_process = None
        self.client = None
        self.heartbeat_shutdown_event = None
        self.heartbeat_thread = None

        self._origin = check.inst_param(
            origin, "origin", ManagedGrpcPythonEnvRepositoryLocationOrigin)
        target_origin = origin.loadable_target_origin

        self._external_repositories_data = None

        try:
            self.grpc_server_process = GrpcServerProcess(
                loadable_target_origin=target_origin,
                heartbeat=True,
            )
            self.client = self.grpc_server_process.create_ephemeral_client()

            # Background heartbeat: a daemon thread that is handed the client
            # and an event which cleanup() sets before joining the thread.
            self.heartbeat_shutdown_event = threading.Event()
            heartbeat_args = (self.client, self.heartbeat_shutdown_event)
            self.heartbeat_thread = threading.Thread(
                target=client_heartbeat_thread,
                args=heartbeat_args,
                name="grpc-client-heartbeat",
            )
            self.heartbeat_thread.daemon = True
            self.heartbeat_thread.start()

            repositories_response = sync_list_repositories_grpc(self.client)
            self.repository_code_pointer_dict = (
                repositories_response.repository_code_pointer_dict)

            image_response = self.client.get_current_image()
            self.container_image = image_response.current_image

            self._external_repositories_data = (
                sync_get_streaming_external_repositories_data_grpc(
                    self.client,
                    self,
                ))
        except:
            # Deliberately bare: release what was started, then re-raise.
            self.cleanup()
            raise

    def create_external_repositories(self):
        """Return a dict mapping repository name to ``ExternalRepository``.

        One entry is built for every item of the repository data loaded in
        ``__init__``, each bound to a ``RepositoryHandle`` for this location.
        """
        from dagster.core.host_representation.external import ExternalRepository

        external_repositories = {}
        for repo_name, repo_data in self._external_repositories_data.items():
            repository_handle = RepositoryHandle(
                repository_name=repo_name,
                repository_location_handle=self,
            )
            external_repositories[repo_name] = ExternalRepository(
                repo_data, repository_handle)
        return external_repositories

    def get_repository_python_origin(self, repository_name):
        """Return the result of ``_get_repository_python_origin`` for ``repository_name``."""
        return _get_repository_python_origin(
            self.executable_path,
            self.repository_code_pointer_dict,
            repository_name,
            self.container_image,
        )

    @property
    def origin(self):
        """The origin this handle was constructed with."""
        return self._origin

    @property
    def executable_path(self):
        """``executable_path`` of the loadable target origin."""
        return self.loadable_target_origin.executable_path

    @property
    def location_name(self):
        """``location_name`` of the origin."""
        return self.origin.location_name

    @property
    def loadable_target_origin(self):
        """``loadable_target_origin`` of the origin."""
        return self.origin.loadable_target_origin

    @property
    def repository_names(self):
        """Set of the keys of ``repository_code_pointer_dict``."""
        code_pointer_names = self.repository_code_pointer_dict.keys()
        return set(code_pointer_names)

    @property
    def host(self):
        """Always ``"localhost"``."""
        return "localhost"

    @property
    def port(self):
        """``port`` of the managed server process."""
        return self.grpc_server_process.port

    @property
    def socket(self):
        """``socket`` of the managed server process."""
        return self.grpc_server_process.socket

    @property
    def use_ssl(self):
        """Always ``False``."""
        return False

    def cleanup(self):
        """Stop the heartbeat thread, then call the client's ``cleanup_server``.

        Each resource is reset to ``None`` once handled, so calling this
        again skips whatever has already been released.
        """
        # Signal the heartbeat thread first so the join below can return.
        if self.heartbeat_shutdown_event:
            self.heartbeat_shutdown_event.set()
            self.heartbeat_shutdown_event = None

        if self.heartbeat_thread:
            self.heartbeat_thread.join()
            self.heartbeat_thread = None

        if self.client:
            self.client.cleanup_server()
            self.client = None

    @property
    def is_cleaned_up(self):
        """``True`` when no client is held (never created, or already cleaned up)."""
        return not self.client

    def create_location(self):
        """Return a ``GrpcServerRepositoryLocation`` wrapping this handle."""
        from dagster.core.host_representation.repository_location import (
            GrpcServerRepositoryLocation, )

        return GrpcServerRepositoryLocation(self)

    def get_display_metadata(self):
        """Return the origin's display metadata, plus ``"image"`` when one is set."""
        origin_metadata = self.origin.get_display_metadata()
        if self.container_image:
            image_metadata = {"image": self.container_image}
        else:
            image_metadata = {}
        return merge_dicts(origin_metadata, image_metadata)