Beispiel #1
0
def grpc_command(port=None, socket=None, host='localhost', **kwargs):
    '''Start a Dagster gRPC server and serve requests.

    Args:
        port: Port to serve on (--port/-p). Mandatory on Windows.
        socket: Socket to serve on (--socket/-f).
        host: Host passed through to the server.
        **kwargs: Must contain the keys 'attribute', 'working_directory', 'module_name' and
            'python_file'. If any of them is truthy, all four are bundled (together with the
            current interpreter path) into a LoadableTargetOrigin for the server.

    Raises:
        click.UsageError: On Windows when no port is given, or when port and socket are
            either both supplied or both missing.
    '''
    if seven.IS_WINDOWS and port is None:
        raise click.UsageError(
            'You must pass a valid --port/-p on Windows: --socket/-f not supported.'
        )
    # Exactly one transport must be selected. The earlier condition,
    # `not (port or socket and not (port and socket))`, parsed as
    # `not (port or (socket and ...))` and so silently accepted port *and* socket together.
    if bool(port) == bool(socket):
        raise click.UsageError(
            'You must pass one and only one of --port/-p or --socket/-f.')

    target_keys = ('attribute', 'working_directory', 'module_name', 'python_file')
    loadable_target_origin = None
    # Only build an origin when the caller actually pointed at some loadable target.
    if any(kwargs[key] for key in target_keys):
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute=kwargs['attribute'],
            working_directory=kwargs['working_directory'],
            module_name=kwargs['module_name'],
            python_file=kwargs['python_file'],
        )

    server = DagsterGrpcServer(port=port,
                               socket=socket,
                               host=host,
                               loadable_target_origin=loadable_target_origin)

    server.serve()
Beispiel #2
0
def sync_get_external_pipeline_subset_ephemeral_grpc(pipeline_origin, solid_selection=None):
    '''Fetch an external pipeline subset through a short-lived gRPC client.

    The ephemeral client is created for the executable recorded on ``pipeline_origin`` and
    the call is delegated to ``sync_get_external_pipeline_subset_grpc``.

    Args:
        pipeline_origin: A PipelinePythonOrigin identifying the pipeline.
        solid_selection: Optional list of str passed through to the gRPC call.
    '''
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelinePythonOrigin)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

    target_origin = LoadableTargetOrigin(executable_path=pipeline_origin.executable_path)
    with ephemeral_grpc_api_client(loadable_target_origin=target_origin) as client:
        return sync_get_external_pipeline_subset_grpc(client, pipeline_origin, solid_selection)
Beispiel #3
0
def get_bar_repo_grpc_repository_location_handle():
    '''Return a process-bound gRPC location handle for 'bar_repo' in api_tests_repo.py.'''
    bar_repo_origin = LoadableTargetOrigin(
        attribute='bar_repo',
        python_file=file_relative_path(__file__, 'api_tests_repo.py'),
    )
    return RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=bar_repo_origin, location_name='bar_repo',
    )
Beispiel #4
0
def sync_get_external_schedule_execution_data_ephemeral_grpc(
    instance, repository_handle, schedule_name
):
    '''Fetch schedule execution data through a short-lived gRPC client.

    The client is started for the executable path found on the repository handle's origin;
    the actual request is made by ``sync_get_external_schedule_execution_data_grpc``.
    '''
    executable_path = repository_handle.get_origin().executable_path
    target_origin = LoadableTargetOrigin(executable_path=executable_path)
    with ephemeral_grpc_api_client(target_origin) as client:
        return sync_get_external_schedule_execution_data_grpc(
            client, instance, repository_handle, schedule_name
        )
Beispiel #5
0
def sync_get_external_partition_names_ephemeral_grpc(repository_handle, partition_set_name):
    '''Fetch the partition names of a partition set through a short-lived gRPC client.

    Args:
        repository_handle: RepositoryHandle whose origin supplies the executable path.
        partition_set_name: Name (str) of the partition set to query.
    '''
    check.inst_param(repository_handle, 'repository_handle', RepositoryHandle)
    check.str_param(partition_set_name, 'partition_set_name')

    executable_path = repository_handle.get_origin().executable_path
    target_origin = LoadableTargetOrigin(executable_path=executable_path)

    with ephemeral_grpc_api_client(target_origin) as client:
        return sync_get_external_partition_names_grpc(
            client, repository_handle, partition_set_name
        )
Beispiel #6
0
def test_execute_run_api_grpc_python_handle(pipeline_handle):
    '''Execute a run of pipeline 'foo' over gRPC and check the exact sequence of event types.'''
    expected_event_types = [
        'ENGINE_EVENT',
        'ENGINE_EVENT',
        'PIPELINE_START',
        'ENGINE_EVENT',
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'ENGINE_EVENT',
        'PIPELINE_SUCCESS',
        'ENGINE_EVENT',
    ]

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        pipeline_run = instance.create_run(
            pipeline_name='foo',
            run_id=None,
            run_config={},
            mode='default',
            solids_to_execute=None,
            step_keys_to_execute=None,
            status=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=None,
            execution_plan_snapshot=None,
            parent_pipeline_snapshot=None,
        )

        repository_origin = pipeline_handle.get_origin().repository_origin
        target_origin = LoadableTargetOrigin.from_python_origin(repository_origin)

        with GrpcServerProcess(target_origin, max_workers=2) as server_process:
            client = server_process.create_ephemeral_client()

            # Drain the whole event stream before asserting on it.
            events = list(
                sync_execute_run_grpc(
                    api_client=client,
                    instance_ref=instance.get_ref(),
                    pipeline_origin=pipeline_handle.get_origin(),
                    pipeline_run=pipeline_run,
                )
            )

            assert len(events) == 14
            observed_event_types = [event.event_type_value for event in events]
            assert observed_event_types == expected_event_types
def get_external_pipeline_from_managed_grpc_python_env_repository(pipeline_name):
    '''Yield the full external pipeline ``pipeline_name`` from repository 'nope'.

    The repository is loaded from test_cli_api_run_launcher.py through a process-bound
    gRPC server location.
    '''
    nope_origin = LoadableTargetOrigin(
        attribute='nope',
        python_file=file_relative_path(__file__, 'test_cli_api_run_launcher.py'),
    )
    location_handle = RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=nope_origin, location_name='nope',
    )
    location = GrpcServerRepositoryLocation(location_handle)
    repository = location.get_repository('nope')

    yield repository.get_full_external_pipeline(pipeline_name)
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    '''Yield the full external pipeline ``pipeline_name`` served by a gRPC server process.

    The server is started for the repository described by repo.yaml (next to this file) and
    the pipeline is looked up in repository 'nope' while the ephemeral client is open.
    '''
    recon_repo = ReconstructableRepository.from_legacy_repository_yaml(
        file_relative_path(__file__, 'repo.yaml')
    )
    target_origin = LoadableTargetOrigin.from_python_origin(recon_repo.get_origin())
    server_process = GrpcServerProcess(loadable_target_origin=target_origin)
    with server_process.create_ephemeral_client() as server:
        location_handle = RepositoryLocationHandle.create_grpc_server_location(
            location_name='test', port=server.port, socket=server.socket, host='localhost',
        )
        location = GrpcServerRepositoryLocation(location_handle)

        yield location.get_repository('nope').get_full_external_pipeline(pipeline_name)
        def _mgr_fn(recon_repo):
            '''Yield a single-element list with a process-bound gRPC repository location.

            The location goes out of process via gRPC and is named 'test'.
            '''
            check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)

            target_origin = LoadableTargetOrigin.from_python_origin(recon_repo.get_origin())
            location_handle = RepositoryLocationHandle.create_process_bound_grpc_server_location(
                loadable_target_origin=target_origin, location_name='test',
            )

            yield [GrpcServerRepositoryLocation(location_handle)]
Beispiel #10
0
def test_heartbeat():
    '''A heartbeat-enabled server stays up right after a heartbeat and is gone 2s later.'''
    bar_repo_origin = LoadableTargetOrigin(
        attribute='bar_repo', python_file=file_relative_path(__file__, 'grpc_repo.py'),
    )
    server_options = {
        'loadable_target_origin': bar_repo_origin,
        'max_workers': 2,
        'heartbeat': True,
        'heartbeat_timeout': 1,
    }
    server = GrpcServerProcess(**server_options)

    with server.create_ephemeral_client() as client:
        client.heartbeat()
        # poll() returning None means the server process has not exited yet.
        assert server.server_process.poll() is None
        # Sleep for longer than the 1 second heartbeat_timeout without sending a heartbeat.
        time.sleep(2)
        assert server.server_process.poll() is not None
def grpc_schedule_origin(schedule_name):
    '''Yield the schedule origin of ``schedule_name`` in 'the_repo', served over gRPC.

    DAGSTER_HOME points at a temporary directory, and a gRPC server for this file's
    'the_repo' attribute stays up, for as long as the generator is suspended at its yield.
    '''
    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            attribute='the_repo',
        )
        server_process = GrpcServerProcess(loadable_target_origin=target_origin)
        with server_process.create_ephemeral_client() as client:
            repository_origin = RepositoryGrpcServerOrigin(
                host=client.host,
                port=client.port,
                socket=client.socket,
                repository_name='the_repo',
            )
            yield repository_origin.get_schedule_origin(schedule_name)
def test_run_always_finishes(temp_instance):  # pylint: disable=redefined-outer-name
    '''Check that a gRPC server outlives its client while a launched run is in progress.

    The run of ``slow_pipeline`` is launched while the ephemeral client is open. After the
    client context exits, the test asserts that the run is unfinished and the server process
    is still alive, then that the run ends in SUCCESS, and finally that the server process
    exits within 5 seconds of that point.
    '''
    instance = temp_instance

    pipeline_run = instance.create_run_for_pipeline(pipeline_def=slow_pipeline, run_config=None)
    run_id = pipeline_run.run_id

    # Serve the 'nope' repository defined in this very file.
    recon_repo = ReconstructableRepository.for_file(__file__, 'nope')
    loadable_target_origin = LoadableTargetOrigin.from_python_origin(recon_repo.get_origin())

    # Keep a reference to the server process so it can be polled after the client closes.
    server_process = GrpcServerProcess(loadable_target_origin=loadable_target_origin, max_workers=4)
    with server_process.create_ephemeral_client() as api_client:
        repository_location = GrpcServerRepositoryLocation(
            RepositoryLocationHandle.create_grpc_server_location(
                location_name='test',
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
        )

        external_pipeline = repository_location.get_repository('nope').get_full_external_pipeline(
            'slow_pipeline'
        )

        assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

        launcher = instance.run_launcher
        launcher.launch_run(
            instance=instance, run=pipeline_run, external_pipeline=external_pipeline
        )

    # Server process now receives shutdown event, run has not finished yet
    pipeline_run = instance.get_run_by_id(run_id)
    assert not pipeline_run.is_finished
    # poll() returning None means the server process has not exited.
    assert server_process.server_process.poll() is None

    # Server should wait until run finishes, then shutdown
    pipeline_run = poll_for_run(instance, run_id)
    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    # Busy-wait for the server process to exit, failing once 5 seconds have elapsed.
    start_time = time.time()
    while server_process.server_process.poll() is None:
        time.sleep(0.05)
        # Verify server process cleans up eventually
        assert time.time() - start_time < 5
Beispiel #13
0
def sync_list_repositories_ephemeral_grpc(executable_path, python_file,
                                          module_name, working_directory):
    '''List repositories of a python target through a short-lived gRPC client.

    Args:
        executable_path: Interpreter path (str, required).
        python_file: Optional str path of the python file to load.
        module_name: Optional str name of the module to load.
        working_directory: Optional str working directory.

    Returns:
        Whatever ``sync_list_repositories_grpc`` returns for the ephemeral client.
    '''
    # Imported inside the function, as in the original code.
    from dagster.grpc.client import ephemeral_grpc_api_client
    from dagster.grpc.types import LoadableTargetOrigin

    check.str_param(executable_path, 'executable_path')
    check.opt_str_param(python_file, 'python_file')
    check.opt_str_param(module_name, 'module_name')
    check.opt_str_param(working_directory, 'working_directory')

    # No attribute is given, so the origin covers the whole file/module.
    target_origin = LoadableTargetOrigin(
        executable_path=executable_path,
        module_name=module_name,
        python_file=python_file,
        working_directory=working_directory,
        attribute=None,
    )
    with ephemeral_grpc_api_client(loadable_target_origin=target_origin) as client:
        return sync_list_repositories_grpc(client)
        def _mgr_fn(recon_repo):
            '''Yield a single-element list with a repository location backed by a gRPC server.

            A GrpcServerProcess is started for the repository's origin and stays open while
            the generator is suspended at its yield; the location is named 'test'.
            '''
            check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)

            target_origin = LoadableTargetOrigin.from_python_origin(recon_repo.get_origin())

            with GrpcServerProcess(loadable_target_origin=target_origin) as server:
                location_handle = RepositoryLocationHandle.create_grpc_server_location(
                    port=server.port,
                    socket=server.socket,
                    host='localhost',
                    location_name='test',
                )
                yield [GrpcServerRepositoryLocation(location_handle)]
Beispiel #15
0
def location_handle_from_python_file(python_file,
                                     attribute,
                                     user_process_api,
                                     location_name=None,
                                     working_directory=None):
    '''Create a RepositoryLocationHandle for definitions living in a python file.

    Args:
        python_file: Path (str) of the file to load.
        attribute: Optional str naming a single target in the file; when falsy, every
            loadable target found in the file is used.
        user_process_api: UserProcessApi value. GRPC produces a process-bound gRPC server
            location; any other value produces an out-of-process location.
        location_name: Optional str name of the location.
        working_directory: Optional str working directory used when loading the file.
    '''
    check.str_param(python_file, 'python_file')
    check.opt_str_param(attribute, 'attribute')
    check.inst_param(user_process_api, 'user_process_api', UserProcessApi)
    check.opt_str_param(location_name, 'location_name')
    check.opt_str_param(working_directory, 'working_directory')

    if user_process_api == UserProcessApi.GRPC:
        grpc_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=python_file,
            module_name=None,
            working_directory=working_directory,
            attribute=attribute,
        )
        return RepositoryLocationHandle.create_process_bound_grpc_server_location(
            loadable_target_origin=grpc_origin,
            location_name=location_name,
        )

    if attribute:
        definition = load_def_in_python_file(python_file, attribute, working_directory)
        targets = [LoadableTarget(attribute, definition)]
    else:
        targets = loadable_targets_from_python_file(python_file, working_directory)

    # Key each code pointer by the name of the definition it points at.
    pointers_by_name = {}
    for target in targets:
        pointer = CodePointer.from_python_file(python_file, target.attribute, working_directory)
        pointers_by_name[target.target_definition.name] = pointer

    # default to the name of the repository symbol for now
    resolved_location_name = assign_location_name(location_name, pointers_by_name)
    return RepositoryLocationHandle.create_out_of_process_location(
        repository_code_pointer_dict=pointers_by_name,
        location_name=resolved_location_name,
    )
Beispiel #16
0
def location_handle_from_python_file(
    python_file,
    attribute,
    user_process_api,
    location_name=None,
    working_directory=None,
    executable_path=sys.executable,
):
    '''Create a RepositoryLocationHandle for definitions living in a python file.

    Args:
        python_file: Path (str) of the file to load.
        attribute: Optional str naming a single target in the file.
        user_process_api: UserProcessApi value. GRPC produces a process-bound gRPC server
            location; any other value lists the repositories and produces a python
            environment location.
        location_name: Optional str name of the location.
        working_directory: Optional str working directory used when loading the file.
        executable_path: Interpreter to load the file with; defaults to sys.executable.
    '''
    check.str_param(python_file, 'python_file')
    check.opt_str_param(attribute, 'attribute')
    check.inst_param(user_process_api, 'user_process_api', UserProcessApi)
    check.opt_str_param(location_name, 'location_name')
    check.opt_str_param(working_directory, 'working_directory')

    if user_process_api == UserProcessApi.GRPC:
        grpc_origin = LoadableTargetOrigin(
            executable_path=executable_path,
            python_file=python_file,
            module_name=None,
            working_directory=working_directory,
            attribute=attribute,
        )
        return RepositoryLocationHandle.create_process_bound_grpc_server_location(
            loadable_target_origin=grpc_origin,
            location_name=location_name,
        )

    listing = sync_list_repositories(
        executable_path=executable_path,
        python_file=python_file,
        module_name=None,
        working_directory=working_directory,
        attribute=attribute,
    )
    # One code pointer per repository symbol reported back, keyed by repository name.
    pointers_by_repo_name = {}
    for symbol in listing.repository_symbols:
        pointers_by_repo_name[symbol.repository_name] = CodePointer.from_python_file(
            python_file, symbol.attribute, working_directory
        )

    return RepositoryLocationHandle.create_python_env_location(
        executable_path=executable_path,
        location_name=location_name,
        repository_code_pointer_dict=pointers_by_repo_name,
    )
Beispiel #17
0
def _ephemeral_launched_run_client(
    instance_ref, pipeline_origin, pipeline_run_id, cancellation_event
):
    '''Execute a run on an ephemeral two-worker gRPC server, honouring cancellation.

    The server gets two workers so that a cancellation request can be handled while the run
    itself is still executing, instead of waiting for the launched run to complete.

    Args:
        instance_ref: InstanceRef used to load the DagsterInstance and passed to the run.
        pipeline_origin: PipelinePythonOrigin of the pipeline to execute.
        pipeline_run_id: Id (str) of the run to execute.
        cancellation_event: multiprocessing Event; once set, cancellation is requested.
    '''
    check.inst_param(instance_ref, 'instance_ref', InstanceRef)
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelinePythonOrigin)
    check.str_param(pipeline_run_id, 'pipeline_run_id')
    check.inst_param(cancellation_event, 'cancellation_event', multiprocessing.synchronize.Event)

    pipeline_run = DagsterInstance.from_ref(instance_ref).get_run_by_id(pipeline_run_id)
    target_origin = LoadableTargetOrigin.from_python_origin(pipeline_origin.repository_origin)

    with GrpcServerProcess(target_origin, max_workers=2) as server_process:
        client = server_process.create_ephemeral_client()

        # The run executes on a background thread so this thread can watch for cancellation.
        run_thread = threading.Thread(
            target=sync_execute_run_grpc,
            kwargs=dict(
                api_client=client,
                instance_ref=instance_ref,
                pipeline_origin=pipeline_origin,
                pipeline_run=pipeline_run,
            ),
        )
        run_thread.start()

        while run_thread.is_alive():
            if cancellation_event.is_set():
                cancel_request = CancelExecutionRequest(run_id=pipeline_run_id)
                client.cancel_execution(cancel_request)
                # Wait for the run thread to finish after requesting cancellation.
                run_thread.join()
            time.sleep(SUBPROCESS_TICK)
Beispiel #18
0
def test_cancel_run():
    '''Cancel a streaming run over gRPC and check that it stops early with a step failure.'''
    streamer_config = {'solids': {'streamer': {'config': {'length': 20}}}}

    with temp_instance() as instance:
        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            working_directory=None,
        )
        server_process = GrpcServerProcess(target_origin, max_workers=10)

        with server_process.create_ephemeral_client() as api_client:
            streaming_results = []

            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline, run_config=streamer_config,
            )
            repository_origin = RepositoryGrpcServerOrigin(
                host='localhost',
                socket=api_client.socket,
                port=api_client.port,
                repository_name='test_repository',
            )
            execute_run_args = ExecuteRunArgs(
                pipeline_origin=PipelineGrpcServerOrigin(
                    pipeline_name='streaming_pipeline',
                    repository_origin=repository_origin,
                ),
                pipeline_run_id=pipeline_run.run_id,
                instance_ref=instance.get_ref(),
            )

            # Consume the event stream on a daemon thread while this thread cancels the run.
            streamer = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, api_client, execute_run_args],
            )
            streamer.daemon = True
            streamer.start()
            poll_for_step_start(instance, pipeline_run.run_id)

            cancel_request = CancelExecutionRequest(run_id=pipeline_run.run_id)
            res = api_client.cancel_execution(cancel_execution_request=cancel_request)
            assert res.success is True

            poll_for_run(instance, pipeline_run.run_id)

            logged_event_types = [
                ev.dagster_event.event_type_value
                for ev in instance.all_logs(pipeline_run.run_id)
            ]
            # Fewer materializations than the configured length of 20.
            assert logged_event_types.count('STEP_MATERIALIZATION') < 20

            # soft termination
            assert 'STEP_FAILURE' in logged_event_types
Beispiel #19
0
def _location_handle_from_python_environment_config(python_environment_config,
                                                    yaml_path,
                                                    user_process_api):
    '''Build a RepositoryLocationHandle from a python environment config entry.

    Args:
        python_environment_config: dict with an 'executable_path' entry and a 'target' entry.
            The target must satisfy ``is_target_config`` and is inspected for 'python_file',
            'python_module' and 'python_package', in that order of precedence.
        yaml_path: Path (str) of the yaml file the config came from; forwarded to
            ``_get_python_file_config_data`` for python_file targets.
        user_process_api: UserProcessApi value. GRPC produces a process-bound gRPC server
            location; any other value produces a python environment location.

    Returns:
        The RepositoryLocationHandle for the configured target. For the non-gRPC path the
        code pointers cover either the single configured attribute or, when no attribute is
        configured, every repository symbol reported by ``sync_list_repositories``.
    '''
    check.dict_param(python_environment_config, 'python_environment_config')
    check.str_param(yaml_path, 'yaml_path')
    check.inst_param(user_process_api, 'user_process_api', UserProcessApi)

    # Expand a leading '~' in the configured interpreter path.
    executable_path = os.path.expanduser(python_environment_config['executable_path'])
    target_config = python_environment_config['target']

    check.invariant(is_target_config(target_config))

    python_file_config = target_config.get('python_file')
    python_module_config = target_config.get('python_module')
    python_package_config = target_config.get('python_package')

    # Each target kind only differs in how its coordinates are read from the config and in
    # which CodePointer factory applies; everything after this if-chain is shared.
    if python_file_config:
        python_file, attribute, location_name, working_directory = _get_python_file_config_data(
            python_file_config, yaml_path)
        module_name = None

        def code_pointer_for(symbol):
            return CodePointer.from_python_file(python_file, symbol, working_directory)

    elif python_module_config:
        module_name, attribute, location_name = _get_module_config_data(
            python_module_config)
        python_file = working_directory = None

        def code_pointer_for(symbol):
            return CodePointer.from_module(module_name, symbol)

    else:
        check.invariant(python_package_config)
        module_name, attribute, location_name = _get_package_config_data(
            python_package_config)
        python_file = working_directory = None

        def code_pointer_for(symbol):
            return CodePointer.from_python_package(module_name, symbol)

    if user_process_api == UserProcessApi.GRPC:
        # The configured working directory is forwarded (it used to be dropped here) so the
        # gRPC server loads a python_file target the same way the code pointers do.
        return RepositoryLocationHandle.create_process_bound_grpc_server_location(
            loadable_target_origin=LoadableTargetOrigin(
                executable_path=executable_path,
                python_file=python_file,
                module_name=module_name,
                working_directory=working_directory,
                attribute=attribute,
            ),
            location_name=location_name,
        )

    if attribute:
        repository_code_pointer_dict = {attribute: code_pointer_for(attribute)}
    else:
        # No attribute configured: ask the target environment which repositories exist.
        response = sync_list_repositories(
            executable_path=executable_path,
            python_file=python_file,
            module_name=module_name,
            working_directory=working_directory,
        )
        repository_code_pointer_dict = {
            lrs.repository_name: code_pointer_for(lrs.attribute)
            for lrs in response.repository_symbols
        }

    return RepositoryLocationHandle.create_python_env_location(
        executable_path=executable_path,
        location_name=location_name,
        repository_code_pointer_dict=repository_code_pointer_dict,
    )