def grpc_command(port=None, socket=None, host='localhost', **kwargs):
    '''Validate the CLI options, then construct a DagsterGrpcServer and serve.

    Args:
        port: Port for the server, or None.
        socket: Socket for the server, or None.
        host: Host name handed to the server.
        **kwargs: Must contain the keys ``attribute``, ``working_directory``,
            ``module_name`` and ``python_file``. If any of them is truthy, a
            LoadableTargetOrigin is built from all four and passed to the server.

    Raises:
        click.UsageError: On Windows when no port is given, or when not exactly
            one of ``port`` / ``socket`` is provided.
    '''
    if seven.IS_WINDOWS and port is None:
        raise click.UsageError(
            'You must pass a valid --port/-p on Windows: --socket/-f not supported.'
        )

    # Exactly one of port/socket must be provided. The previous expression,
    # `not (port or socket and not (port and socket))`, parsed as
    # `not (port or (socket and ...))` and therefore never rejected the case
    # where both options were passed.
    if bool(port) == bool(socket):
        raise click.UsageError('You must pass one and only one of --port/-p or --socket/-f.')

    target_keys = ['attribute', 'working_directory', 'module_name', 'python_file']
    loadable_target_origin = None
    if any(kwargs[key] for key in target_keys):
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute=kwargs['attribute'],
            working_directory=kwargs['working_directory'],
            module_name=kwargs['module_name'],
            python_file=kwargs['python_file'],
        )

    server = DagsterGrpcServer(
        port=port, socket=socket, host=host, loadable_target_origin=loadable_target_origin
    )
    server.serve()
def sync_get_external_pipeline_subset_ephemeral_grpc(pipeline_origin, solid_selection=None):
    '''Run sync_get_external_pipeline_subset_grpc against an ephemeral gRPC API client.

    Args:
        pipeline_origin (PipelinePythonOrigin): Origin of the pipeline; its
            ``executable_path`` is used to build the client's LoadableTargetOrigin.
        solid_selection (Optional[List[str]]): Forwarded unchanged to the gRPC call.

    Returns:
        Whatever sync_get_external_pipeline_subset_grpc returns.
    '''
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelinePythonOrigin)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

    target_origin = LoadableTargetOrigin(executable_path=pipeline_origin.executable_path)
    with ephemeral_grpc_api_client(loadable_target_origin=target_origin) as client:
        return sync_get_external_pipeline_subset_grpc(client, pipeline_origin, solid_selection)
def get_bar_repo_grpc_repository_location_handle():
    '''Return a process-bound gRPC server location handle named ``bar_repo``.

    The handle targets the ``bar_repo`` attribute of ``api_tests_repo.py``,
    resolved relative to this file.
    '''
    repo_file = file_relative_path(__file__, 'api_tests_repo.py')
    origin = LoadableTargetOrigin(attribute='bar_repo', python_file=repo_file)
    return RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=origin, location_name='bar_repo',
    )
def sync_get_external_schedule_execution_data_ephemeral_grpc(
    instance, repository_handle, schedule_name
):
    '''Run sync_get_external_schedule_execution_data_grpc against an ephemeral gRPC API client.

    The client is created from a LoadableTargetOrigin carrying only the
    executable path of ``repository_handle.get_origin()``.

    Returns:
        Whatever sync_get_external_schedule_execution_data_grpc returns.
    '''
    executable_path = repository_handle.get_origin().executable_path
    target_origin = LoadableTargetOrigin(executable_path=executable_path)
    with ephemeral_grpc_api_client(target_origin) as client:
        return sync_get_external_schedule_execution_data_grpc(
            client, instance, repository_handle, schedule_name
        )
def sync_get_external_partition_names_ephemeral_grpc(repository_handle, partition_set_name):
    '''Run sync_get_external_partition_names_grpc against an ephemeral gRPC API client.

    Args:
        repository_handle (RepositoryHandle): Its origin supplies the executable
            path used to build the client's LoadableTargetOrigin.
        partition_set_name (str): Forwarded unchanged to the gRPC call.

    Returns:
        Whatever sync_get_external_partition_names_grpc returns.
    '''
    check.inst_param(repository_handle, 'repository_handle', RepositoryHandle)
    check.str_param(partition_set_name, 'partition_set_name')

    executable_path = repository_handle.get_origin().executable_path
    target_origin = LoadableTargetOrigin(executable_path=executable_path)
    with ephemeral_grpc_api_client(target_origin) as client:
        return sync_get_external_partition_names_grpc(
            client, repository_handle, partition_set_name
        )
def test_execute_run_api_grpc_python_handle(pipeline_handle):
    '''Execute pipeline ``foo`` through a GrpcServerProcess and check the emitted event sequence.'''
    expected_event_types = [
        'ENGINE_EVENT',
        'ENGINE_EVENT',
        'PIPELINE_START',
        'ENGINE_EVENT',
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'ENGINE_EVENT',
        'PIPELINE_SUCCESS',
        'ENGINE_EVENT',
    ]

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        run = instance.create_run(
            pipeline_name='foo',
            run_id=None,
            run_config={},
            mode='default',
            solids_to_execute=None,
            step_keys_to_execute=None,
            status=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=None,
            execution_plan_snapshot=None,
            parent_pipeline_snapshot=None,
        )

        target_origin = LoadableTargetOrigin.from_python_origin(
            pipeline_handle.get_origin().repository_origin
        )

        with GrpcServerProcess(target_origin, max_workers=2) as server_process:
            client = server_process.create_ephemeral_client()
            # Drain the event stream into a list before asserting on it.
            events = list(
                sync_execute_run_grpc(
                    api_client=client,
                    instance_ref=instance.get_ref(),
                    pipeline_origin=pipeline_handle.get_origin(),
                    pipeline_run=run,
                )
            )

            assert len(events) == 14
            assert [event.event_type_value for event in events] == expected_event_types
def get_external_pipeline_from_managed_grpc_python_env_repository(pipeline_name):
    '''Yield the full external pipeline ``pipeline_name`` from repository ``nope``.

    The repository location is backed by a process-bound gRPC server location
    handle targeting the ``nope`` attribute of ``test_cli_api_run_launcher.py``.
    '''
    target_origin = LoadableTargetOrigin(
        attribute='nope',
        python_file=file_relative_path(__file__, 'test_cli_api_run_launcher.py'),
    )
    location_handle = RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=target_origin, location_name='nope',
    )
    location = GrpcServerRepositoryLocation(location_handle)
    repository = location.get_repository('nope')

    yield repository.get_full_external_pipeline(pipeline_name)
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    '''Yield the full external pipeline ``pipeline_name`` from repository ``nope``.

    A GrpcServerProcess is started for the repository described by ``repo.yaml``
    (next to this file); the pipeline is yielded while its ephemeral client is open.
    '''
    yaml_path = file_relative_path(__file__, 'repo.yaml')
    recon_repo = ReconstructableRepository.from_legacy_repository_yaml(yaml_path)
    target_origin = LoadableTargetOrigin.from_python_origin(recon_repo.get_origin())

    server_process = GrpcServerProcess(loadable_target_origin=target_origin)
    with server_process.create_ephemeral_client() as client:
        location_handle = RepositoryLocationHandle.create_grpc_server_location(
            location_name='test', port=client.port, socket=client.socket, host='localhost',
        )
        location = GrpcServerRepositoryLocation(location_handle)

        yield location.get_repository('nope').get_full_external_pipeline(pipeline_name)
def _mgr_fn(recon_repo):
    '''Yield a one-element list holding a gRPC repository location for ``recon_repo``.

    The location is built on a process-bound gRPC server location handle named ``test``.
    '''
    check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)

    target_origin = LoadableTargetOrigin.from_python_origin(recon_repo.get_origin())
    location_handle = RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=target_origin, location_name='test',
    )

    yield [GrpcServerRepositoryLocation(location_handle)]
def test_heartbeat():
    '''Check the server process exits once heartbeats stop.

    The server is started with ``heartbeat=True`` and ``heartbeat_timeout=1``.
    Right after one heartbeat ``poll()`` must still be None; after sleeping two
    seconds without another heartbeat ``poll()`` must no longer be None.
    '''
    repo_file = file_relative_path(__file__, 'grpc_repo.py')
    target_origin = LoadableTargetOrigin(attribute='bar_repo', python_file=repo_file)
    grpc_server = GrpcServerProcess(
        loadable_target_origin=target_origin,
        max_workers=2,
        heartbeat=True,
        heartbeat_timeout=1,
    )

    with grpc_server.create_ephemeral_client() as api_client:
        api_client.heartbeat()
        assert grpc_server.server_process.poll() is None
        # Wait past the one-second heartbeat timeout without sending another beat.
        time.sleep(2)
        assert grpc_server.server_process.poll() is not None
def grpc_schedule_origin(schedule_name):
    '''Yield the schedule origin for ``schedule_name`` in ``the_repo`` served over gRPC.

    While the value is yielded, ``DAGSTER_HOME`` points at a temporary directory
    and a GrpcServerProcess for this file's ``the_repo`` attribute has an open
    ephemeral client.
    '''
    with seven.TemporaryDirectory() as temp_dir, environ({'DAGSTER_HOME': temp_dir}):
        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable, python_file=__file__, attribute='the_repo'
        )
        server_process = GrpcServerProcess(loadable_target_origin=target_origin)
        with server_process.create_ephemeral_client() as client:
            repository_origin = RepositoryGrpcServerOrigin(
                host=client.host,
                port=client.port,
                socket=client.socket,
                repository_name='the_repo',
            )
            yield repository_origin.get_schedule_origin(schedule_name)
def test_run_always_finishes(temp_instance):  # pylint: disable=redefined-outer-name
    '''Check that a launched run still completes after the ephemeral client is closed.

    The run is launched while the ephemeral client is open. After the client
    context exits, the test asserts that the run is not yet finished and the
    server process is still alive, waits for the run to reach SUCCESS, and then
    requires the server process to exit within five seconds.
    '''
    instance = temp_instance

    pipeline_run = instance.create_run_for_pipeline(pipeline_def=slow_pipeline, run_config=None)
    run_id = pipeline_run.run_id

    recon_repo = ReconstructableRepository.for_file(__file__, 'nope')
    loadable_target_origin = LoadableTargetOrigin.from_python_origin(recon_repo.get_origin())

    server_process = GrpcServerProcess(loadable_target_origin=loadable_target_origin, max_workers=4)
    with server_process.create_ephemeral_client() as api_client:
        repository_location = GrpcServerRepositoryLocation(
            RepositoryLocationHandle.create_grpc_server_location(
                location_name='test',
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
        )

        external_pipeline = repository_location.get_repository('nope').get_full_external_pipeline(
            'slow_pipeline'
        )

        # The run must not have started before it is handed to the launcher.
        assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

        launcher = instance.run_launcher
        launcher.launch_run(
            instance=instance, run=pipeline_run, external_pipeline=external_pipeline
        )

    # Server process now receives shutdown event, run has not finished yet
    pipeline_run = instance.get_run_by_id(run_id)
    assert not pipeline_run.is_finished
    assert server_process.server_process.poll() is None

    # Server should wait until run finishes, then shutdown
    pipeline_run = poll_for_run(instance, run_id)
    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    start_time = time.time()
    while server_process.server_process.poll() is None:
        time.sleep(0.05)
        # Verify server process cleans up eventually
        assert time.time() - start_time < 5
def sync_list_repositories_ephemeral_grpc(
    executable_path, python_file, module_name, working_directory
):
    '''Run sync_list_repositories_grpc against an ephemeral gRPC API client.

    Args:
        executable_path (str): Required executable path for the target origin.
        python_file (Optional[str]): Python file of the target, if any.
        module_name (Optional[str]): Module name of the target, if any.
        working_directory (Optional[str]): Working directory of the target, if any.

    Returns:
        Whatever sync_list_repositories_grpc returns.
    '''
    # Imported inside the function, as in the original implementation.
    from dagster.grpc.client import ephemeral_grpc_api_client
    from dagster.grpc.types import LoadableTargetOrigin

    check.str_param(executable_path, 'executable_path')
    check.opt_str_param(python_file, 'python_file')
    check.opt_str_param(module_name, 'module_name')
    check.opt_str_param(working_directory, 'working_directory')

    # No attribute is specified on the origin.
    target_origin = LoadableTargetOrigin(
        executable_path=executable_path,
        module_name=module_name,
        python_file=python_file,
        working_directory=working_directory,
        attribute=None,
    )
    with ephemeral_grpc_api_client(loadable_target_origin=target_origin) as client:
        return sync_list_repositories_grpc(client)
def _mgr_fn(recon_repo):
    '''Yield a one-element list holding a gRPC repository location for ``recon_repo``.

    A GrpcServerProcess is kept open as a context manager while the list is
    yielded; the location handle points at that server on ``localhost``.
    '''
    check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)

    target_origin = LoadableTargetOrigin.from_python_origin(recon_repo.get_origin())

    with GrpcServerProcess(loadable_target_origin=target_origin) as grpc_server:
        location_handle = RepositoryLocationHandle.create_grpc_server_location(
            port=grpc_server.port,
            socket=grpc_server.socket,
            host='localhost',
            location_name='test',
        )
        yield [GrpcServerRepositoryLocation(location_handle)]
def location_handle_from_python_file(
    python_file, attribute, user_process_api, location_name=None, working_directory=None
):
    '''Create a RepositoryLocationHandle for definitions in a Python file.

    Args:
        python_file (str): Path of the file to load from.
        attribute (Optional[str]): Attribute to load; when falsy, the loadable
            targets are discovered with loadable_targets_from_python_file.
        user_process_api (UserProcessApi): ``GRPC`` selects a process-bound gRPC
            server location; anything else produces an out-of-process location.
        location_name (Optional[str]): Name for the location.
        working_directory (Optional[str]): Working directory used when loading.

    Returns:
        The handle created by the matching RepositoryLocationHandle factory.
    '''
    check.str_param(python_file, 'python_file')
    check.opt_str_param(attribute, 'attribute')
    check.inst_param(user_process_api, 'user_process_api', UserProcessApi)
    check.opt_str_param(location_name, 'location_name')
    check.opt_str_param(working_directory, 'working_directory')

    if user_process_api == UserProcessApi.GRPC:
        grpc_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=python_file,
            module_name=None,
            working_directory=working_directory,
            attribute=attribute,
        )
        return RepositoryLocationHandle.create_process_bound_grpc_server_location(
            loadable_target_origin=grpc_origin, location_name=location_name,
        )

    if attribute:
        definition = load_def_in_python_file(python_file, attribute, working_directory)
        targets = [LoadableTarget(attribute, definition)]
    else:
        targets = loadable_targets_from_python_file(python_file, working_directory)

    # One code pointer per loadable target, keyed by the target definition's name.
    code_pointers = {
        target.target_definition.name: CodePointer.from_python_file(
            python_file, target.attribute, working_directory
        )
        for target in targets
    }

    return RepositoryLocationHandle.create_out_of_process_location(
        repository_code_pointer_dict=code_pointers,
        # default to the name of the repository symbol for now
        location_name=assign_location_name(location_name, code_pointers),
    )
def location_handle_from_python_file(
    python_file,
    attribute,
    user_process_api,
    location_name=None,
    working_directory=None,
    executable_path=sys.executable,
):
    '''Create a RepositoryLocationHandle for definitions in a Python file.

    Args:
        python_file (str): Path of the file to load from.
        attribute (Optional[str]): Attribute to load, if any.
        user_process_api (UserProcessApi): ``GRPC`` selects a process-bound gRPC
            server location; anything else produces a python-env location.
        location_name (Optional[str]): Name for the location.
        working_directory (Optional[str]): Working directory used when loading.
        executable_path: Python executable to use; defaults to ``sys.executable``.

    Returns:
        The handle created by the matching RepositoryLocationHandle factory.
    '''
    check.str_param(python_file, 'python_file')
    check.opt_str_param(attribute, 'attribute')
    check.inst_param(user_process_api, 'user_process_api', UserProcessApi)
    check.opt_str_param(location_name, 'location_name')
    check.opt_str_param(working_directory, 'working_directory')

    if user_process_api == UserProcessApi.GRPC:
        grpc_origin = LoadableTargetOrigin(
            executable_path=executable_path,
            python_file=python_file,
            module_name=None,
            working_directory=working_directory,
            attribute=attribute,
        )
        return RepositoryLocationHandle.create_process_bound_grpc_server_location(
            loadable_target_origin=grpc_origin, location_name=location_name,
        )

    # Non-gRPC path: list the repositories, then map each repository name to a
    # code pointer into the file.
    listing = sync_list_repositories(
        executable_path=executable_path,
        python_file=python_file,
        module_name=None,
        working_directory=working_directory,
        attribute=attribute,
    )
    code_pointers = {
        symbol.repository_name: CodePointer.from_python_file(
            python_file, symbol.attribute, working_directory
        )
        for symbol in listing.repository_symbols
    }
    return RepositoryLocationHandle.create_python_env_location(
        executable_path=executable_path,
        location_name=location_name,
        repository_code_pointer_dict=code_pointers,
    )
def _ephemeral_launched_run_client(
    instance_ref, pipeline_origin, pipeline_run_id, cancellation_event
):
    '''Spins up an ephemeral client & server with two workers. This is to allow for cancellation to
    be processed as an interrupt rather than waiting for the launched run to complete.

    Args:
        instance_ref (InstanceRef): Used to rebuild the DagsterInstance and passed to the run.
        pipeline_origin (PipelinePythonOrigin): Origin of the pipeline to execute.
        pipeline_run_id (str): Id of the run to look up on the instance and execute.
        cancellation_event (multiprocessing.synchronize.Event): When set, a
            cancel request for the run is sent to the server.
    '''
    check.inst_param(instance_ref, 'instance_ref', InstanceRef)
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelinePythonOrigin)
    check.str_param(pipeline_run_id, 'pipeline_run_id')
    check.inst_param(cancellation_event, 'cancellation_event', multiprocessing.synchronize.Event)

    instance = DagsterInstance.from_ref(instance_ref)
    pipeline_run = instance.get_run_by_id(pipeline_run_id)

    loadable_target_origin = LoadableTargetOrigin.from_python_origin(
        pipeline_origin.repository_origin
    )

    with GrpcServerProcess(loadable_target_origin, max_workers=2) as server_process:
        api_client = server_process.create_ephemeral_client()

        # Run the execution call on a separate thread so this thread stays free
        # to watch the cancellation event.
        execute_run_thread = threading.Thread(
            target=sync_execute_run_grpc,
            kwargs={
                'api_client': api_client,
                'instance_ref': instance_ref,
                'pipeline_origin': pipeline_origin,
                'pipeline_run': pipeline_run,
            },
        )

        execute_run_thread.start()
        # Poll once per SUBPROCESS_TICK until the execution thread has ended.
        while execute_run_thread.is_alive():
            if cancellation_event.is_set():
                api_client.cancel_execution(CancelExecutionRequest(run_id=pipeline_run_id))
                # Block until the execution thread ends after the cancel request.
                execute_run_thread.join()
            time.sleep(SUBPROCESS_TICK)
def test_cancel_run():
    '''Cancel a streaming run mid-flight and check it stopped early with a step failure.

    The run is configured with ``length`` 20. After cancellation succeeds, the
    logs must contain fewer than 20 STEP_MATERIALIZATION events and at least
    one STEP_FAILURE event.
    '''
    with temp_instance() as instance:
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable, python_file=__file__, working_directory=None,
        )

        with GrpcServerProcess(
                loadable_target_origin, max_workers=10).create_ephemeral_client() as api_client:
            streaming_results = []

            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline,
                run_config={
                    'solids': {
                        'streamer': {
                            'config': {
                                'length': 20
                            }
                        }
                    }
                },
            )
            execute_run_args = ExecuteRunArgs(
                pipeline_origin=PipelineGrpcServerOrigin(
                    pipeline_name='streaming_pipeline',
                    repository_origin=RepositoryGrpcServerOrigin(
                        host='localhost',
                        socket=api_client.socket,
                        port=api_client.port,
                        repository_name='test_repository',
                    ),
                ),
                pipeline_run_id=pipeline_run.run_id,
                instance_ref=instance.get_ref(),
            )
            # Stream events on a daemon thread so this thread can issue the
            # cancel request while the run is in progress.
            stream_events_result_thread = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, api_client, execute_run_args])
            stream_events_result_thread.daemon = True
            stream_events_result_thread.start()
            # Only cancel once a step has actually started.
            poll_for_step_start(instance, pipeline_run.run_id)

            res = api_client.cancel_execution(
                cancel_execution_request=CancelExecutionRequest(
                    run_id=pipeline_run.run_id))
            assert res.success is True

            poll_for_run(instance, pipeline_run.run_id)

            logs = instance.all_logs(pipeline_run.run_id)
            # Fewer than the configured 20 materializations means the run was cut short.
            assert (len([
                ev for ev in logs
                if ev.dagster_event.event_type_value == 'STEP_MATERIALIZATION'
            ]) < 20)

            # soft termination
            assert [
                ev for ev in logs
                if ev.dagster_event.event_type_value == 'STEP_FAILURE'
            ]
def _python_env_location_handle(
    user_process_api,
    executable_path,
    location_name,
    attribute,
    python_file,
    module_name,
    working_directory,
    make_code_pointer,
):
    '''Build the location handle for a single python-environment target.

    ``make_code_pointer`` maps an attribute name to the CodePointer for this
    target kind (file, module or package).
    '''
    if user_process_api == UserProcessApi.GRPC:
        return RepositoryLocationHandle.create_process_bound_grpc_server_location(
            loadable_target_origin=LoadableTargetOrigin(
                executable_path=executable_path,
                python_file=python_file,
                module_name=module_name,
                working_directory=working_directory,
                attribute=attribute,
            ),
            location_name=location_name,
        )

    if attribute:
        # An explicit attribute names the single repository symbol directly.
        repository_code_pointer_dict = {attribute: make_code_pointer(attribute)}
    else:
        # No attribute configured: list the repositories and point at each one.
        response = sync_list_repositories(
            executable_path=executable_path,
            python_file=python_file,
            module_name=module_name,
            working_directory=working_directory,
        )
        repository_code_pointer_dict = {
            lrs.repository_name: make_code_pointer(lrs.attribute)
            for lrs in response.repository_symbols
        }

    return RepositoryLocationHandle.create_python_env_location(
        executable_path=executable_path,
        location_name=location_name,
        repository_code_pointer_dict=repository_code_pointer_dict,
    )


def _location_handle_from_python_environment_config(
    python_environment_config, yaml_path, user_process_api
):
    '''Create a RepositoryLocationHandle from a python-environment config dict.

    Args:
        python_environment_config (dict): Must contain ``executable_path`` and
            ``target``; the target holds one of ``python_file``,
            ``python_module`` or ``python_package``.
        yaml_path (str): Path of the yaml file, passed on when resolving a
            ``python_file`` target.
        user_process_api (UserProcessApi): ``GRPC`` selects a process-bound gRPC
            server location; anything else produces a python-env location.

    Returns:
        The handle created by the matching RepositoryLocationHandle factory.
    '''
    check.dict_param(python_environment_config, 'python_environment_config')
    check.str_param(yaml_path, 'yaml_path')
    check.inst_param(user_process_api, 'user_process_api', UserProcessApi)

    # do shell expansion on path
    executable_path = os.path.expanduser(python_environment_config['executable_path'])
    target_config = python_environment_config['target']

    check.invariant(is_target_config(target_config))

    python_file_config = target_config.get('python_file')
    python_module_config = target_config.get('python_module')
    python_package_config = target_config.get('python_package')

    if python_file_config:
        absolute_path, attribute, location_name, working_directory = _get_python_file_config_data(
            python_file_config, yaml_path
        )
        return _python_env_location_handle(
            user_process_api,
            executable_path,
            location_name,
            attribute,
            python_file=absolute_path,
            module_name=None,
            # Pass the configured working directory through. It was previously
            # replaced by None for the gRPC origin and the repository listing
            # while still being used for the code pointers.
            working_directory=working_directory,
            make_code_pointer=lambda attr: CodePointer.from_python_file(
                absolute_path, attr, working_directory
            ),
        )

    if python_module_config:
        module_name, attribute, location_name = _get_module_config_data(python_module_config)
        return _python_env_location_handle(
            user_process_api,
            executable_path,
            location_name,
            attribute,
            python_file=None,
            module_name=module_name,
            working_directory=None,
            make_code_pointer=lambda attr: CodePointer.from_module(module_name, attr),
        )

    # Neither a file nor a module target was given, so a package target is required.
    check.invariant(python_package_config)
    package_name, attribute, location_name = _get_package_config_data(python_package_config)
    return _python_env_location_handle(
        user_process_api,
        executable_path,
        location_name,
        attribute,
        python_file=None,
        module_name=package_name,
        working_directory=None,
        make_code_pointer=lambda attr: CodePointer.from_python_package(package_name, attr),
    )