def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }

    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping'
    )
    pipeline_run = InMemoryPipelineRun(
        run_id,
        ExecutionSelector('nonce'),
        environment_dict,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline_with_config_mapping, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }

    run_id = make_new_run_id()
    pipeline_run = InMemoryPipelineRun(
        run_id,
        ExecutionSelector('nonce'),
        environment_dict,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.SUCCESS

def test_add_get_postgres_run_storage(pg_db):
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())

    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun(
        pipeline_name='pipeline_name',
        run_id=run_id,
        environment_dict={},
        mode='some_mode',
        # https://github.com/dagster-io/dagster/issues/1709
        # ExecutionSelector should be threaded all the way down from the top
        selector=ExecutionSelector('pipeline_name'),
        reexecution_config=None,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
    )
    run_storage.add_run(run_to_add)

    fetched_run = run_storage.get_run_by_id(run_id)
    assert run_to_add == fetched_run

    assert run_storage.has_run(run_id)
    assert not run_storage.has_run(str(uuid.uuid4()))

    assert run_storage.all_runs() == [run_to_add]
    assert run_storage.all_runs_for_pipeline('pipeline_name') == [run_to_add]
    assert run_storage.all_runs_for_pipeline('nope') == []

    run_storage.wipe()
    assert run_storage.all_runs() == []

def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_passing_pipeline)
    pipeline = define_passing_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_crashy_pipeline)
    pipeline = define_crashy_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE

    last_log = pipeline_run.all_logs()[-1]
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )

def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'failing_pipeline')
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    run_storage = InMemoryRunStorage()
    pipeline_run = run_storage.create_run(
        run_storage=run_storage,
        pipeline_name=failing_pipeline.name,
        run_id=run_id,
        selector=selector,
        env_config=env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, failing_pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()

def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_failing_pipeline)
    pipeline = define_failing_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        execution_plan=create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()

def get_runtime_type(graphene_info, pipeline_name, type_name):
    pipeline = get_dagster_pipeline_from_selector(graphene_info, ExecutionSelector(pipeline_name))

    if not pipeline.has_runtime_type(type_name):
        raise UserFacingGraphQLError(
            graphene_info.schema.type_named('RuntimeTypeNotFoundError')(
                pipeline=pipeline, runtime_type_name=type_name
            )
        )

    return to_dauphin_runtime_type(pipeline.runtime_type_named(type_name))

def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'passing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=passing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, passing_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

    events = instance.all_logs(run_id)
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

    process_exited_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_EXITED)
    assert len(process_exited_events) == 1

def _load_run(self, json_data):
    from dagster.core.execution.api import ExecutionSelector

    selector = ExecutionSelector(
        name=json_data['pipeline_name'], solid_subset=json_data.get('pipeline_solid_subset')
    )
    run = self.create_run(
        pipeline_name=json_data['pipeline_name'],
        run_id=json_data['run_id'],
        selector=selector,
        env_config=json_data['config'],
        mode=json_data['mode'],
    )
    return run

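# For illustration only: a minimal sketch of the JSON payload _load_run expects,
# inferred from the keys it reads above. All values here are hypothetical;
# 'pipeline_solid_subset' is optional, since it is read with .get().
EXAMPLE_RUN_JSON = {
    'pipeline_name': 'csv_hello_world',
    'pipeline_solid_subset': None,
    'run_id': 'run-id-for-illustration',
    'config': {},
    'mode': 'default',
}
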
def get_config_type(graphene_info, pipeline_name, config_type_name, mode):
    check.str_param(pipeline_name, 'pipeline_name')
    check.str_param(config_type_name, 'config_type_name')
    check.opt_str_param(mode, 'mode')

    pipeline = get_dagster_pipeline_from_selector(graphene_info, ExecutionSelector(pipeline_name))
    environment_schema = create_environment_schema(pipeline, mode)

    if not environment_schema.has_config_type(config_type_name):
        raise UserFacingGraphQLError(
            graphene_info.schema.type_named('ConfigTypeNotFoundError')(
                pipeline=pipeline, config_type_name=config_type_name
            )
        )

    return to_dauphin_config_type(environment_schema.config_type_named(config_type_name))

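# For illustration only: a hedged usage sketch, assuming a graphene_info resolve
# context is in hand. The pipeline and config type names are hypothetical; an
# unknown name surfaces as the UserFacingGraphQLError raised above rather than
# as an uncaught exception.
#
#     dauphin_type = get_config_type(
#         graphene_info, 'csv_hello_world', 'CsvHelloWorld.Environment', mode='default'
#     )
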
def __init__(
    self,
    run_storage=None,
    pipeline_name=None,
    run_id=None,
    env_config=None,
    mode=None,
    selector=None,
    reexecution_config=None,
    step_keys_to_execute=None,
):
    from dagster.core.execution.api import ExecutionSelector
    from dagster.core.execution.config import ReexecutionConfig

    from .runs import RunStorage

    self._pipeline_name = check.str_param(pipeline_name, 'pipeline_name')
    self._run_id = check.str_param(run_id, 'run_id')
    self._env_config = check.opt_dict_param(env_config, 'environment_config', key_type=str)
    self._mode = check.opt_str_param(mode, 'mode')
    self._selector = check.opt_inst_param(
        selector,
        'selector',
        ExecutionSelector,
        default=ExecutionSelector(name=self.pipeline_name),
    )
    self._reexecution_config = check.opt_inst_param(
        reexecution_config, 'reexecution_config', ReexecutionConfig
    )

    if step_keys_to_execute is not None:
        self._step_keys_to_execute = check.list_param(
            step_keys_to_execute, 'step_keys_to_execute', of_type=str
        )
    else:
        self._step_keys_to_execute = None

    run_storage = check.opt_inst_param(run_storage, 'run_storage', RunStorage)
    if run_storage:
        # Hold only a weak proxy so the run does not keep its storage alive.
        self._run_storage = weakref.proxy(run_storage)
    else:
        self._run_storage = None

    self.__subscribers = []

    self._status = PipelineRunStatus.NOT_STARTED

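# For illustration only: a minimal construction sketch for this initializer,
# assuming the enclosing class is the run class used by the tests above (the
# class name and 'example_pipeline' are assumptions). Only pipeline_name and
# run_id must be strings; selector falls back to
# ExecutionSelector(name=pipeline_name), and status starts as NOT_STARTED.
#
#     run = PipelineRun(pipeline_name='example_pipeline', run_id=make_new_run_id())
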
def from_json(data):
    selector = ExecutionSelector(
        name=data['pipeline_name'], solid_subset=data.get('pipeline_solid_subset')
    )
    run = InMemoryPipelineRun(
        run_id=data['run_id'], selector=selector, env_config=data['config'], mode=data['mode']
    )
    events = []
    # The log file is a raw stream of pickled records; read until EOF.
    with open(data['log_file'], 'rb') as logs:
        while True:
            try:
                event_record = pickle.load(logs)
                check.invariant(
                    isinstance(event_record, EventRecord), 'log file entry not EventRecord'
                )
                events.append(event_record)
            except EOFError:
                break
    run.store_events(events)
    return run

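# For illustration only: a hedged sketch of producing a log file that from_json
# can read back, i.e. a raw stream of pickled EventRecord objects consumed
# above until EOFError. write_log_file and event_records are hypothetical
# names, not part of the original API.
import pickle

def write_log_file(path, event_records):
    with open(path, 'wb') as logs:
        for record in event_records:
            pickle.dump(record, logs)
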
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=crashy_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, crashy_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.FAILURE

    last_log = instance.all_logs(run_id)[-1]
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )

def test_handle_run_event_pipeline_success_test():
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())

    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun(
        pipeline_name='pipeline_name',
        run_id=run_id,
        environment_dict={},
        mode='some_mode',
        # https://github.com/dagster-io/dagster/issues/1709
        # ExecutionSelector should be threaded all the way down from the top
        selector=ExecutionSelector('pipeline_name'),
        reexecution_config=None,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
    )
    run_storage.add_run(run_to_add)

    dagster_pipeline_start_event = DagsterEvent(
        message='a message',
        event_type_value=DagsterEventType.PIPELINE_START.value,
        pipeline_name='pipeline_name',
        step_key=None,
        solid_handle=None,
        step_kind_value=None,
        logging_tags=None,
    )

    run_storage.handle_run_event(run_id, dagster_pipeline_start_event)

    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        str(uuid.uuid4()),  # diff run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        run_id,  # correct run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS

def to_selector(self):
    return ExecutionSelector(self.name, self.solidSubset)

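# For illustration only: this maps the GraphQL-style camelCase field
# solidSubset onto ExecutionSelector's positional solid_subset argument, e.g.
# (with a hypothetical selector input object):
#
#     selector_input.to_selector()
#     # -> ExecutionSelector('csv_hello_world', ['sum_solid'])
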
def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }

    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping'
    )
    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline_with_config_mapping.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline_with_config_mapping, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }

    run_id = make_new_run_id()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS