Example #1
def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }

    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping'
    )
    pipeline_run = InMemoryPipelineRun(
        run_id,
        ExecutionSelector('nonce'),
        environment_dict,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline_with_config_mapping, pipeline_run, raise_on_error=False
    )
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }

    run_id = make_new_run_id()
    pipeline_run = InMemoryPipelineRun(
        run_id,
        ExecutionSelector('nonce'),
        environment_dict,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline, pipeline_run, raise_on_error=False
    )

    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.SUCCESS
Example #2
def test_add_get_postgres_run_storage(pg_db):
    run_storage = PostgresRunStorage.create_nuked_storage(
        get_test_conn_string())

    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun(
        pipeline_name='pipeline_name',
        run_id=run_id,
        environment_dict={},
        mode='some_mode',
        # https://github.com/dagster-io/dagster/issues/1709
        # ExecutionSelector should be threaded all the way
        # down from the top
        selector=ExecutionSelector('pipeline_name'),
        reexecution_config=None,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
    )
    run_storage.add_run(run_to_add)

    fetched_run = run_storage.get_run_by_id(run_id)

    assert run_to_add == fetched_run

    assert run_storage.has_run(run_id)
    assert not run_storage.has_run(str(uuid.uuid4()))

    assert run_storage.all_runs() == [run_to_add]
    assert run_storage.all_runs_for_pipeline('pipeline_name') == [run_to_add]
    assert run_storage.all_runs_for_pipeline('nope') == []

    run_storage.wipe()
    assert run_storage.all_runs() == []
Example #3
def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_passing_pipeline)
    pipeline = define_passing_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.SUCCESS
    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1
Example #4
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_crashy_pipeline)
    pipeline = define_crashy_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.FAILURE
    last_log = pipeline_run.all_logs()[-1]
    print(last_log.message)
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )
Example #5
def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'failing_pipeline')
    env_config = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': script_relative_path('data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')
    run_storage = InMemoryRunStorage()
    pipeline_run = run_storage.create_run(
        run_storage=run_storage,
        pipeline_name=failing_pipeline.name,
        run_id=run_id,
        selector=selector,
        env_config=env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle,
                                       failing_pipeline,
                                       pipeline_run,
                                       raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()
Example #6
def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_failing_pipeline)
    pipeline = define_failing_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': script_relative_path('data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        execution_plan=create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle,
                                       pipeline,
                                       pipeline_run,
                                       raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()
Example #7
def get_runtime_type(graphene_info, pipeline_name, type_name):
    pipeline = get_dagster_pipeline_from_selector(
        graphene_info, ExecutionSelector(pipeline_name))

    if not pipeline.has_runtime_type(type_name):
        raise UserFacingGraphQLError(
            graphene_info.schema.type_named('RuntimeTypeNotFoundError')(
                pipeline=pipeline, runtime_type_name=type_name))

    return to_dauphin_runtime_type(pipeline.runtime_type_named(type_name))
Example #8
def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'passing_pipeline')
    environment_dict = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': script_relative_path('data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=passing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle,
                                       passing_pipeline,
                                       pipeline_run,
                                       instance,
                                       raise_on_error=False)
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS
    events = instance.all_logs(run_id)
    assert events

    process_start_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

    process_exited_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_EXITED)
    assert len(process_exited_events) == 1
Example #9
    def _load_run(self, json_data):
        from dagster.core.execution.api import ExecutionSelector

        selector = ExecutionSelector(
            name=json_data['pipeline_name'],
            solid_subset=json_data.get('pipeline_solid_subset'))
        run = self.create_run(
            pipeline_name=json_data['pipeline_name'],
            run_id=json_data['run_id'],
            selector=selector,
            env_config=json_data['config'],
            mode=json_data['mode'],
        )
        return run
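
For reference, a hypothetical json_data payload that _load_run above would accept. The field names come straight from the keys the method reads; the values are invented for illustration:

json_data = {
    'pipeline_name': 'csv_hello_world',
    'pipeline_solid_subset': ['sum_solid'],  # optional; .get() returns None if absent
    'run_id': '11111111-2222-3333-4444-555555555555',
    'config': {},
    'mode': 'default',
}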
Example #10
def get_config_type(graphene_info, pipeline_name, config_type_name, mode):
    check.str_param(pipeline_name, 'pipeline_name')
    check.str_param(config_type_name, 'config_type_name')
    check.opt_str_param(mode, 'mode')

    pipeline = get_dagster_pipeline_from_selector(graphene_info, ExecutionSelector(pipeline_name))
    environment_schema = create_environment_schema(pipeline, mode)
    if not environment_schema.has_config_type(config_type_name):
        raise UserFacingGraphQLError(
            graphene_info.schema.type_named('ConfigTypeNotFoundError')(
                pipeline=pipeline, config_type_name=config_type_name
            )
        )

    return to_dauphin_config_type(environment_schema.config_type_named(config_type_name))
Example #11
    def __init__(
        self,
        run_storage=None,
        pipeline_name=None,
        run_id=None,
        env_config=None,
        mode=None,
        selector=None,
        reexecution_config=None,
        step_keys_to_execute=None,
    ):
        from dagster.core.execution.api import ExecutionSelector
        from dagster.core.execution.config import ReexecutionConfig
        from .runs import RunStorage

        self._pipeline_name = check.str_param(pipeline_name, 'pipeline_name')
        self._run_id = check.str_param(run_id, 'run_id')
        self._env_config = check.opt_dict_param(env_config,
                                                'environment_config',
                                                key_type=str)
        self._mode = check.opt_str_param(mode, 'mode')
        self._selector = check.opt_inst_param(
            selector,
            'selector',
            ExecutionSelector,
            default=ExecutionSelector(name=self.pipeline_name),
        )
        self._reexecution_config = check.opt_inst_param(
            reexecution_config, 'reexecution_config', ReexecutionConfig)
        if step_keys_to_execute is not None:
            self._step_keys_to_execute = check.list_param(
                step_keys_to_execute, 'step_keys_to_execute', of_type=str)
        else:
            self._step_keys_to_execute = None

        run_storage = check.opt_inst_param(run_storage, 'run_storage',
                                           RunStorage)
        if run_storage:
            self._run_storage = weakref.proxy(run_storage)
        else:
            self._run_storage = None

        self.__subscribers = []

        self._status = PipelineRunStatus.NOT_STARTED
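
The check.opt_inst_param call above is what supplies the default ExecutionSelector named after the pipeline when no selector is passed. A minimal sketch of that validator's behavior (a simplified reconstruction, not dagster's actual check module, which has its own error types):

def opt_inst_param(obj, param_name, ttype, default=None):
    # Optional-instance check: fall back to the default when the caller
    # omits the parameter, otherwise enforce the expected type.
    if obj is None:
        return default
    if not isinstance(obj, ttype):
        raise TypeError(
            'Param "{param}" is not an instance of {type}'.format(
                param=param_name, type=ttype.__name__))
    return obj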
Example #12
    def from_json(data):
        selector = ExecutionSelector(
            name=data['pipeline_name'],
            solid_subset=data.get('pipeline_solid_subset'))
        run = InMemoryPipelineRun(run_id=data['run_id'],
                                  selector=selector,
                                  env_config=data['config'],
                                  mode=data['mode'])
        events = []
        with open(data['log_file'], 'rb') as logs:
            while True:
                try:
                    event_record = pickle.load(logs)
                    check.invariant(isinstance(event_record, EventRecord),
                                    'log file entry not EventRecord')
                    events.append(event_record)
                except EOFError:
                    break

        run.store_events(events)
        return run
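
from_json above replays a log file written as a sequence of pickle frames, reading until EOFError. A complementary sketch of the writer side that format implies (hypothetical; assumes EventRecord instances are picklable):

import pickle

def write_event_log(log_file_path, event_records):
    # Append each EventRecord as its own pickle frame; repeated
    # pickle.load calls (as in from_json above) then recover the
    # records one at a time until EOFError signals the end.
    with open(log_file_path, 'wb') as logs:
        for record in event_records:
            pickle.dump(record, logs)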
Example #13
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': script_relative_path('data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=crashy_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle,
                                       crashy_pipeline,
                                       pipeline_run,
                                       instance,
                                       raise_on_error=False)
    execution_manager.join()
    assert instance.get_run(run_id).status == PipelineRunStatus.FAILURE
    last_log = instance.all_logs(run_id)[-1]

    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'
        .format(run_id=run_id))
Example #14
def test_handle_run_event_pipeline_success_test():

    run_storage = PostgresRunStorage.create_nuked_storage(
        get_test_conn_string())

    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun(
        pipeline_name='pipeline_name',
        run_id=run_id,
        environment_dict={},
        mode='some_mode',
        # https://github.com/dagster-io/dagster/issues/1709
        # ExecutionSelector should be threaded all the way
        # down from the top
        selector=ExecutionSelector('pipeline_name'),
        reexecution_config=None,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
    )
    run_storage.add_run(run_to_add)

    dagster_pipeline_start_event = DagsterEvent(
        message='a message',
        event_type_value=DagsterEventType.PIPELINE_START.value,
        pipeline_name='pipeline_name',
        step_key=None,
        solid_handle=None,
        step_kind_value=None,
        logging_tags=None,
    )

    run_storage.handle_run_event(run_id, dagster_pipeline_start_event)

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        str(uuid.uuid4()),  # diff run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        run_id,  # correct run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.SUCCESS
Example #15
    def to_selector(self):
        return ExecutionSelector(self.name, self.solidSubset)
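
Across these examples, an ExecutionSelector is only ever built from a pipeline name plus an optional solid subset. A minimal sketch of a value type with that shape (a hypothetical reconstruction for orientation, not the actual dagster class):

from collections import namedtuple

class ExecutionSelector(namedtuple('_ExecutionSelector', 'name solid_subset')):
    # name: the pipeline to execute; solid_subset: optional list of solid
    # names restricting execution to a sub-DAG, with None meaning "all solids".
    def __new__(cls, name, solid_subset=None):
        return super(ExecutionSelector, cls).__new__(cls, name, solid_subset)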
Example #16
def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {
                    'foo': 'baz',
                    'bar': 3
                }
            }
        }
    }

    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping')

    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline_with_config_mapping.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle,
                                       composite_pipeline_with_config_mapping,
                                       pipeline_run,
                                       instance,
                                       raise_on_error=False)
    execution_manager.join()
    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {
                    'foo': 'baz',
                    'bar': 3
                }
            }
        },
        'execution': {
            'multiprocess': {}
        },
        'storage': {
            'filesystem': {}
        },
    }

    run_id = make_new_run_id()

    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle,
                                       composite_pipeline,
                                       pipeline_run,
                                       instance,
                                       raise_on_error=False)

    execution_manager.join()
    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS