Example 1
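Verifies that a resource whose init function raises an exception surfaces a PIPELINE_INIT_FAILURE event through execute_pipeline, execute_plan, and execute_pipeline_iterator.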
def test_resource_init_failure():
    @resource
    def failing_resource(_init_context):
        raise Exception("Uh oh")

    @solid(required_resource_keys={"failing_resource"})
    def failing_resource_solid(_context):
        pass

    the_pipeline = PipelineDefinition(
        name="test_resource_init_failure",
        solid_defs=[failing_resource_solid],
        mode_defs=[ModeDefinition(resource_defs={"failing_resource": failing_resource})],
    )

    res = execute_pipeline(the_pipeline, raise_on_error=False)

    event_types = [event.event_type_value for event in res.event_list]
    assert DagsterEventType.PIPELINE_INIT_FAILURE.value in event_types

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(the_pipeline)
    pipeline_run = instance.create_run_for_pipeline(the_pipeline, execution_plan=execution_plan)

    step_events = execute_plan(
        execution_plan, InMemoryPipeline(the_pipeline), pipeline_run=pipeline_run, instance=instance
    )

    event_types = [event.event_type_value for event in step_events]
    assert DagsterEventType.PIPELINE_INIT_FAILURE.value in event_types

    # Test that the pipeline init failure event fires even if we are raising errors
    events = []
    try:
        for event in execute_pipeline_iterator(the_pipeline):
            events.append(event)
    except DagsterResourceFunctionError:
        pass

    event_types = [event.event_type_value for event in events]
    assert DagsterEventType.PIPELINE_INIT_FAILURE.value in event_types
Example 2
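Interrupts a Celery-backed run once the first step starts, then checks that the in-flight step succeeded, later steps were revoked, and the pipeline as a whole failed.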
def test_terminate_pipeline_on_celery(dagster_celery_worker, instance, tempdir):
    pipeline_def = ReconstructablePipeline.for_file(REPO_FILE, "interrupt_pipeline")

    run_config = {
        "resources": {"io_manager": {"config": {"base_dir": tempdir}}},
        "execution": {"celery": {}},
    }

    results = []
    result_types = []
    interrupt_thread = None

    for result in execute_pipeline_iterator(
        pipeline=pipeline_def,
        run_config=run_config,
        instance=instance,
    ):
        # Interrupt once the first step starts
        if result.event_type == DagsterEventType.STEP_START and not interrupt_thread:
            interrupt_thread = Thread(target=send_interrupt, args=())
            interrupt_thread.start()

        results.append(result)
        result_types.append(result.event_type)

    interrupt_thread.join()

    # At least one step succeeded (the one that was running when the interrupt fired)
    assert DagsterEventType.STEP_SUCCESS in result_types

    # At least one step was revoked (and there were no step failure events)
    revoke_steps = [
        result
        for result in results
        if result.event_type == DagsterEventType.ENGINE_EVENT and "was revoked." in result.message
    ]

    assert len(revoke_steps) > 0

    # The overall pipeline failed
    assert DagsterEventType.PIPELINE_FAILURE in result_types
Example 3
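An engine run() method that streams events from execute_pipeline_iterator with a multithreaded executor, processes each event, and reports the final engine state on a queue.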
def run(self):
    """Runs this engine."""
    self._state = SpineEngineState.RUNNING
    run_config = {
        "loggers": {
            "console": {
                "config": {
                    "log_level": "CRITICAL"
                }
            }
        },
        "execution": {
            "multithread": {}
        }
    }
    for event in execute_pipeline_iterator(self._pipeline, run_config=run_config):
        self._process_event(event)
    if self._state == SpineEngineState.RUNNING:
        self._state = SpineEngineState.COMPLETED
    self._queue.put(("dag_exec_finished", str(self._state)))
Example 4
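Checks that an engine event carrying an error is yielded when the Celery broker cannot be reached.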
def test_bad_broker():
    event_stream = execute_pipeline_iterator(
        ExecutionTargetHandle.for_pipeline_python_file(
            __file__, 'test_diamond_pipeline'
        ).build_pipeline_definition(),
        environment_dict={
            'storage': {'filesystem': {}},
            'execution': {'celery': {'config': {'config_source': {'broker_url': '*****@*****.**'}}}},
        },
        instance=DagsterInstance.local_temp(),
    )

    # ensure an engine event with an error is yielded if we can't connect to the broker
    saw_engine_error = False
    try:
        for event in event_stream:
            if event.is_engine_event:
                saw_engine_error = bool(event.engine_event_data.error)
    except Exception:  # pylint: disable=broad-except
        pass

    assert saw_engine_error
Example 5
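An earlier variant of the resource-init-failure test that builds an empty PipelineRun by hand and asserts on the first event of each stream.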
def test_resource_init_failure():
    @resource
    def failing_resource(_init_context):
        raise Exception('Uh oh')

    @solid(required_resource_keys={'failing_resource'})
    def failing_resource_solid(_context):
        pass

    pipeline = PipelineDefinition(
        name='test_resource_init_failure',
        solid_defs=[failing_resource_solid],
        mode_defs=[ModeDefinition(resource_defs={'failing_resource': failing_resource})],
    )

    res = execute_pipeline(pipeline, raise_on_error=False)

    assert res.event_list[0].event_type_value == 'PIPELINE_INIT_FAILURE'

    execution_plan = create_execution_plan(pipeline)
    run_id = make_new_run_id()
    pipeline_run = PipelineRun.create_empty_run(pipeline.name, run_id)

    step_events = execute_plan(
        execution_plan, pipeline_run=pipeline_run, instance=DagsterInstance.ephemeral()
    )

    assert step_events[0].event_type_value == 'PIPELINE_INIT_FAILURE'

    # Test that the pipeline init failure event fires even if we are raising errors
    events = []
    try:
        for event in execute_pipeline_iterator(pipeline):
            events.append(event)
    except DagsterResourceFunctionError:
        pass

    assert len(events) == 1
    assert events[0].event_type_value == 'PIPELINE_INIT_FAILURE'
Example 6
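An earlier Celery test asserting that a non-local broker without S3 or GCS storage fails a config check.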
def test_bad_broker():
    with pytest.raises(check.CheckError) as exc_info:
        event_stream = execute_pipeline_iterator(
            ExecutionTargetHandle.for_pipeline_python_file(
                __file__, 'test_diamond_pipeline').build_pipeline_definition(),
            environment_dict={
                'storage': {
                    'filesystem': {}
                },
                'execution': {
                    'celery': {
                        'config': {
                            'broker': 'notlocal.bad'
                        }
                    }
                },
            },
            instance=DagsterInstance.local_temp(),
        )
        list(event_stream)
    assert 'Must use S3 or GCS storage with non-local Celery' in str(
        exc_info.value)
Example 7
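Sends a keyboard interrupt to a single-process run and expects step and pipeline failure events plus the "interrupted unexpectedly" message.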
def test_single_proc_interrupt():
    @pipeline
    def write_a_file_pipeline():
        write_a_file()

    with safe_tempfile_path() as success_tempfile:

        # launch a thread that waits until the file is written, then sends an interrupt
        Thread(target=_send_kbd_int, args=([success_tempfile], )).start()

        result_types = []
        result_messages = []

        # next time the launched thread wakes up it will send a keyboard
        # interrupt
        for result in execute_pipeline_iterator(
                write_a_file_pipeline,
                run_config={
                    "solids": {
                        "write_a_file": {
                            "config": {
                                "tempfile": success_tempfile
                            }
                        }
                    }
                },
        ):
            result_types.append(result.event_type)
            result_messages.append(result.message)

        assert DagsterEventType.STEP_FAILURE in result_types
        assert DagsterEventType.PIPELINE_FAILURE in result_types

        assert any([
            "Execution was interrupted unexpectedly. "
            "No user initiated termination request was found, treating as failure."
            in message for message in result_messages
        ])
Example 8
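Interrupts a multiprocess run of four file-writing solids and expects four STEP_FAILURE events and a PIPELINE_FAILURE, with the interrupt surfacing as DagsterSubprocessError or KeyboardInterrupt.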
def test_interrupt_multiproc():
    with seven.TemporaryDirectory() as tempdir:
        file_1 = os.path.join(tempdir, 'file_1')
        file_2 = os.path.join(tempdir, 'file_2')
        file_3 = os.path.join(tempdir, 'file_3')
        file_4 = os.path.join(tempdir, 'file_4')

        # launch a thread that waits until the file is written, then sends an interrupt
        Thread(target=_send_kbd_int, args=([file_1, file_2, file_3, file_4],)).start()

        results = []
        try:
            # launch a pipeline that writes a file and loops infinitely
            # next time the launched thread wakes up it will send a keyboard
            # interrupt
            for result in execute_pipeline_iterator(
                ExecutionTargetHandle.for_pipeline_python_file(
                    __file__, 'write_files_pipeline'
                ).build_pipeline_definition(),
                environment_dict={
                    'solids': {
                        'write_1': {'config': {'tempfile': file_1}},
                        'write_2': {'config': {'tempfile': file_2}},
                        'write_3': {'config': {'tempfile': file_3}},
                        'write_4': {'config': {'tempfile': file_4}},
                    },
                    'execution': {'multiprocess': {'config': {'max_concurrent': 4}}},
                    'storage': {'filesystem': {}},
                },
                instance=DagsterInstance.local_temp(tempdir=tempdir),
            ):
                results.append(result)
            assert False  # should never reach
        except (DagsterSubprocessError, KeyboardInterrupt):
            pass

        assert [result.event_type for result in results].count(DagsterEventType.STEP_FAILURE) == 4
        assert DagsterEventType.PIPELINE_FAILURE in [result.event_type for result in results]
Example 9
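A newer variant of the multiprocess interrupt test that uses reconstructable() and instance_for_test and consumes the iterator without expecting an exception.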
def test_interrupt_multiproc():
    with tempfile.TemporaryDirectory() as tempdir:
        with instance_for_test(temp_dir=tempdir) as instance:

            file_1 = os.path.join(tempdir, "file_1")
            file_2 = os.path.join(tempdir, "file_2")
            file_3 = os.path.join(tempdir, "file_3")
            file_4 = os.path.join(tempdir, "file_4")

            # launch a thread that waits until the file is written, then sends an interrupt
            Thread(target=_send_kbd_int, args=([file_1, file_2, file_3, file_4],)).start()

            results = []

            # launch a pipeline that writes a file and loops infinitely
            # next time the launched thread wakes up it will send a keyboard
            # interrupt
            for result in execute_pipeline_iterator(
                reconstructable(write_files_pipeline),
                run_config={
                    "solids": {
                        "write_1": {"config": {"tempfile": file_1}},
                        "write_2": {"config": {"tempfile": file_2}},
                        "write_3": {"config": {"tempfile": file_3}},
                        "write_4": {"config": {"tempfile": file_4}},
                    },
                    "execution": {"multiprocess": {"config": {"max_concurrent": 4}}},
                    "intermediate_storage": {"filesystem": {}},
                },
                instance=instance,
            ):
                results.append(result)

            assert [result.event_type for result in results].count(
                DagsterEventType.STEP_FAILURE
            ) == 4
            assert DagsterEventType.PIPELINE_FAILURE in [result.event_type for result in results]
Example 10
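Runs a two-solid pipeline, then a single-solid sub-pipeline built with build_sub_pipeline, and finally iterates the sub-pipeline's events to confirm every step output succeeded.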
def test_pipeline_subset():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {
            'num': DependencyDefinition('return_one')
        }},
    )

    pipeline_result = execute_pipeline(pipeline_def)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').result_value() == 2

    env_config = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}

    subset_result = execute_pipeline(pipeline_def.build_sub_pipeline(
        ['add_one']),
                                     environment_dict=env_config)

    assert subset_result.success
    assert len(subset_result.solid_result_list) == 1
    assert subset_result.result_for_solid('add_one').result_value() == 4

    events = execute_pipeline_iterator(pipeline_def.build_sub_pipeline(
        ['add_one']),
                                       environment_dict=env_config)

    for step_event in step_output_event_filter(events):
        assert step_event.is_step_success
Example 11
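An older solid_subset-based variant of the pipeline subset test, using the legacy transformed_value API.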
def test_pipeline_subset():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid(inputs=[InputDefinition('num')])
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solids=[return_one, add_one],
        dependencies={'add_one': {
            'num': DependencyDefinition('return_one')
        }},
    )

    pipeline_result = execute_pipeline(pipeline_def)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').transformed_value() == 2

    env_config = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}

    subset_result = execute_pipeline(pipeline_def,
                                     environment_dict=env_config,
                                     solid_subset=['add_one'])

    assert subset_result.success
    assert len(subset_result.solid_result_list) == 1
    assert subset_result.result_for_solid('add_one').transformed_value() == 4

    step_events = execute_pipeline_iterator(pipeline_def,
                                            environment_dict=env_config,
                                            solid_subset=['add_one'])

    for step_event in step_events:
        assert step_event.is_step_success
Example 12
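A fuller version of the Celery termination test that also expects DagsterExecutionInterruptedError to be raised out of the iterator.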
def test_terminate_pipeline_on_celery(rabbitmq):
    with start_celery_worker():
        with tempfile.TemporaryDirectory() as tempdir:
            pipeline_def = ReconstructablePipeline.for_file(
                REPO_FILE, "interrupt_pipeline")

            with instance_for_test_tempdir(tempdir) as instance:
                run_config = {
                    "intermediate_storage": {
                        "filesystem": {
                            "config": {
                                "base_dir": tempdir
                            }
                        }
                    },
                    "execution": {
                        "celery": {}
                    },
                }

                results = []
                result_types = []
                interrupt_thread = None
                received_interrupt = False

                try:
                    for result in execute_pipeline_iterator(
                            pipeline=pipeline_def,
                            run_config=run_config,
                            instance=instance,
                    ):
                        # Interrupt once the first step starts
                        if (result.event_type == DagsterEventType.STEP_START
                                and not interrupt_thread):
                            interrupt_thread = Thread(target=send_interrupt,
                                                      args=())
                            interrupt_thread.start()

                        results.append(result)
                        result_types.append(result.event_type)

                    assert False
                except DagsterExecutionInterruptedError:
                    received_interrupt = True

                interrupt_thread.join()

                assert received_interrupt

                # At least one step succeeded (the one that was running when the interrupt fired)
                assert DagsterEventType.STEP_SUCCESS in result_types

                # At least one step was revoked (and there were no step failure events)
                revoke_steps = [
                    result for result in results
                    if result.event_type == DagsterEventType.ENGINE_EVENT
                    and "was revoked." in result.message
                ]

                assert len(revoke_steps) > 0

            # The overall pipeline failed
            assert DagsterEventType.PIPELINE_FAILURE in result_types
Example 13
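A diamond-graph execution test, plus a helper that runs a pipeline in a subprocess and forwards events and errors over a multiprocessing queue.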
def test_pipeline_execution_graph_diamond():
    pipeline = PipelineDefinition(solids=create_diamond_solids(), dependencies=diamond_deps())
    return _do_test(pipeline, lambda: execute_pipeline_iterator(pipeline))


def execute_pipeline_through_queue(
    handle,
    pipeline_name,
    solid_subset,
    environment_dict,
    mode,
    run_id,
    message_queue,
    reexecution_config,
    step_keys_to_execute,
):
    """
    Execute pipeline using message queue as a transport
    """

    check.opt_str_param(mode, 'mode')

    message_queue.put(ProcessStartedSentinel(os.getpid()))

    run_config = RunConfig(
        run_id,
        mode=mode,
        event_callback=message_queue.put,
        reexecution_config=reexecution_config,
        step_keys_to_execute=step_keys_to_execute,
    )

    if 'execution' not in environment_dict or not environment_dict['execution']:
        environment_dict['execution'] = {
            'in_process': {
                'config': {
                    'raise_on_error': False
                }
            }
        }

    try:
        handle.build_repository_definition()
        pipeline_def = handle.with_pipeline_name(
            pipeline_name).build_pipeline_definition()
    except Exception:  # pylint: disable=broad-except
        repo_error = sys.exc_info()
        message_queue.put(
            MultiprocessingError(
                serializable_error_info_from_exc_info(repo_error)))
        return

    try:
        event_list = []
        for event in execute_pipeline_iterator(
                pipeline_def.build_sub_pipeline(solid_subset),
                environment_dict,
                run_config=run_config):
            # message_queue.put(event)
            event_list.append(event)
        return PipelineExecutionResult(pipeline_def, run_config.run_id,
                                       event_list, lambda: None)
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        message_queue.put(MultiprocessingError(error_info))
    finally:
        message_queue.put(MultiprocessingDone())
        message_queue.close()
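
The snippets above span several Dagster API generations (environment_dict vs. run_config, solid_subset vs. build_sub_pipeline, ExecutionTargetHandle vs. ReconstructablePipeline). As a minimal sketch of the pattern they all share, assuming a legacy pre-1.0 Dagster release that still exposes the @solid and @pipeline decorators used throughout, the basic event-streaming loop looks roughly like this (say_hello and hello_pipeline are placeholder names, not part of any example above):

from dagster import DagsterEventType, execute_pipeline_iterator, pipeline, solid


@solid
def say_hello(context):
    # hypothetical one-step solid, just here to give the iterator something to run
    context.log.info("hello")


@pipeline
def hello_pipeline():
    say_hello()


# Stream events as they are emitted and collect their types.
event_types = [event.event_type for event in execute_pipeline_iterator(hello_pipeline)]
assert DagsterEventType.PIPELINE_SUCCESS in event_types

Each yielded event also carries event_type_value, the string form of the enum, which is what the assertions in Examples 1 and 5 key off.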