Code Example #1
File: test_event_log.py Project: databill86/dagster
def test_listen_notify_single_run_event():
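    # Store one run's events and verify that a watcher subscribed to that
    # run_id first yields an EventWatcherStart and then EventWatcherEvent
    # payloads carrying the same run_id. (create_nuked_storage presumably
    # starts the test database from a clean slate.)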
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(
        get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    event_watcher = create_event_watcher(get_test_conn_string())

    run_id = str(uuid.uuid4())

    event_watcher.watch_run(run_id)

    try:
        events, result = gather_events(_solids,
                                       run_config=RunConfig(run_id=run_id))
        for event in events:
            event_log_storage.store_event(event)

        event = event_watcher.queue.get(block=True)

        assert isinstance(event, EventWatcherStart)

        for _ in range(0, 5):
            watcher_event = event_watcher.queue.get(block=True)
            assert isinstance(watcher_event, EventWatcherEvent)
            assert watcher_event.payload.run_id == result.run_id

    finally:
        event_watcher.close()
Code Example #2
File: test_event_log.py Project: databill86/dagster
def test_basic_event_store():
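    # Round trip through raw storage: store the events of a one-solid run,
    # read the rows back with fetch_all_events, deserialize them, and assert
    # the expected sequence of event types.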
    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events, _result = gather_events(_solids)

    event_log_storage = PostgresEventLogStorage.create_nuked_storage(
        get_test_conn_string())

    for event in events:
        event_log_storage.store_event(event)

    rows = fetch_all_events(get_test_conn_string())

    out_events = list(
        map(lambda r: deserialize_json_to_dagster_namedtuple(r[0]), rows))

    assert list(map(lambda e: e.dagster_event.event_type, out_events)) == [
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]
Code Example #3
def test_wipe_postgres_event_log():
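    # After wipe(), previously stored logs for the run are gone and
    # get_logs_for_run returns an empty list.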
    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()  # pylint: disable=no-value-for-parameter

    events, result = gather_events(_solids)

    event_log_storage = PostgresEventLogStorage.create_nuked_storage(
        get_test_conn_string())

    for event in events:
        event_log_storage.store_event(event)

    out_events = event_log_storage.get_logs_for_run(result.run_id)

    assert event_types(out_events) == [
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]

    event_log_storage.wipe()

    assert event_log_storage.get_logs_for_run(result.run_id) == []
Code Example #4
def test_add_get_postgres_run_storage(pg_db):
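    # Add a PipelineRun, fetch it back by id, and check has_run, all_runs,
    # all_runs_for_pipeline and wipe() on PostgresRunStorage.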
    run_storage = PostgresRunStorage.create_nuked_storage(
        get_test_conn_string())

    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun(
        pipeline_name='pipeline_name',
        run_id=run_id,
        environment_dict={},
        mode='some_mode',
        # https://github.com/dagster-io/dagster/issues/1709
        # ExecutionSelector should be threaded all the way
        # down from the top
        selector=ExecutionSelector('pipeline_name'),
        reexecution_config=None,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
    )
    run_storage.add_run(run_to_add)

    fetched_run = run_storage.get_run_by_id(run_id)

    assert run_to_add == fetched_run

    assert run_storage.has_run(run_id)
    assert not run_storage.has_run(str(uuid.uuid4()))

    assert run_storage.all_runs() == [run_to_add]
    assert run_storage.all_runs_for_pipeline('pipeline_name') == [run_to_add]
    assert run_storage.all_runs_for_pipeline('nope') == []

    run_storage.wipe()
    assert run_storage.all_runs() == []
Code Example #5
def test_fetch_by_status():
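    # get_runs_for_status should return exactly the runs that were added with
    # the queried PipelineRunStatus.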
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage
    one = str(uuid.uuid4())
    two = str(uuid.uuid4())
    three = str(uuid.uuid4())
    four = str(uuid.uuid4())
    storage.add_run(
        build_run(run_id=one, pipeline_name='some_pipeline', status=PipelineRunStatus.NOT_STARTED)
    )
    storage.add_run(
        build_run(run_id=two, pipeline_name='some_pipeline', status=PipelineRunStatus.STARTED)
    )
    storage.add_run(
        build_run(run_id=three, pipeline_name='some_pipeline', status=PipelineRunStatus.STARTED)
    )
    storage.add_run(
        build_run(run_id=four, pipeline_name='some_pipeline', status=PipelineRunStatus.FAILURE)
    )

    assert {run.run_id for run in storage.get_runs_for_status(PipelineRunStatus.NOT_STARTED)} == {
        one
    }

    assert {run.run_id for run in storage.get_runs_for_status(PipelineRunStatus.STARTED)} == {
        two,
        three,
    }

    assert {run.run_id for run in storage.get_runs_for_status(PipelineRunStatus.FAILURE)} == {four}

    assert {run.run_id for run in storage.get_runs_for_status(PipelineRunStatus.SUCCESS)} == set()
Code Example #6
File: test_event_log.py Project: databill86/dagster
def test_basic_get_logs_for_run():
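    # Stored events can be read back per run with get_logs_for_run, in the
    # order they were emitted.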
    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events, result = gather_events(_solids)

    event_log_storage = PostgresEventLogStorage.create_nuked_storage(
        get_test_conn_string())

    for event in events:
        event_log_storage.store_event(event)

    out_events = event_log_storage.get_logs_for_run(result.run_id)

    assert event_types(out_events) == [
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]
Code Example #7
def test_nuke():
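    # wipe() removes every stored run.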
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage
    run_id = str(uuid.uuid4())
    storage.add_run(build_run(run_id=run_id, pipeline_name='some_pipeline'))
    assert len(storage.all_runs()) == 1
    storage.wipe()
    assert list(storage.all_runs()) == []
Code Example #8
def test_handle_run_event_pipeline_success_test():
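    # A PIPELINE_START event handled via handle_run_event moves the run to
    # STARTED; a PIPELINE_SUCCESS event for a different run_id leaves it
    # untouched, while one for the correct run_id moves it to SUCCESS.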

    run_storage = PostgresRunStorage.create_nuked_storage(
        get_test_conn_string())

    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun.create_empty_run(pipeline_name='pipeline_name',
                                              run_id=run_id)
    run_storage.add_run(run_to_add)

    dagster_pipeline_start_event = DagsterEvent(
        message='a message',
        event_type_value=DagsterEventType.PIPELINE_START.value,
        pipeline_name='pipeline_name',
        step_key=None,
        solid_handle=None,
        step_kind_value=None,
        logging_tags=None,
    )

    run_storage.handle_run_event(run_id, dagster_pipeline_start_event)

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        str(uuid.uuid4()),  # diff run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        run_id,  # correct run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.SUCCESS
Code Example #9
File: test_event_log.py Project: databill86/dagster
def test_listen_notify_filter_two_runs_event():
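    # Watch two runs at once: after the EventWatcherStart marker, every event
    # pulled from the watcher queue should belong to one of the two watched
    # run_ids.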
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(
        get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    event_watcher = create_event_watcher(get_test_conn_string())

    run_id_one = str(uuid.uuid4())
    run_id_two = str(uuid.uuid4())

    event_watcher.watch_run(run_id_one)
    event_watcher.watch_run(run_id_two)

    try:
        events_one, result_one = gather_events(
            _solids, run_config=RunConfig(run_id=run_id_one))
        for event in events_one:
            event_log_storage.store_event(event)

        events_two, result_two = gather_events(
            _solids, run_config=RunConfig(run_id=run_id_two))
        for event in events_two:
            event_log_storage.store_event(event)

        event = event_watcher.queue.get(block=True)

        assert isinstance(event, EventWatcherStart)

        for _ in range(0, 10):
            watcher_event = event_watcher.queue.get(block=True)
            assert isinstance(watcher_event, EventWatcherEvent)
            assert watcher_event.payload.run_id in {
                result_one.run_id, result_two.run_id
            }

    finally:
        event_watcher.close()
Code Example #10
def test_fetch_by_pipeline():
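    # all_runs_for_pipeline filters the stored runs by pipeline name.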
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage
    one = str(uuid.uuid4())
    two = str(uuid.uuid4())
    storage.add_run(build_run(run_id=one, pipeline_name='some_pipeline'))
    storage.add_run(build_run(run_id=two, pipeline_name='some_other_pipeline'))
    assert len(storage.all_runs()) == 2
    some_runs = storage.all_runs_for_pipeline('some_pipeline')
    assert len(some_runs) == 1
    assert some_runs[0].run_id == one
Code Example #11
def test_listen_notify_filter_run_event():
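    # Only run_id_two is watched, so although events for both runs are stored,
    # the watcher queue should only yield events tagged with run_id_two.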
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(
        get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()  # pylint: disable=no-value-for-parameter

    event_watcher = create_event_watcher(get_test_conn_string())

    run_id_one = str(uuid.uuid4())
    run_id_two = str(uuid.uuid4())

    # only watch one of the runs
    event_watcher.watch_run(run_id_two)

    try:
        events_one, _result_one = gather_events(
            _solids, run_config=RunConfig(run_id=run_id_one))
        for event in events_one:
            event_log_storage.store_event(event)

        events_two, result_two = gather_events(
            _solids, run_config=RunConfig(run_id=run_id_two))
        for event in events_two:
            event_log_storage.store_event(event)

        event = event_watcher.queue.get(block=True)

        assert isinstance(event, EventWatcherStart)

        for _ in range(0, 5):
            watcher_event = event_watcher.queue.get(block=True)
            assert isinstance(watcher_event, EventWatcherEvent)
            assert watcher_event.payload.run_id == result_two.run_id

    finally:
        event_watcher.close()
Code Example #12
def test_fetch_by_tag():
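    # all_runs_for_tag returns only the runs stored with a matching tag
    # key/value pair.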
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage
    one = str(uuid.uuid4())
    two = str(uuid.uuid4())
    three = str(uuid.uuid4())
    storage.add_run(build_run(run_id=one, pipeline_name='some_pipeline', tags={'mytag': 'hello'}))
    storage.add_run(build_run(run_id=two, pipeline_name='some_pipeline', tags={'mytag': 'goodbye'}))
    storage.add_run(build_run(run_id=three, pipeline_name='some_pipeline'))
    assert len(storage.all_runs()) == 3
    some_runs = storage.all_runs_for_tag('mytag', 'hello')
    assert len(some_runs) == 1
    assert some_runs[0].run_id == one
Code Example #13
def test_basic_get_logs_for_run_multiple_runs_cursors():
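    # Cursored reads are scoped to a single run. Judging by the assertions
    # below, cursor=N appears to skip the first N+1 log entries of that run
    # (cursor=1 leaves 5 of the 7 events, cursor=2 leaves 4).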
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(
        get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()  # pylint: disable=no-value-for-parameter

    events_one, result_one = gather_events(_solids)
    for event in events_one:
        event_log_storage.store_event(event)

    events_two, result_two = gather_events(_solids)
    for event in events_two:
        event_log_storage.store_event(event)

    out_events_one = event_log_storage.get_logs_for_run(result_one.run_id,
                                                        cursor=1)
    assert len(out_events_one) == 5

    assert event_types(out_events_one) == [
        # DagsterEventType.PIPELINE_START,
        # DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]

    assert set(map(lambda e: e.run_id, out_events_one)) == {result_one.run_id}

    out_events_two = event_log_storage.get_logs_for_run(result_two.run_id,
                                                        cursor=2)
    assert len(out_events_two) == 4

    assert event_types(out_events_two) == [
        # DagsterEventType.PIPELINE_START,
        # DagsterEventType.ENGINE_EVENT,
        # DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]

    assert set(map(lambda e: e.run_id, out_events_two)) == {result_two.run_id}
Code Example #14
File: test_event_log.py Project: databill86/dagster
def test_basic_get_logs_for_run_multiple_runs():
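    # Logs are isolated per run: each run gets its full set of 7 events back,
    # and every returned event carries that run's run_id.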
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(
        get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events_one, result_one = gather_events(_solids)
    for event in events_one:
        event_log_storage.store_event(event)

    events_two, result_two = gather_events(_solids)
    for event in events_two:
        event_log_storage.store_event(event)

    out_events_one = event_log_storage.get_logs_for_run(result_one.run_id)
    assert len(out_events_one) == 7

    assert set(event_types(out_events_one)) == set([
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ])

    assert set(map(lambda e: e.run_id, out_events_one)) == {result_one.run_id}

    out_events_two = event_log_storage.get_logs_for_run(result_two.run_id)
    assert len(out_events_two) == 7

    assert set(event_types(out_events_two)) == set([
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ])

    assert set(map(lambda e: e.run_id, out_events_two)) == {result_two.run_id}
Code Example #15
def test_add_get_postgres_run_storage(pg_db):
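    # Variant of test_add_get_postgres_run_storage that builds the run with
    # the build_run helper and also checks that add_run returns a truthy
    # value.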
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())

    run_id = str(uuid.uuid4())
    run_to_add = build_run(pipeline_name='pipeline_name', run_id=run_id)
    added = run_storage.add_run(run_to_add)
    assert added

    fetched_run = run_storage.get_run_by_id(run_id)

    assert run_to_add == fetched_run

    assert run_storage.has_run(run_id)
    assert not run_storage.has_run(str(uuid.uuid4()))

    assert run_storage.all_runs() == [run_to_add]
    assert run_storage.all_runs_for_pipeline('pipeline_name') == [run_to_add]
    assert run_storage.all_runs_for_pipeline('nope') == []

    run_storage.wipe()
    assert run_storage.all_runs() == []
Code Example #16
def test_basic_get_logs_for_run_cursor():
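    # Judging by the assertions, cursor=0 drops the first stored event and
    # cursor=1 drops the first two, i.e. only events after the cursor index
    # are returned.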
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(
        get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()  # pylint: disable=no-value-for-parameter

    events, result = gather_events(_solids)

    for event in events:
        event_log_storage.store_event(event)

    assert event_types(
        event_log_storage.get_logs_for_run(result.run_id, cursor=0)) == [
            # DagsterEventType.PIPELINE_START,
            DagsterEventType.ENGINE_EVENT,
            DagsterEventType.STEP_START,
            DagsterEventType.STEP_OUTPUT,
            DagsterEventType.STEP_SUCCESS,
            DagsterEventType.ENGINE_EVENT,
            DagsterEventType.PIPELINE_SUCCESS,
        ]

    assert event_types(
        event_log_storage.get_logs_for_run(result.run_id, cursor=1)) == [
            # DagsterEventType.PIPELINE_START,
            # DagsterEventType.ENGINE_EVENT,
            DagsterEventType.STEP_START,
            DagsterEventType.STEP_OUTPUT,
            DagsterEventType.STEP_SUCCESS,
            DagsterEventType.ENGINE_EVENT,
            DagsterEventType.PIPELINE_SUCCESS,
        ]
Code Example #17
def test_fetch_by_status_cursored():
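    # Here the cursor is itself a run_id: get_runs_for_status appears to
    # return the matching runs added before that run, and with limit=1 it
    # yields the most recently added matching run before the cursor.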
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage
    one = str(uuid.uuid4())
    two = str(uuid.uuid4())
    three = str(uuid.uuid4())
    four = str(uuid.uuid4())
    storage.add_run(
        build_run(run_id=one, pipeline_name='some_pipeline', status=PipelineRunStatus.STARTED)
    )
    storage.add_run(
        build_run(run_id=two, pipeline_name='some_pipeline', status=PipelineRunStatus.STARTED)
    )
    storage.add_run(
        build_run(run_id=three, pipeline_name='some_pipeline', status=PipelineRunStatus.NOT_STARTED)
    )
    storage.add_run(
        build_run(run_id=four, pipeline_name='some_pipeline', status=PipelineRunStatus.STARTED)
    )

    cursor_four_runs = storage.get_runs_for_status(PipelineRunStatus.STARTED, cursor=four)
    assert len(cursor_four_runs) == 2
    assert {run.run_id for run in cursor_four_runs} == {one, two}

    cursor_two_runs = storage.get_runs_for_status(PipelineRunStatus.STARTED, cursor=two)
    assert len(cursor_two_runs) == 1
    assert {run.run_id for run in cursor_two_runs} == {one}

    cursor_one_runs = storage.get_runs_for_status(PipelineRunStatus.STARTED, cursor=one)
    assert not cursor_one_runs

    cursor_four_limit_one = storage.get_runs_for_status(
        PipelineRunStatus.STARTED, cursor=four, limit=1
    )
    assert len(cursor_four_limit_one) == 1
    assert cursor_four_limit_one[0].run_id == two
Code Example #18
def test_slice():
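    # all_runs, all_runs_for_pipeline and all_runs_for_tag all take cursor and
    # limit: with cursor=three and limit=1 each returns just the run added
    # immediately before it.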
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    one, two, three = sorted([str(uuid.uuid4()), str(uuid.uuid4()), str(uuid.uuid4())])
    storage.add_run(build_run(run_id=one, pipeline_name='some_pipeline', tags={'mytag': 'hello'}))
    storage.add_run(build_run(run_id=two, pipeline_name='some_pipeline', tags={'mytag': 'hello'}))
    storage.add_run(build_run(run_id=three, pipeline_name='some_pipeline', tags={'mytag': 'hello'}))

    all_runs = storage.all_runs()
    assert len(all_runs) == 3
    sliced_runs = storage.all_runs(cursor=three, limit=1)
    assert len(sliced_runs) == 1
    assert sliced_runs[0].run_id == two

    all_runs = storage.all_runs_for_pipeline('some_pipeline')
    assert len(all_runs) == 3
    sliced_runs = storage.all_runs_for_pipeline('some_pipeline', cursor=three, limit=1)
    assert len(sliced_runs) == 1
    assert sliced_runs[0].run_id == two

    all_runs = storage.all_runs_for_tag('mytag', 'hello')
    assert len(all_runs) == 3
    sliced_runs = storage.all_runs_for_tag('mytag', 'hello', cursor=three, limit=1)
    assert len(sliced_runs) == 1
    assert sliced_runs[0].run_id == two
Code Example #19
def test_handle_run_event_pipeline_success_test():
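    # Variant of the handle_run_event test above that constructs the
    # PipelineRun explicitly instead of using create_empty_run.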

    run_storage = PostgresRunStorage.create_nuked_storage(
        get_test_conn_string())

    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun(
        pipeline_name='pipeline_name',
        run_id=run_id,
        environment_dict={},
        mode='some_mode',
        # https://github.com/dagster-io/dagster/issues/1709
        # ExecutionSelector should be threaded all the way
        # down from the top
        selector=ExecutionSelector('pipeline_name'),
        reexecution_config=None,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
    )
    run_storage.add_run(run_to_add)

    dagster_pipeline_start_event = DagsterEvent(
        message='a message',
        event_type_value=DagsterEventType.PIPELINE_START.value,
        pipeline_name='pipeline_name',
        step_key=None,
        solid_handle=None,
        step_kind_value=None,
        logging_tags=None,
    )

    run_storage.handle_run_event(run_id, dagster_pipeline_start_event)

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        str(uuid.uuid4()),  # diff run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        run_id,  # correct run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.SUCCESS
Code Example #20
def get_conn_with_run_events():
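    # Test helper: open an autocommit psycopg2 connection and reset the table
    # used by these tests by executing DROP_TABLE_SQL followed by
    # CREATE_TABLE_SQL.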
    conn = psycopg2.connect(get_test_conn_string())
    conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
    conn.cursor().execute(DROP_TABLE_SQL)
    conn.cursor().execute(CREATE_TABLE_SQL)
    return conn