Example #1
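Evidently the inner evaluation function of Dagster's asset sensor definition: it parses the persisted cursor, fetches at most one unseen ASSET_MATERIALIZATION event for the monitored asset (newest first), delegates to the user-supplied materialization_fn, and advances the cursor to that record's storage_id. Both self._asset_key and materialization_fn come from the enclosing definition.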
            def _fn(context):
                # Resume from the persisted cursor: the storage_id of the last
                # materialization this sensor processed.
                after_cursor = None
                if context.cursor:
                    try:
                        after_cursor = int(context.cursor)
                    except ValueError:
                        after_cursor = None

                # Fetch the single most recent materialization of the monitored
                # asset that has not been seen yet.
                event_records = context.instance.get_event_records(
                    EventRecordsFilter(
                        event_type=DagsterEventType.ASSET_MATERIALIZATION,
                        asset_key=self._asset_key,
                        after_cursor=after_cursor,
                    ),
                    ascending=False,
                    limit=1,
                )

                if not event_records:
                    return

                # Delegate to the user-supplied function, then advance the
                # cursor so this event is not revisited.
                event_record = event_records[0]
                yield from materialization_fn(context, event_record.event_log_entry)
                context.update_cursor(str(event_record.storage_id))
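The same cursor pattern can be reused in a hand-written sensor. Below is a minimal sketch, not the library's implementation: the asset key and pipeline name are hypothetical, and it relies only on the get_event_records/EventRecordsFilter API shown above to request a downstream run per unseen materialization.

from dagster import (
    AssetKey,
    DagsterEventType,
    EventRecordsFilter,
    RunRequest,
    sensor,
)

# Hypothetical names, for illustration only.
UPSTREAM_ASSET = AssetKey(["path", "to", "asset_one"])


@sensor(pipeline_name="downstream_pipeline")
def upstream_materialization_sensor(context):
    # Resume from the last storage_id we processed, mirroring _fn above.
    after_cursor = int(context.cursor) if context.cursor else None

    event_records = context.instance.get_event_records(
        EventRecordsFilter(
            event_type=DagsterEventType.ASSET_MATERIALIZATION,
            asset_key=UPSTREAM_ASSET,
            after_cursor=after_cursor,
        ),
        ascending=True,  # oldest first, so the cursor only ever moves forward
        limit=5,
    )

    for record in event_records:
        yield RunRequest(run_key=str(record.storage_id))
        context.update_cursor(str(record.storage_id))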
Example #2
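A storage test that materializes one asset with several metadata entries, stores the resulting events, and verifies that the asset key is indexed and that get_asset_events and get_event_records both return the same materialization event.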
    def test_asset_materialization(self, storage):
        asset_key = AssetKey(["path", "to", "asset_one"])

        @solid
        def materialize_one(_):
            yield AssetMaterialization(
                asset_key=asset_key,
                metadata={
                    "text": "hello",
                    "json": {
                        "hello": "world"
                    },
                    "one_float": 1.0,
                    "one_int": 1,
                },
            )
            yield Output(1)

        def _solids():
            materialize_one()

        with instance_for_test() as instance:
            if not storage._instance:  # pylint: disable=protected-access
                storage.register_instance(instance)

            events_one, _ = _synthesize_events(_solids, instance=instance)

            for event in events_one:
                storage.store_event(event)

            assert asset_key in set(storage.all_asset_keys())
            events = storage.get_asset_events(asset_key)
            assert len(events) == 1
            event = events[0]
            assert isinstance(event, EventLogEntry)
            assert (event.dagster_event.event_type_value ==
                    DagsterEventType.ASSET_MATERIALIZATION.value)

            records = storage.get_event_records(
                EventRecordsFilter(asset_key=asset_key))
            assert len(records) == 1
            record = records[0]
            assert isinstance(record, EventLogRecord)
            assert record.event_log_entry == event
Example #3
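The evaluation function generated for a run status sensor. On first evaluation (or with an invalid cursor) it initializes a RunStatusSensorCursor and skips; afterwards it pages through matching run events five at a time, skips runs from other repositories or outside the pipeline/job selection, invokes the user function inside an error boundary, and advances the cursor after every record so events are never revisited.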
        def _wrapped_fn(context: SensorEvaluationContext):
            # initialize the cursor to (most recent event id, current timestamp) when:
            # * it's the first time the sensor is started
            # * or, the cursor isn't in a valid format (backcompat)
            if context.cursor is None or not RunStatusSensorCursor.is_valid(context.cursor):
                most_recent_event_records = list(
                    context.instance.get_event_records(ascending=False, limit=1)
                )
                most_recent_event_id = (
                    most_recent_event_records[0].storage_id
                    if len(most_recent_event_records) == 1
                    else -1
                )

                new_cursor = RunStatusSensorCursor(
                    update_timestamp=pendulum.now("UTC").isoformat(),
                    record_id=most_recent_event_id,
                )
                context.update_cursor(new_cursor.to_json())
                yield SkipReason(f"Initiating {name}. Set cursor to {new_cursor}")
                return

            record_id, update_timestamp = RunStatusSensorCursor.from_json(context.cursor)

            # Fetch events after the cursor id
            # * we move the cursor forward to the latest visited event's id to avoid revisits
            # * when the daemon is down, because we persist the cursor info, we can pick up
            #   where we left off and backfill alerts for the qualified events (up to 5 at a
            #   time) that occurred during the downtime
            # Note: this is a cross-run query which requires extra handling in sqlite; see
            # SqliteEventLogStorage for details.
            event_records = context.instance.get_event_records(
                EventRecordsFilter(
                    after_cursor=RunShardedEventsCursor(
                        id=record_id,
                        run_updated_after=cast(datetime, pendulum.parse(update_timestamp)),
                    ),
                    event_type=PIPELINE_RUN_STATUS_TO_EVENT_TYPE[pipeline_run_status],
                ),
                ascending=True,
                limit=5,
            )

            for event_record in event_records:
                event_log_entry = event_record.event_log_entry
                storage_id = event_record.storage_id

                # get run info
                run_records = context.instance.get_run_records(
                    filters=RunsFilter(run_ids=[event_log_entry.run_id])
                )

                # skip if we couldn't find the right run
                if len(run_records) != 1:
                    # bc we couldn't find the run, we use the event timestamp as the approximate
                    # run update timestamp
                    approximate_update_timestamp = utc_datetime_from_timestamp(
                        event_log_entry.timestamp
                    )
                    context.update_cursor(
                        RunStatusSensorCursor(
                            record_id=storage_id,
                            update_timestamp=approximate_update_timestamp.isoformat(),
                        ).to_json()
                    )
                    continue

                pipeline_run = run_records[0].pipeline_run
                update_timestamp = run_records[0].update_timestamp

                # skip if any of the following happens:
                if (
                    # the pipeline does not have a repository (manually executed)
                    not pipeline_run.external_pipeline_origin
                    or
                    # the pipeline does not belong to the current repository
                    pipeline_run.external_pipeline_origin.external_repository_origin.repository_name
                    != context.repository_name
                    or
                    # the pipeline is not selected
                    (pipeline_selection and pipeline_run.pipeline_name not in pipeline_selection)
                    or
                    # the job is not selected
                    (
                        job_selection
                        and pipeline_run.pipeline_name not in map(lambda x: x.name, job_selection)
                    )
                ):
                    context.update_cursor(
                        RunStatusSensorCursor(
                            record_id=storage_id, update_timestamp=update_timestamp.isoformat()
                        ).to_json()
                    )
                    continue

                serializable_error = None

                try:
                    with user_code_error_boundary(
                        RunStatusSensorExecutionError,
                        lambda: f'Error occurred during the execution sensor "{name}".',
                    ):
                        # one user code invocation maps to one failure event
                        run_status_sensor_fn(
                            RunStatusSensorContext(
                                sensor_name=name,
                                dagster_run=pipeline_run,
                                dagster_event=event_log_entry.dagster_event,
                                instance=context.instance,
                            )
                        )
                except RunStatusSensorExecutionError as run_status_sensor_execution_error:
                    # When the user code errors, we report the error to the sensor tick, not the original run.
                    serializable_error = serializable_error_info_from_exc_info(
                        run_status_sensor_execution_error.original_exc_info
                    )

                context.update_cursor(
                    RunStatusSensorCursor(
                        record_id=storage_id, update_timestamp=update_timestamp.isoformat()
                    ).to_json()
                )

                # Yield a PipelineRunReaction to indicate whether the user code succeeded.
                # The sensor machinery will then
                # * report back to the original run on success
                # * update the cursor and job state
                yield PipelineRunReaction(
                    pipeline_run=pipeline_run,
                    run_status=pipeline_run_status,
                    error=serializable_error,
                )
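A note on the cursor format used above: RunStatusSensorCursor serializes to JSON with to_json, validates with is_valid, and from_json unpacks back into the (record_id, update_timestamp) pair consumed at the top of the loop. A minimal round-trip sketch; the import path and the record id are assumptions for illustration.

import pendulum

# Assumed import path; the class ships alongside the run status sensor machinery above.
from dagster.core.definitions.run_status_sensor_definition import RunStatusSensorCursor

cursor = RunStatusSensorCursor(
    record_id=42,  # illustrative storage id
    update_timestamp=pendulum.now("UTC").isoformat(),
)

serialized = cursor.to_json()  # safe to persist via context.update_cursor
assert RunStatusSensorCursor.is_valid(serialized)

# Unpacks in the same (record_id, update_timestamp) order used above.
record_id, update_timestamp = RunStatusSensorCursor.from_json(serialized)
assert record_id == 42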
Example #4
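A SQLite-only test for get_event_records. SQLite shards event logs per run, so a cross-run query needs a RunShardedEventsCursor carrying both a record id and a run-update timestamp. The test executes three runs and checks that filtering by event type after the first run's cursor returns exactly the two later PIPELINE_SUCCESS events.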
    def test_get_event_records_sqlite(self, storage):
        # test for sqlite only because sqlite requires special logic to handle cross-run queries
        if not isinstance(storage, SqliteEventLogStorage):
            pytest.skip()

        asset_key = AssetKey(["path", "to", "asset_one"])

        events = []

        def _append_event(event):
            events.append(event)

        @solid
        def materialize_one(_):
            yield AssetMaterialization(
                asset_key=asset_key,
                metadata={
                    "text": "hello",
                    "json": {
                        "hello": "world"
                    },
                    "one_float": 1.0,
                    "one_int": 1,
                },
            )
            yield Output(1)

        @pipeline(mode_defs=[_mode_def(_append_event)])
        def a_pipe():
            materialize_one()

        with instance_for_test() as instance:
            if not storage._instance:  # pylint: disable=protected-access
                storage.register_instance(instance)

            # first run
            execute_run(
                InMemoryPipeline(a_pipe),
                instance.create_run_for_pipeline(
                    a_pipe,
                    run_id="1",
                    run_config={"loggers": {
                        "callback": {},
                        "console": {}
                    }}),
                instance,
            )

            for event in events:
                storage.store_event(event)

            run_records = instance.get_run_records()
            assert len(run_records) == 1

            # all logs returned in descending order
            all_event_records = storage.get_event_records()
            assert _event_types([all_event_records[0].event_log_entry]) == [
                DagsterEventType.PIPELINE_SUCCESS
            ]
            assert _event_types([all_event_records[-1].event_log_entry]) == [
                DagsterEventType.PIPELINE_START
            ]

            # second run
            events = []
            execute_run(
                InMemoryPipeline(a_pipe),
                instance.create_run_for_pipeline(
                    a_pipe,
                    run_id="2",
                    run_config={"loggers": {
                        "callback": {},
                        "console": {}
                    }}),
                instance,
            )
            run_records = instance.get_run_records()
            assert len(run_records) == 2
            for event in events:
                storage.store_event(event)

            # third run
            events = []
            execute_run(
                InMemoryPipeline(a_pipe),
                instance.create_run_for_pipeline(
                    a_pipe,
                    run_id="3",
                    run_config={"loggers": {
                        "callback": {},
                        "console": {}
                    }}),
                instance,
            )
            run_records = instance.get_run_records()
            assert len(run_records) == 3
            for event in events:
                storage.store_event(event)

            # filter by event type, after a run-sharded cursor (events after the first run)
            filtered_records = storage.get_event_records(
                EventRecordsFilter(
                    event_type=DagsterEventType.PIPELINE_SUCCESS,
                    after_cursor=RunShardedEventsCursor(
                        id=0, run_updated_after=run_records[-1].update_timestamp
                    ),
                ),
                ascending=True,
            )
            assert len(filtered_records) == 2
            assert _event_types([r.event_log_entry for r in filtered_records]) == [
                DagsterEventType.PIPELINE_SUCCESS,
                DagsterEventType.PIPELINE_SUCCESS,
            ]
            assert [r.event_log_entry.run_id for r in filtered_records] == ["2", "3"]
Example #5
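The non-SQLite counterpart of the previous test: it verifies that get_event_records returns records in descending storage_id order and that after_cursor (exclusive), ascending, limit, and event_type filtering all behave as asserted below.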
    def test_get_event_records(self, storage):
        if isinstance(storage, SqliteEventLogStorage):
            # test sqlite in test_get_event_records_sqlite
            pytest.skip()

        asset_key = AssetKey(["path", "to", "asset_one"])

        @solid
        def materialize_one(_):
            yield AssetMaterialization(
                asset_key=asset_key,
                metadata={
                    "text": "hello",
                    "json": {
                        "hello": "world"
                    },
                    "one_float": 1.0,
                    "one_int": 1,
                },
            )
            yield Output(1)

        def _solids():
            materialize_one()

        events, _ = _synthesize_events(_solids)

        for event in events:
            storage.store_event(event)

        all_records = storage.get_event_records()
        # all logs returned in descending order
        assert all_records
        min_record_num = all_records[-1].storage_id
        max_record_num = min_record_num + len(all_records) - 1
        assert [r.storage_id for r in all_records] == list(
            range(max_record_num, min_record_num - 1, -1)
        )
        assert _event_types([all_records[0].event_log_entry]) == [
            DagsterEventType.PIPELINE_SUCCESS
        ]
        assert _event_types([all_records[-1].event_log_entry]) == [
            DagsterEventType.PIPELINE_START
        ]

        # after cursor (exclusive)
        assert not list(
            filter(
                lambda r: r.storage_id <= 2,
                storage.get_event_records(EventRecordsFilter(after_cursor=2)),
            )
        )
        assert [
            r.storage_id
            for r in storage.get_event_records(
                EventRecordsFilter(after_cursor=min_record_num + 2),
                ascending=True,
                limit=2,
            )
        ] == [min_record_num + 3, min_record_num + 4]
        assert [
            r.storage_id
            for r in storage.get_event_records(
                EventRecordsFilter(after_cursor=min_record_num + 2),
                ascending=False,
                limit=2,
            )
        ] == [max_record_num, max_record_num - 1]

        filtered_records = storage.get_event_records(
            EventRecordsFilter(event_type=DagsterEventType.PIPELINE_SUCCESS)
        )
        assert _event_types([r.event_log_entry for r in filtered_records]) == [
            DagsterEventType.PIPELINE_SUCCESS
        ]
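Taken together, these assertions pin down the pagination contract: after_cursor is exclusive, ascending controls ordering, and limit caps the batch size. Below is a minimal sketch of a full-scan helper built on that contract; the function name is ours, and on SQLite a cross-run scan would instead need the RunShardedEventsCursor from Example #4.

from dagster import EventRecordsFilter


def iterate_event_records(storage, batch_size=100):
    """Yield every event record in ascending storage_id order, batch by batch."""
    cursor = None
    while True:
        batch = storage.get_event_records(
            EventRecordsFilter(after_cursor=cursor),
            ascending=True,
            limit=batch_size,
        )
        if not batch:
            return
        yield from batch
        # after_cursor is exclusive, so resuming from the last storage_id
        # seen never yields a duplicate record.
        cursor = batch[-1].storage_id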