コード例 #1
0
ファイル: sql_event_log.py プロジェクト: xyzlat/dagster
    def get_logs_for_run_by_log_id(self, run_id, cursor=-1):
        '''Fetch a run's event log entries, keyed by their storage row id.

        Args:
            run_id (str): The run whose logs to fetch.
            cursor (int): Zero-indexed position; only entries after it are
                returned. -1 (the default) returns everything.

        Returns:
            dict: Mapping of log row id to deserialized EventRecord.

        Raises:
            DagsterEventLogInvalidForRun: If a stored event cannot be
                deserialized.
        '''
        check.str_param(run_id, 'run_id')
        check.int_param(cursor, 'cursor')
        check.invariant(
            cursor >= -1,
            'Don\'t know what to do with negative cursor {cursor}'.format(cursor=cursor),
        )

        # The cursor is 0-based while the autoincrement id column is 1-based,
        # so shift by one before comparing against row ids.
        min_row_id = cursor + 1

        query = (
            db.select([SqlEventLogStorageTable.c.id, SqlEventLogStorageTable.c.event])
            .where(SqlEventLogStorageTable.c.run_id == run_id)
            .where(SqlEventLogStorageTable.c.id > min_row_id)
            .order_by(SqlEventLogStorageTable.c.id.asc())
        )

        with self.connect(run_id) as conn:
            rows = conn.execute(query).fetchall()

        events = {}
        try:
            for row_id, serialized_event in rows:
                events[row_id] = check.inst_param(
                    deserialize_json_to_dagster_namedtuple(serialized_event),
                    'event',
                    EventRecord,
                )
        except (seven.JSONDecodeError, check.CheckError) as err:
            six.raise_from(DagsterEventLogInvalidForRun(run_id=run_id), err)

        return events
コード例 #2
0
    def get_stats_for_run(self, run_id):
        """Summarize a run's event log into a PipelineRunStatsSnapshot.

        Counts events per dagster event type and records the latest timestamp
        of each type, then derives step counts and lifecycle timestamps.

        Args:
            run_id (str): The run to summarize.

        Returns:
            PipelineRunStatsSnapshot

        Raises:
            DagsterEventLogInvalidForRun: If the stored rows cannot be
                interpreted.
        """
        check.str_param(run_id, "run_id")

        query = (
            db.select([
                SqlEventLogStorageTable.c.dagster_event_type,
                db.func.count().label("n_events_of_type"),
                db.func.max(SqlEventLogStorageTable.c.timestamp).label("last_event_timestamp"),
            ])
            .where(SqlEventLogStorageTable.c.run_id == run_id)
            .group_by("dagster_event_type")
        )

        with self.run_connection(run_id) as conn:
            rows = conn.execute(query).fetchall()

        try:
            counts = {}
            times = {}
            for event_type, event_count, last_timestamp in rows:
                # Rows with an empty event type are non-dagster log lines; skip.
                if event_type:
                    counts[event_type] = event_count
                    times[event_type] = last_timestamp

            enqueued_time = times.get(DagsterEventType.PIPELINE_ENQUEUED.value)
            launch_time = times.get(DagsterEventType.PIPELINE_STARTING.value)
            start_time = times.get(DagsterEventType.PIPELINE_START.value)
            # The run's end is whichever terminal event was recorded, preferring
            # success, then failure, then cancellation.
            end_time = times.get(
                DagsterEventType.PIPELINE_SUCCESS.value,
                times.get(
                    DagsterEventType.PIPELINE_FAILURE.value,
                    times.get(DagsterEventType.PIPELINE_CANCELED.value),
                ),
            )

            def _to_float(ts):
                # Convert a DB timestamp to a float epoch, passing None through.
                return datetime_as_float(ts) if ts else None

            return PipelineRunStatsSnapshot(
                run_id=run_id,
                steps_succeeded=counts.get(DagsterEventType.STEP_SUCCESS.value, 0),
                steps_failed=counts.get(DagsterEventType.STEP_FAILURE.value, 0),
                materializations=counts.get(DagsterEventType.ASSET_MATERIALIZATION.value, 0),
                expectations=counts.get(DagsterEventType.STEP_EXPECTATION_RESULT.value, 0),
                enqueued_time=_to_float(enqueued_time),
                launch_time=_to_float(launch_time),
                start_time=_to_float(start_time),
                end_time=_to_float(end_time),
            )
        except (seven.JSONDecodeError, check.CheckError) as err:
            raise DagsterEventLogInvalidForRun(run_id=run_id) from err
コード例 #3
0
ファイル: sql_event_log.py プロジェクト: amarrella/dagster
    def get_step_stats_for_run(self, run_id, step_keys=None):
        """Build per-step stats for a run from its raw event log records.

        Args:
            run_id (str): The run whose step stats to compute.
            step_keys (Optional[List[str]]): If provided, restrict the stats
                to these step keys; otherwise cover every step in the run.

        Returns:
            The result of ``build_run_step_stats_from_events`` — per-step
            stats snapshots derived from the run's step-level events.

        Raises:
            DagsterEventLogInvalidForRun: If a stored event cannot be
                deserialized.
        """
        check.str_param(run_id, "run_id")
        check.opt_list_param(step_keys, "step_keys", of_type=str)

        # Originally, this was two different queries:
        # 1) one query which aggregated top-level step stats by grouping by event type / step_key in
        #    a single query, using pure SQL (e.g. start_time, end_time, status, attempt counts).
        # 2) one query which fetched all the raw events for a specific event type and then inspected
        #    the deserialized event object to aggregate stats derived from sequences of events.
        #    (e.g. marker events, materializations, expectation results, attempts timing, etc.)
        #
        # For simplicity, we now just do the second type of query and derive the stats in Python
        # from the raw events.  This has the benefit of being easier to read and also the benefit of
        # being able to share code with the in-memory event log storage implementation.  We may
        # choose to revisit this in the future, especially if we are able to do JSON-column queries
        # in SQL as a way of bypassing the serdes layer in all cases.
        raw_event_query = (
            db.select([SqlEventLogStorageTable.c.event])
            .where(SqlEventLogStorageTable.c.run_id == run_id)
            .where(SqlEventLogStorageTable.c.step_key != None)
            .where(
                SqlEventLogStorageTable.c.dagster_event_type.in_(
                    [
                        DagsterEventType.STEP_START.value,
                        DagsterEventType.STEP_SUCCESS.value,
                        DagsterEventType.STEP_SKIPPED.value,
                        DagsterEventType.STEP_FAILURE.value,
                        # STEP_RESTARTED previously appeared twice in this
                        # list; listing it once is sufficient for the IN filter.
                        DagsterEventType.STEP_RESTARTED.value,
                        DagsterEventType.ASSET_MATERIALIZATION.value,
                        DagsterEventType.STEP_EXPECTATION_RESULT.value,
                        DagsterEventType.STEP_UP_FOR_RETRY.value,
                        DagsterEventType.ENGINE_EVENT.value,
                    ]
                )
            )
            .order_by(SqlEventLogStorageTable.c.id.asc())
        )
        if step_keys:
            raw_event_query = raw_event_query.where(
                SqlEventLogStorageTable.c.step_key.in_(step_keys)
            )

        with self.run_connection(run_id) as conn:
            results = conn.execute(raw_event_query).fetchall()

        try:
            records = [
                check.inst_param(
                    deserialize_json_to_dagster_namedtuple(json_str), "event", EventLogEntry
                )
                for (json_str,) in results
            ]
            return build_run_step_stats_from_events(run_id, records)
        except (seven.JSONDecodeError, DeserializationError) as err:
            raise DagsterEventLogInvalidForRun(run_id=run_id) from err
コード例 #4
0
ファイル: sql_event_log.py プロジェクト: amarrella/dagster
    def get_logs_for_run_by_log_id(
        self,
        run_id,
        cursor=-1,
        dagster_event_type=None,
        limit=None,
    ):
        """Return a run's event log entries keyed by their storage row id.

        Args:
            run_id (str): The run whose logs to fetch.
            cursor (int): Zero-indexed position; rows after it are returned.
                -1 (the default) starts from the beginning.
            dagster_event_type (Optional[DagsterEventType]): If given, only
                events of this type are returned.
            limit (Optional[int]): Maximum number of rows to return.

        Returns:
            dict: Mapping of log row id to deserialized EventLogEntry.

        Raises:
            DagsterEventLogInvalidForRun: If a stored event cannot be
                deserialized.
        """
        check.str_param(run_id, "run_id")
        check.int_param(cursor, "cursor")
        check.invariant(
            cursor >= -1,
            "Don't know what to do with negative cursor {cursor}".format(cursor=cursor),
        )
        check.opt_inst_param(dagster_event_type, "dagster_event_type", DagsterEventType)

        query = db.select(
            [SqlEventLogStorageTable.c.id, SqlEventLogStorageTable.c.event]
        ).where(
            SqlEventLogStorageTable.c.run_id == run_id
        ).order_by(
            SqlEventLogStorageTable.c.id.asc()
        )
        if dagster_event_type is not None:
            query = query.where(
                SqlEventLogStorageTable.c.dagster_event_type == dagster_event_type.value
            )

        # Translate the 0-based cursor into a SQL OFFSET (skip cursor + 1 rows).
        query = query.offset(cursor + 1)

        if limit:
            query = query.limit(limit)

        with self.run_connection(run_id) as conn:
            rows = conn.execute(query).fetchall()

        events = {}
        try:
            for row_id, serialized_event in rows:
                events[row_id] = check.inst_param(
                    deserialize_json_to_dagster_namedtuple(serialized_event),
                    "event",
                    EventLogEntry,
                )
        except (seven.JSONDecodeError, DeserializationError) as err:
            raise DagsterEventLogInvalidForRun(run_id=run_id) from err

        return events
コード例 #5
0
ファイル: sql_event_log.py プロジェクト: plawler92/dagster
    def get_logs_for_run_by_log_id(self, run_id, cursor=-1, dagster_event_type=None):
        """Fetch a run's event log entries keyed by storage row id.

        Args:
            run_id (str): The run whose logs to fetch.
            cursor (int): Zero-indexed position; only entries after it are
                returned. -1 (the default) returns everything.
            dagster_event_type (Optional[DagsterEventType]): If given, only
                events of this type are returned.

        Returns:
            dict: Mapping of log row id to deserialized EventRecord.

        Raises:
            DagsterEventLogInvalidForRun: If a stored event cannot be
                deserialized.
        """
        check.str_param(run_id, "run_id")
        check.int_param(cursor, "cursor")
        check.invariant(
            cursor >= -1,
            "Don't know what to do with negative cursor {cursor}".format(cursor=cursor),
        )
        check.opt_inst_param(dagster_event_type, "dagster_event_type", DagsterEventType)

        # The 0-based cursor must be shifted to match the 1-based
        # autoincrement id column.
        min_row_id = cursor + 1

        query = (
            db.select([SqlEventLogStorageTable.c.id, SqlEventLogStorageTable.c.event])
            .where(SqlEventLogStorageTable.c.run_id == run_id)
            .where(SqlEventLogStorageTable.c.id > min_row_id)
            .order_by(SqlEventLogStorageTable.c.id.asc())
        )
        if dagster_event_type:
            query = query.where(
                SqlEventLogStorageTable.c.dagster_event_type == dagster_event_type.value
            )

        with self.run_connection(run_id) as conn:
            rows = conn.execute(query).fetchall()

        events = {}
        try:
            for row_id, serialized_event in rows:
                events[row_id] = check.inst_param(
                    deserialize_json_to_dagster_namedtuple(serialized_event),
                    "event",
                    EventRecord,
                )
        except (seven.JSONDecodeError, check.CheckError) as err:
            raise DagsterEventLogInvalidForRun(run_id=run_id) from err

        return events
コード例 #6
0
    def get_logs_for_run(self, run_id, cursor=-1):
        '''Return every log event for a run positioned after the given cursor.

        Args:
            run_id (str): The id of the run for which to fetch logs.
            cursor (Optional[int]): Logs are zero-indexed; entries after this
                position are returned, so -1 (the default) yields all logs.

        Returns:
            list: Deserialized EventRecord objects in insertion order.

        Raises:
            DagsterEventLogInvalidForRun: If a stored event cannot be
                deserialized.
        '''
        check.str_param(run_id, 'run_id')
        check.int_param(cursor, 'cursor')
        check.invariant(
            cursor >= -1,
            'Don\'t know what to do with negative cursor {cursor}'.format(cursor=cursor),
        )

        # Shift the 0-based cursor to line up with the 1-based autoincrement id.
        min_row_id = cursor + 1

        query = (
            db.select([SqlEventLogStorageTable.c.event])
            .where(SqlEventLogStorageTable.c.run_id == run_id)
            .where(SqlEventLogStorageTable.c.id > min_row_id)
            .order_by(SqlEventLogStorageTable.c.id.asc())
        )

        with self.connect(run_id) as conn:
            rows = conn.execute(query).fetchall()

        try:
            events = [
                check.inst_param(
                    deserialize_json_to_dagster_namedtuple(serialized_event),
                    'event',
                    EventRecord,
                )
                for (serialized_event,) in rows
            ]
        except (seven.JSONDecodeError, check.CheckError) as err:
            six.raise_from(DagsterEventLogInvalidForRun(run_id=run_id), err)

        return events
コード例 #7
0
ファイル: sql_event_log.py プロジェクト: xyzlat/dagster
    def get_step_stats_for_run(self, run_id):
        '''Aggregate per-step timing, status, materializations, and expectation
        results for a run.

        Combines two queries: a grouped SQL query for start/end timestamps and
        terminal status, and a raw-event scan for data only available inside
        the serialized event payload.

        Args:
            run_id (str): The run whose step stats to compute.

        Returns:
            List[RunStepKeyStatsSnapshot]: One snapshot per step key seen in
            the run's step-level events.

        Raises:
            DagsterEventLogInvalidForRun: If a stored event cannot be
                deserialized.
        '''
        check.str_param(run_id, 'run_id')

        # Event types that determine a step's start/end time and terminal status.
        STEP_STATS_EVENT_TYPES = [
            DagsterEventType.STEP_START.value,
            DagsterEventType.STEP_SUCCESS.value,
            DagsterEventType.STEP_SKIPPED.value,
            DagsterEventType.STEP_FAILURE.value,
        ]

        # Latest timestamp per (step_key, event_type) pair — enough to derive
        # timing and status without deserializing any event payloads.
        by_step_query = (db.select([
            SqlEventLogStorageTable.c.step_key,
            SqlEventLogStorageTable.c.dagster_event_type,
            db.func.max(
                SqlEventLogStorageTable.c.timestamp).label('timestamp'),
        ]).where(SqlEventLogStorageTable.c.run_id == run_id).where(
            SqlEventLogStorageTable.c.step_key != None).where(
                SqlEventLogStorageTable.c.dagster_event_type.in_(
                    STEP_STATS_EVENT_TYPES)).group_by(
                        SqlEventLogStorageTable.c.step_key,
                        SqlEventLogStorageTable.c.dagster_event_type,
                    ))

        with self.connect(run_id) as conn:
            results = conn.execute(by_step_query).fetchall()

        # Fold the grouped rows into {step_key: {start_time, end_time, status}}.
        by_step_key = defaultdict(dict)
        for result in results:
            step_key = result.step_key
            if result.dagster_event_type == DagsterEventType.STEP_START.value:
                by_step_key[step_key]['start_time'] = (datetime_as_float(
                    result.timestamp) if result.timestamp else None)
            if result.dagster_event_type == DagsterEventType.STEP_FAILURE.value:
                by_step_key[step_key]['end_time'] = (datetime_as_float(
                    result.timestamp) if result.timestamp else None)
                by_step_key[step_key]['status'] = StepEventStatus.FAILURE
            if result.dagster_event_type == DagsterEventType.STEP_SUCCESS.value:
                by_step_key[step_key]['end_time'] = (datetime_as_float(
                    result.timestamp) if result.timestamp else None)
                by_step_key[step_key]['status'] = StepEventStatus.SUCCESS
            if result.dagster_event_type == DagsterEventType.STEP_SKIPPED.value:
                by_step_key[step_key]['end_time'] = (datetime_as_float(
                    result.timestamp) if result.timestamp else None)
                by_step_key[step_key]['status'] = StepEventStatus.SKIPPED

        # Materializations and expectation results live inside the serialized
        # event payload, so those events must be fetched and deserialized.
        materializations = defaultdict(list)
        expectation_results = defaultdict(list)
        raw_event_query = (db.select([
            SqlEventLogStorageTable.c.event
        ]).where(SqlEventLogStorageTable.c.run_id == run_id).where(
            SqlEventLogStorageTable.c.step_key != None).where(
                SqlEventLogStorageTable.c.dagster_event_type.in_([
                    DagsterEventType.STEP_MATERIALIZATION.value,
                    DagsterEventType.STEP_EXPECTATION_RESULT.value,
                ])).order_by(SqlEventLogStorageTable.c.id.asc()))

        with self.connect(run_id) as conn:
            results = conn.execute(raw_event_query).fetchall()

        try:
            for (json_str, ) in results:
                event = check.inst_param(
                    deserialize_json_to_dagster_namedtuple(json_str), 'event',
                    EventRecord)
                if event.dagster_event.event_type == DagsterEventType.STEP_MATERIALIZATION:
                    materializations[event.step_key].append(
                        event.dagster_event.event_specific_data.materialization
                    )
                elif event.dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
                    expectation_results[event.step_key].append(
                        event.dagster_event.event_specific_data.
                        expectation_result)
        except (seven.JSONDecodeError, check.CheckError) as err:
            six.raise_from(DagsterEventLogInvalidForRun(run_id=run_id), err)

        # .get() returns None for steps missing a given stat, which the
        # snapshot accepts as "unknown".
        return [
            RunStepKeyStatsSnapshot(
                run_id=run_id,
                step_key=step_key,
                status=value.get('status'),
                start_time=value.get('start_time'),
                end_time=value.get('end_time'),
                materializations=materializations.get(step_key),
                expectation_results=expectation_results.get(step_key),
            ) for step_key, value in by_step_key.items()
        ]
コード例 #8
0
    def get_step_stats_for_run(self, run_id, step_keys=None):
        """Aggregate per-step timing, status, attempt counts, materializations,
        and expectation results for a run.

        Combines two queries: a grouped SQL query for timestamps, status, and
        attempt counts, and a raw-event scan for data only available inside
        the serialized event payload.

        Args:
            run_id (str): The run whose step stats to compute.
            step_keys (Optional[List[str]]): Restrict stats to these step
                keys; None covers every step in the run.

        Returns:
            List[RunStepKeyStatsSnapshot]

        Raises:
            DagsterEventLogInvalidForRun: If a stored event cannot be
                deserialized.
        """
        check.str_param(run_id, "run_id")
        check.opt_list_param(step_keys, "step_keys", of_type=str)

        # Event types that determine a step's timing, status, and attempts.
        STEP_STATS_EVENT_TYPES = [
            DagsterEventType.STEP_START.value,
            DagsterEventType.STEP_SUCCESS.value,
            DagsterEventType.STEP_SKIPPED.value,
            DagsterEventType.STEP_FAILURE.value,
            DagsterEventType.STEP_RESTARTED.value,
        ]

        # Latest timestamp and event count per (step_key, event_type) pair —
        # enough to derive timing/status/attempts without deserializing events.
        by_step_query = (db.select([
            SqlEventLogStorageTable.c.step_key,
            SqlEventLogStorageTable.c.dagster_event_type,
            db.func.max(
                SqlEventLogStorageTable.c.timestamp).label("timestamp"),
            db.func.count(SqlEventLogStorageTable.c.id).label(
                "n_events_of_type_for_step"),
        ]).where(SqlEventLogStorageTable.c.run_id == run_id).where(
            SqlEventLogStorageTable.c.step_key != None).where(
                SqlEventLogStorageTable.c.dagster_event_type.in_(
                    STEP_STATS_EVENT_TYPES)))

        if step_keys:
            by_step_query = by_step_query.where(
                SqlEventLogStorageTable.c.step_key.in_(step_keys))

        by_step_query = by_step_query.group_by(
            SqlEventLogStorageTable.c.step_key,
            SqlEventLogStorageTable.c.dagster_event_type,
        )

        with self.run_connection(run_id) as conn:
            results = conn.execute(by_step_query).fetchall()

        # Fold the grouped rows into
        # {step_key: {start_time, end_time, status, attempts}}.
        by_step_key = defaultdict(dict)
        for result in results:
            step_key = result.step_key
            if result.dagster_event_type == DagsterEventType.STEP_START.value:
                by_step_key[step_key]["start_time"] = (datetime_as_float(
                    result.timestamp) if result.timestamp else None)
                by_step_key[step_key]["attempts"] = by_step_key[step_key].get(
                    "attempts", 0) + 1
            if result.dagster_event_type == DagsterEventType.STEP_RESTARTED.value:
                by_step_key[step_key]["attempts"] = (
                    # In case we see step restarted events but not a step started event, we want to
                    # only count the restarted events, since the attempt count represents
                    # the number of times we have successfully started running the step
                    by_step_key[step_key].get("attempts", 0) +
                    result.n_events_of_type_for_step)
            if result.dagster_event_type == DagsterEventType.STEP_FAILURE.value:
                by_step_key[step_key]["end_time"] = (datetime_as_float(
                    result.timestamp) if result.timestamp else None)
                by_step_key[step_key]["status"] = StepEventStatus.FAILURE
            if result.dagster_event_type == DagsterEventType.STEP_SUCCESS.value:
                by_step_key[step_key]["end_time"] = (datetime_as_float(
                    result.timestamp) if result.timestamp else None)
                by_step_key[step_key]["status"] = StepEventStatus.SUCCESS
            if result.dagster_event_type == DagsterEventType.STEP_SKIPPED.value:
                by_step_key[step_key]["end_time"] = (datetime_as_float(
                    result.timestamp) if result.timestamp else None)
                by_step_key[step_key]["status"] = StepEventStatus.SKIPPED

        # Materializations and expectation results live inside the serialized
        # event payload, so those events must be fetched and deserialized.
        materializations = defaultdict(list)
        expectation_results = defaultdict(list)
        raw_event_query = (db.select([
            SqlEventLogStorageTable.c.event
        ]).where(SqlEventLogStorageTable.c.run_id == run_id).where(
            SqlEventLogStorageTable.c.step_key != None).where(
                SqlEventLogStorageTable.c.dagster_event_type.in_([
                    DagsterEventType.ASSET_MATERIALIZATION.value,
                    DagsterEventType.STEP_EXPECTATION_RESULT.value,
                ])).order_by(SqlEventLogStorageTable.c.id.asc()))

        if step_keys:
            raw_event_query = raw_event_query.where(
                SqlEventLogStorageTable.c.step_key.in_(step_keys))

        with self.run_connection(run_id) as conn:
            results = conn.execute(raw_event_query).fetchall()

        try:
            for (json_str, ) in results:
                event = check.inst_param(
                    deserialize_json_to_dagster_namedtuple(json_str), "event",
                    EventRecord)
                if event.dagster_event.event_type == DagsterEventType.ASSET_MATERIALIZATION:
                    materializations[event.step_key].append(
                        event.dagster_event.event_specific_data.materialization
                    )
                elif event.dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
                    expectation_results[event.step_key].append(
                        event.dagster_event.event_specific_data.
                        expectation_result)
        except (seven.JSONDecodeError, check.CheckError) as err:
            raise DagsterEventLogInvalidForRun(run_id=run_id) from err

        # .get() returns None for steps missing a given stat, which the
        # snapshot accepts as "unknown".
        return [
            RunStepKeyStatsSnapshot(
                run_id=run_id,
                step_key=step_key,
                status=value.get("status"),
                start_time=value.get("start_time"),
                end_time=value.get("end_time"),
                materializations=materializations.get(step_key),
                expectation_results=expectation_results.get(step_key),
                attempts=value.get("attempts"),
            ) for step_key, value in by_step_key.items()
        ]