def get_step_stats_for_run(self, run_id, step_keys=None):
    """Build per-step stats for a run from its stored step-level events.

    Args:
        run_id (str): The run whose events are queried.
        step_keys (Optional[List[str]]): When non-empty, only events whose
            step key is in this list are considered. ``None`` or an empty
            list applies no step-key filter.

    Returns:
        The result of ``build_run_step_stats_from_events`` for the
        deserialized events, ordered by ascending row id.

    Raises:
        DagsterEventLogInvalidForRun: If a stored event cannot be decoded
            or deserialized.
    """
    check.str_param(run_id, "run_id")
    check.opt_list_param(step_keys, "step_keys", of_type=str)

    # Originally, this was two different queries:
    # 1) one query which aggregated top-level step stats by grouping by event type / step_key in
    #    a single query, using pure SQL (e.g. start_time, end_time, status, attempt counts).
    # 2) one query which fetched all the raw events for a specific event type and then inspected
    #    the deserialized event object to aggregate stats derived from sequences of events.
    #    (e.g. marker events, materializations, expectations results, attempts timing, etc.)
    #
    # For simplicity, we now just do the second type of query and derive the stats in Python
    # from the raw events. This has the benefit of being easier to read and also the benefit of
    # being able to share code with the in-memory event log storage implementation. We may
    # choose to revisit this in the future, especially if we are able to do JSON-column queries
    # in SQL as a way of bypassing the serdes layer in all cases.

    # Event types that contribute to step stats. Each type is listed once.
    step_stats_event_types = [
        DagsterEventType.STEP_START.value,
        DagsterEventType.STEP_SUCCESS.value,
        DagsterEventType.STEP_SKIPPED.value,
        DagsterEventType.STEP_FAILURE.value,
        DagsterEventType.STEP_RESTARTED.value,
        DagsterEventType.ASSET_MATERIALIZATION.value,
        DagsterEventType.STEP_EXPECTATION_RESULT.value,
        DagsterEventType.STEP_UP_FOR_RETRY.value,
        DagsterEventType.ENGINE_EVENT.value,
    ]

    raw_event_query = (
        db.select([SqlEventLogStorageTable.c.event])
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        # `!= None` is intentional: it is a column expression that SQLAlchemy
        # renders as IS NOT NULL, not a Python identity comparison.
        .where(SqlEventLogStorageTable.c.step_key != None)  # noqa: E711
        .where(SqlEventLogStorageTable.c.dagster_event_type.in_(step_stats_event_types))
        .order_by(SqlEventLogStorageTable.c.id.asc())
    )
    if step_keys:
        raw_event_query = raw_event_query.where(
            SqlEventLogStorageTable.c.step_key.in_(step_keys)
        )

    with self.run_connection(run_id) as conn:
        results = conn.execute(raw_event_query).fetchall()

    try:
        records = [
            check.inst_param(
                deserialize_json_to_dagster_namedtuple(json_str), "event", EventLogEntry
            )
            for (json_str,) in results
        ]
        return build_run_step_stats_from_events(run_id, records)
    except (seven.JSONDecodeError, DeserializationError) as err:
        raise DagsterEventLogInvalidForRun(run_id=run_id) from err
def get_step_stats_for_run(self, run_id, step_keys=None):
    """Compute step stats for a run from its event logs.

    When ``step_keys`` is non-empty, only Dagster events whose step key is
    in ``step_keys`` are used; otherwise every log entry for the run is used.
    """
    run_events = self.get_logs_for_run(run_id)

    # No step filter requested: hand over everything as-is.
    if not step_keys:
        return build_run_step_stats_from_events(run_id, run_events)

    selected = []
    for entry in run_events:
        # Entries that are not Dagster events never match a step filter.
        if not entry.is_dagster_event:
            continue
        if entry.dagster_event.step_key in step_keys:
            selected.append(entry)

    return build_run_step_stats_from_events(run_id, selected)
def get_step_stats_for_run(self, run_id: str, step_keys=None) -> List[RunStepKeyStatsSnapshot]:
    """Compute step stats for a run from its event logs.

    When ``step_keys`` is non-empty, the logs are narrowed to Dagster events
    whose step key is in ``step_keys`` before the stats are built.
    """
    entries = self.get_logs_for_run(run_id)

    if step_keys:

        def _is_for_requested_step(entry):
            # Only Dagster events are checked against the requested step keys.
            return entry.is_dagster_event and entry.get_dagster_event().step_key in step_keys

        entries = list(filter(_is_for_requested_step, entries))

    return build_run_step_stats_from_events(run_id, entries)
def get_step_stats_for_run(self, run_id):
    """Compute step stats for a run from all of its event logs."""
    run_logs = self.get_logs_for_run(run_id)
    return build_run_step_stats_from_events(run_id, run_logs)