def get_logs_for_run_by_log_id(self, run_id, cursor=-1):
    '''Return a dict mapping log-record id to EventRecord for the given run.

    Args:
        run_id (str): The id of the run whose logs to fetch.
        cursor (Optional[int]): Zero-indexed position of the last record already
            seen; only records after it are returned. -1 (the default) returns
            every record. (default: -1)
    '''
    check.str_param(run_id, 'run_id')
    check.int_param(cursor, 'cursor')
    check.invariant(
        cursor >= -1,
        "Don't know what to do with negative cursor {cursor}".format(cursor=cursor),
    )

    # The cursor is zero-indexed but the auto-increment id column starts at 1,
    # so shift by one before comparing against ids.
    adjusted_cursor = cursor + 1

    query = (
        db.select([SqlEventLogStorageTable.c.id, SqlEventLogStorageTable.c.event])
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .where(SqlEventLogStorageTable.c.id > adjusted_cursor)
        .order_by(SqlEventLogStorageTable.c.id.asc())
    )

    with self.connect(run_id) as conn:
        rows = conn.execute(query).fetchall()

    events = {}
    try:
        for record_id, json_str in rows:
            events[record_id] = check.inst_param(
                deserialize_json_to_dagster_namedtuple(json_str), 'event', EventRecord
            )
    except (seven.JSONDecodeError, check.CheckError) as err:
        # Any undeserializable record means the whole run's log is suspect.
        six.raise_from(DagsterEventLogInvalidForRun(run_id=run_id), err)

    return events
def get_stats_for_run(self, run_id):
    """Build a PipelineRunStatsSnapshot by aggregating event counts and timestamps.

    Counts events per dagster_event_type and records the latest timestamp per
    type, then derives step/materialization/expectation counts and the
    enqueued/launch/start/end times for the run.

    Args:
        run_id (str): The id of the run to aggregate stats for.

    Raises:
        DagsterEventLogInvalidForRun: If the stored event rows cannot be
            interpreted for this run.
    """
    check.str_param(run_id, "run_id")

    query = (
        db.select(
            [
                SqlEventLogStorageTable.c.dagster_event_type,
                db.func.count().label("n_events_of_type"),
                db.func.max(SqlEventLogStorageTable.c.timestamp).label("last_event_timestamp"),
            ]
        )
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .group_by("dagster_event_type")
    )

    with self.run_connection(run_id) as conn:
        rows = conn.execute(query).fetchall()

    try:
        counts = {}
        times = {}
        # Rows with a null/empty event type are non-dagster events; skip them.
        for event_type, n_events, last_timestamp in rows:
            if event_type:
                counts[event_type] = n_events
                times[event_type] = last_timestamp

        enqueued_time = times.get(DagsterEventType.PIPELINE_ENQUEUED.value, None)
        launch_time = times.get(DagsterEventType.PIPELINE_STARTING.value, None)
        start_time = times.get(DagsterEventType.PIPELINE_START.value, None)

        # End time comes from whichever terminal event is present, checked in
        # priority order: success, then failure, then canceled.
        end_time = None
        for terminal_type in (
            DagsterEventType.PIPELINE_SUCCESS,
            DagsterEventType.PIPELINE_FAILURE,
            DagsterEventType.PIPELINE_CANCELED,
        ):
            if terminal_type.value in times:
                end_time = times[terminal_type.value]
                break

        def _to_float(timestamp):
            # Convert a DB timestamp to epoch float, passing None/empty through.
            return datetime_as_float(timestamp) if timestamp else None

        return PipelineRunStatsSnapshot(
            run_id=run_id,
            steps_succeeded=counts.get(DagsterEventType.STEP_SUCCESS.value, 0),
            steps_failed=counts.get(DagsterEventType.STEP_FAILURE.value, 0),
            materializations=counts.get(DagsterEventType.ASSET_MATERIALIZATION.value, 0),
            expectations=counts.get(DagsterEventType.STEP_EXPECTATION_RESULT.value, 0),
            enqueued_time=_to_float(enqueued_time),
            launch_time=_to_float(launch_time),
            start_time=_to_float(start_time),
            end_time=_to_float(end_time),
        )
    except (seven.JSONDecodeError, check.CheckError) as err:
        raise DagsterEventLogInvalidForRun(run_id=run_id) from err
def get_step_stats_for_run(self, run_id, step_keys=None):
    """Compute per-step stats for a run by replaying its raw step events.

    Args:
        run_id (str): The id of the run to compute step stats for.
        step_keys (Optional[List[str]]): If provided, restrict stats to these
            step keys; otherwise cover every step in the run.

    Raises:
        DagsterEventLogInvalidForRun: If stored events cannot be deserialized.
    """
    check.str_param(run_id, "run_id")
    check.opt_list_param(step_keys, "step_keys", of_type=str)

    # Originally, this was two different queries:
    # 1) one query which aggregated top-level step stats by grouping by event type / step_key in
    #    a single query, using pure SQL (e.g. start_time, end_time, status, attempt counts).
    # 2) one query which fetched all the raw events for a specific event type and then inspected
    #    the deserialized event object to aggregate stats derived from sequences of events.
    #    (e.g. marker events, materializations, expectations results, attempts timing, etc.)
    #
    # For simplicity, we now just do the second type of query and derive the stats in Python
    # from the raw events. This has the benefit of being easier to read and also the benefit of
    # being able to share code with the in-memory event log storage implementation. We may
    # choose to revisit this in the future, especially if we are able to do JSON-column queries
    # in SQL as a way of bypassing the serdes layer in all cases.
    raw_event_query = (
        db.select([SqlEventLogStorageTable.c.event])
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .where(SqlEventLogStorageTable.c.step_key != None)
        .where(
            SqlEventLogStorageTable.c.dagster_event_type.in_(
                [
                    DagsterEventType.STEP_START.value,
                    DagsterEventType.STEP_SUCCESS.value,
                    DagsterEventType.STEP_SKIPPED.value,
                    DagsterEventType.STEP_FAILURE.value,
                    # NOTE: STEP_RESTARTED was previously listed twice here; the
                    # duplicate entry was redundant in the IN clause and has
                    # been removed.
                    DagsterEventType.STEP_RESTARTED.value,
                    DagsterEventType.ASSET_MATERIALIZATION.value,
                    DagsterEventType.STEP_EXPECTATION_RESULT.value,
                    DagsterEventType.STEP_UP_FOR_RETRY.value,
                    DagsterEventType.ENGINE_EVENT.value,
                ]
            )
        )
        .order_by(SqlEventLogStorageTable.c.id.asc())
    )

    if step_keys:
        raw_event_query = raw_event_query.where(
            SqlEventLogStorageTable.c.step_key.in_(step_keys)
        )

    with self.run_connection(run_id) as conn:
        results = conn.execute(raw_event_query).fetchall()

    try:
        records = [
            check.inst_param(
                deserialize_json_to_dagster_namedtuple(json_str), "event", EventLogEntry
            )
            for (json_str,) in results
        ]
        # Shared with the in-memory event log storage implementation.
        return build_run_step_stats_from_events(run_id, records)
    except (seven.JSONDecodeError, DeserializationError) as err:
        raise DagsterEventLogInvalidForRun(run_id=run_id) from err
def get_logs_for_run_by_log_id(
    self,
    run_id,
    cursor=-1,
    dagster_event_type=None,
    limit=None,
):
    """Return a dict mapping log-record id to EventLogEntry for the given run.

    Args:
        run_id (str): The id of the run whose logs to fetch.
        cursor (Optional[int]): Zero-indexed cursor; records after this position
            are returned. -1 (the default) returns everything. (default: -1)
        dagster_event_type (Optional[DagsterEventType]): If set, only return
            records of this event type.
        limit (Optional[int]): If set, cap the number of records returned.

    Raises:
        DagsterEventLogInvalidForRun: If stored events cannot be deserialized.
    """
    check.str_param(run_id, "run_id")
    check.int_param(cursor, "cursor")
    check.invariant(
        cursor >= -1,
        "Don't know what to do with negative cursor {cursor}".format(cursor=cursor),
    )
    check.opt_inst_param(dagster_event_type, "dagster_event_type", DagsterEventType)
    # FIX: limit was the only parameter not validated; validate it like the rest.
    check.opt_int_param(limit, "limit")

    query = (
        db.select([SqlEventLogStorageTable.c.id, SqlEventLogStorageTable.c.event])
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .order_by(SqlEventLogStorageTable.c.id.asc())
    )
    if dagster_event_type:
        query = query.where(
            SqlEventLogStorageTable.c.dagster_event_type == dagster_event_type.value
        )

    # adjust 0 based index cursor to SQL offset
    query = query.offset(cursor + 1)

    if limit:
        query = query.limit(limit)

    with self.run_connection(run_id) as conn:
        results = conn.execute(query).fetchall()

    events = {}
    try:
        for (
            record_id,
            json_str,
        ) in results:
            events[record_id] = check.inst_param(
                deserialize_json_to_dagster_namedtuple(json_str), "event", EventLogEntry
            )
    except (seven.JSONDecodeError, DeserializationError) as err:
        raise DagsterEventLogInvalidForRun(run_id=run_id) from err
    return events
def get_logs_for_run_by_log_id(self, run_id, cursor=-1, dagster_event_type=None):
    """Return a dict mapping log-record id to EventRecord for the given run.

    Args:
        run_id (str): The id of the run whose logs to fetch.
        cursor (Optional[int]): Zero-indexed position of the last record already
            seen; only records after it are returned. -1 (the default) returns
            everything. (default: -1)
        dagster_event_type (Optional[DagsterEventType]): If set, only return
            records of this event type.
    """
    check.str_param(run_id, "run_id")
    check.int_param(cursor, "cursor")
    check.invariant(
        cursor >= -1,
        "Don't know what to do with negative cursor {cursor}".format(cursor=cursor),
    )
    check.opt_inst_param(dagster_event_type, "dagster_event_type", DagsterEventType)

    # The cursor is zero-indexed but the auto-increment id column starts at 1,
    # so shift by one before comparing against ids.
    adjusted_cursor = cursor + 1

    query = (
        db.select([SqlEventLogStorageTable.c.id, SqlEventLogStorageTable.c.event])
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .where(SqlEventLogStorageTable.c.id > adjusted_cursor)
        .order_by(SqlEventLogStorageTable.c.id.asc())
    )

    if dagster_event_type:
        query = query.where(
            SqlEventLogStorageTable.c.dagster_event_type == dagster_event_type.value
        )

    with self.run_connection(run_id) as conn:
        rows = conn.execute(query).fetchall()

    events = {}
    try:
        for record_id, json_str in rows:
            events[record_id] = check.inst_param(
                deserialize_json_to_dagster_namedtuple(json_str), "event", EventRecord
            )
    except (seven.JSONDecodeError, check.CheckError) as err:
        raise DagsterEventLogInvalidForRun(run_id=run_id) from err

    return events
def get_logs_for_run(self, run_id, cursor=-1):
    '''Get all of the logs corresponding to a run.

    Args:
        run_id (str): The id of the run for which to fetch logs.
        cursor (Optional[int]): Zero-indexed logs will be returned starting from
            cursor + 1, i.e., if cursor is -1, all logs will be returned.
            (default: -1)
    '''
    check.str_param(run_id, 'run_id')
    check.int_param(cursor, 'cursor')
    check.invariant(
        cursor >= -1,
        "Don't know what to do with negative cursor {cursor}".format(cursor=cursor),
    )

    # The cursor is zero-indexed but the auto-increment id column starts at 1,
    # so shift by one before comparing against ids.
    adjusted_cursor = cursor + 1

    query = (
        db.select([SqlEventLogStorageTable.c.event])
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .where(SqlEventLogStorageTable.c.id > adjusted_cursor)
        .order_by(SqlEventLogStorageTable.c.id.asc())
    )

    with self.connect(run_id) as conn:
        rows = conn.execute(query).fetchall()

    try:
        events = [
            check.inst_param(
                deserialize_json_to_dagster_namedtuple(json_str), 'event', EventRecord
            )
            for (json_str,) in rows
        ]
    except (seven.JSONDecodeError, check.CheckError) as err:
        # Any undeserializable record means the whole run's log is suspect.
        six.raise_from(DagsterEventLogInvalidForRun(run_id=run_id), err)

    return events
def get_step_stats_for_run(self, run_id):
    '''Compute per-step stats for a run.

    Performs two queries: a SQL aggregation that derives start/end times and
    terminal status per step key, and a raw-event fetch that collects
    materializations and expectation results per step key.

    Args:
        run_id (str): The id of the run to compute step stats for.
    '''
    check.str_param(run_id, 'run_id')

    STEP_STATS_EVENT_TYPES = [
        DagsterEventType.STEP_START.value,
        DagsterEventType.STEP_SUCCESS.value,
        DagsterEventType.STEP_SKIPPED.value,
        DagsterEventType.STEP_FAILURE.value,
    ]

    # Aggregate the latest timestamp per (step_key, event_type) pair in SQL.
    by_step_query = (
        db.select(
            [
                SqlEventLogStorageTable.c.step_key,
                SqlEventLogStorageTable.c.dagster_event_type,
                db.func.max(SqlEventLogStorageTable.c.timestamp).label('timestamp'),
            ]
        )
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .where(SqlEventLogStorageTable.c.step_key != None)
        .where(SqlEventLogStorageTable.c.dagster_event_type.in_(STEP_STATS_EVENT_TYPES))
        .group_by(
            SqlEventLogStorageTable.c.step_key,
            SqlEventLogStorageTable.c.dagster_event_type,
        )
    )

    with self.connect(run_id) as conn:
        rows = conn.execute(by_step_query).fetchall()

    # Terminal event types map directly to a step status.
    status_for_event_type = {
        DagsterEventType.STEP_FAILURE.value: StepEventStatus.FAILURE,
        DagsterEventType.STEP_SUCCESS.value: StepEventStatus.SUCCESS,
        DagsterEventType.STEP_SKIPPED.value: StepEventStatus.SKIPPED,
    }

    by_step_key = defaultdict(dict)
    for row in rows:
        timestamp = datetime_as_float(row.timestamp) if row.timestamp else None
        if row.dagster_event_type == DagsterEventType.STEP_START.value:
            by_step_key[row.step_key]['start_time'] = timestamp
        elif row.dagster_event_type in status_for_event_type:
            by_step_key[row.step_key]['end_time'] = timestamp
            by_step_key[row.step_key]['status'] = status_for_event_type[row.dagster_event_type]

    # Materializations and expectation results live inside serialized event
    # payloads, so fetch and deserialize the raw events for those types.
    materializations = defaultdict(list)
    expectation_results = defaultdict(list)
    raw_event_query = (
        db.select([SqlEventLogStorageTable.c.event])
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .where(SqlEventLogStorageTable.c.step_key != None)
        .where(
            SqlEventLogStorageTable.c.dagster_event_type.in_(
                [
                    DagsterEventType.STEP_MATERIALIZATION.value,
                    DagsterEventType.STEP_EXPECTATION_RESULT.value,
                ]
            )
        )
        .order_by(SqlEventLogStorageTable.c.id.asc())
    )

    with self.connect(run_id) as conn:
        raw_rows = conn.execute(raw_event_query).fetchall()

    try:
        for (json_str,) in raw_rows:
            event = check.inst_param(
                deserialize_json_to_dagster_namedtuple(json_str), 'event', EventRecord
            )
            if event.dagster_event.event_type == DagsterEventType.STEP_MATERIALIZATION:
                materializations[event.step_key].append(
                    event.dagster_event.event_specific_data.materialization
                )
            elif event.dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
                expectation_results[event.step_key].append(
                    event.dagster_event.event_specific_data.expectation_result
                )
    except (seven.JSONDecodeError, check.CheckError) as err:
        six.raise_from(DagsterEventLogInvalidForRun(run_id=run_id), err)

    return [
        RunStepKeyStatsSnapshot(
            run_id=run_id,
            step_key=step_key,
            status=stats.get('status'),
            start_time=stats.get('start_time'),
            end_time=stats.get('end_time'),
            materializations=materializations.get(step_key),
            expectation_results=expectation_results.get(step_key),
        )
        for step_key, stats in by_step_key.items()
    ]
def get_step_stats_for_run(self, run_id, step_keys=None):
    """Compute per-step stats (times, status, attempts, materializations) for a run.

    Performs two queries: a SQL aggregation that derives start/end times,
    terminal status, and attempt counts per step key, and a raw-event fetch
    that collects materializations and expectation results per step key.

    Args:
        run_id (str): The id of the run to compute step stats for.
        step_keys (Optional[List[str]]): If provided, restrict stats to these
            step keys; otherwise cover every step in the run.
    """
    check.str_param(run_id, "run_id")
    check.opt_list_param(step_keys, "step_keys", of_type=str)

    STEP_STATS_EVENT_TYPES = [
        DagsterEventType.STEP_START.value,
        DagsterEventType.STEP_SUCCESS.value,
        DagsterEventType.STEP_SKIPPED.value,
        DagsterEventType.STEP_FAILURE.value,
        DagsterEventType.STEP_RESTARTED.value,
    ]

    # Aggregate latest timestamp and event count per (step_key, event_type).
    by_step_query = (
        db.select(
            [
                SqlEventLogStorageTable.c.step_key,
                SqlEventLogStorageTable.c.dagster_event_type,
                db.func.max(SqlEventLogStorageTable.c.timestamp).label("timestamp"),
                db.func.count(SqlEventLogStorageTable.c.id).label("n_events_of_type_for_step"),
            ]
        )
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .where(SqlEventLogStorageTable.c.step_key != None)
        .where(SqlEventLogStorageTable.c.dagster_event_type.in_(STEP_STATS_EVENT_TYPES))
    )

    if step_keys:
        by_step_query = by_step_query.where(SqlEventLogStorageTable.c.step_key.in_(step_keys))

    by_step_query = by_step_query.group_by(
        SqlEventLogStorageTable.c.step_key,
        SqlEventLogStorageTable.c.dagster_event_type,
    )

    with self.run_connection(run_id) as conn:
        rows = conn.execute(by_step_query).fetchall()

    # Terminal event types map directly to a step status.
    status_for_event_type = {
        DagsterEventType.STEP_FAILURE.value: StepEventStatus.FAILURE,
        DagsterEventType.STEP_SUCCESS.value: StepEventStatus.SUCCESS,
        DagsterEventType.STEP_SKIPPED.value: StepEventStatus.SKIPPED,
    }

    by_step_key = defaultdict(dict)
    for row in rows:
        stats = by_step_key[row.step_key]
        timestamp = datetime_as_float(row.timestamp) if row.timestamp else None
        if row.dagster_event_type == DagsterEventType.STEP_START.value:
            stats["start_time"] = timestamp
            stats["attempts"] = stats.get("attempts", 0) + 1
        elif row.dagster_event_type == DagsterEventType.STEP_RESTARTED.value:
            # In case we see step restarted events but not a step started event,
            # we want to only count the restarted events, since the attempt
            # count represents the number of times we have successfully started
            # running the step.
            stats["attempts"] = stats.get("attempts", 0) + row.n_events_of_type_for_step
        elif row.dagster_event_type in status_for_event_type:
            stats["end_time"] = timestamp
            stats["status"] = status_for_event_type[row.dagster_event_type]

    # Materializations and expectation results live inside serialized event
    # payloads, so fetch and deserialize the raw events for those types.
    materializations = defaultdict(list)
    expectation_results = defaultdict(list)
    raw_event_query = (
        db.select([SqlEventLogStorageTable.c.event])
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .where(SqlEventLogStorageTable.c.step_key != None)
        .where(
            SqlEventLogStorageTable.c.dagster_event_type.in_(
                [
                    DagsterEventType.ASSET_MATERIALIZATION.value,
                    DagsterEventType.STEP_EXPECTATION_RESULT.value,
                ]
            )
        )
        .order_by(SqlEventLogStorageTable.c.id.asc())
    )

    if step_keys:
        raw_event_query = raw_event_query.where(SqlEventLogStorageTable.c.step_key.in_(step_keys))

    with self.run_connection(run_id) as conn:
        raw_rows = conn.execute(raw_event_query).fetchall()

    try:
        for (json_str,) in raw_rows:
            event = check.inst_param(
                deserialize_json_to_dagster_namedtuple(json_str), "event", EventRecord
            )
            if event.dagster_event.event_type == DagsterEventType.ASSET_MATERIALIZATION:
                materializations[event.step_key].append(
                    event.dagster_event.event_specific_data.materialization
                )
            elif event.dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
                expectation_results[event.step_key].append(
                    event.dagster_event.event_specific_data.expectation_result
                )
    except (seven.JSONDecodeError, check.CheckError) as err:
        raise DagsterEventLogInvalidForRun(run_id=run_id) from err

    return [
        RunStepKeyStatsSnapshot(
            run_id=run_id,
            step_key=step_key,
            status=stats.get("status"),
            start_time=stats.get("start_time"),
            end_time=stats.get("end_time"),
            materializations=materializations.get(step_key),
            expectation_results=expectation_results.get(step_key),
            attempts=stats.get("attempts"),
        )
        for step_key, stats in by_step_key.items()
    ]