def build_run_stats_from_events(run_id, records):
    try:
        iter(records)
    except TypeError as exc:
        raise check.ParameterCheckError(
            "Invariant violation for parameter 'records'. Description: Expected iterable."
        ) from exc
    for i, record in enumerate(records):
        check.inst_param(record, f"records[{i}]", EventRecord)

    steps_succeeded = 0
    steps_failed = 0
    materializations = 0
    expectations = 0
    enqueued_time = None
    launch_time = None
    start_time = None
    end_time = None

    for event in records:
        if not event.is_dagster_event:
            continue
        dagster_event = event.get_dagster_event()
        # Normalize the timestamp once per event; it may already be a float.
        event_timestamp_float = (
            event.timestamp
            if isinstance(event.timestamp, float)
            else datetime_as_float(event.timestamp)
        )
        if dagster_event.event_type == DagsterEventType.PIPELINE_START:
            start_time = event_timestamp_float
        if dagster_event.event_type == DagsterEventType.PIPELINE_STARTING:
            launch_time = event_timestamp_float
        if dagster_event.event_type == DagsterEventType.PIPELINE_ENQUEUED:
            enqueued_time = event_timestamp_float
        if dagster_event.event_type == DagsterEventType.STEP_FAILURE:
            steps_failed += 1
        if dagster_event.event_type == DagsterEventType.STEP_SUCCESS:
            steps_succeeded += 1
        if dagster_event.event_type == DagsterEventType.STEP_MATERIALIZATION:
            materializations += 1
        if dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
            expectations += 1
        # Any terminal pipeline event marks the end of the run.
        if dagster_event.event_type in (
            DagsterEventType.PIPELINE_SUCCESS,
            DagsterEventType.PIPELINE_FAILURE,
            DagsterEventType.PIPELINE_CANCELED,
        ):
            end_time = event_timestamp_float

    return PipelineRunStatsSnapshot(
        run_id,
        steps_succeeded,
        steps_failed,
        materializations,
        expectations,
        enqueued_time,
        launch_time,
        start_time,
        end_time,
    )

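# `build_run_stats_from_events` leans on a `datetime_as_float` helper to
# normalize timestamps. Below is a minimal standalone sketch of what such a
# helper could look like, assuming it maps a datetime to Unix-epoch seconds;
# this implementation is an assumption for illustration, not the library's code.
from datetime import timezone


def datetime_as_float_sketch(dt):
    # Treat naive datetimes as UTC, then return epoch seconds as a float.
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.timestamp()
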
def get_stats_for_run(self, run_id):
    check.str_param(run_id, "run_id")

    # Aggregate event counts and latest timestamps per Dagster event type in
    # a single grouped query.
    query = (
        db.select(
            [
                SqlEventLogStorageTable.c.dagster_event_type,
                db.func.count().label("n_events_of_type"),
                db.func.max(SqlEventLogStorageTable.c.timestamp).label("last_event_timestamp"),
            ]
        )
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .group_by("dagster_event_type")
    )

    with self.run_connection(run_id) as conn:
        results = conn.execute(query).fetchall()

    try:
        counts = {}
        times = {}
        for result in results:
            (dagster_event_type, n_events_of_type, last_event_timestamp) = result
            if dagster_event_type:
                counts[dagster_event_type] = n_events_of_type
                times[dagster_event_type] = last_event_timestamp

        enqueued_time = times.get(DagsterEventType.PIPELINE_ENQUEUED.value, None)
        launch_time = times.get(DagsterEventType.PIPELINE_STARTING.value, None)
        start_time = times.get(DagsterEventType.PIPELINE_START.value, None)
        # Coalesce across the terminal event types: SUCCESS, then FAILURE,
        # then CANCELED.
        end_time = times.get(
            DagsterEventType.PIPELINE_SUCCESS.value,
            times.get(
                DagsterEventType.PIPELINE_FAILURE.value,
                times.get(DagsterEventType.PIPELINE_CANCELED.value, None),
            ),
        )

        return PipelineRunStatsSnapshot(
            run_id=run_id,
            steps_succeeded=counts.get(DagsterEventType.STEP_SUCCESS.value, 0),
            steps_failed=counts.get(DagsterEventType.STEP_FAILURE.value, 0),
            materializations=counts.get(DagsterEventType.ASSET_MATERIALIZATION.value, 0),
            expectations=counts.get(DagsterEventType.STEP_EXPECTATION_RESULT.value, 0),
            enqueued_time=datetime_as_float(enqueued_time) if enqueued_time else None,
            launch_time=datetime_as_float(launch_time) if launch_time else None,
            start_time=datetime_as_float(start_time) if start_time else None,
            end_time=datetime_as_float(end_time) if end_time else None,
        )
    except (seven.JSONDecodeError, check.CheckError) as err:
        raise DagsterEventLogInvalidForRun(run_id=run_id) from err

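# Standalone illustration of the nested `times.get(...)` coalesce above: the
# PIPELINE_SUCCESS timestamp wins when present, then PIPELINE_FAILURE, then
# PIPELINE_CANCELED. The values below are fabricated for the demo.
demo_times = {"PIPELINE_FAILURE": 200.0, "PIPELINE_CANCELED": 300.0}
demo_end_time = demo_times.get(
    "PIPELINE_SUCCESS",
    demo_times.get("PIPELINE_FAILURE", demo_times.get("PIPELINE_CANCELED", None)),
)
assert demo_end_time == 200.0  # FAILURE wins because no SUCCESS event exists
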
def get_stats_for_run(self, run_id):
    check.str_param(run_id, 'run_id')

    query = (
        db.select(
            [
                SqlEventLogStorageTable.c.dagster_event_type,
                db.func.count().label('n_events_of_type'),
                db.func.max(SqlEventLogStorageTable.c.timestamp).label('last_event_timestamp'),
            ]
        )
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .group_by('dagster_event_type')
    )

    with self.connect(run_id) as conn:
        results = conn.execute(query).fetchall()

    try:
        counts = {}
        times = {}
        for result in results:
            (dagster_event_type, n_events_of_type, last_event_timestamp) = result
            if dagster_event_type:
                counts[dagster_event_type] = n_events_of_type
                times[dagster_event_type] = last_event_timestamp

        start_time = times.get(DagsterEventType.PIPELINE_START.value, None)
        end_time = times.get(
            DagsterEventType.PIPELINE_SUCCESS.value,
            times.get(DagsterEventType.PIPELINE_FAILURE.value, None),
        )

        return PipelineRunStatsSnapshot(
            run_id=run_id,
            steps_succeeded=counts.get(DagsterEventType.STEP_SUCCESS.value, 0),
            steps_failed=counts.get(DagsterEventType.STEP_FAILURE.value, 0),
            materializations=counts.get(DagsterEventType.STEP_MATERIALIZATION.value, 0),
            expectations=counts.get(DagsterEventType.STEP_EXPECTATION_RESULT.value, 0),
            start_time=datetime_as_float(start_time) if start_time else None,
            end_time=datetime_as_float(end_time) if end_time else None,
        )
    except (seven.JSONDecodeError, check.CheckError) as err:
        six.raise_from(DagsterEventLogInvalidForRun(run_id=run_id), err)

def build_run_stats_from_events(run_id, records):
    try:
        iter(records)
    except TypeError as exc:
        six.raise_from(
            check.ParameterCheckError(
                "Invariant violation for parameter 'records'. Description: Expected iterable."
            ),
            from_value=exc,
        )
    for i, record in enumerate(records):
        check.inst_param(record, "records[{i}]".format(i=i), EventRecord)

    steps_succeeded = 0
    steps_failed = 0
    materializations = 0
    expectations = 0
    start_time = None
    end_time = None

    for event in records:
        if not event.is_dagster_event:
            continue
        if event.dagster_event.event_type == DagsterEventType.PIPELINE_START:
            start_time = (
                event.timestamp
                if isinstance(event.timestamp, float)
                else datetime_as_float(event.timestamp)
            )
        if event.dagster_event.event_type == DagsterEventType.STEP_FAILURE:
            steps_failed += 1
        if event.dagster_event.event_type == DagsterEventType.STEP_SUCCESS:
            steps_succeeded += 1
        if event.dagster_event.event_type == DagsterEventType.STEP_MATERIALIZATION:
            materializations += 1
        if event.dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
            expectations += 1
        if (
            event.dagster_event.event_type == DagsterEventType.PIPELINE_SUCCESS
            or event.dagster_event.event_type == DagsterEventType.PIPELINE_FAILURE
        ):
            end_time = (
                event.timestamp
                if isinstance(event.timestamp, float)
                else datetime_as_float(event.timestamp)
            )

    return PipelineRunStatsSnapshot(
        run_id, steps_succeeded, steps_failed, materializations, expectations, start_time, end_time
    )

def resolve_nextTick(self, graphene_info):
    if self._sensor_state.status != JobStatus.RUNNING:
        return None

    latest_tick = graphene_info.context.instance.get_latest_job_tick(
        self._sensor_state.job_origin_id
    )
    if not latest_tick:
        return None

    next_timestamp = latest_tick.timestamp + SENSOR_DAEMON_INTERVAL
    if next_timestamp < datetime_as_float(datetime.now()):
        return None
    return graphene_info.schema.type_named("FutureJobTick")(next_timestamp)

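# Standalone sketch of the next-tick arithmetic in the resolver above. The
# interval value is an assumption for illustration; the actual
# SENSOR_DAEMON_INTERVAL constant may differ.
import time

SENSOR_DAEMON_INTERVAL_SKETCH = 30  # seconds (assumed)


def next_tick_timestamp_sketch(last_tick_timestamp):
    # Mirror the resolver: return None once the projected tick is already in
    # the past, otherwise return the projected epoch timestamp.
    next_timestamp = last_tick_timestamp + SENSOR_DAEMON_INTERVAL_SKETCH
    if next_timestamp < time.time():
        return None
    return next_timestamp
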
def __init__(self, daemon_status):
    check.inst_param(daemon_status, "daemon_status", DaemonStatus)

    if daemon_status.last_heartbeat is None:
        last_heartbeat_time = None
    else:
        last_heartbeat_time = datetime_as_float(daemon_status.last_heartbeat.timestamp)

    super(DauphinDaemonStatus, self).__init__(
        daemonType=daemon_status.daemon_type,
        required=daemon_status.required,
        healthy=daemon_status.healthy,
        lastHeartbeatTime=last_heartbeat_time,
    )

def get_step_stats_for_run(self, run_id):
    check.str_param(run_id, 'run_id')

    STEP_STATS_EVENT_TYPES = [
        DagsterEventType.STEP_START.value,
        DagsterEventType.STEP_SUCCESS.value,
        DagsterEventType.STEP_SKIPPED.value,
        DagsterEventType.STEP_FAILURE.value,
    ]

    by_step_query = (
        db.select(
            [
                SqlEventLogStorageTable.c.step_key,
                SqlEventLogStorageTable.c.dagster_event_type,
                db.func.max(SqlEventLogStorageTable.c.timestamp).label('timestamp'),
            ]
        )
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .where(SqlEventLogStorageTable.c.step_key != None)
        .where(SqlEventLogStorageTable.c.dagster_event_type.in_(STEP_STATS_EVENT_TYPES))
        .group_by(
            SqlEventLogStorageTable.c.step_key,
            SqlEventLogStorageTable.c.dagster_event_type,
        )
    )

    with self.connect(run_id) as conn:
        results = conn.execute(by_step_query).fetchall()

    by_step_key = defaultdict(dict)
    for result in results:
        step_key = result.step_key
        if result.dagster_event_type == DagsterEventType.STEP_START.value:
            by_step_key[step_key]['start_time'] = (
                datetime_as_float(result.timestamp) if result.timestamp else None
            )
        if result.dagster_event_type == DagsterEventType.STEP_FAILURE.value:
            by_step_key[step_key]['end_time'] = (
                datetime_as_float(result.timestamp) if result.timestamp else None
            )
            by_step_key[step_key]['status'] = StepEventStatus.FAILURE
        if result.dagster_event_type == DagsterEventType.STEP_SUCCESS.value:
            by_step_key[step_key]['end_time'] = (
                datetime_as_float(result.timestamp) if result.timestamp else None
            )
            by_step_key[step_key]['status'] = StepEventStatus.SUCCESS
        if result.dagster_event_type == DagsterEventType.STEP_SKIPPED.value:
            by_step_key[step_key]['end_time'] = (
                datetime_as_float(result.timestamp) if result.timestamp else None
            )
            by_step_key[step_key]['status'] = StepEventStatus.SKIPPED

    materializations = defaultdict(list)
    expectation_results = defaultdict(list)
    raw_event_query = (
        db.select([SqlEventLogStorageTable.c.event])
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .where(SqlEventLogStorageTable.c.step_key != None)
        .where(
            SqlEventLogStorageTable.c.dagster_event_type.in_(
                [
                    DagsterEventType.STEP_MATERIALIZATION.value,
                    DagsterEventType.STEP_EXPECTATION_RESULT.value,
                ]
            )
        )
        .order_by(SqlEventLogStorageTable.c.id.asc())
    )

    with self.connect(run_id) as conn:
        results = conn.execute(raw_event_query).fetchall()

    try:
        for (json_str,) in results:
            event = check.inst_param(
                deserialize_json_to_dagster_namedtuple(json_str), 'event', EventRecord
            )
            if event.dagster_event.event_type == DagsterEventType.STEP_MATERIALIZATION:
                materializations[event.step_key].append(
                    event.dagster_event.event_specific_data.materialization
                )
            elif event.dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
                expectation_results[event.step_key].append(
                    event.dagster_event.event_specific_data.expectation_result
                )
    except (seven.JSONDecodeError, check.CheckError) as err:
        six.raise_from(DagsterEventLogInvalidForRun(run_id=run_id), err)

    return [
        RunStepKeyStatsSnapshot(
            run_id=run_id,
            step_key=step_key,
            status=value.get('status'),
            start_time=value.get('start_time'),
            end_time=value.get('end_time'),
            materializations=materializations.get(step_key),
            expectation_results=expectation_results.get(step_key),
        )
        for step_key, value in by_step_key.items()
    ]

def get_step_stats_for_run(self, run_id, step_keys=None):
    check.str_param(run_id, "run_id")
    check.opt_list_param(step_keys, "step_keys", of_type=str)

    STEP_STATS_EVENT_TYPES = [
        DagsterEventType.STEP_START.value,
        DagsterEventType.STEP_SUCCESS.value,
        DagsterEventType.STEP_SKIPPED.value,
        DagsterEventType.STEP_FAILURE.value,
        DagsterEventType.STEP_RESTARTED.value,
    ]

    by_step_query = (
        db.select(
            [
                SqlEventLogStorageTable.c.step_key,
                SqlEventLogStorageTable.c.dagster_event_type,
                db.func.max(SqlEventLogStorageTable.c.timestamp).label("timestamp"),
                db.func.count(SqlEventLogStorageTable.c.id).label("n_events_of_type_for_step"),
            ]
        )
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        # SQLAlchemy needs `!= None` (not `is not None`) to emit IS NOT NULL.
        .where(SqlEventLogStorageTable.c.step_key != None)
        .where(SqlEventLogStorageTable.c.dagster_event_type.in_(STEP_STATS_EVENT_TYPES))
    )

    if step_keys:
        by_step_query = by_step_query.where(SqlEventLogStorageTable.c.step_key.in_(step_keys))

    by_step_query = by_step_query.group_by(
        SqlEventLogStorageTable.c.step_key,
        SqlEventLogStorageTable.c.dagster_event_type,
    )

    with self.run_connection(run_id) as conn:
        results = conn.execute(by_step_query).fetchall()

    by_step_key = defaultdict(dict)
    for result in results:
        step_key = result.step_key
        if result.dagster_event_type == DagsterEventType.STEP_START.value:
            by_step_key[step_key]["start_time"] = (
                datetime_as_float(result.timestamp) if result.timestamp else None
            )
            by_step_key[step_key]["attempts"] = by_step_key[step_key].get("attempts", 0) + 1
        if result.dagster_event_type == DagsterEventType.STEP_RESTARTED.value:
            by_step_key[step_key]["attempts"] = (
                # In case we see step restarted events but not a step started event, we
                # want to count only the restarted events, since the attempt count
                # represents the number of times we have successfully started running
                # the step.
                by_step_key[step_key].get("attempts", 0)
                + result.n_events_of_type_for_step
            )
        if result.dagster_event_type == DagsterEventType.STEP_FAILURE.value:
            by_step_key[step_key]["end_time"] = (
                datetime_as_float(result.timestamp) if result.timestamp else None
            )
            by_step_key[step_key]["status"] = StepEventStatus.FAILURE
        if result.dagster_event_type == DagsterEventType.STEP_SUCCESS.value:
            by_step_key[step_key]["end_time"] = (
                datetime_as_float(result.timestamp) if result.timestamp else None
            )
            by_step_key[step_key]["status"] = StepEventStatus.SUCCESS
        if result.dagster_event_type == DagsterEventType.STEP_SKIPPED.value:
            by_step_key[step_key]["end_time"] = (
                datetime_as_float(result.timestamp) if result.timestamp else None
            )
            by_step_key[step_key]["status"] = StepEventStatus.SKIPPED

    materializations = defaultdict(list)
    expectation_results = defaultdict(list)
    raw_event_query = (
        db.select([SqlEventLogStorageTable.c.event])
        .where(SqlEventLogStorageTable.c.run_id == run_id)
        .where(SqlEventLogStorageTable.c.step_key != None)
        .where(
            SqlEventLogStorageTable.c.dagster_event_type.in_(
                [
                    DagsterEventType.ASSET_MATERIALIZATION.value,
                    DagsterEventType.STEP_EXPECTATION_RESULT.value,
                ]
            )
        )
        .order_by(SqlEventLogStorageTable.c.id.asc())
    )

    if step_keys:
        raw_event_query = raw_event_query.where(
            SqlEventLogStorageTable.c.step_key.in_(step_keys)
        )

    with self.run_connection(run_id) as conn:
        results = conn.execute(raw_event_query).fetchall()

    try:
        for (json_str,) in results:
            event = check.inst_param(
                deserialize_json_to_dagster_namedtuple(json_str), "event", EventRecord
            )
            if event.dagster_event.event_type == DagsterEventType.ASSET_MATERIALIZATION:
                materializations[event.step_key].append(
                    event.dagster_event.event_specific_data.materialization
                )
            elif event.dagster_event.event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
                expectation_results[event.step_key].append(
                    event.dagster_event.event_specific_data.expectation_result
                )
    except (seven.JSONDecodeError, check.CheckError) as err:
        raise DagsterEventLogInvalidForRun(run_id=run_id) from err

    return [
        RunStepKeyStatsSnapshot(
            run_id=run_id,
            step_key=step_key,
            status=value.get("status"),
            start_time=value.get("start_time"),
            end_time=value.get("end_time"),
            materializations=materializations.get(step_key),
            expectation_results=expectation_results.get(step_key),
            attempts=value.get("attempts"),
        )
        for step_key, value in by_step_key.items()
    ]

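# Standalone illustration of the attempt-count bookkeeping above: each
# STEP_START row adds one attempt, and a STEP_RESTARTED row (pre-aggregated by
# the grouped query) adds its event count. The rows below are fabricated.
from collections import defaultdict

demo_rows = [
    ("my_step", "STEP_START", 1),
    ("my_step", "STEP_RESTARTED", 2),
]
demo_by_step_key = defaultdict(dict)
for step_key, event_type, n_events in demo_rows:
    if event_type == "STEP_START":
        demo_by_step_key[step_key]["attempts"] = (
            demo_by_step_key[step_key].get("attempts", 0) + 1
        )
    if event_type == "STEP_RESTARTED":
        demo_by_step_key[step_key]["attempts"] = (
            demo_by_step_key[step_key].get("attempts", 0) + n_events
        )

assert demo_by_step_key["my_step"]["attempts"] == 3  # one start + two restarts
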
def resolve_updateTime(self, graphene_info):
    run_record = self._get_run_record(graphene_info.context.instance)
    return datetime_as_float(run_record.update_timestamp)