Example #1
0
def build_run_stats_from_events(run_id, records):
    """Fold a run's event records into a ``PipelineRunStatsSnapshot``.

    Args:
        run_id: Passed through unchanged as the snapshot's first field.
        records: Iterable of ``EventRecord`` objects. It is traversed exactly
            once, so one-shot iterators (e.g. generators) are handled
            correctly.

    Returns:
        A ``PipelineRunStatsSnapshot`` holding the number of step successes,
        step failures, materializations and expectation results, followed by
        the enqueued / launch / start / end times as floats. A time is
        ``None`` when no matching event was seen; when several matching
        events exist, the last one in iteration order wins.

    Raises:
        check.ParameterCheckError: If ``records`` is not iterable.
        Propagates whatever ``check.inst_param`` raises when an element is
        not an ``EventRecord``.
    """
    try:
        iter(records)
    except TypeError as exc:
        raise check.ParameterCheckError(
            "Invariant violation for parameter 'records'. Description: Expected iterable."
        ) from exc

    steps_succeeded = 0
    steps_failed = 0
    materializations = 0
    expectations = 0
    enqueued_time = None
    launch_time = None
    start_time = None
    end_time = None

    # Any of these event types marks the end of the run.
    terminal_event_types = (
        DagsterEventType.PIPELINE_SUCCESS,
        DagsterEventType.PIPELINE_FAILURE,
        DagsterEventType.PIPELINE_CANCELED,
    )

    # Validate and aggregate in a single traversal. Validating in a separate
    # loop first would exhaust a one-shot iterator and leave nothing to
    # aggregate, silently producing an all-zero snapshot.
    for i, event in enumerate(records):
        check.inst_param(event, f"records[{i}]", EventRecord)
        if not event.is_dagster_event:
            continue

        event_type = event.get_dagster_event().event_type

        # Timestamps may already be floats; anything else is converted.
        timestamp = event.timestamp
        if not isinstance(timestamp, float):
            timestamp = datetime_as_float(timestamp)

        # An event has exactly one type, so the branches are exclusive.
        if event_type == DagsterEventType.PIPELINE_START:
            start_time = timestamp
        elif event_type == DagsterEventType.PIPELINE_STARTING:
            launch_time = timestamp
        elif event_type == DagsterEventType.PIPELINE_ENQUEUED:
            enqueued_time = timestamp
        elif event_type == DagsterEventType.STEP_FAILURE:
            steps_failed += 1
        elif event_type == DagsterEventType.STEP_SUCCESS:
            steps_succeeded += 1
        elif event_type == DagsterEventType.STEP_MATERIALIZATION:
            materializations += 1
        elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
            expectations += 1
        elif event_type in terminal_event_types:
            end_time = timestamp

    return PipelineRunStatsSnapshot(
        run_id,
        steps_succeeded,
        steps_failed,
        materializations,
        expectations,
        enqueued_time,
        launch_time,
        start_time,
        end_time,
    )
Example #2
0
    def get_stats_for_run(self, run_id):
        """Summarize a run's stored events as a ``PipelineRunStatsSnapshot``.

        Runs one aggregate query over the run's rows, grouped by
        ``dagster_event_type``, that yields the row count and the maximum
        timestamp for each event type. Rows with a falsy event type are
        ignored.

        Args:
            run_id: The run to summarize; checked with ``check.str_param``.

        Returns:
            A ``PipelineRunStatsSnapshot`` whose counts default to 0 and
            whose times are ``None`` when the corresponding event type has
            no rows. The end time comes from the first of
            PIPELINE_SUCCESS, PIPELINE_FAILURE, PIPELINE_CANCELED that is
            present.

        Raises:
            DagsterEventLogInvalidForRun: If building the snapshot raises
                ``seven.JSONDecodeError`` or ``check.CheckError``.
        """
        check.str_param(run_id, "run_id")

        columns = SqlEventLogStorageTable.c
        selected = [
            columns.dagster_event_type,
            db.func.count().label("n_events_of_type"),
            db.func.max(columns.timestamp).label("last_event_timestamp"),
        ]
        query = (
            db.select(selected)
            .where(columns.run_id == run_id)
            .group_by("dagster_event_type")
        )

        with self.run_connection(run_id) as conn:
            rows = conn.execute(query).fetchall()

        def _as_float_or_none(timestamp):
            # Missing (falsy) timestamps stay None instead of being converted.
            if timestamp:
                return datetime_as_float(timestamp)
            return None

        try:
            counts = {}
            times = {}
            for event_type, event_count, latest_timestamp in rows:
                if not event_type:
                    continue
                counts[event_type] = event_count
                times[event_type] = latest_timestamp

            enqueued_time = times.get(DagsterEventType.PIPELINE_ENQUEUED.value)
            launch_time = times.get(DagsterEventType.PIPELINE_STARTING.value)
            start_time = times.get(DagsterEventType.PIPELINE_START.value)

            # First terminal event type present wins, in this priority order.
            terminal_keys = (
                DagsterEventType.PIPELINE_SUCCESS.value,
                DagsterEventType.PIPELINE_FAILURE.value,
                DagsterEventType.PIPELINE_CANCELED.value,
            )
            end_time = next(
                (times[key] for key in terminal_keys if key in times), None)

            return PipelineRunStatsSnapshot(
                run_id=run_id,
                steps_succeeded=counts.get(
                    DagsterEventType.STEP_SUCCESS.value, 0),
                steps_failed=counts.get(
                    DagsterEventType.STEP_FAILURE.value, 0),
                materializations=counts.get(
                    DagsterEventType.ASSET_MATERIALIZATION.value, 0),
                expectations=counts.get(
                    DagsterEventType.STEP_EXPECTATION_RESULT.value, 0),
                enqueued_time=_as_float_or_none(enqueued_time),
                launch_time=_as_float_or_none(launch_time),
                start_time=_as_float_or_none(start_time),
                end_time=_as_float_or_none(end_time),
            )
        except (seven.JSONDecodeError, check.CheckError) as err:
            raise DagsterEventLogInvalidForRun(run_id=run_id) from err
Example #3
0
    def get_stats_for_run(self, run_id):
        """Summarize a run's stored events as a ``PipelineRunStatsSnapshot``.

        Runs one aggregate query over the run's rows, grouped by
        ``dagster_event_type``, that yields the row count and the maximum
        timestamp for each event type. Rows with a falsy event type are
        ignored.

        Args:
            run_id: The run to summarize; checked with ``check.str_param``.

        Returns:
            A ``PipelineRunStatsSnapshot`` whose counts default to 0 and
            whose start/end times are ``None`` when no matching rows exist.
            The end time prefers PIPELINE_SUCCESS and falls back to
            PIPELINE_FAILURE.

        Raises:
            DagsterEventLogInvalidForRun: If building the snapshot raises
                ``seven.JSONDecodeError`` or ``check.CheckError``.
        """
        check.str_param(run_id, 'run_id')

        columns = SqlEventLogStorageTable.c
        selected = [
            columns.dagster_event_type,
            db.func.count().label('n_events_of_type'),
            db.func.max(columns.timestamp).label('last_event_timestamp'),
        ]
        query = (
            db.select(selected)
            .where(columns.run_id == run_id)
            .group_by('dagster_event_type')
        )

        with self.connect(run_id) as conn:
            rows = conn.execute(query).fetchall()

        def _as_float_or_none(timestamp):
            # Missing (falsy) timestamps stay None instead of being converted.
            if timestamp:
                return datetime_as_float(timestamp)
            return None

        try:
            counts = {}
            times = {}
            for row in rows:
                event_type = row[0]
                if not event_type:
                    continue
                counts[event_type] = row[1]
                times[event_type] = row[2]

            start_time = times.get(DagsterEventType.PIPELINE_START.value)
            # Success takes precedence; failure is only the fallback.
            failure_time = times.get(DagsterEventType.PIPELINE_FAILURE.value)
            end_time = times.get(DagsterEventType.PIPELINE_SUCCESS.value,
                                 failure_time)

            return PipelineRunStatsSnapshot(
                run_id=run_id,
                steps_succeeded=counts.get(
                    DagsterEventType.STEP_SUCCESS.value, 0),
                steps_failed=counts.get(
                    DagsterEventType.STEP_FAILURE.value, 0),
                materializations=counts.get(
                    DagsterEventType.STEP_MATERIALIZATION.value, 0),
                expectations=counts.get(
                    DagsterEventType.STEP_EXPECTATION_RESULT.value, 0),
                start_time=_as_float_or_none(start_time),
                end_time=_as_float_or_none(end_time),
            )
        except (seven.JSONDecodeError, check.CheckError) as err:
            six.raise_from(DagsterEventLogInvalidForRun(run_id=run_id), err)
Example #4
0
def build_run_stats_from_events(run_id, records):
    """Fold a run's event records into a ``PipelineRunStatsSnapshot``.

    Args:
        run_id: Passed through unchanged as the snapshot's first field.
        records: Iterable of ``EventRecord`` objects. It is traversed exactly
            once, so one-shot iterators (e.g. generators) are handled
            correctly.

    Returns:
        A ``PipelineRunStatsSnapshot`` holding the number of step successes,
        step failures, materializations and expectation results, followed by
        the start and end times as floats. A time is ``None`` when no
        matching event was seen; when several matching events exist, the
        last one in iteration order wins.

    Raises:
        check.ParameterCheckError: If ``records`` is not iterable.
        Propagates whatever ``check.inst_param`` raises when an element is
        not an ``EventRecord``.
    """
    try:
        iter(records)
    except TypeError as exc:
        six.raise_from(
            check.ParameterCheckError(
                "Invariant violation for parameter 'records'. Description: Expected iterable."
            ),
            from_value=exc,
        )

    def _timestamp_as_float(timestamp):
        # Timestamps may already be floats; anything else is converted.
        if isinstance(timestamp, float):
            return timestamp
        return datetime_as_float(timestamp)

    steps_succeeded = 0
    steps_failed = 0
    materializations = 0
    expectations = 0
    start_time = None
    end_time = None

    # Either of these event types marks the end of the run.
    terminal_event_types = (
        DagsterEventType.PIPELINE_SUCCESS,
        DagsterEventType.PIPELINE_FAILURE,
    )

    # Validate and aggregate in a single traversal. Validating in a separate
    # loop first would exhaust a one-shot iterator and leave nothing to
    # aggregate, silently producing an all-zero snapshot.
    for i, event in enumerate(records):
        check.inst_param(event, "records[{i}]".format(i=i), EventRecord)
        if not event.is_dagster_event:
            continue

        event_type = event.dagster_event.event_type

        # An event has exactly one type, so the branches are exclusive.
        if event_type == DagsterEventType.PIPELINE_START:
            start_time = _timestamp_as_float(event.timestamp)
        elif event_type == DagsterEventType.STEP_FAILURE:
            steps_failed += 1
        elif event_type == DagsterEventType.STEP_SUCCESS:
            steps_succeeded += 1
        elif event_type == DagsterEventType.STEP_MATERIALIZATION:
            materializations += 1
        elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
            expectations += 1
        elif event_type in terminal_event_types:
            end_time = _timestamp_as_float(event.timestamp)

    return PipelineRunStatsSnapshot(run_id, steps_succeeded, steps_failed,
                                    materializations, expectations, start_time,
                                    end_time)
Example #5
0
    def resolve_nextTick(self, graphene_info):
        """Resolve the sensor's next expected tick as a ``FutureJobTick``.

        Returns ``None`` when the sensor is not running, when no previous
        tick is recorded, or when the projected tick time (latest tick
        timestamp plus ``SENSOR_DAEMON_INTERVAL``) is already in the past.
        """
        sensor_state = self._sensor_state
        if sensor_state.status != JobStatus.RUNNING:
            return None

        instance = graphene_info.context.instance
        previous_tick = instance.get_latest_job_tick(
            sensor_state.job_origin_id)
        if not previous_tick:
            return None

        projected_timestamp = previous_tick.timestamp + SENSOR_DAEMON_INTERVAL
        # NOTE(review): datetime.now() is naive local time; confirm that
        # datetime_as_float produces a value comparable to tick timestamps.
        current_timestamp = datetime_as_float(datetime.now())
        if projected_timestamp < current_timestamp:
            return None

        future_tick_type = graphene_info.schema.type_named("FutureJobTick")
        return future_tick_type(projected_timestamp)
Example #6
0
    def __init__(self, daemon_status):
        """Populate the GraphQL fields from a ``DaemonStatus``.

        ``lastHeartbeatTime`` is the heartbeat's timestamp converted with
        ``datetime_as_float``, or ``None`` when no heartbeat is recorded.
        """
        check.inst_param(daemon_status, "daemon_status", DaemonStatus)

        heartbeat = daemon_status.last_heartbeat
        last_heartbeat_time = (
            None if heartbeat is None
            else datetime_as_float(heartbeat.timestamp)
        )

        super(DauphinDaemonStatus, self).__init__(
            daemonType=daemon_status.daemon_type,
            required=daemon_status.required,
            healthy=daemon_status.healthy,
            lastHeartbeatTime=last_heartbeat_time,
        )
Example #7
0
    def get_step_stats_for_run(self, run_id):
        """Build a ``RunStepKeyStatsSnapshot`` for each step of a run.

        Two queries are issued against the run's event log:

        1. An aggregate over step start/success/skipped/failure rows,
           grouped by step key and event type, giving the maximum timestamp
           per pair. It determines each step's start time, end time and
           status.
        2. The serialized materialization and expectation-result events in
           ascending ``id`` order, which are deserialized and grouped by
           step key.

        Only steps that appear in the first query produce a snapshot.

        Args:
            run_id: The run to summarize; checked with ``check.str_param``.

        Returns:
            A list of ``RunStepKeyStatsSnapshot``. Fields with no data are
            ``None``.

        Raises:
            DagsterEventLogInvalidForRun: If deserializing a stored event
                raises ``seven.JSONDecodeError`` or ``check.CheckError``.
        """
        check.str_param(run_id, 'run_id')

        columns = SqlEventLogStorageTable.c

        lifecycle_event_types = [
            DagsterEventType.STEP_START.value,
            DagsterEventType.STEP_SUCCESS.value,
            DagsterEventType.STEP_SKIPPED.value,
            DagsterEventType.STEP_FAILURE.value,
        ]
        # Event types that end a step, mapped to the status they imply.
        terminal_statuses = {
            DagsterEventType.STEP_FAILURE.value: StepEventStatus.FAILURE,
            DagsterEventType.STEP_SUCCESS.value: StepEventStatus.SUCCESS,
            DagsterEventType.STEP_SKIPPED.value: StepEventStatus.SKIPPED,
        }

        # `!= None` is deliberate: the column overloads the operator to
        # build the SQL null test, which `is not None` could not do.
        lifecycle_query = (
            db.select([
                columns.step_key,
                columns.dagster_event_type,
                db.func.max(columns.timestamp).label('timestamp'),
            ])
            .where(columns.run_id == run_id)
            .where(columns.step_key != None)  # noqa: E711
            .where(columns.dagster_event_type.in_(lifecycle_event_types))
            .group_by(columns.step_key, columns.dagster_event_type)
        )

        with self.connect(run_id) as conn:
            lifecycle_rows = conn.execute(lifecycle_query).fetchall()

        def _as_float_or_none(timestamp):
            # Missing (falsy) timestamps stay None instead of being converted.
            if timestamp:
                return datetime_as_float(timestamp)
            return None

        by_step_key = defaultdict(dict)
        for row in lifecycle_rows:
            step_key = row.step_key
            event_type = row.dagster_event_type
            if event_type == DagsterEventType.STEP_START.value:
                by_step_key[step_key]['start_time'] = _as_float_or_none(
                    row.timestamp)
            elif event_type in terminal_statuses:
                step_stats = by_step_key[step_key]
                step_stats['end_time'] = _as_float_or_none(row.timestamp)
                step_stats['status'] = terminal_statuses[event_type]

        materializations = defaultdict(list)
        expectation_results = defaultdict(list)
        payload_event_types = [
            DagsterEventType.STEP_MATERIALIZATION.value,
            DagsterEventType.STEP_EXPECTATION_RESULT.value,
        ]
        raw_event_query = (
            db.select([columns.event])
            .where(columns.run_id == run_id)
            .where(columns.step_key != None)  # noqa: E711
            .where(columns.dagster_event_type.in_(payload_event_types))
            .order_by(columns.id.asc())
        )

        with self.connect(run_id) as conn:
            event_rows = conn.execute(raw_event_query).fetchall()

        try:
            for (serialized_event, ) in event_rows:
                event = check.inst_param(
                    deserialize_json_to_dagster_namedtuple(serialized_event),
                    'event', EventRecord)
                dagster_event = event.dagster_event
                event_type = dagster_event.event_type
                if event_type == DagsterEventType.STEP_MATERIALIZATION:
                    materializations[event.step_key].append(
                        dagster_event.event_specific_data.materialization)
                elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
                    expectation_results[event.step_key].append(
                        dagster_event.event_specific_data.expectation_result)
        except (seven.JSONDecodeError, check.CheckError) as err:
            six.raise_from(DagsterEventLogInvalidForRun(run_id=run_id), err)

        snapshots = []
        for step_key, step_stats in by_step_key.items():
            snapshots.append(
                RunStepKeyStatsSnapshot(
                    run_id=run_id,
                    step_key=step_key,
                    status=step_stats.get('status'),
                    start_time=step_stats.get('start_time'),
                    end_time=step_stats.get('end_time'),
                    # .get() avoids creating empty lists for steps without
                    # payload events, so those fields stay None.
                    materializations=materializations.get(step_key),
                    expectation_results=expectation_results.get(step_key),
                ))
        return snapshots
Example #8
0
    def get_step_stats_for_run(self, run_id, step_keys=None):
        """Build a ``RunStepKeyStatsSnapshot`` for each step of a run.

        Two queries are issued against the run's event log:

        1. An aggregate over step start/success/skipped/failure/restarted
           rows, grouped by step key and event type, giving the maximum
           timestamp and the row count per pair. It determines each step's
           start time, end time, status and attempt count.
        2. The serialized asset-materialization and expectation-result
           events in ascending ``id`` order, which are deserialized and
           grouped by step key.

        Only steps that appear in the first query produce a snapshot.

        Args:
            run_id: The run to summarize; checked with ``check.str_param``.
            step_keys: Optional list of step keys to restrict both queries
                to. ``None`` or an empty list applies no restriction.

        Returns:
            A list of ``RunStepKeyStatsSnapshot``. Fields with no data are
            ``None``.

        Raises:
            DagsterEventLogInvalidForRun: If deserializing a stored event
                raises ``seven.JSONDecodeError`` or ``check.CheckError``.
        """
        check.str_param(run_id, "run_id")
        check.opt_list_param(step_keys, "step_keys", of_type=str)

        columns = SqlEventLogStorageTable.c

        lifecycle_event_types = [
            DagsterEventType.STEP_START.value,
            DagsterEventType.STEP_SUCCESS.value,
            DagsterEventType.STEP_SKIPPED.value,
            DagsterEventType.STEP_FAILURE.value,
            DagsterEventType.STEP_RESTARTED.value,
        ]
        # Event types that end a step, mapped to the status they imply.
        terminal_statuses = {
            DagsterEventType.STEP_FAILURE.value: StepEventStatus.FAILURE,
            DagsterEventType.STEP_SUCCESS.value: StepEventStatus.SUCCESS,
            DagsterEventType.STEP_SKIPPED.value: StepEventStatus.SKIPPED,
        }

        # `!= None` is deliberate: the column overloads the operator to
        # build the SQL null test, which `is not None` could not do.
        lifecycle_query = (
            db.select([
                columns.step_key,
                columns.dagster_event_type,
                db.func.max(columns.timestamp).label("timestamp"),
                db.func.count(columns.id).label("n_events_of_type_for_step"),
            ])
            .where(columns.run_id == run_id)
            .where(columns.step_key != None)  # noqa: E711
            .where(columns.dagster_event_type.in_(lifecycle_event_types))
        )
        if step_keys:
            lifecycle_query = lifecycle_query.where(
                columns.step_key.in_(step_keys))
        lifecycle_query = lifecycle_query.group_by(
            columns.step_key, columns.dagster_event_type)

        with self.run_connection(run_id) as conn:
            lifecycle_rows = conn.execute(lifecycle_query).fetchall()

        def _as_float_or_none(timestamp):
            # Missing (falsy) timestamps stay None instead of being converted.
            if timestamp:
                return datetime_as_float(timestamp)
            return None

        by_step_key = defaultdict(dict)
        for row in lifecycle_rows:
            step_key = row.step_key
            event_type = row.dagster_event_type
            if event_type == DagsterEventType.STEP_START.value:
                step_stats = by_step_key[step_key]
                step_stats["start_time"] = _as_float_or_none(row.timestamp)
                step_stats["attempts"] = step_stats.get("attempts", 0) + 1
            elif event_type == DagsterEventType.STEP_RESTARTED.value:
                # In case we see step restarted events but not a step
                # started event, we want to only count the restarted events,
                # since the attempt count represents the number of times we
                # have successfully started running the step.
                step_stats = by_step_key[step_key]
                step_stats["attempts"] = (step_stats.get("attempts", 0) +
                                          row.n_events_of_type_for_step)
            elif event_type in terminal_statuses:
                step_stats = by_step_key[step_key]
                step_stats["end_time"] = _as_float_or_none(row.timestamp)
                step_stats["status"] = terminal_statuses[event_type]

        materializations = defaultdict(list)
        expectation_results = defaultdict(list)
        payload_event_types = [
            DagsterEventType.ASSET_MATERIALIZATION.value,
            DagsterEventType.STEP_EXPECTATION_RESULT.value,
        ]
        raw_event_query = (
            db.select([columns.event])
            .where(columns.run_id == run_id)
            .where(columns.step_key != None)  # noqa: E711
            .where(columns.dagster_event_type.in_(payload_event_types))
            .order_by(columns.id.asc())
        )
        if step_keys:
            raw_event_query = raw_event_query.where(
                columns.step_key.in_(step_keys))

        with self.run_connection(run_id) as conn:
            event_rows = conn.execute(raw_event_query).fetchall()

        try:
            for (serialized_event, ) in event_rows:
                event = check.inst_param(
                    deserialize_json_to_dagster_namedtuple(serialized_event),
                    "event", EventRecord)
                dagster_event = event.dagster_event
                event_type = dagster_event.event_type
                if event_type == DagsterEventType.ASSET_MATERIALIZATION:
                    materializations[event.step_key].append(
                        dagster_event.event_specific_data.materialization)
                elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
                    expectation_results[event.step_key].append(
                        dagster_event.event_specific_data.expectation_result)
        except (seven.JSONDecodeError, check.CheckError) as err:
            raise DagsterEventLogInvalidForRun(run_id=run_id) from err

        snapshots = []
        for step_key, step_stats in by_step_key.items():
            snapshots.append(
                RunStepKeyStatsSnapshot(
                    run_id=run_id,
                    step_key=step_key,
                    status=step_stats.get("status"),
                    start_time=step_stats.get("start_time"),
                    end_time=step_stats.get("end_time"),
                    # .get() avoids creating empty lists for steps without
                    # payload events, so those fields stay None.
                    materializations=materializations.get(step_key),
                    expectation_results=expectation_results.get(step_key),
                    attempts=step_stats.get("attempts"),
                ))
        return snapshots
Example #9
0
 def resolve_updateTime(self, graphene_info):
     """Resolve the run record's ``update_timestamp`` as a float."""
     instance = graphene_info.context.instance
     update_timestamp = self._get_run_record(instance).update_timestamp
     return datetime_as_float(update_timestamp)