Example #1
The constructor of PySpark's StreamingQueryProgress wrapper: it copies each field of the JVM-side progress object into plain Python attributes, wrapping nested objects in their Python counterparts and unpickling observed metrics into Rows.
    def __init__(self, jprogress: JavaObject) -> None:
        from pyspark import SparkContext

        self._jprogress: JavaObject = jprogress
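        # Copy the scalar fields of the JVM progress object into Python-native types.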
        self._id: uuid.UUID = uuid.UUID(jprogress.id().toString())
        self._runId: uuid.UUID = uuid.UUID(jprogress.runId().toString())
        self._name: Optional[str] = jprogress.name()
        self._timestamp: str = jprogress.timestamp()
        self._batchId: int = jprogress.batchId()
        self._batchDuration: int = jprogress.batchDuration()
        self._durationMs: Dict[str, int] = dict(jprogress.durationMs())
        self._eventTime: Dict[str, str] = dict(jprogress.eventTime())
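        # Nested JVM progress objects are wrapped in their Python counterparts.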
        self._stateOperators: List[StateOperatorProgress] = [
            StateOperatorProgress(js) for js in jprogress.stateOperators()
        ]
        self._sources: List[SourceProgress] = [
            SourceProgress(js) for js in jprogress.sources()
        ]
        self._sink: SinkProgress = SinkProgress(jprogress.sink())

        # TODO(SPARK-38760): Write a test with DataFrame.observe API implementation.
        self._observedMetrics: Dict[str, Row] = {
            k: pickle.loads(
                SparkContext._jvm.PythonSQLUtils.toPyRow(jr)  # type: ignore[union-attr]
            )
            for k, jr in dict(jprogress.observedMetrics()).items()
        }
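
A minimal sketch of how this wrapper typically reaches user code (assuming Spark 3.4+, where the listener API delivers a QueryProgressEvent whose .progress attribute is a StreamingQueryProgress; the ProgressLogger name and print format are ours):

from pyspark.sql.streaming import StreamingQueryListener

class ProgressLogger(StreamingQueryListener):
    # Sketch only: onQueryStarted/onQueryProgress/onQueryTerminated are the
    # abstract hooks of StreamingQueryListener; event.progress is the
    # StreamingQueryProgress constructed above.
    def onQueryStarted(self, event):
        pass

    def onQueryProgress(self, event):
        p = event.progress
        print(f"batch {p.batchId} of query {p.id} took {p.batchDuration} ms")

    def onQueryTerminated(self, event):
        pass

# Registration assumes an active SparkSession named `spark`:
# spark.streams.addListener(ProgressLogger())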
Example #2
The constructor of a streaming query lifecycle event (this shape matches PySpark's QueryStartedEvent): it records the query's id, run id, optional name, and timestamp.
    def __init__(self, jevent: JavaObject) -> None:
        # id/runId arrive as JVM UUIDs; convert them to Python uuid.UUID.
        self._id: uuid.UUID = uuid.UUID(jevent.id().toString())
        self._runId: uuid.UUID = uuid.UUID(jevent.runId().toString())
        self._name: Optional[str] = jevent.name()  # None for unnamed queries
        self._timestamp: str = jevent.timestamp()
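
The started event's fields can be read the same way; a drop-in onQueryStarted for the ProgressLogger sketch above (formatting ours):

    def onQueryStarted(self, event):
        # id/runId are uuid.UUID; name may be None for unnamed queries
        print(f"query {event.name or event.id} (run {event.runId}) started at {event.timestamp}")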