Example #1
    def add_job(self, job: SparkJob) -> None:
        """Add a Spark job to the cache.

        Args:
            job (SparkJob): The new Spark job to add.
        """
        with self.lock:
            self.job_by_id[job.get_id()] = job
            if isinstance(job, StreamIngestionJob):
                self.hash_by_id[job.get_id()] = job.get_hash()
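
For context, a reader may want to see the class this method lives in. Below is a minimal, hypothetical sketch of such a cache; the SparkJob and StreamIngestionJob stubs are illustrative stand-ins rather than Feast's real classes, and only the lock plus the two dictionaries are taken from the example above.

import threading
from typing import Dict, Optional


class SparkJob:
    """Illustrative stand-in for the real SparkJob interface."""

    def __init__(self, job_id: str) -> None:
        self._job_id = job_id

    def get_id(self) -> str:
        return self._job_id


class StreamIngestionJob(SparkJob):
    """Illustrative streaming job that also carries a content hash."""

    def __init__(self, job_id: str, job_hash: str) -> None:
        super().__init__(job_id)
        self._job_hash = job_hash

    def get_hash(self) -> str:
        return self._job_hash


class JobCache:
    """Hypothetical in-memory cache; every access holds one lock."""

    def __init__(self) -> None:
        self.lock = threading.Lock()
        self.job_by_id: Dict[str, SparkJob] = {}
        self.hash_by_id: Dict[str, str] = {}

    def add_job(self, job: SparkJob) -> None:
        # Same body as the example above: index the job by id, and
        # additionally index stream jobs by their hash.
        with self.lock:
            self.job_by_id[job.get_id()] = job
            if isinstance(job, StreamIngestionJob):
                self.hash_by_id[job.get_id()] = job.get_hash()

    def get_job(self, job_id: str) -> Optional[SparkJob]:
        with self.lock:
            return self.job_by_id.get(job_id)

Holding the same lock for reads and writes keeps the two dictionaries consistent with each other, at the cost of serializing all cache access.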
Example #2
def _job_to_proto(spark_job: SparkJob) -> JobProto:
    """Convert a SparkJob into its protobuf representation.

    Args:
        spark_job (SparkJob): The job to convert.
    """
    job = JobProto()
    job.id = spark_job.get_id()
    job.log_uri = cast(str, spark_job.get_log_uri() or "")
    job.error_message = cast(str, spark_job.get_error_message() or "")
    status = spark_job.get_status()
    if status == SparkJobStatus.COMPLETED:
        job.status = JobStatus.JOB_STATUS_DONE
    elif status == SparkJobStatus.IN_PROGRESS:
        job.status = JobStatus.JOB_STATUS_RUNNING
    elif status == SparkJobStatus.FAILED:
        job.status = JobStatus.JOB_STATUS_ERROR
    elif status == SparkJobStatus.STARTING:
        job.status = JobStatus.JOB_STATUS_PENDING
    else:
        raise ValueError(f"Invalid job status {status}")

    if isinstance(spark_job, RetrievalJob):
        job.type = JobType.RETRIEVAL_JOB
        job.retrieval.output_location = spark_job.get_output_file_uri(block=False)
    elif isinstance(spark_job, BatchIngestionJob):
        job.type = JobType.BATCH_INGESTION_JOB
        job.batch_ingestion.table_name = spark_job.get_feature_table()
    elif isinstance(spark_job, StreamIngestionJob):
        job.type = JobType.STREAM_INGESTION_JOB
        job.stream_ingestion.table_name = spark_job.get_feature_table()
    else:
        raise ValueError(f"Invalid job type {job}")

    job.start_time.FromDatetime(spark_job.get_start_time())

    return job
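
The if/elif ladder that maps SparkJobStatus onto JobStatus can also be written as a table lookup, which stays in sync more easily as statuses are added and fails just as loudly on unknown values. A minimal sketch, assuming the enum names used above; both enums here are illustrative stand-ins, not the real generated protobuf types.

from enum import Enum, auto


class SparkJobStatus(Enum):
    """Illustrative stand-in for the internal status enum."""
    STARTING = auto()
    IN_PROGRESS = auto()
    COMPLETED = auto()
    FAILED = auto()


class JobStatus(Enum):
    """Illustrative stand-in for the generated protobuf enum."""
    JOB_STATUS_PENDING = auto()
    JOB_STATUS_RUNNING = auto()
    JOB_STATUS_DONE = auto()
    JOB_STATUS_ERROR = auto()


# Table-driven equivalent of the status branch in _job_to_proto.
_STATUS_MAP = {
    SparkJobStatus.COMPLETED: JobStatus.JOB_STATUS_DONE,
    SparkJobStatus.IN_PROGRESS: JobStatus.JOB_STATUS_RUNNING,
    SparkJobStatus.FAILED: JobStatus.JOB_STATUS_ERROR,
    SparkJobStatus.STARTING: JobStatus.JOB_STATUS_PENDING,
}


def map_status(status: SparkJobStatus) -> JobStatus:
    """Translate an internal status, raising on anything unmapped."""
    try:
        return _STATUS_MAP[status]
    except KeyError:
        raise ValueError(f"Invalid job status {status}") from None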