Esempio n. 1
0
    def start_stream_to_online_ingestion(
        self, ingestion_job_params: StreamIngestionJobParameters
    ) -> StreamIngestionJob:
        """
        Starts a stream ingestion job to a Spark cluster.

        The main application file and every extra jar named by the job
        parameters are uploaded through ``self._datalake`` before submission.
        The job hash is stored in the job tags under ``METADATA_JOBHASH``.

        Args:
            ingestion_job_params: Supplies the main file path, extra jar
                paths, project name, main class name, arguments and job hash.

        Raises:
            SparkJobFailure: The spark job submission failed, encountered error
                during execution, or timeout.

        Returns:
            StreamIngestionJob: wrapper around remote job.
        """
        params = ingestion_job_params

        # The main file is uploaded first, then each extra jar in the order given.
        uploaded_main = self._datalake.upload_file(params.get_main_file_path())
        uploaded_jars: List[str] = [
            self._datalake.upload_file(jar_path)
            for jar_path in params.get_extra_jar_paths()
        ]

        job_tags = _prepare_job_tags(params, STREAM_TO_ONLINE_JOB_TYPE)
        job_tags[METADATA_JOBHASH] = params.get_job_hash()

        # The job name is the project name plus a fixed suffix.
        submitted = _submit_job(
            self._api,
            params.get_project() + "_stream_to_online_ingestion",
            uploaded_main,
            main_class=params.get_class_name(),
            arguments=params.get_arguments(),
            reference_files=uploaded_jars,
            configuration=None,
            tags=job_tags,
        )

        wrapped_job = self._job_from_job_info(submitted)
        return cast(StreamIngestionJob, wrapped_job)
Esempio n. 2
0
    def start_stream_to_online_ingestion(
        self, ingestion_job_params: StreamIngestionJobParameters
    ) -> StreamIngestionJob:
        """
        Starts a stream ingestion job to a Spark cluster.

        The main jar and every extra jar named by the job parameters are
        uploaded with ``self._upload_jar``. A job resource is then built from
        ``self._stream_ingestion_template`` and submitted in
        ``self._namespace``.

        Args:
            ingestion_job_params: Supplies the main file path, extra jar
                paths, job hash, main class name, arguments, project name and
                feature table name.

        Raises:
            SparkJobFailure: The spark job submission failed, encountered error
                during execution, or timeout.

        Returns:
            StreamIngestionJob: wrapper around remote job.
        """
        params = ingestion_job_params

        # The main jar is uploaded first, then each extra jar in the order given.
        main_jar_path = self._upload_jar(params.get_main_file_path())
        dependency_jars: List[str] = [
            self._upload_jar(jar) for jar in params.get_extra_jar_paths()
        ]

        hash_of_job = params.get_job_hash()
        new_job_id = _generate_job_id()

        job_resource = _prepare_job_resource(
            job_template=self._stream_ingestion_template,
            job_id=new_job_id,
            job_type=STREAM_TO_ONLINE_JOB_TYPE,
            main_application_file=main_jar_path,
            main_class=params.get_class_name(),
            packages=[],
            jars=dependency_jars,
            # The job hash is carried as extra metadata on the resource.
            extra_metadata={METADATA_JOBHASH: hash_of_job},
            azure_credentials=self._get_azure_credentials(),
            arguments=params.get_arguments(),
            namespace=self._namespace,
            # Labels: feature table name, project/table hash and project.
            extra_labels={
                LABEL_FEATURE_TABLE: _truncate_label(
                    params.get_feature_table_name()
                ),
                LABEL_FEATURE_TABLE_HASH: _generate_project_table_hash(
                    params.get_project(),
                    params.get_feature_table_name(),
                ),
                LABEL_PROJECT: params.get_project(),
            },
        )

        submitted = _submit_job(
            api=self._api,
            resource=job_resource,
            namespace=self._namespace,
        )

        wrapped_job = self._job_from_job_info(submitted)
        return cast(StreamIngestionJob, wrapped_job)