Ejemplo n.º 1
0
def _get_expected_job_hash_to_tables(
        client: Client,
        projects: List[str]) -> Dict[str, Tuple[str, FeatureTable]]:
    """
    Build the map of stream ingestion jobs expected for the given projects.

    Every feature table of every listed project is inspected and tables without
    a stream source are skipped. For each remaining table the stream-to-online
    ingestion parameters are derived (passing an empty list as the last
    argument) and their job hash becomes the key of the result.

    Args:
        client (Client): Client whose feature store is queried for feature tables.
        projects (List[str]): Names of the projects to inspect.

    Returns:
        Dict[str, Tuple[str, FeatureTable]]: Map of job_hash -> (project, feature_table)
            for the expected stream ingestion jobs. If several tables yield the
            same hash, the one visited last is kept.
    """
    expected = {}

    for project_name in projects:
        for table in client.feature_store.list_feature_tables(project_name):
            # Only tables backed by a stream need a stream ingestion job.
            if table.stream_source is None:
                continue
            ingestion_params = get_stream_to_online_ingestion_params(
                client, project_name, table, [])
            expected[ingestion_params.get_job_hash()] = (project_name, table)

    return expected
Ejemplo n.º 2
0
    def StartStreamToOnlineIngestionJob(
            self, request: StartStreamToOnlineIngestionJobRequest, context):
        """Start a job to ingest data from a stream into the online store.

        The submission counter is incremented on every call, before any
        validation. When the control loop option is enabled no new job is
        launched: the first listed stream ingestion job (terminated ones
        included) whose hash matches the requested feature table is reported
        instead.

        Args:
            request: Request carrying the ``project`` and ``table_name`` of the
                feature table to ingest.
            context: Call context supplied by the caller; not used here.

        Returns:
            StartStreamToOnlineIngestionJobResponse: Id, start time, table name
                and log URI of the newly started or already existing job.

        Raises:
            ValueError: If the requested project is not whitelisted.
            RuntimeError: If the control loop is enabled but no stream
                ingestion job with the expected hash is found.
        """
        job_submission_count.labels(
            "streaming", request.project, request.table_name).inc()

        if not self.is_whitelisted(request.project):
            raise ValueError(
                f"Project {request.project} is not whitelisted. Please contact your Feast administrator to whitelist it."
            )

        table = self.client.feature_store.get_feature_table(
            request.table_name, request.project)

        control_loop_enabled = self.client.config.getboolean(
            opt.JOB_SERVICE_ENABLE_CONTROL_LOOP)

        if not control_loop_enabled:
            # No control loop: this call launches the job itself.
            # TODO: add extra_jars to request
            started_job = start_stream_to_online_ingestion(
                client=self.client,
                project=request.project,
                feature_table=table,
                extra_jars=[],
            )

            started_at = Timestamp()
            started_at.FromDatetime(started_job.get_start_time())
            return StartStreamToOnlineIngestionJobResponse(
                id=started_job.get_id(),
                job_start_time=started_at,
                table_name=request.table_name,
                log_uri=started_job.get_log_uri(),  # type: ignore
            )

        # Control loop enabled: return the existing stream ingestion job
        # instead of starting a new one. Jobs are matched on the hash of the
        # ingestion parameters computed for the requested feature table.
        expected_hash = get_stream_to_online_ingestion_params(
            self.client, request.project, table, []).get_job_hash()

        for candidate in list_jobs(include_terminated=True, client=self.client):
            if not isinstance(candidate, StreamIngestionJob):
                continue
            if candidate.get_hash() != expected_hash:
                continue

            started_at = Timestamp()
            started_at.FromDatetime(candidate.get_start_time())
            return StartStreamToOnlineIngestionJobResponse(
                id=candidate.get_id(),
                job_start_time=started_at,
                table_name=candidate.get_feature_table(),
                log_uri=candidate.get_log_uri(),  # type: ignore
            )

        raise RuntimeError(
            "Feast Job Service has control loop enabled, "
            "but couldn't find the existing stream ingestion job for the given FeatureTable"
        )