def _get_expected_job_hash_to_table_refs(
    client: feast.Client, projects: List[str]
) -> Dict[str, Tuple[str, str]]:
    """
    Build the map of stream ingestion jobs that are expected to exist.

    Every feature table of every given project is inspected; each table that
    has a stream source contributes one entry keyed by the job hash of its
    stream-to-online ingestion parameters.

    Args:
        client (feast.Client): Client used to list feature tables and to build
            the ingestion parameters.
        projects (List[str]): Names of the projects whose feature tables are
            checked.

    Returns:
        Dict[str, Tuple[str, str]]: Map of job_hash -> (project, table_name)
            for expected stream ingestion jobs. If two tables yield the same
            hash, the one visited last wins.
    """
    expected_refs = {}
    for project_name in projects:
        for table in client.list_feature_tables(project_name):
            # Tables without a stream source need no stream ingestion job.
            if table.stream_source is None:
                continue
            table_hash = get_stream_to_online_ingestion_params(
                client, project_name, table, []
            ).get_job_hash()
            expected_refs[table_hash] = (project_name, table.name)
    return expected_refs
def StartStreamToOnlineIngestionJob(
    self, request: StartStreamToOnlineIngestionJobRequest, context
):
    """Start job to ingest data from stream into online store.

    When the job service control loop is disabled, a new stream ingestion job
    is started for the requested feature table. When it is enabled, no job is
    started here; instead the already existing job whose hash matches the
    table's ingestion parameters is looked up (terminated jobs included) and
    its id is returned.

    Args:
        request: Request carrying the ``project`` and ``table_name`` of the
            feature table to ingest.
        context: Call context; not used by this method.

    Returns:
        StartStreamToOnlineIngestionJobResponse holding the id of the started
        or already existing job.

    Raises:
        RuntimeError: If the control loop is enabled but no stream ingestion
            job with the expected hash is found.
    """
    feature_table = self.client.get_feature_table(
        request.table_name, request.project
    )

    control_loop_enabled = self.client._config.getboolean(
        opt.JOB_SERVICE_ENABLE_CONTROL_LOOP
    )
    if not control_loop_enabled:
        # TODO: add extra_jars to request
        started_job = start_stream_to_online_ingestion(
            client=self.client,
            project=request.project,
            feature_table=feature_table,
            extra_jars=[],
        )
        return StartStreamToOnlineIngestionJobResponse(id=started_job.get_id())

    # If the control loop is enabled, return existing stream ingestion job id
    # instead of starting a new one
    expected_hash = get_stream_to_online_ingestion_params(
        self.client, request.project, feature_table, []
    ).get_job_hash()
    existing_job = next(
        (
            candidate
            for candidate in list_jobs(include_terminated=True, client=self.client)
            if isinstance(candidate, StreamIngestionJob)
            and candidate.get_hash() == expected_hash
        ),
        None,
    )
    if existing_job is None:
        raise RuntimeError(
            "Feast Job Service has control loop enabled, but couldn't find the existing stream ingestion job for the given FeatureTable"
        )
    return StartStreamToOnlineIngestionJobResponse(id=existing_job.get_id())