def feature_sets(self) -> List[FeatureSetRef]:
    """
    Return the feature set references attached to this IngestJob.

    Each entry of the job proto's ``feature_set_references`` field is
    converted with ``FeatureSetRef.from_proto`` before being returned.
    """
    native_refs = []
    for ref_proto in self.proto.feature_set_references:
        # proto reference -> native FeatureSetRef
        native_refs.append(FeatureSetRef.from_proto(ref_proto))
    return native_refs
def test_from_feature_set(self):
    """
    A reference built from a FeatureSet should expose that feature set's
    name, project and version.
    """
    source_fs = FeatureSet("test", "test")
    source_fs.version = 2

    derived_ref = FeatureSetRef.from_feature_set(source_fs)

    assert derived_ref.name == "test"
    assert derived_ref.project == "test"
    assert derived_ref.version == 2
def test_list_ingest_jobs(self, mock_jobcontroller_client, mocker):
    """
    Listing ingestion jobs through the job controller client should return
    the job supplied by the patched ``ListIngestionJobs`` stub call.
    """
    # swap the client's service stub for a patched one
    mocker.patch.object(
        mock_jobcontroller_client,
        "_jobcontroller_service_stub",
        return_value=Core.JobControllerServiceStub(grpc.insecure_channel("")),
    )

    feature_set_ref = FeatureSetRef(project="test", name="driver")

    # canned job that the patched ListIngestionJobs call hands back
    kafka_source = Source(
        type=SourceType.KAFKA,
        kafka_source_config=KafkaSourceConfig(
            bootstrap_servers="localhost:9092", topic="topic"
        ),
    )
    canned_job = IngestJobProto(
        id="kafka-to-redis",
        external_id="job-2222",
        status=IngestionJobStatus.RUNNING,
        feature_set_references=[feature_set_ref.to_proto()],
        source=kafka_source,
        stores=[Store(name="redis")],
    )
    mocker.patch.object(
        mock_jobcontroller_client._jobcontroller_service_stub,
        "ListIngestionJobs",
        return_value=ListIngestionJobsResponse(jobs=[canned_job]),
    )

    # list ingestion jobs by target feature set reference
    listed_jobs = mock_jobcontroller_client.list_ingest_jobs(
        feature_set_ref=feature_set_ref
    )
    assert len(listed_jobs) >= 1

    # the first listed job must mirror the canned proto
    first_job = listed_jobs[0]
    assert first_job.status == IngestionJobStatus.RUNNING
    assert first_job.id == "kafka-to-redis"
    assert first_job.external_id == "job-2222"
    assert first_job.feature_sets[0].name == "driver"
    assert first_job.source.source_type == "Kafka"
def test_list_ingest_jobs(self, mocked_client, mocker):
    """
    Listing ingestion jobs through the core client should return the job
    supplied by the patched ``ListIngestionJobs`` stub call.
    """
    # swap the client's core service stub for a patched one
    mocker.patch.object(
        mocked_client,
        "_core_service_stub",
        return_value=Core.CoreServiceStub(grpc.insecure_channel("")),
    )

    # feature set the canned ingestion job writes to
    feature_set_proto = FeatureSetProto(
        spec=FeatureSetSpecProto(
            project="test", name="driver", max_age=Duration(seconds=3600)
        )
    )

    # canned job that the patched ListIngestionJobs call hands back
    kafka_source = Source(
        type=SourceType.KAFKA,
        kafka_source_config=KafkaSourceConfig(
            bootstrap_servers="localhost:9092", topic="topic"
        ),
    )
    canned_job = IngestJobProto(
        id="kafka-to-redis",
        external_id="job-2222",
        status=IngestionJobStatus.RUNNING,
        feature_sets=[feature_set_proto],
        source=kafka_source,
        store=Store(name="redis"),
    )
    mocker.patch.object(
        mocked_client._core_service_stub,
        "ListIngestionJobs",
        return_value=ListIngestionJobsResponse(jobs=[canned_job]),
    )

    # list ingestion jobs by target feature set reference
    native_feature_set = FeatureSet.from_proto(feature_set_proto)
    target_ref = FeatureSetRef.from_feature_set(native_feature_set)
    listed_jobs = mocked_client.list_ingest_jobs(feature_set_ref=target_ref)
    assert len(listed_jobs) >= 1

    # the first listed job must mirror the canned proto
    first_job = listed_jobs[0]
    assert first_job.status == IngestionJobStatus.RUNNING
    assert first_job.id == "kafka-to-redis"
    assert first_job.external_id == "job-2222"
    assert first_job.feature_sets[0].name == "driver"
    assert first_job.source.source_type == "Kafka"
def test_all_types_ingest_jobs(client, all_types_dataframe):
    """
    Restart and then stop every running ingestion job that targets the
    "all_types" feature set, checking the job status after each step.
    """
    # look up the jobs that target the feature set
    feature_set = client.get_feature_set(name="all_types")
    target_ref = FeatureSetRef.from_feature_set(feature_set)

    # only jobs that are currently running are exercised
    running_jobs = [
        candidate
        for candidate in client.list_ingest_jobs(feature_set_ref=target_ref)
        if candidate.status == IngestionJobStatus.RUNNING
    ]
    assert len(running_jobs) >= 1

    for job in running_jobs:
        # restart, then block until the job reports RUNNING again
        client.restart_ingest_job(job)
        job.wait(IngestionJobStatus.RUNNING)
        assert job.status == IngestionJobStatus.RUNNING

        # stop, then block until the job reports ABORTED
        client.stop_ingest_job(job)
        job.wait(IngestionJobStatus.ABORTED)
        assert job.status == IngestionJobStatus.ABORTED
def test_basic_ingest_jobs(client):
    """
    Restart and then stop every running ingestion job that targets the
    "customer_transactions" feature set, checking the status after each step.
    """
    # look up the jobs that target the feature set
    feature_set = client.get_feature_set(name="customer_transactions")
    target_ref = FeatureSetRef.from_feature_set(feature_set)

    # only jobs that are currently running are exercised
    running_jobs = [
        candidate
        for candidate in client.list_ingest_jobs(feature_set_ref=target_ref)
        if candidate.status == IngestionJobStatus.RUNNING
    ]
    assert len(running_jobs) >= 1

    for job in running_jobs:
        # restart, then block until the job reports RUNNING again
        client.restart_ingest_job(job)
        job.wait(IngestionJobStatus.RUNNING)
        assert job.status == IngestionJobStatus.RUNNING

        # stop, then block until the job reports ABORTED
        client.stop_ingest_job(job)
        job.wait(IngestionJobStatus.ABORTED)
        assert job.status == IngestionJobStatus.ABORTED
def ingest_job_list(job_id, feature_set_ref, store_name):
    """
    List ingestion jobs
    """
    # a string reference is parsed into a FeatureSetRef; None is passed through
    ref_filter = (
        None
        if feature_set_ref is None
        else FeatureSetRef.from_str(feature_set_ref)
    )

    # fetch the matching jobs and reduce each to an [id, status-name] row
    feast_client = JCClient()
    matching_jobs = feast_client.list_ingest_jobs(
        job_id=job_id, feature_set_ref=ref_filter, store_name=store_name
    )
    rows = [
        [job.id, IngestionJobStatus.Name(job.status)] for job in matching_jobs
    ]

    # tabulate is imported only once the rows are ready to be rendered
    from tabulate import tabulate

    print(tabulate(rows, headers=["ID", "STATUS"], tablefmt="plain"))
def list_ingest_jobs(
    self,
    job_id: str = None,
    feature_set_ref: FeatureSetRef = None,
    store_name: str = None,
):
    """
    Query Feast for registered ingestion jobs, optionally narrowed by filters.

    Args:
        job_id: Only return the ingestion job with this id
        feature_set_ref: Only return ingestion jobs targeting the feature set
            identified by this reference
        store_name: Only return ingestion jobs targeting the store with this name

    Returns:
        List of IngestJobs, one per job in the ListIngestionJobs response
    """
    # the reference is only converted to a proto when one was supplied
    ref_proto = feature_set_ref.to_proto() if feature_set_ref else None

    # build the filtered list request
    request = ListIngestionJobsRequest(
        filter=ListIngestionJobsRequest.Filter(
            id=job_id,
            feature_set_reference=ref_proto,
            store_name=store_name,
        )
    )

    # issue the request
    response = self._core_service.ListIngestionJobs(
        request, metadata=self._get_grpc_metadata(),
    )  # type: ignore

    # wrap every returned job proto in a native IngestJob
    jobs = []
    for job_proto in response.jobs:  # type: ignore
        jobs.append(
            IngestJob(
                job_proto,
                self._core_service,
                auth_metadata_plugin=self._auth_metadata,
            )
        )
    return jobs
def test_all_types_ingest_jobs(client, all_types_dataframe):
    """
    Restart a running ingestion job of the "all_types" feature set, locate the
    replacement job spawned for it, and then stop that replacement.
    """
    # list ingestion jobs given featureset
    all_types_fs = client.get_feature_set(name="all_types")
    feature_set_ref = FeatureSetRef.from_feature_set(all_types_fs)
    ingest_jobs = client.list_ingest_jobs(feature_set_ref=feature_set_ref)
    # filter ingestion jobs to only those that are running
    ingest_jobs = [
        job for job in ingest_jobs if job.status == IngestionJobStatus.RUNNING
    ]
    assert len(ingest_jobs) >= 1

    ingest_job = ingest_jobs[0]
    # restart ingestion ingest_job
    # restart means stop current job
    # (replacement will be automatically spawned)
    client.restart_ingest_job(ingest_job)
    # wait for replacement to be created
    time.sleep(15)  # should be more than polling_interval

    # id without timestamp part
    # that remains the same between jobs
    shared_id = "-".join(ingest_job.id.split("-")[:-1])

    # The list built above was fetched before the restart, so a job spawned
    # afterwards can never appear in it. Query the jobs again and look for a
    # running job that shares the id prefix but is not the restarted job.
    current_jobs = client.list_ingest_jobs(feature_set_ref=feature_set_ref)
    replacement_jobs = [
        job
        for job in current_jobs
        if job.status == IngestionJobStatus.RUNNING
        and job.id.startswith(shared_id)
        and job.id != ingest_job.id
    ]

    assert len(replacement_jobs) >= 1
    replacement_job = replacement_jobs[0]

    replacement_job.wait(IngestionJobStatus.RUNNING)
    assert replacement_job.status == IngestionJobStatus.RUNNING

    # stop the replacement job and confirm it ends up ABORTED
    client.stop_ingest_job(replacement_job)
    replacement_job.wait(IngestionJobStatus.ABORTED)
    assert replacement_job.status == IngestionJobStatus.ABORTED
def test_str_ref(self):
    """
    A FeatureSetRef should be recoverable from its ``repr`` string via
    ``FeatureSetRef.from_str``.
    """
    original_ref = FeatureSetRef(project="test", name="test")

    # serialise with repr, then parse the string back into a reference
    round_tripped = FeatureSetRef.from_str(repr(original_ref))

    assert original_ref == round_tripped