def test_fetch_job(turing_api, active_project, api_response_get, expected,
                   api_response_refresh, updated, use_google_oauth):
    turing.set_url(turing_api, use_google_oauth)
    turing.set_project(active_project.name)

    responses.add(
        method="GET",
        url=f"/v1/projects/{active_project.id}/jobs/{expected.id}",
        body=api_response_get,
        status=200,
        content_type="application/json",
    )

    job = turing.batch.EnsemblingJob.get_by_id(expected.id)
    assert job == expected

    responses.reset()
    responses.add(
        method="GET",
        url=f"/v1/projects/{active_project.id}/jobs/{expected.id}",
        body=api_response_refresh,
        status=200,
        content_type="application/json",
    )

    job.refresh()
    assert job == updated
def test_submit_job(turing_api, active_project, ensembling_job_config,
                    api_response, expected, use_google_oauth):
    turing.set_url(turing_api, use_google_oauth)
    turing.set_project(active_project.name)

    responses.add(
        method="POST",
        url=f"/v1/projects/{active_project.id}/jobs",
        body=api_response,
        status=201,
        content_type="application/json",
    )

    actual = turing.batch.job.EnsemblingJob.submit(
        ensembler_id=2,
        config=ensembling_job_config,
    )
    assert actual == expected
def main(turing_api: str, project: str):
    # Initialize Turing client
    turing.set_url(turing_api)
    turing.set_project(project)

    # List projects
    projects = turing.Project.list()
    for p in projects:
        print(p)

    # Save pyfunc ensembler in Turing's backend
    ensembler = turing.PyFuncEnsembler.create(
        name="my-ensembler",
        ensembler_instance=MyEnsembler(),
        conda_env={
            'dependencies': [
                'python>=3.8.0',
                # other dependencies, if required
            ]
        }
    )
    print("Ensembler created:\n", ensembler)

    # Update ensembler's name
    ensembler.update(name="my-ensembler-updated")
    print("Updated:\n", ensembler)

    # Update ensembler's implementation
    ensembler.update(
        ensembler_instance=MyEnsembler(),
        conda_env={
            'channels': ['defaults'],
            'dependencies': [
                'python=3.7.0',
                "cookiecutter>=1.7.2",
                "numpy"
            ]
        },
        code_dir=[os.path.join(os.path.dirname(__file__), "../../samples")],
    )
    print("Updated:\n", ensembler)

    # List pyfunc ensemblers
    ensemblers = turing.PyFuncEnsembler.list()
    for e in ensemblers:
        print(e)
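# Both sample scripts reference a `MyEnsembler` class that is not defined in this
# excerpt. The sketch below is hypothetical: it assumes pyfunc ensemblers subclass
# `turing.ensembler.PyFunc` and implement `initialize` and `ensemble` hooks, and the
# column names and odd/even routing rule are illustrative only.
from typing import Any, Optional

import pandas

import turing.ensembler


class MyEnsembler(turing.ensembler.PyFunc):
    def initialize(self, artifacts: dict):
        # No artifacts are needed for this toy example
        pass

    def ensemble(
            self,
            input: pandas.Series,
            predictions: pandas.Series,
            treatment_config: Optional[dict]) -> Any:
        # Pick one of the model predictions based on a feature value
        if input["feature_1"] % 2 == 0:
            return predictions["model_even"]
        return predictions["model_odd"]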
def test_terminate_job(turing_api, active_project, job, api_response_delete,
                       api_response_get, expected, use_google_oauth):
    turing.set_url(turing_api, use_google_oauth)
    turing.set_project(active_project.name)

    responses.add(
        method="DELETE",
        url=f"/v1/projects/{active_project.id}/jobs/{job.id}",
        body=api_response_delete,
        status=201,
        content_type="application/json",
    )
    responses.add(
        method="GET",
        url=f"/v1/projects/{active_project.id}/jobs/{job.id}",
        body=api_response_get,
        status=200,
        content_type="application/json",
    )

    assert job != expected
    job.terminate()
    assert job == expected
def test_list_jobs(turing_api, active_project, api_response, expected, use_google_oauth):
    turing.set_url(turing_api, use_google_oauth)
    turing.set_project(active_project.name)

    responses.add(
        method="GET",
        url=f"/v1/projects/{active_project.id}/jobs?"
            f"status={turing.batch.EnsemblingJobStatus.PENDING.value}&"
            f"status={turing.batch.EnsemblingJobStatus.RUNNING.value}",
        body=api_response,
        match_querystring=True,
        status=200,
        content_type="application/json",
    )

    actual = turing.batch.EnsemblingJob.list(status=[
        turing.batch.EnsemblingJobStatus.PENDING,
        turing.batch.EnsemblingJobStatus.RUNNING
    ])
    assert len(actual) == len(expected)

    for actual_job, expected_job in zip(actual, expected):
        assert actual_job == expected_job
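# The test snippets above rely on the `responses` HTTP-mocking library being active
# for each test and on module-level imports that are not part of this excerpt. The
# lines below are an assumed setup sketch (e.g. the `@responses.activate` decorator,
# or an equivalent pytest fixture, must wrap each test for `responses.add` to
# intercept the SDK's HTTP calls):
import responses  # intercepts HTTP requests issued by the Turing SDK during tests

import turing
import turing.batch
import turing.batch.config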
def main(turing_api: str, project: str):
    # Initialize Turing client:
    turing.set_url(turing_api)
    turing.set_project(project)

    # Save pyfunc ensembler in Turing's backend:
    ensembler = turing.PyFuncEnsembler.create(
        name="my-ensembler",
        ensembler_instance=MyEnsembler(),
        conda_env={
            'dependencies': [
                'python>=3.8.0',
                # other dependencies, if required
            ]
        }
    )
    print("Ensembler created:\n", ensembler)

    # Or fetch an existing ensembler by its ID:
    # ensembler_id = <ENSEMBLER_ID>
    # ensembler = turing.PyFuncEnsembler.get_by_id(ensembler_id)

    # Define the configuration of the batch ensembling job.
    # Configure the data source that contains the input features:
    source = turing.batch.config.source.BigQueryDataset(
        table="project.dataset.features",
        features=["feature_1", "feature_2", "feature_3"]
    ).join_on(columns=["feature_1"])

    # Configure the dataset(s) that contain predictions of individual models:
    predictions = {
        'model_odd':
            turing.batch.config.source.BigQueryDataset(
                table="project.dataset.scores_model_odd",
                features=["feature_1", "prediction_score"]
            ).join_on(columns=["feature_1"]).select(columns=["prediction_score"]),

        'model_even':
            turing.batch.config.source.BigQueryDataset(
                query="""
                    SELECT feature_1, prediction_score
                    FROM `project.dataset.scores_model_even`
                    WHERE target_date = DATE("2021-03-15", "Asia/Jakarta")
                """,
                options={
                    "viewsEnabled": "true",
                    "materializationDataset": "my_dataset"
                }
            ).join_on(columns=["feature_1"]).select(columns=["prediction_score"])
    }

    # Configure the ensembling result:
    result_config = turing.batch.config.ResultConfig(
        type=turing.batch.config.ResultType.INTEGER,
        column_name="prediction_result"
    )

    # Configure the destination where ensembling results will be stored:
    sink = turing.batch.config.sink.BigQuerySink(
        table="project.dataset.ensembling_results",
        staging_bucket="staging_bucket"
    ).save_mode(turing.batch.config.sink.SaveMode.OVERWRITE) \
        .select(columns=["feature_1", "feature_2", "prediction_result"])

    # (Optional) Configure resource allocation for the job execution:
    resource_request = turing.batch.config.ResourceRequest(
        driver_cpu_request="1",
        driver_memory_request="1G",
        executor_replica=5,
        executor_cpu_request="500m",
        executor_memory_request="800M"
    )

    # (Optional) Configure environment variables:
    env_vars = {
        "SOME_VAR": "SOME_VALUE",
    }

    # Submit the job for execution:
    job = ensembler.submit_job(
        turing.batch.config.EnsemblingJobConfig(
            source=source,
            predictions=predictions,
            result_config=result_config,
            sink=sink,
            service_account=SERVICE_ACCOUNT_NAME,
            resource_request=resource_request,
            env_vars=env_vars,
        )
    )
    print(job)

    # You can also retrieve an existing job by its ID:
    # job_id = <JOB_ID>
    # job = turing.batch.EnsemblingJob.get_by_id(job_id=job_id)
    #
    # Or list all ensembling jobs within the project:
    # jobs = turing.batch.EnsemblingJob.list(status=[
    #     turing.batch.EnsemblingJobStatus.PENDING,
    #     turing.batch.EnsemblingJobStatus.RUNNING,
    # ])

    # Refresh the status of the job
    for i in range(3):
        time.sleep(5)
        job.refresh()
        print(f"Refresh #{i + 1}: {job}")

    # It's also possible to terminate a running job:
    job.terminate()
    print(f"Job's termination is in progress: {job}")
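# Instead of refreshing a fixed number of times, the job can be polled until it
# leaves the non-terminal states. This is a minimal sketch under two assumptions:
# that `job.status` exposes a `turing.batch.EnsemblingJobStatus` value (only
# PENDING and RUNNING appear in this excerpt), and that an arbitrary polling
# interval is acceptable.
import time

import turing.batch


def wait_for_completion(job, poll_interval_seconds: int = 30):
    # Keep refreshing while the job is still queued or executing
    while job.status in (
            turing.batch.EnsemblingJobStatus.PENDING,
            turing.batch.EnsemblingJobStatus.RUNNING):
        time.sleep(poll_interval_seconds)
        job.refresh()
    print(f"Job left the pending/running states with status: {job.status}")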