Example 1
@responses.activate
def test_fetch_job(turing_api, active_project, api_response_get, expected,
                   api_response_refresh, updated, use_google_oauth):
    turing.set_url(turing_api, use_google_oauth)
    turing.set_project(active_project.name)

    responses.add(method="GET",
                  url=f"/v1/projects/{active_project.id}/jobs/{expected.id}",
                  body=api_response_get,
                  status=200,
                  content_type="application/json")

    job = turing.batch.EnsemblingJob.get_by_id(expected.id)

    assert job == expected

    responses.reset()
    responses.add(method="GET",
                  url=f"/v1/projects/{active_project.id}/jobs/{expected.id}",
                  body=api_response_refresh,
                  status=200,
                  content_type="application/json")

    job.refresh()

    assert job == updated
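
These tests exercise the SDK against a mocked Turing API: the responses library intercepts HTTP calls made via requests, so no real backend is needed. The standalone sketch below (independent of the Turing SDK; the URL and JSON payload are made up for illustration) shows the underlying mocking pattern on its own: activate the mock with @responses.activate, register a stubbed endpoint with responses.add, and any requests call made inside the test is answered from that stub.

import requests
import responses


@responses.activate
def test_mocking_pattern():
    # Register a stubbed endpoint; URL and body are illustrative only.
    responses.add(
        method="GET",
        url="http://turing.example.com/v1/projects/1/jobs/10",
        json={"id": 10, "status": "pending"},
        status=200,
        content_type="application/json",
    )

    # The HTTP call below never leaves the process; it is served by the stub.
    resp = requests.get("http://turing.example.com/v1/projects/1/jobs/10")
    assert resp.status_code == 200
    assert resp.json()["status"] == "pending"
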
Example 2
@responses.activate
def test_submit_job(turing_api, active_project, ensembling_job_config,
                    api_response, expected, use_google_oauth):
    turing.set_url(turing_api, use_google_oauth)
    turing.set_project(active_project.name)

    responses.add(method="POST",
                  url=f"/v1/projects/{active_project.id}/jobs",
                  body=api_response,
                  status=201,
                  content_type="application/json")

    actual = turing.batch.job.EnsemblingJob.submit(
        ensembler_id=2,
        config=ensembling_job_config,
    )
    assert actual == expected
Example 3
import os

import turing

# NOTE: assumes MyEnsembler, an implementation of Turing's pyfunc ensembler
# interface, is defined or imported elsewhere; see the sketch after this example.


def main(turing_api: str, project: str):
    # Initialize Turing client
    turing.set_url(turing_api)
    turing.set_project(project)

    # List projects
    projects = turing.Project.list()
    for p in projects:
        print(p)

    # Save pyfunc ensembler in Turing's backend
    ensembler = turing.PyFuncEnsembler.create(
        name="my-ensembler",
        ensembler_instance=MyEnsembler(),
        conda_env={
            'dependencies': [
                'python>=3.8.0',
                # other dependencies, if required
            ]
        })
    print("Ensembler created:\n", ensembler)

    # Update Ensembler's name
    ensembler.update(name="my-ensembler-updated")
    print("Updated:\n", ensembler)

    # Update Ensembler's implementation
    ensembler.update(
        ensembler_instance=MyEnsembler(),
        conda_env={
            'channels': ['defaults'],
            'dependencies': ['python=3.7.0', "cookiecutter>=1.7.2", "numpy"]
        },
        code_dir=[os.path.join(os.path.dirname(__file__), "../../samples")],
    )
    print("Updated:\n", ensembler)

    # List pyfunc ensemblers
    ensemblers = turing.PyFuncEnsembler.list()
    for e in ensemblers:
        print(e)
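
Both this example and Example 6 construct MyEnsembler(), which is assumed to be defined elsewhere in the sample code and is not shown here. Below is a minimal, hypothetical sketch of such a class, assuming the SDK exposes a turing.ensembler.PyFunc base class with initialize and ensemble hooks; the method signatures and the averaging logic are illustrative assumptions, not the sample's actual implementation.

from typing import Any, Optional

import pandas as pd
import turing.ensembler


class MyEnsembler(turing.ensembler.PyFunc):
    """Illustrative pyfunc ensembler that averages the individual model scores."""

    def initialize(self, artifacts: dict):
        # This simple ensembler needs no artifacts.
        pass

    def ensemble(
            self,
            features: pd.Series,
            predictions: pd.Series,
            treatment_config: Optional[dict]) -> Any:
        # `predictions` holds one score per model; return their mean.
        return predictions.mean()
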
Example 4
@responses.activate
def test_terminate_job(turing_api, active_project, job, api_response_delete,
                       api_response_get, expected, use_google_oauth):
    turing.set_url(turing_api, use_google_oauth)
    turing.set_project(active_project.name)

    responses.add(method="DELETE",
                  url=f"/v1/projects/{active_project.id}/jobs/{job.id}",
                  body=api_response_delete,
                  status=201,
                  content_type="application/json")

    responses.add(method="GET",
                  url=f"/v1/projects/{active_project.id}/jobs/{job.id}",
                  body=api_response_get,
                  status=200,
                  content_type="application/json")

    assert job != expected

    job.terminate()

    assert job == expected
Example 5
@responses.activate
def test_list_jobs(turing_api, active_project, api_response, expected,
                   use_google_oauth):
    turing.set_url(turing_api, use_google_oauth)
    turing.set_project(active_project.name)

    responses.add(method="GET",
                  url=f"/v1/projects/{active_project.id}/jobs?"
                  f"status={turing.batch.EnsemblingJobStatus.PENDING.value}&"
                  f"status={turing.batch.EnsemblingJobStatus.RUNNING.value}",
                  body=api_response,
                  match_querystring=True,
                  status=200,
                  content_type="application/json")

    actual = turing.batch.EnsemblingJob.list(status=[
        turing.batch.EnsemblingJobStatus.PENDING,
        turing.batch.EnsemblingJobStatus.RUNNING
    ])

    assert len(actual) == len(expected)

    for actual_job, expected_job in zip(actual, expected):
        assert actual_job == expected_job
Example 6
import time

import turing
import turing.batch
import turing.batch.config

# NOTE: assumes MyEnsembler (see the sketch after Example 3) and
# SERVICE_ACCOUNT_NAME are defined or imported elsewhere in the sample.


def main(turing_api: str, project: str):
    # Initialize Turing client:
    turing.set_url(turing_api)
    turing.set_project(project)

    # Save pyfunc ensembler in Turing's backend:
    ensembler = turing.PyFuncEnsembler.create(
        name="my-ensembler",
        ensembler_instance=MyEnsembler(),
        conda_env={
            'dependencies': [
                'python>=3.8.0',
                # other dependencies, if required
            ]
        })
    print("Ensembler created:\n", ensembler)

    # Or fetch existing ensembler by its ID:
    # ensembler_id = < ENSEMBLER_ID >
    # ensembler = turing.PyFuncEnsembler.get_by_id(ensembler_id)

    # Define the configuration of the batch ensembling job

    # Configure the data source that contains the input features:
    source = turing.batch.config.source.BigQueryDataset(
        table="project.dataset.features",
        features=["feature_1", "feature_2",
                  "features_3"]).join_on(columns=["feature_1"])

    # Configure the dataset(s) that contain predictions from the individual models:
    predictions = {
        'model_odd':
        turing.batch.config.source.BigQueryDataset(
            table="project.dataset.scores_model_odd",
            features=["feature_1", "prediction_score"]).join_on(
                columns=["feature_1"]).select(columns=["prediction_score"]),
        'model_even':
        turing.batch.config.source.BigQueryDataset(
            query="""
                    SELECT feature_1, prediction_score
                    FROM `project.dataset.scores_model_even`
                    WHERE target_date = DATE("2021-03-15", "Asia/Jakarta")
                """,
            options={
                "viewsEnabled": "true",
                "materializationDataset": "my_dataset"
            }).join_on(columns=["feature_1"]).select(
                columns=["prediction_score"])
    }

    # Configure ensembling result:
    result_config = turing.batch.config.ResultConfig(
        type=turing.batch.config.ResultType.INTEGER,
        column_name="prediction_result")

    # Configure the destination where the ensembling results will be stored:
    sink = turing.batch.config.sink.BigQuerySink(
        table="project.dataset.ensembling_results",
        staging_bucket="staging_bucket"
    ).save_mode(turing.batch.config.sink.SaveMode.OVERWRITE) \
        .select(columns=["feature_1", "feature_2", "prediction_result"])

    # (Optional) Configure resource allocation for the job execution:
    resource_request = turing.batch.config.ResourceRequest(
        driver_cpu_request="1",
        driver_memory_request="1G",
        executor_replica=5,
        executor_cpu_request="500m",
        executor_memory_request="800M")

    # (Optional) Configure environment variables here
    env_vars = {
        "SOME_VAR": "SOME_VALUE",
    }

    # Submit the job for execution:
    job = ensembler.submit_job(
        turing.batch.config.EnsemblingJobConfig(
            source=source,
            predictions=predictions,
            result_config=result_config,
            sink=sink,
            service_account=SERVICE_ACCOUNT_NAME,
            resource_request=resource_request,
            env_vars=env_vars,
        ))
    print(job)

    # You can also retrieve an existing job instance by its ID:
    # job_id = < JOB_ID >
    # job = turing.batch.EnsemblingJob.get_by_id(job_id=job_id)
    #
    # # Or list all ensembling jobs within the project
    # jobs = turing.batch.EnsemblingJob.list(status=[
    #     turing.batch.EnsemblingJobStatus.PENDING,
    #     turing.batch.EnsemblingJobStatus.RUNNING,
    # ])

    # Refresh the status of the job
    for i in range(3):
        time.sleep(5)
        job.refresh()
        print(f"Refresh #{i+1}: {job}")

    # It's also possible to terminate a running job:
    job.terminate()
    print(f"Job's termination in process: {job}")