Beispiel #1
0
def test_worker_disk_image_formatting(worker_disk_type, is_valid):
    """Check how ``KlioPipelineConfig`` treats a given ``worker_disk_type``.

    The config formats ``worker_disk_type`` on creation, so it has to cope
    with both the unformatted and the already formatted spelling.

    Args:
        worker_disk_type: value placed under the ``"worker_disk_type"`` key
            of the pipeline config dict.
        is_valid: when truthy, construction must succeed; otherwise it must
            raise ``ValueError``.
    """
    raw_pipeline_config = {
        "project": "test_project",
        "worker_disk_type": worker_disk_type,
    }
    # Both branches build the config with the same keyword arguments.
    init_kwargs = {"job_name": "test_job", "version": 2}

    if not is_valid:
        with pytest.raises(ValueError):
            config.KlioPipelineConfig(raw_pipeline_config, **init_kwargs)
        return

    config.KlioPipelineConfig(raw_pipeline_config, **init_kwargs)
Beispiel #2
0
def test_klio_pipeline_config(
    pipeline_config_dict, final_pipeline_config_dict,
):
    """Verify a ``KlioPipelineConfig`` built from ``pipeline_config_dict``.

    Checks each expected attribute value, that ``as_dict()`` equals
    ``final_pipeline_config_dict``, and the ``repr`` of the object.
    """
    pipeline_cfg = config.KlioPipelineConfig(
        pipeline_config_dict, job_name="test-job", version=1
    )

    expected_subnetwork = (
        "https://www.googleapis.com/compute/v1/projects/test-project/"
        "regions/us-central1/subnetworks/xpn-us1"
    )
    expected_disk_type = (
        "compute.googleapis.com/projects/test-project/regions/us-central1/"
        "diskTypes/pd-ssd"
    )

    # Attributes compared by equality, keyed by attribute name.
    expected_by_attr = {
        "project": "test-project",
        "staging_location": "gs://some/stage",
        "temp_location": "gs://some/temp",
        "worker_harness_container_image": "gcr.io/sigint/foo",
        "experiments": ["beam_fn_api"],
        "region": "us-central1",
        "subnetwork": expected_subnetwork,
        "num_workers": 3,
        "max_num_workers": 5,
        "disk_size_gb": 50,
        "worker_machine_type": "n1-standard-4",
        "runner": "DataflowRunner",
        "dataflow_endpoint": "https://example.com",
        "service_account_email": "*****@*****.**",
        "template_location": "gs://some/template",
        "labels": ["some", "labels", "single_label"],
        "label": "single_label",
        "transform_name_mapping": (
            '{"transform": "is mapped", "using": "json"}'
        ),
        "autoscaling_algorithm": "THROUGHPUT_BASED",
        "flexrs_goal": "COST_OPTIMIZED",
        "worker_disk_type": expected_disk_type,
        "min_cpu_platform": "Intel Skylake",
        "dataflow_worker_jar": "/foo/bar.jar",
        "random_param": "wombat",
    }
    for attr_name, expected in expected_by_attr.items():
        assert expected == getattr(pipeline_cfg, attr_name)

    # Truthiness and identity checks are kept explicit rather than folded
    # into the equality table above.
    assert pipeline_cfg.streaming
    assert not pipeline_cfg.update
    assert pipeline_cfg.sdk_location is None
    assert pipeline_cfg.no_auth is True
    assert pipeline_cfg.use_public_ips is True

    assert final_pipeline_config_dict == pipeline_cfg.as_dict()

    assert "KlioPipelineConfig(job_name='test-job')" == repr(pipeline_cfg)
Beispiel #3
0
def test_bare_klio_pipeline_config(bare_pipeline_config_dict):
    """Verify a ``KlioPipelineConfig`` built from ``bare_pipeline_config_dict``.

    Checks the attribute values this test expects, that a fixed set of
    attributes is ``None``, and the ``repr`` of the object.
    """
    bare_cfg = config.KlioPipelineConfig(
        bare_pipeline_config_dict, version=1, job_name="test-job"
    )

    # Identity / truthiness checks.
    assert bare_cfg.streaming is True
    assert not bare_cfg.update
    assert bare_cfg.subnetwork is None
    assert bare_cfg.no_auth is False
    assert bare_cfg.enable_streaming_engine is False

    # Attributes compared by equality, keyed by attribute name.
    expected_by_attr = {
        "experiments": ["beam_fn_api"],
        "runner": "direct",
        "region": "europe-west1",
        "num_workers": 2,
        "max_num_workers": 2,
        "disk_size_gb": 32,
        "worker_machine_type": "n1-standard-2",
        "labels": [],
    }
    for attr_name, expected in expected_by_attr.items():
        assert expected == getattr(bare_cfg, attr_name)
    assert bare_cfg.autoscaling_algorithm == "NONE"

    # Every attribute named here must be None on the bare config.
    none_attr_names = (
        "project",
        "staging_location",
        "temp_location",
        "worker_harness_container_image",
        "sdk_location",
        "setup_file",
        "requirements_file",
        "dataflow_endpoint",
        "service_account_email",
        "template_location",
        "transform_name_mapping",
        "dataflow_kms_key",
        "flexrs_goal",
        "worker_disk_type",
        "use_public_ips",
        "min_cpu_platform",
        "dataflow_worker_jar",
    )
    for attr_name in none_attr_names:
        assert getattr(bare_cfg, attr_name) is None

    assert "KlioPipelineConfig(job_name='test-job')" == repr(bare_cfg)