# NOTE: this test needs parametrization to run; the original decorator was not
# included here, so the values below are illustrative placeholders.
@pytest.mark.parametrize(
    "worker_disk_type,is_valid",
    (
        ("pd-ssd", True),  # unformatted short name
        (
            "compute.googleapis.com/projects/test_project/regions/"
            "us-central1/diskTypes/pd-ssd",
            True,
        ),  # already-formatted full path
        ("not-a-real-disk-type", False),  # assumed to raise ValueError
    ),
)
def test_worker_disk_image_formatting(worker_disk_type, is_valid):
    # config formats worker_disk_type on creation. We need to ensure it can
    # read the unformatted and formatted versions
    pipeline_config_dict = {
        "project": "test_project",
        "worker_disk_type": worker_disk_type,
    }
    if is_valid:
        config.KlioPipelineConfig(
            pipeline_config_dict, job_name="test_job", version=2
        )
    else:
        with pytest.raises(ValueError):
            config.KlioPipelineConfig(
                pipeline_config_dict, job_name="test_job", version=2
            )

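# For illustration (an addition, not part of the original suite): a minimal
# sketch of the formatting behavior described above. It assumes -- consistent
# with the assertion in test_klio_pipeline_config below -- that a short disk
# type name like "pd-ssd" is expanded to the fully qualified
# "compute.googleapis.com/.../diskTypes/..." form using the configured
# project and region.
def test_worker_disk_type_expansion_sketch():
    pipeline_config_dict = {
        "project": "test-project",
        "region": "us-central1",
        "worker_disk_type": "pd-ssd",
    }
    config_obj = config.KlioPipelineConfig(
        pipeline_config_dict, job_name="test-job", version=2
    )
    # The expected URL shape mirrors test_klio_pipeline_config's assertion.
    assert config_obj.worker_disk_type.endswith("diskTypes/pd-ssd")
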
def test_klio_pipeline_config(
    pipeline_config_dict, final_pipeline_config_dict,
):
    config_obj = config.KlioPipelineConfig(
        pipeline_config_dict, job_name="test-job", version=1
    )

    config_sub_network = (
        "https://www.googleapis.com/compute/v1/projects/test-project/"
        "regions/us-central1/subnetworks/xpn-us1"
    )
    assert "test-project" == config_obj.project
    assert "gs://some/stage" == config_obj.staging_location
    assert "gs://some/temp" == config_obj.temp_location
    assert "gcr.io/sigint/foo" == config_obj.worker_harness_container_image
    assert config_obj.streaming
    assert not config_obj.update
    assert ["beam_fn_api"] == config_obj.experiments
    assert "us-central1" == config_obj.region
    assert config_sub_network == config_obj.subnetwork
    assert 3 == config_obj.num_workers
    assert 5 == config_obj.max_num_workers
    assert 50 == config_obj.disk_size_gb
    assert "n1-standard-4" == config_obj.worker_machine_type
    assert config_obj.sdk_location is None
    assert "DataflowRunner" == config_obj.runner
    assert "https://example.com" == config_obj.dataflow_endpoint
    assert "*****@*****.**" == config_obj.service_account_email
    assert config_obj.no_auth is True
    assert "gs://some/template" == config_obj.template_location
    assert ["some", "labels", "single_label"] == config_obj.labels
    assert "single_label" == config_obj.label
    assert (
        '{"transform": "is mapped", "using": "json"}'
        == config_obj.transform_name_mapping
    )
    assert "THROUGHPUT_BASED" == config_obj.autoscaling_algorithm
    assert "COST_OPTIMIZED" == config_obj.flexrs_goal
    assert (
        "compute.googleapis.com/projects/test-project/regions/us-central1/"
        "diskTypes/pd-ssd" == config_obj.worker_disk_type
    )
    assert config_obj.use_public_ips is True
    assert "Intel Skylake" == config_obj.min_cpu_platform
    assert "/foo/bar.jar" == config_obj.dataflow_worker_jar
    assert "wombat" == config_obj.random_param

    assert final_pipeline_config_dict == config_obj.as_dict()

    repr_actual = repr(config_obj)
    assert "KlioPipelineConfig(job_name='test-job')" == repr_actual

def test_bare_klio_pipeline_config(bare_pipeline_config_dict):
    config_obj = config.KlioPipelineConfig(
        bare_pipeline_config_dict, version=1, job_name="test-job"
    )

    assert config_obj.streaming is True
    assert not config_obj.update
    assert ["beam_fn_api"] == config_obj.experiments
    assert "direct" == config_obj.runner
    assert "europe-west1" == config_obj.region
    assert config_obj.subnetwork is None
    assert 2 == config_obj.num_workers
    assert 2 == config_obj.max_num_workers
    assert 32 == config_obj.disk_size_gb
    assert "n1-standard-2" == config_obj.worker_machine_type
    assert config_obj.no_auth is False
    assert [] == config_obj.labels
    assert config_obj.enable_streaming_engine is False
    assert config_obj.autoscaling_algorithm == "NONE"

    expected_none_attrs = [
        "project",
        "staging_location",
        "temp_location",
        "worker_harness_container_image",
        "sdk_location",
        "setup_file",
        "requirements_file",
        "dataflow_endpoint",
        "service_account_email",
        "template_location",
        "transform_name_mapping",
        "dataflow_kms_key",
        "flexrs_goal",
        "worker_disk_type",
        "use_public_ips",
        "min_cpu_platform",
        "dataflow_worker_jar",
    ]
    for attr in expected_none_attrs:
        attr_to_test = getattr(config_obj, attr)
        assert attr_to_test is None

    repr_actual = repr(config_obj)
    assert "KlioPipelineConfig(job_name='test-job')" == repr_actual