def test_verify_gcs_bucket_exists(klio_config, mock_storage):
    """``_verify_gcs_bucket`` returns True when bucket creation conflicts."""
    # The storage client rejects the create call with a Conflict error.
    mock_storage.create_bucket.side_effect = api_ex.Conflict("test")

    verify_job = verify.VerifyJob(klio_config, True)
    verify_job._storage_client = mock_storage

    result = verify_job._verify_gcs_bucket("gs://bucket/blob")

    assert result is True
def test_verify_stackdriver_dashboard_raises(
    klio_config, mock_get_sd_group_url, caplog
):
    """An exception from the mocked group-URL helper is re-raised."""
    mock_get_sd_group_url.side_effect = Exception("error")

    with pytest.raises(Exception, match="error"):
        verify_job = verify.VerifyJob(klio_config, False)
        verify_job._verify_stackdriver_dashboard()

    # Checked after the ``raises`` block so the assertion actually runs.
    assert len(caplog.records) == 2
def test_verify_raises_exception_raises_system_exit(mocker, klio_config):
    """``verify_job`` exits via ``SystemExit`` when ``_verify_all`` raises."""
    job = verify.VerifyJob(klio_config, False)
    # A mock's ``side_effect`` exception is raised instead of returning
    # ``return_value``, so only the side effect needs to be configured.
    mocker.patch.object(job, "_verify_all", side_effect=Exception)

    with pytest.raises(SystemExit):
        job.verify_job()
def test_verify_stackdriver_dashboard_errors(
    klio_config, mock_get_sd_group_url, mock_create_sd_group, caplog
):
    """Verification is False when both dashboard helpers return None."""
    for sd_helper in (mock_get_sd_group_url, mock_create_sd_group):
        sd_helper.return_value = None

    verify_job = verify.VerifyJob(klio_config, True)
    result = verify_job._verify_stackdriver_dashboard()

    assert result is False
    assert len(caplog.records) == 3
def test_verify_pub_topic_exists(
    klio_config, mock_publisher,
):
    """``_verify_pub_topic`` returns True when topic creation reports
    ``AlreadyExists``.
    """
    test_topic = "test"
    mock_publisher.create_topic.side_effect = api_ex.AlreadyExists("test")

    job = verify.VerifyJob(klio_config, True)
    job._publisher_client = mock_publisher

    # Pass the direction as a string; the bare name ``input`` is the
    # Python builtin function, not an I/O direction label.
    actual = job._verify_pub_topic(test_topic, "input")

    assert actual is True
def test_verify_iam_roles_with_svc_account(klio_config, mock_discovery_client):
    """If the user configures a SA, verify it instead of the default compute SA"""
    # The configured account and the IAM binding members must name the same
    # account, otherwise the granted roles cannot be matched to it.
    svc_account = "the-configured-svc-account"
    svc_member = "serviceAccount:{}".format(svc_account)

    job = verify.VerifyJob(klio_config, False)
    job.klio_config.pipeline_options.service_account_email = svc_account

    roles_for_configured_sa = (
        "roles/monitoring.metricWriter",
        "roles/pubsub.publisher",
        "roles/pubsub.subscriber",
        "roles/storage.objectCreator",
        "roles/storage.objectViewer",
    )
    bindings = [
        {"role": role, "members": [svc_member]}
        for role in roles_for_configured_sa
    ]
    # The default compute SA only appears on the editor role.
    bindings.append(
        {
            "role": "roles/editor",
            "members": ["serviceAccount:the-default-svc-account"],
        }
    )

    gcp_project = job.klio_config.pipeline_options.project
    compute_client = mock_discovery_client.build("compute")
    compute_client.projects().get().execute.return_value = {
        "defaultServiceAccount": "the-default-svc-account"
    }
    iam_client = mock_discovery_client.build("cloudresourcemanager")
    iam_client.projects().getIamPolicy(
        resource=gcp_project, body={}
    ).execute.return_value = {"bindings": bindings}
    job._compute_client = compute_client
    job._iam_client = iam_client

    result = job._verify_iam_roles()

    # Assert that we don't fetch the default SA since we don't need it
    compute_client.projects().get(
        project=gcp_project
    ).execute.assert_not_called()
    iam_client.projects().getIamPolicy(
        resource=gcp_project, body={}
    ).execute.assert_called_once_with()
    assert result is True
def test_verify_iam_roles_editor(caplog, klio_config, mock_discovery_client):
    """Roles verify successfully but an editor/owner warning is logged."""
    default_member = "serviceAccount:the-default-svc-account"
    granted_roles = (
        "roles/monitoring.metricWriter",
        "roles/pubsub.publisher",
        "roles/pubsub.subscriber",
        "roles/storage.objectCreator",
        "roles/storage.objectViewer",
        "roles/editor",
    )
    bindings = [
        {"role": role, "members": [default_member]} for role in granted_roles
    ]

    gcp_project = klio_config.pipeline_options.project
    compute_client = mock_discovery_client.build("compute")
    compute_client.projects().get().execute.return_value = {
        "defaultServiceAccount": "the-default-svc-account"
    }
    iam_client = mock_discovery_client.build("cloudresourcemanager")
    iam_client.projects().getIamPolicy(
        resource=gcp_project, body={}
    ).execute.return_value = {"bindings": bindings}

    verify_job = verify.VerifyJob(klio_config, False)
    verify_job._compute_client = compute_client
    verify_job._iam_client = iam_client

    outcome = verify_job._verify_iam_roles()

    compute_client.projects().get(
        project=gcp_project
    ).execute.assert_called_once_with()
    iam_client.projects().getIamPolicy(
        resource=gcp_project, body={}
    ).execute.assert_called_once_with()
    assert outcome is True

    # NOTE(review): the level is changed only after the records were
    # captured, so this context manager likely does not filter them -
    # confirm whether it was meant to wrap the call above.
    with caplog.at_level(logging.WARNING):
        assert len(caplog.records) == 3
        second_message = caplog.records[1].msg
        assert "unsafe project editor or owner permissions" in second_message
def test_verify_pub_topic(klio_config, mock_publisher, create_resources):
    """``_verify_pub_topic`` creates or fetches the topic depending on
    ``create_resources`` and returns True.
    """
    test_topic = "test"

    job = verify.VerifyJob(klio_config, create_resources)
    job._publisher_client = mock_publisher

    # Pass the direction as a string; the bare name ``input`` is the
    # Python builtin function, not an I/O direction label.
    actual = job._verify_pub_topic(test_topic, "input")

    if create_resources:
        mock_publisher.create_topic.assert_called_once_with(test_topic)
    else:
        mock_publisher.get_topic.assert_called_once_with(test_topic)

    assert actual is True
def test_verify_pub_topic_exceptions(klio_config, mock_publisher, not_found):
    """``_verify_pub_topic`` returns False when ``get_topic`` raises."""
    test_topic = "test"

    if not_found:
        mock_publisher.get_topic.side_effect = exceptions.NotFound("test")
    else:
        mock_publisher.get_topic.side_effect = Exception

    job = verify.VerifyJob(klio_config, False)
    job._publisher_client = mock_publisher

    # Pass the direction as a string; the bare name ``input`` is the
    # Python builtin function, not an I/O direction label.
    actual = job._verify_pub_topic(test_topic, "input")

    assert actual is False
def test_verify_gcs_bucket_exceptions(klio_config, mock_storage, not_found):
    """``_verify_gcs_bucket`` returns False when ``get_bucket`` raises."""
    # Either a NotFound instance or the bare Exception class is raised.
    mock_storage.get_bucket.side_effect = (
        exceptions.NotFound("test") if not_found else Exception
    )

    verify_job = verify.VerifyJob(klio_config, False)
    verify_job._storage_client = mock_storage

    result = verify_job._verify_gcs_bucket("gs://bucket/blob")

    assert result is False
def test_verify_gcs_bucket(klio_config, mock_storage, create_resources):
    """The bucket is created or fetched by name, and verification passes."""
    verify_job = verify.VerifyJob(klio_config, create_resources)
    verify_job._storage_client = mock_storage

    result = verify_job._verify_gcs_bucket("gs://bucket/blob")

    # Only the bucket component of the path reaches the storage client.
    if create_resources:
        mock_storage.create_bucket.assert_called_once_with("bucket")
    else:
        mock_storage.get_bucket.assert_called_once_with("bucket")

    assert result is True
def test_verify_outputs_logs(
    event_topic,
    data_location,
    expected_log_count,
    mocker,
    klio_config,
    mock_storage,
    mock_publisher,
    caplog,
):
    """``_verify_outputs`` emits the expected number of log records."""
    gcs_check = mocker.patch.object(verify.VerifyJob, "_verify_gcs_bucket")
    topic_check = mocker.patch.object(verify.VerifyJob, "_verify_pub_topic")

    verify_job = verify.VerifyJob(klio_config, False)
    verify_job._publisher_client = mock_publisher
    verify_job._storage_client = mock_storage

    io = kconfig._io
    job_config = verify_job.klio_config.job_config
    job_config.events.outputs = [
        io.KlioPubSubEventOutput.from_dict(
            {
                "type": "pubsub",
                "topic": event_topic,
                "io_type": io.KlioIOType.EVENT,
                "io_direction": io.KlioIODirection.OUTPUT,
            }
        )
    ]
    job_config.data.outputs = [
        io.KlioGCSOutputDataConfig.from_dict(
            {
                "type": "gcs",
                "location": data_location,
                "io_type": io.KlioIOType.DATA,
                "io_direction": io.KlioIODirection.OUTPUT,
            }
        )
    ]

    verify_job._verify_outputs()

    assert len(caplog.records) == expected_log_count

    # The per-resource checks are only asserted for configured values.
    if data_location is not None:
        gcs_check.assert_called_with(data_location)
    if event_topic is not None:
        topic_check.assert_called_with(event_topic, "output")
def test_verify_subscription_and_topic_exists(
    klio_config, mock_publisher, mock_sub
):
    """``AlreadyExists`` on subscription creation still yields (True, True)."""
    mock_sub.create_subscription.side_effect = api_ex.AlreadyExists("test")

    verify_job = verify.VerifyJob(klio_config, True)
    verify_job._publisher_client = mock_publisher
    verify_job._subscriber_client = mock_sub

    result = verify_job._verify_subscription_and_topic("test", "Some")

    assert result == (True, True)
def test_verify_inputs_logs(
    data_dict,
    event_dict,
    expected_log_count,
    klio_config,
    mock_storage,
    mock_publisher,
    mock_sub,
    mocker,
    caplog,
):
    """``_verify_inputs`` emits the expected number of log records."""
    mocker.patch.object(verify.VerifyJob, "_verify_gcs_bucket")
    sub_check = mocker.patch.object(
        verify.VerifyJob, "_verify_subscription_and_topic"
    )
    sub_check.return_value = (False, False)

    verify_job = verify.VerifyJob(klio_config, False)
    verify_job._publisher_client = mock_publisher
    verify_job._storage_client = mock_storage
    verify_job._subscriber_client = mock_sub
    verify_job.klio_config.pipeline_options.project = "sigint"

    io = kconfig._io
    job_config = verify_job.klio_config.job_config

    # NOTE(review): both input configs below are tagged with the OUTPUT
    # direction, and the data input uses the *Output* config class -
    # confirm whether this is intentional.
    if event_dict:
        event_dict.update(
            {
                "type": "pubsub",
                "io_type": io.KlioIOType.EVENT,
                "io_direction": io.KlioIODirection.OUTPUT,
            }
        )
        job_config.events.inputs = [
            io.KlioPubSubEventInput.from_dict(event_dict)
        ]
    else:
        job_config.events.inputs = []

    if data_dict:
        data_dict.update(
            {
                "type": "gcs",
                "io_type": io.KlioIOType.DATA,
                "io_direction": io.KlioIODirection.OUTPUT,
            }
        )
        job_config.data.inputs = [
            io.KlioGCSOutputDataConfig.from_dict(data_dict)
        ]
    else:
        job_config.data.inputs = []

    verify_job._verify_inputs()

    assert len(caplog.records) == expected_log_count
def test_verify_subscription_and_topic(
    klio_config, mock_publisher, mock_sub, create_resources
):
    """The subscription is created or fetched, and both checks pass."""
    subscription = "test"
    topic = "Some"

    verify_job = verify.VerifyJob(klio_config, create_resources)
    verify_job._publisher_client = mock_publisher
    verify_job._subscriber_client = mock_sub

    # The call is the same in both modes; only the client method differs.
    result = verify_job._verify_subscription_and_topic(subscription, topic)

    if create_resources:
        mock_sub.create_subscription.assert_called_once_with(
            name=subscription, topic=topic
        )
    else:
        mock_sub.get_subscription.assert_called_once_with(subscription)

    assert result == (True, True)
def test_unverified_event_inputs(
    mocker,
    caplog,
    mock_storage,
    mock_publisher,
    mock_sub,
    klio_config,
    mock_event_input,
):
    """The ``mock_event_input`` fixture skips the subscription check."""
    bucket_check = mocker.patch.object(verify.VerifyJob, "_verify_gcs_bucket")
    sub_check = mocker.patch.object(
        verify.VerifyJob, "_verify_subscription_and_topic"
    )

    verify_job = verify.VerifyJob(klio_config, False)
    verify_job._publisher_client = mock_publisher
    verify_job._storage_client = mock_storage
    verify_job._subscriber_client = mock_sub
    verify_job.klio_config.pipeline_options.project = "sigint"

    io = kconfig._io
    job_config = verify_job.klio_config.job_config
    job_config.events.inputs = [mock_event_input]
    job_config.data.inputs = [
        io.KlioGCSInputDataConfig.from_dict(
            {
                "type": "gcs",
                "location": "test",
                "io_type": io.KlioIOType.DATA,
                "io_direction": io.KlioIODirection.INPUT,
            }
        )
    ]
    bucket_check.return_value = True

    result = verify_job._verify_inputs()

    sub_check.assert_not_called()
    bucket_check.assert_called_with("test")
    assert result
    assert len(caplog.records) == 3
def test_verify_subscription_and_topic_exceptions(
    klio_config, mock_publisher, mock_sub, not_found, no_topic
):
    """A raising ``get_subscription`` or missing topic fails that check."""
    # The expected result is a pair; which element is False depends on
    # whether an upstream topic is supplied.
    upstream_topic, expected = (
        (None, (False, True)) if no_topic else ("Some", (True, False))
    )

    mock_sub.get_subscription.side_effect = (
        exceptions.NotFound("test") if not_found else Exception
    )

    verify_job = verify.VerifyJob(klio_config, False)
    verify_job._publisher_client = mock_publisher
    verify_job._subscriber_client = mock_sub

    result = verify_job._verify_subscription_and_topic("test", upstream_topic)

    assert result == expected
def test_unverified_event_outputs(
    mocker,
    caplog,
    klio_config,
    mock_event_output,
    mock_storage,
    mock_publisher,
):
    """The ``mock_event_output`` fixture skips the topic check."""
    bucket_check = mocker.patch.object(verify.VerifyJob, "_verify_gcs_bucket")
    topic_check = mocker.patch.object(verify.VerifyJob, "_verify_pub_topic")

    verify_job = verify.VerifyJob(klio_config, False)
    verify_job._publisher_client = mock_publisher
    verify_job._storage_client = mock_storage

    io = kconfig._io
    job_config = verify_job.klio_config.job_config
    job_config.events.outputs = [mock_event_output]
    job_config.data.outputs = [
        io.KlioGCSOutputDataConfig.from_dict(
            {
                "type": "gcs",
                "location": "test",
                "io_type": io.KlioIOType.DATA,
                "io_direction": io.KlioIODirection.OUTPUT,
            }
        )
    ]
    bucket_check.return_value = True

    result = verify_job._verify_outputs()

    assert result
    topic_check.assert_not_called()
    bucket_check.assert_called_with("test")
    assert len(caplog.records) == 3
def test_verify(
    mocker, klio_config, mock_storage, mock_publisher, mock_sub, caplog
):
    """``verify_job`` runs each patched check and logs once at INFO."""
    caplog.set_level(logging.INFO)

    verify_job = verify.VerifyJob(klio_config, False)
    verify_job._publisher_client = mock_publisher
    verify_job._storage_client = mock_storage
    verify_job._subscriber_client = mock_sub

    # Replace every individual check on this job instance with a mock.
    inputs_check = mocker.patch.object(verify_job, "_verify_inputs")
    outputs_check = mocker.patch.object(verify_job, "_verify_outputs")
    tmp_check = mocker.patch.object(verify_job, "_verify_tmp_files")
    iam_check = mocker.patch.object(verify_job, "_verify_iam_roles")
    dashboard_check = mocker.patch.object(
        verify_job, "_verify_stackdriver_dashboard"
    )

    mocker.patch.object(verify, "discovery")
    config_cls = mocker.patch.object(kconfig, "KlioConfig")
    config_cls.return_value = klio_config

    # Calling a client mock hands back that same mock.
    for client in (mock_storage, mock_publisher, mock_sub):
        client.return_value = client

    for check in (inputs_check, outputs_check, tmp_check, iam_check):
        check.return_value = True

    verify_job.verify_job()

    inputs_check.assert_called_once_with()
    outputs_check.assert_called_once_with()
    tmp_check.assert_called_once_with()
    assert iam_check.called
    dashboard_check.assert_called_once_with()
    assert len(caplog.records) == 1
def test_verify_iam_roles(
    caplog,
    expected,
    bindings,
    create_resources,
    mock_discovery_client,
    klio_config,
):
    """``_verify_iam_roles`` returns ``expected`` for the given bindings."""
    compute_client = mock_discovery_client.build("compute")
    compute_client.projects().get().execute.return_value = {
        "defaultServiceAccount": "the-default-svc-account"
    }
    iam_client = mock_discovery_client.build("cloudresourcemanager")

    verify_job = verify.VerifyJob(klio_config, create_resources)
    verify_job._compute_client = compute_client
    verify_job._iam_client = iam_client

    project = verify_job.klio_config.pipeline_options.project
    policy_request = iam_client.projects().getIamPolicy(
        resource=project, body={}
    )
    policy_request.execute.return_value = {"bindings": bindings}

    outcome = verify_job._verify_iam_roles()

    if create_resources:
        last_message = caplog.records[-1].msg
        assert (
            "--create-resources is not able to add these roles" in last_message
        )

    compute_client.projects().get(
        project=project
    ).execute.assert_called_once_with()
    iam_client.projects().getIamPolicy(
        resource=project, body={}
    ).execute.assert_called_once_with()
    assert outcome is expected
def test_verify_stackdriver_dashboard(
    klio_config,
    mock_get_sd_group_url,
    mock_create_sd_group,
    create_resources,
    dashboard_url,
):
    """Dashboard check passes when creating, or when a URL is returned."""
    sd_args = ("test-gcp-project", "klio-job-name", "europe-west1")
    mock_get_sd_group_url.return_value = dashboard_url

    verify_job = verify.VerifyJob(klio_config, create_resources)
    result = verify_job._verify_stackdriver_dashboard()

    mock_get_sd_group_url.assert_called_once_with(*sd_args)
    if create_resources:
        mock_create_sd_group.assert_called_once_with(*sd_args)

    # True when resources are created or an existing URL is returned.
    should_pass = bool(create_resources or dashboard_url)
    assert result is should_pass
def test_verify_inputs(
    mocker,
    unverified_bucket,
    unverified_topic,
    unverified_sub,
    klio_config,
    mock_storage,
    mock_publisher,
    mock_sub,
):
    """``_verify_inputs`` is True only when every patched check passes."""
    bucket_check = mocker.patch.object(verify.VerifyJob, "_verify_gcs_bucket")
    sub_check = mocker.patch.object(
        verify.VerifyJob, "_verify_subscription_and_topic"
    )

    verify_job = verify.VerifyJob(klio_config, False)
    verify_job._publisher_client = mock_publisher
    verify_job._storage_client = mock_storage
    verify_job._subscriber_client = mock_sub
    verify_job.klio_config.pipeline_options.project = "sigint"

    io = kconfig._io
    job_config = verify_job.klio_config.job_config
    job_config.events.inputs = [
        io.KlioPubSubEventInput.from_dict(
            {
                "type": "pubsub",
                "topic": "test",
                "subscription": "test",
                "io_type": io.KlioIOType.EVENT,
                "io_direction": io.KlioIODirection.INPUT,
            }
        )
    ]
    job_config.data.inputs = [
        io.KlioGCSInputDataConfig.from_dict(
            {
                "type": "gcs",
                "location": "test",
                "io_type": io.KlioIOType.DATA,
                "io_direction": io.KlioIODirection.INPUT,
            }
        )
    ]

    # Configure the patched checks for this scenario. The bucket check's
    # return value is deliberately left untouched in the branches that did
    # not set it originally.
    should_pass = False
    if unverified_topic and unverified_sub and unverified_bucket:
        bucket_check.return_value = False
        sub_check.return_value = False, False
    elif unverified_topic and unverified_sub:
        sub_check.return_value = False, False
    elif unverified_topic:
        sub_check.return_value = False, True
    elif unverified_sub:
        sub_check.return_value = True, False
    elif unverified_bucket:
        bucket_check.return_value = False
        sub_check.return_value = True, True
    else:
        bucket_check.return_value = True
        sub_check.return_value = True, True
        should_pass = True

    result = verify_job._verify_inputs()

    assert should_pass == result
    bucket_check.assert_called_with("test")
    sub_check.assert_called_with("test", "test")
def test_verify_outputs(
    mocker,
    klio_config,
    unverified_gcs,
    unverified_topic,
    mock_storage,
    mock_publisher,
):
    """``_verify_outputs`` is True only when both patched checks pass."""
    bucket_check = mocker.patch.object(verify.VerifyJob, "_verify_gcs_bucket")
    topic_check = mocker.patch.object(verify.VerifyJob, "_verify_pub_topic")

    verify_job = verify.VerifyJob(klio_config, False)
    verify_job._publisher_client = mock_publisher
    verify_job._storage_client = mock_storage

    io = kconfig._io
    job_config = verify_job.klio_config.job_config
    job_config.events.outputs = [
        io.KlioPubSubEventOutput.from_dict(
            {
                "type": "pubsub",
                "topic": "test",
                "io_type": io.KlioIOType.EVENT,
                "io_direction": io.KlioIODirection.OUTPUT,
            }
        )
    ]
    job_config.data.outputs = [
        io.KlioGCSOutputDataConfig.from_dict(
            {
                "type": "gcs",
                "location": "test",
                "io_type": io.KlioIOType.DATA,
                "io_direction": io.KlioIODirection.OUTPUT,
            }
        )
    ]

    # Pick the (bucket, topic) check results for this scenario.
    if unverified_gcs and unverified_topic:
        bucket_ok, topic_ok = False, False
    elif unverified_topic:
        bucket_ok, topic_ok = True, False
    elif unverified_gcs:
        bucket_ok, topic_ok = False, True
    else:
        bucket_ok, topic_ok = True, True
    bucket_check.return_value = bucket_ok
    topic_check.return_value = topic_ok

    result = verify_job._verify_outputs()

    should_pass = bucket_ok and topic_ok
    assert should_pass == result
    bucket_check.assert_called_with("test")
    topic_check.assert_called_with("test", "output")
def test_verify_gcs_bucket_invalid_name(klio_config, mock_storage, caplog):
    """A path without the ``gs://`` form fails verification and logs twice."""
    verify_job = verify.VerifyJob(klio_config, True)
    verify_job._storage_client = mock_storage

    result = verify_job._verify_gcs_bucket("a/b/c")

    assert not result
    assert len(caplog.records) == 2