def test_docker_agent_deploy_with_no_interface_check_linux(
    monkeypatch, cloud_api, linux_platform
):
    """With ``docker_interface=False`` the patched ``get_docker_ip`` is never called."""
    # Stand-in Docker client whose ping succeeds and which "creates" a container.
    docker_client = MagicMock()
    docker_client.ping.return_value = True
    docker_client.create_container.return_value = {"Id": "container_id"}
    monkeypatch.setattr(
        "prefect.agent.docker.agent.DockerAgent._get_docker_client",
        MagicMock(return_value=docker_client),
    )

    ip_lookup = MagicMock()
    monkeypatch.setattr("prefect.agent.docker.agent.get_docker_ip", ip_lookup)

    agent = DockerAgent(docker_interface=False)

    storage = Docker(registry_url="", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "id": "foo",
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})
    agent.deploy_flow(flow_run=flow_run)

    assert not ip_lookup.called
def test_docker_agent_deploy_with_interface_check_linux(
    api, monkeypatch, linux_platform
):
    """After a default agent deploys a flow run, the patched ``get_docker_ip`` was called."""
    ip_lookup = MagicMock()
    monkeypatch.setattr("prefect.agent.docker.agent.get_docker_ip", ip_lookup)

    agent = DockerAgent()

    storage = Docker(registry_url="", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})
    agent.deploy_flow(flow_run=flow_run)

    assert ip_lookup.called
def test_docker_agent_network(monkeypatch, cloud_api):
    """The ``network`` option is kept on the agent and its config is passed to ``create_container``."""
    network_config = {"test-network": "config"}

    # Stand-in Docker client returning canned container and networking data.
    docker_client = MagicMock()
    docker_client.ping.return_value = True
    docker_client.create_container.return_value = {"Id": "container_id"}
    docker_client.create_networking_config.return_value = {"test-network": "config"}
    monkeypatch.setattr(
        "prefect.agent.docker.agent.DockerAgent._get_docker_client",
        MagicMock(return_value=docker_client),
    )

    agent = DockerAgent(network="test-network")

    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "id": "foo",
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})
    agent.deploy_flow(flow_run=flow_run)

    assert agent.network == "test-network"
    _, container_kwargs = docker_client.create_container.call_args
    assert container_kwargs["networking_config"] == network_config
def test_docker_agent_network(api):
    """Passing ``network`` emits a ``UserWarning``, leaves ``networks`` unset, and is still applied."""
    network_config = {"test-network": "config"}
    api.create_networking_config.return_value = {"test-network": "config"}

    # Only the constructor is expected to emit the warning.
    with pytest.warns(UserWarning):
        agent = DockerAgent(network="test-network")

    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})
    agent.deploy_flow(flow_run=flow_run)

    assert agent.network == "test-network"
    assert agent.networks is None
    _, container_kwargs = api.create_container.call_args
    assert container_kwargs["networking_config"] == network_config
def test_docker_agent_deploy_flow_no_registry_does_not_pull(api):
    """Docker storage with an empty ``registry_url`` is started without pulling."""
    agent = DockerAgent()

    storage = Docker(registry_url="", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})
    agent.deploy_flow(flow_run=flow_run)

    assert not api.pull.called
    assert api.create_container.called
    assert api.start.called
def test_docker_agent_deploy_flow_reg_allow_list_not_allowed(api):
    """A registry outside ``reg_allow_list`` raises ``ValueError`` before any Docker call."""
    agent = DockerAgent(reg_allow_list=["test1"])

    with pytest.raises(ValueError) as error:
        # "test2" is deliberately absent from the allow list above.
        storage = Docker(registry_url="test2", image_name="name", image_tag="tag")
        flow = GraphQLResult(
            {
                "id": "foo",
                "name": "flow-name",
                "storage": storage.serialize(),
                "environment": LocalEnvironment().serialize(),
                "core_version": "0.13.0",
            }
        )
        flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})
        agent.deploy_flow(flow_run=flow_run)

    expected_error = (
        "Trying to pull image from a Docker registry 'test2'"
        " which is not in the reg_allow_list"
    )

    assert not api.pull.called
    assert not api.create_container.called
    assert not api.start.called
    assert str(error.value) == expected_error
def test_docker_agent_deploy_flow_storage_raises(monkeypatch, cloud_api):
    """Deploying a ``Local``-storage flow without image metadata raises ``ValueError`` and pulls nothing."""
    monkeypatch.setattr("prefect.agent.agent.Client", MagicMock())

    # Stand-in Docker client whose ping succeeds and which "creates" a container.
    docker_client = MagicMock()
    docker_client.ping.return_value = True
    docker_client.create_container.return_value = {"Id": "container_id"}
    monkeypatch.setattr(
        "prefect.agent.docker.agent.DockerAgent._get_docker_client",
        MagicMock(return_value=docker_client),
    )

    agent = DockerAgent()

    with pytest.raises(ValueError):
        flow = GraphQLResult(
            {
                "storage": Local().serialize(),
                "id": "foo",
                "environment": LocalEnvironment().serialize(),
                "core_version": "0.13.0",
            }
        )
        flow_run = GraphQLResult(
            {"flow": flow, "id": "id", "name": "name", "version": "version"}
        )
        agent.deploy_flow(flow_run=flow_run)

    assert not docker_client.pull.called
def test_k8s_agent_replace_yaml_respects_multiple_image_secrets(monkeypatch, cloud_api):
    """A comma-separated ``IMAGE_PULL_SECRETS`` yields one ``imagePullSecrets`` entry per name."""
    manage_jobs_stub = MagicMock(return_value=[])
    monkeypatch.setattr(
        "prefect.agent.kubernetes.agent.KubernetesAgent.manage_jobs",
        manage_jobs_stub,
    )

    monkeypatch.setenv("IMAGE_PULL_SECRETS", "some-secret,other-secret")
    monkeypatch.setenv("IMAGE_PULL_POLICY", "custom_policy")

    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "id": "new_id",
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})

    temporary_config = {"cloud.agent.auth_token": "token", "logging.log_to_cloud": True}
    with set_temporary_config(temporary_config):
        agent = KubernetesAgent(env_vars=dict(AUTH_THING="foo", PKG_SETTING="bar"))
        job = agent.generate_job_spec_from_environment(flow_run, image="test/name:tag")
        pod_spec = job["spec"]["template"]["spec"]
        expected_secrets = [{"name": "some-secret"}, {"name": "other-secret"}]
        assert pod_spec["imagePullSecrets"] == expected_secrets
def test_docker_agent_deploy_flow_uses_environment_metadata(api):
    """An image named in environment metadata is pulled, created and started."""
    agent = DockerAgent()

    environment = LocalEnvironment(metadata={"image": "repo/name:tag"})
    flow = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": Local().serialize(),
            "environment": environment.serialize(),
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})
    agent.deploy_flow(flow_run=flow_run)

    assert api.pull.called
    assert api.create_container.called
    assert api.start.called

    # Index 1 of ``call_args`` holds the keyword arguments of the last call.
    host_config_kwargs = api.create_host_config.call_args[1]
    assert host_config_kwargs["auto_remove"] is True

    container_kwargs = api.create_container.call_args[1]
    assert container_kwargs["command"] == "prefect execute flow-run"
    assert container_kwargs["host_config"]["AutoRemove"] is True

    start_kwargs = api.start.call_args[1]
    assert start_kwargs["container"] == "container_id"
def test_k8s_agent_replace_yaml_responds_to_logging_config(
    monkeypatch, cloud_api, flag
):
    """The env entry at index 6 of the job container is the lower-cased negation of ``no_cloud_logs``."""
    monkeypatch.setattr("kubernetes.config", MagicMock())

    manage_jobs_stub = MagicMock(return_value=[])
    monkeypatch.setattr(
        "prefect.agent.kubernetes.agent.KubernetesAgent.manage_jobs",
        manage_jobs_stub,
    )

    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "id": "new_id",
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})

    agent = KubernetesAgent(no_cloud_logs=flag)
    job = agent.replace_job_spec_yaml(flow_run, image="test/name:tag")

    container = job["spec"]["template"]["spec"]["containers"][0]
    expected_value = str(not flag).lower()
    assert container["env"][6]["value"] == expected_value
def test_k8s_agent_includes_agent_labels_in_job(monkeypatch, cloud_api):
    """The env entry at index 5 of the job container carries the agent's labels as a string."""
    manage_jobs_stub = MagicMock(return_value=[])
    monkeypatch.setattr(
        "prefect.agent.kubernetes.agent.KubernetesAgent.manage_jobs",
        manage_jobs_stub,
    )

    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "id": "new_id",
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})

    agent = KubernetesAgent(labels=["foo", "bar"])
    job = agent.generate_job_spec_from_environment(flow_run, image="test/name:tag")

    container = job["spec"]["template"]["spec"]["containers"][0]
    assert container["env"][5]["value"] == "['foo', 'bar']"
def test_docker_agent_deploy_flow_no_pull_using_environment_metadata(api):
    """With ``no_pull=True`` a metadata-specified image is started without being pulled."""
    agent = DockerAgent(no_pull=True)

    environment = LocalEnvironment(metadata={"image": "name:tag"})
    flow = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": Local().serialize(),
            "environment": environment.serialize(),
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})
    agent.deploy_flow(flow_run=flow_run)

    assert not api.pull.called
    assert api.create_container.called
    assert api.start.called
def test_k8s_agent_replace_yaml_no_pull_secrets(monkeypatch, cloud_api):
    """Without pull secrets configured here, the job spec has no truthy ``imagePullSecrets``."""
    manage_jobs_stub = MagicMock(return_value=[])
    monkeypatch.setattr(
        "prefect.agent.kubernetes.agent.KubernetesAgent.manage_jobs",
        manage_jobs_stub,
    )

    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "id": "id",
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})

    agent = KubernetesAgent()
    job = agent.generate_job_spec_from_environment(flow_run, image="test/name:tag")

    pod_spec = job["spec"]["template"]["spec"]
    assert not pod_spec.get("imagePullSecrets", None)
def main():
    """Assemble the "Check listings" flow and run it with ``city="vancouver"``."""
    dask_environment = LocalEnvironment(executor=DaskExecutor())

    with Flow("Check listings", environment=dask_environment) as flow:
        city = Parameter("city")

        # Extract: look up the current listings, then fetch their pages.
        listings = get_current_listings(city)
        pages = fetch_pages(listings, city)

        # Transform: parse the fetched listing pages.
        data = parse_listings(pages)

        # Load: persist the parsed listings.
        save_listings(data)

    # Disabled Docker registration / agent path, kept for reference:
    # flow.storage = Docker(registry_url="bramevert/craig")
    # flow.run_config = DockerRun(
    #     env={"GOOGLE_APPLICATION_CREDENTIALS": "/home/app/craiglist-crawler-a7aff758fc9d.json"},
    #     image="craig:latest",
    #     labels=["bram-desktop"],
    # )
    # flow.register(project_name="Craiglist Crawler")
    # flow.run_agent()
    flow.run(city="vancouver")
def test_docker_agent_deploy_flow_no_registry_does_not_pull(
    monkeypatch, cloud_api
):
    """Docker storage with an empty ``registry_url`` is started without pulling."""
    # Stand-in Docker client whose ping succeeds and which "creates" a container.
    docker_client = MagicMock()
    docker_client.ping.return_value = True
    docker_client.create_container.return_value = {"Id": "container_id"}
    monkeypatch.setattr(
        "prefect.agent.docker.agent.DockerAgent._get_docker_client",
        MagicMock(return_value=docker_client),
    )

    agent = DockerAgent()

    storage = Docker(registry_url="", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "id": "foo",
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})
    agent.deploy_flow(flow_run=flow_run)

    assert not docker_client.pull.called
    assert docker_client.create_container.called
    assert docker_client.start.called
def test_get_flow_image_raises_on_missing_info():
    """``get_flow_image`` raises ``ValueError`` for ``Local`` storage with no image metadata."""
    environment = LocalEnvironment()
    storage = Local()
    flow = Flow("test", environment=environment, storage=storage)

    with pytest.raises(ValueError):
        get_flow_image(flow=flow)
def test_get_flow_image_env_metadata():
    """``get_flow_image`` returns the image named in the environment metadata."""
    environment = LocalEnvironment(metadata={"image": "repo/name:tag"})
    storage = Local()
    flow = Flow("test", environment=environment, storage=storage)

    assert get_flow_image(flow=flow) == "repo/name:tag"
def test_get_flow_image_docker_storage():
    """``get_flow_image`` builds ``registry/name:tag`` from Docker storage."""
    environment = LocalEnvironment()
    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = Flow("test", environment=environment, storage=storage)

    assert get_flow_image(flow=flow) == "test/name:tag"
def test_get_flow_image_raises_on_missing_info():
    """``get_flow_image`` raises ``ValueError`` when the flow run names no image.

    The flow run uses ``Local`` storage and a ``LocalEnvironment`` without
    image metadata, so nothing in it identifies an image.
    """
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Local().serialize(),
                    "environment": LocalEnvironment().serialize(),
                    "id": "id",
                }
            ),
            "id": "id",
        }
    )

    with pytest.raises(ValueError):
        # The call is expected to raise, so its result is deliberately not
        # bound; the old ``image = ...`` assignment was a dead local.
        get_flow_image(flow_run=flow_run)
def test_k8s_agent_deploy_flow(core_version, command, monkeypatch, cloud_api):
    """Deploying creates a ``batch/v1`` job in ``default`` whose container args are ``[command]``."""
    monkeypatch.setattr("kubernetes.config", MagicMock())

    batch_client = MagicMock()
    monkeypatch.setattr(
        "kubernetes.client.BatchV1Api", MagicMock(return_value=batch_client)
    )

    core_client = MagicMock()
    core_client.list_namespaced_pod.return_value = MagicMock(items=[])
    monkeypatch.setattr(
        "kubernetes.client.CoreV1Api", MagicMock(return_value=core_client)
    )

    manage_jobs_stub = MagicMock(return_value=[])
    monkeypatch.setattr(
        "prefect.agent.kubernetes.agent.KubernetesAgent.manage_jobs",
        manage_jobs_stub,
    )

    agent = KubernetesAgent()

    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "id": "id",
            "core_version": core_version,
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})
    agent.deploy_flow(flow_run=flow_run)

    create_job = agent.batch_client.create_namespaced_job
    assert create_job.called

    # Index 1 of ``call_args`` holds the keyword arguments of the last call.
    job_kwargs = create_job.call_args[1]
    assert job_kwargs["namespace"] == "default"
    assert job_kwargs["body"]["apiVersion"] == "batch/v1"

    container = job_kwargs["body"]["spec"]["template"]["spec"]["containers"][0]
    assert container["args"] == [command]
def test_get_flow_run_command(core_version, command):
    """``get_flow_run_command`` returns ``command`` for a flow with the given ``core_version``."""
    flow = GraphQLResult(
        {
            "storage": Local().serialize(),
            "environment": LocalEnvironment().serialize(),
            "id": "id",
            "core_version": core_version,
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})

    actual_command = get_flow_run_command(flow_run)
    assert actual_command == command
def test_get_flow_run_command_works_if_core_version_not_on_response():
    """A flow payload without ``core_version`` yields ``prefect execute cloud-flow``."""
    # The flow payload deliberately omits "core_version".
    flow = GraphQLResult(
        {
            "storage": Local().serialize(),
            "environment": LocalEnvironment().serialize(),
            "id": "id",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})

    legacy_command = "prefect execute cloud-flow"
    assert get_flow_run_command(flow_run) == legacy_command
def test_docker_agent_deploy_flow_sets_container_name_with_index(
    api, collision_count
):
    """
    Check that the container is named after the flow run, and that a clash
    with already-existing container names is resolved by appending an index.
    """
    # Taken names: the bare run name plus suffixes 1..collision_count-1.
    taken_names = []
    if collision_count:
        taken_names = ["flow-run-name"] + [
            f"flow-run-name-{index}" for index in range(1, collision_count)
        ]

    def fail_if_name_exists(*args, **kwargs):
        # Imitate Docker rejecting a container name that is already in use.
        if kwargs.get("name") in taken_names:
            raise docker.errors.APIError(
                "Conflict. The container name 'foobar' is already in use"
            )
        return {}

    api.create_container = MagicMock(side_effect=fail_if_name_exists)

    agent = DockerAgent()

    environment = LocalEnvironment(metadata={"image": "repo/name:tag"})
    flow = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": Local().serialize(),
            "environment": environment.serialize(),
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "flow-run-name"})
    agent.deploy_flow(flow_run=flow_run)

    if collision_count:
        expected_name = f"flow-run-name-{collision_count}"
    else:
        expected_name = "flow-run-name"

    assert api.create_container.call_args[1]["name"] == expected_name
def main():
    """Build ``test_flow``, attach its environment and Docker storage, and register it."""
    with Flow("test_flow") as flow:
        run_thing1()
        run_thing2()

    process_executor = LocalDaskExecutor(scheduler="processes")
    flow.environment = LocalEnvironment(executor=process_executor)

    docker_storage = Docker(
        base_image="prefect_logger_repro",
        local_image=True,
        ignore_healthchecks=True,
        registry_url="artifactory.aq.tc/prefect/",
    )
    flow.storage = docker_storage

    flow.register(project_name="default")
def test_docker_agent_deploy_flow_show_flow_logs(monkeypatch, cloud_api):
    """With ``show_flow_logs=True`` a process targeting ``_stream_container_logs`` is created."""
    process_factory = MagicMock()
    monkeypatch.setattr("multiprocessing.Process", process_factory)

    # Stand-in Docker client whose ping succeeds and which "creates" a container.
    docker_client = MagicMock()
    docker_client.ping.return_value = True
    docker_client.create_container.return_value = {"Id": "container_id"}
    monkeypatch.setattr(
        "prefect.agent.docker.agent.DockerAgent._get_docker_client",
        MagicMock(return_value=docker_client),
    )

    agent = DockerAgent(show_flow_logs=True)

    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "id": "foo",
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})
    agent.deploy_flow(flow_run=flow_run)

    expected_process_kwargs = {
        "target": _stream_container_logs,
        "kwargs": {"base_url": agent.base_url, "container_id": "container_id"},
    }
    process_factory.assert_called_with(**expected_process_kwargs)

    # Every argument given to `multiprocessing.Process` must survive pickling.
    round_tripped = pickle.loads(pickle.dumps(expected_process_kwargs))
    assert round_tripped == expected_process_kwargs

    assert len(agent.processes) == 1
    assert docker_client.create_container.called
    assert docker_client.start.called
def test_k8s_agent_deploy_flow_uses_environment_metadata(monkeypatch, cloud_api):
    """The created job's container uses the image named in the environment metadata."""
    monkeypatch.setattr("kubernetes.config", MagicMock())

    batch_client = MagicMock()
    monkeypatch.setattr(
        "kubernetes.client.BatchV1Api", MagicMock(return_value=batch_client)
    )

    core_client = MagicMock()
    core_client.list_namespaced_pod.return_value = MagicMock(items=[])
    monkeypatch.setattr(
        "kubernetes.client.CoreV1Api", MagicMock(return_value=core_client)
    )

    manage_jobs_stub = MagicMock(return_value=[])
    monkeypatch.setattr(
        "prefect.agent.kubernetes.agent.KubernetesAgent.manage_jobs",
        manage_jobs_stub,
    )

    agent = KubernetesAgent()

    environment = LocalEnvironment(metadata={"image": "repo/name:tag"})
    flow = GraphQLResult(
        {
            "storage": Local().serialize(),
            "environment": environment.serialize(),
            "id": "id",
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})
    agent.deploy_flow(flow_run=flow_run)

    create_job = agent.batch_client.create_namespaced_job
    assert create_job.called

    # Index 1 of ``call_args`` holds the keyword arguments of the last call.
    job_body = create_job.call_args[1]["body"]
    container = job_body["spec"]["template"]["spec"]["containers"][0]
    assert container["image"] == "repo/name:tag"
def test_get_flow_image_docker_storage():
    """``get_flow_image`` builds ``registry/name:tag`` from a flow run's Docker storage."""
    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = GraphQLResult(
        {
            "storage": storage.serialize(),
            "environment": LocalEnvironment().serialize(),
            "id": "id",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})

    assert get_flow_image(flow_run=flow_run) == "test/name:tag"
def test_get_flow_image_env_metadata():
    """``get_flow_image`` returns the image from a flow run's environment metadata."""
    environment = LocalEnvironment(metadata={"image": "repo/name:tag"})
    flow = GraphQLResult(
        {
            "storage": Local().serialize(),
            "environment": environment.serialize(),
            "id": "id",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})

    assert get_flow_image(flow_run=flow_run) == "repo/name:tag"
def create_flow() -> Flow:
    """Build the flow named ``FLOW_NAME`` and return it.

    The flow takes ``country`` and ``bucket`` parameters, defaulting to
    ``DEFAULT_COUNTRY`` and ``DEFAULT_BUCKET``. It chains the download,
    filter, enrich and aggregate tasks, prints the aggregate, and uploads the
    prepared CSV to S3.
    """
    dask_environment = LocalEnvironment(executor=LocalDaskExecutor())

    with Flow(FLOW_NAME, environment=dask_environment) as flow:
        country = Parameter("country", default=DEFAULT_COUNTRY)
        bucket = Parameter("bucket", default=DEFAULT_BUCKET)

        raw_df = download_data()
        country_df = filter_data(raw_df, country)
        enriched_df = enrich_data(country_df)
        aggregated_df = aggregate_data(enriched_df)
        print_data(aggregated_df)

        upload_payload = prepare_data_for_upload(aggregated_df)
        upload_to_s3(
            upload_payload["csv"],
            upload_payload["filename"],
            bucket=bucket,
        )

    return flow
def test_docker_agent_deploy_flow_uses_environment_metadata(
    monkeypatch, cloud_api
):
    """An image named in environment metadata is pulled, created and started."""
    # Stand-in Docker client returning canned container and host-config data.
    docker_client = MagicMock()
    docker_client.ping.return_value = True
    docker_client.create_container.return_value = {"Id": "container_id"}
    docker_client.create_host_config.return_value = {"AutoRemove": True}
    monkeypatch.setattr(
        "prefect.agent.docker.agent.DockerAgent._get_docker_client",
        MagicMock(return_value=docker_client),
    )

    agent = DockerAgent()

    environment = LocalEnvironment(metadata={"image": "repo/name:tag"})
    flow = GraphQLResult(
        {
            "id": "foo",
            "storage": Local().serialize(),
            "environment": environment.serialize(),
            "core_version": "0.13.0",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id", "name": "name"})
    agent.deploy_flow(flow_run=flow_run)

    assert docker_client.pull.called
    assert docker_client.create_container.called
    assert docker_client.start.called

    # Index 1 of ``call_args`` holds the keyword arguments of the last call.
    host_config_kwargs = docker_client.create_host_config.call_args[1]
    assert host_config_kwargs["auto_remove"] is True

    container_kwargs = docker_client.create_container.call_args[1]
    assert container_kwargs["command"] == "prefect execute flow-run"
    assert container_kwargs["host_config"]["AutoRemove"] is True

    start_kwargs = docker_client.start.call_args[1]
    assert start_kwargs["container"] == "container_id"