def test_create_dockerfile_with_flow_file(no_docker_host_var, tmpdir):
    """A script-based flow listed in `files` gets a COPY line in the Dockerfile;
    script storage without an explicit `path` raises ValueError."""
    contents = """from prefect import Flow\nf=Flow('test-flow')"""

    full_path = os.path.join(tmpdir, "flow.py")
    with open(full_path, "w") as f:
        f.write(contents)
    with open(os.path.join(tmpdir, "test"), "w+") as t:
        t.write("asdf")

    with tempfile.TemporaryDirectory() as tempdir_inside:
        storage = Docker(
            files={full_path: "flow.py"}, stored_as_script=True, path="flow.py"
        )
        f = Flow("test-flow")
        storage.add_flow(f)

        dpath = storage.create_dockerfile_object(directory=tempdir_inside)
        with open(dpath, "r") as dockerfile:
            output = dockerfile.read()
        assert "COPY flow.py flow.py" in output

        # Same storage but without `path`: Dockerfile creation must fail
        storage = Docker(files={full_path: "flow.py"}, stored_as_script=True)
        f = Flow("test-flow")
        storage.add_flow(f)
        with pytest.raises(ValueError):
            storage.create_dockerfile_object(directory=tempdir_inside)
def test_env_var_precedence_docker_storage(monkeypatch, no_docker_host_var):
    """An explicit `base_url` argument takes precedence over DOCKER_HOST.

    Fixes: the original ended with a duplicated, unused
    `storage = Docker(base_url="foo")` (dead code) and carried a redundant
    truthiness assert already covered by the equality check.
    """
    monkeypatch.setenv("DOCKER_HOST", "bar")

    # No explicit base_url: the DOCKER_HOST env var is picked up
    storage = Docker()
    assert storage.base_url == "bar"

    # Explicit base_url wins over the env var
    storage = Docker(base_url="foo")
    assert storage.base_url == "foo"
def test_docker_agent_networks_no_networks(api):
    """With an empty `networks` list the agent creates no networking config."""
    api.create_networking_config.return_value = {"test-network-1": "config1"}
    api.create_endpoint_config.return_value = "endpoint-config"

    agent = DockerAgent(networks=[])
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": GraphQLResult(
                    {
                        "id": "foo",
                        "name": "flow-name",
                        "storage": Docker(
                            registry_url="test", image_name="name", image_tag="tag"
                        ).serialize(),
                        "core_version": "0.13.0",
                    }
                ),
                "id": "id",
                "name": "name",
            }
        )
    )

    assert agent.networks == []
    api.create_networking_config.assert_not_called()
    _, kwargs = api.create_container.call_args
    assert kwargs["networking_config"] is None
    api.connect_container_to_network.assert_not_called()
def test_docker_storage_get_flow_method():
    """get_flow raises before any flow is added, then round-trips a pickled flow."""
    with tempfile.TemporaryDirectory() as directory:
        storage = Docker(base_image="python:3.6", prefect_directory=directory)

        # No flow registered yet
        with pytest.raises(ValueError):
            storage.get_flow()

        @prefect.task
        def add_to_dict():
            with open(os.path.join(directory, "output"), "w") as tmp:
                tmp.write("success")

        flow_dir = os.path.join(directory, "flows")
        os.makedirs(flow_dir, exist_ok=True)

        # NOTE(review): the "w+" handle appears unused beyond creating the file;
        # the flow is serialized via the separate binary handle below
        with open(os.path.join(flow_dir, "test.prefect"), "w+") as env:
            flow = Flow("test", tasks=[add_to_dict])
            flow_path = os.path.join(flow_dir, "test.prefect")
            with open(flow_path, "wb") as f:
                cloudpickle.dump(flow, f)

        out = storage.add_flow(flow)
        f = storage.get_flow(out)
        assert isinstance(f, Flow)
        assert f.name == "test"
        assert len(f.tasks) == 1
def test_docker_agent_network(api):
    """The deprecated `network` kwarg warns but still wires up the network."""
    api.create_networking_config.return_value = {"test-network": "config"}

    with pytest.warns(UserWarning):
        agent = DockerAgent(network="test-network")
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": GraphQLResult(
                    {
                        "id": "foo",
                        "name": "flow-name",
                        "storage": Docker(
                            registry_url="test", image_name="name", image_tag="tag"
                        ).serialize(),
                        "environment": LocalEnvironment().serialize(),
                        "core_version": "0.13.0",
                    }
                ),
                "id": "id",
                "name": "name",
            }
        )
    )

    assert agent.network == "test-network"
    assert agent.networks is None
    args, kwargs = api.create_container.call_args
    assert kwargs["networking_config"] == {"test-network": "config"}
def test_k8s_agent_includes_agent_labels_in_job(monkeypatch, cloud_api):
    """Agent labels appear in the generated k8s job's container env."""
    get_jobs = MagicMock(return_value=[])
    monkeypatch.setattr(
        "prefect.agent.kubernetes.agent.KubernetesAgent.manage_jobs",
        get_jobs,
    )

    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Docker(
                        registry_url="test", image_name="name", image_tag="tag"
                    ).serialize(),
                    "environment": LocalEnvironment().serialize(),
                    "id": "new_id",
                    "core_version": "0.13.0",
                }
            ),
            "id": "id",
        }
    )

    agent = KubernetesAgent(labels=["foo", "bar"])
    job = agent.generate_job_spec_from_environment(flow_run, image="test/name:tag")

    # env[5] is the labels entry in the job template — verify it round-trips
    env = job["spec"]["template"]["spec"]["containers"][0]["env"]
    assert env[5]["value"] == "['foo', 'bar']"
def test_k8s_agent_replace_yaml_no_pull_secrets(monkeypatch, cloud_api):
    """Without configured secrets, the job spec has no imagePullSecrets."""
    get_jobs = MagicMock(return_value=[])
    monkeypatch.setattr(
        "prefect.agent.kubernetes.agent.KubernetesAgent.manage_jobs",
        get_jobs,
    )

    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Docker(
                        registry_url="test", image_name="name", image_tag="tag"
                    ).serialize(),
                    "environment": LocalEnvironment().serialize(),
                    "id": "id",
                    "core_version": "0.13.0",
                }
            ),
            "id": "id",
        }
    )

    agent = KubernetesAgent()
    job = agent.generate_job_spec_from_environment(flow_run, image="test/name:tag")
    assert not job["spec"]["template"]["spec"].get("imagePullSecrets", None)
def test_k8s_agent_replace_yaml_responds_to_logging_config(
    monkeypatch, cloud_api, flag
):
    """The cloud-logging env var in the job spec mirrors `no_cloud_logs`."""
    get_jobs = MagicMock(return_value=[])
    monkeypatch.setattr(
        "prefect.agent.kubernetes.agent.KubernetesAgent.manage_jobs",
        get_jobs,
    )

    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Docker(
                        registry_url="test", image_name="name", image_tag="tag"
                    ).serialize(),
                    "environment": LocalEnvironment().serialize(),
                    "id": "new_id",
                    "core_version": "0.13.0",
                }
            ),
            "id": "id",
            "name": "name",
        }
    )

    agent = KubernetesAgent(no_cloud_logs=flag)
    job = agent.generate_job_spec_from_environment(flow_run, image="test/name:tag")

    # env[6] holds the send-logs flag; it is the inverse of `no_cloud_logs`
    env = job["spec"]["template"]["spec"]["containers"][0]["env"]
    assert env[6]["value"] == str(not flag).lower()
def test_k8s_agent_replace_yaml_respects_multiple_image_secrets(monkeypatch, cloud_api):
    """Comma-separated IMAGE_PULL_SECRETS expand to multiple imagePullSecrets."""
    get_jobs = MagicMock(return_value=[])
    monkeypatch.setattr(
        "prefect.agent.kubernetes.agent.KubernetesAgent.manage_jobs",
        get_jobs,
    )
    monkeypatch.setenv("IMAGE_PULL_SECRETS", "some-secret,other-secret")
    monkeypatch.setenv("IMAGE_PULL_POLICY", "custom_policy")

    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Docker(
                        registry_url="test", image_name="name", image_tag="tag"
                    ).serialize(),
                    "environment": LocalEnvironment().serialize(),
                    "id": "new_id",
                    "core_version": "0.13.0",
                }
            ),
            "id": "id",
        }
    )

    with set_temporary_config(
        {"cloud.agent.auth_token": "token", "logging.log_to_cloud": True}
    ):
        agent = KubernetesAgent(env_vars=dict(AUTH_THING="foo", PKG_SETTING="bar"))
        job = agent.generate_job_spec_from_environment(flow_run, image="test/name:tag")

    expected_secrets = [{"name": "some-secret"}, {"name": "other-secret"}]
    assert job["spec"]["template"]["spec"]["imagePullSecrets"] == expected_secrets
def test_docker_agent_deploy_with_no_interface_check_linux(
    api, monkeypatch, linux_platform
):
    """With `docker_interface=False` the agent never resolves the docker IP."""
    get_ip = MagicMock()
    monkeypatch.setattr("prefect.agent.docker.agent.get_docker_ip", get_ip)

    agent = DockerAgent(docker_interface=False)
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": GraphQLResult(
                    {
                        "id": "foo",
                        "storage": Docker(
                            registry_url="", image_name="name", image_tag="tag"
                        ).serialize(),
                        "environment": LocalEnvironment().serialize(),
                        "core_version": "0.13.0",
                    }
                ),
                "id": "id",
                "name": "name",
            }
        )
    )

    assert not get_ip.called
def test_docker_agent_deploy_flow_show_flow_logs(api, monkeypatch):
    """`show_flow_logs=True` spawns a picklable log-streaming subprocess."""
    process = MagicMock()
    monkeypatch.setattr("multiprocessing.Process", process)

    agent = DockerAgent(show_flow_logs=True)
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": GraphQLResult(
                    {
                        "id": "foo",
                        "storage": Docker(
                            registry_url="test", image_name="name", image_tag="tag"
                        ).serialize(),
                        "environment": LocalEnvironment().serialize(),
                        "core_version": "0.13.0",
                    }
                ),
                "id": "id",
                "name": "name",
            }
        )
    )

    process_kwargs = dict(
        target=_stream_container_logs,
        kwargs={"base_url": agent.base_url, "container_id": "container_id"},
    )
    process.assert_called_with(**process_kwargs)
    # Check all arguments to `multiprocessing.Process` are pickleable
    assert pickle.loads(pickle.dumps(process_kwargs)) == process_kwargs

    assert len(agent.processes) == 1
    assert api.create_container.called
    assert api.start.called
def test_docker_agent_deploy_flow(core_version, command, api):
    """Deploying pulls, creates, and starts an auto-removed container with the
    version-appropriate run command."""
    agent = DockerAgent()
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": GraphQLResult(
                    {
                        "id": "foo",
                        "storage": Docker(
                            registry_url="test", image_name="name", image_tag="tag"
                        ).serialize(),
                        "environment": LocalEnvironment().serialize(),
                        "core_version": core_version,
                    }
                ),
                "id": "id",
                "name": "name",
            }
        )
    )

    assert api.pull.called
    assert api.create_container.called
    assert api.start.called

    assert api.create_host_config.call_args[1]["auto_remove"] is True
    assert api.create_container.call_args[1]["command"] == command
    assert api.create_container.call_args[1]["host_config"]["AutoRemove"] is True
    assert api.start.call_args[1]["container"] == "container_id"
def test_k8s_agent_removes_yaml_no_volume(monkeypatch, cloud_api):
    """Without volume config, the job spec has no volumes or volumeMounts."""
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Docker(
                        registry_url="test", image_name="name", image_tag="tag"
                    ).serialize(),
                    "environment": LocalEnvironment().serialize(),
                    "id": "id",
                    "core_version": "0.13.0",
                }
            ),
            "id": "id",
        }
    )

    agent = KubernetesAgent()
    job = agent.generate_job_spec_from_environment(flow_run, image="test/name:tag")

    pod_spec = job["spec"]["template"]["spec"]
    assert not pod_spec.get("volumes", None)
    assert not pod_spec["containers"][0].get("volumeMounts", None)
def test_docker_agent_deploy_flow_reg_allow_list_allowed(api):
    """A registry on the allow-list deploys normally."""
    agent = DockerAgent(reg_allow_list=["test1"])
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": GraphQLResult(
                    {
                        "id": "foo",
                        "name": "flow-name",
                        "storage": Docker(
                            registry_url="test1", image_name="name", image_tag="tag"
                        ).serialize(),
                        "core_version": "0.13.0",
                    }
                ),
                "id": "id",
                "name": "name",
            }
        )
    )

    assert api.pull.called
    assert api.create_container.called
    assert api.start.called
def test_docker_agent_networks_as_modes(api, network):
    """A single configured network is used both as networking config and as the
    container's network_mode."""
    api.create_networking_config.return_value = {network: "config1"}
    api.create_endpoint_config.return_value = "endpoint-config"

    agent = DockerAgent(networks=[network])
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": GraphQLResult(
                    {
                        "id": "foo",
                        "name": "flow-name",
                        "storage": Docker(
                            registry_url="test", image_name="name", image_tag="tag"
                        ).serialize(),
                        "core_version": "0.13.0",
                    }
                ),
                "id": "id",
                "name": "name",
            }
        )
    )

    assert network in agent.networks
    api.create_networking_config.assert_called_once_with({network: "endpoint-config"})

    _, container_create_kwargs = api.create_container.call_args
    assert container_create_kwargs["networking_config"] == {network: "config1"}

    _, host_config_kwargs = api.create_host_config.call_args
    assert host_config_kwargs["network_mode"] == network
def test_initialized_docker_storage(no_docker_host_var):
    """Constructor arguments are stored, with `wheel` and the config-path env
    var appended automatically."""
    storage = Docker(
        registry_url="test1",
        base_image="test3",
        python_dependencies=["test"],
        image_name="test4",
        image_tag="test5",
        env_vars={"test": "1"},
        base_url="test_url",
        tls_config={"tls": "here"},
        prefect_version="my-branch",
        local_image=True,
        build_kwargs={"nocache": True},
    )

    assert storage.registry_url == "test1"
    assert storage.base_image == "test3"
    assert storage.image_name == "test4"
    assert storage.image_tag == "test5"
    # `wheel` is always appended to user-provided dependencies
    assert storage.python_dependencies == ["test", "wheel"]
    # The config path env var is always injected
    assert storage.env_vars == {
        "test": "1",
        "PREFECT__USER_CONFIG_PATH": "/opt/prefect/config.toml",
    }
    assert storage.base_url == "test_url"
    assert storage.tls_config == {"tls": "here"}
    assert storage.build_kwargs == {"nocache": True}
    assert storage.prefect_version == "my-branch"
    assert storage.local_image
def main(register, run):
    """Build the numerai-reports flow and optionally register and/or run it.

    Args:
        register: when truthy, attach a daily cron schedule and register the
            flow with the "numerai" project.
        run: when truthy, execute the flow locally after building it.
    """
    schedule = Schedule(clocks=[CronClock("1 19 * * *")]) if register else None
    result = GCSResult(bucket='uuazed-prefect')

    with Flow("numerai-reports", schedule, result=result) as flow:
        filenames = fetch()
        upload_to_gcs(filenames)

    flow.storage = Docker(
        registry_url="gcr.io/numerai-171710",
        python_dependencies=['pandas', 'numerapi', 'pyarrow'],
        files={
            os.path.abspath("data.py"): "numerai_reports/data.py",
            os.path.abspath("settings.py"): "numerai_reports/settings.py",
            os.path.abspath("utils.py"): "numerai_reports/utils.py",
        },
        env_vars={"PYTHONPATH": "$PYTHONPATH:/"},
        secrets=["GCP_CREDENTIALS"])

    if register:
        flow.register(project_name="numerai", labels=["docker"])
    if run:
        flow.run()
def test_docker_agent_deploy_flow_no_registry_does_not_pull(api):
    """An empty registry_url skips the image pull but still runs the container."""
    agent = DockerAgent()
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": GraphQLResult(
                    {
                        "id": "foo",
                        "name": "flow-name",
                        "storage": Docker(
                            registry_url="", image_name="name", image_tag="tag"
                        ).serialize(),
                        "environment": LocalEnvironment().serialize(),
                        "core_version": "0.13.0",
                    }
                ),
                "id": "id",
                "name": "name",
            }
        )
    )

    assert not api.pull.called
    assert api.create_container.called
    assert api.start.called
def test_copy_files_with_dockerignore():
    """A user-supplied .dockerignore is copied into the build directory."""
    with tempfile.TemporaryDirectory() as sample_top_directory:
        sample_sub_directory = os.path.join(sample_top_directory, "subdir")
        os.mkdir(sample_sub_directory)

        sample_file = os.path.join(sample_sub_directory, "test.txt")
        with open(sample_file, "w+") as t:
            t.write("asdf")

        dockerignore = os.path.join(sample_sub_directory, ".dockerignore")
        with open(dockerignore, "w+") as t:
            t.write("test.txt")

        with tempfile.TemporaryDirectory() as directory:
            storage = Docker(
                files={
                    sample_sub_directory: "/test_dir",
                    sample_file: "/path/test_file.txt",
                },
                dockerignore=dockerignore,
            )
            storage.add_flow(Flow("foo"))
            storage.create_dockerfile_object(directory=directory)

            contents = os.listdir(directory)
            assert ".dockerignore" in contents, contents
def test_docker_agent_networks(api):
    """Multiple configured networks are all passed through to the container."""
    api.create_networking_config.return_value = {
        "test-network-1": "config1",
        "test-network-2": "config2",
    }

    agent = DockerAgent(networks=["test-network-1", "test-network-2"])
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": GraphQLResult(
                    {
                        "id": "foo",
                        "name": "flow-name",
                        "storage": Docker(
                            registry_url="test", image_name="name", image_tag="tag"
                        ).serialize(),
                        "environment": LocalEnvironment().serialize(),
                        "core_version": "0.13.0",
                    }
                ),
                "id": "id",
                "name": "name",
            }
        )
    )

    assert "test-network-1" in agent.networks
    assert "test-network-2" in agent.networks
    args, kwargs = api.create_container.call_args
    assert kwargs["networking_config"] == {
        "test-network-1": "config1",
        "test-network-2": "config2",
    }
def test_docker_agent_deploy_flow_reg_allow_list_not_allowed(api):
    """A registry absent from the allow-list raises and deploys nothing."""
    agent = DockerAgent(reg_allow_list=["test1"])

    with pytest.raises(ValueError) as error:
        agent.deploy_flow(
            flow_run=GraphQLResult(
                {
                    "flow": GraphQLResult(
                        {
                            "id": "foo",
                            "name": "flow-name",
                            "storage": Docker(
                                registry_url="test2",
                                image_name="name",
                                image_tag="tag",
                            ).serialize(),
                            "environment": LocalEnvironment().serialize(),
                            "core_version": "0.13.0",
                        }
                    ),
                    "id": "id",
                    "name": "name",
                }
            )
        )

    expected_error = (
        "Trying to pull image from a Docker registry 'test2'"
        " which is not in the reg_allow_list"
    )

    assert not api.pull.called
    assert not api.create_container.called
    assert not api.start.called
    assert str(error.value) == expected_error
def test_create_dockerfile_from_everything(no_docker_host_var):
    """A fully configured storage produces COPY lines for files, the
    healthcheck, and every added flow."""
    with tempfile.TemporaryDirectory() as tempdir_outside:
        with open(os.path.join(tempdir_outside, "test"), "w+") as t:
            t.write("asdf")

        with tempfile.TemporaryDirectory() as tempdir:
            storage = Docker(
                registry_url="test1",
                base_image="test3",
                python_dependencies=["test"],
                image_name="test4",
                image_tag="test5",
                files={os.path.join(tempdir_outside, "test"): "./test2"},
                base_url="test_url",
            )

            f = Flow("test")
            g = Flow("other")
            storage.add_flow(f)
            storage.add_flow(g)
            dpath = storage.create_dockerfile_object(directory=tempdir)

            with open(dpath, "r") as dockerfile:
                output = dockerfile.read()

            assert "FROM test3" in output
            assert "COPY test ./test2" in output
            assert "COPY healthcheck.py /opt/prefect/healthcheck.py" in output
            assert "COPY test.flow /opt/prefect/flows/test.prefect" in output
            assert "COPY other.flow /opt/prefect/flows/other.prefect" in output
def test_dockerfile_env_vars(tmpdir):
    """Env vars with spaces and quotes are shell-quoted in the ENV directive."""
    env_vars = OrderedDict(
        [
            ("NUM", 1),
            ("STR_WITH_SPACES", "Hello world!"),
            ("STR_WITH_QUOTES", 'Hello "friend"'),
            ("STR_WITH_SINGLE_QUOTES", "'foo'"),
        ]
    )
    storage = Docker(
        env_vars=env_vars,
    )
    storage.add_flow(Flow("foo"))
    dpath = storage.create_dockerfile_object(directory=str(tmpdir))

    with open(dpath, "r") as dockerfile:
        output = dockerfile.read()

    expected = textwrap.dedent(
        """
        ENV NUM=1 \\
            STR_WITH_SPACES='Hello world!' \\
            STR_WITH_QUOTES='Hello "friend"' \\
            STR_WITH_SINGLE_QUOTES="'foo'" \\
        """
    )
    assert expected in output
def test_add_flow_to_docker():
    """add_flow registers the flow under the default in-image path."""
    storage = Docker()
    f = Flow("test")

    assert f not in storage
    assert storage.add_flow(f) == "/opt/prefect/flows/test.prefect"
    assert f.name in storage
    assert storage.flows[f.name] == "/opt/prefect/flows/test.prefect"
def test_copy_files():
    """Directories and files listed in `files` are copied into the build
    directory and referenced by COPY lines in the Dockerfile.

    Fixes: the original COPY assertions had misplaced parentheses —
    `"COPY {}".format(path in output)` formatted a boolean and then asserted
    a non-empty string, so they could never fail. The membership check now
    applies to the formatted COPY line itself.
    """
    with tempfile.TemporaryDirectory() as sample_top_directory:
        sample_sub_directory = os.path.join(sample_top_directory, "subdir")
        os.mkdir(sample_sub_directory)

        sample_file = os.path.join(sample_sub_directory, "test.txt")
        with open(sample_file, "w+") as t:
            t.write("asdf")

        with tempfile.TemporaryDirectory() as directory:
            storage = Docker(
                files={
                    sample_sub_directory: "/test_dir",
                    sample_file: "/path/test_file.txt",
                },
            )
            storage.add_flow(Flow("foo"))
            dpath = storage.create_dockerfile_object(directory=directory)

            with open(dpath, "r") as dockerfile:
                output = dockerfile.read()

            contents = os.listdir(directory)
            assert "subdir" in contents, contents
            assert "test.txt" in contents, contents

            assert (
                "COPY {} /test_dir".format(
                    os.path.join(directory, "subdir").replace("\\", "/")
                )
                in output
            ), output
            assert (
                "COPY {} /path/test_file.txt".format(
                    os.path.join(directory, "test.txt").replace("\\", "/")
                )
                in output
            ), output
def test_build_image_passes_and_pushes(monkeypatch):
    """_build_image with a registry builds, pushes, and removes the local tag."""
    flow = Flow("test")
    storage = Docker(registry_url="reg", base_image="python:3.6")

    pull_image = MagicMock()
    monkeypatch.setattr("prefect.storage.Docker.pull_image", pull_image)

    push_image = MagicMock()
    monkeypatch.setattr("prefect.storage.Docker.push_image", push_image)

    build = MagicMock()
    monkeypatch.setattr("docker.APIClient.build", build)

    images = MagicMock(return_value=["test"])
    monkeypatch.setattr("docker.APIClient.images", images)

    remove = MagicMock()
    monkeypatch.setattr("docker.APIClient.remove_image", remove)

    image_name, image_tag = storage._build_image(flow)

    assert image_name
    assert image_tag
    # The registry prefix appears in both the pushed and removed image refs
    assert "reg" in push_image.call_args[0][0]
    assert "reg" in remove.call_args[1]["image"]
def test_setup_definition_exists(monkeypatch):
    """setup() reuses an existing matching task definition and does not
    register a new one."""
    existing_task_definition = {
        "containerDefinitions": [
            {
                "environment": [
                    {
                        "name": "PREFECT__CLOUD__GRAPHQL",
                        "value": config.cloud.graphql,
                    },
                    {
                        "name": "PREFECT__CLOUD__USE_LOCAL_SECRETS",
                        "value": "false",
                    },
                    {
                        "name": "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS",
                        "value": "prefect.engine.cloud.CloudFlowRunner",
                    },
                    {
                        "name": "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS",
                        "value": "prefect.engine.cloud.CloudTaskRunner",
                    },
                    {
                        "name": "PREFECT__CLOUD__SEND_FLOW_RUN_LOGS",
                        "value": "true",
                    },
                    {
                        "name": "PREFECT__LOGGING__EXTRA_LOGGERS",
                        "value": str(config.logging.extra_loggers),
                    },
                ],
                "name": "flow-container",
                "image": "test/image:tag",
                "command": [
                    "/bin/sh",
                    "-c",
                    "python -c 'import prefect; prefect.environments.execution.load_and_run_flow()'",
                ],
            }
        ],
    }

    boto3_client = MagicMock()
    boto3_client.describe_task_definition.return_value = {
        "taskDefinition": existing_task_definition
    }
    monkeypatch.setattr("boto3.client", MagicMock(return_value=boto3_client))

    environment = FargateTaskEnvironment()
    environment.setup(
        Flow(
            "test",
            storage=Docker(registry_url="test", image_name="image", image_tag="tag"),
        )
    )

    assert boto3_client.describe_task_definition.called
    assert not boto3_client.register_task_definition.called
def test_add_flow_with_weird_name_is_cleaned():
    """Special characters in a flow name are stripped from its storage path."""
    storage = Docker()
    flow = prefect.Flow("WELL what do you know?!~? looks like a test!!!!")
    loc = storage.add_flow(flow)

    for forbidden in ("?", "!", " ", "~"):
        assert forbidden not in loc
def test_docker_storage_name_registry_url_none():
    """`name` raises until image name/tag are set, then omits the registry."""
    storage = Docker(base_image="python:3.6")
    with pytest.raises(ValueError):
        storage.name

    storage.image_name = "test2"
    storage.image_tag = "test3"
    assert storage.name == "test2:test3"
def test_build_image_fails_no_registry(monkeypatch):
    """_build_image surfaces a build failure as a ValueError."""
    storage = Docker(base_image="python:3.6", image_name="test", image_tag="latest")

    client = MagicMock()
    monkeypatch.setattr("docker.APIClient", client)

    with pytest.raises(ValueError, match="failed to build"):
        image_name, image_tag = storage._build_image(push=False)