def test_create_dask_environment():
    """A DaskKubernetesEnvironment built without arguments exposes the expected defaults."""
    dask_env = DaskKubernetesEnvironment()

    assert dask_env

    # Worker scaling bounds.
    assert dask_env.min_workers == 1
    assert dask_env.max_workers == 2

    # Boolean switches all start disabled.
    assert dask_env.work_stealing is False
    assert dask_env.scheduler_logs is False
    assert dask_env.private_registry is False

    # Optional settings start unset / empty.
    assert dask_env.docker_secret is None
    assert dask_env.labels == set()
    assert dask_env.on_start is None
    assert dask_env.on_exit is None

    assert dask_env.logger.name == "prefect.DaskKubernetesEnvironment"
def test_populate_custom_worker_spec_yaml(log_flag):
    """Populate a worker pod spec whose container env list starts out empty.

    ``log_flag`` is used as the ``logging.log_to_cloud`` setting and is
    expected back, lower-cased, among the populated environment variables.
    """
    dask_env = DaskKubernetesEnvironment()

    k8s_dir = os.path.dirname(prefect.environments.execution.dask.k8s.__file__)
    with open(path.join(k8s_dir, "worker_pod.yaml")) as pod_file:
        pod = yaml.safe_load(pod_file)
    # Discard the env entries shipped in the template before populating.
    pod["spec"]["containers"][0]["env"] = []

    temp_config = {
        "cloud.graphql": "gql_test",
        "cloud.auth_token": "auth_test",
        "logging.log_to_cloud": log_flag,
        "logging.extra_loggers": "['test_logger']",
    }
    with set_temporary_config(temp_config), prefect.context(
        flow_run_id="id_test", image="my_image"
    ):
        populated = dask_env._populate_worker_spec_yaml(yaml_obj=pod)

    labels = populated["metadata"]["labels"]
    assert labels["identifier"] == dask_env.identifier_label
    assert labels["flow_run_id"] == "id_test"

    container = populated["spec"]["containers"][0]
    expected_values = [
        "gql_test",
        "auth_test",
        "id_test",
        "false",
        "prefect.engine.cloud.CloudFlowRunner",
        "prefect.engine.cloud.CloudTaskRunner",
        "prefect.engine.executors.DaskExecutor",
        str(log_flag).lower(),
        "['test_logger', 'dask_kubernetes.core', 'distributed.deploy.adaptive', 'kubernetes']",
    ]
    for position, expected in enumerate(expected_values):
        assert container["env"][position]["value"] == expected

    assert container["image"] == "my_image"
def test_setup_doesnt_pass_if_private_registry(monkeypatch):
    """With ``private_registry=True`` and no listed secrets, setup calls the secret creator."""
    dask_env = DaskKubernetesEnvironment(private_registry=True)
    assert dask_env.docker_secret == "DOCKER_REGISTRY_CREDENTIALS"

    monkeypatch.setattr("kubernetes.config", MagicMock())

    # The mocked core API reports an empty secret list.
    core_api = MagicMock()
    core_api.list_namespaced_secret.return_value = MagicMock(items=[])
    fake_client = MagicMock(CoreV1Api=MagicMock(return_value=core_api))
    monkeypatch.setattr("kubernetes.client", fake_client)

    secret_creator = MagicMock()
    monkeypatch.setattr(
        "prefect.environments.DaskKubernetesEnvironment._create_namespaced_secret",
        secret_creator,
    )

    with set_temporary_config({"cloud.auth_token": "test"}):
        dask_env.setup(flow=base_flow)

    assert secret_creator.called
def test_populate_job_yaml():
    """Populate the bundled ``job.yaml`` and check its name, labels, env values and image."""
    dask_env = DaskKubernetesEnvironment()

    k8s_dir = os.path.dirname(prefect.environments.execution.dask.k8s.__file__)
    with open(path.join(k8s_dir, "job.yaml")) as job_file:
        job = yaml.safe_load(job_file)

    temp_config = {"cloud.graphql": "gql_test", "cloud.auth_token": "auth_test"}
    with set_temporary_config(temp_config), prefect.context(
        flow_run_id="id_test", namespace="namespace_test"
    ):
        populated = dask_env._populate_job_yaml(
            yaml_obj=job, docker_name="test1/test2:test3", flow_file_path="test4"
        )

    job_metadata = populated["metadata"]
    assert job_metadata["name"] == "prefect-dask-job-{}".format(
        dask_env.identifier_label
    )
    assert job_metadata["labels"]["identifier"] == dask_env.identifier_label
    assert job_metadata["labels"]["flow_run_id"] == "id_test"

    template = populated["spec"]["template"]
    assert template["metadata"]["labels"]["identifier"] == dask_env.identifier_label

    container = template["spec"]["containers"][0]
    expected_values = [
        "gql_test",
        "auth_test",
        "id_test",
        "namespace_test",
        "test1/test2:test3",
        "test4",
    ]
    for position, expected in enumerate(expected_values):
        assert container["env"][position]["value"] == expected

    assert container["image"] == "test1/test2:test3"
def test_create_secret_isnt_called_if_exists(monkeypatch):
    """When the listed secrets include ``foo-docker``, setup does not call the secret creator."""
    dask_env = DaskKubernetesEnvironment(private_registry=True)

    monkeypatch.setattr("kubernetes.config", MagicMock())

    # The mocked core API lists one secret named "foo-docker".
    existing_secret = MagicMock()
    existing_secret.metadata.name = "foo-docker"
    core_api = MagicMock()
    core_api.list_namespaced_secret.return_value = MagicMock(items=[existing_secret])
    fake_client = MagicMock(CoreV1Api=MagicMock(return_value=core_api))
    monkeypatch.setattr("kubernetes.client", fake_client)

    secret_creator = MagicMock()
    monkeypatch.setattr(
        "prefect.environments.DaskKubernetesEnvironment._create_namespaced_secret",
        secret_creator,
    )

    with set_temporary_config({"cloud.auth_token": "test"}), prefect.context(
        namespace="foo"
    ):
        dask_env.setup(flow=base_flow)

    assert not secret_creator.called
def test_populate_worker_pod_yaml_with_multiple_image_pull_secrets():
    """A comma-separated ``image_pull_secret`` yields one ``imagePullSecrets`` entry per name."""
    dask_env = DaskKubernetesEnvironment(image_pull_secret="some-secret,another-one")

    k8s_dir = os.path.dirname(prefect.environments.execution.dask.k8s.__file__)
    with open(path.join(k8s_dir, "worker_pod.yaml")) as pod_file:
        pod = yaml.safe_load(pod_file)

    temp_config = {"cloud.graphql": "gql_test", "cloud.auth_token": "auth_test"}
    with set_temporary_config(temp_config), prefect.context(
        flow_run_id="id_test", image="my_image", namespace="foo-man"
    ):
        populated = dask_env._populate_worker_pod_yaml(yaml_obj=pod)

    expected_secrets = [{"name": "some-secret"}, {"name": "another-one"}]
    assert populated["spec"]["imagePullSecrets"] == expected_secrets
def test_create_dask_environment_args():
    """Constructor keyword arguments are stored on the matching attributes."""
    init_kwargs = {
        "min_workers": 5,
        "max_workers": 6,
        "work_stealing": True,
        "private_registry": True,
        "docker_secret": "docker",
    }
    dask_env = DaskKubernetesEnvironment(**init_kwargs)

    assert dask_env
    assert dask_env.min_workers == 5
    assert dask_env.max_workers == 6
    assert dask_env.work_stealing is True
    assert dask_env.private_registry is True
    assert dask_env.docker_secret == "docker"
def test_populate_custom_scheduler_spec_yaml():
    """Populate a scheduler job spec whose container env list starts out empty."""
    dask_env = DaskKubernetesEnvironment()

    k8s_dir = os.path.dirname(prefect.environments.execution.dask.k8s.__file__)
    with open(path.join(k8s_dir, "job.yaml")) as job_file:
        job = yaml.safe_load(job_file)
    # Discard the env entries shipped in the template before populating.
    job["spec"]["template"]["spec"]["containers"][0]["env"] = []

    temp_config = {"cloud.graphql": "gql_test", "cloud.auth_token": "auth_test"}
    with set_temporary_config(temp_config), prefect.context(
        flow_run_id="id_test", namespace="namespace_test"
    ):
        populated = dask_env._populate_scheduler_spec_yaml(
            yaml_obj=job, docker_name="test1/test2:test3", flow_file_path="test4"
        )

    container = populated["spec"]["template"]["spec"]["containers"][0]
    expected_values = [
        "gql_test",
        "auth_test",
        "id_test",
        "namespace_test",
        "test1/test2:test3",
        "test4",
        "false",
        "prefect.engine.cloud.CloudFlowRunner",
        "prefect.engine.cloud.CloudTaskRunner",
        "prefect.engine.executors.DaskExecutor",
        "true",
    ]
    for position, expected in enumerate(expected_values):
        assert container["env"][position]["value"] == expected

    assert container["image"] == "test1/test2:test3"
def test_populate_worker_pod_yaml():
    """Populate the bundled ``worker_pod.yaml`` and check labels, env values and image."""
    dask_env = DaskKubernetesEnvironment()

    k8s_dir = os.path.dirname(prefect.environments.execution.dask.k8s.__file__)
    with open(path.join(k8s_dir, "worker_pod.yaml")) as pod_file:
        pod = yaml.safe_load(pod_file)

    temp_config = {"cloud.graphql": "gql_test", "cloud.auth_token": "auth_test"}
    with set_temporary_config(temp_config), prefect.context(
        flow_run_id="id_test", image="my_image"
    ):
        populated = dask_env._populate_worker_pod_yaml(yaml_obj=pod)

    labels = populated["metadata"]["labels"]
    assert labels["identifier"] == dask_env.identifier_label
    assert labels["flow_run_id"] == "id_test"

    container = populated["spec"]["containers"][0]
    for position, expected in enumerate(["gql_test", "auth_test", "id_test"]):
        assert container["env"][position]["value"] == expected

    assert container["image"] == "my_image"
def test_initialize_environment_with_spec_populates(monkeypatch):
    """Spec files passed to the constructor are loaded into the private spec attributes."""
    with tempfile.TemporaryDirectory() as directory:
        scheduler_path = os.path.join(directory, "scheduler.yaml")
        worker_path = os.path.join(directory, "worker.yaml")

        # Each spec file holds a single bare word.
        for spec_path, content in (
            (scheduler_path, "scheduler"),
            (worker_path, "worker"),
        ):
            with open(spec_path, "w+") as spec_file:
                spec_file.write(content)

        dask_env = DaskKubernetesEnvironment(
            scheduler_spec_file=scheduler_path,
            worker_spec_file=worker_path,
        )

        assert dask_env._scheduler_spec == "scheduler"
        assert dask_env._worker_spec == "worker"
def test_create_dask_environment_args():
    """Constructor keyword arguments, including metadata and pull secret, are stored as given."""
    init_kwargs = {
        "min_workers": 5,
        "max_workers": 6,
        "work_stealing": False,
        "scheduler_logs": True,
        "private_registry": True,
        "docker_secret": "docker",
        "metadata": {"test": "here"},
        "image_pull_secret": "secret",
    }
    dask_env = DaskKubernetesEnvironment(**init_kwargs)

    assert dask_env
    assert dask_env.min_workers == 5
    assert dask_env.max_workers == 6
    assert dask_env.work_stealing is False
    assert dask_env.scheduler_logs is True
    assert dask_env.private_registry is True
    assert dask_env.docker_secret == "docker"
    assert dask_env.metadata == {"test": "here"}
    assert dask_env.image_pull_secret == "secret"
def test_roundtrip_cloudpickle():
    """Loaded scheduler and worker specs survive a cloudpickle dumps/loads round trip."""
    with tempfile.TemporaryDirectory() as directory:
        scheduler_path = os.path.join(directory, "scheduler.yaml")
        worker_path = os.path.join(directory, "worker.yaml")

        # Each spec file holds a single bare word.
        for spec_path, content in (
            (scheduler_path, "scheduler"),
            (worker_path, "worker"),
        ):
            with open(spec_path, "w+") as spec_file:
                spec_file.write(content)

        dask_env = DaskKubernetesEnvironment(
            scheduler_spec_file=scheduler_path,
            worker_spec_file=worker_path,
        )
        assert dask_env._scheduler_spec == "scheduler"
        assert dask_env._worker_spec == "worker"

        payload = cloudpickle.dumps(dask_env)
        restored = cloudpickle.loads(payload)

        assert isinstance(restored, DaskKubernetesEnvironment)
        assert restored._scheduler_spec == "scheduler"
        assert restored._worker_spec == "worker"
def environment(self) -> Environment:
    """
    Build the Prefect environment the pipeline runs in.

    Returns
    -------
    prefect.environments.Environment
        A :class:`prefect.environments.DaskKubernetesEnvironment`
        configured with 1 to 30 workers, the ``job.yaml`` and
        ``worker_pod.yaml`` spec files located under ``HERE``, and
        ``pangeoforge/default-image`` as the image metadata.
    """
    spec_files = {
        "scheduler_spec_file": str(HERE / "job.yaml"),
        "worker_spec_file": str(HERE / "worker_pod.yaml"),
    }
    return DaskKubernetesEnvironment(
        min_workers=1,
        max_workers=30,
        metadata={"image": "pangeoforge/default-image"},
        **spec_files,
    )
def test_execute(monkeypatch):
    """Executing a Docker-stored flow submits a ``batch/v1`` job body to the mocked batch API."""
    monkeypatch.setattr("kubernetes.config", MagicMock())

    batchv1 = MagicMock()
    monkeypatch.setattr(
        "kubernetes.client", MagicMock(BatchV1Api=MagicMock(return_value=batchv1))
    )

    # Build the environment once, after the kubernetes mocks are installed.
    environment = DaskKubernetesEnvironment()

    storage = Docker(registry_url="test1", image_name="test2", image_tag="test3")
    # ``base_flow`` is shared at module level; attaching the storage through
    # monkeypatch restores its previous value at teardown instead of leaking
    # this test's storage into other tests.
    monkeypatch.setattr(base_flow, "storage", storage, raising=False)

    with set_temporary_config({"cloud.auth_token": "test"}):
        environment.execute(flow=base_flow)

    # call_args[1] holds the keyword arguments of the last call.
    submitted_body = batchv1.create_namespaced_job.call_args[1]["body"]
    assert submitted_body["apiVersion"] == "batch/v1"
def test_dask_environment_dependencies():
    """The environment reports ``kubernetes`` as its only dependency."""
    dask_env = DaskKubernetesEnvironment()
    expected_dependencies = ["kubernetes"]
    assert dask_env.dependencies == expected_dependencies
from prefect import Flow, task
from prefect.environments import DaskKubernetesEnvironment
from prefect.environments.storage import S3


# Source task: yields the example payload.
@task
def get_value():
    return "Example!"


# Sink task: prints whatever it receives.
@task
def output_value(value):
    print(value)


flow = Flow("dk8s-debug")

# Wire the two tasks together with the imperative API.
output_value.set_upstream(get_value, flow=flow)
output_value.bind(value=get_value, flow=flow)

# Flow storage and execution environment.
flow.storage = S3(bucket="my-prefect-flows", secrets=["AWS_CREDENTIALS"])
flow.environment = DaskKubernetesEnvironment(
    metadata={"image": "joshmeek18/flows:all_extras9"}
)

flow.register(project_name="Demo")
def test_execute_improper_storage():
    """Calling ``execute`` with ``Local`` storage raises ``TypeError``."""
    dask_env = DaskKubernetesEnvironment()
    local_storage = Local()
    with pytest.raises(TypeError):
        dask_env.execute(storage=local_storage, flow_location="")
logger.debug("DEBUG") logger.info("INFO") logger.critical("CRITICAL") return x + 1 @task def reduce_task(x): logger = prefect.context.get("logger") logger.info(sum(x)) with Flow( "Map / Reduce dk8s", storage=Docker(registry_url="joshmeek18", image_name="flows", prefect_version="extraloggers"), # environment=RemoteEnvironment( # executor="prefect.engine.executors.DaskExecutor", # executor_kwargs={"address": "tcp://dask-scheduler:8786"}, # ), environment=DaskKubernetesEnvironment(), ) as flow: numbers = numbers_task() first_map = map_task.map(numbers) second_map = map_task.map(first_map) reduction = reduce_task(second_map) flow.register(project_name="QA")
def test_execute_storage_missing_fields():
    """Calling ``execute`` with a ``Docker`` storage built without arguments raises ``ValueError``."""
    dask_env = DaskKubernetesEnvironment()
    bare_storage = Docker()
    with pytest.raises(ValueError):
        dask_env.execute(storage=bare_storage, flow_location="")
# Source task: sleeps for ten seconds, then yields the example payload.
@task
def get_value():
    time.sleep(10)
    return "Example!"


# Sink task: prints whatever it receives.
@task
def output_value(value):
    print(value)


with Flow(
    "local-dask-k8s",
    environment=DaskKubernetesEnvironment(min_workers=2, max_workers=4),
    storage=Docker(
        registry_url="joshmeek18", image_name="flows", prefect_version="master"
    ),
) as flow:
    # Add eleven independent get_value calls to the flow.
    for _ in range(11):
        get_value()

from prefect.engine.executors import DaskExecutor
from prefect.environments.storage import Docker


# Source task: yields the example payload.
@task
def get_value():
    return "Example!"


# Sink task: prints whatever it receives.
@task
def output_value(value):
    print(value)


custom_worker_environment = DaskKubernetesEnvironment(
    worker_spec_file="worker_spec.yaml"
)
docker_storage = Docker(
    registry_url="joshmeek18",
    image_name="flows",
    image_tag="qqq",
    prefect_version="test_branch",
)
flow = Flow(
    "Custom Worker Spec Dask Kubernetes Example",
    environment=custom_worker_environment,
    storage=docker_storage,
)

# Wire the two tasks together with the imperative API.
output_value.set_upstream(get_value, flow=flow)
output_value.bind(value=get_value, flow=flow)

# print(flow.environment._worker_spec)
flow.register(project_name="Demo")
# out = flow.save()
def test_populate_custom_yaml_specs_with_logging_vars(log_flag):
    """Logging variables already present in custom specs keep their values and positions.

    The scheduler job spec and the worker pod spec are each seeded with three
    ``PREFECT__LOGGING__*`` entries before being populated. ``log_flag`` is
    used as the ``logging.log_to_cloud`` setting.
    """
    dask_env = DaskKubernetesEnvironment()
    k8s_dir = os.path.dirname(prefect.environments.execution.dask.k8s.__file__)

    log_vars = [
        {"name": "PREFECT__LOGGING__LOG_TO_CLOUD", "value": "YES"},
        {"name": "PREFECT__LOGGING__LEVEL", "value": "NO"},
        {"name": "PREFECT__LOGGING__EXTRA_LOGGERS", "value": "MAYBE"},
    ]
    seeded_values = ("YES", "NO", "MAYBE")

    def fresh_config():
        # A new dict (and a new logger list) for each use.
        return {
            "cloud.graphql": "gql_test",
            "cloud.auth_token": "auth_test",
            "logging.log_to_cloud": log_flag,
            "logging.extra_loggers": ["test_logger"],
        }

    # scheduler
    with open(path.join(k8s_dir, "job.yaml")) as job_file:
        job = yaml.safe_load(job_file)
    job["spec"]["template"]["spec"]["containers"][0]["env"] = list(log_vars)

    with set_temporary_config(fresh_config()), prefect.context(
        flow_run_id="id_test", namespace="namespace_test"
    ):
        populated_job = dask_env._populate_scheduler_spec_yaml(
            yaml_obj=job, docker_name="test1/test2:test3"
        )

    assert populated_job["metadata"]["name"] == "prefect-dask-job-{}".format(
        dask_env.identifier_label
    )

    scheduler_env = populated_job["spec"]["template"]["spec"]["containers"][0]["env"]
    for position, expected in enumerate(seeded_values):
        assert scheduler_env[position]["value"] == expected
    assert len(scheduler_env) == 12

    # worker
    with open(path.join(k8s_dir, "worker_pod.yaml")) as pod_file:
        pod = yaml.safe_load(pod_file)
    pod["spec"]["containers"][0]["env"] = list(log_vars)

    with set_temporary_config(fresh_config()), prefect.context(
        flow_run_id="id_test", image="my_image"
    ):
        populated_pod = dask_env._populate_worker_spec_yaml(yaml_obj=pod)

    pod_labels = populated_pod["metadata"]["labels"]
    assert pod_labels["prefect.io/identifier"] == dask_env.identifier_label
    assert pod_labels["prefect.io/flow_run_id"] == "id_test"

    worker_env = populated_pod["spec"]["containers"][0]["env"]
    for position, expected in enumerate(seeded_values):
        assert worker_env[position]["value"] == expected
    assert len(worker_env) == 10
def test_create_dask_environment_labels():
    """Labels passed as a list are exposed as a set."""
    dask_env = DaskKubernetesEnvironment(labels=["foo"])
    assert dask_env.labels == {"foo"}
def test_create_dask_environment_identifier_label_none():
    """``identifier_label`` is truthy even after ``_identifier_label`` is set to ``None``."""
    dask_env = DaskKubernetesEnvironment()
    dask_env._identifier_label = None

    label = dask_env.identifier_label
    assert label
def test_create_dask_environment_identifier_label():
    """A freshly built environment has a truthy ``identifier_label``."""
    dask_env = DaskKubernetesEnvironment()

    label = dask_env.identifier_label
    assert label
def test_setup_dask_environment_passes():
    """``setup`` completes when given a ``Docker`` storage built without arguments."""
    dask_env = DaskKubernetesEnvironment()
    docker_storage = Docker()

    dask_env.setup(storage=docker_storage)

    assert dask_env
def test_setup_dask_environment_passes():
    """``setup`` completes when given the shared ``base_flow``."""
    dask_env = DaskKubernetesEnvironment()

    dask_env.setup(flow=base_flow)

    assert dask_env
parameter_defaults={"length": 20}) schedule = Schedule(clocks=[clock1, clock2]) # Deployment # Storage of code retrieved from GitHub repository at runtime from prefect.environments.storage import GitHub, Docker, S3, GCS, Local storage = GitHub(repo="amazing_flows", path="flows/evolving_etl.py", secrets=["GITHUB_ACCESS_TOKEN"]) # Environment configuration to dynamically spawn Dask clusters on Kubernetes for FlowRun from prefect.environments import DaskKubernetesEnvironment environment = DaskKubernetesEnvironment(worker_spec_file="worker_spec.yaml", labels=["Evolving", "ETL"]) # Define Tasks in a Flow Context with Flow('Evolving ETL', result=S3Result(bucket="flow-result-storage"), state_handlers=[my_state_handler], schedule=schedule, storage=storage, environment=environment) as flow: with case(length, 5): e = extract(length) with case(length, 50): e = extract(length) t = transform.map(e) l = load(t)