Example #1
def test_create_dask_environment():
    environment = DaskKubernetesEnvironment()
    assert environment
    assert environment.min_workers == 1
    assert environment.max_workers == 2
    assert environment.work_stealing is False
    assert environment.scheduler_logs is False
    assert environment.private_registry is False
    assert environment.docker_secret is None
    assert environment.labels == set()
    assert environment.on_start is None
    assert environment.on_exit is None
    assert environment.logger.name == "prefect.DaskKubernetesEnvironment"
Example #2
def test_populate_custom_worker_spec_yaml(log_flag):
    environment = DaskKubernetesEnvironment()

    file_path = os.path.dirname(
        prefect.environments.execution.dask.k8s.__file__)

    with open(path.join(file_path, "worker_pod.yaml")) as pod_file:
        pod = yaml.safe_load(pod_file)
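        # reset the pod's env list so the test can assert exactly what gets populated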
        pod["spec"]["containers"][0]["env"] = []

    with set_temporary_config({
            "cloud.graphql": "gql_test",
            "cloud.auth_token": "auth_test",
            "logging.log_to_cloud": log_flag,
            "logging.extra_loggers": "['test_logger']",
    }):
        with prefect.context(flow_run_id="id_test", image="my_image"):
            yaml_obj = environment._populate_worker_spec_yaml(yaml_obj=pod)

    assert yaml_obj["metadata"]["labels"][
        "identifier"] == environment.identifier_label
    assert yaml_obj["metadata"]["labels"]["flow_run_id"] == "id_test"

    env = yaml_obj["spec"]["containers"][0]["env"]

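    # _populate_worker_spec_yaml appends these values in a fixed order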
    assert env[0]["value"] == "gql_test"
    assert env[1]["value"] == "auth_test"
    assert env[2]["value"] == "id_test"
    assert env[3]["value"] == "false"
    assert env[4]["value"] == "prefect.engine.cloud.CloudFlowRunner"
    assert env[5]["value"] == "prefect.engine.cloud.CloudTaskRunner"
    assert env[6]["value"] == "prefect.engine.executors.DaskExecutor"
    assert env[7]["value"] == str(log_flag).lower()
    assert (
        env[8]["value"] ==
        "['test_logger', 'dask_kubernetes.core', 'distributed.deploy.adaptive', 'kubernetes']"
    )

    assert yaml_obj["spec"]["containers"][0]["image"] == "my_image"
Example #3
def test_setup_doesnt_pass_if_private_registry(monkeypatch):
    environment = DaskKubernetesEnvironment(private_registry=True)
    assert environment.docker_secret == "DOCKER_REGISTRY_CREDENTIALS"

    config = MagicMock()
    monkeypatch.setattr("kubernetes.config", config)

    v1 = MagicMock()
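    # no secrets exist in the namespace, so setup must create one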
    v1.list_namespaced_secret.return_value = MagicMock(items=[])
    monkeypatch.setattr(
        "kubernetes.client", MagicMock(CoreV1Api=MagicMock(return_value=v1))
    )

    create_secret = MagicMock()
    monkeypatch.setattr(
        "prefect.environments.DaskKubernetesEnvironment._create_namespaced_secret",
        create_secret,
    )
    with set_temporary_config({"cloud.auth_token": "test"}):
        environment.setup(flow=base_flow)

    assert create_secret.called
Example #4
def test_populate_job_yaml():
    environment = DaskKubernetesEnvironment()

    file_path = os.path.dirname(prefect.environments.execution.dask.k8s.__file__)

    with open(path.join(file_path, "job.yaml")) as job_file:
        job = yaml.safe_load(job_file)

    with set_temporary_config(
        {"cloud.graphql": "gql_test", "cloud.auth_token": "auth_test"}
    ):
        with prefect.context(flow_run_id="id_test", namespace="namespace_test"):
            yaml_obj = environment._populate_job_yaml(
                yaml_obj=job, docker_name="test1/test2:test3", flow_file_path="test4"
            )

    assert yaml_obj["metadata"]["name"] == "prefect-dask-job-{}".format(
        environment.identifier_label
    )
    assert yaml_obj["metadata"]["labels"]["identifier"] == environment.identifier_label
    assert yaml_obj["metadata"]["labels"]["flow_run_id"] == "id_test"
    assert (
        yaml_obj["spec"]["template"]["metadata"]["labels"]["identifier"]
        == environment.identifier_label
    )

    env = yaml_obj["spec"]["template"]["spec"]["containers"][0]["env"]

    assert env[0]["value"] == "gql_test"
    assert env[1]["value"] == "auth_test"
    assert env[2]["value"] == "id_test"
    assert env[3]["value"] == "namespace_test"
    assert env[4]["value"] == "test1/test2:test3"
    assert env[5]["value"] == "test4"

    assert (
        yaml_obj["spec"]["template"]["spec"]["containers"][0]["image"]
        == "test1/test2:test3"
    )
Example #5
def test_create_secret_isnt_called_if_exists(monkeypatch):
    environment = DaskKubernetesEnvironment(private_registry=True)

    config = MagicMock()
    monkeypatch.setattr("kubernetes.config", config)

    secret = MagicMock()
    secret.metadata.name = "foo-docker"
    v1 = MagicMock()
    v1.list_namespaced_secret.return_value = MagicMock(items=[secret])
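    # a matching secret is already present, so setup should skip creating one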
    monkeypatch.setattr("kubernetes.client",
                        MagicMock(CoreV1Api=MagicMock(return_value=v1)))

    create_secret = MagicMock()
    monkeypatch.setattr(
        "prefect.environments.DaskKubernetesEnvironment._create_namespaced_secret",
        create_secret,
    )
    with set_temporary_config({"cloud.auth_token": "test"}):
        with prefect.context(namespace="foo"):
            environment.setup(flow=base_flow)

    assert not create_secret.called
Example #6
def test_populate_worker_pod_yaml_with_multiple_image_pull_secrets():
    environment = DaskKubernetesEnvironment(
        image_pull_secret="some-secret,another-one")

    file_path = os.path.dirname(
        prefect.environments.execution.dask.k8s.__file__)

    with open(path.join(file_path, "worker_pod.yaml")) as pod_file:
        pod = yaml.safe_load(pod_file)

    with set_temporary_config({
            "cloud.graphql": "gql_test",
            "cloud.auth_token": "auth_test"
    }):
        with prefect.context(flow_run_id="id_test",
                             image="my_image",
                             namespace="foo-man"):
            yaml_obj = environment._populate_worker_pod_yaml(yaml_obj=pod)

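    # the comma-separated string should yield one imagePullSecrets entry per name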
    assert yaml_obj["spec"]["imagePullSecrets"] == [
        dict(name="some-secret"),
        dict(name="another-one"),
    ]
Example #7
def test_create_dask_environment_args():
    environment = DaskKubernetesEnvironment(
        min_workers=5,
        max_workers=6,
        work_stealing=True,
        private_registry=True,
        docker_secret="docker",
    )
    assert environment
    assert environment.min_workers == 5
    assert environment.max_workers == 6
    assert environment.work_stealing is True
    assert environment.private_registry is True
    assert environment.docker_secret == "docker"
Example #8
def test_populate_custom_scheduler_spec_yaml():
    environment = DaskKubernetesEnvironment()

    file_path = os.path.dirname(
        prefect.environments.execution.dask.k8s.__file__)

    with open(path.join(file_path, "job.yaml")) as job_file:
        job = yaml.safe_load(job_file)
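        # clear the scheduler's env list so only the populated values are asserted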
        job["spec"]["template"]["spec"]["containers"][0]["env"] = []

    with set_temporary_config({
            "cloud.graphql": "gql_test",
            "cloud.auth_token": "auth_test"
    }):
        with prefect.context(flow_run_id="id_test",
                             namespace="namespace_test"):
            yaml_obj = environment._populate_scheduler_spec_yaml(
                yaml_obj=job,
                docker_name="test1/test2:test3",
                flow_file_path="test4")

    env = yaml_obj["spec"]["template"]["spec"]["containers"][0]["env"]

    assert env[0]["value"] == "gql_test"
    assert env[1]["value"] == "auth_test"
    assert env[2]["value"] == "id_test"
    assert env[3]["value"] == "namespace_test"
    assert env[4]["value"] == "test1/test2:test3"
    assert env[5]["value"] == "test4"
    assert env[6]["value"] == "false"
    assert env[7]["value"] == "prefect.engine.cloud.CloudFlowRunner"
    assert env[8]["value"] == "prefect.engine.cloud.CloudTaskRunner"
    assert env[9]["value"] == "prefect.engine.executors.DaskExecutor"
    assert env[10]["value"] == "true"

    assert (yaml_obj["spec"]["template"]["spec"]["containers"][0]["image"] ==
            "test1/test2:test3")
Example #9
def test_populate_worker_pod_yaml():
    environment = DaskKubernetesEnvironment()

    file_path = os.path.dirname(prefect.environments.execution.dask.k8s.__file__)

    with open(path.join(file_path, "worker_pod.yaml")) as pod_file:
        pod = yaml.safe_load(pod_file)

    with set_temporary_config(
        {"cloud.graphql": "gql_test", "cloud.auth_token": "auth_test"}
    ):
        with prefect.context(flow_run_id="id_test", image="my_image"):
            yaml_obj = environment._populate_worker_pod_yaml(yaml_obj=pod)

    assert yaml_obj["metadata"]["labels"]["identifier"] == environment.identifier_label
    assert yaml_obj["metadata"]["labels"]["flow_run_id"] == "id_test"

    env = yaml_obj["spec"]["containers"][0]["env"]

    assert env[0]["value"] == "gql_test"
    assert env[1]["value"] == "auth_test"
    assert env[2]["value"] == "id_test"

    assert yaml_obj["spec"]["containers"][0]["image"] == "my_image"
Example #10
def test_initialize_environment_with_spec_populates(monkeypatch):

    with tempfile.TemporaryDirectory() as directory:

        with open(os.path.join(directory, "scheduler.yaml"), "w+") as file:
            file.write("scheduler")
        with open(os.path.join(directory, "worker.yaml"), "w+") as file:
            file.write("worker")

        environment = DaskKubernetesEnvironment(
            scheduler_spec_file=os.path.join(directory, "scheduler.yaml"),
            worker_spec_file=os.path.join(directory, "worker.yaml"),
        )

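        # the spec file contents are read eagerly at initialization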
        assert environment._scheduler_spec == "scheduler"
        assert environment._worker_spec == "worker"
Example #11
def test_create_dask_environment_args():
    environment = DaskKubernetesEnvironment(
        min_workers=5,
        max_workers=6,
        work_stealing=False,
        scheduler_logs=True,
        private_registry=True,
        docker_secret="docker",
        metadata={"test": "here"},
        image_pull_secret="secret",
    )
    assert environment
    assert environment.min_workers == 5
    assert environment.max_workers == 6
    assert environment.work_stealing is False
    assert environment.scheduler_logs is True
    assert environment.private_registry is True
    assert environment.docker_secret == "docker"
    assert environment.metadata == {"test": "here"}
    assert environment.image_pull_secret == "secret"
Example #12
def test_roundtrip_cloudpickle():
    with tempfile.TemporaryDirectory() as directory:

        with open(os.path.join(directory, "scheduler.yaml"), "w+") as file:
            file.write("scheduler")
        with open(os.path.join(directory, "worker.yaml"), "w+") as file:
            file.write("worker")

        environment = DaskKubernetesEnvironment(
            scheduler_spec_file=os.path.join(directory, "scheduler.yaml"),
            worker_spec_file=os.path.join(directory, "worker.yaml"),
        )

        assert environment._scheduler_spec == "scheduler"
        assert environment._worker_spec == "worker"

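        # a cloudpickle roundtrip should preserve the loaded specs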
        new = cloudpickle.loads(cloudpickle.dumps(environment))
        assert isinstance(new, DaskKubernetesEnvironment)
        assert new._scheduler_spec == "scheduler"
        assert new._worker_spec == "worker"
Example #13
    def environment(self) -> Environment:
        """
        The pipeline runtime environment.

        Returns
        -------
        prefect.environments.Environment
            An instance of a Prefect Environment. By default
            a :class:`prefect.environments.DaskKubernetesEnvironment`
            is used.
        """
        scheduler_spec_file = str(HERE / "job.yaml")
        worker_spec_file = str(HERE / "worker_pod.yaml")

        environment = DaskKubernetesEnvironment(
            min_workers=1,
            max_workers=30,
            scheduler_spec_file=scheduler_spec_file,
            worker_spec_file=worker_spec_file,
            metadata=dict(image="pangeoforge/default-image"),
        )
        return environment
Example #14
def test_execute(monkeypatch):
    environment = DaskKubernetesEnvironment()

    config = MagicMock()
    monkeypatch.setattr("kubernetes.config", config)

    batchv1 = MagicMock()
    monkeypatch.setattr("kubernetes.client",
                        MagicMock(BatchV1Api=MagicMock(return_value=batchv1)))

    storage = Docker(registry_url="test1",
                     image_name="test2",
                     image_tag="test3")

    flow = base_flow
    flow.storage = storage
    with set_temporary_config({"cloud.auth_token": "test"}):
        environment.execute(flow=flow)

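    # execute should submit a batch/v1 Job through the mocked BatchV1Api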
    assert (batchv1.create_namespaced_job.call_args[1]["body"]["apiVersion"] ==
            "batch/v1")
Example #15
def test_dask_environment_dependencies():
    environment = DaskKubernetesEnvironment()
    assert environment.dependencies == ["kubernetes"]
Example #16
from prefect import task, Flow
from prefect.environments import DaskKubernetesEnvironment
from prefect.environments.storage import S3


@task
def get_value():
    return "Example!"


@task
def output_value(value):
    print(value)


flow = Flow("dk8s-debug")

# set task dependencies using imperative API
output_value.set_upstream(get_value, flow=flow)
output_value.bind(value=get_value, flow=flow)

flow.storage = S3(bucket="my-prefect-flows", secrets=["AWS_CREDENTIALS"])
flow.environment = DaskKubernetesEnvironment(
    metadata={"image": "joshmeek18/flows:all_extras9"})
flow.register(project_name="Demo")
Example #17
def test_execute_improper_storage():
    environment = DaskKubernetesEnvironment()
    with pytest.raises(TypeError):
        environment.execute(storage=Local(), flow_location="")
Example #18
    logger.debug("DEBUG")
    logger.info("INFO")
    logger.critical("CRITICAL")
    return x + 1


@task
def reduce_task(x):
    logger = prefect.context.get("logger")
    logger.info(sum(x))


with Flow(
        "Map / Reduce dk8s",
        storage=Docker(registry_url="joshmeek18",
                       image_name="flows",
                       prefect_version="extraloggers"),
        # environment=RemoteEnvironment(
        #     executor="prefect.engine.executors.DaskExecutor",
        #     executor_kwargs={"address": "tcp://dask-scheduler:8786"},
        # ),
        environment=DaskKubernetesEnvironment(),
) as flow:
    numbers = numbers_task()
    first_map = map_task.map(numbers)
    second_map = map_task.map(first_map)
    reduction = reduce_task(second_map)

flow.register(project_name="QA")
Example #19
def test_execute_storage_missing_fields():
    environment = DaskKubernetesEnvironment()
    with pytest.raises(ValueError):
        environment.execute(storage=Docker(), flow_location="")
Example #20

import time

from prefect import task, Flow
from prefect.engine.executors import DaskExecutor
from prefect.environments import DaskKubernetesEnvironment
from prefect.environments.storage import Docker


@task
def get_value():
    time.sleep(10)
    return "Example!"


@task
def output_value(value):
    print(value)


with Flow(
    "local-dask-k8s",
    environment=DaskKubernetesEnvironment(min_workers=2, max_workers=4),
    storage=Docker(registry_url="joshmeek18", image_name="flows", prefect_version="master"),
) as flow:
    get_value()
    get_value()
    get_value()
    get_value()
    get_value()
    get_value()
    get_value()
    get_value()
    get_value()
    get_value()
    get_value()

Example #21
from prefect import task, Flow
from prefect.environments import DaskKubernetesEnvironment
from prefect.environments.storage import Docker


@task
def get_value():
    return "Example!"


@task
def output_value(value):
    print(value)


flow = Flow(
    "Custom Worker Spec Dask Kubernetes Example",
    environment=DaskKubernetesEnvironment(worker_spec_file="worker_spec.yaml"),
    storage=Docker(registry_url="joshmeek18",
                   image_name="flows",
                   image_tag="qqq",
                   prefect_version="test_branch"),
)

# set task dependencies using imperative API
output_value.set_upstream(get_value, flow=flow)
output_value.bind(value=get_value, flow=flow)

# print(flow.environment._worker_spec)

flow.register(project_name="Demo")
# out = flow.save()
Example #22
def test_populate_custom_yaml_specs_with_logging_vars(log_flag):
    environment = DaskKubernetesEnvironment()

    file_path = os.path.dirname(
        prefect.environments.execution.dask.k8s.__file__)

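    # pre-seeded logging vars that the populate methods should leave in place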
    log_vars = [
        {
            "name": "PREFECT__LOGGING__LOG_TO_CLOUD",
            "value": "YES",
        },
        {
            "name": "PREFECT__LOGGING__LEVEL",
            "value": "NO",
        },
        {
            "name": "PREFECT__LOGGING__EXTRA_LOGGERS",
            "value": "MAYBE",
        },
    ]

    with open(path.join(file_path, "job.yaml")) as job_file:
        job = yaml.safe_load(job_file)
        job["spec"]["template"]["spec"]["containers"][0]["env"] = []
        job["spec"]["template"]["spec"]["containers"][0]["env"].extend(
            log_vars)

    with set_temporary_config({
            "cloud.graphql": "gql_test",
            "cloud.auth_token": "auth_test",
            "logging.log_to_cloud": log_flag,
            "logging.extra_loggers": ["test_logger"],
    }):
        with prefect.context(flow_run_id="id_test",
                             namespace="namespace_test"):
            yaml_obj = environment._populate_scheduler_spec_yaml(
                yaml_obj=job, docker_name="test1/test2:test3")

    assert yaml_obj["metadata"]["name"] == "prefect-dask-job-{}".format(
        environment.identifier_label)

    env = yaml_obj["spec"]["template"]["spec"]["containers"][0]["env"]

    assert env[0]["value"] == "YES"
    assert env[1]["value"] == "NO"
    assert env[2]["value"] == "MAYBE"
    assert len(env) == 12

    # worker
    with open(path.join(file_path, "worker_pod.yaml")) as pod_file:
        pod = yaml.safe_load(pod_file)
        pod["spec"]["containers"][0]["env"] = []
        pod["spec"]["containers"][0]["env"].extend(log_vars)

    with set_temporary_config({
            "cloud.graphql": "gql_test",
            "cloud.auth_token": "auth_test",
            "logging.log_to_cloud": log_flag,
            "logging.extra_loggers": ["test_logger"],
    }):
        with prefect.context(flow_run_id="id_test", image="my_image"):
            yaml_obj = environment._populate_worker_spec_yaml(yaml_obj=pod)

    assert (yaml_obj["metadata"]["labels"]["prefect.io/identifier"] ==
            environment.identifier_label)
    assert yaml_obj["metadata"]["labels"][
        "prefect.io/flow_run_id"] == "id_test"

    env = yaml_obj["spec"]["containers"][0]["env"]

    assert env[0]["value"] == "YES"
    assert env[1]["value"] == "NO"
    assert env[2]["value"] == "MAYBE"
    assert len(env) == 10
Example #23
def test_create_dask_environment_labels():
    environment = DaskKubernetesEnvironment(labels=["foo"])
    assert environment.labels == set(["foo"])
Example #24
def test_create_dask_environment_identifier_label_none():
    environment = DaskKubernetesEnvironment()
    environment._identifier_label = None
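    # the property should lazily regenerate the label once it has been cleared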
    assert environment.identifier_label
Example #25
def test_create_dask_environment_identifier_label():
    environment = DaskKubernetesEnvironment()
    assert environment.identifier_label
Example #26
def test_setup_dask_environment_passes():
    environment = DaskKubernetesEnvironment()
    environment.setup(storage=Docker())
    assert environment
Example #27
def test_setup_dask_environment_passes():
    environment = DaskKubernetesEnvironment()
    environment.setup(flow=base_flow)
    assert environment
Example #28
        parameter_defaults={"length": 20})

schedule = Schedule(clocks=[clock1, clock2])

# Deployment
# Storage of code retrieved from GitHub repository at runtime
from prefect.environments.storage import GitHub, Docker, S3, GCS, Local

storage = GitHub(repo="amazing_flows",
                 path="flows/evolving_etl.py",
                 secrets=["GITHUB_ACCESS_TOKEN"])

# Environment configuration to dynamically spawn Dask clusters on Kubernetes for FlowRun
from prefect.environments import DaskKubernetesEnvironment

environment = DaskKubernetesEnvironment(worker_spec_file="worker_spec.yaml",
                                        labels=["Evolving", "ETL"])

# Define Tasks in a Flow Context
with Flow('Evolving ETL',
          result=S3Result(bucket="flow-result-storage"),
          state_handlers=[my_state_handler],
          schedule=schedule,
          storage=storage,
          environment=environment) as flow:
    with case(length, 5):
        e = extract(length)
    with case(length, 50):
        e = extract(length)

    t = transform.map(e)
    l = load(t)