def test_create_k8s_job_callbacks():
    """Labels and start/exit callbacks given to the constructor are kept on the instance."""

    def noop():
        pass

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Any file content will do here; only the constructor arguments are checked.
        spec_path = os.path.join(tmp_dir, "job.yaml")
        with open(spec_path, "w+") as spec_file:
            spec_file.write("job")

        env = KubernetesJobEnvironment(
            job_spec_file=spec_path,
            labels=["foo"],
            on_start=noop,
            on_exit=noop,
        )

        assert env.labels == {"foo"}
        assert env.on_start is noop
        assert env.on_exit is noop
def test_roundtrip_cloudpickle():
    """The loaded job spec survives a cloudpickle round trip; the identifier label does not."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        spec_path = os.path.join(tmp_dir, "job.yaml")
        with open(spec_path, "w+") as spec_file:
            spec_file.write("job")

        original = KubernetesJobEnvironment(job_spec_file=spec_path)
        assert original._job_spec == "job"

        restored = cloudpickle.loads(cloudpickle.dumps(original))
        assert isinstance(restored, KubernetesJobEnvironment)
        assert restored._job_spec == "job"

        # Identifier labels do not persist: both objects have one, but they differ.
        assert original.identifier_label
        assert restored.identifier_label
        assert original.identifier_label != restored.identifier_label
def test_k8s_job_environment_dependencies():
    """An environment built with no arguments reports ``kubernetes`` as its only dependency."""
    env = KubernetesJobEnvironment()
    assert env.dependencies == ["kubernetes"]
def test_populate_job_yaml_multiple_containers():
    """Populate a two-container job spec and check what each container receives.

    Both containers must get the same environment variables, while only the
    first one gets the flow image, the shell command and the flow-run args.
    """
    flow_image = "test1/test2:test3"
    flow_run_args = [
        "python -c 'import prefect; prefect.Flow.load(prefect.context.flow_file_path).environment.run_flow()'"
    ]
    # Position in the container's ``env`` list -> expected ``value`` entry.
    expected_env_values = {
        0: "gql_test",
        1: "auth_test",
        2: "id_test",
        3: "namespace_test",
        4: flow_image,
        5: "test4",
        10: "['test_logger']",
    }

    with tempfile.TemporaryDirectory() as tmp_dir:
        spec_path = os.path.join(tmp_dir, "job.yaml")
        with open(spec_path, "w+") as spec_file:
            spec_file.write("job")

        environment = KubernetesJobEnvironment(job_spec_file=spec_path)

        # Start from the job.yaml that sits next to the dask k8s environment module.
        template_dir = os.path.dirname(prefect.environments.execution.dask.k8s.__file__)
        with open(path.join(template_dir, "job.yaml")) as job_file:
            job = yaml.safe_load(job_file)

        # Generate yaml object with multiple containers
        job_containers = job["spec"]["template"]["spec"]["containers"]
        job_containers[0]["env"] = []
        job_containers.append(copy.deepcopy(job_containers[0]))
        job_containers[1]["env"] = []

        temporary_settings = {
            "cloud.graphql": "gql_test",
            "cloud.auth_token": "auth_test",
            "logging.extra_loggers": "['test_logger']",
        }
        with set_temporary_config(temporary_settings):
            with prefect.context(flow_run_id="id_test", namespace="namespace_test"):
                yaml_obj = environment._populate_job_spec_yaml(
                    yaml_obj=job,
                    docker_name=flow_image,
                    flow_file_path="test4",
                )

        job_labels = yaml_obj["metadata"]["labels"]
        assert job_labels["identifier"] == environment.identifier_label
        assert job_labels["flow_run_id"] == "id_test"
        pod_labels = yaml_obj["spec"]["template"]["metadata"]["labels"]
        assert pod_labels["identifier"] == environment.identifier_label

        first, second = yaml_obj["spec"]["template"]["spec"]["containers"][:2]

        # Assert First Container
        for position, expected in expected_env_values.items():
            assert first["env"][position]["value"] == expected
        assert first["image"] == flow_image
        assert first["command"] == ["/bin/sh", "-c"]
        assert first["args"] == flow_run_args

        # Assert Second Container
        for position, expected in expected_env_values.items():
            assert second["env"][position]["value"] == expected
        assert second["image"] != flow_image
        assert second["args"] != flow_run_args
def test_create_k8s_job_environment_labels(job_spec_file):
    """A list of labels passed to the constructor is exposed as a set."""
    env = KubernetesJobEnvironment(job_spec_file=job_spec_file, labels=["foo"])
    assert env.labels == {"foo"}
def test_populate_job_yaml_multiple_containers(
    job_spec_file, job, default_command_args
):
    """Populate run-time details on a two-container job spec and check each container.

    Both containers must get the same environment variables, while only the
    first one gets the flow image, the shell command and the default args.
    """
    flow_image = "test1/test2:test3"
    # Position in the container's ``env`` list -> expected ``value`` entry.
    expected_env_values = {
        0: "gql_test",
        1: "auth_test",
        2: "id_test",
        3: "namespace_test",
        4: flow_image,
        9: "['test_logger']",
    }

    environment = KubernetesJobEnvironment(job_spec_file=job_spec_file)

    # Generate yaml object with multiple containers
    job_containers = job["spec"]["template"]["spec"]["containers"]
    job_containers[0]["env"] = []
    job_containers.append(copy.deepcopy(job_containers[0]))
    job_containers[1]["env"] = []
    job_containers[1]["args"] = "echo 'other command'"
    # Replace the spec loaded from file with the two-container spec built above.
    environment._job_spec = job

    temporary_settings = {
        "cloud.graphql": "gql_test",
        "cloud.auth_token": "auth_test",
        "logging.extra_loggers": "['test_logger']",
    }
    with set_temporary_config(temporary_settings):
        with prefect.context(flow_run_id="id_test", namespace="namespace_test"):
            yaml_obj = environment._populate_run_time_job_spec_details(
                docker_name=flow_image,
            )

    job_labels = yaml_obj["metadata"]["labels"]
    assert job_labels["prefect.io/identifier"] == environment.identifier_label
    assert job_labels["prefect.io/flow_run_id"] == "id_test"
    pod_labels = yaml_obj["spec"]["template"]["metadata"]["labels"]
    assert pod_labels["prefect.io/identifier"] == environment.identifier_label

    first, second = yaml_obj["spec"]["template"]["spec"]["containers"][:2]

    # Assert First Container
    for position, expected in expected_env_values.items():
        assert first["env"][position]["value"] == expected
    assert first["image"] == flow_image
    assert first["command"] == ["/bin/sh", "-c"]
    assert first["args"] == default_command_args

    # Assert Second Container
    for position, expected in expected_env_values.items():
        assert second["env"][position]["value"] == expected
    assert second["image"] != flow_image
    assert second["args"] != default_command_args
def test_execute_storage_missing_fields(job_spec_file):
    """Executing a flow whose Docker storage was built with no arguments raises ValueError."""
    env = KubernetesJobEnvironment(job_spec_file=job_spec_file)

    # The flow is built inside the ``raises`` block so the guarded region stays unchanged.
    with pytest.raises(ValueError):
        env.execute(Flow("test", storage=Docker()))
def test_setup_k8s_job_environment_passes(job_spec_file):
    """``setup`` runs to completion for a Docker-stored flow; the test passes if nothing raises."""
    env = KubernetesJobEnvironment(job_spec_file=job_spec_file)
    env.setup(Flow("test", storage=Docker()))
def test_create_k8s_job_environment_identifier_label_none(job_spec_file):
    """``identifier_label`` is still truthy after the private ``_identifier_label`` is cleared."""
    env = KubernetesJobEnvironment(job_spec_file=job_spec_file)
    env._identifier_label = None  # simulate an instance with no stored label
    assert env.identifier_label
from prefect import task, Flow
from prefect.environments import KubernetesJobEnvironment
from prefect.environments.storage import Docker


@task
def get_value():
    """Return the constant string "Example!"."""
    return "Example!"


@task
def output_value(value):
    """Print ``value`` to standard output."""
    print(value)


# Example flow: runs in a KubernetesJobEnvironment configured from the local
# "job_spec.yaml" file and uses Docker storage.
flow = Flow(
    "Kubernetes Job Environment w/ Resource Requests & Limits",
    environment=KubernetesJobEnvironment(job_spec_file="job_spec.yaml"),
    storage=Docker(
        registry_url="joshmeek18", image_name="flows", prefect_version="test_branch"
    ),
)

# set task dependencies using imperative API
# NOTE(review): ``bind(value=get_value)`` presumably already makes ``get_value``
# an upstream task of ``output_value`` - confirm whether ``set_upstream`` is
# still needed.
output_value.set_upstream(get_value, flow=flow)
output_value.bind(value=get_value, flow=flow)

# Module-level side effect: importing or running this file registers the flow
# under the "Demo" project.
flow.register(project_name="Demo")
def test_initialize_environment_with_spec_populates(monkeypatch, job_spec_file):
    """Constructing the environment loads the spec file content into ``_job_spec``.

    The ``job_spec_file`` fixture presumably points at a file containing the
    text "job" - verify against the fixture definition.
    """
    env = KubernetesJobEnvironment(job_spec_file=job_spec_file)
    assert env._job_spec == "job"
source_url=url) return data with Flow( name="example-selenium", schedule=Schedule(clocks=[ # TODO: specify the schedule you want this to run, and with what parameters # https://docs.prefect.io/core/concepts/schedules.html CronClock(cron='0 0 * * *', parameter_defaults=dict( home_page='https://www.metacritic.com/', gaming_platform='Switch')), ]), # TODO: specify the environment you want to execute the Flow in (from Prefect Cloud) environment=KubernetesJobEnvironment(job_spec_file='job_spec.yaml', ), storage=Docker( # TODO: change to your docker registry: # https://docs.prefect.io/cloud/recipes/configuring_storage.html registry_url='szelenka', # TODO: need to specify a base Docker image which has the chromedriver dependencies already installed base_image='szelenka/python-selenium-chromium:3.7.4', # TODO: 'pin' the exact versions you used on your development machine python_dependencies=['selenium==3.141.0', 'sqlalchemy==1.3.15'], ), # TODO: specify how you want to handle results # https://docs.prefect.io/core/concepts/results.html#results-and-result-handlers result_handler=LocalResultHandler()) as flow: # specify the DAG input parameters _path_to_chromedriver = Parameter('path_to_chromedriver', default='/usr/bin/chromedriver')