Example #1
def test_get_flow_image_raises_on_missing_info():
    flow = Flow(
        "test",
        environment=LocalEnvironment(),
        storage=Local(),
    )
    with pytest.raises(ValueError):
        get_flow_image(flow=flow)
Example #2
def test_get_flow_image_raises_on_missing_info():
    flow = Flow(
        "test",
        run_config=UniversalRun(),
        storage=Local(),
    )
    with pytest.raises(ValueError):
        get_flow_image(flow=flow)
Example #3
    def execute(self, flow: "Flow", **kwargs: Any) -> None:  # type: ignore
        """
        Create a single Kubernetes job that runs the flow.

        Args:
            - flow (Flow): the Flow object
            - **kwargs (Any): additional keyword arguments to pass to the runner

        Raises:
            - Exception: if the environment is unable to create the Kubernetes job
        """
        docker_name = get_flow_image(flow)

        from kubernetes import client, config

        # Verify environment is running in cluster
        try:
            config.load_incluster_config()
        except config.config_exception.ConfigException:
            self.logger.error("Environment not currently running inside a cluster")
            raise EnvironmentError("Environment not currently inside a cluster")

        batch_client = client.BatchV1Api()

        job = self._populate_run_time_job_spec_details(docker_name=docker_name)

        # Create Job
        try:
            batch_client.create_namespaced_job(
                namespace=prefect.context.get("namespace"), body=job
            )
        except Exception as exc:
            self.logger.critical("Failed to create Kubernetes job: {}".format(exc))
            raise exc
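
The body returned by _populate_run_time_job_spec_details is not shown in this example. The snippet below is a sketch of what a minimal batch/v1 Job dict passed to create_namespaced_job could look like, assuming a standard Kubernetes Job manifest; only the image comes from get_flow_image, and every other name and value is illustrative rather than the environment's actual spec.

def build_minimal_job_body(docker_name: str) -> dict:
    # Hypothetical helper, not part of the source: a bare-bones Job manifest
    # in dict form, the same structure create_namespaced_job accepts as `body`.
    return {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {"name": "prefect-job"},  # illustrative name
        "spec": {
            "template": {
                "spec": {
                    "containers": [
                        {
                            "name": "flow",  # illustrative container name
                            "image": docker_name,  # from get_flow_image(flow)
                        }
                    ],
                    "restartPolicy": "Never",
                }
            }
        },
    }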
Example #4
def test_get_flow_image_env_metadata():
    flow = Flow(
        "test",
        environment=LocalEnvironment(metadata={"image": "repo/name:tag"}),
        storage=Local(),
    )
    image = get_flow_image(flow=flow)
    assert image == "repo/name:tag"
Example #5
def test_get_flow_image_docker_storage():
    flow = Flow(
        "test",
        environment=LocalEnvironment(),
        storage=Docker(registry_url="test", image_name="name", image_tag="tag"),
    )
    image = get_flow_image(flow=flow)
    assert image == "test/name:tag"
Example #6
def test_get_flow_image_run_config():
    flow = Flow(
        "test",
        run_config=DockerRun(image="repo/name:tag"),
        storage=Local(),
    )
    image = get_flow_image(flow=flow)
    assert image == "repo/name:tag"
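
Taken together, these tests pin down the contract of get_flow_image: an explicit image on the run config or in the environment metadata is returned as-is, Docker storage contributes registry_url/image_name:image_tag, and a flow with no image information raises ValueError. The sketch below is a hypothetical reimplementation of that contract, not the library's code; the precedence between sources is an assumption the tests never exercise on a single flow, and Docker is assumed to be the Prefect Docker storage class imported in these tests.

def get_flow_image_sketch(flow) -> str:
    # Hypothetical reimplementation of the behaviour exercised by the tests.
    run_config = getattr(flow, "run_config", None)
    if getattr(run_config, "image", None):
        # e.g. DockerRun(image="repo/name:tag")
        return run_config.image

    environment = getattr(flow, "environment", None)
    metadata = getattr(environment, "metadata", None) or {}
    if metadata.get("image"):
        # e.g. LocalEnvironment(metadata={"image": "repo/name:tag"})
        return metadata["image"]

    storage = flow.storage
    if isinstance(storage, Docker):
        # e.g. Docker(registry_url="test", image_name="name", image_tag="tag")
        return "{}/{}:{}".format(
            storage.registry_url, storage.image_name, storage.image_tag
        )

    raise ValueError(
        "Flow contains no image information: set an image on the run config or "
        "environment metadata, or use Docker storage."
    )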
Example #7
    def _render_task_definition_kwargs(self, flow: "Flow") -> dict:
        task_definition_kwargs = self.task_definition_kwargs.copy()

        env_values = [
            {"name": "PREFECT__CLOUD__GRAPHQL", "value": config.cloud.graphql},
            {"name": "PREFECT__CLOUD__USE_LOCAL_SECRETS", "value": "false"},
            {
                "name": "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS",
                "value": "prefect.engine.cloud.CloudFlowRunner",
            },
            {
                "name": "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS",
                "value": "prefect.engine.cloud.CloudTaskRunner",
            },
            {"name": "PREFECT__LOGGING__LOG_TO_CLOUD", "value": "true"},
            {
                "name": "PREFECT__LOGGING__EXTRA_LOGGERS",
                "value": str(config.logging.extra_loggers),
            },
        ]

        # create containerDefinitions if they do not exist
        if not task_definition_kwargs.get("containerDefinitions"):
            task_definition_kwargs["containerDefinitions"] = []
            task_definition_kwargs["containerDefinitions"].append({})

        # set environment variables for all containers
        for definition in task_definition_kwargs["containerDefinitions"]:
            if not definition.get("environment"):
                definition["environment"] = []
            definition["environment"].extend(env_values)

        # set name on first container
        if not task_definition_kwargs["containerDefinitions"][0].get("name"):
            task_definition_kwargs["containerDefinitions"][0]["name"] = ""

        task_definition_kwargs.get("containerDefinitions")[0]["name"] = "flow-container"

        # set image on first container
        if not task_definition_kwargs["containerDefinitions"][0].get("image"):
            task_definition_kwargs["containerDefinitions"][0]["image"] = ""

        task_definition_kwargs.get("containerDefinitions")[0]["image"] = get_flow_image(
            flow
        )

        # set command on first container
        if not task_definition_kwargs["containerDefinitions"][0].get("command"):
            task_definition_kwargs["containerDefinitions"][0]["command"] = []

        task_definition_kwargs.get("containerDefinitions")[0]["command"] = [
            "/bin/sh",
            "-c",
            "python -c 'import prefect; prefect.environments.execution.load_and_run_flow()'",
        ]

        return task_definition_kwargs
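
As a concrete illustration of the merge performed above, assume the environment was configured with a family and a single partial container definition, and that get_flow_image(flow) resolves to "repo/name:tag". The rendered kwargs would then look roughly like the dict below; the family value and the pre-existing memory key are invented for the example.

rendered_task_definition_kwargs = {
    "family": "prefect-task",  # illustrative value, passed through unchanged
    "containerDefinitions": [
        {
            "memory": 512,  # illustrative pre-existing key, left untouched
            # name, image, and command on the first container are always
            # overwritten by _render_task_definition_kwargs:
            "name": "flow-container",
            "image": "repo/name:tag",  # whatever get_flow_image(flow) returned
            "command": [
                "/bin/sh",
                "-c",
                "python -c 'import prefect; prefect.environments.execution.load_and_run_flow()'",
            ],
            "environment": [
                # the PREFECT__* entries from env_values are appended here,
                # after any environment variables the user supplied
            ],
        }
    ],
}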
Example #8
def test_get_flow_image_docker_storage():
    flow = Flow(
        "test",
        run_config=UniversalRun(),
        storage=Docker(registry_url="test", image_name="name", image_tag="tag"),
    )
    image = get_flow_image(flow=flow)
    assert image == "test/name:tag"
Example #9
    def execute(self, flow: "Flow", **kwargs: Any) -> None:  # type: ignore
        """
        Create a single Kubernetes job that spins up a dask scheduler, dynamically
        creates worker pods, and runs the flow.

        Args:
            - flow (Flow): the Flow object
            - **kwargs (Any): additional keyword arguments to pass to the runner

        Raises:
            - Exception: if the environment is unable to create the Kubernetes job
        """
        docker_name = get_flow_image(flow)

        from kubernetes import client, config

        # Verify environment is running in cluster
        try:
            config.load_incluster_config()
        except config.config_exception.ConfigException:
            self.logger.error("Environment not currently running inside a cluster")
            raise EnvironmentError("Environment not currently inside a cluster")

        batch_client = client.BatchV1Api()

        if self._scheduler_spec:
            job = self._scheduler_spec
            job = self._populate_scheduler_spec_yaml(
                yaml_obj=job, docker_name=docker_name
            )
        else:
            with open(path.join(path.dirname(__file__), "job.yaml")) as job_file:
                job = yaml.safe_load(job_file)
                job = self._populate_job_yaml(yaml_obj=job, docker_name=docker_name)

        # Create Job
        try:
            batch_client.create_namespaced_job(
                namespace=prefect.context.get("namespace"), body=job
            )
        except Exception as exc:
            self.logger.critical("Failed to create Kubernetes job: {}".format(exc))
            raise exc
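
The in-cluster check above is straightforward to exercise in isolation. The test below is a sketch, not taken from the source: it assumes a DaskKubernetesEnvironment-like class with a no-argument constructor, the kubernetes package installed, and the same Flow/LocalEnvironment/Local imports as the test examples above, and it simulates running outside a cluster by forcing load_incluster_config to fail.

def test_execute_raises_outside_cluster(monkeypatch):
    # Sketch only: the default construction of DaskKubernetesEnvironment and
    # the flow below are assumptions, not code from the source.
    from kubernetes import config as k8s_config

    def fail_incluster_load() -> None:
        raise k8s_config.config_exception.ConfigException()

    monkeypatch.setattr(k8s_config, "load_incluster_config", fail_incluster_load)

    flow = Flow(
        "test",
        environment=LocalEnvironment(metadata={"image": "repo/name:tag"}),
        storage=Local(),
    )
    with pytest.raises(EnvironmentError):
        DaskKubernetesEnvironment().execute(flow)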
Example #10
    def execute(self, flow: "Flow", **kwargs: Any) -> None:  # type: ignore
        """
        Run the Fargate task that was defined for this flow.

        Args:
            - flow (Flow): the Flow object
            - **kwargs (Any): additional keyword arguments to pass to the runner
        """
        from boto3 import client as boto3_client

        flow_run_id = prefect.context.get("flow_run_id", "unknown")
        container_overrides = [
            {
                "name": "flow-container",
                "environment": [
                    {
                        "name": "PREFECT__CLOUD__AUTH_TOKEN",
                        "value": config.cloud.agent.auth_token
                        or config.cloud.auth_token,
                    },
                    {"name": "PREFECT__CONTEXT__FLOW_RUN_ID", "value": flow_run_id},
                    {"name": "PREFECT__CONTEXT__IMAGE", "value": get_flow_image(flow)},
                ],
            }
        ]

        boto3_c = boto3_client(
            "ecs",
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            aws_session_token=self.aws_session_token,
            region_name=self.region_name,
        )

        boto3_c.run_task(
            overrides={"containerOverrides": container_overrides},
            launchType=self.launch_type,
            **self.task_run_kwargs,
        )
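
The run_task call above names neither a task definition nor a network configuration, so those presumably arrive through self.task_run_kwargs. The dict below sketches values such an environment might be configured with; every concrete value is a placeholder, and only the parameter names themselves (taskDefinition, cluster, networkConfiguration) are standard ECS run_task arguments.

example_task_run_kwargs = {
    "taskDefinition": "prefect-task",  # illustrative family name
    "cluster": "default",  # illustrative cluster
    "networkConfiguration": {
        "awsvpcConfiguration": {
            "subnets": ["subnet-00000000"],  # placeholder subnet id
            "assignPublicIp": "ENABLED",
        }
    },
}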
Example #11
    def execute(self, flow: "Flow") -> None:  # type: ignore
        """
        Create a single Kubernetes job that spins up a dask scheduler, dynamically
        creates worker pods, and runs the flow.

        Args:
            - flow (Flow): the Flow object

        Raises:
            - Exception: if the environment is unable to create the Kubernetes job
        """
        docker_name = get_flow_image(flow)

        from kubernetes import client, config

        # Verify environment is running in cluster
        try:
            config.load_incluster_config()
Example #12
    def setup(self, flow: "Flow") -> None:  # type: ignore
        """
        Register the task definition if it does not already exist.

        Args:
            - flow (Flow): the Flow object
        """
        from boto3 import client as boto3_client
        from botocore.exceptions import ClientError

        boto3_c = boto3_client(
            "ecs",
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            aws_session_token=self.aws_session_token,
            region_name=self.region_name,
        )

        definition_exists = True
        try:
            boto3_c.describe_task_definition(
                taskDefinition=self.task_definition_kwargs.get("family")
            )
        except ClientError:
            definition_exists = False

        if not definition_exists:
            env_values = [
                {"name": "PREFECT__CLOUD__GRAPHQL", "value": config.cloud.graphql},
                {"name": "PREFECT__CLOUD__USE_LOCAL_SECRETS", "value": "false"},
                {
                    "name": "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS",
                    "value": "prefect.engine.cloud.CloudFlowRunner",
                },
                {
                    "name": "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS",
                    "value": "prefect.engine.cloud.CloudTaskRunner",
                },
                {"name": "PREFECT__LOGGING__LOG_TO_CLOUD", "value": "true"},
                {
                    "name": "PREFECT__LOGGING__EXTRA_LOGGERS",
                    "value": str(config.logging.extra_loggers),
                },
            ]

            # create containerDefinitions if they do not exist
            if not self.task_definition_kwargs.get("containerDefinitions"):
                self.task_definition_kwargs["containerDefinitions"] = []
                self.task_definition_kwargs["containerDefinitions"].append({})

            # set environment variables for all containers
            for definition in self.task_definition_kwargs["containerDefinitions"]:
                if not definition.get("environment"):
                    definition["environment"] = []
                definition["environment"].extend(env_values)

            # set name on first container
            if not self.task_definition_kwargs["containerDefinitions"][0].get("name"):
                self.task_definition_kwargs["containerDefinitions"][0]["name"] = ""

            self.task_definition_kwargs.get("containerDefinitions")[0][
                "name"
            ] = "flow-container"

            # set image on first container
            if not self.task_definition_kwargs["containerDefinitions"][0].get("image"):
                self.task_definition_kwargs["containerDefinitions"][0]["image"] = ""

            self.task_definition_kwargs.get("containerDefinitions")[0][
                "image"
            ] = get_flow_image(flow)

            # set command on first container
            if not self.task_definition_kwargs["containerDefinitions"][0].get(
                "command"
            ):
                self.task_definition_kwargs["containerDefinitions"][0]["command"] = []

            self.task_definition_kwargs.get("containerDefinitions")[0]["command"] = [
                "/bin/sh",
                "-c",
                "python -c 'import prefect; prefect.environments.execution.load_and_run_flow()'",
            ]

            boto3_c.register_task_definition(**self.task_definition_kwargs)
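
register_task_definition needs at least a family and containerDefinitions; targeting Fargate additionally requires the awsvpc network mode, CPU/memory sizing, compatibility flags, and an execution role. The dict below sketches the kind of task_definition_kwargs this setup might start from, under those assumptions; the containerDefinitions entry can stay minimal because setup() fills in name, image, command, and environment. All concrete values are illustrative.

example_task_definition_kwargs = {
    "family": "prefect-task",  # illustrative family name
    "requiresCompatibilities": ["FARGATE"],
    "networkMode": "awsvpc",
    "cpu": "256",
    "memory": "512",
    "executionRoleArn": "arn:aws:iam::123456789012:role/ecsTaskExecutionRole",  # placeholder
    # name, image, command, and environment are filled in by setup() above
    "containerDefinitions": [{}],
}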