예제 #1
0
def test_serialize_docker_run(config):
    """Check that `config` survives a dump/load round trip through `RunConfigSchema`."""
    serialized = RunConfigSchema().dump(config)
    restored = RunConfigSchema().load(serialized)

    # Label order is not significant, so compare sorted copies
    assert sorted(config.labels) == sorted(restored.labels)
    for name in ("env", "image", "ports"):
        assert getattr(config, name) == getattr(restored, name)
예제 #2
0
def test_serialize_local_run(config):
    """Check that `config` survives a dump/load round trip through `RunConfigSchema`."""
    serialized = RunConfigSchema().dump(config)
    restored = RunConfigSchema().load(serialized)

    # Label order is not significant, so compare sorted copies
    assert sorted(config.labels) == sorted(restored.labels)
    for name in ("env", "working_dir"):
        assert getattr(config, name) == getattr(restored, name)
예제 #3
0
    def _get_run_config(self, flow_run: GraphQLResult,
                        run_config_cls: Type[RunConfig]) -> RunConfig:
        """
        Resolve the run-config to use for a flow run. The result is always an
        instance of `run_config_cls`.

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object
            - run_config_cls (Callable): The run-config class the caller expects

        Returns:
            - RunConfig: the deserialized run-config when it already is a
                `run_config_cls`; a new `run_config_cls` built from the `env` and
                `labels` of a `UniversalRun`; or `run_config_cls()` when the flow
                run has no run-config

        Raises:
            - TypeError: if the run-config is neither a `UniversalRun` nor an
                instance of `run_config_cls`
        """
        # Nothing configured on the flow run: hand back the default run_config
        if getattr(flow_run, "run_config", None) is None:
            return run_config_cls()

        loaded = RunConfigSchema().load(flow_run.run_config)

        if isinstance(loaded, UniversalRun):
            # Convert to the agent-specific run-config, keeping env and labels
            return run_config_cls(env=loaded.env, labels=loaded.labels)

        if isinstance(loaded, run_config_cls):
            return loaded

        msg = (
            "Flow run %s has a `run_config` of type `%s`, only `%s` is supported"
            % (flow_run.id, type(loaded).__name__, run_config_cls.__name__))
        self.logger.error(msg)
        raise TypeError(msg)
예제 #4
0
    def _from_flow_data(cls, flow_data: dict, **kwargs: Any) -> "FlowView":
        """
        Build a `FlowView` from a dict of serialized flow data.

        The serialized `storage` and `run_config` entries are deserialized with
        `StorageSchema` and `RunConfigSchema`. The input dict is shallow-copied
        first, so its top-level keys are left in place.

        Args:
            - flow_data: The dict of serialized data
            - **kwargs: Additional kwargs are passed to __init__ and take
                precedence over values derived from `flow_data`
        """
        remaining = flow_data.copy()

        flow_id = remaining.pop("id")
        flow_group_labels = remaining.pop("flow_group")["labels"]
        project_name = remaining.pop("project")["name"]
        storage = StorageSchema().load(remaining.pop("storage"))
        run_config = RunConfigSchema().load(remaining.pop("run_config"))

        # Whatever is left in `remaining` is forwarded untouched; explicit
        # `kwargs` are applied last so they win over the serialized values
        flow_args = dict(
            flow_id=flow_id,
            project_name=project_name,
            storage=storage,
            flow_group_labels=flow_group_labels,
            run_config=run_config,
            **remaining,
        )
        flow_args.update(kwargs)

        return cls(**flow_args)
예제 #5
0
def test_serialize_ecs_run(config):
    """Check that `config` survives a dump/load round trip through `RunConfigSchema`."""
    serialized = RunConfigSchema().dump(config)
    restored = RunConfigSchema().load(serialized)

    # Label order is not significant, so compare sorted copies
    assert sorted(config.labels) == sorted(restored.labels)

    compared_attributes = (
        "task_definition",
        "task_definition_path",
        "image",
        "env",
        "cpu",
        "memory",
        "task_role_arn",
        "run_task_kwargs",
    )
    for name in compared_attributes:
        assert getattr(config, name) == getattr(restored, name)
예제 #6
0
    def deploy_flow(self, flow_run: GraphQLResult) -> str:
        """
        Launch an ECS task for the given flow run.

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment, of the form
                `Task <task ARN>`

        Raises:
            - TypeError: if the flow's `run_config` is not an `ECSRun`
            - ValueError: if the flow has no `run_config`, or if the `run_task`
                response contains no tasks
        """
        self.logger.info("Deploying flow run %r", flow_run.id)

        # A run_config is mandatory here, and it has to be an `ECSRun`
        if getattr(flow_run.flow, "run_config", None) is None:
            self.logger.error(
                "Flow run %s has a null `run_config`, only `ECSRun` is supported",
                flow_run.id,
            )
            raise ValueError("Flow is missing a `run_config`")

        run_config = RunConfigSchema().load(flow_run.flow.run_config)
        if not isinstance(run_config, ECSRun):
            self.logger.error(
                "Flow run %s has a `run_config` of type `%s`, only `ECSRun` is supported",
                flow_run.id,
                type(run_config).__name__,
            )
            raise TypeError("Unsupported RunConfig type: %s" %
                            type(run_config).__name__)

        taskdef_arn = self.get_task_definition_arn(flow_run, run_config)
        if taskdef_arn is not None:
            self.logger.debug("Using task definition %s for flow %s",
                              taskdef_arn, flow_run.flow.id)
        else:
            # No usable task definition was found, so register a new one
            self.logger.debug("Registering new task definition for flow %s",
                              flow_run.flow.id)
            taskdef = self.generate_task_definition(flow_run, run_config)
            registration = self.ecs_client.register_task_definition(**taskdef)
            taskdef_arn = registration["taskDefinition"]["taskDefinitionArn"]
            self.logger.debug(
                "Registered task definition %s for flow %s",
                taskdef_arn,
                flow_run.flow.id,
            )

        # Get kwargs to pass to run_task
        run_task_kwargs = self.get_run_task_kwargs(flow_run, run_config)

        resp = self.ecs_client.run_task(taskDefinition=taskdef_arn,
                                        **run_task_kwargs)
        if not resp.get("tasks"):
            raise ValueError(
                "Failed to start task for flow run {0}. Failures: {1}".format(
                    flow_run.id, resp.get("failures")))

        task_arn = resp["tasks"][0]["taskArn"]
        self.logger.debug("Started task %r for flow run %r", task_arn,
                          flow_run.id)
        return f"Task {task_arn}"
예제 #7
0
    def _get_run_config(
        self, flow_run: GraphQLResult, run_config_cls: Type[RunConfig]
    ) -> Optional[RunConfig]:
        """
        Resolve the run-config to use for a flow run, if the flow uses one.

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object
            - run_config_cls (Callable): The run-config class the caller expects

        Returns:
            - RunConfig: the deserialized run-config when it already is a
                `run_config_cls`; a new `run_config_cls` built from the `env` and
                `labels` of a `UniversalRun`; `run_config_cls()` when the flow
                run has neither a run-config nor an environment; `None` when
                the flow only has an environment.

        Raises:
            - TypeError: if the run-config is neither a `UniversalRun` nor an
                instance of `run_config_cls`
        """
        if getattr(flow_run, "run_config", None) is None:
            if getattr(flow_run.flow, "environment", None) is None:
                # No environment either, use the default run_config
                return run_config_cls()
            # Environment-based flow: there is no run_config to return
            return None

        loaded = RunConfigSchema().load(flow_run.run_config)

        if isinstance(loaded, UniversalRun):
            # Convert to the agent-specific run-config, keeping env and labels
            return run_config_cls(env=loaded.env, labels=loaded.labels)

        if isinstance(loaded, run_config_cls):
            return loaded

        msg = (
            "Flow run %s has a `run_config` of type `%s`, only `%s` is supported"
            % (flow_run.id, type(loaded).__name__, run_config_cls.__name__)
        )
        self.logger.error(msg)
        raise TypeError(msg)
예제 #8
0
def test_serialize_kubernetes_run(config):
    """Check that `config` survives a dump/load round trip through `RunConfigSchema`."""
    serialized = RunConfigSchema().dump(config)
    restored = RunConfigSchema().load(serialized)

    # Label order is not significant, so compare sorted copies
    assert sorted(config.labels) == sorted(restored.labels)

    compared_attributes = (
        "job_template",
        "job_template_path",
        "image",
        "env",
        "cpu_limit",
        "cpu_request",
        "memory_limit",
        "memory_request",
    )
    for name in compared_attributes:
        assert getattr(config, name) == getattr(restored, name)
예제 #9
0
def get_flow_image(flow_run: GraphQLResult, default: str = None) -> str:
    """
    Retrieve the image to use for this flow run deployment.

    For run-config based flows (a `run_config` is set on the flow run, or the
    flow has no environment) the image is resolved with this precedence:
    image on `Docker` storage, image on the run_config, `default`, then
    `prefecthq/prefect:<core_version>`.

    For environment based flows the `image` entry of the environment metadata
    is used, falling back to the image on `Docker` storage.

    Args:
        - flow_run (GraphQLResult): A GraphQLResult flow run object
        - default (str, optional): A default image to use. If not specified,
            The `prefecthq/prefect` image corresponding with the flow's prefect
            version will be used.

    Returns:
        - str: a full image name to use for this flow run

    Raises:
        - ValueError: if the flow is environment based, its environment
            metadata has no `image` entry and its storage is not `Docker`
    """
    from prefect.storage import Docker
    from prefect.serialization.storage import StorageSchema
    from prefect.serialization.run_config import RunConfigSchema
    from prefect.serialization.environment import EnvironmentSchema

    has_run_config = getattr(flow_run, "run_config", None) is not None
    has_environment = getattr(flow_run.flow, "environment", None) is not None

    storage = StorageSchema().load(flow_run.flow.storage)
    # Not having an environment implies run-config based flow, even if
    # run_config is None.
    if has_run_config or not has_environment:
        # Precedence:
        # - Image on docker storage
        # - Image on run_config
        # - Provided default
        # - `prefecthq/prefect` for flow's core version
        if isinstance(storage, Docker):
            return storage.name
        if has_run_config:
            run_config = RunConfigSchema().load(flow_run.run_config)
            if getattr(run_config, "image", None) is not None:
                return run_config.image
        if default is not None:
            return default
        # core_version should always be present, but just in case
        version = flow_run.flow.get("core_version") or "latest"
        # Drop any local version suffix (everything from the first `+`)
        cleaned_version = version.split("+")[0]
        return f"prefecthq/prefect:{cleaned_version}"
    else:
        environment = EnvironmentSchema().load(flow_run.flow.environment)
        # `metadata` is a mapping, so test for the key itself. `hasattr` only
        # succeeds on attribute-style mappings and is always False for a plain
        # dict, which would silently skip a configured image.
        metadata = getattr(environment, "metadata", None)
        if metadata and "image" in metadata:
            return metadata.get("image")
        elif isinstance(storage, Docker):
            return storage.name
        raise ValueError(
            f"Storage for flow run {flow_run.id} is not of type Docker and "
            f"environment has no `image` attribute in the metadata field.")
예제 #10
0
def load_active_run_config():
    """
    Fetch and deserialize the run config of the flow run in `prefect.context`.

    Queries `flow_run_by_pk` for the `run_config` of
    `prefect.context.flow_run_id` and loads the result with `RunConfigSchema`.
    """
    client = Client()
    selector = with_args("flow_run_by_pk", {"id": prefect.context.flow_run_id})
    query = {"query": {selector: {"run_config": True}}}
    response = client.graphql(query)
    return RunConfigSchema().load(response.data.flow_run_by_pk.run_config)
예제 #11
0
def test_serialize_kubernetes_run(config):
    """Check that `config` survives a dump/load round trip through `RunConfigSchema`."""
    msg = RunConfigSchema().dump(config)
    config2 = RunConfigSchema().load(msg)
    # Label order is not significant, so compare sorted copies
    assert sorted(config.labels) == sorted(config2.labels)
    # Merge conflict resolved in favour of HEAD, whose field list is a
    # superset of the other side's.
    fields = [
        "job_template",
        "job_template_path",
        "image",
        "env",
        "cpu_limit",
        "cpu_request",
        "memory_limit",
        "memory_request",
        "service_account_name",
        "image_pull_secrets",
    ]
    for field in fields:
        assert getattr(config, field) == getattr(config2, field)
예제 #12
0
    def _from_flow_run_data(
            cls,
            flow_run_data: dict,
            task_runs: Iterable["TaskRunView"] = None) -> "FlowRunView":
        """
        Instantiate a `FlowRunView` from serialized data.

        This method deserializes objects into their Prefect types.

        Exists to maintain consistency in the design of backend "View" classes.

        Args:
            - flow_run_data: A dict of flow run data. Must contain the keys
                `id`, `serialized_state`, `run_config` and `updated`; `states`
                is optional. Any remaining keys are passed to `__init__`
                unchanged.
            - task_runs: An optional iterable of task runs to pre-populate the cache with

        Returns:
            A populated `FlowRunView` instance
        """
        flow_run_data = flow_run_data.copy()  # Avoid mutating the input object

        flow_run_id = flow_run_data.pop("id")
        serialized_state = flow_run_data.pop("serialized_state")
        state = (
            State.deserialize(serialized_state) if
            serialized_state  # Flow run may not have initialized its state yet
            else Pending(
                message="A state for this flow run is not yet available."))
        run_config_data = flow_run_data.pop("run_config")
        run_config = (RunConfigSchema().load(run_config_data)
                      if run_config_data else None)

        # `sorted` already returns a new list, ordered here by timestamp
        states_data = flow_run_data.pop("states", [])
        states = sorted(
            (_TimestampedState.from_dict(state_data)
             for state_data in states_data),
            key=lambda s: s.timestamp,
        )
        updated_at = cast(pendulum.DateTime,
                          pendulum.parse(flow_run_data.pop("updated")))

        return cls(
            flow_run_id=flow_run_id,
            task_runs=task_runs,
            state=state,
            updated_at=updated_at,
            states=states,
            run_config=run_config,
            **flow_run_data,
        )
예제 #13
0
 def build_flow_run(self, config, storage=None):
     """
     Assemble a `GraphQLResult` flow run whose flow carries the serialized
     `storage` and the `RunConfigSchema` dump of `config`.

     A `Local()` storage is used when `storage` is not given.
     """
     storage = Local() if storage is None else storage
     flow = GraphQLResult({
         "storage": storage.serialize(),
         "run_config": RunConfigSchema().dump(config),
         "id": "new_id",
         "core_version": "0.13.0",
     })
     return GraphQLResult({"flow": flow, "id": "id"})
예제 #14
0
def get_flow_image(flow_run: GraphQLResult) -> str:
    """
    Retrieve the image to use for this flow run deployment.

    For run-config based flows (the flow has a `run_config`, or has no
    environment) the image is resolved with this precedence: image on
    `Docker` storage, image on the run_config, then
    `prefecthq/prefect:all_extras-<core_version>`.

    For environment based flows the `image` entry of the environment metadata
    is used, falling back to the image on `Docker` storage.

    Args:
        - flow_run (GraphQLResult): A GraphQLResult flow run object

    Returns:
        - str: a full image name to use for this flow run

    Raises:
        - ValueError: if the flow is environment based, its environment
            metadata has no `image` entry and its storage is not `Docker`
    """
    from prefect.environments.storage import Docker
    from prefect.serialization.storage import StorageSchema
    from prefect.serialization.run_config import RunConfigSchema
    from prefect.serialization.environment import EnvironmentSchema

    has_run_config = getattr(flow_run.flow, "run_config", None) is not None
    has_environment = getattr(flow_run.flow, "environment", None) is not None

    storage = StorageSchema().load(flow_run.flow.storage)
    # Not having an environment implies run-config based flow, even if
    # run_config is None.
    if has_run_config or not has_environment:
        if isinstance(storage, Docker):
            return storage.name
        elif has_run_config:
            run_config = RunConfigSchema().load(flow_run.flow.run_config)
            if getattr(run_config, "image", None) is not None:
                return run_config.image
        # No image found on run-config, and no environment present. Use default.
        # core_version should always be present, but just in case
        version = flow_run.flow.get("core_version") or "latest"
        # Drop any local version suffix (everything from the first `+`)
        cleaned_version = version.split("+")[0]
        return f"prefecthq/prefect:all_extras-{cleaned_version}"
    else:
        environment = EnvironmentSchema().load(flow_run.flow.environment)
        # `metadata` is a mapping, so test for the key itself. `hasattr` only
        # succeeds on attribute-style mappings and is always False for a plain
        # dict, which would silently skip a configured image.
        metadata = getattr(environment, "metadata", None)
        if metadata and "image" in metadata:
            return metadata.get("image")
        elif isinstance(storage, Docker):
            return storage.name
        raise ValueError(
            f"Storage for flow run {flow_run.id} is not of type Docker and "
            f"environment has no `image` attribute in the metadata field."
        )
예제 #15
0
        """
        self.logger.info("Deploying flow run {}".format(flow_run.id))  # type: ignore

        # 'import docker' is expensive time-wise, we should do this just-in-time to keep
        # the 'import prefect' time low
        import docker

<<<<<<< HEAD
        run_config = self._get_run_config(flow_run, DockerRun)
        assert run_config is None or isinstance(run_config, DockerRun)  # mypy

        image = get_flow_image(flow_run=flow_run)
        env_vars = self.populate_env_vars(flow_run, image, run_config=run_config)
=======
        if getattr(flow_run.flow, "run_config", None) is not None:
            run_config = RunConfigSchema().load(flow_run.flow.run_config)
            if not isinstance(run_config, DockerRun):
                self.logger.error(
                    "Flow run %s has a `run_config` of type `%s`, only `DockerRun` is supported",
                    flow_run.id,
                    type(run_config).__name__,
                )
                raise TypeError(
                    "Unsupported RunConfig type: %s" % type(run_config).__name__
                )
        else:
            run_config = None

        image = get_flow_image(flow_run=flow_run)
        env_vars = self.populate_env_vars(flow_run, run_config=run_config)
>>>>>>> prefect clone
예제 #16
0
def test_serialize_universal_run(config):
    """Check that `env` and `labels` survive a `RunConfigSchema` dump/load round trip."""
    serialized = RunConfigSchema().dump(config)
    restored = RunConfigSchema().load(serialized)

    assert config.env == restored.env
    # Label order is not significant, so compare sorted copies
    assert sorted(config.labels) == sorted(restored.labels)
예제 #17
0
파일: agent.py 프로젝트: strojank/prefect
    def generate_job_spec_from_run_config(self,
                                          flow_run: GraphQLResult) -> dict:
        """Build the k8s job spec used to execute a flow run.

        The job starts from the run-config's `job_template` when set, otherwise
        from the template loaded from the run-config's `job_template_path` (or
        the agent's `job_template_path`). Identification labels, a generated
        name, the image, the run command, environment variables and resource
        requests/limits are then filled in on the first container.

        Args:
            - flow_run (GraphQLResult): A flow run object

        Returns:
            - dict: a dictionary representation of a k8s job for flow execution
        """
        run_config = RunConfigSchema().load(flow_run.flow.run_config)

        if run_config.job_template:
            job = run_config.job_template
        else:
            template_path = run_config.job_template_path or self.job_template_path
            self.logger.debug("Loading job template from %r", template_path)
            job = yaml.safe_load(read_bytes_from_path(template_path))

        # Short random suffix shared by the job name and its labels
        identifier = uuid.uuid4().hex[:8]

        # Populate job metadata for identification
        k8s_labels = {
            "prefect.io/identifier": identifier,
            "prefect.io/flow_run_id": flow_run.id,  # type: ignore
            "prefect.io/flow_id": flow_run.flow.id,  # type: ignore
        }
        _get_or_create(job, "metadata.labels")
        _get_or_create(job, "spec.template.metadata.labels")
        job["metadata"]["name"] = f"prefect-job-{identifier}"
        job["metadata"]["labels"].update(**k8s_labels)
        job["spec"]["template"]["metadata"]["labels"].update(**k8s_labels)

        # The first container is the one used for the prefect job; create an
        # empty one when the template defines none
        containers = _get_or_create(job, "spec.template.spec.containers", [])
        if not containers:
            containers.append({})
        container = containers[0]

        # Set container image and flow run command
        image = get_flow_image(flow_run)
        container["image"] = image
        container["args"] = [get_flow_run_command(flow_run)]

        # Environment variables, from lowest to highest precedence:
        # - Values in the job template
        # - Values set using the `--env` CLI flag on the agent
        # - Values set on the KubernetesRun object
        # - Values required for flow execution, hardcoded below
        env = self.env_vars.copy()
        if run_config.env:
            env.update(run_config.env)
        required_env = {
            "PREFECT__CLOUD__API":
            config.cloud.api,
            "PREFECT__CLOUD__AUTH_TOKEN":
            config.cloud.agent.auth_token,
            "PREFECT__CLOUD__USE_LOCAL_SECRETS":
            "false",
            "PREFECT__CONTEXT__FLOW_RUN_ID":
            flow_run.id,
            "PREFECT__CONTEXT__FLOW_ID":
            flow_run.flow.id,
            "PREFECT__CONTEXT__IMAGE":
            image,
            "PREFECT__LOGGING__LOG_TO_CLOUD":
            str(self.log_to_cloud).lower(),
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS":
            "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS":
            "prefect.engine.cloud.CloudTaskRunner",
        }
        env.update(required_env)

        container_env = [{"name": key, "value": value}
                         for key, value in env.items()]
        # Template entries are kept only when nothing above overrides them
        container_env.extend(entry for entry in container.get("env", [])
                             if entry["name"] not in env)
        container["env"] = container_env

        # Set resource requirements if provided
        _get_or_create(container, "resources.requests")
        _get_or_create(container, "resources.limits")
        resources = container["resources"]
        requested_resources = (
            ("requests", "memory", run_config.memory_request),
            ("limits", "memory", run_config.memory_limit),
            ("requests", "cpu", run_config.cpu_request),
            ("limits", "cpu", run_config.cpu_limit),
        )
        for section, resource, amount in requested_resources:
            if amount:
                resources[section][resource] = amount

        return job
예제 #18
0
    def deploy_flow(self, flow_run: GraphQLResult) -> str:
        """
        Deploy a flow run on the local machine as a subprocess.

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment, of the form `PID: <pid>`

        Raises:
            - TypeError: if the flow uses `Docker` storage, or has a
                `run_config` that is not a `LocalRun`
            - ValueError: if the run_config's `working_dir` does not exist
        """
        self.logger.info(f"Deploying flow run {flow_run.id}")  # type: ignore

        # Docker storage is rejected here
        storage = StorageSchema().load(flow_run.flow.storage)
        if isinstance(storage, Docker):
            self.logger.error(
                "Flow run %s has an unsupported storage type: `%s`",
                flow_run.id,
                type(storage).__name__,
            )
            raise TypeError("Unsupported Storage type: %s" %
                            type(storage).__name__)

        # If the flow is using a run_config, load it
        if getattr(flow_run.flow, "run_config", None) is not None:
            run_config = RunConfigSchema().load(flow_run.flow.run_config)
            if not isinstance(run_config, LocalRun):
                self.logger.error(
                    "Flow run %s has a `run_config` of type `%s`, only `LocalRun` is supported",
                    flow_run.id,
                    type(run_config).__name__,
                )
                raise TypeError("Unsupported RunConfig type: %s" %
                                type(run_config).__name__)
        else:
            run_config = None

        env = self.populate_env_vars(flow_run, run_config=run_config)

        # Fail early with a clear message instead of letting Popen fail on `cwd`
        working_dir = None if run_config is None else run_config.working_dir
        if working_dir and not os.path.exists(working_dir):
            msg = f"Flow run {flow_run.id} has a nonexistent `working_dir` configured: {working_dir}"
            self.logger.error(msg)
            raise ValueError(msg)

        stdout = sys.stdout if self.show_flow_logs else DEVNULL

        # note: we will allow these processes to be orphaned if the agent were to exit
        # before the flow runs have completed. The lifecycle of the agent should not
        # dictate the lifecycle of the flow run. However, if the user has elected to
        # show flow logs, these log entries will continue to stream to the users terminal
        # until these child processes exit, even if the agent has already exited.
        p = Popen(
            get_flow_run_command(flow_run).split(" "),
            stdout=stdout,
            stderr=STDOUT,
            env=env,
            cwd=working_dir,
        )

        self.processes.add(p)
        self.logger.debug(
            f"Submitted flow run {flow_run.id} to process PID {p.pid}")

        return f"PID: {p.pid}"
예제 #19
0
    def deploy_flow(self, flow_run: GraphQLResult) -> str:
        """
        Run the given flow run in a Docker container on this machine.

        The image is pulled first unless pulling is disabled or the image name
        has no `/`-separated registry part. Named volumes that cannot be
        inspected are created before the container is created and started.

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment, of the form
                `Container ID: <id>`

        Raises:
            - TypeError: if the flow has a `run_config` that is not a
                `DockerRun`
            - ValueError: if the image's registry is not in a configured
                `reg_allow_list`
        """
        self.logger.info("Deploying flow run {}".format(
            flow_run.id))  # type: ignore

        # 'import docker' is expensive time-wise, we should do this just-in-time to keep
        # the 'import prefect' time low
        import docker

        run_config = None
        if getattr(flow_run.flow, "run_config", None) is not None:
            run_config = RunConfigSchema().load(flow_run.flow.run_config)
            if not isinstance(run_config, DockerRun):
                self.logger.error(
                    "Flow run %s has a `run_config` of type `%s`, only `DockerRun` is supported",
                    flow_run.id,
                    type(run_config).__name__,
                )
                raise TypeError("Unsupported RunConfig type: %s" %
                                type(run_config).__name__)

        image = get_flow_image(flow_run=flow_run)
        env_vars = self.populate_env_vars(flow_run, run_config=run_config)

        if not self.no_pull and len(image.split("/")) > 1:
            self.logger.info("Pulling image {}...".format(image))
            # Everything before the first "/" is treated as the registry
            registry, _, _ = image.partition("/")
            if self.reg_allow_list and registry not in self.reg_allow_list:
                rejection = (
                    "Trying to pull image from a Docker registry '{}' which"
                    " is not in the reg_allow_list".format(registry))
                self.logger.error(rejection)
                raise ValueError(rejection)

            for progress in self.docker_client.pull(image,
                                                    stream=True,
                                                    decode=True):
                self.logger.debug(progress)
            self.logger.info("Successfully pulled image {}...".format(image))

        # Create any named volumes (if they do not already exist)
        for volume_name in self.named_volumes:
            try:
                self.docker_client.inspect_volume(name=volume_name)
            except docker.errors.APIError:
                self.logger.debug(
                    "Creating named volume {}".format(volume_name))
                self.docker_client.create_volume(
                    name=volume_name,
                    driver="local",
                    labels={"prefect_created": "true"},
                )

        # Create a container
        self.logger.debug("Creating Docker container {}".format(image))

        host_config = {"auto_remove": True}  # type: dict
        container_mount_paths = self.container_mount_paths
        if container_mount_paths:
            host_config["binds"] = self.host_spec

        if sys.platform.startswith("linux") and self.docker_interface:
            host_config["extra_hosts"] = {
                "host.docker.internal": get_docker_ip()
            }

        networking_config = (
            self.docker_client.create_networking_config(
                {self.network: self.docker_client.create_endpoint_config()})
            if self.network else None)

        container = self.docker_client.create_container(
            image,
            command=get_flow_run_command(flow_run),
            environment=env_vars,
            volumes=container_mount_paths,
            host_config=self.docker_client.create_host_config(**host_config),
            networking_config=networking_config,
        )
        container_id = container.get("Id")

        # Start the container
        self.logger.debug(
            "Starting Docker container with ID {}".format(container_id))
        if self.network:
            self.logger.debug("Adding container to docker network: {}".format(
                self.network))

        self.docker_client.start(container=container_id)

        if self.show_flow_logs:
            self.stream_flow_logs(container_id)

        self.logger.debug("Docker container {} started".format(container_id))

        return "Container ID: {}".format(container_id)