Esempio n. 1
0
    def run(
        self,
        flow_name: str = None,
        project_name: str = None,
        parameters: dict = None,
        idempotency_key: str = None,
        new_flow_context: dict = None,
    ) -> str:
        """
        Schedule a run of the newest unarchived version of a flow and, when
        `self.wait` is set, block until that run finishes.

        Args:
            - flow_name (str, optional): the name of the flow to schedule; a `ValueError` is
                raised if this is still `None` when the body executes.
                NOTE(review): the fallback to the value given at initialization is not
                performed in this body; presumably a decorator outside this block applies
                it - confirm.
            - project_name (str, optional): the Cloud project in which the flow is located.
                Required when the configured API host contains `prefect.io`; when given, it
                is added to the flow lookup filter.
            - parameters (dict, optional): the parameters to pass to the flow run being
                scheduled
            - idempotency_key (str, optional): an idempotency key for scheduling the flow
                run. It is only sent when the backend is hosted and the run context holds a
                `flow_run_id`; in that case, if no key is given, the default is the current
                flow run ID, suffixed with `-<map_index>` when a map index is present in the
                context. In every other case no idempotency key is sent.
            - new_flow_context (dict, optional): the optional run context for the new flow run

        Returns:
            - str: the ID of the newly-scheduled flow run (only when `self.wait` is falsy)

        Raises:
            - ValueError: if flow was not provided, cannot be found, or if a project name was
                not provided while using Cloud as a backend
            - the signal built by `signal_from_state` for the final state of the created
                run, once it is finished, when `self.wait` is truthy

        Example:
            ```python
            from prefect.tasks.prefect.flow_run import FlowRunTask

            kickoff_task = FlowRunTask(project_name="Hello, World!", flow_name="My Cloud Flow")
            ```

        """
        # The backend counts as "hosted" when the configured API host mentions prefect.io
        is_hosted_backend = "prefect.io" in urlparse(config.cloud.api).netloc

        # verify that flow and project names were passed where necessary
        if flow_name is None:
            raise ValueError("Must provide a flow name.")
        if project_name is None and is_hosted_backend:
            raise ValueError("Must provide a project name.")

        where_clause = {
            "name": {"_eq": flow_name},
            "archived": {"_eq": False},
        }

        if project_name:
            where_clause["project"] = {"name": {"_eq": project_name}}

        # find the flow ID to schedule: highest version first, single result
        query = {
            "query": {
                with_args(
                    "flow",
                    {
                        "where": where_clause,
                        "order_by": {"version": EnumValue("desc")},
                        "limit": 1,
                    },
                ): {"id"}
            }
        }

        client = Client()
        flow = client.graphql(query).data.flow

        # verify that a flow has been returned
        if not flow:
            raise ValueError("Flow '{}' not found.".format(flow_name))

        # grab the ID for the most recent version
        flow_id = flow[0].id

        current_flow_run_id = context.get("flow_run_id")
        if is_hosted_backend and current_flow_run_id:
            map_index = context.get("map_index")
            # An index of 0 is falsy, so compare against None explicitly; otherwise
            # index 0 would yield the same default key as "no map index at all".
            suffix = f"-{map_index}" if map_index is not None else ""
            idem_key = idempotency_key or (current_flow_run_id + suffix)
        else:
            idem_key = None

        # providing an idempotency key ensures that retries for this task
        # will not create additional flow runs
        flow_run_id = client.create_flow_run(
            flow_id=flow_id,
            parameters=parameters,
            idempotency_key=idem_key,
            context=new_flow_context,
        )

        self.logger.debug(f"Flow Run {flow_run_id} created.")

        if not self.wait:
            return flow_run_id

        # Poll every 10 seconds; a finished run is reported by raising the signal
        # that corresponds to its final state.
        while True:
            time.sleep(10)
            flow_run_state = client.get_flow_run_info(flow_run_id).state
            if flow_run_state.is_finished():
                exc = signal_from_state(flow_run_state)(
                    f"{flow_run_id} finished in state {flow_run_state}"
                )
                raise exc
Esempio n. 2
0
    def __init__(
        self,
        agent_config_id: str = None,
        name: str = None,
        labels: Iterable[str] = None,
        env_vars: dict = None,
        max_polls: int = None,
        agent_address: str = None,
        no_cloud_logs: bool = None,
        base_url: str = None,
        no_pull: bool = None,
        volumes: List[str] = None,
        show_flow_logs: bool = False,
        networks: List[str] = None,
        reg_allow_list: List[str] = None,
        docker_client_timeout: int = None,
        docker_interface: bool = None,  # Deprecated in 0.14.18
    ) -> None:
        """
        Store the Docker-related settings on the agent, build a Docker client
        and ping the daemon to confirm it can be reached.

        Args:
            - agent_config_id, name, labels, env_vars, max_polls, agent_address,
                no_cloud_logs: forwarded unchanged to the parent initializer
            - base_url (str, optional): Docker daemon URL; when falsy, the `base_url`
                entry of the context is used, falling back to a platform default
            - no_pull (bool, optional): pull toggle; when falsy, the `no_pull` entry of
                the context is used, falling back to `False`
            - volumes (List[str], optional): volume specs handed to
                `self._parse_volume_spec` (an empty list when falsy)
            - show_flow_logs (bool, optional): stored as `self.show_flow_logs`
            - networks (List[str], optional): stored as `self.networks`
            - reg_allow_list (List[str], optional): stored as `self.reg_allow_list`
            - docker_client_timeout (int, optional): stored as
                `self.docker_client_timeout`; 60 is used when falsy
            - docker_interface (bool, optional): deprecated; any non-`None` value only
                triggers a `UserWarning`

        Raises:
            - Exception: whatever `self.docker_client.ping()` raises is logged and
                re-raised
        """
        super().__init__(
            agent_config_id=agent_config_id,
            name=name,
            labels=labels,
            env_vars=env_vars,
            max_polls=max_polls,
            agent_address=agent_address,
            no_cloud_logs=no_cloud_logs,
        )

        # Platform-specific default endpoint for the Docker daemon
        default_url = (
            "npipe:////./pipe/docker_engine"
            if platform == "win32"
            else "unix://var/run/docker.sock"
        )
        self.logger.debug(
            "Platform {} and default docker daemon {}".format(platform, default_url)
        )

        # Daemon URL: explicit argument first, then context, then the default
        daemon_url = base_url
        if not daemon_url:
            daemon_url = context.get("base_url", default_url)
        self.base_url = daemon_url
        self.logger.debug("Base docker daemon url {}".format(self.base_url))

        # Pull toggle: explicit truthy argument first, then context
        skip_pull = no_pull
        if not skip_pull:
            skip_pull = context.get("no_pull", False)
        self.no_pull = skip_pull
        self.logger.debug("no_pull set to {}".format(self.no_pull))

        # Break the volume specs into their three parsed components
        parsed_volumes = self._parse_volume_spec(volumes or [])
        (
            self.named_volumes,
            self.container_mount_paths,
            self.host_spec,
        ) = parsed_volumes

        if docker_interface is not None:
            warnings.warn(
                "DockerAgent `docker_interface` argument is deprecated and will be "
                "removed from Prefect. Setting it has no effect.",
                UserWarning,
            )

        # Docker networks that containers should be attached to
        self.networks = networks

        # Fall back to 60 when no (or a falsy) timeout was supplied
        self.docker_client_timeout = (
            docker_client_timeout if docker_client_timeout else 60
        )

        self.failed_connections = 0
        self.docker_client = self._get_docker_client()
        self.show_flow_logs = show_flow_logs
        self.processes = []  # type: List[multiprocessing.Process]

        self.reg_allow_list = reg_allow_list
        self.logger.debug("reg_allow_list set to {}".format(self.reg_allow_list))

        # Fail fast if the daemon cannot be reached
        try:
            self.logger.debug("Pinging docker daemon")
            self.docker_client.ping()
        except Exception as err:
            self.logger.exception(
                "Issue connecting to the Docker daemon. Make sure it is running."
            )
            raise err

        # Summarize the effective configuration
        self.logger.debug(f"Base URL: {self.base_url}")
        self.logger.debug(f"No pull: {self.no_pull}")
        self.logger.debug(f"Volumes: {volumes}")
        self.logger.debug(f"Networks: {self.networks}")
Esempio n. 3
0
    def __init__(
        self,
        name: str = None,
        labels: Iterable[str] = None,
        env_vars: dict = None,
        max_polls: int = None,
        agent_address: str = None,
        base_url: str = None,
        no_pull: bool = None,
        volumes: List[str] = None,
        show_flow_logs: bool = False,
        network: str = None,
        docker_interface: bool = True,
    ) -> None:
        """
        Store the Docker-related settings on the agent, build a Docker client
        and ping the daemon to confirm it can be reached.

        Args:
            - name, labels, env_vars, max_polls, agent_address: forwarded unchanged to
                the parent initializer
            - base_url (str, optional): Docker daemon URL; when falsy, the `base_url`
                entry of the context is used, falling back to a platform default
            - no_pull (bool, optional): pull toggle; when falsy, the `no_pull` entry of
                the context is used, falling back to `False`
            - volumes (List[str], optional): volume specs handed to
                `self._parse_volume_spec` (an empty list when falsy)
            - show_flow_logs (bool, optional): stored as `self.show_flow_logs`
            - network (str, optional): stored as `self.network`
            - docker_interface (bool, optional): stored as `self.docker_interface`

        Raises:
            - Exception: whatever `self.docker_client.ping()` raises is logged and
                re-raised
        """
        super().__init__(
            name=name,
            labels=labels,
            env_vars=env_vars,
            max_polls=max_polls,
            agent_address=agent_address,
        )

        # Platform-specific default endpoint for the Docker daemon
        default_url = (
            "npipe:////./pipe/docker_engine"
            if platform == "win32"
            else "unix://var/run/docker.sock"
        )
        self.logger.debug(
            "Platform {} and default docker daemon {}".format(platform, default_url)
        )

        # Daemon URL: explicit argument first, then context, then the default
        daemon_url = base_url
        if not daemon_url:
            daemon_url = context.get("base_url", default_url)
        self.base_url = daemon_url
        self.logger.debug("Base docker daemon url {}".format(self.base_url))

        # Pull toggle: explicit truthy argument first, then context
        skip_pull = no_pull
        if not skip_pull:
            skip_pull = context.get("no_pull", False)
        self.no_pull = skip_pull
        self.logger.debug("no_pull set to {}".format(self.no_pull))

        # Break the volume specs into their three parsed components
        parsed_volumes = self._parse_volume_spec(volumes or [])
        (
            self.named_volumes,
            self.container_mount_paths,
            self.host_spec,
        ) = parsed_volumes

        # Docker network that containers should be attached to
        self.network = network
        self.logger.debug("Docker network set to {}".format(self.network))

        self.docker_interface = docker_interface
        self.logger.debug(
            "Docker interface toggle set to {}".format(self.docker_interface)
        )

        self.failed_connections = 0
        self.docker_client = self._get_docker_client()
        self.show_flow_logs = show_flow_logs
        self.processes = []  # type: List[multiprocessing.Process]

        # Fail fast if the daemon cannot be reached
        try:
            self.logger.debug("Pinging docker daemon")
            self.docker_client.ping()
        except Exception as err:
            self.logger.exception(
                "Issue connecting to the Docker daemon. Make sure it is running."
            )
            raise err
Esempio n. 4
0
    def run(
        self,
        flow_name: str = None,
        project_name: str = None,
        parameters: dict = None,
        run_config: RunConfig = None,
        new_flow_context: dict = None,
        run_name: str = None,
        idempotency_key: str = None,
        scheduled_start_time: datetime.datetime = None,
    ) -> str:
        """
        Schedule a run of the newest unarchived version of a flow, attach a link
        artifact for it and, when `self.wait` is set, block until the run finishes.

        Args:
            - flow_name (str, optional): the name of the flow to schedule; a `ValueError` is
                raised if this is still `None` when the body executes.
                NOTE(review): the fallback to the value given at initialization is not
                performed in this body; presumably a decorator outside this block applies
                it - confirm.
            - project_name (str, optional): the project in which the flow is located; a
                `ValueError` is raised if this is still `None` when the body executes
            - parameters (dict, optional): the parameters to pass to the flow run being
                scheduled
            - run_config (RunConfig, optional): a run-config passed through to the created
                flow run
            - new_flow_context (dict, optional): the optional run context for the new flow run
            - run_name (str, optional): name to be set for the flow run
            - idempotency_key (str, optional): idempotency key for scheduling the flow run;
                when `None`, the `task_run_id` entry of the context is used (which may
                itself be `None`)
            - scheduled_start_time (datetime, optional): passed through to the created
                flow run as its scheduled start time

        Returns:
            - str: the ID of the newly-scheduled flow run (only when `self.wait` is falsy)

        Raises:
            - ValueError: if the flow name or project name is missing, or no matching flow
                is found
            - the signal built by `signal_from_state` for the final state of the created
                run, once it is finished, when `self.wait` is truthy

        Example:
            ```python
            from prefect.tasks.prefect.flow_run import StartFlowRun

            kickoff_task = StartFlowRun(project_name="Hello, World!", flow_name="My Cloud Flow")
            ```

        """
        # Both identifiers are mandatory for the lookup below
        if flow_name is None:
            raise ValueError("Must provide a flow name.")
        if project_name is None:
            raise ValueError("Must provide a project name.")

        # Match unarchived flows with this name inside the given project
        flow_filter = {
            "name": {"_eq": flow_name},
            "archived": {"_eq": False},
            "project": {"name": {"_eq": project_name}},
        }

        # Ask only for the ID of the highest-versioned match
        lookup_args = {
            "where": flow_filter,
            "order_by": {"version": EnumValue("desc")},
            "limit": 1,
        }
        flow_query = {"query": {with_args("flow", lookup_args): {"id"}}}

        client = Client()
        matches = client.graphql(flow_query).data.flow

        # Nothing matched: report it using the requested flow name
        if not matches:
            raise ValueError("Flow '{}' not found.".format(flow_name))

        latest_flow_id = matches[0].id

        # An idempotency key keeps retries of this task from creating extra flow runs;
        # default to the task run ID found in the context when none was supplied.
        run_key = (
            idempotency_key
            if idempotency_key is not None
            else context.get("task_run_id", None)
        )

        flow_run_id = client.create_flow_run(
            flow_id=latest_flow_id,
            parameters=parameters,
            run_config=run_config,
            idempotency_key=run_key,
            context=new_flow_context,
            run_name=run_name,
            scheduled_start_time=scheduled_start_time,
        )

        self.logger.debug(f"Flow Run {flow_run_id} created.")

        self.logger.debug(f"Creating link artifact for Flow Run {flow_run_id}.")
        run_link = client.get_cloud_url("flow-run", flow_run_id, as_user=False)
        # Only the path component of the URL is recorded in the artifact
        link_path = urlparse(run_link).path
        create_link(link_path)

        if not self.wait:
            return flow_run_id

        # Poll every 10 seconds; a finished run is reported by raising the signal
        # that corresponds to its final state.
        while True:
            time.sleep(10)
            current_state = client.get_flow_run_info(flow_run_id).state
            if not current_state.is_finished():
                continue
            signal_cls = signal_from_state(current_state)
            raise signal_cls(f"{flow_run_id} finished in state {current_state}")