Esempio n. 1
0
 async def test_create_compressed_flow(self, run_query, project_id):
     """A flow created from a compressed string is stored under the given project."""
     # Serialize an empty flow without building it, then compress it for the mutation input.
     payload = compress(prefect.Flow(name="test").serialize(build=False))
     mutation_input = dict(serialized_flow=payload, project_id=project_id)

     response = await run_query(
         query=self.create_compressed_flow_mutation,
         variables=dict(input=mutation_input),
     )

     # Read back only the fields asserted on below.
     created_id = response.data.create_flow_from_compressed_string.id
     stored = await models.Flow.where(id=created_id).first({"project_id", "name"})

     assert stored.project_id == project_id
     assert stored.name == "test"
Esempio n. 2
0
    def deploy(
        self,
        flow: "Flow",
        project_name: str,
        build: bool = True,
        set_schedule_active: bool = True,
        compressed: bool = True,
    ) -> str:
        """
        Push a new flow to Prefect Cloud

        Args:
            - flow (Flow): a flow to deploy
            - project_name (str): the project that should contain this flow.
            - build (bool, optional): if `True`, the flow's environment is built
                prior to serialization; defaults to `True`
            - set_schedule_active (bool, optional): if `False`, will set the
                schedule to inactive in the database to prevent auto-scheduling runs
                (if the Flow has a schedule). Defaults to `True`. This can be changed later.
            - compressed (bool, optional): if `True`, the serialized flow will be
                compressed; defaults to `True`

        Returns:
            - str: the ID of the newly-deployed flow

        Raises:
            - ClientError: if the deploy failed, or if the flow has both a schedule
                and required parameters
            - ValueError: if no project named `project_name` is found, or if the
                serialized flow cannot be deserialized again
        """
        # A scheduled flow with required parameters is rejected up front.
        required_parameters = {p for p in flow.parameters() if p.required}
        if flow.schedule is not None and required_parameters:
            raise ClientError(
                "Flows with required parameters can not be scheduled automatically."
            )

        # The compressed and uncompressed payloads go through different mutations.
        if compressed:
            create_mutation = {
                "mutation($input: createFlowFromCompressedStringInput!)": {
                    "createFlowFromCompressedString(input: $input)": {"id"}
                }
            }
        else:
            create_mutation = {
                "mutation($input: createFlowInput!)": {
                    "createFlow(input: $input)": {"id"}
                }
            }

        # Resolve the project name to its ID.
        query_project = {
            "query": {
                with_args("project", {"where": {"name": {"_eq": project_name}}}): {
                    "id": True
                }
            }
        }

        project = self.graphql(query_project).data.project  # type: ignore

        if not project:
            raise ValueError(
                'Project {} not found. Run `client.create_project("{}")` to create it.'
                .format(project_name, project_name))

        serialized_flow = flow.serialize(build=build)  # type: Any

        # verify that the serialized flow can be deserialized
        try:
            prefect.serialization.flow.FlowSchema().load(serialized_flow)
        except Exception as exc:
            # Chain the original error so its traceback is kept as the explicit cause.
            raise ValueError(
                "Flow could not be deserialized successfully. Error was: {}".
                format(repr(exc))) from exc

        if compressed:
            serialized_flow = compress(serialized_flow)
        res = self.graphql(
            create_mutation,
            variables=dict(input=dict(
                projectId=project[0].id,
                serializedFlow=serialized_flow,
                setScheduleActive=set_schedule_active,
            )),
        )  # type: Any

        # The result field is named after whichever mutation was sent.
        flow_id = (res.data.createFlowFromCompressedString.id
                   if compressed else res.data.createFlow.id)
        return flow_id
Esempio n. 3
0
    def register(
        self,
        flow: "Flow",
        project_name: str,
        build: bool = True,
        set_schedule_active: bool = True,
        version_group_id: str = None,
        compressed: bool = True,
        no_url: bool = False,
    ) -> str:
        """
        Push a new flow to Prefect Cloud

        Args:
            - flow (Flow): a flow to register
            - project_name (str): the project that should contain this flow.
            - build (bool, optional): if `True`, the flow's environment is built
                prior to serialization; defaults to `True`
            - set_schedule_active (bool, optional): if `False`, will set the
                schedule to inactive in the database to prevent auto-scheduling runs
                (if the Flow has a schedule). Defaults to `True`. This can be changed later.
            - version_group_id (str, optional): the UUID version group ID to use for
                versioning this Flow in Cloud; if not provided, the version group ID
                associated with this Flow's project and name will be used.
            - compressed (bool, optional): if `True`, the serialized flow will be
                compressed; defaults to `True`
            - no_url (bool, optional): if `True`, the stdout from this function will not
                contain the URL link to the newly-registered flow in the Cloud UI

        Returns:
            - str: the ID of the newly-registered flow

        Raises:
            - ClientError: if the register failed, or if the flow is scheduled and
                some clock does not provide defaults for every required parameter
            - ValueError: if no project named `project_name` is found, or if the
                serialized flow cannot be deserialized again
        """
        required_parameters = {p for p in flow.parameters() if p.required}
        if flow.schedule is not None and required_parameters:
            required_names = {p.name for p in required_parameters}
            # Every clock must supply a default for each required parameter.
            # Subset, not equality: extra defaults for optional parameters are fine.
            if not all(
                required_names <= set(clock.parameter_defaults.keys())
                for clock in flow.schedule.clocks
            ):
                raise ClientError(
                    "Flows with required parameters can not be scheduled automatically."
                )
        # Warn when any edge has a key but the flow has no result handler.
        if any(e.key for e in flow.edges) and flow.result_handler is None:
            warnings.warn(
                "No result handler was specified on your Flow. Cloud features such as input caching and resuming task runs from failure may not work properly.",
                UserWarning,
            )

        # The compressed and uncompressed payloads go through different mutations.
        if compressed:
            create_mutation = {
                "mutation($input: createFlowFromCompressedStringInput!)": {
                    "createFlowFromCompressedString(input: $input)": {"id"}
                }
            }
        else:
            create_mutation = {
                "mutation($input: createFlowInput!)": {
                    "createFlow(input: $input)": {"id"}
                }
            }

        # Resolve the project name to its ID.
        query_project = {
            "query": {
                with_args("project", {"where": {"name": {"_eq": project_name}}}): {
                    "id": True
                }
            }
        }

        project = self.graphql(query_project).data.project  # type: ignore

        if not project:
            raise ValueError(
                'Project {} not found. Run `client.create_project("{}")` to create it.'.format(
                    project_name, project_name
                )
            )

        serialized_flow = flow.serialize(build=build)  # type: Any

        # verify that the serialized flow can be deserialized
        try:
            prefect.serialization.flow.FlowSchema().load(serialized_flow)
        except Exception as exc:
            # Chain the original error so its traceback is kept as the explicit cause.
            raise ValueError(
                "Flow could not be deserialized successfully. Error was: {}".format(
                    repr(exc)
                )
            ) from exc

        if compressed:
            serialized_flow = compress(serialized_flow)
        res = self.graphql(
            create_mutation,
            variables=dict(
                input=dict(
                    projectId=project[0].id,
                    serializedFlow=serialized_flow,
                    setScheduleActive=set_schedule_active,
                    versionGroupId=version_group_id,
                )
            ),
        )  # type: Any

        # The result field is named after whichever mutation was sent.
        flow_id = (
            res.data.createFlowFromCompressedString.id
            if compressed
            else res.data.createFlow.id
        )

        if not no_url:
            # Generate direct link to Cloud flow
            flow_url = self.get_cloud_url("flow", flow_id)

            print("Flow: {}".format(flow_url))

        return flow_id
Esempio n. 4
0
def register_serialized_flow(
    client: "prefect.Client",
    serialized_flow: dict,
    project_id: str,
    force: bool = False,
    schedule: bool = True,
) -> Tuple[str, int, bool]:
    """Register a flow that has already been serialized.

    Args:
        - client (prefect.Client): the prefect client
        - serialized_flow (dict): the serialized flow
        - project_id (str): the project id
        - force (bool, optional): If `False` (default), an idempotency key
            derived from the serialized flow is sent so that unnecessary
            re-registration can be avoided. Set to `True` to force
            re-registration.
        - schedule (bool, optional): If `True` (default) activates the flow
            schedule upon registering.

    Returns:
        - flow_id (str): the flow id
        - flow_version (int): the flow version
        - is_new (bool): True if this is a new flow version, false if
            re-registration was skipped.
    """
    flow_name = serialized_flow["name"]

    # Fetch the highest-version flow with this name in the project. This
    # lookup can be removed once the registration graphql routes return more
    # information.
    latest_flow_filter = {
        "where": {
            "_and": {
                "name": {"_eq": flow_name},
                "project": {"id": {"_eq": project_id}},
            }
        },
        "order_by": {"version": EnumValue("desc")},
        "limit": 1,
    }
    lookup = client.graphql(
        {"query": {with_args("flow", latest_flow_filter): {"id", "version"}}}
    )
    existing = lookup.data.flow
    if existing:
        latest = existing[0]
        prev_id, prev_version = latest.id, latest.version
    else:
        prev_id, prev_version = None, 0

    inputs = dict(
        project_id=project_id,
        serialized_flow=compress(serialized_flow),
        set_schedule_active=schedule,
    )
    if not force:
        # Key-sorted JSON gives a stable digest for equal flow contents.
        canonical = json.dumps(serialized_flow, sort_keys=True)
        inputs["idempotency_key"] = hashlib.sha256(canonical.encode()).hexdigest()

    mutation = {
        "mutation($input: create_flow_from_compressed_string_input!)": {
            "create_flow_from_compressed_string(input: $input)": {"id"}
        }
    }
    res = client.graphql(
        mutation,
        variables=dict(input=inputs),
        retry_on_api_error=False,
    )
    new_id = res.data.create_flow_from_compressed_string.id

    # Getting the previous ID back means no new version was created.
    if new_id == prev_id:
        return new_id, prev_version, False
    return new_id, prev_version + 1, True
Esempio n. 5
0
def test_compression_back_translation(obj):
    """Compressing and then decompressing yields an object equal to the input."""
    round_tripped = decompress(compress(obj))
    assert round_tripped == obj
Esempio n. 6
0
def test_decompress():
    """Decompressing a compressed dict gives back a dict."""
    compressed_payload = compress({"test": 42})
    assert isinstance(decompress(compressed_payload), dict)
Esempio n. 7
0
def test_compress():
    """Compressing a dict produces a string."""
    compressed_payload = compress({"test": 42})
    assert isinstance(compressed_payload, str)
Esempio n. 8
0
    def register(
        self,
        flow: "Flow",
        project_name: str = None,
        build: bool = True,
        set_schedule_active: bool = True,
        version_group_id: str = None,
        compressed: bool = True,
        no_url: bool = False,
    ) -> str:
        """
        Push a new flow to Prefect Cloud

        Args:
            - flow (Flow): a flow to register
            - project_name (str, optional): the project that should contain this flow.
                Required when the configured backend is `"cloud"`.
            - build (bool, optional): if `True`, the flow's environment is built
                prior to serialization; defaults to `True`
            - set_schedule_active (bool, optional): if `False`, will set the schedule to
                inactive in the database to prevent auto-scheduling runs (if the Flow has a
                schedule).  Defaults to `True`. This can be changed later.
            - version_group_id (str, optional): the UUID version group ID to use for versioning
                this Flow in Cloud; if not provided, the version group ID associated with this
                Flow's project and name will be used.
            - compressed (bool, optional): if `True`, the serialized flow will be
                compressed; defaults to `True`
            - no_url (bool, optional): if `True`, the stdout from this function will not
                contain the URL link to the newly-registered flow in the Cloud UI

        Returns:
            - str: the ID of the newly-registered flow

        Raises:
            - ClientError: if the register failed, or if the flow is scheduled and
                some clock does not provide defaults for every required parameter
            - TypeError: if the backend is `"cloud"` and `project_name` is `None`
            - ValueError: if no project named `project_name` is found, or if the
                serialized flow cannot be deserialized again
        """
        required_parameters = {p for p in flow.parameters() if p.required}
        if flow.schedule is not None and required_parameters:
            required_names = {p.name for p in required_parameters}
            # Every clock must supply a default for each required parameter;
            # extra defaults for optional parameters are allowed.
            if not all(
                required_names <= set(clock.parameter_defaults.keys())
                for clock in flow.schedule.clocks
            ):
                raise ClientError(
                    "Flows with required parameters can not be scheduled automatically."
                )
        # Warn when any edge has a key but the flow has no result configured.
        if any(e.key for e in flow.edges) and flow.result is None:
            warnings.warn(
                "No result handler was specified on your Flow. Cloud features such as "
                "input caching and resuming task runs from failure may not work properly."
            )

        # The compressed and uncompressed payloads go through different mutations.
        if compressed:
            create_mutation = {
                "mutation($input: create_flow_from_compressed_string_input!)": {
                    "create_flow_from_compressed_string(input: $input)": {"id"}
                }
            }
        else:
            create_mutation = {
                "mutation($input: create_flow_input!)": {
                    "create_flow(input: $input)": {"id"}
                }
            }

        # The project is only looked up for the cloud backend; otherwise
        # `project_id` is sent as None.
        project = None

        if prefect.config.backend == "cloud":
            if project_name is None:
                raise TypeError(
                    "'project_name' is a required field when registering a flow with Cloud. "
                    "If you are attempting to register a Flow with a local Prefect server "
                    "you may need to run `prefect backend server` first."
                )

            query_project = {
                "query": {
                    with_args("project", {"where": {"name": {"_eq": project_name}}}): {
                        "id": True
                    }
                }
            }

            project = self.graphql(query_project).data.project  # type: ignore

            if not project:
                raise ValueError(
                    'Project {} not found. Run `client.create_project("{}")` to create it.'.format(
                        project_name, project_name
                    )
                )

        # Serialize first, with the requested build setting. The metadata updates
        # below happen afterwards and re-serialize without building again.
        # NOTE(review): presumably the Docker image name is only final after the
        # build step - confirm against the storage implementation.
        serialized_flow = flow.serialize(build=build)  # type: Any

        # Set Docker storage image in environment metadata if provided
        if isinstance(flow.storage, prefect.environments.storage.Docker):
            flow.environment.metadata["image"] = flow.storage.name
            serialized_flow = flow.serialize(build=False)

        # If no image ever set, default metadata to all_extras image on current version
        if not flow.environment.metadata.get("image"):
            # Keep only the part of the version string before any "+".
            version = prefect.__version__.split("+")[0]
            flow.environment.metadata[
                "image"
            ] = f"prefecthq/prefect:all_extras-{version}"
            serialized_flow = flow.serialize(build=False)

        # verify that the serialized flow can be deserialized
        try:
            prefect.serialization.flow.FlowSchema().load(serialized_flow)
        except Exception as exc:
            # Chain the original error so its traceback is kept as the explicit cause.
            raise ValueError(
                "Flow could not be deserialized successfully. Error was: {}".format(
                    repr(exc)
                )
            ) from exc

        if compressed:
            serialized_flow = compress(serialized_flow)
        res = self.graphql(
            create_mutation,
            variables=dict(
                input=dict(
                    project_id=(project[0].id if project else None),
                    serialized_flow=serialized_flow,
                    set_schedule_active=set_schedule_active,
                    version_group_id=version_group_id,
                )
            ),
        )  # type: Any

        # The result field is named after whichever mutation was sent.
        flow_id = (
            res.data.create_flow_from_compressed_string.id
            if compressed
            else res.data.create_flow.id
        )

        if not no_url:
            # Generate direct link to Cloud flow
            flow_url = self.get_cloud_url("flow", flow_id)

            print("Flow: {}".format(flow_url))

        return flow_id