Example #1: `Storage.get_flow` keyed by a serialized-file location — downloads the pickled flow from a GCS bucket and unpickles it directly.
    def get_flow(self, flow_location: str) -> "Flow":
        """
        Given a flow_location within this Storage object, returns the underlying Flow (if possible).

        Args:
            - flow_location (str): the location of a flow within this Storage; in this case,
                a file path where a Flow has been serialized to

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if the flow is not contained in this storage
            - StorageError: if the object does not exist in the bucket
        """
        # Idiomatic membership test (the original read
        # `if not flow_location in ...`, which parses identically but is
        # harder to read and is flagged by linters as E713).
        if flow_location not in self.flows.values():
            raise ValueError("Flow is not contained in this Storage")

        bucket = self._gcs_client.get_bucket(self.bucket)

        self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

        blob = bucket.get_blob(flow_location)
        if not blob:
            # get_blob returns None (it does not raise) when the object is
            # missing, so surface that as a StorageError here.
            raise StorageError(
                "Flow not found in bucket: flow={} bucket={}".format(
                    flow_location, self.bucket
                )
            )
        # NOTE(review): unpickling downloaded bytes executes arbitrary code —
        # acceptable only because the bucket contents are written by this
        # same Storage and are therefore trusted.
        content = blob.download_as_string()
        return cloudpickle.loads(content)
Example #2 (from gcs.py, project zviri/prefect): `Storage.get_flow` keyed by flow name — adds a GCS < 1.31 download shim and support for flows stored as scripts.
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Load and return the Flow registered in this Storage under ``flow_name``.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if no flow with that name is registered here
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        key = self.flows[flow_name]

        gcs_bucket = self._gcs_client.get_bucket(self.bucket)

        self.logger.info("Downloading {} from {}".format(key, self.bucket))

        blob = gcs_bucket.get_blob(key)
        if not blob:
            raise StorageError(
                "Flow not found in bucket: flow={} bucket={}".format(key, self.bucket)
            )

        # Support GCS < 1.31: older clients only expose download_as_string,
        # which newer releases renamed to download_as_bytes.
        downloader = getattr(blob, "download_as_bytes", None)
        content = downloader() if downloader is not None else blob.download_as_string()

        # Script-based storage re-executes the file; pickle-based storage
        # goes through the versioned unpickling helper.
        if self.stored_as_script:
            return extract_flow_from_file(file_contents=content, flow_name=flow_name)

        return flow_from_bytes_pickle(content)
Example #3: `flow_from_bytes_pickle` — decodes the JSON/base64 envelope (or a raw pickle from older Prefect) and reports library-version mismatches when unpickling fails.
def flow_from_bytes_pickle(data: bytes) -> "Flow":
    """Load a flow from bytes."""
    # Newer Prefect builds wrap the pickled flow in a JSON envelope that also
    # records the library versions present at build time; older builds wrote
    # the raw cloudpickle bytes directly.
    try:
        envelope = json.loads(data.decode("utf-8"))
    except Exception:
        # Serialized using older version of prefect, use cloudpickle directly
        pickled = data
        build_versions = {}
    else:
        pickled = binascii.a2b_base64(envelope["flow"])
        build_versions = envelope["versions"]

    current_versions = _get_versions()

    try:
        flow = cloudpickle.loads(pickled)
    except Exception as exc:
        message = ["An error occurred while unpickling the flow:", f"  {exc!r}"]
        # Check for mismatched versions to provide a better error if possible
        conflicts = [
            f"  - {pkg}: (flow built with {built!r}, currently running with {current_versions[pkg]!r})"
            for pkg, built in sorted(build_versions.items())
            if pkg in current_versions
            and LooseVersion(built) != current_versions[pkg]
        ]
        if conflicts:
            message.append(
                "This may be due to one of the following version mismatches between "
                "the flow build and execution environments:"
            )
            message.extend(conflicts)
        if isinstance(exc, ImportError):
            # If it's an import error, also note that the user may need to package
            # their dependencies
            lead = "This also may" if conflicts else "This may"
            message.append(
                f"{lead} be due to a missing Python module in your current "
                "environment. Please ensure you have all required flow "
                "dependencies installed."
            )
        raise StorageError("\n".join(message)) from exc

    running_prefect = current_versions["prefect"]
    built_prefect = build_versions.get("prefect")
    if built_prefect and LooseVersion(built_prefect) != running_prefect:
        # If we didn't error above, still check that the prefect versions match
        # and warn if they don't. Prefect version mismatches *may* work, but
        # they may also error later leading to confusing behavior.
        warnings.warn(
            f"This flow was built using Prefect {built_prefect!r}, but you currently "
            f"have Prefect {running_prefect!r} installed. We recommend loading flows "
            "with the same Prefect version they were built with, failure to do so "
            "may result in errors."
        )
    return flow
Example #4: `Storage.get_flow` with an optional location argument that falls back to the configured `key` when omitted.
    def get_flow(self, flow_location: str = None) -> "Flow":
        """
        Given a flow_location within this Storage object, returns the underlying Flow (if possible).

        Args:
            - flow_location (str, optional): the location of a flow within this Storage; in this case,
                a file path where a Flow has been serialized to. Will use `key` if not provided.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if the flow is not contained in this storage
        """
        # Resolve the object path: with no explicit location fall back to the
        # configured key; an explicit location must be one this Storage knows.
        if not flow_location:
            if not self.key:
                raise ValueError("No flow location provided")
            flow_location = self.key
        elif flow_location not in self.flows.values():
            raise ValueError("Flow is not contained in this Storage")

        gcs_bucket = self._gcs_client.get_bucket(self.bucket)

        self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

        blob = gcs_bucket.get_blob(flow_location)
        if not blob:
            raise StorageError(
                "Flow not found in bucket: flow={} bucket={}".format(
                    flow_location, self.bucket
                )
            )

        # Support GCS < 1.31
        fetch = getattr(blob, "download_as_bytes", blob.download_as_string)
        content = fetch()

        if self.stored_as_script:
            return extract_flow_from_file(file_contents=content)

        return flow_from_bytes_pickle(content)