def get_flow(self, flow_location: str) -> "Flow":
    """
    Given a flow_location within this Storage object, returns the
    underlying Flow (if possible).

    Args:
        - flow_location (str): the location of a flow within this Storage; in
            this case, a file path where a Flow has been serialized to

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if the flow is not contained in this storage
        - StorageError: if the flow's blob is missing from the bucket
    """
    # self.flows maps flow name -> storage location, so membership is on values
    if flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")
    bucket = self._gcs_client.get_bucket(self.bucket)
    self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))
    # get_blob returns None (rather than raising) when the object is absent
    blob = bucket.get_blob(flow_location)
    if not blob:
        raise StorageError(
            "Flow not found in bucket: flow={} bucket={}".format(
                flow_location, self.bucket
            )
        )
    # Support GCS < 1.31, which only provides the deprecated download_as_string
    content = (
        blob.download_as_bytes()
        if hasattr(blob, "download_as_bytes")
        else blob.download_as_string()
    )
    return cloudpickle.loads(content)
def get_flow(self, flow_name: str) -> "Flow":
    """
    Given a flow name within this Storage object, load and return the Flow.

    Args:
        - flow_name (str): the name of the flow to return.

    Returns:
        - Flow: the requested flow
    """
    # Names not registered with this Storage cannot be resolved to a location
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")
    flow_location = self.flows[flow_name]

    bucket = self._gcs_client.get_bucket(self.bucket)
    self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

    # get_blob yields None when the object does not exist in the bucket
    blob = bucket.get_blob(flow_location)
    if not blob:
        raise StorageError(
            "Flow not found in bucket: flow={} bucket={}".format(
                flow_location, self.bucket
            )
        )

    # Support GCS < 1.31: fall back to the older download_as_string API
    downloader = getattr(blob, "download_as_bytes", None)
    if downloader is None:
        downloader = blob.download_as_string
    content = downloader()

    if self.stored_as_script:
        return extract_flow_from_file(file_contents=content, flow_name=flow_name)
    return flow_from_bytes_pickle(content)
def flow_from_bytes_pickle(data: bytes) -> "Flow":
    """Load a flow from bytes."""
    # Newer prefect versions wrap the pickle in a JSON envelope that also
    # records the library versions the flow was built with.
    try:
        info = json.loads(data.decode("utf-8"))
    except Exception:
        # Serialized using older version of prefect, use cloudpickle directly
        flow_bytes = data
        reg_versions = {}
    else:
        flow_bytes = binascii.a2b_base64(info["flow"])
        reg_versions = info["versions"]

    run_versions = _get_versions()

    try:
        flow = cloudpickle.loads(flow_bytes)
    except Exception as exc:
        msg_lines = ["An error occurred while unpickling the flow:", f" {exc!r}"]
        # Check for mismatched versions to provide a better error if possible
        mismatches = []
        for name in sorted(reg_versions):
            if name not in run_versions:
                continue
            built_with = reg_versions[name]
            running_with = run_versions[name]
            if LooseVersion(built_with) != running_with:
                mismatches.append(
                    f" - {name}: (flow built with {built_with!r}, currently running with {running_with!r})"
                )
        if mismatches:
            msg_lines.append(
                "This may be due to one of the following version mismatches between "
                "the flow build and execution environments:"
            )
            msg_lines.extend(mismatches)
        if isinstance(exc, ImportError):
            # If it's an import error, also note that the user may need to package
            # their dependencies
            prefix = "This also may" if mismatches else "This may"
            msg_lines.append(
                f"{prefix} be due to a missing Python module in your current "
                "environment. Please ensure you have all required flow "
                "dependencies installed."
            )
        raise StorageError("\n".join(msg_lines)) from exc

    # If we didn't error above, still check that the prefect versions match
    # and warn if they don't. Prefect version mismatches *may* work, but
    # they may also error later leading to confusing behavior.
    run_prefect = run_versions["prefect"]
    reg_prefect = reg_versions.get("prefect")
    if reg_prefect and LooseVersion(reg_prefect) != run_prefect:
        warnings.warn(
            f"This flow was built using Prefect {reg_prefect!r}, but you currently "
            f"have Prefect {run_prefect!r} installed. We recommend loading flows "
            "with the same Prefect version they were built with, failure to do so "
            "may result in errors."
        )
    return flow
def get_flow(self, flow_location: str = None) -> "Flow":
    """
    Given a flow_location within this Storage object, returns the
    underlying Flow (if possible).

    Args:
        - flow_location (str, optional): the location of a flow within this
            Storage; in this case, a file path where a Flow has been serialized
            to. Will use `key` if not provided.

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if the flow is not contained in this storage
    """
    # Resolve the storage location: an explicit argument wins, otherwise
    # fall back to the configured key.
    if not flow_location:
        if not self.key:
            raise ValueError("No flow location provided")
        flow_location = self.key
    elif flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    bucket = self._gcs_client.get_bucket(self.bucket)
    self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

    # get_blob yields None when the object is absent from the bucket
    blob = bucket.get_blob(flow_location)
    if not blob:
        raise StorageError(
            "Flow not found in bucket: flow={} bucket={}".format(
                flow_location, self.bucket
            )
        )

    # Support GCS < 1.31: prefer download_as_bytes, fall back to the older API
    fetch = getattr(blob, "download_as_bytes", None)
    if fetch is None:
        fetch = blob.download_as_string
    content = fetch()

    if self.stored_as_script:
        return extract_flow_from_file(file_contents=content)
    return flow_from_bytes_pickle(content)