def get_flow(self, flow_location: str = None) -> "prefect.core.flow.Flow":
    """
    Load the Flow stored at a file path inside this Docker container.

    The location is read from the local filesystem, so this is only
    meaningful when called from within the container itself.

    Args:
        - flow_location (str, optional): path of a flow file within this
            container; falls back to `path` when not provided

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_location` is not one of this storage's known
            locations, or if no location is given and `path` is not set
    """
    if not flow_location:
        # Nothing passed in: fall back to the storage-level default path.
        if not self.path:
            raise ValueError("No flow location provided")
        flow_location = self.path
    elif flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    # Script-based storage hands the path to the script loader directly.
    if self.stored_as_script:
        return extract_flow_from_file(file_path=flow_location)

    # Otherwise the file holds serialized flow bytes.
    with open(flow_location, "rb") as flow_file:
        serialized = flow_file.read()
    return flow_from_bytes_pickle(serialized)
def get_flow(self, flow_location: str = None) -> "Flow":
    """
    Download a flow from the Azure blob container and return it.

    Args:
        - flow_location (str, optional): name of the blob holding the flow;
            falls back to `blob_name` when not provided

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_location` is not one of this storage's known
            locations, or if no location is given and `blob_name` is not set
    """
    if not flow_location:
        # Nothing passed in: fall back to the storage-level default blob.
        if not self.blob_name:
            raise ValueError("No flow location provided")
        flow_location = self.blob_name
    elif flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    blob_client = self._azure_block_blob_service.get_blob_client(
        container=self.container, blob=flow_location
    )
    self.logger.info("Downloading {} from {}".format(flow_location, self.container))
    payload = blob_client.download_blob().content_as_bytes()

    # Pickled storage is the plain case; scripts go through the file loader.
    if not self.stored_as_script:
        return flow_from_bytes_pickle(payload)
    return extract_flow_from_file(file_contents=payload)  # type: ignore
def get_flow(self, flow_name: str) -> "Flow":
    """
    Download the named flow from the GCS bucket and return it.

    Args:
        - flow_name (str): the name of the flow to return

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
        - FlowStorageError: if the bucket has no blob at the flow's location
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")
    blob_key = self.flows[flow_name]

    gcs_bucket = self._gcs_client.get_bucket(self.bucket)
    self.logger.info("Downloading {} from {}".format(blob_key, self.bucket))

    blob = gcs_bucket.get_blob(blob_key)
    if not blob:
        raise FlowStorageError(
            "Flow not found in bucket: flow={} bucket={}".format(blob_key, self.bucket)
        )

    # Support GCS < 1.31: fall back to `download_as_string` when the blob
    # object does not expose `download_as_bytes`.
    if hasattr(blob, "download_as_bytes"):
        content = blob.download_as_bytes()
    else:
        content = blob.download_as_string()

    if not self.stored_as_script:
        return flow_from_bytes_pickle(content)
    return extract_flow_from_file(file_contents=content, flow_name=flow_name)
def get_flow(self, flow_name: str) -> "Flow":
    """
    Load the named flow from a local file or from an importable module.

    Args:
        - flow_name (str): the name of the flow to return

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")
    location = self.flows[flow_name]

    # Anything that is not an existing file is treated as a module path.
    if not os.path.isfile(location):
        return extract_flow_from_module(module_str=location, flow_name=flow_name)

    if self.stored_as_script:
        return extract_flow_from_file(file_path=location, flow_name=flow_name)

    # Existing file that is not a script: read the serialized flow bytes.
    with open(location, "rb") as flow_file:
        serialized = flow_file.read()
    return flow_from_bytes_pickle(serialized)
def get_flow(self, flow_name: str) -> "Flow":
    """
    Download the named flow from the Azure blob container and return it.

    Any exception raised while downloading is logged and then re-raised
    unchanged.

    Args:
        - flow_name (str): the name of the flow to return

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")
    blob_path = self.flows[flow_name]

    try:
        blob_client = self._azure_block_blob_service.get_blob_client(
            container=self.container, blob=blob_path
        )
        self.logger.info("Downloading {} from {}".format(blob_path, self.container))
        payload = blob_client.download_blob().content_as_bytes()
    except Exception as err:
        # Record the failure, then let the original exception propagate.
        self.logger.error("Error downloading Flow from Azure: {}".format(err))
        raise

    if not self.stored_as_script:
        return flow_from_bytes_pickle(payload)
    return extract_flow_from_file(file_contents=payload, flow_name=flow_name)  # type: ignore
def get_flow(self, flow_location: str = "placeholder") -> "Flow":
    """
    Fetch the flow over HTTP and return it.

    The request function is looked up from `get_flow_request_http_method`
    and called with the rendered `get_flow_request_kwargs`. When the storage
    is not script-based, the response body is handed to
    `flow_from_bytes_pickle`, so this should be called in an environment
    that has all of the flow's dependencies available.

    Args:
        - flow_location (str): accepted only to match the interface of other
            storage objects; this method never reads it

    Returns:
        - Flow: the retrieved flow

    Raises:
        - requests.exceptions.HTTPError if getting the flow fails (raised
            through `response.raise_for_status()`)
    """
    self.logger.info("Retrieving flow")

    http_call = self._method_to_function[self.get_flow_request_http_method]
    rendered_kwargs = _render_dict(self.get_flow_request_kwargs)

    response = http_call(**rendered_kwargs)  # type: ignore
    response.raise_for_status()

    if not self.stored_as_script:
        return flow_from_bytes_pickle(response.content)

    # Script storage: the body is UTF-8 text containing the flow script.
    script_text = response.content.decode("utf-8")
    return extract_flow_from_file(file_contents=script_text)  # type: ignore
def get_flow(self, flow_name: str) -> "Flow":
    """
    Fetch the named flow over HTTP and return it.

    The request function is looked up from `get_flow_request_http_method`
    and called with the rendered `get_flow_request_kwargs`.

    Args:
        - flow_name (str): the name of the flow to return

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")

    http_call = self._method_to_function[self.get_flow_request_http_method]
    rendered_kwargs = _render_dict(self.get_flow_request_kwargs)

    response = http_call(**rendered_kwargs)  # type: ignore
    # Fail on an error status before trying to interpret the body.
    response.raise_for_status()

    if not self.stored_as_script:
        return flow_from_bytes_pickle(response.content)

    # Script storage: the body is UTF-8 text containing the flow script.
    script_text = response.content.decode("utf-8")
    return extract_flow_from_file(file_contents=script_text, flow_name=flow_name)
def test_flow_from_bytes_loads_raw_pickle(self):
    """Older versions of prefect serialized flows as straight pickle bytes.

    Verify that such raw `cloudpickle.dumps` payloads can still be
    deserialized by `flow_from_bytes_pickle`.
    """
    raw_payload = cloudpickle.dumps(Flow("test"))

    loaded = flow_from_bytes_pickle(raw_payload)

    assert isinstance(loaded, Flow)
    assert loaded.name == "test"
def test_flow_from_bytes_warns_prefect_version_mismatch(self, monkeypatch):
    """Loading a flow after `prefect.__version__` changes emits a UserWarning."""
    payload = flow_to_bytes_pickle(Flow("test"))

    # Change the reported prefect version between serializing and loading.
    monkeypatch.setattr(prefect, "__version__", "0.1.0")

    with pytest.warns(UserWarning, match="This flow was built using Prefect"):
        loaded = flow_from_bytes_pickle(payload)

    # The flow must still load despite the warning.
    assert isinstance(loaded, Flow)
    assert loaded.name == "test"
def test_flow_from_bytes_error(self, monkeypatch, version_mismatch, import_error):
    """Unpickling failures surface as FlowStorageError with relevant hints.

    The message must contain the underlying error text, and should mention
    a missing module or version mismatches only when those apply.
    """
    if import_error:
        load_failure = ImportError("mymodule")
        expected_text = "mymodule"
    else:
        load_failure = ValueError("Oh no!")
        expected_text = "Oh no!"

    payload = flow_to_bytes_pickle(Flow("test", tasks=[RaiseOnLoad(load_failure)]))

    if version_mismatch:
        monkeypatch.setattr(prefect, "__version__", "0.0.1")
        monkeypatch.setattr(cloudpickle, "__version__", "0.0.2")

    with pytest.raises(
        FlowStorageError, match="An error occurred while unpickling"
    ) as caught:
        flow_from_bytes_pickle(payload)

    rendered = str(caught.value)
    assert expected_text in rendered
    # Extra components only present if relevant
    assert ("missing Python module" in rendered) == import_error
    assert ("version mismatches" in rendered) == version_mismatch
def get_flow(self, flow_location: str = None) -> "Flow":
    """
    Given a flow_location within this Storage object or S3, returns the
    underlying Flow (if possible).

    Args:
        - flow_location (str, optional): the location of a flow within this
            Storage; in this case an S3 object key where a Flow has been
            serialized to. Will use `key` if not provided.

    Returns:
        - Flow: the requested Flow

    Raises:
        - ValueError: if the flow is not contained in this storage, or if no
            location is given and `key` is not set
        - botocore.ClientError: if there is an issue downloading the Flow
            from S3; the error is logged and re-raised unchanged
    """
    if flow_location:
        if flow_location not in self.flows.values():
            raise ValueError("Flow is not contained in this Storage")
    elif self.key:
        flow_location = self.key
    else:
        raise ValueError("No flow location provided")

    stream = io.BytesIO()

    self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

    # Function-scope import: botocore is only needed once a download is attempted.
    from botocore.exceptions import ClientError

    try:
        self._boto3_client.download_fileobj(
            Bucket=self.bucket, Key=flow_location, Fileobj=stream
        )
    except ClientError as err:
        self.logger.error("Error downloading Flow from S3: {}".format(err))
        # Bare `raise` re-raises the active exception as-is.
        raise

    # `getvalue()` returns the whole buffer regardless of stream position,
    # so no rewind is needed before reading.
    output = stream.getvalue()

    if self.stored_as_script:
        return extract_flow_from_file(file_contents=output)  # type: ignore

    return flow_from_bytes_pickle(output)
def get_flow(self, flow_location: str = None) -> "Flow":
    """
    Download a flow from the GCS bucket and return it.

    Args:
        - flow_location (str, optional): name of the blob holding the flow;
            falls back to `key` when not provided

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_location` is not one of this storage's known
            locations, or if no location is given and `key` is not set
        - StorageError: if the bucket has no blob at the requested location
    """
    if not flow_location:
        # Nothing passed in: fall back to the storage-level default key.
        if not self.key:
            raise ValueError("No flow location provided")
        flow_location = self.key
    elif flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    gcs_bucket = self._gcs_client.get_bucket(self.bucket)
    self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

    blob = gcs_bucket.get_blob(flow_location)
    if not blob:
        raise StorageError(
            "Flow not found in bucket: flow={} bucket={}".format(
                flow_location, self.bucket
            )
        )

    # Support GCS < 1.31: fall back to `download_as_string` when the blob
    # object does not expose `download_as_bytes`.
    if hasattr(blob, "download_as_bytes"):
        content = blob.download_as_bytes()
    else:
        content = blob.download_as_string()

    if not self.stored_as_script:
        return flow_from_bytes_pickle(content)
    return extract_flow_from_file(file_contents=content)
def test_upload_flow_to_s3_flow_byte_stream(monkeypatch):
    """The stream handed to `upload_fileobj` round-trips into a runnable flow."""
    fake_boto3 = MagicMock(upload_fileobj=MagicMock(return_value=MagicMock()))
    monkeypatch.setattr("prefect.storage.S3._boto3_client", fake_boto3)

    storage = S3(bucket="bucket")
    assert storage.add_flow(Flow("test"))
    assert storage.build()

    # The first positional argument of the upload call is the byte stream.
    upload_positional = fake_boto3.upload_fileobj.call_args[0]
    uploaded_stream = upload_positional[0]
    assert isinstance(uploaded_stream, io.BytesIO)

    restored = flow_from_bytes_pickle(uploaded_stream.read())
    assert restored.name == "test"

    assert restored.run().is_successful()
def get_flow(self, flow_name: str) -> "prefect.core.flow.Flow":
    """
    Load the named flow from its file location and return it.

    Args:
        - flow_name (str): the name of the flow to return

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")
    location = self.flows[flow_name]

    if not self.stored_as_script:
        # The file holds serialized flow bytes.
        with open(location, "rb") as flow_file:
            serialized = flow_file.read()
        return flow_from_bytes_pickle(serialized)

    return extract_flow_from_file(file_path=location, flow_name=flow_name)
def test_put_get_and_run_single_flow_to_gcs(self, google_client):
    """A flow added to GCS storage is uploaded as bytes that load and run."""
    fake_blob = MagicMock()
    fake_bucket = MagicMock(blob=MagicMock(return_value=fake_blob))
    google_client.return_value.get_bucket = MagicMock(return_value=fake_bucket)

    storage = GCS(bucket="awesome-bucket")
    flow = Flow("awesome-flow")

    # Membership flips once the flow has been added.
    assert flow.name not in storage
    assert storage.add_flow(flow)
    assert flow.name in storage
    assert storage.build()

    # The first positional argument of the upload call is the serialized flow.
    upload_positional = fake_blob.upload_from_string.call_args[0]
    restored = flow_from_bytes_pickle(upload_positional[0])
    assert restored.name == "awesome-flow"

    assert restored.run().is_successful()
def get_flow(self, flow_name: str) -> "Flow":
    """
    Given a flow name within this Storage object, load and return the Flow.

    The flow is downloaded from S3 into an in-memory buffer, then either
    extracted from script contents or deserialized from bytes.

    Args:
        - flow_name (str): the name of the flow to return.

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
        - botocore.ClientError: if there is an issue downloading the Flow
            from S3; the error is logged and re-raised unchanged
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")
    flow_location = self.flows[flow_name]

    stream = io.BytesIO()

    self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

    # Function-scope import: botocore is only needed once a download is attempted.
    from botocore.exceptions import ClientError

    try:
        self._boto3_client.download_fileobj(
            Bucket=self.bucket, Key=flow_location, Fileobj=stream
        )
    except ClientError as err:
        self.logger.error("Error downloading Flow from S3: {}".format(err))
        # Bare `raise` re-raises the active exception as-is.
        raise

    # `getvalue()` returns the whole buffer regardless of stream position,
    # so no rewind is needed before reading.
    output = stream.getvalue()

    if self.stored_as_script:
        return extract_flow_from_file(file_contents=output, flow_name=flow_name)  # type: ignore

    return flow_from_bytes_pickle(output)
def get_flow(self, flow_name: str) -> "Flow":
    """
    Fetch the named flow from S3 with `get_object` and return it.

    Any exception raised while fetching or reading the object is logged and
    re-raised unchanged. After a successful download the object's ETag,
    LastModified and VersionId are logged.

    Args:
        - flow_name (str): the name of the flow to return

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")
    key = self.flows[flow_name]

    self.logger.info(f"Downloading flow from s3://{self.bucket}/{key}")

    try:
        obj = self._boto3_client.get_object(Bucket=self.bucket, Key=key)
        # `closing` guarantees the response body is closed after reading.
        with closing(obj["Body"]) as body:
            output = body.read()
    except Exception as err:
        self.logger.error("Error downloading Flow from S3: {}".format(err))
        raise

    etag = obj["ETag"]
    last_modified = obj["LastModified"].isoformat()
    # VersionId is read with `.get`, so it is logged as None when absent.
    version_id = obj.get("VersionId")
    self.logger.info(
        "Flow successfully downloaded. ETag: %s, LastModified: %s, VersionId: %s",
        etag,
        last_modified,
        version_id,
    )

    if not self.stored_as_script:
        return flow_from_bytes_pickle(output)
    return extract_flow_from_file(file_contents=output, flow_name=flow_name)  # type: ignore
def get_flow(self, flow_location: str = None) -> "Flow":
    """
    Load a flow from a local file or from an importable module.

    Any exception raised while loading is logged with its traceback and
    re-raised unchanged.

    Args:
        - flow_location (str, optional): a file path or python module path
            for the flow; falls back to `path` when not provided

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_location` is not one of this storage's known
            locations, or if no location is given and `path` is not set
    """
    if not flow_location:
        # Nothing passed in: fall back to the storage-level default path.
        if not self.path:
            raise ValueError("No flow location provided")
        flow_location = self.path
    elif flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    try:
        # Anything that is not an existing file is treated as a module path.
        if not os.path.isfile(flow_location):
            return extract_flow_from_module(module_str=flow_location)

        if self.stored_as_script:
            return extract_flow_from_file(file_path=flow_location)

        # Existing file that is not a script: read the serialized flow bytes.
        with open(flow_location, "rb") as flow_file:
            serialized = flow_file.read()
        return flow_from_bytes_pickle(serialized)
    except Exception:
        self.logger.exception(f"Failed to load Flow from {flow_location}")
        raise
def test_upload_flow_to_s3(s3_client, key):
    """Building S3 storage uploads the flow under the expected bucket and key."""
    storage = S3(bucket="bucket", key=key)

    key_used = storage.add_flow(Flow("test"))
    # An explicitly configured key must be the one the flow is stored under.
    if key is not None:
        assert key_used == key

    assert storage.build() is storage

    upload = s3_client.upload_fileobj
    assert upload.called

    upload_kwargs = upload.call_args[1]
    assert upload_kwargs["Bucket"] == "bucket"
    assert upload_kwargs["Key"] == key_used

    # The first positional argument of the upload call is the byte stream.
    uploaded_stream = upload.call_args[0][0]
    assert isinstance(uploaded_stream, io.BytesIO)

    restored = flow_from_bytes_pickle(uploaded_stream.read())
    assert restored.name == "test"

    assert restored.run().is_successful()
def test_serialize_deserialize(self):
    """A flow survives a `flow_to_bytes_pickle` / `flow_from_bytes_pickle` round trip."""
    payload = flow_to_bytes_pickle(Flow("test"))
    assert isinstance(payload, bytes)

    restored = flow_from_bytes_pickle(payload)
    assert isinstance(restored, Flow)
    assert restored.name == "test"