Code example #1
File: docker.py  Project: alexkoay/prefect
    def get_flow(self, flow_location: str = None) -> "prefect.core.flow.Flow":
        """
        Given a file path within this Docker container, returns the underlying Flow.
        Note that this method should only be run _within_ the container itself.

        Args:
            - flow_location (str, optional): the file path of a flow within this container. Will use
                `path` if not provided.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if the flow is not contained in this storage
        """
        if flow_location:
            if flow_location not in self.flows.values():
                raise ValueError("Flow is not contained in this Storage")
        elif self.path:
            flow_location = self.path
        else:
            raise ValueError("No flow location provided")

        if self.stored_as_script:
            return extract_flow_from_file(file_path=flow_location)

        with open(flow_location, "rb") as f:
            return flow_from_bytes_pickle(f.read())
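For reference, a minimal usage sketch of this Docker storage pattern (not part of the quoted project code): the storage records each flow's path inside the image, and get_flow is only meaningful inside the built container. Registry, image, and flow names are placeholders, and the assumption that add_flow returns the in-image path is mine.

from prefect import Flow
from prefect.storage import Docker

storage = Docker(
    registry_url="registry.example.com",  # placeholder registry
    image_name="example-flows",
    image_tag="latest",
)

flow = Flow("example-flow")
path_in_image = storage.add_flow(flow)  # assumed to return the path the pickled flow occupies in the image

# Inside the running container, where this storage object is rehydrated,
# the runner resolves the flow by that path:
#     storage.get_flow(path_in_image)
# which unpickles the flow file, or extracts it from a script when
# stored_as_script=True.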
Code example #2
File: azure.py  Project: alexkoay/prefect
    def get_flow(self, flow_location: str = None) -> "Flow":
        """
        Given a flow_location within this Storage object, returns the underlying Flow (if possible).

        Args:
            - flow_location (str, optional): the location of a flow within this Storage; in this case,
                a file path where a Flow has been serialized to. Will use `blob_name` if not provided.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if the flow is not contained in this storage
        """
        if flow_location:
            if flow_location not in self.flows.values():
                raise ValueError("Flow is not contained in this Storage")
        elif self.blob_name:
            flow_location = self.blob_name
        else:
            raise ValueError("No flow location provided")

        client = self._azure_block_blob_service.get_blob_client(
            container=self.container, blob=flow_location)

        self.logger.info("Downloading {} from {}".format(
            flow_location, self.container))

        content = client.download_blob().content_as_bytes()

        if self.stored_as_script:
            return extract_flow_from_file(
                file_contents=content)  # type: ignore

        return flow_from_bytes_pickle(content)
Code example #3
File: gcs.py  Project: thomasfrederikhoeck/prefect
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Given a flow name within this Storage object, load and return the Flow.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        flow_location = self.flows[flow_name]

        bucket = self._gcs_client.get_bucket(self.bucket)

        self.logger.info("Downloading {} from {}".format(
            flow_location, self.bucket))

        blob = bucket.get_blob(flow_location)
        if not blob:
            raise FlowStorageError(
                "Flow not found in bucket: flow={} bucket={}".format(
                    flow_location, self.bucket))
        # Support GCS < 1.31
        content = (blob.download_as_bytes() if hasattr(
            blob, "download_as_bytes") else blob.download_as_string())

        if self.stored_as_script:
            return extract_flow_from_file(file_contents=content,
                                          flow_name=flow_name)

        return flow_from_bytes_pickle(content)
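For reference, a minimal round-trip sketch of the name-based GCS API above (not part of the quoted project code): bucket and flow names are placeholders, and both build and get_flow need credentials for a real bucket.

from prefect import Flow
from prefect.storage import GCS

storage = GCS(bucket="my-bucket")  # placeholder bucket

flow = Flow("example-flow")
storage.add_flow(flow)  # records the object key this flow will be uploaded under
storage.build()         # uploads the pickled flow to the bucket

# Later, e.g. in an agent process with access to the same bucket:
loaded = storage.get_flow("example-flow")
assert loaded.name == "example-flow"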
Code example #4
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Given a flow name within this Storage object, load and return the Flow.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        flow_location = self.flows[flow_name]

        # check if the path given is a file path
        if os.path.isfile(flow_location):
            if self.stored_as_script:
                return extract_flow_from_file(
                    file_path=flow_location, flow_name=flow_name
                )
            else:
                with open(flow_location, "rb") as f:
                    return flow_from_bytes_pickle(f.read())
        # otherwise the path is given in the module format
        else:
            return extract_flow_from_module(
                module_str=flow_location, flow_name=flow_name
            )
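For reference, a sketch of the two resolution branches above, calling the helper functions directly with the same keyword arguments used in the method. The file path and module path are hypothetical, and the import location is an assumption based on recent Prefect 0.x/1.x layouts.

from prefect.utilities.storage import (
    extract_flow_from_file,
    extract_flow_from_module,
)

# Branch 1: the recorded location is a path to a script on disk.
flow = extract_flow_from_file(file_path="flows/example_flow.py",
                              flow_name="example-flow")

# Branch 2: the recorded location is an importable module path.
flow = extract_flow_from_module(module_str="myproject.flows",
                                flow_name="example-flow")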
Code example #5
File: azure.py  Project: omarbelkady/prefect
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Given a flow name within this Storage object, load and return the Flow.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        flow_location = self.flows[flow_name]
        try:
            client = self._azure_block_blob_service.get_blob_client(
                container=self.container, blob=flow_location
            )

            self.logger.info(
                "Downloading {} from {}".format(flow_location, self.container)
            )

            content = client.download_blob().content_as_bytes()
        except Exception as err:
            self.logger.error("Error downloading Flow from Azure: {}".format(err))
            raise
        if self.stored_as_script:
            return extract_flow_from_file(file_contents=content, flow_name=flow_name)  # type: ignore

        return flow_from_bytes_pickle(content)
Code example #6
File: webhook.py  Project: alexkoay/prefect
    def get_flow(self, flow_location: str = "placeholder") -> "Flow":
        """
        Get the flow from storage. This method will call
        `cloudpickle.loads()` on the binary content of the flow, so it
        should only be called in an environment with all of the flow's
        dependencies.

        Args:
            - flow_location (str): This argument is included to comply with the
                interface used by other storage objects, but it has no meaning
                for `Webhook` storage, since `Webhook` only corresponds to a
                single flow. Ignore it.

        Raises:
            - requests.exceptions.HTTPError if getting the flow fails
        """
        self.logger.info("Retrieving flow")
        req_function = self._method_to_function[
            self.get_flow_request_http_method]

        get_flow_request_kwargs = _render_dict(self.get_flow_request_kwargs)

        response = req_function(**get_flow_request_kwargs)  # type: ignore
        response.raise_for_status()

        if self.stored_as_script:
            flow_script_content = response.content.decode("utf-8")
            return extract_flow_from_file(
                file_contents=flow_script_content)  # type: ignore

        return flow_from_bytes_pickle(response.content)
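For reference, a hedged construction sketch for this Webhook storage: the get_flow_request_* parameter names mirror the attributes used in the method above, while the build_request_* names and the URL are assumptions and placeholders for the symmetric upload side, not values taken from the quoted code.

from prefect import Flow
from prefect.storage import Webhook

storage = Webhook(
    build_request_kwargs={"url": "https://example.com/flows/example-flow"},
    build_request_http_method="POST",
    get_flow_request_kwargs={"url": "https://example.com/flows/example-flow"},
    get_flow_request_http_method="GET",
)

flow = Flow("example-flow")
storage.add_flow(flow)
storage.build()              # uploads the pickled flow via the build request

loaded = storage.get_flow()  # issues the GET request and unpickles the payload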
Code example #7
File: webhook.py  Project: zviri/prefect
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Given a flow name within this Storage object, load and return the Flow.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")

        req_function = self._method_to_function[
            self.get_flow_request_http_method]

        get_flow_request_kwargs = _render_dict(self.get_flow_request_kwargs)

        response = req_function(**get_flow_request_kwargs)  # type: ignore
        response.raise_for_status()

        if self.stored_as_script:
            flow_script_content = response.content.decode("utf-8")
            return extract_flow_from_file(file_contents=flow_script_content,
                                          flow_name=flow_name)

        return flow_from_bytes_pickle(response.content)
Code example #8
    def test_flow_from_bytes_loads_raw_pickle(self):
        """Older versions of prefect serialized flows as straight pickle bytes.
        This checks that we can still deserialize these payloads"""
        s = cloudpickle.dumps(Flow("test"))
        flow = flow_from_bytes_pickle(s)
        assert isinstance(flow, Flow)
        assert flow.name == "test"
Code example #9
    def test_flow_from_bytes_warns_prefect_version_mismatch(self, monkeypatch):
        s = flow_to_bytes_pickle(Flow("test"))
        monkeypatch.setattr(prefect, "__version__", "0.1.0")
        with pytest.warns(UserWarning, match="This flow was built using Prefect"):
            flow = flow_from_bytes_pickle(s)
        assert isinstance(flow, Flow)
        assert flow.name == "test"
Code example #10
    def test_flow_from_bytes_error(self, monkeypatch, version_mismatch,
                                   import_error):
        exc = ImportError("mymodule") if import_error else ValueError("Oh no!")
        flow = Flow("test", tasks=[RaiseOnLoad(exc)])
        s = flow_to_bytes_pickle(flow)

        if version_mismatch:
            monkeypatch.setattr(prefect, "__version__", "0.0.1")
            monkeypatch.setattr(cloudpickle, "__version__", "0.0.2")

        with pytest.raises(FlowStorageError,
                           match="An error occurred while unpickling") as exc:
            flow_from_bytes_pickle(s)

        msg = "mymodule" if import_error else "Oh no!"
        assert msg in str(exc.value)

        # Extra components only present if relevant
        assert ("missing Python module" in str(exc.value)) == import_error
        assert ("version mismatches" in str(exc.value)) == version_mismatch
Code example #11
    def get_flow(self, flow_location: str = None) -> "Flow":
        """
        Given a flow_location within this Storage object or S3, returns the underlying Flow
        (if possible).

        Args:
            - flow_location (str, optional): the location of a flow within this Storage; in this case
                an S3 object key where a Flow has been serialized to. Will use `key` if not provided.

        Returns:
            - Flow: the requested Flow

        Raises:
            - ValueError: if the flow is not contained in this storage
            - botocore.ClientError: if there is an issue downloading the Flow from S3
        """
        if flow_location:
            if flow_location not in self.flows.values():
                raise ValueError("Flow is not contained in this Storage")
        elif self.key:
            flow_location = self.key
        else:
            raise ValueError("No flow location provided")

        stream = io.BytesIO()

        self.logger.info("Downloading {} from {}".format(
            flow_location, self.bucket))

        # Download stream from S3
        from botocore.exceptions import ClientError

        try:
            self._boto3_client.download_fileobj(Bucket=self.bucket,
                                                Key=flow_location,
                                                Fileobj=stream)
        except ClientError as err:
            self.logger.error("Error downloading Flow from S3: {}".format(err))
            raise err

        # prepare data and return
        stream.seek(0)
        output = stream.read()

        if self.stored_as_script:
            return extract_flow_from_file(file_contents=output)  # type: ignore

        return flow_from_bytes_pickle(output)
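For reference, a minimal round-trip sketch of the S3 variant above (not part of the quoted project code): bucket and key are placeholders, and build and get_flow need AWS credentials with access to the bucket.

from prefect import Flow
from prefect.storage import S3

storage = S3(bucket="my-bucket", key="flows/example-flow.prefect")  # placeholders

flow = Flow("example-flow")
storage.add_flow(flow)  # records the key this flow is stored under
storage.build()         # uploads the pickled flow with upload_fileobj

# get_flow() with no argument falls back to the configured `key`:
loaded = storage.get_flow()
assert loaded.name == "example-flow"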
Code example #12
File: gcs.py  Project: alexkoay/prefect
    def get_flow(self, flow_location: str = None) -> "Flow":
        """
        Given a flow_location within this Storage object, returns the underlying Flow (if possible).

        Args:
            - flow_location (str, optional): the location of a flow within this Storage; in this case,
                a file path where a Flow has been serialized to. Will use `key` if not provided.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if the flow is not contained in this storage
        """
        if flow_location:
            if flow_location not in self.flows.values():
                raise ValueError("Flow is not contained in this Storage")
        elif self.key:
            flow_location = self.key
        else:
            raise ValueError("No flow location provided")

        bucket = self._gcs_client.get_bucket(self.bucket)

        self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

        blob = bucket.get_blob(flow_location)
        if not blob:
            raise StorageError(
                "Flow not found in bucket: flow={} bucket={}".format(
                    flow_location, self.bucket
                )
            )
        # Support GCS < 1.31
        content = (
            blob.download_as_bytes()
            if hasattr(blob, "download_as_bytes")
            else blob.download_as_string()
        )

        if self.stored_as_script:
            return extract_flow_from_file(file_contents=content)

        return flow_from_bytes_pickle(content)
Code example #13
File: test_s3_storage.py  Project: alexkoay/prefect
def test_upload_flow_to_s3_flow_byte_stream(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(upload_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.storage.S3._boto3_client", boto3)

    storage = S3(bucket="bucket")

    f = Flow("test")
    assert storage.add_flow(f)
    assert storage.build()

    flow_as_bytes = boto3.upload_fileobj.call_args[0][0]
    assert isinstance(flow_as_bytes, io.BytesIO)

    new_flow = flow_from_bytes_pickle(flow_as_bytes.read())
    assert new_flow.name == "test"

    state = new_flow.run()
    assert state.is_successful()
Code example #14
File: docker.py  Project: zschumacher/prefect
    def get_flow(self, flow_name: str) -> "prefect.core.flow.Flow":
        """
        Given a flow name within this Storage object, load and return the Flow.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        flow_location = self.flows[flow_name]

        if self.stored_as_script:
            return extract_flow_from_file(file_path=flow_location,
                                          flow_name=flow_name)

        with open(flow_location, "rb") as f:
            return flow_from_bytes_pickle(f.read())
Code example #15
File: test_gcs_storage.py  Project: alexkoay/prefect
    def test_put_get_and_run_single_flow_to_gcs(self, google_client):
        blob_mock = MagicMock()
        bucket_mock = MagicMock(blob=MagicMock(return_value=blob_mock))
        google_client.return_value.get_bucket = MagicMock(
            return_value=bucket_mock)

        storage = GCS(bucket="awesome-bucket")

        f = Flow("awesome-flow")
        assert f.name not in storage
        assert storage.add_flow(f)
        assert f.name in storage
        assert storage.build()

        flow_as_bytes = blob_mock.upload_from_string.call_args[0][0]
        new_flow = flow_from_bytes_pickle(flow_as_bytes)
        assert new_flow.name == "awesome-flow"

        state = new_flow.run()
        assert state.is_successful()
Code example #16
File: s3.py  Project: laisbsc/prefect
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Given a flow name within this Storage object, load and return the Flow.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        flow_location = self.flows[flow_name]

        stream = io.BytesIO()

        self.logger.info("Downloading {} from {}".format(
            flow_location, self.bucket))

        # Download stream from S3
        from botocore.exceptions import ClientError

        try:
            self._boto3_client.download_fileobj(Bucket=self.bucket,
                                                Key=flow_location,
                                                Fileobj=stream)
        except ClientError as err:
            self.logger.error("Error downloading Flow from S3: {}".format(err))
            raise err

        # prepare data and return
        stream.seek(0)
        output = stream.read()

        if self.stored_as_script:
            return extract_flow_from_file(file_contents=output,
                                          flow_name=flow_name)  # type: ignore

        return flow_from_bytes_pickle(output)
Code example #17
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Given a flow name within this Storage object, load and return the Flow.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        key = self.flows[flow_name]

        self.logger.info(f"Downloading flow from s3://{self.bucket}/{key}")

        try:
            obj = self._boto3_client.get_object(Bucket=self.bucket, Key=key)
            body = obj["Body"]
            with closing(body):
                output = body.read()
        except Exception as err:
            self.logger.error("Error downloading Flow from S3: {}".format(err))
            raise

        self.logger.info(
            "Flow successfully downloaded. ETag: %s, LastModified: %s, VersionId: %s",
            obj["ETag"],
            obj["LastModified"].isoformat(),
            obj.get("VersionId"),
        )

        if self.stored_as_script:
            return extract_flow_from_file(file_contents=output,
                                          flow_name=flow_name)  # type: ignore

        return flow_from_bytes_pickle(output)
Code example #18
    def get_flow(self, flow_location: str = None) -> "Flow":
        """
        Given a flow_location within this Storage object, returns the underlying Flow (if possible).

        Args:
            - flow_location (str, optional): the location of a flow within this Storage; in this case,
                a file path or python path where a Flow has been serialized to. Will use `path`
                if not provided.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if the flow is not contained in this storage
        """
        if flow_location:
            if flow_location not in self.flows.values():
                raise ValueError("Flow is not contained in this Storage")
        elif self.path:
            flow_location = self.path
        else:
            raise ValueError("No flow location provided")

        # check if the path given is a file path
        try:
            if os.path.isfile(flow_location):
                if self.stored_as_script:
                    return extract_flow_from_file(file_path=flow_location)
                else:
                    with open(flow_location, "rb") as f:
                        return flow_from_bytes_pickle(f.read())
            # otherwise the path is given in the module format
            else:
                return extract_flow_from_module(module_str=flow_location)
        except Exception:
            self.logger.exception(f"Failed to load Flow from {flow_location}")
            raise
Code example #19
File: test_s3_storage.py  Project: zviri/prefect
def test_upload_flow_to_s3(s3_client, key):
    storage = S3(bucket="bucket", key=key)

    f = Flow("test")

    key_used = storage.add_flow(f)
    if key is not None:
        assert key_used == key

    assert storage.build() is storage

    assert s3_client.upload_fileobj.called

    assert s3_client.upload_fileobj.call_args[1]["Bucket"] == "bucket"
    assert s3_client.upload_fileobj.call_args[1]["Key"] == key_used

    flow_as_bytes = s3_client.upload_fileobj.call_args[0][0]
    assert isinstance(flow_as_bytes, io.BytesIO)

    new_flow = flow_from_bytes_pickle(flow_as_bytes.read())
    assert new_flow.name == "test"

    state = new_flow.run()
    assert state.is_successful()
Code example #20
    def test_serialize_deserialize(self):
        s = flow_to_bytes_pickle(Flow("test"))
        assert isinstance(s, bytes)
        flow = flow_from_bytes_pickle(s)
        assert isinstance(flow, Flow)
        assert flow.name == "test"
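For reference, the same round-trip outside a test class; the import path is an assumption based on recent Prefect releases, where these helpers live in prefect.utilities.storage.

from prefect import Flow
from prefect.utilities.storage import flow_to_bytes_pickle, flow_from_bytes_pickle

payload = flow_to_bytes_pickle(Flow("test"))  # bytes, with version metadata attached
flow = flow_from_bytes_pickle(payload)        # warns on a Prefect version mismatch, per the test above

assert isinstance(payload, bytes)
assert flow.name == "test"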