Code example #1
    def test_flow_from_bytes_warns_prefect_version_mismatch(self, monkeypatch):
        s = flow_to_bytes_pickle(Flow("test"))
        monkeypatch.setattr(prefect, "__version__", "0.1.0")
        with pytest.warns(UserWarning, match="This flow was built using Prefect"):
            flow = flow_from_bytes_pickle(s)
        assert isinstance(flow, Flow)
        assert flow.name == "test"
Code example #2
    def test_cloudpickle_deserialization_check_passes_and_returns_multiple_objs(self):
        flow_one = flow_to_bytes_pickle(Flow("one"))
        flow_two = flow_to_bytes_pickle(Flow("two"))
        with tempfile.TemporaryDirectory() as tmpdir:
            file_one = os.path.join(tmpdir, "one.flow")
            with open(file_one, "wb") as f:
                f.write(flow_one)

            file_two = os.path.join(tmpdir, "two.flow")
            with open(file_two, "wb") as f:
                f.write(flow_two)

            paths = ["{}".format(file_one), "{}".format(file_two)]
            objs = healthchecks.cloudpickle_deserialization_check(paths)

        assert len(objs) == 2
Code example #3
File: azure.py Project: omarbelkady/prefect
    def build(self) -> "Storage":
        """
        Build the Azure storage object by uploading Flows to an Azure Blob container.
        This will upload all of the flows found in `storage.flows`.

        Returns:
            - Storage: an Azure object that contains information about how and where
                each flow is stored
        """
        self.run_basic_healthchecks()

        if self.stored_as_script:
            if not self.blob_name:
                raise ValueError(
                    "A `blob_name` must be provided to show where flow `.py` file is stored in Azure."
                )
            return self

        for flow_name, flow in self._flows.items():
            data = flow_to_bytes_pickle(flow)

            client = self._azure_block_blob_service.get_blob_client(
                container=self.container, blob=self.flows[flow_name]
            )

            self.logger.info(
                "Uploading {} to {}".format(self.flows[flow_name], self.container)
            )

            client.upload_blob(data, overwrite=self.overwrite)

        return self
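For orientation, a minimal usage sketch of this pickle-based build path (the container name is a placeholder, and the `build()` call is left commented out because the upload itself needs valid Azure credentials):

from prefect import Flow
from prefect.storage import Azure

flow = Flow("etl-example")
flow.storage = Azure(container="my-container")  # placeholder container name
flow.storage.add_flow(flow)                     # registers the flow and its target blob name
# flow.storage.build()  # would pickle the flow with flow_to_bytes_pickle() and
#                       # upload the bytes via client.upload_blob(), as shown above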
Code example #4
File: test_gcs_storage.py Project: alexkoay/prefect
    def test_upload_multiple_flows_to_gcs(self, google_client):
        blob_mock = MagicMock()
        bucket_mock = MagicMock(blob=MagicMock(return_value=blob_mock))
        google_client.return_value.get_bucket = MagicMock(
            return_value=bucket_mock)

        storage = GCS(bucket="awesome-bucket")

        flows = (Flow("awesome-flow-1"), Flow("awesome-flow-2"))
        for f in flows:
            storage.add_flow(f)

        assert storage.build()
        assert bucket_mock.blob.call_count == 2
        assert blob_mock.upload_from_string.call_count == 2

        expected_blob_calls = []
        expected_upload_calls = []
        for f in flows:
            expected_blob_calls.append(call(blob_name=storage.flows[f.name]))
            expected_upload_calls.append(call(flow_to_bytes_pickle(f)))

        # note: we don't upload until build() is called, which iterates over a dictionary that is not ordered in older versions of Python
        bucket_mock.blob.assert_has_calls(expected_blob_calls, any_order=True)
        blob_mock.upload_from_string.assert_has_calls(expected_upload_calls,
                                                      any_order=True)
Code example #5
    def test_cloudpickle_deserialization_check_passes_and_returns_objs(self):
        good_bytes = flow_to_bytes_pickle(Flow("empty"))
        with tempfile.NamedTemporaryFile() as f:
            f.write(good_bytes)
            f.seek(0)
            objs = healthchecks.cloudpickle_deserialization_check(["{}".format(f.name)])

        assert len(objs) == 1

        flow = objs.pop()
        assert isinstance(flow, Flow)
        assert flow.name == "empty"
        assert flow.tasks == set()
Code example #6
def test_upload_flow_to_azure(monkeypatch):
    client = MagicMock(upload_blob=MagicMock())
    service = MagicMock(get_blob_client=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.storage.Azure._azure_block_blob_service",
                        service)

    storage = Azure(container="container")

    f = Flow("test")
    assert f.name not in storage
    assert storage.add_flow(f)
    assert storage.build()
    client.upload_blob.assert_called_once_with(flow_to_bytes_pickle(f),
                                               overwrite=False)
    assert f.name in storage
Code example #7
File: gcs.py Project: alexkoay/prefect
    def build(self) -> "Storage":
        """
        Build the GCS storage object by uploading Flows to a GCS bucket. This will upload
        all of the flows found in `storage.flows`.

        Returns:
            - Storage: a GCS object that contains information about how and where
                each flow is stored
        """
        self.run_basic_healthchecks()

        if self.stored_as_script:
            if self.local_script_path:
                for flow_name, flow in self._flows.items():
                    self.logger.info(
                        "Uploading script {} to {} in {}".format(
                            self.local_script_path, self.flows[flow.name], self.bucket
                        )
                    )

                    bucket = self._gcs_client.get_bucket(self.bucket)
                    blob = bucket.blob(blob_name=self.flows[flow_name])
                    with open(self.local_script_path) as file_obj:
                        blob.upload_from_file(file_obj)
            else:
                if not self.key:
                    raise ValueError(
                        "A `key` must be provided to show where flow `.py` file is stored in GCS or "
                        "provide a `local_script_path` pointing to a local script that contains the "
                        "flow."
                    )
            return self

        for flow_name, flow in self._flows.items():
            content = flow_to_bytes_pickle(flow)

            bucket = self._gcs_client.get_bucket(self.bucket)

            blob = bucket.blob(blob_name=self.flows[flow_name])

            self.logger.info(
                "Uploading {} to {}".format(self.flows[flow_name], self.bucket)
            )

            blob.upload_from_string(content)

        return self
Code example #8
File: test_gcs_storage.py Project: alexkoay/prefect
    def test_upload_single_flow_with_custom_key_to_gcs(self, google_client):
        blob_mock = MagicMock()
        bucket_mock = MagicMock(blob=MagicMock(return_value=blob_mock))
        google_client.return_value.get_bucket = MagicMock(
            return_value=bucket_mock)

        storage = GCS(bucket="awesome-bucket", key="the-best-key")

        f = Flow("awesome-flow")
        assert f.name not in storage
        assert storage.add_flow(f)
        assert f.name in storage
        assert storage.build()

        bucket_mock.blob.assert_called_with(blob_name="the-best-key")
        blob_mock.upload_from_string.assert_called_with(
            flow_to_bytes_pickle(f))
Code example #9
File: test_gcs_storage.py Project: zviri/prefect
    def test_get_flow_from_gcs(self, google_client):
        f = Flow("awesome-flow")
        flow_content = flow_to_bytes_pickle(f)

        blob_mock = MagicMock(download_as_bytes=MagicMock(
            return_value=flow_content))
        bucket_mock = MagicMock(get_blob=MagicMock(return_value=blob_mock))
        google_client.return_value.get_bucket = MagicMock(
            return_value=bucket_mock)

        storage = GCS(bucket="awesome-bucket", key="a-place")
        storage.add_flow(f)

        fetched_flow = storage.get_flow(f.name)

        assert fetched_flow.name == f.name
        bucket_mock.get_blob.assert_called_with("a-place")
        assert blob_mock.download_as_bytes.call_count == 1
Code example #10
    def add_flow(self, flow: "Flow") -> str:
        """
        Method for storing a new flow as bytes in the local filesystem.

        Args:
            - flow (Flow): a Prefect Flow to add

        Returns:
            - str: the location of the newly added flow in this Storage object

        Raises:
            - ValueError: if a flow with the same name is already contained in this storage
        """
        if flow.name in self:
            raise ValueError(
                'Name conflict: Flow with the name "{}" is already present in this storage.'.format(
                    flow.name
                )
            )

        if self.stored_as_script:
            if not self.path:
                raise ValueError(
                    "A `path` must be provided to show where flow `.py` file is stored."
                )
            flow_location = self.path
        else:
            if self.path:
                flow_location = self.path
            else:
                flow_location = os.path.join(
                    self.directory,
                    slugify(flow.name),
                    slugify(pendulum.now("utc").isoformat()),
                )
            os.makedirs(os.path.dirname(flow_location), exist_ok=True)
            with open(flow_location, "wb") as f:
                f.write(flow_to_bytes_pickle(flow))

        self.flows[flow.name] = flow_location
        self._flows[flow.name] = flow
        return flow_location
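A short sketch of how this method is typically called (assumes Prefect 1.x `Local` storage; the temporary directory stands in for the storage's default flows directory):

import tempfile

from prefect import Flow
from prefect.storage import Local

with tempfile.TemporaryDirectory() as tmpdir:
    storage = Local(directory=tmpdir)                 # illustrative directory
    location = storage.add_flow(Flow("example"))      # pickles the flow to disk
    assert "example" in storage
    print(location)  # e.g. <tmpdir>/example/<slugified timestamp>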
Code example #11
    def test_flow_from_bytes_error(self, monkeypatch, version_mismatch,
                                   import_error):
        exc = ImportError("mymodule") if import_error else ValueError("Oh no!")
        flow = Flow("test", tasks=[RaiseOnLoad(exc)])
        s = flow_to_bytes_pickle(flow)

        if version_mismatch:
            monkeypatch.setattr(prefect, "__version__", "0.0.1")
            monkeypatch.setattr(cloudpickle, "__version__", "0.0.2")

        with pytest.raises(FlowStorageError,
                           match="An error occurred while unpickling") as exc:
            flow_from_bytes_pickle(s)

        msg = "mymodule" if import_error else "Oh no!"
        assert msg in str(exc.value)

        # Extra components only present if relevant
        assert ("missing Python module" in str(exc.value)) == import_error
        assert ("version mismatches" in str(exc.value)) == version_mismatch
Code example #12
File: test_s3_storage.py Project: zviri/prefect
def test_get_flow_s3_pickle(s3_client, key):
    f = Flow("test")
    storage = S3(bucket="bucket", key=key)
    key_used = storage.add_flow(f)

    data = flow_to_bytes_pickle(f)

    s3_client.get_object.return_value = {
        "ETag": "my-etag",
        "LastModified": datetime.datetime.now(),
        "Body": io.BytesIO(data),
    }

    f2 = storage.get_flow(f.name)

    assert s3_client.get_object.called
    assert s3_client.get_object.call_args[1]["Bucket"] == "bucket"
    assert s3_client.get_object.call_args[1]["Key"] == key_used

    assert f2.name == f.name
    state = f2.run()
    assert state.is_successful()
Code example #13
File: test_s3_storage.py Project: zviri/prefect
def test_get_flow_s3_logs(s3_client, caplog, version_id):
    f = Flow("test")
    storage = S3(bucket="mybucket")
    key_used = storage.add_flow(f)

    data = flow_to_bytes_pickle(f)

    etag = "my-etag"
    t = datetime.datetime.now()

    s3_client.get_object.return_value = {
        "ETag": etag,
        "LastModified": t,
        "Body": io.BytesIO(data),
    }
    if version_id:
        s3_client.get_object.return_value["VersionId"] = version_id

    storage.get_flow(f.name)
    logs = [l.getMessage() for l in caplog.records]
    assert f"s3://mybucket/{key_used}" in logs[0]
    assert (
        f"ETag: {etag}, LastModified: {t.isoformat()}, VersionId: {version_id}"
        in logs[1])
Code example #14
File: docker.py Project: alexkoay/prefect
    def create_dockerfile_object(self, directory: str) -> str:
        """
        Writes a dockerfile to the provided directory using the specified
        arguments on this Docker storage object.

        In order for the Docker Python library to build a container, it needs a
        Dockerfile that it can use to define the container. This function takes the
        specified arguments then writes them to a temporary file called Dockerfile.

        *Note*: if `files` are added to this container, they will be copied to this
        directory as well.

        Args:
            - directory (str): A directory where the Dockerfile will be created

        Returns:
            - str: the absolute file path to the Dockerfile
        """
        # Either load the base commands from the specified dockerfile or define a FROM
        if self.dockerfile:
            with open(self.dockerfile, "r") as contents:
                base_commands = contents.read()
        else:
            base_commands = "FROM {base_image}".format(base_image=self.base_image)

        # Generate ENV variables to load into the image
        env_vars = ""
        if self.env_vars:
            # Format with repr to get proper quoting
            formatted_vars = [f"{k}={v!r}" for k, v in self.env_vars.items()]
            env_vars = "ENV " + " \\\n    ".join(formatted_vars)

        # Generate single pip install command for python dependencies
        pip_installs = "RUN pip install "
        if self.python_dependencies:
            for dependency in self.python_dependencies:
                pip_installs += "{} ".format(dependency)

        # Write all install-time commands that should be run in the image
        installation_commands = ""
        for cmd in self.installation_commands:
            installation_commands += "RUN {}\n".format(cmd)

        # Copy user specified files into the image
        copy_files = ""
        if self.files:
            for src, dest in self.files.items():
                fname = os.path.basename(src)
                full_fname = os.path.join(directory, fname)
                if os.path.exists(full_fname) and filecmp.cmp(src, full_fname) is False:
                    raise ValueError(
                        "File {fname} already exists in {directory}".format(
                            fname=full_fname, directory=directory
                        )
                    )
                else:
                    if os.path.isdir(src):
                        shutil.copytree(
                            src=src, dst=full_fname, symlinks=False, ignore=None
                        )
                    else:
                        shutil.copy2(src=src, dst=full_fname)
                copy_files += "COPY {fname} {dest}\n".format(
                    fname=full_fname.replace("\\", "/") if self.dockerfile else fname,
                    dest=dest,
                )

        # Write all flows to file and load into the image
        copy_flows = ""
        if not self.stored_as_script:
            for flow_name, flow_location in self.flows.items():
                clean_name = slugify(flow_name)
                flow_path = os.path.join(directory, "{}.flow".format(clean_name))
                with open(flow_path, "wb") as f:
                    f.write(flow_to_bytes_pickle(self._flows[flow_name]))
                copy_flows += "COPY {source} {dest}\n".format(
                    source=(
                        flow_path.replace("\\", "/")
                        if self.dockerfile
                        else "{}.flow".format(clean_name)
                    ),
                    dest=flow_location,
                )
        else:
            if not self.path:
                raise ValueError(
                    "A `path` must be provided to show where flow `.py` file is stored in the image."
                )

        # Write final extra user commands that should be run in the image
        final_commands = (
            ""
            if self.extra_dockerfile_commands is None
            else "\n".join(self.extra_dockerfile_commands)
        )

        # Write a healthcheck script into the image
        with open(
            os.path.join(os.path.dirname(__file__), "_healthcheck.py"), "r"
        ) as healthscript:
            healthcheck = healthscript.read()

        healthcheck_loc = os.path.join(directory, "healthcheck.py")
        with open(healthcheck_loc, "w") as health_file:
            health_file.write(healthcheck)

        # Escape the healthcheck location
        healthcheck_loc = (
            healthcheck_loc.replace("\\", "/") if self.dockerfile else "healthcheck.py"
        )

        # Generate the command to run the healthcheck
        healthcheck_run = ""
        if not self.ignore_healthchecks:
            flow_file_paths = ", ".join(['"{}"'.format(k) for k in self.flows.values()])
            python_version = (sys.version_info.major, sys.version_info.minor)
            healthcheck_run = (
                f"RUN python {self.prefect_directory}/healthcheck.py "
                f"'[{flow_file_paths}]' '{python_version}'"
            )

        file_contents = textwrap.dedent(
            f"""
            {multiline_indent(base_commands, 12)}
            {multiline_indent(env_vars, 12)}

            RUN pip install pip --upgrade
            {multiline_indent(installation_commands, 12)}
            {pip_installs}

            RUN mkdir -p {self.prefect_directory}/
            {multiline_indent(copy_flows, 12)}
            COPY {healthcheck_loc} {self.prefect_directory}/healthcheck.py
            {multiline_indent(copy_files, 12)}

            {multiline_indent(final_commands, 12)}
            {healthcheck_run}
            """
        )

        dockerfile_path = os.path.join(directory, "Dockerfile")
        with open(dockerfile_path, "w+") as dockerfile:
            dockerfile.write(file_contents)
        return dockerfile_path
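A hedged sketch of calling this method directly to inspect the generated Dockerfile (base image and dependency are placeholders; assumes Prefect 1.x `Docker` storage is importable):

import tempfile

from prefect import Flow
from prefect.storage import Docker

storage = Docker(
    base_image="python:3.9-slim",       # placeholder base image
    python_dependencies=["requests"],   # placeholder pip dependency
)
storage.add_flow(Flow("dockerized-example"))

with tempfile.TemporaryDirectory() as tmpdir:
    dockerfile_path = storage.create_dockerfile_object(tmpdir)
    # The file holds the FROM/ENV/RUN/COPY lines assembled above, including the
    # COPY of the pickled .flow file and of the healthcheck script.
    with open(dockerfile_path) as f:
        print(f.read())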
Code example #15
File: s3.py Project: laisbsc/prefect
    def build(self) -> "Storage":
        """
        Build the S3 storage object by uploading Flows to an S3 bucket. This will upload
        all of the flows found in `storage.flows`. If there is an issue uploading to the
        S3 bucket, an error will be logged.

        Returns:
            - Storage: an S3 object that contains information about how and where
                each flow is stored

        Raises:
            - botocore.ClientError: if there is an issue uploading a Flow to S3
        """
        self.run_basic_healthchecks()

        from botocore.exceptions import ClientError

        if self.stored_as_script:
            if self.local_script_path:
                for flow_name, flow in self._flows.items():
                    self.logger.info("Uploading script {} to {} in {}".format(
                        self.local_script_path, self.flows[flow.name],
                        self.bucket))

                    try:
                        self._boto3_client.upload_file(self.local_script_path,
                                                       self.bucket,
                                                       self.flows[flow_name])
                    except ClientError as err:
                        self.logger.error(
                            "Error uploading Flow script to S3 bucket {}: {}".
                            format(self.bucket, err))
                        raise err
            else:
                if not self.key:
                    raise ValueError(
                        "A `key` must be provided to show where flow `.py` file is stored in S3 or "
                        "provide a `local_script_path` pointing to a local script that contains the "
                        "flow.")
            return self

        for flow_name, flow in self._flows.items():
            # Pickle Flow
            data = flow_to_bytes_pickle(flow)

            # Write pickled Flow to stream
            stream = io.BytesIO(data)

            self.logger.info("Uploading {} to {}".format(
                self.flows[flow_name], self.bucket))

            try:
                self._boto3_client.upload_fileobj(stream,
                                                  Bucket=self.bucket,
                                                  Key=self.flows[flow_name])
            except ClientError as err:
                self.logger.error(
                    "Error uploading Flow to S3 bucket {}: {}".format(
                        self.bucket, err))
                raise err

        return self
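A brief sketch of the key handling this method relies on (the bucket name is a placeholder; `build()` stays commented out because the upload needs AWS credentials):

from prefect import Flow
from prefect.storage import S3

storage = S3(bucket="my-bucket")       # placeholder bucket
key = storage.add_flow(Flow("etl"))    # returns the key the pickled flow will be stored under
assert storage.flows["etl"] == key     # build() uses this mapping as the upload Key
# storage.build()  # would pickle each flow and call upload_fileobj(stream, Bucket=..., Key=key)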
Code example #16
    def test_serialize_deserialize(self):
        s = flow_to_bytes_pickle(Flow("test"))
        assert isinstance(s, bytes)
        flow = flow_from_bytes_pickle(s)
        assert isinstance(flow, Flow)
        assert flow.name == "test"
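The same round trip outside a test harness, as a minimal sketch (assumes the helpers live in `prefect.utilities.storage`, as in Prefect 1.x):

from prefect import Flow
from prefect.utilities.storage import flow_from_bytes_pickle, flow_to_bytes_pickle

payload = flow_to_bytes_pickle(Flow("roundtrip"))  # pickled flow plus version metadata
restored = flow_from_bytes_pickle(payload)         # re-raises unpickling issues as FlowStorageError
assert restored.name == "roundtrip"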
Code example #17
File: webhook.py Project: alexkoay/prefect
    def build(self) -> "Webhook":
        """
        Build the Webhook storage object by issuing an HTTP request
        to store the flow.

        If `self.stored_as_script` is `True`, this method
        will read in the contents of `self.flow_script_path`, convert it to
        bytes, and attach it to the request as `data`.

        The response from this request is stored in `._build_responses`,
        a dictionary keyed by flow name. If you are using a service where
        all the details necessary to fetch a flow cannot be known until you've
        stored it, you can do something like the following.

        ```python
        import cloudpickle
        import json
        import os
        import random
        import requests

        from prefect import task, Task, Flow
        from prefect.storage import Webhook

        @task
        def random_number():
            return random.randint(0, 100)
        with Flow("test-flow") as flow:
            random_number()


        flow.storage = Webhook(
            build_request_kwargs={
                "url": "some-service/upload",
                "headers": {"Content-Type": "application/octet-stream"},
            },
            build_request_http_method="POST",
            get_flow_request_kwargs={
                "url": "some-service/download",
                "headers": {"Accept": "application/octet-stream"},
            },
            get_flow_request_http_method="GET",
        )

        flow.storage.add_flow(flow)
        res = flow.storage.build()

        # get the ID from the response
        flow_id = res._build_responses[flow.name].json()["id"]

        #  update storage
        flow.storage.get_flow_request_kwargs["url"] = f"{GET_ROUTE}/{flow_id}"
        ```

        Returns:
            - Storage: a Webhook storage object

        Raises:
            - requests.exceptions.HTTPError if pushing the flow fails
        """
        self.run_basic_healthchecks()
        self._build_responses = {}

        for flow_name, flow in self._flows.items():
            self.logger.info("Uploading flow '{}'".format(flow_name))

            data = flow_to_bytes_pickle(flow)
            if self.stored_as_script:

                # these checks are here in build() instead of the constructor
                # so that serialization and deserialization of flows does not fail
                if not self.flow_script_path:
                    msg = "flow_script_path must be provided if stored_as_script=True"
                    self.logger.critical(msg)
                    raise RuntimeError(msg)

                if not os.path.isfile(self.flow_script_path):
                    msg = "file '{}' passed to flow_script_path does not exist".format(
                        self.flow_script_path)
                    self.logger.critical(msg)
                    raise RuntimeError(msg)

                with open(self.flow_script_path, "r") as f:
                    data = f.read().encode("utf-8")

            req_function = self._method_to_function[
                self.build_request_http_method]

            build_request_kwargs = _render_dict(self.build_request_kwargs)

            if "data" in build_request_kwargs.keys():
                msg = ("'data' found in build_request_kwargs. This value is "
                       "overwritten with the flow content and should not "
                       "be set directly")
                self.logger.warning(msg)
                warnings.warn(msg, RuntimeWarning, stacklevel=2)
            build_request_kwargs["data"] = data

            response = req_function(**build_request_kwargs)  # type: ignore
            response.raise_for_status()

            self._build_responses[flow_name] = response
            self.logger.info(
                "Successfully uploaded flow '{}'".format(flow_name))

        return self