Ejemplo n.º 1
0
    def test_extract_flow_from_file_path(self, flow_path):
        """Flows can be loaded from a script path, by default or by explicit name."""
        # With no name given, the fixture script is expected to yield "flow-1".
        default_flow = extract_flow_from_file(file_path=flow_path)
        assert default_flow.name == "flow-1"
        assert default_flow.run().is_successful()

        # Asking for a flow by name must hand back exactly that flow.
        for requested in ("flow-1", "flow-2"):
            named_flow = extract_flow_from_file(
                file_path=flow_path, flow_name=requested
            )
            assert named_flow.name == requested
Ejemplo n.º 2
0
def test_extract_flow_from_file(tmpdir):
    """Exercise the path, contents and named-flow modes plus the argument errors."""
    script = """from prefect import Flow\nf=Flow('test-flow')"""
    script_path = os.path.join(tmpdir, "flow.py")
    with open(script_path, "w") as handle:
        handle.write(script)

    # Every valid way of locating the flow must yield one that runs successfully.
    valid_calls = (
        {"file_path": script_path},
        {"file_contents": script},
        {"file_path": script_path, "flow_name": "test-flow"},
    )
    for kwargs in valid_calls:
        assert extract_flow_from_file(**kwargs).run().is_successful()

    # An unknown flow name, both sources at once, or no source at all are rejected.
    invalid_calls = (
        {"file_path": script_path, "flow_name": "not-real"},
        {"file_path": script_path, "file_contents": script},
        {},
    )
    for kwargs in invalid_calls:
        with pytest.raises(ValueError):
            extract_flow_from_file(**kwargs)
Ejemplo n.º 3
0
    def test_extract_flow_from_file_raises_on_run_register(self, tmpdir, method):
        """Loading a script that calls `f.<method>()` should emit a warning."""
        script = f"from prefect import Flow\nf=Flow('test-flow')\nf.{method}()"
        script_path = os.path.join(tmpdir, "flow.py")
        with open(script_path, "w") as handle:
            handle.write(script)

        # The script is extracted with `loading_flow` set in the Prefect context,
        # and the extraction itself is what must trigger the warning.
        with prefect.context({"loading_flow": True}), pytest.warns(Warning):
            extract_flow_from_file(file_path=script_path)
Ejemplo n.º 4
0
    def test_extract_flow_from_file_contents(self, flow_path):
        """Flows can be loaded from raw script text, by default or by explicit name."""
        with open(flow_path, "r") as handle:
            script = handle.read()

        # With no name given, the fixture script is expected to yield "flow-1".
        default_flow = extract_flow_from_file(file_contents=script)
        assert default_flow.name == "flow-1"
        assert default_flow.run().is_successful()

        # Asking for a flow by name must hand back exactly that flow.
        for requested in ("flow-1", "flow-2"):
            named_flow = extract_flow_from_file(
                file_contents=script, flow_name=requested
            )
            assert named_flow.name == requested
Ejemplo n.º 5
0
def flow(file, name, project, label, skip_if_flow_metadata_unchanged):
    """
    Register a flow from a file. This call will pull a Flow object out of a `.py` file
    and call `flow.register` on it.

    \b
    Options:
        --file, -f      TEXT    The path to a local file which contains a flow  [required]
        --name, -n      TEXT    The `flow.name` to pull out of the file provided. If a name
                                is not provided then the first flow object found will be registered.
        --project, -p   TEXT    The name of a Prefect project to register this flow
        --label, -l     TEXT    A label to set on the flow, extending any existing labels.
                                Multiple labels are supported, eg. `-l label1 -l label2`.

        --skip-if-flow-metadata-unchanged       If set, the flow will only be re-registered if its
                                                metadata or structure has changed.

    \b
    Examples:
        $ prefect register flow --file my_flow.py --name My-Flow -l label1 -l label2
    """
    # NOTE(review): the docstring above is presumably rendered as CLI help text,
    # so it is kept verbatim - confirm against the command decorators.
    script_path = os.path.abspath(file)

    # Don't run extra `run` and `register` functions inside file
    loading_context = {"loading_flow": True, "local_script_path": script_path}
    with prefect.context(loading_context):
        loaded_flow = extract_flow_from_file(file_path=script_path, flow_name=name)

    # An idempotency key is only supplied when the caller opted in.
    if skip_if_flow_metadata_unchanged:
        idempotency_key = loaded_flow.serialized_hash()
    else:
        idempotency_key = None

    loaded_flow.register(
        project_name=project, labels=label, idempotency_key=idempotency_key
    )
Ejemplo n.º 6
0
    def get_flow(self, flow_location: str) -> "Flow":
        """
        Fetch a flow file from the configured GitHub repository and extract its Flow.

        Args:
            - flow_location (str): the location of a flow within this Storage; in this case,
                a file path on a repository where a Flow file has been committed

        Returns:
            - Flow: the Flow extracted from the downloaded file contents

        Raises:
            - UnknownObjectException: if the Flow file is unable to be retrieved; the
                failure is logged and the exception is re-raised
        """
        from github import UnknownObjectException

        repository = self._github_client.get_repo(self.repo)

        try:
            remote_file = repository.get_contents(flow_location)
            script = remote_file.decoded_content
        except UnknownObjectException as exc:
            message = "Error retrieving file contents from {} on repo {}. Ensure the file exists."
            self.logger.error(message.format(flow_location, self.repo))
            raise exc

        return extract_flow_from_file(file_contents=script)
Ejemplo n.º 7
0
    def get_flow(self, flow_location: str) -> "Flow":
        """
        Download a flow from the configured GCS bucket and load it.

        Args:
            - flow_location (str): the location of a flow within this Storage; in this case,
                a file path where a Flow has been serialized to

        Returns:
            - Flow: the requested flow, extracted from the downloaded script when
                `stored_as_script` is set and unpickled otherwise

        Raises:
            - ValueError: if the flow is not contained in this storage
            - StorageError: if no blob is found at `flow_location` in the bucket
        """
        if flow_location not in self.flows.values():
            raise ValueError("Flow is not contained in this Storage")

        bucket = self._gcs_client.get_bucket(self.bucket)

        self.logger.info("Downloading {} from {}".format(
            flow_location, self.bucket))

        blob = bucket.get_blob(flow_location)
        if not blob:
            raise StorageError(
                "Flow not found in bucket: flow={} bucket={}".format(
                    flow_location, self.bucket))

        # `download_as_string` is deprecated in favour of `download_as_bytes`
        # (added in google-cloud-storage 1.31). Prefer the new method and keep
        # the old one as a fallback for older client versions.
        if hasattr(blob, "download_as_bytes"):
            content = blob.download_as_bytes()
        else:
            content = blob.download_as_string()

        if self.stored_as_script:
            return extract_flow_from_file(file_contents=content)

        # NOTE(security): unpickling executes arbitrary code, so the bucket
        # contents must be trusted.
        return cloudpickle.loads(content)
Ejemplo n.º 8
0
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Load the named flow from its script in the configured GitLab repository.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if `flow_name` is not contained in this Storage
            - GitlabAuthenticationError: if authentication fails (logged, then re-raised)
            - GitlabGetError: if the project or file cannot be fetched (logged, then
                re-raised)
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        script_path = self.flows[flow_name]

        # Fall back to "master" when no ref is configured on the storage.
        target_ref = self.ref or "master"

        from gitlab.exceptions import GitlabAuthenticationError, GitlabGetError

        try:
            project_id = quote_plus(self.repo)
            project = self._gitlab_client.projects.get(project_id)
            remote_file = project.files.get(file_path=script_path, ref=target_ref)
        except GitlabAuthenticationError:
            self.logger.error(
                "Unable to authenticate Gitlab account. Please check your credentials."
            )
            raise
        except GitlabGetError:
            self.logger.error(
                f"Error retrieving file contents at {script_path} in {self.repo}@{target_ref}. "
                "Ensure the project and file exist."
            )
            raise

        script = remote_file.decode()
        return extract_flow_from_file(file_contents=script, flow_name=flow_name)
Ejemplo n.º 9
0
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Load the named flow by issuing this storage's configured "get flow" request.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow, extracted from the response text when
                `stored_as_script` is set and loaded from the pickled bytes otherwise

        Raises:
            - ValueError: if `flow_name` is not contained in this Storage
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")

        http_method = self.get_flow_request_http_method
        send_request = self._method_to_function[http_method]

        request_kwargs = _render_dict(self.get_flow_request_kwargs)

        response = send_request(**request_kwargs)  # type: ignore
        # Surface an unsuccessful response before its body is interpreted.
        response.raise_for_status()

        if not self.stored_as_script:
            return flow_from_bytes_pickle(response.content)

        script_text = response.content.decode("utf-8")
        return extract_flow_from_file(file_contents=script_text,
                                      flow_name=flow_name)
Ejemplo n.º 10
0
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Load the named flow from its script in the configured GitHub repository.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if `flow_name` is not contained in this Storage
            - UnknownObjectException: if the file cannot be retrieved; the failure is
                logged and the exception is re-raised
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        script_path = self.flows[flow_name]

        from github import UnknownObjectException

        repository = self._github_client.get_repo(self.repo)

        try:
            remote_file = repository.get_contents(script_path, ref=self.ref)
            script = remote_file.decoded_content
        except UnknownObjectException as exc:
            message = "Error retrieving file contents from {} on repo {}. Ensure the file exists."
            self.logger.error(message.format(script_path, self.repo))
            raise exc

        return extract_flow_from_file(file_contents=script, flow_name=flow_name)
Ejemplo n.º 11
0
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Download the named flow from the configured GCS bucket and load it.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow, extracted from the downloaded script when
                `stored_as_script` is set and loaded from the pickled bytes otherwise

        Raises:
            - ValueError: if `flow_name` is not contained in this Storage
            - FlowStorageError: if no blob is found for the flow in the bucket
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        blob_name = self.flows[flow_name]

        bucket = self._gcs_client.get_bucket(self.bucket)

        self.logger.info("Downloading {} from {}".format(blob_name, self.bucket))

        blob = bucket.get_blob(blob_name)
        if not blob:
            message = "Flow not found in bucket: flow={} bucket={}"
            raise FlowStorageError(message.format(blob_name, self.bucket))

        # Support GCS < 1.31
        if hasattr(blob, "download_as_bytes"):
            content = blob.download_as_bytes()
        else:
            content = blob.download_as_string()

        if not self.stored_as_script:
            return flow_from_bytes_pickle(content)

        return extract_flow_from_file(file_contents=content, flow_name=flow_name)
Ejemplo n.º 12
0
    def get_flow(self, flow_location: str = "placeholder") -> "Flow":
        """
        Retrieve the flow by issuing this storage's configured "get flow" request.

        When `stored_as_script` is set, the response body is decoded as UTF-8 and
        the Flow is extracted from that script text. Otherwise the body is passed to
        `cloudpickle.loads()`, so this should only be called in an environment with
        all of the flow's dependencies.

        Args:
            - flow_location (str): This argument is included to comply with the
                interface used by other storage objects, but it has no meaning
                for `Webhook` storage, since `Webhook` only corresponds to a
                single flow. Ignore it.

        Returns:
            - Flow: the retrieved flow

        Raises:
            - requests.exceptions.HTTPError if getting the flow fails
        """
        self.logger.info("Retrieving flow")

        http_method = self.get_flow_request_http_method
        send_request = self._method_to_function[http_method]
        request_kwargs = _render_dict(self.get_flow_request_kwargs)

        response = send_request(**request_kwargs)  # type: ignore
        # Surface an unsuccessful response before its body is interpreted.
        response.raise_for_status()

        if not self.stored_as_script:
            # NOTE(security): unpickling executes arbitrary code, so the endpoint
            # serving this content must be trusted.
            return cloudpickle.loads(response.content)

        script_text = response.content.decode("utf-8")
        return extract_flow_from_file(file_contents=script_text)  # type: ignore
Ejemplo n.º 13
0
    def get_flow(self, flow_location: str = None) -> "prefect.core.flow.Flow":
        """
        Load a Flow from a file path inside this Docker container.
        Note that this method should only be run _within_ the container itself.

        Args:
            - flow_location (str, optional): the file path of a flow within this container. Will use
                `path` if not provided.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if the given location is not contained in this storage, or if
                no location is given and `path` is not set
        """
        if not flow_location:
            # No explicit location: fall back to the storage-level path.
            if not self.path:
                raise ValueError("No flow location provided")
            flow_location = self.path
        elif flow_location not in self.flows.values():
            raise ValueError("Flow is not contained in this Storage")

        if self.stored_as_script:
            return extract_flow_from_file(file_path=flow_location)

        # NOTE(security): unpickling executes arbitrary code; the file must be trusted.
        with open(flow_location, "rb") as pickled:
            return cloudpickle.load(pickled)
Ejemplo n.º 14
0
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Load the named flow from its script file fetched through the boto3 client.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if `flow_name` is not contained in this Storage
            - Exception: any error raised while fetching or decoding the file is
                logged and re-raised
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        script_path = self.flows[flow_name]

        client = self._boto3_client

        try:
            response = client.get_file(
                repositoryName=self.repo,
                commitSpecifier=self.commit,
                filePath=script_path,
            )
            script_text = response["fileContent"].decode("utf-8")
        except Exception as exc:
            message = "Error retrieving file contents from {} on repo {}. Ensure the file exists."
            self.logger.error(message.format(script_path, self.repo))
            raise exc

        return extract_flow_from_file(file_contents=script_text, flow_name=flow_name)
Ejemplo n.º 15
0
def flow(file, name, project, label):
    """
    Register a flow from a file. This call will pull a Flow object out of a `.py` file
    and call `flow.register` on it.

    \b
    Options:
        --file, -f      TEXT    The path to a local file which contains a flow  [required]
        --name, -n      TEXT    The `flow.name` to pull out of the file provided. If a name
                                is not provided then the first flow object found will be registered.
        --project, -p   TEXT    The name of a Prefect project to register this flow
        --label, -l     TEXT    A label to set on the flow, extending any existing labels.
                                Multiple labels are supported, eg. `-l label1 -l label2`.

    \b
    Examples:
        $ prefect register flow --file my_flow.py --name My-Flow -l label1 -l label2
    """
    # NOTE(review): the docstring above is presumably rendered as CLI help text,
    # so it is kept verbatim - confirm against the command decorators.
    script_path = os.path.abspath(file)

    # Don't run extra `run` and `register` functions inside file
    loading_context = {"loading_flow": True, "local_script_path": script_path}
    with prefect.context(loading_context):
        loaded_flow = extract_flow_from_file(file_path=script_path, flow_name=name)

    loaded_flow.register(project_name=project, labels=label)
Ejemplo n.º 16
0
    def get_flow(self, flow_location: str) -> "Flow":
        """
        Download a flow blob from the configured Azure container and load it.

        Args:
            - flow_location (str): the location of a flow within this Storage; in this case,
                a file path where a Flow has been serialized to

        Returns:
            - Flow: the requested flow, extracted from the downloaded script when
                `stored_as_script` is set and unpickled otherwise

        Raises:
            - ValueError: if the flow is not contained in this storage
        """
        if flow_location not in self.flows.values():
            raise ValueError("Flow is not contained in this Storage")

        blob_client = self._azure_block_blob_service.get_blob_client(
            container=self.container, blob=flow_location
        )

        self.logger.info(
            "Downloading {} from {}".format(flow_location, self.container)
        )

        content = blob_client.download_blob().content_as_bytes()

        if not self.stored_as_script:
            # NOTE(security): unpickling executes arbitrary code, so the container
            # contents must be trusted.
            return cloudpickle.loads(content)

        return extract_flow_from_file(file_contents=content)  # type: ignore
Ejemplo n.º 17
0
    def get_flow(self, flow_location: str = None) -> "Flow":
        """
        Load a Flow from a local file path or from a module path.

        Args:
            - flow_location (str, optional): the location of a flow within this Storage; in this case,
                a file path or python path where a Flow has been serialized to. Will use `path`
                if not provided.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if the given location is not contained in this storage, or if
                no location is given and `path` is not set
        """
        if not flow_location:
            # No explicit location: fall back to the storage-level path.
            if not self.path:
                raise ValueError("No flow location provided")
            flow_location = self.path
        elif flow_location not in self.flows.values():
            raise ValueError("Flow is not contained in this Storage")

        # Anything that is not an existing file is treated as a module path.
        if not os.path.isfile(flow_location):
            return extract_flow_from_module(module_str=flow_location)

        if self.stored_as_script:
            return extract_flow_from_file(file_path=flow_location)

        return prefect.core.flow.Flow.load(flow_location)
Ejemplo n.º 18
0
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Load the named flow from its local file path or module path.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if `flow_name` is not contained in this Storage
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        location = self.flows[flow_name]

        # Anything that is not an existing file is treated as a module path.
        if not os.path.isfile(location):
            return extract_flow_from_module(module_str=location, flow_name=flow_name)

        if self.stored_as_script:
            return extract_flow_from_file(file_path=location, flow_name=flow_name)

        with open(location, "rb") as pickled:
            return flow_from_bytes_pickle(pickled.read())
Ejemplo n.º 19
0
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Download the named flow from the configured Azure container and load it.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow, extracted from the downloaded script when
                `stored_as_script` is set and loaded from the pickled bytes otherwise

        Raises:
            - ValueError: if `flow_name` is not contained in this Storage
            - Exception: any error raised while downloading is logged and re-raised
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        blob_name = self.flows[flow_name]

        try:
            blob_client = self._azure_block_blob_service.get_blob_client(
                container=self.container, blob=blob_name
            )

            self.logger.info("Downloading {} from {}".format(blob_name, self.container))

            content = blob_client.download_blob().content_as_bytes()
        except Exception as err:
            self.logger.error("Error downloading Flow from Azure: {}".format(err))
            raise

        if not self.stored_as_script:
            return flow_from_bytes_pickle(content)

        return extract_flow_from_file(file_contents=content, flow_name=flow_name)  # type: ignore
Ejemplo n.º 20
0
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Load the named flow from GitHub, reading the file at the commit the ref resolves to.

        The ref (or the repository's default branch when none is set) is first
        resolved to a commit sha, and the file is then fetched at that sha.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if `flow_name` is not contained in this Storage
            - UnknownObjectException: if the repo, ref or file cannot be found; each
                case is logged and the exception is re-raised
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        script_path = self.flows[flow_name]

        from github import UnknownObjectException

        # Log info about the active storage object. Only include `ref` if
        # explicitly set.
        self.logger.info(
            "Downloading flow from GitHub storage - repo: %r, path: %r%s",
            self.repo,
            script_path,
            "" if self.ref is None else f", ref: {self.ref!r}",
        )

        try:
            repository = self._github_client.get_repo(self.repo)
        except UnknownObjectException:
            self.logger.error(
                "Repo %r not found. Check that it exists (and is spelled correctly), "
                "and that you have configured the proper credentials for accessing it.",
                self.repo,
            )
            raise

        # Use the default branch if unspecified
        target_ref = self.ref or repository.default_branch

        # Get the current commit sha for this ref
        try:
            commit_sha = repository.get_commit(target_ref).sha
        except UnknownObjectException:
            self.logger.error("Ref %r not found in repo %r.", target_ref, self.repo)
            raise

        try:
            remote_file = repository.get_contents(script_path, ref=commit_sha)
            script = remote_file.decoded_content
        except UnknownObjectException:
            self.logger.error(
                "File %r not found in repo %r, ref %r",
                script_path,
                self.repo,
                target_ref,
            )
            raise

        self.logger.info("Flow successfully downloaded. Using commit: %s", commit_sha)

        return extract_flow_from_file(file_contents=script, flow_name=flow_name)
Ejemplo n.º 21
0
def import_flow_from_script_check(flow_file_paths: list):
    """
    Check that a Flow can be extracted from every given script path.

    Args:
        - flow_file_paths (list): paths of the flow scripts to load

    Returns:
        - list: the extracted flows, in the same order as `flow_file_paths`
    """
    from prefect.utilities.storage import extract_flow_from_file

    flows = [extract_flow_from_file(file_path=path) for path in flow_file_paths]

    # Only reached when every script yielded a flow without raising.
    print("Flow import from script check: OK")
    return flows
Ejemplo n.º 22
0
    def get_flow(self, flow_location: str = None, ref: str = None) -> "Flow":
        """
        Fetch a flow file from the configured Bitbucket repository and extract its Flow.
        If the file cannot be retrieved, an error is logged and the HTTPError is re-raised.

        Args:
            - flow_location (str): the location of a flow within this Storage; in this case,
                a file path on a repository where a Flow file has been committed. Will use `path` if not
                provided.
            - ref (str, optional): a commit SHA-1 value or branch name. Falls back to the
                storage's `ref` attribute, then to 'master', if not specified

        Returns:
            - Flow: the requested Flow; Atlassian API retrieves raw, decoded files.

        Raises:
            - ValueError: if the flow is not contained in this storage
            - HTTPError: if flow is unable to access the Bitbucket repository
        """
        if flow_location:
            if flow_location not in self.flows.values():
                raise ValueError("Flow is not contained in this Storage")
        elif self.path:
            flow_location = self.path
        else:
            raise ValueError("No flow location provided")

        # Use ref argument if exists, else use attribute, else default to 'master'
        ref = ref or self.ref or "master"

        try:
            contents = self._bitbucket_client.get_content_of_file(
                self.project,
                self.repo,
                flow_location,
                at=ref,
            )
        except HTTPError as err:
            # urllib-style errors expose the status as `err.code`, while requests-style
            # errors expose it as `err.response.status_code`. Support both so that
            # classifying the failure can never raise AttributeError and mask the
            # original HTTPError.
            status = getattr(err, "code", None)
            if status is None:
                status = getattr(getattr(err, "response", None), "status_code", None)

            if status == 401:
                self.logger.error(
                    "Access denied to repository. Please check credentials.")
            elif status == 404:
                self.logger.error(
                    "Invalid address. Check that host, project, and repository are correct."
                )
            else:
                self.logger.error(
                    f"Error retrieving contents at {flow_location} in {self.repo}@{ref}. "
                    "Please check arguments passed to Bitbucket storage and verify project exists."
                )
            # Every case only adds a log line; the original error always propagates.
            raise

        return extract_flow_from_file(file_contents=contents)
Ejemplo n.º 23
0
def test_extract_flow_from_file_raises_on_run_register(tmpdir):
    """Scripts calling `f.run()` or `f.register()` must raise while a flow is loading."""
    scripts = (
        """from prefect import Flow\nf=Flow('test-flow')\nf.run()""",
        """from prefect import Flow\nf=Flow('test-flow')\nf.register()""",
    )
    script_path = os.path.join(tmpdir, "flow.py")

    for script in scripts:
        # The same file is overwritten with each script in turn.
        with open(script_path, "w") as handle:
            handle.write(script)

        with prefect.context({"loading_flow": True}), pytest.raises(RuntimeError):
            extract_flow_from_file(file_path=script_path)
Ejemplo n.º 24
0
    def test_extract_flow_from_file_errors(self, flow_path):
        """Bad argument combinations and missing flows raise descriptive ValueErrors."""
        # A path and contents together are rejected.
        with pytest.raises(ValueError, match="but not both"):
            extract_flow_from_file(file_path="", file_contents="")

        # So is supplying neither.
        with pytest.raises(ValueError, match="Provide either"):
            extract_flow_from_file()

        # An unknown name reports the flows that were found.
        missing_flow_message = (
            "Flow 'not-real' not found in file. Found flows:\n- 'flow-1'\n- 'flow-2'"
        )
        with pytest.raises(ValueError, match=missing_flow_message):
            extract_flow_from_file(file_path=flow_path, flow_name="not-real")

        # Empty contents contain no flows at all.
        with pytest.raises(ValueError, match="No flows found in file."):
            extract_flow_from_file(file_contents="")
Ejemplo n.º 25
0
    def get_flow(self, flow_location: str = None) -> "Flow":
        """
        Download a flow object from the configured S3 bucket and load it.

        Args:
            - flow_location (str, optional): the location of a flow within this Storage; in this case
                an S3 object key where a Flow has been serialized to. Will use `key` if not provided.

        Returns:
            - Flow: the requested Flow, extracted from the downloaded script when
                `stored_as_script` is set and unpickled otherwise

        Raises:
            - ValueError: if the given location is not contained in this storage, or if
                no location is given and `key` is not set
            - botocore.ClientError: if there is an issue downloading the Flow from S3
        """
        if not flow_location:
            # No explicit location: fall back to the storage-level key.
            if not self.key:
                raise ValueError("No flow location provided")
            flow_location = self.key
        elif flow_location not in self.flows.values():
            raise ValueError("Flow is not contained in this Storage")

        buffer = io.BytesIO()

        self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

        # Download stream from S3
        from botocore.exceptions import ClientError

        try:
            self._boto3_client.download_fileobj(
                Bucket=self.bucket, Key=flow_location, Fileobj=buffer
            )
        except ClientError as err:
            self.logger.error("Error downloading Flow from S3: {}".format(err))
            raise err

        # Take everything that was written into the in-memory buffer.
        payload = buffer.getvalue()

        if not self.stored_as_script:
            # NOTE(security): unpickling executes arbitrary code, so the bucket
            # contents must be trusted.
            return cloudpickle.loads(payload)

        return extract_flow_from_file(file_contents=payload)  # type: ignore
Ejemplo n.º 26
0
    def get_flow(self, flow_location: str = None, ref: str = None) -> "Flow":
        """
        Fetch a flow file from the configured GitLab repository and extract its Flow.
        If the file cannot be retrieved, an error is logged and the exception re-raised.

        Args:
            - flow_location (str): the location of a flow within this Storage; in this case,
                a file path on a repository where a Flow file has been committed. Will use `path` if not
                provided.
            - ref (str, optional): a commit SHA-1 value or branch name. Falls back to the
                storage's `ref` attribute, then to 'master', if not specified

        Returns:
            - Flow: the requested Flow

        Raises:
            - ValueError: if the flow is not contained in this storage, or if no location
                is given and `path` is not set
            - GitlabAuthenticationError: if authentication fails
            - GitlabGetError: if the project or file cannot be fetched
        """
        if not flow_location:
            # No explicit location: fall back to the storage-level path.
            if not self.path:
                raise ValueError("No flow location provided")
            flow_location = self.path
        elif flow_location not in self.flows.values():
            raise ValueError("Flow is not contained in this Storage")

        # Use ref argument if exists, else use attribute, else default to 'master'
        if not ref:
            ref = self.ref if self.ref else "master"

        from gitlab.exceptions import GitlabAuthenticationError, GitlabGetError

        try:
            project_id = quote_plus(self.repo)
            project = self._gitlab_client.projects.get(project_id)
            remote_file = project.files.get(file_path=flow_location, ref=ref)
        except GitlabAuthenticationError:
            self.logger.error(
                "Unable to authenticate Gitlab account. Please check your credentials."
            )
            raise
        except GitlabGetError:
            self.logger.error(
                f"Error retrieving file contents at {flow_location} in {self.repo}@{ref}. "
                "Ensure the project and file exist."
            )
            raise

        script = remote_file.decode()
        return extract_flow_from_file(file_contents=script)
Ejemplo n.º 27
0
    def get_flow(self, flow_location: str) -> "prefect.core.flow.Flow":
        """
        Load a Flow from a file path inside this Docker container.
        Note that this method should only be run _within_ the container itself.

        Args:
            - flow_location (str): the file path of a flow within this container

        Returns:
            - Flow: the requested flow, extracted from the script when
                `stored_as_script` is set and unpickled from the file otherwise
        """
        if not self.stored_as_script:
            # NOTE(security): unpickling executes arbitrary code; the file must be trusted.
            with open(flow_location, "rb") as pickled:
                return cloudpickle.load(pickled)

        return extract_flow_from_file(file_path=flow_location)
Ejemplo n.º 28
0
    def get_flow(self, flow_name: str) -> "Flow":
        """
        Load the named flow from its script in the configured Bitbucket repository.

        Args:
            - flow_name (str): the name of the flow to return.

        Returns:
            - Flow: the requested flow

        Raises:
            - ValueError: if `flow_name` is not contained in this Storage
            - HTTPError: if the file cannot be retrieved; the failure is logged and the
                exception is re-raised
        """
        if flow_name not in self.flows:
            raise ValueError("Flow is not contained in this Storage")
        flow_location = self.flows[flow_name]

        # Use ref attribute if present, defaulting to "master"
        ref = self.ref or "master"

        client = self._get_bitbucket_client()

        try:
            contents = client.get_content_of_file(
                self.project,
                self.repo,
                flow_location,
                at=ref,
            )
        except HTTPError as err:
            # urllib-style errors expose the status as `err.code`, while requests-style
            # errors expose it as `err.response.status_code`. Support both so that
            # classifying the failure can never raise AttributeError and mask the
            # original HTTPError.
            status = getattr(err, "code", None)
            if status is None:
                status = getattr(getattr(err, "response", None), "status_code", None)

            if status == 401:
                self.logger.error(
                    "Access denied to repository. Please check credentials."
                )
            elif status == 404:
                self.logger.error(
                    "Invalid address. Check that host, project, and repository are correct."
                )
            else:
                self.logger.error(
                    f"Error retrieving contents at {flow_location} in {self.repo}@{ref}. "
                    "Please check arguments passed to Bitbucket storage and verify project exists."
                )
            # Every case only adds a log line; the original error always propagates.
            raise

        return extract_flow_from_file(file_contents=contents, flow_name=flow_name)
Ejemplo n.º 29
0
    def get_flow(self, flow_location: str = None) -> "Flow":
        """
        Download a flow from the configured GCS bucket and load it.

        Args:
            - flow_location (str, optional): the location of a flow within this Storage; in this case,
                a file path where a Flow has been serialized to. Will use `key` if not provided.

        Returns:
            - Flow: the requested flow, extracted from the downloaded script when
                `stored_as_script` is set and loaded from the pickled bytes otherwise

        Raises:
            - ValueError: if the given location is not contained in this storage, or if
                no location is given and `key` is not set
            - StorageError: if no blob is found at the location in the bucket
        """
        if not flow_location:
            # No explicit location: fall back to the storage-level key.
            if not self.key:
                raise ValueError("No flow location provided")
            flow_location = self.key
        elif flow_location not in self.flows.values():
            raise ValueError("Flow is not contained in this Storage")

        bucket = self._gcs_client.get_bucket(self.bucket)

        self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

        blob = bucket.get_blob(flow_location)
        if not blob:
            message = "Flow not found in bucket: flow={} bucket={}"
            raise StorageError(message.format(flow_location, self.bucket))

        # Support GCS < 1.31
        if hasattr(blob, "download_as_bytes"):
            content = blob.download_as_bytes()
        else:
            content = blob.download_as_string()

        if not self.stored_as_script:
            return flow_from_bytes_pickle(content)

        return extract_flow_from_file(file_contents=content)
Ejemplo n.º 30
0
def flow(file, name, project, label, skip_if_flow_metadata_unchanged):
    """Register a flow (DEPRECATED)"""
    # NOTE(review): the docstring above is presumably rendered as CLI help text,
    # so it is kept verbatim - confirm against the command decorators.
    deprecation_notice = (
        "Warning: `prefect register flow` is deprecated, please transition to "
        "using `prefect register` instead."
    )
    click.secho(deprecation_notice, fg="yellow")

    script_path = os.path.abspath(file)

    # Don't run extra `run` and `register` functions inside file
    loading_context = {"loading_flow": True, "local_script_path": script_path}
    with prefect.context(loading_context):
        loaded_flow = extract_flow_from_file(file_path=script_path, flow_name=name)

    # An idempotency key is only supplied when the caller opted in.
    if skip_if_flow_metadata_unchanged:
        idempotency_key = loaded_flow.serialized_hash()
    else:
        idempotency_key = None

    loaded_flow.register(
        project_name=project, labels=label, idempotency_key=idempotency_key
    )