Esempio n. 1
0
    def find_lhs(self) -> str:
        """
        Return the name of the module-level variable that this object is bound to.

        A previously discovered name stored on ``self._lhs`` is returned directly. Otherwise the module named by
        ``self._instantiated_in`` is imported and its attributes are scanned for the one that *is* this object;
        the attribute name is cached on ``self._lhs`` before being returned.

        :raises FlyteSystemException: if ``_instantiated_in`` is ``None`` or empty, or if no attribute of the
            module is this object.
        :rtype: str
        """
        cached_name = self._lhs
        if cached_name is not None:
            return cached_name

        if self._instantiated_in is None or self._instantiated_in == "":
            raise _system_exceptions.FlyteSystemException(f"Object {self} does not have an _instantiated in")

        _logging.debug(f"Looking for LHS for {self} from {self._instantiated_in}")
        module = _importlib.import_module(self._instantiated_in)
        for candidate in dir(module):
            try:
                if getattr(module, candidate) is not self:
                    continue
                _logging.debug(f"Found LHS for {self}, {candidate}")
                self._lhs = candidate
                return candidate
            except ValueError as err:
                # Per the original authors, some module members (empty pandas dataframes were the motivating
                # case) can raise "ValueError: The truth value of a ... is ambiguous" while being inspected.
                # Such objects are not registrable entities, so the error is logged and the scan moves on.
                _logging.warning("Caught ValueError {} while attempting to auto-assign name".format(err))

        _logging.error(f"Could not find LHS for {self} in {self._instantiated_in}")
        raise _system_exceptions.FlyteSystemException(f"Error looking for LHS in {self._instantiated_in}")
Esempio n. 2
0
    def get_sub_workflows(self) -> List["FlyteWorkflow"]:
        """
        Recursively collect every sub-workflow reachable from this workflow's nodes.

        Two kinds of nodes contribute: workflow nodes that carry a ``sub_workflow_ref``, and the then/else leaf
        nodes of branch nodes whose executable object is a workflow. Each workflow found is followed by the
        result of its own ``get_sub_workflows()`` call.

        :raises FlyteSystemException: if a node references a sub-workflow but its executable object is missing or
            is not a workflow.
        """
        collected = []
        for current in self.nodes:
            wf_node = current.workflow_node
            if wf_node is not None and wf_node.sub_workflow_ref is not None:
                target = current.executable_flyte_object
                is_workflow = target is not None and target.entity_type_text == "Workflow"
                if not is_workflow:
                    raise _system_exceptions.FlyteSystemException(
                        "workflow node with subworkflow found but bad executable "
                        "object {}".format(target))
                collected.append(target)
                collected.extend(target.get_sub_workflows())

            # Conditional branches: inspect the first case, any additional cases, and the else node.
            branch = current.branch_node
            if branch is None:
                continue

            conditional = branch.if_else
            candidates = [conditional.case.then_node]
            if conditional.other is not None:
                candidates.extend([extra_case.then_node for extra_case in conditional.other])
            candidates.append(conditional.else_node)

            for leaf in candidates:
                # Missing (falsy) leaves are skipped.
                if not leaf:
                    continue
                leaf_target = leaf.executable_flyte_object
                if leaf_target is not None and leaf_target.entity_type_text == "Workflow":
                    collected.append(leaf_target)
                    collected.extend(leaf_target.get_sub_workflows())

        return collected
Esempio n. 3
0
def test_flyte_system_exception():
    """Raise a FlyteSystemException and check its message, metaclass, error code and base class."""
    caught = None
    try:
        raise system.FlyteSystemException("bad")
    except Exception as exc:
        caught = exc

    exc_type = type(caught)
    assert str(caught) == "bad"
    assert isinstance(exc_type, base._FlyteCodedExceptionMetaclass)
    assert exc_type.error_code == "SYSTEM:Unknown"
    assert isinstance(caught, base.FlyteException)
Esempio n. 4
0
    def auto_assign_name(self):
        """
        Discover the module-level variable this object is assigned to and derive its platform name from it.

        This function is a bit of trickster Python code that goes hand in hand with the _InstanceTracker metaclass.
        Thanks @matthewphsmith for this bit of ingenuity.

        For instance, if a user has code that looks like this:

            from some.other.module import wf
            my_launch_plan = wf.create_launch_plan()

            @dynamic_task
            def sample_task(wf_params):
                yield my_launch_plan()

        This code means that we should have a launch plan with a name ending in "my_launch_plan", since that is the
        name of the variable that the created launch plan gets assigned to. That is also the name that the launch plan
        would be registered with.

        However, when the create_launch_plan() function runs, the Python interpreter has no idea where the created
        object will be assigned to. It has no idea that the output of the create_launch_plan call is to be paired up
        with a variable named "my_launch_plan". This function basically does this after the fact. Leveraging the
        instantiated_in field, this code will re-import the module (ie Python file) that the object is in. Since
        it's already loaded, it's just retrieved from memory. It then scans all objects in the module, and when the
        attribute that *is* this object is found, it knows it's found the right variable name.

        Just to drive the point home, this function is mostly needed for Launch Plans. Assuming that user code has:

            @python_task
            def some_task():
                ...

        When Flytekit calls the module loader and loads the task, the name of the task is the name of the function
        itself.  It's known at time of creation. In contrast, when

            xyz = SomeWorkflow.create_launch_plan()

        is called, the name of the launch plan isn't known until after creation, it's not "SomeWorkflow", it's "xyz"

        On success ``self._platform_valid_name`` is set from the module name, the variable name and
        ``self.resource_type``.

        :raises FlyteSystemException: if no attribute of the module is this object.
        """
        _logging.debug("Running name auto assign")
        m = _importlib.import_module(self.instantiated_in)

        for k in dir(m):
            try:
                # Match by identity, not equality: an equal-but-distinct object must not lend its variable name
                # to this one, and `==` against arbitrary module members (e.g. dataframes/arrays) can yield values
                # whose truthiness is ambiguous.
                if getattr(m, k) is self:
                    self._platform_valid_name = _utils.fqdn(m.__name__, k, entity_type=self.resource_type)
                    _logging.debug("Auto-assigning name to {}".format(self._platform_valid_name))
                    return
            except ValueError as err:
                # Historically, empty pandas dataframes in the module raised:
                # ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(),
                #   a.any() or a.all()
                # Since dataframes aren't registrable entities to begin with we log any such error and
                # continue looping through m.
                _logging.warning("Caught ValueError {} while attempting to auto-assign name".format(err))

        _logging.error("Could not auto-assign name")
        raise _system_exceptions.FlyteSystemException("Error looking for object while auto-assigning name.")
Esempio n. 5
0
    def __init__(self, sdk_workflow=None, sdk_launch_plan=None):
        """
        Create a workflow node that wraps either a sub-workflow or a launch plan (at most one of them).

        The wrapped object's ``id`` is forwarded to the parent constructor as ``sub_workflow_ref`` or
        ``launchplan_ref`` respectively; the reference for the object that was not supplied is ``None``.

        :param flytekit.common.workflow.SdkWorkflow sdk_workflow:
        :param flytekit.common.launch_plan.SdkLaunchPlan sdk_launch_plan:
        :raises FlyteSystemException: if both a workflow and a launch plan are supplied.
        """
        if sdk_workflow and sdk_launch_plan:
            # The placeholders must be filled in explicitly; passing the values as extra exception arguments
            # leaves the literal "{}" markers in the message.
            raise _system_exceptions.FlyteSystemException(
                "SdkWorkflowNode cannot be called with both a workflow and "
                "a launchplan specified, please pick one. WF: {} LP: {}".format(sdk_workflow, sdk_launch_plan))

        self._sdk_workflow = sdk_workflow
        self._sdk_launch_plan = sdk_launch_plan
        sdk_wf_id = sdk_workflow.id if sdk_workflow else None
        sdk_lp_id = sdk_launch_plan.id if sdk_launch_plan else None
        super(SdkWorkflowNode, self).__init__(launchplan_ref=sdk_lp_id,
                                              sub_workflow_ref=sdk_wf_id)
Esempio n. 6
0
    def get_sub_workflows(self):
        """
        Walk this workflow's nodes and gather every sub-workflow, depth first.

        Only nodes whose ``workflow_node`` carries a ``sub_workflow_ref`` are considered; every other node
        (branch, task, launch plan) is skipped. Each sub-workflow is followed in the result by the sub-workflows
        it reports itself.

        :raises FlyteSystemException: if a sub-workflow node has no executable object or one that is not a workflow.
        :rtype: list[SdkWorkflow]
        """
        collected = []
        for current in self.nodes:
            wf_node = current.workflow_node
            if wf_node is None or wf_node.sub_workflow_ref is None:
                continue

            target = current.executable_sdk_object
            is_workflow = target is not None and target.entity_type_text == "Workflow"
            if not is_workflow:
                raise _system_exceptions.FlyteSystemException(
                    "workflow node with subworkflow found but bad executable "
                    "object {}".format(target))

            collected.append(target)
            collected.extend(target.get_sub_workflows())

        return collected
Esempio n. 7
0
    def promote_from_model(cls, base_model, sub_workflows, tasks):
        """
        Build an SdkWorkflowNode from a workflow-node model.

        A launch plan reference is resolved by fetching the launch plan. A sub-workflow reference is resolved
        from ``sub_workflows`` when present there, otherwise it is fetched (with a warning).

        :param flytekit.models.core.workflow.WorkflowNode base_model:
        :param dict[flytekit.models.core.identifier.Identifier, flytekit.models.core.workflow.WorkflowTemplate]
            sub_workflows:
        :param dict[flytekit.models.core.identifier.Identifier, flytekit.models.task.TaskTemplate] tasks:
        :raises FlyteSystemException: if the model names neither a launch plan nor a sub-workflow.
        :rtype: SdkWorkflowNode
        """
        # Imported lazily to avoid a circular dependency at module import time.
        from flytekit.common import launch_plan as _launch_plan
        from flytekit.common import workflow as _workflow

        reference = base_model.reference
        fetch_args = (reference.project, reference.domain, reference.name, reference.version)

        if base_model.launchplan_ref is not None:
            return cls(sdk_launch_plan=_launch_plan.SdkLaunchPlan.fetch(*fetch_args))

        if base_model.sub_workflow_ref is None:
            raise _system_exceptions.FlyteSystemException(
                "Bad workflow node model, neither subworkflow nor "
                "launchplan specified.")

        # Sub-workflow templates are expected to have been supplied by the caller.
        if reference in sub_workflows:
            template = sub_workflows[reference]
            return cls(sdk_workflow=_workflow.SdkWorkflow.promote_from_model(
                template, sub_workflows=sub_workflows, tasks=tasks))

        # Fallback: fetch from Admin. Unlike tasks this is worth a warning, because sub-workflows should always
        # be passed along and ideally are never registered with Admin in the first place.
        _logging.warning("Your subworkflow with id {} is not included in the promote call.".format(reference))
        return cls(sdk_workflow=_workflow.SdkWorkflow.fetch(*fetch_args))
Esempio n. 8
0
    def promote_from_model(
        cls,
        base_model: _workflow_model.WorkflowNode,
        sub_workflows: Dict[_identifier.Identifier,
                            _workflow_model.WorkflowTemplate],
        tasks: Dict[_identifier.Identifier, _task_model.TaskTemplate],
    ) -> "FlyteWorkflowNode":
        """
        Build a FlyteWorkflowNode from a workflow-node model.

        A launch plan reference is resolved by fetching the launch plan. A sub-workflow reference is resolved
        from ``sub_workflows`` when present there, otherwise it is fetched (with a warning).

        :param base_model: the workflow-node model to promote.
        :param sub_workflows: workflow templates keyed by identifier, used to resolve sub-workflow references.
        :param tasks: task templates keyed by identifier, forwarded when promoting a sub-workflow.
        :raises FlyteSystemException: if the model names neither a launch plan nor a sub-workflow.
        """
        # Imported lazily, presumably to avoid a circular dependency at module import time.
        from flytekit.control_plane import launch_plan as _launch_plan
        from flytekit.control_plane import workflow as _workflow

        fetch_args = (
            base_model.reference.project,
            base_model.reference.domain,
            base_model.reference.name,
            base_model.reference.version,
        )

        # The model attribute is spelled `launchplan_ref`, matching the constructor keyword used for
        # workflow-node models elsewhere in this code base.
        if base_model.launchplan_ref is not None:
            return cls(flyte_launch_plan=_launch_plan.FlyteLaunchPlan.fetch(
                *fetch_args))
        elif base_model.sub_workflow_ref is not None:
            # The workflow templates for sub-workflows should have been included in the original response.
            if base_model.reference in sub_workflows:
                return cls(
                    flyte_workflow=_workflow.FlyteWorkflow.promote_from_model(
                        sub_workflows[base_model.reference],
                        sub_workflows=sub_workflows,
                        tasks=tasks,
                    ))

            # If not found for some reason, fetch it from Admin again. The reason there is a warning here but not for
            # tasks is because sub-workflows should always be passed along. Ideally subworkflows are never even
            # registered with Admin, so fetching from Admin ideally doesn't return anything.
            _logging.warning(
                f"Your subworkflow with id {base_model.reference} is not included in the promote call."
            )
            return cls(flyte_workflow=_workflow.FlyteWorkflow.fetch(
                *fetch_args))

        raise _system_exceptions.FlyteSystemException(
            "Bad workflow node model, neither subworkflow nor launchplan specified."
        )
Esempio n. 9
0
    def __init__(
        self,
        flyte_workflow: "flytekit.control_plane.workflow.FlyteWorkflow" = None,
        flyte_launch_plan: "flytekit.control_plane.launch_plan.FlyteLaunchPlan" = None,
    ):
        """
        Create a workflow node that wraps either a workflow or a launch plan (at most one of them).

        The wrapped object's ``id`` is handed to the parent constructor as ``sub_workflow_ref`` or
        ``launchplan_ref``; the reference for the object that was not supplied is ``None``.

        :param flyte_workflow: the workflow this node executes, if any.
        :param flyte_launch_plan: the launch plan this node executes, if any.
        :raises FlyteSystemException: if both a workflow and a launch plan are supplied.
        """
        both_supplied = flyte_workflow and flyte_launch_plan
        if both_supplied:
            raise _system_exceptions.FlyteSystemException(
                "FlyteWorkflowNode cannot be called with both a workflow and a launchplan specified, please pick "
                f"one. workflow: {flyte_workflow} launchPlan: {flyte_launch_plan}",
            )

        self._flyte_workflow = flyte_workflow
        self._flyte_launch_plan = flyte_launch_plan

        launch_plan_id = None
        if self._flyte_launch_plan:
            launch_plan_id = self._flyte_launch_plan.id

        workflow_id = None
        if self._flyte_workflow:
            workflow_id = self._flyte_workflow.id

        super(FlyteWorkflowNode, self).__init__(
            launchplan_ref=launch_plan_id,
            sub_workflow_ref=workflow_id,
        )
Esempio n. 10
0
    def promote_from_model(
        cls,
        model: _workflow_model.Node,
        sub_workflows: Optional[Dict[_identifier.Identifier,
                                     _workflow_model.WorkflowTemplate]],
        tasks: Optional[Dict[_identifier.Identifier,
                             _task_model.TaskTemplate]],
    ) -> Optional["FlyteNode"]:
        """
        Build a FlyteNode from a node model.

        The model's task node or workflow node is promoted first, and the resulting task, workflow or launch plan
        is attached to the new node. Input bindings that point at the start node are rewritten in place to point
        at the global input node.

        :param model: the node model to promote.
        :param sub_workflows: workflow templates keyed by identifier, forwarded when promoting a workflow node.
        :param tasks: task templates keyed by identifier, forwarded to the component promotion calls.
        :return: the promoted node, or ``None`` (after logging a warning) when ``model`` is a start or end node.
        :raises FlyteSystemException: if the model has neither a task node nor a workflow node, or if the promoted
            workflow node carries neither a workflow nor a launch plan.
        """
        node_id = model.id
        if node_id in {_constants.START_NODE_ID, _constants.END_NODE_ID}:
            _logging.warning(
                f"Should not call promote from model on a start node or end node {model}"
            )
            return None

        flyte_task_node, flyte_workflow_node = None, None
        if model.task_node is not None:
            flyte_task_node = _component_nodes.FlyteTaskNode.promote_from_model(
                model.task_node, tasks)
        elif model.workflow_node is not None:
            flyte_workflow_node = _component_nodes.FlyteWorkflowNode.promote_from_model(
                model.workflow_node, sub_workflows, tasks)
        else:
            raise _system_exceptions.FlyteSystemException(
                "Bad Node model, neither task nor workflow detected.")

        # When WorkflowTemplate models (containing node models) are returned by Admin, they've been compiled with a
        # start node. In order to make the promoted FlyteWorkflow look the same, we strip the start-node text back out.
        for model_input in model.inputs:
            if (model_input.binding.promise is not None
                    and model_input.binding.promise.node_id
                    == _constants.START_NODE_ID):
                model_input.binding.promise._node_id = _constants.GLOBAL_INPUT_NODE_ID

        # Arguments shared by every flavour of node. Upstream nodes are set downstream; the model doesn't contain
        # this information.
        common_kwargs = dict(
            id=node_id,
            upstream_nodes=[],
            bindings=model.inputs,
            metadata=model.metadata,
        )

        if flyte_task_node is not None:
            return cls(flyte_task=flyte_task_node.flyte_task, **common_kwargs)

        if flyte_workflow_node is not None:
            if flyte_workflow_node.flyte_workflow is not None:
                return cls(flyte_workflow=flyte_workflow_node.flyte_workflow, **common_kwargs)
            if flyte_workflow_node.flyte_launch_plan is not None:
                return cls(flyte_launch_plan=flyte_workflow_node.flyte_launch_plan, **common_kwargs)
            raise _system_exceptions.FlyteSystemException(
                "Bad FlyteWorkflowNode model, both launch plan and workflow are None"
            )

        raise _system_exceptions.FlyteSystemException(
            "Bad FlyteNode model, both task and workflow nodes are empty")
Esempio n. 11
0
    def promote_from_model(cls, model, sub_workflows, tasks):
        """
        Build an SdkNode from a node model.

        The model's task node or workflow node is promoted first, and the resulting task, workflow or launch plan
        is attached to the new node. Input bindings that point at the start node are rewritten in place to point
        at the global input node. For a start or end node a warning is logged and ``None`` is returned.

        :param flytekit.models.core.workflow.Node model:
        :param dict[flytekit.models.core.identifier.Identifier, flytekit.models.core.workflow.WorkflowTemplate]
            sub_workflows:
        :param dict[flytekit.models.core.identifier.Identifier, flytekit.models.task.TaskTemplate] tasks: If specified,
            these task templates will be passed to the SdkTaskNode promote_from_model call, and used
            instead of fetching from Admin.
        :raises FlyteSystemException: if the model has neither a task node nor a workflow node, or if the promoted
            workflow node carries neither a workflow nor a launch plan.
        :rtype: SdkNode
        """
        node_id = model.id
        # Callers are not expected to pass start/end nodes here.
        if node_id == _constants.START_NODE_ID or node_id == _constants.END_NODE_ID:
            _logging.warning("Should not call promote from model on a start node or end node {}".format(model))
            return None

        task_component = None
        workflow_component = None
        if model.task_node is not None:
            task_component = _component_nodes.SdkTaskNode.promote_from_model(model.task_node, tasks)
        elif model.workflow_node is not None:
            workflow_component = _component_nodes.SdkWorkflowNode.promote_from_model(
                model.workflow_node, sub_workflows, tasks)
        else:
            raise _system_exceptions.FlyteSystemException("Bad Node model, neither task nor workflow detected")

        # When WorkflowTemplate models (containing node models) are returned by Admin, they've been compiled with a
        # start node.  In order to make the promoted SdkWorkflow look the same, we strip the start-node text back out.
        for binding_entry in model.inputs:
            promise = binding_entry.binding.promise
            if promise is not None and promise.node_id == _constants.START_NODE_ID:
                binding_entry.binding.promise._node_id = _constants.GLOBAL_INPUT_NODE_ID

        # Upstream nodes are set downstream in every case below; the model doesn't contain this information.
        if task_component is not None:
            return cls(
                id=node_id,
                upstream_nodes=[],
                bindings=model.inputs,
                metadata=model.metadata,
                sdk_task=task_component.sdk_task,
            )

        if workflow_component is None:
            raise _system_exceptions.FlyteSystemException("Bad SdkNode model, both task and workflow nodes are empty")

        if workflow_component.sdk_workflow is not None:
            return cls(
                id=node_id,
                upstream_nodes=[],
                bindings=model.inputs,
                metadata=model.metadata,
                sdk_workflow=workflow_component.sdk_workflow,
            )

        if workflow_component.sdk_launch_plan is not None:
            return cls(
                id=node_id,
                upstream_nodes=[],
                bindings=model.inputs,
                metadata=model.metadata,
                sdk_launch_plan=workflow_component.sdk_launch_plan,
            )

        raise _system_exceptions.FlyteSystemException("Bad SdkWorkflowNode model, both lp and workflow are None")