Esempio n. 1
0
    def add_workflow_output(
        self, output_name: str, p: Union[Promise, List[Promise], Dict[str, Promise]], python_type: Optional[Type] = None
    ):
        """
        Add an output with the given name from the given node output.

        On success a new binding is appended to ``self._output_bindings``, the output is added to
        ``self._python_interface`` and ``self._interface`` is regenerated from it.

        :param output_name: Name of the new workflow output. Must not already be an output of this workflow.
        :param p: The Promise (or list / dict of Promises) that the output is bound to.
        :param python_type: Python type of the output. May be omitted only when ``p`` is a single Promise, in
            which case the type is looked up on the interface of the entity that produced the Promise.
        :raises FlyteValidationException: If ``output_name`` already exists, or if ``p`` is a list or dict and
            ``python_type`` was not given.
        :raises Exception: If the current context already carries a compilation state.
        """
        if output_name in self._python_interface.outputs:
            raise FlyteValidationException(f"Output {output_name} already exists in workflow {self.name}")

        if python_type is None:
            # isinstance (rather than an exact type() comparison) also rejects list/dict subclasses, which would
            # otherwise fall through and fail with an opaque AttributeError on ``p.ref`` below.
            if isinstance(p, (list, dict)):
                raise FlyteValidationException(
                    f"If specifying a list or dict of Promises, you must specify the python_type type for {output_name}"
                    f" starting with the container type (e.g. List[int])"
                )
            # A single Promise: take the declared type of the upstream entity's output it refers to.
            python_type = p.ref.node.flyte_entity.python_interface.outputs[p.var]
            logger.debug(f"Inferring python type for wf output {output_name} from Promise provided {python_type}")

        flyte_type = TypeEngine.to_literal_type(python_type=python_type)

        ctx = FlyteContext.current_context()
        if ctx.compilation_state is not None:
            raise Exception("Can't already be compiling")
        # Build the binding while this workflow's own compilation state is the active one.
        with FlyteContextManager.with_context(ctx.with_compilation_state(self.compilation_state)) as ctx:
            b = binding_from_python_std(
                ctx, output_name, expected_literal_type=flyte_type, t_value=p, t_value_type=python_type
            )
            self._output_bindings.append(b)
            self._python_interface = self._python_interface.with_outputs(extra_outputs={output_name: python_type})
            self._interface = transform_interface_to_typed_interface(self._python_interface)
Esempio n. 2
0
def get_promise(binding_data: _literal_models.BindingData, outputs_cache: Dict[Node, Dict[str, Promise]]) -> Promise:
    """
    Resolve one piece of binding data into a Promise, using ``outputs_cache`` as the lookup map for upstream node
    outputs. Please see get_promise_map for the rest of the details.

    Collections and maps are resolved recursively and wrapped in a new Promise holding the combined literal.
    """
    upstream = binding_data.promise
    if upstream is not None:
        if not isinstance(upstream, NodeOutput):
            raise FlyteValidationException(
                f"Binding data Promises have to be of the NodeOutput type {type(upstream)} found"
            )
        # upstream.var is the name of the upstream node's output we want (not the name of the input being bound).
        return outputs_cache[upstream.node][upstream.var]

    if binding_data.scalar is not None:
        scalar_literal = _literal_models.Literal(scalar=binding_data.scalar)
        return Promise(var="placeholder", val=scalar_literal)

    if binding_data.collection is not None:
        # Resolve every element, keeping only the literal value each resulting Promise carries.
        resolved_items = [get_promise(item, outputs_cache).val for item in binding_data.collection.bindings]
        collection_literal = _literal_models.Literal(
            collection=_literal_models.LiteralCollection(literals=resolved_items)
        )
        return Promise(var="placeholder", val=collection_literal)

    if binding_data.map is not None:
        resolved_entries = {
            key: get_promise(item, outputs_cache).val for key, item in binding_data.map.bindings.items()
        }
        map_literal = _literal_models.Literal(map=_literal_models.LiteralMap(literals=resolved_entries))
        return Promise(var="placeholder", val=map_literal)

    raise FlyteValidationException("Binding type unrecognized.")
Esempio n. 3
0
 def add_workflow_input(self, input_name: str, python_type: Type) -> Promise:
     """
     Adds an input to the workflow.

     The Python interface is extended with the new input and the typed interface is regenerated from it. The
     input is represented by a Promise that points at the ``input_name`` output of the global start node; that
     Promise is stored in ``self._inputs`` and also recorded in ``self._unbound_inputs``.

     :param input_name: Name of the new workflow input. Must not have been added before.
     :param python_type: Python type of the input.
     :return: The Promise representing the new input.
     :raises FlyteValidationException: If ``input_name`` has already been specified for this workflow.
     """
     if input_name in self._inputs:
         raise FlyteValidationException(f"Input {input_name} has already been specified for wf {self.name}.")
     self._python_interface = self._python_interface.with_inputs(extra_inputs={input_name: python_type})
     self._interface = transform_interface_to_typed_interface(self._python_interface)
     # Keep one reference to the new Promise instead of re-reading it from the dict for every use.
     input_promise = Promise(var=input_name, val=NodeOutput(node=GLOBAL_START_NODE, var=input_name))
     self._inputs[input_name] = input_promise
     self._unbound_inputs.add(input_promise)
     return input_promise
Esempio n. 4
0
def get_reference_entity(
    resource_type: int,
    project: str,
    domain: str,
    name: str,
    version: str,
    inputs: Dict[str, Type],
    outputs: Dict[str, Type],
):
    """
    Build the reference entity that corresponds to ``resource_type``.

    A ReferenceTask, ReferenceWorkflow or ReferenceLaunchPlan is constructed from the remaining arguments,
    depending on which ResourceType value ``resource_type`` equals.

    :raises FlyteValidationException: If ``resource_type`` is none of task, workflow or launch plan.
    """
    # All three reference classes are constructed with the same positional arguments.
    entity_args = (project, domain, name, version, inputs, outputs)
    kinds = _identifier_model.ResourceType

    if resource_type == kinds.TASK:
        return ReferenceTask(*entity_args)
    if resource_type == kinds.WORKFLOW:
        return ReferenceWorkflow(*entity_args)
    if resource_type == kinds.LAUNCH_PLAN:
        return ReferenceLaunchPlan(*entity_args)

    raise FlyteValidationException("Resource type must be one of task, workflow, or launch plan")
Esempio n. 5
0
def extract_return_annotation(
        return_annotation: Union[Type, Tuple]) -> Dict[str, Type]:
    """
    The purpose of this function is to sort out whether a function is returning one thing, or multiple things, and to
    name the outputs accordingly, either by using our default name function, or from a typing.NamedTuple.

        # Option 1
        nt1 = typing.NamedTuple("NT1", x_str=str, y_int=int)
        def t(a: int, b: str) -> nt1: ...

        # Option 2
        def t(a: int, b: str) -> typing.NamedTuple("NT1", x_str=str, y_int=int): ...

        # Option 3
        def t(a: int, b: str) -> typing.Tuple[int, str]: ...

        # Option 4
        def t(a: int, b: str) -> (int, str): ...

        # Option 5
        def t(a: int, b: str) -> str: ...

        # Option 6
        def t(a: int, b: str) -> None: ...

        # Options 7/8
        def t(a: int, b: str) -> List[int]: ...
        def t(a: int, b: str) -> Dict[str, int]: ...

    Note that Options 1 and 2 are identical, just syntactic sugar. In the NamedTuple case, we'll use the names in the
    definition. In all other cases, we'll automatically generate output names, indexed starting at 0.

    :param return_annotation: The return annotation taken from a function signature.
    :return: Mapping of output name to output type, in declaration order. Empty when nothing is returned.
    :raises FlyteValidationException: If a tuple annotation (typing.Tuple or a native tuple) has exactly one element.
    """

    # Handle Option 6
    # We can think about whether we should add a default output name with type None in the future.
    if return_annotation is None or return_annotation is inspect.Signature.empty:
        return {}

    # Handle Options 1 and 2. Even though NamedTuple for us is multi-valued, it's a single value for Python.
    # A typed NamedTuple is recognised as a class with tuple in its MRO, a ``_fields`` tuple, and an annotation
    # for every field. The private ``_field_types`` attribute that used to carry this information was removed in
    # Python 3.9, so the field types are read from ``__annotations__`` instead.
    # issubclass() is deliberately avoided: it raises TypeError for parameterised generics such as tuple[int, str].
    field_names = getattr(return_annotation, "_fields", None)
    if (
        isinstance(return_annotation, type)
        and tuple in getattr(return_annotation, "__mro__", ())
        and isinstance(field_names, tuple)
    ):
        field_types = getattr(return_annotation, "__annotations__", None) or {}
        # An untyped collections.namedtuple has no annotations for its fields; it is left to the branches below.
        if all(field in field_types for field in field_names):
            logger.debug(f"Task returns named tuple {return_annotation}")
            return {field: field_types[field] for field in field_names}

    if getattr(return_annotation, "__origin__", None) is tuple:
        # Handle option 3
        logger.debug(f"Task returns unnamed typing.Tuple {return_annotation}")
        type_args = return_annotation.__args__
        if len(type_args) == 1:
            raise FlyteValidationException(
                "Tuples should be used to indicate multiple return values, found only one return variable."
            )
        return OrderedDict(zip(output_name_generator(len(type_args)), type_args))

    if isinstance(return_annotation, tuple):
        # Handle option 4
        logger.debug(f"Task returns unnamed native tuple {return_annotation}")
        if len(return_annotation) == 1:
            raise FlyteValidationException(
                "Please don't use a tuple if you're just returning one thing.")
        return OrderedDict(zip(output_name_generator(len(return_annotation)), return_annotation))

    # Handle all other single return types (Options 5, 7 and 8)
    logger.debug(f"Task returns a single unnamed value {return_annotation}")
    return {default_output_name(): return_annotation}
Esempio n. 6
0
 def __post_init__(self):
     """
     Check that ``self.interruptible`` is the literal ``True`` or ``False``.

     :raises FlyteValidationException: If it is any other object.
     """
     flag = self.interruptible
     if flag is True or flag is False:
         return
     raise FlyteValidationException(f"Interruptible must be boolean, {self.interruptible} invalid")
Esempio n. 7
0
 def __post_init__(self):
     """
     Check that ``self.on_failure`` is one of the two accepted WorkflowFailurePolicy values.

     :raises FlyteValidationException: If it differs from both FAIL_IMMEDIATELY and
         FAIL_AFTER_EXECUTABLE_NODES_COMPLETE.
     """
     accepted_policies = (
         WorkflowFailurePolicy.FAIL_IMMEDIATELY,
         WorkflowFailurePolicy.FAIL_AFTER_EXECUTABLE_NODES_COMPLETE,
     )
     # Rejected only when the configured policy differs from every accepted value.
     if all(self.on_failure != policy for policy in accepted_policies):
         raise FlyteValidationException(f"Failure policy {self.on_failure} not acceptable")
Esempio n. 8
0
    def execute(self, **kwargs):
        """
        Called by _local_execute. This is how local execution of an imperative workflow runs.

        Nodes are run in the order they appear in the compilation state. Because add_entity requires that all
        inputs to an entity have already been declared, that order is expected to already be a topological sort,
        so every binding a node needs should be resolvable by the time the node is reached. Outputs are tracked in
        a map keyed by node, seeded with the workflow inputs under the global start node and extended as each node
        finishes. Once every node has run, the workflow-level outputs are resolved from that same map.

        :param kwargs: The workflow inputs; _local_execute should've already ensured the values are Promise objects.
        :return: A VoidPromise when the workflow has no outputs, a single Promise for one plain output, a
            one-element tuple for a one-element named tuple output, otherwise a tuple of Promises.
        :raises FlyteValidationException: If the workflow is not ready.
        :raises FlyteValueException: If an entity returns a value although it declares no outputs, or returns a
            different number of values than it declares.
        """
        if not self.ready():
            raise FlyteValidationException(f"Workflow not ready, wf is currently {self}")

        # Outputs of every node seen so far. The global start node's "outputs" are the workflow inputs.
        outputs_by_node = {GLOBAL_START_NODE: dict(kwargs)}  # type: Dict[Node, Dict[str, Promise]]

        for node in self.compilation_state.nodes:
            node_outputs = outputs_by_node.setdefault(node, {})

            # Look up the promises this node's bindings need and call the node's entity with them.
            entity = node.flyte_entity
            call_args = get_promise_map(node.bindings, outputs_by_node)
            results = entity(**call_args)
            output_names = list(entity.python_interface.outputs)

            if isinstance(results, VoidPromise) or results is None:
                continue  # pragma: no cover # Move along, nothing to assign

            # A real value came back, so the entity must declare at least one output.
            if not output_names:
                raise FlyteValueException(results, f"{results} received but should've been VoidPromise or None.")

            if len(output_names) == 1:
                # A one-element named tuple comes back wrapped in a tuple; unwrap it before storing.
                is_wrapped = entity.python_interface.output_tuple_name and isinstance(results, tuple)
                node_outputs[output_names[0]] = results[0] if is_wrapped else results
                continue

            if len(results) != len(output_names):
                raise FlyteValueException(results, f"Different lengths {results} {output_names}")
            for out_name, out_value in zip(output_names, results):
                node_outputs[out_name] = out_value

        # From here on the same thing is done for the workflow as a whole rather than for a single node.
        if not self.python_interface.outputs:
            return VoidPromise(self.name)

        # The return style has to match 1) what the workflow would've returned had it been declared functionally,
        # and 2) what a user would return in a mock function: a tuple stays a tuple, a one-element named tuple
        # becomes a one-element plain tuple, and a single element is returned as is.
        bindings = self.output_bindings
        if len(bindings) != 1:
            return tuple(get_promise(b.binding, outputs_by_node) for b in bindings)

        # Presence of output_tuple_name signals a one-element named tuple.
        wrap_in_tuple = self.python_interface.output_tuple_name
        only_output = get_promise(bindings[0].binding, outputs_by_node)
        return (only_output,) if wrap_in_tuple else only_output
Esempio n. 9
0
 def __post_init__(self):
     """
     Reject any ``interruptible`` value that is not the literal ``True`` or ``False``.

     :raises FlyteValidationException: If ``self.interruptible`` is neither ``True`` nor ``False``.
     """
     # TODO: Get mypy working so we don't have to worry about these checks
     is_bool_literal = self.interruptible is True or self.interruptible is False
     if not is_bool_literal:
         raise FlyteValidationException(f"Interruptible must be boolean, {self.interruptible} invalid")