def get_promise(binding_data: _literal_models.BindingData, outputs_cache: Dict[Node, Dict[str, Promise]]) -> Promise:
    """
    Turn one binding into a Promise object with the help of a lookup map of already computed node outputs.

    The binding is inspected in a fixed order: upstream promise, scalar, collection, map. Collections and maps
    are resolved recursively, element by element. The original documentation refers the reader to
    get_promise_map for the rest of the details.

    :param binding_data: The binding to resolve.
    :param outputs_cache: Maps a node to its outputs, keyed by output name.
    :raises FlyteValidationException: If the binding holds a promise that is not a NodeOutput, or if none of
        the four binding kinds is set.
    """
    if binding_data.promise is not None:
        upstream = binding_data.promise
        if not isinstance(upstream, NodeOutput):
            raise FlyteValidationException(
                f"Binding data Promises have to be of the NodeOutput type {type(binding_data.promise)} found"
            )
        # upstream.var names the output of the upstream node that we want; the value itself was stored in the
        # cache under that node.
        return outputs_cache[upstream.node][upstream.var]

    if binding_data.scalar is not None:
        return Promise(var="placeholder", val=_literal_models.Literal(scalar=binding_data.scalar))

    if binding_data.collection is not None:
        resolved_items = [get_promise(bd, outputs_cache).val for bd in binding_data.collection.bindings]
        return Promise(
            var="placeholder",
            val=_literal_models.Literal(collection=_literal_models.LiteralCollection(literals=resolved_items)),
        )

    if binding_data.map is not None:
        resolved_entries = {k: get_promise(bd, outputs_cache).val for k, bd in binding_data.map.bindings.items()}
        return Promise(
            var="placeholder",
            val=_literal_models.Literal(map=_literal_models.LiteralMap(literals=resolved_entries)),
        )

    raise FlyteValidationException("Binding type unrecognized.")
def test_comparison_refs():
    """Build comparison expressions over promises that wrap node outputs and print each expression's type."""

    def dummy_node(node_id) -> Node:
        node = Node(
            node_id,
            metadata=None,
            bindings=[],
            upstream_nodes=[],
            flyte_entity=SQLTask(name="x", query_template="x", inputs={}),
        )
        # The private id attribute is assigned directly after construction.
        node._id = node_id
        return node

    def show(expr):
        print(f"{expr} is type {type(expr)}")

    px = Promise("x", NodeOutput(var="x", node=dummy_node("n1")))
    py = Promise("y", NodeOutput(var="y", node=dummy_node("n2")))

    # Promise-to-promise comparisons, alone and combined with & and |.
    show(px == py)
    show(px < py)
    show((px == py) & (px < py))
    show(((px == py) & (px < py)) | (px > py))

    # Promise-to-constant comparisons.
    show(px < 5)
    show(px >= 5)
    show(px != 5)
def _local_execute(self, ctx: FlyteContext, **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
    """
    Execute the workflow locally and repackage whatever it returns into Promises named after the workflow's
    own outputs.

    :param ctx: The FlyteContext used for literal conversion.
    :param kwargs: Workflow inputs, either Promises or Python native values.
    :raises FlyteValueException: If the workflow returns nothing although the interface declares outputs, or
        returns something although the interface declares none.
    """
    # Workflow local executions always work with Promise objects holding Flyte literal values. A user may
    # still call a sub-workflow with a Python native value, so wrap those here.
    for name, value in kwargs.items():
        if isinstance(value, Promise):
            continue
        native_type = self.python_interface.inputs[name]
        kwargs[name] = Promise(
            var=name, val=TypeEngine.to_literal(ctx, value, native_type, self.interface.inputs[name].type)
        )

    # What comes back is a combination of Python native values and Promises containing Flyte literals.
    function_outputs = self.execute(**kwargs)

    # Empty return case. A workflow function may return a task that doesn't return anything
    #   def wf():
    #       return t1()
    # or it may not return at all
    #   def wf():
    #       t1()
    # The former yields the task's VoidPromise, the latter yields None.
    returned_nothing = function_outputs is None or isinstance(function_outputs, VoidPromise)
    if returned_nothing:
        if len(self.python_interface.outputs) != 0:
            raise FlyteValueException(
                function_outputs,
                f"{function_outputs} received but interface has {len(self.python_interface.outputs)} outputs.",
            )
        return VoidPromise(self.name)

    # The empty case has already returned above, so getting here without declared outputs is an error.
    if len(self.python_interface.outputs) == 0:
        raise FlyteValueException(
            function_outputs, f"{function_outputs} received but should've been VoidPromise or None."
        )

    expected_output_names = list(self.python_interface.outputs.keys())
    if len(expected_output_names) != 1:
        wf_outputs_as_map = {}
        for position, _ in enumerate(function_outputs):
            output_name = expected_output_names[position]
            wf_outputs_as_map[output_name] = function_outputs[position]
    elif self.python_interface.output_tuple_name and isinstance(function_outputs, tuple):
        # The workflow could have been declared with a typing.NamedTuple of length one (a convention used
        # for naming outputs). output_tuple_name is used as a proxy for detecting that, in which case the
        # single value has to be pulled out of the tuple.
        wf_outputs_as_map = {expected_output_names[0]: function_outputs[0]}
    else:
        wf_outputs_as_map = {expected_output_names[0]: function_outputs}

    # Repackage the promises coming from the tasks into Promises that match the workflow's interface:
    # extract the literals first, then build new Promises from them.
    wf_outputs_as_literal_dict = translate_inputs_to_literals(
        ctx,
        wf_outputs_as_map,
        flyte_interface_types=self.interface.outputs,
        native_types=self.python_interface.outputs,
    )

    # New promises carrying the workflow's output names.
    renamed_promises = []
    for output_name in expected_output_names:
        renamed_promises.append(Promise(output_name, wf_outputs_as_literal_dict[output_name]))
    return create_task_output(renamed_promises, self.python_interface)
def test_create_native_named_tuple():
    """Exercise create_native_named_tuple with no, one and several promises, with and without a tuple name."""
    ctx = FlyteContextManager.current_context()

    def int_promise(name, value):
        # Wrap a Python int as an integer literal inside a Promise with the given variable name.
        return Promise(var=name, val=TypeEngine.to_literal(ctx, value, int, LiteralType(simple=SimpleType.INTEGER)))

    # No promises at all.
    result = create_native_named_tuple(ctx, promises=None, entity_interface=Interface())
    assert result is None

    p1 = int_promise("x", 1)
    p2 = int_promise("y", 2)

    # A single promise comes back as the bare native value.
    result = create_native_named_tuple(ctx, promises=p1, entity_interface=Interface(outputs={"x": int}))
    assert result
    assert result == 1

    # An empty list behaves like None.
    result = create_native_named_tuple(ctx, promises=[], entity_interface=Interface())
    assert result is None

    # Several promises, default tuple name.
    two_ints = {"x": int, "y": int}
    result = create_native_named_tuple(ctx, promises=[p1, p2], entity_interface=Interface(outputs=two_ints))
    assert result
    assert result == (1, 2)

    # Several promises, explicit tuple name.
    result = create_native_named_tuple(
        ctx,
        promises=[p1, p2],
        entity_interface=Interface(outputs={"x": int, "y": int}, output_tuple_name="Tup"),
    )
    assert result
    assert result == (1, 2)
    assert result.__class__.__name__ == "Tup"

    # A promise whose variable is missing from the interface is expected to raise KeyError.
    with pytest.raises(KeyError):
        create_native_named_tuple(
            ctx,
            promises=[p1, p2],
            entity_interface=Interface(outputs={"x": int}, output_tuple_name="Tup"),
        )
def test_comparison_lits():
    """Evaluate comparison expressions over promises that hold integer literals (x is 5, y is 8)."""

    def check(expr, expected: bool):
        print(f"{expr} evals to {expr.eval()}")
        assert expected == expr.eval()

    px = Promise("x", TypeEngine.to_literal(None, 5, int, None))
    py = Promise("y", TypeEngine.to_literal(None, 8, int, None))

    # Promise against promise.
    check(px == py, False)
    check(px < py, True)
    check((px == py) & (px < py), False)
    check(((px == py) & (px < py)) | (px > py), False)

    # Promise against a plain constant.
    check(px < 5, False)
    check(px >= 5, True)
    check(py >= 5, True)
def construct_input_promises(inputs: List[str]):
    """
    Build one Promise per input name, each pointing at the output of the same name on GLOBAL_START_NODE.

    :param inputs: Names of the inputs.
    :return: A dict from input name to its Promise. Empty when no names are given.
    """
    promises_by_name = {}
    for input_name in inputs:
        start_node_output = NodeOutput(node=GLOBAL_START_NODE, var=input_name)
        promises_by_name[input_name] = Promise(var=input_name, val=start_node_output)
    return promises_by_name
def _local_execute(self, ctx: FlyteContext, **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
    """
    Please see the _local_execute comments in the main task.

    Converts the inputs to literals, runs unwrap_literal_map_and_execute on them and wraps the resulting
    literals back into Promises.

    :raises AssertionError: If the number of produced literals differs from the number of declared outputs.
    """
    # Inputs during local execute come in two flavours: Promises (outputs of previous task executions) and
    # native constants bound to this specific task (e.g. default values). They may also arrive nested in
    # dictionaries or lists. Unwrapping all of them leaves us with what is essentially a LiteralMap.
    input_literals = translate_inputs_to_literals(
        ctx,
        incoming_values=kwargs,
        flyte_interface_types=self.interface.inputs,
        native_types=self.python_interface.inputs,
    )
    outputs_literal_map = self.unwrap_literal_map_and_execute(
        ctx, _literal_models.LiteralMap(literals=input_literals)
    )

    # The outputs, if any, now have to be wrapped back into Promise objects.
    outputs_literals = outputs_literal_map.literals
    output_names = list(self.python_interface.outputs.keys())
    if len(output_names) != len(outputs_literals):
        # Length check, clean up exception
        raise AssertionError(f"Length difference {len(output_names)} {len(outputs_literals)}")

    # A task without outputs still hands back a VoidPromise.
    if len(output_names) == 0:
        return VoidPromise(self.name)

    wrapped = []
    for name in output_names:
        wrapped.append(Promise(name, outputs_literals[name]))
    return create_task_output(wrapped, self.python_interface)
def _local_execute(self, ctx: FlyteContext, **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
    """
    This code is used only in the case when we want to dispatch_execute with outputs from a previous node.
    For regular execution, dispatch_execute is invoked directly.

    :raises AssertionError: If the number of produced literals differs from the number of declared outputs.
    """
    # Inputs during local execute come in two flavours: Promises (outputs of previous task executions) and
    # native constants bound to this specific task (e.g. default values). They may also arrive nested in
    # dictionaries or lists. Unwrapping all of them leaves us with what is essentially a LiteralMap.
    input_literals = translate_inputs_to_literals(
        ctx, input_kwargs=kwargs, interface=self.interface, native_input_types=self.get_input_types()
    )
    outputs_literal_map = self.dispatch_execute(ctx, _literal_models.LiteralMap(literals=input_literals))
    outputs_literals = outputs_literal_map.literals

    # TODO maybe this is the part that should be done for local execution, we pass the outputs to some special
    #    location, otherwise we dont really need to right? The higher level execute could just handle literalMap
    # The outputs, if any, now have to be wrapped back into Promise objects.
    output_names = list(self.interface.outputs.keys())
    if len(output_names) != len(outputs_literals):
        # Length check, clean up exception
        raise AssertionError(f"Length difference {len(output_names)} {len(outputs_literals)}")

    # A task without outputs still hands back a VoidPromise.
    if len(output_names) == 0:
        return VoidPromise(self.name)

    wrapped = []
    for name in output_names:
        wrapped.append(Promise(name, outputs_literals[name]))
    return create_task_output(wrapped, self.python_interface)
def _workflow_fn_outputs_to_promise(
    ctx: FlyteContext,
    native_outputs: typing.Dict[str, type],  # Actually an orderedDict
    typed_outputs: Dict[str, _interface_models.Variable],
    outputs: Union[Any, Tuple[Any]],
) -> List[Promise]:
    """
    Recast whatever a workflow function returned into Promises named after the workflow's declared outputs.

    :param ctx: The FlyteContext used when a native value has to be converted to a literal.
    :param native_outputs: Declared outputs, output name to Python type, in declaration order.
    :param typed_outputs: Declared outputs, output name to typed interface variable.
    :param outputs: The value(s) returned by the workflow function. A single declared output may be returned
        bare or as a one-element tuple; several declared outputs must be returned as a tuple of that length.
    :return: One Promise per declared output, in declaration order.
    :raises AssertionError: If the number of returned values does not match the number of declared outputs.
    """
    expected_count = len(native_outputs)

    if expected_count == 1:
        if isinstance(outputs, tuple):
            if len(outputs) != 1:
                raise AssertionError(
                    f"The Workflow specification indicates only one return value, received {len(outputs)}"
                )
        else:
            outputs = (outputs,)
    elif expected_count > 1:
        # Check the type before touching len(): a bare return value (e.g. a single Promise) need not define
        # __len__, and building the error message must not fail with a TypeError of its own.
        if not isinstance(outputs, tuple):
            raise AssertionError(
                f"The workflow specification indicates {expected_count} return vals, but received a single "
                f"non-tuple value of type {type(outputs).__name__}"
            )
        if expected_count != len(outputs):
            raise AssertionError(
                f"The workflow specification indicates {expected_count} return vals, but received {len(outputs)}"
            )

    # This recasts the Promises provided by the outputs of the workflow's tasks into the correct output names
    # of the workflow itself
    return_vals = []
    for (k, t), v in zip(native_outputs.items(), outputs):
        if isinstance(v, Promise):
            return_vals.append(v.with_var(k))
        else:
            # Found a return value that is not a promise, so we need to transform it
            var = typed_outputs[k]
            return_vals.append(Promise(var=k, val=TypeEngine.to_literal(ctx, v, t, var.type)))
    return return_vals
def add_workflow_input(self, input_name: str, python_type: Type) -> Promise:
    """
    Adds an input to the workflow.

    The Python interface is extended with the new input, the typed interface is regenerated from it, and a
    Promise bound to the output of the same name on GLOBAL_START_NODE is recorded both as the input and as a
    not-yet-bound input.

    :param input_name: Name of the new workflow input.
    :param python_type: Python type of the new input.
    :return: The Promise that represents the new input. (The annotation previously said Interface, which did
        not match the value actually returned.)
    :raises FlyteValidationException: If an input of that name has already been added.
    """
    if input_name in self._inputs:
        raise FlyteValidationException(f"Input {input_name} has already been specified for wf {self.name}.")

    self._python_interface = self._python_interface.with_inputs(extra_inputs={input_name: python_type})
    self._interface = transform_interface_to_typed_interface(self._python_interface)

    input_promise = Promise(var=input_name, val=NodeOutput(node=GLOBAL_START_NODE, var=input_name))
    self._inputs[input_name] = input_promise
    self._unbound_inputs.add(input_promise)
    return input_promise
def _compute_outputs(self, n: Node) -> Optional[Union[Promise, Tuple[Promise], VoidPromise]]:
    """
    Create the output promises of node ``n`` from the names reported by compute_output_vars.

    :param n: The node the promises should point at.
    :return: A VoidPromise when there are no output names (None or empty), otherwise whatever
        create_task_output builds from one Promise per name.
    """
    output_vars = self.compute_output_vars()
    has_outputs = output_vars is not None and len(output_vars) != 0
    if not has_outputs:
        return VoidPromise(n.id, NodeOutput(node=n, var="placeholder"))

    promises = []
    for name in output_vars:
        promises.append(Promise(var=name, val=NodeOutput(node=n, var=name)))
    # TODO: Is there a way to add the Python interface here? Currently, it's an optional arg.
    return create_task_output(promises)
def end_branch(self) -> Optional[Union[Condition, Tuple[Promise], Promise, VoidPromise]]:
    """
    This should be invoked after every branch has been visited.

    :return: The condition object while ``_last_case`` is not set. Once it is set, the context is popped and
        the result is a VoidPromise (no output names) or the output of create_task_output over value-less
        Promises, one per output name.
    """
    if not self._last_case:
        return self._condition

    FlyteContextManager.pop_context()
    output_vars = self.compute_output_vars()
    if output_vars is None:
        return VoidPromise(self.name)
    return create_task_output([Promise(var=name, val=None) for name in output_vars])
def _compute_outputs(self, selected_output_promise) -> Optional[Union[Tuple[Promise], Promise, VoidPromise]]:
    """
    For the local execution case only returns the least common set of outputs.

    :param selected_output_promise: The promise, or tuple of promises, produced by the selected branch.
    :return: A VoidPromise when compute_output_vars reports None, otherwise the result of create_task_output
        over new Promises that pair each output name with the value of the promise at the same position.
    """
    output_vars = self.compute_output_vars()
    if output_vars is None:
        return VoidPromise(self.name)

    if isinstance(selected_output_promise, tuple):
        selected = selected_output_promise
    else:
        selected = (selected_output_promise,)

    promises = []
    for name, source in zip(output_vars, selected):
        promises.append(Promise(var=name, val=source.val))
    return create_task_output(promises)
def _compute_outputs(self, n: Node) -> Union[Promise, Tuple[Promise], VoidPromise]:
    """
    Compute the outputs of the branch node ``n`` as the output names that every output-producing case shares.

    :param n: The node the resulting promises should point at.
    :return: A VoidPromise if any case has neither an output promise nor an error, or if no case has an
        output promise at all. Otherwise the result of create_task_output over one Promise per shared name,
        in the order the names appear in the first output-producing case.
    """
    per_case_vars: typing.List[typing.List[str]] = []
    for c in self._cases:
        if c.output_promise is None:
            if c.err is None:
                # One node returns a void output and no error, we will default to a
                # Void output
                return VoidPromise(n.id)
            # Error-only case: it contributes no outputs.
            continue
        if isinstance(c.output_promise, tuple):
            per_case_vars.append([p.var for p in c.output_promise])
        else:
            per_case_vars.append([c.output_promise.var])

    # Every case was an error case, so there is nothing to intersect. Previously this fell through to
    # indexing an empty list and raised IndexError.
    if not per_case_vars:
        return VoidPromise(n.id)

    first_case_vars = per_case_vars[0]
    shared = set(first_case_vars).intersection(*per_case_vars[1:])
    # Walk the first case's names instead of the set so the order of the outputs is stable between runs;
    # dict.fromkeys drops duplicates while keeping first-seen order.
    ordered_shared = [name for name in dict.fromkeys(first_case_vars) if name in shared]

    promises = [Promise(var=x, val=NodeOutput(node=n, var=x)) for x in ordered_shared]
    # TODO: Is there a way to add the Python interface here? Currently, it's an optional arg.
    return create_task_output(promises)
def _local_execute(self, ctx: FlyteContext, **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
    """
    Performs local execution of a workflow. kwargs are expected to be Promises for the most part (unless,
    someone has hardcoded in my_wf(input_1=5) or something).

    :param ctx: The FlyteContext
    :param kwargs: parameters for the workflow itself
    :return: A VoidPromise when the workflow produced nothing or declares no outputs, otherwise the result of
        create_task_output over the workflow's output promises.
    """
    # NOTE(review): this interpolates `execution_state.Mode`; confirm whether the current mode value was
    # what was meant to be logged.
    logger.info(f"Executing Workflow {self._name}, ctx{ctx.execution_state.Mode}")

    # Workflow local executions always work with Promise objects holding Flyte literal values. A user may
    # still call a sub-workflow with a Python native value, so wrap those here.
    for name, value in kwargs.items():
        if isinstance(value, Promise):
            continue
        native_type = self._native_interface.inputs[name]
        kwargs[name] = Promise(
            var=name, val=TypeEngine.to_literal(ctx, value, native_type, self.interface.inputs[name].type)
        )

    function_outputs = self.execute(**kwargs)

    # A workflow function may return a task that doesn't return anything
    #   def wf():
    #       return t1()
    # or it may not return at all
    #   def wf():
    #       t1()
    # The former yields the task's VoidPromise, the latter yields None.
    returned_nothing = function_outputs is None or isinstance(function_outputs, VoidPromise)
    if returned_nothing or len(self.python_interface.outputs) == 0:
        return VoidPromise(self.name)

    # TODO: Can we refactor the task code to be similar to what's in this function?
    promises = _workflow_fn_outputs_to_promise(
        ctx, self._native_interface.outputs, self.interface.outputs, function_outputs
    )
    # TODO: With the native interface, create_task_output should be able to derive the typed interface, and it
    #   should be able to do the conversion of the output of the execute() call directly.
    return create_task_output(promises, self._native_interface)
def local_execute(self, ctx: FlyteContext, **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
    """
    This function is used only in the local execution path and is responsible for calling dispatch execute.
    Use this function when calling a task with native values (or Promises containing Flyte literals derived
    from Python native values).

    When ``self.metadata.cache`` is set, LocalTaskCache is consulted first and dispatch_execute only runs on
    a miss, after which the result is stored.

    :raises AssertionError: If the number of produced literals differs from the number of declared outputs.
    """
    # Inputs during local execute come in two flavours: Promises (outputs of previous task executions) and
    # native constants bound to this specific task (e.g. default values). They may also arrive nested in
    # dictionaries or lists. Unwrapping all of them leaves us with what is essentially a LiteralMap.
    input_literals = translate_inputs_to_literals(
        ctx,
        incoming_values=kwargs,
        flyte_interface_types=self.interface.inputs,  # type: ignore
        native_types=self.get_input_types(),
    )
    input_literal_map = _literal_models.LiteralMap(literals=input_literals)

    if not self.metadata.cache:
        # No caching: execute with user space params rebuilt through with_task_sandbox().
        es = ctx.execution_state
        b = es.user_space_params.with_task_sandbox()
        ctx = ctx.current_context().with_execution_state(es.with_params(user_space_params=b.build())).build()
        outputs_literal_map = self.dispatch_execute(ctx, input_literal_map)
    else:
        # Caching is enabled, so look for a memoized result first.
        # TODO: how to get a nice `native_inputs` here?
        logger.info(
            f"Checking cache for task named {self.name}, cache version {self.metadata.cache_version} "
            f"and inputs: {input_literal_map}"
        )
        outputs_literal_map = LocalTaskCache.get(self.name, self.metadata.cache_version, input_literal_map)
        # The cache returns None iff the key does not exist in the cache
        if outputs_literal_map is not None:
            logger.info("Cache hit")
        else:
            logger.info("Cache miss, task will be executed now")
            outputs_literal_map = self.dispatch_execute(ctx, input_literal_map)
            # TODO: need `native_inputs`
            LocalTaskCache.set(self.name, self.metadata.cache_version, input_literal_map, outputs_literal_map)
            logger.info(
                f"Cache set for task named {self.name}, cache version {self.metadata.cache_version} "
                f"and inputs: {input_literal_map}"
            )

    outputs_literals = outputs_literal_map.literals

    # TODO maybe this is the part that should be done for local execution, we pass the outputs to some special
    #    location, otherwise we dont really need to right? The higher level execute could just handle literalMap
    # The outputs, if any, now have to be wrapped back into Promise objects.
    output_names = list(self.interface.outputs.keys())  # type: ignore
    if len(output_names) != len(outputs_literals):
        # Length check, clean up exception
        raise AssertionError(f"Length difference {len(output_names)} {len(outputs_literals)}")

    # A task without outputs still hands back a VoidPromise.
    if len(output_names) == 0:
        return VoidPromise(self.name)

    wrapped = []
    for name in output_names:
        wrapped.append(Promise(name, outputs_literals[name]))
    return create_task_output(wrapped, self.python_interface)