def find_lhs(self) -> str:
    if self._lhs is not None:
        return self._lhs

    if self._instantiated_in is None or self._instantiated_in == "":
        raise _system_exceptions.FlyteSystemException(f"Object {self} does not have an _instantiated in")

    logger.debug(f"Looking for LHS for {self} from {self._instantiated_in}")
    m = _importlib.import_module(self._instantiated_in)
    for k in dir(m):
        try:
            if getattr(m, k) is self:
                logger.debug(f"Found LHS for {self}, {k}")
                self._lhs = k
                return k
        except ValueError as err:
            # Empty pandas dataframes behave weirdly here such that calling `m.df` raises:
            #   ValueError: The truth value of a {type(self).__name__} is ambiguous. Use a.empty, a.bool(),
            #   a.item(), a.any() or a.all()
            # Since dataframes aren't registrable entities to begin with we swallow any errors they raise and
            # continue looping through m.
            logger.warning("Caught ValueError {} while attempting to auto-assign name".format(err))

    logger.error(f"Could not find LHS for {self} in {self._instantiated_in}")
    raise _system_exceptions.FlyteSystemException(f"Error looking for LHS in {self._instantiated_in}")
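# A minimal, self-contained sketch of the lookup technique find_lhs uses above: import the module an
# object was instantiated in and scan its attributes for an identity match to recover the variable name
# the object was bound to. The function and argument names here are hypothetical, not flytekit API.
import importlib


def find_assigned_name(obj: object, module_name: str) -> str:
    m = importlib.import_module(module_name)
    for k in dir(m):
        try:
            if getattr(m, k) is obj:  # identity check, so __eq__ overrides can't produce false matches
                return k
        except ValueError:
            continue  # mirror find_lhs: skip attributes whose truthiness/access misbehaves
    raise ValueError(f"{obj!r} is not bound to a module-level name in {module_name}")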
def check_call(cmd_args, **kwargs):
    if not isinstance(cmd_args, list):
        cmd_args = _schlex.split(cmd_args)

    # Jupyter notebooks hijack I/O and thus we cannot dump directly to stdout.
    with _tempfile.TemporaryFile() as std_out:
        with _tempfile.TemporaryFile() as std_err:
            ret_code = _subprocess.Popen(cmd_args, stdout=std_out, stderr=std_err, **kwargs).wait()

            # Dump sub-process' std out into current std out
            std_out.seek(0)
            logger.info("Output of command '{}':\n{}\n".format(cmd_args, std_out.read()))

            if ret_code != 0:
                std_err.seek(0)
                err_str = std_err.read()
                logger.error("Error from command '{}':\n{}\n".format(cmd_args, err_str))
                raise Exception(
                    "Called process exited with error code: {}. Stderr dump:\n\n{}".format(ret_code, err_str)
                )

    return 0
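# Hedged usage sketch for check_call above: it accepts either a pre-split argv list or a single
# string (which gets shlex-split). Output is captured into temp files and routed through the
# logger, which is why this also works inside Jupyter notebooks where stdout is hijacked.
# The commands shown are illustrative only.
check_call("echo hello world")
check_call(["ls", "-l"], cwd="/tmp")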
def execute(self, **kwargs) -> typing.Any:
    """
    Executes the given script by substituting the inputs and outputs and extracts the outputs from the filesystem
    """
    logger.info(f"Running shell script as type {self.task_type}")
    if self.script_file:
        with open(self.script_file) as f:
            self._script = f.read()

    outputs: typing.Dict[str, str] = {}
    if self._output_locs:
        for v in self._output_locs:
            outputs[v.var] = self._interpolizer.interpolate(v.location, inputs=kwargs)

    if os.name == "nt":
        self._script = self._script.lstrip().rstrip().replace("\n", "&&")

    if "env" in kwargs and isinstance(kwargs["env"], dict):
        kwargs["export_env"] = self.make_export_string_from_env_dict(kwargs["env"])

    gen_script = self._interpolizer.interpolate(self._script, inputs=kwargs, outputs=outputs)
    if self._debug:
        print("\n==============================================\n")
        print(gen_script)
        print("\n==============================================\n")

    try:
        subprocess.check_call(gen_script, shell=True)
    except subprocess.CalledProcessError as e:
        files = os.listdir(".")
        fstr = "\n-".join(files)
        logger.error(
            f"Failed to Execute Script, return-code {e.returncode} \n"
            f"StdErr: {e.stderr}\n"
            f"StdOut: {e.stdout}\n"
            f" Current directory contents: .\n-{fstr}"
        )
        raise

    final_outputs = []
    for v in self._output_locs:
        if issubclass(v.var_type, FlyteFile):
            final_outputs.append(FlyteFile(outputs[v.var]))
        if issubclass(v.var_type, FlyteDirectory):
            final_outputs.append(FlyteDirectory(outputs[v.var]))

    if len(final_outputs) == 1:
        return final_outputs[0]
    if len(final_outputs) > 1:
        return tuple(final_outputs)
    return None
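# A sketch of how the execute() above is typically driven, assuming flytekit's ShellTask and
# OutputLocation API: {inputs.x}-style placeholders are interpolated into the script before it
# runs, and output_locs tells execute() where on the filesystem to pick up each output. The task
# name, script body, and variable names here are hypothetical.
from flytekit import kwtypes
from flytekit.extras.tasks.shell import OutputLocation, ShellTask
from flytekit.types.file import FlyteFile

copy_task = ShellTask(
    name="copy_task",  # hypothetical
    debug=True,  # prints the interpolated script before running it
    script="""
    set -ex
    cp {inputs.src} {inputs.dest}
    """,
    inputs=kwtypes(src=FlyteFile, dest=str),
    output_locs=[OutputLocation(var="out", var_type=FlyteFile, location="{inputs.dest}")],
)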
def _update_cmd_config_and_execute(s3_cfg: S3Config, cmd: List[str]):
    env = _os.environ.copy()

    if s3_cfg.enable_debug:
        cmd.insert(1, "--debug")

    if s3_cfg.endpoint is not None:
        cmd.insert(1, s3_cfg.endpoint)
        cmd.insert(1, "--endpoint-url")

    if S3_ACCESS_KEY_ID_ENV_NAME not in os.environ:
        if s3_cfg.access_key_id:
            env[S3_ACCESS_KEY_ID_ENV_NAME] = s3_cfg.access_key_id

    if S3_SECRET_ACCESS_KEY_ENV_NAME not in os.environ:
        if s3_cfg.secret_access_key:
            env[S3_SECRET_ACCESS_KEY_ENV_NAME] = s3_cfg.secret_access_key

    retry = 0
    while True:
        try:
            try:
                return subprocess.check_call(cmd, env=env)
            except Exception as e:
                if retry > 0:
                    logger.info(f"AWS command failed with error {e}, command: {cmd}, retry {retry}")
                logger.debug(f"Appending anonymous flag and retrying command {cmd}")
                anonymous_cmd = cmd[:]  # strings only, so this is deep enough
                anonymous_cmd.insert(1, S3_ANONYMOUS_FLAG)
                return subprocess.check_call(anonymous_cmd, env=env)
        except Exception as e:
            logger.error(f"Exception when trying to execute {cmd}, reason: {str(e)}")
            retry += 1
            if retry > s3_cfg.retries:
                raise
            secs = s3_cfg.backoff
            logger.info(f"Sleeping before retrying again, after {secs.total_seconds()} seconds")
            time.sleep(secs.total_seconds())
            logger.info("Retrying again")
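# Hedged usage sketch for _update_cmd_config_and_execute above. The S3Config fields shown
# (endpoint, retries, backoff) are assumed from flytekit's configuration dataclass; the bucket
# and paths are hypothetical. Note the flags are inserted at index 1, i.e. right after "aws".
import datetime

cfg = S3Config(endpoint="http://localhost:30084", retries=3, backoff=datetime.timedelta(seconds=5))
_update_cmd_config_and_execute(cfg, ["aws", "s3", "cp", "s3://my-bucket/data.csv", "/tmp/data.csv"])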
def dispatch_execute(
    self, ctx: FlyteContext, input_literal_map: _literal_models.LiteralMap
) -> Union[_literal_models.LiteralMap, _dynamic_job.DynamicJobSpec]:
    """
    This method translates Flyte's Type system based input values and invokes the actual call to the executor.
    This method is also invoked during runtime.

    * ``VoidPromise`` is returned in the case when the task itself declares no outputs.
    * ``LiteralMap`` is returned when the task returns one or more outputs in the declaration. Individual
      outputs may be ``None``.
    * ``DynamicJobSpec`` is returned when a dynamic workflow is executed.
    """
    # Invoked before the task is executed
    new_user_params = self.pre_execute(ctx.user_space_params)
    from flytekit.deck.deck import _output_deck

    # Create another execution context with the new user params, but let's keep the same working dir
    with FlyteContextManager.with_context(
        ctx.with_execution_state(
            ctx.execution_state.with_params(user_space_params=new_user_params)
        )  # type: ignore
    ) as exec_ctx:
        # TODO We could support default values here too - but not part of the plan right now
        # Translate the input literals to Python native
        native_inputs = TypeEngine.literal_map_to_kwargs(exec_ctx, input_literal_map, self.python_interface.inputs)

        # TODO: Logger should auto inject the current context information to indicate if the task is running
        #   within a workflow or a subworkflow etc.
        logger.info(f"Invoking {self.name} with inputs: {native_inputs}")
        try:
            native_outputs = self.execute(**native_inputs)
        except Exception as e:
            logger.exception(f"Exception when executing {e}")
            raise e
        logger.debug("Task executed successfully in user level")

        # Let's run the post_execute method. This may result in an IgnoreOutputs exception, which is
        # bubbled up to be handled at the callee layer.
        native_outputs = self.post_execute(new_user_params, native_outputs)

        # Short circuit the translation to literal map because what's returned may be a dj spec (or an
        # already-constructed LiteralMap if the dynamic task was a no-op), not python native values
        if isinstance(native_outputs, (_literal_models.LiteralMap, _dynamic_job.DynamicJobSpec)):
            return native_outputs

        expected_output_names = list(self._outputs_interface.keys())
        if len(expected_output_names) == 1:
            # Here we have to handle the fact that the task could've been declared with a typing.NamedTuple of
            # length one. That convention is used for naming outputs - and single-length-NamedTuples are
            # particularly troublesome, but elegant handling of them is not a high priority.
            # Again, we're using the output_tuple_name as a proxy.
            if self.python_interface.output_tuple_name and isinstance(native_outputs, tuple):
                native_outputs_as_map = {expected_output_names[0]: native_outputs[0]}
            else:
                native_outputs_as_map = {expected_output_names[0]: native_outputs}
        elif len(expected_output_names) == 0:
            native_outputs_as_map = {}
        else:
            native_outputs_as_map = {
                expected_output_names[i]: native_outputs[i] for i, _ in enumerate(native_outputs)
            }

        # We manually construct a LiteralMap here because task inputs and outputs actually violate the
        # assumption built into the IDL that all the values of a literal map are of the same type.
        literals = {}
        for k, v in native_outputs_as_map.items():
            literal_type = self._outputs_interface[k].type
            py_type = self.get_type_for_output_var(k, v)
            if isinstance(v, tuple):
                raise TypeError(f"Output({k}) in task {self.name} received a tuple {v}, instead of {py_type}")
            try:
                literals[k] = TypeEngine.to_literal(exec_ctx, v, py_type, literal_type)
            except Exception as e:
                logger.error(f"Failed to convert return value for var {k} with error {type(e)}: {e}")
                raise TypeError(
                    f"Failed to convert return value for var {k} for function {self.name} with error {type(e)}: {e}"
                ) from e

        INPUT = "input"
        OUTPUT = "output"

        input_deck = Deck(INPUT)
        for k, v in native_inputs.items():
            input_deck.append(TypeEngine.to_html(ctx, v, self.get_type_for_input_var(k, v)))

        output_deck = Deck(OUTPUT)
        for k, v in native_outputs_as_map.items():
            output_deck.append(TypeEngine.to_html(ctx, v, self.get_type_for_output_var(k, v)))

        if _internal.Deck.DISABLE_DECK.read() is not True and self.disable_deck is False:
            _output_deck(self.name.split(".")[-1], new_user_params)

        outputs_literal_map = _literal_models.LiteralMap(literals=literals)
        # After the execute has been successfully completed
        return outputs_literal_map
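# Illustrative, standalone sketch (not flytekit code) of the output-naming convention that
# dispatch_execute implements above: a task declared with a typing.NamedTuple gets named outputs,
# while a length-one NamedTuple still returns a tuple at runtime, which is why output_tuple_name
# is consulted before unwrapping index 0.
import typing

op = typing.NamedTuple("op", [("sum", int)])


def add(a: int, b: int) -> op:
    return op(sum=a + b)


result = add(1, 2)
assert isinstance(result, tuple)  # a length-one NamedTuple is still a tuple
assert result.sum == 3  # dispatch_execute would map this to {"sum": 3}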