Example #1
0
    def find_lhs(self) -> str:
        """
        Resolve, cache and return the module-level name this object is assigned to.

        The module named by ``self._instantiated_in`` is imported and every attribute listed by ``dir()`` is
        compared by identity with ``self``. The first matching attribute name is stored in ``self._lhs`` and
        returned; once ``self._lhs`` is set, later calls return it directly without importing anything.

        :return: The attribute name under which this object is found in the module.
        :raises FlyteSystemException: If ``self._instantiated_in`` is ``None`` or an empty string, or if no
            attribute of the imported module is this object.
        """
        cached_name = self._lhs
        if cached_name is not None:
            return cached_name

        module_name = self._instantiated_in
        if module_name is None or module_name == "":
            raise _system_exceptions.FlyteSystemException(f"Object {self} does not have an _instantiated in")

        logger.debug(f"Looking for LHS for {self} from {self._instantiated_in}")
        module = _importlib.import_module(module_name)
        for attr_name in dir(module):
            try:
                if getattr(module, attr_name) is not self:
                    continue
                logger.debug(f"Found LHS for {self}, {attr_name}")
                self._lhs = attr_name
                return attr_name
            except ValueError as err:
                # Some module members raise ValueError while being inspected here (the original author
                # reports empty pandas dataframes complaining that their truth value is ambiguous). Such
                # objects are not registrable entities anyway, so the error is logged and the scan moves on
                # to the next attribute.
                logger.warning("Caught ValueError {} while attempting to auto-assign name".format(err))

        logger.error(f"Could not find LHS for {self} in {self._instantiated_in}")
        raise _system_exceptions.FlyteSystemException(f"Error looking for LHS in {self._instantiated_in}")
Example #2
0
def check_call(cmd_args, **kwargs):
    """
    Run a command to completion, log what it printed, and raise if it exits with a non-zero code.

    The child's stdout and stderr are redirected into temporary files instead of being inherited, then read
    back and written to the logger. The captured bytes are decoded as UTF-8 (undecodable bytes are replaced)
    so that log lines and the exception message contain readable text rather than a ``b'...'`` repr.

    :param cmd_args: Command to run. A list is passed to ``Popen`` unchanged; any other value is split into a
        list with ``shlex``-style rules first.
    :param kwargs: Extra keyword arguments forwarded to ``Popen``. ``stdout`` and ``stderr`` are supplied by
        this function and must not be passed.
    :return: ``0`` when the command exits successfully.
    :raises Exception: If the command exits with a non-zero return code. The message contains the return code
        and the decoded stderr of the command.
    """
    if not isinstance(cmd_args, list):
        cmd_args = _schlex.split(cmd_args)

    # Jupyter notebooks hijack I/O and thus we cannot dump directly to stdout.
    with _tempfile.TemporaryFile() as std_out, _tempfile.TemporaryFile() as std_err:
        ret_code = _subprocess.Popen(cmd_args, stdout=std_out, stderr=std_err, **kwargs).wait()

        # Dump sub-process' std out into the log. The temporary files are binary, so decode before formatting.
        std_out.seek(0)
        out_str = std_out.read().decode("utf-8", errors="replace")
        logger.info("Output of command '{}':\n{}\n".format(cmd_args, out_str))

        if ret_code != 0:
            std_err.seek(0)
            err_str = std_err.read().decode("utf-8", errors="replace")
            logger.error("Error from command '{}':\n{}\n".format(cmd_args, err_str))

            raise Exception(
                "Called process exited with error code: {}.  Stderr dump:\n\n{}".format(ret_code, err_str)
            )

    return 0
Example #3
0
    def execute(self, **kwargs) -> typing.Any:
        """
        Executes the given script by substituting the inputs and outputs and extracts the outputs from the filesystem

        If ``self.script_file`` is set, the script text is (re)loaded from that file into ``self._script``. The
        location of every declared output is interpolated from the inputs, the script itself is interpolated
        from inputs and outputs, and the result is run through the shell.

        :param kwargs: Task inputs used for interpolation. If ``env`` is present and is a dict, an
            ``export_env`` entry built from it is added before the script is interpolated.
        :return: ``None`` when no ``FlyteFile`` / ``FlyteDirectory`` output is collected, the single collected
            output when there is exactly one, otherwise a tuple of them in ``self._output_locs`` order.
        :raises subprocess.CalledProcessError: If the script exits with a non-zero return code. The failure is
            logged together with the current directory listing and then re-raised.
        """
        logger.info(f"Running shell script as type {self.task_type}")
        if self.script_file:
            with open(self.script_file) as f:
                self._script = f.read()

        # Normalise once so that both loops over the output locations treat an unset (None) or empty
        # value the same way; previously only the first loop was guarded.
        output_locs = self._output_locs or []

        outputs: typing.Dict[str, str] = {
            v.var: self._interpolizer.interpolate(v.location, inputs=kwargs) for v in output_locs
        }

        if os.name == "nt":
            # On Windows the script is collapsed into a single line with its lines chained by "&&".
            self._script = self._script.strip().replace("\n", "&&")

        if "env" in kwargs and isinstance(kwargs["env"], dict):
            kwargs["export_env"] = self.make_export_string_from_env_dict(kwargs["env"])

        gen_script = self._interpolizer.interpolate(self._script, inputs=kwargs, outputs=outputs)
        if self._debug:
            print("\n==============================================\n")
            print(gen_script)
            print("\n==============================================\n")

        try:
            subprocess.check_call(gen_script, shell=True)
        except subprocess.CalledProcessError as e:
            files = os.listdir(".")
            fstr = "\n-".join(files)
            # NOTE: check_call does not capture the child's output, so e.stderr / e.stdout are None here;
            # the script's own output goes to the inherited streams.
            logger.error(
                f"Failed to Execute Script, return-code {e.returncode} \n"
                f"StdErr: {e.stderr}\n"
                f"StdOut: {e.stdout}\n"
                f" Current directory contents: .\n-{fstr}"
            )
            raise

        final_outputs = []
        for v in output_locs:
            if issubclass(v.var_type, FlyteFile):
                final_outputs.append(FlyteFile(outputs[v.var]))
            if issubclass(v.var_type, FlyteDirectory):
                final_outputs.append(FlyteDirectory(outputs[v.var]))
        if len(final_outputs) == 1:
            return final_outputs[0]
        if len(final_outputs) > 1:
            return tuple(final_outputs)
        return None
Example #4
0
def _update_cmd_config_and_execute(s3_cfg: S3Config, cmd: List[str]):
    """
    Run a command with flags and credentials taken from ``s3_cfg``, retrying on failure.

    ``--debug`` and ``--endpoint-url <endpoint>`` are inserted after the first element of the command when the
    config asks for them. Credentials from the config are exported to the child environment only when the
    corresponding variables are not already set in the current process environment.

    Each attempt first runs the command as given; if that raises, the same command is run again with the
    anonymous flag inserted. If the anonymous run also raises, the function sleeps for ``s3_cfg.backoff`` and
    starts another attempt, until more than ``s3_cfg.retries`` attempts have failed.

    :param s3_cfg: Configuration providing ``enable_debug``, ``endpoint``, ``access_key_id``,
        ``secret_access_key``, ``retries`` and ``backoff`` (an object exposing ``total_seconds()``).
    :param cmd: Command as a list of strings. The list passed in is not modified.
    :return: The return value of ``subprocess.check_call`` for the run that succeeded.
    :raises Exception: The exception of the last failed anonymous run once the retries are exhausted.
    """
    env = _os.environ.copy()

    # Work on a private copy so the flag insertions below do not leak into the caller's list.
    cmd = list(cmd)

    if s3_cfg.enable_debug:
        cmd.insert(1, "--debug")

    if s3_cfg.endpoint is not None:
        cmd.insert(1, s3_cfg.endpoint)
        cmd.insert(1, "--endpoint-url")

    # Values already present in the process environment take precedence over the config.
    if S3_ACCESS_KEY_ID_ENV_NAME not in _os.environ and s3_cfg.access_key_id:
        env[S3_ACCESS_KEY_ID_ENV_NAME] = s3_cfg.access_key_id

    if S3_SECRET_ACCESS_KEY_ENV_NAME not in _os.environ and s3_cfg.secret_access_key:
        env[S3_SECRET_ACCESS_KEY_ENV_NAME] = s3_cfg.secret_access_key

    retry = 0
    while True:
        try:
            try:
                return subprocess.check_call(cmd, env=env)
            except Exception as e:
                # The failure of the very first attempt is not logged here; the anonymous fallback
                # below is tried straight away.
                if retry > 0:
                    logger.info(f"AWS command failed with error {e}, command: {cmd}, retry {retry}")

            logger.debug(f"Appending anonymous flag and retrying command {cmd}")
            anonymous_cmd = cmd[:]  # strings only, so this is deep enough
            anonymous_cmd.insert(1, S3_ANONYMOUS_FLAG)
            return subprocess.check_call(anonymous_cmd, env=env)

        except Exception as e:
            logger.error(f"Exception when trying to execute {cmd}, reason: {str(e)}")
            retry += 1
            if retry > s3_cfg.retries:
                raise
            secs = s3_cfg.backoff
            logger.info(f"Sleeping before retrying again, after {secs.total_seconds()} seconds")
            time.sleep(secs.total_seconds())
            logger.info("Retrying again")
Example #5
0
    def dispatch_execute(
        self, ctx: FlyteContext, input_literal_map: _literal_models.LiteralMap
    ) -> Union[_literal_models.LiteralMap, _dynamic_job.DynamicJobSpec]:
        """
        This method translates Flyte's Type system based input values and invokes the actual call to the executor
        This method is also invoked during runtime.

        The steps are: ``pre_execute`` -> convert the input literals to Python values -> ``execute`` ->
        ``post_execute`` -> map the returned values onto the declared output names -> convert each value
        back to a literal -> build the input/output decks -> return the resulting ``LiteralMap``.

        * ``VoidPromise`` is returned in the case when the task itself declares no outputs.
          NOTE(review): no code path in this method returns a ``VoidPromise``; with no declared outputs an
          empty ``LiteralMap`` is returned - confirm whether this bullet is stale.
        * ``Literal Map`` is returned when the task returns one or more outputs in the declaration. Individual
          outputs may be none
        * ``DynamicJobSpec`` is returned when a dynamic workflow is executed

        :param ctx: Context whose user space params are handed to ``pre_execute`` and from which the execution
            context used for the type conversions is derived.
        :param input_literal_map: Task inputs as literals; converted to keyword arguments for ``execute``.
        :return: Whatever ``post_execute`` returned if that is already a ``LiteralMap`` or ``DynamicJobSpec``,
            otherwise a ``LiteralMap`` built from the converted outputs.
        :raises TypeError: If an individual output value is a tuple, or if converting an output value to a
            literal fails (the original error is chained).
        :raises Exception: Any exception raised by ``execute`` is logged and re-raised.
        """

        # Invoked before the task is executed
        new_user_params = self.pre_execute(ctx.user_space_params)
        # Imported at call time rather than at module level - presumably to avoid an import cycle; confirm.
        from flytekit.deck.deck import _output_deck

        # Create another execution context with the new user params, but let's keep the same working dir
        with FlyteContextManager.with_context(
                ctx.with_execution_state(
                    ctx.execution_state.with_params(
                        user_space_params=new_user_params))
                # type: ignore
        ) as exec_ctx:
            # TODO We could support default values here too - but not part of the plan right now
            # Translate the input literals to Python native
            native_inputs = TypeEngine.literal_map_to_kwargs(
                exec_ctx, input_literal_map, self.python_interface.inputs)

            # TODO: Logger should auto inject the current context information to indicate if the task is running within
            #   a workflow or a subworkflow etc
            logger.info(f"Invoking {self.name} with inputs: {native_inputs}")
            try:
                native_outputs = self.execute(**native_inputs)
            except Exception as e:
                # Log with traceback, then propagate the same exception to the caller.
                logger.exception(f"Exception when executing {e}")
                raise e

            logger.debug("Task executed successfully in user level")
            # Lets run the post_execute method. This may result in a IgnoreOutputs Exception, which is
            # not caught here and therefore propagates to whoever called this method.
            native_outputs = self.post_execute(new_user_params, native_outputs)

            # Short circuit the translation to literal map because what's returned may be a dj spec (or an
            # already-constructed LiteralMap if the dynamic task was a no-op), not python native values
            if isinstance(native_outputs,
                          _literal_models.LiteralMap) or isinstance(
                              native_outputs, _dynamic_job.DynamicJobSpec):
                return native_outputs

            # Pair the returned value(s) with the declared output names, by count of declared outputs.
            expected_output_names = list(self._outputs_interface.keys())
            if len(expected_output_names) == 1:
                # Here we have to handle the fact that the task could've been declared with a typing.NamedTuple of
                # length one. That convention is used for naming outputs - and single-length-NamedTuples are
                # particularly troublesome but elegant handling of them is not a high priority
                # Again, we're using the output_tuple_name as a proxy.
                if self.python_interface.output_tuple_name and isinstance(
                        native_outputs, tuple):
                    native_outputs_as_map = {
                        expected_output_names[0]: native_outputs[0]
                    }
                else:
                    native_outputs_as_map = {
                        expected_output_names[0]: native_outputs
                    }
            elif len(expected_output_names) == 0:
                # No declared outputs: whatever was returned is dropped.
                native_outputs_as_map = {}
            else:
                # Several declared outputs: match returned values to names by position.
                # NOTE(review): this indexes expected_output_names by the position in native_outputs, so
                # returning more values than declared raises IndexError, and returning fewer silently leaves
                # the remaining names out - confirm callers guarantee matching lengths.
                native_outputs_as_map = {
                    expected_output_names[i]: native_outputs[i]
                    for i, _ in enumerate(native_outputs)
                }

            # We manually construct a LiteralMap here because task inputs and outputs actually violate the assumption
            # built into the IDL that all the values of a literal map are of the same type.
            literals = {}
            for k, v in native_outputs_as_map.items():
                literal_type = self._outputs_interface[k].type
                py_type = self.get_type_for_output_var(k, v)

                # A tuple as the value of a single named output is rejected outright.
                if isinstance(v, tuple):
                    raise TypeError(
                        f"Output({k}) in task{self.name} received a tuple {v}, instead of {py_type}"
                    )
                try:
                    literals[k] = TypeEngine.to_literal(
                        exec_ctx, v, py_type, literal_type)
                except Exception as e:
                    # Any conversion failure is surfaced as a TypeError with the original error chained.
                    logger.error(
                        f"Failed to convert return value for var {k} with error {type(e)}: {e}"
                    )
                    raise TypeError(
                        f"Failed to convert return value for var {k} for function {self.name} with error {type(e)}: {e}"
                    ) from e

            INPUT = "input"
            OUTPUT = "output"

            # The two Deck objects below are filled with an HTML rendering of every input / output value but
            # are not referenced again in this method - presumably constructing a Deck registers it for later
            # rendering; confirm against the Deck class.
            # NOTE(review): to_html is given the outer ``ctx`` rather than ``exec_ctx`` - confirm intentional.
            input_deck = Deck(INPUT)
            for k, v in native_inputs.items():
                input_deck.append(
                    TypeEngine.to_html(ctx, v,
                                       self.get_type_for_input_var(k, v)))

            output_deck = Deck(OUTPUT)
            for k, v in native_outputs_as_map.items():
                output_deck.append(
                    TypeEngine.to_html(ctx, v,
                                       self.get_type_for_output_var(k, v)))

            # Decks are only emitted when neither the global DISABLE_DECK setting nor this task's own
            # ``disable_deck`` flag turns them off. The last dotted component of the task name is passed on.
            if _internal.Deck.DISABLE_DECK.read(
            ) is not True and self.disable_deck is False:
                _output_deck(self.name.split(".")[-1], new_user_params)

            outputs_literal_map = _literal_models.LiteralMap(literals=literals)
            # After the execute has been successfully completed
            return outputs_literal_map