コード例 #1
0
    def find_lhs(self) -> str:
        """
        Return the module-level variable name this object is bound to.

        The name is cached in ``self._lhs`` after the first successful lookup. When no cached
        value exists, the module named by ``self._instantiated_in`` is imported and each of its
        attributes is compared to ``self`` by identity.

        :return: The attribute name in the module that refers to this object.
        :raises FlyteSystemException: If ``_instantiated_in`` is unset/empty, or if no attribute
            of the module is this object.
        """
        cached_name = self._lhs
        if cached_name is not None:
            return cached_name

        origin = self._instantiated_in
        if origin is None or origin == "":
            raise _system_exceptions.FlyteSystemException(f"Object {self} does not have an _instantiated in")

        logger.debug(f"Looking for LHS for {self} from {origin}")
        module = _importlib.import_module(origin)
        for attr_name in dir(module):
            try:
                if getattr(module, attr_name) is self:
                    logger.debug(f"Found LHS for {self}, {attr_name}")
                    self._lhs = attr_name
                    return attr_name
            except ValueError as err:
                # Per the original author's note, some module members (empty pandas dataframes were
                # the reported case) raise a ValueError about an ambiguous truth value here. Such
                # objects are not registrable entities, so the error is logged and the scan goes on.
                logger.warning("Caught ValueError {} while attempting to auto-assign name".format(err))

        logger.error(f"Could not find LHS for {self} in {self._instantiated_in}")
        raise _system_exceptions.FlyteSystemException(f"Error looking for LHS in {self._instantiated_in}")
コード例 #2
0
ファイル: file.py プロジェクト: flyteorg/flytekit
def int_transformer(config_val: typing.Any):
    """
    Convert a string configuration value to an ``int`` when possible.

    Only values whose type is exactly ``str`` are considered for conversion; anything else is
    handed back untouched. A string that ``int()`` rejects is also returned unchanged, after a
    warning is logged.

    :param config_val: The raw configuration value.
    :return: The parsed integer, or ``config_val`` itself when no conversion applies.
    """
    if type(config_val) is not str:
        return config_val

    try:
        parsed = int(config_val)
    except ValueError:
        logger.warning(f"Couldn't convert configuration setting {config_val} into {int}, leaving as is.")
        return config_val
    return parsed
コード例 #3
0
ファイル: file.py プロジェクト: flyteorg/flytekit
 def _read_yaml_config(location: str) -> typing.Optional[typing.Dict[str, typing.Any]]:
     """
     Load a YAML configuration file with ``yaml.safe_load``.

     :param location: Path of the file to open for reading.
     :return: Whatever ``yaml.safe_load`` produced, or ``None`` when the file content raised a
         ``yaml.YAMLError`` (a warning is logged in that case). Errors from opening the file are
         not handled here and propagate to the caller.
     """
     with open(location, "r") as config_file:
         try:
             parsed = yaml.safe_load(config_file)
         except yaml.YAMLError as exc:
             logger.warning(f"Error {exc} reading yaml config file at {location}, ignoring...")
             parsed = None
     return parsed
コード例 #4
0
 def new_test(*args, **kwargs):
     """
     Run the wrapped test function with ``target.execute`` temporarily replaced by a ``MagicMock``.

     The mock is passed to ``test_fn`` as its first positional argument, followed by the caller's
     arguments. The original ``target.execute`` is put back afterwards, including when
     ``test_fn`` raises.

     :return: Whatever ``test_fn`` returns.
     """
     logger.warning(f"Invoking mock method for target: '{target.name}'")
     m = MagicMock()
     saved = target.execute
     target.execute = m
     try:
         return test_fn(m, *args, **kwargs)
     finally:
         # Always restore, otherwise a failing test would leave the mock installed on the target
         # and leak into whatever runs next.
         target.execute = saved
コード例 #5
0
def get_get_version():
    """
    Build a version-string factory that shares one random prefix.

    A prefix of the form ``sandbox_test_<3 hex chars>`` is generated once per call to this
    function and logged at warning level. The returned callable appends an optional suffix.

    :return: A function ``fn(suffix="")`` returning the prefix, or ``<prefix>_<suffix>`` when a
        non-empty suffix is given.
    """
    version_prefix = "sandbox_test_" + uuid.uuid4().hex[:3]
    # The prefix is reported through the logger only; the stray debugging print that duplicated
    # this message on stdout has been removed.
    logger.warning(f"Test version prefix is {version_prefix}")

    def fn(suffix: str = "") -> str:
        return version_prefix + (f"_{suffix}" if suffix else "")

    return fn
コード例 #6
0
 def get_random_remote_path(self,
                            file_path_or_file_name: Optional[str] = None
                            ) -> str:
     """
     Produce a random remote location, optionally keeping the leaf file name of the input.

     :param file_path_or_file_name: For when you want a random directory, but want to preserve the leaf file name
     :return: ``self.remote.get_random_directory()`` immediately followed by the leaf name when one
         can be extracted; otherwise ``self.remote.get_random_path()``.
     """
     if not file_path_or_file_name:
         return self.remote.get_random_path()

     leaf_name = os.path.basename(file_path_or_file_name)
     if not leaf_name:
         # Input ended in a separator, so there is no file name to preserve.
         logger.warning(
             f"No filename detected in {file_path_or_file_name}, using random remote path..."
         )
         return self.remote.get_random_path()

     return f"{self.remote.get_random_directory()}{leaf_name}"
コード例 #7
0
 def get_random_local_path(self,
                           file_path_or_file_name: Optional[str] = None
                           ) -> str:
     """
     Produce a random local location, optionally keeping the leaf file name of the input.

     :param file_path_or_file_name:  For when you want a random directory, but want to preserve the leaf file name
     :return: The leaf name joined onto ``self.local_access.get_random_directory()`` when one can
         be extracted; otherwise ``self.local_access.get_random_path()``.
     """
     if not file_path_or_file_name:
         return self.local_access.get_random_path()

     leaf_name = os.path.basename(file_path_or_file_name)
     if not leaf_name:
         # Input ended in a separator, so there is no file name to preserve.
         logger.warning(
             f"No filename detected in {file_path_or_file_name}, using random local path..."
         )
         return self.local_access.get_random_path()

     random_dir = self.local_access.get_random_directory()
     return os.path.join(random_dir, leaf_name)
コード例 #8
0
    def __call__(self, *args, **kwargs):
        """
        Dispatch a task invocation according to the state of the current Flyte context.

        Three outcomes are possible:

        * Compilation: when a compilation state with mode ``1`` is active, ``self.compile`` is
          called, so the call produces a node/promises instead of running user code.
        * Local workflow execution: ``self._local_execute`` runs the task, unless the current
          branch is marked as skipped, in which case ``None`` (or a named tuple filled with
          ``None``) is returned without executing anything.
        * Otherwise: the task is executed directly via ``self.execute`` inside a context whose
          execution state is switched to ``LOCAL_TASK_EXECUTION``.

        :raises FlyteAssertion: If any positional arguments are supplied.
        """
        if args:
            raise _user_exceptions.FlyteAssertion(
                f"When calling tasks, only keyword args are supported. "
                f"Aborting execution as detected {len(args)} positional args {args}"
            )

        ctx = FlyteContextManager.current_context()

        # Compilation mode: build a node rather than run the function.
        if ctx.compilation_state is not None and ctx.compilation_state.mode == 1:
            return self.compile(ctx, *args, **kwargs)

        running_local_workflow = (
            ctx.execution_state is not None
            and ctx.execution_state.mode == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION
        )

        if not running_local_workflow:
            # Plain task execution: Flyte steps out of the way and the user function runs.
            logger.warning("task run without context - executing raw function")
            new_user_params = self.pre_execute(ctx.user_space_params)
            task_state = ctx.execution_state.with_params(
                mode=ExecutionState.Mode.LOCAL_TASK_EXECUTION,
                user_space_params=new_user_params,
            )
            with FlyteContextManager.with_context(ctx.with_execution_state(task_state)):
                return self.execute(**kwargs)

        if ctx.execution_state.branch_eval_mode == BranchEvalMode.BRANCH_SKIPPED:
            # The enclosing conditional branch was not taken; hand back placeholder output(s).
            if not (self.python_interface and self.python_interface.output_tuple_name):
                # Should we return multiple None's here?
                return None
            output_names = list(self.python_interface.outputs.keys())
            output_tuple = collections.namedtuple(
                self.python_interface.output_tuple_name, output_names
            )
            placeholders = [None] * len(self.python_interface.outputs.keys())
            return output_tuple(*placeholders)

        return self._local_execute(ctx, **kwargs)
コード例 #9
0
 def construct_random_path(
         self,
         persist: DataPersistence,
         file_path_or_file_name: typing.Optional[str] = None) -> str:
     """
     Build a path under a freshly generated random key via ``persist.construct_path``.

     Use file_path_or_file_name, when you want a random directory, but want to preserve the leaf file name

     :param persist: Object whose ``construct_path`` assembles the final path.
     :param file_path_or_file_name: Optional path or name whose last component is appended after
         the random key.
     :return: The result of ``persist.construct_path(False, True, key[, leaf])``.
     """
     # 128 random bits rendered as 32 hex characters.
     key = UUID(int=random.getrandbits(128)).hex

     if not file_path_or_file_name:
         return persist.construct_path(False, True, key)

     leaf_name = os.path.basename(file_path_or_file_name)
     if not leaf_name:
         logger.warning(
             f"No filename detected in {file_path_or_file_name}, generating random path"
         )
         return persist.construct_path(False, True, key)

     return persist.construct_path(False, True, key, leaf_name)
コード例 #10
0
def _change_unrecognized_type_to_pickle(t: Type[T]) -> Type[T]:
    """
    Substitute ``FlytePickle[t]`` for a type that ``TypeEngine`` has no transformer for.

    Generic aliases (objects exposing both ``__origin__`` and ``__args__``) are handled
    structurally: ``list`` element types and the value type of ``dict`` with ``str`` keys are
    processed recursively; any other generic alias is returned as-is without a transformer
    lookup. Non-generic types are probed with ``TypeEngine.get_transformer`` and returned
    unchanged when that succeeds.

    :param t: The Python type to inspect.
    :return: ``t``, a rebuilt ``typing.List``/``typing.Dict``, or ``FlytePickle[t]`` when a
        ``ValueError`` is raised during the lookup.
    """
    try:
        is_generic_alias = hasattr(t, "__origin__") and hasattr(t, "__args__")
        if not is_generic_alias:
            # Called only for its side effect of raising ValueError on unknown types.
            TypeEngine.get_transformer(t)
        elif t.__origin__ == list:
            element_type = _change_unrecognized_type_to_pickle(t.__args__[0])
            return typing.List[element_type]
        elif t.__origin__ == dict and t.__args__[0] == str:
            value_type = _change_unrecognized_type_to_pickle(t.__args__[1])
            return typing.Dict[str, value_type]
    except ValueError:
        logger.warning(
            f"Unsupported Type {t} found, Flyte will default to use PickleFile as the transport. "
            f"Pickle can only be used to send objects between the exact same version of Python, "
            f"and we strongly recommend to use python type that flyte support."
        )
        return FlytePickle[t]
    return t
コード例 #11
0
ファイル: node_creation.py プロジェクト: flyteorg/flytekit
def create_node(
        entity: Union[PythonTask, LaunchPlan, WorkflowBase,
                      RemoteEntity], *args,
        **kwargs) -> Union[Node, VoidPromise, Type[collections.namedtuple]]:
    """
    This is the function you want to call if you need to specify dependencies between tasks that don't consume and/or
    don't produce outputs. For example, if you have t1() and t2(), both of which do not take in nor produce any
    outputs, how do you specify that t2 should run before t1?

        t1_node = create_node(t1)
        t2_node = create_node(t2)

        t2_node.runs_before(t1_node)
        # OR
        t2_node >> t1_node

    This works for tasks that take inputs as well, say a ``t3(in1: int)``

        t3_node = create_node(t3, in1=some_int)  # basically calling t3(in1=some_int)

    You can still use this method to handle setting certain overrides

        t3_node = create_node(t3, in1=some_int).with_overrides(...)

    Outputs, if there are any, will be accessible. A `t4() -> (int, str)`

        t4_node = create_node(t4)

        in compilation node.o0 has the promise.
        t5(in1=t4_node.o0)

        in local workflow execution, what is the node?  Can it just be the named tuple?
        t5(in1=t4_node.o0)

    @workflow
    def wf():
        create_node(sub_wf)
        create_node(wf2)

    @dynamic
    def sub_wf():
        create_node(other_sub)
        create_node(task)

    If t1 produces only one output, note that in local execution, you still get a wrapper object that
    needs to be dereferenced by the output name.

        t1_node = create_node(t1)
        t2(t1_node.o0)

    :param entity: The task, workflow, launch plan or remote entity to create a node for.
    :param args: Not supported; any positional argument raises.
    :param kwargs: Inputs forwarded to ``entity`` as keyword arguments.
    :return: During compilation, the node most recently appended to the compilation state, with
        outputs attached as attributes and in ``node.outputs``. During local workflow execution,
        a ``VoidPromise``, the tuple returned by the entity, or the interface's output tuple
        wrapping the result(s).
    :raises FlyteAssertion: On positional arguments, an empty output, an output name clashing with
        an existing node attribute, or a non-tuple result for an interface without exactly one output.
    :raises AssertionError: If ``entity`` is not one of the supported entity types, or a remote
        entity is used in local execution.
    :raises Exception: In a skipped branch during local execution, when a non-void result is
        returned for an interface without outputs, or outside compilation/local workflow execution.
    """
    # Imported here rather than at module level, as in the original code.
    from flytekit.remote.remote_callable import RemoteEntity

    if len(args) > 0:
        # Note the trailing space after the first sentence: the two literals are concatenated.
        raise _user_exceptions.FlyteAssertion(
            f"Only keyword args are supported to pass inputs to workflows and tasks. "
            f"Aborting execution as detected {len(args)} positional args {args}"
        )

    if not isinstance(entity, (PythonTask, WorkflowBase, LaunchPlan, RemoteEntity)):
        raise AssertionError(
            f"Should be a callable Flyte entity (either local or fetched) but is {type(entity)}"
        )

    # This function is only called from inside workflows and dynamic tasks.
    # That means there are two scenarios we need to take care of, compilation and local workflow execution.

    # When compiling, calling the entity will create a node.
    ctx = FlyteContext.current_context()
    if ctx.compilation_state is not None and ctx.compilation_state.mode == 1:
        outputs = entity(**kwargs)
        # This is always the output of create_and_link_node which returns create_task_output, which can be
        # VoidPromise, Promise, or our custom namedtuple of Promises.
        node = ctx.compilation_state.nodes[-1]

        # In addition to storing the outputs on the object itself, we also want to set them in a map. When used by
        # the imperative workflow patterns, users will probably find themselves doing things like
        #   n = create_node(...)  # then
        #   output_name = "o0"
        #   n.outputs[output_name]  # rather than
        #   n.o0
        # That is, they'll likely have the name of the output stored as a string variable, and dicts provide cleaner
        # access than getattr
        node._outputs = {}

        # If a VoidPromise, just return the node.
        if isinstance(outputs, VoidPromise):
            return node

        # If a Promise or custom namedtuple of Promises, we need to attach each output as an attribute to the node.
        # todo: fix the noqas below somehow... can't add abstract property to RemoteEntity because it has to come
        #  before the model Template classes in FlyteTask/Workflow/LaunchPlan
        if entity.interface.outputs:  # noqa
            if isinstance(outputs, tuple):
                for output_name in entity.interface.outputs.keys():  # noqa
                    attr = getattr(outputs, output_name)
                    if attr is None:
                        raise _user_exceptions.FlyteAssertion(
                            f"Output {output_name} in outputs when calling {entity.name} is empty {attr}."
                        )
                    if hasattr(node, output_name):
                        raise _user_exceptions.FlyteAssertion(
                            f"Node {node} already has attribute {output_name}, change the name of output."
                        )
                    setattr(node, output_name, attr)
                    node.outputs[output_name] = attr
            else:
                output_names = list(entity.interface.outputs.keys())  # noqa
                if len(output_names) != 1:
                    raise _user_exceptions.FlyteAssertion(
                        f"Output of length 1 expected but {len(output_names)} found"
                    )

                if hasattr(node, output_names[0]):
                    raise _user_exceptions.FlyteAssertion(
                        f"Node {node} already has attribute {output_names[0]}, change the name of output."
                    )

                # This should be a singular Promise
                setattr(node, output_names[0], outputs)
                node.outputs[output_names[0]] = outputs

        return node

    # Handling local execution
    elif ctx.execution_state is not None and ctx.execution_state.mode == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION:
        if isinstance(entity, RemoteEntity):
            raise AssertionError(
                f"Remote entities are not yet runnable locally {entity.name}")

        if ctx.execution_state.branch_eval_mode == BranchEvalMode.BRANCH_SKIPPED:
            logger.warning(
                f"Manual node creation cannot be used in branch logic {entity.name}"
            )
            raise Exception(
                "Being more restrictive for now and disallowing manual node creation in branch logic"
            )

        # This the output of __call__ under local execute conditions which means this is the output of local_execute
        # which means this is the output of create_task_output with Promises containing values (or a VoidPromise)
        results = entity(**kwargs)

        # If it's a VoidPromise, let's just return it, it shouldn't get used anywhere and if it does, we want an error
        # The reason we return it if it's a tuple is to handle the case where the task returns a typing.NamedTuple.
        # In that case, it's already a tuple and we don't need to further tupletize.
        if isinstance(results, (VoidPromise, tuple)):
            return results

        output_names = entity.python_interface.output_names

        if not output_names:
            raise Exception(
                f"Non-VoidPromise received {results} but interface for {entity.name} doesn't have outputs"
            )

        if len(output_names) == 1:
            # See explanation above for why we still tupletize a single element.
            return entity.python_interface.output_tuple(results)

        return entity.python_interface.output_tuple(*results)

    else:
        raise Exception(
            f"Cannot use explicit run to call Flyte entities {entity.name}")
コード例 #12
0
ファイル: mock_stats.py プロジェクト: flyteorg/flytekit
 def timing(self, metric):
     """
     Placeholder for the timing call on the mock stats object.

     :param metric: Accepted for interface compatibility; not used.
     """
     not_implemented_notice = "mock timing isn't implemented yet."
     logger.warning(not_implemented_notice)
コード例 #13
0
 def local_execute(self, ctx: FlyteContext, **kwargs) -> Union[Tuple[Promise], Promise, None]:
     """
     Log a warning about environment differences, then delegate to the parent ``local_execute``.

     :param ctx: The Flyte context, forwarded to the parent implementation by keyword.
     :param kwargs: Task inputs, forwarded unchanged.
     :return: Whatever the parent implementation returns.
     """
     environment_notice = (
         "Running pod task locally. Local environment may not match pod environment which may cause issues."
     )
     logger.warning(environment_notice)
     return super().local_execute(ctx=ctx, **kwargs)
コード例 #14
0
 def _log(*args, **kwargs):
     """Log that the mock for task ``t`` is being invoked, then forward the call to ``m``."""
     task_name = t.name
     logger.warning(f"Invoking mock method for task: '{task_name}'")
     mock_result = m(*args, **kwargs)
     return mock_result
コード例 #15
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: typing.Union[FlyteFile, os.PathLike, str],
        python_type: typing.Type[FlyteFile],
        expected: LiteralType,
    ) -> Literal:
        """
        Convert a file-like Python value into a Blob ``Literal``, uploading it when appropriate.

        The resulting blob URI is one of:

        * the ``FlyteFile``'s ``_remote_source`` when it has one (no upload is attempted);
        * the upload destination, when the value is uploaded through ``ctx.file_access.put_data``;
        * the original source path, when no upload happens.

        No upload happens when the ``FlyteFile`` has ``remote_path`` set to ``False``, when the
        source path is already remote, or when ``python_type`` is ``os.PathLike``.

        :param ctx: Context providing ``file_access`` for remote checks and uploads.
        :param python_val: A ``FlyteFile``, ``pathlib.Path`` or string path.
        :param python_type: ``os.PathLike`` or a ``FlyteFile`` subclass.
        :param expected: Not consulted by this implementation.
        :raises TypeTransformerFailedError: If ``python_val`` is ``None``, is of an unsupported
            type, or is a local string path that is not a file.
        :raises ValueError: If ``python_type`` is unsupported, or a local ``pathlib.Path`` is not a file.
        """
        if python_val is None:
            raise TypeTransformerFailedError("None value cannot be converted to a file.")

        if not (python_type is os.PathLike or issubclass(python_type, FlyteFile)):
            raise ValueError(f"Incorrect type {python_type}, must be either a FlyteFile or os.PathLike")

        # information used by all cases
        meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))

        def blob_literal(uri):
            # Every return path wraps a URI in the same Literal/Scalar/Blob nesting.
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=uri)))

        remote_path = None

        if isinstance(python_val, FlyteFile):
            source_path = python_val.path

            # A value that already carries a remote source is converted straight back, so going back and
            # forth between a FlyteFile Python value and a Blob Flyte IDL value does no work.
            if python_val._remote_source is not None:
                return blob_literal(python_val._remote_source)

            # remote_path=False means "never upload"; a source that is already remote has nothing to upload.
            should_upload = not (python_val.remote_path is False or ctx.file_access.is_remote(source_path))

            # If the type that's given is a simpler type, we also don't upload, and print a warning too.
            if python_type is os.PathLike:
                logger.warning(
                    f"Converting from a FlyteFile Python instance to a Blob Flyte object, but only a {python_type} was"
                    f" specified. Since a simpler type was specified, we'll skip uploading!"
                )
                should_upload = False

            # Set the remote destination if one was given instead of triggering a random one below
            remote_path = python_val.remote_path or None

        elif isinstance(python_val, (pathlib.Path, str)):
            source_path = str(python_val)
            if not issubclass(python_type, FlyteFile):
                # python_type must be os.PathLike - see check at beginning of function
                should_upload = False
            elif ctx.file_access.is_remote(source_path):
                should_upload = False
            else:
                should_upload = True
                if isinstance(python_val, pathlib.Path) and not python_val.is_file():
                    raise ValueError(f"Error converting pathlib.Path {python_val} because it's not a file.")

                # If it's a string pointing to a local destination, then make sure it's a file.
                if isinstance(python_val, str) and not pathlib.Path(python_val).is_file():
                    raise TypeTransformerFailedError(f"Error converting {python_val} because it's not a file.")

        else:
            raise TypeTransformerFailedError(f"Expected FlyteFile or os.PathLike object, received {type(python_val)}")

        # If not uploading, then we can only take the original source path as the uri.
        if not should_upload:
            return blob_literal(source_path)

        # If we're uploading something, that means that the uri should always point to the upload destination.
        if remote_path is None:
            remote_path = ctx.file_access.get_random_remote_path(source_path)
        ctx.file_access.put_data(source_path, remote_path, is_multipart=False)
        return blob_literal(remote_path)