Example #1
0
    def _validate_expression(cron_expression):
        """
        Check that ``cron_expression`` is a valid cron string in the Cloudwatch format.  The best explanation of
        that format can be found here:
            https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html#CronExpressions

        :param Text cron_expression: cron expression
        :raises FlyteAssertion: if the expression does not have exactly 6 whitespace-separated fields, if neither
            day-of-month nor day-of-week is '?', or if croniter rejects the first five fields.
        """
        # croniter does the real syntax validation.  The admin side uses Cloudwatch, so a couple of extra
        # checks run first to line Cloudwatch's rules up with what croniter accepts.
        fields = cron_expression.split()
        if len(fields) != 6:
            raise _user_exceptions.FlyteAssertion(
                "Cron expression is invalid.  A cron expression must have 6 fields.  Cron expressions are in the "
                "format of: `minute hour day-of-month month day-of-week year`.  "
                "Use `schedule` for 5 fields cron expression.  Received: `{}`".format(cron_expression)
            )

        day_of_month, day_of_week = fields[2], fields[4]
        if "?" not in (day_of_month, day_of_week):
            raise _user_exceptions.FlyteAssertion(
                "Scheduled string is invalid.  A cron expression must have a '?' for either day-of-month or "
                "day-of-week.  Please specify '?' for one of those fields.  Cron expressions are in the format of: "
                "minute hour day-of-month month day-of-week year.\n\n"
                "For more information: "
                "https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html#CronExpressions"
            )

        # croniter treats a 6th field as seconds, so only the first five fields are handed over and the year
        # field is simply assumed to be good.
        # TODO: Parse this field ourselves and check
        five_field_expr = " ".join(field.replace("?", "*") for field in fields[:5])
        try:
            _croniter.croniter(five_field_expr)
        except Exception:
            raise _user_exceptions.FlyteAssertion(
                "Scheduled string is invalid.  The cron expression was found to be invalid."
                " Provided cron expr: {}".format(cron_expression)
            )
Example #2
0
 def python_std_to_sdk_type(self, t):
     """
     Resolve a user-supplied type description into an SDK type.

     A one-element list becomes a list container of the resolved element type, dicts are rejected because
     map types are not implemented, and an object that already is an SDK type is returned unchanged.

     :param T t: User input.  Should be of the form: Types.Integer, [Types.Integer], {Types.String:
     Types.Integer}, etc.
     :raises FlyteAssertion: for a list that does not have exactly one element, or for any dict.
     :raises FlyteTypeException: for any other unsupported value.
     :rtype: flytekit.common.types.base_sdk_types.FlyteSdkType
     """
     if isinstance(t, list):
         if len(t) == 1:
             return _container_types.List(_helpers.python_std_to_sdk_type(t[0]))
         raise _user_exceptions.FlyteAssertion(
             "When specifying a list type, there must be exactly one element in "
             "the list describing the contained type.")

     if isinstance(t, dict):
         raise _user_exceptions.FlyteAssertion(
             "Map types are not yet implemented.")

     if isinstance(t, _base_sdk_types.FlyteSdkType):
         return t

     # Nothing matched: report what was received next to the type that was expected.
     raise _user_exceptions.FlyteTypeException(
         type(t),
         _base_sdk_types.FlyteSdkType,
         additional_msg=(
             "Should be of form similar to: Types.Integer, [Types.Integer], {Types.String: "
             "Types.Integer}"
         ),
         received_value=t,
     )
Example #3
0
    def __init__(self,
                 cron_expression=None,
                 schedule=None,
                 offset=None,
                 kickoff_time_input_arg=None):
        """
        Validate the supplied arguments and initialise the parent schedule.

        :param Text cron_expression: validated with ``_validate_expression`` when given.
        :param Text schedule: validated with ``_validate_schedule`` when given.
        :param Text offset: validated with ``_validate_offset`` when given; cannot be combined with
            ``cron_expression``.
        :param Text kickoff_time_input_arg: forwarded as-is to the parent constructor.
        :raises FlyteAssertion: if neither ``cron_expression`` nor ``schedule`` is given, or if ``offset`` is
            combined with ``cron_expression``.
        """
        has_expression = cron_expression is not None
        has_schedule = schedule is not None
        has_offset = offset is not None

        if not (has_expression or has_schedule):
            raise _user_exceptions.FlyteAssertion(
                "Either `cron_expression` or `schedule` should be specified.")

        if has_expression and has_offset:
            raise _user_exceptions.FlyteAssertion(
                "Only `schedule` is supported when specifying `offset`.")

        if has_expression:
            CronSchedule._validate_expression(cron_expression)
        if has_schedule:
            CronSchedule._validate_schedule(schedule)
        if has_offset:
            CronSchedule._validate_offset(offset)

        # The schedule model is only built when a schedule was actually supplied.
        cron_schedule = None
        if has_schedule:
            cron_schedule = _schedule_models.Schedule.CronSchedule(schedule, offset)

        super(CronSchedule, self).__init__(
            kickoff_time_input_arg,
            cron_expression=cron_expression,
            cron_schedule=cron_schedule,
        )
Example #4
0
    def download(self, local_path=None, overwrite=False):
        """
        Download the binary file to local disk without going through the context manager interface.

        :param Text local_path: [Optional] If provided, the blob will be downloaded to this path.  This will make the
            resulting file object unmanaged and it will not be cleaned up by the system upon exiting the context.
        :param bool overwrite: If true and local_path is specified, we will download the blob and
            overwrite an existing file at that location.  Default is False.
        :raises FlyteAssertion: if the blob's mode does not contain "r", or if the target path already exists
            and ``overwrite`` is not set.
        """
        if "r" not in self._mode:
            raise _user_exceptions.FlyteAssertion(
                "Cannot download a write-only blob!")

        if local_path:
            self._local_path = local_path

        if not self.local_path:
            self._generate_local_path()

        # Refuse to clobber an existing file unless the caller explicitly asked for it.
        if not overwrite and _os.path.exists(self.local_path):
            raise _user_exceptions.FlyteAssertion(
                "Cannot download blob to a location that already exists when overwrite is not set to True.  "
                "Attempted download from {} -> {}".format(
                    self.remote_location, self.local_path))

        # TODO: Introduce system logging
        _data_proxy.Data.get_data(
            self.remote_location,
            self.local_path,
            is_multipart=False,
        )
    def outputs(self) -> Dict[str, Any]:
        """
        Return the outputs of the node execution, fetching them on first access and caching them on the instance.

        :raises: ``FlyteAssertion`` error if execution is in progress or execution ended in error.
        """
        if not self.is_complete:
            raise _user_exceptions.FlyteAssertion(
                "Please wait until the node execution has completed before requesting the outputs."
            )
        if self.error:
            raise _user_exceptions.FlyteAssertion(
                "Outputs could not be found because the execution ended in failure."
            )

        if self._outputs is not None:
            return self._outputs

        execution_data = _flyte_engine.get_client().get_execution_data(self.id)

        # Outputs come back inline unless they are too big; in that case a url blob pointing to them is
        # returned and the literal map has to be downloaded and parsed.
        literals: LiteralMap = _literal_models.LiteralMap({})
        if bool(execution_data.full_outputs.literals):
            literals = execution_data.full_outputs
        elif execution_data.outputs.bytes > 0:
            with _common_utils.AutoDeletingTempDir() as scratch_dir:
                proto_path = _os.path.join(scratch_dir.name, "outputs.pb")
                _data_proxy.Data.get_data(execution_data.outputs.url, proto_path)
                literals_pb = _common_utils.load_proto_from_file(
                    _literals_pb2.LiteralMap, proto_path)
                literals = _literal_models.LiteralMap.from_flyte_idl(literals_pb)

        # TODO: need to convert flyte literals to python types. For now just use literals
        self._outputs = literals
        return self._outputs
Example #6
0
    def outputs(self):
        """
        Returns the outputs of the task execution, if available, in the standard Python format that is produced by
        the type engine. If not available, perhaps due to execution being in progress or an error being produced,
        this will raise an exception.

        The result is fetched on first access and cached on the instance afterwards.

        :raises FlyteAssertion: if the execution has not completed yet or ended in failure.
        :rtype: dict[Text, T]
        """
        if not self.is_complete:
            raise _user_exceptions.FlyteAssertion(
                "Please wait until the task execution has completed before requesting the outputs."
            )
        if self.error:
            raise _user_exceptions.FlyteAssertion("Outputs could not be found because the execution ended in failure.")

        if self._outputs is None:
            client = _flyte_engine.get_client()
            execution_data = client.get_task_execution_data(self.id)

            # Outputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
            if bool(execution_data.full_outputs.literals):
                output_map = execution_data.full_outputs

            elif execution_data.outputs.bytes > 0:
                # Download the serialized literal map into a scratch directory and parse it from there.
                with _common_utils.AutoDeletingTempDir() as t:
                    tmp_name = _os.path.join(t.name, "outputs.pb")
                    _data_proxy.Data.get_data(execution_data.outputs.url, tmp_name)
                    output_map = _literal_models.LiteralMap.from_flyte_idl(
                        _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, tmp_name)
                    )
            else:
                # Nothing inline and nothing to download: fall back to an empty literal map.
                output_map = _literal_models.LiteralMap({})
            self._outputs = _type_helpers.unpack_literal_map_to_sdk_python_std(output_map)
        return self._outputs
Example #7
0
    def __enter__(self):
        """
        Open every file of this directory for reading and return the resulting handles.

        When no local directory has been set yet, an ``AutoDeletingTempDir`` is created (with ``tmp_dir`` taken
        from the current ``LocalWorkingDirectoryContext``), the instance is flagged as managed, and
        ``remote_location`` is fetched into ``local_path`` with ``is_multipart=True``.  A ``Blob`` is then
        entered for each entry of the directory listing, ordered case-insensitively.

        If anything raises along the way, ``__exit__`` is called with the active exception before it is
        re-raised.

        :raises FlyteAssertion: if the mode does not contain "r", or if a directory has to be created but no
            local working directory context is present.
        :rtype: list[typing.BinaryIO]
        """
        if "r" not in self.mode:
            raise _user_exceptions.FlyteAssertion(
                "Do not enter context to write to directory. Call create_piece"
            )

        try:
            # Only set up and populate a directory when the caller has not provided one already.
            if not self._directory:
                if _data_proxy.LocalWorkingDirectoryContext.get() is None:
                    raise _user_exceptions.FlyteAssertion(
                        "No temporary file system is present.  Either call this method from within the "
                        "context of a task or surround with a 'with LocalTestFileSystem():' block.  Or "
                        "specify a path when calling this function.  Note: Cleanup is not automatic when a "
                        "path is specified.")
                self._directory = _utils.AutoDeletingTempDir(
                    _uuid.uuid4().hex,
                    tmp_dir=_data_proxy.LocalWorkingDirectoryContext.get().
                    name,
                )
                self._is_managed = True
                self._directory.__enter__()
                # TODO: Introduce system logging
                # logging.info("Copying recursively {} -> {}".format(self.remote_location, self.local_path))
                _data_proxy.Data.get_data(self.remote_location,
                                          self.local_path,
                                          is_multipart=True)

            # Read the files into blobs in case-insensitive lexicographically ascending orders
            self._blobs = []
            file_handles = []
            for local_path in sorted(self._directory.list_dir(),
                                     key=lambda x: x.lower()):
                # Each blob's remote location is this directory's remote location joined with the file name.
                b = Blob(
                    _os.path.join(self.remote_location,
                                  _os.path.basename(local_path)),
                    mode=self.mode,
                )
                # NOTE(review): presumably set so the blob uses the local copy instead of fetching again — confirm.
                b._local_path = local_path
                file_handles.append(b.__enter__())
                self._blobs.append(b)

            return file_handles
        except Exception:
            # Exit is idempotent so close partially opened context that way
            exc_type, exc_obj, exc_tb = _sys.exc_info()
            self.__exit__(exc_type, exc_obj, exc_tb)
            raise
Example #8
0
def _topo_sort_helper(
    obj,
    entity_to_module_key,
    visited,
    recursion_set,
    recursion_stack,
    include_entities,
    ignore_entities,
    detect_unreferenced_entities,
):
    """
    Depth-first generator yielding ``(module, key, entity)`` tuples for ``obj`` and its unregistered upstream
    entities, with upstream entities yielded before the entity that depends on them.

    :param obj: entity currently being visited.
    :param dict entity_to_module_key: maps an entity to the tuple that is yielded together with it.
    :param set visited: entities that have already been visited; mutated in place.
    :param dict recursion_set: maps every entity on the current traversal path to its index in
        ``recursion_stack``; mutated in place.
    :param list recursion_stack: entities on the current traversal path, in visiting order; mutated in place.
    :param tuple[type] include_entities: ``obj`` is emitted when it is an instance of one of these types...
    :param tuple[type] ignore_entities: ...or when it is not an instance of any of these types.
    :param bool detect_unreferenced_entities: if true, raise for an emitted entity that is missing from
        ``entity_to_module_key``.
    :raises FlyteAssertion: if ``obj`` is already on the current traversal path, or if an unreferenced entity
        is found while ``detect_unreferenced_entities`` is set.
    """
    visited.add(obj)
    recursion_stack.append(obj)
    if obj in recursion_set:
        # Entities are not strings, so each one is converted explicitly; str.join rejects non-str items and
        # would otherwise mask the real error with a TypeError.
        cycle_path = "\n\t".join(
            str(p) for p in recursion_stack[recursion_set[obj]:])
        raise _user_exceptions.FlyteAssertion(
            "A cyclical dependency was detected during topological sort of entities.  "
            "Cycle path was:\n\n\t{}".format(cycle_path))
    recursion_set[obj] = len(recursion_stack) - 1

    if isinstance(obj, _registerable.HasDependencies):
        for upstream in obj.upstream_entities:
            if upstream.has_registered:
                continue
            # NOTE(review): every entity on the current path is also in `visited`, so this guard appears to
            # keep the recursion from ever reaching the cycle check above — confirm whether that is intended.
            if upstream not in visited:
                for m1, k1, o1 in _topo_sort_helper(
                        upstream,
                        entity_to_module_key,
                        visited,
                        recursion_set,
                        recursion_stack,
                        include_entities,
                        ignore_entities,
                        detect_unreferenced_entities,
                ):
                    if not o1.has_registered:
                        yield m1, k1, o1

    # Done with this entity's subtree: take it off the current traversal path.
    recursion_stack.pop()
    del recursion_set[obj]

    if isinstance(obj,
                  include_entities) or not isinstance(obj, ignore_entities):
        if obj in entity_to_module_key:
            yield entity_to_module_key[obj] + (obj, )
        elif detect_unreferenced_entities:
            raise _user_exceptions.FlyteAssertion(
                f"An entity ({obj.id}) was not found in modules accessible from the workflow packages configuration.  Please "
                f"ensure that entities in '{obj.instantiated_in}' are moved to a configured packaged, or adjust the configuration."
            )
Example #9
0
    def __init__(self,
                 inputs,
                 outputs,
                 nodes,
                 id=None,
                 metadata=None,
                 metadata_defaults=None,
                 interface=None,
                 output_bindings=None):
        """
        :param list[flytekit.common.promise.Input] inputs:
        :param list[Output] outputs:
        :param list[flytekit.common.nodes.SdkNode] nodes:
        :param flytekit.models.core.identifier.Identifier id: This is an autogenerated id by the system. The id is
            globally unique across Flyte.
        :param WorkflowMetadata metadata: This contains information on how to run the workflow.
        :param WorkflowMetadataDefaults metadata_defaults: Passed to the parent constructor when given; a fresh
            ``WorkflowMetadataDefaults()`` is used otherwise.
        :param flytekit.models.interface.TypedInterface interface: Defines a strongly typed interface for the
            Workflow (inputs, outputs).  This can include some optional parameters.
        :param list[flytekit.models.literals.Binding] output_bindings: A list of output bindings that specify how to construct
            workflow outputs. Bindings can pull node outputs or specify literals. All workflow outputs specified in
            the interface field must be bound
            in order for the workflow to be validated. A workflow has an implicit dependency on all of its nodes
            to execute successfully in order to bind final outputs.
        :raises FlyteAssertion: if any upstream node of ``nodes`` has no id.

        """
        for n in nodes:
            for upstream in n.upstream_nodes:
                if upstream.id is None:
                    raise _user_exceptions.FlyteAssertion(
                        "Some nodes contained in the workflow were not found in the workflow description.  Please "
                        "ensure all nodes are either assigned to attributes within the class or an element in a "
                        "list, dict, or tuple which is stored as an attribute in the class."
                    )

        # Allow overrides if specified for all the arguments to the parent class constructor
        if id is None:
            id = _identifier.Identifier(
                _identifier_model.ResourceType.WORKFLOW,
                _internal_config.PROJECT.get(), _internal_config.DOMAIN.get(),
                _uuid.uuid4().hex, _internal_config.VERSION.get())
        if metadata is None:
            metadata = _workflow_models.WorkflowMetadata()
        # Honor a caller-supplied value instead of always replacing it with a fresh default object.
        if metadata_defaults is None:
            metadata_defaults = _workflow_models.WorkflowMetadataDefaults()
        if interface is None:
            interface = _interface.TypedInterface(
                {v.name: v.var for v in inputs},
                {v.name: v.var for v in outputs})
        if output_bindings is None:
            output_bindings = [
                _literal_models.Binding(v.name, v.binding_data) for v in outputs
            ]

        super(SdkWorkflow, self).__init__(
            id=id,
            metadata=metadata,
            metadata_defaults=metadata_defaults,
            interface=interface,
            nodes=nodes,
            outputs=output_bindings,
        )
        self._user_inputs = inputs
        self._upstream_entities = set(n.executable_sdk_object for n in nodes)
Example #10
0
    def _translate_duration(duration):
        """
        Express ``duration`` as a fixed rate in the coarsest unit (day, then hour, then minute) that divides
        it evenly.

        :param datetime.timedelta duration: timedelta between runs
        :raises FlyteAssertion: if the duration is not a whole number of minutes.
        :rtype: flytekit.models.schedule.Schedule.FixedRate
        """
        seconds_per_minute = 60
        seconds_per_hour = seconds_per_minute * 60
        seconds_per_day = seconds_per_hour * 24

        if duration.microseconds != 0 or duration.seconds % seconds_per_minute != 0:
            raise _user_exceptions.FlyteAssertion(
                "Granularity of less than a minute is not supported for FixedRate schedules.  Received: {}"
                .format(duration))

        total_seconds = duration.total_seconds()
        rate_units = _schedule_models.Schedule.FixedRateUnit

        # Try the coarse units first; minutes are the fallback because the guard above already ensured
        # the duration is a whole number of minutes.
        for unit_seconds, unit in ((seconds_per_day, rate_units.DAY), (seconds_per_hour, rate_units.HOUR)):
            if int(total_seconds) % unit_seconds == 0:
                return _schedule_models.Schedule.FixedRate(
                    int(total_seconds / unit_seconds),
                    unit,
                )

        return _schedule_models.Schedule.FixedRate(
            int(total_seconds / seconds_per_minute),
            rate_units.MINUTE,
        )
Example #11
0
 def _validate_not_null(self, val):
     if val is None:
         raise _user_exceptions.FlyteAssertion(
             "No configuration set for [{}] {}.  This is a required configuration."
             .format(self._section, self._key))
     if self._extra_validator:
         self._extra_validator(val)
Example #12
0
    def __call__(self, *args, **input_map):
        """
        Build an SdkNode that references this workflow with the given inputs bound.

        :param list[T] args: Must be empty; inputs are accepted as kwargs only.
        :param dict[str, T] input_map: Input values keyed by name.  They are layered on top of the defaults of
            the workflow's non-required inputs.
        :raises FlyteAssertion: if any positional args are supplied.
        :return: the ``_nodes.SdkNode`` that was built.
        """
        if args:
            raise _user_exceptions.FlyteAssertion(
                "When adding a workflow as a node in a workflow, all inputs must be specified with kwargs only.  We "
                "detected {} positional args.".format(len(args))
            )

        # Start from the defaults declared on the optional Inputs, then let explicit kwargs win.
        merged_inputs = {}
        for user_input in self.user_inputs:
            if not user_input.sdk_required:
                merged_inputs[user_input.name] = user_input.sdk_default
        merged_inputs.update(input_map)

        bindings, upstream_nodes = self.interface.create_bindings_for_inputs(merged_inputs)

        node_metadata = _workflow_models.NodeMetadata(
            "placeholder",
            _datetime.timedelta(),
            _literal_models.RetryStrategy(0),
        )
        return _nodes.SdkNode(
            id=None,
            metadata=node_metadata,
            upstream_nodes=upstream_nodes,
            bindings=sorted(bindings, key=lambda binding: binding.var),
            sdk_workflow=self
        )
Example #13
0
 def __getattr__(self, key):
     """
     Fallback attribute lookup that exposes the mapping's entries as attributes.

     Python only invokes this hook after regular attribute lookup has failed.

     :param Text key: name of the attribute being looked up.
     :raises FlyteAssertion: if ``key`` cannot be resolved through the parent class and is not a key of
         this mapping.
     :return: the parent-class attribute when one exists, otherwise ``self[key]``.
     """
     parent = super(ParameterMapper, self)
     if hasattr(parent, key):
         # Resolve through the super proxy with getattr(); the parent class is not guaranteed to define a
         # `__getattr__` hook that could be called directly.
         return getattr(parent, key)
     if key not in self:
         raise _user_exceptions.FlyteAssertion(
             "{} doesn't exist.".format(key))
     return self[key]
Example #14
0
    def __call__(self, *args, **input_map):
        """
        Build an SdkNode that references this task with the given inputs bound.

        :param list[T] args: Do not specify.  Kwargs only are supported for this function.
        :param dict[str, T] input_map: Map of inputs.  Can be statically defined or OutputReference links.
        :raises FlyteAssertion: if any positional args are supplied.
        :rtype: flytekit.common.nodes.SdkNode
        """
        if args:
            raise _user_exceptions.FlyteAssertion(
                "When adding a task as a node in a workflow, all inputs must be specified with kwargs only.  We "
                "detected {} positional args.".format(len(args)))

        bindings, upstream_nodes = self.interface.create_bindings_for_inputs(
            input_map)

        # TODO: Remove DEADBEEF
        # Timeout, retries and interruptibility are copied from the task's own metadata.
        task_metadata = self.metadata
        node_metadata = _workflow_model.NodeMetadata(
            "DEADBEEF",
            task_metadata.timeout,
            task_metadata.retries,
            task_metadata.interruptible,
        )

        # One thing to note - this function is not overloaded at the SdkRunnableTask layer, which means 'self' here
        # will sometimes refer to an object that can be executed locally, and other times will refer to something
        # that cannot (ie a pure SdkTask object, fetched from Admin for instance).
        return _nodes.SdkNode(
            id=None,
            metadata=node_metadata,
            bindings=sorted(bindings, key=lambda binding: binding.var),
            upstream_nodes=upstream_nodes,
            sdk_task=self,
        )
Example #15
0
 def upload_directory(self, from_path, to_path):
     """
     Always raises: uploading a directory is not supported by this data proxy.

     :param Text from_path:
     :param Text to_path:
     :raises FlyteAssertion: unconditionally.
     """
     unsupported_msg = "Writing data to HTTP endpoint is not currently supported."
     raise _user_exceptions.FlyteAssertion(unsupported_msg)
Example #16
0
    def __call__(self, *args, **kwargs):
        """
        Dispatch a call on this reference entity according to the current Flyte context.

        Three things may happen when the entity is called:
          a. Plain execution mode - just run ``execute``.
          b. Compilation mode - the call happens as part of a workflow (potentially a dynamic task), so
             ``compile`` is used.
          c. Local workflow execution mode - ``_local_execute`` is used, unless the branch currently being
             evaluated was skipped, in which case nothing runs and None is returned.

        :raises FlyteAssertion: if any positional args are supplied.
        """
        if args:
            raise _user_exceptions.FlyteAssertion(
                f"Cannot call reference entity with args - detected {len(args)} positional args {args}"
            )

        ctx = FlyteContext.current_context()

        compilation_state = ctx.compilation_state
        if compilation_state is not None and compilation_state.mode == 1:
            return self.compile(ctx, *args, **kwargs)

        execution_state = ctx.execution_state
        running_local_workflow = (
            execution_state is not None
            and execution_state.mode == ExecutionState.Mode.LOCAL_WORKFLOW_EXECUTION
        )
        if not running_local_workflow:
            logger.debug("Reference entity - running raw execute")
            return self.execute(**kwargs)

        if ctx.execution_state.branch_eval_mode == BranchEvalMode.BRANCH_SKIPPED:
            return None
        return self._local_execute(ctx, **kwargs)
Example #17
0
    def __init__(self, inputs, outputs, nodes):
        """
        Check that every upstream node has an id, then initialise the parent with a freshly generated
        identifier, default metadata, and an interface and output bindings derived from ``inputs`` and
        ``outputs``.

        :param list[flytekit.common.promise.Input] inputs:
        :param list[Output] outputs:
        :param list[flytekit.common.nodes.SdkNode] nodes:
        :raises FlyteAssertion: if any upstream node of ``nodes`` has no id.
        """
        has_unassigned_upstream = any(
            upstream.id is None
            for node in nodes
            for upstream in node.upstream_nodes
        )
        if has_unassigned_upstream:
            raise _user_exceptions.FlyteAssertion(
                "Some nodes contained in the workflow were not found in the workflow description.  Please "
                "ensure all nodes are either assigned to attributes within the class or an element in a "
                "list, dict, or tuple which is stored as an attribute in the class."
            )

        workflow_id = _identifier.Identifier(
            _identifier_model.ResourceType.WORKFLOW,
            _internal_config.PROJECT.get(),
            _internal_config.DOMAIN.get(),
            _uuid.uuid4().hex,
            _internal_config.VERSION.get(),
        )
        workflow_metadata = _workflow_models.WorkflowMetadata()
        typed_interface = _interface.TypedInterface(
            {v.name: v.var for v in inputs},
            {v.name: v.var for v in outputs},
        )
        output_bindings = [
            _literal_models.Binding(v.name, v.binding_data) for v in outputs
        ]

        super(SdkWorkflow, self).__init__(
            id=workflow_id,
            metadata=workflow_metadata,
            interface=typed_interface,
            nodes=nodes,
            outputs=output_bindings,
        )
        self._user_inputs = inputs
        self._upstream_entities = set(n.executable_sdk_object for n in nodes)
Example #18
0
 def get_task(self, sdk_task):
     """
     Pick the unit-test engine wrapper that matches the task's type.

     :param flytekit.common.tasks.task.SdkTask sdk_task:
     :raises FlyteAssertion: if the task's type has no unit-test wrapper.
     :rtype: UnitTestEngineTask
     """
     task_types = _sdk_constants.SdkTaskType
     task_type = sdk_task.type

     if task_type in {
         task_types.PYTHON_TASK,
         task_types.SPARK_TASK,
         task_types.SENSOR_TASK,
     }:
         return ReturnOutputsTask(sdk_task)

     if task_type in {task_types.DYNAMIC_TASK}:
         return DynamicTask(sdk_task)

     if task_type in {task_types.BATCH_HIVE_TASK}:
         return HiveTask(sdk_task)

     raise _user_exceptions.FlyteAssertion(
         "Unit tests are not currently supported for tasks of type: {}".format(
             sdk_task.type
         )
     )
Example #19
0
 def to_flyte_literal_type(cls):
     """
     Always raises: a Void type has no literal type.

     :raises FlyteAssertion: unconditionally.
     :rtype: flytekit.models.types.LiteralType
     """
     no_literal_msg = (
         "A Void type does not have a literal type and cannot be used in this "
         "manner."
     )
     raise _user_exceptions.FlyteAssertion(no_literal_msg)
Example #20
0
 def __init__(
     self,
     nodes: List[_nodes.FlyteNode],
     interface,
     output_bindings,
     id,
     metadata,
     metadata_defaults,
 ):
     """
     Check that every upstream node has an id, then forward everything to the parent constructor and keep a
     reference to ``nodes``.

     :raises FlyteAssertion: if any upstream node of ``nodes`` has no id.
     """
     missing_upstream_id = any(
         upstream.id is None
         for node in nodes
         for upstream in node.upstream_nodes
     )
     if missing_upstream_id:
         raise _user_exceptions.FlyteAssertion(
             "Some nodes contained in the workflow were not found in the workflow description.  Please "
             "ensure all nodes are either assigned to attributes within the class or an element in a "
             "list, dict, or tuple which is stored as an attribute in the class."
         )

     parent_kwargs = dict(
         id=id,
         metadata=metadata,
         metadata_defaults=metadata_defaults,
         interface=interface,
         nodes=nodes,
         outputs=output_bindings,
     )
     super(FlyteWorkflow, self).__init__(**parent_kwargs)
     self._flyte_nodes = nodes
Example #21
0
    def __call__(self, *args, **input_map):
        """
        Build an SdkNode that references this launch plan with the given inputs bound.

        :param list[T] args: Do not specify.  Kwargs only are supported for this function.
        :param dict[Text,T] input_map: Map of inputs.  Can be statically defined or OutputReference links.
        :raises FlyteAssertion: if any positional args are supplied.
        :rtype: flytekit.common.nodes.SdkNode
        """
        if args:
            raise _user_exceptions.FlyteAssertion(
                "When adding a launchplan as a node in a workflow, all inputs must be specified with kwargs only.  We "
                "detected {} positional args.".format(len(args)))

        # Seed with the launch plan's defaults for non-required parameters; explicit kwargs take precedence.
        resolved_inputs = {}
        for name, parameter in _six.iteritems(self.default_inputs.parameters):
            if not parameter.required:
                resolved_inputs[name] = parameter.sdk_default
        resolved_inputs.update(input_map)

        bindings, upstream_nodes = self.interface.create_bindings_for_inputs(
            resolved_inputs)

        node_metadata = _workflow_models.NodeMetadata(
            "", _datetime.timedelta(), _literal_models.RetryStrategy(0))
        return _nodes.SdkNode(
            id=None,
            metadata=node_metadata,
            bindings=sorted(bindings, key=lambda binding: binding.var),
            upstream_nodes=upstream_nodes,
            sdk_launch_plan=self,
        )
Example #22
0
 def download_directory(self, from_path, to_path):
     """
     Always raises: downloading a directory is not supported by this data proxy.

     :param Text from_path:
     :param Text to_path:
     :raises FlyteAssertion: unconditionally.
     """
     unsupported_msg = "Reading data recursively from HTTP endpoint is not currently supported."
     raise _user_exceptions.FlyteAssertion(unsupported_msg)
Example #23
0
def test_flyte_assert():
    """FlyteAssertion keeps its message, its error code and its exception lineage."""
    message = "I ASSERT THAT THIS IS WRONG!"
    try:
        raise user.FlyteAssertion(message)
    except user.FlyteAssertion as raised:
        assert str(raised) == message
        assert isinstance(raised, AssertionError)
        assert type(raised).error_code == "USER:AssertionError"
        assert isinstance(raised, user.FlyteUserException)
Example #24
0
 def assign_id_and_return(self, id: str):
     """
     Assign an identifier to this node and return the node so that calls can be chained.

     The stored ``_id`` is the ``_dnsify``-ed form of ``id`` (or None when ``id`` is falsy), while the
     metadata name receives ``id`` exactly as passed in.

     :param str id: identifier to assign.
     :raises FlyteAssertion: if this node already has a truthy id.
     :return: this node.
     """
     if self.id:
         raise _user_exceptions.FlyteAssertion(
             f"Error assigning ID: {id} because {self} is already assigned. Has this node been assigned to another "
             "workflow already?")
     self._id = _dnsify(id) if id else None
     self._metadata.name = id
     return self
Example #25
0
 def promote_from_model(cls, model):
     """
     Always raises: an SDK node cannot be built from a bare data model object.

     :param flytekit.models.core.workflow.Node model:
     :raises FlyteAssertion: unconditionally.
     :rtype: SdkNode
     """
     reason = (
         "An SDK node cannot be instantiated merely from a data model object "
         "because it must be contextualized within a workflow."
     )
     raise _user_exceptions.FlyteAssertion(reason)
Example #26
0
 def promote_from_model(cls, model):
     """
     Always raises: a NodeOutput cannot be built from a protobuf model alone.

     :param flytekit.models.types.OutputReference model:
     :raises FlyteAssertion: unconditionally.
     :rtype: NodeOutput
     """
     reason = (
         "A NodeOutput cannot be promoted from a protobuf because it must be "
         "contextualized by an existing SdkNode."
     )
     raise _user_exceptions.FlyteAssertion(reason)
Example #27
0
 def promote_from_model(cls, model):
     """
     Always raises: an SdkRunnableLaunchPlan cannot be built from a model object.

     :param flytekit.models.launch_plan.LaunchPlanSpec model:
     :raises FlyteAssertion: unconditionally.
     :rtype: SdkRunnableLaunchPlan
     """
     reason = "An SdkRunnableLaunchPlan must be created from a reference to local Python code only."
     raise _user_exceptions.FlyteAssertion(reason)
Example #28
0
    def outputs(self) -> Dict[str, Any]:
        """
        Returns the outputs of the task execution, if available, in the standard Python format that is produced by
        the type engine.

        The result is fetched and converted on first access and cached on the instance afterwards.

        :raises: ``FlyteAssertion`` error if execution is in progress or execution ended in error.
        """
        # Imported inside the method rather than at module level.
        # NOTE(review): presumably this avoids a circular import — confirm.
        from flytekit.control_plane.tasks.task import FlyteTask

        if not self.is_complete:
            raise _user_exceptions.FlyteAssertion(
                "Please wait until the task execution has completed before requesting the outputs."
            )
        if self.error:
            raise _user_exceptions.FlyteAssertion(
                "Outputs could not be found because the execution ended in failure."
            )

        if self._outputs is None:
            client = _flyte_engine.get_client()
            execution_data = client.get_task_execution_data(self.id)

            # Outputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
            output_map = _literal_models.LiteralMap({})
            if bool(execution_data.full_outputs.literals):
                output_map = execution_data.full_outputs
            elif execution_data.outputs.bytes > 0:
                # Download the serialized literal map into a scratch directory and parse it from there.
                with _common_utils.AutoDeletingTempDir() as t:
                    tmp_name = os.path.join(t.name, "outputs.pb")
                    _data_proxy.Data.get_data(execution_data.outputs.url,
                                              tmp_name)
                    output_map = _literal_models.LiteralMap.from_flyte_idl(
                        _common_utils.load_proto_from_file(
                            _literals_pb2.LiteralMap, tmp_name))

            # The task's declared output interface is used to guess the Python types for the conversion.
            task = FlyteTask.fetch(self.id.task_id.project,
                                   self.id.task_id.domain,
                                   self.id.task_id.name,
                                   self.id.task_id.version)
            self._outputs = TypeEngine.literal_map_to_kwargs(
                ctx=FlyteContextManager.current_context(),
                lm=output_map,
                python_types=TypeEngine.guess_python_types(
                    task.interface.outputs),
            )
        return self._outputs
Example #29
0
 def _validate_schedule(schedule):
     """
     Accept ``schedule`` when it is a known cron alias (compared in lower case) or when croniter can parse it.

     :param Text schedule: cron alias or cron expression.
     :raises FlyteAssertion: if ``schedule`` is not a known alias and croniter rejects it.
     """
     if schedule.lower() in CronSchedule._VALID_CRON_ALIASES:
         return

     # Not an alias, so it has to stand on its own as a cron expression.
     try:
         _croniter.croniter(schedule)
     except Exception:
         raise _user_exceptions.FlyteAssertion(
             "Schedule is invalid. It must be set to either a cron alias or valid cron expression."
             " Provided schedule: {}".format(schedule))
Example #30
0
def iterate_registerable_entities_in_order(pkgs,
                                           ignore_entities=None,
                                           include_entities=None,
                                           detect_unreferenced_entities=True):
    """
    Walks every module discovered in the given packages, collects the registerable entities instantiated there and
    yields them in the order produced by the topological sort helper, which attempts to place any entity after the
    entities it depends on.  Note that workflows can reference other workflows and launch plans.

    This is a generator, so the argument check below only runs once iteration starts.

    :param list[Text] pkgs: packages whose modules are scanned for entities.
    :param set[type] ignore_entities: If specified, ignore these entities while doing a topological sort.  All other
        entities will be taken.  Only one of ignore_entities or include_entities can be set.
    :param set[type] include_entities: If specified, include these entities while doing a topological sort.  All
        other entities will be ignored.  Only one of ignore_entities or include_entities can be set.
    :param bool detect_unreferenced_entities: If true, we will raise exceptions on entities not included in the package
        configuration.
    :raises _user_exceptions.FlyteAssertion: if both ignore_entities and include_entities are set.
    :rtype: module, Text, flytekit.common.mixins.registerable.RegisterableEntity
    """
    if ignore_entities and include_entities:
        raise _user_exceptions.FlyteAssertion(
            "ignore_entities and include_entities cannot both be set")

    # Normalize both filters to tuples, filling in the default for whichever one was not supplied.
    if ignore_entities:
        ignore_entities = tuple(ignore_entities)
        include_entities = ()
    elif include_entities:
        # Only an include filter was given: everything else is ignored.
        ignore_entities = (object, )
        include_entities = tuple(include_entities)
    else:
        # No filter at all: take every entity.
        include_entities = (object, )
        ignore_entities = ()

    # Map each not-yet-registered entity to the (module, attribute name) it was found under.
    entity_to_module_key = {}
    for module in iterate_modules(pkgs):
        for attr_name in dir(module):
            entity = module.__dict__[attr_name]
            if not isinstance(entity, _registerable.RegisterableEntity) or entity.has_registered:
                continue
            # Only record the entity in the module that instantiated it, not in modules that merely import it.
            if entity.instantiated_in == module.__name__:
                location = (module, attr_name)
                entity_to_module_key[entity] = location
                wants_default_lp = (isinstance(entity, _SdkRunnableWorkflow)
                                    and entity.should_create_default_launch_plan)
                if wants_default_lp:
                    # SDK should create a default launch plan for a workflow.  This is a special-case to simplify
                    # authoring of workflows.  The launch plan is recorded under the workflow's own location.
                    entity_to_module_key[entity.create_launch_plan()] = (module, attr_name)

    # Emit entities via the sort helper, starting a fresh traversal for each entity not visited yet.
    visited = set()
    for root in six.iterkeys(entity_to_module_key):
        if root in visited:
            continue
        ordered = _topo_sort_helper(
            root,
            entity_to_module_key,
            visited,
            {},
            [],
            include_entities,
            ignore_entities,
            detect_unreferenced_entities=detect_unreferenced_entities,
        )
        for found_module, found_key, found_entity in ordered:
            yield found_module, found_key, found_entity