Example #1
 def __init__(
     self,
     task_type: str,
     name: str,
     task_config: T,
     interface: Optional[Interface] = None,
     environment: Optional[Dict[str, str]] = None,
     **kwargs,
 ):
     """
     Args:
         task_type: a string that defines a unique task-type for every new extension. If a backend plugin is required,
                    this has to be chosen in concert with the backend plugin identifier.
         name: A unique name for the task instantiation. This must be unique for every instance of the task.
         task_config: Configuration for the task. This is used to configure the specific plugin that handles this task
         interface: A python native typed interface ``(inputs) -> outputs`` that declares the signature of the task
         environment: Any environment variables that should be supplied during the execution of the task. Supplied as
                      a dictionary of key/value pairs
     """
     super().__init__(
         task_type=task_type,
         name=name,
         interface=transform_interface_to_typed_interface(interface),
         **kwargs,
     )
     self._python_interface = interface if interface else Interface()
     self._environment = environment if environment else {}
     self._task_config = task_config
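A minimal sketch of how an extension might call this constructor, assuming it belongs to a generic base class like flytekit's PythonTask (MyConfig, MyPluginTask, and the task-type string are hypothetical names, not part of flytekit):

 from dataclasses import dataclass

 from flytekit.core.base_task import PythonTask
 from flytekit.core.interface import Interface


 @dataclass
 class MyConfig:  # hypothetical plugin configuration
     table: str


 class MyPluginTask(PythonTask[MyConfig]):
     def __init__(self, name: str, task_config: MyConfig, **kwargs):
         super().__init__(
             task_type="my-plugin",  # unique per extension, chosen in concert with the backend plugin
             name=name,
             task_config=task_config,
             interface=Interface(inputs={"limit": int}, outputs={"rows": int}),
             **kwargs,
         )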
Example #2
 def __init__(
     self,
     name: str,
     image: str,
     command: List[str],
     inputs: Optional[Dict[str, Type]] = None,
     metadata: Optional[TaskMetadata] = None,
     arguments: Optional[List[str]] = None,
     outputs: Optional[Dict[str, Type]] = None,
     requests: Optional[Resources] = None,
     limits: Optional[Resources] = None,
     input_data_dir: Optional[str] = None,
     output_data_dir: Optional[str] = None,
     metadata_format: MetadataFormat = MetadataFormat.JSON,
     io_strategy: Optional[IOStrategy] = None,
     **kwargs,
 ):
     super().__init__(
         task_type="raw-container",
         name=name,
         interface=Interface(inputs, outputs),
         metadata=metadata,
         task_config=None,
         **kwargs,
     )
     self._image = image
     self._cmd = command
     self._args = arguments
     self._input_data_dir = input_data_dir
     self._output_data_dir = output_data_dir
     self._md_format = metadata_format
     self._io_strategy = io_strategy
     self._resources = ResourceSpec(
         requests=requests if requests else Resources(), limits=limits if limits else Resources()
     )
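For context, a sketch of invoking this constructor, assuming it belongs to a class like flytekit's ContainerTask (the image, command, and data directories are placeholders):

 from flytekit import ContainerTask

 greet = ContainerTask(
     name="greet",
     image="alpine:3.15",  # placeholder image
     command=["sh", "-c"],
     arguments=["echo hello > /var/outputs/greeting"],
     inputs={"name": str},
     outputs={"greeting": str},
     input_data_dir="/var/inputs",
     output_data_dir="/var/outputs",
 )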
Example #3
    @classmethod
    def create(
        cls,
        name: str,
        workflow: _annotated_workflow.WorkflowBase,
        default_inputs: Optional[Dict[str, Any]] = None,
        fixed_inputs: Optional[Dict[str, Any]] = None,
        schedule: Optional[_schedule_model.Schedule] = None,
        notifications: Optional[List[_common_models.Notification]] = None,
        auth_role: Optional[_common_models.AuthRole] = None,
    ) -> LaunchPlan:
        ctx = FlyteContextManager.current_context()
        default_inputs = default_inputs or {}
        fixed_inputs = fixed_inputs or {}
        # Default inputs come from two places, the original signature of the workflow function, and the default_inputs
        # argument to this function. We'll take the latter as having higher precedence.
        wf_signature_parameters = transform_inputs_to_parameters(
            ctx, workflow.python_interface)

        # Construct a new Interface object with just the given default inputs in order to get Parameters; there may
        # be an easier way to do this.
        temp_inputs = {}
        for k, v in default_inputs.items():
            temp_inputs[k] = (workflow.python_interface.inputs[k], v)
        temp_interface = Interface(inputs=temp_inputs, outputs={})
        temp_signature = transform_inputs_to_parameters(ctx, temp_interface)
        wf_signature_parameters._parameters.update(temp_signature.parameters)

        # These are fixed inputs that cannot change at launch time. If the same argument is also in default inputs,
        # it'll be taken out from defaults in the LaunchPlan constructor
        fixed_literals = translate_inputs_to_literals(
            ctx,
            incoming_values=fixed_inputs,
            flyte_interface_types=workflow.interface.inputs,
            native_types=workflow.python_interface.inputs,
        )
        fixed_lm = _literal_models.LiteralMap(literals=fixed_literals)

        lp = cls(
            name=name,
            workflow=workflow,
            parameters=wf_signature_parameters,
            fixed_inputs=fixed_lm,
            schedule=schedule,
            notifications=notifications,
            auth_role=auth_role,
        )

        # This is just a convenience: the fixed-inputs LiteralMap is needed when serializing the launch plan out to
        # protobuf, but for local execution it is handy to also keep the original Python-native values so we don't
        # have to translate them back every time.
        default_inputs.update(fixed_inputs)
        lp._saved_inputs = default_inputs

        if name in cls.CACHE:
            raise AssertionError(
                f"Launch plan named {name} was already created! Make sure your names are unique."
            )
        cls.CACHE[name] = lp
        return lp
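To make the precedence and fixed-input rules above concrete, a sketch of calling create (the workflow and input names are illustrative): default_inputs take precedence over defaults in the workflow signature, while fixed_inputs are frozen at creation time.

    from flytekit import LaunchPlan, workflow


    @workflow
    def my_wf(region: str, limit: int = 10):
        ...


    lp = LaunchPlan.create(
        "my_wf_daily",
        my_wf,
        default_inputs={"limit": 100},         # takes precedence over the signature default of 10
        fixed_inputs={"region": "us-east-1"},  # cannot be changed at launch time
    )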
Example #4
 def __init__(
     self, task_type: str, name: str, task_config: T, interface: Optional[Interface] = None, **kwargs,
 ):
     super().__init__(
         task_type=task_type, name=name, interface=transform_interface_to_typed_interface(interface), **kwargs
     )
     self._python_interface = interface if interface else Interface()
     self._environment = kwargs.get("environment", {})
     self._task_config = task_config
Example #5
 def __init__(
     self,
     reference: Union[WorkflowReference, TaskReference,
                      LaunchPlanReference],
     inputs: Optional[Dict[str, Union[Type[Any], Tuple[Type[Any], Any]]]],
     outputs: Dict[str, Type],
 ):
     if not isinstance(reference, (WorkflowReference, TaskReference, LaunchPlanReference)):
         raise Exception("Must be one of task, workflow, or launch plan")
     self._reference = reference
     self._native_interface = Interface(inputs=inputs, outputs=outputs)
     self._interface = transform_interface_to_typed_interface(
         self._native_interface)
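A sketch of constructing one of these references, assuming this constructor belongs to a class like flytekit's ReferenceEntity (the project, domain, and version values are placeholders):

 from flytekit.core.reference_entity import ReferenceEntity, TaskReference

 ref = TaskReference(project="flytesnacks", domain="development", name="core.sum", version="v1")
 entity = ReferenceEntity(ref, inputs={"a": int, "b": int}, outputs={"c": int})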
Example #6
 def __init__(
     self,
     name: str,
     failure_policy: Optional[WorkflowFailurePolicy] = None,
     interruptible: bool = False,
 ):
     metadata = WorkflowMetadata(on_failure=failure_policy or WorkflowFailurePolicy.FAIL_IMMEDIATELY)
     workflow_metadata_defaults = WorkflowMetadataDefaults(interruptible)
     self._compilation_state = CompilationState(prefix="")
     self._inputs = {}
     self._unbound_inputs = set()
     super().__init__(
         name=name,
         workflow_metadata=metadata,
         workflow_metadata_defaults=workflow_metadata_defaults,
         python_interface=Interface(),
     )
Example #7
def test_create_native_named_tuple():
    ctx = FlyteContextManager.current_context()
    t = create_native_named_tuple(ctx,
                                  promises=None,
                                  entity_interface=Interface())
    assert t is None

    p1 = Promise(var="x",
                 val=TypeEngine.to_literal(
                     ctx, 1, int, LiteralType(simple=SimpleType.INTEGER)))
    p2 = Promise(var="y",
                 val=TypeEngine.to_literal(
                     ctx, 2, int, LiteralType(simple=SimpleType.INTEGER)))

    t = create_native_named_tuple(
        ctx, promises=p1, entity_interface=Interface(outputs={"x": int}))
    assert t
    assert t == 1

    t = create_native_named_tuple(ctx,
                                  promises=[],
                                  entity_interface=Interface())
    assert t is None

    t = create_native_named_tuple(ctx,
                                  promises=[p1, p2],
                                  entity_interface=Interface(outputs={
                                      "x": int,
                                      "y": int
                                  }))
    assert t
    assert t == (1, 2)

    t = create_native_named_tuple(ctx,
                                  promises=[p1, p2],
                                  entity_interface=Interface(
                                      outputs={
                                          "x": int,
                                          "y": int
                                      },
                                      output_tuple_name="Tup"))
    assert t
    assert t == (1, 2)
    assert t.__class__.__name__ == "Tup"

    with pytest.raises(KeyError):
        create_native_named_tuple(ctx,
                                  promises=[p1, p2],
                                  entity_interface=Interface(
                                      outputs={"x": int},
                                      output_tuple_name="Tup"))
Example #8
 def __init__(
     self,
     name: str,
     query_template: str,
     task_type="sql_task",
     inputs: Optional[Dict[str, Type]] = None,
     metadata: Optional[TaskMetadata] = None,
     task_config: Optional[T] = None,
     outputs: Dict[str, Type] = None,
     **kwargs,
 ):
     super().__init__(
         task_type=task_type,
         name=name,
         interface=Interface(inputs=inputs or {}, outputs=outputs or {}),
         metadata=metadata,
         task_config=task_config,
         **kwargs,
     )
     self._query_template = query_template
Example #9
 def __init__(
     self,
     name: str,
     failure_policy: Optional[WorkflowFailurePolicy] = None,
     interruptible: bool = False,
 ):
     metadata = WorkflowMetadata(on_failure=failure_policy
                                 or WorkflowFailurePolicy.FAIL_IMMEDIATELY)
     workflow_metadata_defaults = WorkflowMetadataDefaults(interruptible)
     self._compilation_state = CompilationState(prefix="")
     self._inputs = {}
     # This unbound-inputs construct is here to help workflow authors detect issues a bit earlier. It keeps track
     # of workflow inputs declared with add_workflow_input that have not yet been consumed. Admin would return
     # this error at compile time anyway, but tracking it here lets flytekit raise the error earlier.
     self._unbound_inputs = set()
     super().__init__(
         name=name,
         workflow_metadata=metadata,
         workflow_metadata_defaults=workflow_metadata_defaults,
         python_interface=Interface(),
     )
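A sketch of the imperative flow this constructor enables, assuming the class is flytekit's ImperativeWorkflow (the task and names are illustrative; o0 is flytekit's default positional output name):

 from flytekit import task
 from flytekit.core.workflow import ImperativeWorkflow


 @task
 def double(a: int) -> int:
     return a * 2


 wf = ImperativeWorkflow(name="example.imperative.wf")
 wf.add_workflow_input("x", int)                    # tracked in _unbound_inputs until consumed
 node = wf.add_entity(double, a=wf.inputs["x"])     # consuming the input removes it from the unbound set
 wf.add_workflow_output("out", node.outputs["o0"])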
Example #10
 def __init__(
     self,
     name: str,
     query_template: str,
     task_type="sql_task",
     inputs: Optional[Dict[str, Type]] = None,
     metadata: Optional[TaskMetadata] = None,
     task_config: Optional[T] = None,
     outputs: Dict[str, Type] = None,
     **kwargs,
 ):
     """
     This SQLTask should mostly just be used as a base class for other SQL task types and should not be used
     directly. See :py:class:`flytekit.extras.sqlite3.task.SQLite3Task`
     """
     super().__init__(
         task_type=task_type,
         name=name,
         interface=Interface(inputs=inputs or {}, outputs=outputs or {}),
         metadata=metadata,
         task_config=task_config,
         **kwargs,
     )
     self._query_template = query_template
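As the docstring notes, this class is meant to be subclassed; a minimal sketch of a concrete subclass (the execute body is purely illustrative, not flytekit's behavior):

 from flytekit.core.base_sql_task import SQLTask


 class EchoSQLTask(SQLTask[None]):
     def __init__(self, name: str, query_template: str, **kwargs):
         super().__init__(name=name, query_template=query_template, task_type="echo-sql", **kwargs)

     def execute(self, **kwargs):
         # A real subclass would hand the rendered query to a database engine;
         # returning the raw template here is just for illustration.
         return self.query_template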
Example #11
 def reset_interface(self, inputs: Dict[str, Type], outputs: Dict[str, Type]):
     self._interface = Interface(inputs=inputs, outputs=outputs)
     self._typed_interface = transform_interface_to_typed_interface(self._interface)
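A sketch of when this might be called, e.g. to re-declare an entity's Python signature after construction (t stands for any instance exposing this method):

 t.reset_interface(inputs={"user_id": int}, outputs={"rows": int})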
Example #12
    def __init__(
        self,
        name: str,
        debug: bool = False,
        script: typing.Optional[str] = None,
        script_file: typing.Optional[str] = None,
        task_config: T = None,
        inputs: typing.Optional[typing.Dict[str, typing.Type]] = None,
        output_locs: typing.Optional[typing.List[OutputLocation]] = None,
        **kwargs,
    ):
        """
        Args:
            name: str Name of the task. Should be unique in the project
            debug: bool Print the generated script and other debugging information
            script: The actual script, specified as a string
            script_file: A path to the file that contains the script (only one of script or script_file can be provided)
            task_config: T Configuration for the task; can be either a Pod (or, coming soon, BatchJob) config
            inputs: A dictionary of input names to types
            output_locs: A list of :py:class:`OutputLocation`
            **kwargs: Other arguments that can be passed to :py:class:`PythonInstanceTask`
        """
        if script and script_file:
            raise ValueError("Only one of script or script_file can be provided")
        if not script and not script_file:
            raise ValueError("Either a script or a script_file is needed")
        if script_file:
            if not os.path.exists(script_file):
                raise ValueError(f"FileNotFound: the specified script file at path {script_file} cannot be loaded")
            script_file = os.path.abspath(script_file)

        if task_config is not None:
            # str(type(x)) renders as "<class 'module.Name'>", so compare the fully qualified name instead.
            fq_name = f"{type(task_config).__module__}.{type(task_config).__name__}"
            if fq_name != "flytekitplugins.pod.task.Pod":
                raise ValueError("task_config must either be empty (a plain container task) or a Pod config.")

        # Each instance of this task instantiates an underlying task with a dummy function that is only used to
        # run the pre- and post-execute hooks of the corresponding task plugin. We rename that function so the
        # generated task has a unique name and does not trigger duplicate-task-name errors.
        # This seems like a hack; ideally the plugin_class would not require a fake function to work.
        plugin_class = TaskPlugins.find_pythontask_plugin(type(task_config))
        self._config_task_instance = plugin_class(
            task_config=task_config, task_function=_dummy_task_func)
        # Rename the internal task so that there are no conflicts at serialization time. Technically these internal
        # tasks should not be serialized at all, but we don't currently have a mechanism for skipping Flyte entities
        # at serialization time.
        self._config_task_instance._name = f"_bash.{name}"
        self._script = script
        self._script_file = script_file
        self._debug = debug
        self._output_locs = output_locs if output_locs else []
        self._interpolizer = _PythonFStringInterpolizer()
        outputs = self._validate_output_locs()
        super().__init__(
            name,
            task_config,
            task_type=self._config_task_instance.task_type,
            interface=Interface(inputs=inputs, outputs=outputs),
            **kwargs,
        )
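Finally, a sketch of instantiating this script task, assuming it is flytekit's ShellTask; the {inputs.x}/{outputs.y} placeholders follow the f-string interpolizer referenced above, and the script body and file names are placeholders:

    from flytekit.extras.tasks.shell import OutputLocation, ShellTask
    from flytekit.types.file import FlyteFile

    t = ShellTask(
        name="gen-greeting",
        debug=True,
        script="""
        echo "hello {inputs.name}" > out.txt
        """,
        inputs={"name": str},
        output_locs=[OutputLocation(var="out", var_type=FlyteFile, location="out.txt")],
    )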