import pytest as _pytest

from flytekit.common.tasks.presto_task import SdkPrestoTask
from flytekit.sdk.tasks import inputs
from flytekit.sdk.types import Types

schema = Types.Schema([("a", Types.Integer), ("b", Types.String)])


def test_task_produce_deterministic_version():
    containerless_task = SdkPrestoTask(
        task_inputs=inputs(ds=Types.String, rg=Types.String),
        statement="SELECT * FROM flyte.widgets WHERE ds = '{{ .Inputs.ds}}' LIMIT 10",
        output_schema=schema,
        routing_group="{{ .Inputs.rg }}",
    )

    identical_containerless_task = SdkPrestoTask(
        task_inputs=inputs(ds=Types.String, rg=Types.String),
        statement="SELECT * FROM flyte.widgets WHERE ds = '{{ .Inputs.ds}}' LIMIT 10",
        output_schema=schema,
        routing_group="{{ .Inputs.rg }}",
    )

    different_containerless_task = SdkPrestoTask(
        task_inputs=inputs(ds=Types.String, rg=Types.String),
        statement="SELECT * FROM flyte.widgets WHERE ds = '{{ .Inputs.ds}}' LIMIT 100000",
        output_schema=schema,
        routing_group="{{ .Inputs.rg }}",
    )

    # Identical definitions must produce the same version; a changed
    # statement must produce a different one.
    assert (
        containerless_task._produce_deterministic_version()
        == identical_containerless_task._produce_deterministic_version()
    )
    assert (
        containerless_task._produce_deterministic_version()
        != different_containerless_task._produce_deterministic_version()
    )

    # get_sample_task (defined elsewhere in this test module) returns a
    # regular container task, for which deterministic versioning raises.
    with _pytest.raises(Exception):
        get_sample_task()._produce_deterministic_version()
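
# A minimal, pure-Python sketch of the property the test above checks: a
# task's deterministic version behaves like a digest of its definition, so
# identical definitions produce the same version and any change to the
# statement produces a different one. Illustrative only; flytekit's
# _produce_deterministic_version presumably hashes the task's full
# serialized representation, not a hand-built string like this.
import hashlib


def deterministic_version(statement, routing_group):
    payload = "|".join([statement, routing_group]).encode("utf-8")
    return hashlib.sha256(payload).hexdigest()


v1 = deterministic_version("SELECT 1", "primary")
v2 = deterministic_version("SELECT 1", "primary")
v3 = deterministic_version("SELECT 2", "primary")
assert v1 == v2 and v1 != v3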
from flytekit.sdk.spark_types import SparkType
from flytekit.sdk.tasks import generic_spark_task, inputs, python_task
from flytekit.sdk.types import Types
from flytekit.sdk.workflow import Input, workflow_class

scala_spark = generic_spark_task(
    spark_type=SparkType.SCALA,
    inputs=inputs(partitions=Types.Integer),
    main_class="org.apache.spark.examples.SparkPi",
    main_application_file="local:///opt/spark/examples/jars/spark-examples.jar",
    spark_conf={
        "spark.driver.memory": "1000M",
        "spark.executor.memory": "1000M",
        "spark.executor.cores": "1",
        "spark.executor.instances": "2",
    },
    cache_version="1",
)


@inputs(date_triggered=Types.Datetime)
@python_task(cache_version="1")
def print_every_time(workflow_parameters, date_triggered):
    print("My input : {}".format(date_triggered))


@workflow_class
class SparkTasksWorkflow(object):
    triggered_date = Input(Types.Datetime)
    partitions = Input(Types.Integer)
    spark_task = scala_spark(partitions=partitions)
    # Wire the datetime input into the python task.
    print_always = print_every_time(date_triggered=triggered_date)
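
# A hedged sketch of exercising print_every_time locally, assuming flytekit
# 0.x's test helpers (flytekit.sdk.test.flyte_test and the unit_test method
# on runnable tasks). A timezone-aware datetime is passed, since Flyte
# treats datetimes as UTC.
import datetime

from flytekit.sdk.test import flyte_test


@flyte_test
def test_print_every_time():
    # Runs the task function in-process; no Flyte backend is required.
    print_every_time.unit_test(
        date_triggered=datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)
    )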
from __future__ import absolute_import

from flytekit.common.tasks.presto_task import SdkPrestoTask
from flytekit.sdk.tasks import inputs
from flytekit.sdk.types import Types
from flytekit.sdk.workflow import Input, Output, workflow_class

schema = Types.Schema([("a", Types.Integer), ("b", Types.String)])

presto_task = SdkPrestoTask(
    task_inputs=inputs(length=Types.Integer, rg=Types.String),
    statement="SELECT a, chr(a+64) as b from unnest(sequence(1, {{ .Inputs.length }})) t(a)",
    output_schema=schema,
    routing_group="{{ .Inputs.rg }}",
    catalog="hive",  # can be left out if you specify it in the query
    schema="tmp",  # can be left out if you specify it in the query
)


@workflow_class()
class PrestoWorkflow(object):
    length = Input(Types.Integer, required=True, help="Int between 1 and 26")
    routing_group = Input(Types.String, required=True, help="Test string with no default")

    p_task = presto_task(length=length, rg=routing_group)

    output_a = Output(p_task.outputs.results, sdk_type=schema)
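
# A quick, pure-Python illustration of what the statement above computes:
# unnest(sequence(1, N)) yields the integers 1..N, and chr(a + 64) maps
# them to the letters 'A'..'Z' (hence the "between 1 and 26" hint on the
# length input).
for a in range(1, 27):
    assert chr(a + 64) == "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[a - 1]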
def get_sdk_node(
    self,
    pipeline_context,
    instance,
    pipeline_run,
    step_key,
    task_type=constants.SdkTaskType.PYTHON_TASK,
    cache_version="",
    retries=0,
    interruptible=False,
    deprecated="",
    storage_request=None,
    cpu_request=None,
    gpu_request=None,
    memory_request=None,
    storage_limit=None,
    cpu_limit=None,
    gpu_limit=None,
    memory_limit=None,
    cache=False,
    timeout=datetime.timedelta(seconds=0),
    environment=None,
):
    execution_step = self.execution_plan.get_step_by_key(step_key)
    flyte_inputs = self.flyte_inputs(execution_step.step_input_dict, execution_step.solid_name)
    flyte_outputs = self.flyte_outputs(execution_step.step_output_dict, execution_step.solid_name)

    def wrapper(wf_params, *args, **kwargs):  # pylint: disable=unused-argument
        # TODO: We can't update config values via inputs from Flyte, because they are immutable
        plan = self.execution_plan.build_subset_plan([step_key])
        for param, arg in kwargs.items():
            self.inject_intermediates(pipeline_context, execution_step, param, arg)

        results = list(
            execute_plan(
                plan,
                instance,
                run_config=self.run_config,
                pipeline_run=pipeline_run,
            )
        )

        for result in results:
            step_context = pipeline_context.for_step(execution_step)
            self.output_value(step_context, step_key, result, execution_step, kwargs)

    # This takes the wrapper definition and re-creates it with explicit parameters as keyword arguments
    wrapper = forge.sign(
        forge.arg("wf_params"),
        *map(forge.arg, flyte_inputs.keys()),
        *map(forge.arg, flyte_outputs.keys()),
    )(wrapper)

    # flytekit uses this name for an internal representation; make it unique to the step key
    wrapper.__name__ = execution_step.solid_name

    task = sdk_runnable.SdkRunnableTask(
        task_function=wrapper,
        task_type=task_type,
        discovery_version=cache_version,
        retries=retries,
        interruptible=interruptible,
        deprecated=deprecated,
        storage_request=storage_request,
        cpu_request=cpu_request,
        gpu_request=gpu_request,
        memory_request=memory_request,
        storage_limit=storage_limit,
        cpu_limit=cpu_limit,
        gpu_limit=gpu_limit,
        memory_limit=memory_limit,
        discoverable=cache,
        timeout=timeout,
        environment=environment,
        custom={},
    )

    if flyte_inputs:
        task = inputs(task, **flyte_inputs)
    if flyte_outputs:
        task = outputs(task, **flyte_outputs)
    return task
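
# A minimal, self-contained sketch of the forge re-signing trick used in
# get_sdk_node above: forge.sign() rewrites a generic
# (wf_params, *args, **kwargs) wrapper so it advertises explicit keyword
# parameters, which flytekit introspects to build the task's interface.
# The parameter names x and y are made up for illustration.
import forge


def wrapper(wf_params, *args, **kwargs):
    print(wf_params, kwargs)


wrapper = forge.sign(forge.arg("wf_params"), forge.arg("x"), forge.arg("y"))(wrapper)

print(forge.repr_callable(wrapper))  # wrapper(wf_params, x, y)
wrapper("params", x=1, y=2)  # prints: params {'x': 1, 'y': 2}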
from flytekit.common.tasks.presto_task import SdkPrestoTask
from flytekit.sdk.tasks import inputs
from flytekit.sdk.types import Types
from flytekit.sdk.workflow import Input, Output, workflow_class

schema = Types.Schema([("a", Types.String), ("b", Types.Integer)])

presto_task = SdkPrestoTask(
    task_inputs=inputs(ds=Types.String, rg=Types.String),
    statement="SELECT * FROM hive.city.fact_airport_sessions WHERE ds = '{{ .Inputs.ds}}' LIMIT 10",
    output_schema=schema,
    routing_group="{{ .Inputs.rg }}",
    # catalog="hive",
    # schema="city",
)


@workflow_class()
class PrestoWorkflow(object):
    ds = Input(Types.String, required=True, help="Test string with no default")
    # routing_group = Input(Types.String, required=True, help="Test string with no default")

    p_task = presto_task(ds=ds, rg="etl")

    output_a = Output(p_task.outputs.results, sdk_type=schema)
from flytekit.contrib.notebook.tasks import python_notebook, spark_notebook
from flytekit.sdk.tasks import inputs, outputs
from flytekit.sdk.types import Types
from flytekit.sdk.workflow import Input, workflow_class

interactive_python = python_notebook(
    notebook_path="../../../../notebook-task-examples/python-notebook.ipynb",
    inputs=inputs(pi=Types.Float),
    outputs=outputs(out=Types.Float),
    cpu_request="1",
    memory_request="1G",
)

interactive_spark = spark_notebook(
    notebook_path="../../../../notebook-task-examples/spark-notebook-pi.ipynb",
    inputs=inputs(partitions=Types.Integer),
    outputs=outputs(pi=Types.Float),
)


@workflow_class
class FlyteNotebookSparkWorkflow(object):
    partitions = Input(Types.Integer, default=10)
    out1 = interactive_spark(partitions=partitions)
    out2 = interactive_python(pi=out1.outputs.pi)
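
# A hedged sketch of the final cell of python-notebook.ipynb. Notebook
# tasks only surface values the notebook records explicitly; the
# record_outputs helper, its import path, and its signature are
# assumptions based on flytekit 0.x's contrib notebook support.
from flytekit.contrib.notebook import record_outputs

pi = 3.14159
record_outputs(out=pi)  # surfaces `out` to match outputs(out=Types.Float)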
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from flytekit.sdk.types import Types
from flytekit.sdk.tasks import inputs, outputs
from flytekit.sdk.workflow import workflow_class, Input
from flytekit.contrib.notebook.tasks import python_notebook

# The path to the notebook that this task executes.
interactive_python = python_notebook(
    notebook_path="./notebook-task-examples/python-notebook.ipynb",
    inputs=inputs(pi=Types.Float),
    outputs=outputs(out=Types.Float),
    cpu_request="1",
    memory_request="1G",
)


@workflow_class
class FlyteNotebookWorkflow(object):
    out2 = interactive_python(pi=3.14)