from kubernetes.client.models import V1Container, V1PodSpec

from flytekit import TaskMetadata, map_task, task
from flytekit.configuration import Image, ImageConfig, SerializationSettings
from flytekitplugins.pod import Pod


def test_map_pod_task_serialization():
    pod = Pod(
        pod_spec=V1PodSpec(restart_policy="OnFailure", containers=[V1Container(name="primary")]),
        primary_container_name="primary",
    )

    @task(task_config=pod, environment={"FOO": "bar"})
    def simple_pod_task(i: int):
        pass

    mapped_task = map_task(simple_pod_task, metadata=TaskMetadata(retries=1))
    default_img = Image(name="default", fqn="test", tag="tag")
    serialization_settings = SerializationSettings(
        project="project",
        domain="domain",
        version="version",
        env={"FOO": "baz"},
        image_config=ImageConfig(default_image=default_img, images=[default_img]),
    )

    # Test that the target is correctly serialized with an updated command.
    pod_spec = mapped_task.get_k8s_pod(serialization_settings).pod_spec
    assert len(pod_spec["containers"]) == 1
    assert pod_spec["containers"][0]["args"] == [
        "pyflyte-map-execute",
        "--inputs",
        "{{.input}}",
        "--output-prefix",
        "{{.outputPrefix}}",
        "--raw-output-data-prefix",
        "{{.rawOutputDataPrefix}}",
        "--checkpoint-path",
        "{{.checkpointOutputPrefix}}",
        "--prev-checkpoint",
        "{{.prevCheckpointPrefix}}",
        "--resolver",
        "flytekit.core.python_auto_container.default_task_resolver",
        "--",
        "task-module",
        "tests.test_pod",
        "task-name",
        "simple_pod_task",
    ]
    assert {"primary_container_name": "primary"} == mapped_task.get_config(serialization_settings)
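# %%
# A minimal sketch (not part of the original test; the names here are illustrative) of map_task semantics with a
# plain Python task: the mapped task accepts a list and runs one invocation of the wrapped task per element.
from typing import List

from flytekit import map_task, task, workflow


@task
def square(i: int) -> int:
    return i * i


@workflow
def squares_wf(xs: List[int]) -> List[int]:
    return map_task(square)(i=xs)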
# %%
# Observe that the base class is Generic; it is parameterized with the desired config class.
#
# .. note::
#
#    To create a task-decorator-based plugin, the config class is required. In this example, we are creating a
#    named-class plugin, so this construct does not need a plugin config.
#
# We will cover an example of config objects in a subsequent tutorial.

# %%
# Actual Usage
# ^^^^^^^^^^^^^
sensor = WaitForObjectStoreFile(
    name="my-objectstore-sensor",
    metadata=TaskMetadata(retries=10, timeout=timedelta(minutes=20)),
    poll_interval=timedelta(seconds=1),
)


@task
def print_file(path: str) -> str:
    print(path)
    return path


@workflow
def my_workflow(path: str) -> str:
    return print_file(path=sensor(path=path))
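# %%
# A minimal sketch (hypothetical names; not the sensor above) of such a config-parameterized plugin: the plugin
# task subclasses ``PythonTask`` parameterized with its config class and receives an instance of that config at
# construction time.
from dataclasses import dataclass

from flytekit.core.base_task import PythonTask
from flytekit.core.interface import Interface


@dataclass
class MySensorConfig:
    bucket: str


class MySensorTask(PythonTask[MySensorConfig]):
    def __init__(self, name: str, config: MySensorConfig, **kwargs):
        super().__init__(
            task_type="my-sensor",  # hypothetical task type
            name=name,
            task_config=config,
            interface=Interface(inputs={"path": str}, outputs={"path": str}),
            **kwargs,
        )

    def execute(self, **kwargs) -> str:
        # A real sensor would poll self.task_config.bucket for the file here.
        return kwargs["path"]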
# %%
# Finally, the Flytekit plugin ``SagemakerBuiltinAlgorithmsTask`` is used to create a task that wraps the algorithm.
# This task does not have a user-defined function, since the actual algorithm is pre-defined in SageMaker, but it
# still has the same set of properties as any other Flyte task: caching, resource specification, versioning, etc.
xgboost_train_task = SagemakerBuiltinAlgorithmsTask(
    name="xgboost_trainer",
    task_config=SagemakerTrainingJobConfig(
        algorithm_specification=alg_spec,
        training_job_resource_config=TrainingJobResourceConfig(
            instance_type="ml.m4.xlarge",
            instance_count=1,
            volume_size_in_gb=25,
        ),
    ),
    metadata=TaskMetadata(cache_version="1.0", cache=True),
)

# %%
# :ref:`single_task_execution` can be used to execute just the task without needing to create a workflow.
# To trigger an execution, you will need to provide:
#
# - Project (``flyteexamples``): the project under which the execution will be created
# - Domain (``development``): the domain where the execution will be created, under the project
# - Inputs: the actual inputs
#
# Pre-built algorithms have a restrictive set of inputs. They always expect:
#
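# %%
# A sketch (assuming a recent flytekit with the ``FlyteRemote`` client; not part of the original example) of
# triggering a single task execution programmatically:
from flytekit.configuration import Config
from flytekit.remote import FlyteRemote

remote = FlyteRemote(Config.auto(), default_project="flyteexamples", default_domain="development")
task_inputs = {}  # placeholder: supply the algorithm's expected inputs here
execution = remote.execute(xgboost_train_task, inputs=task_inputs)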
# %%
# This is the first task and represents the data source. This can be any task that fetches, generates, or modifies
# data to ready it for feature ingestion. It could also be an arbitrary feature engineering task, such as data
# imputation or univariate feature selection.
load_horse_colic_sql = SQLite3Task(
    name="sqlite3.load_horse_colic",
    query_template="select * from data",
    output_schema_type=FlyteSchema,
    task_config=SQLite3Config(
        uri=DATABASE_URI,
        compressed=True,
    ),
    metadata=TaskMetadata(
        cache=True,
        cache_version="1.0",
    ),
)

# %%
# We define two tasks, namely ``store_offline`` and ``load_historical_features``, to store and retrieve the
# historical features.
#
# .. list-table:: Decoding the ``Feast`` Nomenclature
#    :widths: 25 25
#
#    * - ``FeatureStore``
#      - A FeatureStore object is used to define, create, and retrieve features.
#    * - ``Entity``
#      - Represents a collection of entities and associated metadata. It's usually the primary key of your data.
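# %%
# Since ``SQLite3Task`` runs client-side, the task above can also be invoked locally. A quick sketch (assuming
# ``DATABASE_URI`` points at a reachable archive) of pulling the query result into a pandas DataFrame:
horse_colic_df = load_horse_colic_sql().open().all()
print(horse_colic_df.head())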
import datetime

import pandas
from flytekit import SQLTask, TaskMetadata, kwtypes, task, workflow
from flytekit.testing import patch, task_mock
from flytekit.types.schema import FlyteSchema

# %%
# This is a generic SQL task (by default it is not hooked up to any datastore, nor handled by any plugin), so it
# must be mocked.
sql = SQLTask(
    "my-query",
    query_template="SELECT * FROM hive.city.fact_airport_sessions WHERE ds = '{{ .Inputs.ds }}' LIMIT 10",
    inputs=kwtypes(ds=datetime.datetime),
    outputs=kwtypes(results=FlyteSchema),
    metadata=TaskMetadata(retries=2),
)


# %%
# This is a task that can run locally.
@task
def t1() -> datetime.datetime:
    return datetime.datetime.now()


# %%
# Declare a workflow that chains these two tasks together.
@workflow
def my_wf() -> FlyteSchema:
    dt = t1()
    return sql(ds=dt)
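# %%
# A minimal sketch of how the imported ``task_mock`` helper can stand in for the SQL task during a local run of
# ``my_wf``: the context manager replaces the task's execution with a mock whose return value we control. The
# DataFrame contents are illustrative assumptions, not from the original example.
def test_my_wf_with_mock():
    with task_mock(sql) as mock:
        mock.return_value = pandas.DataFrame(data={"x": [1, 2], "y": ["3", "4"]})
        assert (my_wf().open().all() == mock.return_value).all().all()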
@workflow
def my_map_workflow(a: List[int]) -> str:
    mapped_out = map_task(my_pod_map_task, metadata=TaskMetadata(retries=1))(stringify=a)
    coalesced = coalesce(b=mapped_out)
    return coalesced
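# %%
# A quick local sanity check (illustrative; assumes ``my_pod_map_task`` and ``coalesce`` are defined in the
# surrounding module):
if __name__ == "__main__":
    print(my_map_workflow(a=[1, 2, 3]))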
@workflow
def my_map_workflow(a: typing.List[int]) -> str:
    mapped_out = map_task(a_mappable_task, metadata=TaskMetadata(retries=1))(a=a)
    coalesced = coalesce(b=mapped_out)
    return coalesced
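# %%
# For context, a minimal sketch (assumed definitions; the originals live elsewhere in the example) of the two tasks
# referenced above: ``a_mappable_task`` processes a single element, and ``coalesce`` folds the mapped outputs back
# into one string.
import typing

from flytekit import task


@task
def a_mappable_task(a: int) -> str:
    return str(a + 2)


@task
def coalesce(b: typing.List[str]) -> str:
    return ", ".join(b)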