Example #1
# Imports assumed by this test; exact paths may vary across flytekit versions.
from kubernetes.client import V1Container, V1PodSpec

from flytekit import TaskMetadata, map_task, task
from flytekit.configuration import Image, ImageConfig, SerializationSettings
from flytekitplugins.pod import Pod


def test_map_pod_task_serialization():
    pod = Pod(
        pod_spec=V1PodSpec(restart_policy="OnFailure",
                           containers=[V1Container(name="primary")]),
        primary_container_name="primary",
    )

    @task(task_config=pod, environment={"FOO": "bar"})
    def simple_pod_task(i: int):
        pass

    mapped_task = map_task(simple_pod_task, metadata=TaskMetadata(retries=1))
    default_img = Image(name="default", fqn="test", tag="tag")
    serialization_settings = SerializationSettings(
        project="project",
        domain="domain",
        version="version",
        env={"FOO": "baz"},
        image_config=ImageConfig(default_image=default_img,
                                 images=[default_img]),
    )

    # Test that target is correctly serialized with an updated command
    pod_spec = mapped_task.get_k8s_pod(serialization_settings).pod_spec

    assert len(pod_spec["containers"]) == 1
    assert pod_spec["containers"][0]["args"] == [
        "pyflyte-map-execute",
        "--inputs",
        "{{.input}}",
        "--output-prefix",
        "{{.outputPrefix}}",
        "--raw-output-data-prefix",
        "{{.rawOutputDataPrefix}}",
        "--checkpoint-path",
        "{{.checkpointOutputPrefix}}",
        "--prev-checkpoint",
        "{{.prevCheckpointPrefix}}",
        "--resolver",
        "flytekit.core.python_auto_container.default_task_resolver",
        "--",
        "task-module",
        "tests.test_pod",
        "task-name",
        "simple_pod_task",
    ]
    assert {
        "primary_container_name": "primary"
    } == mapped_task.get_config(serialization_settings)
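
    # For contrast (an added sketch, not in the original test): the unmapped pod task
    # serializes with the regular entrypoint, ``pyflyte-execute``, rather than the
    # ``pyflyte-map-execute`` entrypoint asserted above.
    unmapped_spec = simple_pod_task.get_k8s_pod(serialization_settings).pod_spec
    assert unmapped_spec["containers"][0]["args"][0] == "pyflyte-execute"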
Example #2
#   Observe that the base class is ``Generic``; it is parameterized with the desired config class.
#
# .. note::
#
#   To create a task-decorator-based plugin, the config class is required. In this example we are creating a
#   named-class plugin, and this construct does not need a config.
#
# We will try to cover an example of config objects in a subsequent tutorial.
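
# %%
# The plugin class itself sits just above this excerpt. A minimal, assumed reconstruction (not the tutorial's
# exact code) of what it could look like: a sensor that subclasses ``PythonTask`` and polls until a file shows
# up at the given path.
import time
import typing
from datetime import timedelta

from flytekit import TaskMetadata, task, workflow
from flytekit.core.base_task import PythonTask
from flytekit.core.interface import Interface


class WaitForObjectStoreFile(PythonTask):
    def __init__(self, name: str, poll_interval: timedelta = timedelta(seconds=10), **kwargs):
        super().__init__(
            task_type="object-store-sensor",
            name=name,
            task_config=None,
            interface=Interface(inputs={"path": str}, outputs={"path": str}),
            **kwargs,
        )
        self._poll_interval = poll_interval

    def execute(self, **kwargs) -> typing.Any:
        import os

        # Local-filesystem stand-in for an object-store existence check.
        while not os.path.exists(kwargs["path"]):
            time.sleep(self._poll_interval.total_seconds())
        return kwargs["path"]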

# %%
# Actual Usage
# ^^^^^^^^^^^^^

sensor = WaitForObjectStoreFile(
    name="my-objectstore-sensor",
    metadata=TaskMetadata(retries=10, timeout=timedelta(minutes=20)),
    poll_interval=timedelta(seconds=1),
)


@task
def print_file(path: str) -> str:
    print(path)
    return path


@workflow
def my_workflow(path: str) -> str:
    return print_file(path=sensor(path=path))
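
# %%
# A small local usage sketch (``/tmp/some_file`` is illustrative): the call blocks in the sensor's poll loop
# until the file exists, then ``print_file`` prints and returns the path.
if __name__ == "__main__":
    print(my_workflow(path="/tmp/some_file"))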

Example #3
# %%
# Finally, the Flytekit plugin task ``SagemakerBuiltinAlgorithmsTask`` is used to create a task that wraps the
# built-in algorithm. This task has no user-defined function, since the actual algorithm is pre-defined in
# SageMaker, but it still has the same set of properties as any other Flyte task: caching, resource
# specification, versioning, and so on. (``alg_spec``, the algorithm specification, is defined earlier in the
# example.)
xgboost_train_task = SagemakerBuiltinAlgorithmsTask(
    name="xgboost_trainer",
    task_config=SagemakerTrainingJobConfig(
        algorithm_specification=alg_spec,
        training_job_resource_config=TrainingJobResourceConfig(
            instance_type="ml.m4.xlarge",
            instance_count=1,
            volume_size_in_gb=25,
        ),
    ),
    metadata=TaskMetadata(cache_version="1.0", cache=True),
)


# %%
# :ref:`single_task_execution` can be used to execute just the task, without needing to create a workflow.
# To trigger an execution, you will need to provide:
#
# * Project (``flyteexamples``): the project under which the execution will be created
# * Domain (``development``): the domain under the project where the execution will be created
# * Inputs: the actual inputs
#
# A minimal sketch of such an execution follows.
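from flytekit.configuration import Config
from flytekit.remote import FlyteRemote

# Hedged sketch, not part of the original example: ``Config.auto()`` reads your local Flyte configuration;
# the task inputs are deliberately elided.
remote = FlyteRemote(Config.auto(), default_project="flyteexamples", default_domain="development")
# remote.execute(xgboost_train_task, inputs={...}) would launch the training job.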
# Pre-built algorithms have a restrictive set of inputs. They always expect:
#
Example #4

# %%
# This is the first task and represents the data source. It can be any task that fetches, generates, or modifies
# data to make it ready for feature ingestion. It can also be an arbitrary feature engineering task, such as data
# imputation or univariate feature selection.
from flytekit import TaskMetadata
from flytekit.extras.sqlite3.task import SQLite3Config, SQLite3Task
from flytekit.types.schema import FlyteSchema

# ``DATABASE_URI`` (the compressed horse-colic SQLite database) is defined earlier in the example.
load_horse_colic_sql = SQLite3Task(
    name="sqlite3.load_horse_colic",
    query_template="select * from data",
    output_schema_type=FlyteSchema,
    task_config=SQLite3Config(
        uri=DATABASE_URI,
        compressed=True,
    ),
    metadata=TaskMetadata(
        cache=True,
        cache_version="1.0",
    ),
)


# %%
# We define two tasks, namely ``store_offline`` and ``load_historical_features``, to store and retrieve the
# historical features; a rough sketch of the retrieval task follows the table.
#
# .. list-table:: Decoding the ``Feast`` Nomenclature
#    :widths: 25 25
#
#    * - ``FeatureStore``
#      - A FeatureStore object is used to define, create, and retrieve features.
#    * - ``Entity``
#      - Represents a collection of entities and associated metadata. It's usually the primary key of your data.
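
# %%
# The bodies of the two tasks fall outside this excerpt. A hypothetical sketch of the retrieval side, assuming a
# plain ``feast.FeatureStore`` configured from a local repo, and an illustrative feature view name
# (``horse_colic_stats``):
import pandas
from feast import FeatureStore
from flytekit import task


@task
def load_historical_features_sketch(entity_df: pandas.DataFrame) -> pandas.DataFrame:
    # Join the entity dataframe against the offline store to assemble training data.
    store = FeatureStore(repo_path=".")
    return store.get_historical_features(
        entity_df=entity_df,
        features=["horse_colic_stats:rectal_temperature"],
    ).to_df()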
import datetime

import pandas
from flytekit import SQLTask, TaskMetadata, kwtypes, task, workflow
from flytekit.testing import patch, task_mock
from flytekit.types.schema import FlyteSchema

# %%
# This is a generic SQL task (by default it is not hooked up to any datastore, nor handled by any plugin), so it
# must be mocked to run locally.
sql = SQLTask(
    "my-query",
    query_template="SELECT * FROM hive.city.fact_airport_sessions WHERE ds = '{{ .Inputs.ds }}' LIMIT 10",
    inputs=kwtypes(ds=datetime.datetime),
    outputs=kwtypes(results=FlyteSchema),
    metadata=TaskMetadata(retries=2),
)


# %%
# This is a task that can run locally
@task
def t1() -> datetime.datetime:
    return datetime.datetime.now()


# %%
# Declare a workflow that chains these two tasks together.
@workflow
def my_wf() -> FlyteSchema:
    dt = t1()
    return sql(ds=dt)
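
# %%
# Since the SQL task is not backed by a real datastore, ``task_mock`` (imported above) can stand in for it
# during a local run. A minimal sketch with an illustrative return value:
def test_my_wf_with_mock():
    with task_mock(sql) as mock:
        mock.return_value = pandas.DataFrame(data={"x": [1, 2]})
        result = my_wf()
        assert (result.open().all() == pandas.DataFrame(data={"x": [1, 2]})).all().all()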
Example #6
# ``my_pod_map_task`` and ``coalesce`` are defined elsewhere in the source; the ``@workflow``
# decorator is assumed here.
@workflow
def my_map_workflow(a: List[int]) -> str:
    mapped_out = map_task(my_pod_map_task, metadata=TaskMetadata(retries=1))(stringify=a)
    coalesced = coalesce(b=mapped_out)
    return coalesced
Example #7
# As above, the ``@workflow`` decorator is assumed; ``a_mappable_task`` and ``coalesce`` are defined elsewhere.
@workflow
def my_map_workflow(a: typing.List[int]) -> str:
    mapped_out = map_task(a_mappable_task, metadata=TaskMetadata(retries=1))(a=a)
    coalesced = coalesce(b=mapped_out)
    return coalesced
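

# A quick local sanity sketch (assuming ``a_mappable_task`` and ``coalesce`` are importable here):
# ``map_task`` fans ``a_mappable_task`` out over every element of ``a``, and ``coalesce`` folds the
# mapped results into a single string.
if __name__ == "__main__":
    print(my_map_workflow(a=[1, 2, 3]))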