Example #1
0
def get_registrable_entities(
    ctx: flyte_context.FlyteContext,
    options: typing.Optional[Options] = None
) -> typing.List[RegistrableEntity]:
    """
    Serialize every known task, workflow and launch plan, and return the IDL form of those that
    should be registered with Admin.

    :param ctx: Context whose ``serialization_settings`` are passed to every ``get_serializable`` call.
    :param options: Optional serialization options forwarded to ``get_serializable``.
    :raises FlyteValidationException: If ``_find_duplicate_tasks`` reports any duplicated task.
    :return: ``to_flyte_idl()`` output of each entity accepted by ``_should_register_with_admin``.
    """
    serialized = OrderedDict()
    registrable_types = (PythonTask, WorkflowBase, LaunchPlan)

    # Walk a snapshot of the entity list: get_default_launch_plan may create a LaunchPlan object,
    # which gets appended to FlyteEntities.entities while we are still iterating.
    # TODO: Clean up the copy() once that is no longer the case.
    for candidate in flyte_context.FlyteEntities.entities.copy():
        if not isinstance(candidate, registrable_types):
            continue

        get_serializable(serialized,
                         ctx.serialization_settings,
                         candidate,
                         options=options)

        # Workflows additionally get their default launch plan serialized.
        if isinstance(candidate, WorkflowBase):
            default_lp = LaunchPlan.get_default_launch_plan(ctx, candidate)
            get_serializable(serialized, ctx.serialization_settings,
                             default_lp, options)

    registrable = [
        model for model in serialized.values()
        if _should_register_with_admin(model)
    ]
    task_specs: typing.List[task_models.TaskSpec] = [
        model for model in registrable
        if isinstance(model, task_models.TaskSpec)
    ]

    # Duplicate tasks share the same metadata identifiers
    # (see :py:class:`flytekit.common.core.identifier.Identifier`). They are invalid at registration
    # time and usually indicate user error, so the mistake is reported at serialization time.
    duplicate_tasks = _find_duplicate_tasks(task_specs)
    if duplicate_tasks:
        duplicate_task_names = [
            dup.template.id.name for dup in duplicate_tasks
        ]
        raise FlyteValidationException(
            f"Multiple definitions of the following tasks were found: {duplicate_task_names}"
        )

    return [model.to_flyte_idl() for model in registrable]
Example #2
0
def test_map_tasks_only():
    """Check that ``map_task`` raises ``ValueError`` when given a workflow or a launch plan."""
    # A plain workflow used as the (invalid) target of map_task below.
    @workflow
    def wf1(a: int):
        print(f"{a}")

    # The only statement inside the context manager is the decorated definition of wf2,
    # so the ValueError is expected while that workflow is being defined.
    with pytest.raises(ValueError):

        @workflow
        def wf2(a: typing.List[int]):
            return map_task(wf1)(a=a)

    # A launch plan wrapping wf1; passing it to map_task is expected to fail the same way.
    lp = LaunchPlan.create("test", wf1)

    with pytest.raises(ValueError):

        @workflow
        def wf3(a: typing.List[int]):
            return map_task(lp)(a=a)
Example #3
0
        y=alt.Y('new_cases_smoothed_per_million:Q', stack=None),
        color=alt.Color('continent:N', scale=alt.Scale(scheme='set1')),
        tooltip='continent:N').interactive().properties(width='container')

    dp.Report(dp.Plot(plot), dp.DataTable(df)).save(path='report.html',open=True)


@task
def transform_data(url: str) -> pandas.DataFrame:
    """Read the CSV at ``url`` and return the mean of ``new_cases_smoothed_per_million``
    for each (continent, date) pair, with the group keys restored as columns."""
    raw = pd.read_csv(url)
    grouped = raw.groupby(['continent', 'date'])
    averaged = grouped['new_cases_smoothed_per_million'].mean()
    return averaged.reset_index()


@workflow
def datapane_workflow(url: str):
    """Transform the CSV at ``url`` with ``transform_data``, hand the result to
    ``publish_report``, then print a confirmation message."""
    report_frame = transform_data(url=url)
    publish_report(df=report_frame)
    print(f"Report is published for {url}")


# Obtain the default launch plan of datapane_workflow; this runs when the module is imported.
default_lp = LaunchPlan.get_default_launch_plan(
    current_context(),
    datapane_workflow)

# When executed as a script, call the launch plan with the OWID COVID-19 CSV URL and print what it returns.
if __name__ == "__main__":
    print(default_lp(url="https://covid.ourworldindata.org/data/owid-covid-data.csv"))

Example #4
0
# %%
# The `date_formatter_wf` workflow can be scheduled using either the `CronSchedule` or the `FixedRate` object.
#
# Cron Schedules
# ##############
#
# `Cron <https://en.wikipedia.org/wiki/Cron>`_ expression strings use this :ref:`syntax <concepts-schedules>`.
# An incorrect cron schedule expression would lead to failure in triggering the schedule.
from flytekit import CronSchedule, LaunchPlan

# Creates a launch plan that runs every minute.
cron_lp = LaunchPlan.get_or_create(
    name="my_cron_scheduled_lp",
    workflow=date_formatter_wf,
    schedule=CronSchedule(
        # Note that kickoff_time_input_arg matches the workflow input we defined above: kickoff_time.
        # If you are using the AWS scheme of schedules instead of the native scheduler,
        # replace the ``schedule`` parameter with ``cron_expression``.
        schedule="*/1 * * * *",  # This schedule runs every minute
        kickoff_time_input_arg="kickoff_time",
    ),
)

# %%
# The ``kickoff_time_input_arg`` corresponds to the workflow input ``kickoff_time``.
# This means that the workflow gets triggered only after the specified kickoff time, and it thereby runs every minute.

# %%
# Fixed Rate Intervals
# ####################
#
# If you prefer to use an interval rather than a cron scheduler to schedule your workflows, you can use the fixed-rate scheduler.
# A fixed-rate scheduler runs at the specified interval.
@workflow
def int_doubler_wf(a: int) -> str:
    """Pass ``a`` to ``double_int_and_print`` and return that call's result."""
    return double_int_and_print(a=a)

# %%
# Here are three scenarios that can help deepen your understanding of how notifications work:
# 
# 1. Launch Plan triggers email notifications when the workflow execution reaches the ``SUCCEEDED`` phase.
int_doubler_wf_lp = LaunchPlan.get_or_create(
    name="int_doubler_wf",
    workflow=int_doubler_wf,
    # Default value supplied for the workflow input ``a``.
    default_inputs={"a": 4},
    notifications=[
        # Email notification tied to the SUCCEEDED execution phase only.
        Email(
            phases=[WorkflowExecutionPhase.SUCCEEDED],
            recipients_email=["*****@*****.**"],
        )
    ],
)

# %%
# 2. Notifications shine when used for scheduled workflows to alert for failures.
from datetime import timedelta

from flytekit import FixedRate, PagerDuty

int_doubler_wf_scheduled_lp = LaunchPlan.get_or_create(
    name="int_doubler_wf_scheduled",
    workflow=int_doubler_wf,
Example #6
0
    print(formatted_kickoff_time)


# %%
# Cron Expression
# ---------------
# Cron expression strings use the `AWS syntax <http://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html#CronExpressions>`_.
# These are validated at launch plan registration time.
from flytekit import CronSchedule, LaunchPlan

# Creates a launch plan that runs at 10am UTC every day.
cron_lp = LaunchPlan.create(
    "my_cron_scheduled_lp",
    date_formatter_wf,
    schedule=CronSchedule(
        # Note that kickoff_time_input_arg matches the workflow input we defined above: kickoff_time
        # AWS cron field order: minutes, hours, day-of-month, month, day-of-week, year.
        cron_expression="0 10 * * ? *",
        kickoff_time_input_arg="kickoff_time",
    ),
)

# %%
# Fixed Rate
# ----------
# If you prefer to schedule your workflows at a fixed interval rather than with the cron syntax, this is currently
# supported for Flyte deployments hosted on AWS.
# To run ``date_formatter_wf`` every 10 minutes, read on:

from datetime import timedelta

from flytekit import FixedRate, LaunchPlan
Example #7
0
from datetime import datetime
from flytekit import LaunchPlan, task, workflow
from flytekit.models.common import AuthRole

@task
def print_hello_world() -> str:
    """Print ``Hello, World`` to stdout and return the string ``"Hello"``."""
    greeting = "Hello"
    print("Hello, World")
    return greeting


@workflow
def hello_world(
    styx_parameter: datetime,
    styx_execution_id: str,
    styx_trigger_id: str,
    styx_trigger_type: str,
    styx_workflow_id: str,
) -> str:
    """Run ``print_hello_world`` and return its result.

    The ``styx_*`` inputs are part of the workflow interface but are not used in the body.
    """
    return print_hello_world()


# Launch plan "morning_greeting" for hello_world, with an auth role that names the "e2e-test-sa" Kubernetes service account.
lp = LaunchPlan.create("morning_greeting", hello_world, auth_role=AuthRole(kubernetes_service_account="e2e-test-sa"))
Example #8
0
from flytekit import task, LaunchPlan, workflow, current_context


@task
def square(val: int) -> int:
    """Return ``val`` multiplied by itself."""
    squared = val * val
    return squared


@workflow
def my_wf(val: int) -> int:
    """Forward ``val`` to the ``square`` task and return its output."""
    return square(val=val)


# Obtain the default launch plan of my_wf and call it with an explicit value for 'val'.
default_lp = LaunchPlan.get_default_launch_plan(current_context(), my_wf)
square_3 = default_lp(val=3)

# %%
# The following shows how to specify a user-defined launch plan that defaults the value of 'val' to 4.
my_lp = LaunchPlan.create("default_4_lp", my_wf, default_inputs={"val": 4})
# No argument is passed here, so the call relies on the default input declared above.
square_4 = my_lp()
# Here 'val' is passed explicitly instead of relying on the default.
square_5 = my_lp(val=5)

# %%
# In some cases you may want to **fix** launch plan inputs, such that they can't be overridden at execution call time.
# Here 'val' is fixed to 2, in line with the launch plan name ("always_2_lp") and the result name (square_2).
my_fixed_lp = LaunchPlan.create("always_2_lp", my_wf, fixed_inputs={"val": 2})
square_2 = my_fixed_lp()
# error:
# square_1 = my_fixed_lp(val=1)
Example #9
0
    wordCount = dict(Counter(words))
    return wordCount


# %%
# We define a workflow that executes the previously defined task.
@workflow
def ext_workflow(my_input: str) -> Dict:
    """Pass ``my_input`` to ``count_freq_words`` as ``input_string1`` and return its output."""
    return count_freq_words(input_string1=my_input)


# %%
# Next, we create a launch plan.
external_lp = LaunchPlan.get_or_create(
    ext_workflow,  # workflow this launch plan is created for
    "parent_workflow_execution",  # presumably the launch plan name (second positional argument) — confirm against LaunchPlan.get_or_create
)

# %%
# We define another task that returns the repeated keys (in our case, words) from a dictionary.


@task
def count_repetitive_words(word_counter: Dict) -> typing.List[str]:
    """Return the keys of ``word_counter`` whose value is greater than 1, in iteration order."""
    repeated = []
    for word, occurrences in word_counter.items():
        if occurrences > 1:
            repeated.append(word)
    return repeated


# %%
# We define a workflow that triggers the launch plan of the previously-defined workflow.
@workflow
Example #10
0
def double(a: int) -> int:
    """Return twice the value of ``a``."""
    doubled = 2 * a
    return doubled


@task
def add(a: int, b: int) -> int:
    """Return the sum of ``a`` and ``b``."""
    total = a + b
    return total


@workflow
def my_childwf(a: int = 42) -> int:
    """Double ``a`` (default 42) with the ``double`` task and return the result."""
    return double(a=a)


# Launch plan for my_childwf named "my_fixed_child_lp", carrying the label l1=v1.
child_lp = LaunchPlan.get_or_create(my_childwf,
                                    name="my_fixed_child_lp",
                                    labels=Labels({"l1": "v1"}))


@workflow
def parent_wf(a: int) -> int:
    """Double ``a``, feed the doubled value to ``child_lp``, and return the sum of the
    doubled value and the launch plan's output."""
    doubled = double(a=a)
    child_result = child_lp(a=doubled)
    return add(a=doubled, b=child_result)


# When executed as a script, call parent_wf with a=3 and print the result.
if __name__ == "__main__":
    print(f"Running parent_wf(a=3) {parent_wf(a=3)}")