def get_registrable_entities(
    ctx: flyte_context.FlyteContext,
    options: typing.Optional[Options] = None,
) -> typing.List[RegistrableEntity]:
    """
    Returns all entities that can be serialized and should be sent over to Flyte backend. This will filter any
    entities that are not known to Admin.

    Every task, workflow and launch plan found in ``FlyteEntities.entities`` is serialized; for each workflow its
    default launch plan is serialized as well. The serialized models are then filtered with
    ``_should_register_with_admin`` and converted with ``to_flyte_idl()``.

    :param ctx: Context providing ``serialization_settings``; also handed to ``LaunchPlan.get_default_launch_plan``.
    :param options: Optional options forwarded unchanged to every ``get_serializable`` call.
    :return: The ``to_flyte_idl()`` form of each entity that passed the filter, in serialization order.
    :raises FlyteValidationException: If ``_find_duplicate_tasks`` reports any duplicated task specs.
    """
    new_api_serializable_entities = OrderedDict()
    # TODO: Clean up the copy() - it's here because we call get_default_launch_plan, which may create a LaunchPlan
    #  object, which gets added to the FlyteEntities.entities list, which we're iterating over.
    for entity in flyte_context.FlyteEntities.entities.copy():
        if not isinstance(entity, (PythonTask, WorkflowBase, LaunchPlan)):
            continue

        get_serializable(new_api_serializable_entities, ctx.serialization_settings, entity, options=options)

        if isinstance(entity, WorkflowBase):
            # Workflows additionally get their default launch plan serialized.
            lp = LaunchPlan.get_default_launch_plan(ctx, entity)
            get_serializable(new_api_serializable_entities, ctx.serialization_settings, lp, options=options)

    entities_to_be_serialized = [
        model for model in new_api_serializable_entities.values() if _should_register_with_admin(model)
    ]

    serializable_tasks: typing.List[task_models.TaskSpec] = [
        entity for entity in entities_to_be_serialized if isinstance(entity, task_models.TaskSpec)
    ]
    # Detect if any of the tasks is duplicated. Duplicate tasks are defined as having the same
    # metadata identifiers (see :py:class:`flytekit.common.core.identifier.Identifier`). Duplicate
    # tasks are considered invalid at registration
    # time and usually indicate user error, so we catch this common mistake at serialization time.
    duplicate_tasks = _find_duplicate_tasks(serializable_tasks)
    if duplicate_tasks:
        duplicate_task_names = [task.template.id.name for task in duplicate_tasks]
        raise FlyteValidationException(
            f"Multiple definitions of the following tasks were found: {duplicate_task_names}"
        )

    return [v.to_flyte_idl() for v in entities_to_be_serialized]
def test_map_tasks_only():
    """Check that ``map_task`` raises ``ValueError`` when given a workflow or a launch plan."""

    @workflow
    def wf1(a: int):
        print(f"{a}")

    # Wrapping a workflow: the decorated definition is the only statement in
    # the ``raises`` block, so the ValueError must surface while wf2 is defined.
    with pytest.raises(ValueError):

        @workflow
        def wf2(a: typing.List[int]):
            return map_task(wf1)(a=a)

    lp = LaunchPlan.create("test", wf1)

    # Same expectation when the mapped target is a launch plan built from wf1.
    with pytest.raises(ValueError):

        @workflow
        def wf3(a: typing.List[int]):
            return map_task(lp)(a=a)
y=alt.Y('new_cases_smoothed_per_million:Q', stack=None), color=alt.Color('continent:N', scale=alt.Scale(scheme='set1')), tooltip='continent:N').interactive().properties(width='container') dp.Report(dp.Plot(plot), dp.DataTable(df)).save(path='report.html',open=True) @task def transform_data(url: str) -> pandas.DataFrame: dataset = pd.read_csv(url) df = dataset.groupby( ['continent', 'date'])['new_cases_smoothed_per_million'].mean().reset_index() return df @workflow def datapane_workflow(url: str): df = transform_data(url=url) publish_report(df=df) print(f"Report is published for {url}") default_lp = LaunchPlan.get_default_launch_plan( current_context(), datapane_workflow) if __name__ == "__main__": print(default_lp(url="https://covid.ourworldindata.org/data/owid-covid-data.csv"))
# %% # The `date_formatter_wf` workflow can be scheduled using either the `CronSchedule` or the `FixedRate` object. # # Cron Schedules # ############## # # `Cron <https://en.wikipedia.org/wiki/Cron>`_ expression strings use this :ref:`syntax <concepts-schedules>`. # An incorrect cron schedule expression would lead to failure in triggering the schedule. from flytekit import CronSchedule, LaunchPlan # creates a launch plan that runs every minute. cron_lp = LaunchPlan.get_or_create( name="my_cron_scheduled_lp", workflow=date_formatter_wf, schedule=CronSchedule( # Note that kickoff_time_input_arg matches the workflow input we defined above: kickoff_time # But in case you are using the AWS scheme of schedules and not using the native scheduler then switch over the schedule parameter with cron_expression schedule="*/1 * * * *", # Following schedule runs every min kickoff_time_input_arg="kickoff_time", ), ) # %% # The ``kickoff_time_input_arg`` corresponds to the workflow input ``kickoff_time``. # This means that the workflow gets triggered only after the specified kickoff time, and it thereby runs every minute. # %% # Fixed Rate Intervals # #################### # # If you prefer to use an interval rather than a cron scheduler to schedule your workflows, you can use the fixed-rate scheduler. # A fixed-rate scheduler runs at the specified interval.
@workflow def int_doubler_wf(a: int) -> str: doubled = double_int_and_print(a=a) return doubled # %% # Here are three scenarios that can help deepen your understanding of how notifications work: # # 1. Launch Plan triggers email notifications when the workflow execution reaches the ``SUCCEEDED`` phase. int_doubler_wf_lp = LaunchPlan.get_or_create( name="int_doubler_wf", workflow=int_doubler_wf, default_inputs={"a": 4}, notifications=[ Email( phases=[WorkflowExecutionPhase.SUCCEEDED], recipients_email=["*****@*****.**"], ) ], ) # %% # 2. Notifications shine when used for scheduled workflows to alert for failures. from datetime import timedelta from flytekit import FixedRate, PagerDuty int_doubler_wf_scheduled_lp = LaunchPlan.get_or_create( name="int_doubler_wf_scheduled", workflow=int_doubler_wf,
print(formatted_kickoff_time) # %% # Cron Expression # --------------- # Cron expression strings use the `AWS syntax <http://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html#CronExpressions>`_. # These are validated at launch plan registration time. from flytekit import CronSchedule, LaunchPlan # creates a launch plan that runs at 10am UTC every day. cron_lp = LaunchPlan.create( "my_cron_scheduled_lp", date_formatter_wf, schedule=CronSchedule( # Note that kickoff_time_input_arg matches the workflow input we defined above: kickoff_time cron_expression="0 10 * * ? *", kickoff_time_input_arg="kickoff_time", ), ) # %% # Fixed Rate # ---------- # If you prefer to use an interval rather than the cron syntax to schedule your workflows, this is currently supported # for Flyte deployments hosted on AWS. # To run ``date_formatter_wf`` every 10 minutes read on below: from datetime import timedelta from flytekit import FixedRate, LaunchPlan
from datetime import datetime

from flytekit import LaunchPlan, task, workflow
from flytekit.models.common import AuthRole


@task
def print_hello_world() -> str:
    """Print ``Hello, World`` to stdout and return the string ``"Hello"``."""
    print("Hello, World")
    return "Hello"


@workflow
def hello_world(
    styx_parameter: datetime, styx_execution_id: str, styx_trigger_id: str, styx_trigger_type: str, styx_workflow_id: str
) -> str:
    """Run ``print_hello_world`` and return its output.

    None of the ``styx_*`` inputs are read by the body.
    NOTE(review): presumably they exist so an external scheduler can pass its metadata - confirm.
    """
    hello = print_hello_world()
    return hello


# Launch plan named "morning_greeting" for ``hello_world``, created with an auth role
# that names the Kubernetes service account "e2e-test-sa".
lp = LaunchPlan.create("morning_greeting", hello_world, auth_role=AuthRole(kubernetes_service_account="e2e-test-sa"))
from flytekit import task, LaunchPlan, workflow, current_context


@task
def square(val: int) -> int:
    """Return ``val`` multiplied by itself."""
    return val * val


@workflow
def my_wf(val: int) -> int:
    """Single-step workflow that returns the square of ``val``."""
    result = square(val=val)
    return result


# Fetch the workflow's default launch plan and call it with an explicit input.
default_lp = LaunchPlan.get_default_launch_plan(current_context(), my_wf)
square_3 = default_lp(val=3)

# %%
# The following shows how to specify a user-defined launch plan that defaults the value of 'val' to 4.
my_lp = LaunchPlan.create("default_4_lp", my_wf, default_inputs={"val": 4})
square_4 = my_lp()
square_5 = my_lp(val=5)

# %%
# In some cases you may want to **fix** launch plan inputs, such that they can't be overridden at execution call time.
# The fixed value is 2 so that it agrees with the launch plan name ("always_2_lp") and the ``square_2`` result below.
my_fixed_lp = LaunchPlan.create("always_2_lp", my_wf, fixed_inputs={"val": 2})
square_2 = my_fixed_lp()
# error:
# square_1 = my_fixed_lp(val=1)
wordCount = dict(Counter(words)) return wordCount # %% # We define a workflow that executes the previously defined task. @workflow def ext_workflow(my_input: str) -> Dict: result = count_freq_words(input_string1=my_input) return result # %% # Next, we create a launch plan. external_lp = LaunchPlan.get_or_create( ext_workflow, "parent_workflow_execution", ) # %% # We define another task that returns the repeated keys (in our case, words) from a dictionary. @task def count_repetitive_words(word_counter: Dict) -> typing.List[str]: repeated_words = [key for key, value in word_counter.items() if value > 1] return repeated_words # %% # We define a workflow that triggers the launch plan of the previously-defined workflow. @workflow
def double(a: int) -> int:
    """Return ``a`` multiplied by two."""
    # NOTE(review): the workflows below call this with keyword arguments like the
    # ``add`` task, so a decorator presumably sits above this chunk - confirm.
    return a * 2


@task
def add(a: int, b: int) -> int:
    """Return the sum of ``a`` and ``b``."""
    return a + b


@workflow
def my_childwf(a: int = 42) -> int:
    """Child workflow: double ``a`` (42 when not supplied) and return the result."""
    b = double(a=a)
    return b


# Launch plan for the child workflow, named "my_fixed_child_lp" and labelled l1=v1.
child_lp = LaunchPlan.get_or_create(my_childwf, name="my_fixed_child_lp", labels=Labels({"l1": "v1"}))


@workflow
def parent_wf(a: int) -> int:
    """Double ``a``, feed that value to ``child_lp``, then add the doubled value to the child's output."""
    x = double(a=a)
    y = child_lp(a=x)
    z = add(a=x, b=y)
    return z


if __name__ == "__main__":
    print(f"Running parent_wf(a=3) {parent_wf(a=3)}")