Example #1
def materialization_schedule():
    # create a daily partition set (date_partition_range defaults to daily); the schedule below ticks every minute
    schedule_name = "many_events_partitioned"
    partition_set = PartitionSetDefinition(
        name="many_events_minutely",
        pipeline_name="many_events",
        partition_fn=date_partition_range(start=datetime.datetime(2020, 1, 1)),
        run_config_fn_for_partition=lambda _: {"storage": {
            "filesystem": {}
        }},
    )

    def _should_execute(context):
        return backfill_should_execute(context, partition_set)

    return partition_set.create_schedule_definition(
        schedule_name=schedule_name,
        cron_schedule="* * * * *",  # tick every minute
        partition_selector=backfilling_partition_selector,
        should_execute=_should_execute,
    )
Example #2
def define_partitions():
    integer_set = PartitionSetDefinition(
        name="integer_partition",
        pipeline_name="no_config_pipeline",
        solid_selection=['return_hello'],
        mode="default",
        partition_fn=lambda: [Partition(i) for i in range(10)],
        run_config_fn_for_partition=lambda _: {"storage": {
            "filesystem": {}
        }},
    )

    enum_set = PartitionSetDefinition(
        name="enum_partition",
        pipeline_name="noop_pipeline",
        partition_fn=lambda: ["one", "two", "three"],
        run_config_fn_for_partition=lambda _: {"storage": {
            "filesystem": {}
        }},
    )

    return [integer_set, enum_set]
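
A small usage sketch (not part of the original test code): enumerating the integer partitions defined above. It assumes, as elsewhere in these excerpts, that Partition names default to the string form of their values.

integer_set, enum_set = define_partitions()
for partition in integer_set.get_partitions():
    # Expected names "0" through "9" (Partition.name defaulting to str(value) is an assumption here)
    print(partition.name)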
Example #3
def materialization_schedule():
    # create a daily partition set (date_partition_range defaults to daily); the schedule below ticks every minute
    schedule_name = 'many_events_partitioned'
    partition_set = PartitionSetDefinition(
        name='many_events_minutely',
        pipeline_name='many_events',
        partition_fn=date_partition_range(start=datetime.datetime(2020, 1, 1)),
        environment_dict_fn_for_partition=lambda _:
        {'storage': {
            'filesystem': {}
        }},
    )

    def _should_execute(context):
        return backfill_should_execute(context, partition_set, schedule_name)

    return partition_set.create_schedule_definition(
        schedule_name=schedule_name,
        cron_schedule="* * * * *",  # tick every minute
        partition_selector=backfilling_partition_selector,
        should_execute=_should_execute,
    )
Example #4
def define_bar_partitions():
    def error_name():
        raise Exception("womp womp")

    def error_config(_):
        raise Exception("womp womp")

    return {
        "baz_partitions":
        PartitionSetDefinition(
            name="baz_partitions",
            pipeline_name="baz",
            partition_fn=lambda: string.digits,
            run_config_fn_for_partition=lambda partition: {
                "solids": {
                    "do_input": {
                        "inputs": {
                            "x": {
                                "value": partition.value
                            }
                        }
                    }
                }
            },
        ),
        "error_name_partitions":
        PartitionSetDefinition(
            name="error_name_partitions",
            pipeline_name="baz",
            partition_fn=error_name,
        ),
        "error_config_partitions":
        PartitionSetDefinition(
            name="error_config_partitions",
            pipeline_name="baz",
            partition_fn=error_config,
        ),
    }
Example #5
def backfill_test_schedule():
    schedule_name = 'backfill_unreliable_weekly'
    # create weekly partition set
    partition_set = PartitionSetDefinition(
        name='unreliable_weekly',
        pipeline_name='unreliable_pipeline',
        partition_fn=date_partition_range(
            # first Sunday of the year
            start=datetime.datetime(2020, 1, 5),
            delta=datetime.timedelta(weeks=1),
        ),
        run_config_fn_for_partition=lambda _: {'storage': {'filesystem': {}}},
    )

    def _should_execute(context):
        return backfill_should_execute(context, partition_set)

    return partition_set.create_schedule_definition(
        schedule_name=schedule_name,
        cron_schedule="* * * * *",  # tick every minute
        partition_selector=backfilling_partition_selector,
        should_execute=_should_execute,
    )
Example #6
def backfill_test_schedule():
    schedule_name = "backfill_unreliable_weekly"
    # create weekly partition set
    partition_set = PartitionSetDefinition(
        name="unreliable_weekly",
        pipeline_name="unreliable_pipeline",
        partition_fn=date_partition_range(
            # first Sunday of the year
            start=datetime.datetime(2020, 1, 5),
            delta_range="weeks",
        ),
    )

    def _should_execute(context):
        return backfill_should_execute(context, partition_set)

    return partition_set.create_schedule_definition(
        schedule_name=schedule_name,
        cron_schedule="* * * * *",  # tick every minute
        partition_selector=backfilling_partition_selector,
        should_execute=_should_execute,
        execution_timezone=_toys_tz_info(),
    )
Example #7
def longitudinal_schedule():
    from .longitudinal import longitudinal_config

    schedule_name = 'longitudinal_demo'

    partition_set = PartitionSetDefinition(
        name='ingest_and_train',
        pipeline_name='longitudinal_pipeline',
        partition_fn=date_partition_range(start=datetime.datetime(2020, 1, 1)),
        run_config_fn_for_partition=longitudinal_config,
    )

    def _should_execute(context):
        return backfill_should_execute(context, partition_set, retry_failed=True)

    def _partition_selector(context, partition_set):
        return backfilling_partition_selector(context, partition_set, retry_failed=True)

    return partition_set.create_schedule_definition(
        schedule_name=schedule_name,
        cron_schedule="*/5 * * * *",  # tick every 5 minutes
        partition_selector=_partition_selector,
        should_execute=_should_execute,
    )
Example #8
def define_partitions():
    integer_set = PartitionSetDefinition(
        name="integer_partition",
        pipeline_name="no_config_pipeline",
        solid_selection=["return_hello"],
        mode="default",
        partition_fn=lambda: [Partition(i) for i in range(10)],
        run_config_fn_for_partition=lambda _:
        {"intermediate_storage": {
            "filesystem": {}
        }},
        tags_fn_for_partition=lambda partition: {"foo": partition.name},
    )

    enum_set = PartitionSetDefinition(
        name="enum_partition",
        pipeline_name="noop_pipeline",
        partition_fn=lambda: ["one", "two", "three"],
        run_config_fn_for_partition=lambda _:
        {"intermediate_storage": {
            "filesystem": {}
        }},
    )

    chained_partition_set = PartitionSetDefinition(
        name="chained_integer_partition",
        pipeline_name="chained_failure_pipeline",
        mode="default",
        partition_fn=lambda: [Partition(i) for i in range(10)],
        run_config_fn_for_partition=lambda _:
        {"intermediate_storage": {
            "filesystem": {}
        }},
    )

    return [integer_set, enum_set, chained_partition_set]
Example #9
def backfilling_partition_selector(context: ScheduleExecutionContext,
                                   partition_set_def: PartitionSetDefinition):
    runs_by_partition = _fetch_runs_by_partition(context.instance,
                                                 partition_set_def)

    selected = None
    for partition in partition_set_def.get_partitions():
        runs = runs_by_partition[partition.name]

        selected = partition

        # break when we find the first empty partition
        if len(runs) == 0:
            break

    # may return an already-satisfied final partition; rely on should_execute to prevent the schedule from firing in that case
    return selected
Example #10
def define_baz_partitions():
    return [
        PartitionSetDefinition(
            name='baz_partitions',
            pipeline_name='baz',
            partition_fn=lambda: string.ascii_lowercase,
            environment_dict_fn_for_partition=lambda partition:
            {'solids': {
                'do_input': {
                    'inputs': {
                        'x': {
                            'value': partition
                        }
                    }
                }
            }},
        )
    ]
Example #11
def define_baz_partitions():
    return {
        "baz_partitions":
        PartitionSetDefinition(
            name="baz_partitions",
            pipeline_name="baz",
            partition_fn=lambda: string.ascii_lowercase,
            run_config_fn_for_partition=lambda partition: {
                "solids": {
                    "do_input": {
                        "inputs": {
                            "x": {
                                "value": partition.value
                            }
                        }
                    }
                }
            },
        )
    }
Example #12
def backfilling_partition_selector(
    context: ScheduleExecutionContext, partition_set_def: PartitionSetDefinition, retry_failed=False
):
    status_filters = [PipelineRunStatus.SUCCESS] if retry_failed else None
    runs_by_partition = _fetch_runs_by_partition(
        context.instance, partition_set_def, status_filters
    )

    selected = None
    for partition in partition_set_def.get_partitions():
        runs = runs_by_partition[partition.name]

        selected = partition

        # break when we find the first empty partition
        if len(runs) == 0:
            break

    # may return an already-satisfied final partition; rely on should_execute to prevent the schedule from firing in that case
    return selected
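
The helpers _fetch_runs_by_partition and backfill_should_execute referenced in these excerpts are not shown; the sketches below are plausible reconstructions, not the original implementations. They assume the standard dagster run tags ("dagster/partition_set" and "dagster/partition") and the PipelineRunsFilter / PipelineRunStatus classes of the same dagster era, and they follow the retry_failed call signature used above (one older excerpt passes a schedule_name instead).

from collections import defaultdict

# Import path is an assumption for this dagster version; adjust to wherever
# PipelineRunsFilter and PipelineRunStatus live in your release.
from dagster.core.storage.pipeline_run import PipelineRunsFilter, PipelineRunStatus


def _fetch_runs_by_partition(instance, partition_set_def, status_filters=None):
    # Fetch every run tagged with this partition set and group the runs by partition name.
    runs = instance.get_runs(
        PipelineRunsFilter(tags={"dagster/partition_set": partition_set_def.name})
    )
    runs_by_partition = defaultdict(list)
    for run in runs:
        if not status_filters or run.status in status_filters:
            runs_by_partition[run.tags.get("dagster/partition")].append(run)
    return runs_by_partition


def backfill_should_execute(context, partition_set_def, retry_failed=False):
    # Keep the backfill schedule firing while at least one partition has no run yet
    # (or, when retry_failed is set, no successful run yet).
    status_filters = [PipelineRunStatus.SUCCESS] if retry_failed else None
    runs_by_partition = _fetch_runs_by_partition(context.instance, partition_set_def, status_filters)
    return any(
        len(runs_by_partition[partition.name]) == 0
        for partition in partition_set_def.get_partitions()
    )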
Example #13
def define_baz_partitions():
    return {
        'baz_partitions':
        PartitionSetDefinition(
            name='baz_partitions',
            pipeline_name='baz',
            partition_fn=lambda: string.ascii_lowercase,
            run_config_fn_for_partition=lambda partition: {
                'solids': {
                    'do_input': {
                        'inputs': {
                            'x': {
                                'value': partition.value
                            }
                        }
                    }
                }
            },
        )
    }
Example #14
def define_schedules():
    integer_partition_set = PartitionSetDefinition(
        name='scheduled_integer_partitions',
        pipeline_name='no_config_pipeline',
        partition_fn=lambda: [Partition(x) for x in range(1, 10)],
        environment_dict_fn_for_partition=lambda _partition:
        {"storage": {
            "filesystem": {}
        }},
        tags_fn_for_partition=lambda _partition: {"test": "1234"},
    )

    no_config_pipeline_hourly_schedule = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict={"storage": {
            "filesystem": {}
        }},
    )

    no_config_pipeline_hourly_schedule_with_config_fn = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule_with_config_fn",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict_fn=lambda _context: {"storage": {
            "filesystem": {}
        }},
    )

    no_config_should_execute = ScheduleDefinition(
        name="no_config_should_execute",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict={"storage": {
            "filesystem": {}
        }},
        should_execute=lambda _context: False,
    )

    dynamic_config = ScheduleDefinition(
        name="dynamic_config",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict_fn=lambda _context: {"storage": {
            "filesystem": {}
        }},
    )

    partition_based = integer_partition_set.create_schedule_definition(
        schedule_name="partition_based",
        cron_schedule="0 0 * * *",
    )

    partition_based_custom_selector = integer_partition_set.create_schedule_definition(
        schedule_name="partition_based_custom_selector",
        cron_schedule="0 0 * * *",
        partition_selector=last_empty_partition,
    )

    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
    )
    def partition_based_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @daily_schedule(
        pipeline_name='multi_mode_with_loggers',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
        mode='foo_mode',
    )
    def partition_based_multi_mode_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @hourly_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_hourly_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @daily_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=2),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=3)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_daily_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @monthly_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=100),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=4)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_monthly_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @weekly_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=50),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=5)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_weekly_decorator(_date):
        return {"storage": {"filesystem": {}}}

    # Schedules for testing the user error boundary
    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        should_execute=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def should_execute_error_schedule(_date):
        return {"storage": {"filesystem": {}}}

    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        tags_fn_for_date=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def tags_error_schedule(_date):
        return {"storage": {"filesystem": {}}}

    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
    )
    def environment_dict_error_schedule(_date):
        return asdf  # pylint: disable=undefined-variable

    tagged_pipeline_schedule = ScheduleDefinition(
        name="tagged_pipeline_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
        environment_dict={"storage": {
            "filesystem": {}
        }},
    )

    tagged_pipeline_override_schedule = ScheduleDefinition(
        name="tagged_pipeline_override_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
        environment_dict={"storage": {
            "filesystem": {}
        }},
        tags={'foo': 'notbar'},
    )

    invalid_config_schedule = ScheduleDefinition(
        name="invalid_config_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="pipeline_with_enum_config",
        environment_dict={"solids": {
            "takes_an_enum": {
                'config': "invalid"
            }
        }},
    )

    return [
        environment_dict_error_schedule,
        no_config_pipeline_hourly_schedule,
        no_config_pipeline_hourly_schedule_with_config_fn,
        no_config_should_execute,
        dynamic_config,
        partition_based,
        partition_based_custom_selector,
        partition_based_decorator,
        partition_based_multi_mode_decorator,
        solid_selection_hourly_decorator,
        solid_selection_daily_decorator,
        solid_selection_monthly_decorator,
        solid_selection_weekly_decorator,
        should_execute_error_schedule,
        tagged_pipeline_schedule,
        tagged_pipeline_override_schedule,
        tags_error_schedule,
        invalid_config_schedule,
    ]
Example #15
import datetime

from dagster import Partition, PartitionSetDefinition, ScheduleDefinition, daily_schedule, schedules
from dagster.core.definitions.partition import last_empty_partition
from dagster.utils.test import FilesytemTestScheduler

integer_partition_set = PartitionSetDefinition(
    name='scheduled_integer_partitions',
    pipeline_name='no_config_pipeline',
    partition_fn=lambda: [Partition(x) for x in range(1, 10)],
    environment_dict_fn_for_partition=lambda _partition:
    {"storage": {
        "filesystem": {}
    }},
    tags_fn_for_partition=lambda _partition: {"test": "1234"},
)


@schedules(scheduler=FilesytemTestScheduler)
def define_scheduler():

    no_config_pipeline_hourly_schedule = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict={"storage": {
            "filesystem": {}
        }},
    )

    no_config_pipeline_hourly_schedule_with_config_fn = ScheduleDefinition(
Example #16
                }
            },
            "daily_top_action": {
                "outputs": {
                    "result": {
                        "partitions": [date]
                    }
                }
            },
        }
    }


asset_lineage_partition_set = PartitionSetDefinition(
    name="date_partition_set",
    pipeline_name="asset_lineage_pipeline",
    partition_fn=get_date_partitions,
    run_config_fn_for_partition=run_config_for_date_partition,
)


def metadata_for_actions(df):
    return {
        "min_score": int(df["score"].min()),
        "max_score": int(df["score"].max()),
        "sample rows": EventMetadata.md(df[:5].to_markdown()),
    }


class MyDatabaseIOManager(PickledObjectFilesystemIOManager):
    def _get_path(self, context):
        keys = context.get_run_scoped_output_identifier()
Example #17
        Partition("GOOG"),
        Partition("MSFT"),
        Partition("TSLA"),
    ]


def environment_dict_for_ticker_partition(partition):
    ticker_symbol = partition.value

    return {
        'solids': {
            'query_historical_stock_data': {
                'config': {
                    'symbol': ticker_symbol
                }
            }
        }
    }


stock_ticker_partition_sets = PartitionSetDefinition(
    name="stock_ticker_partition_sets",
    pipeline_name="compute_total_stock_volume",
    partition_fn=get_stock_ticker_partitions,
    environment_dict_fn_for_partition=environment_dict_for_ticker_partition,
)


def define_partitions():
    return [stock_ticker_partition_sets]
Example #18

# start_def
def get_date_partitions():
    """Every day in the month of May, 2020"""
    return [Partition(f"2020-05-{str(day).zfill(2)}") for day in range(1, 32)]


def run_config_for_date_partition(partition):
    date = partition.value
    return {"solids": {"process_data_for_date": {"config": {"date": date}}}}


date_partition_set = PartitionSetDefinition(
    name="date_partition_set",
    pipeline_name="my_data_pipeline",
    partition_fn=get_date_partitions,
    run_config_fn_for_partition=run_config_for_date_partition,
)
# end_def


@solid
def my_solid(_):
    pass


@pipeline
def my_data_pipeline():
    my_solid()
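

A quick check (not from the original snippet) that exercises the definitions above: list the May 2020 partitions and the run config each one would produce.

# Illustrative only: enumerate the partitions and the config built for each.
for partition in date_partition_set.get_partitions():
    print(partition.name, run_config_for_date_partition(partition))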

Example #19
def partition_config_fn(partition):
    return {
        'solids': {
            'announce_partition': {
                'config': {
                    'partition': "Partition is: " + str(partition.value)
                }
            }
        }
    }


ten_days_ago = date.today() - timedelta(days=10)
log_date_set = PartitionSetDefinition(
    name='date_partitions',
    pipeline_name='log_partitions',
    partition_fn=date_partition_range(ten_days_ago),
    environment_dict_fn_for_partition=partition_config_fn,
)

us_states_set = PartitionSetDefinition(
    name='state_partitions',
    pipeline_name='log_partitions',
    partition_fn=us_states,
    environment_dict_fn_for_partition=partition_config_fn,
)


@repository_partitions
def define_repository_partitions():
    return [log_date_set, us_states_set]
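
As a sanity check (not part of the original), partition_config_fn can be called directly with a sample Partition; the state value below is purely illustrative.

# Hypothetical sample; any partition produced by us_states would behave the same way.
sample = Partition('CA')
print(partition_config_fn(sample))
# -> {'solids': {'announce_partition': {'config': {'partition': 'Partition is: CA'}}}}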
Example #20
    return [
        Partition("M"),
        Partition("Tu"),
        Partition("W"),
        Partition("Th"),
        Partition("F"),
        Partition("Sa"),
        Partition("Su"),
    ]


def run_config_for_day_partition(partition):
    day = partition.value
    return {
        'solids': {
            'process_data_for_day': {
                'config': {
                    'day_of_week': day
                }
            }
        }
    }


day_partition_set = PartitionSetDefinition(
    name="day_partition_set",
    pipeline_name="my_pipeline",
    partition_fn=get_day_partition,
    run_config_fn_for_partition=run_config_for_day_partition,
)
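
Other excerpts in this section show how a partition set like this becomes a schedule; a hypothetical wiring for day_partition_set is sketched below (the schedule name and cron string are assumptions, not from the original).

def define_day_partition_schedule():
    # Hypothetical schedule: run my_pipeline daily at midnight against the selected day partition.
    return day_partition_set.create_schedule_definition(
        schedule_name="my_pipeline_daily",
        cron_schedule="0 0 * * *",
    )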
Example #21
def partition_config_fn(partition):
    return {
        'solids': {
            'announce_partition': {
                'config': {
                    'partition': "Partition is: " + str(partition.value)
                }
            }
        }
    }


ten_days_ago = date.today() - timedelta(days=10)
log_date_set = PartitionSetDefinition(
    name='date_partitions',
    pipeline_name='log_partitions',
    partition_fn=date_partition_range(ten_days_ago),
    environment_dict_fn_for_partition=partition_config_fn,
)


def dash_stats_datetime_partition_config(partition):
    current_date = partition.value
    yesterday = current_date - timedelta(days=1)
    date_string = yesterday.strftime("%Y-%m-%d")

    return {
        'resources': {
            'bigquery': None,
            'slack': {
                'config': {
                    'token': os.getenv('SLACK_TOKEN')
Example #22
    previous_month_last_day = date - datetime.timedelta(days=1)
    previous_month_first_day = previous_month_last_day.replace(day=1)

    return {
        'solids': {
            'query_historical_stock_data': {
                'config': {
                    'ds_start': previous_month_first_day.strftime("%Y-%m-%d"),
                    'ds_end': previous_month_last_day.strftime("%Y-%m-%d"),
                    'symbol': 'AAPL',
                }
            }
        }
    }


stock_data_partitions_set = PartitionSetDefinition(
    name="stock_data_partitions_set",
    pipeline_name="compute_total_stock_volume",
    partition_fn=date_partition_range(
        start=datetime.datetime(2018, 1, 1),
        end=datetime.datetime(2019, 1, 1),
        delta=relativedelta(months=1),
    ),
    run_config_fn_for_partition=run_config_fn_for_date,
)


def define_partitions():
    return [stock_data_partitions_set]
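
A brief inspection sketch (not in the original): the range above yields roughly one partition per month of 2018; the exact endpoint handling and the default name format are version-dependent assumptions.

# Illustrative only: print the generated monthly partition names, e.g. "2018-01-01", "2018-02-01", ...
for partition in stock_data_partitions_set.get_partitions():
    print(partition.name)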
Example #23
        start_date = start_date + timedelta(days=1)

    return partitions


def run_config_for_date_partition(partition):
    date = partition.value
    with open(Path(__file__).parent / "realized_trips.yaml", "r") as f:
        config = yaml.safe_load(f)
    config["solids"]["download_brt_raw_realized_trips"]["config"]["date"] = date
    return config


daily_partition_set = PartitionSetDefinition(
    name="daily_partitions",
    pipeline_name="br_rj_riodejaneiro_gtfs_realized_trips",
    partition_fn=get_date_partitions,
    run_config_fn_for_partition=run_config_for_date_partition,
    mode="dev",
)


def daily_partition_selector(context, partition_set):

    partitions = partition_set.get_partitions(context.scheduled_execution_time)
    return partitions[-2]  # second-to-last partition, i.e. the day before the scheduled run day


daily_schedule = daily_partition_set.create_schedule_definition(
    "br_rj_riodejaneiro_gtfs_realized_trips",
    "0 1 * * *",
    partition_selector=daily_partition_selector,
Example #24
def define_schedules():
    integer_partition_set = PartitionSetDefinition(
        name="scheduled_integer_partitions",
        pipeline_name="no_config_pipeline",
        partition_fn=lambda: [Partition(x) for x in range(1, 10)],
        tags_fn_for_partition=lambda _partition: {"test": "1234"},
    )

    no_config_pipeline_hourly_schedule = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
    )

    no_config_pipeline_hourly_schedule_with_config_fn = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule_with_config_fn",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
    )

    no_config_should_execute = ScheduleDefinition(
        name="no_config_should_execute",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        should_execute=lambda _context: False,
    )

    dynamic_config = ScheduleDefinition(
        name="dynamic_config",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
    )

    partition_based = integer_partition_set.create_schedule_definition(
        schedule_name="partition_based",
        cron_schedule="0 0 * * *",
        partition_selector=last_empty_partition,
    )

    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
    )
    def partition_based_decorator(_date):
        return {}

    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
        default_status=DefaultScheduleStatus.RUNNING,
    )
    def running_in_code_schedule(_date):
        return {}

    @daily_schedule(
        pipeline_name="multi_mode_with_loggers",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
        mode="foo_mode",
    )
    def partition_based_multi_mode_decorator(_date):
        return {}

    @hourly_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_hourly_decorator(_date):
        return {}

    @daily_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=today_at_midnight().subtract(days=2),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=3)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_daily_decorator(_date):
        return {}

    @monthly_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=(today_at_midnight().subtract(days=100)).replace(day=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=4)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_monthly_decorator(_date):
        return {}

    @weekly_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=today_at_midnight().subtract(days=50),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=5)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_weekly_decorator(_date):
        return {}

    # Schedules for testing the user error boundary
    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        should_execute=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def should_execute_error_schedule(_date):
        return {}

    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        tags_fn_for_date=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def tags_error_schedule(_date):
        return {}

    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
    )
    def run_config_error_schedule(_date):
        return asdf  # pylint: disable=undefined-variable

    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight("US/Central") -
        datetime.timedelta(days=1),
        execution_timezone="US/Central",
    )
    def timezone_schedule(_date):
        return {}

    tagged_pipeline_schedule = ScheduleDefinition(
        name="tagged_pipeline_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
    )

    tagged_pipeline_override_schedule = ScheduleDefinition(
        name="tagged_pipeline_override_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
        tags={"foo": "notbar"},
    )

    invalid_config_schedule = ScheduleDefinition(
        name="invalid_config_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="pipeline_with_enum_config",
        run_config={"solids": {
            "takes_an_enum": {
                "config": "invalid"
            }
        }},
    )

    return [
        run_config_error_schedule,
        no_config_pipeline_hourly_schedule,
        no_config_pipeline_hourly_schedule_with_config_fn,
        no_config_should_execute,
        dynamic_config,
        partition_based,
        partition_based_decorator,
        partition_based_multi_mode_decorator,
        solid_selection_hourly_decorator,
        solid_selection_daily_decorator,
        solid_selection_monthly_decorator,
        solid_selection_weekly_decorator,
        should_execute_error_schedule,
        tagged_pipeline_schedule,
        tagged_pipeline_override_schedule,
        tags_error_schedule,
        timezone_schedule,
        invalid_config_schedule,
        running_in_code_schedule,
    ]
Example #25
        Partition("GOOG"),
        Partition("MSFT"),
        Partition("TSLA"),
    ]


def run_config_for_ticker_partition(partition):
    ticker_symbol = partition.value

    return {
        "solids": {
            "query_historical_stock_data": {
                "config": {
                    "symbol": ticker_symbol
                }
            }
        }
    }


stock_ticker_partition_sets = PartitionSetDefinition(
    name="stock_ticker_partition_sets",
    pipeline_name="compute_total_stock_volume",
    partition_fn=get_stock_ticker_partitions,
    run_config_fn_for_partition=run_config_for_ticker_partition,
)


def define_partitions():
    return [stock_ticker_partition_sets]
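
Finally, an illustrative call (not part of the original) showing the run config produced for one of the tickers listed above.

# Illustrative only: build the config for the "GOOG" ticker partition.
print(run_config_for_ticker_partition(Partition("GOOG")))
# -> {'solids': {'query_historical_stock_data': {'config': {'symbol': 'GOOG'}}}}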