Ejemplo n.º 1
0
def stock_data_partitions():
    """Return one partition per month for January through April 2019.

    Each ``Partition`` wraps a ``datetime.datetime`` at midnight on the first
    day of its month.
    """
    return [Partition(datetime.datetime(2019, month, 1)) for month in range(1, 5)]
Ejemplo n.º 2
0
def define_partitions():
    """Build the integer, enum and chained-integer partition sets.

    Returns:
        A list ``[integer_set, enum_set, chained_partition_set]``. Every set
        produces the same filesystem-storage run config for any partition.
    """

    def _integer_partitions():
        # Ten partitions wrapping the integers 0 through 9.
        return [Partition(index) for index in range(10)]

    def _filesystem_run_config(_partition):
        # The partition is ignored; a fresh config dict is built per call.
        return {"storage": {"filesystem": {}}}

    def _foo_tag(partition):
        return {"foo": partition.name}

    integer_partitions = PartitionSetDefinition(
        name="integer_partition",
        pipeline_name="no_config_pipeline",
        solid_selection=["return_hello"],
        mode="default",
        partition_fn=_integer_partitions,
        run_config_fn_for_partition=_filesystem_run_config,
        tags_fn_for_partition=_foo_tag,
    )
    enum_partitions = PartitionSetDefinition(
        name="enum_partition",
        pipeline_name="noop_pipeline",
        partition_fn=lambda: ["one", "two", "three"],
        run_config_fn_for_partition=_filesystem_run_config,
    )
    chained_partitions = PartitionSetDefinition(
        name="chained_integer_partition",
        pipeline_name="chained_failure_pipeline",
        mode="default",
        partition_fn=_integer_partitions,
        run_config_fn_for_partition=_filesystem_run_config,
    )
    return [integer_partitions, enum_partitions, chained_partitions]
Ejemplo n.º 3
0
def get_stock_ticker_partitions():
    """Return one partition per stock ticker: AAPL, GOOG, MSFT and TSLA."""
    tickers = ("AAPL", "GOOG", "MSFT", "TSLA")
    return [Partition(ticker) for ticker in tickers]
Ejemplo n.º 4
0
def define_partitions():
    """Build the integer, enum, chained-integer and alphabet partition sets.

    Returns:
        A list ``[integer_set, enum_set, chained_partition_set,
        alphabet_partition_set]`` in that order.
    """

    def _zero_through_nine():
        # Ten partitions wrapping the integers 0 through 9.
        return [Partition(number) for number in range(10)]

    by_integer = PartitionSetDefinition(
        name="integer_partition",
        pipeline_name="no_config_pipeline",
        solid_selection=["return_hello"],
        mode="default",
        partition_fn=_zero_through_nine,
        tags_fn_for_partition=lambda part: {"foo": part.name},
    )
    by_enum = PartitionSetDefinition(
        name="enum_partition",
        pipeline_name="noop_pipeline",
        partition_fn=lambda: ["one", "two", "three"],
    )
    by_chained_integer = PartitionSetDefinition(
        name="chained_integer_partition",
        pipeline_name="chained_failure_pipeline",
        mode="default",
        partition_fn=_zero_through_nine,
    )
    # One partition per lowercase ASCII letter.
    by_letter = PartitionSetDefinition(
        name="alpha_partition",
        pipeline_name="no_config_pipeline",
        partition_fn=lambda: list(string.ascii_lowercase),
    )

    return [by_integer, by_enum, by_chained_integer, by_letter]
Ejemplo n.º 5
0
 def _invalid_partition_selector(_cotnext, _partition_set_def):
     """Return a one-element list holding a hand-built partition named ``made_up``.

     Both arguments are ignored. The partition's value is the pendulum time
     2019-01-27 01:25.
     NOTE(review): judging by the name, this partition is presumably not a
     member of the partition set -- confirm against the caller.
     """
     made_up_time = create_pendulum_time(year=2019, month=1, day=27, hour=1, minute=25)
     return [Partition(value=made_up_time, name="made_up")]
Ejemplo n.º 6
0
def get_date_partitions():
    """Return one partition per calendar day of 2020.

    Partitions are named ``YYYY-MM-DD`` and run from 2020-01-01 through
    2020-12-31 inclusive (366 entries, since 2020 is a leap year).
    """
    first_day = datetime.date(2020, 1, 1)
    # Exclusive upper bound: the first day that is no longer in 2020. The
    # range below must not add 1, or a 2021-01-01 partition would be emitted.
    first_day_of_next_year = datetime.date(2021, 1, 1)
    day_count = (first_day_of_next_year - first_day).days

    return [
        Partition((first_day + datetime.timedelta(days=offset)).strftime("%Y-%m-%d"))
        for offset in range(day_count)
    ]
Ejemplo n.º 7
0
 def get_date_range_partitions():
     """Return daily partitions from ``start`` up to, but excluding, the end date.

     ``start`` and ``end`` are read from the enclosing scope (not visible
     here). When ``end`` is falsy, ``date.today()`` is used as the end date.
     Each partition carries the date as its value and ``YYYY-MM-DD`` as its
     name.
     """
     stop = end or date.today()
     partitions = []
     cursor = start
     while cursor < stop:
         partitions.append(Partition(value=cursor, name=cursor.strftime('%Y-%m-%d')))
         cursor = cursor + timedelta(days=1)
     return partitions
Ejemplo n.º 8
0
def assert_partitioned_schedule_builds(
    job_def: JobDefinition,
    start: datetime,
    end: datetime,
):
    """Check that an execution plan can be created for one partition of a job.

    A partition wrapping the ``(start, end)`` tuple is turned into run config
    through the job's partition set definition, and that config is passed to
    ``create_execution_plan``. Nothing is returned; any exception raised along
    the way propagates to the caller.
    """
    partition_set_def = job_def.get_partition_set_def()
    window_partition = Partition((start, end))
    create_execution_plan(
        job_def,
        run_config=partition_set_def.run_config_for_partition(window_partition),
    )
Ejemplo n.º 9
0
def assert_partitioned_schedule_builds(
    schedule_def: PartitionScheduleDefinition,
    pipeline_def: PipelineDefinition,
    partition: datetime,
):
    """Check that an execution plan can be created for one schedule partition.

    The schedule's partition set produces run config for a partition wrapping
    ``partition``; that config and the schedule's mode are passed to
    ``create_execution_plan``. Nothing is returned; any exception raised along
    the way propagates to the caller.
    """
    partition_set = schedule_def.get_partition_set()
    config_for_partition = partition_set.run_config_for_partition(Partition(partition))
    create_execution_plan(
        pipeline_def,
        run_config=config_for_partition,
        mode=schedule_def.mode,
    )
Ejemplo n.º 10
0
def get_day_partition():
    """Return seven partitions keyed by weekday abbreviation, "M" through "Su"."""
    day_codes = ("M", "Tu", "W", "Th", "F", "Sa", "Su")
    return [Partition(code) for code in day_codes]
Ejemplo n.º 11
0
def get_date_partitions():
    """Return one partition per day from 2021-01-01 up to the current time.

    Partitions are named ``YYYY-MM-DD``. A day is included when its midnight
    is strictly earlier than ``datetime.now()``, so the current day is
    normally the last partition.
    """
    start_date = datetime(2021, 1, 1)
    end_date = datetime.now()

    partitions = []
    # Walk a separate cursor so start_date keeps its meaning throughout.
    current = start_date
    while current < end_date:
        partitions.append(Partition(current.strftime("%Y-%m-%d")))
        current = current + timedelta(days=1)

    return partitions
Ejemplo n.º 12
0
def define_bar_schedules():
    """Return a name-to-schedule mapping with two every-minute schedules.

    ``foo_schedule`` is a plain schedule with empty run config.
    ``partitioned_schedule`` is created from a partition set whose partition
    function returns ``string.digits``; its selector always picks the
    partition ``"7"``.
    """
    digit_partition_set = PartitionSetDefinition(
        name="scheduled_partitions",
        pipeline_name="partitioned_scheduled_pipeline",
        partition_fn=lambda: string.digits,
    )

    foo_schedule = ScheduleDefinition(
        "foo_schedule",
        cron_schedule="* * * * *",
        pipeline_name="test_pipeline",
        run_config={},
    )
    partitioned_schedule = digit_partition_set.create_schedule_definition(
        schedule_name="partitioned_schedule",
        cron_schedule="* * * * *",
        partition_selector=lambda _context, _def: Partition("7"),
    )

    return {
        "foo_schedule": foo_schedule,
        "partitioned_schedule": partitioned_schedule,
    }
Ejemplo n.º 13
0
def define_partitions():
    """Build the integer and enum partition sets.

    Returns:
        A list ``[integer_set, enum_set]``. Both sets produce the same
        filesystem-storage environment dict for any partition.
    """

    def _filesystem_environment(_partition):
        # The partition is ignored; a fresh dict is built per call.
        return {"storage": {"filesystem": {}}}

    by_integer = PartitionSetDefinition(
        name="integer_partition",
        pipeline_name="no_config_pipeline",
        solid_selection=["return_hello"],
        mode="default",
        partition_fn=lambda: [Partition(index) for index in range(10)],
        environment_dict_fn_for_partition=_filesystem_environment,
    )
    by_enum = PartitionSetDefinition(
        name="enum_partition",
        pipeline_name="noop_pipeline",
        partition_fn=lambda: ["one", "two", "three"],
        environment_dict_fn_for_partition=_filesystem_environment,
    )
    return [by_integer, by_enum]
Ejemplo n.º 14
0
from dagster import Partition, PartitionSetDefinition, repository_partitions

# Ten integer partitions (0 through 9) for the "return_hello" solid of
# no_config_pipeline; every partition gets the same filesystem-storage config.
integer_set = PartitionSetDefinition(
    name="integer_partition",
    pipeline_name="no_config_pipeline",
    solid_subset=["return_hello"],
    mode="default",
    partition_fn=lambda: [Partition(index) for index in range(10)],
    environment_dict_fn_for_partition=lambda _partition: {
        "storage": {"filesystem": {}},
    },
)

# Three string partitions for noop_pipeline, with the same storage config.
enum_set = PartitionSetDefinition(
    name="enum_partition",
    pipeline_name="noop_pipeline",
    partition_fn=lambda: ["one", "two", "three"],
    environment_dict_fn_for_partition=lambda _partition: {
        "storage": {"filesystem": {}},
    },
)


@repository_partitions
def define_partitions():
    """Return the module-level partition sets, integer set first."""
    partition_sets = [integer_set, enum_set]
    return partition_sets
Ejemplo n.º 15
0
def define_schedules():
    """Build and return the list of schedule definitions.

    The list mixes plain ``ScheduleDefinition`` objects, one schedule created
    from a partition set, decorator-based hourly/daily/weekly/monthly
    schedules, and three schedules that deliberately reference the undefined
    name ``asdf`` to exercise the user error boundary.
    """
    # Integer partitions 1 through 9; every partition is tagged {"test": "1234"}.
    integer_partition_set = PartitionSetDefinition(
        name="scheduled_integer_partitions",
        pipeline_name="no_config_pipeline",
        partition_fn=lambda: [Partition(x) for x in range(1, 10)],
        tags_fn_for_partition=lambda _partition: {"test": "1234"},
    )

    # NOTE(review): the name says "hourly", but the cron string "0 0 * * *"
    # fires once a day at midnight -- confirm this is intended.
    no_config_pipeline_hourly_schedule = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
    )

    no_config_pipeline_hourly_schedule_with_config_fn = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule_with_config_fn",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
    )

    # should_execute always returns False for this schedule.
    no_config_should_execute = ScheduleDefinition(
        name="no_config_should_execute",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        should_execute=lambda _context: False,
    )

    dynamic_config = ScheduleDefinition(
        name="dynamic_config",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
    )

    # Schedule derived from the partition set, using last_empty_partition as
    # the selector.
    partition_based = integer_partition_set.create_schedule_definition(
        schedule_name="partition_based",
        cron_schedule="0 0 * * *",
        partition_selector=last_empty_partition,
    )

    # Decorator-based schedules. Start dates and execution times are computed
    # relative to the moment this function runs.
    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
    )
    def partition_based_decorator(_date):
        return {}

    # Same as above, but with default_status set to RUNNING.
    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
        default_status=DefaultScheduleStatus.RUNNING,
    )
    def running_in_code_schedule(_date):
        return {}

    @daily_schedule(
        pipeline_name="multi_mode_with_loggers",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
        mode="foo_mode",
    )
    def partition_based_multi_mode_decorator(_date):
        return {}

    # The next four schedules restrict execution to the "return_foo" solid.
    @hourly_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_hourly_decorator(_date):
        return {}

    @daily_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=today_at_midnight().subtract(days=2),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=3)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_daily_decorator(_date):
        return {}

    # The start date is moved to the first day of its month via replace(day=1).
    @monthly_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=(today_at_midnight().subtract(days=100)).replace(day=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=4)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_monthly_decorator(_date):
        return {}

    @weekly_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=today_at_midnight().subtract(days=50),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=5)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_weekly_decorator(_date):
        return {}

    # Schedules for testing the user error boundary
    # Each one references the undefined name `asdf` from a different
    # user-supplied callable: should_execute, tags_fn_for_date, and the
    # run-config function itself.
    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        should_execute=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def should_execute_error_schedule(_date):
        return {}

    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        tags_fn_for_date=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def tags_error_schedule(_date):
        return {}

    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
    )
    def run_config_error_schedule(_date):
        return asdf  # pylint: disable=undefined-variable

    # Daily schedule with an explicit execution timezone; the start date is
    # computed from midnight in that same timezone.
    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight("US/Central") -
        datetime.timedelta(days=1),
        execution_timezone="US/Central",
    )
    def timezone_schedule(_date):
        return {}

    tagged_pipeline_schedule = ScheduleDefinition(
        name="tagged_pipeline_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
    )

    # Same pipeline as above, but the schedule supplies its own "foo" tag.
    tagged_pipeline_override_schedule = ScheduleDefinition(
        name="tagged_pipeline_override_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
        tags={"foo": "notbar"},
    )

    # NOTE(review): per the schedule name, "invalid" is presumably not an
    # accepted value for the takes_an_enum config -- confirm against the
    # pipeline definition.
    invalid_config_schedule = ScheduleDefinition(
        name="invalid_config_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="pipeline_with_enum_config",
        run_config={"solids": {
            "takes_an_enum": {
                "config": "invalid"
            }
        }},
    )

    return [
        run_config_error_schedule,
        no_config_pipeline_hourly_schedule,
        no_config_pipeline_hourly_schedule_with_config_fn,
        no_config_should_execute,
        dynamic_config,
        partition_based,
        partition_based_decorator,
        partition_based_multi_mode_decorator,
        solid_selection_hourly_decorator,
        solid_selection_daily_decorator,
        solid_selection_monthly_decorator,
        solid_selection_weekly_decorator,
        should_execute_error_schedule,
        tagged_pipeline_schedule,
        tagged_pipeline_override_schedule,
        tags_error_schedule,
        timezone_schedule,
        invalid_config_schedule,
        running_in_code_schedule,
    ]
Ejemplo n.º 16
0
    ]


# end_repo_include


def _weekday_run_config_for_partition(_partition):
    pass


# start_manual_partition_schedule
weekday_partition_set = PartitionSetDefinition(
    name="weekday_partition_set",
    pipeline_name="my_data_pipeline",
    # One partition per day of the week, Monday first.
    partition_fn=lambda: [
        Partition(weekday)
        for weekday in (
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        )
    ],
    run_config_fn_for_partition=_weekday_run_config_for_partition,
)


def weekday_partition_selector(
    ctx: ScheduleExecutionContext, partition_set: PartitionSetDefinition
) -> Union[Partition, List[Partition]]:
    """Maps a schedule execution time to the corresponding partition or list of partitions that
Ejemplo n.º 17
0
def define_schedules():
    """Build and return the list of schedule definitions.

    The list mixes plain ``ScheduleDefinition`` objects, two schedules created
    from a partition set, decorator-based hourly/daily/weekly/monthly
    schedules, and three schedules that deliberately reference the undefined
    name ``asdf`` to exercise the user error boundary. Most schedules use the
    same filesystem-storage environment dict.
    """
    # Integer partitions 1 through 9; every partition is tagged {"test": "1234"}.
    integer_partition_set = PartitionSetDefinition(
        name='scheduled_integer_partitions',
        pipeline_name='no_config_pipeline',
        partition_fn=lambda: [Partition(x) for x in range(1, 10)],
        environment_dict_fn_for_partition=lambda _partition:
        {"storage": {
            "filesystem": {}
        }},
        tags_fn_for_partition=lambda _partition: {"test": "1234"},
    )

    # NOTE(review): the name says "hourly", but the cron string "0 0 * * *"
    # fires once a day at midnight -- confirm this is intended.
    no_config_pipeline_hourly_schedule = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict={"storage": {
            "filesystem": {}
        }},
    )

    # Same config as above, but supplied through a function of the context.
    no_config_pipeline_hourly_schedule_with_config_fn = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule_with_config_fn",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict_fn=lambda _context: {"storage": {
            "filesystem": {}
        }},
    )

    # should_execute always returns False for this schedule.
    no_config_should_execute = ScheduleDefinition(
        name="no_config_should_execute",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict={"storage": {
            "filesystem": {}
        }},
        should_execute=lambda _context: False,
    )

    dynamic_config = ScheduleDefinition(
        name="dynamic_config",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict_fn=lambda _context: {"storage": {
            "filesystem": {}
        }},
    )

    # Schedule derived from the partition set without an explicit selector.
    partition_based = integer_partition_set.create_schedule_definition(
        schedule_name="partition_based",
        cron_schedule="0 0 * * *",
    )

    # Same partition set, with last_empty_partition passed as the selector.
    partition_based_custom_selector = integer_partition_set.create_schedule_definition(
        schedule_name="partition_based_custom_selector",
        cron_schedule="0 0 * * *",
        partition_selector=last_empty_partition,
    )

    # Decorator-based schedules. Start dates and execution times are computed
    # relative to the moment this function runs.
    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
    )
    def partition_based_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @daily_schedule(
        pipeline_name='multi_mode_with_loggers',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
        mode='foo_mode',
    )
    def partition_based_multi_mode_decorator(_date):
        return {"storage": {"filesystem": {}}}

    # The next four schedules restrict execution to the 'return_foo' solid.
    @hourly_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=2)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_hourly_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @daily_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=2),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=3)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_daily_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @monthly_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=100),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=4)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_monthly_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @weekly_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=50),
        execution_time=(datetime.datetime.now() +
                        datetime.timedelta(hours=5)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_weekly_decorator(_date):
        return {"storage": {"filesystem": {}}}

    # Schedules for testing the user error boundary
    # Each one references the undefined name `asdf` from a different
    # user-supplied callable: should_execute, tags_fn_for_date, and the
    # environment-dict function itself.
    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        should_execute=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def should_execute_error_schedule(_date):
        return {"storage": {"filesystem": {}}}

    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        tags_fn_for_date=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def tags_error_schedule(_date):
        return {"storage": {"filesystem": {}}}

    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
    )
    def environment_dict_error_schedule(_date):
        return asdf  # pylint: disable=undefined-variable

    tagged_pipeline_schedule = ScheduleDefinition(
        name="tagged_pipeline_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
        environment_dict={"storage": {
            "filesystem": {}
        }},
    )

    # Same pipeline as above, but the schedule supplies its own 'foo' tag.
    tagged_pipeline_override_schedule = ScheduleDefinition(
        name="tagged_pipeline_override_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
        environment_dict={"storage": {
            "filesystem": {}
        }},
        tags={'foo': 'notbar'},
    )

    # NOTE(review): per the schedule name, "invalid" is presumably not an
    # accepted value for the takes_an_enum config -- confirm against the
    # pipeline definition.
    invalid_config_schedule = ScheduleDefinition(
        name="invalid_config_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="pipeline_with_enum_config",
        environment_dict={"solids": {
            "takes_an_enum": {
                'config': "invalid"
            }
        }},
    )

    return [
        environment_dict_error_schedule,
        no_config_pipeline_hourly_schedule,
        no_config_pipeline_hourly_schedule_with_config_fn,
        no_config_should_execute,
        dynamic_config,
        partition_based,
        partition_based_custom_selector,
        partition_based_decorator,
        partition_based_multi_mode_decorator,
        solid_selection_hourly_decorator,
        solid_selection_daily_decorator,
        solid_selection_monthly_decorator,
        solid_selection_weekly_decorator,
        should_execute_error_schedule,
        tagged_pipeline_schedule,
        tagged_pipeline_override_schedule,
        tags_error_schedule,
        invalid_config_schedule,
    ]
Ejemplo n.º 18
0
import datetime

from dagster import Partition, PartitionSetDefinition, ScheduleDefinition, daily_schedule, schedules
from dagster.core.definitions.partition import last_empty_partition
from dagster.utils.test import FilesytemTestScheduler

# Integer partitions 1 through 9 for no_config_pipeline. Every partition gets
# the same filesystem-storage config and the tag {"test": "1234"}.
integer_partition_set = PartitionSetDefinition(
    name="scheduled_integer_partitions",
    pipeline_name="no_config_pipeline",
    partition_fn=lambda: [Partition(number) for number in range(1, 10)],
    environment_dict_fn_for_partition=lambda _partition: {
        "storage": {"filesystem": {}},
    },
    tags_fn_for_partition=lambda _partition: {"test": "1234"},
)


@schedules(scheduler=FilesytemTestScheduler)
def define_scheduler():

    no_config_pipeline_hourly_schedule = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict={"storage": {
            "filesystem": {}
        }},
    )

    no_config_pipeline_hourly_schedule_with_config_fn = ScheduleDefinition(
Ejemplo n.º 19
0
def get_date_partitions():
    """Return one partition per day of May 2020, named 2020-05-01 to 2020-05-31."""
    days_in_may = range(1, 32)
    return [Partition(f"2020-05-{day:02d}") for day in days_in_may]