Esempio n. 1
0
    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        def _environment_dict_fn_for_partition(partition):
            return fn(partition.value)

        partition_set_name = '{}_hourly'.format(pipeline_name)
        partition_set = PartitionSetDefinition(
            name=partition_set_name,
            pipeline_name=pipeline_name,
            partition_fn=date_partition_range(
                start_date, delta=datetime.timedelta(hours=1), fmt="%Y-%m-%d-%H:%M"
            ),
            environment_dict_fn_for_partition=_environment_dict_fn_for_partition,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )
Esempio n. 2
0
def daily_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    check.opt_str_param(name, 'name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars,
                         'environment_vars',
                         key_type=str,
                         value_type=str)
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.inst_param(execution_time, 'execution_time', datetime.time)

    cron_schedule = '{minute} {hour} * * *'.format(
        minute=execution_time.minute, hour=execution_time.hour)

    partition_fn = date_partition_range(start_date)

    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        tags_fn_for_partition_value = lambda partition: {}
        if tags_fn_for_date:
            tags_fn_for_partition_value = lambda partition: tags_fn_for_date(
                partition.value)

        partition_set = PartitionSetDefinition(
            name='{}_daily'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=lambda partition: fn(partition.
                                                                   value),
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
Esempio n. 3
0
def test_date_partition_range_daily(start, end, delta, expected_partitions):
    partition_generator = date_partition_range(start, end, delta)
    generated_partitions = partition_generator()
    assert all(
        isinstance(generated_partition, Partition) for generated_partition in generated_partitions
    )
    assert len(generated_partitions) == len(expected_partitions)
    assert all(
        expected_partition_name == generated_partition_name
        for expected_partition_name, generated_partition_name in zip(
            expected_partitions,
            [generated_partition.name for generated_partition in generated_partitions],
        )
    )
Esempio n. 4
0
    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        def _environment_dict_fn_for_partition(partition):
            return fn(partition.value)

        partition_set_name = '{}_daily'.format(pipeline_name)
        partition_set = PartitionSetDefinition(
            name=partition_set_name,
            pipeline_name=pipeline_name,
            partition_fn=date_partition_range(start_date),
            environment_dict_fn_for_partition=
            _environment_dict_fn_for_partition,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )
Esempio n. 5
0
def monthly_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_day_of_month=1,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    check.opt_str_param(name, 'name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars,
                         'environment_vars',
                         key_type=str,
                         value_type=str)
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.int_param(execution_day_of_month, 'execution_day')
    check.inst_param(execution_time, 'execution_time', datetime.time)

    if execution_day_of_month <= 0 or execution_day_of_month > 31:
        raise DagsterInvalidDefinitionError(
            "`execution_day_of_month={}` is not valid for monthly schedule. Execution day must be between 1 and 31"
            .format(execution_day_of_month))

    cron_schedule = '{minute} {hour} {day} * *'.format(
        minute=execution_time.minute,
        hour=execution_time.hour,
        day=execution_day_of_month)

    partition_fn = date_partition_range(start_date,
                                        delta=relativedelta(months=1),
                                        fmt="%Y-%m")

    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        tags_fn_for_partition_value = lambda partition: {}
        if tags_fn_for_date:
            tags_fn_for_partition_value = lambda partition: tags_fn_for_date(
                partition.value)

        partition_set = PartitionSetDefinition(
            name='{}_monthly'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=lambda partition: fn(partition.
                                                                   value),
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
Esempio n. 6
0
def hourly_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    check.opt_str_param(name, 'name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars,
                         'environment_vars',
                         key_type=str,
                         value_type=str)
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.inst_param(execution_time, 'execution_time', datetime.time)

    if execution_time.hour != 0:
        warnings.warn(
            "Hourly schedule {schedule_name} created with:\n"
            "\tschedule_time=datetime.time(hour={hour}, minute={minute}, ...)."
            "Since this is a hourly schedule, the hour parameter will be ignored and the schedule "
            "will run on the {minute} mark for the previous hour interval. Replace "
            "datetime.time(hour={hour}, minute={minute}, ...) with datetime.time(minute={minute}, ...)"
            "to fix this warning.")

    cron_schedule = '{minute} * * * *'.format(minute=execution_time.minute)

    partition_fn = date_partition_range(start_date,
                                        delta=datetime.timedelta(hours=1),
                                        fmt="%Y-%m-%d-%H:%M")

    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        tags_fn_for_partition_value = lambda partition: {}
        if tags_fn_for_date:
            tags_fn_for_partition_value = lambda partition: tags_fn_for_date(
                partition.value)

        partition_set = PartitionSetDefinition(
            name='{}_hourly'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=lambda partition: fn(partition.
                                                                   value),
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
Esempio n. 7
0
def test_date_partition_range_out_of_order():
    with pytest.raises(DagsterInvariantViolationError):
        date_partition_range(
            datetime(year=2020, month=1, day=3), datetime(year=2020, month=1, day=1)
        )