def inner(fn):
    """Wrap ``fn`` in an hourly partition set and return its schedule definition.

    ``fn`` is invoked with each partition's ``value`` to produce the
    environment dict for that partition. The schedule is named ``name`` when
    that is truthy and ``fn.__name__`` otherwise. ``name``, ``pipeline_name``,
    ``start_date``, ``mode``, ``cron_schedule``, ``should_execute`` and
    ``environment_vars`` are read from the enclosing scope.
    """
    check.callable_param(fn, 'fn')
    schedule_name = name if name else fn.__name__

    def environment_dict_for(partition):
        # Only the partition's value is handed to the user function.
        return fn(partition.value)

    hourly_partition_set = PartitionSetDefinition(
        name='{}_hourly'.format(pipeline_name),
        pipeline_name=pipeline_name,
        partition_fn=date_partition_range(
            start_date,
            delta=datetime.timedelta(hours=1),
            fmt="%Y-%m-%d-%H:%M",
        ),
        environment_dict_fn_for_partition=environment_dict_for,
        mode=mode,
    )

    return hourly_partition_set.create_schedule_definition(
        schedule_name,
        cron_schedule,
        should_execute=should_execute,
        environment_vars=environment_vars,
    )
def daily_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    """Build a decorator that turns an environment-dict function into a daily schedule.

    The returned decorator wraps the decorated function in a
    ``PartitionSetDefinition`` named ``'<pipeline_name>_daily'`` and returns the
    result of that partition set's ``create_schedule_definition`` call. The
    cron expression is ``'<minute> <hour> * * *'``, taken from
    ``execution_time``.

    Args:
        pipeline_name (str): Name of the pipeline the schedule targets.
        start_date (datetime.datetime): Passed to ``date_partition_range`` to
            build the partition function.
        name (Optional[str]): Schedule name. When falsy, the decorated
            function's ``__name__`` is used.
        execution_time (datetime.time): Only ``hour`` and ``minute`` are used,
            to build the cron expression.
        tags_fn_for_date (Optional[Callable]): Called with a partition's
            ``value`` to produce its tags. When omitted, every partition gets
            an empty dict.
        solid_subset (Optional[List[str]]): Validated only.
            NOTE(review): this value is not forwarded to
            ``PartitionSetDefinition`` -- confirm whether it should be.
        mode (Optional[str]): Run through ``check.opt_str_param`` with
            ``DEFAULT_MODE_NAME`` as the default, then passed to the partition
            set.
        should_execute (Optional[Callable]): Forwarded to
            ``create_schedule_definition``.
        environment_vars (Optional[Dict[str, str]]): Forwarded to
            ``create_schedule_definition``.

    Returns:
        Callable: A decorator taking the environment-dict function.
    """
    check.opt_str_param(name, 'name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars, 'environment_vars', key_type=str, value_type=str)
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(execution_time, 'execution_time', datetime.time)

    cron_schedule = '{minute} {hour} * * *'.format(
        minute=execution_time.minute, hour=execution_time.hour
    )

    partition_fn = date_partition_range(start_date)

    def inner(fn):
        check.callable_param(fn, 'fn')
        schedule_name = name or fn.__name__

        def tags_fn_for_partition_value(partition):
            # Without a user-supplied tag function every partition is untagged.
            if tags_fn_for_date:
                return tags_fn_for_date(partition.value)
            return {}

        def environment_dict_fn_for_partition(partition):
            return fn(partition.value)

        partition_set = PartitionSetDefinition(
            name='{}_daily'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=environment_dict_fn_for_partition,
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
def test_date_partition_range_daily(start, end, delta, expected_partitions):
    """Check the partitions produced by ``date_partition_range(start, end, delta)``.

    Every generated item must be a ``Partition``, and the generated names must
    match ``expected_partitions`` one-for-one, in order.
    """
    partitions = date_partition_range(start, end, delta)()

    for partition in partitions:
        assert isinstance(partition, Partition)

    assert len(partitions) == len(expected_partitions)

    actual_names = [partition.name for partition in partitions]
    for expected_name, actual_name in zip(expected_partitions, actual_names):
        assert expected_name == actual_name
def inner(fn):
    """Wrap ``fn`` in a daily partition set and return its schedule definition.

    ``fn`` is invoked with each partition's ``value`` to produce the
    environment dict for that partition. The schedule is named ``name`` when
    that is truthy and ``fn.__name__`` otherwise. ``name``, ``pipeline_name``,
    ``start_date``, ``cron_schedule``, ``should_execute`` and
    ``environment_vars`` are read from the enclosing scope.
    """
    check.callable_param(fn, 'fn')
    schedule_name = name if name else fn.__name__

    def environment_dict_for(partition):
        # Only the partition's value is handed to the user function.
        return fn(partition.value)

    daily_partition_set = PartitionSetDefinition(
        name='{}_daily'.format(pipeline_name),
        pipeline_name=pipeline_name,
        partition_fn=date_partition_range(start_date),
        environment_dict_fn_for_partition=environment_dict_for,
    )

    return daily_partition_set.create_schedule_definition(
        schedule_name,
        cron_schedule,
        should_execute=should_execute,
        environment_vars=environment_vars,
    )
def monthly_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_day_of_month=1,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    """Build a decorator that turns an environment-dict function into a monthly schedule.

    The returned decorator wraps the decorated function in a
    ``PartitionSetDefinition`` named ``'<pipeline_name>_monthly'`` and returns
    the result of that partition set's ``create_schedule_definition`` call. The
    cron expression is ``'<minute> <hour> <day> * *'``; partitions are built by
    ``date_partition_range`` with a one-month ``relativedelta`` step and the
    ``"%Y-%m"`` format.

    Args:
        pipeline_name (str): Name of the pipeline the schedule targets.
        start_date (datetime.datetime): Passed to ``date_partition_range`` to
            build the partition function.
        name (Optional[str]): Schedule name. When falsy, the decorated
            function's ``__name__`` is used.
        execution_day_of_month (int): Day-of-month field of the cron
            expression; must be between 1 and 31 inclusive.
        execution_time (datetime.time): Only ``hour`` and ``minute`` are used,
            to build the cron expression.
        tags_fn_for_date (Optional[Callable]): Called with a partition's
            ``value`` to produce its tags. When omitted, every partition gets
            an empty dict.
        solid_subset (Optional[List[str]]): Validated only.
            NOTE(review): this value is not forwarded to
            ``PartitionSetDefinition`` -- confirm whether it should be.
        mode (Optional[str]): Run through ``check.opt_str_param`` with
            ``DEFAULT_MODE_NAME`` as the default, then passed to the partition
            set.
        should_execute (Optional[Callable]): Forwarded to
            ``create_schedule_definition``.
        environment_vars (Optional[Dict[str, str]]): Forwarded to
            ``create_schedule_definition``.

    Returns:
        Callable: A decorator taking the environment-dict function.

    Raises:
        DagsterInvalidDefinitionError: If ``execution_day_of_month`` is outside
            the range 1..31.
    """
    check.opt_str_param(name, 'name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars, 'environment_vars', key_type=str, value_type=str)
    check.str_param(pipeline_name, 'pipeline_name')
    # Label the check with the real parameter name so validation errors point
    # the caller at the argument they actually passed.
    check.int_param(execution_day_of_month, 'execution_day_of_month')
    check.inst_param(execution_time, 'execution_time', datetime.time)

    # NOTE(review): standard cron only fires on days that exist in a month, so
    # values 29-31 presumably skip shorter months -- confirm this is intended.
    if execution_day_of_month <= 0 or execution_day_of_month > 31:
        raise DagsterInvalidDefinitionError(
            "`execution_day_of_month={}` is not valid for monthly schedule. Execution day must be between 1 and 31".format(
                execution_day_of_month
            )
        )

    cron_schedule = '{minute} {hour} {day} * *'.format(
        minute=execution_time.minute,
        hour=execution_time.hour,
        day=execution_day_of_month,
    )

    partition_fn = date_partition_range(start_date, delta=relativedelta(months=1), fmt="%Y-%m")

    def inner(fn):
        check.callable_param(fn, 'fn')
        schedule_name = name or fn.__name__

        def tags_fn_for_partition_value(partition):
            # Without a user-supplied tag function every partition is untagged.
            if tags_fn_for_date:
                return tags_fn_for_date(partition.value)
            return {}

        def environment_dict_fn_for_partition(partition):
            return fn(partition.value)

        partition_set = PartitionSetDefinition(
            name='{}_monthly'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=environment_dict_fn_for_partition,
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
def hourly_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    """Build a decorator that turns an environment-dict function into an hourly schedule.

    The returned decorator wraps the decorated function in a
    ``PartitionSetDefinition`` named ``'<pipeline_name>_hourly'`` and returns
    the result of that partition set's ``create_schedule_definition`` call. The
    cron expression is ``'<minute> * * * *'``: only the minute of
    ``execution_time`` is used, and a warning is emitted when its hour is
    non-zero. Partitions are built by ``date_partition_range`` with a one-hour
    step and the ``"%Y-%m-%d-%H:%M"`` format.

    Args:
        pipeline_name (str): Name of the pipeline the schedule targets.
        start_date (datetime.datetime): Passed to ``date_partition_range`` to
            build the partition function.
        name (Optional[str]): Schedule name. When falsy, the decorated
            function's ``__name__`` is used.
        execution_time (datetime.time): Only ``minute`` reaches the cron
            expression; a non-zero ``hour`` triggers a warning.
        tags_fn_for_date (Optional[Callable]): Called with a partition's
            ``value`` to produce its tags. When omitted, every partition gets
            an empty dict.
        solid_subset (Optional[List[str]]): Validated only.
            NOTE(review): this value is not forwarded to
            ``PartitionSetDefinition`` -- confirm whether it should be.
        mode (Optional[str]): Run through ``check.opt_str_param`` with
            ``DEFAULT_MODE_NAME`` as the default, then passed to the partition
            set.
        should_execute (Optional[Callable]): Forwarded to
            ``create_schedule_definition``.
        environment_vars (Optional[Dict[str, str]]): Forwarded to
            ``create_schedule_definition``.

    Returns:
        Callable: A decorator taking the environment-dict function.
    """
    check.opt_str_param(name, 'name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars, 'environment_vars', key_type=str, value_type=str)
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(execution_time, 'execution_time', datetime.time)

    if execution_time.hour != 0:
        # The decorated function (and so its fallback name) is not known yet,
        # so identify the schedule by its pipeline when no name was given.
        schedule_label = name if name else "for pipeline '{}'".format(pipeline_name)
        warnings.warn(
            (
                "Hourly schedule {schedule_name} created with:\n"
                "\texecution_time=datetime.time(hour={hour}, minute={minute}, ...). "
                "Since this is an hourly schedule, the hour parameter will be ignored and the "
                "schedule will run on the {minute} mark for the previous hour interval. Replace "
                "datetime.time(hour={hour}, minute={minute}, ...) with "
                "datetime.time(minute={minute}, ...) to fix this warning."
            ).format(
                schedule_name=schedule_label,
                hour=execution_time.hour,
                minute=execution_time.minute,
            )
        )

    cron_schedule = '{minute} * * * *'.format(minute=execution_time.minute)

    partition_fn = date_partition_range(
        start_date, delta=datetime.timedelta(hours=1), fmt="%Y-%m-%d-%H:%M"
    )

    def inner(fn):
        check.callable_param(fn, 'fn')
        schedule_name = name or fn.__name__

        def tags_fn_for_partition_value(partition):
            # Without a user-supplied tag function every partition is untagged.
            if tags_fn_for_date:
                return tags_fn_for_date(partition.value)
            return {}

        def environment_dict_fn_for_partition(partition):
            return fn(partition.value)

        partition_set = PartitionSetDefinition(
            name='{}_hourly'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=environment_dict_fn_for_partition,
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
def test_date_partition_range_out_of_order():
    """A start date later than the end date must raise ``DagsterInvariantViolationError``."""
    later = datetime(year=2020, month=1, day=3)
    earlier = datetime(year=2020, month=1, day=1)

    # The error is expected from the call itself, before any partitions are
    # generated.
    with pytest.raises(DagsterInvariantViolationError):
        date_partition_range(later, earlier)