def create_hourly_hn_download_schedule():
    """Build the hourly, partition-based schedule for the download pipeline.

    The partitions are hourly, use the ``UTC`` timezone and start at
    2021-01-01 with an execution time of 00:00. Each partition's run config
    is produced by ``get_hourly_download_def_schedule_config`` applied to the
    partition value, and the partition set uses the ``prod`` mode.

    Returns:
        The schedule definition created by the partition set's
        ``create_schedule_definition``.
    """
    schedule_name = "hourly_hn_download_schedule"

    hourly_partitions = ScheduleTimeBasedPartitionsDefinition(
        schedule_type=ScheduleType.HOURLY,
        start=datetime.datetime(2021, 1, 1),
        execution_time=datetime.time(0, 0),
        fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
        timezone="UTC",
    )

    download_partition_set = PartitionSetDefinition(
        name=f"{schedule_name}_partitions",
        pipeline_name="download_pipeline",  # type: ignore[arg-type]
        run_config_fn_for_partition=lambda partition: get_hourly_download_def_schedule_config(
            partition.value
        ),
        mode="prod",
        partitions_def=hourly_partitions,
    )

    # Derive the cron string and the partition selector from the same
    # partitions definition so the schedule and its partitions stay in sync.
    cron_schedule = hourly_partitions.get_cron_schedule()
    to_partition_fn = hourly_partitions.get_execution_time_to_partition_fn()
    selector = create_offset_partition_selector(
        execution_time_to_partition_fn=to_partition_fn,
    )

    return download_partition_set.create_schedule_definition(
        schedule_name,
        cron_schedule,
        partition_selector=selector,
        execution_timezone="UTC",
        decorated_fn=get_hourly_download_def_schedule_config,
        job=download_comments_and_stories_prod,
    )
def inner(fn: Callable[[datetime.datetime], Dict[str, Any]]) -> PartitionScheduleDefinition:
    """Wrap ``fn`` in an hourly partition schedule definition.

    ``fn`` is called with a partition's value to produce the run config for
    that partition. The schedule name, timezone, offsets, tags function and
    the other settings are read from the enclosing scope, which is not
    visible in this block.

    Args:
        fn: Callable that receives the partition value and returns run config.

    Returns:
        The schedule definition, updated via ``update_wrapper`` with ``fn``
        as the wrapped function.
    """
    check.callable_param(fn, "fn")

    # Fall back to the decorated function's own name when no name was given.
    schedule_name = name or fn.__name__

    partition_tags_fn: Callable[["Partition"], Optional[Dict[str, str]]]
    if tags_fn_for_date:
        date_tags_fn = cast(
            Callable[[datetime.datetime], Optional[Dict[str, str]]], tags_fn_for_date
        )
        partition_tags_fn = lambda partition: date_tags_fn(partition.value)
    else:
        partition_tags_fn = lambda partition: {}

    if execution_timezone:
        partition_fmt = DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE
    else:
        partition_fmt = DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE

    hourly_partitions = ScheduleTimeBasedPartitionsDefinition(
        schedule_type=ScheduleType.HOURLY,
        start=start_date,
        execution_time=execution_time,
        end=end_date,
        fmt=partition_fmt,
        timezone=execution_timezone,
        offset=partition_hours_offset,
    )

    hourly_partition_set = PartitionSetDefinition(
        name=f"{schedule_name}_partitions",
        pipeline_name=pipeline_name,  # type: ignore[arg-type]
        run_config_fn_for_partition=lambda partition: fn(partition.value),
        solid_selection=solid_selection,
        tags_fn_for_partition=partition_tags_fn,
        mode=mode,
        partitions_def=hourly_partitions,
    )

    # Cron string and selector both come from the partitions definition.
    cron_schedule = hourly_partitions.get_cron_schedule()
    to_partition_fn = hourly_partitions.get_execution_time_to_partition_fn()
    selector = create_offset_partition_selector(
        execution_time_to_partition_fn=to_partition_fn,
    )

    result = hourly_partition_set.create_schedule_definition(
        schedule_name,
        cron_schedule,
        should_execute=should_execute,
        environment_vars=environment_vars,
        partition_selector=selector,
        execution_timezone=execution_timezone,
        description=description,
        decorated_fn=fn,
        default_status=default_status,
    )
    update_wrapper(result, wrapped=fn)
    return result
def test_time_partitions_weekly_partitions(
    start: datetime,
    end: datetime,
    partition_weeks_offset: Optional[int],
    current_time,
    expected_partitions: List[str],
):
    """Check the partitions produced by a weekly partitions definition.

    The definition uses execution day 0 and execution time 01:20, and is
    built and queried while ``pendulum.test(current_time)`` is active.
    """
    with pendulum.test(current_time):
        weekly_def = ScheduleTimeBasedPartitionsDefinition(
            schedule_type=ScheduleType.WEEKLY,
            start=start,
            execution_time=time(1, 20),
            execution_day=0,
            end=end,
            offset=partition_weeks_offset,
        )
        actual_partitions = weekly_def.get_partitions()
        assert_expected_partitions(actual_partitions, expected_partitions)
def test_time_partitions_hourly_partitions(
    start: datetime,
    end: datetime,
    timezone: Optional[str],
    partition_hours_offset: int,
    current_time,
    expected_partitions: List[str],
):
    """Check the partitions produced by an hourly partitions definition.

    The definition uses execution time 00:01 and the timezone-aware hourly
    format, and is built and queried while ``pendulum.test(current_time)``
    is active.
    """
    with pendulum.test(current_time):
        hourly_def = ScheduleTimeBasedPartitionsDefinition(
            schedule_type=ScheduleType.HOURLY,
            start=start,
            execution_time=time(0, 1),
            end=end,
            timezone=timezone,
            fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            offset=partition_hours_offset,
        )
        actual_partitions = hourly_def.get_partitions()
        assert_expected_partitions(actual_partitions, expected_partitions)
def test_time_partitions_daily_partitions(
    start: datetime,
    execution_time: time,
    end: Optional[datetime],
    partition_days_offset: Optional[int],
    current_time,
    expected_partitions: List[str],
    timezone: Optional[str],
):
    """Check the partitions produced by a daily partitions definition.

    The definition is built and queried while
    ``pendulum.test(current_time)`` is active.
    """
    with pendulum.test(current_time):
        daily_def = ScheduleTimeBasedPartitionsDefinition(
            schedule_type=ScheduleType.DAILY,
            start=start,
            execution_time=execution_time,
            end=end,
            offset=partition_days_offset,
            timezone=timezone,
        )
        actual_partitions = daily_def.get_partitions()
        assert_expected_partitions(actual_partitions, expected_partitions)
def test_time_based_partitions_invariants(
    schedule_type: ScheduleType,
    start: datetime,
    execution_day: Optional[int],
    end: Optional[datetime],
    error_message_regex: str,
):
    """Check that invalid constructor arguments raise ``CheckError``.

    The raised error's message must match ``error_message_regex``.
    """
    # The remaining options are passed explicitly as None; only the
    # parametrized arguments vary between cases.
    constructor_kwargs = dict(
        schedule_type=schedule_type,
        start=start,
        execution_day=execution_day,
        execution_time=None,
        end=end,
        fmt=None,
        timezone=None,
        offset=None,
    )
    with pytest.raises(CheckError, match=error_message_regex):
        ScheduleTimeBasedPartitionsDefinition(**constructor_kwargs)