def create_hourly_hn_download_schedule():
    """Build the hourly schedule definition for ``download_pipeline``.

    An hourly partitions definition (UTC, starting 2021-01-01, execution
    time 00:00) is wrapped in a ``prod``-mode partition set named
    ``hourly_hn_download_schedule_partitions``. That partition set is then
    asked for a schedule definition whose cron string and partition
    selector are both derived from the partitions definition.

    Returns:
        Whatever ``PartitionSetDefinition.create_schedule_definition``
        returns for the arguments assembled here.
    """
    schedule_name = "hourly_hn_download_schedule"

    hourly_partitions = ScheduleTimeBasedPartitionsDefinition(
        schedule_type=ScheduleType.HOURLY,
        start=datetime.datetime(2021, 1, 1),
        execution_time=datetime.time(0, 0),
        fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
        timezone="UTC",
    )

    download_partition_set = PartitionSetDefinition(
        name=schedule_name + "_partitions",
        pipeline_name="download_pipeline",  # type: ignore[arg-type]
        # Run config is derived from the partition's value only.
        run_config_fn_for_partition=(
            lambda partition: get_hourly_download_def_schedule_config(partition.value)
        ),
        mode="prod",
        partitions_def=hourly_partitions,
    )

    # Both the cron string and the selector come from the partitions
    # definition, so the schedule and its partitions stay aligned.
    cron_schedule = hourly_partitions.get_cron_schedule()
    selector = create_offset_partition_selector(
        execution_time_to_partition_fn=hourly_partitions.get_execution_time_to_partition_fn(),
    )

    return download_partition_set.create_schedule_definition(
        schedule_name,
        cron_schedule,
        partition_selector=selector,
        execution_timezone="UTC",
        decorated_fn=get_hourly_download_def_schedule_config,
        job=download_comments_and_stories_prod,
    )
Ejemplo n.º 2
0
    def inner(fn: Callable[[datetime.datetime], Dict[str, Any]]) -> PartitionScheduleDefinition:
        """Turn ``fn`` into an hourly, partition-based schedule definition.

        ``fn`` is called with a partition's value to produce run config.
        All other settings (name, dates, timezone, offset, mode, tags
        function, and so on) are read from the enclosing scope.

        Returns:
            The schedule definition created by the partition set, after
            ``update_wrapper`` has been applied to it with ``fn`` as the
            wrapped function.
        """
        check.callable_param(fn, "fn")

        # Fall back to the decorated function's own name when none was given.
        schedule_name = name or fn.__name__

        tags_fn_for_partition_value: Callable[["Partition"], Optional[Dict[str, str]]]
        if tags_fn_for_date:
            tags_fn = cast(
                Callable[[datetime.datetime], Optional[Dict[str, str]]], tags_fn_for_date
            )
            # Adapt the date-based tags function to accept a partition.
            tags_fn_for_partition_value = lambda partition: tags_fn(partition.value)
        else:
            # No tags function supplied: every partition gets an empty tag dict.
            tags_fn_for_partition_value = lambda partition: {}

        # The partition name format depends on whether a timezone was given.
        if execution_timezone:
            fmt = DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE
        else:
            fmt = DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE

        hourly_partitions = ScheduleTimeBasedPartitionsDefinition(
            schedule_type=ScheduleType.HOURLY,
            start=start_date,
            execution_time=execution_time,
            end=end_date,
            fmt=fmt,
            timezone=execution_timezone,
            offset=partition_hours_offset,
        )

        hourly_partition_set = PartitionSetDefinition(
            name=schedule_name + "_partitions",
            pipeline_name=pipeline_name,  # type: ignore[arg-type]
            run_config_fn_for_partition=lambda partition: fn(partition.value),
            solid_selection=solid_selection,
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
            partitions_def=hourly_partitions,
        )

        cron_schedule = hourly_partitions.get_cron_schedule()
        selector = create_offset_partition_selector(
            execution_time_to_partition_fn=hourly_partitions.get_execution_time_to_partition_fn(),
        )

        hourly_schedule = hourly_partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
            partition_selector=selector,
            execution_timezone=execution_timezone,
            description=description,
            decorated_fn=fn,
            default_status=default_status,
        )

        update_wrapper(hourly_schedule, wrapped=fn)
        return hourly_schedule
Ejemplo n.º 3
0
def test_time_partitions_weekly_partitions(
    start: datetime,
    end: datetime,
    partition_weeks_offset: Optional[int],
    current_time,
    expected_partitions: List[str],
):
    """Check the partitions of a weekly definition against ``expected_partitions``.

    The definition uses execution time 01:20 and execution day 0, and is
    built and queried while ``pendulum.test(current_time)`` is active.
    """
    with pendulum.test(current_time):
        weekly_definition = ScheduleTimeBasedPartitionsDefinition(
            schedule_type=ScheduleType.WEEKLY,
            start=start,
            execution_time=time(1, 20),
            execution_day=0,
            end=end,
            offset=partition_weeks_offset,
        )
        # Query inside the context manager, matching where the definition is built.
        actual_partitions = weekly_definition.get_partitions()

        assert_expected_partitions(actual_partitions, expected_partitions)
Ejemplo n.º 4
0
def test_time_partitions_hourly_partitions(
    start: datetime,
    end: datetime,
    timezone: Optional[str],
    partition_hours_offset: int,
    current_time,
    expected_partitions: List[str],
):
    """Check the partitions of an hourly definition against ``expected_partitions``.

    The definition uses execution time 00:01 and the with-timezone hourly
    format, and is built and queried while ``pendulum.test(current_time)``
    is active.
    """
    with pendulum.test(current_time):
        hourly_definition = ScheduleTimeBasedPartitionsDefinition(
            schedule_type=ScheduleType.HOURLY,
            start=start,
            execution_time=time(0, 1),
            end=end,
            timezone=timezone,
            fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            offset=partition_hours_offset,
        )
        # Query inside the context manager, matching where the definition is built.
        actual_partitions = hourly_definition.get_partitions()

        assert_expected_partitions(actual_partitions, expected_partitions)
Ejemplo n.º 5
0
def test_time_partitions_daily_partitions(
    start: datetime,
    execution_time: time,
    end: Optional[datetime],
    partition_days_offset: Optional[int],
    current_time,
    expected_partitions: List[str],
    timezone: Optional[str],
):
    """Check the partitions of a daily definition against ``expected_partitions``.

    The definition is built from the supplied start, end, execution time,
    offset and timezone, and is queried while
    ``pendulum.test(current_time)`` is active.
    """
    with pendulum.test(current_time):
        daily_definition = ScheduleTimeBasedPartitionsDefinition(
            schedule_type=ScheduleType.DAILY,
            start=start,
            execution_time=execution_time,
            end=end,
            offset=partition_days_offset,
            timezone=timezone,
        )
        # Query inside the context manager, matching where the definition is built.
        actual_partitions = daily_definition.get_partitions()

        assert_expected_partitions(actual_partitions, expected_partitions)
Ejemplo n.º 6
0
def test_time_based_partitions_invariants(
    schedule_type: ScheduleType,
    start: datetime,
    execution_day: Optional[int],
    end: Optional[datetime],
    error_message_regex: str,
):
    """Expect a ``CheckError`` matching ``error_message_regex`` on construction.

    The definition is built from the supplied schedule type, start,
    execution day and end; every remaining argument is passed as ``None``.
    """
    definition_kwargs = {
        "schedule_type": schedule_type,
        "start": start,
        "execution_day": execution_day,
        "execution_time": None,
        "end": end,
        "fmt": None,
        "timezone": None,
        "offset": None,
    }

    # Only the constructor call sits inside the raises-block.
    with pytest.raises(CheckError, match=error_message_regex):
        ScheduleTimeBasedPartitionsDefinition(**definition_kwargs)