Example #1
def test_partitions_outside_schedule_range():
    execution_time = create_pendulum_time(year=2021, month=1, day=1, tz="UTC")
    context = build_schedule_context(scheduled_execution_time=execution_time)

    @monthly_schedule(
        pipeline_name="too early",
        start_date=create_pendulum_time(year=2021, month=1, day=1, tz="UTC"),
    )
    def too_early(monthly_time):
        return {"monthly_time": monthly_time.isoformat()}

    execution_data = too_early.evaluate_tick(context)
    assert execution_data.skip_message == (
        "Your partition (2020-12-01T00:00:00+00:00) is before the beginning of "
        "the partition set (2021-01-01T00:00:00+00:00). "
        "Verify your schedule's start_date is correct.")

    @monthly_schedule(
        pipeline_name="too late",
        start_date=create_pendulum_time(year=2020, month=1, day=1, tz="UTC"),
        end_date=create_pendulum_time(year=2020, month=12, day=1, tz="UTC"),
        partition_months_offset=0,
    )
    def too_late(monthly_time):
        return {"monthly_time": monthly_time.isoformat()}

    execution_data = too_late.evaluate_tick(context)
    assert execution_data.skip_message == (
        "Your partition (2021-01-01T00:00:00+00:00) is after the end of "
        "the partition set (2020-12-01T00:00:00+00:00). "
        "Verify your schedule's end_date is correct.")
Example #2
def test_partitions_for_monthly_schedule_decorators_without_timezone(
        partition_months_offset: int):
    with pendulum.test(
            to_timezone(create_pendulum_time(2019, 3, 27, 0, 1, 1, tz="UTC"),
                        "US/Eastern")):
        context_without_time = build_schedule_context()

        start_date = datetime(year=2019, month=1, day=1)

        @monthly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_month=3,
            start_date=start_date,
            execution_time=time(9, 30),
            partition_months_offset=partition_months_offset,
        )
        def monthly_foo_schedule(monthly_time):
            return {"monthly_time": monthly_time.isoformat()}

        valid_monthly_time = create_pendulum_time(year=2019,
                                                  month=3,
                                                  day=3,
                                                  hour=9,
                                                  minute=30,
                                                  tz="UTC")
        context_with_valid_time = build_schedule_context(
            scheduled_execution_time=valid_monthly_time)

        execution_data = monthly_foo_schedule.evaluate_tick(
            context_with_valid_time)
        assert execution_data.run_requests
        assert len(execution_data.run_requests) == 1
        assert execution_data.run_requests[0].run_config == {
            "monthly_time":
            create_pendulum_time(
                year=2019, month=3, day=1,
                tz="UTC").subtract(months=partition_months_offset).isoformat()
        }

        execution_data = monthly_foo_schedule.evaluate_tick(
            context_without_time)
        assert execution_data.run_requests
        assert len(execution_data.run_requests) == 1
        assert execution_data.run_requests[0].run_config == {
            "monthly_time":
            create_pendulum_time(
                year=2019, month=3, day=1,
                tz="UTC").subtract(months=partition_months_offset).isoformat()
        }

        _check_partitions(
            monthly_foo_schedule,
            3 - partition_months_offset,
            pendulum.instance(start_date, tz="UTC"),
            DEFAULT_MONTHLY_FORMAT,
            relativedelta(months=1),
        )
Example #3
def test_partitions_for_hourly_schedule_decorators_without_timezone(
        partition_hours_offset: int):
    with pendulum.test(
            to_timezone(create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="UTC"),
                        "US/Eastern")):

        context_without_time = build_schedule_context()

        start_date = datetime(year=2019, month=1, day=1)

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=start_date,
            execution_time=time(hour=0, minute=25),
            partition_hours_offset=partition_hours_offset,
        )
        def hourly_foo_schedule(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        _check_partitions(
            hourly_foo_schedule,
            HOURS_UNTIL_FEBRUARY_27 + 1 - partition_hours_offset,
            pendulum.instance(start_date, tz="UTC"),
            DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
            relativedelta(hours=1),
        )

        execution_data = hourly_foo_schedule.evaluate_tick(
            context_without_time)
        assert execution_data.run_requests
        assert len(execution_data.run_requests) == 1
        assert execution_data.run_requests[0].run_config == {
            "hourly_time":
            create_pendulum_time(
                year=2019, month=2, day=27,
                tz="UTC").subtract(hours=partition_hours_offset).isoformat()
        }

        valid_time = create_pendulum_time(year=2019,
                                          month=1,
                                          day=27,
                                          hour=1,
                                          minute=25,
                                          tz="UTC")
        context_with_valid_time = build_schedule_context(
            scheduled_execution_time=valid_time)

        execution_data = hourly_foo_schedule.evaluate_tick(
            context_with_valid_time)
        assert execution_data.run_requests
        assert len(execution_data.run_requests) == 1
        assert execution_data.run_requests[0].run_config == {
            "hourly_time":
            create_pendulum_time(
                year=2019, month=1, day=27, hour=1,
                tz="UTC").subtract(hours=partition_hours_offset).isoformat()
        }
Example #4
def _invalid_partition_selector(_context, _partition_set_def):
    return [
        Partition(
            value=create_pendulum_time(year=2019, month=1, day=27, hour=1, minute=25),
            name="made_up",
        )
    ]
Example #5
def test_time_based_partitioned_job():
    @daily_partitioned_config(start_date="2021-05-05")
    def my_daily_partitioned_config(_start, _end):
        return RUN_CONFIG

    assert my_daily_partitioned_config(None, None) == RUN_CONFIG

    @job(config=my_daily_partitioned_config)
    def my_job():
        my_op()

    freeze_datetime = create_pendulum_time(year=2021,
                                           month=5,
                                           day=6,
                                           hour=23,
                                           minute=59,
                                           second=59,
                                           tz="UTC")
    partition_keys = my_daily_partitioned_config.get_partition_keys(
        freeze_datetime)
    assert len(partition_keys) == 1

    partition_key = partition_keys[0]

    result = my_job.execute_in_process(partition_key=partition_key)
    assert result.success

    with pytest.raises(
            DagsterUnknownPartitionError,
            match="Could not find a partition with key `doesnotexist`"):
        result = my_job.execute_in_process(partition_key="doesnotexist")
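
The test above references RUN_CONFIG and my_op, which are defined elsewhere in the test module. A minimal sketch of what such definitions could look like (these names and values are illustrative assumptions, not the project's actual definitions):

from dagster import op

# Hypothetical stand-ins for names the snippet above uses but does not define.
RUN_CONFIG = {}  # any run config dict; the partitioned config simply returns it

@op
def my_op():
    pass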
Example #6
File: test_utils.py Project: prezi/dagster
def today_at_midnight(timezone_name="UTC"):
    check.str_param(timezone_name, "timezone_name")
    now = pendulum.now(timezone_name)
    return create_pendulum_time(now.year,
                                now.month,
                                now.day,
                                tz=now.timezone.name)
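
A brief usage sketch for the helper above, assuming only what the function itself defines (it truncates "now" in the requested timezone to midnight):

midnight_utc = today_at_midnight()                  # 00:00 today, UTC
midnight_central = today_at_midnight("US/Central")  # 00:00 today, US/Central
assert midnight_utc.hour == 0 and midnight_utc.minute == 0 and midnight_utc.second == 0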
Example #7
def test_partitions_for_weekly_schedule_decorators_with_timezone(
        partition_weeks_offset: int):
    with pendulum.test(
            create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")):

        start_date = datetime(year=2019, month=1, day=1)

        @weekly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_week=3,
            start_date=start_date,
            execution_time=time(9, 30),
            execution_timezone="US/Central",
            partition_weeks_offset=partition_weeks_offset,
        )
        def weekly_foo_schedule(weekly_time):
            return {"weekly_time": weekly_time.isoformat()}

        assert weekly_foo_schedule.execution_timezone == "US/Central"

        valid_weekly_time = create_pendulum_time(year=2019,
                                                 month=1,
                                                 day=30,
                                                 hour=9,
                                                 minute=30,
                                                 tz="US/Central")
        context_with_valid_time = build_schedule_context(
            scheduled_execution_time=valid_weekly_time)

        execution_data = weekly_foo_schedule.evaluate_tick(
            context_with_valid_time)
        assert execution_data.run_requests
        assert len(execution_data.run_requests) == 1
        assert execution_data.run_requests[0].run_config == {
            "weekly_time":
            create_pendulum_time(year=2019, month=1, day=29,
                                 tz="US/Central").subtract(
                                     weeks=partition_weeks_offset).isoformat()
        }

        _check_partitions(
            weekly_foo_schedule,
            9 - partition_weeks_offset,
            pendulum.instance(start_date, tz="US/Central"),
            DEFAULT_DATE_FORMAT,
            relativedelta(weeks=1),
        )
Example #8
def test_failure_recovery_between_multi_runs(instance, external_repo,
                                             crash_location, crash_signal):
    initial_datetime = create_pendulum_time(year=2019,
                                            month=2,
                                            day=28,
                                            hour=0,
                                            minute=0,
                                            second=0)
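    # .add() with no arguments returns a copy of the datetime (pendulum datetimes
    # are immutable), so frozen_datetime starts equal to initial_datetime.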
    frozen_datetime = initial_datetime.add()
    external_schedule = external_repo.get_external_schedule(
        "multi_run_schedule")
    with pendulum.test(frozen_datetime):
        instance.start_schedule(external_schedule)

        debug_crash_flags = {
            external_schedule.name: {
                crash_location: crash_signal
            }
        }

        scheduler_process = spawn_ctx.Process(
            target=_test_launch_scheduled_runs_in_subprocess,
            args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
        )
        scheduler_process.start()
        scheduler_process.join(timeout=60)

        assert scheduler_process.exitcode != 0

        wait_for_all_runs_to_start(instance)
        assert instance.get_runs_count() == 1
        validate_run_exists(instance.get_runs()[0], initial_datetime)

        ticks = instance.get_ticks(external_schedule.get_external_origin_id(),
                                   external_schedule.selector_id)
        assert len(ticks) == 1

    frozen_datetime = frozen_datetime.add(minutes=1)
    with pendulum.test(frozen_datetime):
        scheduler_process = spawn_ctx.Process(
            target=_test_launch_scheduled_runs_in_subprocess,
            args=[instance.get_ref(), frozen_datetime, None],
        )
        scheduler_process.start()
        scheduler_process.join(timeout=60)
        assert scheduler_process.exitcode == 0
        assert instance.get_runs_count() == 2
        validate_run_exists(instance.get_runs()[0], initial_datetime)
        ticks = instance.get_ticks(external_schedule.get_external_origin_id(),
                                   external_schedule.selector_id)
        assert len(ticks) == 1
        validate_tick(
            ticks[0],
            external_schedule,
            initial_datetime,
            TickStatus.SUCCESS,
            [run.run_id for run in instance.get_runs()],
        )
Example #9
def test_unloadable_schedule(graphql_context):
    instance = graphql_context.instance
    initial_datetime = create_pendulum_time(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    )

    running_origin = _get_unloadable_schedule_origin("unloadable_running")
    running_instigator_state = InstigatorState(
        running_origin,
        InstigatorType.SCHEDULE,
        InstigatorStatus.RUNNING,
        ScheduleInstigatorData(
            "0 0 * * *",
            pendulum.now("UTC").timestamp(),
        ),
    )

    stopped_origin = _get_unloadable_schedule_origin("unloadable_stopped")

    with pendulum.test(initial_datetime):
        instance.add_instigator_state(running_instigator_state)

        instance.add_instigator_state(
            InstigatorState(
                stopped_origin,
                InstigatorType.SCHEDULE,
                InstigatorStatus.STOPPED,
                ScheduleInstigatorData(
                    "0 0 * * *",
                    pendulum.now("UTC").timestamp(),
                ),
            ))

    result = execute_dagster_graphql(graphql_context, GET_UNLOADABLE_QUERY)
    assert len(
        result.data["unloadableInstigationStatesOrError"]["results"]) == 1
    assert (result.data["unloadableInstigationStatesOrError"]["results"][0]
            ["name"] == "unloadable_running")

    # Verify that we can stop the unloadable schedule
    stop_result = execute_dagster_graphql(
        graphql_context,
        STOP_SCHEDULES_QUERY,
        variables={
            "scheduleOriginId": running_instigator_state.instigator_origin_id,
            "scheduleSelectorId": running_instigator_state.selector_id,
        },
    )
    assert (stop_result.data["stopRunningSchedule"]["scheduleState"]["status"]
            == InstigatorStatus.STOPPED.value)
Example #10
def test_cron_schedule_advances_past_dst():
    # In Australia/Sydney, DST begins at 2AM on October 3, 2021. Verify that we
    # don't get stuck on the DST boundary.
    start_time = create_pendulum_time(year=2021,
                                      month=10,
                                      day=3,
                                      hour=1,
                                      minute=30,
                                      second=1,
                                      tz="Australia/Sydney")

    time_iter = schedule_execution_time_iterator(start_time.timestamp(),
                                                 "*/15 * * * *",
                                                 "Australia/Sydney")

    for _i in range(6):
        # 1:45, 3:00, 3:15, 3:30, 3:45, 4:00
        next_time = next(time_iter)

    assert (next_time.timestamp() == create_pendulum_time(
        year=2021, month=10, day=3, hour=4, tz="Australia/Sydney").timestamp())
Example #11
def test_run_record_timestamps():
    with get_instance() as instance:
        freeze_datetime = to_timezone(
            create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central"), "US/Pacific"
        )

        with pendulum.test(freeze_datetime):
            result = my_job.execute_in_process(instance=instance)
            records = instance.get_run_records(filters=PipelineRunsFilter(run_ids=[result.run_id]))
            assert len(records) == 1
            record = records[0]
            assert record.start_time == 1572670800.0
            assert record.end_time == 1572670800.0
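
A quick sanity check on the hard-coded timestamp above, using only the standard library: 2019-11-02 00:00 in US/Central falls before the fall DST transition, so it is CDT (UTC-5), i.e. 2019-11-02 05:00 UTC.

from datetime import datetime, timezone

# 2019-11-02 00:00 US/Central == 2019-11-02 05:00 UTC == 1572670800 epoch seconds
assert datetime(2019, 11, 2, 5, tzinfo=timezone.utc).timestamp() == 1572670800.0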
Example #12
def test_invalid_cron_string():
    start_time = create_pendulum_time(year=2022,
                                      month=2,
                                      day=21,
                                      hour=1,
                                      minute=30,
                                      second=1,
                                      tz="US/Pacific")

    with pytest.raises(CheckError):
        next(
            schedule_execution_time_iterator(start_time.timestamp(),
                                             "* * * * * *", "US/Pacific"))
Example #13
def test_get_unloadable_job(graphql_context):
    instance = graphql_context.instance
    initial_datetime = create_pendulum_time(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    )
    with pendulum.test(initial_datetime):
        instance.add_job_state(
            InstigatorState(
                _get_unloadable_schedule_origin("unloadable_running"),
                InstigatorType.SCHEDULE,
                InstigatorStatus.RUNNING,
                ScheduleInstigatorData(
                    "0 0 * * *",
                    pendulum.now("UTC").timestamp(),
                ),
            )
        )

        instance.add_job_state(
            InstigatorState(
                _get_unloadable_schedule_origin("unloadable_stopped"),
                InstigatorType.SCHEDULE,
                InstigatorStatus.STOPPED,
                ScheduleInstigatorData(
                    "0 0 * * *",
                    pendulum.now("UTC").timestamp(),
                ),
            )
        )

    result = execute_dagster_graphql(graphql_context, GET_UNLOADABLE_QUERY)
    assert len(result.data["unloadableInstigationStatesOrError"]["results"]) == 1
    assert (
        result.data["unloadableInstigationStatesOrError"]["results"][0]["name"]
        == "unloadable_running"
    )
Example #14
def test_vixie_cronstring_schedule():
    start_time = create_pendulum_time(year=2022,
                                      month=2,
                                      day=21,
                                      hour=1,
                                      minute=30,
                                      second=1,
                                      tz="US/Pacific")

    time_iter = schedule_execution_time_iterator(start_time.timestamp(),
                                                 "@hourly", "US/Pacific")
    for _i in range(6):
        # 2:00, 3:00, 4:00, 5:00, 6:00, 7:00
        next_time = next(time_iter)
    assert (next_time.timestamp() == create_pendulum_time(
        year=2022, month=2, day=21, hour=7, tz="US/Pacific").timestamp())

    time_iter = schedule_execution_time_iterator(start_time.timestamp(),
                                                 "@daily", "US/Pacific")
    for _i in range(6):
        # 2/22, 2/23, 2/24, 2/25, 2/26, 2/27
        next_time = next(time_iter)
    assert (next_time.timestamp() == create_pendulum_time(
        year=2022, month=2, day=27, tz="US/Pacific").timestamp())

    time_iter = schedule_execution_time_iterator(start_time.timestamp(),
                                                 "@weekly", "US/Pacific")
    for _i in range(6):
        # 2/27, 3/6, 3/13, 3/20, 3/27, 4/3
        next_time = next(time_iter)
    assert (next_time.timestamp() == create_pendulum_time(
        year=2022, month=4, day=3, tz="US/Pacific").timestamp())

    time_iter = schedule_execution_time_iterator(start_time.timestamp(),
                                                 "@monthly", "US/Pacific")
    for _i in range(6):
        # 3/1, 4/1, 5/1, 6/1, 7/1, 8/1
        next_time = next(time_iter)
    assert (next_time.timestamp() == create_pendulum_time(
        year=2022, month=8, day=1, tz="US/Pacific").timestamp())

    time_iter = schedule_execution_time_iterator(start_time.timestamp(),
                                                 "@yearly", "US/Pacific")
    for _i in range(6):
        # 1/1/2023, 1/1/2024, 1/1/2025, 1/1/2026, 1/1/2027, 1/1/2028
        next_time = next(time_iter)
    assert (next_time.timestamp() == create_pendulum_time(
        year=2028, month=1, day=1, tz="US/Pacific").timestamp())
Example #15
    def test_run_record_timestamps(self, storage):
        assert storage

        self._skip_in_memory(storage)

        @op
        def a():
            pass

        @job
        def my_job():
            a()

        with tempfile.TemporaryDirectory() as temp_dir:
            if storage._instance:  # pylint: disable=protected-access
                instance = storage._instance  # pylint: disable=protected-access
            else:
                instance = DagsterInstance(
                    instance_type=InstanceType.EPHEMERAL,
                    local_artifact_storage=LocalArtifactStorage(temp_dir),
                    run_storage=storage,
                    event_storage=InMemoryEventLogStorage(),
                    compute_log_manager=NoOpComputeLogManager(),
                    run_coordinator=DefaultRunCoordinator(),
                    run_launcher=SyncInMemoryRunLauncher(),
                )

            freeze_datetime = to_timezone(
                create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central"), "US/Pacific"
            )

            with pendulum.test(freeze_datetime):
                result = my_job.execute_in_process(instance=instance)
                records = instance.get_run_records(
                    filters=PipelineRunsFilter(run_ids=[result.run_id])
                )
                assert len(records) == 1
                record = records[0]
                assert record.start_time == freeze_datetime.timestamp()
                assert record.end_time == freeze_datetime.timestamp()
Example #16
def _test_backfill_in_subprocess(instance_ref, debug_crash_flags):
    execution_datetime = to_timezone(
        create_pendulum_time(
            year=2021,
            month=2,
            day=17,
        ),
        "US/Central",
    )
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime), create_test_daemon_workspace() as workspace:
                list(
                    execute_backfill_iteration(
                        instance,
                        workspace,
                        get_default_daemon_logger("BackfillDaemon"),
                        debug_crash_flags=debug_crash_flags,
                    )
                )
        finally:
            cleanup_test_instance(instance)
Example #17
def test_future_ticks_until(graphql_context):
    schedule_selector = infer_schedule_selector(graphql_context,
                                                "timezone_schedule")

    future_ticks_start_time = create_pendulum_time(
        2019, 2, 27, tz="US/Central").timestamp()

    # Start a single schedule; future tick run requests are only available for running schedules
    start_result = execute_dagster_graphql(
        graphql_context,
        START_SCHEDULES_QUERY,
        variables={"scheduleSelector": schedule_selector},
    )
    assert (start_result.data["startSchedule"]["scheduleState"]["status"] ==
            InstigatorStatus.RUNNING.value)

    future_ticks_start_time = create_pendulum_time(
        2019, 2, 27, tz="US/Central").timestamp()
    future_ticks_end_time = create_pendulum_time(2019, 3, 2,
                                                 tz="US/Central").timestamp()

    result = execute_dagster_graphql(
        graphql_context,
        GET_SCHEDULE_FUTURE_TICKS_UNTIL,
        variables={
            "scheduleSelector": schedule_selector,
            "ticksAfter": future_ticks_start_time,
            "ticksUntil": future_ticks_end_time,
        },
    )

    future_ticks = result.data["scheduleOrError"]["futureTicks"]

    assert future_ticks
    assert len(future_ticks["results"]) == 3

    timestamps = [
        future_tick["timestamp"] for future_tick in future_ticks["results"]
    ]

    assert timestamps == [
        create_pendulum_time(2019, 2, 27, tz="US/Central").timestamp(),
        create_pendulum_time(2019, 2, 28, tz="US/Central").timestamp(),
        create_pendulum_time(2019, 3, 1, tz="US/Central").timestamp(),
    ]
Example #18
def test_differing_timezones(instance, workspace, external_repo):
    # Two schedules, one using US/Central, the other using US/Eastern
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 2, 27, 23, 59, 59, tz="US/Eastern"), "US/Pacific"
    )
    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")
        external_eastern_schedule = external_repo.get_external_schedule(
            "daily_eastern_time_schedule"
        )

        schedule_origin = external_schedule.get_external_origin()
        eastern_origin = external_eastern_schedule.get_external_origin()

        instance.start_schedule(external_schedule)
        instance.start_schedule(external_eastern_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 0

        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 0

    # Past midnight eastern time, the eastern timezone schedule will run, but not the central timezone schedule
    freeze_datetime = freeze_datetime.add(minutes=1)
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 1
        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 1

        expected_datetime = to_timezone(
            create_pendulum_time(year=2019, month=2, day=28, tz="US/Eastern"), "UTC"
        )

        validate_tick(
            ticks[0],
            external_eastern_schedule,
            expected_datetime,
            TickStatus.SUCCESS,
            [run.run_id for run in instance.get_runs()],
        )

        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            expected_datetime,
            create_pendulum_time(2019, 2, 27, tz="US/Eastern"),
        )

    # Past midnight central time, the central timezone schedule will now run
    freeze_datetime = freeze_datetime.add(hours=1)
    with pendulum.test(freeze_datetime):

        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 2
        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 1

        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1

        expected_datetime = to_timezone(
            create_pendulum_time(year=2019, month=2, day=28, tz="US/Central"), "UTC"
        )

        validate_tick(
            ticks[0],
            external_schedule,
            expected_datetime,
            TickStatus.SUCCESS,
            [instance.get_runs()[0].run_id],
        )

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            expected_datetime,
            create_pendulum_time(2019, 2, 27, tz="US/Central"),
        )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 2
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.SUCCESS

        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.SUCCESS
Example #19
def test_execute_during_dst_transition_fall_back(instance, workspace, external_repo):
    # A schedule that runs daily during a time that occurs twice during a fall DST transition
    # only executes once for that day
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )

    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule(
            "daily_dst_transition_schedule_doubled_time"
        )
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    freeze_datetime = freeze_datetime.add(days=3)

    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3

        expected_datetimes_utc = [
            create_pendulum_time(2019, 11, 4, 7, 30, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 7, 30, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 2, 6, 30, 0, tz="UTC"),
        ]

        expected_partition_times = [
            create_pendulum_time(2019, 11, 3, tz="US/Central"),
            create_pendulum_time(2019, 11, 2, tz="US/Central"),
            create_pendulum_time(2019, 11, 1, tz="US/Central"),
        ]

        for i in range(3):
            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=expected_partition_times[i],
            )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3
Example #20
def test_execute_during_dst_transition_spring_forward(instance, workspace, external_repo):
    # Verify that a daily schedule that is supposed to execute at a time that is skipped
    # by the DST transition does not execute for that day
    # Day before DST
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 3, 9, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )

    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule(
            "daily_dst_transition_schedule_skipped_time"
        )
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    freeze_datetime = freeze_datetime.add(days=3)

    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3

        expected_datetimes_utc = [
            to_timezone(create_pendulum_time(2019, 3, 11, 2, 30, 0, tz="US/Central"), "UTC"),
            to_timezone(create_pendulum_time(2019, 3, 10, 3, 00, 0, tz="US/Central"), "UTC"),
            to_timezone(create_pendulum_time(2019, 3, 9, 2, 30, 0, tz="US/Central"), "UTC"),
        ]

        expected_partition_times = [
            create_pendulum_time(2019, 3, 10, tz="US/Central"),
            create_pendulum_time(2019, 3, 9, tz="US/Central"),
            create_pendulum_time(2019, 3, 8, tz="US/Central"),
        ]

        partition_set_def = the_repo.get_partition_set_def(
            "daily_dst_transition_schedule_skipped_time_partitions"
        )
        partition_names = partition_set_def.get_partition_names()

        assert "2019-03-08" in partition_names
        assert "2019-03-09" in partition_names
        assert "2019-03-10" in partition_names

        for i in range(3):
            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=expected_partition_times[i],
            )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3
Example #21
def test_daily_dst_fall_back(instance, workspace, external_repo):
    # Verify that a daily schedule still runs once per day during the fall DST transition
    # Night before DST
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 11, 3, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )

    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    freeze_datetime = freeze_datetime.add(days=2)

    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3

        # UTC time changed by one hour after the transition, still running daily at the same
        # time in CT
        expected_datetimes_utc = [
            create_pendulum_time(2019, 11, 5, 6, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 4, 6, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 5, 0, 0, tz="UTC"),
        ]

        expected_partition_times = [
            create_pendulum_time(2019, 11, 4, tz="US/Central"),
            create_pendulum_time(2019, 11, 3, tz="US/Central"),
            create_pendulum_time(2019, 11, 2, tz="US/Central"),
        ]

        for i in range(3):
            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=expected_partition_times[i],
            )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3
Example #22
def test_failure_after_run_launched(crash_location, crash_signal, capfd):
    frozen_datetime = to_timezone(
        create_pendulum_time(
            year=2019,
            month=2,
            day=28,
            hour=0,
            minute=0,
            second=0,
            tz="UTC",
        ),
        "US/Central",
    )
    with instance_with_sensors() as (
        instance,
        _grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(frozen_datetime):
            external_sensor = external_repo.get_external_sensor("run_key_sensor")
            instance.add_instigator_state(
                InstigatorState(
                    external_sensor.get_external_origin(),
                    InstigatorType.SENSOR,
                    InstigatorStatus.RUNNING,
                )
            )

            # create a run, launch but crash
            debug_crash_flags = {external_sensor.name: {crash_location: crash_signal}}
            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode != 0

            ticks = instance.get_ticks(external_sensor.get_external_origin_id())

            assert len(ticks) == 1
            assert ticks[0].status == TickStatus.STARTED
            assert instance.get_runs_count() == 1

            run = instance.get_runs()[0]
            wait_for_all_runs_to_start(instance)
            assert run.tags.get(SENSOR_NAME_TAG) == "run_key_sensor"
            assert run.tags.get(RUN_KEY_TAG) == "only_once"
            capfd.readouterr()

            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime.add(seconds=1), None],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode == 0
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            captured = capfd.readouterr()

            assert (
                'Skipping 1 run for sensor run_key_sensor already completed with run keys: ["only_once"]'
                in captured.out
            )

            ticks = instance.get_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            assert ticks[0].status == TickStatus.SKIPPED
Example #23
def test_non_utc_timezone_run(instance, workspace, external_repo):
    # Verify that the schedule runs at the expected time in a non-UTC timezone
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 2, 27, 23, 59, 59, tz="US/Central"), "US/Pacific"
    )
    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")

        schedule_origin = external_schedule.get_external_origin()

        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    freeze_datetime = freeze_datetime.add(seconds=2)
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 1
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1

        expected_datetime = to_timezone(
            create_pendulum_time(year=2019, month=2, day=28, tz="US/Central"), "UTC"
        )

        validate_tick(
            ticks[0],
            external_schedule,
            expected_datetime,
            TickStatus.SUCCESS,
            [run.run_id for run in instance.get_runs()],
        )

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            expected_datetime,
            create_pendulum_time(2019, 2, 27, tz="US/Central"),
        )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 1
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.SUCCESS
Example #24
def test_failure_recovery_after_run_created(instance, external_repo,
                                            crash_location, crash_signal):
    # Verify that if the scheduler crashes or is interrupted after a run is created,
    # it will just re-launch the already-created run when it runs again
    initial_datetime = create_pendulum_time(year=2019,
                                            month=2,
                                            day=27,
                                            hour=0,
                                            minute=0,
                                            second=0)
    frozen_datetime = initial_datetime.add()
    external_schedule = external_repo.get_external_schedule("simple_schedule")
    with pendulum.test(frozen_datetime):
        instance.start_schedule(external_schedule)

        debug_crash_flags = {
            external_schedule.name: {
                crash_location: crash_signal
            }
        }

        scheduler_process = spawn_ctx.Process(
            target=_test_launch_scheduled_runs_in_subprocess,
            args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
        )
        scheduler_process.start()
        scheduler_process.join(timeout=60)

        assert scheduler_process.exitcode != 0

        ticks = instance.get_ticks(external_schedule.get_external_origin_id(),
                                   external_schedule.selector_id)
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.STARTED

        assert instance.get_runs_count() == 1

        if crash_location == "RUN_CREATED":
            run = instance.get_runs()[0]
            # Run was created, but hasn't launched yet
            assert run.tags[
                SCHEDULED_EXECUTION_TIME_TAG] == frozen_datetime.isoformat()
            assert run.tags[PARTITION_NAME_TAG] == "2019-02-26"
            assert run.status == PipelineRunStatus.NOT_STARTED
        else:
            # The run was created and launched - running again should do nothing other than
            # moving the tick to success state.

            # The fact that we need to add this line indicates that there is still a
            # theoretical race condition - if the scheduler fails after launching a run
            # and then runs again between when the run was launched and when its status
            # is changed to STARTED by the executor, we could end up launching the same
            # run twice. Run queueing or some other way to immediately identify that a
            # run was launched would help eliminate this race condition. For now,
            # eliminate the possibility by waiting for the run to start before running
            # the scheduler again.
            wait_for_all_runs_to_start(instance)

            run = instance.get_runs()[0]
            validate_run_exists(instance.get_runs()[0], frozen_datetime,
                                create_pendulum_time(2019, 2, 26))

    frozen_datetime = frozen_datetime.add(minutes=5)
    with pendulum.test(frozen_datetime):

        # Running again just launches the existing run and marks the tick as success
        scheduler_process = spawn_ctx.Process(
            target=_test_launch_scheduled_runs_in_subprocess,
            args=[instance.get_ref(), frozen_datetime, None],
        )
        scheduler_process.start()
        scheduler_process.join(timeout=60)
        assert scheduler_process.exitcode == 0

        assert instance.get_runs_count() == 1
        wait_for_all_runs_to_start(instance)
        validate_run_exists(instance.get_runs()[0], initial_datetime,
                            create_pendulum_time(2019, 2, 26))

        ticks = instance.get_ticks(external_schedule.get_external_origin_id(),
                                   external_schedule.selector_id)
        assert len(ticks) == 1
        validate_tick(
            ticks[0],
            external_schedule,
            initial_datetime,
            TickStatus.SUCCESS,
            [instance.get_runs()[0].run_id],
        )
Example #25
def test_failure_recovery_after_tick_success(instance, external_repo,
                                             crash_location, crash_signal):
    initial_datetime = create_pendulum_time(year=2019,
                                            month=2,
                                            day=27,
                                            hour=0,
                                            minute=0,
                                            second=0)
    frozen_datetime = initial_datetime.add()
    external_schedule = external_repo.get_external_schedule("simple_schedule")
    with pendulum.test(frozen_datetime):
        instance.start_schedule(external_schedule)

        debug_crash_flags = {
            external_schedule.name: {
                crash_location: crash_signal
            }
        }

        scheduler_process = spawn_ctx.Process(
            target=_test_launch_scheduled_runs_in_subprocess,
            args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
        )
        scheduler_process.start()
        scheduler_process.join(timeout=60)

        assert scheduler_process.exitcode != 0

        # As above, there's a possible race condition here: if the scheduler crashes
        # right after the launch and re-runs before the run actually starts, it could
        # launch the same run twice
        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 1
        validate_run_exists(instance.get_runs()[0], initial_datetime,
                            create_pendulum_time(2019, 2, 26))

        ticks = instance.get_ticks(external_schedule.get_external_origin_id(),
                                   external_schedule.selector_id)
        assert len(ticks) == 1

        if crash_signal == get_terminate_signal():
            run_ids = []
        else:
            run_ids = [run.run_id for run in instance.get_runs()]

        validate_tick(
            ticks[0],
            external_schedule,
            initial_datetime,
            TickStatus.STARTED,
            run_ids,
        )

    frozen_datetime = frozen_datetime.add(minutes=1)
    with pendulum.test(frozen_datetime):
        # Running again just marks the tick as success since the run has already started
        scheduler_process = spawn_ctx.Process(
            target=_test_launch_scheduled_runs_in_subprocess,
            args=[instance.get_ref(), frozen_datetime, None],
        )
        scheduler_process.start()
        scheduler_process.join(timeout=60)
        assert scheduler_process.exitcode == 0

        assert instance.get_runs_count() == 1
        validate_run_exists(instance.get_runs()[0], initial_datetime,
                            create_pendulum_time(2019, 2, 26))

        ticks = instance.get_ticks(external_schedule.get_external_origin_id(),
                                   external_schedule.selector_id)
        assert len(ticks) == 1
        validate_tick(
            ticks[0],
            external_schedule,
            initial_datetime,
            TickStatus.SUCCESS,
            [instance.get_runs()[0].run_id],
        )
Example #26
def test_different_days_in_different_timezones(instance, workspace, external_repo):
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 2, 27, 22, 59, 59, tz="US/Central"), "US/Pacific"
    )
    with pendulum.test(freeze_datetime):
        # Runs every day at 11PM (CST)
        external_schedule = external_repo.get_external_schedule("daily_late_schedule")
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    freeze_datetime = freeze_datetime.add(seconds=2)
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 1
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1

        expected_datetime = to_timezone(
            create_pendulum_time(year=2019, month=2, day=27, hour=23, tz="US/Central"), "UTC"
        )

        validate_tick(
            ticks[0],
            external_schedule,
            expected_datetime,
            TickStatus.SUCCESS,
            [instance.get_runs()[0].run_id],
        )

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            expected_datetime,
            create_pendulum_time(2019, 2, 26, tz="US/Central"),
        )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 1
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.SUCCESS
Example #27
def test_failure_recovery_before_run_created(instance, external_repo,
                                             crash_location, crash_signal):
    # Verify that if the scheduler crashes or is interrupted before a run is created,
    # it will create exactly one tick/run when it is re-launched
    initial_datetime = to_timezone(
        create_pendulum_time(year=2019,
                             month=2,
                             day=27,
                             hour=0,
                             minute=0,
                             second=0,
                             tz="UTC"),
        "US/Central",
    )

    frozen_datetime = initial_datetime.add()

    external_schedule = external_repo.get_external_schedule("simple_schedule")
    with pendulum.test(frozen_datetime):
        instance.start_schedule(external_schedule)

        debug_crash_flags = {
            external_schedule.name: {
                crash_location: crash_signal
            }
        }

        scheduler_process = spawn_ctx.Process(
            target=_test_launch_scheduled_runs_in_subprocess,
            args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
        )
        scheduler_process.start()
        scheduler_process.join(timeout=60)

        assert scheduler_process.exitcode != 0

        ticks = instance.get_ticks(external_schedule.get_external_origin_id(),
                                   external_schedule.selector_id)
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.STARTED

        assert instance.get_runs_count() == 0

    frozen_datetime = frozen_datetime.add(minutes=5)
    with pendulum.test(frozen_datetime):
        scheduler_process = spawn_ctx.Process(
            target=_test_launch_scheduled_runs_in_subprocess,
            args=[instance.get_ref(), frozen_datetime, None],
        )
        scheduler_process.start()
        scheduler_process.join(timeout=60)
        assert scheduler_process.exitcode == 0

        assert instance.get_runs_count() == 1
        wait_for_all_runs_to_start(instance)
        validate_run_exists(
            instance.get_runs()[0],
            execution_time=initial_datetime,
            partition_time=create_pendulum_time(2019, 2, 26),
        )

        ticks = instance.get_ticks(external_schedule.get_external_origin_id(),
                                   external_schedule.selector_id)
        assert len(ticks) == 1
        validate_tick(
            ticks[0],
            external_schedule,
            initial_datetime,
            TickStatus.SUCCESS,
            [instance.get_runs()[0].run_id],
        )
Example #28
def test_hourly_dst_spring_forward(instance, workspace, external_repo):
    # Verify that an hourly schedule still runs hourly during the spring DST transition
    # 1AM CST
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 3, 10, 1, 0, 0, tz="US/Central"), "US/Pacific"
    )

    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("hourly_central_time_schedule")
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    freeze_datetime = freeze_datetime.add(hours=2)

    # DST has now happened, 2 hours later it is 4AM CDT
    # Should be 3 runs: 1AM CST, 3AM CDT, 4AM CDT
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3

        expected_datetimes_utc = [
            to_timezone(create_pendulum_time(2019, 3, 10, 4, 0, 0, tz="US/Central"), "UTC"),
            to_timezone(create_pendulum_time(2019, 3, 10, 3, 0, 0, tz="US/Central"), "UTC"),
            to_timezone(create_pendulum_time(2019, 3, 10, 1, 0, 0, tz="US/Central"), "UTC"),
        ]

        for i in range(3):
            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=to_timezone(expected_datetimes_utc[i], "US/Central").subtract(
                    hours=1
                ),
                partition_fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3
Example #29
def test_failure_before_run_created(crash_location, crash_signal, capfd):
    frozen_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=28, hour=0, minute=0, second=1, tz="UTC"),
        "US/Central",
    )

    with instance_with_sensors() as (
        instance,
        _grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(frozen_datetime):
            external_sensor = external_repo.get_external_sensor("simple_sensor")
            instance.add_instigator_state(
                InstigatorState(
                    external_sensor.get_external_origin(),
                    InstigatorType.SENSOR,
                    InstigatorStatus.RUNNING,
                )
            )

            # create a tick
            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, None],
            )
            launch_process.start()
            launch_process.join(timeout=60)
            ticks = instance.get_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == TickStatus.SKIPPED
            capfd.readouterr()

            # create a starting tick, but crash
            debug_crash_flags = {external_sensor.name: {crash_location: crash_signal}}
            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime.add(seconds=31), debug_crash_flags],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode != 0

            capfd.readouterr()

            ticks = instance.get_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            assert ticks[0].status == TickStatus.STARTED
            assert not int(ticks[0].timestamp) % 2  # skip condition for simple_sensor
            assert instance.get_runs_count() == 0

            # create another tick, but ensure that the last evaluation time used is from the first,
            # successful tick rather than the failed tick
            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime.add(seconds=62), None],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode == 0
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            assert (
                get_logger_output_from_capfd(capfd, "dagster.daemon.SensorDaemon")
                == f"""2019-02-27 18:01:03 -0600 - dagster.daemon.SensorDaemon - INFO - Checking for new runs for sensor: simple_sensor
2019-02-27 18:01:03 -0600 - dagster.daemon.SensorDaemon - INFO - Launching run for simple_sensor
2019-02-27 18:01:03 -0600 - dagster.daemon.SensorDaemon - INFO - Completed launch of run {run.run_id} for simple_sensor"""
            )

            ticks = instance.get_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 3
            assert ticks[0].status == TickStatus.SUCCESS
Example #30
def test_hourly_dst_fall_back(instance, workspace, external_repo):
    # Verify that an hourly schedule still runs hourly during the fall DST transition
    # 12:30 AM CDT
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 11, 3, 0, 30, 0, tz="US/Central"), "US/Pacific"
    )

    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("hourly_central_time_schedule")
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    freeze_datetime = freeze_datetime.add(hours=4)

    # DST has now happened, 4 hours later it is 3:30AM CST
    # Should be 4 runs: 1AM CDT, 1AM CST, 2AM CST, 3AM CST
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 4
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 4

        expected_datetimes_utc = [
            create_pendulum_time(2019, 11, 3, 9, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 8, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 7, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 6, 0, 0, tz="UTC"),
        ]

        expected_ct_times = [
            "2019-11-03T03:00:00-06:00",  # 3 AM CST
            "2019-11-03T02:00:00-06:00",  # 2 AM CST
            "2019-11-03T01:00:00-06:00",  # 1 AM CST
            "2019-11-03T01:00:00-05:00",  # 1 AM CDT
        ]

        for i in range(4):
            assert (
                to_timezone(expected_datetimes_utc[i], "US/Central").isoformat()
                == expected_ct_times[i]
            )

            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=to_timezone(expected_datetimes_utc[i], "US/Central").subtract(
                    hours=1
                ),
                partition_fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 4
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 4