def test_scheduled_jobs():
    """Evaluate a ``ScheduleDefinition`` that targets a job with default config.

    A single tick is evaluated with a context that carries no scheduled
    execution time; the test expects exactly one run request and passes its
    run config to ``validate_run_config`` together with the job.
    """
    from dagster import Field, String

    @op(config_schema={"foo": Field(String)})
    def foo_op(context):
        pass

    DEFAULT_FOO_CONFIG = {"ops": {"foo_op": {"config": {"foo": "bar"}}}}

    @job(config=DEFAULT_FOO_CONFIG)
    def foo_job():
        foo_op()

    my_schedule = ScheduleDefinition(name="my_schedule", cron_schedule="* * * * *", job=foo_job)

    # Only the time-less context is ever evaluated, so it is the only one built.
    context_without_time = build_schedule_context()

    execution_data = my_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1

    validate_run_config(foo_job, execution_data.run_requests[0].run_config)
def test_partitions_for_hourly_schedule_decorators_without_timezone(
    partition_hours_offset: int,
):
    """Exercise an ``@hourly_schedule`` declared without an execution timezone.

    Inside a ``pendulum.test`` block the schedule's partitions are checked via
    ``_check_partitions``; then two ticks are evaluated, one with a context that
    has no scheduled time and one with an explicit scheduled time.

    Args:
        partition_hours_offset: Forwarded to the decorator and subtracted (in
            hours) from the expected partition times. Presumably supplied by a
            parametrization that is not visible in this block.
    """
    frozen_now = to_timezone(
        create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="UTC"), "US/Eastern"
    )
    with pendulum.test(frozen_now):
        untimed_context = build_schedule_context()
        first_day = datetime(year=2019, month=1, day=1)

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=first_day,
            execution_time=time(hour=0, minute=25),
            partition_hours_offset=partition_hours_offset,
        )
        def hourly_foo_schedule(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        expected_partition_count = HOURS_UNTIL_FEBRUARY_27 + 1 - partition_hours_offset
        _check_partitions(
            hourly_foo_schedule,
            expected_partition_count,
            pendulum.instance(first_day, tz="UTC"),
            DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
            relativedelta(hours=1),
        )

        # Tick evaluated with a context that carries no scheduled time.
        tick = hourly_foo_schedule.evaluate_tick(untimed_context)
        assert tick.run_requests
        assert len(tick.run_requests) == 1
        expected_hour = create_pendulum_time(year=2019, month=2, day=27, tz="UTC").subtract(
            hours=partition_hours_offset
        )
        assert tick.run_requests[0].run_config == {"hourly_time": expected_hour.isoformat()}

        # Tick evaluated with an explicit scheduled execution time.
        scheduled_for = create_pendulum_time(
            year=2019, month=1, day=27, hour=1, minute=25, tz="UTC"
        )
        timed_context = build_schedule_context(scheduled_execution_time=scheduled_for)
        tick = hourly_foo_schedule.evaluate_tick(timed_context)
        assert tick.run_requests
        assert len(tick.run_requests) == 1
        expected_hour = create_pendulum_time(
            year=2019, month=1, day=27, hour=1, tz="UTC"
        ).subtract(hours=partition_hours_offset)
        assert tick.run_requests[0].run_config == {"hourly_time": expected_hour.isoformat()}
def test_partitions_for_monthly_schedule_decorators_without_timezone(
    partition_months_offset: int,
):
    """Exercise a ``@monthly_schedule`` declared without an execution timezone.

    Inside a ``pendulum.test`` block, a tick is evaluated first with an explicit
    scheduled time and then with a context that has none; both are expected to
    yield the same partition. The partitions are finally checked through
    ``_check_partitions``.

    Args:
        partition_months_offset: Forwarded to the decorator and subtracted (in
            months) from the expected partition time. Presumably supplied by a
            parametrization that is not visible in this block.
    """
    frozen_now = to_timezone(
        create_pendulum_time(2019, 3, 27, 0, 1, 1, tz="UTC"), "US/Eastern"
    )
    with pendulum.test(frozen_now):
        untimed_context = build_schedule_context()
        first_day = datetime(year=2019, month=1, day=1)

        @monthly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_month=3,
            start_date=first_day,
            execution_time=time(9, 30),
            partition_months_offset=partition_months_offset,
        )
        def monthly_foo_schedule(monthly_time):
            return {"monthly_time": monthly_time.isoformat()}

        # Tick evaluated with an explicit scheduled execution time.
        scheduled_for = create_pendulum_time(
            year=2019, month=3, day=3, hour=9, minute=30, tz="UTC"
        )
        timed_context = build_schedule_context(scheduled_execution_time=scheduled_for)
        tick = monthly_foo_schedule.evaluate_tick(timed_context)
        assert tick.run_requests
        assert len(tick.run_requests) == 1
        expected_month = create_pendulum_time(year=2019, month=3, day=1, tz="UTC").subtract(
            months=partition_months_offset
        )
        assert tick.run_requests[0].run_config == {"monthly_time": expected_month.isoformat()}

        # Tick evaluated with a context that carries no scheduled time.
        tick = monthly_foo_schedule.evaluate_tick(untimed_context)
        assert tick.run_requests
        assert len(tick.run_requests) == 1
        expected_month = create_pendulum_time(year=2019, month=3, day=1, tz="UTC").subtract(
            months=partition_months_offset
        )
        assert tick.run_requests[0].run_config == {"monthly_time": expected_month.isoformat()}

        expected_partition_count = 3 - partition_months_offset
        _check_partitions(
            monthly_foo_schedule,
            expected_partition_count,
            pendulum.instance(first_day, tz="UTC"),
            DEFAULT_MONTHLY_FORMAT,
            relativedelta(months=1),
        )
def test_instance_access():
    """Check ``.instance`` on schedule contexts built with and without an instance.

    Without an instance, reading the attribute is expected to raise
    ``DagsterInvariantViolationError``; with one, a ``DagsterInstance`` is
    expected back.
    """
    missing_instance_message = (
        "Attempted to initialize dagster instance, but no instance reference was provided."
    )
    with pytest.raises(DagsterInvariantViolationError, match=missing_instance_message):
        build_schedule_context().instance  # pylint: disable=expression-not-assigned

    with instance_for_test() as instance:
        context = build_schedule_context(instance)
        assert isinstance(context.instance, DagsterInstance)
def test_cron_schedule_invocation_all_args():
    """Invoke the factory-built schedules directly and expect empty run config.

    The context-taking schedule is called positionally and via the ``_``
    keyword, each time with ``None`` and with a built schedule context. The
    context-free schedule is called with no arguments.
    """
    schedule_with_context = cron_test_schedule_factory_context()

    # Positional invocation.
    positional_none = schedule_with_context(None)
    assert positional_none == {}
    positional_context = schedule_with_context(build_schedule_context())
    assert positional_context == {}

    # Keyword invocation through the ``_`` parameter.
    keyword_none = schedule_with_context(_=None)
    assert keyword_none == {}
    keyword_context = schedule_with_context(_=build_schedule_context())
    assert keyword_context == {}

    schedule_without_context = cron_test_schedule_factory_no_context()
    assert schedule_without_context() == {}
def test_scheduler():
    """Check ``get_execution_data`` for an echoing schedule and a skipping schedule.

    ``echo_time_schedule`` returns the scheduled execution time as an ISO string
    (or ``""`` when the context has none). ``always_skip_schedule`` has a
    ``should_execute`` that always returns ``False`` and is expected to produce
    a single ``SkipReason``.
    """

    @schedule(cron_schedule="* * * * *", pipeline_name="foo_pipeline")
    def echo_time_schedule(context):
        return {
            "echo_time": (
                context.scheduled_execution_time.isoformat()
                if context.scheduled_execution_time
                else ""
            )
        }

    @schedule(
        cron_schedule="* * * * *", pipeline_name="foo_pipeline", should_execute=lambda x: False
    )
    def always_skip_schedule(context):
        return {}

    with instance_for_test() as instance:
        context_without_time = build_schedule_context(instance)

        execution_time = datetime(year=2019, month=2, day=27)
        context_with_time = build_schedule_context(instance, execution_time)

        # No scheduled time on the context: the echoed value is empty.
        execution_data = echo_time_schedule.get_execution_data(context_without_time)
        assert len(execution_data) == 1
        assert isinstance(execution_data[0], RunRequest)
        assert execution_data[0].run_config == {"echo_time": ""}

        # Scheduled time present: it is echoed back in ISO format.
        execution_data = echo_time_schedule.get_execution_data(context_with_time)
        assert len(execution_data) == 1
        assert isinstance(execution_data[0], RunRequest)
        assert execution_data[0].run_config == {"echo_time": execution_time.isoformat()}

        # should_execute returns False, so a skip reason is expected instead.
        execution_data = always_skip_schedule.get_execution_data(context_with_time)
        assert len(execution_data) == 1
        assert isinstance(execution_data[0], SkipReason)
        assert (
            execution_data[0].skip_message
            == "should_execute function for always_skip_schedule returned false."
        )
def test_scheduler():
    """Check ``evaluate_tick`` for an echoing schedule and a skipping schedule.

    ``echo_time_schedule`` returns the scheduled execution time as an ISO string
    (or ``""`` when the context has none). ``always_skip_schedule`` takes no
    context, has a ``should_execute`` that always returns ``False``, and is
    expected to produce a skip message.
    """

    @schedule(cron_schedule="* * * * *", pipeline_name="foo_pipeline")
    def echo_time_schedule(context):
        return {
            "echo_time": (
                context.scheduled_execution_time.isoformat()
                if context.scheduled_execution_time
                else ""
            )
        }

    @schedule(
        cron_schedule="* * * * *", pipeline_name="foo_pipeline", should_execute=lambda x: False
    )
    def always_skip_schedule():
        return {}

    context_without_time = build_schedule_context()

    execution_time = datetime(year=2019, month=2, day=27)
    context_with_time = build_schedule_context(scheduled_execution_time=execution_time)

    # No scheduled time on the context: the echoed value is empty.
    execution_data = echo_time_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1
    assert execution_data.run_requests[0].run_config == {"echo_time": ""}

    # Scheduled time present: it is echoed back in ISO format.
    execution_data = echo_time_schedule.evaluate_tick(context_with_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1
    assert execution_data.run_requests[0].run_config == {"echo_time": execution_time.isoformat()}

    # should_execute returns False, so a skip message is expected instead.
    execution_data = always_skip_schedule.evaluate_tick(context_with_time)
    assert execution_data.skip_message
    assert (
        execution_data.skip_message
        == "should_execute function for always_skip_schedule returned false."
    )
def test_partitions_for_hourly_schedule_decorators_without_timezone():
    """Exercise an ``@hourly_schedule`` without a timezone via ``get_execution_data``.

    Runs against a test instance inside a ``pendulum.test`` block: partitions
    are checked through ``_check_partitions``, then execution data is requested
    once without a scheduled time and once with an explicit one.
    """
    frozen_now = to_timezone(
        create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="UTC"), "US/Eastern"
    )
    with instance_for_test() as instance, pendulum.test(frozen_now):
        untimed_context = build_schedule_context(instance)
        first_day = datetime(year=2019, month=1, day=1)

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=first_day,
            execution_time=time(hour=0, minute=25),
        )
        def hourly_foo_schedule(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        _check_partitions(
            hourly_foo_schedule,
            HOURS_UNTIL_FEBRUARY_27,
            pendulum.instance(first_day, tz="UTC"),
            DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
            relativedelta(hours=1),
        )

        # Context without a scheduled time.
        results = hourly_foo_schedule.get_execution_data(untimed_context)
        assert len(results) == 1
        only_result = results[0]
        assert isinstance(only_result, RunRequest)
        expected_hour = create_pendulum_time(year=2019, month=2, day=26, hour=23, tz="UTC")
        assert only_result.run_config == {"hourly_time": expected_hour.isoformat()}

        # Context with an explicit scheduled time.
        scheduled_for = create_pendulum_time(
            year=2019, month=1, day=27, hour=1, minute=25, tz="UTC"
        )
        timed_context = build_schedule_context(instance, scheduled_for)
        results = hourly_foo_schedule.get_execution_data(timed_context)
        assert len(results) == 1
        only_result = results[0]
        assert isinstance(only_result, RunRequest)
        expected_hour = create_pendulum_time(year=2019, month=1, day=27, hour=0, tz="UTC")
        assert only_result.run_config == {"hourly_time": expected_hour.isoformat()}
def test_daily_schedule_with_offsets():
    """Check a daily partitioned config with hour and minute offsets.

    Verifies the first partition keys, the first partition's time window, the
    run config for the first key, and the cron string and tick output of the
    schedule built from the config. The schedule is finally placed in a
    ``@repository`` definition.
    """

    @daily_partitioned_config(start_date="2021-05-05", minute_offset=15, hour_offset=2)
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    partition_keys = my_partitioned_config.get_partition_keys()
    assert partition_keys[0] == "2021-05-05"
    assert partition_keys[1] == "2021-05-06"

    all_partitions = my_partitioned_config.partitions_def.get_partitions()
    expected_window = time_window("2021-05-05T02:15:00", "2021-05-06T02:15:00")
    assert all_partitions[0].value == expected_window

    first_key_config = my_partitioned_config.get_run_config_for_partition_key(partition_keys[0])
    assert first_key_config == {
        "start": "2021-05-05T02:15:00+00:00",
        "end": "2021-05-06T02:15:00+00:00",
    }

    partition_schedule = schedule_for_partitioned_config(
        my_partitioned_config, hour_of_day=9, minute_of_hour=30
    )
    assert partition_schedule.cron_schedule == "30 9 * * *"

    tick_context = build_schedule_context(
        scheduled_execution_time=datetime(2021, 5, 8, 9, 30)
    )
    tick = partition_schedule.evaluate_tick(tick_context)
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-07T02:15:00+00:00",
        "end": "2021-05-08T02:15:00+00:00",
    }

    @repository
    def _repo():
        return [partition_schedule]
def test_monthly_schedule():
    """Check a monthly partitioned config and the schedule built from it.

    Verifies the first partition keys, the first partition's time window, the
    run config for the first key, and the cron string and tick output of the
    schedule. The schedule is finally placed in a ``@repository`` definition.
    """

    @monthly_partitioned_config(start_date="2021-05-05")
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    partition_keys = my_partitioned_config.get_partition_keys()
    assert partition_keys[0] == "2021-06-01"
    assert partition_keys[1] == "2021-07-01"

    all_partitions = my_partitioned_config.partitions_def.get_partitions()
    expected_window = time_window("2021-06-01", "2021-07-01")
    assert all_partitions[0].value == expected_window

    first_key_config = my_partitioned_config.get_run_config_for_partition_key(partition_keys[0])
    assert first_key_config == {
        "start": "2021-06-01T00:00:00+00:00",
        "end": "2021-07-01T00:00:00+00:00",
    }

    partition_schedule = schedule_for_partitioned_config(
        my_partitioned_config, hour_of_day=9, minute_of_hour=30, day_of_month=2
    )
    assert partition_schedule.cron_schedule == "30 9 2 * *"

    scheduled_for = datetime.strptime("2021-07-21", DATE_FORMAT)
    tick_context = build_schedule_context(scheduled_execution_time=scheduled_for)
    tick = partition_schedule.evaluate_tick(tick_context)
    assert tick.run_requests[0].run_config == {
        "start": "2021-06-01T00:00:00+00:00",
        "end": "2021-07-01T00:00:00+00:00",
    }

    @repository
    def _repo():
        return [partition_schedule]
def test_hourly_schedule_with_offsets():
    """Check an hourly partitioned config with a minute offset.

    Verifies the first partition keys, the first partition's time window, the
    run config for the first key, and the cron string and tick output of the
    schedule built from the config. The schedule is finally placed in a
    ``@repository`` definition.
    """

    @hourly_partitioned_config(start_date=datetime(2021, 5, 5), minute_offset=20)
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    partition_keys = my_partitioned_config.get_partition_keys()
    assert partition_keys[0] == "2021-05-05-00:20"
    assert partition_keys[1] == "2021-05-05-01:20"

    all_partitions = my_partitioned_config.partitions_def.get_partitions()
    expected_window = time_window("2021-05-05T00:20:00", "2021-05-05T01:20:00")
    assert all_partitions[0].value == expected_window

    first_key_config = my_partitioned_config.get_run_config_for_partition_key(partition_keys[0])
    assert first_key_config == {
        "start": "2021-05-05T00:20:00+00:00",
        "end": "2021-05-05T01:20:00+00:00",
    }

    partition_schedule = schedule_for_partitioned_config(my_partitioned_config, minute_of_hour=30)
    assert partition_schedule.cron_schedule == "30 * * * *"

    scheduled_for = datetime.strptime("2021-05-08", DATE_FORMAT)
    tick_context = build_schedule_context(scheduled_execution_time=scheduled_for)
    tick = partition_schedule.evaluate_tick(tick_context)
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-07T22:20:00+00:00",
        "end": "2021-05-07T23:20:00+00:00",
    }

    @repository
    def _repo():
        return [partition_schedule]
def test_hourly_schedule():
    """Check an hourly partitioned config and the schedule built from it.

    The decorated config is first called directly with two datetimes, then a
    schedule is built from it and its cron string and tick output are checked.
    The schedule is finally placed in a ``@repository`` definition.
    """

    @hourly_partitioned_config(start_date=datetime(2021, 5, 5))
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    # Direct invocation of the decorated config.
    direct_result = my_partitioned_config(datetime(2021, 5, 7, 23), datetime(2021, 5, 8))
    assert direct_result == {
        "start": "2021-05-07 23:00:00",
        "end": "2021-05-08 00:00:00",
    }

    partition_schedule = schedule_for_partitioned_config(my_partitioned_config, minute_of_hour=30)
    assert partition_schedule.cron_schedule == "30 * * * *"

    scheduled_for = datetime.strptime("2021-05-08", DATE_FORMAT)
    tick_context = build_schedule_context(scheduled_execution_time=scheduled_for)
    tick = partition_schedule.evaluate_tick(tick_context)
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-07T23:00:00+00:00",
        "end": "2021-05-08T00:00:00+00:00",
    }

    @repository
    def _repo():
        return [partition_schedule]
def test_weekly_schedule():
    """Check a weekly partitioned config and the schedule built from it.

    The decorated config is first called directly with two datetimes, then a
    schedule is built from it and its cron string and tick output are checked.
    The schedule is finally placed in a ``@repository`` definition.
    """

    @weekly_partitioned_config(start_date="2021-05-05")
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    # Direct invocation of the decorated config.
    direct_result = my_partitioned_config(datetime(2021, 12, 13), datetime(2021, 12, 19))
    assert direct_result == {
        "start": "2021-12-13 00:00:00",
        "end": "2021-12-19 00:00:00",
    }

    partition_schedule = schedule_for_partitioned_config(
        my_partitioned_config, hour_of_day=9, minute_of_hour=30, day_of_week=2
    )
    assert partition_schedule.cron_schedule == "30 9 * * 2"

    scheduled_for = datetime.strptime("2021-05-21", DATE_FORMAT)
    tick_context = build_schedule_context(scheduled_execution_time=scheduled_for)
    tick = partition_schedule.evaluate_tick(tick_context)
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-09T00:00:00+00:00",
        "end": "2021-05-16T00:00:00+00:00",
    }

    @repository
    def _repo():
        return [partition_schedule]
def test_monthly_schedule():
    """Check a monthly partitioned config and the schedule built from it.

    The decorated config is first called directly with two datetimes, then a
    schedule is built from it and its cron string and tick output are checked.
    The schedule is finally placed in a ``@repository`` definition.
    """

    @monthly_partitioned_config(start_date="2021-05-05")
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    # Direct invocation of the decorated config.
    direct_result = my_partitioned_config(datetime(2021, 11, 1), datetime(2021, 11, 30))
    assert direct_result == {
        "start": "2021-11-01 00:00:00",
        "end": "2021-11-30 00:00:00",
    }

    partition_schedule = schedule_for_partitioned_config(
        my_partitioned_config, hour_of_day=9, minute_of_hour=30, day_of_month=2
    )
    assert partition_schedule.cron_schedule == "30 9 2 * *"

    scheduled_for = datetime.strptime("2021-07-21", DATE_FORMAT)
    tick_context = build_schedule_context(scheduled_execution_time=scheduled_for)
    tick = partition_schedule.evaluate_tick(tick_context)
    assert tick.run_requests[0].run_config == {
        "start": "2021-06-01T00:00:00+00:00",
        "end": "2021-07-01T00:00:00+00:00",
    }

    @repository
    def _repo():
        return [partition_schedule]
def test_partitions_outside_schedule_range():
    """Check the skip messages for ticks whose partition is out of range.

    Two monthly schedules are evaluated at 2021-01-01 UTC: one whose start date
    makes the partition too early, and one whose end date makes it too late.
    """
    tick_time = create_pendulum_time(year=2021, month=1, day=1, tz="UTC")
    tick_context = build_schedule_context(scheduled_execution_time=tick_time)

    @monthly_schedule(
        pipeline_name="too early",
        start_date=create_pendulum_time(year=2021, month=1, day=1, tz="UTC"),
    )
    def too_early(monthly_time):
        return {"monthly_time": monthly_time.isoformat()}

    early_tick = too_early.evaluate_tick(tick_context)
    expected_early_message = (
        "Your partition (2020-12-01T00:00:00+00:00) is before the beginning of "
        "the partition set (2021-01-01T00:00:00+00:00). "
        "Verify your schedule's start_date is correct."
    )
    assert early_tick.skip_message == expected_early_message

    @monthly_schedule(
        pipeline_name="too late",
        start_date=create_pendulum_time(year=2020, month=1, day=1, tz="UTC"),
        end_date=create_pendulum_time(year=2020, month=12, day=1, tz="UTC"),
        partition_months_offset=0,
    )
    def too_late(monthly_time):
        return {"monthly_time": monthly_time.isoformat()}

    late_tick = too_late.evaluate_tick(tick_context)
    expected_late_message = (
        "Your partition (2021-01-01T00:00:00+00:00) is after the end of "
        "the partition set (2020-12-01T00:00:00+00:00). "
        "Verify your schedule's end_date is correct."
    )
    assert late_tick.skip_message == expected_late_message
def test_my_execution_time_schedule():
    """Validate the run config from ``my_execution_time_schedule`` against a test pipeline.

    The pipeline contains a single solid whose config schema requires the
    ``dataset_name`` and ``execution_date`` string fields.
    """

    @solid(config_schema={"dataset_name": str, "execution_date": str})
    def process_data(_):
        pass

    @pipeline
    def pipeline_for_test():
        process_data()

    schedule_context = build_schedule_context(scheduled_execution_time=datetime(2021, 1, 1))
    produced_config = my_execution_time_schedule(schedule_context)
    assert validate_run_config(pipeline_for_test, produced_config)
def test_job_schedules():
    """Run every schedule listed in ``job_schedules`` against its own target job.

    For each ``(module, attr_name)`` pair the schedule is looked up on the
    module, its target is loaded, a tick is evaluated, and the job is executed
    in process with the first run request's config.

    Raises:
        Exception: Wraps any failure with the schedule and module name, chained
            to the original error.
    """
    for module, attr_name in job_schedules:
        schedule = getattr(module, attr_name)
        try:
            assert schedule.has_loadable_target()
            target_job = schedule.load_target()

            tick = schedule.evaluate_tick(build_schedule_context())
            first_request = tick.run_requests[0]
            result = target_job.execute_in_process(run_config=first_request.run_config)
            assert result.success
        except Exception as err:
            raise Exception(
                f"Error while executing schedule '{schedule.name}' from module '{module.__name__}'"
            ) from err
def test_pipeline_schedules():
    """Run every schedule listed in ``pipeline_schedules`` against its pipeline.

    For each entry the schedule and pipeline are looked up on the module, a tick
    is evaluated, and the pipeline is executed with the first run request's
    config.

    Raises:
        Exception: Wraps any failure with the schedule and module name, chained
            to the original error.
    """
    for module, (schedule_name, pipeline_name) in pipeline_schedules:
        schedule = getattr(module, schedule_name)
        the_pipeline = getattr(module, pipeline_name)
        try:
            tick = schedule.evaluate_tick(build_schedule_context())
            first_request = tick.run_requests[0]
            result = execute_pipeline(the_pipeline, run_config=first_request.run_config)
            assert result.success
        except Exception as err:
            raise Exception(
                f"Error while executing schedule '{schedule.name}' from module '{module.__name__}'"
            ) from err
def test_hourly_schedule_from_partitions():
    """Check the schedule that ``job_for_partitions`` builds from an hourly config.

    Verifies the cron string and the run config of the first run request for a
    tick scheduled at 2021-05-08.
    """

    @hourly_partitioned_config(start_date="2021-05-05")
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    partition_schedule = job_for_partitions(my_partitioned_config, minute_of_hour=30)
    assert partition_schedule.cron_schedule == "30 * * * *"

    scheduled_for = datetime.strptime("2021-05-08", DATE_FORMAT)
    tick_context = build_schedule_context(scheduled_execution_time=scheduled_for)
    tick = partition_schedule.evaluate_tick(tick_context)
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-07T23:00:00+00:00",
        "end": "2021-05-08T00:00:00+00:00",
    }
def test_partitions_for_weekly_schedule_decorators_with_timezone(
    partition_weeks_offset: int,
):
    """Exercise a ``@weekly_schedule`` declared with a ``US/Central`` timezone.

    Inside a ``pendulum.test`` block, a tick with an explicit scheduled time is
    evaluated and the partitions are checked through ``_check_partitions``.

    Args:
        partition_weeks_offset: Forwarded to the decorator and subtracted (in
            weeks) from the expected partition time. Presumably supplied by a
            parametrization that is not visible in this block.
    """
    frozen_now = create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")
    with pendulum.test(frozen_now):
        first_day = datetime(year=2019, month=1, day=1)

        @weekly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_week=3,
            start_date=first_day,
            execution_time=time(9, 30),
            execution_timezone="US/Central",
            partition_weeks_offset=partition_weeks_offset,
        )
        def weekly_foo_schedule(weekly_time):
            return {"weekly_time": weekly_time.isoformat()}

        assert weekly_foo_schedule.execution_timezone == "US/Central"

        scheduled_for = create_pendulum_time(
            year=2019, month=1, day=30, hour=9, minute=30, tz="US/Central"
        )
        timed_context = build_schedule_context(scheduled_execution_time=scheduled_for)
        tick = weekly_foo_schedule.evaluate_tick(timed_context)
        assert tick.run_requests
        assert len(tick.run_requests) == 1
        expected_week = create_pendulum_time(
            year=2019, month=1, day=29, tz="US/Central"
        ).subtract(weeks=partition_weeks_offset)
        assert tick.run_requests[0].run_config == {"weekly_time": expected_week.isoformat()}

        expected_partition_count = 9 - partition_weeks_offset
        _check_partitions(
            weekly_foo_schedule,
            expected_partition_count,
            pendulum.instance(first_day, tz="US/Central"),
            DEFAULT_DATE_FORMAT,
            relativedelta(weeks=1),
        )
def test_weekly_schedule_from_partitions():
    """Check the schedule that ``job_for_partitions`` builds from a weekly config.

    Verifies the cron string and the run config of the first run request for a
    tick scheduled at 2021-05-21.
    """

    @weekly_partitioned_config(start_date="2021-05-05")
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    partition_schedule = job_for_partitions(
        my_partitioned_config, hour_of_day=9, minute_of_hour=30, day_of_week=2
    )
    assert partition_schedule.cron_schedule == "30 9 * * 2"

    scheduled_for = datetime.strptime("2021-05-21", DATE_FORMAT)
    tick_context = build_schedule_context(scheduled_execution_time=scheduled_for)
    tick = partition_schedule.evaluate_tick(tick_context)
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-09T00:00:00+00:00",
        "end": "2021-05-16T00:00:00+00:00",
    }
def test_vixie_cronstring_schedule():
    """Evaluate a schedule whose cron string is the ``@daily`` shorthand.

    The schedule function takes no context and yields a single ``RunRequest``
    tagged ``foo=FOO``; one tick is evaluated and the tag is checked on the
    resulting run request.
    """
    context_without_time = build_schedule_context()

    @op
    def foo_op(context):
        pass

    @job
    def foo_job():
        foo_op()

    @schedule(cron_schedule="@daily", job=foo_job)
    def foo_schedule():
        yield RunRequest(run_key=None, run_config={}, tags={"foo": "FOO"})

    # evaluate tick
    execution_data = foo_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1
    assert execution_data.run_requests[0].tags.get("foo") == "FOO"
def test_request_based_schedule_generator():
    """Check a generator-style schedule through tick evaluation and direct call.

    The schedule yields one ``RunRequest`` carrying ``FOO_CONFIG`` and a
    ``foo=FOO`` tag. The test checks the run request produced by
    ``evaluate_tick`` and then checks that calling the schedule directly
    returns a generator yielding the same request data.
    """
    from dagster import Field, String

    context_without_time = build_schedule_context()

    @op(config_schema={"foo": Field(String)})
    def foo_op(context):
        pass

    @job
    def foo_job():
        foo_op()

    FOO_CONFIG = {"ops": {"foo_op": {"config": {"foo": "bar"}}}}

    @schedule(
        cron_schedule="* * * * *",
        job=foo_job,
    )
    def foo_schedule(_context):
        yield RunRequest(run_key=None, run_config=FOO_CONFIG, tags={"foo": "FOO"})

    # evaluate tick
    execution_data = foo_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1
    assert execution_data.run_requests[0].run_config == FOO_CONFIG
    assert execution_data.run_requests[0].tags.get("foo") == "FOO"

    # test direct invocation
    request_generator = foo_schedule(context_without_time)
    assert inspect.isgenerator(request_generator)
    requests = list(request_generator)
    assert len(requests) == 1
    assert requests[0].run_config == FOO_CONFIG
    assert requests[0].tags.get("foo") == "FOO"
def test_partitions_for_weekly_schedule_decorators_with_timezone():
    """Exercise a ``US/Central`` ``@weekly_schedule`` via ``get_execution_data``.

    Runs against a test instance inside a ``pendulum.test`` block: execution
    data is requested for an explicit scheduled time, then the partitions are
    checked through ``_check_partitions``.
    """
    frozen_now = create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")
    with instance_for_test() as instance, pendulum.test(frozen_now):
        first_day = datetime(year=2019, month=1, day=1)

        @weekly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_week=3,
            start_date=first_day,
            execution_time=time(9, 30),
            execution_timezone="US/Central",
        )
        def weekly_foo_schedule(weekly_time):
            return {"weekly_time": weekly_time.isoformat()}

        assert weekly_foo_schedule.execution_timezone == "US/Central"

        scheduled_for = create_pendulum_time(
            year=2019, month=1, day=30, hour=9, minute=30, tz="US/Central"
        )
        timed_context = build_schedule_context(instance, scheduled_for)
        results = weekly_foo_schedule.get_execution_data(timed_context)
        assert len(results) == 1
        only_result = results[0]
        assert isinstance(only_result, RunRequest)
        expected_week = create_pendulum_time(year=2019, month=1, day=22, tz="US/Central")
        assert only_result.run_config == {"weekly_time": expected_week.isoformat()}

        _check_partitions(
            weekly_foo_schedule,
            8,
            pendulum.instance(first_day, tz="US/Central"),
            DEFAULT_DATE_FORMAT,
            relativedelta(weeks=1),
        )
def test_partitions_outside_schedule_range():
    """Check the ``SkipReason`` messages for out-of-range partitions.

    Against a test instance, two monthly schedules are asked for execution data
    at 2021-01-01 UTC: one whose start date makes the partition too early, and
    one whose end date makes it too late.
    """
    with instance_for_test() as instance:
        tick_time = create_pendulum_time(year=2021, month=1, day=1, tz="UTC")
        tick_context = build_schedule_context(instance, tick_time)

        @monthly_schedule(
            pipeline_name="too early",
            start_date=create_pendulum_time(year=2021, month=1, day=1, tz="UTC"),
        )
        def too_early(monthly_time):
            return {"monthly_time": monthly_time.isoformat()}

        early_results = too_early.get_execution_data(tick_context)
        assert len(early_results) == 1
        early_skip = early_results[0]
        assert isinstance(early_skip, SkipReason)
        expected_early_message = (
            "Your partition (2020-12-01T00:00:00+00:00) is before the beginning of "
            "the partition set (2021-01-01T00:00:00+00:00). "
            "Verify your schedule's start_date is correct."
        )
        assert early_skip.skip_message == expected_early_message

        @monthly_schedule(
            pipeline_name="too late",
            start_date=create_pendulum_time(year=2020, month=1, day=1, tz="UTC"),
            end_date=create_pendulum_time(year=2020, month=12, day=1, tz="UTC"),
        )
        def too_late(monthly_time):
            return {"monthly_time": monthly_time.isoformat()}

        late_results = too_late.get_execution_data(tick_context)
        assert len(late_results) == 1
        late_skip = late_results[0]
        assert isinstance(late_skip, SkipReason)
        expected_late_message = (
            "Your partition (2020-12-01T00:00:00+00:00) is after the end of "
            "the partition set (2020-11-01T00:00:00+00:00). "
            "Verify your schedule's end_date is correct."
        )
        assert late_skip.skip_message == expected_late_message
def test_weekly_schedule_with_offsets():
    """Check a weekly partitioned config with minute, hour and day offsets.

    Verifies the first partition keys, the first partition's time window, the
    run config for the first key, and the cron string and tick output of the
    schedule built from the config. The schedule is finally placed in a
    ``@repository`` definition.
    """

    @weekly_partitioned_config(
        start_date="2021-05-05", minute_offset=10, hour_offset=13, day_offset=3
    )
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    partition_keys = my_partitioned_config.get_partition_keys()
    assert partition_keys[0] == "2021-05-05"
    assert partition_keys[1] == "2021-05-12"

    all_partitions = my_partitioned_config.partitions_def.get_partitions()
    expected_window = time_window("2021-05-05T13:10:00", "2021-05-12T13:10:00")
    assert all_partitions[0].value == expected_window

    first_key_config = my_partitioned_config.get_run_config_for_partition_key(partition_keys[0])
    assert first_key_config == {
        "start": "2021-05-05T13:10:00+00:00",
        "end": "2021-05-12T13:10:00+00:00",
    }

    partition_schedule = schedule_for_partitioned_config(
        my_partitioned_config, hour_of_day=9, minute_of_hour=30, day_of_week=2
    )
    assert partition_schedule.cron_schedule == "30 9 * * 2"

    scheduled_for = datetime.strptime("2021-05-21", DATE_FORMAT)
    tick_context = build_schedule_context(scheduled_execution_time=scheduled_for)
    tick = partition_schedule.evaluate_tick(tick_context)
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-12T13:10:00+00:00",
        "end": "2021-05-19T13:10:00+00:00",
    }

    @repository
    def _repo():
        return [partition_schedule]
def test_config_based_schedule_no_context():
    """Check a config-returning schedule whose function takes no context.

    The schedule returns ``FOO_CONFIG`` and is declared with a ``foo=FOO`` tag.
    The test checks the run request produced by ``evaluate_tick`` and then
    checks that calling the schedule directly returns the config.
    """
    from dagster import Field, String

    context_without_time = build_schedule_context()

    @op(config_schema={"foo": Field(String)})
    def foo_op(context):
        pass

    @job
    def foo_job():
        foo_op()

    FOO_CONFIG = {"ops": {"foo_op": {"config": {"foo": "bar"}}}}

    @schedule(
        cron_schedule="* * * * *",
        job=foo_job,
        tags={"foo": "FOO"},
    )
    def foo_schedule():
        return FOO_CONFIG

    # evaluate tick
    execution_data = foo_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1
    run_request = execution_data.run_requests[0]
    assert run_request.run_config == FOO_CONFIG
    assert run_request.tags.get("foo") == "FOO"

    # direct invocation
    run_config = foo_schedule()
    assert run_config == FOO_CONFIG
def test_partitions_for_hourly_schedule_decorators_with_timezone(
    partition_hours_offset: int,
):
    """Exercise ``@hourly_schedule`` with a ``US/Central`` execution timezone.

    Inside a ``pendulum.test`` block, two schedules are declared: one with a
    timezone-less start date and one with a ``US/Pacific`` start date. The
    partitions of both are checked through ``_check_partitions``, and a tick is
    evaluated for the first.

    Args:
        partition_hours_offset: Forwarded to the decorators and subtracted (in
            hours) from the expected partition counts and times. Presumably
            supplied by a parametrization that is not visible in this block.
    """
    frozen_now = create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")
    with pendulum.test(frozen_now):
        naive_start = datetime(year=2019, month=1, day=1)

        # You can specify a start date with no timezone and it will be assumed to be
        # in the execution timezone
        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=naive_start,
            execution_time=time(hour=0, minute=25),
            execution_timezone="US/Central",
            partition_hours_offset=partition_hours_offset,
        )
        def hourly_central_schedule(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        assert hourly_central_schedule.execution_timezone == "US/Central"

        expected_partition_count = HOURS_UNTIL_FEBRUARY_27 + 1 - partition_hours_offset
        _check_partitions(
            hourly_central_schedule,
            expected_partition_count,
            pendulum.instance(naive_start, tz="US/Central"),
            DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            relativedelta(hours=1),
        )

        scheduled_for = create_pendulum_time(
            year=2019, month=1, day=27, hour=1, minute=25, tz="US/Central"
        )
        timed_context = build_schedule_context(scheduled_execution_time=scheduled_for)
        tick = hourly_central_schedule.evaluate_tick(timed_context)
        assert tick.run_requests
        assert len(tick.run_requests) == 1
        expected_hour = create_pendulum_time(
            year=2019, month=1, day=27, hour=1, tz="US/Central"
        ).subtract(hours=partition_hours_offset)
        assert tick.run_requests[0].run_config == {"hourly_time": expected_hour.isoformat()}

        # You can specify a start date in a different timezone and it will be transformed into the
        # execution timezone
        pacific_start = create_pendulum_time(2019, 1, 1, 0, tz="US/Pacific")

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=pacific_start,
            execution_time=time(hour=0, minute=25),
            execution_timezone="US/Central",
            partition_hours_offset=partition_hours_offset,
        )
        def hourly_central_schedule_with_timezone_start_time(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        # start date is two hours later since it's in PT
        shifted_partition_count = HOURS_UNTIL_FEBRUARY_27 - 2 + 1 - partition_hours_offset
        _check_partitions(
            hourly_central_schedule_with_timezone_start_time,
            shifted_partition_count,
            to_timezone(pacific_start, "US/Central"),
            DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            relativedelta(hours=1),
        )
def test_my_cron_schedule_with_context():
    """Validate the run config from ``my_schedule_uses_context`` against ``pipeline_for_test``.

    The schedule is invoked directly with a context whose scheduled execution
    time is 2020-01-01.
    """
    scheduled_for = datetime.datetime(2020, 1, 1)
    schedule_context = build_schedule_context(scheduled_execution_time=scheduled_for)
    produced_config = my_schedule_uses_context(schedule_context)
    assert validate_run_config(pipeline_for_test, produced_config)
def test_configurable_job_schedule():
    """Validate the run request config from ``configurable_job_schedule`` against its job.

    The schedule is invoked directly with a context whose scheduled execution
    time is 2020-01-01; the returned object's ``run_config`` is validated
    against ``configurable_job``.
    """
    scheduled_for = datetime.datetime(2020, 1, 1)
    schedule_context = build_schedule_context(scheduled_execution_time=scheduled_for)
    produced_request = configurable_job_schedule(schedule_context)
    assert validate_run_config(configurable_job, produced_request.run_config)