def test_scheduled_jobs():
    """Evaluate a job-backed schedule and validate the run config it emits.

    A job with default config is attached to an every-minute schedule. The
    schedule is ticked with a context that carries no execution time; the tick
    must yield exactly one run request whose run config validates against the
    job.
    """
    from dagster import Field, String

    @op(config_schema={"foo": Field(String)})
    def foo_op(context):
        pass

    default_run_config = {"ops": {"foo_op": {"config": {"foo": "bar"}}}}

    @job(config=default_run_config)
    def foo_job():
        foo_op()

    schedule_under_test = ScheduleDefinition(
        job=foo_job,
        name="my_schedule",
        cron_schedule="* * * * *",
    )

    untimed_context = build_schedule_context()
    # A context with an explicit execution time is constructed as well, but
    # only the untimed context is passed to evaluate_tick below.
    scheduled_for = datetime(year=2019, month=2, day=27)
    timed_context = build_schedule_context(scheduled_execution_time=scheduled_for)

    tick_result = my_tick = schedule_under_test.evaluate_tick(untimed_context)
    assert tick_result.run_requests
    assert len(my_tick.run_requests) == 1

    first_request = tick_result.run_requests[0]
    validate_run_config(foo_job, first_request.run_config)
def define_scheduler(artifacts_dir, repository_name):
    """Build a ``SchedulerHandle`` holding three schedules for ``no_config_pipeline``.

    Args:
        artifacts_dir: Forwarded unchanged to ``SchedulerHandle``.
        repository_name: Forwarded unchanged to ``SchedulerHandle``.

    Returns:
        A ``SchedulerHandle`` using ``MockSystemCronScheduler`` whose schedule
        list is: the default-config every-minute schedule, the daily schedule,
        and the every-minute schedule with filesystem storage config.
    """
    target_pipeline = "no_config_pipeline"

    def filesystem_storage():
        # Build a fresh dict per schedule so no config object is shared.
        return {"storage": {"filesystem": None}}

    daily = ScheduleDefinition(
        name="no_config_pipeline_daily_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name=target_pipeline,
        environment_dict=filesystem_storage(),
    )
    every_minute = ScheduleDefinition(
        name="no_config_pipeline_every_min_schedule",
        cron_schedule="* * * * *",
        pipeline_name=target_pipeline,
        environment_dict=filesystem_storage(),
    )
    # This one passes no environment_dict at all.
    every_minute_default_config = ScheduleDefinition(
        name="default_config_pipeline_every_min_schedule",
        cron_schedule="* * * * *",
        pipeline_name=target_pipeline,
    )

    schedule_defs = [every_minute_default_config, daily, every_minute]
    return SchedulerHandle(
        scheduler_type=MockSystemCronScheduler,
        schedule_defs=schedule_defs,
        repository_name=repository_name,
        artifacts_dir=artifacts_dir,
    )
def define_bar_schedules():
    """Return a name-to-schedule mapping of three every-minute schedules.

    All three target ``test_pipeline``:

    * ``foo_schedule`` uses a static run config.
    * ``foo_schedule_never_execute`` adds a ``should_execute`` that is always False.
    * ``foo_schedule_echo_time`` derives its run config from the context's
      ``scheduled_execution_time_utc``.
    """

    def never_execute(_context):
        return False

    def echo_scheduled_time(context):
        # ISO-format the scheduled time when it is truthy, else an empty string.
        if context.scheduled_execution_time_utc:
            passed_in_time = context.scheduled_execution_time_utc.isoformat()
        else:
            passed_in_time = ""
        return {"passed_in_time": passed_in_time}

    plain = ScheduleDefinition(
        "foo_schedule",
        cron_schedule="* * * * *",
        pipeline_name="test_pipeline",
        run_config={"fizz": "buzz"},
    )
    disabled = ScheduleDefinition(
        "foo_schedule_never_execute",
        cron_schedule="* * * * *",
        pipeline_name="test_pipeline",
        run_config={"fizz": "buzz"},
        should_execute=never_execute,
    )
    echo_time = ScheduleDefinition(
        "foo_schedule_echo_time",
        cron_schedule="* * * * *",
        pipeline_name="test_pipeline",
        run_config_fn=echo_scheduled_time,
    )

    return {
        "foo_schedule": plain,
        "foo_schedule_never_execute": disabled,
        "foo_schedule_echo_time": echo_time,
    }
def define_schedules():
    """Return three schedules that all target ``no_config_pipeline``.

    The returned list is ordered: default-config every-minute schedule, daily
    schedule, every-minute schedule with filesystem storage config.
    """
    pipeline = "no_config_pipeline"

    def storage_config():
        # A new dict on each call keeps the schedules' configs independent.
        return {"storage": {"filesystem": None}}

    daily_schedule = ScheduleDefinition(
        name="no_config_pipeline_daily_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name=pipeline,
        environment_dict=storage_config(),
    )
    minute_schedule = ScheduleDefinition(
        name="no_config_pipeline_every_min_schedule",
        cron_schedule="* * * * *",
        pipeline_name=pipeline,
        environment_dict=storage_config(),
    )
    # No environment_dict is supplied for this schedule.
    minute_schedule_default_config = ScheduleDefinition(
        name="default_config_pipeline_every_min_schedule",
        cron_schedule="* * * * *",
        pipeline_name=pipeline,
    )

    return [minute_schedule_default_config, daily_schedule, minute_schedule]
def define_scheduler():
    """Return three schedules for ``no_config_pipeline`` sharing one cron string.

    Every schedule uses cron ``0 0 * * *`` and a filesystem storage config.
    The second adds a ``dagster/schedule_id`` tag (as a list of key/value
    dicts); the third adds a ``should_execute`` that always returns False.

    NOTE(review): the schedule names say "hourly", but ``0 0 * * *`` is the
    midnight-daily cron expression -- confirm which is intended.
    """

    def build(name, **extra):
        # Each call creates its own environment_dict so nothing is shared.
        return ScheduleDefinition(
            name=name,
            cron_schedule="0 0 * * *",
            pipeline_name="no_config_pipeline",
            environment_dict={"storage": {"filesystem": {}}},
            **extra
        )

    plain = build("no_config_pipeline_hourly_schedule")
    tagged = build(
        "no_config_pipeline_hourly_schedule_with_schedule_id_tag",
        tags=[{"key": "dagster/schedule_id", "value": "1234"}],
    )
    gated = build("no_config_should_execute", should_execute=lambda: False)

    return [plain, tagged, gated]
def get_toys_schedules():
    """Return the toy schedules: three helper-built ones plus two inline definitions.

    The inline schedules are ``many_events_every_min`` (every minute) and
    ``pandas_hello_world_hourly`` (minute 0 of every hour). Both compute their
    run config lazily through ``run_config_fn``.
    """
    from dagster import ScheduleDefinition, file_relative_path

    def filesystem_storage_config(_):
        return {"storage": {"filesystem": {}}}

    def num_csv_input():
        # Resolved relative to this file each time the config is generated.
        csv_path = file_relative_path(__file__, "pandas_hello_world/data/num.csv")
        return {"inputs": {"num_df": {"csv": {"path": csv_path}}}}

    def pandas_hello_world_config(_):
        return {
            "solids": {
                "mult_solid": num_csv_input(),
                "sum_solid": num_csv_input(),
            },
            "storage": {"filesystem": {}},
        }

    schedules = [
        backfill_test_schedule(),
        longitudinal_schedule(),
        materialization_schedule(),
    ]
    schedules.append(
        ScheduleDefinition(
            name="many_events_every_min",
            cron_schedule="* * * * *",
            pipeline_name="many_events",
            run_config_fn=filesystem_storage_config,
        )
    )
    schedules.append(
        ScheduleDefinition(
            name="pandas_hello_world_hourly",
            cron_schedule="0 * * * *",
            pipeline_name="pandas_hello_world_pipeline",
            run_config_fn=pandas_hello_world_config,
        )
    )
    return schedules
def get_toys_schedules():
    """Return the toy schedules: three helper-built ones plus two inline definitions.

    The inline schedules are ``many_events_every_min`` (every minute) and
    ``pandas_hello_world_hourly`` (minute 0 of every hour). Both compute their
    environment dict lazily through ``environment_dict_fn``.
    """
    from dagster import ScheduleDefinition, file_relative_path

    def storage_only_environment(_):
        return {"storage": {"filesystem": {}}}

    def csv_input_for_num_df():
        # The path is resolved relative to this file on every call.
        return {
            'inputs': {
                'num_df': {
                    'csv': {
                        'path': file_relative_path(
                            __file__, "pandas_hello_world/data/num.csv"
                        )
                    }
                }
            }
        }

    def pandas_environment(_):
        solids = {
            'mult_solid': csv_input_for_num_df(),
            'sum_solid': csv_input_for_num_df(),
        }
        return {'solids': solids, "storage": {"filesystem": {}}}

    helper_built = [
        backfill_test_schedule(),
        longitudinal_schedule(),
        materialization_schedule(),
    ]
    many_events = ScheduleDefinition(
        name="many_events_every_min",
        cron_schedule="* * * * *",
        pipeline_name='many_events',
        environment_dict_fn=storage_only_environment,
    )
    pandas_hourly = ScheduleDefinition(
        name="pandas_hello_world_hourly",
        cron_schedule="0 * * * *",
        pipeline_name="pandas_hello_world_pipeline",
        environment_dict_fn=pandas_environment,
    )
    return helper_built + [many_events, pandas_hourly]
def test_jobs_attr():
    """Check ``ScheduleDefinition.job`` for a graph-backed and a name-only schedule.

    With ``job=`` given, ``.job.name`` must match the graph's name. With only
    ``pipeline_name=`` given, reading ``.job`` must raise
    ``DagsterInvalidDefinitionError``.
    """

    @graph
    def my_graph():
        pass

    cron = "0 0 * * *"

    with_job = ScheduleDefinition(job=my_graph, cron_schedule=cron)
    assert with_job.job.name == my_graph.name

    without_job = ScheduleDefinition(pipeline_name="my_pipeline", cron_schedule=cron)
    expected_error = "No job was provided to ScheduleDefinition."
    with pytest.raises(DagsterInvalidDefinitionError, match=expected_error):
        # Attribute access alone is what triggers the error.
        without_job.job
def define_scheduler():
    """Return six schedules, five for ``no_config_pipeline`` plus one partition-based.

    All use cron ``0 0 * * *``. The variants cover a static environment dict,
    an ``environment_dict_fn``, a ``dagster/schedule_id`` tag, a
    ``should_execute`` that always returns False, and a schedule created from
    ``integer_partition_set``.

    NOTE(review): several names say "hourly" while ``0 0 * * *`` is the
    midnight-daily cron expression -- confirm which is intended.
    """
    cron = "0 0 * * *"
    pipeline = "no_config_pipeline"

    def filesystem_storage():
        # Called once per schedule (and once per fn invocation) for a fresh dict.
        return {"storage": {"filesystem": {}}}

    static_config = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule=cron,
        pipeline_name=pipeline,
        environment_dict=filesystem_storage(),
    )
    config_from_fn = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule_with_config_fn",
        cron_schedule=cron,
        pipeline_name=pipeline,
        environment_dict_fn=lambda: filesystem_storage(),
    )
    tagged = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule_with_schedule_id_tag",
        cron_schedule=cron,
        pipeline_name=pipeline,
        environment_dict=filesystem_storage(),
        tags={"dagster/schedule_id": "1234"},
    )
    gated = ScheduleDefinition(
        name="no_config_should_execute",
        cron_schedule=cron,
        pipeline_name=pipeline,
        environment_dict=filesystem_storage(),
        should_execute=lambda: False,
    )
    dynamic = ScheduleDefinition(
        name="dynamic_config",
        cron_schedule=cron,
        pipeline_name=pipeline,
        environment_dict_fn=lambda: filesystem_storage(),
    )
    from_partitions = integer_partition_set.create_schedule_definition(
        schedule_name="partition_based",
        cron_schedule=cron,
    )

    # The tagged schedule is listed ahead of the config-fn one.
    return [static_config, tagged, config_from_fn, gated, dynamic, from_partitions]
def define_scheduler():
    """Return three schedules: many_events, log_spew and pandas_hello_world.

    ``many_events_every_min`` runs every minute but is gated by a filter that
    is true only when today's ``weekday()`` is below 5 (Monday through Friday).
    The other two run at minute 0 of every hour.
    """

    def many_events_every_minute_filter():
        # datetime.weekday(): Monday is 0, Sunday is 6, so < 5 means a weekday.
        return datetime.datetime.today().weekday() < 5

    def filesystem_storage():
        return {"storage": {"filesystem": {}}}

    many_events = ScheduleDefinition(
        name="many_events_every_min",
        cron_schedule="* * * * *",
        pipeline_name="many_events",
        environment_dict=filesystem_storage(),
        should_execute=many_events_every_minute_filter,
    )

    log_spew = ScheduleDefinition(
        name="log_spew_hourly",
        cron_schedule="0 * * * *",
        pipeline_name="log_spew",
        environment_dict=filesystem_storage(),
    )

    # The CSV path is resolved once, here, relative to this file.
    num_csv = file_relative_path(__file__, "../pandas_hello_world/data/num.csv")
    pandas_hello_world = ScheduleDefinition(
        name="pandas_hello_world_hourly",
        cron_schedule="0 * * * *",
        pipeline_name="pandas_hello_world",
        environment_dict={
            "solids": {"sum_solid": {"inputs": {"num": {"csv": {"path": num_csv}}}}}
        },
    )

    return [many_events, log_spew, pandas_hello_world]
def gbfs_schedules():
    """Return two schedules for ``download_gbfs_files``: every minute and every five minutes.

    Both use a filesystem storage environment dict.
    """
    cadences = (
        ('get_gbfs_feed_every_minute', '* * * * *'),
        ('get_gbfs_feed_every_five_minutes', '*/5 * * * *'),
    )
    return [
        ScheduleDefinition(
            name=schedule_name,
            cron_schedule=cron,
            pipeline_name='download_gbfs_files',
            # A new dict is created for each schedule.
            environment_dict={'storage': {'filesystem': {}}},
        )
        for schedule_name, cron in cadences
    ]
def create_hello_world_schedule(name):
    """Create an every-minute schedule for ``hello_world_pipeline``.

    Args:
        name: Name given to the resulting ``ScheduleDefinition``.

    Returns:
        A ``ScheduleDefinition`` with an empty environment dict.
    """
    hello_world_schedule = ScheduleDefinition(
        pipeline_name="hello_world_pipeline",
        cron_schedule="* * * * *",
        environment_dict={},
        name=name,
    )
    return hello_world_schedule
def _load_schedules(self):
    """Populate ``self._schedules`` from the ``.json`` files in the artifacts directory.

    The directory is created first via ``utils.mkdir_p``. Each JSON file is
    parsed into a ``RunningSchedule`` wrapping a ``ScheduleDefinition`` and
    stored under the schedule definition's name.

    Raises:
        Exception: if a file cannot be parsed or turned into a schedule; the
            original exception is chained through ``six.raise_from``.
    """
    utils.mkdir_p(self._artifacts_dir)

    json_entries = (
        entry for entry in os.listdir(self._artifacts_dir) if entry.endswith('.json')
    )
    for entry in json_entries:
        with open(os.path.join(self._artifacts_dir, entry)) as handle:
            try:
                payload = seven.json.load(handle)
                # Keys are read in the same order as the original so the
                # first missing key reported stays the same.
                schedule_id = payload['schedule_id']
                definition = ScheduleDefinition(
                    name=payload['name'],
                    cron_schedule=payload['cron_schedule'],
                    execution_params=payload['execution_params'],
                )
                running = RunningSchedule(
                    schedule_id,
                    definition,
                    python_path=payload['python_path'],
                    repository_path=payload['repository_path'],
                )
                self._schedules[running.schedule_definition.name] = running
            except Exception as ex:  # pylint: disable=broad-except
                template = (
                    'Could not parse dagit schedule from {file_name} in {dir_name}. {ex}: {msg}'
                )
                wrapped = Exception(
                    template.format(
                        file_name=entry,
                        dir_name=self._artifacts_dir,
                        ex=type(ex).__name__,
                        msg=ex,
                    )
                )
                six.raise_from(wrapped, ex)
def define_bar_scheduler(artifacts_dir):
    """Build a ``MockScheduler`` holding a single every-minute ``foo_schedule``.

    Args:
        artifacts_dir: Forwarded unchanged to ``MockScheduler``.
    """
    foo_schedule = ScheduleDefinition(
        "foo_schedule",
        cron_schedule="* * * * *",
        execution_params={},
    )
    return MockScheduler(artifacts_dir=artifacts_dir, schedule_defs=[foo_schedule])
def test_default_name_graph():
    """A schedule built from a graph without an explicit name is named ``<graph>_schedule``."""

    @graph
    def my_graph():
        pass

    unnamed_schedule = ScheduleDefinition(cron_schedule="0 0 * * *", job=my_graph)

    expected_name = "my_graph_schedule"
    assert unnamed_schedule.name == expected_name
def create_repository():
    """Build the ``test`` repository: one pipeline, one schedule and a scheduler class.

    The schedule is defined through raw ``execution_params`` that select
    ``no_config_pipeline`` in ``default`` mode. The schedule list and
    ``TestSystemCronScheduler`` are passed under the ``experimental`` argument.

    NOTE(review): the schedule name says "hourly", but ``0 0 * * *`` is the
    midnight-daily cron expression -- confirm which is intended.
    """
    execution_params = {
        "environmentConfigData": {"storage": {"filesystem": None}},
        "selector": {"name": "no_config_pipeline", "solidSubset": None},
        "mode": "default",
    }
    scheduled_run = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule="0 0 * * *",
        execution_params=execution_params,
    )

    @pipeline
    def no_config_pipeline():
        @lambda_solid
        def return_hello():
            return 'Hello'

        return return_hello()

    experimental_settings = {
        'schedule_defs': [scheduled_run],
        'scheduler': TestSystemCronScheduler,
    }
    return RepositoryDefinition(
        name='test',
        pipeline_defs=[no_config_pipeline],
        experimental=experimental_settings,
    )
def define_bar_schedules():
    """Return a name-to-schedule mapping of two every-minute ``test_pipeline`` schedules.

    ``foo_schedule`` always runs; ``foo_schedule_never_execute`` carries a
    ``should_execute`` callback that always returns False.
    """

    def never(_context):
        return False

    schedules = {}
    schedules["foo_schedule"] = ScheduleDefinition(
        "foo_schedule",
        cron_schedule="* * * * *",
        pipeline_name="test_pipeline",
        run_config={"fizz": "buzz"},
    )
    schedules["foo_schedule_never_execute"] = ScheduleDefinition(
        "foo_schedule_never_execute",
        cron_schedule="* * * * *",
        pipeline_name="test_pipeline",
        run_config={"fizz": "buzz"},
        should_execute=never,
    )
    return schedules
def define_scheduler():
    """Return the ``many_events`` and ``pandas_hello_world_pipeline`` schedules.

    ``many_events_every_min`` runs every minute and
    ``pandas_hello_world_hourly`` at minute 0 of every hour. Both produce their
    environment dict through a one-argument ``environment_dict_fn``.
    """

    def storage_environment(_):
        return {"storage": {"filesystem": {}}}

    def num_df_from_csv():
        # Path is resolved relative to this file each time the fn runs.
        num_csv = file_relative_path(__file__, "pandas_hello_world/data/num.csv")
        return {'inputs': {'num_df': {'csv': {'path': num_csv}}}}

    def pandas_environment(_):
        return {
            'solids': {
                'mult_solid': num_df_from_csv(),
                'sum_solid': num_df_from_csv(),
            },
            "storage": {"filesystem": {}},
        }

    many_events = ScheduleDefinition(
        name="many_events_every_min",
        cron_schedule="* * * * *",
        pipeline_name='many_events',
        environment_dict_fn=storage_environment,
    )
    pandas_hourly = ScheduleDefinition(
        name="pandas_hello_world_hourly",
        cron_schedule="0 * * * *",
        pipeline_name="pandas_hello_world_pipeline",
        environment_dict_fn=pandas_environment,
    )
    return [many_events, pandas_hourly]
def define_scheduler():
    """Return a one-element list with the every-minute ``hello_world_pipeline`` schedule."""
    return [
        ScheduleDefinition(
            name="hello_world_every_minute",
            pipeline_name="hello_world_pipeline",
            cron_schedule="* * * * *",
            environment_dict={},
        )
    ]
def define_bar_schedules():
    """Return a mapping with a single every-minute ``foo_schedule`` for pipeline ``foo``."""
    foo_schedule = ScheduleDefinition(
        "foo_schedule",
        cron_schedule="* * * * *",
        pipeline_name="foo",
        run_config={},
    )
    return {"foo_schedule": foo_schedule}
def define_scheduler():
    """Return a one-element list with ``my_schedule``, run every minute on ``test_pipeline``."""
    my_schedule = ScheduleDefinition(
        name="my_schedule",
        pipeline_name="test_pipeline",
        cron_schedule="* * * * *",
        environment_dict={},
    )
    return [my_schedule]
def define_schedules():
    """Return a one-element list with ``my_schedule``, run every minute on ``test_pipeline``.

    The schedule is given an empty ``run_config``.
    """
    my_schedule = ScheduleDefinition(
        name="my_schedule",
        pipeline_name="test_pipeline",
        cron_schedule="* * * * *",
        run_config={},
    )
    return [my_schedule]
def feedly_schedules():
    """Return the Feedly schedule list: one run of ``load_to_firebase_pipeline`` per day.

    The cron expression ``0 15 * * *`` fires at minute 0 of hour 15 each day.
    """
    daily_batch = ScheduleDefinition(
        name='daily_feedly_batch',
        pipeline_name='load_to_firebase_pipeline',
        cron_schedule='0 15 * * *',
        environment_dict={},
    )
    return [daily_batch]
def cereal_schedules():
    """Return the ``good_morning`` schedule for ``hello_cereal_pipeline``.

    The cron expression ``45 6 * * *`` fires at 06:45 each day; the run config
    selects filesystem storage.
    """
    run_config = {'storage': {'filesystem': {}}}
    good_morning = ScheduleDefinition(
        name='good_morning',
        pipeline_name='hello_cereal_pipeline',
        cron_schedule='45 6 * * *',
        run_config=run_config,
    )
    return [good_morning]
def spotify_schedules():
    """Return the Spotify schedule list for ``load_to_spotify_pipeline``.

    NOTE(review): the schedule is named ``daily_spotify_batch``, but the cron
    expression ``0 * * * *`` fires at minute 0 of every hour -- confirm which
    cadence is intended.
    """
    spotify_batch = ScheduleDefinition(
        name='daily_spotify_batch',
        pipeline_name='load_to_spotify_pipeline',
        cron_schedule='0 * * * *',
        environment_dict={},
    )
    return [spotify_batch]
def define_schedules():
    """Return a one-element list with the ``math`` pipeline schedule.

    The environment dict feeds the value 123 to input ``num`` of solid
    ``add_one``.

    NOTE(review): the name says "hourly", but ``0 0 * * *`` is the
    midnight-daily cron expression -- confirm which is intended.
    """
    add_one_inputs = {'inputs': {'num': {'value': 123}}}
    return [
        ScheduleDefinition(
            name="math_hourly_schedule",
            cron_schedule="0 0 * * *",
            pipeline_name="math",
            environment_dict={'solids': {'add_one': add_one_inputs}},
        )
    ]
def define_bar_schedules():
    """Return a one-element list with an every-minute ``foo_schedule`` for ``test_pipeline``."""
    foo_schedule = ScheduleDefinition(
        "foo_schedule",
        pipeline_name="test_pipeline",
        cron_schedule="* * * * *",
        environment_dict={},
    )
    return [foo_schedule]
def define_schedules():
    """Return a one-element list with the ``math`` pipeline schedule.

    The run config feeds the value 123 to input ``num`` of solid ``add_one``.

    NOTE(review): the name says "hourly", but ``0 0 * * *`` is the
    midnight-daily cron expression -- confirm which is intended.
    """
    add_one_inputs = {"inputs": {"num": {"value": 123}}}
    return [
        ScheduleDefinition(
            name="math_hourly_schedule",
            cron_schedule="0 0 * * *",
            pipeline_name="math",
            run_config={"solids": {"add_one": add_one_inputs}},
        )
    ]
def define_scheduler():
    """Return the many_events, log_spew and pandas_hello_world_pipeline schedules.

    ``many_events_every_min`` runs every minute; the other two run at minute 0
    of every hour. Each produces its environment dict through a zero-argument
    ``environment_dict_fn``.
    """

    def storage_environment():
        return {"storage": {"filesystem": {}}}

    def pandas_environment():
        # Path is resolved relative to this file each time the fn runs.
        num_csv = file_relative_path(__file__, "pandas_hello_world/data/num.csv")
        return {
            "solids": {
                "sum_solid": {"inputs": {"num_df": {"csv": {"path": num_csv}}}}
            },
            "storage": {"filesystem": {}},
        }

    many_events = ScheduleDefinition(
        name="many_events_every_min",
        cron_schedule="* * * * *",
        pipeline_name='many_events',
        environment_dict_fn=storage_environment,
    )
    log_spew = ScheduleDefinition(
        name="log_spew_hourly",
        cron_schedule="0 * * * *",
        pipeline_name="log_spew",
        environment_dict_fn=storage_environment,
    )
    pandas_hourly = ScheduleDefinition(
        name="pandas_hello_world_hourly",
        cron_schedule="0 * * * *",
        pipeline_name="pandas_hello_world_pipeline",
        environment_dict_fn=pandas_environment,
    )
    return [many_events, log_spew, pandas_hourly]
def cereal_schedules():
    """Return the ``good_morning`` schedule for ``hello_cereal_pipeline``.

    The environment dict selects filesystem storage.

    NOTE(review): despite the name, the cron expression ``*/1 * * * *`` fires
    every minute -- confirm this is intended.
    """
    environment = {'storage': {'filesystem': {}}}
    good_morning = ScheduleDefinition(
        name='good_morning',
        pipeline_name='hello_cereal_pipeline',
        cron_schedule='*/1 * * * *',
        environment_dict=environment,
    )
    return [good_morning]