Ejemplo n.º 1
0
def test_scheduled_jobs():
    """Check that a job-backed schedule yields exactly one valid run request.

    The schedule is evaluated twice: once with a bare schedule context and
    once with a context that carries an explicit scheduled execution time.
    In both cases the single run request's run config must validate against
    ``foo_job``.
    """
    from dagster import Field, String

    @op(config_schema={"foo": Field(String)})
    def foo_op(context):
        pass

    DEFAULT_FOO_CONFIG = {"ops": {"foo_op": {"config": {"foo": "bar"}}}}

    @job(config=DEFAULT_FOO_CONFIG)
    def foo_job():
        foo_op()

    my_schedule = ScheduleDefinition(name="my_schedule",
                                     cron_schedule="* * * * *",
                                     job=foo_job)

    context_without_time = build_schedule_context()
    execution_time = datetime(year=2019, month=2, day=27)
    context_with_time = build_schedule_context(
        scheduled_execution_time=execution_time)

    # The time-carrying context used to be constructed and then ignored;
    # evaluate the schedule against both so neither path goes untested.
    for schedule_context in (context_without_time, context_with_time):
        execution_data = my_schedule.evaluate_tick(schedule_context)
        assert execution_data.run_requests
        assert len(execution_data.run_requests) == 1

        validate_run_config(foo_job,
                            execution_data.run_requests[0].run_config)
Ejemplo n.º 2
0
def define_scheduler(artifacts_dir, repository_name):
    """Build a ``SchedulerHandle`` over three ``no_config_pipeline`` schedules.

    Args:
        artifacts_dir: Passed through as the handle's ``artifacts_dir``.
        repository_name: Passed through as the handle's ``repository_name``.

    Returns:
        A ``SchedulerHandle`` whose ``scheduler_type`` is
        ``MockSystemCronScheduler`` and whose schedule list is ordered
        default-config, daily, every-minute.
    """
    target_pipeline = "no_config_pipeline"

    def _filesystem_storage():
        # Fresh dict per schedule so the definitions never share one object.
        return {"storage": {"filesystem": None}}

    daily = ScheduleDefinition(
        name="no_config_pipeline_daily_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name=target_pipeline,
        environment_dict=_filesystem_storage(),
    )
    every_minute = ScheduleDefinition(
        name="no_config_pipeline_every_min_schedule",
        cron_schedule="* * * * *",
        pipeline_name=target_pipeline,
        environment_dict=_filesystem_storage(),
    )
    # This one deliberately passes no environment dict at all.
    every_minute_default_config = ScheduleDefinition(
        name="default_config_pipeline_every_min_schedule",
        cron_schedule="* * * * *",
        pipeline_name=target_pipeline,
    )

    ordered_schedules = [every_minute_default_config, daily, every_minute]
    return SchedulerHandle(
        scheduler_type=MockSystemCronScheduler,
        schedule_defs=ordered_schedules,
        repository_name=repository_name,
        artifacts_dir=artifacts_dir,
    )
Ejemplo n.º 3
0
def define_bar_schedules():
    """Return a name-keyed dict of three schedules for ``test_pipeline``.

    * ``foo_schedule`` uses a fixed run config.
    * ``foo_schedule_never_execute`` uses the same run config but its
      ``should_execute`` callback always returns ``False``.
    * ``foo_schedule_echo_time`` computes its run config from the context's
      ``scheduled_execution_time_utc``.
    """
    every_minute = "* * * * *"
    target_pipeline = "test_pipeline"

    def _never(_context):
        return False

    def _echo_scheduled_time(context):
        # ISO-format the scheduled time when present, otherwise echo "".
        if context.scheduled_execution_time_utc:
            stamp = context.scheduled_execution_time_utc.isoformat()
        else:
            stamp = ""
        return {"passed_in_time": stamp}

    schedules = {}
    schedules["foo_schedule"] = ScheduleDefinition(
        "foo_schedule",
        cron_schedule=every_minute,
        pipeline_name=target_pipeline,
        run_config={"fizz": "buzz"},
    )
    schedules["foo_schedule_never_execute"] = ScheduleDefinition(
        "foo_schedule_never_execute",
        cron_schedule=every_minute,
        pipeline_name=target_pipeline,
        run_config={"fizz": "buzz"},
        should_execute=_never,
    )
    schedules["foo_schedule_echo_time"] = ScheduleDefinition(
        "foo_schedule_echo_time",
        cron_schedule=every_minute,
        pipeline_name=target_pipeline,
        run_config_fn=_echo_scheduled_time,
    )
    return schedules
Ejemplo n.º 4
0
def define_schedules():
    """Return three schedules targeting ``no_config_pipeline``.

    The returned list is ordered default-config every-minute, daily, then
    every-minute with filesystem storage.
    """
    target_pipeline = "no_config_pipeline"

    def _filesystem_storage():
        # Fresh dict per schedule so the definitions never share one object.
        return {"storage": {"filesystem": None}}

    daily = ScheduleDefinition(
        name="no_config_pipeline_daily_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name=target_pipeline,
        environment_dict=_filesystem_storage(),
    )
    every_minute = ScheduleDefinition(
        name="no_config_pipeline_every_min_schedule",
        cron_schedule="* * * * *",
        pipeline_name=target_pipeline,
        environment_dict=_filesystem_storage(),
    )
    # No environment dict is supplied for this schedule.
    every_minute_default_config = ScheduleDefinition(
        name="default_config_pipeline_every_min_schedule",
        cron_schedule="* * * * *",
        pipeline_name=target_pipeline,
    )

    return [every_minute_default_config, daily, every_minute]
Ejemplo n.º 5
0
def define_scheduler():
    """Return three ``no_config_pipeline`` schedules.

    All three share the cron expression ``0 0 * * *`` and a filesystem-storage
    environment dict; they differ in name, in the ``tags`` list on the second
    and in the always-``False`` ``should_execute`` callback on the third.

    NOTE(review): the schedule names say "hourly" while the cron expression
    ``0 0 * * *`` has a fixed hour field - confirm which one is intended.
    """
    cron = "0 0 * * *"
    target_pipeline = "no_config_pipeline"

    def _filesystem_env():
        # Fresh dict per schedule so the definitions never share one object.
        return {"storage": {"filesystem": {}}}

    def _never_execute():
        return False

    plain = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule=cron,
        pipeline_name=target_pipeline,
        environment_dict=_filesystem_env(),
    )
    tagged = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule_with_schedule_id_tag",
        cron_schedule=cron,
        pipeline_name=target_pipeline,
        environment_dict=_filesystem_env(),
        tags=[{"key": "dagster/schedule_id", "value": "1234"}],
    )
    gated = ScheduleDefinition(
        name="no_config_should_execute",
        cron_schedule=cron,
        pipeline_name=target_pipeline,
        environment_dict=_filesystem_env(),
        should_execute=_never_execute,
    )

    return [plain, tagged, gated]
Ejemplo n.º 6
0
def get_toys_schedules():
    """Return the toy schedules: three helper-built ones plus two inline ones.

    The inline schedules compute their run config lazily through
    ``run_config_fn`` callbacks; the CSV path is resolved relative to this
    file each time a callback runs.
    """
    from dagster import ScheduleDefinition, file_relative_path

    def _storage_only_config(_):
        return {"storage": {"filesystem": {}}}

    def _num_csv_inputs():
        # Both pandas solids read the same CSV as their ``num_df`` input.
        csv_path = file_relative_path(__file__,
                                      "pandas_hello_world/data/num.csv")
        return {"num_df": {"csv": {"path": csv_path}}}

    def _pandas_hello_world_config(_):
        return {
            "solids": {
                "mult_solid": {"inputs": _num_csv_inputs()},
                "sum_solid": {"inputs": _num_csv_inputs()},
            },
            "storage": {"filesystem": {}},
        }

    schedules = [
        backfill_test_schedule(),
        longitudinal_schedule(),
        materialization_schedule(),
    ]
    schedules.append(
        ScheduleDefinition(
            name="many_events_every_min",
            cron_schedule="* * * * *",
            pipeline_name="many_events",
            run_config_fn=_storage_only_config,
        ))
    schedules.append(
        ScheduleDefinition(
            name="pandas_hello_world_hourly",
            cron_schedule="0 * * * *",
            pipeline_name="pandas_hello_world_pipeline",
            run_config_fn=_pandas_hello_world_config,
        ))
    return schedules
Ejemplo n.º 7
0
def get_toys_schedules():
    """Return the toy schedules: three helper-built ones plus two inline ones.

    The inline schedules compute their environment dict lazily through
    ``environment_dict_fn`` callbacks; the CSV path is resolved relative to
    this file each time a callback runs.
    """
    from dagster import ScheduleDefinition, file_relative_path

    def _storage_only_env(_):
        return {"storage": {"filesystem": {}}}

    def _num_csv_inputs():
        # Both pandas solids read the same CSV as their ``num_df`` input.
        csv_path = file_relative_path(__file__,
                                      "pandas_hello_world/data/num.csv")
        return {"num_df": {"csv": {"path": csv_path}}}

    def _pandas_hello_world_env(_):
        return {
            "solids": {
                "mult_solid": {"inputs": _num_csv_inputs()},
                "sum_solid": {"inputs": _num_csv_inputs()},
            },
            "storage": {"filesystem": {}},
        }

    schedules = [
        backfill_test_schedule(),
        longitudinal_schedule(),
        materialization_schedule(),
    ]
    schedules.append(
        ScheduleDefinition(
            name="many_events_every_min",
            cron_schedule="* * * * *",
            pipeline_name="many_events",
            environment_dict_fn=_storage_only_env,
        ))
    schedules.append(
        ScheduleDefinition(
            name="pandas_hello_world_hourly",
            cron_schedule="0 * * * *",
            pipeline_name="pandas_hello_world_pipeline",
            environment_dict_fn=_pandas_hello_world_env,
        ))
    return schedules
Ejemplo n.º 8
0
def test_jobs_attr():
    """Check ``ScheduleDefinition.job`` for graph-backed and name-backed schedules.

    A schedule built from a graph exposes a job with the graph's name; a
    schedule built only from ``pipeline_name`` raises
    ``DagsterInvalidDefinitionError`` when ``.job`` is accessed.
    """
    midnight_cron = "0 0 * * *"

    @graph
    def my_graph():
        pass

    graph_backed = ScheduleDefinition(job=my_graph,
                                      cron_schedule=midnight_cron)
    assert graph_backed.job.name == my_graph.name

    name_backed = ScheduleDefinition(pipeline_name="my_pipeline",
                                     cron_schedule=midnight_cron)
    expected_message = "No job was provided to ScheduleDefinition."
    with pytest.raises(DagsterInvalidDefinitionError, match=expected_message):
        # Reading the property is what triggers the error.
        _ = name_backed.job
Ejemplo n.º 9
0
def define_scheduler():
    """Return six schedules: five for ``no_config_pipeline`` and one partition-based.

    Every schedule uses the cron expression ``0 0 * * *``. The five
    ``ScheduleDefinition`` objects differ in whether their environment is a
    static dict or a zero-argument callback, and in their tags and
    ``should_execute`` settings. The last schedule is produced by
    ``integer_partition_set.create_schedule_definition``.
    """
    cron = "0 0 * * *"
    target_pipeline = "no_config_pipeline"

    def _filesystem_env():
        # Used both as a static value (called here) and as a lazy callback.
        return {"storage": {"filesystem": {}}}

    def _never_execute():
        return False

    static_env = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule=cron,
        pipeline_name=target_pipeline,
        environment_dict=_filesystem_env(),
    )
    env_from_fn = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule_with_config_fn",
        cron_schedule=cron,
        pipeline_name=target_pipeline,
        environment_dict_fn=_filesystem_env,
    )
    tagged = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule_with_schedule_id_tag",
        cron_schedule=cron,
        pipeline_name=target_pipeline,
        environment_dict=_filesystem_env(),
        tags={"dagster/schedule_id": "1234"},
    )
    gated = ScheduleDefinition(
        name="no_config_should_execute",
        cron_schedule=cron,
        pipeline_name=target_pipeline,
        environment_dict=_filesystem_env(),
        should_execute=_never_execute,
    )
    dynamic = ScheduleDefinition(
        name="dynamic_config",
        cron_schedule=cron,
        pipeline_name=target_pipeline,
        environment_dict_fn=_filesystem_env,
    )
    from_partitions = integer_partition_set.create_schedule_definition(
        schedule_name="partition_based",
        cron_schedule=cron,
    )

    # The return order intentionally differs from the construction order.
    return [static_env, tagged, env_from_fn, gated, dynamic, from_partitions]
Ejemplo n.º 10
0
def define_scheduler():
    """Return three schedules: many_events, log_spew and pandas_hello_world.

    The ``many_events`` schedule is gated by a ``should_execute`` callback
    that is true only when today's ``weekday()`` is below 5 (Monday-Friday).
    """

    def _is_weekday():
        # ``weekday()`` numbers Monday as 0, so 0-4 are the working days.
        return datetime.datetime.today().weekday() < 5

    def _filesystem_env():
        return {"storage": {"filesystem": {}}}

    num_csv = file_relative_path(__file__,
                                 "../pandas_hello_world/data/num.csv")
    sum_solid_inputs = {"num": {"csv": {"path": num_csv}}}

    schedules = [
        ScheduleDefinition(
            name="many_events_every_min",
            cron_schedule="* * * * *",
            pipeline_name="many_events",
            environment_dict=_filesystem_env(),
            should_execute=_is_weekday,
        ),
        ScheduleDefinition(
            name="log_spew_hourly",
            cron_schedule="0 * * * *",
            pipeline_name="log_spew",
            environment_dict=_filesystem_env(),
        ),
        ScheduleDefinition(
            name="pandas_hello_world_hourly",
            cron_schedule="0 * * * *",
            pipeline_name="pandas_hello_world",
            environment_dict={
                "solids": {"sum_solid": {"inputs": sum_solid_inputs}},
            },
        ),
    ]
    return schedules
Ejemplo n.º 11
0
def gbfs_schedules():
    """Return two schedules for the ``download_gbfs_files`` pipeline.

    One uses the cron expression ``* * * * *`` and the other ``*/5 * * * *``;
    both run with filesystem storage.
    """
    cadences = (
        ("get_gbfs_feed_every_minute", "* * * * *"),
        ("get_gbfs_feed_every_five_minutes", "*/5 * * * *"),
    )
    return [
        ScheduleDefinition(
            name=schedule_name,
            cron_schedule=cron,
            pipeline_name="download_gbfs_files",
            environment_dict={"storage": {"filesystem": {}}},
        )
        for schedule_name, cron in cadences
    ]
Ejemplo n.º 12
0
 def create_hello_world_schedule(name):
     """Create a ``hello_world_pipeline`` schedule with the given name.

     Args:
         name: Name to give the schedule.

     Returns:
         A ``ScheduleDefinition`` with cron expression ``* * * * *`` and an
         empty environment dict.
     """
     schedule = ScheduleDefinition(
         name=name,
         cron_schedule="* * * * *",
         pipeline_name="hello_world_pipeline",
         environment_dict={},
     )
     return schedule
Ejemplo n.º 13
0
    def _load_schedules(self):
        """Populate ``self._schedules`` from the JSON files in the artifacts dir.

        Ensures ``self._artifacts_dir`` exists, then turns every ``*.json``
        file in it into a ``RunningSchedule`` stored under its schedule
        definition's name.

        Raises:
            Exception: if a file cannot be read as JSON or turned into a
                schedule; the original error is chained as the cause.
        """
        artifacts_dir = self._artifacts_dir
        utils.mkdir_p(artifacts_dir)

        json_files = [
            entry for entry in os.listdir(artifacts_dir)
            if entry.endswith('.json')
        ]
        for file_name in json_files:
            with open(os.path.join(artifacts_dir, file_name)) as handle:
                try:
                    payload = seven.json.load(handle)
                    # Keys are read in the same order as the constructor
                    # arguments so the first missing key is the one reported.
                    schedule_id = payload['schedule_id']
                    definition = ScheduleDefinition(
                        name=payload['name'],
                        cron_schedule=payload['cron_schedule'],
                        execution_params=payload['execution_params'],
                    )
                    running = RunningSchedule(
                        schedule_id,
                        definition,
                        python_path=payload['python_path'],
                        repository_path=payload['repository_path'],
                    )
                    self._schedules[running.schedule_definition.name] = running

                except Exception as ex:  # pylint: disable=broad-except
                    template = 'Could not parse dagit schedule from {file_name} in {dir_name}. {ex}: {msg}'
                    message = template.format(
                        file_name=file_name,
                        dir_name=self._artifacts_dir,
                        ex=type(ex).__name__,
                        msg=ex,
                    )
                    six.raise_from(Exception(message), ex)
Ejemplo n.º 14
0
def define_bar_scheduler(artifacts_dir):
    """Build a ``MockScheduler`` holding the single ``foo_schedule``.

    Args:
        artifacts_dir: Passed through as the scheduler's ``artifacts_dir``.
    """
    foo_schedule = ScheduleDefinition(
        "foo_schedule",
        cron_schedule="* * * * *",
        execution_params={},
    )
    return MockScheduler(schedule_defs=[foo_schedule],
                         artifacts_dir=artifacts_dir)
Ejemplo n.º 15
0
def test_default_name_graph():
    """A schedule built from a graph without a name is called ``<graph>_schedule``."""

    @graph
    def my_graph():
        pass

    unnamed_schedule = ScheduleDefinition(job=my_graph,
                                          cron_schedule="0 0 * * *")
    expected_name = "my_graph_schedule"
    assert unnamed_schedule.name == expected_name
Ejemplo n.º 16
0
def create_repository():
    """Build the ``test`` repository with one pipeline and one schedule.

    The repository holds ``no_config_pipeline`` and registers, under
    ``experimental``, a schedule for it together with
    ``TestSystemCronScheduler``.
    """

    @pipeline
    def no_config_pipeline():
        @lambda_solid
        def return_hello():
            return 'Hello'

        return return_hello()

    # Execution params are assembled piece by piece for readability.
    selector = {"name": "no_config_pipeline", "solidSubset": None}
    environment_config = {"storage": {"filesystem": None}}
    schedule = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule="0 0 * * *",
        execution_params={
            "environmentConfigData": environment_config,
            "selector": selector,
            "mode": "default",
        },
    )

    experimental_settings = {
        'schedule_defs': [schedule],
        'scheduler': TestSystemCronScheduler,
    }
    return RepositoryDefinition(
        name='test',
        pipeline_defs=[no_config_pipeline],
        experimental=experimental_settings,
    )
Ejemplo n.º 17
0
def define_bar_schedules():
    """Return a name-keyed dict of two schedules for ``test_pipeline``.

    Both use the same run config; ``foo_schedule_never_execute`` additionally
    has a ``should_execute`` callback that always returns ``False``.
    """
    every_minute = "* * * * *"
    target_pipeline = "test_pipeline"

    def _never(_context):
        return False

    always = ScheduleDefinition(
        "foo_schedule",
        cron_schedule=every_minute,
        pipeline_name=target_pipeline,
        run_config={"fizz": "buzz"},
    )
    never = ScheduleDefinition(
        "foo_schedule_never_execute",
        cron_schedule=every_minute,
        pipeline_name=target_pipeline,
        run_config={"fizz": "buzz"},
        should_execute=_never,
    )
    return {
        "foo_schedule": always,
        "foo_schedule_never_execute": never,
    }
Ejemplo n.º 18
0
def define_scheduler():
    """Return the ``many_events`` and ``pandas_hello_world`` schedules.

    Both compute their environment dict lazily through one-argument
    ``environment_dict_fn`` callbacks; the CSV path is resolved relative to
    this file each time the pandas callback runs.
    """

    def _storage_only_env(_):
        return {"storage": {"filesystem": {}}}

    def _num_csv_inputs():
        # Both pandas solids read the same CSV as their ``num_df`` input.
        csv_path = file_relative_path(__file__,
                                      "pandas_hello_world/data/num.csv")
        return {"num_df": {"csv": {"path": csv_path}}}

    def _pandas_hello_world_env(_):
        return {
            "solids": {
                "mult_solid": {"inputs": _num_csv_inputs()},
                "sum_solid": {"inputs": _num_csv_inputs()},
            },
            "storage": {"filesystem": {}},
        }

    many_events = ScheduleDefinition(
        name="many_events_every_min",
        cron_schedule="* * * * *",
        pipeline_name="many_events",
        environment_dict_fn=_storage_only_env,
    )
    pandas_hello_world = ScheduleDefinition(
        name="pandas_hello_world_hourly",
        cron_schedule="0 * * * *",
        pipeline_name="pandas_hello_world_pipeline",
        environment_dict_fn=_pandas_hello_world_env,
    )
    return [many_events, pandas_hello_world]
Ejemplo n.º 19
0
def define_scheduler():
    """Return a one-element list holding the ``hello_world_every_minute`` schedule."""
    return [
        ScheduleDefinition(
            name="hello_world_every_minute",
            cron_schedule="* * * * *",
            pipeline_name="hello_world_pipeline",
            environment_dict={},
        ),
    ]
Ejemplo n.º 20
0
def define_bar_schedules():
    """Return a dict mapping ``"foo_schedule"`` to its schedule for pipeline ``foo``."""
    foo_schedule = ScheduleDefinition(
        "foo_schedule",
        cron_schedule="* * * * *",
        pipeline_name="foo",
        run_config={},
    )
    schedules = {"foo_schedule": foo_schedule}
    return schedules
Ejemplo n.º 21
0
 def define_scheduler():
     """Return a one-element list holding ``my_schedule`` for ``test_pipeline``."""
     my_schedule = ScheduleDefinition(
         name="my_schedule",
         cron_schedule="* * * * *",
         pipeline_name="test_pipeline",
         environment_dict={},
     )
     return [my_schedule]
Ejemplo n.º 22
0
 def define_schedules():
     """Return a one-element list holding ``my_schedule`` for ``test_pipeline``."""
     my_schedule = ScheduleDefinition(
         name="my_schedule",
         cron_schedule="* * * * *",
         pipeline_name="test_pipeline",
         run_config={},
     )
     return [my_schedule]
Ejemplo n.º 23
0
def feedly_schedules():
    """Return the single ``daily_feedly_batch`` schedule (cron ``0 15 * * *``)."""
    daily_batch = ScheduleDefinition(
        name="daily_feedly_batch",
        cron_schedule="0 15 * * *",
        pipeline_name="load_to_firebase_pipeline",
        environment_dict={},
    )
    return [daily_batch]
Ejemplo n.º 24
0
def cereal_schedules():
    """Return the single ``good_morning`` schedule (cron ``45 6 * * *``).

    The schedule targets ``hello_cereal_pipeline`` and runs with filesystem
    storage.
    """
    filesystem_storage = {"storage": {"filesystem": {}}}
    good_morning = ScheduleDefinition(
        name="good_morning",
        cron_schedule="45 6 * * *",
        pipeline_name="hello_cereal_pipeline",
        run_config=filesystem_storage,
    )
    return [good_morning]
Ejemplo n.º 25
0
def spotify_schedules():
    """Return the single ``daily_spotify_batch`` schedule.

    NOTE(review): the name says "daily" but the cron expression ``0 * * * *``
    leaves the hour field unrestricted - confirm which one is intended.
    """
    spotify_batch = ScheduleDefinition(
        name="daily_spotify_batch",
        cron_schedule="0 * * * *",
        pipeline_name="load_to_spotify_pipeline",
        environment_dict={},
    )
    return [spotify_batch]
Ejemplo n.º 26
0
def define_schedules():
    """Return the single ``math_hourly_schedule`` for the ``math`` pipeline.

    The environment dict feeds the value ``123`` to the ``num`` input of the
    ``add_one`` solid.
    """
    add_one_inputs = {"num": {"value": 123}}
    environment = {"solids": {"add_one": {"inputs": add_one_inputs}}}

    return [
        ScheduleDefinition(
            name="math_hourly_schedule",
            cron_schedule="0 0 * * *",
            pipeline_name="math",
            environment_dict=environment,
        ),
    ]
Ejemplo n.º 27
0
def define_bar_schedules():
    """Return a one-element list holding ``foo_schedule`` for ``test_pipeline``."""
    foo_schedule = ScheduleDefinition(
        "foo_schedule",
        cron_schedule="* * * * *",
        pipeline_name="test_pipeline",
        environment_dict={},
    )
    return [foo_schedule]
Ejemplo n.º 28
0
def define_schedules():
    """Return the single ``math_hourly_schedule`` for the ``math`` pipeline.

    The run config feeds the value ``123`` to the ``num`` input of the
    ``add_one`` solid.
    """
    add_one_inputs = {"num": {"value": 123}}
    run_config = {"solids": {"add_one": {"inputs": add_one_inputs}}}

    return [
        ScheduleDefinition(
            name="math_hourly_schedule",
            cron_schedule="0 0 * * *",
            pipeline_name="math",
            run_config=run_config,
        ),
    ]
Ejemplo n.º 29
0
def define_scheduler():
    """Return the many_events, log_spew and pandas_hello_world schedules.

    Each schedule computes its environment dict lazily through a
    zero-argument ``environment_dict_fn`` callback.
    """

    def _storage_only_env():
        return {"storage": {"filesystem": {}}}

    def _pandas_hello_world_env():
        # The CSV path is resolved relative to this file on every call.
        csv_path = file_relative_path(__file__,
                                      "pandas_hello_world/data/num.csv")
        sum_solid_inputs = {"num_df": {"csv": {"path": csv_path}}}
        return {
            "solids": {"sum_solid": {"inputs": sum_solid_inputs}},
            "storage": {"filesystem": {}},
        }

    many_events = ScheduleDefinition(
        name="many_events_every_min",
        cron_schedule="* * * * *",
        pipeline_name="many_events",
        environment_dict_fn=_storage_only_env,
    )
    log_spew = ScheduleDefinition(
        name="log_spew_hourly",
        cron_schedule="0 * * * *",
        pipeline_name="log_spew",
        environment_dict_fn=_storage_only_env,
    )
    pandas_hello_world = ScheduleDefinition(
        name="pandas_hello_world_hourly",
        cron_schedule="0 * * * *",
        pipeline_name="pandas_hello_world_pipeline",
        environment_dict_fn=_pandas_hello_world_env,
    )
    return [many_events, log_spew, pandas_hello_world]
Ejemplo n.º 30
0
def cereal_schedules():
    """Return the single ``good_morning`` schedule (cron ``*/1 * * * *``).

    The schedule targets ``hello_cereal_pipeline`` and runs with filesystem
    storage.
    """
    filesystem_storage = {"storage": {"filesystem": {}}}
    good_morning = ScheduleDefinition(
        name="good_morning",
        cron_schedule="*/1 * * * *",
        pipeline_name="hello_cereal_pipeline",
        environment_dict=filesystem_storage,
    )
    return [good_morning]