def test_start_schedule_fails():
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(name="test_repository", schedule_defs=define_schedules())
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        schedule_def = repository.get_schedule_def("no_config_pipeline_every_min_schedule")

        def raises(*args, **kwargs):
            raise Exception('Patch')

        instance._scheduler._start_cron_job = raises  # pylint: disable=protected-access
        with pytest.raises(Exception, match='Patch'):
            instance.start_schedule(repository, "no_config_pipeline_every_min_schedule")

        schedule = instance.get_schedule_by_name(repository, schedule_def.name)

        assert schedule.status == ScheduleStatus.STOPPED
def test_start_and_stop_schedule():
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(name="test_repository", schedule_defs=define_schedules())
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        schedule_def = repository.get_schedule_def("no_config_pipeline_every_min_schedule")

        # Start schedule
        schedule = instance.start_schedule(repository, "no_config_pipeline_every_min_schedule")

        check.inst_param(schedule, 'schedule', Schedule)
        assert "/bin/python" in schedule.python_path

        assert 'schedules' in os.listdir(tempdir)

        assert "{}.{}.sh".format(repository.name, schedule_def.name) in os.listdir(
            os.path.join(tempdir, 'schedules', 'scripts')
        )

        # End schedule
        instance.stop_schedule(repository, "no_config_pipeline_every_min_schedule")
        assert "{}.{}.sh".format(repository.name, schedule_def.name) not in os.listdir(
            os.path.join(tempdir, 'schedules', 'scripts')
        )
def test_re_init():
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(name="test_repository", schedule_defs=define_schedules())
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        schedule = instance.start_schedule(repository, "no_config_pipeline_every_min_schedule")

        # Re-initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Check schedules are saved to disk
        assert 'schedules' in os.listdir(tempdir)

        schedules = instance.all_schedules(repository)

        for schedule in schedules:
            assert "/bin/python" in schedule.python_path
Beispiel #4
0
def test_start_schedule_manual_delete_debug(
        restore_cron_tab,
        snapshot  # pylint:disable=unused-argument,redefined-outer-name
):
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(name="test_repository",
                                          schedule_defs=define_schedules())
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path="fake path",
            repository_path="",
            repository=repository,
            instance=instance,
        )

        instance.start_schedule(repository,
                                "no_config_pipeline_every_min_schedule")

        # Manually delete the schedule from the crontab
        instance.scheduler._end_cron_job(  # pylint: disable=protected-access
            instance,
            repository,
            instance.get_schedule_by_name(
                repository, "no_config_pipeline_every_min_schedule"),
        )

        # Check debug command
        snapshot.assert_match(instance.scheduler_debug_info())
Beispiel #5
0
def test_stop_schedule_unsuccessful(restore_cron_tab, ):  # pylint:disable=unused-argument,redefined-outer-name
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(name="test_repository",
                                          schedule_defs=define_schedules())
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        def do_nothing(*_):
            pass

        instance._scheduler._end_cron_job = do_nothing  # pylint: disable=protected-access

        instance.start_schedule(repository,
                                "no_config_pipeline_every_min_schedule")

        # End schedule
        with pytest.raises(
                DagsterInvariantViolationError,
                match=
                "Attempted to remove cron job for schedule no_config_pipeline_every_min_schedule, but failed.",
        ):
            instance.stop_schedule(repository,
                                   "no_config_pipeline_every_min_schedule")
Beispiel #6
0
def external_repository_data_from_def(
    repository_def: RepositoryDefinition, ) -> ExternalRepositoryData:
    check.inst_param(repository_def, "repository_def", RepositoryDefinition)

    pipelines = repository_def.get_all_pipelines()
    return ExternalRepositoryData(
        name=repository_def.name,
        external_pipeline_datas=sorted(
            list(map(external_pipeline_data_from_def, pipelines)),
            key=lambda pd: pd.name,
        ),
        external_schedule_datas=sorted(
            list(
                map(external_schedule_data_from_def,
                    repository_def.schedule_defs)),
            key=lambda sd: sd.name,
        ),
        external_partition_set_datas=sorted(
            list(
                map(external_partition_set_data_from_def,
                    repository_def.partition_set_defs)),
            key=lambda psd: psd.name,
        ),
        external_sensor_datas=sorted(
            list(map(external_sensor_data_from_def,
                     repository_def.sensor_defs)),
            key=lambda sd: sd.name,
        ),
        external_asset_graph_data=external_asset_graph_from_defs(
            pipelines,
            source_assets_by_key=repository_def.source_assets_by_key),
    )
Beispiel #7
0
def test_stop_schedule_fails(
        restore_cron_tab,  # pylint:disable=unused-argument,redefined-outer-name
):
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(name="test_repository",
                                          schedule_defs=define_schedules())
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        schedule_def = repository.get_schedule_def(
            "no_config_pipeline_every_min_schedule")

        def raises(*args, **kwargs):
            raise Exception('Patch')

        instance._scheduler._end_cron_job = raises  # pylint: disable=protected-access

        schedule = instance.start_schedule(
            repository.name, "no_config_pipeline_every_min_schedule")

        check.inst_param(schedule, 'schedule', Schedule)
        assert "/bin/python" in schedule.python_path

        assert 'schedules' in os.listdir(tempdir)

        assert "{}.{}.sh".format(repository.name,
                                 schedule_def.name) in os.listdir(
                                     os.path.join(tempdir, 'schedules',
                                                  'scripts'))

        # End schedule
        with pytest.raises(Exception, match='Patch'):
            instance.stop_schedule(repository.name,
                                   "no_config_pipeline_every_min_schedule")

        schedule = instance.get_schedule_by_name(repository.name,
                                                 schedule_def.name)

        assert schedule.status == ScheduleStatus.RUNNING
Beispiel #8
0
def test_start_schedule_manual_duplicate_schedules_add_debug(
        restore_cron_tab,
        snapshot  # pylint:disable=unused-argument,redefined-outer-name
):
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(name="test_repository",
                                          schedule_defs=define_schedules())
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path="fake path",
            repository_path="",
            repository=repository,
            instance=instance,
        )

        instance.start_schedule(repository.name,
                                "no_config_pipeline_every_min_schedule")

        # Manually add  extra cron tabs
        instance.scheduler._start_cron_job(  # pylint: disable=protected-access
            instance,
            repository.name,
            instance.get_schedule_by_name(
                repository.name, "no_config_pipeline_every_min_schedule"),
        )
        instance.scheduler._start_cron_job(  # pylint: disable=protected-access
            instance,
            repository.name,
            instance.get_schedule_by_name(
                repository.name, "no_config_pipeline_every_min_schedule"),
        )

        # Check debug command
        debug_info = instance.scheduler_debug_info()
        assert len(debug_info.errors) == 1

        # Reconcile should fix error
        reconcile_scheduler_state(
            python_path="fake path",
            repository_path="",
            repository=repository,
            instance=instance,
        )
        debug_info = instance.scheduler_debug_info()
        assert len(debug_info.errors) == 0
def test_wipe():
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(name="test_repository", schedule_defs=define_schedules())
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        instance.start_schedule(repository, "no_config_pipeline_every_min_schedule")

        # Wipe scheduler
        instance.wipe_all_schedules()

        # Check schedules are wiped
        assert instance.all_schedules(repository) == []
Beispiel #10
0
def test_init(restore_cron_tab):  # pylint:disable=unused-argument,redefined-outer-name
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(name="test_repository",
                                          schedule_defs=define_schedules())
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Check schedules are saved to disk
        assert 'schedules' in os.listdir(tempdir)

        schedules = instance.all_schedules(repository)

        for schedule in schedules:
            assert "/bin/python" in schedule.python_path
Beispiel #11
0
def test_init():
    with TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir=tempdir)
        repository = RepositoryDefinition(name="test_repository")

        scheduler_handle = define_scheduler()
        assert scheduler_handle

        # Initialize scheduler
        scheduler_handle.up(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Check schedules are saved to disk
        assert 'schedules' in os.listdir(tempdir)

        schedules = instance.all_schedules(repository)

        for schedule in schedules:
            assert "/bin/python" in schedule.python_path
def define_bar_repo():
    return RepositoryDefinition('bar', {'foo': define_foo_pipeline})
Beispiel #13
0
def define_bar_repo():
    return RepositoryDefinition(name='bar',
                                pipeline_dict={'foo': define_foo_pipeline})
Beispiel #14
0
def define_repository():
    return RepositoryDefinition(name="test_repository",
                                pipeline_defs=[no_config_pipeline],
                                schedule_defs=define_schedules())