Ejemplo n.º 1
0
def create_app_with_execution_handle(handle, instance, reloader=None):
    """Build the Dagit web app for an ExecutionTargetHandle.

    Validates arguments, constructs the in-process GraphQL repository
    context, reconciles scheduler state if the repository defines
    schedules, and returns the instantiated app.
    """
    check.inst_param(handle, 'handle', ExecutionTargetHandle)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.opt_inst_param(reloader, 'reloader', Reloader)

    execution_manager = get_execution_manager(instance)
    warn_if_compute_logs_disabled()

    print('Loading repository...')
    context = DagsterGraphQLInProcessRepositoryContext(
        handle=handle,
        instance=instance,
        execution_manager=execution_manager,
        reloader=reloader,
        version=__version__,
    )

    # Automatically initialize scheduler every time Dagit loads so cron
    # entries stay in sync with the schedule definitions on disk.
    repository = context.get_repository()
    if repository.schedule_defs:
        if not instance.scheduler:
            warnings.warn(MISSING_SCHEDULER_WARNING)
        else:
            reconcile_scheduler_state(
                sys.executable,
                context.get_handle().data.repository_yaml,
                repository=repository,
                instance=instance,
            )

    return instantiate_app_with_views(context)
Ejemplo n.º 2
0
def test_start_schedule_fails():
    """A failed cron registration must leave the schedule STOPPED."""
    with TemporaryDirectory() as tempdir:
        instance = define_scheduler_instance(tempdir)
        repository = RepositoryDefinition(name="test_repository", schedule_defs=define_schedules())

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        schedule_def = repository.get_schedule_def("no_config_pipeline_every_min_schedule")

        def _boom(*_args, **_kwargs):
            raise Exception('Patch')

        # Force the cron-registration step to blow up.
        instance._scheduler._start_cron_job = _boom  # pylint: disable=protected-access
        with pytest.raises(Exception, match='Patch'):
            instance.start_schedule(repository, "no_config_pipeline_every_min_schedule")

        # The failed start must not flip the schedule's status.
        schedule = instance.get_schedule_by_name(repository, schedule_def.name)
        assert schedule.status == ScheduleStatus.STOPPED
Ejemplo n.º 3
0
    def test_invalid_config_schedule_error(self, graphql_context, snapshot):
        """Invalid run config still yields a successful tick (the error is in the run)."""
        instance = graphql_context.instance
        repository = graphql_context.legacy_get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'invalid_config_schedule'},
        )
        assert (result.data['startScheduledExecution']['__typename'] ==
                'PipelineConfigValidationInvalid')

        # Check tick data and stats through gql
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context, SCHEDULE_TICKS_QUERY)
        schedule_result = next(
            entry for entry in result.data['scheduler']['runningSchedules']
            if entry['scheduleDefinition']['name'] == 'invalid_config_schedule')
        assert schedule_result['stats']['ticksSucceeded'] == 1
        snapshot.assert_match(schedule_result)

        # The stored tick is SUCCESS even though the config was rejected.
        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'invalid_config_schedule')
        assert len(ticks) == 1
        assert ticks[0].status == ScheduleTickStatus.SUCCESS
Ejemplo n.º 4
0
def test_stop_schedule_unsuccessful(restore_cron_tab):  # pylint:disable=unused-argument,redefined-outer-name
    """stop_schedule raises when removing the cron job silently does nothing."""
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(
            name="test_repository", schedule_defs=define_schedules()
        )
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Make cron-job removal a no-op so the stop appears to fail.
        instance._scheduler._end_cron_job = lambda *_: None  # pylint: disable=protected-access

        instance.start_schedule(repository, "no_config_pipeline_every_min_schedule")

        # End schedule
        with pytest.raises(
                DagsterInvariantViolationError,
                match=
                "Attempted to remove cron job for schedule no_config_pipeline_every_min_schedule, but failed.",
        ):
            instance.stop_schedule(repository, "no_config_pipeline_every_min_schedule")
Ejemplo n.º 5
0
def test_start_schedule_manual_delete_debug(
        restore_cron_tab,
        snapshot  # pylint:disable=unused-argument,redefined-outer-name
):
    """Debug info reflects a cron entry removed behind dagster's back."""
    with TemporaryDirectory() as tempdir:
        instance = define_scheduler_instance(tempdir)
        repository = RepositoryDefinition(
            name="test_repository", schedule_defs=define_schedules()
        )

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path="fake path",
            repository_path="",
            repository=repository,
            instance=instance,
        )

        instance.start_schedule(repository, "no_config_pipeline_every_min_schedule")

        # Manually delete the schedule from the crontab
        schedule = instance.get_schedule_by_name(
            repository, "no_config_pipeline_every_min_schedule")
        instance.scheduler._end_cron_job(  # pylint: disable=protected-access
            instance, repository, schedule
        )

        # Check debug command
        snapshot.assert_match(instance.scheduler_debug_info())
Ejemplo n.º 6
0
def test_re_init():
    """Re-running reconcile after a schedule starts keeps state consistent on disk."""
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(name="test_repository", schedule_defs=define_schedules())
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        instance.start_schedule(repository, "no_config_pipeline_every_min_schedule")

        # Re-initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Check schedules are saved to disk
        assert 'schedules' in os.listdir(tempdir)

        # Every stored schedule should still point at a real interpreter.
        assert all(
            "/bin/python" in stored.python_path
            for stored in instance.all_schedules(repository)
        )
Ejemplo n.º 7
0
def test_tick_skip(snapshot):
    """A schedule whose should_execute declines records a SKIPPED tick."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = get_instance(temp_dir)
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )
        repository = context.get_repository()
        reconcile_scheduler_state("", "", repository, instance)

        execute_dagster_graphql(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'no_config_should_execute'},
        )

        # Check tick data and stats through gql
        result = execute_dagster_graphql(context, SCHEDULE_TICKS_QUERY)
        schedule_result = next(
            entry for entry in result.data['scheduler']['runningSchedules']
            if entry['scheduleDefinition']['name'] == 'no_config_should_execute')
        assert schedule_result['stats']['ticksSkipped'] == 1
        snapshot.assert_match(schedule_result)

        ticks = instance.get_schedule_ticks_by_schedule(
            repository, 'no_config_should_execute')
        assert len(ticks) == 1
        assert ticks[0].status == ScheduleTickStatus.SKIPPED
Ejemplo n.º 8
0
def create_app_with_reconstructable_repo(recon_repo, instance, reloader=None):
    """Build the Dagit web app for a ReconstructableRepository.

    Validates arguments, constructs the GraphQL context with a single
    in-process repository location, reconciles scheduler state if the
    repository defines schedules, and returns the instantiated app.
    """
    check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.opt_inst_param(reloader, 'reloader', Reloader)

    warn_if_compute_logs_disabled()

    print('Loading repository...')
    context = DagsterGraphQLContext(
        instance=instance,
        locations=[InProcessRepositoryLocation(recon_repo, reloader=reloader)],
        version=__version__,
    )

    # Automatically initialize scheduler every time Dagit loads so cron
    # entries stay in sync with the repository's schedule definitions.
    repository = context.legacy_get_repository_definition()
    if repository.schedule_defs:
        if not instance.scheduler:
            warnings.warn(MISSING_SCHEDULER_WARNING)
        else:
            yaml_path = context.legacy_location.get_reconstructable_repository().yaml_path
            reconcile_scheduler_state(
                sys.executable, yaml_path, repository=repository, instance=instance
            )

    return instantiate_app_with_views(context)
Ejemplo n.º 9
0
    def test_should_execute_scheduler_error(self, graphql_context, snapshot):
        """An exception in should_execute yields a FAILED tick carrying the error."""
        instance = graphql_context.instance
        repository = graphql_context.legacy_get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'should_execute_error_schedule'},
        )

        # Check tick data and stats through gql
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context, SCHEDULE_TICKS_QUERY)
        schedule_result = next(
            entry for entry in result.data['scheduler']['runningSchedules']
            if entry['scheduleDefinition']['name'] == 'should_execute_error_schedule')
        assert schedule_result['stats']['ticksFailed'] == 1
        snapshot.assert_match(schedule_result)

        # The stored tick carries the failure status and the original message.
        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'should_execute_error_schedule')
        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.status == ScheduleTickStatus.FAILURE
        assert tick.error
        assert (
            "Error occurred during the execution should_execute for schedule "
            "should_execute_error_schedule" in tick.error.message)
Ejemplo n.º 10
0
    def test_tick_skip(self, graphql_context, snapshot):
        """should_execute returning False records exactly one SKIPPED tick."""
        instance = graphql_context.instance
        repository = graphql_context.legacy_get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'no_config_should_execute'},
        )

        # Check tick data and stats through gql
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context, SCHEDULE_TICKS_QUERY)
        schedule_result = next(
            entry for entry in result.data['scheduler']['runningSchedules']
            if entry['scheduleDefinition']['name'] == 'no_config_should_execute')
        assert schedule_result['stats']['ticksSkipped'] == 1
        snapshot.assert_match(schedule_result)

        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'no_config_should_execute')
        assert len(ticks) == 1
        assert ticks[0].status == ScheduleTickStatus.SKIPPED
Ejemplo n.º 11
0
    def test_environment_dict_scheduler_error(self, graphql_context, snapshot):
        """The environment_dict error schedule still launches and records SUCCESS."""
        instance = graphql_context.instance
        repository = graphql_context.legacy_get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'environment_dict_error_schedule'},
        )
        assert_start_scheduled_execution_success(result)
        run_id = result.data['startScheduledExecution']['run']['runId']

        # Check tick data and stats through gql
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context, SCHEDULE_TICKS_QUERY)
        schedule_result = next(
            entry for entry in result.data['scheduler']['runningSchedules']
            if entry['scheduleDefinition']['name'] == 'environment_dict_error_schedule')
        assert schedule_result['stats']['ticksSucceeded'] == 1
        snapshot.assert_match(schedule_result)

        # The single recorded tick links back to the launched run.
        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'environment_dict_error_schedule')
        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.status == ScheduleTickStatus.SUCCESS
        assert tick.run_id == run_id
Ejemplo n.º 12
0
def test_start_and_stop_schedule():
    """Starting a schedule writes its cron script; stopping removes it."""
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(name="test_repository", schedule_defs=define_schedules())
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        schedule_def = repository.get_schedule_def("no_config_pipeline_every_min_schedule")
        script_name = "{}.{}.sh".format(repository.name, schedule_def.name)
        scripts_dir = os.path.join(tempdir, 'schedules', 'scripts')

        # Start schedule
        schedule = instance.start_schedule(repository, "no_config_pipeline_every_min_schedule")
        check.inst_param(schedule, 'schedule', Schedule)
        assert "/bin/python" in schedule.python_path

        assert 'schedules' in os.listdir(tempdir)
        assert script_name in os.listdir(scripts_dir)

        # End schedule
        instance.stop_schedule(repository, "no_config_pipeline_every_min_schedule")
        assert script_name not in os.listdir(scripts_dir)
Ejemplo n.º 13
0
def test_enviornment_dict_scheduler_error_serialize_cauze():
    """The environment_dict error schedule serializes its tick with the run id."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = get_instance(temp_dir)
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )
        repository = context.get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        result = execute_dagster_graphql(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'environment_dict_error_schedule'},
        )
        typename = result.data['startScheduledExecution']['__typename']
        assert typename == 'StartPipelineRunSuccess'
        run_id = result.data['startScheduledExecution']['run']['runId']

        # The single recorded tick points at the run that was kicked off.
        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'environment_dict_error_schedule')
        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.status == ScheduleTickStatus.SUCCESS
        assert tick.run_id == run_id
Ejemplo n.º 14
0
def test_enviornment_dict_scheduler_error(snapshot):
    """The environment_dict error schedule still launches and records SUCCESS."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = get_instance(temp_dir)
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )
        repository = context.get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        result = execute_dagster_graphql(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'environment_dict_error_schedule'},
        )
        typename = result.data['startScheduledExecution']['__typename']
        assert typename == 'StartPipelineRunSuccess'
        run_id = result.data['startScheduledExecution']['run']['runId']

        # Check tick data and stats through gql
        result = execute_dagster_graphql(context, SCHEDULE_TICKS_QUERY)
        schedule_result = next(
            entry for entry in result.data['scheduler']['runningSchedules']
            if entry['scheduleDefinition']['name'] == 'environment_dict_error_schedule')
        assert schedule_result['stats']['ticksSucceeded'] == 1
        snapshot.assert_match(schedule_result)

        # The single recorded tick links back to the launched run.
        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'environment_dict_error_schedule')
        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.status == ScheduleTickStatus.SUCCESS
        assert tick.run_id == run_id
Ejemplo n.º 15
0
def test_should_execute_scheduler_error(snapshot):
    """An exception in should_execute yields a FAILED tick carrying the error."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = get_instance(temp_dir)
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )
        repository = context.get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        execute_dagster_graphql(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'should_execute_error_schedule'},
        )

        # Check tick data and stats through gql
        result = execute_dagster_graphql(context, SCHEDULE_TICKS_QUERY)
        schedule_result = next(
            entry for entry in result.data['scheduler']['runningSchedules']
            if entry['scheduleDefinition']['name'] == 'should_execute_error_schedule')
        assert schedule_result['stats']['ticksFailed'] == 1
        snapshot.assert_match(schedule_result)

        # The stored tick carries the failure status and the original message.
        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'should_execute_error_schedule')
        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.status == ScheduleTickStatus.FAILURE
        assert tick.error
        assert (
            "Error occurred during the execution should_execute for schedule "
            "should_execute_error_schedule" in tick.error.message)
Ejemplo n.º 16
0
def test_schedules():
    """Each listed schedule can be kicked off successfully via GraphQL."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)

        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        # We need to call up on the scheduler handle to persist
        # state about the schedules to disk before running them.
        # Note: This dependency will be removed soon.
        repository = context.get_repository()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path=file_relative_path(__file__, '../'),
            repository=repository,
            instance=instance,
        )

        for schedule_name in ('many_events_every_min', 'pandas_hello_world_hourly'):
            result = execute_dagster_graphql(
                context,
                START_SCHEDULED_EXECUTION_MUTATION,
                variables={'scheduleName': schedule_name},
            )
            assert not result.errors
            assert result.data
            typename = result.data['startScheduledExecution']['__typename']
            assert typename == 'StartPipelineRunSuccess'
Ejemplo n.º 17
0
def test_get_all_schedules():
    """GET_SCHEDULES_QUERY returns all schedules with expected status/config."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )

        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        instance.start_schedule(repository.name, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        # These schedules are defined in dagster_graphql_tests/graphql/setup_scheduler.py
        # If you add a schedule there, be sure to update the number of schedules below
        assert scheduler_result.data
        assert scheduler_result.data['scheduler']
        running = scheduler_result.data['scheduler']['runningSchedules']
        assert running
        assert len(running) == 18

        for entry in running:
            name = entry['scheduleDefinition']['name']
            if name == 'no_config_pipeline_hourly_schedule':
                assert entry['status'] == 'RUNNING'

            if name == 'environment_dict_error_schedule':
                assert entry['scheduleDefinition']['runConfigYaml'] is None
            elif name == 'invalid_config_schedule':
                assert (entry['scheduleDefinition']['runConfigYaml'] ==
                        'solids:\n  takes_an_enum:\n    config: invalid\n')
            else:
                assert (entry['scheduleDefinition']['runConfigYaml'] ==
                        'storage:\n  filesystem: {}\n')
Ejemplo n.º 18
0
def test_start_schedule_manual_duplicate_schedules_add_debug(
        restore_cron_tab,
        snapshot  # pylint:disable=unused-argument,redefined-outer-name
):
    """Duplicate cron entries surface as a debug error; reconcile repairs them."""
    with TemporaryDirectory() as tempdir:
        instance = define_scheduler_instance(tempdir)
        repository = RepositoryDefinition(
            name="test_repository", schedule_defs=define_schedules()
        )

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path="fake path",
            repository_path="",
            repository=repository,
            instance=instance,
        )

        instance.start_schedule(repository.name, "no_config_pipeline_every_min_schedule")

        # Manually add  extra cron tabs
        for _ in range(2):
            instance.scheduler._start_cron_job(  # pylint: disable=protected-access
                instance,
                repository.name,
                instance.get_schedule_by_name(
                    repository.name, "no_config_pipeline_every_min_schedule"),
            )

        # Check debug command
        debug_info = instance.scheduler_debug_info()
        assert len(debug_info.errors) == 1

        # Reconcile should fix error
        reconcile_scheduler_state(
            python_path="fake path",
            repository_path="",
            repository=repository,
            instance=instance,
        )
        debug_info = instance.scheduler_debug_info()
        assert len(debug_info.errors) == 0
Ejemplo n.º 19
0
def test_stop_schedule_fails(
        restore_cron_tab,  # pylint:disable=unused-argument,redefined-outer-name
):
    """If removing the cron job raises, the schedule must stay RUNNING."""
    with TemporaryDirectory() as tempdir:
        repository = RepositoryDefinition(
            name="test_repository", schedule_defs=define_schedules()
        )
        instance = define_scheduler_instance(tempdir)

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        schedule_def = repository.get_schedule_def("no_config_pipeline_every_min_schedule")

        def _boom(*_args, **_kwargs):
            raise Exception('Patch')

        # Force the cron-removal step to blow up.
        instance._scheduler._end_cron_job = _boom  # pylint: disable=protected-access

        schedule = instance.start_schedule(
            repository.name, "no_config_pipeline_every_min_schedule")
        check.inst_param(schedule, 'schedule', Schedule)
        assert "/bin/python" in schedule.python_path

        assert 'schedules' in os.listdir(tempdir)
        assert "{}.{}.sh".format(repository.name, schedule_def.name) in os.listdir(
            os.path.join(tempdir, 'schedules', 'scripts'))

        # End schedule
        with pytest.raises(Exception, match='Patch'):
            instance.stop_schedule(repository.name, "no_config_pipeline_every_min_schedule")

        # The failed stop must not flip the schedule's status.
        schedule = instance.get_schedule_by_name(repository.name, schedule_def.name)
        assert schedule.status == ScheduleStatus.RUNNING
Ejemplo n.º 20
0
def test_script_execution(restore_cron_tab, unset_dagster_home):  # pylint:disable=unused-argument,redefined-outer-name
    """The generated cron bash script launches a pipeline run end to end."""
    with TemporaryDirectory() as tempdir:
        os.environ["DAGSTER_HOME"] = tempdir
        config = {
            'scheduler': {
                'module': 'dagster_cron',
                'class': 'SystemCronScheduler',
                'config': {}
            },
            # This needs to synchronously execute to completion when
            # the generated bash script is invoked
            'run_launcher': {
                'module': 'dagster.core.launcher.sync_in_memory_run_launcher',
                'class': 'SyncInMemoryRunLauncher',
            },
        }

        with open(os.path.join(tempdir, 'dagster.yaml'), 'w+') as f:
            f.write(yaml.dump(config))

        instance = DagsterInstance.get()
        repository = define_repository()

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path=file_relative_path(__file__, './repository.yaml'),
            repository=repository,
            instance=instance,
        )

        instance.start_schedule(repository.name, "no_config_pipeline_every_min_schedule")

        # Invoke the generated bash script directly, exactly as cron would.
        schedule_def = repository.get_schedule_def("no_config_pipeline_every_min_schedule")
        script = instance.scheduler._get_bash_script_file_path(  # pylint: disable=protected-access
            instance, repository.name, schedule_def)
        subprocess.check_output([script], shell=True, env={"DAGSTER_HOME": tempdir})

        runs = instance.get_runs()
        assert len(runs) == 1
        assert runs[0].status == PipelineRunStatus.SUCCESS
Ejemplo n.º 21
0
def execute_up_command(preview, cli_args, print_fn):
    """Implement the scheduler `up` CLI command.

    Prints pending schedule changes; unless ``preview`` is set, then
    reconciles scheduler state, mapping invariant violations to a CLI
    usage error.
    """
    handle = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    repository = handle.get_definition()
    python_path = sys.executable
    repository_path = handle.yaml_path

    print_changes(repository, instance, print_fn, preview=preview)
    if preview:
        # Preview mode only prints the diff; nothing is written.
        return

    try:
        reconcile_scheduler_state(
            python_path, repository_path, repository, instance=instance
        )
    except DagsterInvariantViolationError as ex:
        raise click.UsageError(ex)
Ejemplo n.º 22
0
def test_start_stop_schedule():
    """Start/stop mutations flip a schedule between RUNNING and STOPPED."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )

        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        start_result = execute_dagster_graphql(
            context,
            START_SCHEDULES_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )
        assert start_result.data['startSchedule']['schedule']['status'] == 'RUNNING'

        # Stop schedule
        stop_result = execute_dagster_graphql(
            context,
            STOP_SCHEDULES_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )
        assert stop_result.data['stopRunningSchedule']['schedule']['status'] == 'STOPPED'
Ejemplo n.º 23
0
    def test_query_multiple_schedule_ticks(self, graphql_context, snapshot):
        """Tick data across several fired schedules matches the stored snapshot."""
        instance = graphql_context.instance
        repository = graphql_context.legacy_get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        # Fire each schedule once so every one has tick data.
        for scheduleName in (
                'no_config_pipeline_hourly_schedule',
                'no_config_should_execute',
                'environment_dict_error_schedule',
        ):
            execute_dagster_graphql_and_finish_runs(
                graphql_context,
                START_SCHEDULED_EXECUTION_QUERY,
                variables={'scheduleName': scheduleName},
            )

        result = execute_dagster_graphql_and_finish_runs(
            graphql_context, SCHEDULE_TICKS_QUERY)
        snapshot.assert_match(result.data['scheduler']['runningSchedules'])
Ejemplo n.º 24
0
    def test_tick_success(self, graphql_context, snapshot):
        """A successful scheduled execution records one SUCCESS tick with a run id."""
        context = graphql_context
        instance = context.instance
        repository = context.legacy_get_repository_definition()

        reconcile_scheduler_state("", "", repository, instance)
        schedule_def = repository.get_schedule_def("no_config_pipeline_hourly_schedule")

        start_time = time.time()
        execute_dagster_graphql_and_finish_runs(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': schedule_def.name},
        )

        # Check tick data and stats through gql
        result = execute_dagster_graphql_and_finish_runs(context, SCHEDULE_TICKS_QUERY)
        assert result.data
        schedule_result = next(
            entry for entry in result.data['scheduler']['runningSchedules']
            if entry['scheduleDefinition']['name'] == schedule_def.name)
        assert schedule_result
        assert schedule_result['stats']['ticksSucceeded'] == 1
        snapshot.assert_match(schedule_result)

        # Check directly against the DB
        ticks = instance.get_schedule_ticks_by_schedule(repository.name, schedule_def.name)
        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.schedule_name == schedule_def.name
        assert tick.cron_schedule == schedule_def.cron_schedule
        assert start_time < tick.timestamp < time.time()
        assert tick.status == ScheduleTickStatus.SUCCESS
        assert tick.run_id
Ejemplo n.º 25
0
def test_tick_success(snapshot):
    """Start a scheduled execution and verify the recorded tick.

    Uses a throwaway instance built from the repository yaml, then checks
    the tick both via the GraphQL SCHEDULE_TICKS_QUERY response and
    directly against the instance's schedule-tick storage.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance = get_instance(temp_dir)
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'),
            instance=instance,
        )
        repository = context.get_repository_definition()

        # Sync scheduler state so the schedule definitions are registered.
        reconcile_scheduler_state("", "", repository, instance)
        schedule_def = repository.get_schedule_def(
            "no_config_pipeline_hourly_schedule")

        launch_time = time.time()
        execute_dagster_graphql(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': schedule_def.name},
        )

        # Verify tick data and stats through GraphQL.
        result = execute_dagster_graphql(context, SCHEDULE_TICKS_QUERY)
        matching = [
            entry
            for entry in result.data['scheduler']['runningSchedules']
            if entry['scheduleDefinition']['name'] == schedule_def.name
        ]
        assert matching
        schedule_result = matching[0]
        assert schedule_result['stats']['ticksSucceeded'] == 1
        snapshot.assert_match(schedule_result)

        # Verify the same tick directly against the DB.
        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, schedule_def.name)
        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.schedule_name == schedule_def.name
        assert tick.cron_schedule == schedule_def.cron_schedule
        assert launch_time < tick.timestamp < time.time()
        assert tick.status == ScheduleTickStatus.SUCCESS
        assert tick.run_id
Ejemplo n.º 26
0
def test_init(restore_cron_tab):  # pylint:disable=unused-argument,redefined-outer-name
    """Reconciling scheduler state persists schedule metadata to disk."""
    with TemporaryDirectory() as tempdir:
        instance = define_scheduler_instance(tempdir)
        repository = RepositoryDefinition(
            name="test_repository", schedule_defs=define_schedules())

        # Initialize scheduler state for the repository.
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # A 'schedules' directory should now exist under the instance root.
        assert 'schedules' in os.listdir(tempdir)

        # Each schedule should record the interpreter used to launch it.
        for schedule in instance.all_schedules(repository):
            assert "/bin/python" in schedule.python_path
Ejemplo n.º 27
0
    def test_environment_dict_scheduler_error_serialize_cause(
            self, graphql_context):
        """Starting the environment-dict-error schedule still records a
        single successful tick whose run id matches the launched run."""
        instance = graphql_context.instance
        repository = graphql_context.legacy_get_repository_definition()

        # Sync scheduler state so the schedule definitions are registered.
        reconcile_scheduler_state("", "", repository, instance)

        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'environment_dict_error_schedule'},
        )
        assert_start_scheduled_execution_success(result)
        run_id = result.data['startScheduledExecution']['run']['runId']

        # Exactly one tick, successful, tied to the run we just started.
        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'environment_dict_error_schedule')
        assert len(ticks) == 1
        (tick,) = ticks
        assert tick.status == ScheduleTickStatus.SUCCESS
        assert tick.run_id == run_id
Ejemplo n.º 28
0
def test_wipe():
    """Wiping the scheduler removes every schedule from the instance."""
    with TemporaryDirectory() as tempdir:
        instance = define_scheduler_instance(tempdir)
        repository = RepositoryDefinition(
            name="test_repository", schedule_defs=define_schedules())

        # Initialize scheduler state and start one schedule so there is
        # something to wipe.
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )
        instance.start_schedule(repository, "no_config_pipeline_every_min_schedule")

        # After a wipe no schedules should remain.
        instance.wipe_all_schedules()
        assert instance.all_schedules(repository) == []
Ejemplo n.º 29
0
def test_query_multiple_schedule_ticks(snapshot):
    """Start several schedules, then snapshot the combined tick data
    returned by SCHEDULE_TICKS_QUERY."""
    schedule_names = (
        'no_config_pipeline_hourly_schedule',
        'no_config_should_execute',
        'environment_dict_error_schedule',
    )
    with seven.TemporaryDirectory() as temp_dir:
        instance = get_instance(temp_dir)
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'),
            instance=instance,
        )
        repository = context.get_repository_definition()

        # Sync scheduler state so the schedule definitions are registered.
        reconcile_scheduler_state("", "", repository, instance)

        for schedule_name in schedule_names:
            execute_dagster_graphql(
                context,
                START_SCHEDULED_EXECUTION_QUERY,
                variables={'scheduleName': schedule_name},
            )

        result = execute_dagster_graphql(context, SCHEDULE_TICKS_QUERY)
        snapshot.assert_match(result.data['scheduler']['runningSchedules'])