Example #1
0
    def get_current_instigator_state(
            self, stored_state: Optional["InstigatorState"]):
        """Resolve the effective instigator state of this schedule.

        The stored state is combined with the schedule's ``default_status``:

        * Default is ``DefaultScheduleStatus.RUNNING``: any stored state is
          returned as-is; without one, an ``AUTOMATICALLY_RUNNING`` state is
          synthesized.
        * Any other default: a stored state is returned unless its status is
          ``AUTOMATICALLY_RUNNING``; otherwise a ``STOPPED`` state is
          synthesized.

        Args:
            stored_state: The state loaded from storage, or ``None`` (or any
                falsy value) when nothing has been persisted.

        Returns:
            Either ``stored_state`` itself or a freshly built
            ``InstigatorState`` with ``start_timestamp=None``.
        """
        from dagster.core.scheduler.instigation import (
            InstigatorState,
            InstigatorStatus,
            ScheduleInstigatorData,
        )

        if self.default_status == DefaultScheduleStatus.RUNNING:
            honor_stored = bool(stored_state)
            fallback_status = InstigatorStatus.AUTOMATICALLY_RUNNING
        else:
            # A lingering AUTOMATICALLY_RUNNING row means the default was
            # changed in code after the row was written, so the row is
            # ignored and the schedule is reported as stopped.
            honor_stored = bool(
                stored_state
                and stored_state.status != InstigatorStatus.AUTOMATICALLY_RUNNING
            )
            fallback_status = InstigatorStatus.STOPPED

        if honor_stored:
            return stored_state

        return InstigatorState(
            self.get_external_origin(),
            InstigatorType.SCHEDULE,
            fallback_status,
            ScheduleInstigatorData(self.cron_schedule, start_timestamp=None),
        )
Example #2
0
def test_unloadable_schedule(graphql_context):
    """Only RUNNING unloadable schedules are reported, and they can be stopped.

    Two schedule states with unloadable origins are stored, one RUNNING and
    one STOPPED. The unloadable-states query must return just the running
    one, and the stop mutation must flip it to STOPPED.
    """
    daily_cron = "0 0 * * *"
    instance = graphql_context.instance
    frozen_time = create_pendulum_time(
        year=2019, month=2, day=27, hour=23, minute=59, second=59
    )

    # The running state is built before time is frozen, exactly as before.
    running_instigator_state = InstigatorState(
        _get_unloadable_schedule_origin("unloadable_running"),
        InstigatorType.SCHEDULE,
        InstigatorStatus.RUNNING,
        ScheduleInstigatorData(daily_cron, pendulum.now("UTC").timestamp()),
    )
    stopped_origin = _get_unloadable_schedule_origin("unloadable_stopped")

    with pendulum.test(frozen_time):
        instance.add_instigator_state(running_instigator_state)

        stopped_instigator_state = InstigatorState(
            stopped_origin,
            InstigatorType.SCHEDULE,
            InstigatorStatus.STOPPED,
            ScheduleInstigatorData(daily_cron, pendulum.now("UTC").timestamp()),
        )
        instance.add_instigator_state(stopped_instigator_state)

    result = execute_dagster_graphql(graphql_context, GET_UNLOADABLE_QUERY)
    unloadable = result.data["unloadableInstigationStatesOrError"]["results"]
    assert len(unloadable) == 1
    assert unloadable[0]["name"] == "unloadable_running"

    # Verify that we can stop the unloadable schedule
    stop_variables = {
        "scheduleOriginId": running_instigator_state.instigator_origin_id,
        "scheduleSelectorId": running_instigator_state.selector_id,
    }
    stop_result = execute_dagster_graphql(
        graphql_context, STOP_SCHEDULES_QUERY, variables=stop_variables
    )
    reported_status = stop_result.data["stopRunningSchedule"]["scheduleState"]["status"]
    assert reported_status == InstigatorStatus.STOPPED.value
Example #3
0
    def test_update_schedule(self, storage):
        """Updating a stored schedule state round-trips status and data.

        A schedule state is added, updated to RUNNING with a start timestamp,
        then updated to STOPPED with no timestamp. After each update the
        storage must hold exactly one state reflecting the change.
        """
        assert storage

        schedule = self.build_schedule("my_schedule", "* * * * *")
        storage.add_instigator_state(schedule)

        now_time = get_current_datetime_in_utc().timestamp()

        new_schedule = schedule.with_status(InstigatorStatus.RUNNING).with_data(
            ScheduleInstigatorData(
                cron_schedule=schedule.instigator_data.cron_schedule,
                start_timestamp=now_time,
            )
        )
        storage.update_instigator_state(new_schedule)

        schedules = storage.all_instigator_state(
            self.fake_repo_target().get_id(),
            self.fake_repo_target().get_selector_id(),
            InstigatorType.SCHEDULE,
        )
        assert len(schedules) == 1

        schedule = schedules[0]
        assert schedule.instigator_name == "my_schedule"
        assert schedule.status == InstigatorStatus.RUNNING
        assert schedule.instigator_data.start_timestamp == now_time

        # Stopping rebuilds the data without a start timestamp.
        stopped_schedule = schedule.with_status(InstigatorStatus.STOPPED).with_data(
            ScheduleInstigatorData(schedule.instigator_data.cron_schedule)
        )
        storage.update_instigator_state(stopped_schedule)

        schedules = storage.all_instigator_state(
            self.fake_repo_target().get_id(),
            self.fake_repo_target().get_selector_id(),
            InstigatorType.SCHEDULE,
        )
        assert len(schedules) == 1

        schedule = schedules[0]
        assert schedule.instigator_name == "my_schedule"
        assert schedule.status == InstigatorStatus.STOPPED
        # Identity comparison is the correct check for the None singleton.
        assert schedule.instigator_data.start_timestamp is None
Example #4
0
    def _create_new_schedule_state(self, instance, external_schedule):
        """Persist and return a new STOPPED state for ``external_schedule``.

        Args:
            instance: Object whose ``add_job_state`` stores the new state.
            external_schedule: Schedule supplying the external origin and
                cron schedule for the new state.

        Returns:
            The ``InstigatorState`` that was handed to ``add_job_state``.
        """
        origin = external_schedule.get_external_origin()
        instigator_data = ScheduleInstigatorData(external_schedule.cron_schedule)
        new_state = InstigatorState(
            origin,
            InstigatorType.SCHEDULE,
            InstigatorStatus.STOPPED,
            instigator_data,
        )

        instance.add_job_state(new_state)
        return new_state
Example #5
0
def test_get_unloadable_job(graphql_context):
    """The unloadable-states query reports only the RUNNING unloadable schedule.

    One RUNNING and one STOPPED state with unloadable origins are stored
    under frozen time; only the running one may show up in the result.
    """
    instance = graphql_context.instance
    frozen_time = create_pendulum_time(
        year=2019, month=2, day=27, hour=23, minute=59, second=59
    )

    # (schedule name, stored status), inserted in this order.
    fixtures = (
        ("unloadable_running", InstigatorStatus.RUNNING),
        ("unloadable_stopped", InstigatorStatus.STOPPED),
    )
    with pendulum.test(frozen_time):
        for schedule_name, stored_status in fixtures:
            instance.add_job_state(
                InstigatorState(
                    _get_unloadable_schedule_origin(schedule_name),
                    InstigatorType.SCHEDULE,
                    stored_status,
                    ScheduleInstigatorData(
                        "0 0 * * *",
                        pendulum.now("UTC").timestamp(),
                    ),
                )
            )

    result = execute_dagster_graphql(graphql_context, GET_UNLOADABLE_QUERY)
    unloadable = result.data["unloadableInstigationStatesOrError"]["results"]
    assert len(unloadable) == 1
    assert unloadable[0]["name"] == "unloadable_running"
Example #6
0
    def stop_schedule(self, instance, schedule_origin_id, schedule_selector_id,
                      external_schedule):
        """
        Updates the status of the given schedule to `InstigatorStatus.STOPPED` in schedule storage.

        This should not be overridden by subclasses.

        Args:
            instance: Object providing `get_instigator_state`, `add_instigator_state`
                and `update_instigator_state` for the schedule storage.
            schedule_origin_id (string): The id of the schedule target to stop running.
            schedule_selector_id: Selector id passed together with the origin id to
                `instance.get_instigator_state`.
            external_schedule (Optional[ExternalSchedule]): The schedule definition, or
                None when only a stored state is available.

        Returns:
            The stopped `InstigatorState` that was added to or updated in storage.

        Raises:
            DagsterSchedulerError: If the schedule is already stopped.
        """

        check.str_param(schedule_origin_id, "schedule_origin_id")
        check.opt_inst_param(external_schedule, "external_schedule",
                             ExternalSchedule)

        schedule_state = instance.get_instigator_state(schedule_origin_id,
                                                       schedule_selector_id)
        if (external_schedule and not external_schedule.
                get_current_instigator_state(schedule_state).is_running) or (
                    schedule_state and not schedule_state.is_running):
            # external_schedule is optional: when this branch is reached through
            # the stored state alone, take the name from that state rather than
            # dereferencing None.
            schedule_name = (external_schedule.name if external_schedule else
                             schedule_state.instigator_name)
            raise DagsterSchedulerError(
                "You have attempted to stop schedule {name}, but it is already stopped"
                .format(name=schedule_name))

        if not schedule_state:
            # Nothing stored yet, so the state must be built from the schedule
            # definition itself.
            assert external_schedule, (
                "Cannot stop a schedule that has neither a stored state nor an "
                "external schedule")
            stopped_schedule = InstigatorState(
                external_schedule.get_external_origin(),
                InstigatorType.SCHEDULE,
                InstigatorStatus.STOPPED,
                ScheduleInstigatorData(external_schedule.cron_schedule, ),
            )
            instance.add_instigator_state(stopped_schedule)
        else:
            # Keep the stored cron schedule but drop the start timestamp.
            stopped_schedule = schedule_state.with_status(
                InstigatorStatus.STOPPED).with_data(
                    ScheduleInstigatorData(cron_schedule=schedule_state.
                                           instigator_data.cron_schedule, ))
            instance.update_instigator_state(stopped_schedule)

        return stopped_schedule
Example #7
0
 def build_schedule(
     cls,
     schedule_name,
     cron_schedule,
     status=InstigatorStatus.STOPPED,
 ):
     """Build a schedule ``InstigatorState`` rooted at the fake repo target.

     Args:
         schedule_name: Name passed to ``get_instigator_origin`` on the
             fake repo target.
         cron_schedule: Cron string stored in the instigator data.
         status: Status of the built state; defaults to STOPPED.

     Returns:
         An ``InstigatorState`` of type SCHEDULE with no start timestamp.
     """
     origin = cls.fake_repo_target().get_instigator_origin(schedule_name)
     schedule_data = ScheduleInstigatorData(cron_schedule, start_timestamp=None)
     return InstigatorState(origin, InstigatorType.SCHEDULE, status, schedule_data)
Example #8
0
    def get_default_instigation_state(self):
        """Return a STOPPED schedule state for this schedule.

        The state is built from ``self.get_external_origin()`` and
        ``self.cron_schedule`` and carries no start timestamp.
        """
        from dagster.core.scheduler.instigation import (
            InstigatorState,
            InstigatorStatus,
            ScheduleInstigatorData,
        )

        origin = self.get_external_origin()
        default_data = ScheduleInstigatorData(self.cron_schedule, start_timestamp=None)
        return InstigatorState(
            origin,
            InstigatorType.SCHEDULE,
            InstigatorStatus.STOPPED,
            default_data,
        )
Example #9
0
    def start_schedule(self, instance, external_schedule):
        """
        Marks the given schedule as `InstigatorStatus.RUNNING` in schedule storage.

        A missing stored state is created; an existing one is updated in place.
        The start timestamp is the current UTC time.

        This should not be overridden by subclasses.

        Args:
            instance (DagsterInstance): The current instance.
            external_schedule (ExternalSchedule): The schedule to start

        Returns:
            The RUNNING `InstigatorState` that was written to storage.

        Raises:
            DagsterSchedulerError: If the schedule is already running.
        """

        check.inst_param(instance, "instance", DagsterInstance)
        check.inst_param(external_schedule, "external_schedule",
                         ExternalSchedule)

        stored_state = instance.get_instigator_state(
            external_schedule.get_external_origin_id(),
            external_schedule.selector_id,
        )
        effective_state = external_schedule.get_current_instigator_state(
            stored_state)
        if effective_state.is_running:
            raise DagsterSchedulerError(
                "You have attempted to start schedule {name}, but it is already running"
                .format(name=external_schedule.name))

        running_data = ScheduleInstigatorData(
            external_schedule.cron_schedule,
            get_current_datetime_in_utc().timestamp(),
        )

        if stored_state:
            started_schedule = stored_state.with_status(
                InstigatorStatus.RUNNING).with_data(running_data)
            instance.update_instigator_state(started_schedule)
            return started_schedule

        # Nothing persisted yet: create the row directly in RUNNING status.
        started_schedule = InstigatorState(
            external_schedule.get_external_origin(),
            InstigatorType.SCHEDULE,
            InstigatorStatus.RUNNING,
            running_data,
        )
        instance.add_instigator_state(started_schedule)
        return started_schedule
Example #10
0
    def start_schedule_and_update_storage_state(self, instance,
                                                external_schedule):
        """
        Calls `start_schedule`, then records the schedule as
        `InstigatorStatus.RUNNING` in schedule storage.

        If no state is stored for the schedule, a STOPPED one is created first.

        This should not be overridden by subclasses.

        Args:
            instance (DagsterInstance): The current instance.
            external_schedule (ExternalSchedule): The schedule to start

        Returns:
            The RUNNING state passed to `instance.update_job_state`.

        Raises:
            DagsterSchedulerError: If the stored state is already RUNNING.
        """

        check.inst_param(instance, "instance", DagsterInstance)
        check.inst_param(external_schedule, "external_schedule",
                         ExternalSchedule)

        origin_id = external_schedule.get_external_origin_id()
        current_state = instance.get_job_state(origin_id) or (
            self._create_new_schedule_state(instance, external_schedule))

        if current_state.status == InstigatorStatus.RUNNING:
            raise DagsterSchedulerError(
                "You have attempted to start schedule {name}, but it is already running"
                .format(name=external_schedule.name))

        self.start_schedule(instance, external_schedule)

        # The start timestamp is taken after start_schedule has returned.
        running_state = current_state.with_status(InstigatorStatus.RUNNING)
        running_data = ScheduleInstigatorData(
            external_schedule.cron_schedule,
            get_current_datetime_in_utc().timestamp(),
        )
        started_schedule = running_state.with_data(running_data)
        instance.update_job_state(started_schedule)
        return started_schedule
Example #11
0
    def stop_schedule_and_update_storage_state(self, instance,
                                               schedule_origin_id):
        """
        Calls `stop_schedule`, then records the schedule as
        `InstigatorStatus.STOPPED` in schedule storage.

        The stored cron schedule is kept; the rebuilt data carries no start
        timestamp argument.

        This should not be overridden by subclasses.

        Args:
            schedule_origin_id (string): The id of the schedule target to stop running.

        Returns:
            The STOPPED state passed to `instance.update_job_state`.
        """

        check.str_param(schedule_origin_id, "schedule_origin_id")

        # The state is looked up before stop_schedule is invoked.
        current_state = self._get_schedule_state(instance, schedule_origin_id)

        self.stop_schedule(instance, schedule_origin_id)

        halted_state = current_state.with_status(InstigatorStatus.STOPPED)
        halted_data = ScheduleInstigatorData(
            cron_schedule=current_state.job_specific_data.cron_schedule)
        stopped_schedule = halted_state.with_data(halted_data)
        instance.update_job_state(stopped_schedule)
        return stopped_schedule
Example #12
0
def launch_scheduled_runs(
    instance,
    workspace,
    logger,
    end_datetime_utc,
    max_catchup_runs=DEFAULT_MAX_CATCHUP_RUNS,
    max_tick_retries=0,
    debug_crash_flags=None,
    log_verbose_checks=True,
):
    """Generator that launches runs for every schedule currently considered running.

    Steps, in order:

    1. Snapshot the workspace and load all stored schedule instigator states.
    2. Collect every external schedule whose current instigator state (stored
       state combined with the schedule itself) reports ``is_running``.
    3. Delete stored ``AUTOMATICALLY_RUNNING`` states whose schedule was not
       collected in step 2.
    4. When ``log_verbose_checks`` is set, warn about stored ``RUNNING`` states
       whose schedule was not collected in step 2.
    5. For each collected schedule, create an ``AUTOMATICALLY_RUNNING`` state if
       none is stored, then delegate to ``launch_scheduled_runs_for_schedule``.

    Args:
        instance (DagsterInstance): Instance providing instigator state storage.
        workspace (IWorkspace): Workspace whose snapshot is scanned for schedules.
        logger: Object with ``debug``/``info``/``warning``/``error`` methods.
        end_datetime_utc: Forwarded to ``launch_scheduled_runs_for_schedule``; its
            ``timestamp()`` is also the start timestamp of newly created
            ``AUTOMATICALLY_RUNNING`` states. NOTE(review): the name suggests a
            UTC datetime - confirm against callers.
        max_catchup_runs: Forwarded to ``launch_scheduled_runs_for_schedule``.
        max_tick_retries: Forwarded to ``launch_scheduled_runs_for_schedule``.
        debug_crash_flags: Optional object with a ``get`` method, looked up by
            the schedule state's ``instigator_name``; the result is forwarded
            to ``launch_scheduled_runs_for_schedule``.
        log_verbose_checks: When true, emits the location-load, unloadable-schedule
            and "checking schedules" log messages; also forwarded downstream.

    Yields:
        ``None`` once if no schedule is running. Otherwise, for each schedule,
        whatever ``launch_scheduled_runs_for_schedule`` yields, followed by one
        value that is ``None`` on success or the serializable error info when
        an exception was caught for that schedule.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(workspace, "workspace", IWorkspace)

    # Location entries keyed by their origin, for the lookups further down.
    workspace_snapshot = {
        location_entry.origin: location_entry
        for location_entry in workspace.get_workspace_snapshot().values()
    }

    # Every stored schedule state, keyed by the origin id of its schedule.
    all_schedule_states = {
        schedule_state.origin.get_id(): schedule_state
        for schedule_state in instance.all_instigator_state(instigator_type=InstigatorType.SCHEDULE)
    }

    # Schedules that can be loaded from the workspace and are currently running,
    # keyed by origin id.
    schedules = {}
    for location_entry in workspace_snapshot.values():
        repo_location = location_entry.repository_location
        if repo_location:
            for repo in repo_location.get_repositories().values():
                for schedule in repo.get_external_schedules():
                    origin_id = schedule.get_external_origin().get_id()
                    if schedule.get_current_instigator_state(
                        all_schedule_states.get(origin_id)
                    ).is_running:
                        schedules[origin_id] = schedule
        elif location_entry.load_error and log_verbose_checks:
            logger.warning(
                f"Could not load location {location_entry.origin.location_name} to check for schedules due to the following error: {location_entry.load_error}"
            )

    # Remove any schedule states that were previously created with AUTOMATICALLY_RUNNING
    # and can no longer be found in the workspace (so that if they are later added
    # back again, their timestamps will start at the correct place)
    states_to_delete = {
        schedule_state
        for origin_id, schedule_state in all_schedule_states.items()
        if origin_id not in schedules
        and schedule_state.status == InstigatorStatus.AUTOMATICALLY_RUNNING
    }
    for state in states_to_delete:
        instance.schedule_storage.delete_instigator_state(
            state.instigator_origin_id, state.selector_id
        )

    if log_verbose_checks:
        # Explicitly RUNNING states with no matching running schedule in the
        # workspace: warn, with a message that narrows down what is missing.
        unloadable_schedule_states = {
            origin_id: schedule_state
            for origin_id, schedule_state in all_schedule_states.items()
            if origin_id not in schedules and schedule_state.status == InstigatorStatus.RUNNING
        }

        for schedule_state in unloadable_schedule_states.values():
            schedule_name = schedule_state.origin.instigator_name
            repo_location_origin = (
                schedule_state.origin.external_repository_origin.repository_location_origin
            )

            repo_location_name = repo_location_origin.location_name
            repo_name = schedule_state.origin.external_repository_origin.repository_name
            if (
                repo_location_origin not in workspace_snapshot
                or not workspace_snapshot[repo_location_origin].repository_location
            ):
                # The location is unknown to the workspace or failed to load.
                logger.warning(
                    f"Schedule {schedule_name} was started from a location "
                    f"{repo_location_name} that can no longer be found in the workspace, or has "
                    "metadata that has changed since the schedule was started. You can turn off "
                    "this schedule in the Dagit UI from the Status tab."
                )
            elif not workspace_snapshot[repo_location_origin].repository_location.has_repository(
                repo_name
            ):
                # The location loaded but no longer contains the repository.
                logger.warning(
                    f"Could not find repository {repo_name} in location {repo_location_name} to "
                    + f"run schedule {schedule_name}. If this repository no longer exists, you can "
                    + "turn off the schedule in the Dagit UI from the Status tab.",
                )
            else:
                # Location and repository exist; the schedule was not collected above.
                logger.warning(
                    f"Could not find schedule {schedule_name} in repository {repo_name}. If this "
                    "schedule no longer exists, you can turn it off in the Dagit UI from the "
                    "Status tab.",
                )

    if not schedules:
        logger.debug("Not checking for any runs since no schedules have been started.")
        # Yield once (None) so the consumer still gets a value this iteration.
        yield
        return

    if log_verbose_checks:
        schedule_names = ", ".join([schedule.name for schedule in schedules.values()])
        logger.info(f"Checking for new runs for the following schedules: {schedule_names}")

    for external_schedule in schedules.values():
        # Stays None unless this schedule's processing raises.
        error_info = None
        try:
            schedule_state = all_schedule_states.get(
                external_schedule.get_external_origin().get_id()
            )
            if not schedule_state:
                # A running schedule without a stored state can only be running
                # by default; persist that so its start timestamp is recorded.
                assert external_schedule.default_status == DefaultScheduleStatus.RUNNING
                schedule_state = InstigatorState(
                    external_schedule.get_external_origin(),
                    InstigatorType.SCHEDULE,
                    InstigatorStatus.AUTOMATICALLY_RUNNING,
                    ScheduleInstigatorData(
                        external_schedule.cron_schedule,
                        end_datetime_utc.timestamp(),
                    ),
                )
                instance.add_instigator_state(schedule_state)

            yield from launch_scheduled_runs_for_schedule(
                instance,
                logger,
                external_schedule,
                schedule_state,
                workspace,
                end_datetime_utc,
                max_catchup_runs,
                max_tick_retries,
                (
                    debug_crash_flags.get(schedule_state.instigator_name)
                    if debug_crash_flags
                    else None
                ),
                log_verbose_checks=log_verbose_checks,
            )
        except Exception:
            # One failing schedule must not stop the others: log the error and
            # hand it to the consumer instead of propagating.
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            logger.error(
                f"Scheduler caught an error for schedule {external_schedule.name} : {error_info.to_string()}"
            )
        yield error_info