def test_tag_limits(instance, grpc_server_registry):
    """Runs sharing a limited tag value are capped; untagged-limit runs launch."""
    for run_id, database in (
        ("tiny-1", "tiny"),
        ("tiny-2", "tiny"),
        ("large-1", "large"),
    ):
        create_run(
            instance,
            run_id=run_id,
            status=PipelineRunStatus.QUEUED,
            tags={"database": database},
        )
    daemon = QueuedRunCoordinatorDaemon(
        interval_seconds=5,
        max_concurrent_runs=10,
        tag_concurrency_limits=[{"key": "database", "value": "tiny", "limit": 1}],
    )
    list(daemon.run_iteration(instance, grpc_server_registry))
    # Only one database=tiny run may launch; database=large is unrestricted.
    assert get_run_ids(instance.run_launcher.queue()) == ["tiny-1", "large-1"]
def test_skip_error_runs(instance, grpc_server_registry):
    """A run that fails to load is marked FAILURE and does not block other runs."""
    create_invalid_run(
        instance,
        run_id="bad-run",
        status=PipelineRunStatus.QUEUED,
    )
    create_run(
        instance,
        run_id="good-run",
        status=PipelineRunStatus.QUEUED,
    )
    daemon = QueuedRunCoordinatorDaemon(interval_seconds=5, max_concurrent_runs=10)
    # run_iteration yields one entry per dequeue attempt; keep only the errors.
    errors = [error for error in daemon.run_iteration(instance, grpc_server_registry) if error]
    assert len(errors) == 1
    assert "ModuleNotFoundError" in errors[0].message
    # The healthy run still launched, and the broken one was failed.
    assert get_run_ids(instance.run_launcher.queue()) == ["good-run"]
    assert instance.get_run_by_id("bad-run").status == PipelineRunStatus.FAILURE
def test_overlapping_tag_limits(instance):
    """When several limits apply to a run's tags, all of them are enforced."""
    for run_id, foo_value in (
        ("run-1", "bar"),
        ("run-2", "bar"),
        ("run-3", "other"),
        ("run-4", "other"),
    ):
        create_run(
            instance,
            run_id=run_id,
            status=PipelineRunStatus.QUEUED,
            tags={"foo": foo_value},
        )
    daemon = QueuedRunCoordinatorDaemon(
        instance,
        interval_seconds=5,
        max_concurrent_runs=10,
        tag_concurrency_limits=[
            {"key": "foo", "limit": 2},
            {"key": "foo", "value": "bar", "limit": 1},
        ],
    )
    daemon.run_iteration()
    # run-2 blocked by the foo=bar limit, run-4 by the overall foo limit.
    assert get_run_ids(instance.run_launcher.queue()) == ["run-1", "run-3"]
def test_priority(instance, grpc_server_registry):
    """Runs with a higher PRIORITY_TAG value are dequeued first."""
    create_run(instance, run_id="default-pri-run", status=PipelineRunStatus.QUEUED)
    create_run(
        instance,
        run_id="low-pri-run",
        status=PipelineRunStatus.QUEUED,
        tags={PRIORITY_TAG: "-1"},
    )
    create_run(
        instance,
        run_id="hi-pri-run",
        status=PipelineRunStatus.QUEUED,
        tags={PRIORITY_TAG: "3"},
    )
    daemon = QueuedRunCoordinatorDaemon(interval_seconds=5, max_concurrent_runs=10)
    list(daemon.run_iteration(instance, grpc_server_registry))
    # Launch order: priority 3, then the untagged run, then priority -1.
    assert get_run_ids(instance.run_launcher.queue()) == [
        "hi-pri-run",
        "default-pri-run",
        "low-pri-run",
    ]
def test_get_queued_runs_max_runs(instance, num_in_progress_runs):  # pylint: disable=redefined-outer-name
    """Launches are capped at max_concurrent_runs minus runs already in progress."""
    max_runs = 4

    # Seed the run store with ongoing runs, cycling through every
    # in-progress status so each one is represented.
    for index in range(num_in_progress_runs):
        create_run(
            instance,
            run_id="in_progress-run-{}".format(index),
            status=IN_PROGRESS_STATUSES[index % len(IN_PROGRESS_STATUSES)],
        )

    # Queue one more run than could ever be launched in a single iteration.
    for index in range(max_runs + 1):
        create_run(
            instance,
            run_id="queued-run-{}".format(index),
            status=PipelineRunStatus.QUEUED,
        )

    daemon = QueuedRunCoordinatorDaemon(
        instance,
        interval_seconds=5,
        max_concurrent_runs=max_runs,
    )
    daemon.run_iteration()

    assert len(instance.run_launcher.queue()) == max(0, max_runs - num_in_progress_runs)
def test_multiple_tag_limits(instance):
    """A run counts against every limit matching its tags, and all must allow it."""
    queued = [
        ("run-1", {"database": "tiny", "user": "******"}),
        ("run-2", {"database": "tiny"}),
        ("run-3", {"user": "******"}),
        ("run-4", {"user": "******"}),
    ]
    for run_id, tags in queued:
        create_run(
            instance,
            run_id=run_id,
            status=PipelineRunStatus.QUEUED,
            tags=tags,
        )
    daemon = QueuedRunCoordinatorDaemon(
        instance,
        interval_seconds=5,
        max_concurrent_runs=10,
        tag_concurrency_limits=[
            {"key": "database", "value": "tiny", "limit": 1},
            {"key": "user", "value": "johann", "limit": 2},
        ],
    )
    daemon.run_iteration()
    assert get_run_ids(instance.run_launcher.queue()) == ["run-1", "run-3"]
def create_daemon_of_type(daemon_type):
    """Instantiate the daemon matching ``daemon_type`` on a fresh instance.

    Raises a plain ``Exception`` for any unrecognized daemon type.
    """
    factories = {
        SchedulerDaemon.daemon_type(): SchedulerDaemon.create_from_instance,
        SensorDaemon.daemon_type(): SensorDaemon.create_from_instance,
        QueuedRunCoordinatorDaemon.daemon_type(): QueuedRunCoordinatorDaemon.create_from_instance,
    }
    factory = factories.get(daemon_type)
    if factory is None:
        raise Exception("Unexpected daemon type {daemon_type}".format(daemon_type=daemon_type))
    return factory(DagsterInstance.get())
def test_priority_on_malformed_tag(instance):
    """A non-numeric priority tag does not prevent the run from launching."""
    create_run(
        instance,
        run_id="bad-pri-run",
        status=PipelineRunStatus.QUEUED,
        tags={PRIORITY_TAG: "foobar"},
    )
    daemon = QueuedRunCoordinatorDaemon(
        instance,
        interval_seconds=5,
        max_concurrent_runs=10,
    )
    daemon.run_iteration()
    assert get_run_ids(instance.run_launcher.queue()) == ["bad-pri-run"]
def test_attempt_to_launch_runs_no_queued(instance):
    """Nothing launches when no run is in the QUEUED state."""
    create_run(instance, run_id="queued-run", status=PipelineRunStatus.STARTED)
    create_run(instance, run_id="non-queued-run", status=PipelineRunStatus.NOT_STARTED)
    daemon = QueuedRunCoordinatorDaemon(
        instance,
        interval_seconds=5,
        max_concurrent_runs=10,
    )
    daemon.run_iteration()
    assert instance.run_launcher.queue() == []
def create_daemon_of_type(daemon_type, instance):
    """Build the daemon matching ``daemon_type``, using ``instance`` for config.

    Only the queued-run coordinator reads its interval from the instance;
    the others use module-level interval constants.
    """
    if daemon_type == SchedulerDaemon.daemon_type():
        return SchedulerDaemon(interval_seconds=DEFAULT_DAEMON_INTERVAL_SECONDS)
    if daemon_type == SensorDaemon.daemon_type():
        return SensorDaemon(interval_seconds=DEFAULT_SENSOR_DAEMON_INTERVAL)
    if daemon_type == QueuedRunCoordinatorDaemon.daemon_type():
        return QueuedRunCoordinatorDaemon(
            interval_seconds=instance.run_coordinator.dequeue_interval_seconds
        )
    if daemon_type == BackfillDaemon.daemon_type():
        return BackfillDaemon(interval_seconds=DEFAULT_DAEMON_INTERVAL_SECONDS)
    raise Exception(f"Unexpected daemon type {daemon_type}")
def test_location_handles_reused(instance, monkeypatch, grpc_server_registry):
    """Only one repository location handle is created when two queued runs
    from the same location are dequeued in the same iteration.
    """
    create_run(instance, run_id="queued-run", status=PipelineRunStatus.QUEUED)
    create_run(instance, run_id="queued-run-2", status=PipelineRunStatus.QUEUED)

    original_init = GrpcServerRepositoryLocationHandle.__init__
    recorded_origins = []

    def spying_init(
        self,
        origin,
        host=None,
        port=None,
        socket=None,
        server_id=None,
        heartbeat=False,
        watch_server=True,
    ):
        # Record each handle construction, then delegate to the real __init__.
        recorded_origins.append(origin)
        return original_init(
            self, origin, host, port, socket, server_id, heartbeat, watch_server
        )

    monkeypatch.setattr(GrpcServerRepositoryLocationHandle, "__init__", spying_init)

    daemon = QueuedRunCoordinatorDaemon(interval_seconds=5, max_concurrent_runs=10)
    list(daemon.run_iteration(instance, grpc_server_registry))

    assert get_run_ids(instance.run_launcher.queue()) == ["queued-run", "queued-run-2"]
    # Both runs were dequeued, but the location handle was built only once.
    assert len(recorded_origins) == 1
def __init__(self, instance):
    """Register the daemons implied by the instance's scheduler/run-coordinator config."""
    self._instance = instance
    self._daemons = {}
    self._logger = get_default_daemon_logger("dagster-daemon")

    if isinstance(instance.scheduler, DagsterDaemonScheduler):
        self._add_daemon(
            SchedulerDaemon(
                instance,
                interval_seconds=30,
                max_catchup_runs=instance.scheduler.max_catchup_runs,
            )
        )

    if isinstance(instance.run_coordinator, QueuedRunCoordinator):
        run_coordinator = instance.run_coordinator
        self._add_daemon(
            QueuedRunCoordinatorDaemon(
                instance,
                interval_seconds=run_coordinator.dequeue_interval_seconds,
                max_concurrent_runs=run_coordinator.max_concurrent_runs,
            )
        )

    # Without at least one daemon this process would have nothing to do.
    if not self._daemons:
        raise Exception("No daemons configured on the DagsterInstance")

    self._logger.info(
        "instance is configured with the following daemons: {}".format(
            _sorted_quoted(type(daemon).__name__ for daemon in self.daemons)
        )
    )
def test_attempt_to_launch_runs_no_queued(instance):  # pylint: disable=redefined-outer-name
    """No launches happen when no run is in the QUEUED state."""
    create_run(instance, run_id="queued-run", status=PipelineRunStatus.STARTED)
    create_run(instance, run_id="non-queued-run", status=PipelineRunStatus.NOT_STARTED)
    daemon = QueuedRunCoordinatorDaemon(instance)
    daemon.attempt_to_launch_runs()
    assert instance.run_launcher.queue() == []
def required_daemons(instance):
    """Return which daemon types are required by the instance."""
    # The sensor daemon is always required; the scheduler and queued-run
    # daemons are only needed when the instance is configured to use them.
    needed = [SensorDaemon.daemon_type()]
    if isinstance(instance.scheduler, DagsterDaemonScheduler):
        needed.append(SchedulerDaemon.daemon_type())
    if isinstance(instance.run_coordinator, QueuedRunCoordinator):
        needed.append(QueuedRunCoordinatorDaemon.daemon_type())
    return needed
def test_attempt_to_launch_runs_filter(instance):  # pylint: disable=redefined-outer-name
    """Only QUEUED runs are picked up by an iteration."""
    create_run(instance, run_id="queued-run", status=PipelineRunStatus.QUEUED)
    create_run(instance, run_id="non-queued-run", status=PipelineRunStatus.NOT_STARTED)
    daemon = QueuedRunCoordinatorDaemon(
        instance,
        interval_seconds=5,
        max_concurrent_runs=10,
    )
    daemon.run_iteration()
    assert get_run_ids(instance.run_launcher.queue()) == ["queued-run"]
def test_location_handles_reused(instance, monkeypatch):
    """Only one repository location handle is created when two queued runs
    from the same location are dequeued in the same iteration.
    """
    create_run(instance, run_id="queued-run", status=PipelineRunStatus.QUEUED)
    create_run(instance, run_id="queued-run-2", status=PipelineRunStatus.QUEUED)

    real_create_handle = ManagedGrpcPythonEnvRepositoryLocationOrigin.create_handle
    seen_origins = []

    def spying_create_handle(origin):
        # Count each handle creation, then delegate to the real implementation.
        seen_origins.append(origin)
        return real_create_handle(origin)

    monkeypatch.setattr(
        ManagedGrpcPythonEnvRepositoryLocationOrigin,
        "create_handle",
        spying_create_handle,
    )

    daemon = QueuedRunCoordinatorDaemon(interval_seconds=5, max_concurrent_runs=10)
    list(daemon.run_iteration(instance))

    assert get_run_ids(instance.run_launcher.queue()) == ["queued-run", "queued-run-2"]
    # Both runs were dequeued, but the handle was constructed only once.
    assert len(seen_origins) == 1
def test_attempt_to_launch_runs_filter(instance, grpc_server_registry):
    """Only QUEUED runs are dequeued; other statuses are left untouched."""
    create_run(instance, run_id="queued-run", status=PipelineRunStatus.QUEUED)
    create_run(instance, run_id="non-queued-run", status=PipelineRunStatus.NOT_STARTED)
    daemon = QueuedRunCoordinatorDaemon(interval_seconds=5, max_concurrent_runs=10)
    list(daemon.run_iteration(instance, grpc_server_registry))
    assert get_run_ids(instance.run_launcher.queue()) == ["queued-run"]
def create_daemons_from_instance(instance):
    """Create one daemon per required type for this instance.

    Each daemon gets its own ``DagsterInstance`` since each runs in its own
    thread.
    """
    daemon_classes = {
        SchedulerDaemon.daemon_type(): SchedulerDaemon,
        SensorDaemon.daemon_type(): SensorDaemon,
        QueuedRunCoordinatorDaemon.daemon_type(): QueuedRunCoordinatorDaemon,
    }
    daemons = []
    for daemon_type in required_daemons(instance):
        daemon_class = daemon_classes.get(daemon_type)
        if daemon_class is None:
            raise Exception("Unexpected daemon type {daemon_type}".format(daemon_type=daemon_type))
        daemons.append(daemon_class.create_from_instance(DagsterInstance.get()))
    return daemons
def __init__(self, instance):
    """Set up controller state and register every daemon the instance requires."""
    self._instance = instance
    # Unique id for this controller process, used alongside heartbeat bookkeeping.
    self._daemon_uuid = str(uuid.uuid4())
    self._daemons = {}
    self._last_heartbeat_times = {}
    self._last_iteration_times = {}
    self._last_iteration_exceptions = {}
    self._current_iteration_exceptions = {}
    self._logger = get_default_daemon_logger("dagster-daemon")

    if isinstance(instance.scheduler, DagsterDaemonScheduler):
        self._add_daemon(
            SchedulerDaemon(
                instance,
                interval_seconds=DEFAULT_DAEMON_INTERVAL_SECONDS,
                max_catchup_runs=instance.scheduler.max_catchup_runs,
            )
        )

    # The sensor daemon is always registered, regardless of config.
    self._add_daemon(SensorDaemon(instance, interval_seconds=SENSOR_DAEMON_INTERVAL))

    if isinstance(instance.run_coordinator, QueuedRunCoordinator):
        run_coordinator = instance.run_coordinator
        self._add_daemon(
            QueuedRunCoordinatorDaemon(
                instance,
                interval_seconds=run_coordinator.dequeue_interval_seconds,
                max_concurrent_runs=run_coordinator.max_concurrent_runs,
                tag_concurrency_limits=run_coordinator.tag_concurrency_limits,
            )
        )

    # Sanity check: the registered daemons match exactly what's required.
    assert set(required_daemons(instance)) == self._daemons.keys()

    if not self._daemons:
        raise Exception("No daemons configured on the DagsterInstance")

    self._logger.info(
        "instance is configured with the following daemons: {}".format(
            _sorted_quoted(type(daemon).__name__ for daemon in self.daemons)
        )
    )
def __init__(self, instance):
    """Set up controller state and register daemons, with per-daemon intervals
    resolved via ``_get_interval_seconds``.
    """
    self._instance = instance
    self._daemon_uuid = str(uuid.uuid4())
    self._daemons = {}
    self._last_heartbeat_time = None
    self._logger = get_default_daemon_logger("dagster-daemon")

    if isinstance(instance.scheduler, DagsterDaemonScheduler):
        self._add_daemon(
            SchedulerDaemon(
                instance,
                interval_seconds=self._get_interval_seconds(instance, SchedulerDaemon.__name__),
                max_catchup_runs=instance.scheduler.max_catchup_runs,
            )
        )

    # The sensor daemon is always registered.
    self._add_daemon(
        SensorDaemon(
            instance,
            interval_seconds=self._get_interval_seconds(instance, SensorDaemon.__name__),
        )
    )

    if isinstance(instance.run_coordinator, QueuedRunCoordinator):
        self._add_daemon(
            QueuedRunCoordinatorDaemon(
                instance,
                interval_seconds=self._get_interval_seconds(
                    instance, QueuedRunCoordinatorDaemon.__name__
                ),
                max_concurrent_runs=instance.run_coordinator.max_concurrent_runs,
            )
        )

    # Sanity check: registered daemons match exactly what's expected.
    assert set(self._expected_daemons(instance)) == self._daemons.keys()

    if not self._daemons:
        raise Exception("No daemons configured on the DagsterInstance")

    self._logger.info(
        "instance is configured with the following daemons: {}".format(
            _sorted_quoted(type(daemon).__name__ for daemon in self.daemons)
        )
    )
def daemon_fixture():
    """Provide a queued-run coordinator daemon with a one-second interval."""
    daemon = QueuedRunCoordinatorDaemon(interval_seconds=1)
    return daemon
def run_command(interval_seconds, max_concurrent_runs):
    """CLI entry point: start the queued-run coordinator loop."""
    daemon = QueuedRunCoordinatorDaemon(
        DagsterInstance.get(),
        max_concurrent_runs=max_concurrent_runs,
    )
    click.echo("Starting run coordinator")
    # Blocks, dequeuing runs at the requested interval.
    daemon.run(interval_seconds=interval_seconds)