def launchTask(self, driver, task):
    """Enqueue a garbage collection task, displacing a task that is still waiting.

    The slave id is remembered from the first task seen. A task whose id
    matches the currently running GC, or one that is already queued, is
    ignored. If the task payload cannot be deserialized, TASK_FAILED is
    reported and nothing is queued. Otherwise one previously queued task (if
    any) is removed and reported as TASK_FINISHED before the new task is
    queued together with its retained-tasks map and the enqueue time.

    Note: status updates are sent through ``self._driver``; the ``driver``
    argument is not used by this method.
    """
    if self._slave_id is None:
        self._slave_id = task.slave_id.value

    incoming_id = task.task_id.value
    self.log('launchTask() got task_id: %s' % incoming_id)

    # Guard clauses: never queue a task we are already handling.
    if incoming_id == self._task_id:
        self.log('=> GC with task_id %s currently running - ignoring' % incoming_id)
        return
    if incoming_id in self._gc_task_queue:
        self.log('=> Already have task_id %s queued - ignoring' % incoming_id)
        return

    try:
        adjust_request = thrift_deserialize(AdjustRetainedTasks(), task.data)
    except Exception as exc:
        self.log('Error deserializing task: %s' % exc)
        self.send_update(
            self._driver,
            incoming_id,
            mesos_pb.TASK_FAILED,
            'Deserialization of GC task failed',
        )
        return

    try:
        displaced_id, _ = self._gc_task_queue.popitem(0)
    except KeyError:
        # no enqueued GC tasks - reset counter
        self._dropped_tasks.write(0)
    else:
        self.log('=> Dropping previously queued GC with task_id %s' % displaced_id)
        self._dropped_tasks.increment()
        self.log('=> Updating scheduler')
        self.send_update(
            self._driver,
            displaced_id,
            mesos_pb.TASK_FINISHED,
            'Garbage collection skipped - GC executor received another task',
        )

    self.log('=> Adding %s to GC queue' % incoming_id)
    queue_entry = (task, adjust_request.retainedTasks, self._clock.time())
    self._gc_task_queue[incoming_id] = queue_entry
def test_gc_wait():
    """Exercise the executor's maximum wait, with and without a launchTask call."""

    def stopped_after(driver, gc_executor, seconds):
        # Advance the test clock, give the driver a moment, then report its state.
        gc_executor._clock.tick(seconds)
        driver.stopped.wait(timeout=0.1)
        return driver.stopped.is_set()

    # run w/ no tasks
    with run_gc_with_timeout(
            maximum_executor_wait=Amount(15, Time.SECONDS)) as (driver, gc_executor):
        assert not stopped_after(driver, gc_executor, 10)
        assert stopped_after(driver, gc_executor, 5.1)
        assert not gc_executor._stop_event.is_set()

    # ensure launchTask restarts executor wait
    with run_gc_with_timeout(
            maximum_executor_wait=Amount(15, Time.SECONDS)) as (driver, gc_executor):
        assert not stopped_after(driver, gc_executor, 10)
        gc_executor.launchTask(
            driver,
            serialize_art(AdjustRetainedTasks(retainedTasks={})),
        )
        assert not stopped_after(driver, gc_executor, 5.1)
        assert stopped_after(driver, gc_executor, 15.1)
        assert not gc_executor._stop_event.is_set()
def test_gc_killtask_queued():
    """Killing a queued GC task empties the queue and sends no status update."""
    queued_id = "task2"
    driver = ProxyDriver()

    with temporary_dir() as tmp_dir:
        gc_executor = build_blocking_gc_executor(tmp_dir, driver)

        first_task = serialize_art(AdjustRetainedTasks())
        gc_executor.launchTask(driver, first_task)
        thread_yield()

        second_task = serialize_art(AdjustRetainedTasks(), task_id=queued_id)
        gc_executor.launchTask(driver, second_task)
        thread_yield()

        # Only one of the two launched tasks is still waiting in the queue.
        assert len(gc_executor._gc_task_queue) == 1

        gc_executor.killTask(driver, queued_id)
        thread_yield()
        assert len(gc_executor._gc_task_queue) == 0

    assert not driver.stopped.is_set()
    assert len(driver.updates) == 0
def test_gc_multiple_launchtasks():
    """A newly launched task replaces the queued one, which is reported TASK_FINISHED."""
    second_id = "task2"
    third_id = "task3"
    driver = ProxyDriver()

    with temporary_dir() as tmp_dir:
        gc_executor = build_blocking_gc_executor(tmp_dir, driver)

        gc_executor.launchTask(driver, serialize_art(AdjustRetainedTasks()))
        thread_yield()

        second_task = serialize_art(AdjustRetainedTasks(), task_id=second_id)
        gc_executor.launchTask(driver, second_task)
        thread_yield()
        assert len(gc_executor._gc_task_queue) == 1

        third_task = serialize_art(AdjustRetainedTasks(), task_id=third_id)
        gc_executor.launchTask(driver, third_task)
        thread_yield()
        # The queue still holds exactly one task after the third launch.
        assert len(gc_executor._gc_task_queue) == 1

    assert not driver.stopped.is_set()
    assert len(driver.updates) >= 1
    skipped_update = StatusUpdate(mesos.TASK_FINISHED, second_id)
    assert skipped_update in driver.updates
def test_gc_shutdown_queued():
    """Shutting down with a queued task stops the driver and finishes that task."""
    queued_id = "task2"
    driver = ProxyDriver()

    with temporary_dir() as tmp_dir:
        gc_executor = build_blocking_gc_executor(tmp_dir, driver)

        gc_executor.launchTask(driver, serialize_art(AdjustRetainedTasks()))
        thread_yield()

        queued_task = serialize_art(AdjustRetainedTasks(), task_id=queued_id)
        gc_executor.launchTask(driver, queued_task)
        thread_yield()
        assert len(gc_executor._gc_task_queue) == 1

        gc_executor.shutdown(driver)
        # Advance the test clock by the executor's PERSISTENCE_WAIT, in seconds.
        persistence_secs = gc_executor.PERSISTENCE_WAIT.as_(Time.SECONDS)
        gc_executor._clock.tick(persistence_secs)
        assert gc_executor._stop_event.is_set()

    driver.stopped.wait(timeout=1.0)
    assert driver.stopped.is_set()
    assert len(driver.updates) == 1

    final_update = driver.updates[-1]
    assert final_update[0] == mesos.TASK_FINISHED
    assert final_update[1] == queued_id
def test_gc_killtask_current():
    """killTask on the currently running GC task leaves it in place with no update."""
    driver = ProxyDriver()

    with temporary_dir() as tmp_dir:
        gc_executor = build_blocking_gc_executor(tmp_dir, driver)
        gc_executor.launchTask(driver, serialize_art(AdjustRetainedTasks()))

        def queue_contents():
            return gc_executor._gc_task_queue

        # Wait for the launched task to leave the queue.
        wait_until_not(queue_contents, clock=gc_executor._clock)
        assert len(gc_executor._gc_task_queue) == 0
        assert gc_executor._task_id == TASK_ID

        gc_executor.killTask(driver, TASK_ID)
        assert gc_executor._task_id == TASK_ID
        assert len(gc_executor._gc_task_queue) == 0

    assert not driver.stopped.is_set()
    assert len(driver.updates) == 0
def run_gc_with(active_executors, retained_tasks, lose=False):
    """Run one GC task through a ThinTestThermosGCExecutor and return the results.

    ``active_executors`` is handed to the executor constructor,
    ``retained_tasks`` becomes the ``retainedTasks`` of the launched
    AdjustRetainedTasks request, and ``lose`` is forwarded to ``setup_tree``.
    When ``lose`` is true the final status update is not checked.

    Returns the ``(executor, proxy_driver)`` pair.
    """
    driver = ProxyDriver()

    with temporary_dir() as tmp_dir:
        setup_tree(tmp_dir, lose=lose)
        gc_executor = ThinTestThermosGCExecutor(tmp_dir, active_executors=active_executors)
        gc_executor.registered(driver, None, None, None)
        gc_executor.start()

        request = AdjustRetainedTasks(retainedTasks=retained_tasks)
        gc_executor.launchTask(driver, serialize_art(request, TASK_ID))

        # Block until the queue is empty and no task id is current.
        wait_until_not(lambda: gc_executor._gc_task_queue, clock=gc_executor._clock)
        wait_until_not(lambda: gc_executor._task_id, clock=gc_executor._clock)
        assert len(gc_executor._gc_task_queue) == 0
        assert not gc_executor._task_id

    assert len(driver.updates) >= 1
    if not lose:
        # if the task is lost it will be cleaned out of band (by clean_orphans),
        # so we don't care when the GC task actually finishes
        latest = driver.updates[-1]
        assert latest[0] == mesos.TASK_FINISHED
        assert latest[1] == TASK_ID
    return gc_executor, driver