def test_index_poll_many(self):
    """With many cgroups indexed, an OOM on one of them still surfaces as a
    RestartRequestedMessage for exactly that cgroup."""
    # Ten background cgroups plus the one we will actually OOM.
    for _ in range(10):
        create_random_cg(self.parent_cg_path)
    cg_path = create_random_cg(self.parent_cg_path)

    jobs = queue.Queue()
    activities = queue.Queue()
    index = CgroupIndex(self.parent_cg_path, jobs, activities)
    index.open()

    self.assertHasNoMessages(activities)
    index.sync()
    # 10 background cgroups + the target one = 11 new-cgroup notifications.
    for _ in range(11):
        self.assertHasMessageForCg(activities, NewCgroupMessage, self.ANY_CG)
    self.assertHasNoMessages(activities)

    set_memlimit(cg_path)
    index.sync()
    self.assertHasNoMessages(jobs)

    trigger_oom(cg_path)
    index.poll(10)

    # We might receive a few pressure notifications before we finally get
    # the OOM event, so keep polling until the message shows up.
    # NOTE(review): the helper name is misspelled ("Evnetually"); it is
    # defined elsewhere — fix it at the definition site, not here.
    for _ in self.assertEvnetuallyHasMessageForCg(
        jobs, RestartRequestedMessage, cg_path
    ):
        index.poll(1)

    index.close()
def run_loop(root_cg_path, activity_path, sync_target_interval,
             restart_adapter, restart_grace_period):
    """Main daemon loop: index cgroups, dispatch restart and activity work.

    Returns 1 if a worker thread dies (so a supervisor can restart us);
    the nominal loop never exits on its own.
    """
    threading.current_thread().name = "index"

    jobs = queue.Queue()
    activities = queue.Queue()

    index = CgroupIndex(root_cg_path, jobs, activities)
    index.open()

    restarter = RestartEngine(restart_adapter, restart_grace_period,
                              jobs, activities)
    restarter_thread = threading.Thread(target=restarter.run, name="restarter")
    restarter_thread.daemon = True

    activity = ActivityEngine(activity_path, activities)
    activity_thread = threading.Thread(target=activity.run, name="activity")
    activity_thread.daemon = True

    # Now, fire an initial sync, then empty the activity queue (we don't want
    # to fire notifications for "new" containers if Captain Comeback is the
    # one that's starting), and start all worker threads.
    index.sync()
    while True:
        try:
            activities.get_nowait()
        except queue.Empty:
            break

    restarter_thread.start()
    activity_thread.start()

    while True:
        index.sync()
        deadline = time.time() + sync_target_interval

        # Poll until the next scheduled sync.
        while True:
            remaining = deadline - time.time()
            if remaining <= 0:
                break
            logger.debug("poll with timeout: %s", remaining)
            try:
                index.poll(remaining)
            except IOError as e:
                # EINTR (signal during the wait) is benign; resume polling.
                if e.errno != errno.EINTR:
                    raise
                logger.warning("interrupted")

        # A dead worker means we can no longer do our job — bail out.
        for thread in [activity_thread, restarter_thread]:
            if not thread.is_alive():
                logger.critical("thread %s is dead", thread.name)
                return 1

    return 0
def test_wakeup_on_sync(self):
    """Syncing the index flips oom_kill_disable on for tracked cgroups."""
    cg_path = create_random_cg(self.parent_cg_path)
    cg = Cgroup(cg_path)
    cg.open()
    cg.set_memory_limit_in_bytes(1024)

    # The kernel OOM killer starts out enabled for this cgroup.
    self.assertEqual("0", cg.oom_control_status()["oom_kill_disable"])

    index = CgroupIndex(self.parent_cg_path, queue.Queue())
    index.open()
    index.sync()
    index.close()

    # After one sync, the index has disabled the kernel OOM killer so it
    # can handle OOMs itself.
    self.assertEqual("1", cg.oom_control_status()["oom_kill_disable"])
    cg.close()
def test_index_poll_close(self):
    """A deleted cgroup is reported as stale on the next sync."""
    cg_path = create_random_cg(self.parent_cg_path)

    jobs = queue.Queue()
    activities = queue.Queue()
    index = CgroupIndex(self.parent_cg_path, jobs, activities)
    index.open()

    self.assertHasNoMessages(activities)
    index.sync()
    self.assertHasMessageForCg(activities, NewCgroupMessage, cg_path)

    delete_cg(cg_path)
    index.sync()
    self.assertHasMessageForCg(activities, StaleCgroupMessage, cg_path)

    index.close()
def test_index_poll(self):
    """Triggering an OOM makes poll() enqueue a RestartRequestedMessage."""
    cg_path = create_random_cg(self.parent_cg_path)

    messages = queue.Queue()
    index = CgroupIndex(self.parent_cg_path, messages)
    index.open()
    index.sync()

    # Nothing should be queued before any OOM happens.
    self.assertRaises(queue.Empty, messages.get_nowait)

    enable_memlimit_and_trigger_oom(cg_path)
    index.poll(10)

    msg = messages.get_nowait()
    self.assertIsInstance(msg, RestartRequestedMessage)
    self.assertEqual(cg_path, msg.cg.path)

    index.close()
def test_index_leak(self):
    """Creating, syncing and deleting many cgroups must not leak fds."""
    job_q = queue.Queue()
    activity_q = queue.Queue()
    index = CgroupIndex(self.parent_cg_path, job_q, activity_q)

    # Count this process' open file descriptors via /proc.
    fd_dir = os.path.join('/proc', str(os.getpid()), 'fd')

    def count_fds():
        return len(os.listdir(fd_dir))

    fd_initial = count_fds()
    logger.debug("fd_initial=%d", fd_initial)

    index.open()
    fd_intermediate = count_fds()
    logger.debug("fd_intermediate=%d", fd_intermediate)

    for _ in range(100):
        fd0 = count_fds()
        cgs = [
            quick_create_cg(str(uuid.uuid4()), self.parent_cg_path)
            for _ in range(100)
        ]
        index.sync()
        fd1 = count_fds()
        index.sync()
        fd2 = count_fds()
        logger.debug("fd0=%d, fd1=%d, fd2=%d", fd0, fd1, fd2)

        # Before creating cgroups we should be back at the post-open fd
        # level, and a redundant second sync must not open anything new.
        self.assertEqual(fd0, fd_intermediate)
        self.assertEqual(fd1, fd2)

        for cg in cgs:
            quick_delete_cg(cg)
        index.sync()

    index.close()
    fd_final = count_fds()
    logger.debug("fd_final=%d", fd_final)
    self.assertEqual(fd_final, fd_initial)
def test_index_sync(self):
    """sync() registers a new cgroup in both internal hashes and in epoll."""
    cg_path = create_random_cg(self.parent_cg_path)
    q = queue.Queue()
    index = CgroupIndex(self.parent_cg_path, q)
    index.open()
    index.sync()

    # The new cgroup must show up in the path hash and the event-fd hash.
    self.assertEqual(1, len(index._path_hash))
    self.assertEqual(1, len(index._efd_hash))

    # Registering the same event fd a second time must fail, which proves
    # the cgroup was already registered with the poll instance.
    cg = index._path_hash[cg_path]
    self.assertRaises(
        EnvironmentError, index.epl.register, cg.event_fileno()
    )
    index.close()
def test_index_sync(self):
    """After one sync, the new cgroup is tracked and wired into epoll."""
    cg_path = create_random_cg(self.parent_cg_path)
    q = queue.Queue()
    index = CgroupIndex(self.parent_cg_path, q, q)
    index.open()
    index.sync()

    # Exactly one cgroup tracked, by path and by event fd.
    self.assertEqual(1, len(index._path_hash))
    self.assertEqual(1, len(index._efd_hash))

    # A duplicate registration of its event fd raises, confirming that
    # sync() already registered it.
    tracked = index._path_hash[cg_path]
    self.assertRaises(
        EnvironmentError, index.epl.register, tracked.event_fileno()
    )
    index.close()
def test_index_poll(self):
    """An OOM produces a restart job; the new cgroup produces activity."""
    cg_path = create_random_cg(self.parent_cg_path)

    jobs = queue.Queue()
    activities = queue.Queue()
    index = CgroupIndex(self.parent_cg_path, jobs, activities)
    index.open()

    self.assertHasNoMessages(activities)
    index.sync()

    enable_memlimit_and_trigger_oom(cg_path)
    self.assertHasNoMessages(jobs)
    index.poll(10)

    self.assertHasMessageForCg(jobs, RestartRequestedMessage, cg_path)
    self.assertHasMessageForCg(activities, NewCgroupMessage, cg_path)

    index.close()
def test_index_sync_many(self):
    """Deleting cgroups one by one shrinks the index on each sync."""
    remaining = [create_random_cg(self.parent_cg_path) for _ in range(10)]

    index = CgroupIndex(self.parent_cg_path, queue.Queue())
    index.open()
    index.sync()

    while remaining:
        # Everything still on disk is still tracked.
        self.assertEqual(len(remaining), len(index._path_hash))
        victim = remaining.pop()
        self.assertIn(victim, index._path_hash)

        delete_cg(victim)
        index.sync()

        # The deleted cgroup is gone from the index after the sync.
        self.assertEqual(len(remaining), len(index._path_hash))
        self.assertNotIn(victim, index._path_hash)

    index.close()
def main(root_cg_path, sync_target_interval, restart_grace_period):
    """Daemon entry point: sync the cgroup index and poll for OOM events,
    handing restart jobs to a background RestartEngine thread.

    Returns 1 if the restarter thread dies, so a supervisor can restart
    the whole process; otherwise loops forever.
    """
    threading.current_thread().name = "index"

    job_queue = queue.Queue()
    index = CgroupIndex(root_cg_path, job_queue)
    index.open()

    restarter = RestartEngine(job_queue, restart_grace_period)
    restarter_thread = threading.Thread(target=restarter.run, name="restarter")
    restarter_thread.daemon = True
    restarter_thread.start()

    while True:
        index.sync()
        next_sync = time.time() + sync_target_interval
        while True:
            poll_timeout = next_sync - time.time()
            if poll_timeout <= 0:
                break
            logger.debug("poll with timeout: %s", poll_timeout)
            try:
                index.poll(poll_timeout)
            except IOError as e:
                # A signal delivered during the wait surfaces as EINTR;
                # that is benign — resume polling instead of crashing
                # (same handling as run_loop).
                if e.errno != errno.EINTR:
                    raise
                logger.warning("interrupted")

        # If the restarter died we would otherwise spin forever while no
        # restarts ever happen; exit loudly so a supervisor can recover.
        if not restarter_thread.is_alive():
            logger.critical("thread %s is dead", restarter_thread.name)
            return 1
def test_index_race(self):
    """Stress sync() against concurrent cgroup creation and deletion.

    A background "racer" thread continuously deletes and recreates cgroups
    while the main thread runs repeated syncs, so sync() must tolerate
    cgroups disappearing between directory listing and registration.
    """
    ready_q = queue.Queue()
    exit_q = queue.Queue()

    cg_count = 100
    cg_cycle = 10
    index_syncs = 50

    # A tight fd limit makes any descriptor leak fail fast.
    file_limit = cg_count * 5
    resource.setrlimit(resource.RLIMIT_NOFILE, (file_limit, file_limit))

    def racer():
        # Churn cgroups until the main thread signals via exit_q.
        cgs = [
            quick_create_cg(str(uuid.uuid4()), self.parent_cg_path)
            for _ in range(cg_count)
        ]
        ready_q.put(None)
        i = 0
        while True:
            i += 1
            try:
                exit_q.get_nowait()
            except queue.Empty:
                pass
            else:
                break
            logger.debug("racer: shuffle (%d)", i)
            random.shuffle(cgs)
            logger.debug("racer: split (%d)", i)
            del_cgs, keep_cgs = cgs[:cg_cycle], cgs[cg_cycle:]
            logger.debug("racer: delete (%d)", i)
            for cg in del_cgs:
                quick_delete_cg(cg)
            logger.debug("racer: remake (%d)", i)
            cgs = keep_cgs + [
                quick_create_cg(str(uuid.uuid4()), self.parent_cg_path)
                for _ in range(cg_cycle)
            ]
            logger.debug("racer: done (%d)", i)
        for cg in cgs:
            quick_delete_cg(cg)

    t = threading.Thread(target=racer)
    t.start()

    job_q = queue.Queue()
    activity_q = queue.Queue()
    index = CgroupIndex(self.parent_cg_path, job_q, activity_q)
    index.open()

    # Don't start syncing before the racer has its initial cgroups up.
    ready_q.get(timeout=5)

    try:
        for _ in range(index_syncs):
            index.sync()
        index.close()
    except Exception:
        # Fixed typo: was "sync errorred".
        logger.error("sync errored")  # Make logs more usable
        raise
    finally:
        exit_q.put(None)
        t.join(5)