def main_loop(self) -> None:
    """Run the scheduling loop forever.

    Every pass checks the workers inside a database transaction, calls
    ``schedule`` with the device-types gathered during the previous wait,
    then polls until ``get_available_dts`` returns a non-empty value or
    ``INTERVAL`` seconds have elapsed since the pass started.

    ``OperationalError`` and ``InterfaceError`` are handled by closing the
    database connection and sleeping two seconds before the next pass.
    """
    pending_dts: Set[str] = set()
    while True:
        pass_start = time.time()
        try:
            # Check remote worker connectivity
            with transaction.atomic():
                self.check_workers()

            # Schedule jobs, then forget the device-types just handled
            schedule(self.logger, pending_dts)
            pending_dts = set()

            # Wait for events until something is available or the
            # interval is used up
            while not pending_dts:
                if time.time() - pass_start >= INTERVAL:
                    break
                remaining = max(INTERVAL - (time.time() - pass_start), 0)
                # The remaining time is scaled by 1000 and never goes
                # below 1 before being handed to the poller
                poll_timeout = max(remaining * 1000, 1)
                with contextlib.suppress(zmq.ZMQError):
                    self.poller.poll(poll_timeout)
                pending_dts = self.get_available_dts()

        except (OperationalError, InterfaceError):
            self.logger.info("[RESET] database connection reset.")
            # Closing the database connection will force Django to reopen
            # the connection
            connection.close()
            time.sleep(2)
def test_job_limit(self):
    """Check what happens to a job created with a one-second queue timeout.

    The first scheduler run must leave the job submitted. After a three
    second pause, a second run must leave no submitted job: the job is then
    expected to be either in the canceling state or to have the canceled
    health, but not both.
    """

    def count_jobs(**criteria):
        """Return how many jobs match the given filter arguments."""
        return TestJob.objects.filter(**criteria).count()

    TestJob.objects.create(
        requested_device_type=self.device_type01,
        submitter=self.user,
        queue_timeout=int(timedelta(seconds=1).total_seconds()),
    )
    assert TestJob.objects.all().count() == 1

    # First run: the job is still waiting in the queue
    schedule(self.logger, [], [])
    assert count_jobs(state=TestJob.STATE_SUBMITTED) == 1
    assert count_jobs(state=TestJob.STATE_CANCELING) == 0

    # Pause for longer than the one-second queue timeout set above
    time.sleep(3)

    # Second run: the job must have left the submitted state
    schedule(self.logger, [], [])
    assert count_jobs(state=TestJob.STATE_SUBMITTED) == 0

    canceling_count = count_jobs(state=TestJob.STATE_CANCELING)
    canceled_count = count_jobs(health=TestJob.HEALTH_CANCELED)
    if canceling_count != 0:
        assert canceling_count == 1
        assert canceled_count == 0
    else:
        assert canceled_count == 1
def test_low_medium_high_with_hc(self):
    """Check that a health check runs first, then the HIGH priority job.

    Five jobs (LOW, MEDIUM, HIGH, MEDIUM, LOW) are submitted with health
    checks enabled. The first scheduler run must leave all of them
    submitted while the device gets a scheduled job of its own; once that
    job is finished, the second run must schedule the HIGH priority job.
    """
    # Enable health checks
    device_type = self.device_type01
    device_type.health_denominator = DeviceType.HEALTH_PER_HOUR
    device_type.health_frequency = 24
    device_type.save()
    Device.get_health_check = _minimal_valid_job
    self.assertNotEqual(self.device01.get_health_check(), None)

    priorities = [
        TestJob.LOW,
        TestJob.MEDIUM,
        TestJob.HIGH,
        TestJob.MEDIUM,
        TestJob.LOW,
    ]
    jobs = [
        TestJob.objects.create(
            requested_device_type=self.device_type01,
            user=self.user,
            submitter=self.user,
            is_public=True,
            definition=_minimal_valid_job(None),
            priority=priority,
        )
        for priority in priorities
    ]

    # Check that an health check will be scheduled before any jobs
    log = DummyLogger()
    schedule(log)
    self.device01.refresh_from_db()
    self.assertEqual(self.device01.state, Device.STATE_RESERVED)
    for job in jobs:
        self._check_job(job, TestJob.STATE_SUBMITTED)

    health_check = self.device01.current_job()
    self.assertEqual(health_check.state, TestJob.STATE_SCHEDULED)
    health_check.go_state_finished(TestJob.HEALTH_COMPLETE)
    health_check.save()

    # Check that the next job is the highest priority
    high_index = 2
    schedule(log)
    self.device01.refresh_from_db()
    self.assertEqual(self.device01.state, Device.STATE_RESERVED)
    for index, job in enumerate(jobs):
        if index == high_index:
            self._check_job(job, TestJob.STATE_SCHEDULED, self.device01)
        else:
            self._check_job(job, TestJob.STATE_SUBMITTED)
def test_health_frequency_hours(self):
    """Check that an hour-based health check preempts a waiting job.

    With device01 marked bad and device03 good, a first job must be
    scheduled on device03. After the submit time of ``self.last_hc03`` is
    moved 25 hours into the past (the frequency is 24), the next run must
    leave the second job submitted and give device03 a scheduled health
    check instead.
    """

    def submit_job():
        """Create one public job for device_type01."""
        return TestJob.objects.create(
            requested_device_type=self.device_type01,
            user=self.user,
            submitter=self.user,
            is_public=True,
            definition=_minimal_valid_job(None),
        )

    device_type = self.device_type01
    device_type.health_denominator = DeviceType.HEALTH_PER_HOUR
    device_type.health_frequency = 24
    device_type.save()
    Device.get_health_check = _minimal_valid_job
    for device in (self.device01, self.device02, self.device03):
        self.assertNotEqual(device.get_health_check(), None)

    # Only device03 is available now
    self.device01.health = Device.HEALTH_BAD
    self.device01.save()
    self.assertTrue(self.device01.is_valid())
    self.device03.health = Device.HEALTH_GOOD
    self.device03.save()
    self.assertTrue(self.device03.is_valid())

    # Create a job that should be scheduled now
    first_job = submit_job()
    schedule(DummyLogger())
    self.device01.refresh_from_db()
    first_job.refresh_from_db()
    self.assertEqual(first_job.state, TestJob.STATE_SCHEDULED)
    self.assertEqual(first_job.actual_device, self.device03)
    first_job.go_state_finished(TestJob.HEALTH_COMPLETE)
    first_job.save()

    # Create a job that should be scheduled after the health check
    second_job = submit_job()
    self.device03.refresh_from_db()
    # Make the last health check older than the 24 hour frequency
    self.last_hc03.submit_time = timezone.now() - timedelta(hours=25)
    self.last_hc03.save()
    schedule(DummyLogger())
    self.device03.refresh_from_db()
    second_job.refresh_from_db()
    self.assertEqual(second_job.state, TestJob.STATE_SUBMITTED)

    health_check = self.device03.current_job()
    self.assertTrue(health_check.health_check)
    self.assertEqual(health_check.state, TestJob.STATE_SCHEDULED)
def test_job_limit(self):
    """Check that every scheduler run returns ``self.joblimit`` jobs.

    Eight jobs are queued. A first series of runs is expected to hand out
    health checks only; a second series is expected to hand out regular
    jobs only, with exactly ``self.joblimit`` devices not idle after each
    run. Between runs every scheduling/scheduled job is finished and its
    device is put back to good health and idle state.
    """

    def finish_active_jobs(assert_health_flag):
        """Finish all scheduling/scheduled jobs and release their devices.

        ``assert_health_flag`` is the assertion applied to each job's
        ``health_check`` attribute before the job is finished.
        """
        active_jobs = TestJob.objects.filter(
            state__in=[TestJob.STATE_SCHEDULING, TestJob.STATE_SCHEDULED]
        )
        for active in active_jobs:
            assert_health_flag(active.health_check)
            active.go_state_finished(TestJob.HEALTH_COMPLETE)
            device = active.actual_device
            device.health = Device.HEALTH_GOOD
            device.state = Device.STATE_IDLE
            device.save()
            active.save()

    maxjobs = 8
    self.jobs = []
    for _ in range(maxjobs):
        self.jobs.append(
            TestJob.objects.create(
                requested_device_type=self.device_type01,
                submitter=self.user,
                definition=_minimal_valid_job(None),
            )
        )

    # One run for every two devices: only health checks are expected
    for _ in range(self.devmax, 0, -2):
        scheduled = schedule(DummyLogger())
        self.assertEqual(len(scheduled), self.joblimit)
        finish_active_jobs(self.assertTrue)

    # One run for every two jobs: only regular jobs are expected
    for _ in range(maxjobs, 0, -2):
        scheduled = schedule(DummyLogger())
        self.assertEqual(len(scheduled), self.joblimit)

        # Count the devices that are not idle after this run
        busy_devices = 0
        for device in self.devices:
            device.refresh_from_db()
            if device.state == Device.STATE_IDLE:
                continue
            busy_devices += 1
        self.assertEqual(busy_devices, self.joblimit)

        finish_active_jobs(self.assertFalse)
def test_job_limit(self):
    """Check that one scheduler run schedules two of four submitted jobs."""

    def jobs_in_state(state):
        """Return the number of jobs currently in ``state``."""
        return TestJob.objects.filter(state=state).count()

    job_total = 4
    for _ in range(job_total):
        TestJob.objects.create(
            requested_device_type=self.device_type01,
            submitter=self.user,
            definition=_minimal_valid_job(None),
        )
    assert TestJob.objects.all().count() == job_total

    # Limit the number of jobs that can run
    schedule(self.logger)
    assert jobs_in_state(TestJob.STATE_SCHEDULED) == 2
    assert jobs_in_state(TestJob.STATE_SUBMITTED) == 2
def test_health_frequency_hours(self):
    """Check that an hour-based health check preempts a waiting job.

    With device01 marked bad and device03 good, a first job must be
    scheduled on device03. After the submit time of ``self.last_hc03`` is
    moved 25 hours into the past (the frequency is 24), the next run must
    leave the second job submitted and give device03 a scheduled health
    check instead.
    """

    def submit_job():
        """Create one public job for device_type01."""
        return TestJob.objects.create(
            requested_device_type=self.device_type01,
            user=self.user,
            submitter=self.user,
            is_public=True,
            definition=_minimal_valid_job(None),
        )

    device_type = self.device_type01
    device_type.health_denominator = DeviceType.HEALTH_PER_HOUR
    device_type.health_frequency = 24
    device_type.save()
    Device.get_health_check = _minimal_valid_job
    for device in (self.device01, self.device02, self.device03):
        self.assertNotEqual(device.get_health_check(), None)

    # Only device03 is available now
    self.device01.health = Device.HEALTH_BAD
    self.device01.save()
    self.device03.health = Device.HEALTH_GOOD
    self.device03.save()

    # Create a job that should be scheduled now
    first_job = submit_job()
    schedule(DummyLogger())
    self.device01.refresh_from_db()
    first_job.refresh_from_db()
    self.assertEqual(first_job.state, TestJob.STATE_SCHEDULED)
    self.assertEqual(first_job.actual_device, self.device03)
    first_job.go_state_finished(TestJob.HEALTH_COMPLETE)
    first_job.save()

    # Create a job that should be scheduled after the health check
    second_job = submit_job()
    self.device03.refresh_from_db()
    # Make the last health check older than the 24 hour frequency
    self.last_hc03.submit_time = timezone.now() - timedelta(hours=25)
    self.last_hc03.save()
    schedule(DummyLogger())
    self.device03.refresh_from_db()
    second_job.refresh_from_db()
    self.assertEqual(second_job.state, TestJob.STATE_SUBMITTED)

    health_check = self.device03.current_job()
    self.assertTrue(health_check.health_check)
    self.assertEqual(health_check.state, TestJob.STATE_SCHEDULED)
def _check_scheduling(self, logger, device, current_priority, remaining_priorities):
    """Run the scheduler once and verify which job was picked.

    After the run, ``device`` must be reserved and exactly one job must be
    scheduled. That job is checked against ``current_priority`` and
    ``device``, every job still submitted is checked against
    ``remaining_priorities``, and the scheduled job is then finished and
    checked again in the finished state.
    """
    schedule(logger, [], ["worker-01"])
    device.refresh_from_db()
    self.assertEqual(device.state, Device.STATE_RESERVED)

    scheduled_jobs = TestJob.objects.filter(state=TestJob.STATE_SCHEDULED)
    self.assertEqual(scheduled_jobs.count(), 1)

    # Fetch the single scheduled job again by id
    picked = TestJob.objects.get(id=scheduled_jobs[0].id)
    expected_priority = (current_priority,)
    self._check_job(picked, expected_priority, TestJob.STATE_SCHEDULED, device)

    waiting_jobs = TestJob.objects.filter(state=TestJob.STATE_SUBMITTED)
    for waiting in waiting_jobs:
        self._check_job(waiting, remaining_priorities)

    picked.go_state_finished(TestJob.HEALTH_COMPLETE)
    picked.save()
    self._check_job(picked, (current_priority,), TestJob.STATE_FINISHED, device)
def test_low_medium_high_with_hc(self):
    """Check that a health check runs first, then the HIGH priority job.

    Five jobs (LOW, MEDIUM, HIGH, MEDIUM, LOW) are submitted with health
    checks enabled. The first scheduler run must leave all of them
    submitted while the device gets a scheduled job of its own; once that
    job is finished, the second run must schedule the HIGH priority job.
    """
    # Enable health checks
    device_type = self.device_type01
    device_type.health_denominator = DeviceType.HEALTH_PER_HOUR
    device_type.health_frequency = 24
    device_type.save()
    Device.get_health_check = _minimal_valid_job
    self.assertNotEqual(self.device01.get_health_check(), None)

    priorities = [
        TestJob.LOW,
        TestJob.MEDIUM,
        TestJob.HIGH,
        TestJob.MEDIUM,
        TestJob.LOW,
    ]
    jobs = [
        TestJob.objects.create(
            requested_device_type=self.device_type01,
            user=self.user,
            submitter=self.user,
            is_public=True,
            definition=_minimal_valid_job(None),
            priority=priority,
        )
        for priority in priorities
    ]

    # Check that an health check will be scheduled before any jobs
    log = DummyLogger()
    schedule(log)
    self.device01.refresh_from_db()
    self.assertEqual(self.device01.state, Device.STATE_RESERVED)
    for job in jobs:
        self._check_job(job, TestJob.STATE_SUBMITTED)

    health_check = self.device01.current_job()
    self.assertEqual(health_check.state, TestJob.STATE_SCHEDULED)
    health_check.go_state_finished(TestJob.HEALTH_COMPLETE)
    health_check.save()

    # Check that the next job is the highest priority
    high_index = 2
    schedule(log)
    self.device01.refresh_from_db()
    self.assertEqual(self.device01.state, Device.STATE_RESERVED)
    for index, job in enumerate(jobs):
        if index == high_index:
            self._check_job(job, TestJob.STATE_SCHEDULED, self.device01)
        else:
            self._check_job(job, TestJob.STATE_SUBMITTED)
def test_health_frequency_jobs(self):
    """Check that a health check is scheduled after two jobs have run.

    The device-type asks for a health check every 2 jobs. With only
    device03 in good health, the first two jobs must be scheduled on it one
    after the other; the third scheduler run must leave the third job
    submitted and give device03 a scheduled health check instead.
    """

    def submit_job():
        """Create one job for device_type01."""
        return TestJob.objects.create(
            requested_device_type=self.device_type01,
            submitter=self.user,
            definition=_minimal_valid_job(None),
        )

    device_type = self.device_type01
    device_type.health_denominator = DeviceType.HEALTH_PER_JOB
    device_type.health_frequency = 2
    device_type.save()
    self.last_hc03.submit_time = timezone.now() - timedelta(hours=2)
    self.last_hc03.save()
    Device.get_health_check = _minimal_valid_job
    for device in (self.device01, self.device02, self.device03):
        self.assertNotEqual(device.get_health_check(), None)

    # Only device03 is available now
    self.device01.health = Device.HEALTH_BAD
    self.device01.save()
    self.device03.health = Device.HEALTH_GOOD
    self.device03.save()

    # Create the jobs: the first one should be scheduled now
    first_job = submit_job()
    second_job = submit_job()
    third_job = submit_job()

    # The first two jobs are scheduled on device03, one per run
    for job in (first_job, second_job):
        schedule(logging.getLogger())
        self.device03.refresh_from_db()
        job.refresh_from_db()
        self.assertEqual(job.state, TestJob.STATE_SCHEDULED)
        self.assertEqual(job.actual_device, self.device03)
        job.go_state_finished(TestJob.HEALTH_COMPLETE)
        job.start_time = timezone.now() - timedelta(hours=1)
        job.save()

    # The third run must schedule a health check rather than the third job
    schedule(logging.getLogger())
    self.device03.refresh_from_db()
    third_job.refresh_from_db()
    self.assertEqual(third_job.state, TestJob.STATE_SUBMITTED)

    health_check = self.device03.current_job()
    self.assertTrue(health_check.health_check)
    self.assertEqual(health_check.state, TestJob.STATE_SCHEDULED)
def test_health_frequency_jobs(self):
    """Check that a health check is scheduled after two jobs have run.

    The device-type asks for a health check every 2 jobs. With only
    device03 in good health, each of the first two scheduler runs must
    schedule exactly one job on it; the third run must leave one job
    submitted and give device03 a scheduled health check instead.
    """

    def run_scheduler():
        """Run the scheduler once with a fresh copy of the worker list."""
        schedule(logging.getLogger(), [], ["worker-01", "worker-03"])

    device_type = self.device_type01
    device_type.health_denominator = DeviceType.HEALTH_PER_JOB
    device_type.health_frequency = 2
    device_type.save()
    self.last_hc03.submit_time = timezone.now() - timedelta(hours=2)
    self.last_hc03.save()
    Device.get_health_check = _minimal_valid_job
    for device in (self.device01, self.device02, self.device03):
        self.assertNotEqual(device.get_health_check(), None)

    # Only device03 is available now
    self.device01.health = Device.HEALTH_BAD
    self.device01.save()
    self.device03.health = Device.HEALTH_GOOD
    self.device03.save()

    # Create three jobs that should be scheduled with a healthcheck
    # preceding the last one
    for _ in range(3):
        TestJob.objects.create(
            requested_device_type=self.device_type01,
            submitter=self.user,
            definition=_minimal_valid_job(None),
        )

    # Two runs in a row: each one schedules a single job on device03
    for _ in range(2):
        run_scheduler()
        self.device03.refresh_from_db()
        scheduled_jobs = TestJob.objects.filter(state=TestJob.STATE_SCHEDULED)
        self.assertEqual(scheduled_jobs.count(), 1)
        running = scheduled_jobs[0]
        self.assertEqual(running.actual_device, self.device03)
        running.go_state_finished(TestJob.HEALTH_COMPLETE)
        running.start_time = timezone.now() - timedelta(hours=1)
        running.save()

    # Third run: the last job keeps waiting behind a health check
    run_scheduler()
    self.device03.refresh_from_db()
    waiting_jobs = TestJob.objects.filter(state=TestJob.STATE_SUBMITTED)
    self.assertEqual(waiting_jobs.count(), 1)

    health_check = self.device03.current_job()
    self.assertTrue(health_check.health_check)
    self.assertEqual(health_check.state, TestJob.STATE_SCHEDULED)
def main_loop(self, options):
    """Poll the sockets and drive scheduling until a signal is received.

    Each pass waits on the poller, then handles in order: the signal pipe
    (which ends the loop), the command socket, the event socket, the
    inotify descriptor, the dispatcher liveness check and finally the
    scheduling step. Scheduling is either the periodic full run, once
    ``SCHEDULE_INTERVAL`` has elapsed, or a partial run driven by the
    pending entries of ``self.events``.

    ``options`` is indexed with ``'slaves_certs'`` when certificates are
    reloaded. ``OperationalError`` and ``InterfaceError`` are handled by
    closing the database connection and sleeping two seconds.
    """
    last_schedule = last_dispatcher_check = time.time()

    while True:
        try:
            try:
                # Compute the timeout: time left before the next periodic
                # schedule or the next dispatcher check, whichever is first
                now = time.time()
                until_schedule = SCHEDULE_INTERVAL - (now - last_schedule)
                until_ping = PING_INTERVAL - (now - last_dispatcher_check)
                timeout = min(until_schedule, until_ping)

                # If some actions are remaining, decrease the timeout
                if any(self.events[name] for name in self.events.keys()):
                    timeout = min(timeout, 2)

                # Wait at least for 1ms
                timeout = max(timeout * 1000, 1)

                # Wait for data or a timeout
                sockets = dict(self.poller.poll(timeout))
            except zmq.error.ZMQError:
                continue

            if sockets.get(self.pipe_r) == zmq.POLLIN:
                self.logger.info("[POLL] Received a signal, leaving")
                break

            # Command socket
            if sockets.get(self.controler) == zmq.POLLIN:
                # Unqueue all pending messages
                while self.controler_socket():
                    pass

            # Events socket
            if sockets.get(self.event_socket) == zmq.POLLIN:
                # Unqueue all pending messages
                while self.read_event_socket():
                    pass
                # Wait for the next iteration to handle the event.
                # In fact, the code that generated the event (lava-logs or
                # lava-server-gunicorn) needs some time to commit the
                # database transaction.
                # If we are too fast, the database object won't be
                # available (or in the right state) yet.
                continue

            # Inotify socket
            if sockets.get(self.inotify_fd) == zmq.POLLIN:
                os.read(self.inotify_fd, 4096)
                self.logger.debug(
                    "[AUTH] Reloading certificates from %s", options['slaves_certs']
                )
                self.auth.configure_curve(
                    domain='*', location=options['slaves_certs']
                )

            # Check dispatchers status
            now = time.time()
            if now - last_dispatcher_check > PING_INTERVAL:
                for hostname, dispatcher in self.dispatchers.items():
                    if not dispatcher.online:
                        continue
                    if now - dispatcher.last_msg <= DISPATCHER_TIMEOUT:
                        continue
                    if hostname == "lava-logs":
                        self.logger.error("[STATE] lava-logs goes OFFLINE")
                    else:
                        self.logger.error(
                            "[STATE] Dispatcher <%s> goes OFFLINE", hostname
                        )
                    self.dispatchers[hostname].go_offline()
                last_dispatcher_check = now

            # Limit accesses to the database. This will also limit the rate of
            # CANCEL and START messages
            periodic_run_due = time.time() - last_schedule > SCHEDULE_INTERVAL
            if periodic_run_due:
                if self.dispatchers["lava-logs"].online:
                    schedule(self.logger)

                    # Dispatch scheduled jobs
                    with transaction.atomic():
                        self.start_jobs()
                else:
                    self.logger.warning("lava-logs is offline: can't schedule jobs")

                # Handle canceling jobs
                with transaction.atomic():
                    self.cancel_jobs()

                # Do not count the time taken to schedule jobs
                last_schedule = time.time()
            else:
                # Cancel the jobs and remove the jobs from the set
                if self.events["canceling"]:
                    with transaction.atomic():
                        self.cancel_jobs(partial=True)
                    self.events["canceling"] = set()

                # Schedule for available device-types
                if self.events["available_dt"]:
                    jobs = schedule(self.logger, self.events["available_dt"])
                    self.events["available_dt"] = set()

                    # Dispatch scheduled jobs
                    with transaction.atomic():
                        self.start_jobs(jobs)

        except (OperationalError, InterfaceError):
            self.logger.info("[RESET] database connection reset.")
            # Closing the database connection will force Django to reopen
            # the connection
            connection.close()
            time.sleep(2)
def test_low_medium_high_without_hc(self):
    """Check the order in which six jobs are scheduled without health checks.

    Jobs are created with priorities LOW, MEDIUM, HIGH, MEDIUM, LOW and 40.
    One job is expected to be scheduled per scheduler run, in this order of
    creation index: 2 (HIGH), 1 and 3 (MEDIUM), 5 (priority 40), then 0 and
    4 (LOW). After every run the state of all six jobs is verified.
    """
    # Disable health checks
    Device.get_health_check = lambda cls: None

    priorities = [
        TestJob.LOW,
        TestJob.MEDIUM,
        TestJob.HIGH,
        TestJob.MEDIUM,
        TestJob.LOW,
        40,
    ]
    jobs = [
        TestJob.objects.create(
            requested_device_type=self.device_type01,
            submitter=self.user,
            definition=_minimal_valid_job(None),
            priority=priority,
        )
        for priority in priorities
    ]

    log = DummyLogger()
    # Index in ``jobs`` of the job each successive run is expected to pick
    expected_order = [2, 1, 3, 5, 0, 4]
    last_round = len(expected_order) - 1
    finished = set()

    for round_number, current in enumerate(expected_order):
        schedule(log)
        self.device01.refresh_from_db()
        self.assertEqual(self.device01.state, Device.STATE_RESERVED)

        # Verify every job, in creation order
        for index, job in enumerate(jobs):
            if index == current:
                self._check_job(job, TestJob.STATE_SCHEDULED, self.device01)
            elif index in finished:
                self._check_job(job, TestJob.STATE_FINISHED, self.device01)
            else:
                self._check_job(job, TestJob.STATE_SUBMITTED)

        # The job picked by the final run is left scheduled
        if round_number == last_round:
            break

        # Finish the current job so that the next run can pick another one
        jobs[current].go_state_finished(TestJob.HEALTH_COMPLETE)
        jobs[current].save()
        self._check_job(jobs[current], TestJob.STATE_FINISHED, self.device01)
        finished.add(current)
def main_loop(self, options):
    """Poll the sockets and drive scheduling until a signal is received.

    Each pass waits on the poller, then handles in order: the signal pipe
    (which ends the loop), the command socket, the event socket, the
    inotify descriptor, the dispatcher liveness check and finally the
    scheduling step. Scheduling is either the periodic full run, once
    ``SCHEDULE_INTERVAL`` has elapsed, or a partial cancel of the jobs
    recorded in ``self.events["canceling"]``.

    ``options`` is indexed with ``'slaves_certs'`` when certificates are
    reloaded and is passed to ``start_jobs``. ``OperationalError`` and
    ``InterfaceError`` are handled by closing the database connection and
    sleeping two seconds.
    """
    last_schedule = last_dispatcher_check = time.time()

    while True:
        try:
            try:
                # Compute the timeout: time left before the next periodic
                # schedule or the next dispatcher check, whichever is first
                now = time.time()
                until_schedule = SCHEDULE_INTERVAL - (now - last_schedule)
                until_ping = PING_INTERVAL - (now - last_dispatcher_check)
                timeout = min(until_schedule, until_ping)

                # If some actions are remaining, decrease the timeout
                if self.events["canceling"]:
                    timeout = min(timeout, 1)

                # Wait at least for 1ms
                timeout = max(timeout * 1000, 1)

                # Wait for data or a timeout
                sockets = dict(self.poller.poll(timeout))
            except zmq.error.ZMQError:
                continue

            if sockets.get(self.pipe_r) == zmq.POLLIN:
                self.logger.info("[POLL] Received a signal, leaving")
                break

            # Command socket
            if sockets.get(self.controler) == zmq.POLLIN:
                # Unqueue all pending messages
                while self.controler_socket():
                    pass

            # Events socket
            if sockets.get(self.event_socket) == zmq.POLLIN:
                # Unqueue all pending messages
                while self.read_event_socket():
                    pass
                # Wait for the next iteration to handle the event.
                # In fact, the code that generated the event (lava-logs or
                # lava-server-gunicorn) needs some time to commit the
                # database transaction.
                # If we are too fast, the database object won't be
                # available (or in the right state) yet.
                continue

            # Inotify socket
            if sockets.get(self.inotify_fd) == zmq.POLLIN:
                os.read(self.inotify_fd, 4096)
                self.logger.debug(
                    "[AUTH] Reloading certificates from %s", options['slaves_certs']
                )
                self.auth.configure_curve(
                    domain='*', location=options['slaves_certs']
                )

            # Check dispatchers status
            now = time.time()
            if now - last_dispatcher_check > PING_INTERVAL:
                for hostname, dispatcher in self.dispatchers.items():
                    if not dispatcher.online:
                        continue
                    if now - dispatcher.last_msg <= DISPATCHER_TIMEOUT:
                        continue
                    if hostname == "lava-logs":
                        self.logger.error("[STATE] lava-logs goes OFFLINE")
                    else:
                        self.logger.error(
                            "[STATE] Dispatcher <%s> goes OFFLINE", hostname
                        )
                    self.dispatchers[hostname].go_offline()
                last_dispatcher_check = now

            # Limit accesses to the database. This will also limit the rate of
            # CANCEL and START messages
            periodic_run_due = time.time() - last_schedule > SCHEDULE_INTERVAL
            if periodic_run_due:
                if self.dispatchers["lava-logs"].online:
                    schedule(self.logger)

                    # Dispatch scheduled jobs
                    with transaction.atomic():
                        self.start_jobs(options)
                else:
                    self.logger.warning("lava-logs is offline: can't schedule jobs")

                # Handle canceling jobs
                self.cancel_jobs()

                # Do not count the time taken to schedule jobs
                last_schedule = time.time()
            else:
                # Cancel the jobs and remove the jobs from the set
                if self.events["canceling"]:
                    self.cancel_jobs(partial=True)
                    self.events["canceling"] = set()

        except (OperationalError, InterfaceError):
            self.logger.info("[RESET] database connection reset.")
            # Closing the database connection will force Django to reopen
            # the connection
            connection.close()
            time.sleep(2)
def test_low_medium_high_without_hc(self):
    """Check the order in which six jobs are scheduled without health checks.

    Jobs are created with priorities LOW, MEDIUM, HIGH, MEDIUM, LOW and 40.
    One job is expected to be scheduled per scheduler run, in this order of
    creation index: 2 (HIGH), 1 and 3 (MEDIUM), 5 (priority 40), then 0 and
    4 (LOW). After every run the state of all six jobs is verified.
    """
    # Disable health checks
    Device.get_health_check = lambda cls: None

    priorities = [
        TestJob.LOW,
        TestJob.MEDIUM,
        TestJob.HIGH,
        TestJob.MEDIUM,
        TestJob.LOW,
        40,
    ]
    jobs = [
        TestJob.objects.create(
            requested_device_type=self.device_type01,
            user=self.user,
            submitter=self.user,
            is_public=True,
            definition=_minimal_valid_job(None),
            priority=priority,
        )
        for priority in priorities
    ]

    log = DummyLogger()
    # Index in ``jobs`` of the job each successive run is expected to pick
    expected_order = [2, 1, 3, 5, 0, 4]
    last_round = len(expected_order) - 1
    finished = set()

    for round_number, current in enumerate(expected_order):
        schedule(log)
        self.device01.refresh_from_db()
        self.assertEqual(self.device01.state, Device.STATE_RESERVED)

        # Verify every job, in creation order
        for index, job in enumerate(jobs):
            if index == current:
                self._check_job(job, TestJob.STATE_SCHEDULED, self.device01)
            elif index in finished:
                self._check_job(job, TestJob.STATE_FINISHED, self.device01)
            else:
                self._check_job(job, TestJob.STATE_SUBMITTED)

        # The job picked by the final run is left scheduled
        if round_number == last_round:
            break

        # Finish the current job so that the next run can pick another one
        jobs[current].go_state_finished(TestJob.HEALTH_COMPLETE)
        jobs[current].save()
        self._check_job(jobs[current], TestJob.STATE_FINISHED, self.device01)
        finished.add(current)