def initiate_health_check_job(device):
    """
    Submit a health check job for the given device.

    :param device: the device to health check; a falsy value is treated
        as a logic error and nothing is submitted.
    :return: whatever testjob_submission returns, or None when there is no
        device, the device is retired, a deprecated JSON health check
        definition specifies a 'target', or the submission raises
        DevicesUnavailableException or SubmissionException.
    :raises: JSONDataError when the device has no health check definition
        (the device is put into maintenance mode first). Exceptions from
        testjob_submission other than the two caught here propagate.
    """
    logger = logging.getLogger('dispatcher-master')
    logger.info("Initiating health check")
    if not device:
        # logic error
        logger.error("No device")
        return None
    if device.status in [Device.RETIRED]:
        # logic error
        logger.error("[%s] has been retired", device)
        return None

    job_data = device.get_health_check()
    # NOTE(review): the username literal looks masked in this copy of the
    # file - confirm the intended account before relying on it.
    user = User.objects.get(username='******')
    if not job_data:
        # This should never happen, it's a logic error.
        logger.error("No health check definition found for %s", device)
        device.put_into_maintenance_mode(
            user, "health check job not found in initiate_health_check_job")
        # Interpolate the hostname here: exceptions do not apply %-style
        # formatting to extra constructor arguments the way logger calls do.
        raise JSONDataError(
            "no health check job found for %r" % (device.hostname,))

    if is_deprecated_json(job_data):
        # only JSON supports 'target' and that needs to be set by the
        # health-check not the admin.
        job_json = simplejson.loads(job_data)
        if 'target' in job_json:
            logger.error(
                "[%s] JSON Health check definition must not specify a 'target'.",
                device.device_type.name)
            device.put_into_maintenance_mode(
                user, "target must not be defined in health check definitions.")
            return None

    try:
        job = testjob_submission(job_data, user, check_device=device)
    except (DevicesUnavailableException, SubmissionException) as exc:
        logger.error("[%s] failed to submit health check - %s",
                     device.device_type.name, exc)
        return None
    return job
def testjob_submission(job_definition, user, check_device=None):
    """
    Common entry point for submitting a job definition, JSON or YAML.

    :param job_definition: string of the job submission
    :param user: user attempting the submission
    :param check_device: when set, the resulting single job is flagged as
        a health check and this device becomes its requested device. For
        deprecated JSON the device hostname is also written into the
        definition as 'target' together with 'health-check'.
    :return: a job or a list of jobs
    :raises: SubmissionException, Device.DoesNotExist,
        DeviceType.DoesNotExist, DevicesUnavailableException,
        JSONDataError, JSONDecodeError, ValueError
    """
    if not is_deprecated_json(job_definition):
        # YAML submission.
        validate_job(job_definition)
        # a single job or a list (not a QuerySet) of job objects.
        yaml_job = TestJob.from_yaml_and_user(job_definition, user)
        if check_device and isinstance(check_device, Device):
            if not isinstance(yaml_job, list):
                # the slave must neither know nor care if this is a health
                # check, only the master cares and that has the database
                # connection.
                yaml_job.health_check = True
                yaml_job.requested_device = check_device
                yaml_job.save(update_fields=['health_check', 'requested_device'])
        return yaml_job

    # Deprecated JSON submission.
    definition = simplejson.loads(job_definition)
    target_device = None
    if 'target' in definition:
        target_device = Device.objects.get(hostname=definition['target'])

    allow_health = False
    if check_device:
        # A health check always runs on the device being checked, so the
        # definition is rewritten before it is handed to TestJob.
        definition['target'] = check_device.hostname
        definition['health-check'] = True
        job_definition = simplejson.dumps(definition)
        allow_health = True

    try:
        # a single job or a list (not a QuerySet) of job objects.
        json_job = TestJob.from_json_and_user(
            job_definition, user, health_check=allow_health)
        if isinstance(json_job, list):
            # multinode health checks not supported
            return json_job
        json_job.health_check = allow_health
        # check_device takes precedence over a 'target' from the definition.
        requested = check_device or target_device
        if requested:
            json_job.requested_device = requested
        json_job.save(update_fields=['health_check', 'requested_device'])
    except (JSONDataError, ValueError) as exc:
        if not check_device:
            raise JSONDataError("Job submission failed: %s" % exc)
        check_device.put_into_maintenance_mode(
            user,
            "Job submission failed for health job for %s: %s" % (check_device, exc))
        raise JSONDataError(
            "Health check job submission failed for %s: %s" % (check_device, exc))
    return json_job
def testjob_submission(job_definition, user, check_device=None, original_job=None):
    """
    Common entry point for submitting a job definition, JSON or YAML.

    :param job_definition: string of the job submission
    :param user: user attempting the submission
    :param check_device: when set, the resulting single job is flagged as
        a health check and this device becomes its requested device. For
        deprecated JSON the device hostname is also written into the
        definition as 'target' together with 'health-check'.
    :param original_job: forwarded unchanged to
        TestJob.from_yaml_and_user; not used for deprecated JSON.
    :return: a job or a list of jobs
    :raises: SubmissionException, Device.DoesNotExist,
        DeviceType.DoesNotExist, DevicesUnavailableException,
        JSONDataError, JSONDecodeError, ValueError
    """
    if not is_deprecated_json(job_definition):
        # YAML submission.
        validate_job(job_definition)
        # a single job or a list (not a QuerySet) of job objects.
        yaml_job = TestJob.from_yaml_and_user(
            job_definition, user, original_job=original_job)
        if check_device and isinstance(check_device, Device):
            if not isinstance(yaml_job, list):
                # the slave must neither know nor care if this is a health
                # check, only the master cares and that has the database
                # connection.
                yaml_job.health_check = True
                yaml_job.requested_device = check_device
                yaml_job.save(update_fields=['health_check', 'requested_device'])
        return yaml_job

    # Deprecated JSON submission.
    definition = simplejson.loads(job_definition)
    target_device = None
    if 'target' in definition:
        target_device = Device.objects.get(hostname=definition['target'])

    allow_health = False
    if check_device:
        # A health check always runs on the device being checked, so the
        # definition is rewritten before it is handed to TestJob.
        definition['target'] = check_device.hostname
        definition['health-check'] = True
        job_definition = simplejson.dumps(definition)
        allow_health = True

    try:
        # a single job or a list (not a QuerySet) of job objects.
        json_job = TestJob.from_json_and_user(
            job_definition, user, health_check=allow_health)
        if isinstance(json_job, list):
            # multinode health checks not supported
            return json_job
        json_job.health_check = allow_health
        # check_device takes precedence over a 'target' from the definition.
        requested = check_device or target_device
        if requested:
            json_job.requested_device = requested
        json_job.save(update_fields=['health_check', 'requested_device'])
    except (JSONDataError, ValueError) as exc:
        if not check_device:
            raise JSONDataError("Job submission failed: %s" % exc)
        check_device.put_into_maintenance_mode(
            user,
            "Job submission failed for health job for %s: %s" % (check_device, exc))
        raise JSONDataError(
            "Health check job submission failed for %s: %s" % (check_device, exc))
    return json_job
def initiate_health_check_job(device):
    """
    Return the device's existing health check job, or submit a new one.

    :param device: the device to health check; a falsy value is treated
        as a logic error and nothing is submitted.
    :return: the truthy result of device.get_existing_health_check_job()
        if there is one, otherwise whatever testjob_submission returns.
        None when there is no device, the device is retired, a deprecated
        JSON health check definition specifies a 'target', or the
        submission raises DevicesUnavailableException.
    :raises: JSONDataError when the device type has no health check
        definition (the device is put into maintenance mode first).
        Exceptions from testjob_submission other than
        DevicesUnavailableException propagate.
    """
    logger = logging.getLogger('dispatcher-master')
    logger.info("Initiating health check")
    if not device:
        # logic error
        logger.error("No device")
        return None
    if device.status in [Device.RETIRED]:
        # logic error
        logger.error("[%s] has been retired", device)
        return None

    # Reuse a health check job that already exists instead of submitting
    # another one.
    existing_health_check_job = device.get_existing_health_check_job()
    if existing_health_check_job:
        return existing_health_check_job

    job_data = device.device_type.health_check_job
    # NOTE(review): the username literal looks masked in this copy of the
    # file - confirm the intended account before relying on it.
    user = User.objects.get(username='******')
    if not job_data:
        # This should never happen, it's a logic error.
        logger.error("No health check definition found for %s", device)
        device.put_into_maintenance_mode(
            user, "health check job not found in initiate_health_check_job")
        # Interpolate the hostname here: exceptions do not apply %-style
        # formatting to extra constructor arguments the way logger calls do.
        raise JSONDataError(
            "no health check job found for %r" % (device.hostname,))

    if is_deprecated_json(job_data):
        # only JSON supports 'target' and that needs to be set by the
        # health-check not the admin.
        job_json = simplejson.loads(job_data)
        if 'target' in job_json:
            logger.error(
                "[%s] JSON Health check definition must not specify a 'target'.",
                device.device_type.name)
            device.put_into_maintenance_mode(
                user, "target must not be defined in health check definitions.")
            return None

    try:
        job = testjob_submission(job_data, user, check_device=device)
    except DevicesUnavailableException as exc:
        logger.error("[%s] failed to submit health check - %s",
                     device.device_type.name, exc)
        return None
    return job
def handle(self, *_, **options):
    """
    Write device-type health checks from the database to YAML files.

    Creates the health check directory if needed, then writes one
    ``<device type name>.yaml`` file per device type that has a
    non-deprecated health check, is displayed and is not retired. When
    ``options["clean"]`` is truthy the database copy is cleared for
    device types that pass the exclusive/retired/not-displayed test.
    Finally the skipped device types are listed and every non-retired
    device is checked for a matching health check file, based on the
    'extends' entry of its device dictionary.

    Returns early, after reporting on stderr, if the directory cannot be
    created for a reason other than already existing.
    """
    health_dir = "/etc/lava-server/dispatcher-config/health-checks"
    self.stdout.write("Moving health checks to %s:" % health_dir)

    # Create the directory; an already existing one is fine.
    try:
        os.mkdir(health_dir, 0o755)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            self.stderr.write("Unable to create the directory: %s" % str(exc))
            return

    # (device type name, has a v1 health check) for every type not exported
    dt_skipped = []
    for dt in DeviceType.objects.order_by('name'):
        exportable = (dt.health_check_job and dt.display and
                      not is_device_type_retired(dt))
        if not exportable:
            dt_skipped.append((dt.name, False))
            continue

        # Check that the health-check is a v2 job
        if is_deprecated_json(dt.health_check_job):
            dt_skipped.append((dt.name, True))
            continue

        # Dump to the filesystem
        self.stdout.write("* %s" % dt.name)
        target = os.path.join(health_dir, dt.name + '.yaml')
        with open(target, 'w') as f_out:
            f_out.write(dt.health_check_job)

        # Remove the health check from the data base (if needed)
        if not options["clean"]:
            continue
        removable = (is_device_type_exclusive(dt) or
                     is_device_type_retired(dt) or
                     not dt.display)
        if removable:
            dt.health_check_job = None
            dt.save(update_fields=["health_check_job"])
        else:
            self.stderr.write(
                "-> Not cleaning %s, some devices still support V1" % dt.name)

    self.stdout.write("Device types skipped:")
    for (name, has_health_check) in dt_skipped:
        if has_health_check:
            template = "* %s (v1 health check)"
        else:
            template = "* %s"
        self.stdout.write(template % name)

    self.stdout.write("Checking devices:")
    active_devices = Device.objects.exclude(status=Device.RETIRED)
    for device in active_devices.order_by('hostname'):
        dictionary = DeviceDictionary.get(device.hostname)
        if not dictionary:
            self.stderr.write("* %s => no device dictionary" % device.hostname)
            continue

        # The template named by 'extends', minus its file extension, is the
        # base name used to look up the health check file.
        parameters = dictionary.to_dict()['parameters']
        extends = os.path.splitext(parameters['extends'])[0]
        filename = os.path.join(health_dir, "%s.yaml" % extends)
        if os.path.exists(filename):
            self.stdout.write("* %s => %s.yaml" % (device.hostname, extends))
        else:
            self.stderr.write("* %s => no health check found for %s.yaml" %
                              (device.hostname, extends))
def test_is_deprecated_json(self):
    """
    is_deprecated_json is true for the factory's JSON job and false for
    both its YAML job and its invalid JSON job data.
    """
    json_job = self.factory.make_job_json()
    self.assertTrue(is_deprecated_json(json_job))

    yaml_job = self.factory.make_job_yaml()
    self.assertFalse(is_deprecated_json(yaml_job))

    broken_json = self.factory.make_invalid_job_json()
    self.assertFalse(is_deprecated_json(broken_json))