def test_job_handlers(self):
    """select_device() must keep refusing the job until create_job() reserves it.

    Walks the job through each missing precondition in turn (no
    actual_device, no worker on the device) and checks selection stays
    None, then verifies create_job() reserves the device.
    """
    self.restart()
    host = 'fakeqemu3'
    qemu = self.factory.make_device(self.device_type, host)
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), self.factory.make_user())
    # No actual_device assigned yet: nothing can be selected.
    self.assertIsNone(select_device(job, self.dispatchers))
    # An actual_device alone is still not enough.
    job.actual_device = qemu
    self.assertIsNone(select_device(job, self.dispatchers))
    # Even with a worker attached, selection returns None before create_job().
    qemu.worker_host = self.worker
    self.assertIsNone(select_device(job, self.dispatchers))
    create_job(job, qemu)
    self.assertEqual(job.actual_device, qemu)
    self.assertEqual(qemu.status, Device.RESERVED)
def test_job_handlers(self):
    """As the plain handler test, but with a device dictionary saved first.

    Stores a DeviceDictionary entry for the hostname before creating the
    device, then checks select_device() keeps returning None until
    create_job() reserves the device.
    """
    self.restart()
    host = 'fakeqemu3'
    entry = DeviceDictionary(hostname=host)
    entry.parameters = self.conf
    entry.save()
    qemu = self.factory.make_device(self.device_type, host)
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), self.factory.make_user())
    # No actual_device assigned yet: nothing can be selected.
    self.assertIsNone(select_device(job, self.dispatchers))
    # An actual_device alone is still not enough.
    job.actual_device = qemu
    self.assertIsNone(select_device(job, self.dispatchers))
    # Even with a worker attached, selection returns None before create_job().
    qemu.worker_host = self.worker
    self.assertIsNone(select_device(job, self.dispatchers))
    create_job(job, qemu)
    self.assertEqual(job.actual_device, qemu)
    self.assertEqual(qemu.status, Device.RESERVED)
def test_dispatcher_restart(self):
    """A job bound to a remote worker stays SUBMITTED across selection calls.

    Assigns the device to self.remote (not the connected worker), checks
    select_device() returns None without altering job status, then
    reserves via create_job() and checks a further selection attempt
    still leaves the job SUBMITTED.
    """
    self.restart()
    host = 'fakeqemu4'
    entry = DeviceDictionary(hostname=host)
    entry.parameters = self.conf
    entry.save()
    qemu = self.factory.make_device(self.device_type, host)
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), self.factory.make_user())
    job.actual_device = qemu
    self.assertEqual(job.status, TestJob.SUBMITTED)
    # Device lives on the remote worker, not the locally connected one.
    qemu.worker_host = self.remote
    self.assertIsNone(select_device(job, self.dispatchers))
    self.assertEqual(job.status, TestJob.SUBMITTED)
    create_job(job, qemu)
    self.assertEqual(job.actual_device, qemu)
    self.assertEqual(qemu.status, Device.RESERVED)
    # Selection after reservation must not disturb the job status either.
    self.assertIsNone(select_device(job, self.dispatchers))
    self.assertEqual(job.status, TestJob.SUBMITTED)
def process_jobs(self, options):
    """Assign reserved devices to submitted pipeline jobs and dispatch them.

    Iterates over SUBMITTED pipeline jobs which already have an
    actual_device, reserves the device (select_device / create_job),
    renders the per-job configuration and sends a 'START' multipart
    message to the worker over ZMQ (self.controler).  For MultiNode
    parents, additional 'START' messages are sent for each
    dynamic_connection sub-job, addressed to the worker of the parent's
    actual_device.  Any template / IO / YAML error fails the job as
    INCOMPLETE.

    :param options: dict of command options; 'env', 'env_dut' and
        'dispatchers_config' paths are read here.
    """
    for job in TestJob.objects.filter(
            Q(status=TestJob.SUBMITTED) & Q(is_pipeline=True) & ~Q(actual_device=None))\
            .order_by('-health_check', '-priority', 'submit_time', 'target_group', 'id'):
        device = None
        worker_host = None

        device = select_device(job, self.dispatchers)
        if not device:
            # e.g. one or more jobs in the MultiNode group do not yet have Reserved devices.
            continue
        # selecting device can change the job
        job.refresh_from_db()

        self.logger.info("[%d] Assigning %s device", job.id, device)
        if job.actual_device is None:
            # health checks
            device = job.requested_device
        if not device.worker_host:
            msg = "Infrastructure error: Invalid worker information"
            self.logger.error("[%d] %s", job.id, msg)
            fail_job(job, msg, TestJob.INCOMPLETE)
            continue

        # Launch the job
        create_job(job, device)
        self.logger.info("[%d] START => %s (%s)", job.id,
                         device.worker_host.hostname, device.hostname)
        worker_host = device.worker_host
        try:
            # Load job definition to get the variables for template
            # rendering
            # NOTE(review): yaml.load without an explicit Loader on a
            # user-submitted definition — consider yaml.safe_load.
            job_def = yaml.load(job.definition)
            job_ctx = job_def.get('context', {})

            # Load env.yaml, env-dut.yaml and dispatcher configuration
            # All three are optional
            env_str = load_optional_yaml_file(options['env'])
            env_dut_str = load_optional_yaml_file(options['env_dut'])

            # Load device configuration
            if device:
                device_configuration = device.load_configuration(job_ctx)

            dispatcher_config_file = os.path.join(
                options['dispatchers_config'],
                "%s.yaml" % worker_host.hostname)
            dispatcher_config = load_optional_yaml_file(
                dispatcher_config_file)

            self.controler.send_multipart([
                str(worker_host.hostname),
                'START', str(job.id),
                self.export_definition(job),
                str(device_configuration),
                dispatcher_config,
                env_str, env_dut_str
            ])

            if job.is_multinode:
                # All secondary connections must be made from a dispatcher local to the one host device
                # to allow for local firewalls etc. So the secondary connection is started on the
                # remote worker of the "nominated" host.
                # This job will not be a dynamic_connection, this is the parent.
                device = None
                device_configuration = None
                # to get this far, the rest of the multinode group must also be ready
                # so start the dynamic connections
                parent = job
                for group_job in job.sub_jobs_list:
                    if group_job == parent or not group_job.dynamic_connection:
                        continue
                    worker_host = parent.actual_device.worker_host
                    dispatcher_config_file = os.path.join(
                        options['dispatchers_config'],
                        "%s.yaml" % worker_host.hostname)
                    dispatcher_config = load_optional_yaml_file(
                        dispatcher_config_file)
                    # inherit only enough configuration for dynamic_connection operation
                    device_configuration = parent.actual_device.load_configuration(
                        job_ctx)
                    self.logger.info(
                        "[%d] Trimming dynamic connection device configuration.",
                        group_job.id)
                    device_configuration = parent.actual_device.minimise_configuration(
                        device_configuration)
                    self.logger.info("[%d] START => %s (connection)",
                                     group_job.id, worker_host.hostname)
                    self.controler.send_multipart([
                        str(worker_host.hostname),
                        'START', str(group_job.id),
                        self.export_definition(group_job),
                        str(device_configuration),
                        dispatcher_config,
                        env_str, env_dut_str
                    ])
            # success path: skip the INCOMPLETE fall-through below
            continue
        except jinja2.TemplateNotFound as exc:
            self.logger.error("[%d] Template not found: '%s'",
                              job.id, exc.message)
            msg = "Infrastructure error: Template not found: '%s'" % \
                exc.message
        except jinja2.TemplateSyntaxError as exc:
            self.logger.error(
                "[%d] Template syntax error in '%s', line %d: %s",
                job.id, exc.name, exc.lineno, exc.message)
            msg = "Infrastructure error: Template syntax error in '%s', line %d: %s" % \
                (exc.name, exc.lineno, exc.message)
        except IOError as exc:
            self.logger.error("[%d] Unable to read '%s': %s",
                              job.id, exc.filename, exc.strerror)
            msg = "Infrastructure error: cannot open '%s': %s" % \
                (exc.filename, exc.strerror)
        except yaml.YAMLError as exc:
            self.logger.error("[%d] Unable to parse job definition: %s",
                              job.id, exc)
            msg = "Infrastructure error: cannot parse job definition: %s" % \
                exc
        # Only reached when one of the handlers above set msg.
        self.logger.error("[%d] INCOMPLETE job", job.id)
        fail_job(job=job, fail_msg=msg, job_status=TestJob.INCOMPLETE)
def process_jobs(self, options):
    """Dispatch submitted pipeline jobs, including dynamic connections.

    For a dynamic_connection job the worker is taken from
    job.lookup_worker (the dispatcher local to the "nominated" host
    device); otherwise a device is reserved via select_device() and
    create_job() (new jobs) or reused from job.actual_device
    (retries).  The job definition, device configuration, dispatcher
    configuration and env files are then sent to the worker as a
    'START' multipart ZMQ message.  Errors while rendering fail the
    job as INCOMPLETE.

    :param options: dict of command options; 'env', 'env_dut' and
        'dispatchers_config' paths are read here.
    """
    for job in TestJob.objects.filter(
            Q(status=TestJob.SUBMITTED) & Q(is_pipeline=True) & ~Q(actual_device=None))\
            .order_by('-health_check', '-priority', 'submit_time', 'target_group', 'id'):
        if job.dynamic_connection:
            # A secondary connection must be made from a dispatcher local to the host device
            # to allow for local firewalls etc. So the secondary connection is started on the
            # remote worker of the "nominated" host.
            # FIXME:
            device = None
            worker_host = job.lookup_worker
            self.logger.info("[%d] START => %s (connection)",
                             job.id, worker_host.hostname)
        else:
            device = select_device(job, self.dispatchers)
            if not device:
                continue
            # selecting device can change the job
            job = TestJob.objects.get(id=job.id)
            self.logger.info("[%d] Assigning %s device", job.id, device)
            if job.actual_device is None:
                # first assignment: use the requested device
                device = job.requested_device
                if not device.worker_host:
                    msg = "Infrastructure error: Invalid worker information"
                    self.logger.error("[%d] %s", job.id, msg)
                    fail_job(job, msg, TestJob.INCOMPLETE)
                    continue
                # Launch the job
                create_job(job, device)
                self.logger.info("[%d] START => %s (%s)", job.id,
                                 device.worker_host.hostname, device.hostname)
                worker_host = device.worker_host
            else:
                # job already has a device: this is a retry
                device = job.actual_device
                if not device.worker_host:
                    msg = "Infrastructure error: Invalid worker information"
                    self.logger.error("[%d] %s", job.id, msg)
                    fail_job(job, msg, TestJob.INCOMPLETE)
                    continue
                self.logger.info("[%d] START => %s (%s) (retrying)", job.id,
                                 device.worker_host.hostname, device.hostname)
                worker_host = device.worker_host
        try:
            # Load job definition to get the variables for template
            # rendering
            # NOTE(review): yaml.load without an explicit Loader on a
            # user-submitted definition — consider yaml.safe_load.
            job_def = yaml.load(job.definition)
            job_ctx = job_def.get('context', {})

            # Load device configuration; a dynamic connection has no
            # device of its own, so send an empty configuration.
            device_configuration = '' \
                if job.dynamic_connection else device.load_device_configuration(job_ctx)

            # Load env.yaml, env-dut.yaml and dispatcher configuration
            # All three are optional
            env_str = load_optional_yaml_file(options['env'])
            env_dut_str = load_optional_yaml_file(options['env_dut'])
            dispatcher_config_file = os.path.join(options['dispatchers_config'],
                                                  "%s.yaml" % worker_host.hostname)
            dispatcher_config = load_optional_yaml_file(dispatcher_config_file)

            if job.is_multinode:
                for group_job in job.sub_jobs_list:
                    if group_job.dynamic_connection:
                        # to get this far, the rest of the multinode group must also be ready
                        # so start the dynamic connections
                        # FIXME: rationalise and streamline
                        self.controler.send_multipart(
                            [str(worker_host.hostname),
                             'START', str(group_job.id),
                             self.export_definition(group_job),
                             str(device_configuration),
                             dispatcher_config,
                             env_str, env_dut_str])

            self.controler.send_multipart(
                [str(worker_host.hostname),
                 'START', str(job.id),
                 self.export_definition(job),
                 str(device_configuration),
                 dispatcher_config,
                 env_str, env_dut_str])
            # success path: skip the INCOMPLETE fall-through below
            continue
        except jinja2.TemplateNotFound as exc:
            self.logger.error("[%d] Template not found: '%s'",
                              job.id, exc.message)
            msg = "Infrastructure error: Template not found: '%s'" % \
                exc.message
        except jinja2.TemplateSyntaxError as exc:
            self.logger.error("[%d] Template syntax error in '%s', line %d: %s",
                              job.id, exc.name, exc.lineno, exc.message)
            msg = "Infrastructure error: Template syntax error in '%s', line %d: %s" % \
                (exc.name, exc.lineno, exc.message)
        except IOError as exc:
            self.logger.error("[%d] Unable to read '%s': %s",
                              job.id, exc.filename, exc.strerror)
            msg = "Infrastructure error: cannot open '%s': %s" % \
                (exc.filename, exc.strerror)
        except yaml.YAMLError as exc:
            self.logger.error("[%d] Unable to parse job definition: %s",
                              job.id, exc)
            msg = "Infrastructure error: cannot parse job definition: %s" % \
                exc
        # Only reached when one of the handlers above set msg.
        self.logger.error("[%d] INCOMPLETE job", job.id)
        fail_job(job=job, fail_msg=msg, job_status=TestJob.INCOMPLETE)
def process_jobs(self, options):
    """Dispatch submitted pipeline jobs; return False on the first stall.

    Like the other variants, but returns a bool: False as soon as no
    device can be selected or worker information is invalid (which also
    stops processing of any remaining jobs in this pass — NOTE(review):
    a `continue` may have been intended here; confirm against callers),
    True after the loop completes.  Errors during rendering are caught
    in a single combined handler and the job is failed as INCOMPLETE.

    :param options: dict of command options; 'env' and 'env_dut' paths
        are read here.
    :return: False when a job could not be scheduled, True otherwise.
    """
    for job in TestJob.objects.filter(
            Q(status=TestJob.SUBMITTED) & Q(is_pipeline=True) & ~Q(actual_device=None))\
            .order_by('-health_check', '-priority', 'submit_time', 'target_group', 'id'):
        if job.dynamic_connection:
            # A secondary connection must be made from a dispatcher local to the host device
            # to allow for local firewalls etc. So the secondary connection is started on the
            # remote worker of the "nominated" host.
            # FIXME:
            device = None
            worker_host = job.lookup_worker
            self.logger.info("[%d] START => %s (connection)",
                             job.id, worker_host.hostname)
        else:
            device = select_device(job, self.dispatchers)
            if not device:
                return False
            # selecting device can change the job
            job = TestJob.objects.get(id=job.id)
            self.logger.info("[%d] Assigning %s device", job.id, device)
            if job.actual_device is None:
                # first assignment: use the requested device
                device = job.requested_device
                if not device.worker_host:
                    msg = "Infrastructure error: Invalid worker information"
                    self.logger.error("[%d] %s", job.id, msg)
                    fail_job(job, msg, TestJob.INCOMPLETE)
                    return False
                # Launch the job
                create_job(job, device)
                self.logger.info("[%d] START => %s (%s)", job.id,
                                 device.worker_host.hostname, device.hostname)
                worker_host = device.worker_host
            else:
                # job already has a device: this is a retry
                device = job.actual_device
                if not device.worker_host:
                    msg = "Infrastructure error: Invalid worker information"
                    self.logger.error("[%d] %s", job.id, msg)
                    fail_job(job, msg, TestJob.INCOMPLETE)
                    return False
                self.logger.info("[%d] START => %s (%s) (retrying)", job.id,
                                 device.worker_host.hostname, device.hostname)
                worker_host = device.worker_host
        try:
            # Load job definition to get the variables for template
            # rendering
            # NOTE(review): yaml.load without an explicit Loader on a
            # user-submitted definition — consider yaml.safe_load.
            job_def = yaml.load(job.definition)
            job_ctx = job_def.get('context', {})

            # Load device configuration; a dynamic connection has no
            # device of its own.
            device_configuration = None \
                if job.dynamic_connection else device.load_device_configuration(job_ctx)

            env_str = get_env_string(options['env'])
            env_dut_str = get_env_string(options['env_dut'])

            if job.is_multinode:
                for group_job in job.sub_jobs_list:
                    if group_job.dynamic_connection:
                        # to get this far, the rest of the multinode group must also be ready
                        # so start the dynamic connections
                        # FIXME: rationalise and streamline
                        self.controler.send_multipart([
                            str(worker_host.hostname),
                            'START', str(group_job.id),
                            self.export_definition(group_job),
                            str(device_configuration),
                            env_str, env_dut_str
                        ])

            self.controler.send_multipart([
                str(worker_host.hostname),
                'START', str(job.id),
                self.export_definition(job),
                str(device_configuration),
                env_str, env_dut_str
            ])
        except (jinja2.TemplateError, IOError, yaml.YAMLError) as exc:
            # One combined handler; dispatch on the concrete type to build msg.
            if isinstance(exc, jinja2.TemplateNotFound):
                self.logger.error("Template not found: '%s'",
                                  exc.message)
                msg = "Infrastructure error: Template not found: '%s'" % \
                    exc.message
            elif isinstance(exc, jinja2.TemplateSyntaxError):
                self.logger.error(
                    "Template syntax error in '%s', line %d: %s",
                    exc.name, exc.lineno, exc.message)
                msg = "Infrastructure error: Template syntax error in '%s', line %d: %s" % \
                    (exc.name, exc.lineno, exc.message)
            elif isinstance(exc, IOError):
                # NOTE(review): reports options['env'] even if the failing
                # file was env_dut or the definition — confirm intent.
                self.logger.error("Unable to read '%s': %s",
                                  options['env'], exc.strerror)
                msg = "Infrastructure error: cannot open '%s': %s" % \
                    (options['env'], exc.strerror)
            elif isinstance(exc, yaml.YAMLError):
                self.logger.error("Unable to parse job definition: %s",
                                  exc)
                msg = "Infrastructure error: cannot parse job definition: %s" % \
                    exc
            else:
                self.logger.exception(exc)
                msg = "Infrastructure error: %s" % exc.message
            self.logger.error("[%d] INCOMPLETE job", job.id)
            fail_job(job=job, fail_msg=msg, job_status=TestJob.INCOMPLETE)
    return True
def process_jobs(self, options):
    """Dispatch submitted pipeline jobs; fail state handled inline.

    Variant which, instead of calling fail_job() on rendering errors,
    sets job.status = INCOMPLETE directly and, for non-dynamic
    connections, transitions the device back to IDLE and clears
    current_job.  Returns False as soon as no device can be selected or
    worker information is invalid (which also stops processing of any
    remaining jobs in this pass — NOTE(review): a `continue` may have
    been intended; confirm against callers), True after the loop.

    :param options: dict of command options; 'env' and 'env_dut' paths
        are read here.
    :return: False when a job could not be scheduled, True otherwise.
    """
    for job in TestJob.objects.filter(
            Q(status=TestJob.SUBMITTED) & Q(is_pipeline=True) & ~Q(actual_device=None))\
            .order_by('-health_check', '-priority', 'submit_time', 'target_group', 'id'):
        if job.dynamic_connection:
            # A secondary connection must be made from a dispatcher local to the host device
            # to allow for local firewalls etc. So the secondary connection is started on the
            # remote worker of the "nominated" host.
            # FIXME:
            worker_host = job.lookup_worker
            self.logger.info("[%d] START => %s (connection)",
                             job.id, worker_host.hostname)
        else:
            device = select_device(job, self.dispatchers)
            if not device:
                return False
            # selecting device can change the job
            job = TestJob.objects.get(id=job.id)
            self.logger.info("[%d] Assigning %s device", job.id, device)
            if job.actual_device is None:
                # first assignment: use the requested device
                device = job.requested_device
                if not device.worker_host:
                    msg = "Infrastructure error: Invalid worker information"
                    self.logger.error("[%d] %s", job.id, msg)
                    fail_job(job, msg, TestJob.INCOMPLETE)
                    return False
                # Launch the job
                create_job(job, device)
                self.logger.info("[%d] START => %s (%s)", job.id,
                                 device.worker_host.hostname, device.hostname)
                worker_host = device.worker_host
            else:
                # job already has a device: this is a retry
                device = job.actual_device
                if not device.worker_host:
                    msg = "Infrastructure error: Invalid worker information"
                    self.logger.error("[%d] %s", job.id, msg)
                    fail_job(job, msg, TestJob.INCOMPLETE)
                    return False
                self.logger.info("[%d] START => %s (%s) (retrying)", job.id,
                                 device.worker_host.hostname, device.hostname)
                worker_host = device.worker_host
        try:
            # Load job definition to get the variables for template
            # rendering
            # NOTE(review): yaml.load without an explicit Loader on a
            # user-submitted definition — consider yaml.safe_load.
            job_def = yaml.load(job.definition)
            job_ctx = job_def.get('context', {})

            # Load device configuration; a dynamic connection has no
            # device of its own.
            device_configuration = None \
                if job.dynamic_connection else device.load_device_configuration(job_ctx)

            if job.is_multinode:
                for group_job in job.sub_jobs_list:
                    if group_job.dynamic_connection:
                        # to get this far, the rest of the multinode group must also be ready
                        # so start the dynamic connections
                        # FIXME: rationalise and streamline
                        self.controler.send_multipart(
                            [str(worker_host.hostname),
                             'START', str(group_job.id),
                             self.export_definition(group_job),
                             str(device_configuration),
                             get_env_string(options['env']),
                             get_env_string(options['env_dut'])])

            self.controler.send_multipart(
                [str(worker_host.hostname),
                 'START', str(job.id),
                 self.export_definition(job),
                 str(device_configuration),
                 get_env_string(options['env']),
                 get_env_string(options['env_dut'])])
        except (jinja2.TemplateError, IOError, yaml.YAMLError) as exc:
            # One combined handler; dispatch on the concrete type to build msg.
            if isinstance(exc, jinja2.TemplateNotFound):
                self.logger.error("Template not found: '%s'",
                                  exc.message)
                msg = "Infrastructure error: Template not found: '%s'" % \
                    exc.message
            elif isinstance(exc, jinja2.TemplateSyntaxError):
                self.logger.error("Template syntax error in '%s', line %d: %s",
                                  exc.name, exc.lineno, exc.message)
                msg = "Infrastructure error: Template syntax error in '%s', line %d: %s" % \
                    (exc.name, exc.lineno, exc.message)
            elif isinstance(exc, IOError):
                # NOTE(review): reports options['env'] even if the failing
                # file was env_dut or the definition — confirm intent.
                self.logger.error("Unable to read '%s': %s",
                                  options['env'], exc.strerror)
                msg = "Infrastructure error: cannot open '%s': %s" % \
                    (options['env'], exc.strerror)
            elif isinstance(exc, yaml.YAMLError):
                self.logger.error("Unable to parse job definition: %s",
                                  exc)
                msg = "Infrastructure error: cannot parse job definition: %s" % \
                    exc
            else:
                self.logger.exception(exc)
                msg = "Infrastructure error: %s" % exc.message
            self.logger.error("[%d] INCOMPLETE job", job.id)
            # Inline failure handling: mark the job INCOMPLETE and, when a
            # real device is involved, release it back to IDLE.
            job.status = TestJob.INCOMPLETE
            if job.dynamic_connection:
                job.failure_comment = msg
                job.save()
            else:
                new_status = Device.IDLE
                device.state_transition_to(
                    new_status, message=msg, job=job)
                device.status = new_status
                device.current_job = None
                job.failure_comment = msg
                job.save()
                device.save()
    return True