def test_job(self):
    """
    Map the pipeline description of a generated job to metadata and check
    the MetaType, TestData and ActionData records which result.
    """
    submitter = self.factory.make_user()
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), submitter)
    context = yaml.load(job.definition).get('context', {})
    qemu = Device.objects.get(hostname='fakeqemu1')
    raw_config = qemu.load_device_configuration(context)  # raw dict
    job_parser = JobParser()
    pipeline_device = PipelineDevice(raw_config, qemu.hostname)
    parsed = job_parser.parse(
        job.definition, pipeline_device, job.id, None, output_dir='/tmp')
    parsed.pipeline.validate_actions()
    description = parsed.describe()
    map_metadata(yaml.dump(description), job)

    # exactly one deploy metatype and one boot metatype are expected
    for metatype in (MetaType.DEPLOY_TYPE, MetaType.BOOT_TYPE):
        self.assertEqual(
            MetaType.objects.filter(metatype=metatype).count(), 1)

    # every action level must hang off the single TestData of this job
    total_actions = ActionData.objects.all().count()
    self.assertEqual(TestData.objects.all().count(), 1)
    only_testdata = TestData.objects.all()[0]
    self.assertEqual(only_testdata.testjob, job)
    for level in ActionData.objects.all():
        self.assertEqual(level.testdata, only_testdata)
    job_levels = [
        level
        for item in job.test_data.all()
        for level in item.actionlevels.all()
    ]
    self.assertEqual(total_actions, len(job_levels))

    # per-metatype checks
    deploy_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.DEPLOY_TYPE).count()
    boot_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.BOOT_TYPE).count()
    self.assertNotEqual(boot_total, 0)
    unknown_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.UNKNOWN_TYPE).count()
    self.assertEqual(unknown_total, 0)
    boot_levels = ActionData.objects.filter(
        meta_type__metatype=MetaType.BOOT_TYPE)
    for level in boot_levels:
        self.assertEqual(level.testdata.testjob.id, job.id)
    deploy_for_job = ActionData.objects.filter(
        meta_type__metatype=MetaType.DEPLOY_TYPE,
        testdata__testjob=job
    ).count()
    self.assertEqual(deploy_for_job, deploy_total)
def test_job_multi(self):
    """
    Parse the multi-test.yaml support file, validate its pipeline and map
    the resulting description to metadata.
    """
    MetaType.objects.all().delete()
    here = os.path.dirname(__file__)
    yaml_path = os.path.join(here, 'multi-test.yaml')
    self.assertTrue(os.path.exists(yaml_path))
    with open(yaml_path, 'r') as handle:
        content = handle.read()
    job = TestJob.from_yaml_and_user(content, self.user)
    context = yaml.load(job.definition).get('context', {})
    qemu = Device.objects.get(hostname='fakeqemu1')
    raw_config = qemu.load_device_configuration(
        context, system=False)  # raw dict
    job_parser = JobParser()
    pipeline_device = PipelineDevice(raw_config, qemu.hostname)
    parsed = job_parser.parse(
        job.definition, pipeline_device, job.id, None, "",
        output_dir='/tmp')
    # validation is run through allow_missing_path, naming the qemu binary
    allow_missing_path(
        parsed.pipeline.validate_actions, self, 'qemu-system-x86_64')
    description = parsed.describe()
    map_metadata(yaml.dump(description), job)
def test_job(self):
    """
    Start from empty MetaType and TestJob tables, map the pipeline
    description of a generated job to metadata and check the MetaType,
    TestData and ActionData records which result.
    """
    MetaType.objects.all().delete()
    TestJob.objects.all().delete()
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    context = yaml_safe_load(job.definition).get("context", {})
    # override to allow unit tests on all types of systems
    context.update({"no_kvm": True})
    qemu = Device.objects.get(hostname="fakeqemu1")
    raw_config = qemu.load_configuration(context)  # raw dict
    job_parser = JobParser()
    pipeline_device = PipelineDevice(raw_config)
    parsed = job_parser.parse(job.definition, pipeline_device, job.id, None, "")
    allow_missing_path(
        parsed.pipeline.validate_actions, self, "qemu-system-x86_64")
    description = parsed.describe()
    map_metadata(yaml_dump(description), job)

    # exactly one deploy metatype and one boot metatype are expected
    for metatype in (MetaType.DEPLOY_TYPE, MetaType.BOOT_TYPE):
        self.assertEqual(
            MetaType.objects.filter(metatype=metatype).count(), 1)

    # every action level must hang off the single TestData of this job
    total_actions = ActionData.objects.all().count()
    self.assertEqual(TestData.objects.all().count(), 1)
    only_testdata = TestData.objects.all()[0]
    self.assertEqual(only_testdata.testjob, job)
    for level in ActionData.objects.all():
        self.assertEqual(level.testdata, only_testdata)
    job_levels = list(job.testdata.actionlevels.all())
    self.assertEqual(total_actions, len(job_levels))

    # per-metatype checks
    deploy_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.DEPLOY_TYPE).count()
    boot_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.BOOT_TYPE).count()
    self.assertNotEqual(boot_total, 0)
    unknown_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.UNKNOWN_TYPE).count()
    self.assertEqual(unknown_total, 0)
    boot_levels = ActionData.objects.filter(
        meta_type__metatype=MetaType.BOOT_TYPE)
    for level in boot_levels:
        self.assertEqual(level.testdata.testjob.id, job.id)
    deploy_for_job = ActionData.objects.filter(
        meta_type__metatype=MetaType.DEPLOY_TYPE, testdata__testjob=job
    ).count()
    self.assertEqual(deploy_for_job, deploy_total)
def test_job(self):
    """
    Start from empty MetaType and TestJob tables, map the pipeline
    description of a generated job to metadata and check the MetaType,
    TestData and ActionData records which result.
    """
    MetaType.objects.all().delete()
    TestJob.objects.all().delete()
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    context = yaml.load(job.definition).get('context', {})
    qemu = Device.objects.get(hostname='fakeqemu1')
    raw_config = qemu.load_device_configuration(
        context, system=False)  # raw dict
    job_parser = JobParser()
    pipeline_device = PipelineDevice(raw_config, qemu.hostname)
    parsed = job_parser.parse(
        job.definition, pipeline_device, job.id, None, None, None,
        output_dir='/tmp')
    allow_missing_path(
        parsed.pipeline.validate_actions, self, 'qemu-system-x86_64')
    description = parsed.describe()
    map_metadata(yaml.dump(description), job)

    # exactly one deploy metatype and one boot metatype are expected
    for metatype in (MetaType.DEPLOY_TYPE, MetaType.BOOT_TYPE):
        self.assertEqual(
            MetaType.objects.filter(metatype=metatype).count(), 1)

    # every action level must hang off the single TestData of this job
    total_actions = ActionData.objects.all().count()
    self.assertEqual(TestData.objects.all().count(), 1)
    only_testdata = TestData.objects.all()[0]
    self.assertEqual(only_testdata.testjob, job)
    for level in ActionData.objects.all():
        self.assertEqual(level.testdata, only_testdata)
    job_levels = [
        level
        for item in job.testdata_set.all()
        for level in item.actionlevels.all()
    ]
    self.assertEqual(total_actions, len(job_levels))

    # per-metatype checks
    deploy_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.DEPLOY_TYPE).count()
    boot_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.BOOT_TYPE).count()
    self.assertNotEqual(boot_total, 0)
    unknown_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.UNKNOWN_TYPE).count()
    self.assertEqual(unknown_total, 0)
    boot_levels = ActionData.objects.filter(
        meta_type__metatype=MetaType.BOOT_TYPE)
    for level in boot_levels:
        self.assertEqual(level.testdata.testjob.id, job.id)
    deploy_for_job = ActionData.objects.filter(
        meta_type__metatype=MetaType.DEPLOY_TYPE,
        testdata__testjob=job).count()
    self.assertEqual(deploy_for_job, deploy_total)
def test_inline(self):
    """
    Check that an inline test definition which has install steps but no
    run steps can be parsed, validated and mapped to metadata.
    """
    data = self.factory.make_job_data()
    test_actions = [action for action in data['actions'] if 'test' in action]
    test_block = test_actions[0]
    # inline repository: only install steps and metadata, no run steps
    repository = {
        "install": {
            "steps": [
                "apt",
            ]
        },
        "metadata": {
            "description": "Basic system test command for Linaro Ubuntu images",
            "format": "Lava-Test Test Definition 1.0",
            "name": "smoke-tests-basic"
        }
    }
    inline_definition = {
        "path": "inline/smoke-tests-basic.yaml",
        "from": "inline",
        "name": "smoke-tests-inline",
        "repository": repository
    }
    test_block['test']['definitions'] = [inline_definition]

    job = TestJob.from_yaml_and_user(yaml.dump(data), self.user)
    context = yaml.load(job.definition).get('context', {})
    # override to allow unit tests on all types of systems
    context.update({'no_kvm': True})
    qemu = Device.objects.get(hostname='fakeqemu1')
    raw_config = qemu.load_device_configuration(
        context, system=False)  # raw dict
    job_parser = JobParser()
    pipeline_device = PipelineDevice(raw_config, qemu.hostname)
    parsed = job_parser.parse(
        job.definition, pipeline_device, job.id, None, "",
        output_dir='/tmp')
    allow_missing_path(
        parsed.pipeline.validate_actions, self, 'qemu-system-x86_64')
    description = parsed.describe()
    map_metadata(yaml.dump(description), job)
def test_job_multi(self):
    """
    Load multi-test.yaml from the directory of this module, then parse,
    validate and map the described pipeline to metadata.
    """
    MetaType.objects.all().delete()
    support_dir = os.path.dirname(__file__)
    support_file = os.path.join(support_dir, 'multi-test.yaml')
    self.assertTrue(os.path.exists(support_file))
    with open(support_file, 'r') as stream:
        submission = stream.read()
    job = TestJob.from_yaml_and_user(submission, self.user)
    definition = yaml.load(job.definition)
    context = definition.get('context', {})
    qemu = Device.objects.get(hostname='fakeqemu1')
    raw_config = qemu.load_device_configuration(
        context, system=False)  # raw dict
    job_parser = JobParser()
    pipeline_device = PipelineDevice(raw_config, qemu.hostname)
    parsed = job_parser.parse(
        job.definition, pipeline_device, job.id, None, "",
        output_dir='/tmp')
    # validation is run through allow_missing_path, naming the qemu binary
    allow_missing_path(
        parsed.pipeline.validate_actions, self, 'qemu-system-x86_64')
    map_metadata(yaml.dump(parsed.describe()), job)
def test_job_multi(self):
    """
    Load multi-test.yaml from the directory of this module, then parse,
    validate and map the described pipeline to metadata.
    """
    MetaType.objects.all().delete()
    support_dir = os.path.dirname(__file__)
    support_file = os.path.join(support_dir, "multi-test.yaml")
    self.assertTrue(os.path.exists(support_file))
    with open(support_file, "r") as stream:
        submission = stream.read()
    job = TestJob.from_yaml_and_user(submission, self.user)
    definition = yaml.safe_load(job.definition)
    context = definition.get("context", {})
    # override to allow unit tests on all types of systems
    context.update({"no_kvm": True})
    qemu = Device.objects.get(hostname="fakeqemu1")
    raw_config = qemu.load_configuration(context)  # raw dict
    job_parser = JobParser()
    pipeline_device = PipelineDevice(raw_config)
    parsed = job_parser.parse(
        job.definition, pipeline_device, job.id, None, "")
    allow_missing_path(
        parsed.pipeline.validate_actions, self, "qemu-system-x86_64")
    map_metadata(yaml.dump(parsed.describe()), job)
def test_inline(self):
    """
    Check that an inline test definition which has install steps but no
    run steps can be parsed, validated and mapped to metadata.
    """
    data = self.factory.make_job_data()
    test_actions = [action for action in data['actions'] if 'test' in action]
    test_block = test_actions[0]
    # inline repository: only install steps and metadata, no run steps
    repository = {
        "install": {
            "steps": [
                "apt",
            ]
        },
        "metadata": {
            "description": "Basic system test command for Linaro Ubuntu images",
            "format": "Lava-Test Test Definition 1.0",
            "name": "smoke-tests-basic"
        }
    }
    inline_definition = {
        "path": "inline/smoke-tests-basic.yaml",
        "from": "inline",
        "name": "smoke-tests-inline",
        "repository": repository
    }
    test_block['test']['definitions'] = [inline_definition]

    job = TestJob.from_yaml_and_user(yaml.dump(data), self.user)
    context = yaml.load(job.definition).get('context', {})
    # override to allow unit tests on all types of systems
    context.update({'no_kvm': True})
    qemu = Device.objects.get(hostname='fakeqemu1')
    raw_config = qemu.load_configuration(context)  # raw dict
    job_parser = JobParser()
    pipeline_device = PipelineDevice(raw_config)
    parsed = job_parser.parse(
        job.definition, pipeline_device, job.id, None, "")
    allow_missing_path(
        parsed.pipeline.validate_actions, self, 'qemu-system-x86_64')
    description = parsed.describe()
    map_metadata(yaml.dump(description), job)
def test_inline(self):
    """
    Check that an inline test definition which has install steps but no
    run steps can be parsed, validated and mapped to metadata.
    """
    data = self.factory.make_job_data()
    test_actions = [action for action in data["actions"] if "test" in action]
    test_block = test_actions[0]
    # inline repository: only install steps and metadata, no run steps
    repository = {
        "install": {"steps": ["apt"]},
        "metadata": {
            "description": "Basic system test command for Linaro Ubuntu images",
            "format": "Lava-Test Test Definition 1.0",
            "name": "smoke-tests-basic",
        },
    }
    inline_definition = {
        "path": "inline/smoke-tests-basic.yaml",
        "from": "inline",
        "name": "smoke-tests-inline",
        "repository": repository,
    }
    test_block["test"]["definitions"] = [inline_definition]

    job = TestJob.from_yaml_and_user(yaml.dump(data), self.user)
    context = yaml.safe_load(job.definition).get("context", {})
    # override to allow unit tests on all types of systems
    context.update({"no_kvm": True})
    qemu = Device.objects.get(hostname="fakeqemu1")
    raw_config = qemu.load_configuration(context)  # raw dict
    job_parser = JobParser()
    pipeline_device = PipelineDevice(raw_config)
    parsed = job_parser.parse(
        job.definition, pipeline_device, job.id, None, "")
    allow_missing_path(
        parsed.pipeline.validate_actions, self, "qemu-system-x86_64")
    description = parsed.describe()
    map_metadata(yaml.dump(description), job)
def test_job(self):
    """
    Start from empty MetaType and TestJob tables, map the pipeline
    description of a generated job to metadata and check the MetaType,
    TestData and ActionData records which result.
    """
    MetaType.objects.all().delete()
    TestJob.objects.all().delete()
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    context = yaml.load(job.definition).get('context', {})
    # override to allow unit tests on all types of systems
    context.update({'no_kvm': True})
    qemu = Device.objects.get(hostname='fakeqemu1')
    raw_config = qemu.load_configuration(context)  # raw dict
    job_parser = JobParser()
    pipeline_device = PipelineDevice(raw_config)
    parsed = job_parser.parse(
        job.definition, pipeline_device, job.id, None, "")
    allow_missing_path(
        parsed.pipeline.validate_actions, self, 'qemu-system-x86_64')
    description = parsed.describe()
    map_metadata(yaml.dump(description), job)

    # exactly one deploy metatype and one boot metatype are expected
    for metatype in (MetaType.DEPLOY_TYPE, MetaType.BOOT_TYPE):
        self.assertEqual(
            MetaType.objects.filter(metatype=metatype).count(), 1)

    # every action level must hang off the single TestData of this job
    total_actions = ActionData.objects.all().count()
    self.assertEqual(TestData.objects.all().count(), 1)
    only_testdata = TestData.objects.all()[0]
    self.assertEqual(only_testdata.testjob, job)
    for level in ActionData.objects.all():
        self.assertEqual(level.testdata, only_testdata)
    job_levels = [
        level
        for item in job.testdata_set.all()
        for level in item.actionlevels.all()
    ]
    self.assertEqual(total_actions, len(job_levels))

    # per-metatype checks
    deploy_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.DEPLOY_TYPE).count()
    boot_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.BOOT_TYPE).count()
    self.assertNotEqual(boot_total, 0)
    unknown_total = ActionData.objects.filter(
        meta_type__metatype=MetaType.UNKNOWN_TYPE).count()
    self.assertEqual(unknown_total, 0)
    boot_levels = ActionData.objects.filter(
        meta_type__metatype=MetaType.BOOT_TYPE)
    for level in boot_levels:
        self.assertEqual(level.testdata.testjob.id, job.id)
    deploy_for_job = ActionData.objects.filter(
        meta_type__metatype=MetaType.DEPLOY_TYPE,
        testdata__testjob=job
    ).count()
    self.assertEqual(deploy_for_job, deploy_total)
def parse_job_description(job):
    """
    Read description.yaml from the output directory of the job, map its
    metadata and store the pipeline compatibility on the job.

    :param job: object providing output_dir, id, pipeline_compatibility
        and save(); presumably a TestJob - confirm against the callers.
    :return: None. Returns early, after logging an error, when the
        description cannot be opened or parsed.
    """
    logger = logging.getLogger("lava-master")
    description_path = os.path.join(job.output_dir, "description.yaml")
    try:
        with open(description_path, "r") as handle:
            description = handle.read()
        pipeline = yaml_load(description)
    except (OSError, yaml.YAMLError):
        logger.error("'Unable to open and parse '%s'", description_path)
        return

    mapped = map_metadata(description, job)
    if not mapped:
        logger.warning("[%d] unable to map metadata", job.id)

    # add the compatibility result from the master to the definition for
    # comparison on the slave.
    try:
        compatibility = int(pipeline["compatibility"])
    except (TypeError, ValueError):
        # report the value which could not be converted, then fall back to 0
        if pipeline is None:
            unparsed = None
        else:
            unparsed = pipeline["compatibility"]
        logger.error(
            "[%d] Unable to parse job compatibility: %s", job.id, unparsed)
        compatibility = 0
    job.pipeline_compatibility = compatibility
    job.save(update_fields=["pipeline_compatibility"])
def parse_job_description(job):
    """
    Read description.yaml from the output directory of the job, map its
    metadata and store the pipeline compatibility on the job.

    :param job: object providing output_dir, id, pipeline_compatibility
        and save(); presumably a TestJob - confirm against the callers.
    :return: None. Returns early, after logging an error, when the
        description cannot be opened or parsed.
    """
    filename = os.path.join(job.output_dir, 'description.yaml')
    logger = logging.getLogger('dispatcher-master')
    try:
        with open(filename, 'r') as f_describe:
            description = f_describe.read()
        # NOTE(review): yaml.load without an explicit Loader should not be
        # used on untrusted input; confirm whether safe_load is sufficient.
        pipeline = yaml.load(description)
    except (IOError, yaml.YAMLError):
        logger.error("'Unable to open and parse '%s'", filename)
        return

    if not map_metadata(description, job):
        logger.warning("[%d] unable to map metadata", job.id)

    # add the compatibility result from the master to the definition for comparison on the slave.
    # An empty description parses to None and a description may lack the
    # key: both are reported and treated as compatibility 0 instead of
    # raising an unhandled TypeError / KeyError.
    raw_compat = pipeline.get('compatibility') if isinstance(pipeline, dict) else None
    try:
        compat = int(raw_compat)
    except (TypeError, ValueError):
        logger.error("[%d] Unable to parse job compatibility: %s",
                     job.id, raw_compat)
        compat = 0
    job.pipeline_compatibility = compat
    job.save(update_fields=['pipeline_compatibility'])
def parse_job_description(job):
    """
    Read description.yaml from the output directory of the job, map its
    metadata and store the pipeline compatibility on the job.

    :param job: object providing output_dir, id, pipeline_compatibility
        and save(); presumably a TestJob - confirm against the callers.
    :return: None. Returns early, after logging an error, when the
        description cannot be opened or parsed.
    """
    logger = logging.getLogger('dispatcher-master')
    description_path = os.path.join(job.output_dir, 'description.yaml')
    try:
        with open(description_path, 'r') as handle:
            description = handle.read()
        pipeline = yaml.load(description)
    except (IOError, yaml.YAMLError):
        logger.error("'Unable to open and parse '%s'", description_path)
        return

    mapped = map_metadata(description, job)
    if not mapped:
        logger.warning("[%d] unable to map metadata", job.id)

    # add the compatibility result from the master to the definition for
    # comparison on the slave.
    try:
        compatibility = int(pipeline['compatibility'])
    except (TypeError, ValueError):
        # report the value which could not be converted, then fall back to 0
        if pipeline is None:
            unparsed = None
        else:
            unparsed = pipeline['compatibility']
        logger.error(
            "[%d] Unable to parse job compatibility: %s", job.id, unparsed)
        compatibility = 0
    job.pipeline_compatibility = compatibility
    job.save(update_fields=['pipeline_compatibility'])
def select_device(job):
    """
    Transitioning a device from Idle to Reserved is the responsibility of the scheduler_daemon (currently).
    This function just checks that the reserved device is valid for this job.
    Jobs will only enter this function if a device is already reserved for that job.
    Stores the pipeline description

    To prevent cycling between lava_scheduler_daemon:assign_jobs and here, if a job
    fails validation, the job is incomplete. Issues with this need to be fixed using
    device tags.

    :param job: the job to check; for a multinode job every sub job in
        job.sub_jobs_list is parsed and validated.
    :return: job.actual_device once parsing and validation succeed (None
        for a dynamic connection job, which has no device of its own), or
        None when the job cannot be started.
    """
    logger = logging.getLogger('dispatcher-master')
    if not job.dynamic_connection:
        if not job.actual_device:
            return None
        # compare by value: identity comparison of integers is unreliable
        if job.actual_device.status != Device.RESERVED:
            # should not happen
            logger.error("[%d] device [%s] not in reserved state", job.id, job.actual_device)
            return None

        if job.actual_device.worker_host is None:
            fail_msg = "Misconfigured device configuration for %s - missing worker_host" % job.actual_device
            fail_job(job, fail_msg=fail_msg)
            logger.error(fail_msg)
            # the job has been failed, do not carry on and validate it
            return None

    if job.is_multinode:
        # inject the actual group hostnames into the roles for the dispatcher to populate in the overlay.
        devices = {}
        for multinode_job in job.sub_jobs_list:
            # build a list of all devices in this group
            # NOTE(review): yaml.load without an explicit Loader should not
            # be used on untrusted input; confirm whether safe_load suffices.
            definition = yaml.load(multinode_job.definition)
            # devices are not necessarily assigned to all jobs in a group at the same time
            # check all jobs in this multinode group before allowing any to start.
            if multinode_job.dynamic_connection:
                logger.debug("[%s] dynamic connection job", multinode_job.sub_id)
                continue
            if not multinode_job.actual_device:
                logger.debug("[%s] job has no device yet", multinode_job.sub_id)
                return None
            devices[str(multinode_job.actual_device.hostname)] = definition['protocols']['lava-multinode']['role']
        for multinode_job in job.sub_jobs_list:
            # apply the complete list to all jobs in this group
            definition = yaml.load(multinode_job.definition)
            definition['protocols']['lava-multinode']['roles'] = devices
            multinode_job.definition = yaml.dump(definition)
            multinode_job.save()

    # Load job definition to get the variables for template rendering
    job_def = yaml.load(job.definition)
    job_ctx = job_def.get('context', {})
    parser = JobParser()
    device = None
    device_object = None
    if not job.dynamic_connection:
        device = job.actual_device

        try:
            device_config = device.load_device_configuration(job_ctx)  # raw dict
        except (jinja2.TemplateError, yaml.YAMLError, IOError) as exc:
            # FIXME: report the exceptions as useful user messages
            logger.error("[%d] jinja2 error: %s" % (job.id, exc))
            return None
        if not device_config or type(device_config) is not dict:
            # it is an error to have a pipeline device without a device dictionary as it will never get any jobs.
            msg = "Administrative error. Device '%s' has no device dictionary." % device.hostname
            logger.error('[%d] device-dictionary error: %s' % (job.id, msg))
            # as we don't control the scheduler, yet, this has to be an error and an incomplete job.
            # the scheduler_daemon sorts by a fixed order, so this would otherwise just keep on repeating.
            fail_job(job, fail_msg=msg)
            return None

        # equivalent of the NewDevice in lava-dispatcher, without .yaml file.
        device_object = PipelineDevice(device_config, device.hostname)
        # FIXME: drop this nasty hack once 'target' is dropped as a parameter
        if 'target' not in device_object:
            device_object.target = device.hostname
        device_object['hostname'] = device.hostname

    validate_list = job.sub_jobs_list if job.is_multinode else [job]
    for check_job in validate_list:
        parser_device = None if job.dynamic_connection else device_object
        try:
            logger.debug("[%d] parsing definition" % check_job.id)
            # pass (unused) output_dir just for validation as there is no zmq socket either.
            pipeline_job = parser.parse(
                check_job.definition, parser_device,
                check_job.id, None, output_dir=check_job.output_dir)
        except (AttributeError, JobError, NotImplementedError, KeyError, TypeError) as exc:
            logger.error('[%d] parser error: %s' % (check_job.id, exc))
            fail_job(check_job, fail_msg=exc)
            return None
        try:
            logger.debug("[%d] validating actions" % check_job.id)
            pipeline_job.pipeline.validate_actions()
        except (AttributeError, JobError, KeyError, TypeError) as exc:
            logger.error({device: exc})
            fail_job(check_job, fail_msg=exc)
            return None
        if pipeline_job:
            pipeline = pipeline_job.describe()
            # write the pipeline description to the job output directory.
            if not os.path.exists(check_job.output_dir):
                os.makedirs(check_job.output_dir)
            with open(os.path.join(check_job.output_dir, 'description.yaml'), 'w') as describe_yaml:
                describe_yaml.write(yaml.dump(pipeline))
            # NOTE(review): the metadata is mapped to job, not check_job -
            # confirm this is intended for multinode groups.
            map_metadata(yaml.dump(pipeline), job)
    return device
def select_device(job, dispatchers):
    """
    Transitioning a device from Idle to Reserved is the responsibility of the scheduler_daemon (currently).
    This function just checks that the reserved device is valid for this job.
    Jobs will only enter this function if a device is already reserved for that job.
    Stores the pipeline description

    To prevent cycling between lava_scheduler_daemon:assign_jobs and here, if a job
    fails validation, the job is incomplete. Issues with this need to be fixed using
    device tags.

    :param job: the job to check; for a multinode job every sub job in
        job.sub_jobs_list is parsed and validated.
    :param dispatchers: container tested for membership of the worker
        hostname of the device; presumably the workers registered with
        this master - confirm against the caller.
    :return: job.actual_device once parsing and validation succeed (None
        for a dynamic connection job, which has no device of its own), or
        None when the job cannot be started.
    """
    # FIXME: split out dynamic_connection, multinode and validation
    logger = logging.getLogger('dispatcher-master')
    if not job.dynamic_connection:
        if not job.actual_device:
            return None
        # compare by value: identity comparison of integers is unreliable
        if job.actual_device.status != Device.RESERVED:
            # should not happen
            logger.error("[%d] device [%s] not in reserved state", job.id, job.actual_device)
            return None

        if job.actual_device.worker_host is None:
            fail_msg = "Misconfigured device configuration for %s - missing worker_host" % job.actual_device
            fail_job(job, fail_msg=fail_msg)
            logger.error(fail_msg)
            return None

    if job.is_multinode:
        # inject the actual group hostnames into the roles for the dispatcher to populate in the overlay.
        devices = {}
        for multinode_job in job.sub_jobs_list:
            # build a list of all devices in this group
            # NOTE(review): yaml.load without an explicit Loader should not
            # be used on untrusted input; confirm whether safe_load suffices.
            definition = yaml.load(multinode_job.definition)
            # devices are not necessarily assigned to all jobs in a group at the same time
            # check all jobs in this multinode group before allowing any to start.
            if multinode_job.dynamic_connection:
                logger.debug("[%s] dynamic connection job", multinode_job.sub_id)
                continue
            if not multinode_job.actual_device:
                logger.debug("[%s] job has no device yet", multinode_job.sub_id)
                return None
            devices[str(multinode_job.actual_device.hostname)] = definition['protocols']['lava-multinode']['role']
        for multinode_job in job.sub_jobs_list:
            # apply the complete list to all jobs in this group
            definition = yaml.load(multinode_job.definition)
            definition['protocols']['lava-multinode']['roles'] = devices
            multinode_job.definition = yaml.dump(definition)
            multinode_job.save()

    # Load job definition to get the variables for template rendering
    job_def = yaml.load(job.definition)
    job_ctx = job_def.get('context', {})
    parser = JobParser()
    device = None
    device_object = None
    if not job.dynamic_connection:
        device = job.actual_device

        try:
            device_config = device.load_device_configuration(job_ctx)  # raw dict
        except (jinja2.TemplateError, yaml.YAMLError, IOError) as exc:
            logger.error("[%d] jinja2 error: %s" % (job.id, exc))
            msg = "Administrative error. Unable to parse '%s'" % exc
            fail_job(job, fail_msg=msg)
            return None
        if not device_config or type(device_config) is not dict:
            # it is an error to have a pipeline device without a device dictionary as it will never get any jobs.
            msg = "Administrative error. Device '%s' has no device dictionary." % device.hostname
            logger.error('[%d] device-dictionary error: %s' % (job.id, msg))
            # as we don't control the scheduler, yet, this has to be an error and an incomplete job.
            # the scheduler_daemon sorts by a fixed order, so this would otherwise just keep on repeating.
            fail_job(job, fail_msg=msg)
            return None
        if not device.worker_host or not device.worker_host.hostname:
            msg = "Administrative error. Device '%s' has no worker host." % device.hostname
            logger.error('[%d] worker host error: %s', job.id, msg)
            fail_job(job, fail_msg=msg)
            return None
        if device.worker_host.hostname not in dispatchers:
            # a configured worker has not called in to this master
            # likely that the worker is misconfigured - polling the wrong master
            # or simply not running at all.
            msg = """Administrative error. Device '{0}' has a worker_host setting of '{1}' but no slave has registered with this master using that FQDN.""".format(device.hostname, device.worker_host.hostname)
            logger.error('[%d] worker-hostname error: %s', job.id, msg)
            fail_job(job, fail_msg=msg)
            return None

        # equivalent of the NewDevice in lava-dispatcher, without .yaml file.
        device_object = PipelineDevice(device_config, device.hostname)
        # FIXME: drop this nasty hack once 'target' is dropped as a parameter
        if 'target' not in device_object:
            device_object.target = device.hostname
        device_object['hostname'] = device.hostname

    validate_list = job.sub_jobs_list if job.is_multinode else [job]
    for check_job in validate_list:
        parser_device = None if job.dynamic_connection else device_object
        try:
            logger.info("[%d] Parsing definition" % check_job.id)
            # pass (unused) output_dir just for validation as there is no zmq socket either.
            pipeline_job = parser.parse(
                check_job.definition, parser_device,
                check_job.id, None, output_dir=check_job.output_dir)
        except (AttributeError, JobError, NotImplementedError, KeyError, TypeError) as exc:
            logger.error('[%d] parser error: %s' % (check_job.id, exc))
            fail_job(check_job, fail_msg=exc)
            return None
        try:
            logger.info("[%d] Validating actions" % check_job.id)
            pipeline_job.pipeline.validate_actions()
        except (AttributeError, JobError, KeyError, TypeError) as exc:
            logger.error({device: exc})
            fail_job(check_job, fail_msg=exc)
            return None
        if pipeline_job:
            pipeline = pipeline_job.describe()
            # write the pipeline description to the job output directory.
            if not os.path.exists(check_job.output_dir):
                os.makedirs(check_job.output_dir)
            with open(os.path.join(check_job.output_dir, 'description.yaml'), 'w') as describe_yaml:
                describe_yaml.write(yaml.dump(pipeline))
            # NOTE(review): the metadata is mapped to job, not check_job -
            # confirm this is intended for multinode groups.
            map_metadata(yaml.dump(pipeline), job)
            # add the compatibility result from the master to the definition for comparison on the slave.
            if 'compatibility' in pipeline:
                try:
                    compat = int(pipeline['compatibility'])
                except (TypeError, ValueError):
                    # TypeError covers a value such as None which int() rejects
                    logger.error("[%d] Unable to parse job compatibility: %s",
                                 check_job.id, pipeline['compatibility'])
                    compat = 0
                check_job.pipeline_compatibility = compat
                check_job.save(update_fields=['pipeline_compatibility'])
            else:
                logger.error("[%d] Unable to identify job compatibility.", check_job.id)
                fail_job(check_job, fail_msg='Unknown compatibility')
                return None

    return device
def select_device(job):
    """
    Transitioning a device from Idle to Reserved is the responsibility of the scheduler_daemon (currently).
    This function just checks that the reserved device is valid for this job.
    Jobs will only enter this function if a device is already reserved for that job.
    Stores the pipeline description

    To prevent cycling between lava_scheduler_daemon:assign_jobs and here, if a job
    fails validation, the job is incomplete. Issues with this need to be fixed using
    device tags.

    :param job: the job whose reserved device and definition are checked.
    :return: job.actual_device once parsing and validation succeed, or
        None when the job cannot be started.
    """
    logger = logging.getLogger('dispatcher-master')
    if not job.actual_device:
        # should not happen.
        logger.error("[%d] no device reserved", job.id)
        return None

    # compare by value: identity comparison of integers is unreliable
    if job.actual_device.status != Device.RESERVED:
        # should not happen
        logger.error("[%d] device [%s] not in reserved state", job.id, job.actual_device)
        return None

    if job.actual_device.worker_host is None:
        fail_msg = "Misconfigured device configuration for %s - missing worker_host" % job.actual_device
        end_job(job, fail_msg=fail_msg, job_status=TestJob.INCOMPLETE)
        logger.error(fail_msg)
        # the job has been ended, do not carry on and validate it
        return None

    if job.is_multinode:
        # inject the actual group hostnames into the roles for the dispatcher to populate in the overlay.
        devices = {}
        for multinode_job in job.sub_jobs_list:
            # build a list of all devices in this group
            # NOTE(review): yaml.load without an explicit Loader should not
            # be used on untrusted input; confirm whether safe_load suffices.
            definition = yaml.load(multinode_job.definition)
            # devices are not necessarily assigned to all jobs in a group at the same time
            # check all jobs in this multinode group before allowing any to start.
            if not multinode_job.actual_device:
                logger.debug("[%s] job has no device yet", multinode_job.sub_id)
                return None
            devices[str(multinode_job.actual_device.hostname)] = definition['protocols']['lava-multinode']['role']
        for multinode_job in job.sub_jobs_list:
            # apply the complete list to all jobs in this group
            definition = yaml.load(multinode_job.definition)
            definition['protocols']['lava-multinode']['roles'] = devices
            multinode_job.definition = yaml.dump(definition)
            multinode_job.save()

    # Load job definition to get the variables for template rendering
    job_def = yaml.load(job.definition)
    job_ctx = job_def.get('context', {})
    device = job.actual_device

    try:
        device_config = device.load_device_configuration(job_ctx)  # raw dict
    except (jinja2.TemplateError, yaml.YAMLError, IOError) as exc:
        # FIXME: report the exceptions as useful user messages
        logger.error({'jinja2': exc})
        return None
    if not device_config or type(device_config) is not dict:
        # it is an error to have a pipeline device without a device dictionary as it will never get any jobs.
        msg = "Administrative error. Device '%s' has no device dictionary." % device.hostname
        logger.error({'device-dictionary': msg})
        # as we don't control the scheduler, yet, this has to be an error and an incomplete job.
        # the scheduler_daemon sorts by a fixed order, so this would otherwise just keep on repeating.
        end_job(job, fail_msg=msg, job_status=TestJob.INCOMPLETE)
        return None

    parser = JobParser()
    # equivalent of the NewDevice in lava-dispatcher, without .yaml file.
    obj = PipelineDevice(device_config, device.hostname)
    # FIXME: drop this nasty hack once 'target' is dropped as a parameter
    if 'target' not in obj:
        obj.target = device.hostname
    obj['hostname'] = device.hostname

    # pass (unused) output_dir just for validation as there is no zmq socket either.
    try:
        pipeline_job = parser.parse(job.definition, obj, job.id, None, output_dir='/tmp')
    except (JobError, AttributeError, NotImplementedError, KeyError, TypeError) as exc:
        logger.error({'parser': exc})
        end_job(job, fail_msg=exc, job_status=TestJob.INCOMPLETE)
        return None

    try:
        pipeline_job.pipeline.validate_actions()
    except (AttributeError, JobError, KeyError, TypeError) as exc:
        logger.error({device: exc})
        end_job(job, fail_msg=exc, job_status=TestJob.INCOMPLETE)
        return None
    if pipeline_job:
        pipeline = pipeline_job.describe()
        # write the pipeline description to the job output directory.
        if not os.path.exists(job.output_dir):
            os.makedirs(job.output_dir)
        with open(os.path.join(job.output_dir, 'description.yaml'), 'w') as describe_yaml:
            describe_yaml.write(yaml.dump(pipeline))
        map_metadata(yaml.dump(pipeline), job)
    return device