def test_compatibility(self):  # pylint: disable=too-many-locals
        user = self.factory.make_user()
        # public set in the YAML
        yaml_str = self.factory.make_job_json()
        yaml_data = yaml.load(yaml_str)
        job = TestJob.from_yaml_and_user(
            yaml_str, user)
        self.assertTrue(job.is_public)
        self.assertTrue(job.can_view(user))
        # initial state prior to validation
        self.assertEqual(job.pipeline_compatibility, 0)
        self.assertNotIn('compatibility', yaml_data)
        # FIXME: dispatcher master needs to make this kind of test more accessible.
        definition = yaml.load(job.definition)
        self.assertNotIn('protocols', definition)
        job.actual_device = Device.objects.get(hostname='fakeqemu1')
        job_def = yaml.load(job.definition)
        job_ctx = job_def.get('context', {})
        parser = JobParser()
        device = job.actual_device

        try:
            device_config = device.load_device_configuration(job_ctx, system=False)  # raw dict
        except (jinja2.TemplateError, yaml.YAMLError, IOError) as exc:
            # FIXME: report the exceptions as useful user messages
            self.fail("[%d] jinja2 error: %s" % (job.id, exc))
        if not device_config or not isinstance(device_config, dict):
            # it is an error to have a pipeline device without a device dictionary as it will never get any jobs.
            msg = "Administrative error. Device '%s' has no device dictionary." % device.hostname
            self.fail('[%d] device-dictionary error: %s' % (job.id, msg))

        device_object = PipelineDevice(device_config, device.hostname)  # equivalent of the NewDevice in lava-dispatcher, without .yaml file.
        # FIXME: drop this nasty hack once 'target' is dropped as a parameter
        if 'target' not in device_object:
            device_object.target = device.hostname
        device_object['hostname'] = device.hostname

        parser_device = device_object
        try:
            # pass (unused) output_dir just for validation as there is no zmq socket either.
            pipeline_job = parser.parse(
                job.definition, parser_device,
                job.id, None, None, None, output_dir=job.output_dir)
        except (AttributeError, JobError, NotImplementedError, KeyError, TypeError) as exc:
            self.fail('[%s] parser error: %s' % (job.sub_id, exc))
        description = pipeline_job.describe()
        self.assertIn('compatibility', description)
        self.assertGreaterEqual(description['compatibility'], BootQEMU.compatibility)
    def test_invalid_multinode(self):  # pylint: disable=too-many-locals
        user = self.factory.make_user()
        device_type = self.factory.make_device_type()
        submission = yaml.load(open(
            os.path.join(os.path.dirname(__file__), 'kvm-multinode.yaml'), 'r'))

        tag_list = [
            self.factory.ensure_tag('usb-flash'),
            self.factory.ensure_tag('usb-eth')
        ]
        self.factory.make_device(device_type, 'fakeqemu1')
        self.factory.make_device(device_type, 'fakeqemu2')
        self.factory.make_device(device_type, 'fakeqemu3', tags=tag_list)
        deploy = [action['deploy'] for action in submission['actions'] if 'deploy' in action]
        # replace working image with a broken URL
        for block in deploy:
            block['images'] = {
                'rootfs': {
                    'url': 'http://localhost/unknown/invalid.gz',
                    'image_arg': '{rootfs}'
                }
            }
        job_object_list = _pipeline_protocols(submission, user, yaml.dump(submission))
        self.assertEqual(len(job_object_list), 2)
        self.assertEqual(
            job_object_list[0].sub_id,
            "%d.%d" % (int(job_object_list[0].id), 0))
        # FIXME: dispatcher master needs to make this kind of test more accessible.
        for job in job_object_list:
            definition = yaml.load(job.definition)
            self.assertNotEqual(definition['protocols']['lava-multinode']['sub_id'], '')
            job.actual_device = Device.objects.get(hostname='fakeqemu1')
            job_def = yaml.load(job.definition)
            job_ctx = job_def.get('context', {})
            parser = JobParser()
            device = None
            device_object = None
            if not job.dynamic_connection:
                device = job.actual_device

                try:
                    device_config = device.load_device_configuration(job_ctx, system=False)  # raw dict
                except (jinja2.TemplateError, yaml.YAMLError, IOError) as exc:
                    # FIXME: report the exceptions as useful user messages
                    self.fail("[%d] jinja2 error: %s" % (job.id, exc))
                if not device_config or not isinstance(device_config, dict):
                    # it is an error to have a pipeline device without a device dictionary as it will never get any jobs.
                    msg = "Administrative error. Device '%s' has no device dictionary." % device.hostname
                    self.fail('[%d] device-dictionary error: %s' % (job.id, msg))

                device_object = PipelineDevice(device_config, device.hostname)  # equivalent of the NewDevice in lava-dispatcher, without .yaml file.
                # FIXME: drop this nasty hack once 'target' is dropped as a parameter
                if 'target' not in device_object:
                    device_object.target = device.hostname
                device_object['hostname'] = device.hostname

            validate_list = job.sub_jobs_list if job.is_multinode else [job]
            for check_job in validate_list:
                parser_device = None if job.dynamic_connection else device_object
                try:
                    # pass (unused) output_dir just for validation as there is no zmq socket either.
                    pipeline_job = parser.parse(
                        check_job.definition, parser_device,
                        check_job.id, None, None, None,
                        output_dir=check_job.output_dir)
                except (AttributeError, JobError, NotImplementedError, KeyError, TypeError) as exc:
                    self.fail('[%s] parser error: %s' % (check_job.sub_id, exc))
                with TestCase.assertRaises(self, (JobError, InfrastructureError)) as check:
                    pipeline_job.pipeline.validate_actions()
                # inspect the captured exception outside the with block; inside it, this line is unreachable
                check_missing_path(self, check, 'qemu-system-x86_64')
        for job in job_object_list:
            job = TestJob.objects.get(id=job.id)
            self.assertNotEqual(job.sub_id, '')
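
A minimal sketch of the sub_id convention the assertions above rely on: every job in a multinode group is numbered "<parent id>.<index>", so the first sub-job of job 42 is "42.0". The helper below is hypothetical, not part of lava-server.

def make_sub_ids(parent_id, group_size):
    """Build the "<id>.<index>" sub_id strings for a multinode group."""
    return ["%d.%d" % (parent_id, index) for index in range(group_size)]

assert make_sub_ids(42, 2) == ['42.0', '42.1']
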
Example #3
def select_device(job):
    """
    Transitioning a device from Idle to Reserved is the responsibility of the scheduler_daemon (currently).
    This function just checks that the reserved device is valid for this job.
    Jobs will only enter this function if a device is already reserved for that job.
    Stores the pipeline description

    To prevent cycling between lava_scheduler_daemon:assign_jobs and here, if a job
    fails validation, the job is incomplete. Issues with this need to be fixed using
    device tags.
    """
    logger = logging.getLogger('dispatcher-master')
    if not job.dynamic_connection:
        if not job.actual_device:
            return None
        if job.actual_device.status != Device.RESERVED:
            # should not happen
            logger.error("[%d] device [%s] not in reserved state", job.id,
                         job.actual_device)
            return None

        if job.actual_device.worker_host is None:
            fail_msg = "Misconfigured device configuration for %s - missing worker_host" % job.actual_device
            fail_job(job, fail_msg=fail_msg)
            logger.error(fail_msg)
            return None

    if job.is_multinode:
        # inject the actual group hostnames into the roles for the dispatcher to populate in the overlay.
        devices = {}
        for multinode_job in job.sub_jobs_list:
            # build a list of all devices in this group
            definition = yaml.load(multinode_job.definition)
            # devices are not necessarily assigned to all jobs in a group at the same time
            # check all jobs in this multinode group before allowing any to start.
            if multinode_job.dynamic_connection:
                logger.debug("[%s] dynamic connection job",
                             multinode_job.sub_id)
                continue
            if not multinode_job.actual_device:
                logger.debug("[%s] job has no device yet",
                             multinode_job.sub_id)
                return None
            devices[str(multinode_job.actual_device.hostname)] = definition['protocols']['lava-multinode']['role']
        for multinode_job in job.sub_jobs_list:
            # apply the complete list to all jobs in this group
            definition = yaml.load(multinode_job.definition)
            definition['protocols']['lava-multinode']['roles'] = devices
            multinode_job.definition = yaml.dump(definition)
            multinode_job.save()

    # Load job definition to get the variables for template rendering
    job_def = yaml.load(job.definition)
    job_ctx = job_def.get('context', {})
    parser = JobParser()
    device = None
    device_object = None
    if not job.dynamic_connection:
        device = job.actual_device

        try:
            device_config = device.load_device_configuration(job_ctx)  # raw dict
        except (jinja2.TemplateError, yaml.YAMLError, IOError) as exc:
            # FIXME: report the exceptions as useful user messages
            logger.error("[%d] jinja2 error: %s" % (job.id, exc))
            return None
        if not device_config or not isinstance(device_config, dict):
            # it is an error to have a pipeline device without a device dictionary as it will never get any jobs.
            msg = "Administrative error. Device '%s' has no device dictionary." % device.hostname
            logger.error('[%d] device-dictionary error: %s', job.id, msg)
            # as we don't control the scheduler, yet, this has to be an error and an incomplete job.
            # the scheduler_daemon sorts by a fixed order, so this would otherwise just keep on repeating.
            fail_job(job, fail_msg=msg)
            return None

        device_object = PipelineDevice(device_config, device.hostname)  # equivalent of the NewDevice in lava-dispatcher, without .yaml file.
        # FIXME: drop this nasty hack once 'target' is dropped as a parameter
        if 'target' not in device_object:
            device_object.target = device.hostname
        device_object['hostname'] = device.hostname

    validate_list = job.sub_jobs_list if job.is_multinode else [job]
    for check_job in validate_list:
        parser_device = None if job.dynamic_connection else device_object
        try:
            logger.debug("[%d] parsing definition" % check_job.id)
            # pass (unused) output_dir just for validation as there is no zmq socket either.
            pipeline_job = parser.parse(check_job.definition,
                                        parser_device,
                                        check_job.id,
                                        None,
                                        output_dir=check_job.output_dir)
        except (AttributeError, JobError, NotImplementedError, KeyError,
                TypeError) as exc:
            logger.error('[%d] parser error: %s', check_job.id, exc)
            fail_job(check_job, fail_msg=exc)
            return None
        try:
            logger.debug("[%d] validating actions" % check_job.id)
            pipeline_job.pipeline.validate_actions()
        except (AttributeError, JobError, KeyError, TypeError) as exc:
            logger.error({device: exc})
            fail_job(check_job, fail_msg=exc)
            return None
        if pipeline_job:
            pipeline = pipeline_job.describe()
            # write the pipeline description to the job output directory.
            if not os.path.exists(check_job.output_dir):
                os.makedirs(check_job.output_dir)
            with open(os.path.join(check_job.output_dir, 'description.yaml'),
                      'w') as describe_yaml:
                describe_yaml.write(yaml.dump(pipeline))
            map_metadata(yaml.dump(pipeline), job)
    return device
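
For reference, a minimal sketch of the description round trip performed at the end of select_device above, assuming only PyYAML; the directory path and the sample dict are illustrative.

import os
import yaml

def write_description(output_dir, description):
    """Persist a pipeline description dict as description.yaml, as above."""
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    path = os.path.join(output_dir, 'description.yaml')
    with open(path, 'w') as describe_yaml:
        describe_yaml.write(yaml.dump(description))
    return path

path = write_description('/tmp/lava-demo', {'compatibility': 4, 'pipeline': []})
with open(path) as handle:
    assert yaml.safe_load(handle.read())['compatibility'] == 4
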
Example #4
def select_device(job, dispatchers):
    """
    Transitioning a device from Idle to Reserved is the responsibility of the scheduler_daemon (currently).
    This function just checks that the reserved device is valid for this job.
    Jobs will only enter this function if a device is already reserved for that job.
    Stores the pipeline description

    To prevent cycling between lava_scheduler_daemon:assign_jobs and here, if a job
    fails validation, the job is incomplete. Issues with this need to be fixed using
    device tags.
    """
    # FIXME: split out dynamic_connection, multinode and validation
    logger = logging.getLogger('dispatcher-master')
    if not job.dynamic_connection:
        if not job.actual_device:
            return None
        if job.actual_device.status != Device.RESERVED:
            # should not happen
            logger.error("[%d] device [%s] not in reserved state", job.id, job.actual_device)
            return None

        if job.actual_device.worker_host is None:
            fail_msg = "Misconfigured device configuration for %s - missing worker_host" % job.actual_device
            fail_job(job, fail_msg=fail_msg)
            logger.error(fail_msg)
            return None

    if job.is_multinode:
        # inject the actual group hostnames into the roles for the dispatcher to populate in the overlay.
        devices = {}
        for multinode_job in job.sub_jobs_list:
            # build a list of all devices in this group
            definition = yaml.load(multinode_job.definition)
            # devices are not necessarily assigned to all jobs in a group at the same time
            # check all jobs in this multinode group before allowing any to start.
            if multinode_job.dynamic_connection:
                logger.debug("[%s] dynamic connection job", multinode_job.sub_id)
                continue
            if not multinode_job.actual_device:
                logger.debug("[%s] job has no device yet", multinode_job.sub_id)
                return None
            devices[str(multinode_job.actual_device.hostname)] = definition['protocols']['lava-multinode']['role']
        for multinode_job in job.sub_jobs_list:
            # apply the complete list to all jobs in this group
            definition = yaml.load(multinode_job.definition)
            definition['protocols']['lava-multinode']['roles'] = devices
            multinode_job.definition = yaml.dump(definition)
            multinode_job.save()

    # Load job definition to get the variables for template rendering
    job_def = yaml.load(job.definition)
    job_ctx = job_def.get('context', {})
    parser = JobParser()
    device = None
    device_object = None
    if not job.dynamic_connection:
        device = job.actual_device

        try:
            device_config = device.load_device_configuration(job_ctx)  # raw dict
        except (jinja2.TemplateError, yaml.YAMLError, IOError) as exc:
            logger.error("[%d] jinja2 error: %s" % (job.id, exc))
            msg = "Administrative error. Unable to parse '%s'" % exc
            fail_job(job, fail_msg=msg)
            return None
        if not device_config or not isinstance(device_config, dict):
            # it is an error to have a pipeline device without a device dictionary as it will never get any jobs.
            msg = "Administrative error. Device '%s' has no device dictionary." % device.hostname
            logger.error('[%d] device-dictionary error: %s', job.id, msg)
            # as we don't control the scheduler, yet, this has to be an error and an incomplete job.
            # the scheduler_daemon sorts by a fixed order, so this would otherwise just keep on repeating.
            fail_job(job, fail_msg=msg)
            return None
        if not device.worker_host or not device.worker_host.hostname:
            msg = "Administrative error. Device '%s' has no worker host." % device.hostname
            logger.error('[%d] worker host error: %s', job.id, msg)
            fail_job(job, fail_msg=msg)
            return None
        if device.worker_host.hostname not in dispatchers:
            # a configured worker has not called in to this master
            # likely that the worker is misconfigured - polling the wrong master
            # or simply not running at all.
            msg = """Administrative error. Device '{0}' has a worker_host setting of
 '{1}' but no slave has registered with this master
 using that FQDN.""".format(device.hostname, device.worker_host.hostname)
            logger.error('[%d] worker-hostname error: %s', job.id, msg)
            fail_job(job, fail_msg=msg)
            return None

        device_object = PipelineDevice(device_config, device.hostname)  # equivalent of the NewDevice in lava-dispatcher, without .yaml file.
        # FIXME: drop this nasty hack once 'target' is dropped as a parameter
        if 'target' not in device_object:
            device_object.target = device.hostname
        device_object['hostname'] = device.hostname

    validate_list = job.sub_jobs_list if job.is_multinode else [job]
    for check_job in validate_list:
        parser_device = None if job.dynamic_connection else device_object
        try:
            logger.info("[%d] Parsing definition" % check_job.id)
            # pass (unused) output_dir just for validation as there is no zmq socket either.
            pipeline_job = parser.parse(
                check_job.definition, parser_device,
                check_job.id, None, output_dir=check_job.output_dir)
        except (AttributeError, JobError, NotImplementedError, KeyError, TypeError) as exc:
            logger.error('[%d] parser error: %s', check_job.id, exc)
            fail_job(check_job, fail_msg=exc)
            return None
        try:
            logger.info("[%d] Validating actions" % check_job.id)
            pipeline_job.pipeline.validate_actions()
        except (AttributeError, JobError, KeyError, TypeError) as exc:
            logger.error({device: exc})
            fail_job(check_job, fail_msg=exc)
            return None
        if pipeline_job:
            pipeline = pipeline_job.describe()
            # write the pipeline description to the job output directory.
            if not os.path.exists(check_job.output_dir):
                os.makedirs(check_job.output_dir)
            with open(os.path.join(check_job.output_dir, 'description.yaml'), 'w') as describe_yaml:
                describe_yaml.write(yaml.dump(pipeline))
            map_metadata(yaml.dump(pipeline), job)
            # add the compatibility result from the master to the definition for comparison on the slave.
            if 'compatibility' in pipeline:
                try:
                    compat = int(pipeline['compatibility'])
                except ValueError:
                    logger.error("[%d] Unable to parse job compatibility: %s",
                                 check_job.id, pipeline['compatibility'])
                    compat = 0
                check_job.pipeline_compatibility = compat
                check_job.save(update_fields=['pipeline_compatibility'])
            else:
                logger.error("[%d] Unable to identify job compatibility.", check_job.id)
                fail_job(check_job, fail_msg='Unknown compatibility')
                return None

    return device
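
The compatibility bookkeeping above boils down to: read the integer from the pipeline description, fall back to zero when it cannot be parsed. A standalone sketch (the helper is hypothetical, and unlike the code above it defaults to 0 rather than failing the job when the key is missing):

def parse_compatibility(description):
    """Return the pipeline compatibility as an int, or 0 if unparseable."""
    try:
        return int(description.get('compatibility', 0))
    except (TypeError, ValueError):
        return 0

assert parse_compatibility({'compatibility': '4'}) == 4
assert parse_compatibility({'compatibility': 'unknown'}) == 0
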
Example #5
def select_device(job, dispatchers):  # pylint: disable=too-many-return-statements
    """
    Transitioning a device from Idle to Reserved is the responsibility of the scheduler_daemon (currently).
    This function just checks that the reserved device is valid for this job.
    Jobs will only enter this function if a device is already reserved for that job.
    Stores the pipeline description

    To prevent cycling between lava_scheduler_daemon:assign_jobs and here, if a job
    fails validation, the job is incomplete. Issues with this need to be fixed using
    device tags.
    """
    # FIXME: split out dynamic_connection, multinode and validation
    logger = logging.getLogger('dispatcher-master')
    if not job.dynamic_connection:
        if not job.actual_device:
            return None
        if job.actual_device.status != Device.RESERVED:
            # should not happen
            logger.error("[%d] device [%s] not in reserved state", job.id, job.actual_device)
            return None

        if job.actual_device.worker_host is None:
            fail_msg = "Misconfigured device configuration for %s - missing worker_host" % job.actual_device
            fail_job(job, fail_msg=fail_msg)
            logger.error(fail_msg)
            return None

    if job.is_multinode:
        # inject the actual group hostnames into the roles for the dispatcher to populate in the overlay.
        devices = {}
        for multinode_job in job.sub_jobs_list:
            # build a list of all devices in this group
            definition = yaml.load(multinode_job.definition)
            # devices are not necessarily assigned to all jobs in a group at the same time
            # check all jobs in this multinode group before allowing any to start.
            if multinode_job.dynamic_connection:
                logger.debug("[%s] dynamic connection job", multinode_job.sub_id)
                continue
            if not multinode_job.actual_device:
                logger.debug("[%s] job has no device yet", multinode_job.sub_id)
                return None
            devices[str(multinode_job.actual_device.hostname)] = definition['protocols']['lava-multinode']['role']
        for multinode_job in job.sub_jobs_list:
            # apply the complete list to all jobs in this group
            definition = yaml.load(multinode_job.definition)
            definition['protocols']['lava-multinode']['roles'] = devices
            multinode_job.definition = yaml.dump(definition)
            multinode_job.save()

    # Load job definition to get the variables for template rendering
    job_def = yaml.load(job.definition)
    job_ctx = job_def.get('context', {})
    device = None
    if not job.dynamic_connection:
        device = job.actual_device

        try:
            device_config = device.load_device_configuration(job_ctx)  # raw dict
        except (jinja2.TemplateError, yaml.YAMLError, IOError) as exc:
            logger.error("[%d] jinja2 error: %s", job.id, exc)
            msg = "Administrative error. Unable to parse device configuration: '%s'" % exc
            fail_job(job, fail_msg=msg)
            return None
        if not device_config or not isinstance(device_config, dict):
            # it is an error to have a pipeline device without a device dictionary as it will never get any jobs.
            msg = "Administrative error. Device '%s' has no device dictionary." % device.hostname
            logger.error('[%d] device-dictionary error: %s', job.id, msg)
            # as we don't control the scheduler, yet, this has to be an error and an incomplete job.
            # the scheduler_daemon sorts by a fixed order, so this would otherwise just keep on repeating.
            fail_job(job, fail_msg=msg)
            return None
        if not device.worker_host or not device.worker_host.hostname:
            msg = "Administrative error. Device '%s' has no worker host." % device.hostname
            logger.error('[%d] worker host error: %s', job.id, msg)
            fail_job(job, fail_msg=msg)
            return None
        if device.worker_host.hostname not in dispatchers:
            # A configured worker has not (yet) called in to this master.
            # It is likely that the worker is misconfigured - polling the wrong master
            # or simply not running at all. There is also a possible race condition
            # here when the master gets restarted with a queue of jobs and has not yet
            # received polls from all slaves, so do not fail the job.
            msg = "Device '{0}' has a worker_host setting of " \
                  "'{1}' but no slave has yet registered with this master " \
                  "using that FQDN.".format(device.hostname, device.worker_host.hostname)
            logger.info('[%d] worker-hostname not seen: %s', job.id, msg)
            return None

        device_object = PipelineDevice(device_config, device.hostname)  # equivalent of the NewDevice in lava-dispatcher, without .yaml file.
        # FIXME: drop this nasty hack once 'target' is dropped as a parameter
        if 'target' not in device_object:
            device_object.target = device.hostname
        device_object['hostname'] = device.hostname
    return device
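
The multinode block shared by these select_device variants is a two-pass YAML round trip: collect hostname -> role across the whole group, then write the complete map back into every sub-job definition. A self-contained sketch with fabricated definitions:

import yaml

definitions = {
    'fakeqemu1': yaml.dump({'protocols': {'lava-multinode': {'role': 'server'}}}),
    'fakeqemu2': yaml.dump({'protocols': {'lava-multinode': {'role': 'client'}}}),
}
# first pass: build the hostname -> role map for the group
devices = {}
for hostname, definition_str in definitions.items():
    definition = yaml.safe_load(definition_str)
    devices[hostname] = definition['protocols']['lava-multinode']['role']
# second pass: inject the complete map into every definition
for hostname, definition_str in list(definitions.items()):
    definition = yaml.safe_load(definition_str)
    definition['protocols']['lava-multinode']['roles'] = devices
    definitions[hostname] = yaml.dump(definition)

roles = yaml.safe_load(definitions['fakeqemu1'])['protocols']['lava-multinode']['roles']
assert roles == {'fakeqemu1': 'server', 'fakeqemu2': 'client'}
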
Example #6
def select_device(job):
    """
    Transitioning a device from Idle to Reserved is the responsibility of the scheduler_daemon (currently).
    This function just checks that the reserved device is valid for this job.
    Jobs will only enter this function if a device is already reserved for that job.
    Stores the pipeline description

    To prevent cycling between lava_scheduler_daemon:assign_jobs and here, if a job
    fails validation, the job is incomplete. Issues with this need to be fixed using
    device tags.
    """
    logger = logging.getLogger('dispatcher-master')
    if not job.actual_device:
        # should not happen.
        logger.error("[%d] no device reserved", job.id)
        return None

    if job.actual_device.status != Device.RESERVED:
        # should not happen
        logger.error("[%d] device [%s] not in reserved state", job.id, job.actual_device)
        return None

    if job.actual_device.worker_host is None:
        fail_msg = "Misconfigured device configuration for %s - missing worker_host" % job.actual_device
        end_job(job, fail_msg=fail_msg, job_status=TestJob.INCOMPLETE)
        logger.error(fail_msg)
        return None

    if job.is_multinode:
        # inject the actual group hostnames into the roles for the dispatcher to populate in the overlay.
        devices = {}
        for multinode_job in job.sub_jobs_list:
            # build a list of all devices in this group
            definition = yaml.load(multinode_job.definition)
            # devices are not necessarily assigned to all jobs in a group at the same time
            # check all jobs in this multinode group before allowing any to start.
            if not multinode_job.actual_device:
                logger.debug("[%s] job has no device yet", multinode_job.sub_id)
                return None
            devices[str(multinode_job.actual_device.hostname)] = definition['protocols']['lava-multinode']['role']
        for multinode_job in job.sub_jobs_list:
            # apply the complete list to all jobs in this group
            definition = yaml.load(multinode_job.definition)
            definition['protocols']['lava-multinode']['roles'] = devices
            multinode_job.definition = yaml.dump(definition)
            multinode_job.save()

    # Load job definition to get the variables for template rendering
    job_def = yaml.load(job.definition)
    job_ctx = job_def.get('context', {})
    device = job.actual_device

    try:
        device_config = device.load_device_configuration(job_ctx)  # raw dict
    except (jinja2.TemplateError, yaml.YAMLError, IOError) as exc:
        # FIXME: report the exceptions as useful user messages
        logger.error({'jinja2': exc})
        return None
    if not device_config or not isinstance(device_config, dict):
        # it is an error to have a pipeline device without a device dictionary as it will never get any jobs.
        msg = "Administrative error. Device '%s' has no device dictionary." % device.hostname
        logger.error({'device-dictionary': msg})
        # as we don't control the scheduler, yet, this has to be an error and an incomplete job.
        # the scheduler_daemon sorts by a fixed order, so this would otherwise just keep on repeating.
        end_job(job, fail_msg=msg, job_status=TestJob.INCOMPLETE)
        return None

    parser = JobParser()
    obj = PipelineDevice(device_config, device.hostname)  # equivalent of the NewDevice in lava-dispatcher, without .yaml file.
    # FIXME: drop this nasty hack once 'target' is dropped as a parameter
    if 'target' not in obj:
        obj.target = device.hostname
    obj['hostname'] = device.hostname

    # pass (unused) output_dir just for validation as there is no zmq socket either.
    try:
        pipeline_job = parser.parse(job.definition, obj, job.id, None, output_dir='/tmp')
    except (JobError, AttributeError, NotImplementedError, KeyError, TypeError) as exc:
        logger.error({'parser': exc})
        end_job(job, fail_msg=exc, job_status=TestJob.INCOMPLETE)
        return None

    try:
        pipeline_job.pipeline.validate_actions()
    except (AttributeError, JobError, KeyError, TypeError) as exc:
        logger.error({device: exc})
        end_job(job, fail_msg=exc, job_status=TestJob.INCOMPLETE)
        return None
    if pipeline_job:
        pipeline = pipeline_job.describe()
        # write the pipeline description to the job output directory.
        if not os.path.exists(job.output_dir):
            os.makedirs(job.output_dir)
        with open(os.path.join(job.output_dir, 'description.yaml'), 'w') as describe_yaml:
            describe_yaml.write(yaml.dump(pipeline))
        map_metadata(yaml.dump(pipeline), job)
    return device
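
The 'target' hack flagged by the FIXME comments depends on PipelineDevice accepting both attribute and item assignment. A dict-backed stand-in (not the real lava-dispatcher class) shows the surface being relied on:

class FakePipelineDevice(dict):
    """Stand-in that, like PipelineDevice, tolerates attribute assignment."""

device_object = FakePipelineDevice({'timeouts': {}})
if 'target' not in device_object:        # membership checks dict keys only
    device_object.target = 'fakeqemu1'   # attribute assignment, as in the hack
device_object['hostname'] = 'fakeqemu1'  # item assignment
assert device_object.target == device_object['hostname']
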
Example #7
    def test_invalid_multinode(self):
        user = self.factory.make_user()
        self.device_type = self.factory.make_device_type()
        submission = yaml.load(
            open(os.path.join(os.path.dirname(__file__), 'kvm-multinode.yaml'),
                 'r'))

        tag_list = [
            self.factory.ensure_tag('usb-flash'),
            self.factory.ensure_tag('usb-eth')
        ]
        self.factory.make_device(self.device_type, 'fakeqemu1')
        self.factory.make_device(self.device_type, 'fakeqemu2')
        self.factory.make_device(self.device_type, 'fakeqemu3', tags=tag_list)
        deploy = [
            action['deploy'] for action in submission['actions']
            if 'deploy' in action
        ]
        # replace working image with a broken URL
        for block in deploy:
            block['image'] = 'http://localhost/unknown/invalid.gz'
        job_object_list = _pipeline_protocols(submission, user,
                                              yaml.dump(submission))
        self.assertEqual(len(job_object_list), 2)
        self.assertEqual(job_object_list[0].sub_id,
                         "%d.%d" % (int(job_object_list[0].id), 0))
        # FIXME: dispatcher master needs to make this kind of test more accessible.
        for job in job_object_list:
            definition = yaml.load(job.definition)
            self.assertNotEqual(
                definition['protocols']['lava-multinode']['sub_id'], '')
            job.actual_device = Device.objects.get(hostname='fakeqemu1')
            job_def = yaml.load(job.definition)
            job_ctx = job_def.get('context', {})
            parser = JobParser()
            device = None
            device_object = None
            if not job.dynamic_connection:
                device = job.actual_device

                try:
                    device_config = device.load_device_configuration(job_ctx)  # raw dict
                except (jinja2.TemplateError, yaml.YAMLError, IOError) as exc:
                    # FIXME: report the exceptions as useful user messages
                    self.fail("[%d] jinja2 error: %s" % (job.id, exc))
                if not device_config or not isinstance(device_config, dict):
                    # it is an error to have a pipeline device without a device dictionary as it will never get any jobs.
                    msg = "Administrative error. Device '%s' has no device dictionary." % device.hostname
                    self.fail('[%d] device-dictionary error: %s' %
                              (job.id, msg))

                device_object = PipelineDevice(device_config, device.hostname)  # equivalent of the NewDevice in lava-dispatcher, without .yaml file.
                # FIXME: drop this nasty hack once 'target' is dropped as a parameter
                if 'target' not in device_object:
                    device_object.target = device.hostname
                device_object['hostname'] = device.hostname

            validate_list = job.sub_jobs_list if job.is_multinode else [job]
            for check_job in validate_list:
                parser_device = None if job.dynamic_connection else device_object
                try:
                    # pass (unused) output_dir just for validation as there is no zmq socket either.
                    pipeline_job = parser.parse(
                        check_job.definition,
                        parser_device,
                        check_job.id,
                        None,
                        output_dir=check_job.output_dir)
                except (AttributeError, JobError, NotImplementedError,
                        KeyError, TypeError) as exc:
                    self.fail('[%s] parser error: %s' %
                              (check_job.sub_id, exc))
                if os.path.exists('/dev/loop0'):  # rather than skipping the entire test, just the validation.
                    self.assertRaises(JobError, pipeline_job.pipeline.validate_actions)
        for job in job_object_list:
            job = TestJob.objects.get(id=job.id)
            self.assertNotEqual(job.sub_id, '')
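
The two test_invalid_multinode variants differ only in the deploy schema: the newer one nests a rootfs under an 'images' dict with an 'image_arg', while this older one uses a flat 'image' URL. A sketch of the broken-URL substitution on a fabricated submission:

submission = {'actions': [{'deploy': {'to': 'tmpfs', 'image': 'http://images.example.com/good.gz'}}]}
deploy = [action['deploy'] for action in submission['actions'] if 'deploy' in action]
for block in deploy:
    block['image'] = 'http://localhost/unknown/invalid.gz'  # deliberately unreachable
assert submission['actions'][0]['deploy']['image'].endswith('invalid.gz')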