Example #1
0
    def test_job_handlers(self):
        self.restart()
        hostname = 'fakeqemu3'
        device = self.factory.make_device(self.device_type, hostname)
        job = TestJob.from_yaml_and_user(
            self.factory.make_job_yaml(),
            self.factory.make_user())
        selected = select_device(job, self.dispatchers)
        self.assertIsNone(selected)
        job.actual_device = device
        selected = select_device(job, self.dispatchers)
        self.assertIsNone(selected)
        device.worker_host = self.worker
        selected = select_device(job, self.dispatchers)
        self.assertIsNone(selected)
        create_job(job, device)
        self.assertEqual(job.actual_device, device)
        self.assertEqual(device.status, Device.RESERVED)
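The test above only reads cleanly under a particular contract for select_device() and create_job(): selection keeps returning None until the job is bound to a device whose worker is known to the scheduler, and reservation is what flips the device to RESERVED. The sketch below is one consistent reading of that contract for following the assertions, not the lava-server code under test; the guard order and the dispatchers lookup are assumptions.

    # Illustrative sketch only, not the lava-server implementation under test.
    def select_device(job, dispatchers):
        """Return the device to schedule `job` on, or None if it cannot run yet."""
        device = job.actual_device          # assumption: the job must already be bound
        if device is None:
            return None
        if device.worker_host is None:      # no worker recorded for this device
            return None
        if device.worker_host.hostname not in dispatchers:
            return None                     # worker not registered as a connected dispatcher
        return device

    def create_job(job, device):
        """Reserve `device` for `job` (the side effects the assertions check)."""
        job.actual_device = device
        device.current_job = job
        device.status = Device.RESERVED
        device.save()
        job.save()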
Example #2
0
    def test_job_handlers(self):
        self.restart()
        hostname = 'fakeqemu3'
        device_dict = DeviceDictionary(hostname=hostname)
        device_dict.parameters = self.conf
        device_dict.save()
        device = self.factory.make_device(self.device_type, hostname)
        job = TestJob.from_yaml_and_user(
            self.factory.make_job_yaml(),
            self.factory.make_user())
        selected = select_device(job, self.dispatchers)
        self.assertIsNone(selected)
        job.actual_device = device
        selected = select_device(job, self.dispatchers)
        self.assertIsNone(selected)
        device.worker_host = self.worker
        selected = select_device(job, self.dispatchers)
        self.assertIsNone(selected)
        create_job(job, device)
        self.assertEqual(job.actual_device, device)
        self.assertEqual(device.status, Device.RESERVED)
Example #3
0
    def test_dispatcher_restart(self):
        self.restart()
        hostname = 'fakeqemu4'
        device_dict = DeviceDictionary(hostname=hostname)
        device_dict.parameters = self.conf
        device_dict.save()
        device = self.factory.make_device(self.device_type, hostname)
        job = TestJob.from_yaml_and_user(
            self.factory.make_job_yaml(),
            self.factory.make_user())
        job.actual_device = device
        self.assertEqual(job.status, TestJob.SUBMITTED)
        device.worker_host = self.remote
        selected = select_device(job, self.dispatchers)
        self.assertIsNone(selected)
        self.assertEqual(job.status, TestJob.SUBMITTED)
        create_job(job, device)
        self.assertEqual(job.actual_device, device)
        self.assertEqual(device.status, Device.RESERVED)
        selected = select_device(job, self.dispatchers)
        self.assertIsNone(selected)
        self.assertEqual(job.status, TestJob.SUBMITTED)
Example #4
0
    def process_jobs(self, options):
        for job in TestJob.objects.filter(
                Q(status=TestJob.SUBMITTED) & Q(is_pipeline=True) & ~Q(actual_device=None))\
                .order_by('-health_check', '-priority', 'submit_time', 'target_group', 'id'):
            device = None
            worker_host = None

            device = select_device(job, self.dispatchers)
            if not device:
                # e.g. one or more jobs in the MultiNode group do not yet have Reserved devices.
                continue
            # selecting device can change the job
            job.refresh_from_db()

            self.logger.info("[%d] Assigning %s device", job.id, device)
            if job.actual_device is None:
                # health checks
                device = job.requested_device
                if not device.worker_host:
                    msg = "Infrastructure error: Invalid worker information"
                    self.logger.error("[%d] %s", job.id, msg)
                    fail_job(job, msg, TestJob.INCOMPLETE)
                    continue
            # Launch the job
            create_job(job, device)
            self.logger.info("[%d] START => %s (%s)", job.id,
                             device.worker_host.hostname, device.hostname)
            worker_host = device.worker_host
            try:
                # Load job definition to get the variables for template
                # rendering
                job_def = yaml.load(job.definition)
                job_ctx = job_def.get('context', {})

                # Load env.yaml, env-dut.yaml and dispatcher configuration
                # All three are optional
                env_str = load_optional_yaml_file(options['env'])
                env_dut_str = load_optional_yaml_file(options['env_dut'])

                # Load device configuration
                if device:
                    device_configuration = device.load_configuration(job_ctx)
                    dispatcher_config_file = os.path.join(
                        options['dispatchers_config'],
                        "%s.yaml" % worker_host.hostname)
                    dispatcher_config = load_optional_yaml_file(
                        dispatcher_config_file)

                    self.controler.send_multipart([
                        str(worker_host.hostname), 'START',
                        str(job.id),
                        self.export_definition(job),
                        str(device_configuration), dispatcher_config, env_str,
                        env_dut_str
                    ])

                if job.is_multinode:
                    # All secondary connections must be made from a dispatcher local to the host device
                    # to allow for local firewalls etc. So the secondary connection is started on the
                    # remote worker of the "nominated" host.
                    # This job will not be a dynamic_connection, this is the parent.
                    device = None
                    device_configuration = None
                    # to get this far, the rest of the multinode group must also be ready
                    # so start the dynamic connections
                    parent = job

                    for group_job in job.sub_jobs_list:
                        if group_job == parent or not group_job.dynamic_connection:
                            continue

                        worker_host = parent.actual_device.worker_host
                        dispatcher_config_file = os.path.join(
                            options['dispatchers_config'],
                            "%s.yaml" % worker_host.hostname)
                        dispatcher_config = load_optional_yaml_file(
                            dispatcher_config_file)

                        # inherit only enough configuration for dynamic_connection operation
                        device_configuration = parent.actual_device.load_configuration(
                            job_ctx)
                        self.logger.info(
                            "[%d] Trimming dynamic connection device configuration.",
                            group_job.id)
                        device_configuration = parent.actual_device.minimise_configuration(
                            device_configuration)

                        self.logger.info("[%d] START => %s (connection)",
                                         group_job.id, worker_host.hostname)
                        self.controler.send_multipart([
                            str(worker_host.hostname), 'START',
                            str(group_job.id),
                            self.export_definition(group_job),
                            str(device_configuration), dispatcher_config,
                            env_str, env_dut_str
                        ])
                continue

            except jinja2.TemplateNotFound as exc:
                self.logger.error("[%d] Template not found: '%s'", job.id,
                                  exc.message)
                msg = "Infrastructure error: Template not found: '%s'" % \
                      exc.message
            except jinja2.TemplateSyntaxError as exc:
                self.logger.error(
                    "[%d] Template syntax error in '%s', line %d: %s", job.id,
                    exc.name, exc.lineno, exc.message)
                msg = "Infrastructure error: Template syntax error in '%s', line %d: %s" % \
                      (exc.name, exc.lineno, exc.message)
            except IOError as exc:
                self.logger.error("[%d] Unable to read '%s': %s", job.id,
                                  exc.filename, exc.strerror)
                msg = "Infrastructure error: cannot open '%s': %s" % \
                      (exc.filename, exc.strerror)
            except yaml.YAMLError as exc:
                self.logger.error("[%d] Unable to parse job definition: %s",
                                  job.id, exc)
                msg = "Infrastructure error: cannot parse job definition: %s" % \
                      exc

            self.logger.error("[%d] INCOMPLETE job", job.id)
            fail_job(job=job, fail_msg=msg, job_status=TestJob.INCOMPLETE)
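env.yaml, env-dut.yaml and the per-dispatcher file are all described as optional, so load_optional_yaml_file presumably returns an empty value for a missing file rather than raising. The helper below is a hedged sketch of that behaviour (the name comes from the code above; the body is an assumption, not the real implementation): it returns the raw text, since the result is sent verbatim as a message frame.

    import yaml

    def load_optional_yaml_file(filename):
        """Return the text of `filename`, or '' if it is unset or missing (sketch)."""
        if not filename:
            return ''
        try:
            with open(filename, 'r') as f_in:
                data = f_in.read()
        except IOError:
            return ''                # optional file: absence is not an error
        yaml.safe_load(data)         # raises yaml.YAMLError if the content is not valid YAML
        return data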
Example #5
0
    def process_jobs(self, options):
        for job in TestJob.objects.filter(
                Q(status=TestJob.SUBMITTED) & Q(is_pipeline=True) & ~Q(actual_device=None))\
                .order_by('-health_check', '-priority', 'submit_time', 'target_group', 'id'):
            if job.dynamic_connection:
                # A secondary connection must be made from a dispatcher local to the host device
                # to allow for local firewalls etc. So the secondary connection is started on the
                # remote worker of the "nominated" host.
                # FIXME:
                device = None
                worker_host = job.lookup_worker
                self.logger.info("[%d] START => %s (connection)", job.id,
                                 worker_host.hostname)
            else:
                device = select_device(job, self.dispatchers)
                if not device:
                    continue
                # selecting device can change the job
                job = TestJob.objects.get(id=job.id)
                self.logger.info("[%d] Assigning %s device", job.id, device)
                if job.actual_device is None:
                    device = job.requested_device
                    if not device.worker_host:
                        msg = "Infrastructure error: Invalid worker information"
                        self.logger.error("[%d] %s", job.id, msg)
                        fail_job(job, msg, TestJob.INCOMPLETE)
                        continue

                    # Launch the job
                    create_job(job, device)
                    self.logger.info("[%d] START => %s (%s)", job.id,
                                     device.worker_host.hostname, device.hostname)
                    worker_host = device.worker_host
                else:
                    device = job.actual_device
                    if not device.worker_host:
                        msg = "Infrastructure error: Invalid worker information"
                        self.logger.error("[%d] %s", job.id, msg)
                        fail_job(job, msg, TestJob.INCOMPLETE)
                        continue
                    self.logger.info("[%d] START => %s (%s) (retrying)", job.id,
                                     device.worker_host.hostname, device.hostname)
                    worker_host = device.worker_host
            try:
                # Load job definition to get the variables for template
                # rendering
                job_def = yaml.load(job.definition)
                job_ctx = job_def.get('context', {})

                # Load device configuration
                device_configuration = '' \
                    if job.dynamic_connection else device.load_device_configuration(job_ctx)

                # Load env.yaml, env-dut.yaml and dispatcher configuration
                # All three are optional
                env_str = load_optional_yaml_file(options['env'])
                env_dut_str = load_optional_yaml_file(options['env_dut'])
                dispatcher_config_file = os.path.join(options['dispatchers_config'],
                                                      "%s.yaml" % worker_host.hostname)
                dispatcher_config = load_optional_yaml_file(dispatcher_config_file)

                if job.is_multinode:
                    for group_job in job.sub_jobs_list:
                        if group_job.dynamic_connection:
                            # to get this far, the rest of the multinode group must also be ready
                            # so start the dynamic connections
                            # FIXME: rationalise and streamline
                            self.controler.send_multipart(
                                [str(worker_host.hostname),
                                 'START', str(group_job.id),
                                 self.export_definition(group_job),
                                 str(device_configuration),
                                 dispatcher_config,
                                 env_str, env_dut_str])

                self.controler.send_multipart(
                    [str(worker_host.hostname),
                     'START', str(job.id),
                     self.export_definition(job),
                     str(device_configuration),
                     dispatcher_config,
                     env_str, env_dut_str])
                continue

            except jinja2.TemplateNotFound as exc:
                self.logger.error("[%d] Template not found: '%s'",
                                  job.id, exc.message)
                msg = "Infrastructure error: Template not found: '%s'" % \
                      exc.message
            except jinja2.TemplateSyntaxError as exc:
                self.logger.error("[%d] Template syntax error in '%s', line %d: %s",
                                  job.id, exc.name, exc.lineno, exc.message)
                msg = "Infrastructure error: Template syntax error in '%s', line %d: %s" % \
                      (exc.name, exc.lineno, exc.message)
            except IOError as exc:
                self.logger.error("[%d] Unable to read '%s': %s",
                                  job.id, exc.filename, exc.strerror)
                msg = "Infrastructure error: cannot open '%s': %s" % \
                      (exc.filename, exc.strerror)
            except yaml.YAMLError as exc:
                self.logger.error("[%d] Unable to parse job definition: %s",
                                  job.id, exc)
                msg = "Infrastructure error: cannot parse job definition: %s" % \
                      exc

            self.logger.error("[%d] INCOMPLETE job", job.id)
            fail_job(job=job, fail_msg=msg, job_status=TestJob.INCOMPLETE)
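In each of these versions the START message is a list of frames whose first element is the worker's hostname, which suggests self.controler (spelling as in the code) is a ZMQ ROUTER socket routing on the worker's identity. Below is a minimal pyzmq sketch of that framing, with an illustrative endpoint and UTF-8 encoded frames; the real master/worker protocol may differ.

    import zmq

    context = zmq.Context.instance()
    controler = context.socket(zmq.ROUTER)   # first frame selects the connected worker
    controler.bind('tcp://*:5556')           # illustrative endpoint only

    def send_start(worker_hostname, job_id, definition, device_config,
                   dispatcher_config, env_str, env_dut_str):
        """Send a START message shaped like the frames built in process_jobs()."""
        frames = [worker_hostname, 'START', str(job_id), definition,
                  device_config, dispatcher_config, env_str, env_dut_str]
        controler.send_multipart([f.encode('utf-8') for f in frames])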
Example #6
0
    def process_jobs(self, options):
        for job in TestJob.objects.filter(
                Q(status=TestJob.SUBMITTED) & Q(is_pipeline=True) & ~Q(actual_device=None))\
                .order_by('-health_check', '-priority', 'submit_time', 'target_group', 'id'):
            if job.dynamic_connection:
                # A secondary connection must be made from a dispatcher local to the host device
                # to allow for local firewalls etc. So the secondary connection is started on the
                # remote worker of the "nominated" host.
                # FIXME:
                device = None
                worker_host = job.lookup_worker
                self.logger.info("[%d] START => %s (connection)", job.id,
                                 worker_host.hostname)
            else:
                device = select_device(job, self.dispatchers)
                if not device:
                    return False
                # selecting device can change the job
                job = TestJob.objects.get(id=job.id)
                self.logger.info("[%d] Assigning %s device", job.id, device)
                if job.actual_device is None:
                    device = job.requested_device
                    if not device.worker_host:
                        msg = "Infrastructure error: Invalid worker information"
                        self.logger.error("[%d] %s", job.id, msg)
                        fail_job(job, msg, TestJob.INCOMPLETE)
                        return False

                    # Launch the job
                    create_job(job, device)
                    self.logger.info("[%d] START => %s (%s)", job.id,
                                     device.worker_host.hostname,
                                     device.hostname)
                    worker_host = device.worker_host
                else:
                    device = job.actual_device
                    if not device.worker_host:
                        msg = "Infrastructure error: Invalid worker information"
                        self.logger.error("[%d] %s", job.id, msg)
                        fail_job(job, msg, TestJob.INCOMPLETE)
                        return False
                    self.logger.info("[%d] START => %s (%s) (retrying)",
                                     job.id, device.worker_host.hostname,
                                     device.hostname)
                    worker_host = device.worker_host
            try:
                # Load job definition to get the variables for template
                # rendering
                job_def = yaml.load(job.definition)
                job_ctx = job_def.get('context', {})

                # Load device configuration
                device_configuration = None \
                    if job.dynamic_connection else device.load_device_configuration(job_ctx)

                env_str = get_env_string(options['env'])
                env_dut_str = get_env_string(options['env_dut'])

                if job.is_multinode:
                    for group_job in job.sub_jobs_list:
                        if group_job.dynamic_connection:
                            # to get this far, the rest of the multinode group must also be ready
                            # so start the dynamic connections
                            # FIXME: rationalise and streamline
                            self.controler.send_multipart([
                                str(worker_host.hostname), 'START',
                                str(group_job.id),
                                self.export_definition(group_job),
                                str(device_configuration), env_str, env_dut_str
                            ])

                self.controler.send_multipart([
                    str(worker_host.hostname), 'START',
                    str(job.id),
                    self.export_definition(job),
                    str(device_configuration), env_str, env_dut_str
                ])

            except (jinja2.TemplateError, IOError, yaml.YAMLError) as exc:
                if isinstance(exc, jinja2.TemplateNotFound):
                    self.logger.error("Template not found: '%s'", exc.message)
                    msg = "Infrastructure error: Template not found: '%s'" % \
                          exc.message
                elif isinstance(exc, jinja2.TemplateSyntaxError):
                    self.logger.error(
                        "Template syntax error in '%s', line %d: %s", exc.name,
                        exc.lineno, exc.message)
                    msg = "Infrastructure error: Template syntax error in '%s', line %d: %s" % \
                          (exc.name, exc.lineno, exc.message)
                elif isinstance(exc, IOError):
                    self.logger.error("Unable to read '%s': %s",
                                      options['env'], exc.strerror)
                    msg = "Infrastructure error: cannot open '%s': %s" % \
                          (options['env'], exc.strerror)
                elif isinstance(exc, yaml.YAMLError):
                    self.logger.error("Unable to parse job definition: %s",
                                      exc)
                    msg = "Infrastructure error: cannot parse job definition: %s" % \
                          exc
                else:
                    self.logger.exception(exc)
                    msg = "Infrastructure error: %s" % exc.message

                self.logger.error("[%d] INCOMPLETE job", job.id)
                fail_job(job=job, fail_msg=msg, job_status=TestJob.INCOMPLETE)
        return True
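This version reads the optional env files through get_env_string() instead of load_optional_yaml_file(). Judging by the IOError handler, which reports options['env'] with exc.strerror, the helper most likely lets a failed read propagate; the sketch below is an assumption about its shape, not the actual implementation.

    def get_env_string(filename):
        """Return the text of `filename`, or '' when no file is configured (sketch)."""
        if not filename:
            return ''
        with open(filename, 'r') as f_in:   # IOError propagates to the caller's handler
            return f_in.read()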
Example #7
0
    def process_jobs(self, options):
        for job in TestJob.objects.filter(
                Q(status=TestJob.SUBMITTED) & Q(is_pipeline=True) & ~Q(actual_device=None))\
                .order_by('-health_check', '-priority', 'submit_time', 'target_group', 'id'):
            if job.dynamic_connection:
                # A secondary connection must be made from a dispatcher local to the host device
                # to allow for local firewalls etc. So the secondary connection is started on the
                # remote worker of the "nominated" host.
                # FIXME:
                worker_host = job.lookup_worker
                self.logger.info("[%d] START => %s (connection)", job.id,
                                 worker_host.hostname)
            else:
                device = select_device(job, self.dispatchers)
                if not device:
                    return False
                # selecting device can change the job
                job = TestJob.objects.get(id=job.id)
                self.logger.info("[%d] Assigning %s device", job.id, device)
                if job.actual_device is None:
                    device = job.requested_device
                    if not device.worker_host:
                        msg = "Infrastructure error: Invalid worker information"
                        self.logger.error("[%d] %s", job.id, msg)
                        fail_job(job, msg, TestJob.INCOMPLETE)
                        return False

                    # Launch the job
                    create_job(job, device)
                    self.logger.info("[%d] START => %s (%s)", job.id,
                                     device.worker_host.hostname, device.hostname)
                    worker_host = device.worker_host
                else:
                    device = job.actual_device
                    if not device.worker_host:
                        msg = "Infrastructure error: Invalid worker information"
                        self.logger.error("[%d] %s", job.id, msg)
                        fail_job(job, msg, TestJob.INCOMPLETE)
                        return False
                    self.logger.info("[%d] START => %s (%s) (retrying)", job.id,
                                     device.worker_host.hostname, device.hostname)
                    worker_host = device.worker_host
            try:
                # Load job definition to get the variables for template
                # rendering
                job_def = yaml.load(job.definition)
                job_ctx = job_def.get('context', {})

                # Load device configuration
                device_configuration = None \
                    if job.dynamic_connection else device.load_device_configuration(job_ctx)

                if job.is_multinode:
                    for group_job in job.sub_jobs_list:
                        if group_job.dynamic_connection:
                            # to get this far, the rest of the multinode group must also be ready
                            # so start the dynamic connections
                            # FIXME: rationalise and streamline
                            self.controler.send_multipart(
                                [str(worker_host.hostname),
                                 'START', str(group_job.id), self.export_definition(group_job),
                                 str(device_configuration),
                                 get_env_string(options['env']),
                                 get_env_string(options['env_dut'])])

                self.controler.send_multipart(
                    [str(worker_host.hostname),
                     'START', str(job.id), self.export_definition(job),
                     str(device_configuration),
                     get_env_string(options['env']), get_env_string(options['env_dut'])])

            except (jinja2.TemplateError, IOError, yaml.YAMLError) as exc:
                if isinstance(exc, jinja2.TemplateNotFound):
                    self.logger.error("Template not found: '%s'", exc.message)
                    msg = "Infrastructure error: Template not found: '%s'" % \
                          exc.message
                elif isinstance(exc, jinja2.TemplateSyntaxError):
                    self.logger.error("Template syntax error in '%s', line %d: %s",
                                      exc.name, exc.lineno, exc.message)
                    msg = "Infrastructure error: Template syntax error in '%s', line %d: %s" % \
                          (exc.name, exc.lineno, exc.message)
                elif isinstance(exc, IOError):
                    self.logger.error("Unable to read '%s': %s",
                                      options['env'], exc.strerror)
                    msg = "Infrastructure error: cannot open '%s': %s" % \
                          (options['env'], exc.strerror)
                elif isinstance(exc, yaml.YAMLError):
                    self.logger.error("Unable to parse job definition: %s",
                                      exc)
                    msg = "Infrastructure error: cannot parse job definition: %s" % \
                          exc
                else:
                    self.logger.exception(exc)
                    msg = "Infrastructure error: %s" % exc.message

                self.logger.error("[%d] INCOMPLETE job", job.id)
                job.status = TestJob.INCOMPLETE
                if job.dynamic_connection:
                    job.failure_comment = msg
                    job.save()
                else:
                    new_status = Device.IDLE
                    device.state_transition_to(
                        new_status,
                        message=msg,
                        job=job)
                    device.status = new_status
                    device.current_job = None
                    job.failure_comment = msg
                    job.save()
                    device.save()
        return True
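The closing except block in this last version inlines what the earlier versions delegate to fail_job(): mark the job INCOMPLETE, record the failure comment, and, for jobs that hold a device, transition it back to IDLE and detach the current job. Below is a hedged sketch of a helper capturing that shared logic, mirroring the inline code above; it is illustrative only, and the real fail_job may do more (notifications, log handling, and so on).

    def fail_job(job, fail_msg, job_status=TestJob.INCOMPLETE):
        """Mark `job` as failed and release its device (sketch of the logic inlined above)."""
        job.status = job_status
        job.failure_comment = fail_msg
        job.save()
        device = job.actual_device
        if device is not None and not job.dynamic_connection:
            device.state_transition_to(Device.IDLE, message=fail_msg, job=job)
            device.status = Device.IDLE
            device.current_job = None
            device.save()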