Exemple #1
0
 def force_prompt_wait(self, remaining=None):
     """
     One of the challenges we face is that kernel log messages can appear
     half way through a shell prompt.  So, if things are taking a while,
     we send a newline along to maybe provoke a new prompt.  We wait for
     half the timeout period and then wait for one tenth of the timeout
     6 times (so we wait for 1.1 times the timeout period overall).
     :return: the index into the connection.prompt_str list
     """
     logger = logging.getLogger('dispatcher')
     prompt_wait_count = 0
     if not remaining:
         return self.wait()
     # connection_prompt_limit
     partial_timeout = remaining / 2.0
     while True:
         try:
             return self.raw_connection.expect(self.prompt_str, timeout=partial_timeout)
         except (pexpect.TIMEOUT, TestError) as exc:
             if prompt_wait_count < 6:
                 logger.warning(
                     '%s: Sending %s in case of corruption. Connection timeout %s, retry in %s',
                     exc, self.check_char, seconds_to_str(remaining), seconds_to_str(partial_timeout))
                 logger.debug("pattern: %s", self.prompt_str)
                 prompt_wait_count += 1
                 partial_timeout = remaining / 10
                 self.sendline(self.check_char)
                 continue
             else:
                 # TODO: is someone expecting pexpect.TIMEOUT?
                 raise
Exemple #2
0
 def force_prompt_wait(self, remaining=None):
     """
     One of the challenges we face is that kernel log messages can appear
     half way through a shell prompt.  So, if things are taking a while,
     we send a newline along to maybe provoke a new prompt.  We wait for
     half the timeout period and then wait for one tenth of the timeout
     6 times (so we wait for 1.1 times the timeout period overall).
     :return: the index into the connection.prompt_str list
     """
     logger = logging.getLogger('dispatcher')
     prompt_wait_count = 0
     if not remaining:
         return self.wait()
     # connection_prompt_limit
     partial_timeout = remaining / 2.0
     while True:
         try:
             return self.raw_connection.expect(self.prompt_str, timeout=partial_timeout)
         except (pexpect.TIMEOUT, TestError) as exc:
             if prompt_wait_count < 6:
                 logger.warning(
                     '%s: Sending %s in case of corruption. Connection timeout %s, retry in %s',
                     exc, self.check_char, seconds_to_str(remaining), seconds_to_str(partial_timeout))
                 logger.debug("pattern: %s", self.prompt_str)
                 prompt_wait_count += 1
                 partial_timeout = remaining / 10
                 self.sendline(self.check_char)
                 continue
             else:
                 # TODO: is someone expecting pexpect.TIMEOUT?
                 raise
Exemple #3
0
    def run_actions(self, connection, max_end_time):  # pylint: disable=too-many-branches,too-many-statements,too-many-locals
        for action in self.actions:
            failed = False
            namespace = action.parameters.get('namespace', 'common')
            # Begin the action
            try:
                parent = self.parent if self.parent else self.job
                with action.timeout(parent,
                                    max_end_time) as action_max_end_time:
                    # Add action start timestamp to the log message
                    # Log in INFO for root actions and in DEBUG for the other actions
                    timeout = seconds_to_str(action_max_end_time -
                                             action.timeout.start)
                    msg = 'start: %s %s (timeout %s) [%s]' % (
                        action.level, action.name, timeout, namespace)
                    if self.parent is None:
                        action.logger.info(msg)
                    else:
                        action.logger.debug(msg)

                    new_connection = action.run(connection,
                                                action_max_end_time)
            except LAVAError as exc:
                action.logger.exception(str(exc))
                #str_errormsg =    "[%s]%s" % (exc.error_type,exc.error_help)
                #self.repexp2up(str_errormsg)
                self.repexp2up(str(exc))

                # allows retries without setting errors, which make the job incomplete.
                failed = True
                action.results = {'fail': str(exc)}
                self._diagnose(connection)
                raise
            except Exception as exc:
                action.logger.exception(traceback.format_exc())
                #self.repexp2up(traceback.format_exc())
                # allows retries without setting errors, which make the job incomplete.
                failed = True
                action.results = {'fail': str(exc)}
                # Raise a LAVABug that will be correctly classified later
                raise LAVABug(str(exc))
            finally:
                # Add action end timestamp to the log message
                duration = round(action.timeout.elapsed_time)
                msg = "end: %s %s (duration %s) [%s]" % (
                    action.level, action.name, seconds_to_str(duration),
                    namespace)
                if self.parent is None:
                    action.logger.info(msg)
                else:
                    action.logger.debug(msg)
                # set results including retries and failed actions
                action.log_action_results(fail=failed)

            if new_connection:
                connection = new_connection
        return connection
Exemple #4
0
    def run(self, connection, max_end_time):
        self.sleep = self.parameters.get("failure_retry_interval", self.sleep)

        retries = 0
        has_failed = False
        self.call_protocols()
        while retries < self.max_retries:
            retries += 1
            try:
                connection = self.pipeline.run_actions(connection,
                                                       max_end_time)
                if "repeat" not in self.parameters:
                    # failure_retry returns on first success. repeat returns only at max_retries.
                    return connection
            # Do not retry for LAVABug (as it's a bug in LAVA)
            except (InfrastructureError, JobError, TestError) as exc:
                has_failed = True
                # Print the error message
                self.logger.error(
                    "%s failed: %d of %d attempts. '%s'",
                    self.name,
                    retries,
                    self.max_retries,
                    exc,
                )
                # Cleanup the action to allow for a safe restart
                self.cleanup(connection)

                # re-raise if this is the last loop
                if retries == self.max_retries:
                    self.errors = "%s retries failed for %s" % (retries,
                                                                self.name)
                    raise

                # Wait some time before retrying
                time.sleep(self.sleep)
                # Restart max_end_time or the retry on a timeout fails with duration < 0
                max_end_time += time.time() - self.timeout.start
                self.timeout.start = time.time()
                self.logger.warning(
                    "Retrying: %s %s (timeout %s)",
                    self.level,
                    self.name,
                    seconds_to_str(max_end_time - self.timeout.start),
                )

        # If we are repeating, check that all repeat were a success.
        if has_failed:
            # tried and failed
            raise JobError("%s retries failed for %s" % (retries, self.name))
        return connection
Exemple #5
0
 def wait(self, connection, max_end_time=None):
     if not connection:
         return
     if not connection.connected:
         self.logger.debug("Already disconnected")
         return
     if not max_end_time:
         max_end_time = self.timeout.duration + self.timeout.start
     remaining = max_end_time - time.time()
     # FIXME: connection.prompt_str needs to be always a list
     # bootloader_prompt is one which does not get set that way
     # also need functionality to clear the list at times.
     self.logger.debug("%s: Wait for prompt %s (timeout %s)", self.name,
                       connection.prompt_str, seconds_to_str(remaining))
     if self.force_prompt:
         return connection.force_prompt_wait(remaining)
     else:
         return connection.wait(max_end_time)
Exemple #6
0
def test_seconds_to_str():
    assert seconds_to_str(0) == "00:00:00"
    assert seconds_to_str(1) == "00:00:01"
    assert seconds_to_str(62) == "00:01:02"
    assert seconds_to_str(147) == "00:02:27"
    assert seconds_to_str(3641) == "01:00:41"
Exemple #7
0
    def run_actions(self, connection, max_end_time):
        for action in self.actions:
            failed = False
            namespace = action.parameters.get("namespace", "common")
            # Begin the action
            try:
                parent = self.parent if self.parent else self.job
                with action.timeout(parent,
                                    max_end_time) as action_max_end_time:
                    # Add action start timestamp to the log message
                    # Log in INFO for root actions and in DEBUG for the other actions
                    timeout = seconds_to_str(action_max_end_time -
                                             action.timeout.start)
                    msg = "start: %s %s (timeout %s) [%s]" % (
                        action.level,
                        action.name,
                        timeout,
                        namespace,
                    )
                    if self.parent is None:
                        action.logger.info(msg)
                    else:
                        action.logger.debug(msg)

                    new_connection = action.run(connection,
                                                action_max_end_time)
            except LAVATimeoutError as exc:
                action.logger.exception(str(exc))
                # allows retries without setting errors, which make the job incomplete.
                failed = True
                action.results = {"fail": str(exc)}
                if action.timeout.can_skip(action.parameters):
                    if self.parent is None:
                        action.logger.warning(
                            "skip_timeout is set for %s - continuing to next action block."
                            % (action.name))
                    else:
                        raise
                    new_connection = None
                else:
                    raise TestError(str(exc))
            except LAVAError as exc:
                action.logger.exception(str(exc))
                # allows retries without setting errors, which make the job incomplete.
                failed = True
                action.results = {"fail": str(exc)}
                self._diagnose(connection)
                raise
            except Exception as exc:
                action.logger.exception(traceback.format_exc())
                # allows retries without setting errors, which make the job incomplete.
                failed = True
                action.results = {"fail": str(exc)}
                # Raise a LAVABug that will be correctly classified later
                raise LAVABug(str(exc))
            finally:
                # Add action end timestamp to the log message
                duration = round(action.timeout.elapsed_time)
                msg = "end: %s %s (duration %s) [%s]" % (
                    action.level,
                    action.name,
                    seconds_to_str(duration),
                    namespace,
                )
                if self.parent is None:
                    action.logger.info(msg)
                else:
                    action.logger.debug(msg)
                # set results including retries and failed actions
                action.log_action_results(fail=failed)

            if new_connection:
                connection = new_connection
        return connection
Exemple #8
0
    def parse_failures(cls, connection, action=None, max_end_time=None):  # pylint: disable=too-many-branches
        """
        Returns a list of dictionaries of matches for failure strings and
        other kernel messages.

        If kernel_prompts are in use, a success result is returned containing
        details of which of KERNEL_FREE_UNUSED_MSG and KERNEL_FREE_INIT_MSG
        were parsed. If the returned dictionary only contains this success
        message, then the the kernel-messages action can be deemed as pass.

        The init prompts exclude these messages, so a successful init parse
        returns an empty dictionary as init is generally processed by actions
        which do their own result parsing. If the returned list is not
        empty, add it to the results of the calling action.

        When multiple messages are identified, the list contains one dictionary
        for each message found.

        Always returns a list, the list may be empty.
        """
        results = []
        init = False
        if not connection:
            return results
        if cls.MESSAGE_CHOICES[cls.FREE_UNUSED][1] in connection.prompt_str:
            if cls.MESSAGE_CHOICES[cls.FREE_INIT][1] in connection.prompt_str:
                init = True
        remaining = max_end_time - time.time()

        while True:
            if action:
                action.logger.debug("[%s] Waiting for messages, (timeout %s)",
                                    action.name, seconds_to_str(remaining))
            try:
                index = connection.force_prompt_wait(remaining)
            except (pexpect.EOF, pexpect.TIMEOUT, TestError):
                if action:
                    msg = "Failed to match - connection timed out handling messages."
                    action.logger.warning(msg)
                    action.errors = msg
                break

            if action and index:
                action.logger.debug("Matched prompt #%s: %s", index,
                                    connection.prompt_str[index])
            message = connection.raw_connection.after
            if index == cls.ALERT or index == cls.TRACE:
                if action:
                    action.logger.warning(
                        "%s: %s" %
                        (action.name, cls.MESSAGE_CHOICES[index][2]))
                # ALERT or TRACE may need a newline to force a prompt
                connection.sendline(connection.check_char)
                # this is allowable behaviour, not a failure.
                results.append({
                    cls.MESSAGE_CHOICES[index][2]:
                    cls.MESSAGE_CHOICES[index][1],
                    'message':
                    message[:METADATA_MESSAGE_LIMIT]
                })
                continue
            elif index == cls.PANIC or index == cls.EXCEPTION:
                if action:
                    action.logger.error(
                        "%s %s" % (action.name, cls.MESSAGE_CHOICES[index][2]))
                results.append({
                    cls.MESSAGE_CHOICES[index][2]:
                    cls.MESSAGE_CHOICES[index][1],
                    'message':
                    message[:METADATA_MESSAGE_LIMIT]
                })
                continue
            elif index and index >= cls.FREE_UNUSED:
                if init and index <= cls.FREE_INIT:
                    results.append({
                        cls.MESSAGE_CHOICES[index][2]:
                        cls.MESSAGE_CHOICES[index][1],
                        'message':
                        'kernel-messages'
                    })
                    continue
                else:
                    results.append({'success': connection.prompt_str[index]})
                    break
            else:
                break
        # allow calling actions to pick up failures
        # without overriding their own success result, if any.
        return results
Exemple #9
0
    def parse_failures(cls, connection, action=None, max_end_time=None):  # pylint: disable=too-many-branches
        """
        Returns a list of dictionaries of matches for failure strings and
        other kernel messages.

        If kernel_prompts are in use, a success result is returned containing
        details of which of KERNEL_FREE_UNUSED_MSG and KERNEL_FREE_INIT_MSG
        were parsed. If the returned dictionary only contains this success
        message, then the the kernel-messages action can be deemed as pass.

        The init prompts exclude these messages, so a successful init parse
        returns an empty dictionary as init is generally processed by actions
        which do their own result parsing. If the returned list is not
        empty, add it to the results of the calling action.

        When multiple messages are identified, the list contains one dictionary
        for each message found.

        Always returns a list, the list may be empty.
        """
        results = []  # wrap inside a dict to use in results
        res = "pass"
        halt = None
        init = False
        start = time.time()
        if not connection:
            return results
        if cls.MESSAGE_CHOICES[cls.FREE_UNUSED][1] in connection.prompt_str:
            if cls.MESSAGE_CHOICES[cls.FREE_INIT][1] in connection.prompt_str:
                init = True
        remaining = max_end_time - time.time()

        while True:
            if action:
                action.logger.debug("[%s] Waiting for messages, (timeout %s)",
                                    action.name, seconds_to_str(remaining))
            try:
                index = connection.force_prompt_wait(remaining)
            except (pexpect.EOF, pexpect.TIMEOUT, TestError):
                if action:
                    msg = "Failed to match - connection timed out handling messages."
                    action.logger.warning(msg)
                    action.errors = msg
                break

            if action and index:
                action.logger.debug("Matched prompt #%s: %s", index,
                                    connection.prompt_str[index])
            message = connection.raw_connection.after
            if index == cls.TRACE or index == cls.EXCEPTION:
                res = "fail"
                if action:
                    action.logger.warning(
                        "%s: %s" %
                        (action.name, cls.MESSAGE_CHOICES[index][2]))
                # TRACE may need a newline to force a prompt
                connection.sendline(connection.check_char)
                # this is allowable behaviour, not a failure.
                results.append({
                    cls.MESSAGE_CHOICES[index][2]:
                    cls.MESSAGE_CHOICES[index][1],
                    'message':
                    message[:METADATA_MESSAGE_LIMIT]
                })
                continue
            elif index == cls.PANIC:
                res = "fail"
                if action:
                    action.logger.error(
                        "%s %s" % (action.name, cls.MESSAGE_CHOICES[index][2]))
                results.append({
                    cls.MESSAGE_CHOICES[index][2]:
                    cls.MESSAGE_CHOICES[index][1],
                    'message':
                    message[:METADATA_MESSAGE_LIMIT]
                })
                halt = message[:METADATA_MESSAGE_LIMIT]
                break
            elif index and index >= cls.FREE_UNUSED:
                if init and index <= cls.FREE_INIT:
                    results.append({
                        cls.MESSAGE_CHOICES[index][2]:
                        cls.MESSAGE_CHOICES[index][1],
                        'message':
                        'kernel-messages'
                    })
                    continue
                else:
                    results.append({'success': connection.prompt_str[index]})
                    break
            else:
                break
        # record a specific result for the kernel messages for later debugging.
        if action and isinstance(action.logger, YAMLLogger):
            action.logger.results({  # pylint: disable=no-member
                "definition":
                "lava",
                "namespace":
                action.parameters.get('namespace', 'common'),
                "case":
                cls.name,
                "level":
                action.level,
                "duration":
                "%.02f" % (time.time() - start),
                "result":
                res,
                "extra": {
                    'extra': results
                }
            })
        if halt:
            # end this job early, kernel is unable to continue
            raise JobError(halt)

        # allow calling actions to also pick up failures
        # without overriding their own success result, if any.
        return results
Exemple #10
0
    def parse_failures(
        cls, connection, action, max_end_time, fail_msg, auto_login=False
    ):
        """
        Returns a list of dictionaries of matches for failure strings and
        other kernel messages.

        If kernel_prompts are in use, a success result is returned containing
        details of which of KERNEL_FREE_UNUSED_MSG and KERNEL_FREE_INIT_MSG
        were parsed. If the returned dictionary only contains this success
        message, then the the kernel-messages action can be deemed as pass.

        The init prompts exclude these messages, so a successful init parse
        returns an empty dictionary as init is generally processed by actions
        which do their own result parsing. If the returned list is not
        empty, add it to the results of the calling action.

        When multiple messages are identified, the list contains one dictionary
        for each message found.

        Always returns a list, the list may be empty.
        """
        results = []  # wrap inside a dict to use in results
        res = "pass"
        halt = None
        init = False
        start = time.time()
        if not connection:
            return results
        if cls.MESSAGE_CHOICES[cls.FREE_UNUSED][1] in connection.prompt_str:
            if cls.MESSAGE_CHOICES[cls.FREE_INIT][1] in connection.prompt_str:
                init = True
        remaining = max_end_time - start
        if remaining < 0:
            raise LAVABug(
                "Invalid time remaining: max: %s start: %s" % (max_end_time, start)
            )

        while True:
            action.logger.debug(
                "[%s] Waiting for messages, (timeout %s)",
                action.name,
                seconds_to_str(remaining),
            )
            try:
                if action.force_prompt:
                    index = connection.force_prompt_wait(remaining)
                else:
                    index = connection.wait(max_end_time)
            except (pexpect.EOF, pexpect.TIMEOUT, TestError):
                msg = "Failed to match - connection timed out handling messages."
                action.logger.warning(msg)
                action.errors = msg
                break

            if index:
                action.logger.debug(
                    "Matched prompt #%s: %s", index, connection.prompt_str[index]
                )
            message = connection.raw_connection.after
            if index in [cls.TRACE, cls.EXCEPTION, cls.WARNING, cls.BUG]:
                res = "fail"
                action.logger.warning(
                    "%s: %s" % (action.name, cls.MESSAGE_CHOICES[index][2])
                )
                # TRACE may need a newline to force a prompt (only when not using auto-login)
                if not auto_login:
                    connection.sendline(connection.check_char)
                # this is allowable behaviour, not a failure.
                results.append(
                    {
                        cls.MESSAGE_CHOICES[index][2]: cls.MESSAGE_CHOICES[index][1],
                        "message": message[:METADATA_MESSAGE_LIMIT],
                    }
                )
                continue
            elif index == cls.PANIC:
                res = "fail"
                action.logger.error(
                    "%s %s" % (action.name, cls.MESSAGE_CHOICES[index][2])
                )
                results.append(
                    {
                        cls.MESSAGE_CHOICES[index][2]: cls.MESSAGE_CHOICES[index][1],
                        "message": message[:METADATA_MESSAGE_LIMIT],
                    }
                )
                halt = message[:METADATA_MESSAGE_LIMIT]
                break
            elif fail_msg and index and fail_msg == connection.prompt_str[index]:
                res = "fail"
                # user has declared this message to be terminal for this test job.
                halt = "Matched job-specific failure message: '%s'" % fail_msg
                action.logger.error("%s %s" % (action.name, halt))
                results.append({"message": "kernel-messages"})
            elif index and index == cls.FREE_UNUSED or index == cls.FREE_INIT:
                if init and index <= cls.FREE_INIT:
                    results.append(
                        {
                            cls.MESSAGE_CHOICES[index][2]: cls.MESSAGE_CHOICES[index][
                                1
                            ],
                            "message": "kernel-messages",
                        }
                    )
                    continue
                else:
                    results.append({"success": connection.prompt_str[index]})
                    break
            else:
                break
        # record a specific result for the kernel messages for later debugging.
        if isinstance(action.logger, YAMLLogger):
            action.logger.results(
                {
                    "definition": "lava",
                    "namespace": action.parameters.get("namespace", "common"),
                    "case": cls.name,
                    "level": action.level,
                    "duration": "%.02f" % (time.time() - start),
                    "result": res,
                    "extra": {"extra": results},
                }
            )
        if halt:
            # end this job early, kernel is unable to continue
            raise JobError(halt)

        # allow calling actions to also pick up failures
        # without overriding their own success result, if any.
        return results
Exemple #11
0
    def parse_failures(cls, connection, action=None, max_end_time=None):  # pylint: disable=too-many-branches
        """
        Returns a list of dictionaries of matches for failure strings and
        other kernel messages.

        If kernel_prompts are in use, a success result is returned containing
        details of which of KERNEL_FREE_UNUSED_MSG and KERNEL_FREE_INIT_MSG
        were parsed. If the returned dictionary only contains this success
        message, then the the kernel-messages action can be deemed as pass.

        The init prompts exclude these messages, so a successful init parse
        returns an empty dictionary as init is generally processed by actions
        which do their own result parsing. If the returned list is not
        empty, add it to the results of the calling action.

        When multiple messages are identified, the list contains one dictionary
        for each message found.

        Always returns a list, the list may be empty.
        """
        results = []
        init = False
        if not connection:
            return results
        if cls.MESSAGE_CHOICES[cls.FREE_UNUSED][1] in connection.prompt_str:
            if cls.MESSAGE_CHOICES[cls.FREE_INIT][1] in connection.prompt_str:
                init = True
        remaining = max_end_time - time.time()

        while True:
            if action:
                action.logger.debug(
                    "[%s] Waiting for messages, (timeout %s)",
                    action.name, seconds_to_str(remaining))
            try:
                index = connection.force_prompt_wait(remaining)
            except (pexpect.EOF, pexpect.TIMEOUT, TestError):
                if action:
                    msg = "Failed to match - connection timed out handling messages."
                    action.logger.warning(msg)
                    action.errors = msg
                break

            if action and index:
                action.logger.debug("Matched prompt #%s: %s", index, connection.prompt_str[index])
            message = connection.raw_connection.after
            if index == cls.ALERT or index == cls.TRACE:
                if action:
                    action.logger.warning("%s: %s" % (action.name, cls.MESSAGE_CHOICES[index][2]))
                # ALERT or TRACE may need a newline to force a prompt
                connection.sendline(connection.check_char)
                # this is allowable behaviour, not a failure.
                results.append({
                    cls.MESSAGE_CHOICES[index][2]: cls.MESSAGE_CHOICES[index][1],
                    'message': message[:METADATA_MESSAGE_LIMIT]
                })
                continue
            elif index == cls.PANIC or index == cls.EXCEPTION:
                if action:
                    action.logger.error("%s %s" % (action.name, cls.MESSAGE_CHOICES[index][2]))
                results.append({
                    cls.MESSAGE_CHOICES[index][2]: cls.MESSAGE_CHOICES[index][1],
                    'message': message[:METADATA_MESSAGE_LIMIT]
                })
                continue
            elif index and index >= cls.FREE_UNUSED:
                if init and index <= cls.FREE_INIT:
                    results.append({
                        cls.MESSAGE_CHOICES[index][2]: cls.MESSAGE_CHOICES[index][1],
                        'message': 'kernel-messages'
                    })
                    continue
                else:
                    results.append({
                        'success': connection.prompt_str[index]
                    })
                    break
            else:
                break
        # allow calling actions to pick up failures
        # without overriding their own success result, if any.
        return results